AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix
@ 2025-10-10 18:48 Ellen Pan
  2025-10-10 18:48 ` [PATCH v3 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
                   ` (5 more replies)
  0 siblings, 6 replies; 11+ messages in thread
From: Ellen Pan @ 2025-10-10 18:48 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

 - This change prepares for later patches that introduce a _v2 suffix for SRIOV critical regions

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 20 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 57 ++++++++++++++-------
 2 files changed, 49 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3328ab63376b..e95adf0407a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -686,7 +686,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 		/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
 		adev->virt.fw_reserve.p_pf2vf =
 			(struct amd_sriov_msg_pf2vf_info_header *)
-			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
 
 		amdgpu_virt_read_pf2vf_data(adev);
 	}
@@ -703,21 +703,21 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
 		if (adev->mman.fw_vram_usage_va) {
 			adev->virt.fw_reserve.p_pf2vf =
 				(struct amd_sriov_msg_pf2vf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
 			adev->virt.fw_reserve.p_vf2pf =
 				(struct amd_sriov_msg_vf2pf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
 			adev->virt.fw_reserve.ras_telemetry =
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
 		} else if (adev->mman.drv_vram_usage_va) {
 			adev->virt.fw_reserve.p_pf2vf =
 				(struct amd_sriov_msg_pf2vf_info_header *)
-				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
 			adev->virt.fw_reserve.p_vf2pf =
 				(struct amd_sriov_msg_vf2pf_info_header *)
-				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
 			adev->virt.fw_reserve.ras_telemetry =
-				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
 		}
 
 		amdgpu_virt_read_pf2vf_data(adev);
@@ -1304,7 +1304,7 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
 	checksum = host_telemetry->header.checksum;
 	used_size = host_telemetry->header.used_size;
 
-	if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+	if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
 		return 0;
 
 	tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
@@ -1383,7 +1383,7 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
 	checksum = host_telemetry->header.checksum;
 	used_size = host_telemetry->header.used_size;
 
-	if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+	if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
 		return -EINVAL;
 
 	cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
@@ -1515,7 +1515,7 @@ static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
 	checksum = host_telemetry->header.checksum;
 	used_size = host_telemetry->header.used_size;
 
-	if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+	if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
 		return 0;
 
 	tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 3a79ed7d8031..3b35154e2df6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -23,26 +23,47 @@
 #ifndef AMDGV_SRIOV_MSG__H_
 #define AMDGV_SRIOV_MSG__H_
 
-/* unit in kilobytes */
-#define AMD_SRIOV_MSG_VBIOS_OFFSET	     0
-#define AMD_SRIOV_MSG_VBIOS_SIZE_KB	     64
-#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
-#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB   4
-#define AMD_SRIOV_MSG_TMR_OFFSET_KB	     2048
-#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB	     2
-#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB	     64
 /*
- * layout
+ * layout v1
  * 0           64KB        65KB        66KB           68KB                   132KB
  * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
  * |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
  */
 
-#define AMD_SRIOV_MSG_SIZE_KB                   1
-#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB           AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
-#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB           (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB        (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB   (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
+/*
+ * layout v2 (offsets are dynamically allocated and the offsets below are examples)
+ * 0           1KB         64KB        65KB        66KB           68KB                   132KB
+ * |  INITD_H  |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
+ * |   1KB     |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
+ *
+ * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
+ */
+
+/* v1 layout sizes */
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1			64
+#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1			1
+#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1			1
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1		2
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1		64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1		\
+	(AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
+	 AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+
+/* v1 offsets */
+#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1			0
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1		AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB			2048
+#define AMD_SRIOV_MSG_SIZE_KB_V1			1
+#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1		AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
+#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1		\
+	(AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1		\
+	(AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1	\
+	(AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1		\
+	(AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
+	 AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
 
 /*
  * PF2VF history log:
@@ -436,12 +457,12 @@ unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned
 #define _stringification(s) #s
 
 _Static_assert(
-	sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
-	"amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
+	sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB_V1 << 10,
+	"amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB_V1) " KB");
 
 _Static_assert(
-	sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
-	"amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
+	sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB_V1 << 10,
+	"amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB_V1) " KB");
 
 _Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
 	       "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v3 2/6] drm/amdgpu: Add SRIOV crit_region_version support
  2025-10-10 18:48 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
@ 2025-10-10 18:48 ` Ellen Pan
  2025-10-13  4:57   ` Lazar, Lijo
  2025-10-10 18:48 ` [PATCH v3 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init Ellen Pan
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Ellen Pan @ 2025-10-10 18:48 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

1. Added enum amd_sriov_crit_region_version to support multiple versions
2. Added logic in SRIOV mailbox to recognize crit_region version during
   req_gpu_init_data

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  8 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  5 +++++
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c       | 23 ++++++++++++++-------
 4 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e95adf0407a0..3a6b0e1084d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -150,7 +150,8 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
 		virt->ops->req_init_data(adev);
 
 	if (adev->virt.req_init_data_ver > 0)
-		DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+		DRM_INFO("host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
+				 adev->virt.req_init_data_ver);
 	else
 		DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index d1172c8e58c4..36247a160aa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -262,6 +262,11 @@ struct amdgpu_virt_ras {
 
 DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
 
+struct amdgpu_virt_region {
+	uint32_t offset;
+	uint32_t size_kb;
+};
+
 /* GPU virtualization */
 struct amdgpu_virt {
 	uint32_t			caps;
@@ -289,6 +294,9 @@ struct amdgpu_virt {
 	bool ras_init_done;
 	uint32_t reg_access;
 
+	/* dynamic(v2) critical regions */
+	struct amdgpu_virt_region init_data_header;
+
 	/* vf2pf message */
 	struct delayed_work vf2pf_work;
 	uint32_t vf2pf_update_interval_ms;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 3b35154e2df6..b53caab5b706 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -65,6 +65,11 @@
 	(AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
 	 AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
 
+enum amd_sriov_crit_region_version {
+	GPU_CRIT_REGION_V1 = 1,
+	GPU_CRIT_REGION_V2 = 2,
+};
+
 /*
  * PF2VF history log:
  * v1 defined in amdgim
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index e5282a5d05d9..cd5b2f07edb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -222,12 +222,20 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
 				adev->virt.req_init_data_ver = 0;
 		} else {
 			if (req == IDH_REQ_GPU_INIT_DATA) {
-				adev->virt.req_init_data_ver =
-					RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
-
-				/* assume V1 in case host doesn't set version number */
-				if (adev->virt.req_init_data_ver < 1)
-					adev->virt.req_init_data_ver = 1;
+				switch (RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) {
+				case GPU_CRIT_REGION_V2:
+					adev->virt.req_init_data_ver = GPU_CRIT_REGION_V2;
+					adev->virt.init_data_header.offset =
+						RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
+					adev->virt.init_data_header.size_kb =
+						RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3);
+					break;
+				default:
+					adev->virt.req_init_data_ver = GPU_CRIT_REGION_V1;
+					adev->virt.init_data_header.offset = -1;
+					adev->virt.init_data_header.size_kb = 0;
+					break;
+				}
 			}
 		}
 
@@ -285,7 +293,8 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
 
 static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
 {
-	return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+	return xgpu_nv_send_access_requests_with_param(adev, IDH_REQ_GPU_INIT_DATA,
+			0, GPU_CRIT_REGION_V2, 0);
 }
 
 static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v3 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init
  2025-10-10 18:48 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
  2025-10-10 18:48 ` [PATCH v3 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
@ 2025-10-10 18:48 ` Ellen Pan
  2025-10-13  5:14   ` Lazar, Lijo
  2025-10-10 18:48 ` [PATCH v3 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV Ellen Pan
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 11+ messages in thread
From: Ellen Pan @ 2025-10-10 18:48 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

    1. Introduced amdgpu_virt_init_critical_region during VF init.
     - VFs use init_data_header.offset and init_data_header.size_kb
            transmitted via PF2VF mailbox to read the init data header
            from VRAM, then save the critical regions' offsets/sizes to
            adev->virt.crit_regn and adev->virt.crit_regn_tbl.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 113 ++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |   7 ++
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  31 ++++++
 4 files changed, 155 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 929936c8d87c..351cfe03a1aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2754,6 +2754,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 		r = amdgpu_virt_request_full_gpu(adev, true);
 		if (r)
 			return r;
+
+		r = amdgpu_virt_init_critical_region(adev);
+		if (r)
+			return r;
 	}
 
 	switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3a6b0e1084d7..6eca5e8a7375 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -843,6 +843,119 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
 	adev->virt.ras.cper_rptr = 0;
 }
 
+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
+{
+	uint32_t sum = 0;
+
+	if (buf_start >= buf_end)
+		return 0;
+
+	for (; buf_start < buf_end; buf_start++)
+		sum += buf_start[0];
+
+	return 0xffffffff - sum;
+}
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
+{
+	struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
+	uint32_t init_hdr_offset = adev->virt.init_data_header.offset;
+	uint32_t init_hdr_size = adev->virt.init_data_header.size_kb << 10;
+	uint64_t pos = 0;
+	uint64_t vram_size;
+	int r = 0;
+	uint8_t checksum = 0;
+
+	/* Skip below init if critical region version != v2 */
+	if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
+		return 0;
+
+	if (init_hdr_offset < 0) {
+		dev_err(adev->dev, "Invalid init header offset\n");
+		return -EINVAL;
+	}
+
+	vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+	if (!vram_size || vram_size == U32_MAX)
+		return -EINVAL;
+	vram_size <<= 20;
+
+	if ((init_hdr_offset + init_hdr_size) > vram_size) {
+		dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
+		return -EINVAL;
+	}
+
+	/* Allocate for init_data_hdr */
+	init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
+	if (!init_data_hdr)
+		return -ENOMEM;
+
+	pos = (uint64_t)init_hdr_offset;
+	amdgpu_device_vram_access(adev, pos, (uint32_t *)init_data_hdr,
+					sizeof(struct amd_sriov_msg_init_data_header), false);
+
+	switch (init_data_hdr->version) {
+	case GPU_CRIT_REGION_V2:
+		if (strncmp(init_data_hdr->signature, "INDA", 4) != 0) {
+			dev_err(adev->dev, "Invalid init data signature: %.4s\n",
+					init_data_hdr->signature);
+			r = -EINVAL;
+			goto out;
+		}
+
+		checksum = amdgpu_virt_crit_region_calc_checksum(
+				(uint8_t *)&init_data_hdr->initdata_offset,
+				(uint8_t *)init_data_hdr +
+				sizeof(struct amd_sriov_msg_init_data_header));
+		if (checksum != init_data_hdr->checksum) {
+			dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
+					checksum, init_data_hdr->checksum);
+			r = -EINVAL;
+			goto out;
+		}
+
+		/* Initialize critical region offsets */
+		adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
+			init_data_hdr->ip_discovery_offset;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
+			init_data_hdr->vbios_img_offset;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
+			init_data_hdr->ras_tele_info_offset;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
+			init_data_hdr->dataexchange_offset;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
+			init_data_hdr->bad_page_info_offset;
+
+		/* Initialize critical region sizes */
+		adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
+			init_data_hdr->ip_discovery_size_in_kb;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
+			init_data_hdr->vbios_img_size_in_kb;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
+			init_data_hdr->ras_tele_info_size_in_kb;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
+			init_data_hdr->dataexchange_size_in_kb;
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
+			init_data_hdr->bad_page_size_in_kb;
+
+		adev->virt.is_dynamic_crit_regn_enabled = true;
+		break;
+	default:
+		dev_err(adev->dev, "Invalid init header version: %u\n",
+				init_data_hdr->version);
+		r = -EINVAL;
+		goto out;
+	}
+
+out:
+	kfree(init_data_hdr);
+	init_data_hdr = NULL;
+
+	return r;
+}
+
 void amdgpu_virt_init(struct amdgpu_device *adev)
 {
 	bool is_sriov = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 36247a160aa6..f46edc03f57f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -52,6 +52,8 @@
 /* tonga/fiji use this offset */
 #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
 
+#define mmRCC_CONFIG_MEMSIZE	0xde3
+
 #define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
 
 enum amdgpu_sriov_vf_mode {
@@ -296,6 +298,9 @@ struct amdgpu_virt {
 
 	/* dynamic(v2) critical regions */
 	struct amdgpu_virt_region init_data_header;
+	struct amdgpu_virt_region crit_regn;
+	struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
+	bool is_dynamic_crit_regn_enabled;
 
 	/* vf2pf message */
 	struct delayed_work vf2pf_work;
@@ -432,6 +437,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
 void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
 void amdgpu_virt_init(struct amdgpu_device *adev);
 
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+
 bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
 int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
 void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index b53caab5b706..d15c256f9abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -70,6 +70,37 @@ enum amd_sriov_crit_region_version {
 	GPU_CRIT_REGION_V2 = 2,
 };
 
+/* v2 layout offset enum (in order of allocation) */
+enum amd_sriov_msg_table_id_enum {
+	AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
+	AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
+	AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
+	AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
+	AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
+	AMD_SRIOV_MSG_INITD_H_TABLE_ID,
+	AMD_SRIOV_MSG_MAX_TABLE_ID,
+};
+
+struct amd_sriov_msg_init_data_header {
+	char     signature[4];  /* "INDA"  */
+	uint32_t version;
+	uint32_t checksum;
+	uint32_t initdata_offset; /* 0 */
+	uint32_t initdata_size_in_kb; /* 5MB */
+	uint32_t valid_tables;
+	uint32_t vbios_img_offset;
+	uint32_t vbios_img_size_in_kb;
+	uint32_t dataexchange_offset;
+	uint32_t dataexchange_size_in_kb;
+	uint32_t ras_tele_info_offset;
+	uint32_t ras_tele_info_size_in_kb;
+	uint32_t ip_discovery_offset;
+	uint32_t ip_discovery_size_in_kb;
+	uint32_t bad_page_info_offset;
+	uint32_t bad_page_size_in_kb;
+	uint32_t reserved[8];
+};
+
 /*
  * PF2VF history log:
  * v1 defined in amdgim
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v3 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV
  2025-10-10 18:48 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
  2025-10-10 18:48 ` [PATCH v3 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
  2025-10-10 18:48 ` [PATCH v3 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init Ellen Pan
@ 2025-10-10 18:48 ` Ellen Pan
  2025-10-10 18:48 ` [PATCH v3 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 11+ messages in thread
From: Ellen Pan @ 2025-10-10 18:48 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

- During guest driver init, as VFs receive the PF msg to
	init the dynamic critical region (v2), VFs reuse fw_vram_usage_*
	 from ttm to store critical region tables in a 5MB chunk.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c  | 29 ++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 12 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |  9 ++++++
 3 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index c7d32fb216e4..636385c80f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -181,19 +181,22 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
 	u8 frev, crev;
 	int usage_bytes = 0;
 
-	if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
-		if (frev == 2 && crev == 1) {
-			fw_usage_v2_1 =
-				(struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
-			amdgpu_atomfirmware_allocate_fb_v2_1(adev,
-					fw_usage_v2_1,
-					&usage_bytes);
-		} else if (frev >= 2 && crev >= 2) {
-			fw_usage_v2_2 =
-				(struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
-			amdgpu_atomfirmware_allocate_fb_v2_2(adev,
-					fw_usage_v2_2,
-					&usage_bytes);
+	/* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
+	if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
+		if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+			if (frev == 2 && crev == 1) {
+				fw_usage_v2_1 =
+					(struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+				amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+						fw_usage_v2_1,
+						&usage_bytes);
+			} else if (frev >= 2 && crev >= 2) {
+				fw_usage_v2_2 =
+					(struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+				amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+						fw_usage_v2_2,
+						&usage_bytes);
+			}
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 96bd0185f936..b5148a33b6f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1944,19 +1944,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 
 	/*
-	 *The reserved vram for driver must be pinned to the specified
-	 *place on the VRAM, so reserve it early.
+	 * The reserved VRAM for the driver must be pinned to a specific
+	 * location in VRAM, so reserve it early.
 	 */
 	r = amdgpu_ttm_drv_reserve_vram_init(adev);
 	if (r)
 		return r;
 
 	/*
-	 * only NAVI10 and onwards ASIC support for IP discovery.
-	 * If IP discovery enabled, a block of memory should be
-	 * reserved for IP discovey.
+	 * only NAVI10 and later ASICs support IP discovery.
+	 * If IP discovery is enabled, a block of memory should be
+	 * reserved for it.
 	 */
-	if (adev->mman.discovery_bin) {
+	if (adev->mman.discovery_bin && !adev->virt.is_dynamic_crit_regn_enabled) {
 		r = amdgpu_ttm_reserve_tmr(adev);
 		if (r)
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 6eca5e8a7375..461e83728594 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -940,6 +940,15 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
 		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
 			init_data_hdr->bad_page_size_in_kb;
 
+		/* reserved memory starts from crit region base offset with the size of 5MB */
+		adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
+		adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
+		dev_info(adev->dev,
+			"critical region v%d requested to reserve memory start at %08x with %d KB.\n",
+			  init_data_hdr->version,
+			  adev->mman.fw_vram_usage_start_offset,
+			  adev->mman.fw_vram_usage_size >> 10);
+
 		adev->virt.is_dynamic_crit_regn_enabled = true;
 		break;
 	default:
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v3 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets
  2025-10-10 18:48 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
                   ` (2 preceding siblings ...)
  2025-10-10 18:48 ` [PATCH v3 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV Ellen Pan
@ 2025-10-10 18:48 ` Ellen Pan
  2025-10-13  5:35   ` Lazar, Lijo
  2025-10-10 18:48 ` [PATCH v3 6/6] drm/amdgpu: Add logic for VF data exchange region " Ellen Pan
  2025-10-13 13:32 ` [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Alex Deucher
  5 siblings, 1 reply; 11+ messages in thread
From: Ellen Pan @ 2025-10-10 18:48 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

1. Added VF logic in amdgpu_virt to init IP discovery using the offsets from dynamic(v2) critical regions;
2. Added VF logic in amdgpu_virt to init bios image using the offsets from dynamic(v2) critical regions;

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c      | 17 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 11 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      | 53 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |  2 +
 4 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 00e96419fcda..41f8fe04126f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -96,7 +96,8 @@ void amdgpu_bios_release(struct amdgpu_device *adev)
  * part of the system bios.  On boot, the system bios puts a
  * copy of the igp rom at the start of vram if a discrete card is
  * present.
- * For SR-IOV, the vbios image is also put in VRAM in the VF.
+ * For SR-IOV, if dynamic critical region is not enabled,
+ * the vbios image is also put at the start of VRAM in the VF.
  */
 static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
 {
@@ -114,7 +115,19 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
 
 	adev->bios = NULL;
 	vram_base = pci_resource_start(adev->pdev, 0);
-	bios = ioremap_wc(vram_base, size);
+
+	/* For SR-IOV, if dynamic critical region is enabled,
+	 * the vbios image is put at a dynamic offset of VRAM in the VF.
+	 * If dynamic critical region is disabled, fall through to the
+	 * same sequence as on bare metal.
+	 */
+	if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+		if (amdgpu_virt_get_dynamic_data_info(adev, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
+				(uint8_t *)&bios, &size))
+			return false;
+	} else
+		bios = ioremap_wc(vram_base, size);
+
 	if (!bios)
 		return false;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 73401f0aeb34..23aec57295c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -283,7 +283,6 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
 		 * wait for this to complete.  Once the C2PMSG is updated, we can
 		 * continue.
 		 */
-
 		for (i = 0; i < 2000; i++) {
 			msg = RREG32(mmMP0_SMN_C2PMSG_33);
 			if (msg & 0x80000000)
@@ -292,6 +291,16 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
 		}
 	}
 
+	/* For SRIOV VFs, if dynamic critical region is enabled,
+	 * IPD binary is retrieved via this call.
+	 * If dynamic critical region is disabled, fall through to the normal sequence below.
+	 */
+	if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+		ret = amdgpu_virt_get_dynamic_data_info(adev,
+				AMD_SRIOV_MSG_IPD_TABLE_ID, binary, NULL);
+		return ret;
+	}
+
 	vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
 	if (!vram_size || vram_size == U32_MAX)
 		sz_valid = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 461e83728594..4a7125122ae7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -965,6 +965,59 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
 	return r;
 }
 
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+	int data_id, uint8_t *binary, uint64_t *size)
+{
+	uint32_t data_offset = 0;
+	uint32_t data_size = 0;
+	enum amd_sriov_msg_table_id_enum data_table_id = data_id;
+	char *data_name;
+	uint8_t __iomem *buf;
+
+	if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+		return -EINVAL;
+
+	data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
+	data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
+
+	switch (data_id) {
+	case AMD_SRIOV_MSG_IPD_TABLE_ID:
+		data_name = "IPD";
+		if (!IS_ALIGNED(data_offset, 4) || !IS_ALIGNED(data_size, 4)) {
+			dev_err(adev->dev, "IP discovery data not aligned to 4 bytes\n");
+			return -EINVAL;
+		}
+
+		if (data_size > DISCOVERY_TMR_SIZE) {
+			dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n", data_size);
+			return -EINVAL;
+		}
+
+		amdgpu_device_vram_access(adev,
+				(uint64_t)data_offset, (uint32_t *)binary, data_size, false);
+		break;
+
+	case AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID:
+		data_name = "BIOS";
+		if (data_size > *size) {
+			dev_err(adev->dev, "Invalid vbios size: 0x%x\n", data_size);
+			return -EINVAL;
+		}
+
+		buf = ioremap_wc(pci_resource_start(adev->pdev, 0) + data_offset, data_size);
+
+		*(uint8_t __iomem **)binary = buf;
+		*size = (uint64_t)data_size;
+		break;
+	}
+
+	dev_info(adev->dev,
+		"Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+		data_name, data_offset, data_size);
+
+	return 0;
+}
+
 void amdgpu_virt_init(struct amdgpu_device *adev)
 {
 	bool is_sriov = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f46edc03f57f..5d8e3260f677 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -438,6 +438,8 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
 void amdgpu_virt_init(struct amdgpu_device *adev);
 
 int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+	int data_id, uint8_t *binary, uint64_t *size);
 
 bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
 int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v3 6/6] drm/amdgpu: Add logic for VF data exchange region to init from dynamic crit_region offsets
  2025-10-10 18:48 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
                   ` (3 preceding siblings ...)
  2025-10-10 18:48 ` [PATCH v3 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
@ 2025-10-10 18:48 ` Ellen Pan
  2025-10-13 13:34   ` Alex Deucher
  2025-10-13 13:32 ` [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Alex Deucher
  5 siblings, 1 reply; 11+ messages in thread
From: Ellen Pan @ 2025-10-10 18:48 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

Add VF logic to initialize the data exchange region using the offsets from the dynamic (v2) critical regions.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 88 ++++++++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  1 +
 2 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 4a7125122ae7..d99120b98188 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -670,6 +670,8 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
 
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 {
+	uint32_t *pfvf_data = NULL;
+
 	adev->virt.fw_reserve.p_pf2vf = NULL;
 	adev->virt.fw_reserve.p_vf2pf = NULL;
 	adev->virt.vf2pf_update_interval_ms = 0;
@@ -685,11 +687,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 		schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
 	} else if (adev->bios != NULL) {
 		/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
-		adev->virt.fw_reserve.p_pf2vf =
-			(struct amd_sriov_msg_pf2vf_info_header *)
-			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+		if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+			pfvf_data =
+				kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+					GFP_KERNEL);
+			if (!pfvf_data) {
+				DRM_ERROR("Failed to allocate memory for pfvf_data\n");
+				return;
+			}
 
-		amdgpu_virt_read_pf2vf_data(adev);
+			if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+				goto free_pfvf_data;
+
+			adev->virt.fw_reserve.p_pf2vf =
+				(struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+
+			amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+			kfree(pfvf_data);
+			pfvf_data = NULL;
+			adev->virt.fw_reserve.p_pf2vf = NULL;
+		} else {
+			adev->virt.fw_reserve.p_pf2vf =
+				(struct amd_sriov_msg_pf2vf_info_header *)
+				(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+			amdgpu_virt_read_pf2vf_data(adev);
+		}
 	}
 }
 
@@ -702,14 +727,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
 
 	if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
 		if (adev->mman.fw_vram_usage_va) {
-			adev->virt.fw_reserve.p_pf2vf =
-				(struct amd_sriov_msg_pf2vf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
-			adev->virt.fw_reserve.p_vf2pf =
-				(struct amd_sriov_msg_vf2pf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
-			adev->virt.fw_reserve.ras_telemetry =
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+			if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+				adev->virt.fw_reserve.p_pf2vf =
+					(struct amd_sriov_msg_pf2vf_info_header *)
+					(adev->mman.fw_vram_usage_va +
+					adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+				adev->virt.fw_reserve.p_vf2pf =
+					(struct amd_sriov_msg_vf2pf_info_header *)
+					(adev->mman.fw_vram_usage_va +
+					adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+					(AMD_SRIOV_MSG_SIZE_KB_V1 << 10));
+				adev->virt.fw_reserve.ras_telemetry =
+					(adev->mman.fw_vram_usage_va +
+					adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+			} else {
+				adev->virt.fw_reserve.p_pf2vf =
+					(struct amd_sriov_msg_pf2vf_info_header *)
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+				adev->virt.fw_reserve.p_vf2pf =
+					(struct amd_sriov_msg_vf2pf_info_header *)
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+				adev->virt.fw_reserve.ras_telemetry =
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+			}
 		} else if (adev->mman.drv_vram_usage_va) {
 			adev->virt.fw_reserve.p_pf2vf =
 				(struct amd_sriov_msg_pf2vf_info_header *)
@@ -1018,6 +1058,30 @@ int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
 	return 0;
 }
 
+int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+	uint32_t dataexchange_offset =
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+	uint32_t dataexchange_size =
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+	uint64_t pos = 0;
+
+	dev_info(adev->dev,
+			"Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+			dataexchange_offset, dataexchange_size);
+
+	if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+		DRM_ERROR("Data exchange data not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+
+	pos = (uint64_t)dataexchange_offset;
+	amdgpu_device_vram_access(adev, pos, pfvf_data,
+					dataexchange_size, false);
+
+	return 0;
+}
+
 void amdgpu_virt_init(struct amdgpu_device *adev)
 {
 	bool is_sriov = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 5d8e3260f677..4e9489ff295c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -440,6 +440,7 @@ void amdgpu_virt_init(struct amdgpu_device *adev);
 int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
 int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
 	int data_id, uint8_t *binary, uint64_t *size);
+int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data);
 
 bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
 int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* RE: [PATCH v3 2/6] drm/amdgpu: Add SRIOV crit_region_version support
  2025-10-10 18:48 ` [PATCH v3 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
@ 2025-10-13  4:57   ` Lazar, Lijo
  0 siblings, 0 replies; 11+ messages in thread
From: Lazar, Lijo @ 2025-10-13  4:57 UTC (permalink / raw)
  To: Pan, Ellen, amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian, Chan, Hing Pong

[Public]

>-----Original Message-----
>From: Pan, Ellen <Yunru.Pan@amd.com>
>Sent: Saturday, October 11, 2025 12:19 AM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
><Christian.Koenig@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Chan, Hing
>Pong <Jeffrey.Chan@amd.com>; Pan, Ellen <Yunru.Pan@amd.com>
>Subject: [PATCH v3 2/6] drm/amdgpu: Add SRIOV crit_region_version support
>
>1. Added enum amd_sriov_crit_region_version to support multiple versions.
>2. Added logic in SRIOV mailbox to recognize crit_region version during
>   req_gpu_init_data
>
>Signed-off-by: Ellen Pan <yunru.pan@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    |  3 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  8 +++++++
> drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  5 +++++
> drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c       | 23 ++++++++++++++-------
> 4 files changed, 31 insertions(+), 8 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>index e95adf0407a0..3a6b0e1084d7 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>@@ -150,7 +150,8 @@ void amdgpu_virt_request_init_data(struct
>amdgpu_device *adev)
>               virt->ops->req_init_data(adev);
>
>       if (adev->virt.req_init_data_ver > 0)
>-              DRM_INFO("host supports REQ_INIT_DATA handshake\n");
>+              DRM_INFO("host supports REQ_INIT_DATA handshake of
>critical_region_version %d\n",
>+                               adev->virt.req_init_data_ver);
[lijo]

Please use dev_info here.

Thanks,
Lijo

>       else
>               DRM_WARN("host doesn't support REQ_INIT_DATA
>handshake\n");  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>index d1172c8e58c4..36247a160aa6 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>@@ -262,6 +262,11 @@ struct amdgpu_virt_ras {
>
> DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
>
>+struct amdgpu_virt_region {
>+      uint32_t offset;
>+      uint32_t size_kb;
>+};
>+
> /* GPU virtualization */
> struct amdgpu_virt {
>       uint32_t                        caps;
>@@ -289,6 +294,9 @@ struct amdgpu_virt {
>       bool ras_init_done;
>       uint32_t reg_access;
>
>+      /* dynamic(v2) critical regions */
>+      struct amdgpu_virt_region init_data_header;
>+
>       /* vf2pf message */
>       struct delayed_work vf2pf_work;
>       uint32_t vf2pf_update_interval_ms;
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>index 3b35154e2df6..b53caab5b706 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>@@ -65,6 +65,11 @@
>       (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 +
>AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
>        AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
>
>+enum amd_sriov_crit_region_version {
>+      GPU_CRIT_REGION_V1 = 1,
>+      GPU_CRIT_REGION_V2 = 2,
>+};
>+
> /*
>  * PF2VF history log:
>  * v1 defined in amdgim
>diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>index e5282a5d05d9..cd5b2f07edb8 100644
>--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>@@ -222,12 +222,20 @@ static int
>xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
>                               adev->virt.req_init_data_ver = 0;
>               } else {
>                       if (req == IDH_REQ_GPU_INIT_DATA) {
>-                              adev->virt.req_init_data_ver =
>-
>       RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
>-
>-                              /* assume V1 in case host doesn't set version
>number */
>-                              if (adev->virt.req_init_data_ver < 1)
>-                                      adev->virt.req_init_data_ver = 1;
>+                              switch
>(RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) {
>+                              case GPU_CRIT_REGION_V2:
>+                                      adev->virt.req_init_data_ver =
>GPU_CRIT_REGION_V2;
>+                                      adev->virt.init_data_header.offset =
>+
>       RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
>+                                      adev->virt.init_data_header.size_kb =
>+
>       RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3);
>+                                      break;
>+                              default:
>+                                      adev->virt.req_init_data_ver =
>GPU_CRIT_REGION_V1;
>+                                      adev->virt.init_data_header.offset = -1;
>+                                      adev->virt.init_data_header.size_kb =
>0;
>+                                      break;
>+                              }
>                       }
>               }
>
>@@ -285,7 +293,8 @@ static int xgpu_nv_release_full_gpu_access(struct
>amdgpu_device *adev,
>
> static int xgpu_nv_request_init_data(struct amdgpu_device *adev)  {
>-      return xgpu_nv_send_access_requests(adev,
>IDH_REQ_GPU_INIT_DATA);
>+      return xgpu_nv_send_access_requests_with_param(adev,
>IDH_REQ_GPU_INIT_DATA,
>+                      0, GPU_CRIT_REGION_V2, 0);
> }
>
> static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
>--
>2.34.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH v3 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init
  2025-10-10 18:48 ` [PATCH v3 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init Ellen Pan
@ 2025-10-13  5:14   ` Lazar, Lijo
  0 siblings, 0 replies; 11+ messages in thread
From: Lazar, Lijo @ 2025-10-13  5:14 UTC (permalink / raw)
  To: Pan, Ellen, amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian, Chan, Hing Pong

[Public]

>-----Original Message-----
>From: Pan, Ellen <Yunru.Pan@amd.com>
>Sent: Saturday, October 11, 2025 12:19 AM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
><Christian.Koenig@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Chan, Hing
>Pong <Jeffrey.Chan@amd.com>; Pan, Ellen <Yunru.Pan@amd.com>
>Subject: [PATCH v3 3/6] drm/amdgpu: Introduce SRIOV critical regions v2
>during VF init
>
>    1. Introduced amdgpu_virt_init_critical_region during VF init.
>     - VFs use init_data_header_offset and init_data_header_size_kb
>            transmitted via PF2VF mailbox to fetch the offset of
>            critical regions' offsets/sizes in VRAM and save to
>            adev->virt.crit_region_offsets and adev->virt.crit_region_sizes_kb.
>
>Signed-off-by: Ellen Pan <yunru.pan@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   4 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 113
>++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |   7 ++
> drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  31 ++++++
> 4 files changed, 155 insertions(+)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 929936c8d87c..351cfe03a1aa 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -2754,6 +2754,10 @@ static int amdgpu_device_ip_early_init(struct
>amdgpu_device *adev)
>               r = amdgpu_virt_request_full_gpu(adev, true);
>               if (r)
>                       return r;
>+
>+              r = amdgpu_virt_init_critical_region(adev);
>+              if (r)
>+                      return r;
>       }
>
>       switch (adev->asic_type) {
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>index 3a6b0e1084d7..6eca5e8a7375 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>@@ -843,6 +843,119 @@ static void amdgpu_virt_init_ras(struct
>amdgpu_device *adev)
>       adev->virt.ras.cper_rptr = 0;
> }
>
>+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t
>+*buf_start, uint8_t *buf_end) {
>+      uint32_t sum = 0;
>+
>+      if (buf_start >= buf_end)
>+              return 0;
>+
>+      for (; buf_start < buf_end; buf_start++)
>+              sum += buf_start[0];
>+
>+      return 0xffffffff - sum;
>+}
>+
>+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev) {
>+      struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
>+      uint32_t init_hdr_offset = adev->virt.init_data_header.offset;
>+      uint32_t init_hdr_size = adev->virt.init_data_header.size_kb << 10;
>+      uint64_t pos = 0;
[lijo]
This variable is not required; the read always seems to start from init_hdr_offset.

>+      uint64_t vram_size;
>+      int r = 0;
>+      uint8_t checksum = 0;
>+
>+      /* Skip below init if critical region version != v2 */
>+      if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
>+              return 0;
>+
>+      if (init_hdr_offset < 0) {
>+              dev_err(adev->dev, "Invalid init header offset\n");
>+              return -EINVAL;
>+      }
>+
>+      vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
>+      if (!vram_size || vram_size == U32_MAX)
>+              return -EINVAL;
>+      vram_size <<= 20;
>+
>+      if ((init_hdr_offset + init_hdr_size) > vram_size) {
>+              dev_err(adev->dev, "init_data_header exceeds VRAM size,
>exiting\n");
>+              return -EINVAL;
>+      }
>+
>+      /* Allocate for init_data_hdr */
>+      init_data_hdr = kzalloc(sizeof(struct
>amd_sriov_msg_init_data_header), GFP_KERNEL);
>+      if (!init_data_hdr)
>+              return -ENOMEM;
>+
>+      pos = (uint64_t)init_hdr_offset;
>+      amdgpu_device_vram_access(adev, pos, (uint32_t *)init_data_hdr,
>+                                      sizeof(struct
>amd_sriov_msg_init_data_header), false);
>+
>+      switch (init_data_hdr->version) {
>+      case GPU_CRIT_REGION_V2:
[lijo]
There is already a version check at the beginning of this function.

>+              if (strncmp(init_data_hdr->signature, "INDA", 4) != 0) {
[lijo]

Suggest keeping this signature as a #define

>+                      dev_err(adev->dev, "Invalid init data signature:
>%.4s\n",
>+                                      init_data_hdr->signature);
>+                      r = -EINVAL;
>+                      goto out;
>+              }
>+
>+              checksum = amdgpu_virt_crit_region_calc_checksum(
>+                              (uint8_t *)&init_data_hdr->initdata_offset,
>+                              (uint8_t *)init_data_hdr +
>+                              sizeof(struct
>amd_sriov_msg_init_data_header));
>+              if (checksum != init_data_hdr->checksum) {
>+                      dev_err(adev->dev, "Found unmatching checksum
>from calculation 0x%x and init_data 0x%x\n",
>+                                      checksum, init_data_hdr->checksum);
>+                      r = -EINVAL;
>+                      goto out;
>+              }
>+
>+              /* Initialize critical region offsets */
>+              adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
>+                      init_data_hdr->ip_discovery_offset;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
>+                      init_data_hdr->vbios_img_offset;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
>+                      init_data_hdr->ras_tele_info_offset;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
>+                      init_data_hdr->dataexchange_offset;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
>+                      init_data_hdr->bad_page_info_offset;
>+
>+              /* Initialize critical region sizes */
>+              adev->virt.crit_regn.size_kb = init_data_hdr-
>>initdata_size_in_kb;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
>+                      init_data_hdr->ip_discovery_size_in_kb;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
>+                      init_data_hdr->vbios_img_size_in_kb;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
>+                      init_data_hdr->ras_tele_info_size_in_kb;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
>+                      init_data_hdr->dataexchange_size_in_kb;
>+              adev-
>>virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
>+                      init_data_hdr->bad_page_size_in_kb;
>+
>+              adev->virt.is_dynamic_crit_regn_enabled = true;
>+              break;
>+      default:
>+              dev_err(adev->dev, "Invalid init header version: %u\n",
>+                              init_data_hdr->version);
>+              r = -EINVAL;
>+              goto out;
>+      }
>+
>+out:
>+      kfree(init_data_hdr);
>+      init_data_hdr = NULL;
>+
>+      return r;
>+}
>+
> void amdgpu_virt_init(struct amdgpu_device *adev)  {
>       bool is_sriov = false;
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>index 36247a160aa6..f46edc03f57f 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>@@ -52,6 +52,8 @@
> /* tonga/fiji use this offset */
> #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
>
>+#define mmRCC_CONFIG_MEMSIZE  0xde3
[lijo]

Alex already commented about this. Keeping this define here will pollute every file that includes amdgpu_virt.h, including the ones that already have the real definition.

Thanks,
Lijo

>+
> #define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
>
> enum amdgpu_sriov_vf_mode {
>@@ -296,6 +298,9 @@ struct amdgpu_virt {
>
>       /* dynamic(v2) critical regions */
>       struct amdgpu_virt_region init_data_header;
>+      struct amdgpu_virt_region crit_regn;
>+      struct amdgpu_virt_region
>crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
>+      bool is_dynamic_crit_regn_enabled;
>
>       /* vf2pf message */
>       struct delayed_work vf2pf_work;
>@@ -432,6 +437,8 @@ void amdgpu_virt_exchange_data(struct
>amdgpu_device *adev);  void amdgpu_virt_fini_data_exchange(struct
>amdgpu_device *adev);  void amdgpu_virt_init(struct amdgpu_device *adev);
>
>+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
>+
> bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);  int
>amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);  void
>amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev); diff --git
>a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>index b53caab5b706..d15c256f9abd 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
>@@ -70,6 +70,37 @@ enum amd_sriov_crit_region_version {
>       GPU_CRIT_REGION_V2 = 2,
> };
>
>+/* v2 layout offset enum (in order of allocation) */ enum
>+amd_sriov_msg_table_id_enum {
>+      AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
>+      AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
>+      AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
>+      AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
>+      AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
>+      AMD_SRIOV_MSG_INITD_H_TABLE_ID,
>+      AMD_SRIOV_MSG_MAX_TABLE_ID,
>+};
>+
>+struct amd_sriov_msg_init_data_header {
>+      char     signature[4];  /* "INDA"  */
>+      uint32_t version;
>+      uint32_t checksum;
>+      uint32_t initdata_offset; /* 0 */
>+      uint32_t initdata_size_in_kb; /* 5MB */
>+      uint32_t valid_tables;
>+      uint32_t vbios_img_offset;
>+      uint32_t vbios_img_size_in_kb;
>+      uint32_t dataexchange_offset;
>+      uint32_t dataexchange_size_in_kb;
>+      uint32_t ras_tele_info_offset;
>+      uint32_t ras_tele_info_size_in_kb;
>+      uint32_t ip_discovery_offset;
>+      uint32_t ip_discovery_size_in_kb;
>+      uint32_t bad_page_info_offset;
>+      uint32_t bad_page_size_in_kb;
>+      uint32_t reserved[8];
>+};
>+
> /*
>  * PF2VF history log:
>  * v1 defined in amdgim
>--
>2.34.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH v3 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets
  2025-10-10 18:48 ` [PATCH v3 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
@ 2025-10-13  5:35   ` Lazar, Lijo
  0 siblings, 0 replies; 11+ messages in thread
From: Lazar, Lijo @ 2025-10-13  5:35 UTC (permalink / raw)
  To: Pan, Ellen, amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian, Chan, Hing Pong

[AMD Official Use Only - AMD Internal Distribution Only]

>-----Original Message-----
>From: Pan, Ellen <Yunru.Pan@amd.com>
>Sent: Saturday, October 11, 2025 12:19 AM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
><Christian.Koenig@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Chan, Hing
>Pong <Jeffrey.Chan@amd.com>; Pan, Ellen <Yunru.Pan@amd.com>
>Subject: [PATCH v3 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init
>from dynamic crit_region offsets
>
>1. Added VF logic in amdgpu_virt to init IP discovery using the offsets from
>   dynamic(v2) critical regions;
>2. Added VF logic in amdgpu_virt to init bios image using the offsets from
>   dynamic(v2) critical regions;
>
>Signed-off-by: Ellen Pan <yunru.pan@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c      | 17 +++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 11 +++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      | 53
>+++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |  2 +
> 4 files changed, 80 insertions(+), 3 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
>index 00e96419fcda..41f8fe04126f 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
>@@ -96,7 +96,8 @@ void amdgpu_bios_release(struct amdgpu_device *adev)
>  * part of the system bios.  On boot, the system bios puts a
>  * copy of the igp rom at the start of vram if a discrete card is
>  * present.
>- * For SR-IOV, the vbios image is also put in VRAM in the VF.
>+ * For SR-IOV, if dynamic critical region is not enabled,
>+ * the vbios image is also put at the start of VRAM in the VF.
>  */
> static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)  {
>@@ -114,7 +115,19 @@ static bool amdgpu_read_bios_from_vram(struct
>amdgpu_device *adev)
>
>       adev->bios = NULL;
>       vram_base = pci_resource_start(adev->pdev, 0);
>-      bios = ioremap_wc(vram_base, size);
>+
>+      /* For SR-IOV, if dynamic critical region is enabled,
>+       * the vbios image is put at a dynamic offset of VRAM in the VF.
>+       * If dynamic critical region is disabled, exit early to proceed
>+       * the same seq as on baremetal.
>+       */
>+      if (amdgpu_sriov_vf(adev) && adev-
>>virt.is_dynamic_crit_regn_enabled) {
>+              if (amdgpu_virt_get_dynamic_data_info(adev,
>AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
>+                              (uint8_t *)&bios, &size))
[lijo]

This doesn't look correct. Please be consistent with the meaning of the parameter of this function. If it's just a uint8_t *, pass the buffer which is allocated outside and the function fills it. If that's the route taken, then you could allocate adev->bios with arbitrary size, pass it and do a realloc if required. The size param will then be in/out (as out param, it will return the actual size). If it's a uint8_t **, then the allocation can be done inside the function.

>+                      return false;
>+      } else
>+              bios = ioremap_wc(vram_base, size);
>+
>       if (!bios)
>               return false;
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>index 73401f0aeb34..23aec57295c0 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
>@@ -283,7 +283,6 @@ static int
>amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
>                * wait for this to complete.  Once the C2PMSG is updated, we
>can
>                * continue.
>                */
>-
>               for (i = 0; i < 2000; i++) {
>                       msg = RREG32(mmMP0_SMN_C2PMSG_33);
>                       if (msg & 0x80000000)
>@@ -292,6 +291,16 @@ static int
>amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
>               }
>       }
>
>+      /* For SRIOV VFs, if dynamic critical region is enabled,
>+       * IPD binary is retrieved via this call.
>+       * If dynamic critical is disabled, fallthrough to normal seq below.
>+       */
>+      if (amdgpu_sriov_vf(adev) && adev-
>>virt.is_dynamic_crit_regn_enabled) {
>+              ret = amdgpu_virt_get_dynamic_data_info(adev,
>+                              AMD_SRIOV_MSG_IPD_TABLE_ID, binary,
>NULL);
>+              return ret;
>+      }
>+
>       vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
>       if (!vram_size || vram_size == U32_MAX)
>               sz_valid = false;
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>index 461e83728594..4a7125122ae7 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>@@ -965,6 +965,59 @@ int amdgpu_virt_init_critical_region(struct
>amdgpu_device *adev)
>       return r;
> }
>
>+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
>+      int data_id, uint8_t *binary, uint64_t *size) {
>+      uint32_t data_offset = 0;
>+      uint32_t data_size = 0;
>+      enum amd_sriov_msg_table_id_enum data_table_id = data_id;
>+      char *data_name;
>+      uint8_t __iomem *buf;
>+
>+      if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
>+              return -EINVAL;
>+
>+      data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
>+      data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
>+
>+      switch (data_id) {
>+      case AMD_SRIOV_MSG_IPD_TABLE_ID:
>+              data_name = "IPD";
>+              if (!IS_ALIGNED(data_offset, 4) || !IS_ALIGNED(data_size, 4)) {
>+                      dev_err(adev->dev, "IP discovery data not aligned to 4
>bytes\n");
>+                      return -EINVAL;
>+              }
>+
>+              if (data_size > DISCOVERY_TMR_SIZE) {
>+                      dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
>data_size);
[lijo]

This looks like validation of a table entry. I think that is better done where the table entries are filled in, i.e. in amdgpu_virt_init_critical_region().

>+                      return -EINVAL;
>+              }
>+
>+              amdgpu_device_vram_access(adev,
>+                              (uint64_t)data_offset, (uint32_t *)binary,
>data_size, false);
[lijo]

There is no NULL check for binary and no validation of the size. It would be better to always take size as an in/out parameter.

>+              break;
>+
>+      case AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID:
>+              data_name = "BIOS";
>+              if (data_size > *size) {
>+                      dev_err(adev->dev, "Invalid vbios size: 0x%x\n",
>data_size);
>+                      return -EINVAL;
>+              }
>+
>+              buf = ioremap_wc(pci_resource_start(adev->pdev, 0) +
>data_offset,
>+data_size);
[lijo]

This doesn't look correct.  You just need to fill in the binary with amdgpu_device_vram_access().

Thanks,
Lijo

>+
>+              *(uint8_t __iomem **)binary = buf;
>+              *size = (uint64_t)data_size;
>+              break;
>+      }
>+
>+      dev_info(adev->dev,
>+              "Got %s info from dynamic crit_region_table at offset 0x%x
>with size of 0x%x bytes.\n",
>+              data_name, data_offset, data_size);
>+
>+      return 0;
>+}
>+
> void amdgpu_virt_init(struct amdgpu_device *adev)  {
>       bool is_sriov = false;
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>index f46edc03f57f..5d8e3260f677 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>@@ -438,6 +438,8 @@ void amdgpu_virt_fini_data_exchange(struct
>amdgpu_device *adev);  void amdgpu_virt_init(struct amdgpu_device *adev);
>
> int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
>+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
>+      int data_id, uint8_t *binary, uint64_t *size);
>
> bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);  int
>amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
>--
>2.34.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix
  2025-10-10 18:48 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
                   ` (4 preceding siblings ...)
  2025-10-10 18:48 ` [PATCH v3 6/6] drm/amdgpu: Add logic for VF data exchange region " Ellen Pan
@ 2025-10-13 13:32 ` Alex Deucher
  5 siblings, 0 replies; 11+ messages in thread
From: Alex Deucher @ 2025-10-13 13:32 UTC (permalink / raw)
  To: Ellen Pan
  Cc: amd-gfx, Alexander.Deucher, Christian.Koenig, Lijo.Lazar,
	Jeffrey.Chan

On Fri, Oct 10, 2025 at 2:56 PM Ellen Pan <yunru.pan@amd.com> wrote:
>
>  - This change prepares the later patches to intro  _v2 suffix to SRIOV critical regions
>
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>

Acked-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 20 ++++----
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 57 ++++++++++++++-------
>  2 files changed, 49 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 3328ab63376b..e95adf0407a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -686,7 +686,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
>                 /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
>                 adev->virt.fw_reserve.p_pf2vf =
>                         (struct amd_sriov_msg_pf2vf_info_header *)
> -                       (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
> +                       (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
>
>                 amdgpu_virt_read_pf2vf_data(adev);
>         }
> @@ -703,21 +703,21 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
>                 if (adev->mman.fw_vram_usage_va) {
>                         adev->virt.fw_reserve.p_pf2vf =
>                                 (struct amd_sriov_msg_pf2vf_info_header *)
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
> +                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
>                         adev->virt.fw_reserve.p_vf2pf =
>                                 (struct amd_sriov_msg_vf2pf_info_header *)
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
> +                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
>                         adev->virt.fw_reserve.ras_telemetry =
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
> +                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
>                 } else if (adev->mman.drv_vram_usage_va) {
>                         adev->virt.fw_reserve.p_pf2vf =
>                                 (struct amd_sriov_msg_pf2vf_info_header *)
> -                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
> +                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
>                         adev->virt.fw_reserve.p_vf2pf =
>                                 (struct amd_sriov_msg_vf2pf_info_header *)
> -                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
> +                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
>                         adev->virt.fw_reserve.ras_telemetry =
> -                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
> +                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
>                 }
>
>                 amdgpu_virt_read_pf2vf_data(adev);
> @@ -1304,7 +1304,7 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
>         checksum = host_telemetry->header.checksum;
>         used_size = host_telemetry->header.used_size;
>
> -       if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
> +       if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
>                 return 0;
>
>         tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
> @@ -1383,7 +1383,7 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
>         checksum = host_telemetry->header.checksum;
>         used_size = host_telemetry->header.used_size;
>
> -       if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
> +       if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
>                 return -EINVAL;
>
>         cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
> @@ -1515,7 +1515,7 @@ static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
>         checksum = host_telemetry->header.checksum;
>         used_size = host_telemetry->header.used_size;
>
> -       if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
> +       if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
>                 return 0;
>
>         tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> index 3a79ed7d8031..3b35154e2df6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -23,26 +23,47 @@
>  #ifndef AMDGV_SRIOV_MSG__H_
>  #define AMDGV_SRIOV_MSG__H_
>
> -/* unit in kilobytes */
> -#define AMD_SRIOV_MSG_VBIOS_OFFSET          0
> -#define AMD_SRIOV_MSG_VBIOS_SIZE_KB         64
> -#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
> -#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB   4
> -#define AMD_SRIOV_MSG_TMR_OFFSET_KB         2048
> -#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB      2
> -#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB             64
>  /*
> - * layout
> + * layout v1
>   * 0           64KB        65KB        66KB           68KB                   132KB
>   * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
>   * |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
>   */
>
> -#define AMD_SRIOV_MSG_SIZE_KB                   1
> -#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB           AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
> -#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB           (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
> -#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB        (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
> -#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB   (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
> +/*
> + * layout v2 (offsets are dynamically allocated and the offsets below are examples)
> + * 0           1KB         64KB        65KB        66KB           68KB                   132KB
> + * |  INITD_H  |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
> + * |   1KB     |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
> + *
> + * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
> + */
> +
> +/* v1 layout sizes */
> +#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1                 64
> +#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1                 1
> +#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1                 1
> +#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1              2
> +#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1         64
> +#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1          \
> +       (AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
> +        AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
> +
> +/* v1 offsets */
> +#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1                  0
> +#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1                AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
> +#define AMD_SRIOV_MSG_TMR_OFFSET_KB                    2048
> +#define AMD_SRIOV_MSG_SIZE_KB_V1                       1
> +#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1               AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
> +#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1               \
> +       (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB_V1)
> +#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1            \
> +       (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB_V1)
> +#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1       \
> +       (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
> +#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1         \
> +       (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
> +        AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
>
>  /*
>   * PF2VF history log:
> @@ -436,12 +457,12 @@ unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned
>  #define _stringification(s) #s
>
>  _Static_assert(
> -       sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
> -       "amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
> +       sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB_V1 << 10,
> +       "amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB_V1) " KB");
>
>  _Static_assert(
> -       sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
> -       "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
> +       sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB_V1 << 10,
> +       "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB_V1) " KB");
>
>  _Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
>                "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 6/6] drm/amdgpu: Add logic for VF data exchange region to init from dynamic crit_region offsets
  2025-10-10 18:48 ` [PATCH v3 6/6] drm/amdgpu: Add logic for VF data exchange region " Ellen Pan
@ 2025-10-13 13:34   ` Alex Deucher
  0 siblings, 0 replies; 11+ messages in thread
From: Alex Deucher @ 2025-10-13 13:34 UTC (permalink / raw)
  To: Ellen Pan
  Cc: amd-gfx, Alexander.Deucher, Christian.Koenig, Lijo.Lazar,
	Jeffrey.Chan

On Fri, Oct 10, 2025 at 2:49 PM Ellen Pan <yunru.pan@amd.com> wrote:
>
> 1. Added VF logic to init data exchange region using the offsets from dynamic(v2) critical regions;
>
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 88 ++++++++++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  1 +
>  2 files changed, 77 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4a7125122ae7..d99120b98188 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -670,6 +670,8 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
>
>  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
>  {
> +       uint32_t *pfvf_data = NULL;
> +
>         adev->virt.fw_reserve.p_pf2vf = NULL;
>         adev->virt.fw_reserve.p_vf2pf = NULL;
>         adev->virt.vf2pf_update_interval_ms = 0;
> @@ -685,11 +687,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
>                 schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
>         } else if (adev->bios != NULL) {
>                 /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
> -               adev->virt.fw_reserve.p_pf2vf =
> -                       (struct amd_sriov_msg_pf2vf_info_header *)
> -                       (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> +               if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> +                       pfvf_data =
> +                               kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
> +                                       GFP_KERNEL);
> +                       if (!pfvf_data) {
> +                               DRM_ERROR("Failed to allocate memory for pfvf_data\n");
> +                               return;
> +                       }
>
> -               amdgpu_virt_read_pf2vf_data(adev);
> +                       if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
> +                               goto free_pfvf_data;
> +
> +                       adev->virt.fw_reserve.p_pf2vf =
> +                               (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
> +
> +                       amdgpu_virt_read_pf2vf_data(adev);
> +
> +free_pfvf_data:
> +                       kfree(pfvf_data);
> +                       pfvf_data = NULL;
> +                       adev->virt.fw_reserve.p_pf2vf = NULL;
> +               } else {
> +                       adev->virt.fw_reserve.p_pf2vf =
> +                               (struct amd_sriov_msg_pf2vf_info_header *)
> +                               (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> +
> +                       amdgpu_virt_read_pf2vf_data(adev);
> +               }
>         }
>  }
>
> @@ -702,14 +727,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
>
>         if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
>                 if (adev->mman.fw_vram_usage_va) {
> -                       adev->virt.fw_reserve.p_pf2vf =
> -                               (struct amd_sriov_msg_pf2vf_info_header *)
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> -                       adev->virt.fw_reserve.p_vf2pf =
> -                               (struct amd_sriov_msg_vf2pf_info_header *)
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
> -                       adev->virt.fw_reserve.ras_telemetry =
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
> +                       if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> +                               adev->virt.fw_reserve.p_pf2vf =
> +                                       (struct amd_sriov_msg_pf2vf_info_header *)
> +                                       (adev->mman.fw_vram_usage_va +
> +                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
> +                               adev->virt.fw_reserve.p_vf2pf =
> +                                       (struct amd_sriov_msg_vf2pf_info_header *)
> +                                       (adev->mman.fw_vram_usage_va +
> +                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
> +                                       (AMD_SRIOV_MSG_SIZE_KB_V1 << 10));
> +                               adev->virt.fw_reserve.ras_telemetry =
> +                                       (adev->mman.fw_vram_usage_va +
> +                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
> +                       } else {
> +                               adev->virt.fw_reserve.p_pf2vf =
> +                                       (struct amd_sriov_msg_pf2vf_info_header *)
> +                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> +                               adev->virt.fw_reserve.p_vf2pf =
> +                                       (struct amd_sriov_msg_vf2pf_info_header *)
> +                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
> +                               adev->virt.fw_reserve.ras_telemetry =
> +                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
> +                       }
>                 } else if (adev->mman.drv_vram_usage_va) {
>                         adev->virt.fw_reserve.p_pf2vf =
>                                 (struct amd_sriov_msg_pf2vf_info_header *)
> @@ -1018,6 +1058,30 @@ int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
>         return 0;
>  }
>
> +int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)

This function can be static as it's only used in this file.

Alex

> +{
> +       uint32_t dataexchange_offset =
> +               adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
> +       uint32_t dataexchange_size =
> +               adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
> +       uint64_t pos = 0;
> +
> +       dev_info(adev->dev,
> +                       "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
> +                       dataexchange_offset, dataexchange_size);
> +
> +       if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
> +               DRM_ERROR("Data exchange data not aligned to 4 bytes\n");
> +               return -EINVAL;
> +       }
> +
> +       pos = (uint64_t)dataexchange_offset;
> +       amdgpu_device_vram_access(adev, pos, pfvf_data,
> +                                       dataexchange_size, false);
> +
> +       return 0;
> +}
> +
>  void amdgpu_virt_init(struct amdgpu_device *adev)
>  {
>         bool is_sriov = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 5d8e3260f677..4e9489ff295c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -440,6 +440,7 @@ void amdgpu_virt_init(struct amdgpu_device *adev);
>  int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
>  int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
>         int data_id, uint8_t *binary, uint64_t *size);
> +int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data);
>
>  bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
>  int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-10-13 13:34 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-10-10 18:48 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
2025-10-10 18:48 ` [PATCH v3 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
2025-10-13  4:57   ` Lazar, Lijo
2025-10-10 18:48 ` [PATCH v3 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init Ellen Pan
2025-10-13  5:14   ` Lazar, Lijo
2025-10-10 18:48 ` [PATCH v3 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV Ellen Pan
2025-10-10 18:48 ` [PATCH v3 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
2025-10-13  5:35   ` Lazar, Lijo
2025-10-10 18:48 ` [PATCH v3 6/6] drm/amdgpu: Add logic for VF data exchange region " Ellen Pan
2025-10-13 13:34   ` Alex Deucher
2025-10-13 13:32 ` [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Alex Deucher

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.