AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix
@ 2025-10-14 20:13 Ellen Pan
  2025-10-14 20:13 ` [PATCH v4 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Ellen Pan @ 2025-10-14 20:13 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

 - This change prepares the later patches to introduce the _v2 suffix to SRIOV critical regions

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 20 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 57 ++++++++++++++-------
 2 files changed, 49 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3328ab63376b..e95adf0407a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -686,7 +686,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 		/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
 		adev->virt.fw_reserve.p_pf2vf =
 			(struct amd_sriov_msg_pf2vf_info_header *)
-			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
 
 		amdgpu_virt_read_pf2vf_data(adev);
 	}
@@ -703,21 +703,21 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
 		if (adev->mman.fw_vram_usage_va) {
 			adev->virt.fw_reserve.p_pf2vf =
 				(struct amd_sriov_msg_pf2vf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
 			adev->virt.fw_reserve.p_vf2pf =
 				(struct amd_sriov_msg_vf2pf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
 			adev->virt.fw_reserve.ras_telemetry =
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
 		} else if (adev->mman.drv_vram_usage_va) {
 			adev->virt.fw_reserve.p_pf2vf =
 				(struct amd_sriov_msg_pf2vf_info_header *)
-				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
 			adev->virt.fw_reserve.p_vf2pf =
 				(struct amd_sriov_msg_vf2pf_info_header *)
-				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
 			adev->virt.fw_reserve.ras_telemetry =
-				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
 		}
 
 		amdgpu_virt_read_pf2vf_data(adev);
@@ -1304,7 +1304,7 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
 	checksum = host_telemetry->header.checksum;
 	used_size = host_telemetry->header.used_size;
 
-	if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+	if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
 		return 0;
 
 	tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
@@ -1383,7 +1383,7 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
 	checksum = host_telemetry->header.checksum;
 	used_size = host_telemetry->header.used_size;
 
-	if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+	if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
 		return -EINVAL;
 
 	cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
@@ -1515,7 +1515,7 @@ static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
 	checksum = host_telemetry->header.checksum;
 	used_size = host_telemetry->header.used_size;
 
-	if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+	if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
 		return 0;
 
 	tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 3a79ed7d8031..3b35154e2df6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -23,26 +23,47 @@
 #ifndef AMDGV_SRIOV_MSG__H_
 #define AMDGV_SRIOV_MSG__H_
 
-/* unit in kilobytes */
-#define AMD_SRIOV_MSG_VBIOS_OFFSET	     0
-#define AMD_SRIOV_MSG_VBIOS_SIZE_KB	     64
-#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
-#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB   4
-#define AMD_SRIOV_MSG_TMR_OFFSET_KB	     2048
-#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB	     2
-#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB	     64
 /*
- * layout
+ * layout v1
  * 0           64KB        65KB        66KB           68KB                   132KB
  * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
  * |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
  */
 
-#define AMD_SRIOV_MSG_SIZE_KB                   1
-#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB           AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
-#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB           (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB        (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB   (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
+/*
+ * layout v2 (offsets are dynamically allocated and the offsets below are examples)
+ * 0           1KB         64KB        65KB        66KB           68KB                   132KB
+ * |  INITD_H  |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
+ * |   1KB     |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
+ *
+ * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
+ */
+
+/* v1 layout sizes */
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1			64
+#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1			1
+#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1			1
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1		2
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1		64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1		\
+	(AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
+	 AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+
+/* v1 offsets */
+#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1			0
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1		AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB			2048
+#define AMD_SRIOV_MSG_SIZE_KB_V1			1
+#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1		AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
+#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1		\
+	(AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1		\
+	(AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1	\
+	(AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1		\
+	(AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
+	 AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
 
 /*
  * PF2VF history log:
@@ -436,12 +457,12 @@ unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned
 #define _stringification(s) #s
 
 _Static_assert(
-	sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
-	"amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
+	sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB_V1 << 10,
+	"amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB_V1) " KB");
 
 _Static_assert(
-	sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
-	"amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
+	sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB_V1 << 10,
+	"amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB_V1) " KB");
 
 _Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
 	       "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v4 2/6] drm/amdgpu: Add SRIOV crit_region_version support
  2025-10-14 20:13 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
@ 2025-10-14 20:13 ` Ellen Pan
  2025-10-15 13:02   ` Alex Deucher
  2025-10-14 20:13 ` [PATCH v4 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init Ellen Pan
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Ellen Pan @ 2025-10-14 20:13 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

1. Added enum amd_sriov_crit_region_version to support multi versions
2. Added logic in SRIOV mailbox to recognize crit_region version during
   req_gpu_init_data

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  8 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  5 +++++
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c       | 23 ++++++++++++++-------
 4 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e95adf0407a0..39ab7d00379b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -150,9 +150,10 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
 		virt->ops->req_init_data(adev);
 
 	if (adev->virt.req_init_data_ver > 0)
-		DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+		dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
+				 adev->virt.req_init_data_ver);
 	else
-		DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+		dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index d1172c8e58c4..36247a160aa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -262,6 +262,11 @@ struct amdgpu_virt_ras {
 
 DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
 
+struct amdgpu_virt_region {
+	uint32_t offset;
+	uint32_t size_kb;
+};
+
 /* GPU virtualization */
 struct amdgpu_virt {
 	uint32_t			caps;
@@ -289,6 +294,9 @@ struct amdgpu_virt {
 	bool ras_init_done;
 	uint32_t reg_access;
 
+	/* dynamic(v2) critical regions */
+	struct amdgpu_virt_region init_data_header;
+
 	/* vf2pf message */
 	struct delayed_work vf2pf_work;
 	uint32_t vf2pf_update_interval_ms;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 3b35154e2df6..b53caab5b706 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -65,6 +65,11 @@
 	(AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
 	 AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
 
+enum amd_sriov_crit_region_version {
+	GPU_CRIT_REGION_V1 = 1,
+	GPU_CRIT_REGION_V2 = 2,
+};
+
 /*
  * PF2VF history log:
  * v1 defined in amdgim
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index e5282a5d05d9..cd5b2f07edb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -222,12 +222,20 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
 				adev->virt.req_init_data_ver = 0;
 		} else {
 			if (req == IDH_REQ_GPU_INIT_DATA) {
-				adev->virt.req_init_data_ver =
-					RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
-
-				/* assume V1 in case host doesn't set version number */
-				if (adev->virt.req_init_data_ver < 1)
-					adev->virt.req_init_data_ver = 1;
+				switch (RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) {
+				case GPU_CRIT_REGION_V2:
+					adev->virt.req_init_data_ver = GPU_CRIT_REGION_V2;
+					adev->virt.init_data_header.offset =
+						RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
+					adev->virt.init_data_header.size_kb =
+						RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3);
+					break;
+				default:
+					adev->virt.req_init_data_ver = GPU_CRIT_REGION_V1;
+					adev->virt.init_data_header.offset = -1;
+					adev->virt.init_data_header.size_kb = 0;
+					break;
+				}
 			}
 		}
 
@@ -285,7 +293,8 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
 
 static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
 {
-	return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+	return xgpu_nv_send_access_requests_with_param(adev, IDH_REQ_GPU_INIT_DATA,
+			0, GPU_CRIT_REGION_V2, 0);
 }
 
 static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v4 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init
  2025-10-14 20:13 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
  2025-10-14 20:13 ` [PATCH v4 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
@ 2025-10-14 20:13 ` Ellen Pan
  2025-10-15  9:09   ` Lazar, Lijo
  2025-10-14 20:13 ` [PATCH v4 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV Ellen Pan
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Ellen Pan @ 2025-10-14 20:13 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

    1. Introduced amdgpu_virt_init_critical_region during VF init.
     - VFs use init_data_header_offset and init_data_header_size_kb
            transmitted via PF2VF mailbox to fetch the offset of
            critical regions' offsets/sizes in VRAM and save to
            adev->virt.crit_region_offsets and adev->virt.crit_region_sizes_kb.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 106 ++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |   9 ++
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  31 ++++++
 4 files changed, 150 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 929936c8d87c..351cfe03a1aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2754,6 +2754,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 		r = amdgpu_virt_request_full_gpu(adev, true);
 		if (r)
 			return r;
+
+		r = amdgpu_virt_init_critical_region(adev);
+		if (r)
+			return r;
 	}
 
 	switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 39ab7d00379b..27235f3f3b81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -44,6 +44,8 @@
 		vf2pf_info->ucode_info[ucode].version = ver; \
 	} while (0)
 
+#define mmRCC_CONFIG_MEMSIZE    0xde3
+
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
 {
 	/* By now all MMIO pages except mailbox are blocked */
@@ -843,6 +845,110 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
 	adev->virt.ras.cper_rptr = 0;
 }
 
+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
+{
+	uint32_t sum = 0;
+
+	if (buf_start >= buf_end)
+		return 0;
+
+	for (; buf_start < buf_end; buf_start++)
+		sum += buf_start[0];
+
+	return 0xffffffff - sum;
+}
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
+{
+	struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
+	uint32_t init_hdr_offset = adev->virt.init_data_header.offset;
+	uint32_t init_hdr_size = adev->virt.init_data_header.size_kb << 10;
+	uint64_t vram_size;
+	int r = 0;
+	uint8_t checksum = 0;
+
+	/* Skip below init if critical region version != v2 */
+	if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
+		return 0;
+
+	if (init_hdr_offset < 0) {
+		dev_err(adev->dev, "Invalid init header offset\n");
+		return -EINVAL;
+	}
+
+	vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+	if (!vram_size || vram_size == U32_MAX)
+		return -EINVAL;
+	vram_size <<= 20;
+
+	if ((init_hdr_offset + init_hdr_size) > vram_size) {
+		dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
+		return -EINVAL;
+	}
+
+	/* Allocate for init_data_hdr */
+	init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
+	if (!init_data_hdr)
+		return -ENOMEM;
+
+	amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
+					sizeof(struct amd_sriov_msg_init_data_header), false);
+
+	if (strncmp(init_data_hdr->signature,
+				AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
+				AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
+		dev_err(adev->dev, "Invalid init data signature: %.4s\n",
+			init_data_hdr->signature);
+		r = -EINVAL;
+		goto out;
+	}
+
+	checksum = amdgpu_virt_crit_region_calc_checksum(
+			(uint8_t *)&init_data_hdr->initdata_offset,
+			(uint8_t *)init_data_hdr +
+			sizeof(struct amd_sriov_msg_init_data_header));
+	if (checksum != init_data_hdr->checksum) {
+		dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
+				checksum, init_data_hdr->checksum);
+		r = -EINVAL;
+		goto out;
+	}
+
+	/* Initialize critical region offsets */
+	adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
+		init_data_hdr->ip_discovery_offset;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
+		init_data_hdr->vbios_img_offset;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
+		init_data_hdr->ras_tele_info_offset;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
+		init_data_hdr->dataexchange_offset;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
+		init_data_hdr->bad_page_info_offset;
+
+	/* Initialize critical region sizes */
+	adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
+		init_data_hdr->ip_discovery_size_in_kb;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
+		init_data_hdr->vbios_img_size_in_kb;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
+		init_data_hdr->ras_tele_info_size_in_kb;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
+		init_data_hdr->dataexchange_size_in_kb;
+	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
+		init_data_hdr->bad_page_size_in_kb;
+
+	adev->virt.is_dynamic_crit_regn_enabled = true;
+
+out:
+	kfree(init_data_hdr);
+	init_data_hdr = NULL;
+
+	return r;
+}
+
 void amdgpu_virt_init(struct amdgpu_device *adev)
 {
 	bool is_sriov = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 36247a160aa6..5c1dce9731e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -54,6 +54,10 @@
 
 #define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
 
+/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */
+#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA"
+#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN   4
+
 enum amdgpu_sriov_vf_mode {
 	SRIOV_VF_MODE_BARE_METAL = 0,
 	SRIOV_VF_MODE_ONE_VF,
@@ -296,6 +300,9 @@ struct amdgpu_virt {
 
 	/* dynamic(v2) critical regions */
 	struct amdgpu_virt_region init_data_header;
+	struct amdgpu_virt_region crit_regn;
+	struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
+	bool is_dynamic_crit_regn_enabled;
 
 	/* vf2pf message */
 	struct delayed_work vf2pf_work;
@@ -432,6 +439,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
 void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
 void amdgpu_virt_init(struct amdgpu_device *adev);
 
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+
 bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
 int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
 void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index b53caab5b706..d15c256f9abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -70,6 +70,37 @@ enum amd_sriov_crit_region_version {
 	GPU_CRIT_REGION_V2 = 2,
 };
 
+/* v2 layout offset enum (in order of allocation) */
+enum amd_sriov_msg_table_id_enum {
+	AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
+	AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
+	AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
+	AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
+	AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
+	AMD_SRIOV_MSG_INITD_H_TABLE_ID,
+	AMD_SRIOV_MSG_MAX_TABLE_ID,
+};
+
+struct amd_sriov_msg_init_data_header {
+	char     signature[4];  /* "INDA"  */
+	uint32_t version;
+	uint32_t checksum;
+	uint32_t initdata_offset; /* 0 */
+	uint32_t initdata_size_in_kb; /* 5MB */
+	uint32_t valid_tables;
+	uint32_t vbios_img_offset;
+	uint32_t vbios_img_size_in_kb;
+	uint32_t dataexchange_offset;
+	uint32_t dataexchange_size_in_kb;
+	uint32_t ras_tele_info_offset;
+	uint32_t ras_tele_info_size_in_kb;
+	uint32_t ip_discovery_offset;
+	uint32_t ip_discovery_size_in_kb;
+	uint32_t bad_page_info_offset;
+	uint32_t bad_page_size_in_kb;
+	uint32_t reserved[8];
+};
+
 /*
  * PF2VF history log:
  * v1 defined in amdgim
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v4 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV
  2025-10-14 20:13 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
  2025-10-14 20:13 ` [PATCH v4 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
  2025-10-14 20:13 ` [PATCH v4 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init Ellen Pan
@ 2025-10-14 20:13 ` Ellen Pan
  2025-10-15 13:12   ` Alex Deucher
  2025-10-14 20:13 ` [PATCH v4 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Ellen Pan @ 2025-10-14 20:13 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

- During guest driver init, as VFs receive the PF msg to
	init the dynamic critical region (v2), VFs reuse fw_vram_usage_*
	from ttm to store critical region tables in a 5MB chunk.

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c  | 29 ++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 12 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |  9 ++++++
 3 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index c7d32fb216e4..636385c80f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -181,19 +181,22 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
 	u8 frev, crev;
 	int usage_bytes = 0;
 
-	if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
-		if (frev == 2 && crev == 1) {
-			fw_usage_v2_1 =
-				(struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
-			amdgpu_atomfirmware_allocate_fb_v2_1(adev,
-					fw_usage_v2_1,
-					&usage_bytes);
-		} else if (frev >= 2 && crev >= 2) {
-			fw_usage_v2_2 =
-				(struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
-			amdgpu_atomfirmware_allocate_fb_v2_2(adev,
-					fw_usage_v2_2,
-					&usage_bytes);
+	/* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
+	if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
+		if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+			if (frev == 2 && crev == 1) {
+				fw_usage_v2_1 =
+					(struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+				amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+						fw_usage_v2_1,
+						&usage_bytes);
+			} else if (frev >= 2 && crev >= 2) {
+				fw_usage_v2_2 =
+					(struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+				amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+						fw_usage_v2_2,
+						&usage_bytes);
+			}
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 96bd0185f936..b5148a33b6f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1944,19 +1944,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 
 	/*
-	 *The reserved vram for driver must be pinned to the specified
-	 *place on the VRAM, so reserve it early.
+	 * The reserved VRAM for the driver must be pinned to a specific
+	 * location in VRAM, so reserve it early.
 	 */
 	r = amdgpu_ttm_drv_reserve_vram_init(adev);
 	if (r)
 		return r;
 
 	/*
-	 * only NAVI10 and onwards ASIC support for IP discovery.
-	 * If IP discovery enabled, a block of memory should be
-	 * reserved for IP discovey.
+	 * only NAVI10 and later ASICs support IP discovery.
+	 * If IP discovery is enabled, a block of memory should be
+	 * reserved for it.
 	 */
-	if (adev->mman.discovery_bin) {
+	if (adev->mman.discovery_bin && !adev->virt.is_dynamic_crit_regn_enabled) {
 		r = amdgpu_ttm_reserve_tmr(adev);
 		if (r)
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 27235f3f3b81..820dab538164 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -940,6 +940,15 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
 	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
 		init_data_hdr->bad_page_size_in_kb;
 
+	/* reserved memory starts from crit region base offset with the size of 5MB */
+	adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
+	adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
+	dev_info(adev->dev,
+		"critical region v%d requested to reserve memory start at %08x with %d KB.\n",
+			init_data_hdr->version,
+			adev->mman.fw_vram_usage_start_offset,
+			adev->mman.fw_vram_usage_size >> 10);
+
 	adev->virt.is_dynamic_crit_regn_enabled = true;
 
 out:
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v4 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets
  2025-10-14 20:13 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
                   ` (2 preceding siblings ...)
  2025-10-14 20:13 ` [PATCH v4 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV Ellen Pan
@ 2025-10-14 20:13 ` Ellen Pan
  2025-10-15  9:37   ` Lazar, Lijo
  2025-10-15 13:23   ` Alex Deucher
  2025-10-14 20:13 ` [PATCH v4 6/6] drm/amdgpu: Add logic for VF data exchange region " Ellen Pan
  2025-10-15 13:02 ` [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Alex Deucher
  5 siblings, 2 replies; 14+ messages in thread
From: Ellen Pan @ 2025-10-14 20:13 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

1. Added VF logic in amdgpu_virt to init IP discovery using the offsets from dynamic(v2) critical regions;
2. Added VF logic in amdgpu_virt to init bios image using the offsets from dynamic(v2) critical regions;

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c      | 36 ++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 23 +++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      | 63 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |  2 +
 4 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 00e96419fcda..5960ab1be4d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -96,11 +96,12 @@ void amdgpu_bios_release(struct amdgpu_device *adev)
  * part of the system bios.  On boot, the system bios puts a
  * copy of the igp rom at the start of vram if a discrete card is
  * present.
- * For SR-IOV, the vbios image is also put in VRAM in the VF.
+ * For SR-IOV, if dynamic critical region is not enabled,
+ * the vbios image is also put at the start of VRAM in the VF.
  */
 static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
 {
-	uint8_t __iomem *bios;
+	uint8_t __iomem *bios = NULL;
 	resource_size_t vram_base;
 	resource_size_t size = 256 * 1024; /* ??? */
 
@@ -114,18 +115,35 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
 
 	adev->bios = NULL;
 	vram_base = pci_resource_start(adev->pdev, 0);
-	bios = ioremap_wc(vram_base, size);
-	if (!bios)
-		return false;
+
+	/* For SR-IOV, if dynamic critical region is enabled,
+	* the vbios image is put at a dynamic offset of VRAM in the VF.
+	* If dynamic critical region is disabled, follow the same seq as on baremetal.
+	*/
+	if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
+		bios = ioremap_wc(vram_base, size);
+		if (!bios)
+				return false;
+	}
 
 	adev->bios = kmalloc(size, GFP_KERNEL);
 	if (!adev->bios) {
-		iounmap(bios);
-		return false;
+			if (bios)
+				iounmap(bios);
+			return false;
 	}
+
+	if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+		if (amdgpu_virt_get_dynamic_data_info(adev,
+					AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size))
+			return false;
+	}
+
 	adev->bios_size = size;
-	memcpy_fromio(adev->bios, bios, size);
-	iounmap(bios);
+	if (bios) {
+		memcpy_fromio(adev->bios, bios, size);
+		iounmap(bios);
+	}
 
 	if (!check_atom_bios(adev, size)) {
 		amdgpu_bios_release(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 73401f0aeb34..e035dba96790 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -283,7 +283,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
 		 * wait for this to complete.  Once the C2PMSG is updated, we can
 		 * continue.
 		 */
-
+		
 		for (i = 0; i < 2000; i++) {
 			msg = RREG32(mmMP0_SMN_C2PMSG_33);
 			if (msg & 0x80000000)
@@ -299,13 +299,28 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
 		vram_size <<= 20;
 
 	if (sz_valid) {
-		uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
-		amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
-					  adev->mman.discovery_tmr_size, false);
+		if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+			/* For SRIOV VFs with dynamic critical region enabled,
+			 * we will get the IPD binary via below call.
+			 * If dynamic critical is disabled, fall through to normal seq.
+			 */
+			valid_size = vram_size;
+			if (amdgpu_virt_get_dynamic_data_info(adev,
+						AMD_SRIOV_MSG_IPD_TABLE_ID, binary, &valid_size)) {
+				ret = -EINVAL;
+				goto exit;
+			}
+		} else {
+			uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+
+			amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+					adev->mman.discovery_tmr_size, false);
+		}
 	} else {
 		ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
 	}
 
+exit:
 	if (ret)
 		dev_err(adev->dev,
 			"failed to read discovery info from memory, vram size read: %llx",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 820dab538164..fef4ebb0f879 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -940,6 +940,14 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
 	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
 		init_data_hdr->bad_page_size_in_kb;
 
+	/* Validation for critical region info */
+	 if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
+		dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
+				adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
+		r = -EINVAL;
+		goto out;
+	}
+
 	/* reserved memory starts from crit region base offset with the size of 5MB */
 	adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
 	adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
@@ -958,6 +966,61 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
 	return r;
 }
 
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+	int data_id, uint8_t *binary, uint64_t *size)
+{
+	uint32_t data_offset = 0;
+	uint32_t data_size = 0;
+	enum amd_sriov_msg_table_id_enum data_table_id = data_id;
+	char *data_name;
+
+	if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+		return -EINVAL;
+
+	data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
+	data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
+
+	switch (data_id) {
+	case AMD_SRIOV_MSG_IPD_TABLE_ID:
+		data_name = "IPD";
+		if (!IS_ALIGNED(data_offset, 4) || !IS_ALIGNED(data_size, 4)) {
+			dev_err(adev->dev, "IP discovery data not aligned to 4 bytes\n");
+			return -EINVAL;
+		}
+
+		amdgpu_device_vram_access(adev,
+				(uint64_t)data_offset, (uint32_t *)binary, data_size, false);
+		if (!binary)
+			return -EINVAL;
+
+		if (((uint64_t)data_offset + (uint64_t)data_size) > *size)
+			return -EINVAL;
+
+		*size = (uint64_t)data_size;
+
+		break;
+
+	case AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID:
+		data_name = "BIOS";
+		if (data_size > *size) {
+			dev_err(adev->dev, "Invalid vbios size: 0x%x\n", data_size);
+			return -EINVAL;
+		}
+
+		amdgpu_device_vram_access(adev,
+				(uint64_t)data_offset, (uint32_t *)binary, data_size, false);
+
+		*size = (uint64_t)data_size;
+		break;
+	}
+
+	dev_info(adev->dev,
+		"Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+		data_name, data_offset, data_size);
+
+	return 0;
+}
+
 void amdgpu_virt_init(struct amdgpu_device *adev)
 {
 	bool is_sriov = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 5c1dce9731e1..a3ae1ff40e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -440,6 +440,8 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
 void amdgpu_virt_init(struct amdgpu_device *adev);
 
 int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+	int data_id, uint8_t *binary, uint64_t *size);
 
 bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
 int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v4 6/6] drm/amdgpu: Add logic for VF data exchange region to init from dynamic crit_region offsets
  2025-10-14 20:13 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
                   ` (3 preceding siblings ...)
  2025-10-14 20:13 ` [PATCH v4 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
@ 2025-10-14 20:13 ` Ellen Pan
  2025-10-15 13:09   ` Alex Deucher
  2025-10-15 13:02 ` [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Alex Deucher
  5 siblings, 1 reply; 14+ messages in thread
From: Ellen Pan @ 2025-10-14 20:13 UTC (permalink / raw)
  To: amd-gfx
  Cc: Alexander.Deucher, Christian.Koenig, Lijo.Lazar, Jeffrey.Chan,
	Ellen Pan

1. Added VF logic to init data exchange region using the offsets from dynamic(v2) critical regions;

Signed-off-by: Ellen Pan <yunru.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 104 ++++++++++++++++++-----
 1 file changed, 85 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index fef4ebb0f879..35cb716ec594 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -208,12 +208,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
 				    &adev->virt.mm_table.gpu_addr,
 				    (void *)&adev->virt.mm_table.cpu_addr);
 	if (r) {
-		DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+		dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
 		return r;
 	}
 
 	memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
-	DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+	dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
 		 adev->virt.mm_table.gpu_addr,
 		 adev->virt.mm_table.cpu_addr);
 	return 0;
@@ -393,7 +393,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
 			if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
 							AMDGPU_GPU_PAGE_SIZE,
 							&bo, NULL))
-				DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+				dev_dbg(adev->dev,
+						"RAS WARN: reserve vram for retired page %llx fail\n",
+						bp);
 			data->bps_bo[i] = bo;
 		}
 		data->last_reserved = i + 1;
@@ -661,10 +663,34 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
 	schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
 }
 
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+	uint32_t dataexchange_offset =
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+	uint32_t dataexchange_size =
+		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+	uint64_t pos = 0;
+
+	dev_info(adev->dev,
+			"Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+			dataexchange_offset, dataexchange_size);
+
+	if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+		dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+
+	pos = (uint64_t)dataexchange_offset;
+	amdgpu_device_vram_access(adev, pos, pfvf_data,
+			dataexchange_size, false);
+
+	return 0;
+}
+
 void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
 {
 	if (adev->virt.vf2pf_update_interval_ms != 0) {
-		DRM_INFO("clean up the vf2pf work item\n");
+		dev_info(adev->dev, "clean up the vf2pf work item\n");
 		cancel_delayed_work_sync(&adev->virt.vf2pf_work);
 		adev->virt.vf2pf_update_interval_ms = 0;
 	}
@@ -672,13 +698,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
 
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 {
+	uint32_t *pfvf_data = NULL;
+
 	adev->virt.fw_reserve.p_pf2vf = NULL;
 	adev->virt.fw_reserve.p_vf2pf = NULL;
 	adev->virt.vf2pf_update_interval_ms = 0;
 	adev->virt.vf2pf_update_retry_cnt = 0;
 
 	if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
-		DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+		dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
 	} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
 		/* go through this logic in ip_init and reset to init workqueue*/
 		amdgpu_virt_exchange_data(adev);
@@ -687,11 +715,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 		schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
 	} else if (adev->bios != NULL) {
 		/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
-		adev->virt.fw_reserve.p_pf2vf =
-			(struct amd_sriov_msg_pf2vf_info_header *)
-			(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+		if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+			pfvf_data =
+				kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+					GFP_KERNEL);
+			if (!pfvf_data) {
+				dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+				return;
+			}
 
-		amdgpu_virt_read_pf2vf_data(adev);
+			if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+				goto free_pfvf_data;
+
+			adev->virt.fw_reserve.p_pf2vf =
+				(struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+
+			amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+			kfree(pfvf_data);
+			pfvf_data = NULL;
+			adev->virt.fw_reserve.p_pf2vf = NULL;
+		} else {
+			adev->virt.fw_reserve.p_pf2vf =
+				(struct amd_sriov_msg_pf2vf_info_header *)
+				(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+			amdgpu_virt_read_pf2vf_data(adev);
+		}
 	}
 }
 
@@ -704,14 +755,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
 
 	if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
 		if (adev->mman.fw_vram_usage_va) {
-			adev->virt.fw_reserve.p_pf2vf =
-				(struct amd_sriov_msg_pf2vf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
-			adev->virt.fw_reserve.p_vf2pf =
-				(struct amd_sriov_msg_vf2pf_info_header *)
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
-			adev->virt.fw_reserve.ras_telemetry =
-				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+			if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+				adev->virt.fw_reserve.p_pf2vf =
+					(struct amd_sriov_msg_pf2vf_info_header *)
+					(adev->mman.fw_vram_usage_va +
+					adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+				adev->virt.fw_reserve.p_vf2pf =
+					(struct amd_sriov_msg_vf2pf_info_header *)
+					(adev->mman.fw_vram_usage_va +
+					adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+					(AMD_SRIOV_MSG_SIZE_KB_V1 << 10));
+				adev->virt.fw_reserve.ras_telemetry =
+					(adev->mman.fw_vram_usage_va +
+					adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+			} else {
+				adev->virt.fw_reserve.p_pf2vf =
+					(struct amd_sriov_msg_pf2vf_info_header *)
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+				adev->virt.fw_reserve.p_vf2pf =
+					(struct amd_sriov_msg_vf2pf_info_header *)
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+				adev->virt.fw_reserve.ras_telemetry =
+					(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+			}
 		} else if (adev->mman.drv_vram_usage_va) {
 			adev->virt.fw_reserve.p_pf2vf =
 				(struct amd_sriov_msg_pf2vf_info_header *)
@@ -819,7 +885,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
 			break;
 		default: /* other chip doesn't support SRIOV */
 			is_sriov = false;
-			DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+			dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
 			break;
 		}
 	}
@@ -1468,7 +1534,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
 	case AMDGPU_RAS_BLOCK__MPIO:
 		return RAS_TELEMETRY_GPU_BLOCK_MPIO;
 	default:
-		DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+		dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
 			      block);
 		return RAS_TELEMETRY_GPU_BLOCK_COUNT;
 	}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v4 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init
  2025-10-14 20:13 ` [PATCH v4 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init Ellen Pan
@ 2025-10-15  9:09   ` Lazar, Lijo
  0 siblings, 0 replies; 14+ messages in thread
From: Lazar, Lijo @ 2025-10-15  9:09 UTC (permalink / raw)
  To: Ellen Pan, amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Jeffrey.Chan



On 10/15/2025 1:43 AM, Ellen Pan wrote:
>      1. Introduced amdgpu_virt_init_critical_region during VF init.
>       - VFs use init_data_header_offset and init_data_header_size_kb
>              transmitted via PF2VF mailbox to fetch the offset of
>              critical regions' offsets/sizes in VRAM and save to
>              adev->virt.crit_region_offsets and adev->virt.crit_region_sizes_kb.
> 
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   4 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 106 ++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |   9 ++
>   drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  31 ++++++
>   4 files changed, 150 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 929936c8d87c..351cfe03a1aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2754,6 +2754,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
>   		r = amdgpu_virt_request_full_gpu(adev, true);
>   		if (r)
>   			return r;
> +
> +		r = amdgpu_virt_init_critical_region(adev);
> +		if (r)
> +			return r;
>   	}
>   
>   	switch (adev->asic_type) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 39ab7d00379b..27235f3f3b81 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -44,6 +44,8 @@
>   		vf2pf_info->ucode_info[ucode].version = ver; \
>   	} while (0)
>   
> +#define mmRCC_CONFIG_MEMSIZE    0xde3
> +
>   bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
>   {
>   	/* By now all MMIO pages except mailbox are blocked */
> @@ -843,6 +845,110 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
>   	adev->virt.ras.cper_rptr = 0;
>   }
>   
> +static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
> +{
> +	uint32_t sum = 0;
> +
> +	if (buf_start >= buf_end)
> +		return 0;
> +
> +	for (; buf_start < buf_end; buf_start++)
> +		sum += buf_start[0];
> +
> +	return 0xffffffff - sum;
> +}
> +
> +int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
> +{
> +	struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
> +	uint32_t init_hdr_offset = adev->virt.init_data_header.offset;
> +	uint32_t init_hdr_size = adev->virt.init_data_header.size_kb << 10;
> +	uint64_t vram_size;
> +	int r = 0;
> +	uint8_t checksum = 0;
> +
> +	/* Skip below init if critical region version != v2 */
> +	if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
> +		return 0;
> +
> +	if (init_hdr_offset < 0) {
> +		dev_err(adev->dev, "Invalid init header offset\n");
> +		return -EINVAL;
> +	}
> +
> +	vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
> +	if (!vram_size || vram_size == U32_MAX)
> +		return -EINVAL;
> +	vram_size <<= 20;
> +
> +	if ((init_hdr_offset + init_hdr_size) > vram_size) {
> +		dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
> +		return -EINVAL;
> +	}
> +
> +	/* Allocate for init_data_hdr */
> +	init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
> +	if (!init_data_hdr)
> +		return -ENOMEM;
> +
> +	amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
> +					sizeof(struct amd_sriov_msg_init_data_header), false);
> +
> +	if (strncmp(init_data_hdr->signature,
> +				AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
> +				AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
> +		dev_err(adev->dev, "Invalid init data signature: %.4s\n",
> +			init_data_hdr->signature);
> +		r = -EINVAL;
> +		goto out;
> +	}
> +
> +	checksum = amdgpu_virt_crit_region_calc_checksum(
> +			(uint8_t *)&init_data_hdr->initdata_offset,
> +			(uint8_t *)init_data_hdr +
> +			sizeof(struct amd_sriov_msg_init_data_header));
> +	if (checksum != init_data_hdr->checksum) {
> +		dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
> +				checksum, init_data_hdr->checksum);
> +		r = -EINVAL;
> +		goto out;
> +	}
> +
> +	/* Initialize critical region offsets */
> +	adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
> +		init_data_hdr->ip_discovery_offset;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
> +		init_data_hdr->vbios_img_offset;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
> +		init_data_hdr->ras_tele_info_offset;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
> +		init_data_hdr->dataexchange_offset;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
> +		init_data_hdr->bad_page_info_offset;
> +
> +	/* Initialize critical region sizes */
> +	adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
> +		init_data_hdr->ip_discovery_size_in_kb;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
> +		init_data_hdr->vbios_img_size_in_kb;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
> +		init_data_hdr->ras_tele_info_size_in_kb;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
> +		init_data_hdr->dataexchange_size_in_kb;
> +	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
> +		init_data_hdr->bad_page_size_in_kb;
> +
> +	adev->virt.is_dynamic_crit_regn_enabled = true;
> +
> +out:
> +	kfree(init_data_hdr);
> +	init_data_hdr = NULL;
> +
> +	return r;
> +}
> +
>   void amdgpu_virt_init(struct amdgpu_device *adev)
>   {
>   	bool is_sriov = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 36247a160aa6..5c1dce9731e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -54,6 +54,10 @@
>   
>   #define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
>   
> +/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */
> +#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA"
> +#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN   4
> +
>   enum amdgpu_sriov_vf_mode {
>   	SRIOV_VF_MODE_BARE_METAL = 0,
>   	SRIOV_VF_MODE_ONE_VF,
> @@ -296,6 +300,9 @@ struct amdgpu_virt {
>   
>   	/* dynamic(v2) critical regions */
>   	struct amdgpu_virt_region init_data_header;
> +	struct amdgpu_virt_region crit_regn;
> +	struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
> +	bool is_dynamic_crit_regn_enabled;
>   
>   	/* vf2pf message */
>   	struct delayed_work vf2pf_work;
> @@ -432,6 +439,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
>   void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
>   void amdgpu_virt_init(struct amdgpu_device *adev);
>   
> +int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
> +
>   bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
>   int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
>   void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> index b53caab5b706..d15c256f9abd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -70,6 +70,37 @@ enum amd_sriov_crit_region_version {
>   	GPU_CRIT_REGION_V2 = 2,
>   };
>   
> +/* v2 layout offset enum (in order of allocation) */
> +enum amd_sriov_msg_table_id_enum {
> +	AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
> +	AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
> +	AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
> +	AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
> +	AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
> +	AMD_SRIOV_MSG_INITD_H_TABLE_ID,
> +	AMD_SRIOV_MSG_MAX_TABLE_ID,
> +};
> +
> +struct amd_sriov_msg_init_data_header {
> +	char     signature[4];  /* "INDA"  */
> +	uint32_t version;
> +	uint32_t checksum;
> +	uint32_t initdata_offset; /* 0 */
> +	uint32_t initdata_size_in_kb; /* 5MB */
> +	uint32_t valid_tables;

Missed this earlier. Is this a mask of table_ids that are valid? Not 
seeing it used before assigning the offsets.

Thanks,
Lijo

> +	uint32_t vbios_img_offset;
> +	uint32_t vbios_img_size_in_kb;
> +	uint32_t dataexchange_offset;
> +	uint32_t dataexchange_size_in_kb;
> +	uint32_t ras_tele_info_offset;
> +	uint32_t ras_tele_info_size_in_kb;
> +	uint32_t ip_discovery_offset;
> +	uint32_t ip_discovery_size_in_kb;
> +	uint32_t bad_page_info_offset;
> +	uint32_t bad_page_size_in_kb;
> +	uint32_t reserved[8];
> +};
> +
>   /*
>    * PF2VF history log:
>    * v1 defined in amdgim


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets
  2025-10-14 20:13 ` [PATCH v4 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
@ 2025-10-15  9:37   ` Lazar, Lijo
  2025-10-15 13:23   ` Alex Deucher
  1 sibling, 0 replies; 14+ messages in thread
From: Lazar, Lijo @ 2025-10-15  9:37 UTC (permalink / raw)
  To: Ellen Pan, amd-gfx; +Cc: Alexander.Deucher, Christian.Koenig, Jeffrey.Chan



On 10/15/2025 1:43 AM, Ellen Pan wrote:
> 1. Added VF logic in amdgpu_virt to init IP discovery using the offsets from dynamic(v2) critical regions;
> 2. Added VF logic in amdgpu_virt to init bios image using the offsets from dynamic(v2) critical regions;
> 
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c      | 36 ++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 23 +++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      | 63 +++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |  2 +
>   4 files changed, 111 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> index 00e96419fcda..5960ab1be4d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> @@ -96,11 +96,12 @@ void amdgpu_bios_release(struct amdgpu_device *adev)
>    * part of the system bios.  On boot, the system bios puts a
>    * copy of the igp rom at the start of vram if a discrete card is
>    * present.
> - * For SR-IOV, the vbios image is also put in VRAM in the VF.
> + * For SR-IOV, if dynamic critical region is not enabled,
> + * the vbios image is also put at the start of VRAM in the VF.
>    */
>   static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
>   {
> -	uint8_t __iomem *bios;
> +	uint8_t __iomem *bios = NULL;
>   	resource_size_t vram_base;
>   	resource_size_t size = 256 * 1024; /* ??? */
>   
> @@ -114,18 +115,35 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
>   
>   	adev->bios = NULL;
>   	vram_base = pci_resource_start(adev->pdev, 0);
> -	bios = ioremap_wc(vram_base, size);
> -	if (!bios)
> -		return false;
> +
> +	/* For SR-IOV, if dynamic critical region is enabled,
> +	* the vbios image is put at a dynamic offset of VRAM in the VF.
> +	* If dynamic critical region is disabled, follow the same seq as on baremetal.
> +	*/
> +	if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
> +		bios = ioremap_wc(vram_base, size);
> +		if (!bios)
> +				return false;
> +	}
>   
>   	adev->bios = kmalloc(size, GFP_KERNEL);
>   	if (!adev->bios) {
> -		iounmap(bios);
> -		return false;
> +			if (bios)
> +				iounmap(bios);
> +			return false;
>   	}
> +
> +	if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
> +		if (amdgpu_virt_get_dynamic_data_info(adev,
> +					AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size))
> +			return false;
> +	}
> +

It may be cleaner to keep this code up - allocate, read the binary and 
then use a label to jump to verification (check_atom_bios) for a 
successful read.

>   	adev->bios_size = size;
> -	memcpy_fromio(adev->bios, bios, size);
> -	iounmap(bios);
> +	if (bios) {
> +		memcpy_fromio(adev->bios, bios, size);
> +		iounmap(bios);
> +	}
>   
>   	if (!check_atom_bios(adev, size)) {
>   		amdgpu_bios_release(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 73401f0aeb34..e035dba96790 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -283,7 +283,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
>   		 * wait for this to complete.  Once the C2PMSG is updated, we can
>   		 * continue.
>   		 */
> -
> +		
>   		for (i = 0; i < 2000; i++) {
>   			msg = RREG32(mmMP0_SMN_C2PMSG_33);
>   			if (msg & 0x80000000)
> @@ -299,13 +299,28 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
>   		vram_size <<= 20;
>   
>   	if (sz_valid) {
> -		uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
> -		amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
> -					  adev->mman.discovery_tmr_size, false);
> +		if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
> +			/* For SRIOV VFs with dynamic critical region enabled,
> +			 * we will get the IPD binary via below call.
> +			 * If dynamic critical is disabled, fall through to normal seq.
> +			 */
> +			valid_size = vram_size;

Only mman.discovery_tmr_size is the allocation size of binary.
> +			if (amdgpu_virt_get_dynamic_data_info(adev,
> +						AMD_SRIOV_MSG_IPD_TABLE_ID, binary, &valid_size)) {
> +				ret = -EINVAL;
> +				goto exit;
> +			}
> +		} else {
> +			uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
> +
> +			amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
> +					adev->mman.discovery_tmr_size, false);
> +		}
>   	} else {
>   		ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
>   	}
>   
> +exit:
>   	if (ret)
>   		dev_err(adev->dev,
>   			"failed to read discovery info from memory, vram size read: %llx",
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 820dab538164..fef4ebb0f879 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -940,6 +940,14 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
>   	adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
>   		init_data_hdr->bad_page_size_in_kb;
>   
> +	/* Validation for critical region info */
> +	 if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
> +		dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
> +				adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
> +		r = -EINVAL;
> +		goto out;
> +	}
> +
>   	/* reserved memory starts from crit region base offset with the size of 5MB */
>   	adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
>   	adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
> @@ -958,6 +966,61 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
>   	return r;
>   }
>   
> +int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
> +	int data_id, uint8_t *binary, uint64_t *size)
> +{
> +	uint32_t data_offset = 0;
> +	uint32_t data_size = 0;
> +	enum amd_sriov_msg_table_id_enum data_table_id = data_id;
> +	char *data_name;
> +
> +	if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
> +		return -EINVAL;
> +
> +	data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
> +	data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
> +
> +	switch (data_id) {
> +	case AMD_SRIOV_MSG_IPD_TABLE_ID:
> +		data_name = "IPD";
> +		if (!IS_ALIGNED(data_offset, 4) || !IS_ALIGNED(data_size, 4)) {
> +			dev_err(adev->dev, "IP discovery data not aligned to 4 bytes\n");
> +			return -EINVAL;
> +		}

I think this should also be part of table verification.

> +
> +		amdgpu_device_vram_access(adev,
> +				(uint64_t)data_offset, (uint32_t *)binary, data_size, false);
> +		if (!binary)
> +			return -EINVAL;

This NULL check should be before reading any content to the binary.

> +
> +		if (((uint64_t)data_offset + (uint64_t)data_size) > *size)
> +			return -EINVAL;

This doesn't look correct either.

You may generically do like -
	// not valid binary or not enough size allocated
	if (!binary || !size || *size < data_size)
		return -EINVAL;

	// otherwise, copy the content
	amdgpu_device_vram_access(adev, (uint64_t)data_offset, (uint32_t 
*)binary, data_size, false);
	*size = (uint64_t)data_size; // update the size as out param.
> +
> +		*size = (uint64_t)data_size;
> +
> +		break;
> +
> +	case AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID:
> +		data_name = "BIOS";

If this name is preferred for printing, probably return as 
amdgpu_virt_get_dyn_table_name(tbl_id) so that it can be used at other 
places as well.
> +		if (data_size > *size) {
> +			dev_err(adev->dev, "Invalid vbios size: 0x%x\n", data_size);
> +			return -EINVAL;
> +		}
> +
> +		amdgpu_device_vram_access(adev,
> +				(uint64_t)data_offset, (uint32_t *)binary, data_size, false);
> +
> +		*size = (uint64_t)data_size;
> +		break;
> +	}
> +
> +	dev_info(adev->dev,
> +		"Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
> +		data_name, data_offset, data_size);

This could be kept as dev_dbg().

Thanks,
Lijo

> +
> +	return 0;
> +}
> +
>   void amdgpu_virt_init(struct amdgpu_device *adev)
>   {
>   	bool is_sriov = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 5c1dce9731e1..a3ae1ff40e84 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -440,6 +440,8 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
>   void amdgpu_virt_init(struct amdgpu_device *adev);
>   
>   int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
> +int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
> +	int data_id, uint8_t *binary, uint64_t *size);
>   
>   bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
>   int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix
  2025-10-14 20:13 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
                   ` (4 preceding siblings ...)
  2025-10-14 20:13 ` [PATCH v4 6/6] drm/amdgpu: Add logic for VF data exchange region " Ellen Pan
@ 2025-10-15 13:02 ` Alex Deucher
  5 siblings, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2025-10-15 13:02 UTC (permalink / raw)
  To: Ellen Pan
  Cc: amd-gfx, Alexander.Deucher, Christian.Koenig, Lijo.Lazar,
	Jeffrey.Chan

On Tue, Oct 14, 2025 at 4:21 PM Ellen Pan <yunru.pan@amd.com> wrote:
>
>  - This change prepares the later patches to introduce the _v2 suffix to SRIOV critical regions
>
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 20 ++++----
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 57 ++++++++++++++-------
>  2 files changed, 49 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 3328ab63376b..e95adf0407a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -686,7 +686,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
>                 /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
>                 adev->virt.fw_reserve.p_pf2vf =
>                         (struct amd_sriov_msg_pf2vf_info_header *)
> -                       (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
> +                       (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
>
>                 amdgpu_virt_read_pf2vf_data(adev);
>         }
> @@ -703,21 +703,21 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
>                 if (adev->mman.fw_vram_usage_va) {
>                         adev->virt.fw_reserve.p_pf2vf =
>                                 (struct amd_sriov_msg_pf2vf_info_header *)
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
> +                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
>                         adev->virt.fw_reserve.p_vf2pf =
>                                 (struct amd_sriov_msg_vf2pf_info_header *)
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
> +                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
>                         adev->virt.fw_reserve.ras_telemetry =
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
> +                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
>                 } else if (adev->mman.drv_vram_usage_va) {
>                         adev->virt.fw_reserve.p_pf2vf =
>                                 (struct amd_sriov_msg_pf2vf_info_header *)
> -                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
> +                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
>                         adev->virt.fw_reserve.p_vf2pf =
>                                 (struct amd_sriov_msg_vf2pf_info_header *)
> -                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
> +                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
>                         adev->virt.fw_reserve.ras_telemetry =
> -                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
> +                               (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
>                 }
>
>                 amdgpu_virt_read_pf2vf_data(adev);
> @@ -1304,7 +1304,7 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
>         checksum = host_telemetry->header.checksum;
>         used_size = host_telemetry->header.used_size;
>
> -       if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
> +       if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
>                 return 0;
>
>         tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
> @@ -1383,7 +1383,7 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
>         checksum = host_telemetry->header.checksum;
>         used_size = host_telemetry->header.used_size;
>
> -       if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
> +       if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
>                 return -EINVAL;
>
>         cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
> @@ -1515,7 +1515,7 @@ static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
>         checksum = host_telemetry->header.checksum;
>         used_size = host_telemetry->header.used_size;
>
> -       if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
> +       if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
>                 return 0;
>
>         tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> index 3a79ed7d8031..3b35154e2df6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -23,26 +23,47 @@
>  #ifndef AMDGV_SRIOV_MSG__H_
>  #define AMDGV_SRIOV_MSG__H_
>
> -/* unit in kilobytes */
> -#define AMD_SRIOV_MSG_VBIOS_OFFSET          0
> -#define AMD_SRIOV_MSG_VBIOS_SIZE_KB         64
> -#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
> -#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB   4
> -#define AMD_SRIOV_MSG_TMR_OFFSET_KB         2048
> -#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB      2
> -#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB             64
>  /*
> - * layout
> + * layout v1
>   * 0           64KB        65KB        66KB           68KB                   132KB
>   * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
>   * |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
>   */
>
> -#define AMD_SRIOV_MSG_SIZE_KB                   1
> -#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB           AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
> -#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB           (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
> -#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB        (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
> -#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB   (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
> +/*
> + * layout v2 (offsets are dynamically allocated and the offsets below are examples)
> + * 0           1KB         64KB        65KB        66KB           68KB                   132KB
> + * |  INITD_H  |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
> + * |   1KB     |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
> + *
> + * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
> + */
> +
> +/* v1 layout sizes */
> +#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1                 64
> +#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1                 1
> +#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1                 1
> +#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1              2
> +#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1         64
> +#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1          \
> +       (AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
> +        AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
> +
> +/* v1 offsets */
> +#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1                  0
> +#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1                AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
> +#define AMD_SRIOV_MSG_TMR_OFFSET_KB                    2048
> +#define AMD_SRIOV_MSG_SIZE_KB_V1                       1
> +#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1               AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
> +#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1               \
> +       (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB_V1)
> +#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1            \
> +       (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB_V1)
> +#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1       \
> +       (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
> +#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1         \
> +       (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
> +        AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
>
>  /*
>   * PF2VF history log:
> @@ -436,12 +457,12 @@ unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned
>  #define _stringification(s) #s
>
>  _Static_assert(
> -       sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
> -       "amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
> +       sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB_V1 << 10,
> +       "amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB_V1) " KB");
>
>  _Static_assert(
> -       sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
> -       "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
> +       sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB_V1 << 10,
> +       "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB_V1) " KB");
>
>  _Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
>                "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4 2/6] drm/amdgpu: Add SRIOV crit_region_version support
  2025-10-14 20:13 ` [PATCH v4 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
@ 2025-10-15 13:02   ` Alex Deucher
  0 siblings, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2025-10-15 13:02 UTC (permalink / raw)
  To: Ellen Pan
  Cc: amd-gfx, Alexander.Deucher, Christian.Koenig, Lijo.Lazar,
	Jeffrey.Chan

On Tue, Oct 14, 2025 at 4:21 PM Ellen Pan <yunru.pan@amd.com> wrote:
>
> 1. Added enum amd_sriov_crit_region_version to support multi versions
> 2. Added logic in SRIOV mailbox to recognize crit_region version during
>    req_gpu_init_data
>
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    |  5 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  8 +++++++
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |  5 +++++
>  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c       | 23 ++++++++++++++-------
>  4 files changed, 32 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index e95adf0407a0..39ab7d00379b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -150,9 +150,10 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
>                 virt->ops->req_init_data(adev);
>
>         if (adev->virt.req_init_data_ver > 0)
> -               DRM_INFO("host supports REQ_INIT_DATA handshake\n");
> +               dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
> +                                adev->virt.req_init_data_ver);
>         else
> -               DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
> +               dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index d1172c8e58c4..36247a160aa6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -262,6 +262,11 @@ struct amdgpu_virt_ras {
>
>  DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
>
> +struct amdgpu_virt_region {
> +       uint32_t offset;
> +       uint32_t size_kb;
> +};
> +
>  /* GPU virtualization */
>  struct amdgpu_virt {
>         uint32_t                        caps;
> @@ -289,6 +294,9 @@ struct amdgpu_virt {
>         bool ras_init_done;
>         uint32_t reg_access;
>
> +       /* dynamic(v2) critical regions */
> +       struct amdgpu_virt_region init_data_header;
> +
>         /* vf2pf message */
>         struct delayed_work vf2pf_work;
>         uint32_t vf2pf_update_interval_ms;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> index 3b35154e2df6..b53caab5b706 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -65,6 +65,11 @@
>         (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
>          AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
>
> +enum amd_sriov_crit_region_version {
> +       GPU_CRIT_REGION_V1 = 1,
> +       GPU_CRIT_REGION_V2 = 2,
> +};
> +
>  /*
>   * PF2VF history log:
>   * v1 defined in amdgim
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> index e5282a5d05d9..cd5b2f07edb8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> @@ -222,12 +222,20 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
>                                 adev->virt.req_init_data_ver = 0;
>                 } else {
>                         if (req == IDH_REQ_GPU_INIT_DATA) {
> -                               adev->virt.req_init_data_ver =
> -                                       RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
> -
> -                               /* assume V1 in case host doesn't set version number */
> -                               if (adev->virt.req_init_data_ver < 1)
> -                                       adev->virt.req_init_data_ver = 1;
> +                               switch (RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) {
> +                               case GPU_CRIT_REGION_V2:
> +                                       adev->virt.req_init_data_ver = GPU_CRIT_REGION_V2;
> +                                       adev->virt.init_data_header.offset =
> +                                               RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
> +                                       adev->virt.init_data_header.size_kb =
> +                                               RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3);
> +                                       break;
> +                               default:
> +                                       adev->virt.req_init_data_ver = GPU_CRIT_REGION_V1;
> +                                       adev->virt.init_data_header.offset = -1;
> +                                       adev->virt.init_data_header.size_kb = 0;
> +                                       break;
> +                               }
>                         }
>                 }
>
> @@ -285,7 +293,8 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
>
>  static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
>  {
> -       return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
> +       return xgpu_nv_send_access_requests_with_param(adev, IDH_REQ_GPU_INIT_DATA,
> +                       0, GPU_CRIT_REGION_V2, 0);
>  }
>
>  static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4 6/6] drm/amdgpu: Add logic for VF data exchange region to init from dynamic crit_region offsets
  2025-10-14 20:13 ` [PATCH v4 6/6] drm/amdgpu: Add logic for VF data exchange region " Ellen Pan
@ 2025-10-15 13:09   ` Alex Deucher
  0 siblings, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2025-10-15 13:09 UTC (permalink / raw)
  To: Ellen Pan
  Cc: amd-gfx, Alexander.Deucher, Christian.Koenig, Lijo.Lazar,
	Jeffrey.Chan

On Tue, Oct 14, 2025 at 4:14 PM Ellen Pan <yunru.pan@amd.com> wrote:
>
> 1. Added VF logic to init data exchange region using the offsets from dynamic(v2) critical regions;
>
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 104 ++++++++++++++++++-----
>  1 file changed, 85 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index fef4ebb0f879..35cb716ec594 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -208,12 +208,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
>                                     &adev->virt.mm_table.gpu_addr,
>                                     (void *)&adev->virt.mm_table.cpu_addr);
>         if (r) {
> -               DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
> +               dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
>                 return r;
>         }
>
>         memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
> -       DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
> +       dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
>                  adev->virt.mm_table.gpu_addr,
>                  adev->virt.mm_table.cpu_addr);
>         return 0;
> @@ -393,7 +393,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
>                         if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
>                                                         AMDGPU_GPU_PAGE_SIZE,
>                                                         &bo, NULL))
> -                               DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
> +                               dev_dbg(adev->dev,
> +                                               "RAS WARN: reserve vram for retired page %llx fail\n",
> +                                               bp);
>                         data->bps_bo[i] = bo;
>                 }
>                 data->last_reserved = i + 1;
> @@ -661,10 +663,34 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
>         schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
>  }
>
> +static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
> +{
> +       uint32_t dataexchange_offset =
> +               adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
> +       uint32_t dataexchange_size =
> +               adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
> +       uint64_t pos = 0;
> +
> +       dev_info(adev->dev,
> +                       "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
> +                       dataexchange_offset, dataexchange_size);
> +
> +       if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
> +               dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
> +               return -EINVAL;
> +       }
> +
> +       pos = (uint64_t)dataexchange_offset;
> +       amdgpu_device_vram_access(adev, pos, pfvf_data,
> +                       dataexchange_size, false);
> +
> +       return 0;
> +}
> +
>  void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
>  {
>         if (adev->virt.vf2pf_update_interval_ms != 0) {
> -               DRM_INFO("clean up the vf2pf work item\n");
> +               dev_info(adev->dev, "clean up the vf2pf work item\n");
>                 cancel_delayed_work_sync(&adev->virt.vf2pf_work);
>                 adev->virt.vf2pf_update_interval_ms = 0;
>         }
> @@ -672,13 +698,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
>
>  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
>  {
> +       uint32_t *pfvf_data = NULL;
> +
>         adev->virt.fw_reserve.p_pf2vf = NULL;
>         adev->virt.fw_reserve.p_vf2pf = NULL;
>         adev->virt.vf2pf_update_interval_ms = 0;
>         adev->virt.vf2pf_update_retry_cnt = 0;
>
>         if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
> -               DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
> +               dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
>         } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
>                 /* go through this logic in ip_init and reset to init workqueue*/
>                 amdgpu_virt_exchange_data(adev);
> @@ -687,11 +715,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
>                 schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
>         } else if (adev->bios != NULL) {
>                 /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
> -               adev->virt.fw_reserve.p_pf2vf =
> -                       (struct amd_sriov_msg_pf2vf_info_header *)
> -                       (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> +               if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> +                       pfvf_data =
> +                               kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
> +                                       GFP_KERNEL);
> +                       if (!pfvf_data) {
> +                               dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
> +                               return;
> +                       }
>
> -               amdgpu_virt_read_pf2vf_data(adev);
> +                       if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
> +                               goto free_pfvf_data;
> +
> +                       adev->virt.fw_reserve.p_pf2vf =
> +                               (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
> +
> +                       amdgpu_virt_read_pf2vf_data(adev);
> +
> +free_pfvf_data:
> +                       kfree(pfvf_data);
> +                       pfvf_data = NULL;
> +                       adev->virt.fw_reserve.p_pf2vf = NULL;
> +               } else {
> +                       adev->virt.fw_reserve.p_pf2vf =
> +                               (struct amd_sriov_msg_pf2vf_info_header *)
> +                               (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> +
> +                       amdgpu_virt_read_pf2vf_data(adev);
> +               }
>         }
>  }
>
> @@ -704,14 +755,29 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
>
>         if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
>                 if (adev->mman.fw_vram_usage_va) {
> -                       adev->virt.fw_reserve.p_pf2vf =
> -                               (struct amd_sriov_msg_pf2vf_info_header *)
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> -                       adev->virt.fw_reserve.p_vf2pf =
> -                               (struct amd_sriov_msg_vf2pf_info_header *)
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
> -                       adev->virt.fw_reserve.ras_telemetry =
> -                               (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
> +                       if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
> +                               adev->virt.fw_reserve.p_pf2vf =
> +                                       (struct amd_sriov_msg_pf2vf_info_header *)
> +                                       (adev->mman.fw_vram_usage_va +
> +                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
> +                               adev->virt.fw_reserve.p_vf2pf =
> +                                       (struct amd_sriov_msg_vf2pf_info_header *)
> +                                       (adev->mman.fw_vram_usage_va +
> +                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
> +                                       (AMD_SRIOV_MSG_SIZE_KB_V1 << 10));

AMD_SRIOV_MSG_SIZE_KB_V1?  Is this common for both V1 and V2?  Other
than that, this patch looks good to me.

Alex

> +                               adev->virt.fw_reserve.ras_telemetry =
> +                                       (adev->mman.fw_vram_usage_va +
> +                                       adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
> +                       } else {
> +                               adev->virt.fw_reserve.p_pf2vf =
> +                                       (struct amd_sriov_msg_pf2vf_info_header *)
> +                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
> +                               adev->virt.fw_reserve.p_vf2pf =
> +                                       (struct amd_sriov_msg_vf2pf_info_header *)
> +                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
> +                               adev->virt.fw_reserve.ras_telemetry =
> +                                       (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
> +                       }
>                 } else if (adev->mman.drv_vram_usage_va) {
>                         adev->virt.fw_reserve.p_pf2vf =
>                                 (struct amd_sriov_msg_pf2vf_info_header *)
> @@ -819,7 +885,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
>                         break;
>                 default: /* other chip doesn't support SRIOV */
>                         is_sriov = false;
> -                       DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
> +                       dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
>                         break;
>                 }
>         }
> @@ -1468,7 +1534,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
>         case AMDGPU_RAS_BLOCK__MPIO:
>                 return RAS_TELEMETRY_GPU_BLOCK_MPIO;
>         default:
> -               DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
> +               dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
>                               block);
>                 return RAS_TELEMETRY_GPU_BLOCK_COUNT;
>         }
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV
  2025-10-14 20:13 ` [PATCH v4 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV Ellen Pan
@ 2025-10-15 13:12   ` Alex Deucher
  2025-10-15 14:48     ` Alex Deucher
  0 siblings, 1 reply; 14+ messages in thread
From: Alex Deucher @ 2025-10-15 13:12 UTC (permalink / raw)
  To: Ellen Pan
  Cc: amd-gfx, Alexander.Deucher, Christian.Koenig, Lijo.Lazar,
	Jeffrey.Chan

On Tue, Oct 14, 2025 at 4:21 PM Ellen Pan <yunru.pan@amd.com> wrote:
>
> - During guest driver init, as VFs receive PF msg to
>         init dynamic critical region(v2), VFs reuse fw_vram_usage_*
>          from ttm to store critical region tables in a 5MB chunk.
>
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c  | 29 ++++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 12 ++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |  9 ++++++
>  3 files changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
> index c7d32fb216e4..636385c80f64 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
> @@ -181,19 +181,22 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
>         u8 frev, crev;
>         int usage_bytes = 0;
>
> -       if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
> -               if (frev == 2 && crev == 1) {
> -                       fw_usage_v2_1 =
> -                               (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
> -                       amdgpu_atomfirmware_allocate_fb_v2_1(adev,
> -                                       fw_usage_v2_1,
> -                                       &usage_bytes);
> -               } else if (frev >= 2 && crev >= 2) {
> -                       fw_usage_v2_2 =
> -                               (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
> -                       amdgpu_atomfirmware_allocate_fb_v2_2(adev,
> -                                       fw_usage_v2_2,
> -                                       &usage_bytes);
> +       /* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
> +       if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
> +               if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {

Do you need this check here?  Is vram_usagebyfirmware valid on V2
systems?  If that table is not present on V2 systems, then you don't
need to change anything here since amdgpu_atom_parse_data_header()
will return an error.  Other than that, looks good to me.

Alex

> +                       if (frev == 2 && crev == 1) {
> +                               fw_usage_v2_1 =
> +                                       (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
> +                               amdgpu_atomfirmware_allocate_fb_v2_1(adev,
> +                                               fw_usage_v2_1,
> +                                               &usage_bytes);
> +                       } else if (frev >= 2 && crev >= 2) {
> +                               fw_usage_v2_2 =
> +                                       (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
> +                               amdgpu_atomfirmware_allocate_fb_v2_2(adev,
> +                                               fw_usage_v2_2,
> +                                               &usage_bytes);
> +                       }
>                 }
>         }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 96bd0185f936..b5148a33b6f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1944,19 +1944,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>                 return r;
>
>         /*
> -        *The reserved vram for driver must be pinned to the specified
> -        *place on the VRAM, so reserve it early.
> +        * The reserved VRAM for the driver must be pinned to a specific
> +        * location in VRAM, so reserve it early.
>          */
>         r = amdgpu_ttm_drv_reserve_vram_init(adev);
>         if (r)
>                 return r;
>
>         /*
> -        * only NAVI10 and onwards ASIC support for IP discovery.
> -        * If IP discovery enabled, a block of memory should be
> -        * reserved for IP discovey.
> +        * only NAVI10 and later ASICs support IP discovery.
> +        * If IP discovery is enabled, a block of memory should be
> +        * reserved for it.
>          */
> -       if (adev->mman.discovery_bin) {
> +       if (adev->mman.discovery_bin && !adev->virt.is_dynamic_crit_regn_enabled) {
>                 r = amdgpu_ttm_reserve_tmr(adev);
>                 if (r)
>                         return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 27235f3f3b81..820dab538164 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -940,6 +940,15 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
>         adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
>                 init_data_hdr->bad_page_size_in_kb;
>
> +       /* reserved memory starts from crit region base offset with the size of 5MB */
> +       adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
> +       adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
> +       dev_info(adev->dev,
> +               "critical region v%d requested to reserve memory start at %08x with %d KB.\n",
> +                       init_data_hdr->version,
> +                       adev->mman.fw_vram_usage_start_offset,
> +                       adev->mman.fw_vram_usage_size >> 10);
> +
>         adev->virt.is_dynamic_crit_regn_enabled = true;
>
>  out:
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets
  2025-10-14 20:13 ` [PATCH v4 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
  2025-10-15  9:37   ` Lazar, Lijo
@ 2025-10-15 13:23   ` Alex Deucher
  1 sibling, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2025-10-15 13:23 UTC (permalink / raw)
  To: Ellen Pan
  Cc: amd-gfx, Alexander.Deucher, Christian.Koenig, Lijo.Lazar,
	Jeffrey.Chan

On Tue, Oct 14, 2025 at 4:14 PM Ellen Pan <yunru.pan@amd.com> wrote:
>
> 1. Added VF logic in amdgpu_virt to init IP discovery using the offsets from dynamic(v2) critical regions;
> 2. Added VF logic in amdgpu_virt to init bios image using the offsets from dynamic(v2) critical regions;
>
> Signed-off-by: Ellen Pan <yunru.pan@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c      | 36 ++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 23 +++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      | 63 +++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |  2 +
>  4 files changed, 111 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> index 00e96419fcda..5960ab1be4d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> @@ -96,11 +96,12 @@ void amdgpu_bios_release(struct amdgpu_device *adev)
>   * part of the system bios.  On boot, the system bios puts a
>   * copy of the igp rom at the start of vram if a discrete card is
>   * present.
> - * For SR-IOV, the vbios image is also put in VRAM in the VF.
> + * For SR-IOV, if dynamic critical region is not enabled,
> + * the vbios image is also put at the start of VRAM in the VF.
>   */
>  static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
>  {
> -       uint8_t __iomem *bios;
> +       uint8_t __iomem *bios = NULL;
>         resource_size_t vram_base;
>         resource_size_t size = 256 * 1024; /* ??? */
>
> @@ -114,18 +115,35 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
>
>         adev->bios = NULL;
>         vram_base = pci_resource_start(adev->pdev, 0);
> -       bios = ioremap_wc(vram_base, size);
> -       if (!bios)
> -               return false;
> +
> +       /* For SR-IOV, if dynamic critical region is enabled,
> +       * the vbios image is put at a dynamic offset of VRAM in the VF.
> +       * If dynamic critical region is disabled, follow the same seq as on baremetal.
> +       */
> +       if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
> +               bios = ioremap_wc(vram_base, size);
> +               if (!bios)
> +                               return false;
> +       }
>
>         adev->bios = kmalloc(size, GFP_KERNEL);
>         if (!adev->bios) {
> -               iounmap(bios);
> -               return false;
> +                       if (bios)
> +                               iounmap(bios);
> +                       return false;
>         }
> +
> +       if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
> +               if (amdgpu_virt_get_dynamic_data_info(adev,
> +                                       AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size))
> +                       return false;
> +       }
> +
>         adev->bios_size = size;
> -       memcpy_fromio(adev->bios, bios, size);
> -       iounmap(bios);
> +       if (bios) {
> +               memcpy_fromio(adev->bios, bios, size);
> +               iounmap(bios);
> +       }

I think it would be cleaner to just have a single conditional in this
function.  E.g.,

if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
 /* handle v2 vbios fetching */
} else {
 /* existing logic */
}

if (!check_atom_bios(adev, size)) {
        amdgpu_bios_release(adev);
            return false;
}

...

>
>         if (!check_atom_bios(adev, size)) {
>                 amdgpu_bios_release(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 73401f0aeb34..e035dba96790 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -283,7 +283,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
>                  * wait for this to complete.  Once the C2PMSG is updated, we can
>                  * continue.
>                  */
> -
> +

accidental whitespace change.

Alex

>                 for (i = 0; i < 2000; i++) {
>                         msg = RREG32(mmMP0_SMN_C2PMSG_33);
>                         if (msg & 0x80000000)
> @@ -299,13 +299,28 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
>                 vram_size <<= 20;
>
>         if (sz_valid) {
> -               uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
> -               amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
> -                                         adev->mman.discovery_tmr_size, false);
> +               if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
> +                       /* For SRIOV VFs with dynamic critical region enabled,
> +                        * we will get the IPD binary via below call.
> +                        * If dynamic critical is disabled, fall through to normal seq.
> +                        */
> +                       valid_size = vram_size;
> +                       if (amdgpu_virt_get_dynamic_data_info(adev,
> +                                               AMD_SRIOV_MSG_IPD_TABLE_ID, binary, &valid_size)) {
> +                               ret = -EINVAL;
> +                               goto exit;
> +                       }
> +               } else {
> +                       uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
> +
> +                       amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
> +                                       adev->mman.discovery_tmr_size, false);
> +               }
>         } else {
>                 ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
>         }
>
> +exit:
>         if (ret)
>                 dev_err(adev->dev,
>                         "failed to read discovery info from memory, vram size read: %llx",
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 820dab538164..fef4ebb0f879 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -940,6 +940,14 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
>         adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
>                 init_data_hdr->bad_page_size_in_kb;
>
> +       /* Validation for critical region info */
> +        if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
> +               dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
> +                               adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
> +               r = -EINVAL;
> +               goto out;
> +       }
> +
>         /* reserved memory starts from crit region base offset with the size of 5MB */
>         adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
>         adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
> @@ -958,6 +966,61 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
>         return r;
>  }
>
> +int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
> +       int data_id, uint8_t *binary, uint64_t *size)
> +{
> +       uint32_t data_offset = 0;
> +       uint32_t data_size = 0;
> +       enum amd_sriov_msg_table_id_enum data_table_id = data_id;
> +       char *data_name;
> +
> +       if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
> +               return -EINVAL;
> +
> +       data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
> +       data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
> +
> +       switch (data_id) {
> +       case AMD_SRIOV_MSG_IPD_TABLE_ID:
> +               data_name = "IPD";
> +               if (!IS_ALIGNED(data_offset, 4) || !IS_ALIGNED(data_size, 4)) {
> +                       dev_err(adev->dev, "IP discovery data not aligned to 4 bytes\n");
> +                       return -EINVAL;
> +               }
> +
> +               amdgpu_device_vram_access(adev,
> +                               (uint64_t)data_offset, (uint32_t *)binary, data_size, false);
> +               if (!binary)
> +                       return -EINVAL;
> +
> +               if (((uint64_t)data_offset + (uint64_t)data_size) > *size)
> +                       return -EINVAL;
> +
> +               *size = (uint64_t)data_size;
> +
> +               break;
> +
> +       case AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID:
> +               data_name = "BIOS";
> +               if (data_size > *size) {
> +                       dev_err(adev->dev, "Invalid vbios size: 0x%x\n", data_size);
> +                       return -EINVAL;
> +               }
> +
> +               amdgpu_device_vram_access(adev,
> +                               (uint64_t)data_offset, (uint32_t *)binary, data_size, false);
> +
> +               *size = (uint64_t)data_size;
> +               break;
> +       }
> +
> +       dev_info(adev->dev,
> +               "Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
> +               data_name, data_offset, data_size);
> +
> +       return 0;
> +}
> +
>  void amdgpu_virt_init(struct amdgpu_device *adev)
>  {
>         bool is_sriov = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 5c1dce9731e1..a3ae1ff40e84 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -440,6 +440,8 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
>  void amdgpu_virt_init(struct amdgpu_device *adev);
>
>  int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
> +int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
> +       int data_id, uint8_t *binary, uint64_t *size);
>
>  bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
>  int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV
  2025-10-15 13:12   ` Alex Deucher
@ 2025-10-15 14:48     ` Alex Deucher
  0 siblings, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2025-10-15 14:48 UTC (permalink / raw)
  To: Ellen Pan
  Cc: amd-gfx, Alexander.Deucher, Christian.Koenig, Lijo.Lazar,
	Jeffrey.Chan

On Wed, Oct 15, 2025 at 9:12 AM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Tue, Oct 14, 2025 at 4:21 PM Ellen Pan <yunru.pan@amd.com> wrote:
> >
> > - During guest driver init, as VFs receive PF msg to
> >         init dynamic critical region(v2), VFs reuse fw_vram_usage_*
> >          from ttm to store critical region tables in a 5MB chunk.
> >
> > Signed-off-by: Ellen Pan <yunru.pan@amd.com>
> > ---
> >  .../gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c  | 29 ++++++++++---------
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 12 ++++----
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |  9 ++++++
> >  3 files changed, 31 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
> > index c7d32fb216e4..636385c80f64 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
> > @@ -181,19 +181,22 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
> >         u8 frev, crev;
> >         int usage_bytes = 0;
> >
> > -       if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
> > -               if (frev == 2 && crev == 1) {
> > -                       fw_usage_v2_1 =
> > -                               (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
> > -                       amdgpu_atomfirmware_allocate_fb_v2_1(adev,
> > -                                       fw_usage_v2_1,
> > -                                       &usage_bytes);
> > -               } else if (frev >= 2 && crev >= 2) {
> > -                       fw_usage_v2_2 =
> > -                               (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
> > -                       amdgpu_atomfirmware_allocate_fb_v2_2(adev,
> > -                                       fw_usage_v2_2,
> > -                                       &usage_bytes);
> > +       /* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
> > +       if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
> > +               if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
>
> Do you need this check here?  Is vram_usagebyfirmware valid on V2
> systems?  If that table if not present on V2 systems, then you don't
> need to change anything here since amdgpu_atom_parse_data_header()
> will return an error.  Other than that, looks good to me.

Sorry, I missed your previous reply.  This patch is:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

>
> Alex
>
> > +                       if (frev == 2 && crev == 1) {
> > +                               fw_usage_v2_1 =
> > +                                       (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
> > +                               amdgpu_atomfirmware_allocate_fb_v2_1(adev,
> > +                                               fw_usage_v2_1,
> > +                                               &usage_bytes);
> > +                       } else if (frev >= 2 && crev >= 2) {
> > +                               fw_usage_v2_2 =
> > +                                       (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
> > +                               amdgpu_atomfirmware_allocate_fb_v2_2(adev,
> > +                                               fw_usage_v2_2,
> > +                                               &usage_bytes);
> > +                       }
> >                 }
> >         }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 96bd0185f936..b5148a33b6f5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -1944,19 +1944,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> >                 return r;
> >
> >         /*
> > -        *The reserved vram for driver must be pinned to the specified
> > -        *place on the VRAM, so reserve it early.
> > +        * The reserved VRAM for the driver must be pinned to a specific
> > +        * location in VRAM, so reserve it early.
> >          */
> >         r = amdgpu_ttm_drv_reserve_vram_init(adev);
> >         if (r)
> >                 return r;
> >
> >         /*
> > -        * only NAVI10 and onwards ASIC support for IP discovery.
> > -        * If IP discovery enabled, a block of memory should be
> > -        * reserved for IP discovey.
> > +        * only NAVI10 and later ASICs support IP discovery.
> > +        * If IP discovery is enabled, a block of memory should be
> > +        * reserved for it.
> >          */
> > -       if (adev->mman.discovery_bin) {
> > +       if (adev->mman.discovery_bin && !adev->virt.is_dynamic_crit_regn_enabled) {
> >                 r = amdgpu_ttm_reserve_tmr(adev);
> >                 if (r)
> >                         return r;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > index 27235f3f3b81..820dab538164 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > @@ -940,6 +940,15 @@ int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
> >         adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
> >                 init_data_hdr->bad_page_size_in_kb;
> >
> > +       /* reserved memory starts from crit region base offset with the size of 5MB */
> > +       adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
> > +       adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
> > +       dev_info(adev->dev,
> > +               "critical region v%d requested to reserve memory start at %08x with %d KB.\n",
> > +                       init_data_hdr->version,
> > +                       adev->mman.fw_vram_usage_start_offset,
> > +                       adev->mman.fw_vram_usage_size >> 10);
> > +
> >         adev->virt.is_dynamic_crit_regn_enabled = true;
> >
> >  out:
> > --
> > 2.34.1
> >

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2025-10-15 14:48 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-14 20:13 [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Ellen Pan
2025-10-14 20:13 ` [PATCH v4 2/6] drm/amdgpu: Add SRIOV crit_region_version support Ellen Pan
2025-10-15 13:02   ` Alex Deucher
2025-10-14 20:13 ` [PATCH v4 3/6] drm/amdgpu: Introduce SRIOV critical regions v2 during VF init Ellen Pan
2025-10-15  9:09   ` Lazar, Lijo
2025-10-14 20:13 ` [PATCH v4 4/6] drm/amdgpu: Reuse fw_vram_usage_* for dynamic critical region in SRIOV Ellen Pan
2025-10-15 13:12   ` Alex Deucher
2025-10-15 14:48     ` Alex Deucher
2025-10-14 20:13 ` [PATCH v4 5/6] drm/amdgpu: Add logic for VF ipd and VF bios to init from dynamic crit_region offsets Ellen Pan
2025-10-15  9:37   ` Lazar, Lijo
2025-10-15 13:23   ` Alex Deucher
2025-10-14 20:13 ` [PATCH v4 6/6] drm/amdgpu: Add logic for VF data exchange region " Ellen Pan
2025-10-15 13:09   ` Alex Deucher
2025-10-15 13:02 ` [PATCH v3 1/6] drm/amdgpu: Updated naming of SRIOV critical region offsets/sizes with _V1 suffix Alex Deucher

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox