AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: <Roman.Li@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Daniel Wheeler <daniel.wheeler@amd.com>, <Harry.Wentland@amd.com>,
	<Sunpeng.Li@amd.com>, <Rodrigo.Siqueira@amd.com>,
	<Aurabindo.Pillai@amd.com>,  <roman.li@amd.com>,
	<wayne.lin@amd.com>, <solomon.chiu@amd.com>,
	<agustin.gutierrez@amd.com>, <hamza.mahfooz@amd.com>,
	Alvin Lee <alvin.lee2@amd.com>, Josip Pavic <josip.pavic@amd.com>
Subject: [PATCH 21/43] drm/amd/display: Add extra DMUB logging to track message timeout
Date: Thu, 28 Mar 2024 15:50:25 -0400	[thread overview]
Message-ID: <20240328195047.2843715-22-Roman.Li@amd.com> (raw)
In-Reply-To: <20240328195047.2843715-1-Roman.Li@amd.com>

From: Alvin Lee <alvin.lee2@amd.com>

[Description]
- Add logging for first DMUB inbox message that timed out to diagnostic
  data
- It is useful to track the first failed message for debug purposes
  because once DMUB becomes hung (typically on a message), it will
  remain hung and all subsequent messages. In these cases we're
  interested in knowing which is the first message that failed.

Reviewed-by: Josip Pavic <josip.pavic@amd.com>
Acked-by: Roman Li <roman.li@amd.com>
Signed-off-by: Alvin Lee <alvin.lee2@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c    | 10 +++++++++-
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h     | 17 ++++++++++++++++-
 .../gpu/drm/amd/display/dmub/src/dmub_dcn20.c   |  1 +
 .../gpu/drm/amd/display/dmub/src/dmub_dcn31.c   |  1 +
 .../gpu/drm/amd/display/dmub/src/dmub_dcn32.c   |  2 ++
 .../gpu/drm/amd/display/dmub/src/dmub_dcn35.c   |  1 +
 6 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 9ae0e602e737..34d199b08dd9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -23,6 +23,7 @@
  *
  */
 
+#include "dm_services.h"
 #include "dc.h"
 #include "dc_dmub_srv.h"
 #include "../dmub/dmub_srv.h"
@@ -198,6 +199,11 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv,
 
 		if (status != DMUB_STATUS_OK) {
 			DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status);
+			if (!dmub->debug.timeout_occured) {
+				dmub->debug.timeout_occured = true;
+				dmub->debug.timeout_cmd = *cmd_list;
+				dmub->debug.timestamp = dm_get_timestamp(dc_dmub_srv->ctx);
+			}
 			dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
 			return false;
 		}
@@ -904,6 +910,7 @@ bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmu
 void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv)
 {
 	struct dmub_diagnostic_data diag_data = {0};
+	uint32_t i;
 
 	if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
 		DC_LOG_ERROR("%s: invalid parameters.", __func__);
@@ -935,7 +942,8 @@ void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv)
 	DC_LOG_DEBUG("    scratch [13]       : %08x", diag_data.scratch[13]);
 	DC_LOG_DEBUG("    scratch [14]       : %08x", diag_data.scratch[14]);
 	DC_LOG_DEBUG("    scratch [15]       : %08x", diag_data.scratch[15]);
-	DC_LOG_DEBUG("    pc                 : %08x", diag_data.pc);
+	for (i = 0; i < DMUB_PC_SNAPSHOT_COUNT; i++)
+		DC_LOG_DEBUG("    pc[%d]             : %08x", i, diag_data.pc[i]);
 	DC_LOG_DEBUG("    unk_fault_addr     : %08x", diag_data.undefined_address_fault_addr);
 	DC_LOG_DEBUG("    inst_fault_addr    : %08x", diag_data.inst_fetch_fault_addr);
 	DC_LOG_DEBUG("    data_fault_addr    : %08x", diag_data.data_write_fault_addr);
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index 7785908a6676..662bdb0e5d3d 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -71,6 +71,8 @@
 extern "C" {
 #endif
 
+#define DMUB_PC_SNAPSHOT_COUNT 10
+
 /* Forward declarations */
 struct dmub_srv;
 struct dmub_srv_common_regs;
@@ -299,6 +301,17 @@ struct dmub_srv_hw_params {
 	enum dmub_ips_disable_type disable_ips;
 };
 
+/**
+ * struct dmub_srv_debug - Debug info for dmub_srv
+ * @timeout_occured: Indicates a timeout occured on any message from driver to dmub
+ * @timeout_cmd: first cmd sent from driver that timed out - subsequent timeouts are not stored
+ */
+struct dmub_srv_debug {
+	bool timeout_occured;
+	union dmub_rb_cmd timeout_cmd;
+	unsigned long long timestamp;
+};
+
 /**
  * struct dmub_diagnostic_data - Diagnostic data retrieved from DMCUB for
  * debugging purposes, including logging, crash analysis, etc.
@@ -306,7 +319,7 @@ struct dmub_srv_hw_params {
 struct dmub_diagnostic_data {
 	uint32_t dmcub_version;
 	uint32_t scratch[17];
-	uint32_t pc;
+	uint32_t pc[DMUB_PC_SNAPSHOT_COUNT];
 	uint32_t undefined_address_fault_addr;
 	uint32_t inst_fetch_fault_addr;
 	uint32_t data_write_fault_addr;
@@ -317,6 +330,7 @@ struct dmub_diagnostic_data {
 	uint32_t inbox0_wptr;
 	uint32_t inbox0_size;
 	uint32_t gpint_datain0;
+	struct dmub_srv_debug timeout_info;
 	uint8_t is_dmcub_enabled : 1;
 	uint8_t is_dmcub_soft_reset : 1;
 	uint8_t is_dmcub_secure_reset : 1;
@@ -506,6 +520,7 @@ struct dmub_srv {
 	struct dmub_visual_confirm_color visual_confirm_color;
 
 	enum dmub_srv_power_state_type power_state;
+	struct dmub_srv_debug debug;
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
index cae96fba6349..e500ca9ae09c 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
@@ -472,4 +472,5 @@ void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti
 
 	REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
 	diag_data->is_cw6_enabled = is_cw6_enabled;
+	diag_data->timeout_info = dmub->debug;
 }
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
index 2bcf5fb87dd9..662c34e9495c 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
@@ -466,6 +466,7 @@ void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti
 
 	REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
 	diag_data->is_cw6_enabled = is_cw6_enabled;
+	diag_data->timeout_info = dmub->debug;
 }
 
 bool dmub_dcn31_should_detect(struct dmub_srv *dmub)
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
index 0d521eeda050..e1da270502cc 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
@@ -478,6 +478,8 @@ void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti
 	diag_data->is_cw6_enabled = is_cw6_enabled;
 
 	diag_data->gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0);
+
+	diag_data->timeout_info = dmub->debug;
 }
 void dmub_dcn32_configure_dmub_in_system_memory(struct dmub_srv *dmub)
 {
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
index 53f359f3fae2..98afaecd3984 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
@@ -516,6 +516,7 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti
 	diag_data->is_cw6_enabled = is_cw6_enabled;
 
 	diag_data->gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0);
+	diag_data->timeout_info = dmub->debug;
 }
 void dmub_dcn35_configure_dmub_in_system_memory(struct dmub_srv *dmub)
 {
-- 
2.34.1


  parent reply	other threads:[~2024-03-28 19:52 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-28 19:50 [PATCH 00/43] DC Patches Apr 1, 2024 Roman.Li
2024-03-28 19:50 ` [PATCH 01/43] drm/amd/display: Fix compiler redefinition warnings for certain configs Roman.Li
2024-04-01 13:07   ` Wheeler, Daniel
2024-03-28 19:50 ` [PATCH 02/43] drm/amd/display: Add timing pixel encoding for mst mode validation Roman.Li
2024-03-28 19:50 ` [PATCH 03/43] drm/amd/display: fix underflow in some two display subvp/non-subvp configs Roman.Li
2024-03-28 19:50 ` [PATCH 04/43] drm/amd/display: optimize dml2 pipe resource allocation order Roman.Li
2024-03-28 19:50 ` [PATCH 05/43] drm/amd/display: Toggle additional RCO options in DCN35 Roman.Li
2024-03-28 19:50 ` [PATCH 06/43] drm/amd/display: Decouple dcn35 and dcn351 dmub firmware Roman.Li
2024-03-28 19:50 ` [PATCH 07/43] drm/amd/display: Expand supported Replay residency mode Roman.Li
2024-03-28 19:50 ` [PATCH 08/43] drm/amd/display: FEC overhead should be checked once for mst slot nums Roman.Li
2024-07-18  7:09   ` Jiri Slaby
2024-07-30  6:00     ` Lin, Wayne
2024-07-31  6:47       ` Jiri Slaby
2024-03-28 19:50 ` [PATCH 09/43] drm/amd/display: handle invalid connector indices Roman.Li
2024-03-28 19:50 ` [PATCH 10/43] drm/amd/display: Add dmub additional interface support for FAMS Roman.Li
2024-03-28 19:50 ` [PATCH 11/43] drm/amd/display: update pipe topology log to support subvp Roman.Li
2024-03-28 19:50 ` [PATCH 12/43] drm/amd/display: Enable DTBCLK DTO earlier in the sequence Roman.Li
2024-03-28 19:50 ` [PATCH 13/43] drm/amd/display: Add dummy interface for tracing DCN32 SMU messages Roman.Li
2024-03-28 19:50 ` [PATCH 14/43] drm/amd/display: Enable RCO for HDMISTREAMCLK in DCN35 Roman.Li
2024-03-28 19:50 ` [PATCH 15/43] drm/amd/display: Allow HPO PG for DCN35 Roman.Li
2024-03-28 19:50 ` [PATCH 16/43] drm/amd/display: Skip on writeback when it's not applicable Roman.Li
2024-03-28 19:50 ` [PATCH 17/43] drm/amd/display: Add OTG check for set AV mute Roman.Li
2024-03-28 19:50 ` [PATCH 18/43] drm/amd/display: Add extra logging for HUBP and OTG Roman.Li
2024-03-28 19:50 ` [PATCH 19/43] drm/amd/display: Disable Z8 minimum stutter period check for DCN35 Roman.Li
2024-03-28 19:50 ` [PATCH 20/43] drm/amd/display: add root clock control function pointer to fix display corruption Roman.Li
2024-03-28 19:50 ` Roman.Li [this message]
2024-03-28 19:50 ` [PATCH 22/43] drm/amd/display: remove context->dml2 dependency from DML21 wrapper Roman.Li
2024-03-28 19:50 ` [PATCH 23/43] drm/amd/display: Add handling for DC power mode Roman.Li
2024-03-28 19:50 ` [PATCH 24/43] drm/amd/display: move build test pattern params as part of pipe resource update for odm Roman.Li
2024-03-28 19:50 ` [PATCH 25/43] drm/amd/display: Fix compiler warnings on high compiler warning levels Roman.Li
2024-03-28 19:50 ` [PATCH 26/43] drm/amd/display: Allow RCG for Static Screen + LVP for DCN35 Roman.Li
2024-03-28 19:50 ` [PATCH 27/43] drm/amd/display: 3.2.279 Roman.Li
2024-03-28 19:50 ` [PATCH 28/43] drm/amd/display: Initialize DP ref clk with the correct clock Roman.Li
2024-03-28 19:50 ` [PATCH 29/43] drm/amd/display: Set alpha enable to 0 for some specific formats Roman.Li
2024-03-28 19:50 ` [PATCH 30/43] drm/amd/display: Enable cur_rom_en even if cursor degamma is not enabled Roman.Li
2024-04-01 13:40   ` Melissa Wen
2024-04-01 13:52     ` Harry Wentland
2024-03-28 19:50 ` [PATCH 31/43] drm/amd/display: Add some missing debug registers Roman.Li
2024-03-28 19:50 ` [PATCH 32/43] drm/amd/display: Update DSC compute parameter calculation Roman.Li
2024-03-28 19:50 ` [PATCH 33/43] drm/amd/display: Drop legacy code Roman.Li
2024-03-28 19:50 ` [PATCH 34/43] drm/amd/display: Add missing registers Roman.Li
2024-03-28 19:50 ` [PATCH 35/43] drm/amd/display: Remove redundant RESERVE0 and RESERVE1 Roman.Li
2024-03-28 19:50 ` [PATCH 36/43] drm/amd/display: Add missing SFB and OPP_SF Roman.Li
2024-03-28 19:50 ` [PATCH 37/43] drm/amd/display: Initialize debug variable data Roman.Li
2024-03-28 19:50 ` [PATCH 38/43] drm/amd/display: Fix MPCC DTN logging Roman.Li
2024-04-01 13:30   ` Melissa Wen
2024-03-28 19:50 ` [PATCH 39/43] drm/amd/display: Add WBSCL ram coefficient for writeback Roman.Li
2024-03-28 19:50 ` [PATCH 40/43] drm/amd/display: Add code comments clock and encode code Roman.Li
2024-03-28 19:50 ` [PATCH 41/43] drm/amd/display: Includes adjustments Roman.Li
2024-03-28 19:50 ` [PATCH 42/43] drm/amd/display: Add color logs for dcn20 Roman.Li
2024-03-28 19:50 ` [PATCH 43/43] drm/amd/display: Enable FGCG for DCN351 Roman.Li
2024-04-01 13:21 ` [PATCH 00/43] DC Patches Apr 1, 2024 Wheeler, Daniel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240328195047.2843715-22-Roman.Li@amd.com \
    --to=roman.li@amd.com \
    --cc=Aurabindo.Pillai@amd.com \
    --cc=Harry.Wentland@amd.com \
    --cc=Rodrigo.Siqueira@amd.com \
    --cc=Sunpeng.Li@amd.com \
    --cc=agustin.gutierrez@amd.com \
    --cc=alvin.lee2@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=daniel.wheeler@amd.com \
    --cc=hamza.mahfooz@amd.com \
    --cc=josip.pavic@amd.com \
    --cc=solomon.chiu@amd.com \
    --cc=wayne.lin@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox