From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev,
Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>,
Jasdeep Dhillon <jdhillon@amd.com>,
Stylon Wang <stylon.wang@amd.com>,
Alex Deucher <alexander.deucher@amd.com>,
"Limonciello, Mario" <mario.limonciello@amd.com>
Subject: [PATCH 6.1 29/42] drm/amd/display: Fix race condition in DPIA AUX transfer
Date: Wed, 1 Mar 2023 19:08:50 +0100 [thread overview]
Message-ID: <20230301180658.363778517@linuxfoundation.org> (raw)
In-Reply-To: <20230301180657.003689969@linuxfoundation.org>
From: Stylon Wang <stylon.wang@amd.com>
commit ead08b95fa50f40618c72b93a849c4ae30c9cd50 upstream.
[Why]
This fix was intended for improving on coding style but in the process
uncovers a race condition, which explains why we are getting incorrect
length in DPIA AUX replies. Due to the call path of DPIA AUX going from
DC back to DM layer then again into DC and the added complexities on top
of current DC AUX implementation, a proper fix to rely on current dc_lock
to address the race condition is difficult without a major overhual
on how DPIA AUX is implemented.
[How]
- Add a mutex dpia_aux_lock to protect DPIA AUX transfers
- Remove DMUB_ASYNC_TO_SYNC_ACCESS_* codes and rely solely on
aux_return_code_type for error reporting and handling
- Separate SET_CONFIG from DPIA AUX transfer because they have quiet
different processing logic
- Remove unnecessary type casting to and from void * type
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Jasdeep Dhillon <jdhillon@amd.com>
Signed-off-by: Stylon Wang <stylon.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: "Limonciello, Mario" <mario.limonciello@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 147 ++++++--------
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 17 +
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 10
3 files changed, 89 insertions(+), 85 deletions(-)
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -147,14 +147,6 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
/* Number of bytes in PSP footer for firmware. */
#define PSP_FOOTER_BYTES 0x100
-/*
- * DMUB Async to Sync Mechanism Status
- */
-#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1
-#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2
-#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3
-#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4
-
/**
* DOC: overview
*
@@ -1456,6 +1448,7 @@ static int amdgpu_dm_init(struct amdgpu_
memset(&init_params, 0, sizeof(init_params));
#endif
+ mutex_init(&adev->dm.dpia_aux_lock);
mutex_init(&adev->dm.dc_lock);
mutex_init(&adev->dm.audio_lock);
spin_lock_init(&adev->dm.vblank_lock);
@@ -1814,6 +1807,7 @@ static void amdgpu_dm_fini(struct amdgpu
mutex_destroy(&adev->dm.audio_lock);
mutex_destroy(&adev->dm.dc_lock);
+ mutex_destroy(&adev->dm.dpia_aux_lock);
return;
}
@@ -10198,91 +10192,92 @@ uint32_t dm_read_reg_func(const struct d
return value;
}
-static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux,
- struct dc_context *ctx,
- uint8_t status_type,
- uint32_t *operation_result)
+int amdgpu_dm_process_dmub_aux_transfer_sync(
+ struct dc_context *ctx,
+ unsigned int link_index,
+ struct aux_payload *payload,
+ enum aux_return_code_type *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
- int return_status = -1;
struct dmub_notification *p_notify = adev->dm.dmub_notify;
+ int ret = -1;
- if (is_cmd_aux) {
- if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
- return_status = p_notify->aux_reply.length;
- *operation_result = p_notify->result;
- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) {
- *operation_result = AUX_RET_ERROR_TIMEOUT;
- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) {
- *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) {
- *operation_result = AUX_RET_ERROR_INVALID_REPLY;
- } else {
- *operation_result = AUX_RET_ERROR_UNKNOWN;
+ mutex_lock(&adev->dm.dpia_aux_lock);
+ if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) {
+ *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
+ goto out;
+ }
+
+ if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
+ DRM_ERROR("wait_for_completion_timeout timeout!");
+ *operation_result = AUX_RET_ERROR_TIMEOUT;
+ goto out;
+ }
+
+ if (p_notify->result != AUX_RET_SUCCESS) {
+ /*
+ * Transient states before tunneling is enabled could
+ * lead to this error. We can ignore this for now.
+ */
+ if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) {
+ DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n",
+ payload->address, payload->length,
+ p_notify->result);
}
- } else {
- if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
- return_status = 0;
- *operation_result = p_notify->sc_status;
- } else {
- *operation_result = SET_CONFIG_UNKNOWN_ERROR;
+ *operation_result = AUX_RET_ERROR_INVALID_REPLY;
+ goto out;
+ }
+
+
+ payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
+ if (!payload->write && p_notify->aux_reply.length &&
+ (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) {
+
+ if (payload->length != p_notify->aux_reply.length) {
+ DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n",
+ p_notify->aux_reply.length,
+ payload->address, payload->length);
+ *operation_result = AUX_RET_ERROR_INVALID_REPLY;
+ goto out;
}
+
+ memcpy(payload->data, p_notify->aux_reply.data,
+ p_notify->aux_reply.length);
}
- return return_status;
+ /* success */
+ ret = p_notify->aux_reply.length;
+ *operation_result = p_notify->result;
+out:
+ mutex_unlock(&adev->dm.dpia_aux_lock);
+ return ret;
}
-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx,
- unsigned int link_index, void *cmd_payload, void *operation_result)
+int amdgpu_dm_process_dmub_set_config_sync(
+ struct dc_context *ctx,
+ unsigned int link_index,
+ struct set_config_cmd_payload *payload,
+ enum set_config_status *operation_result)
{
struct amdgpu_device *adev = ctx->driver_context;
- int ret = 0;
+ bool is_cmd_complete;
+ int ret;
- if (is_cmd_aux) {
- dc_process_dmub_aux_transfer_async(ctx->dc,
- link_index, (struct aux_payload *)cmd_payload);
- } else if (dc_process_dmub_set_config_async(ctx->dc, link_index,
- (struct set_config_cmd_payload *)cmd_payload,
- adev->dm.dmub_notify)) {
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
- (uint32_t *)operation_result);
- }
+ mutex_lock(&adev->dm.dpia_aux_lock);
+ is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc,
+ link_index, payload, adev->dm.dmub_notify);
- ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ);
- if (ret == 0) {
+ if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
+ ret = 0;
+ *operation_result = adev->dm.dmub_notify->sc_status;
+ } else {
DRM_ERROR("wait_for_completion_timeout timeout!");
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT,
- (uint32_t *)operation_result);
- }
-
- if (is_cmd_aux) {
- if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
- struct aux_payload *payload = (struct aux_payload *)cmd_payload;
-
- payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
- if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
- payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) {
-
- if (payload->length != adev->dm.dmub_notify->aux_reply.length) {
- DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n",
- payload->address, payload->length,
- adev->dm.dmub_notify->aux_reply.length);
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx,
- DMUB_ASYNC_TO_SYNC_ACCESS_INVALID,
- (uint32_t *)operation_result);
- }
-
- memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
- adev->dm.dmub_notify->aux_reply.length);
- }
- }
+ ret = -1;
+ *operation_result = SET_CONFIG_UNKNOWN_ERROR;
}
- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
- (uint32_t *)operation_result);
+ mutex_unlock(&adev->dm.dpia_aux_lock);
+ return ret;
}
/*
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -59,7 +59,9 @@
#include "signal_types.h"
#include "amdgpu_dm_crc.h"
struct aux_payload;
+struct set_config_cmd_payload;
enum aux_return_code_type;
+enum set_config_status;
/* Forward declarations */
struct amdgpu_device;
@@ -549,6 +551,13 @@ struct amdgpu_display_manager {
* occurred on certain intel platform
*/
bool aux_hpd_discon_quirk;
+
+ /**
+ * @dpia_aux_lock:
+ *
+ * Guards access to DPIA AUX
+ */
+ struct mutex dpia_aux_lock;
};
enum dsc_clock_force_state {
@@ -792,9 +801,11 @@ void amdgpu_dm_update_connector_after_de
extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux,
- struct dc_context *ctx, unsigned int link_index,
- void *payload, void *operation_result);
+int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
+ struct aux_payload *payload, enum aux_return_code_type *operation_result);
+
+int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
+ struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
bool check_seamless_boot_capability(struct amdgpu_device *adev);
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -844,9 +844,8 @@ int dm_helper_dmub_aux_transfer_sync(
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
- return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx,
- link->link_index, (void *)payload,
- (void *)operation_result);
+ return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
+ operation_result);
}
int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
@@ -854,9 +853,8 @@ int dm_helpers_dmub_set_config_sync(stru
struct set_config_cmd_payload *payload,
enum set_config_status *operation_result)
{
- return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx,
- link->link_index, (void *)payload,
- (void *)operation_result);
+ return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload,
+ operation_result);
}
void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
next prev parent reply other threads:[~2023-03-01 18:13 UTC|newest]
Thread overview: 71+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-01 18:08 [PATCH 6.1 00/42] 6.1.15-rc1 review Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 01/42] Fix XFRM-I support for nested ESP tunnels Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 02/42] arm64: dts: rockchip: reduce thermal limits on rk3399-pinephone-pro Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 03/42] arm64: dts: rockchip: drop unused LED mode property from rk3328-roc-cc Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 04/42] ARM: dts: rockchip: add power-domains property to dp node on rk3288 Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 05/42] arm64: dts: rockchip: add missing #interrupt-cells to rk356x pcie2x1 Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 06/42] arm64: dts: rockchip: fix probe of analog sound card on rock-3a Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 07/42] HID: elecom: add support for TrackBall 056E:011C Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 08/42] HID: Ignore battery for Elan touchscreen on Asus TP420IA Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 09/42] ACPI: NFIT: fix a potential deadlock during NFIT teardown Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 10/42] pinctrl: amd: Fix debug output for debounce time Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 11/42] btrfs: send: limit number of clones and allocated memory size Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 12/42] arm64: dts: rockchip: align rk3399 DMC OPP table with bindings Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 13/42] ASoC: rt715-sdca: fix clock stop prepare timeout issue Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 14/42] IB/hfi1: Assign npages earlier Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 15/42] powerpc: Dont select ARCH_WANTS_NO_INSTR Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 16/42] ASoC: SOF: amd: Fix for handling spurious interrupts from DSP Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 17/42] ARM: dts: stihxxx-b2120: fix polarity of reset line of tsin0 port Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 18/42] neigh: make sure used and confirmed times are valid Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 19/42] HID: core: Fix deadloop in hid_apply_multiplier Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 20/42] ASoC: codecs: es8326: Fix DTS properties reading Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 21/42] HID: Ignore battery for ELAN touchscreen 29DF on HP Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 22/42] selftests: ocelot: tc_flower_chains: make test_vlan_ingress_modify() more comprehensive Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 23/42] x86/cpu: Add Lunar Lake M Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 24/42] PM: sleep: Avoid using pr_cont() in the tasks freezing code Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 25/42] bpf: bpf_fib_lookup should not return neigh in NUD_FAILED state Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 26/42] net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from sk_stream_kill_queues() Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 27/42] vc_screen: dont clobber return value in vcs_read Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 28/42] drm/amd/display: Move DCN314 DOMAIN power control to DMCUB Greg Kroah-Hartman
2023-03-01 18:08 ` Greg Kroah-Hartman [this message]
2023-03-01 18:08 ` [PATCH 6.1 30/42] usb: dwc3: pci: add support for the Intel Meteor Lake-M Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 31/42] USB: serial: option: add support for VW/Skoda "Carstick LTE" Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 32/42] usb: gadget: u_serial: Add null pointer check in gserial_resume Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 33/42] arm64: dts: uniphier: Fix property name in PXs3 USB node Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 34/42] usb: typec: pd: Remove usb_suspend_supported sysfs from sink PDO Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 35/42] drm/amd/display: Properly reuse completion structure Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 36/42] attr: add in_group_or_capable() Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 37/42] fs: move should_remove_suid() Greg Kroah-Hartman
2023-03-01 18:08 ` [PATCH 6.1 38/42] attr: add setattr_should_drop_sgid() Greg Kroah-Hartman
2023-03-01 18:09 ` [PATCH 6.1 39/42] attr: use consistent sgid stripping checks Greg Kroah-Hartman
2023-03-01 18:09 ` [PATCH 6.1 40/42] fs: use consistent setgid checks in is_sxid() Greg Kroah-Hartman
2023-03-01 18:09 ` [PATCH 6.1 41/42] scripts/tags.sh: fix incompatibility with PCRE2 Greg Kroah-Hartman
2023-03-01 18:09 ` [PATCH 6.1 42/42] USB: core: Dont hold device lock while reading the "descriptors" sysfs file Greg Kroah-Hartman
2023-03-01 20:33 ` [PATCH 6.1 00/42] 6.1.15-rc1 review Conor Dooley
2023-03-01 22:16 ` Florian Fainelli
2023-03-01 23:43 ` Justin Forbes
2023-03-02 1:44 ` Shuah Khan
2023-03-02 4:27 ` Bagas Sanjaya
2023-03-02 7:27 ` Jon Hunter
2023-03-02 10:19 ` Naresh Kamboju
2023-03-02 10:29 ` Greg Kroah-Hartman
2023-03-02 11:00 ` Naresh Kamboju
2023-03-02 20:02 ` Naresh Kamboju
2023-03-03 7:01 ` Greg Kroah-Hartman
2023-03-03 9:00 ` Naresh Kamboju
2023-03-03 8:04 ` Paolo Abeni
2023-03-03 9:04 ` Naresh Kamboju
2023-03-03 9:23 ` Greg Kroah-Hartman
2023-03-03 9:47 ` Matthieu Baerts
2023-03-03 10:26 ` Greg Kroah-Hartman
2023-03-03 10:56 ` Matthieu Baerts
2023-03-03 11:31 ` Greg Kroah-Hartman
2023-03-03 11:39 ` Paolo Abeni
2023-03-03 11:44 ` Greg Kroah-Hartman
2023-03-03 12:41 ` Paolo Abeni
2023-03-03 13:35 ` Greg Kroah-Hartman
2023-03-02 11:37 ` Sudip Mukherjee (Codethink)
2023-03-02 23:16 ` Slade Watkins
2023-03-02 23:25 ` Rudi Heitbaum
2023-03-03 1:31 ` Guenter Roeck
2023-03-03 2:55 ` Ron Economos
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230301180658.363778517@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=Nicholas.Kazlauskas@amd.com \
--cc=alexander.deucher@amd.com \
--cc=jdhillon@amd.com \
--cc=mario.limonciello@amd.com \
--cc=patches@lists.linux.dev \
--cc=stable@vger.kernel.org \
--cc=stylon.wang@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox