alsa-devel.alsa-project.org archive mirror
 help / color / mirror / Atom feed
From: Cezary Rojewski <cezary.rojewski@intel.com>
To: alsa-devel@alsa-project.org, broonie@kernel.org
Cc: Cezary Rojewski <cezary.rojewski@intel.com>,
	upstream@semihalf.com, rad@semihalf.com,
	pierre-louis.bossart@linux.intel.com, tiwai@suse.com,
	hdegoede@redhat.com, amadeuszx.slawinski@linux.intel.com,
	cujomalainey@chromium.org, lma@semihalf.com
Subject: [PATCH v3 06/15] ASoC: Intel: avs: Coredump and recovery flow
Date: Mon, 16 May 2022 12:11:07 +0200	[thread overview]
Message-ID: <20220516101116.190192-7-cezary.rojewski@intel.com> (raw)
In-Reply-To: <20220516101116.190192-1-cezary.rojewski@intel.com>

On rare occasions, under stress conditions or hardware malfunction, the
DSP firmware may fail. Software is notified about such a situation with
an EXCEPTION_CAUGHT notification. An IPC timeout is also counted as a
critical device failure. More often than not, the driver can recover
from such situations by performing a full reset: killing and restarting
the ADSP.

Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
---
 sound/soc/intel/Kconfig        |  1 +
 sound/soc/intel/avs/avs.h      |  4 ++
 sound/soc/intel/avs/ipc.c      | 97 +++++++++++++++++++++++++++++++++-
 sound/soc/intel/avs/messages.h |  5 ++
 4 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
index 039b45a4a799..1aaf9bdf721c 100644
--- a/sound/soc/intel/Kconfig
+++ b/sound/soc/intel/Kconfig
@@ -219,6 +219,7 @@ config SND_SOC_INTEL_AVS
 	select SND_HDA_EXT_CORE
 	select SND_HDA_DSP_LOADER
 	select SND_INTEL_DSP_CONFIG
+	select WANT_DEV_COREDUMP
 	help
 	  Enable support for Intel(R) cAVS 1.5 platforms with DSP
 	  capabilities. This includes Skylake, Kabylake, Amberlake and
diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h
index e628f78d1864..9096f6c3d598 100644
--- a/sound/soc/intel/avs/avs.h
+++ b/sound/soc/intel/avs/avs.h
@@ -42,6 +42,7 @@ struct avs_dsp_ops {
 	int (* const load_basefw)(struct avs_dev *, struct firmware *);
 	int (* const load_lib)(struct avs_dev *, struct firmware *, u32);
 	int (* const transfer_mods)(struct avs_dev *, bool, struct avs_module_entry *, u32);
+	int (* const coredump)(struct avs_dev *, union avs_notify_msg *);
 };
 
 #define avs_dsp_op(adev, op, ...) \
@@ -164,12 +165,15 @@ struct avs_ipc {
 	struct avs_ipc_msg rx;
 	u32 default_timeout_ms;
 	bool ready;
+	atomic_t recovering;
 
 	bool rx_completed;
 	spinlock_t rx_lock;
 	struct mutex msg_mutex;
 	struct completion done_completion;
 	struct completion busy_completion;
+
+	struct work_struct recovery_work;
 };
 
 #define AVS_EIPC	EREMOTEIO
diff --git a/sound/soc/intel/avs/ipc.c b/sound/soc/intel/avs/ipc.c
index 68aaf01edbf2..98cdc05071fb 100644
--- a/sound/soc/intel/avs/ipc.c
+++ b/sound/soc/intel/avs/ipc.c
@@ -14,6 +14,89 @@
 
 #define AVS_IPC_TIMEOUT_MS	300
 
+static void avs_dsp_recovery(struct avs_dev *adev)
+{
+	struct avs_soc_component *acomp;
+	unsigned int core_mask;
+	int ret;
+
+	mutex_lock(&adev->comp_list_mutex);
+	/* disconnect all running streams */
+	list_for_each_entry(acomp, &adev->comp_list, node) {
+		struct snd_soc_pcm_runtime *rtd;
+		struct snd_soc_card *card;
+
+		card = acomp->base.card;
+		if (!card)
+			continue;
+
+		for_each_card_rtds(card, rtd) {
+			struct snd_pcm *pcm;
+			int dir;
+
+			pcm = rtd->pcm;
+			if (!pcm || rtd->dai_link->no_pcm)
+				continue;
+
+			for_each_pcm_streams(dir) {
+				struct snd_pcm_substream *substream;
+
+				substream = pcm->streams[dir].substream;
+				if (!substream || !substream->runtime)
+					continue;
+
+				snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED);
+			}
+		}
+	}
+	mutex_unlock(&adev->comp_list_mutex);
+
+	/* forcibly shut down all cores */
+	core_mask = GENMASK(adev->hw_cfg.dsp_cores - 1, 0);
+	avs_dsp_core_disable(adev, core_mask);
+
+	/* attempt dsp reboot */
+	ret = avs_dsp_boot_firmware(adev, true);
+	if (ret < 0)
+		dev_err(adev->dev, "dsp reboot failed: %d\n", ret);
+
+	pm_runtime_mark_last_busy(adev->dev);
+	pm_runtime_enable(adev->dev);
+	pm_request_autosuspend(adev->dev);
+
+	atomic_set(&adev->ipc->recovering, 0);
+}
+
+static void avs_dsp_recovery_work(struct work_struct *work)
+{
+	struct avs_ipc *ipc = container_of(work, struct avs_ipc, recovery_work);
+
+	avs_dsp_recovery(to_avs_dev(ipc->dev));
+}
+
+static void avs_dsp_exception_caught(struct avs_dev *adev, union avs_notify_msg *msg)
+{
+	struct avs_ipc *ipc = adev->ipc;
+
+	/* Account for the double-exception case. */
+	ipc->ready = false;
+
+	if (!atomic_add_unless(&ipc->recovering, 1, 1)) {
+		dev_err(adev->dev, "dsp recovery is already in progress\n");
+		return;
+	}
+
+	dev_crit(adev->dev, "communication severed, rebooting dsp..\n");
+
+	/* Re-enabled on recovery completion. */
+	pm_runtime_disable(adev->dev);
+
+	/* Process received notification. */
+	avs_dsp_op(adev, coredump, msg);
+
+	schedule_work(&ipc->recovery_work);
+}
+
 static void avs_dsp_receive_rx(struct avs_dev *adev, u64 header)
 {
 	struct avs_ipc *ipc = adev->ipc;
@@ -57,6 +140,9 @@ static void avs_dsp_process_notification(struct avs_dev *adev, u64 header)
 		data_size = sizeof(struct avs_notify_res_data);
 		break;
 
+	case AVS_NOTIFY_EXCEPTION_CAUGHT:
+		break;
+
 	case AVS_NOTIFY_MODULE_EVENT:
 		/* To know the total payload size, header needs to be read first. */
 		memcpy_fromio(&mod_data, avs_uplink_addr(adev), sizeof(mod_data));
@@ -84,6 +170,10 @@ static void avs_dsp_process_notification(struct avs_dev *adev, u64 header)
 		complete(&adev->fw_ready);
 		break;
 
+	case AVS_NOTIFY_EXCEPTION_CAUGHT:
+		avs_dsp_exception_caught(adev, &msg);
+		break;
+
 	default:
 		break;
 	}
@@ -278,9 +368,10 @@ static int avs_dsp_do_send_msg(struct avs_dev *adev, struct avs_ipc_msg *request
 	ret = avs_ipc_wait_busy_completion(ipc, timeout);
 	if (ret) {
 		if (ret == -ETIMEDOUT) {
-			dev_crit(adev->dev, "communication severed: %d, rebooting dsp..\n", ret);
+			union avs_notify_msg msg = AVS_NOTIFICATION(EXCEPTION_CAUGHT);
 
-			avs_ipc_block(ipc);
+			/* Same treatment as on exception, just stack_dump=0. */
+			avs_dsp_exception_caught(adev, &msg);
 		}
 		goto exit;
 	}
@@ -368,6 +459,7 @@ int avs_ipc_init(struct avs_ipc *ipc, struct device *dev)
 	ipc->dev = dev;
 	ipc->ready = false;
 	ipc->default_timeout_ms = AVS_IPC_TIMEOUT_MS;
+	INIT_WORK(&ipc->recovery_work, avs_dsp_recovery_work);
 	init_completion(&ipc->done_completion);
 	init_completion(&ipc->busy_completion);
 	spin_lock_init(&ipc->rx_lock);
@@ -379,4 +471,5 @@ int avs_ipc_init(struct avs_ipc *ipc, struct device *dev)
 void avs_ipc_block(struct avs_ipc *ipc)
 {
 	ipc->ready = false;
+	cancel_work_sync(&ipc->recovery_work);
 }
diff --git a/sound/soc/intel/avs/messages.h b/sound/soc/intel/avs/messages.h
index 0395dd7150eb..94875a153124 100644
--- a/sound/soc/intel/avs/messages.h
+++ b/sound/soc/intel/avs/messages.h
@@ -187,6 +187,7 @@ enum avs_notify_msg_type {
 	AVS_NOTIFY_PHRASE_DETECTED = 4,
 	AVS_NOTIFY_RESOURCE_EVENT = 5,
 	AVS_NOTIFY_FW_READY = 8,
+	AVS_NOTIFY_EXCEPTION_CAUGHT = 10,
 	AVS_NOTIFY_MODULE_EVENT = 12,
 };
 
@@ -205,6 +206,10 @@ union avs_notify_msg {
 		};
 		union {
 			u32 val;
+			struct {
+				u32 core_id:2;
+				u32 stack_dump_size:16;
+			} coredump;
 		} ext;
 	};
 } __packed;
-- 
2.25.1


  parent reply	other threads:[~2022-05-16 10:04 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-16 10:11 [PATCH v3 00/15] ASoC: Intel: avs: Driver core and PCM operations Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 01/15] ASoC: Intel: avs: Account for libraries when booting basefw Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 02/15] ASoC: Intel: avs: Generic soc component driver Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 03/15] ASoC: Intel: avs: Generic PCM FE operations Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 04/15] ASoC: Intel: avs: non-HDA PCM BE operations Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 05/15] ASoC: Intel: avs: HDA " Cezary Rojewski
2022-05-16 10:11 ` Cezary Rojewski [this message]
2022-05-16 10:11 ` [PATCH v3 07/15] ASoC: Intel: avs: Prepare for firmware tracing Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 08/15] ASoC: Intel: avs: D0ix power state support Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 09/15] ASoC: Intel: avs: Event tracing Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 10/15] ASoC: Intel: avs: Replace link_mask usage with i2s_link_mask Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 11/15] ASoC: Intel: avs: Machine board registration Cezary Rojewski
2022-05-26 16:24   ` Guenter Roeck
2022-05-26 16:31     ` Cezary Rojewski
2022-05-26 17:09       ` Guenter Roeck
2022-05-26 17:44     ` Mark Brown
2022-05-29  5:48       ` Uwe Kleine-König
2022-05-29  6:05         ` [PATCH] ASoC: Intel: avs: Fix build error on arc, m68k and sparc Uwe Kleine-König
2022-05-29  6:21           ` Guenter Roeck
2022-05-29 13:24         ` [PATCH v3 11/15] ASoC: Intel: avs: Machine board registration Cezary Rojewski
2022-05-29 13:50           ` Uwe Kleine-König
2022-05-29 13:56             ` Cezary Rojewski
2022-05-29 14:12               ` [PATCH] ASoC: Intel: avs: Fix build error on arc, m68k and sparc Uwe Kleine-König
2022-05-30 13:29                 ` Mark Brown
2022-05-30 20:19                   ` Uwe Kleine-König
2022-05-31 10:41                     ` Mark Brown
2022-05-30 13:29         ` [PATCH v3 11/15] ASoC: Intel: avs: Machine board registration Mark Brown
2022-05-16 10:11 ` [PATCH v3 12/15] ASoC: Intel: avs: PCI driver implementation Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 13/15] ASoC: Intel: avs: Power management Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 14/15] ASoC: Intel: avs: SKL-based platforms support Cezary Rojewski
2022-05-16 10:11 ` [PATCH v3 15/15] ASoC: Intel: avs: APL-based " Cezary Rojewski
2022-05-17 17:21 ` [PATCH v3 00/15] ASoC: Intel: avs: Driver core and PCM operations Mark Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220516101116.190192-7-cezary.rojewski@intel.com \
    --to=cezary.rojewski@intel.com \
    --cc=alsa-devel@alsa-project.org \
    --cc=amadeuszx.slawinski@linux.intel.com \
    --cc=broonie@kernel.org \
    --cc=cujomalainey@chromium.org \
    --cc=hdegoede@redhat.com \
    --cc=lma@semihalf.com \
    --cc=pierre-louis.bossart@linux.intel.com \
    --cc=rad@semihalf.com \
    --cc=tiwai@suse.com \
    --cc=upstream@semihalf.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).