linux-arm-msm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] accel/qaic: Support the new READ_DATA implementation
@ 2025-10-07 22:40 Youssef Samir
  2025-10-08 18:31 ` Jeff Hugo
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Youssef Samir @ 2025-10-07 22:40 UTC (permalink / raw)
  To: jeff.hugo, carl.vanderlip, troy.hanson, zachary.mckevitt
  Cc: ogabbay, lizhi.hou, karol.wachowski, linux-arm-msm, dri-devel,
	Carl Vanderlip

From: Jeff Hugo <jeff.hugo@oss.qualcomm.com>

AIC200 uses the newer "XBL" firmware implementation which changes the
expectations of how READ_DATA is performed. Larger data requests are
supported via streaming the data over the transport instead of requiring
a single transport transfer for everything.

Co-developed-by: Carl Vanderlip <quic_carlv@quicinc.com>
Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
Signed-off-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
Signed-off-by: Youssef Samir <youssef.abdulrahman@oss.qualcomm.com>
---
 drivers/accel/qaic/sahara.c | 138 +++++++++++++++++++++++++++---------
 1 file changed, 104 insertions(+), 34 deletions(-)

diff --git a/drivers/accel/qaic/sahara.c b/drivers/accel/qaic/sahara.c
index 3ebcc1f7ff58..b126cca937a9 100644
--- a/drivers/accel/qaic/sahara.c
+++ b/drivers/accel/qaic/sahara.c
@@ -159,6 +159,7 @@ struct sahara_context {
 	struct sahara_packet		*rx;
 	struct work_struct		fw_work;
 	struct work_struct		dump_work;
+	struct work_struct		read_data_work;
 	struct mhi_device		*mhi_dev;
 	const char * const		*image_table;
 	u32				table_size;
@@ -174,7 +175,10 @@ struct sahara_context {
 	u64				dump_image_offset;
 	void				*mem_dump_freespace;
 	u64				dump_images_left;
+	u32				read_data_offset;
+	u32				read_data_length;
 	bool				is_mem_dump_mode;
+	bool				non_streaming;
 };
 
 static const char * const aic100_image_table[] = {
@@ -216,6 +220,11 @@ static const char * const aic200_image_table[] = {
 	[75] = "qcom/aic200/pvs.bin",
 };
 
+static bool is_streaming(struct sahara_context *context)
+{
+	return !context->non_streaming;
+}
+
 static int sahara_find_image(struct sahara_context *context, u32 image_id)
 {
 	int ret;
@@ -265,6 +274,8 @@ static void sahara_send_reset(struct sahara_context *context)
 	int ret;
 
 	context->is_mem_dump_mode = false;
+	context->read_data_offset = 0;
+	context->read_data_length = 0;
 
 	context->tx[0]->cmd = cpu_to_le32(SAHARA_RESET_CMD);
 	context->tx[0]->length = cpu_to_le32(SAHARA_RESET_LENGTH);
@@ -319,9 +330,39 @@ static void sahara_hello(struct sahara_context *context)
 		dev_err(&context->mhi_dev->dev, "Unable to send hello response %d\n", ret);
 }
 
+static int read_data_helper(struct sahara_context *context, int buf_index)
+{
+	enum mhi_flags mhi_flag;
+	u32 pkt_data_len;
+	int ret;
+
+	pkt_data_len = min(context->read_data_length, SAHARA_PACKET_MAX_SIZE);
+
+	memcpy(context->tx[buf_index],
+	       &context->firmware->data[context->read_data_offset],
+	       pkt_data_len);
+
+	context->read_data_offset += pkt_data_len;
+	context->read_data_length -= pkt_data_len;
+
+	if (is_streaming(context) || !context->read_data_length)
+		mhi_flag = MHI_EOT;
+	else
+		mhi_flag = MHI_CHAIN;
+
+	ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
+			    context->tx[buf_index], pkt_data_len, mhi_flag);
+	if (ret) {
+		dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 static void sahara_read_data(struct sahara_context *context)
 {
-	u32 image_id, data_offset, data_len, pkt_data_len;
+	u32 image_id, data_offset, data_len;
 	int ret;
 	int i;
 
@@ -357,7 +398,7 @@ static void sahara_read_data(struct sahara_context *context)
 	 * and is not needed here on error.
 	 */
 
-	if (data_len > SAHARA_TRANSFER_MAX_SIZE) {
+	if (context->non_streaming && data_len > SAHARA_TRANSFER_MAX_SIZE) {
 		dev_err(&context->mhi_dev->dev, "Malformed read_data packet - data len %d exceeds max xfer size %d\n",
 			data_len, SAHARA_TRANSFER_MAX_SIZE);
 		sahara_send_reset(context);
@@ -378,22 +419,18 @@ static void sahara_read_data(struct sahara_context *context)
 		return;
 	}
 
-	for (i = 0; i < SAHARA_NUM_TX_BUF && data_len; ++i) {
-		pkt_data_len = min(data_len, SAHARA_PACKET_MAX_SIZE);
+	context->read_data_offset = data_offset;
+	context->read_data_length = data_len;
 
-		memcpy(context->tx[i], &context->firmware->data[data_offset], pkt_data_len);
-
-		data_offset += pkt_data_len;
-		data_len -= pkt_data_len;
+	if (is_streaming(context)) {
+		schedule_work(&context->read_data_work);
+		return;
+	}
 
-		ret = mhi_queue_buf(context->mhi_dev, DMA_TO_DEVICE,
-				    context->tx[i], pkt_data_len,
-				    !data_len ? MHI_EOT : MHI_CHAIN);
-		if (ret) {
-			dev_err(&context->mhi_dev->dev, "Unable to send read_data response %d\n",
-				ret);
-			return;
-		}
+	for (i = 0; i < SAHARA_NUM_TX_BUF && context->read_data_length; ++i) {
+		ret = read_data_helper(context, i);
+		if (ret)
+			break;
 	}
 }
 
@@ -742,6 +779,13 @@ static void sahara_dump_processing(struct work_struct *work)
 	sahara_send_reset(context);
 }
 
+static void sahara_read_data_processing(struct work_struct *work)
+{
+	struct sahara_context *context = container_of(work, struct sahara_context, read_data_work);
+
+	read_data_helper(context, 0);
+}
+
 static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
 {
 	struct sahara_context *context;
@@ -756,34 +800,56 @@ static int sahara_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_
 	if (!context->rx)
 		return -ENOMEM;
 
+	if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
+		context->image_table = aic200_image_table;
+		context->table_size = ARRAY_SIZE(aic200_image_table);
+	} else {
+		context->image_table = aic100_image_table;
+		context->table_size = ARRAY_SIZE(aic100_image_table);
+		context->non_streaming = true;
+	}
+
 	/*
-	 * AIC100 defines SAHARA_TRANSFER_MAX_SIZE as the largest value it
-	 * will request for READ_DATA. This is larger than
-	 * SAHARA_PACKET_MAX_SIZE, and we need 9x SAHARA_PACKET_MAX_SIZE to
-	 * cover SAHARA_TRANSFER_MAX_SIZE. When the remote side issues a
-	 * READ_DATA, it requires a transfer of the exact size requested. We
-	 * can use MHI_CHAIN to link multiple buffers into a single transfer
-	 * but the remote side will not consume the buffers until it sees an
-	 * EOT, thus we need to allocate enough buffers to put in the tx fifo
-	 * to cover an entire READ_DATA request of the max size.
+	 * There are two firmware implementations for READ_DATA handling.
+	 * The older "SBL" implementation defines a Sahara transfer size, and
+	 * expects that the response is a single transport transfer. If the
+	 * FW wants to transfer a file that is larger than the transfer size,
+	 * the FW will issue multiple READ_DATA commands. For this
+	 * implementation, we need to allocate enough buffers to contain the
+	 * entire Sahara transfer size.
+	 *
+	 * The newer "XBL" implementation does not define a maximum transfer
+	 * size and instead expects the data to be streamed over using the
+	 * transport level MTU. The FW will issue a single READ_DATA command
+	 * of whatever size, and consume multiple transport level transfers
+	 * until the expected amount of data is consumed. For this
+	 * implementation we only need a single buffer of the transport MTU
+	 * but we'll need to be able to use it multiple times for a single
+	 * READ_DATA request.
+	 *
+	 * AIC100 is the SBL implementation and defines SAHARA_TRANSFER_MAX_SIZE
+	 * and we need 9x SAHARA_PACKET_MAX_SIZE to cover that. We can use
+	 * MHI_CHAIN to link multiple buffers into a single transfer but the
+	 * remote side will not consume the buffers until it sees an EOT, thus
+	 * we need to allocate enough buffers to put in the tx fifo to cover an
+	 * entire READ_DATA request of the max size.
+	 *
+	 * AIC200 is the XBL implementation, and so a single buffer will work.
 	 */
 	for (i = 0; i < SAHARA_NUM_TX_BUF; ++i) {
-		context->tx[i] = devm_kzalloc(&mhi_dev->dev, SAHARA_PACKET_MAX_SIZE, GFP_KERNEL);
+		context->tx[i] = devm_kzalloc(&mhi_dev->dev,
+					      SAHARA_PACKET_MAX_SIZE,
+					      GFP_KERNEL);
 		if (!context->tx[i])
 			return -ENOMEM;
+		if (is_streaming(context))
+			break;
 	}
 
 	context->mhi_dev = mhi_dev;
 	INIT_WORK(&context->fw_work, sahara_processing);
 	INIT_WORK(&context->dump_work, sahara_dump_processing);
-
-	if (!strcmp(mhi_dev->mhi_cntrl->name, "AIC200")) {
-		context->image_table = aic200_image_table;
-		context->table_size = ARRAY_SIZE(aic200_image_table);
-	} else {
-		context->image_table = aic100_image_table;
-		context->table_size = ARRAY_SIZE(aic100_image_table);
-	}
+	INIT_WORK(&context->read_data_work, sahara_read_data_processing);
 
 	context->active_image_id = SAHARA_IMAGE_ID_NONE;
 	dev_set_drvdata(&mhi_dev->dev, context);
@@ -814,6 +880,10 @@ static void sahara_mhi_remove(struct mhi_device *mhi_dev)
 
 static void sahara_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
 {
+	struct sahara_context *context = dev_get_drvdata(&mhi_dev->dev);
+
+	if (!mhi_result->transaction_status && context->read_data_length && is_streaming(context))
+		schedule_work(&context->read_data_work);
 }
 
 static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] accel/qaic: Support the new READ_DATA implementation
  2025-10-07 22:40 [PATCH] accel/qaic: Support the new READ_DATA implementation Youssef Samir
@ 2025-10-08 18:31 ` Jeff Hugo
  2025-10-13 23:33 ` Carl Vanderlip
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Jeff Hugo @ 2025-10-08 18:31 UTC (permalink / raw)
  To: Youssef Samir, carl.vanderlip, troy.hanson, zachary.mckevitt
  Cc: ogabbay, lizhi.hou, karol.wachowski, linux-arm-msm, dri-devel,
	Carl Vanderlip

On 10/7/2025 4:40 PM, Youssef Samir wrote:
> From: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
> 
> AIC200 uses the newer "XBL" firmware implementation which changes the
> expectations of how READ_DATA is performed. Larger data requests are
> supported via streaming the data over the transport instead of requiring
> a single transport transfer for everything.
> 
> Co-developed-by: Carl Vanderlip <quic_carlv@quicinc.com>
> Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
> Signed-off-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
> Signed-off-by: Youssef Samir <youssef.abdulrahman@oss.qualcomm.com>

Reviewed-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] accel/qaic: Support the new READ_DATA implementation
  2025-10-07 22:40 [PATCH] accel/qaic: Support the new READ_DATA implementation Youssef Samir
  2025-10-08 18:31 ` Jeff Hugo
@ 2025-10-13 23:33 ` Carl Vanderlip
  2025-10-14 16:40 ` Jeff Hugo
  2025-10-22 16:03 ` Konrad Dybcio
  3 siblings, 0 replies; 7+ messages in thread
From: Carl Vanderlip @ 2025-10-13 23:33 UTC (permalink / raw)
  To: Youssef Samir, jeff.hugo, troy.hanson, zachary.mckevitt
  Cc: ogabbay, lizhi.hou, karol.wachowski, linux-arm-msm, dri-devel,
	Carl Vanderlip

On 10/7/2025 3:40 PM, Youssef Samir wrote:
> From: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
> 
> AIC200 uses the newer "XBL" firmware implementation which changes the
> expectations of how READ_DATA is performed. Larger data requests are
> supported via streaming the data over the transport instead of requiring
> a single transport transfer for everything.
> 
> Co-developed-by: Carl Vanderlip <quic_carlv@quicinc.com>
> Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
> Signed-off-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
> Signed-off-by: Youssef Samir <youssef.abdulrahman@oss.qualcomm.com>
> ---

Reviewed-by: Carl Vanderlip <carl.vanderlip@oss.qualcomm.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] accel/qaic: Support the new READ_DATA implementation
  2025-10-07 22:40 [PATCH] accel/qaic: Support the new READ_DATA implementation Youssef Samir
  2025-10-08 18:31 ` Jeff Hugo
  2025-10-13 23:33 ` Carl Vanderlip
@ 2025-10-14 16:40 ` Jeff Hugo
  2025-10-22 16:03 ` Konrad Dybcio
  3 siblings, 0 replies; 7+ messages in thread
From: Jeff Hugo @ 2025-10-14 16:40 UTC (permalink / raw)
  To: Youssef Samir, carl.vanderlip, troy.hanson, zachary.mckevitt
  Cc: ogabbay, lizhi.hou, karol.wachowski, linux-arm-msm, dri-devel,
	Carl Vanderlip

On 10/7/2025 4:40 PM, Youssef Samir wrote:
> From: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
> 
> AIC200 uses the newer "XBL" firmware implementation which changes the
> expectations of how READ_DATA is performed. Larger data requests are
> supported via streaming the data over the transport instead of requiring
> a single transport transfer for everything.
> 
> Co-developed-by: Carl Vanderlip <quic_carlv@quicinc.com>
> Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com>
> Signed-off-by: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
> Signed-off-by: Youssef Samir <youssef.abdulrahman@oss.qualcomm.com>

Pushed to drm-misc-next.

-Jeff

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] accel/qaic: Support the new READ_DATA implementation
  2025-10-07 22:40 [PATCH] accel/qaic: Support the new READ_DATA implementation Youssef Samir
                   ` (2 preceding siblings ...)
  2025-10-14 16:40 ` Jeff Hugo
@ 2025-10-22 16:03 ` Konrad Dybcio
  2025-10-22 16:28   ` Jeff Hugo
  3 siblings, 1 reply; 7+ messages in thread
From: Konrad Dybcio @ 2025-10-22 16:03 UTC (permalink / raw)
  To: Youssef Samir, jeff.hugo, carl.vanderlip, troy.hanson,
	zachary.mckevitt
  Cc: ogabbay, lizhi.hou, karol.wachowski, linux-arm-msm, dri-devel,
	Carl Vanderlip

On 10/8/25 12:40 AM, Youssef Samir wrote:
> From: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
> 
> AIC200 uses the newer "XBL" firmware implementation which changes the
> expectations of how READ_DATA is performed. Larger data requests are
> supported via streaming the data over the transport instead of requiring
> a single transport transfer for everything.

tldr just like reading/writing images in 'raw_mode' up until now?

Konrad

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] accel/qaic: Support the new READ_DATA implementation
  2025-10-22 16:03 ` Konrad Dybcio
@ 2025-10-22 16:28   ` Jeff Hugo
  2025-10-22 16:30     ` Konrad Dybcio
  0 siblings, 1 reply; 7+ messages in thread
From: Jeff Hugo @ 2025-10-22 16:28 UTC (permalink / raw)
  To: Konrad Dybcio, Youssef Samir, carl.vanderlip, troy.hanson,
	zachary.mckevitt
  Cc: ogabbay, lizhi.hou, karol.wachowski, linux-arm-msm, dri-devel,
	Carl Vanderlip

On 10/22/2025 10:03 AM, Konrad Dybcio wrote:
> On 10/8/25 12:40 AM, Youssef Samir wrote:
>> From: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
>>
>> AIC200 uses the newer "XBL" firmware implementation which changes the
>> expectations of how READ_DATA is performed. Larger data requests are
>> supported via streaming the data over the transport instead of requiring
>> a single transport transfer for everything.
> 
> tldr just like reading/writing images in 'raw_mode' up until now?

I'm not sure I understand what you are asking here.

AIC100 is the "old" SBL architecture. When the "current" XBL 
architecture came about, quite a bit of the components were rewritten. 
It seems like a different interpretation of the Sahara spec was taken 
for the XBL implementation.

In both cases, the boot component that is driving the Sahara component 
in FW will want segment X of the elf for the next step of processing.

In SBL, the Sahara component would have a specific MTU and break up the 
request (segment X of the elf) into MTU sized read requests for the 
host. The MTU is negotiated with the transport (MHI). The Sahara 
component expects the entire read request to be satisfied in a single 
return from the transport - anything less is an error.

In XBL, the Sahara component would make a single read request to the 
host for the entire request from the boot component (segment X of the 
elf), and expects the underlying transport to shove up data until the 
entire read request is satisfied (Sahara will sit in a loop until it 
gets all of the data).

There is a bit of oddity because the Sahara spec says that the host can 
indicate an error by sending a packet that is anything other than the 
requested read size, but "packet" is not defined. The SBL interpretation 
is that a "packet" is the transport packet - aka the MHI transfer.  The 
XBL interpretation is that the packet is a "Sahara packet" which is 
decoupled from the transport.

In the end, we have two different Sahara implementations in FW with 
different expectations, and both need to be supported.

-Jeff

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] accel/qaic: Support the new READ_DATA implementation
  2025-10-22 16:28   ` Jeff Hugo
@ 2025-10-22 16:30     ` Konrad Dybcio
  0 siblings, 0 replies; 7+ messages in thread
From: Konrad Dybcio @ 2025-10-22 16:30 UTC (permalink / raw)
  To: Jeff Hugo, Youssef Samir, carl.vanderlip, troy.hanson,
	zachary.mckevitt
  Cc: ogabbay, lizhi.hou, karol.wachowski, linux-arm-msm, dri-devel,
	Carl Vanderlip

On 10/22/25 6:28 PM, Jeff Hugo wrote:
> On 10/22/2025 10:03 AM, Konrad Dybcio wrote:
>> On 10/8/25 12:40 AM, Youssef Samir wrote:
>>> From: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
>>>
>>> AIC200 uses the newer "XBL" firmware implementation which changes the
>>> expectations of how READ_DATA is performed. Larger data requests are
>>> supported via streaming the data over the transport instead of requiring
>>> a single transport transfer for everything.
>>
>> tldr just like reading/writing images in 'raw_mode' up until now?
> 
> I'm not sure I understand what you are asking here.

Sorry I confused sahara with firehose here.. The former doesn't have a
notion of what I referred to

> 
> AIC100 is the "old" SBL architecture. When the "current" XBL architecture came about, quite a bit of the components were rewritten. It seems like a different interpretation of the Sahara spec was taken for the XBL implementation.
> 
> In both cases, the boot component that is driving the Sahara component in FW will want segment X of the elf for the next step of processing.
> 
> In SBL, the Sahara component would have a specific MTU and break up the request (segment X of the elf) into MTU sized read requests for the host. The MTU is negotiated with the transport (MHI). The Sahara component expects the entire read request to be satisfied in a single return from the transport - anything less is an error.
> 
> In XBL, the Sahara component would make a single read request to the host for the entire request from the boot component (segment X of the elf), and expects the underlying transport to shove up data until the entire read request is satisfied (Sahara will sit in a loop until it gets all of the data).
> 
> There is a bit of oddity because the Sahara spec says that the host can indicate an error by sending a packet that is anything other than the requested read size, but "packet" is not defined. The SBL interpretation is that a "packet" is the transport packet - aka the MHI transfer.  The XBL interpretation is that the packet is a "Sahara packet" which is decoupled from the transport.
> 
> In the end, we have two different Sahara implementations in FW with different expectations, and both need to be supported.

Thanks

Konrad

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-10-22 16:30 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-07 22:40 [PATCH] accel/qaic: Support the new READ_DATA implementation Youssef Samir
2025-10-08 18:31 ` Jeff Hugo
2025-10-13 23:33 ` Carl Vanderlip
2025-10-14 16:40 ` Jeff Hugo
2025-10-22 16:03 ` Konrad Dybcio
2025-10-22 16:28   ` Jeff Hugo
2025-10-22 16:30     ` Konrad Dybcio

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).