linux-i3c.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
@ 2025-06-04 12:55 Jarkko Nikula
  2025-06-04 12:55 ` [PATCH 2/3] i3c: mipi-i3c-hci: Use physical device pointer with DMA API Jarkko Nikula
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-04 12:55 UTC (permalink / raw)
  To: linux-i3c; +Cc: Alexandre Belloni, Frank Li, Jarkko Nikula

Move DMA bounce buffer code for I3C private transfers to be generic for
all DMA transfers, and round up the receive bounce buffer size to a
multiple of DWORDs.

It was observed that when the device DMA is IOMMU mapped and the receive
length is not a multiple of DWORDs, the last DWORD is padded with stale
data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.

A similar issue, though less severe, occurs when an I3C target returns
less data than requested. In this case, the padding does not exceed the
requested number of bytes, assuming the device DMA is not IOMMU mapped.

Therefore, all I3C private transfer, CCC command payload and I2C
transfer receive buffers must be properly sized for the DMA being IOMMU
mapped. Even if those buffers are already DMA safe, their size may not
be, and I don't have a clear idea how to guarantee this other than
using a local bounce buffer.

To prepare for the device DMA being IOMMU mapped and to address the
above issue, implement a local, properly sized bounce buffer for all
DMA transfers. For now, allocate it only when the buffer is in the
vmalloc() area to avoid unnecessary copying with CCC commands and
DMA-safe I2C transfers.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
---
 drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
 drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
index bc4538694540..24c5e7d5b439 100644
--- a/drivers/i3c/master/mipi-i3c-hci/core.c
+++ b/drivers/i3c/master/mipi-i3c-hci/core.c
@@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
 	return hci->cmd->perform_daa(hci);
 }
 
-static int i3c_hci_alloc_safe_xfer_buf(struct i3c_hci *hci,
-				       struct hci_xfer *xfer)
-{
-	if (hci->io != &mipi_i3c_hci_dma ||
-	    xfer->data == NULL || !is_vmalloc_addr(xfer->data))
-		return 0;
-
-	if (xfer->rnw)
-		xfer->bounce_buf = kzalloc(xfer->data_len, GFP_KERNEL);
-	else
-		xfer->bounce_buf = kmemdup(xfer->data,
-					   xfer->data_len, GFP_KERNEL);
-
-	return xfer->bounce_buf == NULL ? -ENOMEM : 0;
-}
-
-static void i3c_hci_free_safe_xfer_buf(struct i3c_hci *hci,
-				       struct hci_xfer *xfer)
-{
-	if (hci->io != &mipi_i3c_hci_dma || xfer->bounce_buf == NULL)
-		return;
-
-	if (xfer->rnw)
-		memcpy(xfer->data, xfer->bounce_buf, xfer->data_len);
-
-	kfree(xfer->bounce_buf);
-}
-
 static int i3c_hci_priv_xfers(struct i3c_dev_desc *dev,
 			      struct i3c_priv_xfer *i3c_xfers,
 			      int nxfers)
@@ -333,9 +305,6 @@ static int i3c_hci_priv_xfers(struct i3c_dev_desc *dev,
 		}
 		hci->cmd->prep_i3c_xfer(hci, dev, &xfer[i]);
 		xfer[i].cmd_desc[0] |= CMD_0_ROC;
-		ret = i3c_hci_alloc_safe_xfer_buf(hci, &xfer[i]);
-		if (ret)
-			goto out;
 	}
 	last = i - 1;
 	xfer[last].cmd_desc[0] |= CMD_0_TOC;
@@ -359,9 +328,6 @@ static int i3c_hci_priv_xfers(struct i3c_dev_desc *dev,
 	}
 
 out:
-	for (i = 0; i < nxfers; i++)
-		i3c_hci_free_safe_xfer_buf(hci, &xfer[i]);
-
 	hci_free_xfer(xfer, nxfers);
 	return ret;
 }
diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c
index 491dfe70b660..0311c84f5b4e 100644
--- a/drivers/i3c/master/mipi-i3c-hci/dma.c
+++ b/drivers/i3c/master/mipi-i3c-hci/dma.c
@@ -339,6 +339,44 @@ static int hci_dma_init(struct i3c_hci *hci)
 	return ret;
 }
 
+static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
+					 struct hci_xfer *xfer)
+{
+	if (!is_vmalloc_addr(xfer->data))
+		return xfer->data;
+
+	if (xfer->rnw)
+		/*
+		 * Round up the receive bounce buffer length to a multiple of
+		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
+		 * transfers may corrupt the last DWORD when transfer length is
+		 * not a multiple of DWORDs. This was observed when the device
+		 * DMA is IOMMU mapped or when an I3C target device returns
+		 * less data than requested. Latter case is less severe and
+		 * does not exceed the requested number of bytes, assuming the
+		 * device DMA is not IOMMU mapped.
+		 */
+		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
+					   GFP_KERNEL);
+	else
+		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
+					   GFP_KERNEL);
+
+	return xfer->bounce_buf;
+}
+
+static void hci_dma_free_safe_xfer_buf(struct i3c_hci *hci,
+				       struct hci_xfer *xfer)
+{
+	if (xfer->bounce_buf == NULL)
+		return;
+
+	if (xfer->rnw)
+		memcpy(xfer->data, xfer->bounce_buf, xfer->data_len);
+
+	kfree(xfer->bounce_buf);
+}
+
 static void hci_dma_unmap_xfer(struct i3c_hci *hci,
 			       struct hci_xfer *xfer_list, unsigned int n)
 {
@@ -352,6 +390,7 @@ static void hci_dma_unmap_xfer(struct i3c_hci *hci,
 		dma_unmap_single(&hci->master.dev,
 				 xfer->data_dma, xfer->data_len,
 				 xfer->rnw ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+		hci_dma_free_safe_xfer_buf(hci, xfer);
 	}
 }
 
@@ -391,7 +430,12 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci,
 
 		/* 2nd and 3rd words of Data Buffer Descriptor Structure */
 		if (xfer->data) {
-			buf = xfer->bounce_buf ? xfer->bounce_buf : xfer->data;
+			buf = hci_dma_alloc_safe_xfer_buf(hci, xfer);
+			if (buf == NULL) {
+				hci_dma_unmap_xfer(hci, xfer_list, i);
+				return -ENOMEM;
+			}
+
 			xfer->data_dma =
 				dma_map_single(&hci->master.dev,
 					       buf,
@@ -401,6 +445,7 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci,
 						  DMA_TO_DEVICE);
 			if (dma_mapping_error(&hci->master.dev,
 					      xfer->data_dma)) {
+				hci_dma_free_safe_xfer_buf(hci, xfer);
 				hci_dma_unmap_xfer(hci, xfer_list, i);
 				return -ENOMEM;
 			}
-- 
2.47.2


-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/3] i3c: mipi-i3c-hci: Use physical device pointer with DMA API
  2025-06-04 12:55 [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers Jarkko Nikula
@ 2025-06-04 12:55 ` Jarkko Nikula
  2025-06-04 12:55 ` [PATCH 3/3] i3c: mipi-i3c-hci: Use own DMA bounce buffer management for I2C transfers Jarkko Nikula
  2025-06-04 15:00 ` [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers Frank Li
  2 siblings, 0 replies; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-04 12:55 UTC (permalink / raw)
  To: linux-i3c
  Cc: Alexandre Belloni, Frank Li, Jarkko Nikula, Prabhakaran, Krishna

DMA transfers fault on Intel hardware when the IOMMU is enabled, and
driver initialization will fail when attempting to do the first transfer:

	DMAR: DRHD: handling fault status reg 2
	DMAR: [DMA Read NO_PASID] Request device [00:11.0] fault addr 0x676e3000 [fault reason 0x71] SM: Present bit in first-level paging entry is clear
	i3c mipi-i3c-hci.0: ring 0: Transfer Aborted
	mipi-i3c-hci mipi-i3c-hci.0: probe with driver mipi-i3c-hci failed with error -62

Reason for this is that the IOMMU setup is done for the physical devices
only and not for the virtual I3C Controller device object.

Therefore use the pointer to a physical device object with the DMA API.

Due to a data corruption observation when the device DMA is IOMMU
mapped, a properly sized receive bounce buffer is required if transfer
length is not a multiple of DWORDs. Extend the check in the
hci_dma_alloc_safe_xfer_buf() when bounce buffer allocation is required
and when it can be skipped.

Reported-by: Prabhakaran, Krishna <krishna.prabhakaran@intel.com>
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
---
 drivers/i3c/master/mipi-i3c-hci/dma.c | 53 +++++++++++++++++++--------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c
index 0311c84f5b4e..20e1b405244e 100644
--- a/drivers/i3c/master/mipi-i3c-hci/dma.c
+++ b/drivers/i3c/master/mipi-i3c-hci/dma.c
@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/i3c/master.h>
 #include <linux/io.h>
+#include <linux/pci.h>
 
 #include "hci.h"
 #include "cmd.h"
@@ -138,6 +139,7 @@ struct hci_rh_data {
 };
 
 struct hci_rings_data {
+	struct device *sysdev;
 	unsigned int total;
 	struct hci_rh_data headers[] __counted_by(total);
 };
@@ -165,20 +167,20 @@ static void hci_dma_cleanup(struct i3c_hci *hci)
 		rh_reg_write(IBI_SETUP, 0);
 
 		if (rh->xfer)
-			dma_free_coherent(&hci->master.dev,
+			dma_free_coherent(rings->sysdev,
 					  rh->xfer_struct_sz * rh->xfer_entries,
 					  rh->xfer, rh->xfer_dma);
 		if (rh->resp)
-			dma_free_coherent(&hci->master.dev,
+			dma_free_coherent(rings->sysdev,
 					  rh->resp_struct_sz * rh->xfer_entries,
 					  rh->resp, rh->resp_dma);
 		kfree(rh->src_xfers);
 		if (rh->ibi_status)
-			dma_free_coherent(&hci->master.dev,
+			dma_free_coherent(rings->sysdev,
 					  rh->ibi_status_sz * rh->ibi_status_entries,
 					  rh->ibi_status, rh->ibi_status_dma);
 		if (rh->ibi_data_dma)
-			dma_unmap_single(&hci->master.dev, rh->ibi_data_dma,
+			dma_unmap_single(rings->sysdev, rh->ibi_data_dma,
 					 rh->ibi_chunk_sz * rh->ibi_chunks_total,
 					 DMA_FROM_DEVICE);
 		kfree(rh->ibi_data);
@@ -194,11 +196,23 @@ static int hci_dma_init(struct i3c_hci *hci)
 {
 	struct hci_rings_data *rings;
 	struct hci_rh_data *rh;
+	struct device *sysdev;
 	u32 regval;
 	unsigned int i, nr_rings, xfers_sz, resps_sz;
 	unsigned int ibi_status_ring_sz, ibi_data_ring_sz;
 	int ret;
 
+	/*
+	 * Set pointer to a physical device that does DMA and has IOMMU setup
+	 * done for it in case of enabled IOMMU and use it with the DMA API.
+	 * Here such device is either
+	 * "mipi-i3c-hci" platform device (OF/ACPI enumeration) parent or
+	 * grandparent (PCI enumeration).
+	 */
+	sysdev = hci->master.dev.parent;
+	if (sysdev->parent && dev_is_pci(sysdev->parent))
+		sysdev = sysdev->parent;
+
 	regval = rhs_reg_read(CONTROL);
 	nr_rings = FIELD_GET(MAX_HEADER_COUNT_CAP, regval);
 	dev_info(&hci->master.dev, "%d DMA rings available\n", nr_rings);
@@ -213,6 +227,7 @@ static int hci_dma_init(struct i3c_hci *hci)
 		return -ENOMEM;
 	hci->io_data = rings;
 	rings->total = nr_rings;
+	rings->sysdev = sysdev;
 
 	regval = FIELD_PREP(MAX_HEADER_COUNT, rings->total);
 	rhs_reg_write(CONTROL, regval);
@@ -239,9 +254,9 @@ static int hci_dma_init(struct i3c_hci *hci)
 		xfers_sz = rh->xfer_struct_sz * rh->xfer_entries;
 		resps_sz = rh->resp_struct_sz * rh->xfer_entries;
 
-		rh->xfer = dma_alloc_coherent(&hci->master.dev, xfers_sz,
+		rh->xfer = dma_alloc_coherent(rings->sysdev, xfers_sz,
 					      &rh->xfer_dma, GFP_KERNEL);
-		rh->resp = dma_alloc_coherent(&hci->master.dev, resps_sz,
+		rh->resp = dma_alloc_coherent(rings->sysdev, resps_sz,
 					      &rh->resp_dma, GFP_KERNEL);
 		rh->src_xfers =
 			kmalloc_array(rh->xfer_entries, sizeof(*rh->src_xfers),
@@ -295,16 +310,16 @@ static int hci_dma_init(struct i3c_hci *hci)
 		ibi_data_ring_sz = rh->ibi_chunk_sz * rh->ibi_chunks_total;
 
 		rh->ibi_status =
-			dma_alloc_coherent(&hci->master.dev, ibi_status_ring_sz,
+			dma_alloc_coherent(rings->sysdev, ibi_status_ring_sz,
 					   &rh->ibi_status_dma, GFP_KERNEL);
 		rh->ibi_data = kmalloc(ibi_data_ring_sz, GFP_KERNEL);
 		ret = -ENOMEM;
 		if (!rh->ibi_status || !rh->ibi_data)
 			goto err_out;
 		rh->ibi_data_dma =
-			dma_map_single(&hci->master.dev, rh->ibi_data,
+			dma_map_single(rings->sysdev, rh->ibi_data,
 				       ibi_data_ring_sz, DMA_FROM_DEVICE);
-		if (dma_mapping_error(&hci->master.dev, rh->ibi_data_dma)) {
+		if (dma_mapping_error(rings->sysdev, rh->ibi_data_dma)) {
 			rh->ibi_data_dma = 0;
 			ret = -ENOMEM;
 			goto err_out;
@@ -342,7 +357,13 @@ static int hci_dma_init(struct i3c_hci *hci)
 static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
 					 struct hci_xfer *xfer)
 {
-	if (!is_vmalloc_addr(xfer->data))
+	struct hci_rings_data *rings = hci->io_data;
+
+	if (!is_vmalloc_addr(xfer->data) &&
+	    (!device_iommu_mapped(rings->sysdev) ||
+	     !xfer->rnw ||
+	     xfer->data_len == ALIGN(xfer->data_len, 4)))
+		/* Bounce buffer not required for this transfer */
 		return xfer->data;
 
 	if (xfer->rnw)
@@ -380,6 +401,7 @@ static void hci_dma_free_safe_xfer_buf(struct i3c_hci *hci,
 static void hci_dma_unmap_xfer(struct i3c_hci *hci,
 			       struct hci_xfer *xfer_list, unsigned int n)
 {
+	struct hci_rings_data *rings = hci->io_data;
 	struct hci_xfer *xfer;
 	unsigned int i;
 
@@ -387,7 +409,7 @@ static void hci_dma_unmap_xfer(struct i3c_hci *hci,
 		xfer = xfer_list + i;
 		if (!xfer->data)
 			continue;
-		dma_unmap_single(&hci->master.dev,
+		dma_unmap_single(rings->sysdev,
 				 xfer->data_dma, xfer->data_len,
 				 xfer->rnw ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
 		hci_dma_free_safe_xfer_buf(hci, xfer);
@@ -437,13 +459,13 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci,
 			}
 
 			xfer->data_dma =
-				dma_map_single(&hci->master.dev,
+				dma_map_single(rings->sysdev,
 					       buf,
 					       xfer->data_len,
 					       xfer->rnw ?
 						  DMA_FROM_DEVICE :
 						  DMA_TO_DEVICE);
-			if (dma_mapping_error(&hci->master.dev,
+			if (dma_mapping_error(rings->sysdev,
 					      xfer->data_dma)) {
 				hci_dma_free_safe_xfer_buf(hci, xfer);
 				hci_dma_unmap_xfer(hci, xfer_list, i);
@@ -631,6 +653,7 @@ static void hci_dma_recycle_ibi_slot(struct i3c_hci *hci,
 
 static void hci_dma_process_ibi(struct i3c_hci *hci, struct hci_rh_data *rh)
 {
+	struct hci_rings_data *rings = hci->io_data;
 	struct i3c_dev_desc *dev;
 	struct i3c_hci_dev_data *dev_data;
 	struct hci_dma_dev_ibi_data *dev_ibi;
@@ -741,7 +764,7 @@ static void hci_dma_process_ibi(struct i3c_hci *hci, struct hci_rh_data *rh)
 			* rh->ibi_chunk_sz;
 	if (first_part > ibi_size)
 		first_part = ibi_size;
-	dma_sync_single_for_cpu(&hci->master.dev, ring_ibi_data_dma,
+	dma_sync_single_for_cpu(rings->sysdev, ring_ibi_data_dma,
 				first_part, DMA_FROM_DEVICE);
 	memcpy(slot->data, ring_ibi_data, first_part);
 
@@ -750,7 +773,7 @@ static void hci_dma_process_ibi(struct i3c_hci *hci, struct hci_rh_data *rh)
 		/* we wrap back to the start and copy remaining data */
 		ring_ibi_data = rh->ibi_data;
 		ring_ibi_data_dma = rh->ibi_data_dma;
-		dma_sync_single_for_cpu(&hci->master.dev, ring_ibi_data_dma,
+		dma_sync_single_for_cpu(rings->sysdev, ring_ibi_data_dma,
 					ibi_size - first_part, DMA_FROM_DEVICE);
 		memcpy(slot->data + first_part, ring_ibi_data,
 		       ibi_size - first_part);
-- 
2.47.2


-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/3] i3c: mipi-i3c-hci: Use own DMA bounce buffer management for I2C transfers
  2025-06-04 12:55 [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers Jarkko Nikula
  2025-06-04 12:55 ` [PATCH 2/3] i3c: mipi-i3c-hci: Use physical device pointer with DMA API Jarkko Nikula
@ 2025-06-04 12:55 ` Jarkko Nikula
  2025-06-04 15:00 ` [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers Frank Li
  2 siblings, 0 replies; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-04 12:55 UTC (permalink / raw)
  To: linux-i3c; +Cc: Alexandre Belloni, Frank Li, Jarkko Nikula, Billy Tsai

Stop using I2C DMA-safe API for two reasons:
- Not needed if the driver is using PIO mode.
- DMA transfers need a properly sized receive bounce buffer when the
  device DMA is IOMMU mapped, causing needless double bounce buffering in
  that case.

Cc: Billy Tsai <billy_tsai@aspeedtech.com>
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
---
 drivers/i3c/master/mipi-i3c-hci/core.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
index 24c5e7d5b439..fa4c0b826345 100644
--- a/drivers/i3c/master/mipi-i3c-hci/core.c
+++ b/drivers/i3c/master/mipi-i3c-hci/core.c
@@ -348,7 +348,7 @@ static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev,
 		return -ENOMEM;
 
 	for (i = 0; i < nxfers; i++) {
-		xfer[i].data = i2c_get_dma_safe_msg_buf(&i2c_xfers[i], 1);
+		xfer[i].data = i2c_xfers[i].buf;
 		xfer[i].data_len = i2c_xfers[i].len;
 		xfer[i].rnw = i2c_xfers[i].flags & I2C_M_RD;
 		hci->cmd->prep_i2c_xfer(hci, dev, &xfer[i]);
@@ -374,10 +374,6 @@ static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev,
 	}
 
 out:
-	for (i = 0; i < nxfers; i++)
-		i2c_put_dma_safe_msg_buf(xfer[i].data, &i2c_xfers[i],
-					 ret ? false : true);
-
 	hci_free_xfer(xfer, nxfers);
 	return ret;
 }
-- 
2.47.2


-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-04 12:55 [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers Jarkko Nikula
  2025-06-04 12:55 ` [PATCH 2/3] i3c: mipi-i3c-hci: Use physical device pointer with DMA API Jarkko Nikula
  2025-06-04 12:55 ` [PATCH 3/3] i3c: mipi-i3c-hci: Use own DMA bounce buffer management for I2C transfers Jarkko Nikula
@ 2025-06-04 15:00 ` Frank Li
  2025-06-05 14:07   ` Jarkko Nikula
  2 siblings, 1 reply; 17+ messages in thread
From: Frank Li @ 2025-06-04 15:00 UTC (permalink / raw)
  To: Jarkko Nikula; +Cc: linux-i3c, Alexandre Belloni

On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
> Move DMA bounce buffer code for I3C private transfers to be generic for
> all DMA transfers, and round up the receive bounce buffer size to a
> multiple of DWORDs.
>
> It was observed that when the device DMA is IOMMU mapped and the receive
> length is not a multiple of DWORDs, the last DWORD is padded with stale
> data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
>
> A similar issue, though less severe, occurs when an I3C target returns
> less data than requested. In this case, the padding does not exceed the
> requested number of bytes, assuming the device DMA is not IOMMU mapped.
>
> Therefore, all I3C private transfer, CCC command payload and I2C
> transfer receive buffers must be properly sized for the DMA being IOMMU
> mapped. Even if those buffers are already DMA safe, their size may not
> be, and I don't have a clear idea how to guarantee this other than
> using a local bounce buffer.
>
> To prepare for the device DMA being IOMMU mapped and to address the
> above issue, implement a local, properly sized bounce buffer for all
> DMA transfers. For now, allocate it only when the buffer is in the
> vmalloc() area to avoid unnecessary copying with CCC commands and
> DMA-safe I2C transfers.
>
> Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
> ---
>  drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
>  drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
>  2 files changed, 46 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
> index bc4538694540..24c5e7d5b439 100644
> --- a/drivers/i3c/master/mipi-i3c-hci/core.c
> +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
> @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
>  	return hci->cmd->perform_daa(hci);
>  }
>
...
>  }
>
> +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
> +					 struct hci_xfer *xfer)
> +{
> +	if (!is_vmalloc_addr(xfer->data))
> +		return xfer->data;
> +
> +	if (xfer->rnw)
> +		/*
> +		 * Round up the receive bounce buffer length to a multiple of
> +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
> +		 * transfers may corrupt the last DWORD when transfer length is
> +		 * not a multiple of DWORDs. This was observed when the device
> +		 * DMA is IOMMU mapped or when an I3C target device returns
> +		 * less data than requested. Latter case is less severe and
> +		 * does not exceed the requested number of bytes, assuming the
> +		 * device DMA is not IOMMU mapped.
> +		 */
> +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
> +					   GFP_KERNEL);
> +	else
> +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
> +					   GFP_KERNEL);
> +
> +	return xfer->bounce_buf;

Why need use bounce_buf? why not use dma_map_single()?, which will check
alignment and size to decide if use swiotlb as bounce buffer.

Frank

> +}
> +
> +static void hci_dma_free_safe_xfer_buf(struct i3c_hci *hci,
> +				       struct hci_xfer *xfer)
> +{
> +	if (xfer->bounce_buf == NULL)
> +		return;
> +
> +	if (xfer->rnw)
> +		memcpy(xfer->data, xfer->bounce_buf, xfer->data_len);
> +
> +	kfree(xfer->bounce_buf);
> +}
> +
>  static void hci_dma_unmap_xfer(struct i3c_hci *hci,
>  			       struct hci_xfer *xfer_list, unsigned int n)
>  {
> @@ -352,6 +390,7 @@ static void hci_dma_unmap_xfer(struct i3c_hci *hci,
>  		dma_unmap_single(&hci->master.dev,
>  				 xfer->data_dma, xfer->data_len,
>  				 xfer->rnw ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
> +		hci_dma_free_safe_xfer_buf(hci, xfer);
>  	}
>  }
>
> @@ -391,7 +430,12 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci,
>
>  		/* 2nd and 3rd words of Data Buffer Descriptor Structure */
>  		if (xfer->data) {
> -			buf = xfer->bounce_buf ? xfer->bounce_buf : xfer->data;
> +			buf = hci_dma_alloc_safe_xfer_buf(hci, xfer);
> +			if (buf == NULL) {
> +				hci_dma_unmap_xfer(hci, xfer_list, i);
> +				return -ENOMEM;
> +			}
> +
>  			xfer->data_dma =
>  				dma_map_single(&hci->master.dev,
>  					       buf,
> @@ -401,6 +445,7 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci,
>  						  DMA_TO_DEVICE);
>  			if (dma_mapping_error(&hci->master.dev,
>  					      xfer->data_dma)) {
> +				hci_dma_free_safe_xfer_buf(hci, xfer);
>  				hci_dma_unmap_xfer(hci, xfer_list, i);
>  				return -ENOMEM;
>  			}
> --
> 2.47.2
>
>
> --
> linux-i3c mailing list
> linux-i3c@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-i3c

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-04 15:00 ` [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers Frank Li
@ 2025-06-05 14:07   ` Jarkko Nikula
  2025-06-05 15:13     ` Frank Li
  0 siblings, 1 reply; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-05 14:07 UTC (permalink / raw)
  To: Frank Li; +Cc: linux-i3c, Alexandre Belloni

Hi

On 6/4/25 6:00 PM, Frank Li wrote:
> On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
>> Move DMA bounce buffer code for I3C private transfers to be generic for
>> all DMA transfers, and round up the receive bounce buffer size to a
>> multiple of DWORDs.
>>
>> It was observed that when the device DMA is IOMMU mapped and the receive
>> length is not a multiple of DWORDs, the last DWORD is padded with stale
>> data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
>>
>> A similar issue, though less severe, occurs when an I3C target returns
>> less data than requested. In this case, the padding does not exceed the
>> requested number of bytes, assuming the device DMA is not IOMMU mapped.
>>
>> Therefore, all I3C private transfer, CCC command payload and I2C
>> transfer receive buffers must be properly sized for the DMA being IOMMU
>> mapped. Even if those buffers are already DMA safe, their size may not
>> be, and I don't have a clear idea how to guarantee this other than
>> using a local bounce buffer.
>>
>> To prepare for the device DMA being IOMMU mapped and to address the
>> above issue, implement a local, properly sized bounce buffer for all
>> DMA transfers. For now, allocate it only when the buffer is in the
>> vmalloc() area to avoid unnecessary copying with CCC commands and
>> DMA-safe I2C transfers.
>>
>> Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
>> ---
>>   drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
>>   drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
>>   2 files changed, 46 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
>> index bc4538694540..24c5e7d5b439 100644
>> --- a/drivers/i3c/master/mipi-i3c-hci/core.c
>> +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
>> @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
>>   	return hci->cmd->perform_daa(hci);
>>   }
>>
> ...
>>   }
>>
>> +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
>> +					 struct hci_xfer *xfer)
>> +{
>> +	if (!is_vmalloc_addr(xfer->data))
>> +		return xfer->data;
>> +
>> +	if (xfer->rnw)
>> +		/*
>> +		 * Round up the receive bounce buffer length to a multiple of
>> +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
>> +		 * transfers may corrupt the last DWORD when transfer length is
>> +		 * not a multiple of DWORDs. This was observed when the device
>> +		 * DMA is IOMMU mapped or when an I3C target device returns
>> +		 * less data than requested. Latter case is less severe and
>> +		 * does not exceed the requested number of bytes, assuming the
>> +		 * device DMA is not IOMMU mapped.
>> +		 */
>> +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
>> +					   GFP_KERNEL);
>> +	else
>> +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
>> +					   GFP_KERNEL);
>> +
>> +	return xfer->bounce_buf;
> 
> Why need use bounce_buf? why not use dma_map_single()?, which will check
> alignment and size to decide if use swiotlb as bounce buffer.
> 
We do pass the buffer to the dma_map_single(). I've understood swiotlb 
is transparently used when the DMA cannot directly access the memory but 
that's not the case here.

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-05 14:07   ` Jarkko Nikula
@ 2025-06-05 15:13     ` Frank Li
  2025-06-06  7:16       ` Jarkko Nikula
  0 siblings, 1 reply; 17+ messages in thread
From: Frank Li @ 2025-06-05 15:13 UTC (permalink / raw)
  To: Jarkko Nikula; +Cc: linux-i3c, Alexandre Belloni

On Thu, Jun 05, 2025 at 05:07:19PM +0300, Jarkko Nikula wrote:
> Hi
>
> On 6/4/25 6:00 PM, Frank Li wrote:
> > On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
> > > Move DMA bounce buffer code for I3C private transfers to be generic for
> > > all DMA transfers, and round up the receive bounce buffer size to a
> > > multiple of DWORDs.
> > >
> > > It was observed that when the device DMA is IOMMU mapped and the receive
> > > length is not a multiple of DWORDs, the last DWORD is padded with stale
> > > data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
> > >
> > > A similar issue, though less severe, occurs when an I3C target returns
> > > less data than requested. In this case, the padding does not exceed the
> > > requested number of bytes, assuming the device DMA is not IOMMU mapped.
> > >
> > > Therefore, all I3C private transfer, CCC command payload and I2C
> > > transfer receive buffers must be properly sized for the DMA being IOMMU
> > > mapped. Even if those buffers are already DMA safe, their size may not
> > > be, and I don't have a clear idea how to guarantee this other than
> > > using a local bounce buffer.
> > >
> > > To prepare for the device DMA being IOMMU mapped and to address the
> > > above issue, implement a local, properly sized bounce buffer for all
> > > DMA transfers. For now, allocate it only when the buffer is in the
> > > vmalloc() area to avoid unnecessary copying with CCC commands and
> > > DMA-safe I2C transfers.
> > >
> > > Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
> > > ---
> > >   drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
> > >   drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
> > >   2 files changed, 46 insertions(+), 35 deletions(-)
> > >
> > > diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > index bc4538694540..24c5e7d5b439 100644
> > > --- a/drivers/i3c/master/mipi-i3c-hci/core.c
> > > +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
> > >   	return hci->cmd->perform_daa(hci);
> > >   }
> > >
> > ...
> > >   }
> > >
> > > +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
> > > +					 struct hci_xfer *xfer)
> > > +{
> > > +	if (!is_vmalloc_addr(xfer->data))
> > > +		return xfer->data;
> > > +
> > > +	if (xfer->rnw)
> > > +		/*
> > > +		 * Round up the receive bounce buffer length to a multiple of
> > > +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
> > > +		 * transfers may corrupt the last DWORD when transfer length is
> > > +		 * not a multiple of DWORDs. This was observed when the device
> > > +		 * DMA is IOMMU mapped or when an I3C target device returns
> > > +		 * less data than requested. Latter case is less severe and
> > > +		 * does not exceed the requested number of bytes, assuming the
> > > +		 * device DMA is not IOMMU mapped.
> > > +		 */
> > > +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
> > > +					   GFP_KERNEL);
> > > +	else
> > > +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
> > > +					   GFP_KERNEL);
> > > +
> > > +	return xfer->bounce_buf;
> >
> > Why need use bounce_buf? why not use dma_map_single()?, which will check
> > alignment and size to decide if use swiotlb as bounce buffer.
> >
> We do pass the buffer to the dma_map_single(). I've understood swiotlb is
> transparently used when the DMA cannot directly access the memory but that's
> not the case here.

why not? even though you have to use internal buf, why not use
dma_alloc_coherent().

Frank
>
> --
> linux-i3c mailing list
> linux-i3c@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-i3c

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-05 15:13     ` Frank Li
@ 2025-06-06  7:16       ` Jarkko Nikula
  2025-06-06 15:02         ` Frank Li
  0 siblings, 1 reply; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-06  7:16 UTC (permalink / raw)
  To: Frank Li; +Cc: linux-i3c, Alexandre Belloni

Hi

On 6/5/25 6:13 PM, Frank Li wrote:
> On Thu, Jun 05, 2025 at 05:07:19PM +0300, Jarkko Nikula wrote:
>> Hi
>>
>> On 6/4/25 6:00 PM, Frank Li wrote:
>>> On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
>>>> Move DMA bounce buffer code for I3C private transfers to be generic for
>>>> all DMA transfers, and round up the receive bounce buffer size to a
>>>> multiple of DWORDs.
>>>>
>>>> It was observed that when the device DMA is IOMMU mapped and the receive
>>>> length is not a multiple of DWORDs, the last DWORD is padded with stale
>>>> data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
>>>>
>>>> A similar issue, though less severe, occurs when an I3C target returns
>>>> less data than requested. In this case, the padding does not exceed the
>>>> requested number of bytes, assuming the device DMA is not IOMMU mapped.
>>>>
>>>> Therefore, all I3C private transfer, CCC command payload and I2C
>>>> transfer receive buffers must be properly sized for the DMA being IOMMU
>>>> mapped. Even if those buffers are already DMA safe, their size may not
>>>> be, and I don't have a clear idea how to guarantee this other than
>>>> using a local bounce buffer.
>>>>
>>>> To prepare for the device DMA being IOMMU mapped and to address the
>>>> above issue, implement a local, properly sized bounce buffer for all
>>>> DMA transfers. For now, allocate it only when the buffer is in the
>>>> vmalloc() area to avoid unnecessary copying with CCC commands and
>>>> DMA-safe I2C transfers.
>>>>
>>>> Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
>>>> ---
>>>>    drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
>>>>    drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
>>>>    2 files changed, 46 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
>>>> index bc4538694540..24c5e7d5b439 100644
>>>> --- a/drivers/i3c/master/mipi-i3c-hci/core.c
>>>> +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
>>>> @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
>>>>    	return hci->cmd->perform_daa(hci);
>>>>    }
>>>>
>>> ...
>>>>    }
>>>>
>>>> +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
>>>> +					 struct hci_xfer *xfer)
>>>> +{
>>>> +	if (!is_vmalloc_addr(xfer->data))
>>>> +		return xfer->data;
>>>> +
>>>> +	if (xfer->rnw)
>>>> +		/*
>>>> +		 * Round up the receive bounce buffer length to a multiple of
>>>> +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
>>>> +		 * transfers may corrupt the last DWORD when transfer length is
>>>> +		 * not a multiple of DWORDs. This was observed when the device
>>>> +		 * DMA is IOMMU mapped or when an I3C target device returns
>>>> +		 * less data than requested. Latter case is less severe and
>>>> +		 * does not exceed the requested number of bytes, assuming the
>>>> +		 * device DMA is not IOMMU mapped.
>>>> +		 */
>>>> +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
>>>> +					   GFP_KERNEL);
>>>> +	else
>>>> +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
>>>> +					   GFP_KERNEL);
>>>> +
>>>> +	return xfer->bounce_buf;
>>>
>>> Why need use bounce_buf? why not use dma_map_single()?, which will check
>>> alignment and size to decide if use swiotlb as bounce buffer.
>>>
>> We do pass the buffer to the dma_map_single(). I've understood swiotlb is
>> transparently used when the DMA cannot directly access the memory but that's
>> not the case here.
> 
> why not? even though you have to use internal buf, why not use
> dma_alloc_coherent().
> 
I don't think it's suitable for "smallish" per-transfer
allocations/mappings, and the buffer is not accessed concurrently by the
CPU and DMA during the transfer anyway.

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-06  7:16       ` Jarkko Nikula
@ 2025-06-06 15:02         ` Frank Li
  2025-06-09 14:15           ` Jarkko Nikula
  0 siblings, 1 reply; 17+ messages in thread
From: Frank Li @ 2025-06-06 15:02 UTC (permalink / raw)
  To: Jarkko Nikula; +Cc: linux-i3c, Alexandre Belloni

On Fri, Jun 06, 2025 at 10:16:44AM +0300, Jarkko Nikula wrote:
> Hi
>
> On 6/5/25 6:13 PM, Frank Li wrote:
> > On Thu, Jun 05, 2025 at 05:07:19PM +0300, Jarkko Nikula wrote:
> > > Hi
> > >
> > > On 6/4/25 6:00 PM, Frank Li wrote:
> > > > On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
> > > > > Move DMA bounce buffer code for I3C private transfers to be generic for
> > > > > all DMA transfers, and round up the receive bounce buffer size to a
> > > > > multiple of DWORDs.
> > > > >
> > > > > It was observed that when the device DMA is IOMMU mapped and the receive
> > > > > length is not a multiple of DWORDs, the last DWORD is padded with stale
> > > > > data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
> > > > >
> > > > > A similar issue, though less severe, occurs when an I3C target returns
> > > > > less data than requested. In this case, the padding does not exceed the
> > > > > requested number of bytes, assuming the device DMA is not IOMMU mapped.
> > > > >
> > > > > Therefore, all I3C private transfer, CCC command payload and I2C
> > > > > transfer receive buffers must be properly sized for the DMA being IOMMU
> > > > > mapped. Even if those buffers are already DMA safe, their size may not
> > > > > be, and I don't have a clear idea how to guarantee this other than
> > > > > using a local bounce buffer.
> > > > >
> > > > > To prepare for the device DMA being IOMMU mapped and to address the
> > > > > above issue, implement a local, properly sized bounce buffer for all
> > > > > DMA transfers. For now, allocate it only when the buffer is in the
> > > > > vmalloc() area to avoid unnecessary copying with CCC commands and
> > > > > DMA-safe I2C transfers.
> > > > >
> > > > > Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
> > > > > ---
> > > > >    drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
> > > > >    drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
> > > > >    2 files changed, 46 insertions(+), 35 deletions(-)
> > > > >
> > > > > diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > index bc4538694540..24c5e7d5b439 100644
> > > > > --- a/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
> > > > >    	return hci->cmd->perform_daa(hci);
> > > > >    }
> > > > >
> > > > ...
> > > > >    }
> > > > >
> > > > > +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
> > > > > +					 struct hci_xfer *xfer)
> > > > > +{
> > > > > +	if (!is_vmalloc_addr(xfer->data))
> > > > > +		return xfer->data;
> > > > > +
> > > > > +	if (xfer->rnw)
> > > > > +		/*
> > > > > +		 * Round up the receive bounce buffer length to a multiple of
> > > > > +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
> > > > > +		 * transfers may corrupt the last DWORD when transfer length is
> > > > > +		 * not a multiple of DWORDs. This was observed when the device
> > > > > +		 * DMA is IOMMU mapped or when an I3C target device returns
> > > > > +		 * less data than requested. Latter case is less severe and
> > > > > +		 * does not exceed the requested number of bytes, assuming the
> > > > > +		 * device DMA is not IOMMU mapped.
> > > > > +		 */
> > > > > +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
> > > > > +					   GFP_KERNEL);
> > > > > +	else
> > > > > +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
> > > > > +					   GFP_KERNEL);
> > > > > +
> > > > > +	return xfer->bounce_buf;
> > > >
> > > > Why need use bounce_buf? why not use dma_map_single()?, which will check
> > > > alignment and size to decide if use swiotlb as bounce buffer.
> > > >
> > > We do pass the buffer to the dma_map_single(). I've understood swiotlb is
> > > transparently used when the DMA cannot directly access the memory but that's
> > > not the case here.
> >
> > why not? even though you have to use internal buf, why not use
> > dma_alloc_coherent().
> >
> I don't think it's suitable for "smallish" per transfer allocations/mappings
> and buffer is not accessed concurrently by the CPU and DMA during the
> transfer.

I still can't understand why this special handling is needed for i3c.
Basically it uses DMA transfers to move data. Can you briefly describe
the HCI's data flow?

Frank

>
> --
> linux-i3c mailing list
> linux-i3c@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-i3c

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-06 15:02         ` Frank Li
@ 2025-06-09 14:15           ` Jarkko Nikula
  2025-06-09 15:04             ` Frank Li
  0 siblings, 1 reply; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-09 14:15 UTC (permalink / raw)
  To: Frank Li; +Cc: linux-i3c, Alexandre Belloni

On 6/6/25 6:02 PM, Frank Li wrote:
> On Fri, Jun 06, 2025 at 10:16:44AM +0300, Jarkko Nikula wrote:
>> Hi
>>
>> On 6/5/25 6:13 PM, Frank Li wrote:
>>> On Thu, Jun 05, 2025 at 05:07:19PM +0300, Jarkko Nikula wrote:
>>>> Hi
>>>>
>>>> On 6/4/25 6:00 PM, Frank Li wrote:
>>>>> On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
>>>>>> Move DMA bounce buffer code for I3C private transfers to be generic for
>>>>>> all DMA transfers, and round up the receive bounce buffer size to a
>>>>>> multiple of DWORDs.
>>>>>>
>>>>>> It was observed that when the device DMA is IOMMU mapped and the receive
>>>>>> length is not a multiple of DWORDs, the last DWORD is padded with stale
>>>>>> data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
>>>>>>
>>>>>> A similar issue, though less severe, occurs when an I3C target returns
>>>>>> less data than requested. In this case, the padding does not exceed the
>>>>>> requested number of bytes, assuming the device DMA is not IOMMU mapped.
>>>>>>
>>>>>> Therefore, all I3C private transfer, CCC command payload and I2C
>>>>>> transfer receive buffers must be properly sized for the DMA being IOMMU
>>>>>> mapped. Even if those buffers are already DMA safe, their size may not
>>>>>> be, and I don't have a clear idea how to guarantee this other than
>>>>>> using a local bounce buffer.
>>>>>>
>>>>>> To prepare for the device DMA being IOMMU mapped and to address the
>>>>>> above issue, implement a local, properly sized bounce buffer for all
>>>>>> DMA transfers. For now, allocate it only when the buffer is in the
>>>>>> vmalloc() area to avoid unnecessary copying with CCC commands and
>>>>>> DMA-safe I2C transfers.
>>>>>>
>>>>>> Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
>>>>>> ---
>>>>>>     drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
>>>>>>     drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
>>>>>>     2 files changed, 46 insertions(+), 35 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
>>>>>> index bc4538694540..24c5e7d5b439 100644
>>>>>> --- a/drivers/i3c/master/mipi-i3c-hci/core.c
>>>>>> +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
>>>>>> @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
>>>>>>     	return hci->cmd->perform_daa(hci);
>>>>>>     }
>>>>>>
>>>>> ...
>>>>>>     }
>>>>>>
>>>>>> +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
>>>>>> +					 struct hci_xfer *xfer)
>>>>>> +{
>>>>>> +	if (!is_vmalloc_addr(xfer->data))
>>>>>> +		return xfer->data;
>>>>>> +
>>>>>> +	if (xfer->rnw)
>>>>>> +		/*
>>>>>> +		 * Round up the receive bounce buffer length to a multiple of
>>>>>> +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
>>>>>> +		 * transfers may corrupt the last DWORD when transfer length is
>>>>>> +		 * not a multiple of DWORDs. This was observed when the device
>>>>>> +		 * DMA is IOMMU mapped or when an I3C target device returns
>>>>>> +		 * less data than requested. Latter case is less severe and
>>>>>> +		 * does not exceed the requested number of bytes, assuming the
>>>>>> +		 * device DMA is not IOMMU mapped.
>>>>>> +		 */
>>>>>> +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
>>>>>> +					   GFP_KERNEL);
>>>>>> +	else
>>>>>> +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
>>>>>> +					   GFP_KERNEL);
>>>>>> +
>>>>>> +	return xfer->bounce_buf;
>>>>>
>>>>> Why need use bounce_buf? why not use dma_map_single()?, which will check
>>>>> alignment and size to decide if use swiotlb as bounce buffer.
>>>>>
>>>> We do pass the buffer to the dma_map_single(). I've understood swiotlb is
>>>> transparently used when the DMA cannot directly access the memory but that's
>>>> not the case here.
>>>
>>> why not? even though you have to use internal buf, why not use
>>> dma_alloc_coherent().
>>>
>> I don't think it's suitable for "smallish" per transfer allocations/mappings
>> and buffer is not accessed concurrently by the CPU and DMA during the
>> transfer.
> 
> I still can't understand why need this special handle for i3c. Basic it use
> dma transfer to transfer data. Can you simple descript hci's data flow?
> 
Unfortunately my knowledge doesn't go deep enough into the HW, but I
could guess it's somewhere in the MIPI I3C HCI DMA engine implementation
and/or the HCI IP to memory bridge/bus integration.

If it were a cache line issue, then I would think we would see
corruption independently of whether the DMA is IOMMU mapped or not, and
in a larger number of bytes than just the last 1-3 bytes.

Another finding is that if an I3C target returns less data than
expected, then the last DWORD is also padded after the real data, though
not beyond the requested length. I.e. with 2 bytes expected and the
target returning 1, only the 2nd byte contains stale data and bytes 3-4
are untouched. Or with 8 bytes expected and the target returning 1,
bytes 2-4 contain stale data and bytes 5-8 are untouched.

So, as far as I can guess, it is something around the trailing-bytes
handling in the HW when the transfer length is not a multiple of DWORDs.

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-09 14:15           ` Jarkko Nikula
@ 2025-06-09 15:04             ` Frank Li
  2025-06-10 13:42               ` Jarkko Nikula
  0 siblings, 1 reply; 17+ messages in thread
From: Frank Li @ 2025-06-09 15:04 UTC (permalink / raw)
  To: Jarkko Nikula; +Cc: linux-i3c, Alexandre Belloni

On Mon, Jun 09, 2025 at 05:15:44PM +0300, Jarkko Nikula wrote:
> On 6/6/25 6:02 PM, Frank Li wrote:
> > On Fri, Jun 06, 2025 at 10:16:44AM +0300, Jarkko Nikula wrote:
> > > Hi
> > >
> > > On 6/5/25 6:13 PM, Frank Li wrote:
> > > > On Thu, Jun 05, 2025 at 05:07:19PM +0300, Jarkko Nikula wrote:
> > > > > Hi
> > > > >
> > > > > On 6/4/25 6:00 PM, Frank Li wrote:
> > > > > > On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
> > > > > > > Move DMA bounce buffer code for I3C private transfers to be generic for
> > > > > > > all DMA transfers, and round up the receive bounce buffer size to a
> > > > > > > multiple of DWORDs.
> > > > > > >
> > > > > > > It was observed that when the device DMA is IOMMU mapped and the receive
> > > > > > > length is not a multiple of DWORDs, the last DWORD is padded with stale
> > > > > > > data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
> > > > > > >
> > > > > > > A similar issue, though less severe, occurs when an I3C target returns
> > > > > > > less data than requested. In this case, the padding does not exceed the
> > > > > > > requested number of bytes, assuming the device DMA is not IOMMU mapped.
> > > > > > >
> > > > > > > Therefore, all I3C private transfer, CCC command payload and I2C
> > > > > > > transfer receive buffers must be properly sized for the DMA being IOMMU
> > > > > > > mapped. Even if those buffers are already DMA safe, their size may not
> > > > > > > be, and I don't have a clear idea how to guarantee this other than
> > > > > > > using a local bounce buffer.
> > > > > > >
> > > > > > > To prepare for the device DMA being IOMMU mapped and to address the
> > > > > > > above issue, implement a local, properly sized bounce buffer for all
> > > > > > > DMA transfers. For now, allocate it only when the buffer is in the
> > > > > > > vmalloc() area to avoid unnecessary copying with CCC commands and
> > > > > > > DMA-safe I2C transfers.
> > > > > > >
> > > > > > > Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
> > > > > > > ---
> > > > > > >     drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
> > > > > > >     drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
> > > > > > >     2 files changed, 46 insertions(+), 35 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > index bc4538694540..24c5e7d5b439 100644
> > > > > > > --- a/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
> > > > > > >     	return hci->cmd->perform_daa(hci);
> > > > > > >     }
> > > > > > >
> > > > > > ...
> > > > > > >     }
> > > > > > >
> > > > > > > +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
> > > > > > > +					 struct hci_xfer *xfer)
> > > > > > > +{
> > > > > > > +	if (!is_vmalloc_addr(xfer->data))
> > > > > > > +		return xfer->data;
> > > > > > > +
> > > > > > > +	if (xfer->rnw)
> > > > > > > +		/*
> > > > > > > +		 * Round up the receive bounce buffer length to a multiple of
> > > > > > > +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
> > > > > > > +		 * transfers may corrupt the last DWORD when transfer length is
> > > > > > > +		 * not a multiple of DWORDs. This was observed when the device
> > > > > > > +		 * DMA is IOMMU mapped or when an I3C target device returns
> > > > > > > +		 * less data than requested. Latter case is less severe and
> > > > > > > +		 * does not exceed the requested number of bytes, assuming the
> > > > > > > +		 * device DMA is not IOMMU mapped.
> > > > > > > +		 */
> > > > > > > +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
> > > > > > > +					   GFP_KERNEL);
> > > > > > > +	else
> > > > > > > +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
> > > > > > > +					   GFP_KERNEL);
> > > > > > > +
> > > > > > > +	return xfer->bounce_buf;
> > > > > >
> > > > > > Why need use bounce_buf? why not use dma_map_single()?, which will check
> > > > > > alignment and size to decide if use swiotlb as bounce buffer.
> > > > > >
> > > > > We do pass the buffer to the dma_map_single(). I've understood swiotlb is
> > > > > transparently used when the DMA cannot directly access the memory but that's
> > > > > not the case here.
> > > >
> > > > why not? even though you have to use internal buf, why not use
> > > > dma_alloc_coherent().
> > > >
> > > I don't think it's suitable for "smallish" per transfer allocations/mappings
> > > and buffer is not accessed concurrently by the CPU and DMA during the
> > > transfer.
> >
> > I still can't understand why need this special handle for i3c. Basic it use
> > dma transfer to transfer data. Can you simple descript hci's data flow?
> >
> Unfortunately my knowledge doesn't go deeply enough in HW but I could guess
> it's somewhere in MIPI I3C HCI DMA engine implementation and/or HCI IP to
> memory bridge/bus integration.
>
> If it would be a cache line issue then I would think we would see corruption
> independently is the DMA IOMMU mapped or not and in larger number of bytes
> than 1-3 last bytes.
>
> Also another finding that if I3C target returns less data than expected then
> there is also padding in the last DWORD with the real data. But not more
> than requested, i.e. 2 bytes expected, target returns 1, only the 2nd byte
> contains stale data and bytes 3-4 untouched. Or 8 bytes expected, target
> returns 1, bytes 2-4 contain stale data and bytes 5-8 untouched.
>
> So something around trailing bytes handling in the HW when the transfer
> length is not multiple of DWORDs as far as I could guess.

That's the reason why the DMA buffer should not be handled manually
here. It is a small amount of memory with modest performance
requirements.

If you use the DMA-provided API, you should not have to take care of
such details. A DMA buffer must be at least one cache line in size;
otherwise, DMA may corrupt some data, which is hard to find.

Frank

>
> --
> linux-i3c mailing list
> linux-i3c@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-i3c

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-09 15:04             ` Frank Li
@ 2025-06-10 13:42               ` Jarkko Nikula
  2025-06-10 17:08                 ` Frank Li
  0 siblings, 1 reply; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-10 13:42 UTC (permalink / raw)
  To: Frank Li; +Cc: linux-i3c, Alexandre Belloni

On 6/9/25 6:04 PM, Frank Li wrote:
> On Mon, Jun 09, 2025 at 05:15:44PM +0300, Jarkko Nikula wrote:
>> On 6/6/25 6:02 PM, Frank Li wrote:
>>> On Fri, Jun 06, 2025 at 10:16:44AM +0300, Jarkko Nikula wrote:
>>>> Hi
>>>>
>>>> On 6/5/25 6:13 PM, Frank Li wrote:
>>>>> On Thu, Jun 05, 2025 at 05:07:19PM +0300, Jarkko Nikula wrote:
>>>>>> Hi
>>>>>>
>>>>>> On 6/4/25 6:00 PM, Frank Li wrote:
>>>>>>> On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
>>>>>>>> Move DMA bounce buffer code for I3C private transfers to be generic for
>>>>>>>> all DMA transfers, and round up the receive bounce buffer size to a
>>>>>>>> multiple of DWORDs.
>>>>>>>>
>>>>>>>> It was observed that when the device DMA is IOMMU mapped and the receive
>>>>>>>> length is not a multiple of DWORDs, the last DWORD is padded with stale
>>>>>>>> data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
>>>>>>>>
>>>>>>>> A similar issue, though less severe, occurs when an I3C target returns
>>>>>>>> less data than requested. In this case, the padding does not exceed the
>>>>>>>> requested number of bytes, assuming the device DMA is not IOMMU mapped.
>>>>>>>>
>>>>>>>> Therefore, all I3C private transfer, CCC command payload and I2C
>>>>>>>> transfer receive buffers must be properly sized for the DMA being IOMMU
>>>>>>>> mapped. Even if those buffers are already DMA safe, their size may not
>>>>>>>> be, and I don't have a clear idea how to guarantee this other than
>>>>>>>> using a local bounce buffer.
>>>>>>>>
>>>>>>>> To prepare for the device DMA being IOMMU mapped and to address the
>>>>>>>> above issue, implement a local, properly sized bounce buffer for all
>>>>>>>> DMA transfers. For now, allocate it only when the buffer is in the
>>>>>>>> vmalloc() area to avoid unnecessary copying with CCC commands and
>>>>>>>> DMA-safe I2C transfers.
>>>>>>>>
>>>>>>>> Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
>>>>>>>> ---
>>>>>>>>      drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
>>>>>>>>      drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
>>>>>>>>      2 files changed, 46 insertions(+), 35 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
>>>>>>>> index bc4538694540..24c5e7d5b439 100644
>>>>>>>> --- a/drivers/i3c/master/mipi-i3c-hci/core.c
>>>>>>>> +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
>>>>>>>> @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
>>>>>>>>      	return hci->cmd->perform_daa(hci);
>>>>>>>>      }
>>>>>>>>
>>>>>>> ...
>>>>>>>>      }
>>>>>>>>
>>>>>>>> +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
>>>>>>>> +					 struct hci_xfer *xfer)
>>>>>>>> +{
>>>>>>>> +	if (!is_vmalloc_addr(xfer->data))
>>>>>>>> +		return xfer->data;
>>>>>>>> +
>>>>>>>> +	if (xfer->rnw)
>>>>>>>> +		/*
>>>>>>>> +		 * Round up the receive bounce buffer length to a multiple of
>>>>>>>> +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
>>>>>>>> +		 * transfers may corrupt the last DWORD when transfer length is
>>>>>>>> +		 * not a multiple of DWORDs. This was observed when the device
>>>>>>>> +		 * DMA is IOMMU mapped or when an I3C target device returns
>>>>>>>> +		 * less data than requested. Latter case is less severe and
>>>>>>>> +		 * does not exceed the requested number of bytes, assuming the
>>>>>>>> +		 * device DMA is not IOMMU mapped.
>>>>>>>> +		 */
>>>>>>>> +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
>>>>>>>> +					   GFP_KERNEL);
>>>>>>>> +	else
>>>>>>>> +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
>>>>>>>> +					   GFP_KERNEL);
>>>>>>>> +
>>>>>>>> +	return xfer->bounce_buf;
>>>>>>>
>>>>>>> Why need use bounce_buf? why not use dma_map_single()?, which will check
>>>>>>> alignment and size to decide if use swiotlb as bounce buffer.
>>>>>>>
>>>>>> We do pass the buffer to the dma_map_single(). I've understood swiotlb is
>>>>>> transparently used when the DMA cannot directly access the memory but that's
>>>>>> not the case here.
>>>>>
>>>>> why not? even though you have to use internal buf, why not use
>>>>> dma_alloc_coherent().
>>>>>
>>>> I don't think it's suitable for "smallish" per transfer allocations/mappings
>>>> and buffer is not accessed concurrently by the CPU and DMA during the
>>>> transfer.
>>>
>>> I still can't understand why need this special handle for i3c. Basic it use
>>> dma transfer to transfer data. Can you simple descript hci's data flow?
>>>
>> Unfortunately my knowledge doesn't go deeply enough in HW but I could guess
>> it's somewhere in MIPI I3C HCI DMA engine implementation and/or HCI IP to
>> memory bridge/bus integration.
>>
>> If it would be a cache line issue then I would think we would see corruption
>> independently is the DMA IOMMU mapped or not and in larger number of bytes
>> than 1-3 last bytes.
>>
>> Also another finding that if I3C target returns less data than expected then
>> there is also padding in the last DWORD with the real data. But not more
>> than requested, i.e. 2 bytes expected, target returns 1, only the 2nd byte
>> contains stale data and bytes 3-4 untouched. Or 8 bytes expected, target
>> returns 1, bytes 2-4 contain stale data and bytes 5-8 untouched.
>>
>> So something around trailing bytes handling in the HW when the transfer
>> length is not multiple of DWORDs as far as I could guess.
> 
> That's reason why should not manually handle dma buffer here. It is
> small memory and less proformance requirement.
> 
> If you use dma provided api, you should not take care such detial. DMA
> buffer at least one cache line size. otherwise, dma may currupt some data,
> which is hard to find.
> 
Do you know what solution would solve the problem here?

Let's take a look at the CCC command which is the easiest here. For 
example GETPID where payload length is 6 bytes and is allocated using 
the kzalloc() which guarantees the buffer is cache line aligned and 
ought to be DMA-safe.

i3c_master_getpid_locked()
	i3c_ccc_cmd_dest_init() // Allocates 6-byte payload buffer
	i3c_master_send_ccc_cmd_locked()
		i3c_hci_send_ccc_cmd() // mipi-i3c-hci
			xfer[i].data = ccc->dests[i].payload.data;
			hci_dma_queue_xfer()
				// bounce buffer stuff, this patchset
				dma_map_single()
				// setup hw with DMA address
				// and queue the transfer

The DMA streaming API dma_map_single() maps buffer's CPU virtual address 
to the DMA address and we use that to setup the HW and do the DMA.

Assume now the DMA is IOMMU mapped. DMA writes correctly 6 bytes to the 
payload buffer + 2 extra stale bytes. In this case perhaps harmless (due 
to kzalloc() allocated buffer) but nevertheless corruption which KFENCE 
may be able to detect.

More difficult territory are I3C private transfers and especially I2C
transfers. I3C private transfers are expected to pass a DMA-able buffer,
but I've noticed that may not always be true, especially when I3C
support is added to existing I2C or SPI target device drivers that use
regmap_bulk_read(). Those use cases may pass a stack variable (not
DMA-able, and it will be rejected by the DMA API), a structure member,
or an index into a buffer, and a pointer into those arrives as-is via
regmap_bulk_read().

I2C message buffers are not even expected to be DMA-able.
drivers/i2c/i2c-core-base.c has a helper i2c_get_dma_safe_msg_buf(), but
I see it has the same problem as the CCC command payload buffer.

So far I haven't figured out any better solution than using a local 
bounce buffer for all of these transfer types.

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-10 13:42               ` Jarkko Nikula
@ 2025-06-10 17:08                 ` Frank Li
  2025-06-17 15:09                   ` Frank Li
  0 siblings, 1 reply; 17+ messages in thread
From: Frank Li @ 2025-06-10 17:08 UTC (permalink / raw)
  To: Jarkko Nikula; +Cc: linux-i3c, Alexandre Belloni

On Tue, Jun 10, 2025 at 04:42:35PM +0300, Jarkko Nikula wrote:
> On 6/9/25 6:04 PM, Frank Li wrote:
> > On Mon, Jun 09, 2025 at 05:15:44PM +0300, Jarkko Nikula wrote:
> > > On 6/6/25 6:02 PM, Frank Li wrote:
> > > > On Fri, Jun 06, 2025 at 10:16:44AM +0300, Jarkko Nikula wrote:
> > > > > Hi
> > > > >
> > > > > On 6/5/25 6:13 PM, Frank Li wrote:
> > > > > > On Thu, Jun 05, 2025 at 05:07:19PM +0300, Jarkko Nikula wrote:
> > > > > > > Hi
> > > > > > >
> > > > > > > On 6/4/25 6:00 PM, Frank Li wrote:
> > > > > > > > On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
> > > > > > > > > Move DMA bounce buffer code for I3C private transfers to be generic for
> > > > > > > > > all DMA transfers, and round up the receive bounce buffer size to a
> > > > > > > > > multiple of DWORDs.
> > > > > > > > >
> > > > > > > > > It was observed that when the device DMA is IOMMU mapped and the receive
> > > > > > > > > length is not a multiple of DWORDs, the last DWORD is padded with stale
> > > > > > > > > data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
> > > > > > > > >
> > > > > > > > > A similar issue, though less severe, occurs when an I3C target returns
> > > > > > > > > less data than requested. In this case, the padding does not exceed the
> > > > > > > > > requested number of bytes, assuming the device DMA is not IOMMU mapped.
> > > > > > > > >
> > > > > > > > > Therefore, all I3C private transfer, CCC command payload and I2C
> > > > > > > > > transfer receive buffers must be properly sized for the DMA being IOMMU
> > > > > > > > > mapped. Even if those buffers are already DMA safe, their size may not
> > > > > > > > > be, and I don't have a clear idea how to guarantee this other than
> > > > > > > > > using a local bounce buffer.
> > > > > > > > >
> > > > > > > > > To prepare for the device DMA being IOMMU mapped and to address the
> > > > > > > > > above issue, implement a local, properly sized bounce buffer for all
> > > > > > > > > DMA transfers. For now, allocate it only when the buffer is in the
> > > > > > > > > vmalloc() area to avoid unnecessary copying with CCC commands and
> > > > > > > > > DMA-safe I2C transfers.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
> > > > > > > > > ---
> > > > > > > > >      drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
> > > > > > > > >      drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
> > > > > > > > >      2 files changed, 46 insertions(+), 35 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > > > index bc4538694540..24c5e7d5b439 100644
> > > > > > > > > --- a/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > > > +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > > > @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
> > > > > > > > >      	return hci->cmd->perform_daa(hci);
> > > > > > > > >      }
> > > > > > > > >
> > > > > > > > ...
> > > > > > > > >      }
> > > > > > > > >
> > > > > > > > > +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
> > > > > > > > > +					 struct hci_xfer *xfer)
> > > > > > > > > +{
> > > > > > > > > +	if (!is_vmalloc_addr(xfer->data))
> > > > > > > > > +		return xfer->data;
> > > > > > > > > +
> > > > > > > > > +	if (xfer->rnw)
> > > > > > > > > +		/*
> > > > > > > > > +		 * Round up the receive bounce buffer length to a multiple of
> > > > > > > > > +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
> > > > > > > > > +		 * transfers may corrupt the last DWORD when transfer length is
> > > > > > > > > +		 * not a multiple of DWORDs. This was observed when the device
> > > > > > > > > +		 * DMA is IOMMU mapped or when an I3C target device returns
> > > > > > > > > +		 * less data than requested. Latter case is less severe and
> > > > > > > > > +		 * does not exceed the requested number of bytes, assuming the
> > > > > > > > > +		 * device DMA is not IOMMU mapped.
> > > > > > > > > +		 */
> > > > > > > > > +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
> > > > > > > > > +					   GFP_KERNEL);
> > > > > > > > > +	else
> > > > > > > > > +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
> > > > > > > > > +					   GFP_KERNEL);
> > > > > > > > > +
> > > > > > > > > +	return xfer->bounce_buf;
> > > > > > > >
> > > > > > > > Why need use bounce_buf? why not use dma_map_single()?, which will check
> > > > > > > > alignment and size to decide if use swiotlb as bounce buffer.
> > > > > > > >
> > > > > > > We do pass the buffer to the dma_map_single(). I've understood swiotlb is
> > > > > > > transparently used when the DMA cannot directly access the memory but that's
> > > > > > > not the case here.
> > > > > >
> > > > > > why not? even though you have to use internal buf, why not use
> > > > > > dma_alloc_coherent().
> > > > > >
> > > > > I don't think it's suitable for "smallish" per transfer allocations/mappings
> > > > > and buffer is not accessed concurrently by the CPU and DMA during the
> > > > > transfer.
> > > >
> > > > I still can't understand why need this special handle for i3c. Basic it use
> > > > dma transfer to transfer data. Can you simple descript hci's data flow?
> > > >
> > > Unfortunately my knowledge doesn't go deeply enough in HW but I could guess
> > > it's somewhere in MIPI I3C HCI DMA engine implementation and/or HCI IP to
> > > memory bridge/bus integration.
> > >
> > > If it would be a cache line issue then I would think we would see corruption
> > > independently is the DMA IOMMU mapped or not and in larger number of bytes
> > > than 1-3 last bytes.
> > >
> > > Also another finding that if I3C target returns less data than expected then
> > > there is also padding in the last DWORD with the real data. But not more
> > > than requested, i.e. 2 bytes expected, target returns 1, only the 2nd byte
> > > contains stale data and bytes 3-4 untouched. Or 8 bytes expected, target
> > > returns 1, bytes 2-4 contain stale data and bytes 5-8 untouched.
> > >
> > > So something around trailing bytes handling in the HW when the transfer
> > > length is not multiple of DWORDs as far as I could guess.
> >
> > That's the reason why we should not manually handle the DMA buffer here.
> > It is small memory with lower performance requirements.
> >
> > If you use the DMA-provided API, you should not need to take care of such
> > detail. A DMA buffer is at least one cache line in size; otherwise, DMA
> > may corrupt some data, which is hard to find.
> >
> Do you know what solution would solve the problem here?
>
> Let's take a look at the CCC command which is the easiest here. For example
> GETPID where payload length is 6 bytes and is allocated using the kzalloc()
> which guarantees the buffer is cache line aligned and ought to be DMA-safe.
>
> i3c_master_getpid_locked()
> 	i3c_ccc_cmd_dest_init() // Allocates 6-byte payload buffer
> 	i3c_master_send_ccc_cmd_locked()
> 		i3c_hci_send_ccc_cmd() // mipi-i3c-hci
> 			xfer[i].data = ccc->dests[i].payload.data;
> 			hci_dma_queue_xfer()
> 				// bounce buffer stuff, this patchset
> 				dma_map_single()
> 				// setup hw with DMA address
> 				// and queue the transfer
>
> The DMA streaming API dma_map_single() maps buffer's CPU virtual address to
> the DMA address and we use that to setup the HW and do the DMA.
>
> Assume now the DMA is IOMMU mapped. DMA writes correctly 6 bytes to the
> payload buffer + 2 extra stale bytes. In this case perhaps harmless (due to
> kzalloc() allocated buffer) but nevertheless corruption which KFENCE may be
> able to detect.
>
> More difficult territory are I3C private transfers and especially I2C
> transfers. I3C private transfers are expected to pass a DMA-able buffer but
> I've noticed it may not be always true especially when I3C support is added
> into existing I2C or SPI target device drivers and that are using
> regmap_bulk_read(). Those use cases may pass a stack variable (not DMA-able
> and will be rejected by DMA API), structure member or buffer index and
> pointer into those come as it via regmap_bulk_read().
>
> I2C message buffers are not even expected to be DMA-able.
> drivers/i2c/i2c-core-base.c has a helper i2c_get_dma_safe_msg_buf() but I
> see it has the same problem than with the CCC command payload buffer.
>
> So far I haven't figured out any better solution than using a local bounce
> buffer for all of these transfer types.

Thanks, I finally understand the problem you faced. I think other master
controllers face a similar issue if they use DMA. Let me think more. If the
allocated buffer size is not aligned to a cache line, swiotlb will bounce again.

Rough idea: the i3c core layer provides an i3c_dma_(un)map API to do that.

Frank

>
> --
> linux-i3c mailing list
> linux-i3c@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-i3c

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-10 17:08                 ` Frank Li
@ 2025-06-17 15:09                   ` Frank Li
  2025-06-19 13:37                     ` Jarkko Nikula
  0 siblings, 1 reply; 17+ messages in thread
From: Frank Li @ 2025-06-17 15:09 UTC (permalink / raw)
  To: Jarkko Nikula; +Cc: linux-i3c, Alexandre Belloni

On Tue, Jun 10, 2025 at 01:08:12PM -0400, Frank Li wrote:
> On Tue, Jun 10, 2025 at 04:42:35PM +0300, Jarkko Nikula wrote:
> > On 6/9/25 6:04 PM, Frank Li wrote:
> > > On Mon, Jun 09, 2025 at 05:15:44PM +0300, Jarkko Nikula wrote:
> > > > On 6/6/25 6:02 PM, Frank Li wrote:
> > > > > On Fri, Jun 06, 2025 at 10:16:44AM +0300, Jarkko Nikula wrote:
> > > > > > Hi
> > > > > >
> > > > > > On 6/5/25 6:13 PM, Frank Li wrote:
> > > > > > > On Thu, Jun 05, 2025 at 05:07:19PM +0300, Jarkko Nikula wrote:
> > > > > > > > Hi
> > > > > > > >
> > > > > > > > On 6/4/25 6:00 PM, Frank Li wrote:
> > > > > > > > > On Wed, Jun 04, 2025 at 03:55:11PM +0300, Jarkko Nikula wrote:
> > > > > > > > > > Move DMA bounce buffer code for I3C private transfers to be generic for
> > > > > > > > > > all DMA transfers, and round up the receive bounce buffer size to a
> > > > > > > > > > multiple of DWORDs.
> > > > > > > > > >
> > > > > > > > > > It was observed that when the device DMA is IOMMU mapped and the receive
> > > > > > > > > > length is not a multiple of DWORDs, the last DWORD is padded with stale
> > > > > > > > > > data from the RX FIFO, corrupting 1-3 bytes beyond the expected data.
> > > > > > > > > >
> > > > > > > > > > A similar issue, though less severe, occurs when an I3C target returns
> > > > > > > > > > less data than requested. In this case, the padding does not exceed the
> > > > > > > > > > requested number of bytes, assuming the device DMA is not IOMMU mapped.
> > > > > > > > > >
> > > > > > > > > > Therefore, all I3C private transfer, CCC command payload and I2C
> > > > > > > > > > transfer receive buffers must be properly sized for the DMA being IOMMU
> > > > > > > > > > mapped. Even if those buffers are already DMA safe, their size may not
> > > > > > > > > > be, and I don't have a clear idea how to guarantee this other than
> > > > > > > > > > using a local bounce buffer.
> > > > > > > > > >
> > > > > > > > > > To prepare for the device DMA being IOMMU mapped and to address the
> > > > > > > > > > above issue, implement a local, properly sized bounce buffer for all
> > > > > > > > > > DMA transfers. For now, allocate it only when the buffer is in the
> > > > > > > > > > vmalloc() area to avoid unnecessary copying with CCC commands and
> > > > > > > > > > DMA-safe I2C transfers.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
> > > > > > > > > > ---
> > > > > > > > > >      drivers/i3c/master/mipi-i3c-hci/core.c | 34 -------------------
> > > > > > > > > >      drivers/i3c/master/mipi-i3c-hci/dma.c  | 47 +++++++++++++++++++++++++-
> > > > > > > > > >      2 files changed, 46 insertions(+), 35 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > > > > index bc4538694540..24c5e7d5b439 100644
> > > > > > > > > > --- a/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > > > > +++ b/drivers/i3c/master/mipi-i3c-hci/core.c
> > > > > > > > > > @@ -272,34 +272,6 @@ static int i3c_hci_daa(struct i3c_master_controller *m)
> > > > > > > > > >      	return hci->cmd->perform_daa(hci);
> > > > > > > > > >      }
> > > > > > > > > >
> > > > > > > > > ...
> > > > > > > > > >      }
> > > > > > > > > >
> > > > > > > > > > +static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
> > > > > > > > > > +					 struct hci_xfer *xfer)
> > > > > > > > > > +{
> > > > > > > > > > +	if (!is_vmalloc_addr(xfer->data))
> > > > > > > > > > +		return xfer->data;
> > > > > > > > > > +
> > > > > > > > > > +	if (xfer->rnw)
> > > > > > > > > > +		/*
> > > > > > > > > > +		 * Round up the receive bounce buffer length to a multiple of
> > > > > > > > > > +		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
> > > > > > > > > > +		 * transfers may corrupt the last DWORD when transfer length is
> > > > > > > > > > +		 * not a multiple of DWORDs. This was observed when the device
> > > > > > > > > > +		 * DMA is IOMMU mapped or when an I3C target device returns
> > > > > > > > > > +		 * less data than requested. Latter case is less severe and
> > > > > > > > > > +		 * does not exceed the requested number of bytes, assuming the
> > > > > > > > > > +		 * device DMA is not IOMMU mapped.
> > > > > > > > > > +		 */
> > > > > > > > > > +		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
> > > > > > > > > > +					   GFP_KERNEL);
> > > > > > > > > > +	else
> > > > > > > > > > +		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
> > > > > > > > > > +					   GFP_KERNEL);
> > > > > > > > > > +
> > > > > > > > > > +	return xfer->bounce_buf;
> > > > > > > > >
> > > > > > > > > Why need use bounce_buf? why not use dma_map_single()?, which will check
> > > > > > > > > alignment and size to decide if use swiotlb as bounce buffer.
> > > > > > > > >
> > > > > > > > We do pass the buffer to the dma_map_single(). I've understood swiotlb is
> > > > > > > > transparently used when the DMA cannot directly access the memory but that's
> > > > > > > > not the case here.
> > > > > > >
> > > > > > > why not? even though you have to use internal buf, why not use
> > > > > > > dma_alloc_coherent().
> > > > > > >
> > > > > > I don't think it's suitable for "smallish" per transfer allocations/mappings
> > > > > > and buffer is not accessed concurrently by the CPU and DMA during the
> > > > > > transfer.
> > > > >
> > > > > I still can't understand why need this special handle for i3c. Basic it use
> > > > > dma transfer to transfer data. Can you simple descript hci's data flow?
> > > > >
> > > > Unfortunately my knowledge doesn't go deeply enough in HW but I could guess
> > > > it's somewhere in MIPI I3C HCI DMA engine implementation and/or HCI IP to
> > > > memory bridge/bus integration.
> > > >
> > > > If it would be a cache line issue then I would think we would see corruption
> > > > independently is the DMA IOMMU mapped or not and in larger number of bytes
> > > > than 1-3 last bytes.
> > > >
> > > > Also another finding that if I3C target returns less data than expected then
> > > > there is also padding in the last DWORD with the real data. But not more
> > > > than requested, i.e. 2 bytes expected, target returns 1, only the 2nd byte
> > > > contains stale data and bytes 3-4 untouched. Or 8 bytes expected, target
> > > > returns 1, bytes 2-4 contain stale data and bytes 5-8 untouched.
> > > >
> > > > So something around trailing bytes handling in the HW when the transfer
> > > > length is not multiple of DWORDs as far as I could guess.
> > >
> > > That's reason why should not manually handle dma buffer here. It is
> > > small memory and less proformance requirement.
> > >
> > > If you use dma provided api, you should not take care such detial. DMA
> > > buffer at least one cache line size. otherwise, dma may currupt some data,
> > > which is hard to find.
> > >
> > Do you know what solution would solve the problem here?
> >
> > Let's take a look at the CCC command which is the easiest here. For example
> > GETPID where payload length is 6 bytes and is allocated using the kzalloc()
> > which guarantees the buffer is cache line aligned and ought to be DMA-safe.
> >
> > i3c_master_getpid_locked()
> > 	i3c_ccc_cmd_dest_init() // Allocates 6-byte payload buffer
> > 	i3c_master_send_ccc_cmd_locked()
> > 		i3c_hci_send_ccc_cmd() // mipi-i3c-hci
> > 			xfer[i].data = ccc->dests[i].payload.data;
> > 			hci_dma_queue_xfer()
> > 				// bounce buffer stuff, this patchset
> > 				dma_map_single()
> > 				// setup hw with DMA address
> > 				// and queue the transfer
> >
> > The DMA streaming API dma_map_single() maps buffer's CPU virtual address to
> > the DMA address and we use that to setup the HW and do the DMA.
> >
> > Assume now the DMA is IOMMU mapped. DMA writes correctly 6 bytes to the
> > payload buffer + 2 extra stale bytes. In this case perhaps harmless (due to
> > kzalloc() allocated buffer) but nevertheless corruption which KFENCE may be
> > able to detect.
> >
> > More difficult territory are I3C private transfers and especially I2C
> > transfers. I3C private transfers are expected to pass a DMA-able buffer but
> > I've noticed it may not be always true especially when I3C support is added
> > into existing I2C or SPI target device drivers and that are using
> > regmap_bulk_read(). Those use cases may pass a stack variable (not DMA-able
> > and will be rejected by DMA API), structure member or buffer index and
> > pointer into those come as it via regmap_bulk_read().
> >
> > I2C message buffers are not even expected to be DMA-able.
> > drivers/i2c/i2c-core-base.c has a helper i2c_get_dma_safe_msg_buf() but I
> > see it has the same problem than with the CCC command payload buffer.
> >
> > So far I haven't figured out any better solution than using a local bounce
> > buffer for all of these transfer types.
>
> Thanks, I finally understand the problem you faced. I think other master
> controllers face a similar issue if they use DMA. Let me think more. If the
> allocated buffer size is not aligned to a cache line, swiotlb will bounce again.
>
> Rough idea: the i3c core layer provides an i3c_dma_(un)map API to do that.

struct i3c_dma {
	void *bounce;
	dma_addr_t addr;
	enum dma_data_direction;
	void *data;
	size_t data_size;
	size_t bounce_size;
}

struct i3c_dma *i3c_dma_map_single(struct device *dev, void *data, size_t sz, enum dma_data_direction dir)
{
	struct i3c_dma *p

	p = kmalloc(sizeof(*p));
	if (!p)
		return p;

	if (is_vmalloc_addr(data)) {
		p->bounce = kmalloc(align_up(sz, CACHE_LINE_SIZE)); //avoid swtlib bounce again.
		memcpy()
		...
	}

	dma_map_single();
	...
}

void i3c_dma_unmap_single(struct i3c_dma *p)
{
	dma_unmap_single();

	...
	if (data != bounce)
		memcpy();

	kfree(p);
}

#define DEFINE_FREE(i3c_dma_unmap_single, struct i3c_dma *, if (_T)i3c_dma_unmap_single(_T))


Frank



>
> Frank
>
> >
> > --
> > linux-i3c mailing list
> > linux-i3c@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-i3c
>
> --
> linux-i3c mailing list
> linux-i3c@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-i3c

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-17 15:09                   ` Frank Li
@ 2025-06-19 13:37                     ` Jarkko Nikula
  2025-06-20 18:29                       ` Frank Li
  2025-06-27 14:17                       ` Jarkko Nikula
  0 siblings, 2 replies; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-19 13:37 UTC (permalink / raw)
  To: Frank Li; +Cc: linux-i3c, Alexandre Belloni

Hi

On 6/17/25 6:09 PM, Frank Li wrote:
>> Thanks, I finally understand the problem you faced. I think other master
>> controllers face a similar issue if they use DMA. Let me think more. If the
>> allocated buffer size is not aligned to a cache line, swiotlb will bounce again.
>>
>> Rough idea: the i3c core layer provides an i3c_dma_(un)map API to do that.
> 
> struct i3c_dma {
> 	void *bounce;
> 	dma_addr_t addr;
> 	enum dma_data_direction;
> 	void *data;
> 	size_t data_size;
> 	size_t bounce_size;
> }
> 
> struct i3c_dma *i3c_dma_map_single(struct device *dev, void *data, size_t sz, enum dma_data_direction dir)
> {
> 	struct i3c_dma *p
> 
> 	p = kmalloc(sizeof(*p));
> 	if (!p)
> 		return p;
> 
> 	if (is_vmalloc_addr(data)) {
> 		p->bounce = kmalloc(align_up(sz, CACHE_LINE_SIZE)); //avoid swtlib bounce again.

I'll try to look at your idea next week. Do you have pointers about the 
above allocation size rounding — is it required?

I've understood from the Documentation/core-api/dma-api-howto.rst 
"Architectures must ensure that kmalloc'ed buffer is DMA-safe" and 
"ARCH_DMA_MINALIGN must be set so that the memory allocator makes sure 
that kmalloc'ed buffer doesn't share a cache line with the others.".

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-19 13:37                     ` Jarkko Nikula
@ 2025-06-20 18:29                       ` Frank Li
  2025-06-27 14:17                       ` Jarkko Nikula
  1 sibling, 0 replies; 17+ messages in thread
From: Frank Li @ 2025-06-20 18:29 UTC (permalink / raw)
  To: Jarkko Nikula; +Cc: linux-i3c, Alexandre Belloni

On Thu, Jun 19, 2025 at 04:37:30PM +0300, Jarkko Nikula wrote:
> Hi
>
> On 6/17/25 6:09 PM, Frank Li wrote:
> > > Thanks, I finially understand the problem you faced. I think other master
> > > controller face similar issue if they use dma. Let me think more. If alloc
> > > buffer size is not align to cache line, swtlib will bounce again.
> > >
> > > rough idea i3c core layer provide an i3c_(un)map_api to do that.
> >
> > struct i3c_dma {
> > 	void *bounce;
> > 	dma_addr_t addr;
> > 	enum dma_data_direction;
> > 	void *data;
> > 	size_t data_size;
> > 	size_t bounce_size;
> > }
> >
> > struct i3c_dma *i3c_dma_map_single(struct device *dev, void *data, size_t sz, enum dma_data_direction dir)
> > {
> > 	struct i3c_dma *p
> >
> > 	p = kmalloc(sizeof(*p));
> > 	if (!p)
> > 		return p;
> >
> > 	if (is_vmalloc_addr(data)) {
> > 		p->bounce = kmalloc(align_up(sz, CACHE_LINE_SIZE)); //avoid swtlib bounce again.
>
> I'll try to look at your idea next week. Do you have pointers to the above
> allocation size rounding is it required?
>
> I've understood from the Documentation/core-api/dma-api-howto.rst
> "Architectures must ensure that kmalloc'ed buffer is DMA-safe" and
> "ARCH_DMA_MINALIGN must be set so that the memory allocator makes sure that
> kmalloc'ed buffer doesn't share a cache line with the others.".

But sz in dma_map_single() will be checked by swiotlb; if the size is not
aligned to the cache line size, it will bounce it.

Frank

>
> --
> linux-i3c mailing list
> linux-i3c@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-i3c

-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-19 13:37                     ` Jarkko Nikula
  2025-06-20 18:29                       ` Frank Li
@ 2025-06-27 14:17                       ` Jarkko Nikula
  2025-07-31 14:23                         ` Jarkko Nikula
  1 sibling, 1 reply; 17+ messages in thread
From: Jarkko Nikula @ 2025-06-27 14:17 UTC (permalink / raw)
  To: Frank Li; +Cc: linux-i3c, Alexandre Belloni

Hi Frank

On 6/19/25 4:37 PM, Jarkko Nikula wrote:
> Hi
> 
> On 6/17/25 6:09 PM, Frank Li wrote:
>>> Thanks, I finially understand the problem you faced. I think other 
>>> master
>>> controller face similar issue if they use dma. Let me think more. If 
>>> alloc
>>> buffer size is not align to cache line, swtlib will bounce again.
>>>
>>> rough idea i3c core layer provide an i3c_(un)map_api to do that.
>>
Unfortunately I ran out of time before vacation to produce a clean and 
tested patchset based on your idea, but wanted to share the work-in-progress 
diff below. It is not actually standalone but goes on top of this 
patchset.

I have mixed feelings about whether this yet brings enough value to the I3C 
core, or whether it makes sense to wait for another I3C driver using DMA to 
see the common needs better.

But I'm open to your comments and will continue after vacation :-)

---
  drivers/i3c/master.c                  | 74 ++++++++++++++++++++++++
  drivers/i3c/master/mipi-i3c-hci/dma.c | 82 +++++----------------------
  drivers/i3c/master/mipi-i3c-hci/hci.h |  3 +-
  include/linux/i3c/master.h            | 20 +++++++
  4 files changed, 110 insertions(+), 69 deletions(-)

diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index fd81871609d9..fa2532dcf62a 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -8,6 +8,7 @@
  #include <linux/atomic.h>
  #include <linux/bug.h>
  #include <linux/device.h>
+#include <linux/dma-mapping.h>
  #include <linux/err.h>
  #include <linux/export.h>
  #include <linux/kernel.h>
@@ -1727,6 +1728,79 @@ int i3c_master_do_daa(struct 
i3c_master_controller *master)
  }
  EXPORT_SYMBOL_GPL(i3c_master_do_daa);

+/**
+ * i3c_master_dma_map_single() - Map buffer for single DMA transfer
+ * @dev: device object of a device doing DMA
+ * @buf: destination/source buffer for DMA
+ * @len: length of transfer
+ * @need_bounce: true if buffer is not DMA safe and need a bounce buffer
+ * @dir: DMA direction
+ *
+ * Map buffer for a DMA transfer and allocate a bounce buffer if required.
+ *
+ * Return: I3C DMA transfer descriptor or NULL in case of error.
+ */
+struct i3c_dma *i3c_master_dma_map_single(struct device *dev, void *buf,
+	size_t len, bool need_bounce, enum dma_data_direction dir)
+{
+	struct i3c_dma *dma_xfer;
+	void *dma_buf = buf;
+
+	dma_xfer = kzalloc(sizeof(*dma_xfer), GFP_KERNEL);
+	if (!dma_xfer)
+		return NULL;
+
+	dma_xfer->buf = buf;
+	dma_xfer->dir = dir;
+	dma_xfer->len = len;
+	if (is_vmalloc_addr(buf))
+		need_bounce = true;
+
+	if (need_bounce) {
+		if (dir == DMA_FROM_DEVICE)
+			dma_buf = kzalloc(ALIGN(len, cache_line_size()),
+						GFP_KERNEL);
+		else
+			dma_buf = kmemdup(buf, len, GFP_KERNEL);
+		if (!dma_buf)
+			goto err_alloc;
+
+		dma_xfer->bounce_buf = dma_buf;
+	}
+
+	dma_xfer->addr = dma_map_single(dev, dma_buf, len, dir);
+	if (dma_mapping_error(dev, dma_xfer->addr))
+		goto err_map;
+
+	return dma_xfer;
+err_map:
+	kfree(dma_xfer->bounce_buf);
+err_alloc:
+	kfree(dma_xfer);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(i3c_master_dma_map_single);
+
+/**
+ * i3c_master_dma_unmap_single() - Unmap buffer after DMA
+ * @dev: device object of a device doing DMA
+ * @dma_xfer: DMA transfer and mapping descriptor
+ *
+ * Unmap buffer and cleanup DMA transfer descriptor
+ */
+void i3c_master_dma_unmap_single(struct device *dev, struct i3c_dma 
*dma_xfer)
+{
+	dma_unmap_single(dev, dma_xfer->addr, dma_xfer->len, dma_xfer->dir);
+	if (dma_xfer->bounce_buf) {
+		if (dma_xfer->dir == DMA_FROM_DEVICE)
+			memcpy(dma_xfer->buf, dma_xfer->bounce_buf,
+			       dma_xfer->len);
+		kfree(dma_xfer->bounce_buf);
+	}
+	kfree(dma_xfer);
+}
+EXPORT_SYMBOL_GPL(i3c_master_dma_unmap_single);
+
  /**
   * i3c_master_set_info() - set master device information
   * @master: master used to send frames on the bus
diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c 
b/drivers/i3c/master/mipi-i3c-hci/dma.c
index 20e1b405244e..af1c9c97fb3d 100644
--- a/drivers/i3c/master/mipi-i3c-hci/dma.c
+++ b/drivers/i3c/master/mipi-i3c-hci/dma.c
@@ -354,50 +354,6 @@ static int hci_dma_init(struct i3c_hci *hci)
  	return ret;
  }

-static void *hci_dma_alloc_safe_xfer_buf(struct i3c_hci *hci,
-					 struct hci_xfer *xfer)
-{
-	struct hci_rings_data *rings = hci->io_data;
-
-	if (!is_vmalloc_addr(xfer->data) &&
-	    (!device_iommu_mapped(rings->sysdev) ||
-	     !xfer->rnw ||
-	     xfer->data_len == ALIGN(xfer->data_len, 4)))
-		/* Bounce buffer not required for this transfer */
-		return xfer->data;
-
-	if (xfer->rnw)
-		/*
-		 * Round up the receive bounce buffer length to a multiple of
-		 * DWORDs. Independently of buffer alignment, DMA_FROM_DEVICE
-		 * transfers may corrupt the last DWORD when transfer length is
-		 * not a multiple of DWORDs. This was observed when the device
-		 * DMA is IOMMU mapped or when an I3C target device returns
-		 * less data than requested. Latter case is less severe and
-		 * does not exceed the requested number of bytes, assuming the
-		 * device DMA is not IOMMU mapped.
-		 */
-		xfer->bounce_buf = kzalloc(ALIGN(xfer->data_len, 4),
-					   GFP_KERNEL);
-	else
-		xfer->bounce_buf = kmemdup(xfer->data, xfer->data_len,
-					   GFP_KERNEL);
-
-	return xfer->bounce_buf;
-}
-
-static void hci_dma_free_safe_xfer_buf(struct i3c_hci *hci,
-				       struct hci_xfer *xfer)
-{
-	if (xfer->bounce_buf == NULL)
-		return;
-
-	if (xfer->rnw)
-		memcpy(xfer->data, xfer->bounce_buf, xfer->data_len);
-
-	kfree(xfer->bounce_buf);
-}
-
  static void hci_dma_unmap_xfer(struct i3c_hci *hci,
  			       struct hci_xfer *xfer_list, unsigned int n)
  {
@@ -409,10 +365,7 @@ static void hci_dma_unmap_xfer(struct i3c_hci *hci,
  		xfer = xfer_list + i;
  		if (!xfer->data)
  			continue;
-		dma_unmap_single(rings->sysdev,
-				 xfer->data_dma, xfer->data_len,
-				 xfer->rnw ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
-		hci_dma_free_safe_xfer_buf(hci, xfer);
+		i3c_master_dma_unmap_single(rings->sysdev, xfer->dma);
  	}
  }

@@ -423,7 +376,6 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci,
  	struct hci_rh_data *rh;
  	unsigned int i, ring, enqueue_ptr;
  	u32 op1_val, op2_val;
-	void *buf;

  	/* For now we only use ring 0 */
  	ring = 0;
@@ -434,6 +386,9 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci,
  	for (i = 0; i < n; i++) {
  		struct hci_xfer *xfer = xfer_list + i;
  		u32 *ring_data = rh->xfer + rh->xfer_struct_sz * enqueue_ptr;
+		enum dma_data_direction dir = xfer->rnw ? DMA_FROM_DEVICE :
+							  DMA_TO_DEVICE;
+		bool need_bounce;

  		/* store cmd descriptor */
  		*ring_data++ = xfer->cmd_desc[0];
@@ -452,27 +407,20 @@ static int hci_dma_queue_xfer(struct i3c_hci *hci,

  		/* 2nd and 3rd words of Data Buffer Descriptor Structure */
  		if (xfer->data) {
-			buf = hci_dma_alloc_safe_xfer_buf(hci, xfer);
-			if (buf == NULL) {
-				hci_dma_unmap_xfer(hci, xfer_list, i);
-				return -ENOMEM;
-			}
-
-			xfer->data_dma =
-				dma_map_single(rings->sysdev,
-					       buf,
-					       xfer->data_len,
-					       xfer->rnw ?
-						  DMA_FROM_DEVICE :
-						  DMA_TO_DEVICE);
-			if (dma_mapping_error(rings->sysdev,
-					      xfer->data_dma)) {
-				hci_dma_free_safe_xfer_buf(hci, xfer);
+			need_bounce = device_iommu_mapped(rings->sysdev) &&
+				      xfer->rnw &&
+				      xfer->data_len != ALIGN(xfer->data_len, 4);
+			xfer->dma = i3c_master_dma_map_single(rings->sysdev,
+							      xfer->data,
+							      xfer->data_len,
+							      need_bounce,
+							      dir);
+			if (!xfer->dma) {
  				hci_dma_unmap_xfer(hci, xfer_list, i);
  				return -ENOMEM;
  			}
-			*ring_data++ = lower_32_bits(xfer->data_dma);
-			*ring_data++ = upper_32_bits(xfer->data_dma);
+			*ring_data++ = lower_32_bits(xfer->dma->addr);
+			*ring_data++ = upper_32_bits(xfer->dma->addr);
  		} else {
  			*ring_data++ = 0;
  			*ring_data++ = 0;
diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h 
b/drivers/i3c/master/mipi-i3c-hci/hci.h
index 69ea1d10414b..33bc4906df1f 100644
--- a/drivers/i3c/master/mipi-i3c-hci/hci.h
+++ b/drivers/i3c/master/mipi-i3c-hci/hci.h
@@ -94,8 +94,7 @@ struct hci_xfer {
  		};
  		struct {
  			/* DMA specific */
-			dma_addr_t data_dma;
-			void *bounce_buf;
+			struct i3c_dma *dma;
  			int ring_number;
  			int ring_entry;
  		};
diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index c67922ece617..76e8174eecb0 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -553,6 +553,22 @@ struct i3c_master_controller {
  #define i3c_bus_for_each_i3cdev(bus, dev)				\
  	list_for_each_entry(dev, &(bus)->devs.i3c, common.node)

+/**
+ * struct i3c_dma - DMA transfer and mapping descriptor
+ * @buf: destination/source buffer for DMA
+ * @len: length of transfer
+ * @addr: mapped DMA address for a Host Controller Driver
+ * @dir: DMA direction
+ * @bounce_buf: an allocated bounce buffer if transfer needs it or NULL
+ */
+struct i3c_dma {
+	void *buf;
+	size_t len;
+	dma_addr_t addr;
+	enum dma_data_direction dir;
+	void *bounce_buf;
+};
+
  int i3c_master_do_i2c_xfers(struct i3c_master_controller *master,
  			    const struct i2c_msg *xfers,
  			    int nxfers);
@@ -570,6 +586,10 @@ int i3c_master_get_free_addr(struct 
i3c_master_controller *master,
  int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master,
  				  u8 addr);
  int i3c_master_do_daa(struct i3c_master_controller *master);
+struct i3c_dma *i3c_master_dma_map_single(struct device *dev, void *ptr,
+					  size_t len, bool dma_safe,
+					  enum dma_data_direction dir);
+void i3c_master_dma_unmap_single(struct device *dev, struct i3c_dma 
*dma_xfer);

  int i3c_master_set_info(struct i3c_master_controller *master,
  			const struct i3c_device_info *info);
-- 
2.47.2


-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers
  2025-06-27 14:17                       ` Jarkko Nikula
@ 2025-07-31 14:23                         ` Jarkko Nikula
  0 siblings, 0 replies; 17+ messages in thread
From: Jarkko Nikula @ 2025-07-31 14:23 UTC (permalink / raw)
  To: Frank Li; +Cc: linux-i3c, Alexandre Belloni

Hi Frank

On 6/27/25 5:17 PM, Jarkko Nikula wrote:
> Hi Frank
> 
> On 6/19/25 4:37 PM, Jarkko Nikula wrote:
>> Hi
>>
>> On 6/17/25 6:09 PM, Frank Li wrote:
>>>> Thanks, I finally understand the problem you faced. I think other 
>>>> master
>>>> controller face similar issue if they use dma. Let me think more. If 
>>>> alloc
>>>> buffer size is not aligned to the cache line, swiotlb will bounce again.
>>>>
>>>> Rough idea: the i3c core layer provides an i3c_(un)map API to do that.
>>>
> Unfortunately I ran out of time before vacation to have a clean and 
> tested patchset about your idea but wanted to share work in progress 
> diff below. Which is not actually a standalone but goes on top of this 
> patchset.
> 
> I have mixed feeling does this yet bring enough value to the I3C core or 
> does it make sense to wait for another I3C driver using DMA to see 
> better common needs.
> 
> But I'm open for your comments and will continue after vacation :-)
> 
I sent an update where the above idea was formed as a proper patch and the rest 
of the patches updated accordingly. Diff between the sets below :-)

--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -1786,7 +1786,7 @@ EXPORT_SYMBOL_GPL(i3c_master_dma_map_single);
   * @dev: device object of a device doing DMA
   * @dma_xfer: DMA transfer and mapping descriptor
   *
- * Unmap buffer and cleanup DMA transfer descriptor
+ * Unmap buffer and cleanup DMA transfer descriptor.
   */
  void i3c_master_dma_unmap_single(struct device *dev, struct i3c_dma 
*dma_xfer)
  {


-- 
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2025-07-31 16:13 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-04 12:55 [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers Jarkko Nikula
2025-06-04 12:55 ` [PATCH 2/3] i3c: mipi-i3c-hci: Use physical device pointer with DMA API Jarkko Nikula
2025-06-04 12:55 ` [PATCH 3/3] i3c: mipi-i3c-hci: Use own DMA bounce buffer management for I2C transfers Jarkko Nikula
2025-06-04 15:00 ` [PATCH 1/3] i3c: mipi-i3c-hci: Make bounce buffer code generic to all DMA transfers Frank Li
2025-06-05 14:07   ` Jarkko Nikula
2025-06-05 15:13     ` Frank Li
2025-06-06  7:16       ` Jarkko Nikula
2025-06-06 15:02         ` Frank Li
2025-06-09 14:15           ` Jarkko Nikula
2025-06-09 15:04             ` Frank Li
2025-06-10 13:42               ` Jarkko Nikula
2025-06-10 17:08                 ` Frank Li
2025-06-17 15:09                   ` Frank Li
2025-06-19 13:37                     ` Jarkko Nikula
2025-06-20 18:29                       ` Frank Li
2025-06-27 14:17                       ` Jarkko Nikula
2025-07-31 14:23                         ` Jarkko Nikula

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).