* [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper
@ 2017-09-19 8:52 Huacai Chen
2017-09-19 8:52 ` [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment() function Huacai Chen
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Huacai Chen @ 2017-09-19 8:52 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
linux-kernel, James E . J . Bottomley, Martin K . Petersen,
linux-scsi, Huacai Chen, stable
Introduce device_is_coherent() as a helper function; it will be used
in the next patch.
There is a MIPS-specific plat_device_is_coherent(), but we need a more
generic solution, so add and use a new function pointer in dma_map_ops.
Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
arch/mips/cavium-octeon/dma-octeon.c | 3 ++-
arch/mips/include/asm/mach-generic/dma-coherence.h | 6 +++---
arch/mips/loongson64/common/dma-swiotlb.c | 1 +
arch/mips/mm/dma-default.c | 3 ++-
arch/mips/netlogic/common/nlm-dma.c | 3 ++-
include/linux/dma-mapping.h | 10 ++++++++++
6 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index c64bd87..cd1a133 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -324,7 +324,8 @@ static struct octeon_dma_map_ops _octeon_pci_dma_map_ops = {
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = octeon_dma_sync_sg_for_device,
.mapping_error = swiotlb_dma_mapping_error,
- .dma_supported = swiotlb_dma_supported
+ .dma_supported = swiotlb_dma_supported,
+ .device_is_coherent = plat_device_is_coherent
},
};
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 34486c1..c758d9b 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -119,6 +119,7 @@ static const struct dma_map_ops loongson_dma_map_ops = {
.sync_sg_for_device = loongson_dma_sync_sg_for_device,
.mapping_error = swiotlb_dma_mapping_error,
.dma_supported = loongson_dma_supported,
+ .device_is_coherent = plat_device_is_coherent
};
void __init plat_swiotlb_setup(void)
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index c01bd20..6e18301 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -407,7 +407,8 @@ static const struct dma_map_ops mips_default_dma_map_ops = {
.sync_sg_for_cpu = mips_dma_sync_sg_for_cpu,
.sync_sg_for_device = mips_dma_sync_sg_for_device,
.mapping_error = mips_dma_mapping_error,
- .dma_supported = mips_dma_supported
+ .dma_supported = mips_dma_supported,
+ .device_is_coherent = plat_device_is_coherent
};
const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
diff --git a/arch/mips/netlogic/common/nlm-dma.c b/arch/mips/netlogic/common/nlm-dma.c
index 0ec9d9d..aa11b27 100644
--- a/arch/mips/netlogic/common/nlm-dma.c
+++ b/arch/mips/netlogic/common/nlm-dma.c
@@ -79,7 +79,8 @@ const struct dma_map_ops nlm_swiotlb_dma_ops = {
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = swiotlb_sync_sg_for_device,
.mapping_error = swiotlb_dma_mapping_error,
- .dma_supported = swiotlb_dma_supported
+ .dma_supported = swiotlb_dma_supported,
+ .device_is_coherent = plat_device_is_coherent
};
void __init plat_swiotlb_setup(void)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 29ce981..08da227 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -131,6 +131,7 @@ struct dma_map_ops {
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
u64 (*get_required_mask)(struct device *dev);
#endif
+ int (*device_is_coherent)(struct device *dev);
int is_phys;
};
@@ -697,6 +698,15 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
}
#ifdef CONFIG_HAS_DMA
+static inline int device_is_coherent(struct device *dev)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ if (ops && ops->device_is_coherent)
+ return ops->device_is_coherent(dev);
+ else
+ return 1; /* compatible behavior */
+}
+
static inline int dma_get_cache_alignment(void)
{
#ifdef ARCH_DMA_MINALIGN
--
2.7.0
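[For illustration only: a minimal sketch of how another architecture could wire up the
new dma_map_ops callback added by this patch. The function and platform-helper names
below are invented for the example and are not part of the patch.]

/* Hypothetical arch-side plumbing for the new ->device_is_coherent callback. */
static int example_plat_device_is_coherent(struct device *dev)
{
	/* e.g. consult per-device firmware or devicetree information */
	return example_plat_dma_is_coherent(dev);
}

static const struct dma_map_ops example_dma_map_ops = {
	/* ...the usual map/unmap/sync callbacks... */
	.device_is_coherent = example_plat_device_is_coherent,
};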
* [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment() function
2017-09-19 8:52 [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper Huacai Chen
@ 2017-09-19 8:52 ` Huacai Chen
2017-09-19 15:02 ` Christoph Hellwig
2017-09-19 8:52 ` [PATCH V6 3/3] scsi: Align block queue to dma_get_cache_alignment() Huacai Chen
2017-09-21 10:47 ` [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper Robin Murphy
2 siblings, 1 reply; 11+ messages in thread
From: Huacai Chen @ 2017-09-19 8:52 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
linux-kernel, James E . J . Bottomley, Martin K . Petersen,
linux-scsi, Huacai Chen, stable
Make dma_get_cache_alignment() accept a 'dev' argument, so that it can
return different alignments depending on a device's I/O cache coherency.
For compatibility, make all existing callers pass a NULL dev argument,
which keeps their current behavior.
Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
drivers/infiniband/hw/mthca/mthca_main.c | 2 +-
drivers/media/v4l2-core/videobuf2-dma-contig.c | 2 +-
drivers/net/ethernet/broadcom/b44.c | 2 +-
drivers/net/ethernet/ibm/emac/core.h | 2 +-
drivers/net/ethernet/mellanox/mlx4/main.c | 2 +-
drivers/spi/spi-qup.c | 4 ++--
drivers/tty/serial/mpsc.c | 16 ++++++++--------
drivers/tty/serial/samsung.c | 14 +++++++-------
include/linux/dma-mapping.h | 14 +++++++++-----
9 files changed, 31 insertions(+), 27 deletions(-)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index e36a9bc..cac5fac 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -416,7 +416,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
/* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size,
- dma_get_cache_alignment()) / mdev->limits.mtt_seg_size;
+ dma_get_cache_alignment(NULL)) / mdev->limits.mtt_seg_size;
mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
mdev->limits.mtt_seg_size,
diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index 9f389f3..7f54739 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -484,7 +484,7 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr,
int ret = 0;
struct sg_table *sgt;
unsigned long contig_size;
- unsigned long dma_align = dma_get_cache_alignment();
+ unsigned long dma_align = dma_get_cache_alignment(NULL);
/* Only cache aligned DMA transfers are reliable */
if (!IS_ALIGNED(vaddr | size, dma_align)) {
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index a1125d1..291d6af 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2587,7 +2587,7 @@ static inline void b44_pci_exit(void)
static int __init b44_init(void)
{
- unsigned int dma_desc_align_size = dma_get_cache_alignment();
+ unsigned int dma_desc_align_size = dma_get_cache_alignment(NULL);
int err;
/* Setup paramaters for syncing RX/TX DMA descriptors */
diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
index 369de2c..236bf37 100644
--- a/drivers/net/ethernet/ibm/emac/core.h
+++ b/drivers/net/ethernet/ibm/emac/core.h
@@ -68,7 +68,7 @@ static inline int emac_rx_size(int mtu)
return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD);
}
-#define EMAC_DMA_ALIGN(x) ALIGN((x), dma_get_cache_alignment())
+#define EMAC_DMA_ALIGN(x) ALIGN((x), dma_get_cache_alignment(NULL))
#define EMAC_RX_SKB_HEADROOM \
EMAC_DMA_ALIGN(CONFIG_IBM_EMAC_RX_SKB_HEADROOM)
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index e61c99e..56b1449 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1660,7 +1660,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
*/
dev->caps.reserved_mtts = ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
- dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
+ dma_get_cache_alignment(NULL)) / dev->caps.mtt_entry_sz;
err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
init_hca->mtt_base,
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 974a8ce..0c698c3 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -862,7 +862,7 @@ static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
struct spi_transfer *xfer)
{
struct spi_qup *qup = spi_master_get_devdata(master);
- size_t dma_align = dma_get_cache_alignment();
+ size_t dma_align = dma_get_cache_alignment(NULL);
int n_words;
if (xfer->rx_buf) {
@@ -1038,7 +1038,7 @@ static int spi_qup_probe(struct platform_device *pdev)
master->transfer_one = spi_qup_transfer_one;
master->dev.of_node = pdev->dev.of_node;
master->auto_runtime_pm = true;
- master->dma_alignment = dma_get_cache_alignment();
+ master->dma_alignment = dma_get_cache_alignment(NULL);
master->max_dma_len = SPI_MAX_XFER;
platform_set_drvdata(pdev, master);
diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
index 67ffecc..e2f792a 100644
--- a/drivers/tty/serial/mpsc.c
+++ b/drivers/tty/serial/mpsc.c
@@ -81,19 +81,19 @@
* Number of Tx & Rx descriptors must be powers of 2.
*/
#define MPSC_RXR_ENTRIES 32
-#define MPSC_RXRE_SIZE dma_get_cache_alignment()
+#define MPSC_RXRE_SIZE dma_get_cache_alignment(NULL)
#define MPSC_RXR_SIZE (MPSC_RXR_ENTRIES * MPSC_RXRE_SIZE)
-#define MPSC_RXBE_SIZE dma_get_cache_alignment()
+#define MPSC_RXBE_SIZE dma_get_cache_alignment(NULL)
#define MPSC_RXB_SIZE (MPSC_RXR_ENTRIES * MPSC_RXBE_SIZE)
#define MPSC_TXR_ENTRIES 32
-#define MPSC_TXRE_SIZE dma_get_cache_alignment()
+#define MPSC_TXRE_SIZE dma_get_cache_alignment(NULL)
#define MPSC_TXR_SIZE (MPSC_TXR_ENTRIES * MPSC_TXRE_SIZE)
-#define MPSC_TXBE_SIZE dma_get_cache_alignment()
+#define MPSC_TXBE_SIZE dma_get_cache_alignment(NULL)
#define MPSC_TXB_SIZE (MPSC_TXR_ENTRIES * MPSC_TXBE_SIZE)
#define MPSC_DMA_ALLOC_SIZE (MPSC_RXR_SIZE + MPSC_RXB_SIZE + MPSC_TXR_SIZE \
- + MPSC_TXB_SIZE + dma_get_cache_alignment() /* for alignment */)
+ + MPSC_TXB_SIZE + dma_get_cache_alignment(NULL) /* for alignment */)
/* Rx and Tx Ring entry descriptors -- assume entry size is <= cacheline size */
struct mpsc_rx_desc {
@@ -738,7 +738,7 @@ static void mpsc_init_hw(struct mpsc_port_info *pi)
mpsc_brg_init(pi, pi->brg_clk_src);
mpsc_brg_enable(pi);
- mpsc_sdma_init(pi, dma_get_cache_alignment()); /* burst a cacheline */
+ mpsc_sdma_init(pi, dma_get_cache_alignment(NULL)); /* burst a cacheline */
mpsc_sdma_stop(pi);
mpsc_hw_init(pi);
}
@@ -798,8 +798,8 @@ static void mpsc_init_rings(struct mpsc_port_info *pi)
* Descriptors & buffers are multiples of cacheline size and must be
* cacheline aligned.
*/
- dp = ALIGN((u32)pi->dma_region, dma_get_cache_alignment());
- dp_p = ALIGN((u32)pi->dma_region_p, dma_get_cache_alignment());
+ dp = ALIGN((u32)pi->dma_region, dma_get_cache_alignment(NULL));
+ dp_p = ALIGN((u32)pi->dma_region_p, dma_get_cache_alignment(NULL));
/*
* Partition dma region into rx ring descriptor, rx buffers,
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index 8aca18c..b40a681 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -241,7 +241,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport)
/* Enable tx dma mode */
ucon = rd_regl(port, S3C2410_UCON);
ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK);
- ucon |= (dma_get_cache_alignment() >= 16) ?
+ ucon |= (dma_get_cache_alignment(NULL) >= 16) ?
S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1;
ucon |= S3C64XX_UCON_TXMODE_DMA;
wr_regl(port, S3C2410_UCON, ucon);
@@ -292,7 +292,7 @@ static int s3c24xx_serial_start_tx_dma(struct s3c24xx_uart_port *ourport,
if (ourport->tx_mode != S3C24XX_TX_DMA)
enable_tx_dma(ourport);
- dma->tx_size = count & ~(dma_get_cache_alignment() - 1);
+ dma->tx_size = count & ~(dma_get_cache_alignment(NULL) - 1);
dma->tx_transfer_addr = dma->tx_addr + xmit->tail;
dma_sync_single_for_device(ourport->port.dev, dma->tx_transfer_addr,
@@ -332,7 +332,7 @@ static void s3c24xx_serial_start_next_tx(struct s3c24xx_uart_port *ourport)
if (!ourport->dma || !ourport->dma->tx_chan ||
count < ourport->min_dma_size ||
- xmit->tail & (dma_get_cache_alignment() - 1))
+ xmit->tail & (dma_get_cache_alignment(NULL) - 1))
s3c24xx_serial_start_tx_pio(ourport);
else
s3c24xx_serial_start_tx_dma(ourport, count);
@@ -718,8 +718,8 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id)
if (ourport->dma && ourport->dma->tx_chan &&
count >= ourport->min_dma_size) {
- int align = dma_get_cache_alignment() -
- (xmit->tail & (dma_get_cache_alignment() - 1));
+ int align = dma_get_cache_alignment(NULL) -
+ (xmit->tail & (dma_get_cache_alignment(NULL) - 1));
if (count-align >= ourport->min_dma_size) {
dma_count = count-align;
count = align;
@@ -870,7 +870,7 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p)
dma->tx_conf.direction = DMA_MEM_TO_DEV;
dma->tx_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
dma->tx_conf.dst_addr = p->port.mapbase + S3C2410_UTXH;
- if (dma_get_cache_alignment() >= 16)
+ if (dma_get_cache_alignment(NULL) >= 16)
dma->tx_conf.dst_maxburst = 16;
else
dma->tx_conf.dst_maxburst = 1;
@@ -1849,7 +1849,7 @@ static int s3c24xx_serial_probe(struct platform_device *pdev)
* so find minimal transfer size suitable for DMA mode
*/
ourport->min_dma_size = max_t(int, ourport->port.fifosize,
- dma_get_cache_alignment());
+ dma_get_cache_alignment(NULL));
dbg("%s: initialising port %p...\n", __func__, ourport);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 08da227..ef70b0d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -707,12 +707,16 @@ static inline int device_is_coherent(struct device *dev)
return 1; /* compatible behavior */
}
-static inline int dma_get_cache_alignment(void)
-{
-#ifdef ARCH_DMA_MINALIGN
- return ARCH_DMA_MINALIGN;
+#ifndef ARCH_DMA_MINALIGN
+#define ARCH_DMA_MINALIGN 1
 #endif
- return 1;
+
+static inline int dma_get_cache_alignment(struct device *dev)
+{
+ if (dev && device_is_coherent(dev))
+ return 1;
+ else /* dev is NULL or noncoherent */
+ return ARCH_DMA_MINALIGN;
 }
 #endif
--
2.7.0
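[As a usage sketch -- not taken from the patch, and with hypothetical function names --
a driver that knows its own struct device can now size its DMA buffers with a
per-device alignment, while existing NULL callers keep the worst-case value.]

/* Hypothetical caller: round a buffer length up to the device's DMA alignment. */
static size_t example_dma_buf_len(struct device *dev, size_t len)
{
	return ALIGN(len, dma_get_cache_alignment(dev));
}

/* Existing callers that pass NULL keep the old, conservative behavior. */
static size_t example_dma_buf_len_legacy(size_t len)
{
	return ALIGN(len, dma_get_cache_alignment(NULL));
}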
* Re: [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment() function
2017-09-19 8:52 ` [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment() function Huacai Chen
@ 2017-09-19 15:02 ` Christoph Hellwig
2017-09-21 4:28 ` [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment()function 陈华才
0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2017-09-19 15:02 UTC (permalink / raw)
To: Huacai Chen
Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
Fuxin Zhang, linux-kernel, James E . J . Bottomley, Martin K . Petersen,
linux-scsi, stable
> mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size,
> - dma_get_cache_alignment()) / mdev->limits.mtt_seg_size;
> + dma_get_cache_alignment(NULL)) / mdev->limits.mtt_seg_size;
>
> mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
Please pass the actually relevant struct device for each call.
* Re: [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment()function
2017-09-19 15:02 ` Christoph Hellwig
@ 2017-09-21 4:28 ` 陈华才
2017-09-21 14:36 ` Christoph Hellwig
0 siblings, 1 reply; 11+ messages in thread
From: 陈华才 @ 2017-09-21 4:28 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
Fuxin Zhang, linux-kernel, James E . J . Bottomley, Martin K . Petersen,
linux-scsi, stable
Hi, Christoph,

I have changed dma_get_cache_alignment's return value, and I don't know
whether those drivers want to return ARCH_DMA_MINALIGN unconditionally.
So I pass a NULL for those drivers, in order to keep their old behavior.

Huacai

------------------ Original ------------------
From: "Christoph Hellwig"<hch@lst.de>;
Date: Tue, Sep 19, 2017 11:02 PM
To: "Huacai Chen"<chenhc@lemote.com>;
Cc: "Christoph Hellwig"<hch@lst.de>; "Marek Szyprowski"<m.szyprowski@samsung.com>;
"Robin Murphy"<robin.murphy@arm.com>; "Andrew Morton"<akpm@linux-foundation.org>;
"Fuxin Zhang"<zhangfx@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>;
"James E . J . Bottomley"<jejb@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.petersen@oracle.com>;
"linux-scsi"<linux-scsi@vger.kernel.org>; "stable"<stable@vger.kernel.org>;
Subject: Re: [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment()function

> mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size,
> - dma_get_cache_alignment()) / mdev->limits.mtt_seg_size;
> + dma_get_cache_alignment(NULL)) / mdev->limits.mtt_seg_size;
>
> mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
Please pass the actually relevant struct device for each call.
* Re: [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment()function
2017-09-21 4:28 ` [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment()function 陈华才
@ 2017-09-21 14:36 ` Christoph Hellwig
0 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2017-09-21 14:36 UTC (permalink / raw)
To: 陈华才
Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
Fuxin Zhang, linux-kernel, James E . J . Bottomley, Martin K . Petersen,
linux-scsi, stable
On Thu, Sep 21, 2017 at 12:28:25PM +0800, 陈华才 wrote:
> Hi, Christoph,
>
> I have changed dma_get_cache_alignment's return value, and I don't know
> whether those drivers want to return ARCH_DMA_MINALIGN unconditionally.
> So I pass a NULL for those drivers, in order to keep their old behavior.
Per our documentation yes, they do want ARCH_DMA_MINALIGN. Please Cc all
the driver maintainers on your updated patch so that they can review it,
though.
* [PATCH V6 3/3] scsi: Align block queue to dma_get_cache_alignment()
2017-09-19 8:52 [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper Huacai Chen
2017-09-19 8:52 ` [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment() function Huacai Chen
@ 2017-09-19 8:52 ` Huacai Chen
2017-09-24 3:45 ` kbuild test robot
2017-09-21 10:47 ` [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper Robin Murphy
2 siblings, 1 reply; 11+ messages in thread
From: Huacai Chen @ 2017-09-19 8:52 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
linux-kernel, James E . J . Bottomley, Martin K . Petersen,
linux-scsi, Huacai Chen, stable
In non-coherent DMA mode, the kernel uses cache flushing operations to
maintain I/O coherency, so the SCSI block queue should be aligned to
ARCH_DMA_MINALIGN. Otherwise data corruption can occur, at least on
MIPS:
Step 1, dma_map_single
Step 2, cache_invalidate (no writeback)
Step 3, dma_from_device
Step 4, dma_unmap_single
If a DMA buffer and a kernel structure share the same cache line, and
the kernel structure holds dirty data, the cache invalidate (without
writeback) in step 2 will cause that data to be lost.
Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
drivers/scsi/scsi_lib.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9cf6a80..19abc2e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2132,11 +2132,11 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
q->limits.cluster = 0;
/*
- * set a reasonable default alignment on word boundaries: the
- * host and device may alter it using
+ * set a reasonable default alignment on word/cacheline boundaries:
+ * the host and device may alter it using
 * blk_queue_update_dma_alignment() later.
 */
- blk_queue_dma_alignment(q, 0x03);
+ blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment(dev)) - 1);
}
EXPORT_SYMBOL_GPL(__scsi_init_queue);
--
2.7.0
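[To make the four steps above concrete, here is a sketch of the kind of layout that
gets corrupted on a noncoherent system, plus one way to avoid it. The structures are
invented for illustration and are not from the patch.]

/*
 * 'flags' is written by the CPU while 'buf' is mapped DMA_FROM_DEVICE.
 * If both land in the same cache line, the invalidate done in step 2
 * can throw away a dirty 'flags' update that was still in the cache.
 */
struct example_bad_layout {
	unsigned long flags;	/* CPU-owned, may be dirty in the cache */
	u8 buf[32];		/* device writes here via DMA */
};

/* Keeping the DMA area on its own cache lines avoids the sharing. */
struct example_safe_layout {
	unsigned long flags;
	u8 buf[32] __aligned(ARCH_DMA_MINALIGN);
};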
* Re: [PATCH V6 3/3] scsi: Align block queue to dma_get_cache_alignment()
2017-09-19 8:52 ` [PATCH V6 3/3] scsi: Align block queue to dma_get_cache_alignment() Huacai Chen
@ 2017-09-24 3:45 ` kbuild test robot
0 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2017-09-24 3:45 UTC (permalink / raw)
To: Huacai Chen
Cc: kbuild-all, Christoph Hellwig, Marek Szyprowski, Robin Murphy,
Andrew Morton, Fuxin Zhang, linux-kernel, James E . J . Bottomley,
Martin K . Petersen, linux-scsi, Huacai Chen, stable
[-- Attachment #1: Type: text/plain, Size: 2753 bytes --]
Hi Huacai,
[auto build test ERROR on linus/master]
[also build test ERROR on v4.14-rc1 next-20170922]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Huacai-Chen/dma-mapping-Introduce-device_is_coherent-as-a-helper/20170920-204740
config: m68k-sun3_defconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 4.9.0
reproduce:
  wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
  chmod +x ~/bin/make.cross
  # save the attached .config to linux build tree
  make.cross ARCH=m68k
All errors (new ones prefixed by >>):
  drivers/scsi/scsi_lib.c: In function '__scsi_init_queue':
>> drivers/scsi/scsi_lib.c:2139:2: error: implicit declaration of function 'dma_get_cache_alignment' [-Werror=implicit-function-declaration]
  blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment(dev)) - 1);
  ^
  cc1: some warnings being treated as errors
vim +/dma_get_cache_alignment +2139 drivers/scsi/scsi_lib.c
  2103
  2104  void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
  2105  {
  2106  struct device *dev = shost->dma_dev;
  2107
  2108  queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
  2109
  2110  /*
  2111  * this limit is imposed by hardware restrictions
  2112  */
  2113  blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize,
  2114  SG_MAX_SEGMENTS));
  2115
  2116  if (scsi_host_prot_dma(shost)) {
  2117  shost->sg_prot_tablesize =
  2118  min_not_zero(shost->sg_prot_tablesize,
  2119  (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS);
  2120  BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize);
  2121  blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize);
  2122  }
  2123
  2124  blk_queue_max_hw_sectors(q, shost->max_sectors);
  2125  blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
  2126  blk_queue_segment_boundary(q, shost->dma_boundary);
  2127  dma_set_seg_boundary(dev, shost->dma_boundary);
  2128
  2129  blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
  2130
  2131  if (!shost->use_clustering)
  2132  q->limits.cluster = 0;
  2133
  2134  /*
  2135  * set a reasonable default alignment on word/cacheline boundaries:
  2136  * the host and device may alter it using
  2137  * blk_queue_update_dma_alignment() later.
  2138  */
> 2139  blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment(dev)) - 1);
  2140  }
  2141  EXPORT_SYMBOL_GPL(__scsi_init_queue);
  2142
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 12226 bytes --]
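[The error suggests this m68k configuration is built without CONFIG_HAS_DMA, so
dma_get_cache_alignment() is never declared. One possible remedy -- an assumption on
the editor's part, not a fix posted in this thread -- would be a stub for the
!CONFIG_HAS_DMA case in include/linux/dma-mapping.h that preserves the pre-series
default:]

#ifndef CONFIG_HAS_DMA
static inline int dma_get_cache_alignment(struct device *dev)
{
#ifdef ARCH_DMA_MINALIGN
	return ARCH_DMA_MINALIGN;	/* same value the old helper returned */
#else
	return 1;
#endif
}
#endif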
* Re: [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper
2017-09-19 8:52 [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper Huacai Chen
2017-09-19 8:52 ` [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment() function Huacai Chen
2017-09-19 8:52 ` [PATCH V6 3/3] scsi: Align block queue to dma_get_cache_alignment() Huacai Chen
@ 2017-09-21 10:47 ` Robin Murphy
2017-09-22 2:13 ` [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as ahelper 陈华才
2 siblings, 1 reply; 11+ messages in thread
From: Robin Murphy @ 2017-09-21 10:47 UTC (permalink / raw)
To: Huacai Chen, Christoph Hellwig
Cc: Marek Szyprowski, Andrew Morton, Fuxin Zhang, linux-kernel,
James E . J . Bottomley, Martin K . Petersen, linux-scsi, stable
On 19/09/17 09:52, Huacai Chen wrote:
> Introduce device_is_coherent() as a helper function; it will be used
> in the next patch.
>
> There is a MIPS-specific plat_device_is_coherent(), but we need a more
> generic solution, so add and use a new function pointer in dma_map_ops.
I think we're heading in the right direction with the series, but I
still don't like this patch. I can pretty much guarantee that driver
authors *will* abuse a generic device_is_coherent() API to mean "I can
skip other DMA API calls and just use virt_to_phys()".
I think it would be far better to allow architectures to provide their
own override of dma_get_cache_alignment(), and let the coherency detail
remain internal to the relevant arch implementations.
[...]
> @@ -697,6 +698,15 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
> }
>
> #ifdef CONFIG_HAS_DMA
> +static inline int device_is_coherent(struct device *dev)
> +{
> + const struct dma_map_ops *ops = get_dma_ops(dev);
> + if (ops && ops->device_is_coherent)
> + return ops->device_is_coherent(dev);
> + else
> + return 1; /* compatible behavior */
That is also quite scary - if someone now adds a new
dma_get_cache_alignment() call and dutifully passes a non-NULL device,
they will now get back an alignment of 1 on all non-coherent platforms
except MIPS: hello data corruption.
Robin.
> +}
> +
> static inline int dma_get_cache_alignment(void)
> {
> #ifdef ARCH_DMA_MINALIGN
>
* Re: [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as ahelper
2017-09-21 10:47 ` [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper Robin Murphy
@ 2017-09-22 2:13 ` 陈华才
2017-09-22 13:44 ` Robin Murphy
0 siblings, 1 reply; 11+ messages in thread
From: 陈华才 @ 2017-09-22 2:13 UTC (permalink / raw)
To: Robin Murphy, Christoph Hellwig
Cc: Marek Szyprowski, Andrew Morton, Fuxin Zhang, linux-kernel,
James E . J . Bottomley, Martin K . Petersen, linux-scsi, stable
Hi, Robin,

Before 2.6.36 dma_get_cache_alignment was arch-dependent, and it was
unified in commit 4565f0170dfc849b3629c27d7 ("dma-mapping: unify
dma_get_cache_alignment implementations"). Should we revert to the old
implementation?

Huacai

------------------ Original ------------------
From: "Robin Murphy"<robin.murphy@arm.com>;
Date: Thu, Sep 21, 2017 06:47 PM
To: "Huacai Chen"<chenhc@lemote.com>; "Christoph Hellwig"<hch@lst.de>;
Cc: "Marek Szyprowski"<m.szyprowski@samsung.com>; "Andrew Morton"<akpm@linux-foundation.org>;
"Fuxin Zhang"<zhangfx@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>;
"James E . J . Bottomley"<jejb@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.petersen@oracle.com>;
"linux-scsi"<linux-scsi@vger.kernel.org>; "stable"<stable@vger.kernel.org>;
Subject: Re: [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as ahelper

On 19/09/17 09:52, Huacai Chen wrote:
> Introduce device_is_coherent() as a helper function; it will be used
> in the next patch.
>
> There is a MIPS-specific plat_device_is_coherent(), but we need a more
> generic solution, so add and use a new function pointer in dma_map_ops.
I think we're heading in the right direction with the series, but I
still don't like this patch. I can pretty much guarantee that driver
authors *will* abuse a generic device_is_coherent() API to mean "I can
skip other DMA API calls and just use virt_to_phys()".
I think it would be far better to allow architectures to provide their
own override of dma_get_cache_alignment(), and let the coherency detail
remain internal to the relevant arch implementations.
[...]
> @@ -697,6 +698,15 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
> }
>
> #ifdef CONFIG_HAS_DMA
> +static inline int device_is_coherent(struct device *dev)
> +{
> + const struct dma_map_ops *ops = get_dma_ops(dev);
> + if (ops && ops->device_is_coherent)
> + return ops->device_is_coherent(dev);
> + else
> + return 1; /* compatible behavior */
That is also quite scary - if someone now adds a new
dma_get_cache_alignment() call and dutifully passes a non-NULL device,
they will now get back an alignment of 1 on all non-coherent platforms
except MIPS: hello data corruption.
Robin.
> +}
> +
> static inline int dma_get_cache_alignment(void)
> {
> #ifdef ARCH_DMA_MINALIGN
>
* Re: [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as ahelper
2017-09-22 2:13 ` [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as ahelper 陈华才
@ 2017-09-22 13:44 ` Robin Murphy
2017-09-22 13:49 ` Christoph Hellwig
0 siblings, 1 reply; 11+ messages in thread
From: Robin Murphy @ 2017-09-22 13:44 UTC (permalink / raw)
To: 陈华才, Christoph Hellwig
Cc: Marek Szyprowski, Andrew Morton, Fuxin Zhang, linux-kernel,
James E . J . Bottomley, Martin K . Petersen, linux-scsi, stable
On 22/09/17 03:13, 陈华才 wrote:
> Hi, Robin,
>
> Before 2.6.36 dma_get_cache_alignment was arch-dependent, and it was
> unified in commit 4565f0170dfc849b3629c27d7 ("dma-mapping: unify
> dma_get_cache_alignment implementations"). Should we revert to the old
> implementation?

Not quite - I mean instead of adding an ops->device_is_coherent callback
(which cannot really have a safe fallback value either way) and trying
to enforce that dma_get_cache_alignment() should be the only valid
caller, just add an ops->get_cache_alignment callback directly.

Robin.

> Huacai
>
> ------------------ Original ------------------
> From: "Robin Murphy"<robin.murphy@arm.com>;
> Date: Thu, Sep 21, 2017 06:47 PM
> To: "Huacai Chen"<chenhc@lemote.com>; "Christoph Hellwig"<hch@lst.de>;
> Cc: "Marek Szyprowski"<m.szyprowski@samsung.com>; "Andrew Morton"<akpm@linux-foundation.org>;
> "Fuxin Zhang"<zhangfx@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>;
> "James E . J . Bottomley"<jejb@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.petersen@oracle.com>;
> "linux-scsi"<linux-scsi@vger.kernel.org>; "stable"<stable@vger.kernel.org>;
> Subject: Re: [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as ahelper
>
> On 19/09/17 09:52, Huacai Chen wrote:
>> Introduce device_is_coherent() as a helper function; it will be used
>> in the next patch.
>>
>> There is a MIPS-specific plat_device_is_coherent(), but we need a more
>> generic solution, so add and use a new function pointer in dma_map_ops.
>
> I think we're heading in the right direction with the series, but I
> still don't like this patch. I can pretty much guarantee that driver
> authors *will* abuse a generic device_is_coherent() API to mean "I can
> skip other DMA API calls and just use virt_to_phys()".
>
> I think it would be far better to allow architectures to provide their
> own override of dma_get_cache_alignment(), and let the coherency detail
> remain internal to the relevant arch implementations.
>
> [...]
>> @@ -697,6 +698,15 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
>> }
>>
>> #ifdef CONFIG_HAS_DMA
>> +static inline int device_is_coherent(struct device *dev)
>> +{
>> + const struct dma_map_ops *ops = get_dma_ops(dev);
>> + if (ops && ops->device_is_coherent)
>> + return ops->device_is_coherent(dev);
>> + else
>> + return 1; /* compatible behavior */
>
> That is also quite scary - if someone now adds a new
> dma_get_cache_alignment() call and dutifully passes a non-NULL device,
> they will now get back an alignment of 1 on all non-coherent platforms
> except MIPS: hello data corruption.
>
> Robin.
>
>> +}
>> +
>> static inline int dma_get_cache_alignment(void)
>> {
>> #ifdef ARCH_DMA_MINALIGN
>>
* Re: [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as ahelper
2017-09-22 13:44 ` Robin Murphy
@ 2017-09-22 13:49 ` Christoph Hellwig
0 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2017-09-22 13:49 UTC (permalink / raw)
To: Robin Murphy
Cc: 陈华才, Christoph Hellwig, Marek Szyprowski, Andrew Morton,
Fuxin Zhang, linux-kernel, James E . J . Bottomley, Martin K . Petersen,
linux-scsi, stable
On Fri, Sep 22, 2017 at 02:44:51PM +0100, Robin Murphy wrote:
> Not quite - I mean instead of adding an ops->device_is_coherent callback
> (which cannot really have a safe fallback value either way) and trying
> to enforce that dma_get_cache_alignment() should be the only valid
> caller, just add an ops->get_cache_alignment callback directly.
Exactly - and then fall back to ARCH_DMA_MINALIGN/1 if the ops vector is
not provided, to keep the existing behavior.
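[Pulling the two suggestions together, a rough sketch of the proposed alternative might
look like the following. This is one reading of the discussion, not code posted in the
thread; ->get_cache_alignment is a hypothetical new dma_map_ops member.]

static inline int dma_get_cache_alignment(struct device *dev)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (dev && ops && ops->get_cache_alignment)
		return ops->get_cache_alignment(dev);

#ifdef ARCH_DMA_MINALIGN
	return ARCH_DMA_MINALIGN;	/* keep today's conservative default */
#else
	return 1;
#endif
}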
end of thread, other threads:[~2017-09-24 3:45 UTC | newest]
Thread overview: 11+ messages
2017-09-19 8:52 [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper Huacai Chen
2017-09-19 8:52 ` [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment() function Huacai Chen
2017-09-19 15:02 ` Christoph Hellwig
2017-09-21 4:28 ` [PATCH V6 2/3] dma-mapping: Rework dma_get_cache_alignment()function 陈华才
2017-09-21 14:36 ` Christoph Hellwig
2017-09-19 8:52 ` [PATCH V6 3/3] scsi: Align block queue to dma_get_cache_alignment() Huacai Chen
2017-09-24 3:45 ` kbuild test robot
2017-09-21 10:47 ` [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as a helper Robin Murphy
2017-09-22 2:13 ` [PATCH V6 1/3] dma-mapping: Introduce device_is_coherent() as ahelper 陈华才
2017-09-22 13:44 ` Robin Murphy
2017-09-22 13:49 ` Christoph Hellwig