Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/9] dmaengine: stm32-dma: Fix typo in Kconfig
From: M'boumba Cedric Madianga @ 2016-12-13 13:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636451-27863-1-git-send-email-cedric.madianga@gmail.com>

As STM32 DMA driver is only used as buit-in driver, it couldn't be used as
module.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
---
 drivers/dma/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 263495d..96da57a 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -458,7 +458,7 @@ config STM32_DMA
 	help
 	  Enable support for the on-chip DMA controller on STMicroelectronics
 	  STM32 MCUs.
-	  If you have a board based on such a MCU and wish to use DMA say Y or M
+	  If you have a board based on such a MCU and wish to use DMA say Y
 	  here.
 
 config S3C24XX_DMAC
-- 
1.9.1

^ permalink raw reply related

* [PATCH 3/9] dt-bindings: stm32-dma: Fix typo regarding DMA client binding
From: M'boumba Cedric Madianga @ 2016-12-13 13:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636451-27863-1-git-send-email-cedric.madianga@gmail.com>

Only four cells are required for dma client binding not five.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
---
 Documentation/devicetree/bindings/dma/stm32-dma.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/dma/stm32-dma.txt b/Documentation/devicetree/bindings/dma/stm32-dma.txt
index 70cd13f..4408af6 100644
--- a/Documentation/devicetree/bindings/dma/stm32-dma.txt
+++ b/Documentation/devicetree/bindings/dma/stm32-dma.txt
@@ -40,8 +40,7 @@ Example:
 
 DMA clients connected to the STM32 DMA controller must use the format
 described in the dma.txt file, using a five-cell specifier for each
-channel: a phandle plus four integer cells.
-The four cells in order are:
+channel: a phandle to the DMA controller plus the following four integer cells:
 
 1. The channel id
 2. The request line number
@@ -61,7 +60,7 @@ The four cells in order are:
 	0x1: medium
 	0x2: high
 	0x3: very high
-5. A 32bit mask specifying the DMA FIFO threshold configuration which are device
+4. A 32bit mask specifying the DMA FIFO threshold configuration which are device
    dependent:
  -bit 0-1: Fifo threshold
 	0x0: 1/4 full FIFO
-- 
1.9.1

^ permalink raw reply related

* [PATCH 4/9] dmaengine: stm32-dma: Fix null pointer dereference in stm32_dma_tx_status
From: M'boumba Cedric Madianga @ 2016-12-13 13:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636451-27863-1-git-send-email-cedric.madianga@gmail.com>

chan->desc is always set to NULL when a DMA transfer is complete.
As a DMA transfer could be complete during the call of stm32_dma_tx_status,
we need to be sure that chan->desc is not NULL before using this variable
to avoid a null pointer deference issue.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
---
 drivers/dma/stm32-dma.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index a884b85..3056ce7 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -880,7 +880,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
 	struct virt_dma_desc *vdesc;
 	enum dma_status status;
 	unsigned long flags;
-	u32 residue;
+	u32 residue = 0;
 
 	status = dma_cookie_status(c, cookie, state);
 	if ((status == DMA_COMPLETE) || (!state))
@@ -888,16 +888,12 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
 	vdesc = vchan_find_desc(&chan->vchan, cookie);
-	if (cookie == chan->desc->vdesc.tx.cookie) {
+	if (chan->desc && cookie == chan->desc->vdesc.tx.cookie)
 		residue = stm32_dma_desc_residue(chan, chan->desc,
 						 chan->next_sg);
-	} else if (vdesc) {
+	else if (vdesc)
 		residue = stm32_dma_desc_residue(chan,
 						 to_stm32_dma_desc(vdesc), 0);
-	} else {
-		residue = 0;
-	}
-
 	dma_set_residue(state, residue);
 
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
-- 
1.9.1

^ permalink raw reply related

* [PATCH 5/9] dmaengine: stm32-dma: Rework starting transfer management
From: M'boumba Cedric Madianga @ 2016-12-13 13:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636451-27863-1-git-send-email-cedric.madianga@gmail.com>

This patch reworks the way to manage transfer starting.
Now, starting DMA is only allowed when the channel is not busy.
Then, stm32_dma_start_transfer is declared as void.
At least, after each transfer completion, we start the next transfer if a
new descriptor as been queued in the issued list during an ongoing
transfer.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
---
 drivers/dma/stm32-dma.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 3056ce7..adb846c 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -421,7 +421,7 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
 	dev_dbg(chan2dev(chan), "SFCR:  0x%08x\n", sfcr);
 }
 
-static int stm32_dma_start_transfer(struct stm32_dma_chan *chan)
+static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 {
 	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
 	struct virt_dma_desc *vdesc;
@@ -432,12 +432,12 @@ static int stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 
 	ret = stm32_dma_disable_chan(chan);
 	if (ret < 0)
-		return ret;
+		return;
 
 	if (!chan->desc) {
 		vdesc = vchan_next_desc(&chan->vchan);
 		if (!vdesc)
-			return -EPERM;
+			return;
 
 		chan->desc = to_stm32_dma_desc(vdesc);
 		chan->next_sg = 0;
@@ -471,7 +471,7 @@ static int stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 
 	chan->busy = true;
 
-	return 0;
+	dev_dbg(chan2dev(chan), "vchan %p: started\n", &chan->vchan);
 }
 
 static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan)
@@ -552,15 +552,13 @@ static void stm32_dma_issue_pending(struct dma_chan *c)
 {
 	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
 	unsigned long flags;
-	int ret;
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
-	if (!chan->busy) {
-		if (vchan_issue_pending(&chan->vchan) && !chan->desc) {
-			ret = stm32_dma_start_transfer(chan);
-			if ((!ret) && (chan->desc->cyclic))
-				stm32_dma_configure_next_sg(chan);
-		}
+	if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
+		dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan);
+		stm32_dma_start_transfer(chan);
+		if (chan->desc->cyclic)
+			stm32_dma_configure_next_sg(chan);
 	}
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
 }
-- 
1.9.1

^ permalink raw reply related

* [PATCH 6/9] dmaengine: stm32-dma: Fix residue computation issue in cyclic mode
From: M'boumba Cedric Madianga @ 2016-12-13 13:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636451-27863-1-git-send-email-cedric.madianga@gmail.com>

This patch resolves the residue computation issue detected in cyclic mode.
Now, in cyclic mode, we increment next_sg variable as soon as a period is
transferred instead of after pushing a new sg request.
Then, we take into account that after transferring a complete buffer,
the next_sg variable is equal to 0.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
---
 drivers/dma/stm32-dma.c | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index adb846c..ba929a9 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -500,8 +500,6 @@ static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan)
 			dev_dbg(chan2dev(chan), "CT=0 <=> SM1AR: 0x%08x\n",
 				stm32_dma_read(dmadev, STM32_DMA_SM1AR(id)));
 		}
-
-		chan->next_sg++;
 	}
 }
 
@@ -510,6 +508,7 @@ static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan)
 	if (chan->desc) {
 		if (chan->desc->cyclic) {
 			vchan_cyclic_callback(&chan->desc->vdesc);
+			chan->next_sg++;
 			stm32_dma_configure_next_sg(chan);
 		} else {
 			chan->busy = false;
@@ -846,26 +845,40 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
 	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
 }
 
+static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan)
+{
+	u32 dma_scr, width, ndtr;
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+
+	dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	width = STM32_DMA_SCR_PSIZE_GET(dma_scr);
+	ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+
+	return ndtr << width;
+}
+
 static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
 				     struct stm32_dma_desc *desc,
 				     u32 next_sg)
 {
-	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
-	u32 dma_scr, width, residue, count;
+	u32 residue = 0;
 	int i;
 
-	residue = 0;
+	/*
+	 * In cyclic mode, for the last period, residue = remaining bytes from
+	 * NDTR
+	 */
+	if (chan->desc->cyclic && next_sg == 0)
+		return stm32_dma_get_remaining_bytes(chan);
 
+	/*
+	 * For all other periods in cyclic mode, and in sg mode,
+	 * residue = remaining bytes from NDTR + remaining periods/sg to be
+	 * transferred
+	 */
 	for (i = next_sg; i < desc->num_sgs; i++)
 		residue += desc->sg_req[i].len;
-
-	if (next_sg != 0) {
-		dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
-		width = STM32_DMA_SCR_PSIZE_GET(dma_scr);
-		count = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
-
-		residue += count << width;
-	}
+	residue += stm32_dma_get_remaining_bytes(chan);
 
 	return residue;
 }
-- 
1.9.1

^ permalink raw reply related

* [PATCH 7/9] dmaengine: stm32-dma: Add error messages if xlate fails
From: M'boumba Cedric Madianga @ 2016-12-13 13:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636451-27863-1-git-send-email-cedric.madianga@gmail.com>

This patch adds some error messages when a slave device fails to request a
channel.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
Reviewed-by: Ludovic BARRE <ludovic.barre@st.com>
---
 drivers/dma/stm32-dma.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index ba929a9..35639f6 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -975,27 +975,36 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
 					   struct of_dma *ofdma)
 {
 	struct stm32_dma_device *dmadev = ofdma->of_dma_data;
+	struct device *dev = dmadev->ddev.dev;
 	struct stm32_dma_cfg cfg;
 	struct stm32_dma_chan *chan;
 	struct dma_chan *c;
 
-	if (dma_spec->args_count < 4)
+	if (dma_spec->args_count < 4) {
+		dev_err(dev, "Bad number of cells\n");
 		return NULL;
+	}
 
 	cfg.channel_id = dma_spec->args[0];
 	cfg.request_line = dma_spec->args[1];
 	cfg.stream_config = dma_spec->args[2];
 	cfg.threshold = dma_spec->args[3];
 
-	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >=
-				STM32_DMA_MAX_REQUEST_ID))
+	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) ||
+	    (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) {
+		dev_err(dev, "Bad channel and/or request id\n");
 		return NULL;
+	}
 
 	chan = &dmadev->chan[cfg.channel_id];
 
 	c = dma_get_slave_channel(&chan->vchan.chan);
-	if (c)
-		stm32_dma_set_config(chan, &cfg);
+	if (!c) {
+		dev_err(dev, "No more channel avalaible\n");
+		return NULL;
+	}
+
+	stm32_dma_set_config(chan, &cfg);
 
 	return c;
 }
-- 
1.9.1

^ permalink raw reply related

* [PATCH 8/9] dmaengine: stm32-dma: Add synchronization support
From: M'boumba Cedric Madianga @ 2016-12-13 13:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636451-27863-1-git-send-email-cedric.madianga@gmail.com>

Implement the new device_synchronize() callback to allow proper
synchronization when stopping a channel.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
---
 drivers/dma/stm32-dma.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 35639f6..b7be43a 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -403,6 +403,13 @@ static int stm32_dma_terminate_all(struct dma_chan *c)
 	return 0;
 }
 
+static void stm32_dma_synchronize(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+
+	vchan_synchronize(&chan->vchan);
+}
+
 static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
 {
 	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
@@ -1068,6 +1075,7 @@ static int stm32_dma_probe(struct platform_device *pdev)
 	dd->device_prep_dma_cyclic = stm32_dma_prep_dma_cyclic;
 	dd->device_config = stm32_dma_slave_config;
 	dd->device_terminate_all = stm32_dma_terminate_all;
+	dd->device_synchronize = stm32_dma_synchronize;
 	dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
 		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
 		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
-- 
1.9.1

^ permalink raw reply related

* [PATCH 9/9] dmaengine: stm32-dma: Add max_burst support
From: M'boumba Cedric Madianga @ 2016-12-13 13:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636451-27863-1-git-send-email-cedric.madianga@gmail.com>

This patch sets the max_burst value supported by the STM32 DMA

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
---
 drivers/dma/stm32-dma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index b7be43a..49f86ca 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -114,6 +114,7 @@
 #define STM32_DMA_MAX_CHANNELS		0x08
 #define STM32_DMA_MAX_REQUEST_ID	0x08
 #define STM32_DMA_MAX_DATA_PARAM	0x03
+#define STM32_DMA_MAX_BURST		16
 
 enum stm32_dma_width {
 	STM32_DMA_BYTE,
@@ -1084,6 +1085,7 @@ static int stm32_dma_probe(struct platform_device *pdev)
 		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
 	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	dd->max_burst = STM32_DMA_MAX_BURST;
 	dd->dev = &pdev->dev;
 	INIT_LIST_HEAD(&dd->channels);
 
-- 
1.9.1

^ permalink raw reply related

* [RFC v2 PATCH 0/3] Fix dma_alloc_coherent() and friends for NOMMU
From: Vladimir Murzin @ 2016-12-13 13:45 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

It seem that addition of cache support for M-class cpus uncovered
latent bug in DMA usage. NOMMU memory model has been treated as being
always consistent; however, for R/M classes of cpu memory can be
covered by MPU which in turn might configure RAM as Normal
i.e. bufferable and cacheable. It breaks dma_alloc_coherent() and
friends, since data can stuck in caches now or be buffered.

This patch set is trying to address the issue by providing region of
memory suitable for consistent DMA operations. It is supposed that such
region is marked by MPU as non-cacheable. Since we have MPU support in
Linux for R-class only and M-class setting MPU in bootloader, proposed
interface to advertise such memory is via "memdma=size at start" command
line option, to avoid clashing with normal memory (which usually comes
from dts) it'd be safer to use it together with "mem=" command line
option. Meanwhile, I'm open to suggestions for the better way telling
Linux of such memory.

For configuration without cache support (like Cortex-M3/M4) dma
operations are forced to be coherent and wired with dma-noop. Such
decision is made based on cacheid global variable. In case cpu
supports caches and no coherent memory region is given - dma is
disallowed. Probably, some other important checks are missing, so I'll
all my ears :)

To make life easier NOMMU dma operations are kept in separate
compilation unit.

Thanks!

Changelog:

    RFC v1 -> RFC v2
           - s/dmac_unmap_area/dmac_map_area in __dma_page_cpu_to_dev()
	   - removed unrelated changes in nommu.c

Vladimir Murzin (3):
  ARM: NOMMU: introduce dma operations for noMMU
  ARM: NOMMU: set ARM_DMA_MEM_BUFFERABLE for M-class cpus
  ARM: dma-mapping: remove traces of NOMMU code

 arch/arm/include/asm/dma-mapping.h |    3 +-
 arch/arm/mm/Kconfig                |    2 +-
 arch/arm/mm/Makefile               |    5 +-
 arch/arm/mm/dma-mapping-nommu.c    |  262 ++++++++++++++++++++++++++++++++++++
 arch/arm/mm/dma-mapping.c          |   26 +---
 arch/arm/mm/mm.h                   |    3 +
 arch/arm/mm/nommu.c                |    6 +
 7 files changed, 278 insertions(+), 29 deletions(-)
 create mode 100644 arch/arm/mm/dma-mapping-nommu.c

-- 
1.7.9.5

^ permalink raw reply

* [RFC v2 PATCH 1/3] ARM: NOMMU: introduce dma operations for noMMU
From: Vladimir Murzin @ 2016-12-13 13:45 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636704-18948-1-git-send-email-vladimir.murzin@arm.com>

R/M classes of cpus can have momory covered by MPU which in turn might
configure RAM as Normal i.e. bufferable and cacheable. It breaks
dma_alloc_coherent() and friends, since data can stuck in caches now
or be buffered.

This patch introduces the way to specify region of memory (via
"memdma=size at start" command line option) suitable for consistent DMA
operations. It is supposed that such region is marked by MPU as
non-cacheable.

For configuration without cache support (like Cortex-M3/M4) dma
operations are forced to be coherent and wired with dma-noop. Such
decision is made based on cacheid global variable. In case cpu
supports caches and no coherent memory region is given - dma is
disallowed.

Reported-by: Alexandre Torgue <alexandre.torgue@st.com>
Reported-by: Andras Szemzo <sza@esh.hu>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
---
 arch/arm/include/asm/dma-mapping.h |    3 +-
 arch/arm/mm/Makefile               |    5 +-
 arch/arm/mm/dma-mapping-nommu.c    |  262 ++++++++++++++++++++++++++++++++++++
 arch/arm/mm/mm.h                   |    3 +
 arch/arm/mm/nommu.c                |    6 +
 5 files changed, 275 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/mm/dma-mapping-nommu.c

diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index bf02dbd..559faad 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -20,7 +20,8 @@ static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
 	if (dev && dev->archdata.dma_ops)
 		return dev->archdata.dma_ops;
-	return &arm_dma_ops;
+
+	return IS_ENABLED(CONFIG_MMU) ? &arm_dma_ops : &dma_noop_ops;
 }
 
 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 2ac7988..5796357 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -2,9 +2,8 @@
 # Makefile for the linux arm-specific parts of the memory manager.
 #
 
-obj-y				:= dma-mapping.o extable.o fault.o init.o \
-				   iomap.o
-
+obj-y				:= extable.o fault.o init.o iomap.o
+obj-y				+= dma-mapping$(MMUEXT).o
 obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
 				   mmap.o pgd.o mmu.o pageattr.o
 
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
new file mode 100644
index 0000000..f92d98a
--- /dev/null
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -0,0 +1,262 @@
+/*
+ *  Based on linux/arch/arm/mm/dma-mapping.c
+ *
+ *  Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  DMA uncached mapping support.
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <linux/genalloc.h>
+
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+
+#include "dma.h"
+
+unsigned long dma_start __initdata;
+unsigned long dma_size __initdata;
+
+static struct gen_pool *dma_pool;
+
+static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
+			    dma_addr_t *dma_handle, gfp_t gfp,
+			    unsigned long attrs)
+{
+	void *ptr;
+
+	if (!dma_pool)
+		return NULL;
+
+	ptr = (void *)gen_pool_alloc(dma_pool, size);
+	if (ptr) {
+		*dma_handle = __pa(ptr);
+		dmac_flush_range(ptr, ptr + size);
+		outer_flush_range(__pa(ptr), __pa(ptr) + size);
+	}
+
+	return ptr;
+}
+
+static void arm_nommu_dma_free(struct device *dev, size_t size,
+			  void *cpu_addr, dma_addr_t dma_addr,
+			  unsigned long attrs)
+{
+	gen_pool_free(dma_pool, (unsigned long)cpu_addr, size);
+}
+
+static void __dma_page_cpu_to_dev(dma_addr_t handle, size_t size,
+			 enum dma_data_direction dir)
+{
+	dmac_map_area(__va(handle), size, dir);
+
+	if (dir == DMA_FROM_DEVICE)
+		outer_inv_range(handle, handle + size);
+	else
+		outer_clean_range(handle, handle + size);
+}
+
+static void __dma_page_dev_to_cpu(dma_addr_t handle, size_t size,
+			 enum dma_data_direction dir)
+{
+	if (dir != DMA_TO_DEVICE) {
+		outer_inv_range(handle, handle + size);
+		dmac_unmap_area(__va(handle), size, dir);
+	}
+}
+
+static dma_addr_t arm_nommu_dma_map_page(struct device *dev, struct page *page,
+				      unsigned long offset, size_t size,
+				      enum dma_data_direction dir,
+				      unsigned long attrs)
+{
+	dma_addr_t handle = page_to_phys(page) + offset;
+
+	__dma_page_cpu_to_dev(handle, size, dir);
+
+	return handle;
+}
+
+static void arm_nommu_dma_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	__dma_page_dev_to_cpu(handle, size, dir);
+}
+
+
+static int arm_nommu_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+			     enum dma_data_direction dir,
+			     unsigned long attrs)
+{
+	int i;
+	struct scatterlist *sg;
+
+	for_each_sg(sgl, sg, nents, i) {
+		sg_dma_address(sg) = sg_phys(sg);
+		sg_dma_len(sg) = sg->length;
+		__dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir);
+	}
+
+	return nents;
+}
+
+static void arm_nommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		__dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir);
+}
+
+static void arm_nommu_dma_sync_single_for_device(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	__dma_page_cpu_to_dev(handle, size, dir);
+}
+
+static void arm_nommu_dma_sync_single_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	__dma_page_cpu_to_dev(handle, size, dir);
+}
+
+static void arm_nommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
+				      int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		__dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir);
+}
+
+static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
+				   int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		__dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir);
+}
+
+struct dma_map_ops arm_nommu_dma_ops = {
+	.alloc			= arm_nommu_dma_alloc,
+	.free			= arm_nommu_dma_free,
+	.map_page		= arm_nommu_dma_map_page,
+	.unmap_page		= arm_nommu_dma_unmap_page,
+	.map_sg			= arm_nommu_dma_map_sg,
+	.unmap_sg		= arm_nommu_dma_unmap_sg,
+	.sync_single_for_device	= arm_nommu_dma_sync_single_for_device,
+	.sync_single_for_cpu	= arm_nommu_dma_sync_single_for_cpu,
+	.sync_sg_for_device	= arm_nommu_dma_sync_sg_for_device,
+	.sync_sg_for_cpu	= arm_nommu_dma_sync_sg_for_cpu,
+};
+EXPORT_SYMBOL(arm_nommu_dma_ops);
+
+static struct dma_map_ops *arm_nommu_get_dma_map_ops(bool coherent)
+{
+	return coherent ? &dma_noop_ops : &arm_nommu_dma_ops;
+}
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent)
+{
+	struct dma_map_ops *dma_ops;
+
+	/*
+	 * Cahe support for v7m is optional, so can be treated as
+	 * coherent if no cache has been detected.
+	 */
+	dev->archdata.dma_coherent = (cacheid) ? coherent : true;
+
+	dma_ops = arm_nommu_get_dma_map_ops(dev->archdata.dma_coherent);
+
+	set_dma_ops(dev, dma_ops);
+}
+
+void arch_teardown_dma_ops(struct device *dev)
+{
+}
+
+int dma_supported(struct device *dev, u64 mask)
+{
+	if (cacheid && !dma_pool)
+		return 0;
+
+	return 1;
+}
+
+EXPORT_SYMBOL(dma_supported);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES	4096
+
+static int __init dma_debug_do_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+	return 0;
+}
+core_initcall(dma_debug_do_init);
+
+/*
+ * Initialise the coherent pool for DMA allocations.
+ */
+static int  __init dma_pool_init(void)
+{
+	int ret;
+
+	if (cacheid && !dma_size) {
+		pr_warn("DMA: coherent memory region has not been given.\n");
+		return 0;
+	}
+
+	dma_pool = gen_pool_create(PAGE_SHIFT, -1);
+
+	if (!dma_pool)
+		goto out;
+
+	ret = gen_pool_add_virt(dma_pool, (unsigned long)dma_start, (unsigned long)dma_start,
+				dma_size, -1);
+	if (ret)
+		goto destroy_genpool;
+
+	gen_pool_set_algo(dma_pool, gen_pool_first_fit_order_align, NULL);
+
+	pr_info("DMA: coherent memory region 0x%lx - 0x%lx (%lu KiB)\n",
+		dma_start, dma_start + dma_size, dma_size >> 10);
+
+	return 0;
+
+destroy_genpool:
+	gen_pool_destroy(dma_pool);
+	dma_pool = NULL;
+out:
+	pr_err("DMA: failed to allocate coherent memory region\n");
+	return -ENOMEM;
+}
+
+postcore_initcall(dma_pool_init);
+
+/* "memdma=<size>@<address>" parsing. */
+static int __init early_memdma(char *p)
+{
+	if (!p)
+		return -EINVAL;
+
+	dma_size = memparse(p, &p);
+	if (*p == '@')
+		dma_start = memparse(p + 1, &p);
+
+	return 0;
+}
+early_param("memdma", early_memdma);
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index ce727d4..18eb869 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -97,3 +97,6 @@ struct static_vm {
 void dma_contiguous_remap(void);
 
 unsigned long __clear_cr(unsigned long mask);
+
+extern unsigned long dma_start  __initdata;
+extern unsigned long dma_size  __initdata;
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 681cec8..5827e54 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -303,6 +303,12 @@ void __init sanity_check_meminfo(void)
 	end = memblock_end_of_DRAM();
 	high_memory = __va(end - 1) + 1;
 	memblock_set_current_limit(end);
+
+	if (dma_size &&
+	    memblock_overlaps_region(&memblock.memory, dma_start, dma_size)) {
+		pr_crit("DMA: coherent memory region overlaps with main memory.\n");
+		dma_size = 0;
+	}
 }
 
 /*
-- 
1.7.9.5

^ permalink raw reply related

* [RFC v2 PATCH 2/3] ARM: NOMMU: set ARM_DMA_MEM_BUFFERABLE for M-class cpus
From: Vladimir Murzin @ 2016-12-13 13:45 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636704-18948-1-git-send-email-vladimir.murzin@arm.com>

Now, we have dedicated non-cacheable region for consistent DMA
operations. However, that region can still be marked as bufferable by
MPU, so it'd be safer to have barriers by default.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
---
 arch/arm/mm/Kconfig |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 6dffbe4..7f0000d 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1023,7 +1023,7 @@ config ARM_L1_CACHE_SHIFT
 
 config ARM_DMA_MEM_BUFFERABLE
 	bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K) && !CPU_V7
-	default y if CPU_V6 || CPU_V6K || CPU_V7
+	default y if CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M
 	help
 	  Historically, the kernel has used strongly ordered mappings to
 	  provide DMA coherent memory.  With the advent of ARMv7, mapping
-- 
1.7.9.5

^ permalink raw reply related

* [RFC v2 PATCH 3/3] ARM: dma-mapping: remove traces of NOMMU code
From: Vladimir Murzin @ 2016-12-13 13:45 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636704-18948-1-git-send-email-vladimir.murzin@arm.com>

DMA operations for NOMMU case have been just factored out into
separate compilation unit, so don't keep dead code.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
---
 arch/arm/mm/dma-mapping.c |   26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ab77100..d8a755b 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -344,8 +344,6 @@ static void __dma_free_buffer(struct page *page, size_t size)
 	}
 }
 
-#ifdef CONFIG_MMU
-
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
 				     const void *caller, bool want_vaddr,
@@ -646,22 +644,6 @@ static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot)
 	return prot;
 }
 
-#define nommu() 0
-
-#else	/* !CONFIG_MMU */
-
-#define nommu() 1
-
-#define __get_dma_pgprot(attrs, prot)				__pgprot(0)
-#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv)	NULL
-#define __alloc_from_pool(size, ret_page)			NULL
-#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag)	NULL
-#define __free_from_pool(cpu_addr, size)			do { } while (0)
-#define __free_from_contiguous(dev, page, cpu_addr, size, wv)	do { } while (0)
-#define __dma_free_remap(cpu_addr, size)			do { } while (0)
-
-#endif	/* CONFIG_MMU */
-
 static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
 				   struct page **ret_page)
 {
@@ -803,7 +785,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
 	if (cma)
 		buf->allocator = &cma_allocator;
-	else if (nommu() || is_coherent)
+	else if (is_coherent)
 		buf->allocator = &simple_allocator;
 	else if (allowblock)
 		buf->allocator = &remap_allocator;
@@ -852,8 +834,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		 unsigned long attrs)
 {
-	int ret = -ENXIO;
-#ifdef CONFIG_MMU
+	int ret;
 	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
@@ -868,7 +849,6 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 				      vma->vm_end - vma->vm_start,
 				      vma->vm_page_prot);
 	}
-#endif	/* CONFIG_MMU */
 
 	return ret;
 }
@@ -887,9 +867,7 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		 unsigned long attrs)
 {
-#ifdef CONFIG_MMU
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
-#endif	/* CONFIG_MMU */
 	return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
 }
 
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v6] arm64: fpsimd: improve stacking logic in non-interruptible context
From: Dave Martin @ 2016-12-13 13:49 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481632529-24218-1-git-send-email-ard.biesheuvel@linaro.org>

On Tue, Dec 13, 2016 at 12:35:29PM +0000, Ard Biesheuvel wrote:
> Currently, we allow kernel mode NEON in softirq or hardirq context by
> stacking and unstacking a slice of the NEON register file for each call
> to kernel_neon_begin() and kernel_neon_end(), respectively.
> 
> Given that
> a) a CPU typically spends most of its time in userland, during which time
>    no kernel mode NEON in process context is in progress,
> b) a CPU spends most of its time in the kernel doing other things than
>    kernel mode NEON when it gets interrupted to perform kernel mode NEON
>    in softirq context
> 
> the stacking and subsequent unstacking is only necessary if we are
> interrupting a thread while it is performing kernel mode NEON in process
> context, which means that in all other cases, we can simply preserve the
> userland FPSIMD state once, and only restore it upon return to userland,
> even if we are being invoked from softirq or hardirq context.
> 
> So instead of checking whether we are running in interrupt context, keep
> track of the level of nested kernel mode NEON calls in progress, and only
> perform the eager stack/unstack if the level exceeds 1.

Hang on ... we could be in the middle of fpsimd_save_state() for other
reasons, say on the context switch path.  Wouldn't we need to take the
lock for those too?

Also, a spinlock can't protect a critical section from code running on
the same CPU... wouldn't it just deadlock in that case?

I still tend to prefer v4 -- there we do a redundant double-save only if
one kernel_neon_begin() interrupts the actual task context save.

Cheers
---Dave

> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> v6:
> - use a spinlock instead of disabling interrupts
> 
> v5:
> - perform the test-and-set and the fpsimd_save_state with interrupts disabled,
>   to prevent nested kernel_neon_begin()/_end() pairs to clobber the state
>   while it is being preserved
> 
> v4:
> - use this_cpu_inc/dec, which give sufficient guarantees regarding
>   concurrency, but do not imply SMP barriers, which are not needed here
> 
> v3:
> - avoid corruption by concurrent invocations of kernel_neon_begin()/_end()
> 
> v2:
> - BUG() on unexpected values of the nesting level
> - relax the BUG() on num_regs>32 to a WARN, given that nothing actually
>   breaks in that case
> 
>  arch/arm64/include/asm/fpsimd.h |  3 +
>  arch/arm64/kernel/fpsimd.c      | 77 ++++++++++++++------
>  2 files changed, 58 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 50f559f574fe..ee09fe1c37b6 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -17,6 +17,7 @@
>  #define __ASM_FP_H
>  
>  #include <asm/ptrace.h>
> +#include <linux/spinlock.h>
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -37,6 +38,8 @@ struct fpsimd_state {
>  			u32 fpcr;
>  		};
>  	};
> +	/* lock protecting the preserved state while it is being saved */
> +	spinlock_t lock;
>  	/* the id of the last cpu to have restored this state */
>  	unsigned int cpu;
>  };
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 394c61db5566..886eea2d4084 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -160,6 +160,7 @@ void fpsimd_flush_thread(void)
>  	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
>  	fpsimd_flush_task_state(current);
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
> +	spin_lock_init(&current->thread.fpsimd_state.lock);
>  }
>  
>  /*
> @@ -220,45 +221,77 @@ void fpsimd_flush_task_state(struct task_struct *t)
>  
>  #ifdef CONFIG_KERNEL_MODE_NEON
>  
> -static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
> -static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
> +/*
> + * Although unlikely, it is possible for three kernel mode NEON contexts to
> + * be live at the same time: process context, softirq context and hardirq
> + * context. So while the userland context is stashed in the thread's fpsimd
> + * state structure, we need two additional levels of storage.
> + */
> +static DEFINE_PER_CPU(struct fpsimd_partial_state, nested_fpsimdstate[2]);
> +static DEFINE_PER_CPU(int, kernel_neon_nesting_level);
>  
>  /*
>   * Kernel-side NEON support functions
>   */
>  void kernel_neon_begin_partial(u32 num_regs)
>  {
> -	if (in_interrupt()) {
> -		struct fpsimd_partial_state *s = this_cpu_ptr(
> -			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> +	struct fpsimd_partial_state *s;
> +	int level;
>  
> -		BUG_ON(num_regs > 32);
> -		fpsimd_save_partial_state(s, roundup(num_regs, 2));
> -	} else {
> +	preempt_disable();
> +
> +	/*
> +	 * Save the userland FPSIMD state if we have one and if we
> +	 * haven't done so already. Clear fpsimd_last_state to indicate
> +	 * that there is no longer userland FPSIMD state in the
> +	 * registers.
> +	 */
> +	if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) {
>  		/*
> -		 * Save the userland FPSIMD state if we have one and if we
> -		 * haven't done so already. Clear fpsimd_last_state to indicate
> -		 * that there is no longer userland FPSIMD state in the
> +		 * Whoever test-and-sets the TIF_FOREIGN_FPSTATE flag should
> +		 * preserve the userland FP/SIMD state without interruption by
> +		 * nested kernel_neon_begin()/_end() calls.
> +		 * The reason is that the FP/SIMD register file is shared with
> +		 * SVE on hardware that supports it, and nested kernel mode NEON
> +		 * calls do not restore the upper part of the shared SVE/SIMD
>  		 * registers.
>  		 */
> -		preempt_disable();
> -		if (current->mm &&
> -		    !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> -			fpsimd_save_state(&current->thread.fpsimd_state);
> -		this_cpu_write(fpsimd_last_state, NULL);
> +		if (spin_trylock(&current->thread.fpsimd_state.lock)) {
> +			if (!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> +				fpsimd_save_state(&current->thread.fpsimd_state);
> +			spin_unlock(&current->thread.fpsimd_state.lock);
> +		}
> +	}
> +	this_cpu_write(fpsimd_last_state, NULL);
> +
> +	level = this_cpu_inc_return(kernel_neon_nesting_level);
> +	BUG_ON(level > 3);
> +
> +	if (level > 1) {
> +		s = this_cpu_ptr(nested_fpsimdstate);
> +
> +		WARN_ON_ONCE(num_regs > 32);
> +		num_regs = min(roundup(num_regs, 2), 32U);
> +
> +		fpsimd_save_partial_state(&s[level - 2], num_regs);
>  	}
>  }
>  EXPORT_SYMBOL(kernel_neon_begin_partial);
>  
>  void kernel_neon_end(void)
>  {
> -	if (in_interrupt()) {
> -		struct fpsimd_partial_state *s = this_cpu_ptr(
> -			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> -		fpsimd_load_partial_state(s);
> -	} else {
> -		preempt_enable();
> +	struct fpsimd_partial_state *s;
> +	int level;
> +
> +	level = this_cpu_read(kernel_neon_nesting_level);
> +	BUG_ON(level < 1);
> +
> +	if (level > 1) {
> +		s = this_cpu_ptr(nested_fpsimdstate);
> +		fpsimd_load_partial_state(&s[level - 2]);
>  	}
> +	this_cpu_dec(kernel_neon_nesting_level);
> +	preempt_enable();
>  }
>  EXPORT_SYMBOL(kernel_neon_end);
>  
> -- 
> 2.7.4
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* [PATCHv5 00/11] CONFIG_DEBUG_VIRTUAL for arm64
From: Mark Rutland @ 2016-12-13 13:50 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481068257-6367-1-git-send-email-labbott@redhat.com>

On Tue, Dec 06, 2016 at 03:50:46PM -0800, Laura Abbott wrote:
> Hi,
> 
> This is v5 of the series to add CONFIG_DEBUG_VIRTUAL for arm64. This mostly
> contains minor fixups including adding a few extra headers around and splitting
> things out into a few more sub-patches.
> 
> With a few more acks I think this should be ready to go. More testing is
> always appreciated though.

I've given the whole series a go with kasan, kexec, and hibernate (using
test_resume with the disk target), and everything looks happy. So FWIW,
for the series:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>

Hopefully this can be queued soon for v4.11!

Thanks,
Mark.

^ permalink raw reply

* [RFC v2 00/13] usb/mmc/power: Fix USB/LAN when TFTP booting
From: Krzysztof Kozlowski @ 2016-12-13 13:53 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <b34f49c8-6b37-cabf-2a0f-c8d465c53159@xs4all.nl>

On Tue, Dec 13, 2016 at 2:34 PM, Hans Verkuil <hverkuil@xs4all.nl> wrote:
> Try again, this time with Krzysztof's new email address...
>
>
> On 13/12/16 13:20, Hans Verkuil wrote:
>>
>> Hi Krzysztof,
>>
>> This still seems to be broken with the latest 4.9 kernel, right?
>>
>> Has there been any progress on this? Do you have an updated patch series
>> for me to use?

Hi,

I think it is not fixed. Still. I left this work to others. AFAIK,
Peter Chen is working on a new generic approach:
https://lwn.net/Articles/703556/
On top of his patchset, Odroid would need some DTS code as well (and
maybe something in usb3503). However I do not plan to work on this
anymore as I do not have Odroid U3 anymore. Marek and Bartlomiej from
Samsung Poland are in CC-list so maybe they would like to continue the
work?

Best regards,
Krzysztof

^ permalink raw reply

* [PATCH V7 4/8] common: DMA-mapping: add DMA_ATTR_PRIVILEGED attribute
From: Robin Murphy @ 2016-12-13 13:55 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481567927-14791-5-git-send-email-sricharan@codeaurora.org>

On 12/12/16 18:38, Sricharan R wrote:
> From: Mitchel Humpherys <mitchelh@codeaurora.org>
> 
> This patch adds the DMA_ATTR_PRIVILEGED attribute to the DMA-mapping
> subsystem.
> 
> Some advanced peripherals such as remote processors and GPUs perform
> accesses to DMA buffers in both privileged "supervisor" and unprivileged
> "user" modes.  This attribute is used to indicate to the DMA-mapping
> subsystem that the buffer is fully accessible at the elevated privilege
> level (and ideally inaccessible or at least read-only at the
> lesser-privileged levels).
> 
> Cc: linux-doc at vger.kernel.org
> Reviewed-by: Robin Murphy <robin.murphy@arm.com>
> Tested-by: Robin Murphy <robin.murphy@arm.com>
> Acked-by: Will Deacon <will.deacon@arm.com>
> Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
> ---
>  Documentation/DMA-attributes.txt | 10 ++++++++++
>  include/linux/dma-mapping.h      |  7 +++++++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
> index 98bf7ac..44c6bc4 100644
> --- a/Documentation/DMA-attributes.txt
> +++ b/Documentation/DMA-attributes.txt
> @@ -143,3 +143,13 @@ So, this provides a way for drivers to avoid those error messages on calls
>  where allocation failures are not a problem, and shouldn't bother the logs.
>  
>  NOTE: At the moment DMA_ATTR_NO_WARN is only implemented on PowerPC.
> +
> +DMA_ATTR_PRIVILEGED
> +------------------------------
> +
> +Some advanced peripherals such as remote processors and GPUs perform
> +accesses to DMA buffers in both privileged "supervisor" and unprivileged
> +"user" modes.  This attribute is used to indicate to the DMA-mapping
> +subsystem that the buffer is fully accessible at the elevated privilege
> +level (and ideally inaccessible or at least read-only at the
> +lesser-privileged levels).
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 6f3e6ca..ee31ea1 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -63,6 +63,13 @@
>  #define DMA_ATTR_NO_WARN	(1UL << 8)
>  
>  /*
> + * DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully
> + * accessible at an elevated privilege level (and ideally inaccessible or
> + * at least read-only at lesser-privileged levels).
> + */
> +#define DMA_ATTR_PRIVILEGED		(1UL << 8)

Oops, I spoke slightly too soon - there's a value conflict here which
has been missed in the rebase.

Robin

> +
> +/*
>   * A dma_addr_t can hold any valid DMA or bus address for the platform.
>   * It can be given to a device to use as a DMA source or target.  A CPU cannot
>   * reference a dma_addr_t directly because there may be translation between
> 

^ permalink raw reply

* [PATCH V7 5/8] arm64/dma-mapping: Implement DMA_ATTR_PRIVILEGED
From: Robin Murphy @ 2016-12-13 14:02 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481567927-14791-6-git-send-email-sricharan@codeaurora.org>

On 12/12/16 18:38, Sricharan R wrote:
> From: Mitchel Humpherys <mitchelh@codeaurora.org>
> 
> The newly added DMA_ATTR_PRIVILEGED is useful for creating mappings that
> are only accessible to privileged DMA engines.  Implement it in
> dma-iommu.c so that the ARM64 DMA IOMMU mapper can make use of it.
> 
> Reviewed-by: Robin Murphy <robin.murphy@arm.com>
> Tested-by: Robin Murphy <robin.murphy@arm.com>
> Acked-by: Will Deacon <will.deacon@arm.com>
> Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
> ---
>  arch/arm64/mm/dma-mapping.c |  6 +++---
>  drivers/iommu/dma-iommu.c   | 10 ++++++++--
>  include/linux/dma-iommu.h   |  3 ++-
>  3 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index 401f79a..ae76ead 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -557,7 +557,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
>  				 unsigned long attrs)
>  {
>  	bool coherent = is_device_dma_coherent(dev);
> -	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
> +	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
>  	size_t iosize = size;
>  	void *addr;
>  
> @@ -711,7 +711,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
>  				   unsigned long attrs)
>  {
>  	bool coherent = is_device_dma_coherent(dev);
> -	int prot = dma_direction_to_prot(dir, coherent);
> +	int prot = dma_info_to_prot(dir, coherent, attrs);
>  	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
>  
>  	if (!iommu_dma_mapping_error(dev, dev_addr) &&
> @@ -769,7 +769,7 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
>  		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
>  
>  	return iommu_dma_map_sg(dev, sgl, nelems,
> -			dma_direction_to_prot(dir, coherent));
> +				dma_info_to_prot(dir, coherent, attrs));
>  }
>  
>  static void __iommu_unmap_sg_attrs(struct device *dev,
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index d2a7a46..756d5e0 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -182,16 +182,22 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>  EXPORT_SYMBOL(iommu_dma_init_domain);
>  
>  /**
> - * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags
> + * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
> + *                    page flags.
>   * @dir: Direction of DMA transfer
>   * @coherent: Is the DMA master cache-coherent?
> + * @attrs: DMA attributes for the mapping
>   *
>   * Return: corresponding IOMMU API page protection flags
>   */
> -int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
> +int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
> +		     unsigned long attrs)
>  {
>  	int prot = coherent ? IOMMU_CACHE : 0;
>  
> +	if (attrs & DMA_ATTR_PRIVILEGED)
> +		prot |= IOMMU_PRIV;
> +
>  	switch (dir) {
>  	case DMA_BIDIRECTIONAL:
>  		return prot | IOMMU_READ | IOMMU_WRITE;

...and applying against -next now also needs this hunk:

@@ -639,7 +639,7 @@ dma_addr_t iommu_dma_map_resource(struct device
*dev, phys_addr_t phys,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	return __iommu_dma_map(dev, phys, size,
-			dma_direction_to_prot(dir, false) | IOMMU_MMIO);
+			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO);
 }

 void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,

With those two issues fixed up, I've given the series (applied to
next-20161213) a spin on a SMMUv3/PL330 fast model and it still checks out.

Robin.

> diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
> index 32c5890..a203181 100644
> --- a/include/linux/dma-iommu.h
> +++ b/include/linux/dma-iommu.h
> @@ -34,7 +34,8 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>  		u64 size, struct device *dev);
>  
>  /* General helpers for DMA-API <-> IOMMU-API interaction */
> -int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
> +int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
> +		     unsigned long attrs);
>  
>  /*
>   * These implement the bulk of the relevant DMA mapping callbacks, but require
> 

^ permalink raw reply

* [RFC v2 PATCH 0/3] Fix dma_alloc_coherent() and friends for NOMMU
From: Russell King - ARM Linux @ 2016-12-13 14:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481636704-18948-1-git-send-email-vladimir.murzin@arm.com>

On Tue, Dec 13, 2016 at 01:45:01PM +0000, Vladimir Murzin wrote:
> This patch set is trying to address the issue by providing region of
> memory suitable for consistent DMA operations. It is supposed that such
> region is marked by MPU as non-cacheable. Since we have MPU support in
> Linux for R-class only and M-class setting MPU in bootloader, proposed
> interface to advertise such memory is via "memdma=size at start" command
> line option, to avoid clashing with normal memory (which usually comes
> from dts) it'd be safer to use it together with "mem=" command line
> option. Meanwhile, I'm open to suggestions for the better way telling
> Linux of such memory.

For those nommu systems where the MPU is not used, how do they allocate
DMA memory without setting aside a chunk of memory?

>From what I understand of the current nommu code, it would just use
the normal page allocator for DMA memory allocation, so now requiring
everything to fit the "nommu has mpu" case seems like it's going to
break older nommu.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply

* [PATCH v3] firmware: arm_scpi: fix reading sensor values on pre-1.0 SCPI firmwares
From: Sudeep Holla @ 2016-12-13 14:09 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161211211432.13252-2-martin.blumenstingl@googlemail.com>



On 11/12/16 21:14, Martin Blumenstingl wrote:
> The pre-1.0 SCPI firmwares are using one __le32 as sensor value, while
> the 1.0 SCPI protocol uses two __le32 as sensor values (a total of
> 64bit, split into 32bit upper and 32bit lower value).
> Using an "struct sensor_value" to read the sensor value on a pre-1.0
> SCPI firmware gives garbage in the "hi_val" field. Introducing a
> separate function which handles scpi_ops.sensor_get_value for pre-1.0
> SCPI firmware implementations ensures that we do not read memory which
> was not written by the SCPI firmware (which fixes for example the
> temperature reported by scpi-hwmon).
> 
> Suggested-by: Sudeep Holla <Sudeep.Holla@arm.com>
> Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>

Will send to ARM SoC once the SCPI changes land in mainline or post -rc1.

-- 
Regards,
Sudeep

^ permalink raw reply

* [PATCH v2] crypto: sun4i-ss: support the Security System PRNG
From: Corentin Labbe @ 2016-12-13 14:10 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161208090618.GB22932@gondor.apana.org.au>

On Thu, Dec 08, 2016 at 05:06:18PM +0800, Herbert Xu wrote:
> On Wed, Dec 07, 2016 at 01:51:27PM +0100, Corentin Labbe wrote:
> > 
> > So I must expose it as a crypto_rng ?
> 
> If it is to be exposed at all then algif_rng would be the best
> place.
> 
> > Could you explain why PRNG must not be used as hw_random ?
> 
> The hwrng interface was always meant to be an interface for real
> hardware random number generators.  People rely on that so we
> should not provide bogus entropy sources through this interface.
> 
> Cheers,

I have found two solutions:

The simplier is to add an attribute isprng to hwrng like that:
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -150,7 +150,8 @@ static int hwrng_init(struct hwrng *rng)
        reinit_completion(&rng->cleanup_done);
 
 skip_init:
-       add_early_randomness(rng);
+       if (!rng->isprng)
+               add_early_randomness(rng);
 
        current_quality = rng->quality ? : default_quality;
        if (current_quality > 1024)
@@ -158,7 +159,7 @@ static int hwrng_init(struct hwrng *rng)
 
        if (current_quality == 0 && hwrng_fill)
                kthread_stop(hwrng_fill);
-       if (current_quality > 0 && !hwrng_fill)
+       if (current_quality > 0 && !hwrng_fill && !rng->isprng)
                start_khwrngd();
 
        return 0;
@@ -439,7 +440,7 @@ int hwrng_register(struct hwrng *rng)
        }
        list_add_tail(&rng->list, &rng_list);
 
-       if (old_rng && !rng->init) {
+       if (old_rng && !rng->init && !rng->isprng) {
                /*
                 * Use a new device's input to add some randomness to
                 * the system.  If this rng device isn't going to be
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 34a0dc1..5a5b8dc 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -50,6 +50,7 @@ struct hwrng {
        struct list_head list;
        struct kref ref;
        struct completion cleanup_done;
+       bool isprng;
 };


With that, we still could register prng, but they dont provide any entropy.
An optional Kconfig/"module parameter" could still be added for people still wanting this old behavour.


The other solution is to "duplicate" /dev/hwrng to /dev/hwprng like that:
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -24,8 +24,11 @@
 #include <linux/uaccess.h>
 
 #define RNG_MODULE_NAME		"hw_random"
+#define PRNG_MODULE_NAME	"hw_prng"
+#define HWPRNG_MINOR	185 /* not official */
 
 static struct hwrng *current_rng;
+static struct hwrng *current_prng;
 static struct task_struct *hwrng_fill;
 static LIST_HEAD(rng_list);
 /* Protects rng_list and current_rng */
@@ -44,7 +47,7 @@ module_param(default_quality, ushort, 0644);
 MODULE_PARM_DESC(default_quality,
 		 "default entropy content of hwrng per mill");
 
-static void drop_current_rng(void);
+static void drop_current_rng(bool prng);
 static int hwrng_init(struct hwrng *rng);
 static void start_khwrngd(void);
 
@@ -56,6 +59,14 @@ static size_t rng_buffer_size(void)
 	return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES;
 }
 
+static bool is_current_xrng(struct hwrng *rng)
+{
+	if (rng->isprng)
+		return (rng == current_prng);
+	else
+		return (rng == current_rng);
+}
+
 static void add_early_randomness(struct hwrng *rng)
 {
 	int bytes_read;
@@ -88,32 +99,46 @@ static int set_current_rng(struct hwrng *rng)
 	if (err)
 		return err;
 
-	drop_current_rng();
-	current_rng = rng;
+	drop_current_rng(rng->isprng);
+	if (rng->isprng)
+		current_prng = rng;
+	else
+		current_rng = rng;
 
 	return 0;
 }
 
-static void drop_current_rng(void)
+static void drop_current_rng(bool prng)
 {
 	BUG_ON(!mutex_is_locked(&rng_mutex));
-	if (!current_rng)
-		return;
-
-	/* decrease last reference for triggering the cleanup */
-	kref_put(&current_rng->ref, cleanup_rng);
-	current_rng = NULL;
+	if (prng) {
+		if (!current_prng)
+			return;
+		/* decrease last reference for triggering the cleanup */
+		kref_put(&current_prng->ref, cleanup_rng);
+		current_prng = NULL;
+	} else {
+		if (!current_rng)
+			return;
+		/* decrease last reference for triggering the cleanup */
+		kref_put(&current_rng->ref, cleanup_rng);
+		current_rng = NULL;
+	}
 }
 
 /* Returns ERR_PTR(), NULL or refcounted hwrng */
-static struct hwrng *get_current_rng(void)
+static struct hwrng *get_current_rng(bool prng)
 {
 	struct hwrng *rng;
 
 	if (mutex_lock_interruptible(&rng_mutex))
 		return ERR_PTR(-ERESTARTSYS);
 
-	rng = current_rng;
+	if (prng)
+		rng = current_prng;
+	else
+		rng = current_rng;
+
 	if (rng)
 		kref_get(&rng->ref);
 
@@ -193,8 +218,8 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,
 	return 0;
 }
 
-static ssize_t rng_dev_read(struct file *filp, char __user *buf,
-			    size_t size, loff_t *offp)
+static ssize_t genrng_dev_read(struct file *filp, char __user *buf,
+			    size_t size, loff_t *offp, bool prng)
 {
 	ssize_t ret = 0;
 	int err = 0;
@@ -202,7 +227,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
 	struct hwrng *rng;
 
 	while (size) {
-		rng = get_current_rng();
+		rng = get_current_rng(prng);
 		if (IS_ERR(rng)) {
 			err = PTR_ERR(rng);
 			goto out;
@@ -270,6 +295,18 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
 	goto out;
 }
 
+static ssize_t rng_dev_read(struct file *filp, char __user *buf,
+			    size_t size, loff_t *offp)
+{
+	return genrng_dev_read(filp, buf, size, offp, false);
+}
+
+static ssize_t prng_dev_read(struct file *filp, char __user *buf,
+			    size_t size, loff_t *offp)
+{
+	return genrng_dev_read(filp, buf, size, offp, true);
+}
+
 static const struct file_operations rng_chrdev_ops = {
 	.owner		= THIS_MODULE,
 	.open		= rng_dev_open,
@@ -278,6 +315,7 @@ static const struct file_operations rng_chrdev_ops = {
 };
 
 static const struct attribute_group *rng_dev_groups[];
+static const struct attribute_group *prng_dev_groups[];
 
 static struct miscdevice rng_miscdev = {
 	.minor		= HWRNG_MINOR,
@@ -287,9 +325,24 @@ static struct miscdevice rng_miscdev = {
 	.groups		= rng_dev_groups,
 };
 
-static ssize_t hwrng_attr_current_store(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t len)
+static const struct file_operations prng_chrdev_ops = {
+	.owner		= THIS_MODULE,
+	.open		= rng_dev_open,
+	.read		= prng_dev_read,
+	.llseek		= noop_llseek,
+};
+
+static struct miscdevice prng_miscdev = {
+	.minor		= HWPRNG_MINOR,
+	.name		= PRNG_MODULE_NAME,
+	.nodename	= "hwprng",
+	.fops		= &prng_chrdev_ops,
+	.groups		= prng_dev_groups,
+};
+
+static ssize_t genrng_attr_current_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t len, bool prng)
 {
 	int err;
 	struct hwrng *rng;
@@ -301,8 +354,13 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 	list_for_each_entry(rng, &rng_list, list) {
 		if (sysfs_streq(rng->name, buf)) {
 			err = 0;
-			if (rng != current_rng)
-				err = set_current_rng(rng);
+			if (prng) {
+				if (rng != current_prng)
+					err = set_current_rng(rng);
+			} else {
+				if (rng != current_rng)
+					err = set_current_rng(rng);
+			}
 			break;
 		}
 	}
@@ -311,14 +369,28 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 	return err ? : len;
 }
 
-static ssize_t hwrng_attr_current_show(struct device *dev,
-				       struct device_attribute *attr,
-				       char *buf)
+static ssize_t hwrng_attr_current_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t len)
+{
+	return genrng_attr_current_store(dev, attr, buf, len, false);
+}
+
+static ssize_t hwprng_attr_current_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t len)
+{
+	return genrng_attr_current_store(dev, attr, buf, len, true);
+}
+
+static ssize_t genrng_attr_current_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf, bool prng)
 {
 	ssize_t ret;
 	struct hwrng *rng;
 
-	rng = get_current_rng();
+	rng = get_current_rng(prng);
 	if (IS_ERR(rng))
 		return PTR_ERR(rng);
 
@@ -328,9 +400,24 @@ static ssize_t hwrng_attr_current_show(struct device *dev,
 	return ret;
 }
 
-static ssize_t hwrng_attr_available_show(struct device *dev,
+static ssize_t hwrng_attr_current_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	return genrng_attr_current_show(dev, attr, buf, false);
+}
+
+static ssize_t hwprng_attr_current_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return genrng_attr_current_show(dev, attr, buf, true);
+}
+
+
+static ssize_t hwgenrng_attr_available_show(struct device *dev,
 					 struct device_attribute *attr,
-					 char *buf)
+					 char *buf, bool prng)
 {
 	int err;
 	struct hwrng *rng;
@@ -340,8 +427,10 @@ static ssize_t hwrng_attr_available_show(struct device *dev,
 		return -ERESTARTSYS;
 	buf[0] = '\0';
 	list_for_each_entry(rng, &rng_list, list) {
-		strlcat(buf, rng->name, PAGE_SIZE);
-		strlcat(buf, " ", PAGE_SIZE);
+		if (rng->isprng == prng) {
+			strlcat(buf, rng->name, PAGE_SIZE);
+			strlcat(buf, " ", PAGE_SIZE);
+		}
 	}
 	strlcat(buf, "\n", PAGE_SIZE);
 	mutex_unlock(&rng_mutex);
@@ -349,6 +438,20 @@ static ssize_t hwrng_attr_available_show(struct device *dev,
 	return strlen(buf);
 }
 
+static ssize_t hwrng_attr_available_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return hwgenrng_attr_available_show(dev, attr, buf, false);
+}
+
+static ssize_t hwprng_attr_available_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	return hwgenrng_attr_available_show(dev, attr, buf, true);
+}
+
 static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR,
 		   hwrng_attr_current_show,
 		   hwrng_attr_current_store);
@@ -356,6 +459,13 @@ static DEVICE_ATTR(rng_available, S_IRUGO,
 		   hwrng_attr_available_show,
 		   NULL);
 
+static DEVICE_ATTR(prng_current, S_IRUGO | S_IWUSR,
+		   hwprng_attr_current_show,
+		   hwprng_attr_current_store);
+static DEVICE_ATTR(prng_available, S_IRUGO,
+		   hwprng_attr_available_show,
+		   NULL);
+
 static struct attribute *rng_dev_attrs[] = {
 	&dev_attr_rng_current.attr,
 	&dev_attr_rng_available.attr,
@@ -364,14 +474,35 @@ static struct attribute *rng_dev_attrs[] = {
 
 ATTRIBUTE_GROUPS(rng_dev);
 
+static struct attribute *prng_dev_attrs[] = {
+	&dev_attr_prng_current.attr,
+	&dev_attr_prng_available.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(prng_dev);
+
 static void __exit unregister_miscdev(void)
 {
 	misc_deregister(&rng_miscdev);
+	misc_deregister(&prng_miscdev);
 }
 
 static int __init register_miscdev(void)
 {
-	return misc_register(&rng_miscdev);
+	int err;
+
+	err = misc_register(&rng_miscdev);
+	if (err)
+		return err;
+	err = misc_register(&prng_miscdev);
+	if (err)
+		goto reg_error;
+	else
+		return 0;
+reg_error:
+	misc_deregister(&rng_miscdev);
+	return err;
 }
 
 static int hwrng_fillfn(void *unused)
@@ -381,7 +512,7 @@ static int hwrng_fillfn(void *unused)
 	while (!kthread_should_stop()) {
 		struct hwrng *rng;
 
-		rng = get_current_rng();
+		rng = get_current_rng(false);
 		if (IS_ERR(rng) || !rng)
 			break;
 		mutex_lock(&reading_mutex);
@@ -462,8 +593,8 @@ void hwrng_unregister(struct hwrng *rng)
 	mutex_lock(&rng_mutex);
 
 	list_del(&rng->list);
-	if (current_rng == rng) {
-		drop_current_rng();
+	if (is_current_xrng(rng)) {
+		drop_current_rng(rng->isprng);
 		if (!list_empty(&rng_list)) {
 			struct hwrng *tail;
 
@@ -553,7 +684,8 @@ static int __init hwrng_modinit(void)
 static void __exit hwrng_modexit(void)
 {
 	mutex_lock(&rng_mutex);
-	BUG_ON(current_rng);
+	WARN_ON(current_rng);
+	WARN_ON(current_prng);
 	kfree(rng_buffer);
 	kfree(rng_fillbuf);
 	mutex_unlock(&rng_mutex);
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 34a0dc1..5a5b8dc 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -50,6 +50,7 @@ struct hwrng {
 	struct list_head list;
 	struct kref ref;
 	struct completion cleanup_done;
+	bool isprng;
 };

What do you think about those two solutions ?

Regards
Corentin Labbe

^ permalink raw reply related

* [RFC v2 PATCH 0/3] Fix dma_alloc_coherent() and friends for NOMMU
From: Vladimir Murzin @ 2016-12-13 14:14 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161213140711.GY14217@n2100.armlinux.org.uk>

On 13/12/16 14:07, Russell King - ARM Linux wrote:
> On Tue, Dec 13, 2016 at 01:45:01PM +0000, Vladimir Murzin wrote:
>> This patch set is trying to address the issue by providing region of
>> memory suitable for consistent DMA operations. It is supposed that such
>> region is marked by MPU as non-cacheable. Since we have MPU support in
>> Linux for R-class only and M-class setting MPU in bootloader, proposed
>> interface to advertise such memory is via "memdma=size at start" command
>> line option, to avoid clashing with normal memory (which usually comes
>> from dts) it'd be safer to use it together with "mem=" command line
>> option. Meanwhile, I'm open to suggestions for the better way telling
>> Linux of such memory.
> 
> For those nommu systems where the MPU is not used, how do they allocate
> DMA memory without setting aside a chunk of memory?
> 
>>From what I understand of the current nommu code, it would just use
> the normal page allocator for DMA memory allocation, so now requiring
> everything to fit the "nommu has mpu" case seems like it's going to
> break older nommu.
> 

Probably, it'd be better if we just fallback to dma-noop operations if there
is no dma region, i.e. assume that platform is coherent. We still need a way
to tell user that absence of such region can be reason of broken DMA.

Cheers
Vladimir

^ permalink raw reply

* [PATCH v6] arm64: fpsimd: improve stacking logic in non-interruptible context
From: Ard Biesheuvel @ 2016-12-13 14:17 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161213134908.GG1574@e103592.cambridge.arm.com>

On 13 December 2016 at 13:49, Dave Martin <Dave.Martin@arm.com> wrote:
> On Tue, Dec 13, 2016 at 12:35:29PM +0000, Ard Biesheuvel wrote:
>> Currently, we allow kernel mode NEON in softirq or hardirq context by
>> stacking and unstacking a slice of the NEON register file for each call
>> to kernel_neon_begin() and kernel_neon_end(), respectively.
>>
>> Given that
>> a) a CPU typically spends most of its time in userland, during which time
>>    no kernel mode NEON in process context is in progress,
>> b) a CPU spends most of its time in the kernel doing other things than
>>    kernel mode NEON when it gets interrupted to perform kernel mode NEON
>>    in softirq context
>>
>> the stacking and subsequent unstacking is only necessary if we are
>> interrupting a thread while it is performing kernel mode NEON in process
>> context, which means that in all other cases, we can simply preserve the
>> userland FPSIMD state once, and only restore it upon return to userland,
>> even if we are being invoked from softirq or hardirq context.
>>
>> So instead of checking whether we are running in interrupt context, keep
>> track of the level of nested kernel mode NEON calls in progress, and only
>> perform the eager stack/unstack if the level exceeds 1.
>
> Hang on ... we could be in the middle of fpsimd_save_state() for other
> reasons, say on the context switch path.  Wouldn't we need to take the
> lock for those too?
>

Yes. Saving the state should never be interrupted, and now that I
think of it, this is an issue for your original patch as well: even if
you always preserve the state first in kernel_neon_begin(), any
occurrence of fpsimd_save_state() could be interrupted by a
kernel_neon_begin()/_end() pair, after which the SVE state is nuked
anyway (regardless of whether a patch like this one is applied) So I
think we do need the lock in all cases where the FP/SIMD is copied to
memory and the flag set.

> Also, a spinlock can't protect a critical section from code running on
> the same CPU... wouldn't it just deadlock in that case?
>

That is why I use spin_trylock(). But I realise now that the patch is
incorrect: the nesting level needs to be incremented first, or the
interrupter will still clobber the context while it is being
preserved.

> I still tend to prefer v4 -- there we do a redundant double-save only if
> one kernel_neon_begin() interrupts the actual task context save.
>

I think we're not at the bottom of this rabbit hole yet:-)

^ permalink raw reply

* [PATCH v4 0/9]  STM32F4 missing clocks
From: gabriel.fernandez at st.com @ 2016-12-13 14:20 UTC (permalink / raw)
  To: linux-arm-kernel

From: Gabriel Fernandez <gabriel.fernandez@st.com>

v4:
 - Fix post divider for lcd (replace CLK_DIVIDER_POWER_OF_TWO flage by a divider table)
 - Allow posibility to change parent of SAI clocks.
 - Use common definition file for STM32Fx clocks.

v3:
 - restructure the patch series to have only one patch for all bindings changes.
   (clk: stm32f4: Update DT bindings documentation)

v2:
 - Put post divider in config structure
 - Rework patch-set
    - add update dt binding documentation
    - add clock definition file
 - Use composite for pll vco clocks
 - For auxiliary clock, allow the possiblity to enable peripheral
   clocks at same time (sugested by radek)
 - Add vco_in clock (entry frequency for all pll) to simplify the code and clarify clock tree
 - Fix missing end of divider tables

This patch-set adds:
 - I2S & SAI PLLs
 - SDIO & 48 Mhz clocks
 - LCD-TFT clock
 - I2S & SAI clocks

Gabriel Fernandez (9):
  clk: stm32f4: Update DT bindings documentation
  clk: stm32f4: Add PLL_I2S & PLL_SAI for STM32F429/469 boards
  clk: stm32f4: Add post divisor for I2S & SAI PLLs
  clk: stm32f4: Add lcd-tft clock
  clk: stm32f4: Add I2S clock
  clk: stm32f4: Add SAI clocks
  clk: stm32f4: SDIO & 48Mhz clock management for STM32F469 board
  ARM: dts: stm32f4: Add external I2S clock
  ARM: dts: stm32f4: Include auxiliary stm32fx clock definition

 .../devicetree/bindings/clock/st,stm32-rcc.txt     |  17 +
 arch/arm/boot/dts/stm32f429.dtsi                   |   9 +-
 drivers/clk/clk-stm32f4.c                          | 595 ++++++++++++++++++++-
 include/dt-bindings/clock/stm32fx-clock.h          |  39 ++
 4 files changed, 640 insertions(+), 20 deletions(-)
 create mode 100644 include/dt-bindings/clock/stm32fx-clock.h

-- 
1.9.1

^ permalink raw reply

* [PATCH v4 1/9] clk: stm32f4: Update DT bindings documentation
From: gabriel.fernandez at st.com @ 2016-12-13 14:20 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481638820-29324-1-git-send-email-gabriel.fernandez@st.com>

From: Gabriel Fernandez <gabriel.fernandez@st.com>

Creation of dt include file for specific stm32f4 clocks.
These specific clocks are not derived from system clock (SYSCLOCK)
We should use index 1 to use these clocks in DT.
e.g. <&rcc 1 CLK_LSI>

Signed-off-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Acked-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/clock/st,stm32-rcc.txt     | 17 ++++++++++
 include/dt-bindings/clock/stm32fx-clock.h          | 39 ++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 include/dt-bindings/clock/stm32fx-clock.h

diff --git a/Documentation/devicetree/bindings/clock/st,stm32-rcc.txt b/Documentation/devicetree/bindings/clock/st,stm32-rcc.txt
index 0532d81..8f19d87 100644
--- a/Documentation/devicetree/bindings/clock/st,stm32-rcc.txt
+++ b/Documentation/devicetree/bindings/clock/st,stm32-rcc.txt
@@ -17,6 +17,9 @@ Required properties:
   property, containing a phandle to the clock device node, an index selecting
   between gated clocks and other clocks and an index specifying the clock to
   use.
+- clocks: External oscillator clock phandle
+  - high speed external clock signal (HSE)
+  - external I2S clock (I2S_CKIN)
 
 Example:
 
@@ -25,6 +28,7 @@ Example:
 		#clock-cells = <2>
 		compatible = "st,stm32f42xx-rcc", "st,stm32-rcc";
 		reg = <0x40023800 0x400>;
+		clocks = <&clk_hse>, <&clk_i2s_ckin>;
 	};
 
 Specifying gated clocks
@@ -66,6 +70,19 @@ The secondary index is bound with the following magic numbers:
 
 	0	SYSTICK
 	1	FCLK
+	2	CLK_LSI		(low-power clock source)
+	3	CLK_LSE		(generated from a 32.768 kHz low-speed external
+				 crystal or ceramic resonator)
+	4	CLK_HSE_RTC	(HSE division factor for RTC clock)
+	5	CLK_RTC		(real-time clock)
+	6	PLL_VCO_I2S	(vco frequency of I2S pll)
+	7	PLL_VCO_SAI	(vco frequency of SAI pll)
+	8	CLK_LCD		(LCD-TFT)
+	9	CLK_I2S		(I2S clocks)
+	10	CLK_SAI1	(audio clocks)
+	11	CLK_SAI2
+	12	CLK_I2SQ_PDIV	(post divisor of pll i2s q divisor)
+	13	CLK_SAIQ_PDIV	(post divisor of pll sai q divisor)
 
 Example:
 
diff --git a/include/dt-bindings/clock/stm32fx-clock.h b/include/dt-bindings/clock/stm32fx-clock.h
new file mode 100644
index 0000000..08bcab6
--- /dev/null
+++ b/include/dt-bindings/clock/stm32fx-clock.h
@@ -0,0 +1,39 @@
+/*
+ * stm32fx-clock.h
+ *
+ * Copyright (C) 2016 STMicroelectronics
+ * Author: Gabriel Fernandez for STMicroelectronics.
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+/*
+ * List of clocks wich are not derived from system clock (SYSCLOCK)
+ *
+ * The index of these clocks is the secondary index of DT bindings
+ * (see Documentatoin/devicetree/bindings/clock/st,stm32-rcc.txt)
+ *
+ * e.g:
+	<assigned-clocks = <&rcc 1 CLK_LSE>;
+*/
+
+#ifndef _DT_BINDINGS_CLK_STMFX_H
+#define _DT_BINDINGS_CLK_STMFX_H
+
+#define SYSTICK			0
+#define FCLK			1
+#define CLK_LSI			2
+#define CLK_LSE			3
+#define CLK_HSE_RTC		4
+#define CLK_RTC			5
+#define PLL_VCO_I2S		6
+#define PLL_VCO_SAI		7
+#define CLK_LCD			8
+#define CLK_I2S			9
+#define CLK_SAI1		10
+#define CLK_SAI2		11
+#define CLK_I2SQ_PDIV		12
+#define CLK_SAIQ_PDIV		13
+
+#define END_PRIMARY_CLK		14
+
+#endif
-- 
1.9.1

^ permalink raw reply related

* [PATCH v4 2/9] clk: stm32f4: Add PLL_I2S & PLL_SAI for STM32F429/469 boards
From: gabriel.fernandez at st.com @ 2016-12-13 14:20 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481638820-29324-1-git-send-email-gabriel.fernandez@st.com>

From: Gabriel Fernandez <gabriel.fernandez@st.com>

This patch introduces PLL_I2S and PLL_SAI.
Vco clock of these PLLs can be modify by DT (only n multiplicator,
m divider is still fixed by the boot-loader).
Each PLL has 3 dividers. PLL should be off when we modify the rate.

Signed-off-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Acked-by: Rob Herring <robh@kernel.org>
---
 drivers/clk/clk-stm32f4.c | 351 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 334 insertions(+), 17 deletions(-)

diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index 5eb05db..cab2014 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -28,6 +28,14 @@
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
 
+/*
+ * Include list of clocks wich are not derived from system clock (SYSCLOCK)
+ * The index of these clocks is the secondary index of DT bindings
+ *
+ */
+#include <dt-bindings/clock/stm32fx-clock.h>
+
+#define STM32F4_RCC_CR			0x00
 #define STM32F4_RCC_PLLCFGR		0x04
 #define STM32F4_RCC_CFGR		0x08
 #define STM32F4_RCC_AHB1ENR		0x30
@@ -37,6 +45,8 @@
 #define STM32F4_RCC_APB2ENR		0x44
 #define STM32F4_RCC_BDCR		0x70
 #define STM32F4_RCC_CSR			0x74
+#define STM32F4_RCC_PLLI2SCFGR		0x84
+#define STM32F4_RCC_PLLSAICFGR		0x88
 
 struct stm32f4_gate_data {
 	u8	offset;
@@ -208,8 +218,6 @@ struct stm32f4_gate_data {
 	{ STM32F4_RCC_APB2ENR, 26,	"ltdc",		"apb2_div" },
 };
 
-enum { SYSTICK, FCLK, CLK_LSI, CLK_LSE, CLK_HSE_RTC, CLK_RTC, END_PRIMARY_CLK };
-
 /*
  * This bitmask tells us which bit offsets (0..192) on STM32F4[23]xxx
  * have gate bits associated with them. Its combined hweight is 71.
@@ -324,23 +332,312 @@ static struct clk *clk_register_apb_mul(struct device *dev, const char *name,
 	return clk;
 }
 
-/*
- * Decode current PLL state and (statically) model the state we inherit from
- * the bootloader.
- */
-static void stm32f4_rcc_register_pll(const char *hse_clk, const char *hsi_clk)
+enum {
+	PLL,
+	PLL_I2S,
+	PLL_SAI,
+};
+
+static const struct clk_div_table pll_divp_table[] = {
+	{ 0, 2 }, { 1, 4 }, { 2, 6 }, { 3, 8 }, { 0 }
+};
+
+static const struct clk_div_table pll_divr_table[] = {
+	{ 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 }, { 0 }
+};
+
+struct stm32f4_pll {
+	spinlock_t *lock;
+	struct	clk_gate gate;
+	u8 offset;
+	u8 bit_rdy_idx;
+	u8 status;
+	u8 n_start;
+};
+
+#define to_stm32f4_pll(_gate) container_of(_gate, struct stm32f4_pll, gate)
+
+struct stm32f4_vco_data {
+	const char *vco_name;
+	u8 offset;
+	u8 bit_idx;
+	u8 bit_rdy_idx;
+};
+
+static const struct stm32f4_vco_data  vco_data[] = {
+	{ "vco",     STM32F4_RCC_PLLCFGR,    24, 25 },
+	{ "vco-i2s", STM32F4_RCC_PLLI2SCFGR, 26, 27 },
+	{ "vco-sai", STM32F4_RCC_PLLSAICFGR, 28, 29 },
+};
+
+struct stm32f4_div_data {
+	u8 shift;
+	u8 width;
+	u8 flag_div;
+	const struct clk_div_table *div_table;
+};
+
+#define MAX_PLL_DIV 3
+static const struct stm32f4_div_data  div_data[MAX_PLL_DIV] = {
+	{ 16, 2, 0,			pll_divp_table	},
+	{ 24, 4, CLK_DIVIDER_ONE_BASED, NULL		},
+	{ 28, 3, 0,			pll_divr_table	},
+};
+
+struct stm32f4_pll_data {
+	u8 pll_num;
+	u8 n_start;
+	const char *div_name[MAX_PLL_DIV];
+};
+
+static const struct stm32f4_pll_data stm32f429_pll[MAX_PLL_DIV] = {
+	{ PLL,	   192, { "pll", "pll48",    NULL	} },
+	{ PLL_I2S, 192, { NULL,  "plli2s-q", "plli2s-r" } },
+	{ PLL_SAI,  49, { NULL,  "pllsai-q", "pllsai-r" } },
+};
+
+static const struct stm32f4_pll_data stm32f469_pll[MAX_PLL_DIV] = {
+	{ PLL,	   50, { "pll",	     "pll-q",    NULL	    } },
+	{ PLL_I2S, 50, { "plli2s-p", "plli2s-q", "plli2s-r" } },
+	{ PLL_SAI, 50, { "pllsai-p", "pllsai-q", "pllsai-r" } },
+};
+
+static int stm32f4_pll_is_enabled(struct clk_hw *hw)
+{
+	return clk_gate_ops.is_enabled(hw);
+}
+
+static int stm32f4_pll_enable(struct clk_hw *hw)
+{
+	struct clk_gate *gate = to_clk_gate(hw);
+	struct stm32f4_pll *pll = to_stm32f4_pll(gate);
+	int ret = 0;
+	unsigned long reg;
+
+	ret = clk_gate_ops.enable(hw);
+
+	ret = readl_relaxed_poll_timeout_atomic(base + STM32F4_RCC_CR, reg,
+			reg & (1 << pll->bit_rdy_idx), 0, 10000);
+
+	return ret;
+}
+
+static void stm32f4_pll_disable(struct clk_hw *hw)
+{
+	clk_gate_ops.disable(hw);
+}
+
+static unsigned long stm32f4_pll_recalc(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct clk_gate *gate = to_clk_gate(hw);
+	struct stm32f4_pll *pll = to_stm32f4_pll(gate);
+	unsigned long n;
+
+	n = (readl(base + pll->offset) >> 6) & 0x1ff;
+
+	return parent_rate * n;
+}
+
+static long stm32f4_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+		unsigned long *prate)
+{
+	struct clk_gate *gate = to_clk_gate(hw);
+	struct stm32f4_pll *pll = to_stm32f4_pll(gate);
+	unsigned long n;
+
+	n = rate / *prate;
+
+	if (n < pll->n_start)
+		n = pll->n_start;
+	else if (n > 432)
+		n = 432;
+
+	return *prate * n;
+}
+
+static int stm32f4_pll_set_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long parent_rate)
+{
+	struct clk_gate *gate = to_clk_gate(hw);
+	struct stm32f4_pll *pll = to_stm32f4_pll(gate);
+
+	unsigned long n;
+	unsigned long val;
+	int pll_state;
+
+	pll_state = stm32f4_pll_is_enabled(hw);
+
+	if (pll_state)
+		stm32f4_pll_disable(hw);
+
+	n = rate  / parent_rate;
+
+	val = readl(base + pll->offset) & ~(0x1ff << 6);
+
+	writel(val | ((n & 0x1ff) <<  6), base + pll->offset);
+
+	if (pll_state)
+		stm32f4_pll_enable(hw);
+
+	return 0;
+}
+
+static const struct clk_ops stm32f4_pll_gate_ops = {
+	.enable		= stm32f4_pll_enable,
+	.disable	= stm32f4_pll_disable,
+	.is_enabled	= stm32f4_pll_is_enabled,
+	.recalc_rate	= stm32f4_pll_recalc,
+	.round_rate	= stm32f4_pll_round_rate,
+	.set_rate	= stm32f4_pll_set_rate,
+};
+
+struct stm32f4_pll_div {
+	struct clk_divider div;
+	struct clk_hw *hw_pll;
+};
+
+#define to_pll_div_clk(_div) container_of(_div, struct stm32f4_pll_div, div)
+
+static unsigned long stm32f4_pll_div_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	return clk_divider_ops.recalc_rate(hw, parent_rate);
+}
+
+static long stm32f4_pll_div_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *prate)
+{
+	return clk_divider_ops.round_rate(hw, rate, prate);
+}
+
+static int stm32f4_pll_div_set_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long parent_rate)
+{
+	int pll_state, ret;
+
+	struct clk_divider *div = to_clk_divider(hw);
+	struct stm32f4_pll_div *pll_div = to_pll_div_clk(div);
+
+	pll_state = stm32f4_pll_is_enabled(pll_div->hw_pll);
+
+	if (pll_state)
+		stm32f4_pll_disable(pll_div->hw_pll);
+
+	ret = clk_divider_ops.set_rate(hw, rate, parent_rate);
+
+	if (pll_state)
+		stm32f4_pll_enable(pll_div->hw_pll);
+
+	return ret;
+}
+
+const struct clk_ops stm32f4_pll_div_ops = {
+	.recalc_rate = stm32f4_pll_div_recalc_rate,
+	.round_rate = stm32f4_pll_div_round_rate,
+	.set_rate = stm32f4_pll_div_set_rate,
+};
+
+static struct clk_hw *clk_register_pll_div(const char *name,
+		const char *parent_name, unsigned long flags,
+		void __iomem *reg, u8 shift, u8 width,
+		u8 clk_divider_flags, const struct clk_div_table *table,
+		struct clk_hw *pll_hw, spinlock_t *lock)
 {
-	unsigned long pllcfgr = readl(base + STM32F4_RCC_PLLCFGR);
+	struct stm32f4_pll_div *pll_div;
+	struct clk_hw *hw;
+	struct clk_init_data init;
+	int ret;
+
+	/* allocate the divider */
+	pll_div = kzalloc(sizeof(*pll_div), GFP_KERNEL);
+	if (!pll_div)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &stm32f4_pll_div_ops;
+	init.flags = flags;
+	init.parent_names = (parent_name ? &parent_name : NULL);
+	init.num_parents = (parent_name ? 1 : 0);
+
+	/* struct clk_divider assignments */
+	pll_div->div.reg = reg;
+	pll_div->div.shift = shift;
+	pll_div->div.width = width;
+	pll_div->div.flags = clk_divider_flags;
+	pll_div->div.lock = lock;
+	pll_div->div.table = table;
+	pll_div->div.hw.init = &init;
+
+	pll_div->hw_pll = pll_hw;
+
+	/* register the clock */
+	hw = &pll_div->div.hw;
+	ret = clk_hw_register(NULL, hw);
+	if (ret) {
+		kfree(pll_div);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
+
+static struct clk_hw *stm32f4_rcc_register_pll(const char *pllsrc,
+		const struct stm32f4_pll_data *data,  spinlock_t *lock)
+{
+	struct stm32f4_pll *pll;
+	struct clk_init_data init = { NULL };
+	void __iomem *reg;
+	struct clk_hw *pll_hw;
+	int ret;
+	int i;
+	const struct stm32f4_vco_data *vco;
+
+
+	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
+	if (!pll)
+		return ERR_PTR(-ENOMEM);
+
+	vco = &vco_data[data->pll_num];
+
+	init.name = vco->vco_name;
+	init.ops = &stm32f4_pll_gate_ops;
+	init.flags = CLK_SET_RATE_GATE;
+	init.parent_names = &pllsrc;
+	init.num_parents = 1;
+
+	pll->gate.lock = lock;
+	pll->gate.reg = base + STM32F4_RCC_CR;
+	pll->gate.bit_idx = vco->bit_idx;
+	pll->gate.hw.init = &init;
+
+	pll->offset = vco->offset;
+	pll->n_start = data->n_start;
+	pll->bit_rdy_idx = vco->bit_rdy_idx;
+	pll->status = (readl(base + STM32F4_RCC_CR) >> vco->bit_idx) & 0x1;
 
-	unsigned long pllm   = pllcfgr & 0x3f;
-	unsigned long plln   = (pllcfgr >> 6) & 0x1ff;
-	unsigned long pllp   = BIT(((pllcfgr >> 16) & 3) + 1);
-	const char   *pllsrc = pllcfgr & BIT(22) ? hse_clk : hsi_clk;
-	unsigned long pllq   = (pllcfgr >> 24) & 0xf;
+	reg = base + pll->offset;
 
-	clk_register_fixed_factor(NULL, "vco", pllsrc, 0, plln, pllm);
-	clk_register_fixed_factor(NULL, "pll", "vco", 0, 1, pllp);
-	clk_register_fixed_factor(NULL, "pll48", "vco", 0, 1, pllq);
+	pll_hw = &pll->gate.hw;
+	ret = clk_hw_register(NULL, pll_hw);
+	if (ret) {
+		kfree(pll);
+		return ERR_PTR(ret);
+	}
+
+	for (i = 0; i < MAX_PLL_DIV; i++)
+		if (data->div_name[i])
+			clk_register_pll_div(data->div_name[i],
+					vco->vco_name,
+					0,
+					reg,
+					div_data[i].shift,
+					div_data[i].width,
+					div_data[i].flag_div,
+					div_data[i].div_table,
+					pll_hw,
+					lock);
+	return pll_hw;
 }
 
 /*
@@ -615,18 +912,21 @@ struct stm32f4_clk_data {
 	const struct stm32f4_gate_data *gates_data;
 	const u64 *gates_map;
 	int gates_num;
+	const struct stm32f4_pll_data *pll_data;
 };
 
 static const struct stm32f4_clk_data stm32f429_clk_data = {
 	.gates_data	= stm32f429_gates,
 	.gates_map	= stm32f42xx_gate_map,
 	.gates_num	= ARRAY_SIZE(stm32f429_gates),
+	.pll_data	= stm32f429_pll,
 };
 
 static const struct stm32f4_clk_data stm32f469_clk_data = {
 	.gates_data	= stm32f469_gates,
 	.gates_map	= stm32f46xx_gate_map,
 	.gates_num	= ARRAY_SIZE(stm32f469_gates),
+	.pll_data	= stm32f469_pll,
 };
 
 static const struct of_device_id stm32f4_of_match[] = {
@@ -647,6 +947,9 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 	int n;
 	const struct of_device_id *match;
 	const struct stm32f4_clk_data *data;
+	unsigned long pllcfgr;
+	const char *pllsrc;
+	unsigned long pllm;
 
 	base = of_iomap(np, 0);
 	if (!base) {
@@ -677,7 +980,21 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 
 	clk_register_fixed_rate_with_accuracy(NULL, "hsi", NULL, 0,
 			16000000, 160000);
-	stm32f4_rcc_register_pll(hse_clk, "hsi");
+	pllcfgr = readl(base + STM32F4_RCC_PLLCFGR);
+	pllsrc = pllcfgr & BIT(22) ? hse_clk : "hsi";
+	pllm = pllcfgr & 0x3f;
+
+	clk_hw_register_fixed_factor(NULL, "vco_in", pllsrc,
+					       0, 1, pllm);
+
+	stm32f4_rcc_register_pll("vco_in", &data->pll_data[0],
+			&stm32f4_clk_lock);
+
+	clks[PLL_VCO_I2S] = stm32f4_rcc_register_pll("vco_in",
+			&data->pll_data[1], &stm32f4_clk_lock);
+
+	clks[PLL_VCO_SAI] = stm32f4_rcc_register_pll("vco_in",
+			&data->pll_data[2], &stm32f4_clk_lock);
 
 	sys_parents[1] = hse_clk;
 	clk_register_mux_table(
-- 
1.9.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox