[PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.

linux-mmc.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
@ 2012-09-07 10:43 Javier Martin
  2012-09-10  7:47 ` Sascha Hauer
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Javier Martin @ 2012-09-07 10:43 UTC (permalink / raw)
  To: linux-mmc
  Cc: viresh.linux, g.liakhovetski, vinod.koul, s.hauer, cjb,
	fabio.estevam, gcembed, Javier Martin

The problem can be easily reproduced using a script that loops
copying a file in an SD card to another place in the same SD card,
and it is related to read transfers. This only happens with DMA enabled.

This is related to the fact that, when reading, an MMC irq signals
the fact that all data from the SD card has been copied to the
internal buffers. However, it doesn't signal whether the DMA transfer
that is in charge of moving data from these internal buffers to RAM
has finished or not. Thus, calling dmaengine_terminate_all() in the
MMC irq routine can cancel an ongoing DMA transfer leaving some data
in the internal buffers that produces an accumulative effect which,
in the end, blocks a read data transfer forever.

The following patch watches DMA irq for reading and MMC irqs for
writing transfers. The 'dangerous' usage of dmaengine_terminate_all()
is removed and a timeout of 10 seconds is added so that the MMC won't
block forever anymore.

Signed-off-by: Javier Martin <javier.martin@vista-silicon.com>
---
Changes since v1:
 - Use DMA callbacks for reading and MMC irqs for writing instead
 of monitoring both irqs for each transfer.
 - Increase timeout to 10 seconds to give enough time to slow cards.

---
 drivers/mmc/host/mxcmmc.c |   76 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 70 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 28ed52d..fc42a2e 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -44,6 +44,7 @@
 #include <mach/hardware.h>
 
 #define DRIVER_NAME "mxc-mmc"
+#define MXCMCI_TIMEOUT_MS 10000
 
 #define MMC_REG_STR_STP_CLK		0x00
 #define MMC_REG_STATUS			0x04
@@ -150,6 +151,8 @@ struct mxcmci_host {
 	int			dmareq;
 	struct dma_slave_config dma_slave_config;
 	struct imx_dma_data	dma_data;
+
+	struct timer_list	watchdog;
 };
 
 static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
@@ -271,9 +274,32 @@ static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 	dmaengine_submit(host->desc);
 	dma_async_issue_pending(host->dma);
 
+	mod_timer(&host->watchdog, jiffies + msecs_to_jiffies(MXCMCI_TIMEOUT_MS));
+
 	return 0;
 }
 
+static void mxcmci_cmd_done(struct mxcmci_host *host, unsigned int stat);
+static void mxcmci_data_done(struct mxcmci_host *host, unsigned int stat);
+
+static void mxcmci_dma_callback(void *data)
+{
+	struct mxcmci_host *host = data;
+	u32 stat;
+
+	del_timer(&host->watchdog);
+
+	stat = readl(host->base + MMC_REG_STATUS);
+	writel(stat & ~STATUS_DATA_TRANS_DONE, host->base + MMC_REG_STATUS);
+
+	dev_dbg(mmc_dev(host->mmc), "%s: 0x%08x\n", __func__, stat);
+
+	if (stat & STATUS_READ_OP_DONE)
+		writel(STATUS_READ_OP_DONE, host->base + MMC_REG_STATUS);
+
+	mxcmci_data_done(host, stat);
+}
+
 static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd,
 		unsigned int cmdat)
 {
@@ -305,8 +331,14 @@ static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd,
 
 	int_cntr = INT_END_CMD_RES_EN;
 
-	if (mxcmci_use_dma(host))
-		int_cntr |= INT_READ_OP_EN | INT_WRITE_OP_DONE_EN;
+	if (mxcmci_use_dma(host)) {
+		if (host->dma_dir == DMA_FROM_DEVICE) {
+			host->desc->callback = mxcmci_dma_callback;
+			host->desc->callback_param = host;
+		} else {
+			int_cntr |= INT_WRITE_OP_DONE_EN;
+		}
+	}
 
 	spin_lock_irqsave(&host->lock, flags);
 	if (host->use_sdio)
@@ -345,11 +377,9 @@ static int mxcmci_finish_data(struct mxcmci_host *host, unsigned int stat)
 	struct mmc_data *data = host->data;
 	int data_error;
 
-	if (mxcmci_use_dma(host)) {
-		dmaengine_terminate_all(host->dma);
+	if (mxcmci_use_dma(host))
 		dma_unmap_sg(host->dma->device->dev, data->sg, data->sg_len,
 				host->dma_dir);
-	}
 
 	if (stat & STATUS_ERR_MASK) {
 		dev_dbg(mmc_dev(host->mmc), "request failed. status: 0x%08x\n",
@@ -624,8 +654,10 @@ static irqreturn_t mxcmci_irq(int irq, void *devid)
 		mxcmci_cmd_done(host, stat);
 
 	if (mxcmci_use_dma(host) &&
-		  (stat & (STATUS_DATA_TRANS_DONE | STATUS_WRITE_OP_DONE)))
+		  (stat & (STATUS_DATA_TRANS_DONE | STATUS_WRITE_OP_DONE))) {
+		del_timer(&host->watchdog);
 		mxcmci_data_done(host, stat);
+	}
 
 	if (host->default_irq_mask &&
 		  (stat & (STATUS_CARD_INSERTION | STATUS_CARD_REMOVAL)))
@@ -836,6 +868,34 @@ static bool filter(struct dma_chan *chan, void *param)
 	return true;
 }
 
+static void mxcmci_watchdog(unsigned long data)
+{
+	struct mmc_host *mmc = (struct mmc_host *)data;
+	struct mxcmci_host *host = mmc_priv(mmc);
+	struct mmc_request *req = host->req;
+	unsigned int stat = readl(host->base + MMC_REG_STATUS);
+
+	if (host->dma_dir == DMA_FROM_DEVICE) {
+		dmaengine_terminate_all(host->dma);
+		dev_err(mmc_dev(host->mmc),
+			"%s: read time out (status = 0x%08x)\n",
+			__func__, stat);
+	} else {
+		dev_err(mmc_dev(host->mmc),
+			"%s: write time out (status = 0x%08x)\n",
+			__func__, stat);
+		mxcmci_softreset(host);
+	}
+
+	/* Mark transfer as erroneus and inform the upper layers */
+
+	host->data->error = -ETIMEDOUT;
+	host->req = NULL;
+	host->cmd = NULL;
+	host->data = NULL;
+	mmc_request_done(host->mmc, req);
+}
+
 static const struct mmc_host_ops mxcmci_ops = {
 	.request		= mxcmci_request,
 	.set_ios		= mxcmci_set_ios,
@@ -968,6 +1028,10 @@ static int mxcmci_probe(struct platform_device *pdev)
 
 	mmc_add_host(mmc);
 
+	init_timer(&host->watchdog);
+	host->watchdog.function = &mxcmci_watchdog;
+	host->watchdog.data = (unsigned long)mmc;
+
 	return 0;
 
 out_free_irq:
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2012-09-07 10:43 [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever Javier Martin
@ 2012-09-10  7:47 ` Sascha Hauer
  2012-09-19  5:52   ` Chris Ball
  2012-09-14  2:52 ` Vinod Koul
  2013-02-19 14:14 ` Anatolij Gustschin
  2 siblings, 1 reply; 10+ messages in thread
From: Sascha Hauer @ 2012-09-10  7:47 UTC (permalink / raw)
  To: Javier Martin
  Cc: linux-mmc, viresh.linux, g.liakhovetski, vinod.koul, cjb,
	fabio.estevam, gcembed

On Fri, Sep 07, 2012 at 12:43:37PM +0200, Javier Martin wrote:
> The problem can be easily reproduced using a script that loops
> copying a file in an SD card to another place in the same SD card
> and its related to read transfers. This only happens with DMA enabled.
> 
> This is related to the fact that, when reading, an MMC irq signals
> the fact that all data from the SD card has been copied to the
> internal buffers. However, it doesn't signal whether the DMA transfer
> that is in charge of moving data from these internal buffers to RAM
> has finished or not. Thus, calling dmaengine_terminate_all() in the
> MMC irq routine can cancel an ongoing DMA transfer leaving some data
> in the internal buffers that produces an accumulative effect which,
> in the end, blocks a read data transfer forever.
> 
> The following patch watches DMA irq for reading and MMC irqs for
> writing transfers. The 'dangerous' usage of dmaengine_terminate_all()
> is removed and a timeout of 10 seconds is added so that the MMC won't
> block forever anymore.
> 
> Signed-off-by: Javier Martin <javier.martin@vista-silicon.com>
> ---
> Changes since v1:
>  - Use DMA callbacks for reading and MMC irqs for writing instead
>  of monitoring both irqs for each transfer.
>  - Increase timeout to 10 seconds to give enough time to slow cards.
> 

Looks good now, thanks.

Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>


> ---
>  drivers/mmc/host/mxcmmc.c |   76 +++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 70 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
> index 28ed52d..fc42a2e 100644
> --- a/drivers/mmc/host/mxcmmc.c
> +++ b/drivers/mmc/host/mxcmmc.c
> @@ -44,6 +44,7 @@
>  #include <mach/hardware.h>
>  
>  #define DRIVER_NAME "mxc-mmc"
> +#define MXCMCI_TIMEOUT_MS 10000
>  
>  #define MMC_REG_STR_STP_CLK		0x00
>  #define MMC_REG_STATUS			0x04
> @@ -150,6 +151,8 @@ struct mxcmci_host {
>  	int			dmareq;
>  	struct dma_slave_config dma_slave_config;
>  	struct imx_dma_data	dma_data;
> +
> +	struct timer_list	watchdog;
>  };
>  
>  static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
> @@ -271,9 +274,32 @@ static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
>  	dmaengine_submit(host->desc);
>  	dma_async_issue_pending(host->dma);
>  
> +	mod_timer(&host->watchdog, jiffies + msecs_to_jiffies(MXCMCI_TIMEOUT_MS));
> +
>  	return 0;
>  }
>  
> +static void mxcmci_cmd_done(struct mxcmci_host *host, unsigned int stat);
> +static void mxcmci_data_done(struct mxcmci_host *host, unsigned int stat);
> +
> +static void mxcmci_dma_callback(void *data)
> +{
> +	struct mxcmci_host *host = data;
> +	u32 stat;
> +
> +	del_timer(&host->watchdog);
> +
> +	stat = readl(host->base + MMC_REG_STATUS);
> +	writel(stat & ~STATUS_DATA_TRANS_DONE, host->base + MMC_REG_STATUS);
> +
> +	dev_dbg(mmc_dev(host->mmc), "%s: 0x%08x\n", __func__, stat);
> +
> +	if (stat & STATUS_READ_OP_DONE)
> +		writel(STATUS_READ_OP_DONE, host->base + MMC_REG_STATUS);
> +
> +	mxcmci_data_done(host, stat);
> +}
> +
>  static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd,
>  		unsigned int cmdat)
>  {
> @@ -305,8 +331,14 @@ static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd,
>  
>  	int_cntr = INT_END_CMD_RES_EN;
>  
> -	if (mxcmci_use_dma(host))
> -		int_cntr |= INT_READ_OP_EN | INT_WRITE_OP_DONE_EN;
> +	if (mxcmci_use_dma(host)) {
> +		if (host->dma_dir == DMA_FROM_DEVICE) {
> +			host->desc->callback = mxcmci_dma_callback;
> +			host->desc->callback_param = host;
> +		} else {
> +			int_cntr |= INT_WRITE_OP_DONE_EN;
> +		}
> +	}
>  
>  	spin_lock_irqsave(&host->lock, flags);
>  	if (host->use_sdio)
> @@ -345,11 +377,9 @@ static int mxcmci_finish_data(struct mxcmci_host *host, unsigned int stat)
>  	struct mmc_data *data = host->data;
>  	int data_error;
>  
> -	if (mxcmci_use_dma(host)) {
> -		dmaengine_terminate_all(host->dma);
> +	if (mxcmci_use_dma(host))
>  		dma_unmap_sg(host->dma->device->dev, data->sg, data->sg_len,
>  				host->dma_dir);
> -	}
>  
>  	if (stat & STATUS_ERR_MASK) {
>  		dev_dbg(mmc_dev(host->mmc), "request failed. status: 0x%08x\n",
> @@ -624,8 +654,10 @@ static irqreturn_t mxcmci_irq(int irq, void *devid)
>  		mxcmci_cmd_done(host, stat);
>  
>  	if (mxcmci_use_dma(host) &&
> -		  (stat & (STATUS_DATA_TRANS_DONE | STATUS_WRITE_OP_DONE)))
> +		  (stat & (STATUS_DATA_TRANS_DONE | STATUS_WRITE_OP_DONE))) {
> +		del_timer(&host->watchdog);
>  		mxcmci_data_done(host, stat);
> +	}
>  
>  	if (host->default_irq_mask &&
>  		  (stat & (STATUS_CARD_INSERTION | STATUS_CARD_REMOVAL)))
> @@ -836,6 +868,34 @@ static bool filter(struct dma_chan *chan, void *param)
>  	return true;
>  }
>  
> +static void mxcmci_watchdog(unsigned long data)
> +{
> +	struct mmc_host *mmc = (struct mmc_host *)data;
> +	struct mxcmci_host *host = mmc_priv(mmc);
> +	struct mmc_request *req = host->req;
> +	unsigned int stat = readl(host->base + MMC_REG_STATUS);
> +
> +	if (host->dma_dir == DMA_FROM_DEVICE) {
> +		dmaengine_terminate_all(host->dma);
> +		dev_err(mmc_dev(host->mmc),
> +			"%s: read time out (status = 0x%08x)\n",
> +			__func__, stat);
> +	} else {
> +		dev_err(mmc_dev(host->mmc),
> +			"%s: write time out (status = 0x%08x)\n",
> +			__func__, stat);
> +		mxcmci_softreset(host);
> +	}
> +
> +	/* Mark transfer as erroneus and inform the upper layers */
> +
> +	host->data->error = -ETIMEDOUT;
> +	host->req = NULL;
> +	host->cmd = NULL;
> +	host->data = NULL;
> +	mmc_request_done(host->mmc, req);
> +}
> +
>  static const struct mmc_host_ops mxcmci_ops = {
>  	.request		= mxcmci_request,
>  	.set_ios		= mxcmci_set_ios,
> @@ -968,6 +1028,10 @@ static int mxcmci_probe(struct platform_device *pdev)
>  
>  	mmc_add_host(mmc);
>  
> +	init_timer(&host->watchdog);
> +	host->watchdog.function = &mxcmci_watchdog;
> +	host->watchdog.data = (unsigned long)mmc;
> +
>  	return 0;
>  
>  out_free_irq:
> -- 
> 1.7.9.5
> 
> 

-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2012-09-07 10:43 [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever Javier Martin
  2012-09-10  7:47 ` Sascha Hauer
@ 2012-09-14  2:52 ` Vinod Koul
  2012-09-14 12:50   ` javier Martin
  2013-02-19 14:14 ` Anatolij Gustschin
  2 siblings, 1 reply; 10+ messages in thread
From: Vinod Koul @ 2012-09-14  2:52 UTC (permalink / raw)
  To: Javier Martin
  Cc: linux-mmc, viresh.linux, g.liakhovetski, s.hauer, cjb,
	fabio.estevam, gcembed

On Fri, 2012-09-07 at 12:43 +0200, Javier Martin wrote:
> 
> The problem can be easily reproduced using a script that loops
> copying a file in an SD card to another place in the same SD card
> and its related to read transfers. This only happens with DMA enabled.
> 
> This is related to the fact that, when reading, an MMC irq signals
> the fact that all data from the SD card has been copied to the
> internal buffers. However, it doesn't signal whether the DMA transfer
> that is in charge of moving data from these internal buffers to RAM
> has finished or not. Thus, calling dmaengine_terminate_all() in the
> MMC irq routine can cancel an ongoing DMA transfer leaving some data
> in the internal buffers that produces an accumulative effect which,
> in the end, blocks a read data transfer forever.
> 
> The following patch watches DMA irq for reading and MMC irqs for
> writing transfers. The 'dangerous' usage of dmaengine_terminate_all()
> is removed and a timeout of 10 seconds is added so that the MMC won't
> block forever anymore. 
For a normal transaction, why should you call dmaengine_terminate_all()?
This should only be called when you are doing an abort or cleanup in
some erroneous situation.

Why was it called in the first place?
-- 
~Vinod


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2012-09-14  2:52 ` Vinod Koul
@ 2012-09-14 12:50   ` javier Martin
  0 siblings, 0 replies; 10+ messages in thread
From: javier Martin @ 2012-09-14 12:50 UTC (permalink / raw)
  To: Vinod Koul
  Cc: linux-mmc, viresh.linux, g.liakhovetski, s.hauer, cjb,
	fabio.estevam, gcembed

Hi Vinod,

On 14 September 2012 04:52, Vinod Koul <vinod.koul@linux.intel.com> wrote:
> On Fri, 2012-09-07 at 12:43 +0200, Javier Martin wrote:
>>
>> The problem can be easily reproduced using a script that loops
>> copying a file in an SD card to another place in the same SD card
>> and its related to read transfers. This only happens with DMA enabled.
>>
>> This is related to the fact that, when reading, an MMC irq signals
>> the fact that all data from the SD card has been copied to the
>> internal buffers. However, it doesn't signal whether the DMA transfer
>> that is in charge of moving data from these internal buffers to RAM
>> has finished or not. Thus, calling dmaengine_terminate_all() in the
>> MMC irq routine can cancel an ongoing DMA transfer leaving some data
>> in the internal buffers that produces an accumulative effect which,
>> in the end, blocks a read data transfer forever.
>>
>> The following patch watches DMA irq for reading and MMC irqs for
>> writing transfers. The 'dangerous' usage of dmaengine_terminate_all()
>> is removed and a timeout of 10 seconds is added so that the MMC won't
>> block forever anymore.
> For a normal transactions why should you call dmaengine_terminate_all().

Agree. In fact, this patch fixes that.

> This should be called when you doing abort or cleanup in some erranous
> situation.

After this patch, dmaengine_terminate_all() will only be called if a
DMA transfer lasts more than 10 seconds, which is an explicit
error/abort condition that can happen sometimes (i.e. when an SD card
is worn out).

> why was it called in first place?

I don't know, since I didn't write the original support, but it's not
relevant because this patch removes the incorrect use of
dmaengine_terminate_all().

Since you took the time to review the patch, could you please give me
your ack too in case you find everything seems fine?

Regards.
-- 
Javier Martin
Vista Silicon S.L.
CDTUC - FASE C - Oficina S-345
Avda de los Castros s/n
39005- Santander. Cantabria. Spain
+34 942 25 32 60
www.vista-silicon.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2012-09-10  7:47 ` Sascha Hauer
@ 2012-09-19  5:52   ` Chris Ball
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Ball @ 2012-09-19  5:52 UTC (permalink / raw)
  To: Sascha Hauer
  Cc: Javier Martin, linux-mmc, viresh.linux, g.liakhovetski,
	vinod.koul, fabio.estevam, gcembed

Hi,

On Mon, Sep 10 2012, Sascha Hauer wrote:
> On Fri, Sep 07, 2012 at 12:43:37PM +0200, Javier Martin wrote:
>> The problem can be easily reproduced using a script that loops
>> copying a file in an SD card to another place in the same SD card
>> and its related to read transfers. This only happens with DMA enabled.
>> 
>> This is related to the fact that, when reading, an MMC irq signals
>> the fact that all data from the SD card has been copied to the
>> internal buffers. However, it doesn't signal whether the DMA transfer
>> that is in charge of moving data from these internal buffers to RAM
>> has finished or not. Thus, calling dmaengine_terminate_all() in the
>> MMC irq routine can cancel an ongoing DMA transfer leaving some data
>> in the internal buffers that produces an accumulative effect which,
>> in the end, blocks a read data transfer forever.
>> 
>> The following patch watches DMA irq for reading and MMC irqs for
>> writing transfers. The 'dangerous' usage of dmaengine_terminate_all()
>> is removed and a timeout of 10 seconds is added so that the MMC won't
>> block forever anymore.
>> 
>> Signed-off-by: Javier Martin <javier.martin@vista-silicon.com>
>> ---
>> Changes since v1:
>>  - Use DMA callbacks for reading and MMC irqs for writing instead
>>  of monitoring both irqs for each transfer.
>>  - Increase timeout to 10 seconds to give enough time to slow cards.
>> 
>
> Looks good now, thanks.
>
> Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>

Thanks, queued in mmc-next for 3.7.  Vinod, would be good to add your
Reviewed-by if you're happy with this patch now.

- Chris.
-- 
Chris Ball   <cjb@laptop.org>   <http://printf.net/>
One Laptop Per Child

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2012-09-07 10:43 [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever Javier Martin
  2012-09-10  7:47 ` Sascha Hauer
  2012-09-14  2:52 ` Vinod Koul
@ 2013-02-19 14:14 ` Anatolij Gustschin
  2013-02-21 12:32   ` javier Martin
  2013-02-21 15:49   ` Fabio Estevam
  2 siblings, 2 replies; 10+ messages in thread
From: Anatolij Gustschin @ 2013-02-19 14:14 UTC (permalink / raw)
  To: Javier Martin
  Cc: linux-mmc, viresh.linux, g.liakhovetski, vinod.koul, s.hauer, cjb,
	fabio.estevam, gcembed

On Fri,  7 Sep 2012 12:43:37 +0200
Javier Martin <javier.martin@vista-silicon.com> wrote:
...
> +static void mxcmci_dma_callback(void *data)
> +{
> +	struct mxcmci_host *host = data;
> +	u32 stat;
> +
> +	del_timer(&host->watchdog);
> +
> +	stat = readl(host->base + MMC_REG_STATUS);
> +	writel(stat & ~STATUS_DATA_TRANS_DONE, host->base + MMC_REG_STATUS);
> +
> +	dev_dbg(mmc_dev(host->mmc), "%s: 0x%08x\n", __func__, stat);
> +
> +	if (stat & STATUS_READ_OP_DONE)
> +		writel(STATUS_READ_OP_DONE, host->base + MMC_REG_STATUS);
> +
> +	mxcmci_data_done(host, stat);

This change introduces a race condition for host->req (and maybe
for host->data) accesses. The callback runs in soft-irq context and can
be interrupted by the mxcmci_irq() interrupt, which can finish the request
and set host->req to NULL. mxcmci_data_done() then crashes with a NULL
pointer dereference. How extensively was it tested?

Anatolij

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2013-02-19 14:14 ` Anatolij Gustschin
@ 2013-02-21 12:32   ` javier Martin
  2013-02-21 12:57     ` Anatolij Gustschin
  2013-02-21 15:49   ` Fabio Estevam
  1 sibling, 1 reply; 10+ messages in thread
From: javier Martin @ 2013-02-21 12:32 UTC (permalink / raw)
  To: Anatolij Gustschin
  Cc: linux-mmc, viresh.linux, g.liakhovetski, vinod.koul, s.hauer, cjb,
	fabio.estevam, gcembed

Hi,

On 19 February 2013 15:14, Anatolij Gustschin <agust@denx.de> wrote:
> On Fri,  7 Sep 2012 12:43:37 +0200
> Javier Martin <javier.martin@vista-silicon.com> wrote:
> ...
>> +static void mxcmci_dma_callback(void *data)
>> +{
>> +     struct mxcmci_host *host = data;
>> +     u32 stat;
>> +
>> +     del_timer(&host->watchdog);
>> +
>> +     stat = readl(host->base + MMC_REG_STATUS);
>> +     writel(stat & ~STATUS_DATA_TRANS_DONE, host->base + MMC_REG_STATUS);
>> +
>> +     dev_dbg(mmc_dev(host->mmc), "%s: 0x%08x\n", __func__, stat);
>> +
>> +     if (stat & STATUS_READ_OP_DONE)
>> +             writel(STATUS_READ_OP_DONE, host->base + MMC_REG_STATUS);
>> +
>> +     mxcmci_data_done(host, stat);
>
> this change introduces a race condition for host->req (and maybe
> for host->data) accesses. The callback is running in soft-irq context and can
> be interrupted by the mxcmci_irq() interrupt which can finish the request and
> set host->req to NULL. Then mxcmci_data_done() crashes with a null pointer
> dereference. How extensively was it tested?

It was tested by running bonnie++ in a loop for a week.
I didn't notice the race condition. Have you had real issues, or did you
just find it by analysing the code?

Regards.

-- 
Javier Martin
Vista Silicon S.L.
CDTUC - FASE C - Oficina S-345
Avda de los Castros s/n
39005- Santander. Cantabria. Spain
+34 942 25 32 60
www.vista-silicon.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2013-02-21 12:32   ` javier Martin
@ 2013-02-21 12:57     ` Anatolij Gustschin
  0 siblings, 0 replies; 10+ messages in thread
From: Anatolij Gustschin @ 2013-02-21 12:57 UTC (permalink / raw)
  To: javier Martin
  Cc: linux-mmc, viresh.linux, g.liakhovetski, vinod.koul, s.hauer, cjb,
	fabio.estevam, gcembed

Hi,

On Thu, 21 Feb 2013 13:32:08 +0100
javier Martin <javier.martin@vista-silicon.com> wrote:
...
> It was tested executing a loop with bonnie++ for a week.
> I didn't notice the race condition. Have you had real issues or just
> found  it by analysing the code?

I've had real issues, but my setup is different. I'm using this mxcmmc
driver with extensions on mpc5121e (with a different DMA controller/driver).
It is quite difficult to reproduce the issue on my test system. I also
have an i.mx31 based pcm037 board, but the SD-Card slot is broken on it,
so I cannot test the driver on i.mx31.

Thanks,

Anatolij

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2013-02-19 14:14 ` Anatolij Gustschin
  2013-02-21 12:32   ` javier Martin
@ 2013-02-21 15:49   ` Fabio Estevam
  2013-03-14 19:58     ` Anatolij Gustschin
  1 sibling, 1 reply; 10+ messages in thread
From: Fabio Estevam @ 2013-02-21 15:49 UTC (permalink / raw)
  To: Anatolij Gustschin
  Cc: Javier Martin, linux-mmc, viresh.linux, g.liakhovetski,
	vinod.koul, s.hauer, cjb, gcembed, festevam

Hi Anatolij,

Anatolij Gustschin wrote:

> this change introduces a race condition for host->req (and maybe
> for host->data) accesses. The callback is running in soft-irq context and can
> be interrupted by the mxcmci_irq() interrupt which can finish the request and
> set host->req to NULL. Then mxcmci_data_done() crashes with a null pointer
> dereference. How extensively was it tested?

Does the patch below help?

--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -309,9 +309,11 @@ static void mxcmci_dma_callback(void *data)
 {
        struct mxcmci_host *host = data;
        u32 stat;
+       unsigned int long flags;

        del_timer(&host->watchdog);

+       spin_lock_irqsave(&host->lock, flags);
        stat = readl(host->base + MMC_REG_STATUS);
        writel(stat & ~STATUS_DATA_TRANS_DONE, host->base + MMC_REG_STATUS);

@@ -320,6 +322,7 @@ static void mxcmci_dma_callback(void *data)
        if (stat & STATUS_READ_OP_DONE)
                writel(STATUS_READ_OP_DONE, host->base + MMC_REG_STATUS);

+       spin_unlock_irqrestore(&host->lock, flags);
        mxcmci_data_done(host, stat);
 }



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever.
  2013-02-21 15:49   ` Fabio Estevam
@ 2013-03-14 19:58     ` Anatolij Gustschin
  0 siblings, 0 replies; 10+ messages in thread
From: Anatolij Gustschin @ 2013-03-14 19:58 UTC (permalink / raw)
  To: Fabio Estevam
  Cc: Javier Martin, linux-mmc, viresh.linux, g.liakhovetski,
	vinod.koul, s.hauer, cjb, gcembed, festevam

Hi Fabio,

On Thu, 21 Feb 2013 12:49:49 -0300
Fabio Estevam <fabio.estevam@freescale.com> wrote:

> Hi Anatolij,
> 
> Anatolij Gustschin wrote:
> 
> > this change introduces a race condition for host->req (and maybe
> > for host->data) accesses. The callback is running in soft-irq context and can
> > be interrupted by the mxcmci_irq() interrupt which can finish the request and
> > set host->req to NULL. Then mxcmci_data_done() crashes with a null pointer
> > dereference. How extensively was it tested?
> 
> Does the patch below help?

Sorry for the long delay. No, it doesn't help. With this patch applied,
the task copying data to the SD card hangs forever. I used the following
patch to fix the observed crashes:

 http://thread.gmane.org/gmane.linux.kernel.mmc/19658/focus=19662

Thanks anyway!

Anatolij

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-03-14 19:58 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-07 10:43 [PATCH v2] mmc: mxcmmc: fix bug that may block a data transfer forever Javier Martin
2012-09-10  7:47 ` Sascha Hauer
2012-09-19  5:52   ` Chris Ball
2012-09-14  2:52 ` Vinod Koul
2012-09-14 12:50   ` javier Martin
2013-02-19 14:14 ` Anatolij Gustschin
2013-02-21 12:32   ` javier Martin
2013-02-21 12:57     ` Anatolij Gustschin
2013-02-21 15:49   ` Fabio Estevam
2013-03-14 19:58     ` Anatolij Gustschin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).