From mboxrd@z Thu Jan 1 00:00:00 1970 From: Adrian Hunter Subject: Re: [PATCH] MMC: Refine block layer waiting for card state Date: Tue, 28 Sep 2010 09:51:09 +0300 Message-ID: <4CA1905D.5010701@nokia.com> References: Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Return-path: Received: from smtp.nokia.com ([192.100.122.233]:55183 "EHLO mgw-mx06.nokia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754734Ab0I1GvW (ORCPT ); Tue, 28 Sep 2010 02:51:22 -0400 In-Reply-To: Sender: linux-mmc-owner@vger.kernel.org List-Id: linux-mmc@vger.kernel.org To: Ethan Du Cc: linux-mmc On 27/09/10 06:32, Ethan Du wrote: > > The while loop when handling rw request may become deadloop in > case of bad card > I've seen mmcqd gets blocked forever after a single error message: > > mmcblk0: error -110 sending read/write command, response 0x900, card > status 0x80e00 > > Also there was case of card reports status without error > > mmcblk0: error -110 sending read/write command, response 0x900, card > status 0xe00 > > After this error, the card can stay in prg state, and never comes back, > and may not report any error further. So a break out condition > should be set in mmc block layer: > * should not enter the waiting loop in case of error How do you know there are not any errors where you do need to wait? > * should break out from the waiting loop, if card response with error > * should break out from the waiting loop when timeout > > These will not help with the card, one more thing to do: > * re-init the card in case of too many errors Using mmc_detect_change() will give you a new block device. That is probably a bad idea for a non-removable card. Also if the reinitialization is going to help, then why wait for lots of errors before doing it. > > Signed-off-by: Ethan Du > --- > drivers/mmc/card/block.c | 35 +++++++++++++++++++++++++++-------- > drivers/mmc/core/mmc.c | 9 +++++++-- > drivers/mmc/core/sd.c | 8 ++++++-- > include/linux/mmc/card.h | 3 +++ > include/linux/mmc/mmc.h | 1 + > 5 files changed, 44 insertions(+), 12 deletions(-) > > diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c > index d545f79..1d304a2 100644 > --- a/drivers/mmc/card/block.c > +++ b/drivers/mmc/card/block.c > @@ -316,12 +316,14 @@ out: > return err ? 0 : 1; > } > > +#define BUSY_TIMEOUT_MS (16 * 1024) > static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req) > { > struct mmc_blk_data *md = mq->data; > struct mmc_card *card = md->queue.card; > struct mmc_blk_request brq; > int ret = 1, disable_multi = 0; > + unsigned long timeout = 0; > > mmc_claim_host(card->host); > > @@ -453,7 +455,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue > *mq, struct request *req) > brq.stop.resp[0], status); > } > > - if (!mmc_host_is_spi(card->host)&& rq_data_dir(req) != READ) { > + if (!mmc_host_is_spi(card->host)&& rq_data_dir(req) != READ&& > + !brq.cmd.error&& !brq.data.error&& !brq.stop.error) { > + timeout = jiffies + msecs_to_jiffies(BUSY_TIMEOUT_MS); > do { > int err; > > @@ -466,13 +470,22 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue > *mq, struct request *req) > req->rq_disk->disk_name, err); > goto cmd_err; > } > + if (cmd.resp[0]& R1_ERROR_MASK) { > + printk(KERN_ERR "%s: card err %#x\n", > + req->rq_disk->disk_name, > + cmd.resp[0]); > + goto cmd_err; > + } > /* > * Some cards mishandle the status bits, > * so make sure to check both the busy > * indication and the card state. > */ > - } while (!(cmd.resp[0]& R1_READY_FOR_DATA) || > - (R1_CURRENT_STATE(cmd.resp[0]) == 7)); > + if ((cmd.resp[0]& R1_READY_FOR_DATA)&& > + (R1_CURRENT_STATE(cmd.resp[0]) != 7)) > + break; > + } while (time_before(jiffies, timeout)); > + /* Ignore timeout out */ > > #if 0 > if (cmd.resp[0]& ~0x00000900) > @@ -510,11 +523,11 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue > *mq, struct request *req) > > return 1; > > - cmd_err: > - /* > - * If this is an SD card and we're writing, we can first > - * mark the known good sectors as ok. > - * > +cmd_err: > + /* > + * If this is an SD card and we're writing, we can first > + * mark the known good sectors as ok. > + * > * If the card is not SD, we can still ok written sectors > * as reported by the controller (which might be less than > * the real number of written sectors, but never more). > @@ -541,6 +554,12 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue > *mq, struct request *req) > ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req)); > spin_unlock_irq(&md->lock); > > + card->err_count++; > + if (card->err_count>= ERR_TRIGGER_REINIT) { > + printk(KERN_WARNING "%s: re-init the card due to error\n", > + req->rq_disk->disk_name); > + mmc_detect_change(card->host, 0); > + } > return 0; > } > > diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c > index 6909a54..79c4759 100644 > --- a/drivers/mmc/core/mmc.c > +++ b/drivers/mmc/core/mmc.c > @@ -535,6 +535,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, > > if (!oldcard) > host->card = card; > + else > + oldcard->err_count = 0; > > return 0; > > @@ -563,17 +565,20 @@ static void mmc_remove(struct mmc_host *host) > */ > static void mmc_detect(struct mmc_host *host) > { > - int err; > + int err = 0; > > BUG_ON(!host); > BUG_ON(!host->card); > > mmc_claim_host(host); > > + if (host->card->err_count>= ERR_TRIGGER_REINIT) > + err = mmc_init_card(host, host->ocr, host->card); > /* > * Just check if our card has been removed. > */ > - err = mmc_send_status(host->card, NULL); > + if (!err) > + err = mmc_send_status(host->card, NULL); > > mmc_release_host(host); > > diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c > index 0f52410..820b0d1 100644 > --- a/drivers/mmc/core/sd.c > +++ b/drivers/mmc/core/sd.c > @@ -635,6 +635,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, > mmc_set_bus_width(host, MMC_BUS_WIDTH_4); > } > > + card->err_count = 0; > host->card = card; > return 0; > > @@ -662,17 +663,20 @@ static void mmc_sd_remove(struct mmc_host *host) > */ > static void mmc_sd_detect(struct mmc_host *host) > { > - int err; > + int err = 0; > > BUG_ON(!host); > BUG_ON(!host->card); > > mmc_claim_host(host); > > + if (host->card->err_count>= ERR_TRIGGER_REINIT) > + err = mmc_sd_init_card(host, host->ocr, host->card); > /* > * Just check if our card has been removed. > */ > - err = mmc_send_status(host->card, NULL); > + if (!err) > + err = mmc_send_status(host->card, NULL); > > mmc_release_host(host); > > diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h > index 6b75250..178de17 100644 > --- a/include/linux/mmc/card.h > +++ b/include/linux/mmc/card.h > @@ -143,6 +143,9 @@ struct mmc_card { > const char **info; /* info strings */ > struct sdio_func_tuple *tuples; /* unknown common tuples */ > > + unsigned int err_count; > +#define ERR_TRIGGER_REINIT 1024 > + > struct dentry *debugfs_root; > }; > > diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h > index dd11ae5..3b979a1 100644 > --- a/include/linux/mmc/mmc.h > +++ b/include/linux/mmc/mmc.h > @@ -122,6 +122,7 @@ > #define R1_UNDERRUN (1<< 18) /* ex, c */ > #define R1_OVERRUN (1<< 17) /* ex, c */ > #define R1_CID_CSD_OVERWRITE (1<< 16) /* erx, c, CID/CSD overwrite */ > +#define R1_ERROR_MASK 0xffff0000 > #define R1_WP_ERASE_SKIP (1<< 15) /* sx, c */ > #define R1_CARD_ECC_DISABLED (1<< 14) /* sx, a */ > #define R1_ERASE_RESET (1<< 13) /* sr, c */