Re: [PATCH v19 3/3] scsi: ufs: Prepare HPB read for cached sub-region

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Can Guo <cang@codeaurora.org>
To: daejun7.park@samsung.com
Cc: Greg KH <gregkh@linuxfoundation.org>,
	avri.altman@wdc.com, jejb@linux.ibm.com,
	martin.petersen@oracle.com, asutoshd@codeaurora.org,
	stanley.chu@mediatek.com, huobean@gmail.com, bvanassche@acm.org,
	ALIM AKHTAR <alim.akhtar@samsung.com>,
	linux-scsi@vger.kernel.org, linux-kernel@vger.kernel.org,
	Sung-Jun Park <sungjun07.park@samsung.com>,
	yongmyung lee <ymhungry.lee@samsung.com>,
	Jinyoung CHOI <j-young.choi@samsung.com>,
	BoRam Shin <boram.shin@samsung.com>,
	SEUNGUK SHIN <seunguk.shin@samsung.com>
Subject: Re: [PATCH v19 3/3] scsi: ufs: Prepare HPB read for cached sub-region
Date: Fri, 05 Feb 2021 11:29:48 +0800	[thread overview]
Message-ID: <7f25ccb1d857131baa1c0424c4542e33@codeaurora.org> (raw)
In-Reply-To: <20210129053042epcms2p538e7fa396c3c2104594c44e48be53eb8@epcms2p5>

On 2021-01-29 13:30, Daejun Park wrote:
> This patch changes the read I/O to the HPB read I/O.
> 
> If the logical address of the read I/O belongs to active sub-region, 
> the
> HPB driver modifies the read I/O command to HPB read. It modifies the 
> UPIU
> command of UFS instead of modifying the existing SCSI command.
> 
> In the HPB version 1.0, the maximum read I/O size that can be converted 
> to
> HPB read is 4KB.
> 
> The dirty map of the active sub-region prevents an incorrect HPB read 
> that
> has stale physical page number which is updated by previous write I/O.
> 
> Reviewed-by: Can Guo <cang@codeaurora.org>
> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
> Acked-by: Avri Altman <Avri.Altman@wdc.com>
> Tested-by: Bean Huo <beanhuo@micron.com>
> Signed-off-by: Daejun Park <daejun7.park@samsung.com>
> ---
>  drivers/scsi/ufs/ufshcd.c |   2 +
>  drivers/scsi/ufs/ufshpb.c | 234 ++++++++++++++++++++++++++++++++++++++
>  drivers/scsi/ufs/ufshpb.h |   2 +
>  3 files changed, 238 insertions(+)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 52e48de8d27c..37cb343e9ec1 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -2653,6 +2653,8 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
> 
>  	lrbp->req_abort_skip = false;
> 
> +	ufshpb_prep(hba, lrbp);
> +
>  	ufshcd_comp_scsi_upiu(hba, lrbp);
> 
>  	err = ufshcd_map_sg(hba, lrbp);
> diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
> index 48edfdd0f606..73e7b3ed04a4 100644
> --- a/drivers/scsi/ufs/ufshpb.c
> +++ b/drivers/scsi/ufs/ufshpb.c
> @@ -31,6 +31,29 @@ bool ufshpb_is_allowed(struct ufs_hba *hba)
>  	return !(hba->ufshpb_dev.hpb_disabled);
>  }
> 
> +static int ufshpb_is_valid_srgn(struct ufshpb_region *rgn,
> +			     struct ufshpb_subregion *srgn)
> +{
> +	return rgn->rgn_state != HPB_RGN_INACTIVE &&
> +		srgn->srgn_state == HPB_SRGN_VALID;
> +}
> +
> +static bool ufshpb_is_read_cmd(struct scsi_cmnd *cmd)
> +{
> +	return req_op(cmd->request) == REQ_OP_READ;
> +}
> +
> +static bool ufshpb_is_write_or_discard_cmd(struct scsi_cmnd *cmd)
> +{
> +	return op_is_write(req_op(cmd->request)) ||
> +	       op_is_discard(req_op(cmd->request));
> +}
> +
> +static bool ufshpb_is_support_chunk(int transfer_len)
> +{
> +	return transfer_len <= HPB_MULTI_CHUNK_HIGH;
> +}
> +
>  static bool ufshpb_is_general_lun(int lun)
>  {
>  	return lun < UFS_UPIU_MAX_UNIT_NUM_ID;
> @@ -98,6 +121,217 @@ static void ufshpb_set_state(struct ufshpb_lu
> *hpb, int state)
>  	atomic_set(&hpb->hpb_state, state);
>  }
> 
> +static void ufshpb_set_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx,
> +			     int srgn_idx, int srgn_offset, int cnt)
> +{
> +	struct ufshpb_region *rgn;
> +	struct ufshpb_subregion *srgn;
> +	int set_bit_len;
> +	int bitmap_len = hpb->entries_per_srgn;
> +
> +next_srgn:
> +	rgn = hpb->rgn_tbl + rgn_idx;
> +	srgn = rgn->srgn_tbl + srgn_idx;
> +
> +	if ((srgn_offset + cnt) > bitmap_len)
> +		set_bit_len = bitmap_len - srgn_offset;
> +	else
> +		set_bit_len = cnt;
> +
> +	if (rgn->rgn_state != HPB_RGN_INACTIVE &&
> +	    srgn->srgn_state == HPB_SRGN_VALID)
> +		bitmap_set(srgn->mctx->ppn_dirty, srgn_offset, set_bit_len);
> +
> +	srgn_offset = 0;
> +	if (++srgn_idx == hpb->srgns_per_rgn) {
> +		srgn_idx = 0;
> +		rgn_idx++;
> +	}
> +
> +	cnt -= set_bit_len;
> +	if (cnt > 0)
> +		goto next_srgn;
> +
> +	WARN_ON(cnt < 0);
> +}
> +
> +static bool ufshpb_test_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx,
> +				   int srgn_idx, int srgn_offset, int cnt)
> +{
> +	struct ufshpb_region *rgn;
> +	struct ufshpb_subregion *srgn;
> +	int bitmap_len = hpb->entries_per_srgn;
> +	int bit_len;
> +
> +next_srgn:
> +	rgn = hpb->rgn_tbl + rgn_idx;
> +	srgn = rgn->srgn_tbl + srgn_idx;
> +
> +	if (!ufshpb_is_valid_srgn(rgn, srgn))
> +		return true;
> +
> +	/*
> +	 * If the region state is active, mctx must be allocated.
> +	 * In this case, check whether the region is evicted or
> +	 * mctx allcation fail.
> +	 */
> +	WARN_ON(!srgn->mctx);
> +
> +	if ((srgn_offset + cnt) > bitmap_len)
> +		bit_len = bitmap_len - srgn_offset;
> +	else
> +		bit_len = cnt;
> +
> +	if (find_next_bit(srgn->mctx->ppn_dirty,
> +			  bit_len, srgn_offset) >= srgn_offset)
> +		return true;
> +
> +	srgn_offset = 0;
> +	if (++srgn_idx == hpb->srgns_per_rgn) {
> +		srgn_idx = 0;
> +		rgn_idx++;
> +	}
> +
> +	cnt -= bit_len;
> +	if (cnt > 0)
> +		goto next_srgn;
> +
> +	return false;
> +}
> +
> +static u64 ufshpb_get_ppn(struct ufshpb_lu *hpb,
> +			  struct ufshpb_map_ctx *mctx, int pos, int *error)
> +{
> +	u64 *ppn_table;
> +	struct page *page;
> +	int index, offset;
> +
> +	index = pos / (PAGE_SIZE / HPB_ENTRY_SIZE);
> +	offset = pos % (PAGE_SIZE / HPB_ENTRY_SIZE);
> +
> +	page = mctx->m_page[index];
> +	if (unlikely(!page)) {
> +		*error = -ENOMEM;
> +		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
> +			"error. cannot find page in mctx\n");
> +		return 0;
> +	}
> +
> +	ppn_table = page_address(page);
> +	if (unlikely(!ppn_table)) {
> +		*error = -ENOMEM;
> +		dev_err(&hpb->sdev_ufs_lu->sdev_dev,
> +			"error. cannot get ppn_table\n");
> +		return 0;
> +	}
> +
> +	return ppn_table[offset];
> +}
> +
> +static void
> +ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, int 
> *rgn_idx,
> +			int *srgn_idx, int *offset)
> +{
> +	int rgn_offset;
> +
> +	*rgn_idx = lpn >> hpb->entries_per_rgn_shift;
> +	rgn_offset = lpn & hpb->entries_per_rgn_mask;
> +	*srgn_idx = rgn_offset >> hpb->entries_per_srgn_shift;
> +	*offset = rgn_offset & hpb->entries_per_srgn_mask;
> +}
> +
> +static void
> +ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct ufshcd_lrb 
> *lrbp,
> +				  u32 lpn, u64 ppn,  unsigned int transfer_len)
> +{
> +	unsigned char *cdb = lrbp->cmd->cmnd;
> +
> +	cdb[0] = UFSHPB_READ;
> +
> +	put_unaligned_be64(ppn, &cdb[6]);

You are assuming the HPB entries read out by "HPB Read Buffer" cmd are 
in Little
Endian, which is why you are using put_unaligned_be64 here. However, 
this assumption
is not right for all the other flash vendors - HPB entries read out by 
"HPB Read Buffer"
cmd may come in Big Endian, if so, their random read performance are 
screwed.

Actually, I have seen at least two flash vendors acting so. I had to 
modify this line
to get the code work properly on my setups.

Meanwhile, in your cover letter, you mentioned that the performance data 
is collected
on a UFS2.1 device. Please re-collect the data on a real UFS3.1 device 
and let me
know the part number. Otherwise, the data is not quite convincing to us.

Regards,
Can Guo.

> +	cdb[14] = transfer_len;
> +
> +	lrbp->cmd->cmd_len = UFS_CDB_SIZE;
> +}
> +
> +/*
> + * This function will set up HPB read command using host-side L2P map 
> data.
> + * In HPB v1.0, maximum size of HPB read command is 4KB.
> + */
> +void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
> +{
> +	struct ufshpb_lu *hpb;
> +	struct ufshpb_region *rgn;
> +	struct ufshpb_subregion *srgn;
> +	struct scsi_cmnd *cmd = lrbp->cmd;
> +	u32 lpn;
> +	u64 ppn;
> +	unsigned long flags;
> +	int transfer_len, rgn_idx, srgn_idx, srgn_offset;
> +	int err = 0;
> +
> +	hpb = ufshpb_get_hpb_data(cmd->device);
> +	if (!hpb)
> +		return;
> +
> +	if (ufshpb_get_state(hpb) != HPB_PRESENT) {
> +		dev_notice(&hpb->sdev_ufs_lu->sdev_dev,
> +			   "%s: ufshpb state is not PRESENT", __func__);
> +		return;
> +	}
> +
> +	if (!ufshpb_is_write_or_discard_cmd(cmd) &&
> +	    !ufshpb_is_read_cmd(cmd))
> +		return;
> +
> +	transfer_len = sectors_to_logical(cmd->device, 
> blk_rq_sectors(cmd->request));
> +	if (unlikely(!transfer_len))
> +		return;
> +
> +	lpn = sectors_to_logical(cmd->device, blk_rq_pos(cmd->request));
> +	ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset);
> +	rgn = hpb->rgn_tbl + rgn_idx;
> +	srgn = rgn->srgn_tbl + srgn_idx;
> +
> +	/* If command type is WRITE or DISCARD, set bitmap as drity */
> +	if (ufshpb_is_write_or_discard_cmd(cmd)) {
> +		spin_lock_irqsave(&hpb->rgn_state_lock, flags);
> +		ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
> +				 transfer_len);
> +		spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
> +		return;
> +	}
> +
> +	if (!ufshpb_is_support_chunk(transfer_len))
> +		return;
> +
> +	spin_lock_irqsave(&hpb->rgn_state_lock, flags);
> +	if (ufshpb_test_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset,
> +				   transfer_len)) {
> +		hpb->stats.miss_cnt++;
> +		spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
> +		return;
> +	}
> +
> +	ppn = ufshpb_get_ppn(hpb, srgn->mctx, srgn_offset, &err);
> +	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
> +	if (unlikely(err)) {
> +		/*
> +		 * In this case, the region state is active,
> +		 * but the ppn table is not allocated.
> +		 * Make sure that ppn table must be allocated on
> +		 * active state.
> +		 */
> +		WARN_ON(true);
> +		dev_err(hba->dev, "ufshpb_get_ppn failed. err %d\n", err);
> +		return;
> +	}
> +
> +	ufshpb_set_hpb_read_to_upiu(hpb, lrbp, lpn, ppn, transfer_len);
> +
> +	hpb->stats.hit_cnt++;
> +}
> +
>  static struct ufshpb_req *ufshpb_get_map_req(struct ufshpb_lu *hpb,
>  					     struct ufshpb_subregion *srgn)
>  {
> diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h
> index e40b016971ac..2c43a03b66b6 100644
> --- a/drivers/scsi/ufs/ufshpb.h
> +++ b/drivers/scsi/ufs/ufshpb.h
> @@ -198,6 +198,7 @@ struct ufs_hba;
>  struct ufshcd_lrb;
> 
>  #ifndef CONFIG_SCSI_UFS_HPB
> +static void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) 
> {}
>  static void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb 
> *lrbp) {}
>  static void ufshpb_resume(struct ufs_hba *hba) {}
>  static void ufshpb_suspend(struct ufs_hba *hba) {}
> @@ -211,6 +212,7 @@ static bool ufshpb_is_allowed(struct ufs_hba *hba)
> { return false; }
>  static void ufshpb_get_geo_info(struct ufs_hba *hba, u8 *geo_buf) {}
>  static void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf) {}
>  #else
> +void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp);
>  void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp);
>  void ufshpb_resume(struct ufs_hba *hba);
>  void ufshpb_suspend(struct ufs_hba *hba);

next prev parent reply	other threads:[~2021-02-05  3:31 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20210129052848epcms2p6e5797efd94e6282b76ad9ae6c99e3ab5@epcms2p6>
2021-01-29  5:28 ` [PATCH v19 0/3] scsi: ufs: Add Host Performance Booster Support Daejun Park
2021-01-29  5:29   ` [PATCH v19 1/3] scsi: ufs: Introduce HPB feature Daejun Park
2021-02-02  7:46     ` Avri Altman
2021-02-02 11:12       ` Greg KH
2021-01-29  5:30   ` [PATCH v19 2/3] scsi: ufs: L2P map management for HPB read Daejun Park
2021-02-05 11:15     ` Bean Huo
2021-02-05 11:42       ` Avri Altman
2021-02-05 11:54         ` Bean Huo
2021-02-08  8:00           ` Daejun Park
2021-02-06  7:23     ` Can Guo
2021-02-06  9:14       ` Bean Huo
2021-02-06 19:17       ` Avri Altman
2021-02-08  8:01       ` Daejun Park
2021-02-08  7:21     ` Can Guo
2021-02-08  8:03       ` Daejun Park
2021-02-08  8:34         ` Can Guo
2021-02-08  8:53           ` Daejun Park
2021-02-08  9:23             ` Can Guo
2021-02-09  1:27               ` Daejun Park
2021-02-09  2:28                 ` Can Guo
2021-01-29  5:30   ` [PATCH v19 3/3] scsi: ufs: Prepare HPB read for cached sub-region Daejun Park
2021-02-05  3:29     ` Can Guo [this message]
2021-02-05 12:35       ` Bean Huo
2021-02-05 14:06         ` Avri Altman
2021-02-05 15:08           ` Bean Huo
2021-02-07  7:36             ` Can Guo
2021-02-07 10:44               ` Bean Huo
2021-02-09 13:25           ` Avri Altman
2021-02-09 14:21             ` Bean Huo
2021-02-10  5:32               ` Can Guo
2021-02-08  8:16       ` Bean Huo
2021-02-08  9:58         ` Can Guo
2021-02-10  9:36     ` Avri Altman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7f25ccb1d857131baa1c0424c4542e33@codeaurora.org \
    --to=cang@codeaurora.org \
    --cc=alim.akhtar@samsung.com \
    --cc=asutoshd@codeaurora.org \
    --cc=avri.altman@wdc.com \
    --cc=boram.shin@samsung.com \
    --cc=bvanassche@acm.org \
    --cc=daejun7.park@samsung.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=huobean@gmail.com \
    --cc=j-young.choi@samsung.com \
    --cc=jejb@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=seunguk.shin@samsung.com \
    --cc=stanley.chu@mediatek.com \
    --cc=sungjun07.park@samsung.com \
    --cc=ymhungry.lee@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.