From: Chaitanya Kulkarni <chaitanyak@nvidia.com>
To: Keith Busch <kbusch@meta.com>
Cc: "linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
"hch@lst.de" <hch@lst.de>,
"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
"joshi.k@samsung.com" <joshi.k@samsung.com>,
"javier.gonz@samsung.com" <javier.gonz@samsung.com>,
"linux-scsi@vger.kernel.org" <linux-scsi@vger.kernel.org>,
"linux-nvme@lists.infradead.org" <linux-nvme@lists.infradead.org>,
"bvanassche@acm.org" <bvanassche@acm.org>,
Hui Qi <hui81.qi@samsung.com>,
Nitesh Shetty <nj.shetty@samsung.com>,
Hannes Reinecke <hare@suse.de>, Keith Busch <kbusch@kernel.org>,
"io-uring@vger.kernel.org" <io-uring@vger.kernel.org>
Subject: Re: [PATCHv10 8/9] nvme: enable FDP support
Date: Wed, 30 Oct 2024 00:24:36 +0000 [thread overview]
Message-ID: <33648c19-d527-4085-a2d7-3444db9664d0@nvidia.com> (raw)
In-Reply-To: <20241029151922.459139-9-kbusch@meta.com>
On 10/29/24 08:19, Keith Busch wrote:
> From: Kanchan Joshi <joshi.k@samsung.com>
>
> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> to control the placement of logical blocks so as to reduce the SSD WAF.
> Userspace can send the write hint information using io_uring or fcntl.
>
> Fetch the placement-identifiers if the device supports FDP. The incoming
> write-hint is mapped to a placement-identifier, which in turn is set in
> the DSPEC field of the write command.
>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> Signed-off-by: Hui Qi <hui81.qi@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
> drivers/nvme/host/core.c | 84 ++++++++++++++++++++++++++++++++++++++++
> drivers/nvme/host/nvme.h | 5 +++
> include/linux/nvme.h | 19 +++++++++
> 3 files changed, 108 insertions(+)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 3de7555a7de74..bd7b89912ddb9 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -44,6 +44,20 @@ struct nvme_ns_info {
> bool is_removed;
> };
>
> +struct nvme_fdp_ruh_status_desc {
> + u16 pid;
> + u16 ruhid;
> + u32 earutr;
> + u64 ruamw;
> + u8 rsvd16[16];
> +};
> +
> +struct nvme_fdp_ruh_status {
> + u8 rsvd0[14];
> + __le16 nruhsd;
> + struct nvme_fdp_ruh_status_desc ruhsd[];
> +};
> +
> unsigned int admin_timeout = 60;
> module_param(admin_timeout, uint, 0644);
> MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
> @@ -657,6 +671,7 @@ static void nvme_free_ns_head(struct kref *ref)
> ida_free(&head->subsys->ns_ida, head->instance);
> cleanup_srcu_struct(&head->srcu);
> nvme_put_subsystem(head->subsys);
> + kfree(head->plids);
> kfree(head);
> }
>
> @@ -974,6 +989,13 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
> if (req->cmd_flags & REQ_RAHEAD)
> dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
>
> + if (req->write_hint && ns->head->nr_plids) {
> + u16 hint = max(req->write_hint, ns->head->nr_plids);
> +
> + dsmgmt |= ns->head->plids[hint - 1] << 16;
> + control |= NVME_RW_DTYPE_DPLCMT;
> + }
> +
> if (req->cmd_flags & REQ_ATOMIC && !nvme_valid_atomic_write(req))
> return BLK_STS_INVAL;
>
> @@ -2105,6 +2127,52 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
> return ret;
> }
>
> +static int nvme_fetch_fdp_plids(struct nvme_ns *ns, u32 nsid)
> +{
> + struct nvme_fdp_ruh_status_desc *ruhsd;
> + struct nvme_ns_head *head = ns->head;
> + struct nvme_fdp_ruh_status *ruhs;
> + struct nvme_command c = {};
> + int size, ret, i;
> +
> + if (head->plids)
> + return 0;
> +
> + size = struct_size(ruhs, ruhsd, NVME_MAX_PLIDS);
> + ruhs = kzalloc(size, GFP_KERNEL);
> + if (!ruhs)
> + return -ENOMEM;
> +
> + c.imr.opcode = nvme_cmd_io_mgmt_recv;
> + c.imr.nsid = cpu_to_le32(nsid);
> + c.imr.mo = 0x1;
Can we please add a comment wherever values are hardcoded, such as the 0x1 above?
> + c.imr.numd = cpu_to_le32((size >> 2) - 1);
> +
> + ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
> + if (ret)
> + goto out;
> +
> + i = le16_to_cpu(ruhs->nruhsd);
Instead of i, why can't we use a local variable named nr_plids?
> + if (!i)
> + goto out;
> +
> + ns->head->nr_plids = min_t(u16, i, NVME_MAX_PLIDS);
> + head->plids = kcalloc(ns->head->nr_plids, sizeof(head->plids),
> + GFP_KERNEL);
> + if (!head->plids) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for (i = 0; i < ns->head->nr_plids; i++) {
> + ruhsd = &ruhs->ruhsd[i];
> + head->plids[i] = le16_to_cpu(ruhsd->pid);
> + }
> +out:
> + kfree(ruhs);
> + return ret;
> +}
> +
> static int nvme_update_ns_info_block(struct nvme_ns *ns,
> struct nvme_ns_info *info)
> {
> @@ -2141,6 +2209,19 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> goto out;
> }
>
> + if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
> + ret = nvme_fetch_fdp_plids(ns, info->nsid);
> + if (ret)
> + dev_warn(ns->ctrl->device,
> + "FDP failure status:0x%x\n", ret);
> + if (ret < 0)
> + goto out;
> + } else {
> + ns->head->nr_plids = 0;
> + kfree(ns->head->plids);
> + ns->head->plids = NULL;
> + }
> +
> blk_mq_freeze_queue(ns->disk->queue);
> ns->head->lba_shift = id->lbaf[lbaf].ds;
> ns->head->nuse = le64_to_cpu(id->nuse);
> @@ -2171,6 +2252,9 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> if (!nvme_init_integrity(ns->head, &lim, info))
> capacity = 0;
>
> + lim.max_write_hints = ns->head->nr_plids;
> + if (lim.max_write_hints)
> + lim.features |= BLK_FEAT_PLACEMENT_HINTS;
> ret = queue_limits_commit_update(ns->disk->queue, &lim);
> if (ret) {
> blk_mq_unfreeze_queue(ns->disk->queue);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 093cb423f536b..cec8e5d96377b 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -454,6 +454,8 @@ struct nvme_ns_ids {
> u8 csi;
> };
>
> +#define NVME_MAX_PLIDS (NVME_CTRL_PAGE_SIZE / sizeof(16))
Does this calculate how many plids can fit into the ctrl page size?
Sorry, but I didn't understand sizeof(16) here. Since plids are u16
(nvme_ns_head -> u16 *plids), should this be sizeof(u16)? -ck
next prev parent reply other threads:[~2024-10-30 0:24 UTC|newest]
Thread overview: 105+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-29 15:19 [PATCHv10 0/9] write hints with nvme fdp, scsi streams Keith Busch
2024-10-29 15:19 ` [PATCHv10 1/9] block: use generic u16 for write hints Keith Busch
2024-10-29 17:21 ` Bart Van Assche
2024-10-29 15:19 ` [PATCHv10 2/9] block: introduce max_write_hints queue limit Keith Busch
2024-10-29 15:19 ` [PATCHv10 3/9] statx: add write hint information Keith Busch
2024-10-29 15:19 ` [PATCHv10 4/9] block: allow ability to limit partition write hints Keith Busch
2024-10-29 15:23 ` Christoph Hellwig
2024-10-29 17:25 ` Bart Van Assche
2024-10-30 4:46 ` Christoph Hellwig
2024-10-30 20:11 ` Keith Busch
2024-10-30 20:26 ` Bart Van Assche
2024-10-30 20:37 ` Keith Busch
2024-10-30 21:15 ` Bart Van Assche
2024-10-29 15:19 ` [PATCHv10 5/9] block, fs: add write hint to kiocb Keith Busch
2024-10-29 15:19 ` [PATCHv10 6/9] io_uring: enable per-io hinting capability Keith Busch
2024-11-07 2:09 ` Jens Axboe
2024-10-29 15:19 ` [PATCHv10 7/9] block: export placement hint feature Keith Busch
2024-10-29 15:19 ` [PATCHv10 9/9] scsi: set permanent stream count in block limits Keith Busch
2024-10-29 15:26 ` Christoph Hellwig
2024-10-29 15:34 ` Keith Busch
2024-10-29 15:37 ` Christoph Hellwig
2024-10-29 15:38 ` Keith Busch
2024-10-29 15:53 ` Christoph Hellwig
2024-10-29 16:22 ` Keith Busch
2024-10-30 4:55 ` Christoph Hellwig
2024-10-30 15:41 ` Keith Busch
2024-10-30 15:45 ` Christoph Hellwig
2024-10-30 15:48 ` Keith Busch
2024-10-30 15:50 ` Christoph Hellwig
2024-10-30 16:42 ` Keith Busch
2024-10-30 16:57 ` Christoph Hellwig
2024-10-30 17:05 ` Keith Busch
2024-10-30 17:15 ` Christoph Hellwig
2024-10-30 17:23 ` Keith Busch
2024-10-30 22:32 ` Keith Busch
2024-10-31 8:19 ` Hans Holmberg
2024-10-31 13:02 ` Christoph Hellwig
2024-10-31 14:06 ` Keith Busch
2024-11-01 7:16 ` Hans Holmberg
2024-11-01 8:19 ` Javier González
2024-11-01 14:49 ` Keith Busch
2024-11-06 14:26 ` Hans Holmberg
2024-10-30 16:59 ` Bart Van Assche
2024-10-30 17:14 ` Christoph Hellwig
2024-10-30 17:44 ` Bart Van Assche
2024-11-01 1:03 ` Jaegeuk Kim
2024-10-29 17:18 ` Bart Van Assche
2024-10-30 5:42 ` Christoph Hellwig
2024-10-29 15:24 ` [PATCHv10 0/9] write hints with nvme fdp, scsi streams Christoph Hellwig
[not found] ` <20241029151922.459139-9-kbusch@meta.com>
2024-10-30 0:24 ` Chaitanya Kulkarni [this message]
2024-11-05 15:50 ` Christoph Hellwig
2024-11-06 18:36 ` Keith Busch
2024-11-07 20:36 ` Keith Busch
2024-11-08 14:18 ` Christoph Hellwig
2024-11-08 15:51 ` Keith Busch
2024-11-08 16:54 ` Matthew Wilcox
2024-11-08 17:43 ` Javier Gonzalez
2024-11-08 18:51 ` Bart Van Assche
2024-11-11 9:31 ` Javier Gonzalez
2024-11-11 17:45 ` Bart Van Assche
2024-11-12 13:52 ` Nitesh Shetty
2024-11-19 2:03 ` Martin K. Petersen
2024-11-25 23:21 ` Bart Van Assche
2024-11-27 2:54 ` Martin K. Petersen
2024-11-27 18:42 ` Bart Van Assche
2024-11-27 20:14 ` Martin K. Petersen
2024-11-27 21:06 ` Bart Van Assche
2024-11-28 2:09 ` Martin K. Petersen
2024-11-28 8:51 ` Damien Le Moal
2024-11-29 6:19 ` Christoph Hellwig
2024-11-29 6:23 ` Damien Le Moal
2024-11-28 3:24 ` Christoph Hellwig
2024-11-28 15:21 ` Keith Busch
2024-11-28 16:40 ` Christoph Hellwig
2024-12-05 8:03 ` Nitesh Shetty
2024-12-05 20:37 ` Martin K. Petersen
2024-12-10 0:58 ` Bart Van Assche
2024-12-10 2:20 ` Martin K. Petersen
2024-12-10 9:53 ` Nitesh Shetty
2024-12-10 21:58 ` Bart Van Assche
2024-12-10 19:41 ` Bart Van Assche
2024-12-11 9:36 ` Nitesh Shetty
2024-12-11 17:27 ` Bart Van Assche
2024-12-11 19:38 ` Martin K. Petersen
2024-12-10 7:12 ` Christoph Hellwig
2024-12-10 8:05 ` Johannes Thumshirn
2024-12-10 10:58 ` hch
2024-12-10 19:21 ` Bart Van Assche
2024-12-11 4:07 ` Damien Le Moal
2024-12-11 21:06 ` Bart Van Assche
2024-12-11 21:21 ` Jaegeuk Kim
2024-12-11 19:41 ` Martin K. Petersen
2024-12-09 22:13 ` Bart Van Assche
2024-12-09 23:13 ` Damien Le Moal
2024-12-09 23:31 ` Matthew Wilcox
2024-12-10 0:22 ` Bart Van Assche
2024-11-11 6:51 ` Christoph Hellwig
2024-11-11 9:30 ` Javier Gonzalez
2024-11-11 9:37 ` Johannes Thumshirn
2024-11-11 9:41 ` Javier Gonzalez
2024-11-11 9:42 ` hch
2024-11-11 9:43 ` Johannes Thumshirn
2024-11-11 10:37 ` Javier Gonzalez
2024-11-11 6:49 ` Christoph Hellwig
2024-11-11 6:48 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=33648c19-d527-4085-a2d7-3444db9664d0@nvidia.com \
--to=chaitanyak@nvidia.com \
--cc=bvanassche@acm.org \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=hui81.qi@samsung.com \
--cc=io-uring@vger.kernel.org \
--cc=javier.gonz@samsung.com \
--cc=joshi.k@samsung.com \
--cc=kbusch@kernel.org \
--cc=kbusch@meta.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=linux-scsi@vger.kernel.org \
--cc=nj.shetty@samsung.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).