* [PATCH] nvme: enable FDP support
[not found] <CGME20240510134740epcas5p24ef1c2d6e8934c1c79b01c849e7ccb41@epcas5p2.samsung.com>
@ 2024-05-10 13:40 ` Kanchan Joshi
2024-05-10 19:30 ` Keith Busch
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Kanchan Joshi @ 2024-05-10 13:40 UTC (permalink / raw)
To: axboe, kbusch, hch
Cc: linux-nvme, linux-block, javier.gonz, bvanassche, david, slava,
gost.dev, Kanchan Joshi, Hui Qi, Nitesh Shetty
Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
to control the placement of logical blocks so as to reduce the SSD WAF.
Userspace can send the data lifetime information using the write hints.
The SCSI driver (sd) can already pass this information to the SCSI
devices. This patch does the same for NVMe.
Fetches the placement-identifiers (plids) if the device supports FDP.
And maps the incoming write-hints to plids.
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Hui Qi <hui81.qi@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
---
drivers/nvme/host/core.c | 67 ++++++++++++++++++++++++++++++++++++++++
drivers/nvme/host/nvme.h | 4 +++
include/linux/nvme.h | 19 ++++++++++++
3 files changed, 90 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8ae0a2dc5eda..c3de06cff12f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -42,6 +42,20 @@ struct nvme_ns_info {
bool is_removed;
};
+struct nvme_fdp_ruh_status_desc {
+ u16 pid;
+ u16 ruhid;
+ u32 earutr;
+ u64 ruamw;
+ u8 rsvd16[16];
+};
+
+struct nvme_fdp_ruh_status {
+ u8 rsvd0[14];
+ u16 nruhsd;
+ struct nvme_fdp_ruh_status_desc ruhsd[];
+};
+
unsigned int admin_timeout = 60;
module_param(admin_timeout, uint, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
@@ -943,6 +957,16 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
return BLK_STS_OK;
}
+static inline void nvme_assign_placement_id(struct nvme_ns *ns,
+ struct request *req,
+ struct nvme_command *cmd)
+{
+ enum rw_hint h = min(ns->head->nr_plids, req->write_hint);
+
+ cmd->rw.control |= cpu_to_le16(NVME_RW_DTYPE_DPLCMT);
+ cmd->rw.dsmgmt |= cpu_to_le32(ns->head->plids[h] << 16);
+}
+
static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd,
enum nvme_opcode op)
@@ -1058,6 +1082,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
break;
case REQ_OP_WRITE:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
+ if (!ret && ns->head->nr_plids)
+ nvme_assign_placement_id(ns, req, cmd);
break;
case REQ_OP_ZONE_APPEND:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
@@ -2070,6 +2096,40 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
return ret;
}
+static int nvme_fetch_fdp_plids(struct nvme_ns *ns, u32 nsid)
+{
+ struct nvme_command c = {};
+ struct nvme_fdp_ruh_status *ruhs;
+ struct nvme_fdp_ruh_status_desc *ruhsd;
+ int size, ret, i;
+
+ size = sizeof(*ruhs) + NVME_MAX_PLIDS * sizeof(*ruhsd);
+ ruhs = kzalloc(size, GFP_KERNEL);
+ if (!ruhs)
+ return -ENOMEM;
+
+ c.imr.opcode = nvme_cmd_io_mgmt_recv;
+ c.imr.nsid = cpu_to_le32(nsid);
+ c.imr.mo = 0x1;
+ c.imr.numd = cpu_to_le32((size >> 2) - 1);
+
+ ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
+ if (ret)
+ goto out;
+
+ ns->head->nr_plids = le16_to_cpu(ruhs->nruhsd);
+ ns->head->nr_plids =
+ min_t(u16, ns->head->nr_plids, NVME_MAX_PLIDS);
+
+ for (i = 0; i < ns->head->nr_plids; i++) {
+ ruhsd = &ruhs->ruhsd[i];
+ ns->head->plids[i] = le16_to_cpu(ruhsd->pid);
+ }
+out:
+ kfree(ruhs);
+ return ret;
+}
+
static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
@@ -2157,6 +2217,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (ret && !nvme_first_scan(ns->disk))
goto out;
}
+ if (ns->ctrl->ctratt & NVME_CTRL_ATTR_FDPS) {
+ ret = nvme_fetch_fdp_plids(ns, info->nsid);
+ if (ret)
+ dev_warn(ns->ctrl->device,
+ "FDP failure status:0x%x\n", ret);
+ }
+
ret = 0;
out:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d0ed64dc7380..67dad29fe289 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -440,6 +440,8 @@ struct nvme_ns_ids {
u8 csi;
};
+#define NVME_MAX_PLIDS (128)
+
/*
* Anchor structure for namespaces. There is one for each namespace in a
* NVMe subsystem that any of our controllers can see, and the namespace
@@ -457,6 +459,8 @@ struct nvme_ns_head {
bool shared;
bool passthru_err_log_enabled;
int instance;
+ u16 nr_plids;
+ u16 plids[NVME_MAX_PLIDS];
struct nvme_effects_log *effects;
u64 nuse;
unsigned ns_id;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 425573202295..fc07ba1b5ec5 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -270,6 +270,7 @@ enum nvme_ctrl_attr {
NVME_CTRL_ATTR_HID_128_BIT = (1 << 0),
NVME_CTRL_ATTR_TBKAS = (1 << 6),
NVME_CTRL_ATTR_ELBAS = (1 << 15),
+ NVME_CTRL_ATTR_FDPS = (1 << 19),
};
struct nvme_id_ctrl {
@@ -829,6 +830,7 @@ enum nvme_opcode {
nvme_cmd_resv_register = 0x0d,
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
+ nvme_cmd_io_mgmt_recv = 0x12,
nvme_cmd_resv_release = 0x15,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
@@ -850,6 +852,7 @@ enum nvme_opcode {
nvme_opcode_name(nvme_cmd_resv_register), \
nvme_opcode_name(nvme_cmd_resv_report), \
nvme_opcode_name(nvme_cmd_resv_acquire), \
+ nvme_opcode_name(nvme_cmd_io_mgmt_recv), \
nvme_opcode_name(nvme_cmd_resv_release), \
nvme_opcode_name(nvme_cmd_zone_mgmt_send), \
nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \
@@ -1001,6 +1004,7 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
NVME_RW_DTYPE_STREAMS = 1 << 4,
+ NVME_RW_DTYPE_DPLCMT = 2 << 4,
NVME_WZ_DEAC = 1 << 9,
};
@@ -1088,6 +1092,20 @@ struct nvme_zone_mgmt_recv_cmd {
__le32 cdw14[2];
};
+struct nvme_io_mgmt_recv_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 mo;
+ __u8 rsvd11;
+ __u16 mos;
+ __le32 numd;
+ __le32 cdw12[4];
+};
+
enum {
NVME_ZRA_ZONE_REPORT = 0,
NVME_ZRASF_ZONE_REPORT_ALL = 0,
@@ -1808,6 +1826,7 @@ struct nvme_command {
struct nvmf_auth_receive_command auth_receive;
struct nvme_dbbuf dbbuf;
struct nvme_directive_cmd directive;
+ struct nvme_io_mgmt_recv_cmd imr;
};
};
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-10 13:40 ` [PATCH] nvme: enable FDP support Kanchan Joshi
@ 2024-05-10 19:30 ` Keith Busch
2024-05-11 9:20 ` kernel test robot
2024-05-13 7:56 ` Viacheslav Dubeyko
2 siblings, 0 replies; 11+ messages in thread
From: Keith Busch @ 2024-05-10 19:30 UTC (permalink / raw)
To: Kanchan Joshi
Cc: axboe, hch, linux-nvme, linux-block, javier.gonz, bvanassche,
david, slava, gost.dev, Hui Qi, Nitesh Shetty
On Fri, May 10, 2024 at 07:10:15PM +0530, Kanchan Joshi wrote:
> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> to control the placement of logical blocks so as to reduce the SSD WAF.
>
> Userspace can send the data lifetime information using the write hints.
> The SCSI driver (sd) can already pass this information to the SCSI
> devices. This patch does the same for NVMe.
>
> Fetches the placement-identifiers (plids) if the device supports FDP.
> And map the incoming write-hints to plids.
Just some additional background since this looks similar to when the
driver supported "streams".
Supporting streams in the driver was pretty much a non-issue. The feature was
removed because devices didn't work with streams as expected, and
supporting it carried more maintenance overhead for the upper layers.
Since the block layer re-introduced write hints anyway outside of this
use case, this looks fine to me to re-introduce support for those hints.
So why not re-add stream support back? As far as I know, devices never
implemented that feature as expected, the driver had to enable it on
start up, and there's no required feedback mechanism to see if it's even
working or hurting.
For FDP, the user had to have configured the namespace that way in order
to get this, so it's still an optional, opt-in feature. It's also
mandatory for FDP capable drives to report WAF through the endurance
log, so users can see the effects of using it.
It would be nice to compare endurance logs with and without the FDP
configuration enabled for your various workloads. This will be great to
discuss at LSFMM next week.
> +static int nvme_fetch_fdp_plids(struct nvme_ns *ns, u32 nsid)
> +{
> + struct nvme_command c = {};
> + struct nvme_fdp_ruh_status *ruhs;
> + struct nvme_fdp_ruh_status_desc *ruhsd;
> + int size, ret, i;
> +
> + size = sizeof(*ruhs) + NVME_MAX_PLIDS * sizeof(*ruhsd);
size = struct_size(ruhs, ruhsd, MAX_PLIDS);
> +#define NVME_MAX_PLIDS (128)
> +
> /*
> * Anchor structure for namespaces. There is one for each namespace in a
> * NVMe subsystem that any of our controllers can see, and the namespace
> @@ -457,6 +459,8 @@ struct nvme_ns_head {
> bool shared;
> bool passthru_err_log_enabled;
> int instance;
> + u16 nr_plids;
> + u16 plids[NVME_MAX_PLIDS];
The largest index needed is WRITE_LIFE_EXTREME, which is "5", so I think
NVME_MAX_PLIDS should be the same value. And it will save space in the
struct.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-10 13:40 ` [PATCH] nvme: enable FDP support Kanchan Joshi
2024-05-10 19:30 ` Keith Busch
@ 2024-05-11 9:20 ` kernel test robot
2024-05-13 7:56 ` Viacheslav Dubeyko
2 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2024-05-11 9:20 UTC (permalink / raw)
To: Kanchan Joshi, axboe, kbusch, hch
Cc: oe-kbuild-all, linux-nvme, linux-block, javier.gonz, bvanassche,
david, slava, gost.dev, Kanchan Joshi, Hui Qi, Nitesh Shetty
Hi Kanchan,
kernel test robot noticed the following build warnings:
[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on linus/master v6.9-rc7 next-20240510]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Kanchan-Joshi/nvme-enable-FDP-support/20240510-214900
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link: https://lore.kernel.org/r/20240510134015.29717-1-joshi.k%40samsung.com
patch subject: [PATCH] nvme: enable FDP support
config: x86_64-randconfig-121-20240511 (https://download.01.org/0day-ci/archive/20240511/202405111758.Ts2xnoZH-lkp@intel.com/config)
compiler: gcc-9 (Ubuntu 9.5.0-4ubuntu2) 9.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240511/202405111758.Ts2xnoZH-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405111758.Ts2xnoZH-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
>> drivers/nvme/host/core.c:2120:30: sparse: sparse: cast to restricted __le16
drivers/nvme/host/core.c:2126:38: sparse: sparse: cast to restricted __le16
drivers/nvme/host/core.c: note: in included file (through include/linux/wait.h, include/linux/wait_bit.h, include/linux/fs.h, ...):
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
vim +2120 drivers/nvme/host/core.c
2098
2099 static int nvme_fetch_fdp_plids(struct nvme_ns *ns, u32 nsid)
2100 {
2101 struct nvme_command c = {};
2102 struct nvme_fdp_ruh_status *ruhs;
2103 struct nvme_fdp_ruh_status_desc *ruhsd;
2104 int size, ret, i;
2105
2106 size = sizeof(*ruhs) + NVME_MAX_PLIDS * sizeof(*ruhsd);
2107 ruhs = kzalloc(size, GFP_KERNEL);
2108 if (!ruhs)
2109 return -ENOMEM;
2110
2111 c.imr.opcode = nvme_cmd_io_mgmt_recv;
2112 c.imr.nsid = cpu_to_le32(nsid);
2113 c.imr.mo = 0x1;
2114 c.imr.numd = cpu_to_le32((size >> 2) - 1);
2115
2116 ret = nvme_submit_sync_cmd(ns->queue, &c, ruhs, size);
2117 if (ret)
2118 goto out;
2119
> 2120 ns->head->nr_plids = le16_to_cpu(ruhs->nruhsd);
2121 ns->head->nr_plids =
2122 min_t(u16, ns->head->nr_plids, NVME_MAX_PLIDS);
2123
2124 for (i = 0; i < ns->head->nr_plids; i++) {
2125 ruhsd = &ruhs->ruhsd[i];
2126 ns->head->plids[i] = le16_to_cpu(ruhsd->pid);
2127 }
2128 out:
2129 kfree(ruhs);
2130 return ret;
2131 }
2132
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-10 13:40 ` [PATCH] nvme: enable FDP support Kanchan Joshi
2024-05-10 19:30 ` Keith Busch
2024-05-11 9:20 ` kernel test robot
@ 2024-05-13 7:56 ` Viacheslav Dubeyko
2024-05-14 8:44 ` Joel Granados
2024-05-14 18:47 ` Kanchan Joshi
2 siblings, 2 replies; 11+ messages in thread
From: Viacheslav Dubeyko @ 2024-05-13 7:56 UTC (permalink / raw)
To: Kanchan Joshi
Cc: Jens Axboe, Keith Busch, Christoph Hellwig, linux-nvme,
linux-block, Javier González, Bart Van Assche, david,
gost.dev, Hui Qi, Nitesh Shetty
> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
>
> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> to control the placement of logical blocks so as to reduce the SSD WAF.
>
> Userspace can send the data lifetime information using the write hints.
> The SCSI driver (sd) can already pass this information to the SCSI
> devices. This patch does the same for NVMe.
>
> Fetches the placement-identifiers (plids) if the device supports FDP.
> And map the incoming write-hints to plids.
>
Great! Thanks for sharing the patch.
Do we have documentation that explains how, for example, kernel-space
file system can work with block layer to employ FDP?
Do we have FDP support in QEMU already if there is no access to real
device for testing?
Thanks,
Slava.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-13 7:56 ` Viacheslav Dubeyko
@ 2024-05-14 8:44 ` Joel Granados
2024-05-14 18:47 ` Kanchan Joshi
1 sibling, 0 replies; 11+ messages in thread
From: Joel Granados @ 2024-05-14 8:44 UTC (permalink / raw)
To: Viacheslav Dubeyko
Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
linux-nvme, linux-block, Javier González, Bart Van Assche,
david, gost.dev, Hui Qi, Nitesh Shetty
[-- Attachment #1: Type: text/plain, Size: 1103 bytes --]
On Mon, May 13, 2024 at 10:56:00AM +0300, Viacheslav Dubeyko wrote:
>
>
> > On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> >
> > Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> > to control the placement of logical blocks so as to reduce the SSD WAF.
> >
> > Userspace can send the data lifetime information using the write hints.
> > The SCSI driver (sd) can already pass this information to the SCSI
> > devices. This patch does the same for NVMe.
> >
> > Fetches the placement-identifiers (plids) if the device supports FDP.
> > And map the incoming write-hints to plids.
> >
>
>
> Great! Thanks for sharing the patch.
>
> Do we have documentation that explains how, for example, kernel-space
> file system can work with block layer to employ FDP?
>
> Do we have FDP support in QEMU already if there is no access to real
> device for testing?
I believe FDP has been in qemu for some time. Look for 73064edfb8
("hw/nvme: flexible data placement emulation [Jesper Devantier]")
best
--
Joel Granados
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 659 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-13 7:56 ` Viacheslav Dubeyko
2024-05-14 8:44 ` Joel Granados
@ 2024-05-14 18:47 ` Kanchan Joshi
2024-05-14 19:00 ` Viacheslav Dubeyko
1 sibling, 1 reply; 11+ messages in thread
From: Kanchan Joshi @ 2024-05-14 18:47 UTC (permalink / raw)
To: Viacheslav Dubeyko
Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
linux-nvme, linux-block, Javier González, Bart Van Assche,
david, gost.dev, Hui Qi, Nitesh Shetty
On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>
>
>
> > On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> >
> > Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> > to control the placement of logical blocks so as to reduce the SSD WAF.
> >
> > Userspace can send the data lifetime information using the write hints.
> > The SCSI driver (sd) can already pass this information to the SCSI
> > devices. This patch does the same for NVMe.
> >
> > Fetches the placement-identifiers (plids) if the device supports FDP.
> > And map the incoming write-hints to plids.
> >
>
>
> Great! Thanks for sharing the patch.
>
> Do we have documentation that explains how, for example, kernel-space
> file system can work with block layer to employ FDP?
This is primarily for user driven/exposed hints. For file system
driven hints, the scheme is really file system specific and therefore,
will vary from one to another.
F2FS is one (and only at the moment) example. Its 'fs-based' policy
can act as a reference for one way to go about it.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-14 18:47 ` Kanchan Joshi
@ 2024-05-14 19:00 ` Viacheslav Dubeyko
2024-05-15 3:30 ` Kanchan Joshi
0 siblings, 1 reply; 11+ messages in thread
From: Viacheslav Dubeyko @ 2024-05-14 19:00 UTC (permalink / raw)
To: Kanchan Joshi
Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
linux-nvme, linux-block, Javier González, Bart Van Assche,
david, gost.dev, Hui Qi, Nitesh Shetty
> On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>
> On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>
>>
>>
>>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
>>>
>>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
>>> to control the placement of logical blocks so as to reduce the SSD WAF.
>>>
>>> Userspace can send the data lifetime information using the write hints.
>>> The SCSI driver (sd) can already pass this information to the SCSI
>>> devices. This patch does the same for NVMe.
>>>
>>> Fetches the placement-identifiers (plids) if the device supports FDP.
>>> And map the incoming write-hints to plids.
>>>
>>
>>
>> Great! Thanks for sharing the patch.
>>
>> Do we have documentation that explains how, for example, kernel-space
>> file system can work with block layer to employ FDP?
>
> This is primarily for user driven/exposed hints. For file system
> driven hints, the scheme is really file system specific and therefore,
> will vary from one to another.
> F2FS is one (and only at the moment) example. Its 'fs-based' policy
> can act as a reference for one way to go about it.
Yes, I completely see the point. I would like to employ the FDP in my
kernel-space file system (SSDFS). And I have a vision how I can do it.
But I simply would like to see some documentation with the explanation of
API and limitations of FDP for the case of kernel-space file systems.
Thanks,
Slava.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-15 3:30 ` Kanchan Joshi
@ 2024-05-14 21:40 ` Viacheslav Dubeyko
2024-05-17 16:27 ` Kanchan Joshi
0 siblings, 1 reply; 11+ messages in thread
From: Viacheslav Dubeyko @ 2024-05-14 21:40 UTC (permalink / raw)
To: Kanchan Joshi
Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
linux-nvme, linux-block, Javier González, Bart Van Assche,
david, gost.dev, Hui Qi, Nitesh Shetty
> On May 15, 2024, at 6:30 AM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>
> On Tue, May 14, 2024 at 1:00 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>> On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>>>
>>> On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>>>
>>>>
>>>>
>>>>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
>>>>>
>>>>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
>>>>> to control the placement of logical blocks so as to reduce the SSD WAF.
>>>>>
>>>>> Userspace can send the data lifetime information using the write hints.
>>>>> The SCSI driver (sd) can already pass this information to the SCSI
>>>>> devices. This patch does the same for NVMe.
>>>>>
>>>>> Fetches the placement-identifiers (plids) if the device supports FDP.
>>>>> And map the incoming write-hints to plids.
>>>>>
>>>>
>>>>
>>>> Great! Thanks for sharing the patch.
>>>>
>>>> Do we have documentation that explains how, for example, kernel-space
>>>> file system can work with block layer to employ FDP?
>>>
>>> This is primarily for user driven/exposed hints. For file system
>>> driven hints, the scheme is really file system specific and therefore,
>>> will vary from one to another.
>>> F2FS is one (and only at the moment) example. Its 'fs-based' policy
>>> can act as a reference for one way to go about it.
>>
>> Yes, I completely see the point. I would like to employ the FDP in my
>> kernel-space file system (SSDFS). And I have a vision how I can do it.
>> But I simply would like to see some documentation with the explanation of
>> API and limitations of FDP for the case of kernel-space file systems.
>
> Nothing complicated for early experimentation.
> Once FS has determined the hint value, it can put that into
> bio->bi_write_hint and send bio down.
>
> If SSDFS cares about user-exposed hints too, it can choose different
> hint values than what is exposed to user-space.
> Or it can do what F2FS does - use the mount option as a toggle to
> reuse the same values either for user-hints or fs-defined hints.
How many hint values can a file system use? Any limitations here?
And how can a file system detect that it’s an FDP-based device?
Thanks,
Slava.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-14 19:00 ` Viacheslav Dubeyko
@ 2024-05-15 3:30 ` Kanchan Joshi
2024-05-14 21:40 ` Viacheslav Dubeyko
0 siblings, 1 reply; 11+ messages in thread
From: Kanchan Joshi @ 2024-05-15 3:30 UTC (permalink / raw)
To: Viacheslav Dubeyko
Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
linux-nvme, linux-block, Javier González, Bart Van Assche,
david, gost.dev, Hui Qi, Nitesh Shetty
On Tue, May 14, 2024 at 1:00 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
> > On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> >
> > On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
> >>
> >>
> >>
> >>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> >>>
> >>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> >>> to control the placement of logical blocks so as to reduce the SSD WAF.
> >>>
> >>> Userspace can send the data lifetime information using the write hints.
> >>> The SCSI driver (sd) can already pass this information to the SCSI
> >>> devices. This patch does the same for NVMe.
> >>>
> >>> Fetches the placement-identifiers (plids) if the device supports FDP.
> >>> And map the incoming write-hints to plids.
> >>>
> >>
> >>
> >> Great! Thanks for sharing the patch.
> >>
> >> Do we have documentation that explains how, for example, kernel-space
> >> file system can work with block layer to employ FDP?
> >
> > This is primarily for user driven/exposed hints. For file system
> > driven hints, the scheme is really file system specific and therefore,
> > will vary from one to another.
> > F2FS is one (and only at the moment) example. Its 'fs-based' policy
> > can act as a reference for one way to go about it.
>
> Yes, I completely see the point. I would like to employ the FDP in my
> kernel-space file system (SSDFS). And I have a vision how I can do it.
> But I simply would like to see some documentation with the explanation of
> API and limitations of FDP for the case of kernel-space file systems.
Nothing complicated for early experimentation.
Once FS has determined the hint value, it can put that into
bio->bi_write_hint and send bio down.
If SSDFS cares about user-exposed hints too, it can choose different
hint values than what is exposed to user-space.
Or it can do what F2FS does - use the mount option as a toggle to
reuse the same values either for user-hints or fs-defined hints.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-14 21:40 ` Viacheslav Dubeyko
@ 2024-05-17 16:27 ` Kanchan Joshi
2024-05-17 17:22 ` Viacheslav Dubeyko
0 siblings, 1 reply; 11+ messages in thread
From: Kanchan Joshi @ 2024-05-17 16:27 UTC (permalink / raw)
To: Viacheslav Dubeyko
Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
linux-nvme, linux-block, Javier González, Bart Van Assche,
david, gost.dev, Hui Qi, Nitesh Shetty
On Tue, May 14, 2024 at 2:40 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>
>
>
> > On May 15, 2024, at 6:30 AM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> >
> > On Tue, May 14, 2024 at 1:00 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
> >>> On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
> >>>
> >>> On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
> >>>>
> >>>>
> >>>>
> >>>>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
> >>>>>
> >>>>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
> >>>>> to control the placement of logical blocks so as to reduce the SSD WAF.
> >>>>>
> >>>>> Userspace can send the data lifetime information using the write hints.
> >>>>> The SCSI driver (sd) can already pass this information to the SCSI
> >>>>> devices. This patch does the same for NVMe.
> >>>>>
> >>>>> Fetches the placement-identifiers (plids) if the device supports FDP.
> >>>>> And map the incoming write-hints to plids.
> >>>>>
> >>>>
> >>>>
> >>>> Great! Thanks for sharing the patch.
> >>>>
> >>>> Do we have documentation that explains how, for example, kernel-space
> >>>> file system can work with block layer to employ FDP?
> >>>
> >>> This is primarily for user driven/exposed hints. For file system
> >>> driven hints, the scheme is really file system specific and therefore,
> >>> will vary from one to another.
> >>> F2FS is one (and only at the moment) example. Its 'fs-based' policy
> >>> can act as a reference for one way to go about it.
> >>
> >> Yes, I completely see the point. I would like to employ the FDP in my
> >> kernel-space file system (SSDFS). And I have a vision how I can do it.
> >> But I simply would like to see some documentation with the explanation of
> >> API and limitations of FDP for the case of kernel-space file systems.
> >
> > Nothing complicated for early experimentation.
> > Once FS has determined the hint value, it can put that into
> > bio->bi_write_hint and send bio down.
> >
> > If SSDFS cares about user-exposed hints too, it can choose different
> > hint values than what is exposed to user-space.
> > Or it can do what F2FS does - use the mount option as a toggle to
> > reuse the same values either for user-hints or fs-defined hints.
>
> How many hint values file system can use? Any limitations here?
As many as already defined (in rw_hint.h). Possible to go higher too.
No hard limitation per se. Write is not going to fail even if it sends
a hint that does not exist.
> And how file system can detect that it’s FDP-based device?
It does not need to detect. File system sees write-hints; FDP is a
lower-level detail.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] nvme: enable FDP support
2024-05-17 16:27 ` Kanchan Joshi
@ 2024-05-17 17:22 ` Viacheslav Dubeyko
0 siblings, 0 replies; 11+ messages in thread
From: Viacheslav Dubeyko @ 2024-05-17 17:22 UTC (permalink / raw)
To: Kanchan Joshi
Cc: Kanchan Joshi, Jens Axboe, Keith Busch, Christoph Hellwig,
linux-nvme, linux-block, Javier González, Bart Van Assche,
david, gost.dev, Hui Qi, Nitesh Shetty
> On May 17, 2024, at 7:27 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>
> On Tue, May 14, 2024 at 2:40 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>
>>
>>
>>> On May 15, 2024, at 6:30 AM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>>>
>>> On Tue, May 14, 2024 at 1:00 PM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>>>> On May 14, 2024, at 9:47 PM, Kanchan Joshi <joshiiitr@gmail.com> wrote:
>>>>>
>>>>> On Mon, May 13, 2024 at 2:04 AM Viacheslav Dubeyko <slava@dubeyko.com> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>>> On May 10, 2024, at 4:40 PM, Kanchan Joshi <joshi.k@samsung.com> wrote:
>>>>>>>
>>>>>>> Flexible Data Placement (FDP), as ratified in TP 4146a, allows the host
>>>>>>> to control the placement of logical blocks so as to reduce the SSD WAF.
>>>>>>>
>>>>>>> Userspace can send the data lifetime information using the write hints.
>>>>>>> The SCSI driver (sd) can already pass this information to the SCSI
>>>>>>> devices. This patch does the same for NVMe.
>>>>>>>
>>>>>>> Fetches the placement-identifiers (plids) if the device supports FDP.
>>>>>>> And map the incoming write-hints to plids.
>>>>>>>
>>>>>>
>>>>>>
>>>>>> Great! Thanks for sharing the patch.
>>>>>>
>>>>>> Do we have documentation that explains how, for example, kernel-space
>>>>>> file system can work with block layer to employ FDP?
>>>>>
>>>>> This is primarily for user driven/exposed hints. For file system
>>>>> driven hints, the scheme is really file system specific and therefore,
>>>>> will vary from one to another.
>>>>> F2FS is one (and only at the moment) example. Its 'fs-based' policy
>>>>> can act as a reference for one way to go about it.
>>>>
>>>> Yes, I completely see the point. I would like to employ the FDP in my
>>>> kernel-space file system (SSDFS). And I have a vision how I can do it.
>>>> But I simply would like to see some documentation with the explanation of
>>>> API and limitations of FDP for the case of kernel-space file systems.
>>>
>>> Nothing complicated for early experimentation.
>>> Once FS has determined the hint value, it can put that into
>>> bio->bi_write_hint and send bio down.
>>>
>>> If SSDFS cares about user-exposed hints too, it can choose different
>>> hint values than what is exposed to user-space.
>>> Or it can do what F2FS does - use the mount option as a toggle to
>>> reuse the same values either for user-hints or fs-defined hints.
>>
>> How many hint values file system can use? Any limitations here?
>
> As many as already defined (in rw_hint.h). Possible to go higher too.
> No hard limitation per se. Write is not going to fail even if it sends
> a hint that does not exist.
>
OK. I see. Thanks.
>> And how file system can detect that it’s FDP-based device?
>
> It does not need to detect. File system sees write-hints; FDP is a
> lower-level detail.
I see your point. But SSDFS doesn’t need in hints from user-space side.
SSDFS has various types of segments (several types of metadata segments and
user data segment) and I would like to use hints for these different types of segments.
I mean that SSDFS needs to make decisions when and for what type of data or
metadata to send such hints without any instructions from user-space side.
Technically speaking, user-space side doesn’t need to care to provide any hints
to SSDFS because SSDFS can manage everything without such hints.
So, I would like to have opportunity to change SSDFS behavior for different
type of devices:
if (zns_device)
execute_zns_related_logic
else if (fdp_device)
execute_fdp_related_logic
else // conventional SSD
execute_conventional_ssd_logic
Does it mean that there is no such way of FDP based device detection?
Thanks,
Slava.
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2024-05-17 17:22 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <CGME20240510134740epcas5p24ef1c2d6e8934c1c79b01c849e7ccb41@epcas5p2.samsung.com>
2024-05-10 13:40 ` [PATCH] nvme: enable FDP support Kanchan Joshi
2024-05-10 19:30 ` Keith Busch
2024-05-11 9:20 ` kernel test robot
2024-05-13 7:56 ` Viacheslav Dubeyko
2024-05-14 8:44 ` Joel Granados
2024-05-14 18:47 ` Kanchan Joshi
2024-05-14 19:00 ` Viacheslav Dubeyko
2024-05-15 3:30 ` Kanchan Joshi
2024-05-14 21:40 ` Viacheslav Dubeyko
2024-05-17 16:27 ` Kanchan Joshi
2024-05-17 17:22 ` Viacheslav Dubeyko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox