* [PATCH v5 2/3] block: move dif_prepare/dif_complete functions to block layer
2018-07-29 21:15 [PATCH v5 1/3] block: move ref_tag calculation func to the block layer Max Gurtovoy
@ 2018-07-29 21:15 ` Max Gurtovoy
2018-07-29 21:15 ` [PATCH v5 3/3] nvme: use blk API to remap ref tags for IOs with metadata Max Gurtovoy
2018-07-30 14:27 ` [PATCH v5 1/3] block: move ref_tag calculation func to the block layer Jens Axboe
2 siblings, 0 replies; 4+ messages in thread
From: Max Gurtovoy @ 2018-07-29 21:15 UTC (permalink / raw)
Currently these functions are implemented in the scsi layer, but they
belong in the block layer, since T10-PI is a general data integrity
feature that is used by the nvme protocol as well. Also, use the tuple
size from the integrity profile, since it may vary between integrity
types.
Suggested-by: Christoph Hellwig <hch at lst.de>
Cc: Jens Axboe <axboe at kernel.dk>
Cc: Martin K. Petersen <martin.petersen at oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen at oracle.com>
Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---
changes from v4:
- added Martin's Reviewed-by.
changes from v3:
- kmap_atomic/kunmap_atomic the same address
- declare pi struct inside the inner for loop
- check "intervals" inside the for loop condition
changes from v2:
- convert comments to kerneldoc format
- removed SCSI specific comment
- fix kmap_atomic/kunmap_atomic addresses
- fix iteration over t10_pi_tuple's
changes from v1 (Christoph, Martin and Keith comments):
- moved the functions to t10-pi.c
- updated tuple size
- changed local variables scope
- remove/add new lines
---
block/t10-pi.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/scsi/sd.c | 8 ++--
drivers/scsi/sd.h | 9 ----
drivers/scsi/sd_dif.c | 113 -------------------------------------------------
include/linux/t10-pi.h | 3 ++
5 files changed, 118 insertions(+), 125 deletions(-)
diff --git a/block/t10-pi.c b/block/t10-pi.c
index a98db38..62aed77 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -184,3 +184,113 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
.verify_fn = t10_pi_type3_verify_ip,
};
EXPORT_SYMBOL(t10_pi_type3_ip);
+
+/**
+ * t10_pi_prepare - prepare PI prior to submitting request to device
+ * @rq: request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Remap protection information to match the
+ * physical LBA.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+ const int tuple_sz = rq->q->integrity.tuple_size; /* stride between tuples */
+ u32 ref_tag = t10_pi_ref_tag(rq);
+ struct bio *bio;
+
+ if (protection_type == T10_PI_TYPE3_PROTECTION)
+ return;
+
+ __rq_for_each_bio(bio, rq) {
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ u32 virt = bip_get_seed(bip) & 0xffffffff; /* low 32 bits of the seed */
+ struct bio_vec iv;
+ struct bvec_iter iter;
+
+ /* Already remapped? */
+ if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
+ break;
+
+ bip_for_each_vec(iv, bip, iter) {
+ void *p, *pmap;
+ unsigned int j;
+
+ pmap = kmap_atomic(iv.bv_page); /* base kept for kunmap_atomic() */
+ p = pmap + iv.bv_offset;
+ for (j = 0; j < iv.bv_len; j += tuple_sz) {
+ struct t10_pi_tuple *pi = p;
+
+ if (be32_to_cpu(pi->ref_tag) == virt) /* other tags stay untouched */
+ pi->ref_tag = cpu_to_be32(ref_tag);
+ virt++;
+ ref_tag++;
+ p += tuple_sz;
+ }
+
+ kunmap_atomic(pmap);
+ }
+
+ bip->bip_flags |= BIP_MAPPED_INTEGRITY; /* tested by the check above */
+ }
+}
+EXPORT_SYMBOL(t10_pi_prepare);
+
+/**
+ * t10_pi_complete - prepare PI prior to returning request to the block layer
+ * @rq: request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ * @intervals: total elements to prepare
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Since the physical start sector was submitted
+ * to the device, we should remap it back to virtual values expected by the
+ * block layer.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_complete(struct request *rq, u8 protection_type,
+ unsigned int intervals)
+{
+ const int tuple_sz = rq->q->integrity.tuple_size; /* stride between tuples */
+ u32 ref_tag = t10_pi_ref_tag(rq);
+ struct bio *bio;
+
+ if (protection_type == T10_PI_TYPE3_PROTECTION)
+ return;
+
+ __rq_for_each_bio(bio, rq) {
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ u32 virt = bip_get_seed(bip) & 0xffffffff; /* low 32 bits of the seed */
+ struct bio_vec iv;
+ struct bvec_iter iter;
+
+ bip_for_each_vec(iv, bip, iter) {
+ void *p, *pmap;
+ unsigned int j;
+
+ pmap = kmap_atomic(iv.bv_page); /* base kept for kunmap_atomic() */
+ p = pmap + iv.bv_offset;
+ for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
+ struct t10_pi_tuple *pi = p;
+
+ if (be32_to_cpu(pi->ref_tag) == ref_tag) /* other tags stay untouched */
+ pi->ref_tag = cpu_to_be32(virt);
+ virt++;
+ ref_tag++;
+ intervals--; /* tuples left to remap */
+ p += tuple_sz;
+ }
+
+ kunmap_atomic(pmap);
+ }
+ }
+}
+EXPORT_SYMBOL(t10_pi_complete);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9421d98..bbebdc3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1119,7 +1119,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
SCpnt->cmnd[0] = WRITE_6;
if (blk_integrity_rq(rq))
- sd_dif_prepare(SCpnt);
+ t10_pi_prepare(SCpnt->request, sdkp->protection_type);
} else if (rq_data_dir(rq) == READ) {
SCpnt->cmnd[0] = READ_6;
@@ -2047,8 +2047,10 @@ static int sd_done(struct scsi_cmnd *SCpnt)
"sd_done: completed %d of %d bytes\n",
good_bytes, scsi_bufflen(SCpnt)));
- if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
- sd_dif_complete(SCpnt, good_bytes);
+ if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
+ good_bytes)
+ t10_pi_complete(SCpnt->request, sdkp->protection_type,
+ good_bytes / scsi_prot_interval(SCpnt));
return good_bytes;
}
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 392c7d0..a7d4f50 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -254,21 +254,12 @@ static inline unsigned int sd_prot_flag_mask(unsigned int prot_op)
#ifdef CONFIG_BLK_DEV_INTEGRITY
extern void sd_dif_config_host(struct scsi_disk *);
-extern void sd_dif_prepare(struct scsi_cmnd *scmd);
-extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline void sd_dif_config_host(struct scsi_disk *disk)
{
}
-static inline int sd_dif_prepare(struct scsi_cmnd *scmd)
-{
- return 0;
-}
-static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
-{
-}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index d8de43d..db72c82 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -95,116 +95,3 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
blk_integrity_register(disk, &bi);
}
-/*
- * The virtual start sector is the one that was originally submitted
- * by the block layer. Due to partitioning, MD/DM cloning, etc. the
- * actual physical start sector is likely to be different. Remap
- * protection information to match the physical LBA.
- *
- * From a protocol perspective there's a slight difference between
- * Type 1 and 2. The latter uses 32-byte CDBs exclusively, and the
- * reference tag is seeded in the CDB. This gives us the potential to
- * avoid virt->phys remapping during write. However, at read time we
- * don't know whether the virt sector is the same as when we wrote it
- * (we could be reading from real disk as opposed to MD/DM device. So
- * we always remap Type 2 making it identical to Type 1.
- *
- * Type 3 does not have a reference tag so no remapping is required.
- */
-void sd_dif_prepare(struct scsi_cmnd *scmd)
-{
- const int tuple_sz = sizeof(struct t10_pi_tuple);
- struct bio *bio;
- struct scsi_disk *sdkp;
- struct t10_pi_tuple *pi;
- u32 phys, virt;
-
- sdkp = scsi_disk(scmd->request->rq_disk);
-
- if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
- return;
-
- phys = t10_pi_ref_tag(scmd->request);
-
- __rq_for_each_bio(bio, scmd->request) {
- struct bio_integrity_payload *bip = bio_integrity(bio);
- struct bio_vec iv;
- struct bvec_iter iter;
- unsigned int j;
-
- /* Already remapped? */
- if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
- break;
-
- virt = bip_get_seed(bip) & 0xffffffff;
-
- bip_for_each_vec(iv, bip, iter) {
- pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
- for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
- if (be32_to_cpu(pi->ref_tag) == virt)
- pi->ref_tag = cpu_to_be32(phys);
-
- virt++;
- phys++;
- }
-
- kunmap_atomic(pi);
- }
-
- bip->bip_flags |= BIP_MAPPED_INTEGRITY;
- }
-}
-
-/*
- * Remap physical sector values in the reference tag to the virtual
- * values expected by the block layer.
- */
-void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
-{
- const int tuple_sz = sizeof(struct t10_pi_tuple);
- struct scsi_disk *sdkp;
- struct bio *bio;
- struct t10_pi_tuple *pi;
- unsigned int j, intervals;
- u32 phys, virt;
-
- sdkp = scsi_disk(scmd->request->rq_disk);
-
- if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION || good_bytes == 0)
- return;
-
- intervals = good_bytes / scsi_prot_interval(scmd);
- phys = t10_pi_ref_tag(scmd->request);
-
- __rq_for_each_bio(bio, scmd->request) {
- struct bio_integrity_payload *bip = bio_integrity(bio);
- struct bio_vec iv;
- struct bvec_iter iter;
-
- virt = bip_get_seed(bip) & 0xffffffff;
-
- bip_for_each_vec(iv, bip, iter) {
- pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
- for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
- if (intervals == 0) {
- kunmap_atomic(pi);
- return;
- }
-
- if (be32_to_cpu(pi->ref_tag) == phys)
- pi->ref_tag = cpu_to_be32(virt);
-
- virt++;
- phys++;
- intervals--;
- }
-
- kunmap_atomic(pi);
- }
- }
-}
-
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c40511f..5a427c2 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -51,5 +51,8 @@ static inline u32 t10_pi_ref_tag(struct request *rq)
extern const struct blk_integrity_profile t10_pi_type1_ip;
extern const struct blk_integrity_profile t10_pi_type3_crc;
extern const struct blk_integrity_profile t10_pi_type3_ip;
+extern void t10_pi_prepare(struct request *rq, u8 protection_type);
+extern void t10_pi_complete(struct request *rq, u8 protection_type,
+ unsigned int intervals);
#endif
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v5 3/3] nvme: use blk API to remap ref tags for IOs with metadata
2018-07-29 21:15 [PATCH v5 1/3] block: move ref_tag calculation func to the block layer Max Gurtovoy
2018-07-29 21:15 ` [PATCH v5 2/3] block: move dif_prepare/dif_complete functions to " Max Gurtovoy
@ 2018-07-29 21:15 ` Max Gurtovoy
2018-07-30 14:27 ` [PATCH v5 1/3] block: move ref_tag calculation func to the block layer Jens Axboe
2 siblings, 0 replies; 4+ messages in thread
From: Max Gurtovoy @ 2018-07-29 21:15 UTC (permalink / raw)
Also move the remapping logic to the nvme core driver instead of
implementing it in the nvme pci driver. This way all the other nvme
transport drivers will benefit from it (should they implement metadata
support).
Suggested-by: Christoph Hellwig <hch at lst.de>
Cc: Jens Axboe <axboe at kernel.dk>
Cc: Martin K. Petersen <martin.petersen at oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen at oracle.com>
Acked-by: Keith Busch <keith.busch at intel.com>
Signed-off-by: Max Gurtovoy <maxg at mellanox.com>
---
changes from v4:
- Added Martin's and Keith's signatures
---
drivers/nvme/host/core.c | 18 ++++++++++++
drivers/nvme/host/nvme.h | 9 +-----
drivers/nvme/host/pci.c | 75 +-----------------------------------------------
3 files changed, 20 insertions(+), 82 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 191177b..b57abe5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -617,6 +617,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
return BLK_STS_NOTSUPP;
control |= NVME_RW_PRINFO_PRACT;
+ } else if (req_op(req) == REQ_OP_WRITE) {
+ t10_pi_prepare(req, ns->pi_type);
}
switch (ns->pi_type) {
@@ -637,6 +639,22 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
return 0;
}
+void nvme_cleanup_cmd(struct request *req)
+{
+ if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+ nvme_req(req)->status == 0) { /* integrity reads with zero status only */
+ struct nvme_ns *ns = req->rq_disk->private_data;
+
+ t10_pi_complete(req, ns->pi_type,
+ blk_rq_bytes(req) >> ns->lba_shift); /* request bytes scaled by lba_shift */
+ }
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { /* release the special_vec buffer */
+ kfree(page_address(req->special_vec.bv_page) +
+ req->special_vec.bv_offset);
+ }
+}
+EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
+
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd)
{
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 0c4a33d..dfc01ff 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -356,14 +356,6 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
return (sector >> (ns->lba_shift - 9));
}
-static inline void nvme_cleanup_cmd(struct request *req)
-{
- if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
- kfree(page_address(req->special_vec.bv_page) +
- req->special_vec.bv_offset);
- }
-}
-
static inline void nvme_end_request(struct request *req, __le16 status,
union nvme_result result)
{
@@ -420,6 +412,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
+void nvme_cleanup_cmd(struct request *req);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ddd441b..03beac5 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -535,73 +535,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
mempool_free(iod->sg, dev->iod_mempool);
}
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
- if (be32_to_cpu(pi->ref_tag) == v)
- pi->ref_tag = cpu_to_be32(p);
-}
-
-static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
- if (be32_to_cpu(pi->ref_tag) == p)
- pi->ref_tag = cpu_to_be32(v);
-}
-
-/**
- * nvme_dif_remap - remaps ref tags to bip seed and physical lba
- *
- * The virtual start sector is the one that was originally submitted by the
- * block layer. Due to partitioning, MD/DM cloning, etc. the actual physical
- * start sector may be different. Remap protection information to match the
- * physical LBA on writes, and back to the original seed on reads.
- *
- * Type 0 and 3 do not have a ref tag, so no remapping required.
- */
-static void nvme_dif_remap(struct request *req,
- void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
-{
- struct nvme_ns *ns = req->rq_disk->private_data;
- struct bio_integrity_payload *bip;
- struct t10_pi_tuple *pi;
- void *p, *pmap;
- u32 i, nlb, ts, phys, virt;
-
- if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3)
- return;
-
- bip = bio_integrity(req->bio);
- if (!bip)
- return;
-
- pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
-
- p = pmap;
- virt = bip_get_seed(bip);
- phys = nvme_block_nr(ns, blk_rq_pos(req));
- nlb = (blk_rq_bytes(req) >> ns->lba_shift);
- ts = ns->disk->queue->integrity.tuple_size;
-
- for (i = 0; i < nlb; i++, virt++, phys++) {
- pi = (struct t10_pi_tuple *)p;
- dif_swap(phys, virt, pi);
- p += ts;
- }
- kunmap_atomic(pmap);
-}
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-static void nvme_dif_remap(struct request *req,
- void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
-{
-}
-static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-}
-static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-}
-#endif
-
static void nvme_print_sgl(struct scatterlist *sgl, int nents)
{
int i;
@@ -827,9 +760,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
goto out_unmap;
- if (req_op(req) == REQ_OP_WRITE)
- nvme_dif_remap(req, nvme_dif_prep);
-
if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
goto out_unmap;
}
@@ -852,11 +782,8 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
if (iod->nents) {
dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
- if (blk_integrity_rq(req)) {
- if (req_op(req) == REQ_OP_READ)
- nvme_dif_remap(req, nvme_dif_complete);
+ if (blk_integrity_rq(req))
dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
- }
}
nvme_cleanup_cmd(req);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread