* [PATCH 1/2 v4] block: change rq_integrity_vec to respect the iterator
2024-05-27 15:39 [PATCH 0/2 v4] dm-crypt support for per-sector NVMe metadata Mikulas Patocka
@ 2024-05-27 15:40 ` Mikulas Patocka
2024-05-28 3:47 ` Anuj gupta
` (2 more replies)
2024-05-27 15:40 ` [PATCH 2/2 v4] dm-crypt: support for per-sector NVMe metadata Mikulas Patocka
2024-06-26 15:14 ` (subset) [PATCH 0/2 v4] dm-crypt " Jens Axboe
2 siblings, 3 replies; 7+ messages in thread
From: Mikulas Patocka @ 2024-05-27 15:40 UTC (permalink / raw)
To: Axboe, Keith Busch, Christoph Hellwig, Sagi Grimberg,
Mike Snitzer, Milan Broz, Anuj gupta
Cc: linux-block, dm-devel, linux-nvme
If we allocate a bio that is larger than the NVMe maximum request size,
attach integrity metadata to it and send it to the NVMe subsystem, the
integrity metadata will be corrupted.
Splitting the bio works correctly. The function bio_split will clone the
bio, trim the iterator of the first bio and advance the iterator of the
second bio.
However, the function rq_integrity_vec has a bug - it returns the first
vector of the bio's metadata and completely disregards the metadata
iterator that was advanced when the bio was split. Thus, the second bio
uses the same metadata as the first bio and this leads to metadata
corruption.
This commit changes rq_integrity_vec, so that it calls mp_bvec_iter_bvec
instead of returning the first vector. mp_bvec_iter_bvec reads the
iterator and uses it to build a bvec for the current position in the
iterator.
The "queue_max_integrity_segments(rq->q) > 1" check was removed, because
the updated rq_integrity_vec function works correctly with multiple
segments.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/nvme/host/pci.c | 6 +++---
include/linux/blk-integrity.h | 14 +++++++-------
2 files changed, 10 insertions(+), 10 deletions(-)
Index: linux-2.6/drivers/nvme/host/pci.c
===================================================================
--- linux-2.6.orig/drivers/nvme/host/pci.c
+++ linux-2.6/drivers/nvme/host/pci.c
@@ -825,9 +825,9 @@ static blk_status_t nvme_map_metadata(st
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct bio_vec bv = rq_integrity_vec(req);
- iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
- rq_dma_dir(req), 0);
+ iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->meta_dma))
return BLK_STS_IOERR;
cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
@@ -966,7 +966,7 @@ static __always_inline void nvme_pci_unm
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_unmap_page(dev->dev, iod->meta_dma,
- rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+ rq_integrity_vec(req).bv_len, rq_dma_dir(req));
}
if (blk_rq_nr_phys_segments(req))
Index: linux-2.6/include/linux/blk-integrity.h
===================================================================
--- linux-2.6.orig/include/linux/blk-integrity.h
+++ linux-2.6/include/linux/blk-integrity.h
@@ -106,14 +106,13 @@ static inline bool blk_integrity_rq(stru
}
/*
- * Return the first bvec that contains integrity data. Only drivers that are
- * limited to a single integrity segment should use this helper.
+ * Return the current bvec that contains the integrity data. bip_iter may be
+ * advanced to iterate over the integrity data.
*/
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
{
- if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
- return NULL;
- return rq->bio->bi_integrity->bip_vec;
+ return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
+ rq->bio->bi_integrity->bip_iter);
}
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline int blk_rq_count_integrity_sg(struct request_queue *q,
@@ -179,7 +178,8 @@ static inline int blk_integrity_rq(struc
static inline struct bio_vec *rq_integrity_vec(struct request *rq)
{
- return NULL;
+ /* the optimizer will remove all calls to this function */
+ return (struct bio_vec){ };
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
#endif /* _LINUX_BLK_INTEGRITY_H */
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 2/2 v4] dm-crypt: support for per-sector NVMe metadata
2024-05-27 15:39 [PATCH 0/2 v4] dm-crypt support for per-sector NVMe metadata Mikulas Patocka
2024-05-27 15:40 ` [PATCH 1/2 v4] block: change rq_integrity_vec to respect the iterator Mikulas Patocka
@ 2024-05-27 15:40 ` Mikulas Patocka
2024-06-26 15:14 ` (subset) [PATCH 0/2 v4] dm-crypt " Jens Axboe
2 siblings, 0 replies; 7+ messages in thread
From: Mikulas Patocka @ 2024-05-27 15:40 UTC (permalink / raw)
To: Axboe, Keith Busch, Christoph Hellwig, Sagi Grimberg,
Mike Snitzer, Milan Broz, Anuj gupta
Cc: linux-block, dm-devel, linux-nvme
Support per-sector NVMe metadata in dm-crypt.
This commit changes dm-crypt so that it can use NVMe metadata to store
authentication information. We can put dm-crypt directly on top of an
NVMe device, without using dm-integrity.
This commit improves write throughput by a factor of two, because there
will be no writes to the dm-integrity journal.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 53 ++++++++++++++++++++++++++++----------------------
1 file changed, 30 insertions(+), 23 deletions(-)
Index: linux-2.6/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-crypt.c
+++ linux-2.6/drivers/md/dm-crypt.c
@@ -211,7 +211,8 @@ struct crypt_config {
unsigned int integrity_tag_size;
unsigned int integrity_iv_size;
- unsigned int on_disk_tag_size;
+ unsigned int used_tag_size;
+ unsigned int tuple_size;
/*
* pool for per bio private data, crypto requests,
@@ -1148,14 +1149,14 @@ static int dm_crypt_integrity_io_alloc(s
unsigned int tag_len;
int ret;
- if (!bio_sectors(bio) || !io->cc->on_disk_tag_size)
+ if (!bio_sectors(bio) || !io->cc->tuple_size)
return 0;
bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
if (IS_ERR(bip))
return PTR_ERR(bip);
- tag_len = io->cc->on_disk_tag_size * (bio_sectors(bio) >> io->cc->sector_shift);
+ tag_len = io->cc->tuple_size * (bio_sectors(bio) >> io->cc->sector_shift);
bip->bip_iter.bi_sector = io->cc->start + io->sector;
@@ -1173,24 +1174,30 @@ static int crypt_integrity_ctr(struct cr
struct blk_integrity *bi = blk_get_integrity(cc->dev->bdev->bd_disk);
struct mapped_device *md = dm_table_get_md(ti->table);
+ if (!bi) {
+ ti->error = "No integrity profile.";
+ return -EINVAL;
+ }
+
/* From now we require underlying device with our integrity profile */
- if (!bi || strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG")) {
+ if (strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG") &&
+ strcasecmp(bi->profile->name, "nop")) {
ti->error = "Integrity profile not supported.";
return -EINVAL;
}
- if (bi->tag_size != cc->on_disk_tag_size ||
- bi->tuple_size != cc->on_disk_tag_size) {
+ if (bi->tuple_size < cc->used_tag_size) {
ti->error = "Integrity profile tag size mismatch.";
return -EINVAL;
}
+ cc->tuple_size = bi->tuple_size;
if (1 << bi->interval_exp != cc->sector_size) {
ti->error = "Integrity profile sector size mismatch.";
return -EINVAL;
}
if (crypt_integrity_aead(cc)) {
- cc->integrity_tag_size = cc->on_disk_tag_size - cc->integrity_iv_size;
+ cc->integrity_tag_size = cc->used_tag_size - cc->integrity_iv_size;
DMDEBUG("%s: Integrity AEAD, tag size %u, IV size %u.", dm_device_name(md),
cc->integrity_tag_size, cc->integrity_iv_size);
@@ -1202,7 +1209,7 @@ static int crypt_integrity_ctr(struct cr
DMDEBUG("%s: Additional per-sector space %u bytes for IV.", dm_device_name(md),
cc->integrity_iv_size);
- if ((cc->integrity_tag_size + cc->integrity_iv_size) != bi->tag_size) {
+ if ((cc->integrity_tag_size + cc->integrity_iv_size) > cc->tuple_size) {
ti->error = "Not enough space for integrity tag in the profile.";
return -EINVAL;
}
@@ -1281,7 +1288,7 @@ static void *tag_from_dmreq(struct crypt
struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx);
return &io->integrity_metadata[*org_tag_of_dmreq(cc, dmreq) *
- cc->on_disk_tag_size];
+ cc->tuple_size];
}
static void *iv_tag_from_dmreq(struct crypt_config *cc,
@@ -1362,9 +1369,9 @@ static int crypt_convert_block_aead(stru
aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out,
cc->sector_size, iv);
r = crypto_aead_encrypt(req);
- if (cc->integrity_tag_size + cc->integrity_iv_size != cc->on_disk_tag_size)
+ if (cc->integrity_tag_size + cc->integrity_iv_size != cc->tuple_size)
memset(tag + cc->integrity_tag_size + cc->integrity_iv_size, 0,
- cc->on_disk_tag_size - (cc->integrity_tag_size + cc->integrity_iv_size));
+ cc->tuple_size - (cc->integrity_tag_size + cc->integrity_iv_size));
} else {
aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out,
cc->sector_size + cc->integrity_tag_size, iv);
@@ -1794,7 +1801,7 @@ static void crypt_dec_pending(struct dm_
return;
if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) &&
- cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) {
+ cc->used_tag_size && bio_data_dir(base_bio) == READ) {
io->ctx.aead_recheck = true;
io->ctx.aead_failed = false;
io->error = 0;
@@ -3173,7 +3180,7 @@ static int crypt_ctr_optional(struct dm_
ti->error = "Invalid integrity arguments";
return -EINVAL;
}
- cc->on_disk_tag_size = val;
+ cc->used_tag_size = val;
sval = strchr(opt_string + strlen("integrity:"), ':') + 1;
if (!strcasecmp(sval, "aead")) {
set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags);
@@ -3384,12 +3391,12 @@ static int crypt_ctr(struct dm_target *t
if (ret)
goto bad;
- cc->tag_pool_max_sectors = POOL_ENTRY_SIZE / cc->on_disk_tag_size;
+ cc->tag_pool_max_sectors = POOL_ENTRY_SIZE / cc->tuple_size;
if (!cc->tag_pool_max_sectors)
cc->tag_pool_max_sectors = 1;
ret = mempool_init_kmalloc_pool(&cc->tag_pool, MIN_IOS,
- cc->tag_pool_max_sectors * cc->on_disk_tag_size);
+ cc->tag_pool_max_sectors * cc->tuple_size);
if (ret) {
ti->error = "Cannot allocate integrity tags mempool";
goto bad;
@@ -3464,7 +3471,7 @@ static int crypt_map(struct dm_target *t
* Check if bio is too large, split as needed.
*/
if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_VECS << PAGE_SHIFT)) &&
- (bio_data_dir(bio) == WRITE || cc->on_disk_tag_size))
+ (bio_data_dir(bio) == WRITE || cc->used_tag_size))
dm_accept_partial_bio(bio, ((BIO_MAX_VECS << PAGE_SHIFT) >> SECTOR_SHIFT));
/*
@@ -3480,8 +3487,8 @@ static int crypt_map(struct dm_target *t
io = dm_per_bio_data(bio, cc->per_bio_data_size);
crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
- if (cc->on_disk_tag_size) {
- unsigned int tag_len = cc->on_disk_tag_size * (bio_sectors(bio) >> cc->sector_shift);
+ if (cc->tuple_size) {
+ unsigned int tag_len = cc->tuple_size * (bio_sectors(bio) >> cc->sector_shift);
if (unlikely(tag_len > KMALLOC_MAX_SIZE))
io->integrity_metadata = NULL;
@@ -3552,7 +3559,7 @@ static void crypt_status(struct dm_targe
num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT);
num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags);
- if (cc->on_disk_tag_size)
+ if (cc->used_tag_size)
num_feature_args++;
if (num_feature_args) {
DMEMIT(" %d", num_feature_args);
@@ -3566,8 +3573,8 @@ static void crypt_status(struct dm_targe
DMEMIT(" no_read_workqueue");
if (test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))
DMEMIT(" no_write_workqueue");
- if (cc->on_disk_tag_size)
- DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth);
+ if (cc->used_tag_size)
+ DMEMIT(" integrity:%u:%s", cc->used_tag_size, cc->cipher_auth);
if (cc->sector_size != (1 << SECTOR_SHIFT))
DMEMIT(" sector_size:%d", cc->sector_size);
if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
@@ -3588,9 +3595,9 @@ static void crypt_status(struct dm_targe
DMEMIT(",iv_large_sectors=%c", test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags) ?
'y' : 'n');
- if (cc->on_disk_tag_size)
+ if (cc->used_tag_size)
DMEMIT(",integrity_tag_size=%u,cipher_auth=%s",
- cc->on_disk_tag_size, cc->cipher_auth);
+ cc->used_tag_size, cc->cipher_auth);
if (cc->sector_size != (1 << SECTOR_SHIFT))
DMEMIT(",sector_size=%d", cc->sector_size);
if (cc->cipher_string)
^ permalink raw reply [flat|nested] 7+ messages in thread