* [PATCHv2 2/3] dm-crypt: allow unaligned bio_vecs for direct io
2025-11-24 17:09 [PATCHv2 0/2] dm-crypt: support relaxed memory alignment Keith Busch
2025-11-24 17:09 ` [PATCHv2 1/3] block: remove stacking default dma_alignment Keith Busch
@ 2025-11-24 17:09 ` Keith Busch
2025-11-24 17:09 ` [PATCHv2 3/3] dm-crypt: dynamic scatterlist for many segments Keith Busch
2 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2025-11-24 17:09 UTC
To: dm-devel, linux-block, snitzer, hch, axboe, ebiggers; +Cc: Keith Busch
From: Keith Busch <kbusch@kernel.org>
Many storage devices can handle DMA for data that is not aligned to the
logical block size. The block and filesystem layers have introduced
updates to allow that kind of memory alignment flexibility when
possible.
dm-crypt, however, currently constrains itself to aligned memory because
it sends a single scatterlist element for the input to the encrypt and
decrypt algorithms. This forces applications that have unaligned data to
copy through a bounce buffer, increasing CPU and memory utilization.
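As a rough illustration of what this buys applications (not part of the
patch): a userspace sketch that issues an O_DIRECT read into a buffer
that is only 4-byte aligned. The device path /dev/mapper/crypt0 and the
4-byte offset are made-up values; whether the read succeeds without a
bounce copy depends on the dma_alignment the target reports, which is
visible in /sys/block/<dev>/queue/dma_alignment.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	void *base;
	char *buf;
	int fd;

	/* Hypothetical dm-crypt target node; adjust to a real device. */
	fd = open("/dev/mapper/crypt0", O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Page-aligned allocation, then deliberately offset by 4 bytes. */
	if (posix_memalign(&base, 4096, 8192)) {
		close(fd);
		return 1;
	}
	buf = (char *)base + 4;

	/*
	 * 4096 bytes at file offset 0: length and file offset stay block
	 * aligned, only the memory address is not.  A relaxed
	 * dma_alignment lets this through without a bounce buffer.
	 */
	if (pread(fd, buf, 4096, 0) != 4096)
		perror("pread");

	free(base);
	close(fd);
	return 0;
}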
Use multiple scatterlist elements to relax the memory alignment
requirement. To keep this simple, the relaxed constraint is enabled only
for certain encryption and initialization vector types, specifically the
ones that don't have any additional use for the request's base
scatterlist elements beyond holding the decrypted data.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
drivers/md/dm-crypt.c | 73 ++++++++++++++++++++++++++++++++-----------
1 file changed, 55 insertions(+), 18 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 5ef43231fe77f..0b3f5411695ac 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -214,6 +214,7 @@ struct crypt_config {
unsigned int key_extra_size; /* additional keys length */
unsigned int key_mac_size; /* MAC key size for authenc(...) */
+ unsigned int dio_alignment;
unsigned int integrity_tag_size;
unsigned int integrity_iv_size;
unsigned int used_tag_size;
@@ -1424,22 +1425,49 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
return r;
}
+static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
+ struct bvec_iter *iter, struct bio *bio,
+ int max_segs)
+{
+ unsigned int bytes = cc->sector_size;
+ struct bvec_iter tmp = *iter;
+ int segs, i = 0;
+
+ bio_advance_iter(bio, &tmp, bytes);
+ segs = tmp.bi_idx - iter->bi_idx + !!tmp.bi_bvec_done;
+ if (segs > max_segs)
+ return -EIO;
+
+ sg_init_table(sg, segs);
+ do {
+ struct bio_vec bv = mp_bvec_iter_bvec(bio->bi_io_vec, *iter);
+ int len = min(bytes, bv.bv_len);
+
+ /* Reject unexpected unaligned bio. */
+ if (unlikely((len | bv.bv_offset) &
+ bdev_dma_alignment(cc->dev->bdev)))
+ return -EIO;
+
+ sg_set_page(&sg[i++], bv.bv_page, len, bv.bv_offset);
+ bio_advance_iter_single(bio, iter, len);
+ bytes -= len;
+ } while (bytes);
+
+ if (WARN_ON_ONCE(i != segs))
+ return -EINVAL;
+ return 0;
+}
+
static int crypt_convert_block_skcipher(struct crypt_config *cc,
struct convert_context *ctx,
struct skcipher_request *req,
unsigned int tag_offset)
{
- struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
- struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
struct scatterlist *sg_in, *sg_out;
struct dm_crypt_request *dmreq;
u8 *iv, *org_iv, *tag_iv;
__le64 *sector;
- int r = 0;
-
- /* Reject unexpected unaligned bio. */
- if (unlikely(bv_in.bv_len & (cc->sector_size - 1)))
- return -EIO;
+ int r;
dmreq = dmreq_of_req(cc, req);
dmreq->iv_sector = ctx->cc_sector;
@@ -1456,15 +1484,18 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
sector = org_sector_of_dmreq(cc, dmreq);
*sector = cpu_to_le64(ctx->cc_sector - cc->iv_offset);
- /* For skcipher we use only the first sg item */
sg_in = &dmreq->sg_in[0];
sg_out = &dmreq->sg_out[0];
- sg_init_table(sg_in, 1);
- sg_set_page(sg_in, bv_in.bv_page, cc->sector_size, bv_in.bv_offset);
+ r = crypt_build_sgl(cc, sg_in, &ctx->iter_in, ctx->bio_in,
+ ARRAY_SIZE(dmreq->sg_in));
+ if (r < 0)
+ return r;
- sg_init_table(sg_out, 1);
- sg_set_page(sg_out, bv_out.bv_page, cc->sector_size, bv_out.bv_offset);
+ r = crypt_build_sgl(cc, sg_out, &ctx->iter_out, ctx->bio_out,
+ ARRAY_SIZE(dmreq->sg_out));
+ if (r < 0)
+ return r;
if (cc->iv_gen_ops) {
/* For READs use IV stored in integrity metadata */
@@ -1495,9 +1526,6 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
r = cc->iv_gen_ops->post(cc, org_iv, dmreq);
- bio_advance_iter(ctx->bio_in, &ctx->iter_in, cc->sector_size);
- bio_advance_iter(ctx->bio_out, &ctx->iter_out, cc->sector_size);
-
return r;
}
@@ -2829,10 +2857,12 @@ static void crypt_dtr(struct dm_target *ti)
static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
{
struct crypt_config *cc = ti->private;
+ bool unaligned_allowed = true;
- if (crypt_integrity_aead(cc))
+ if (crypt_integrity_aead(cc)) {
cc->iv_size = crypto_aead_ivsize(any_tfm_aead(cc));
- else
+ unaligned_allowed = false;
+ } else
cc->iv_size = crypto_skcipher_ivsize(any_tfm(cc));
if (cc->iv_size)
@@ -2868,6 +2898,7 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
if (cc->key_extra_size > ELEPHANT_MAX_KEY_SIZE)
return -EINVAL;
set_bit(CRYPT_ENCRYPT_PREPROCESS, &cc->cipher_flags);
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "lmk") == 0) {
cc->iv_gen_ops = &crypt_iv_lmk_ops;
/*
@@ -2880,10 +2911,12 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
cc->key_parts++;
cc->key_extra_size = cc->key_size / cc->key_parts;
}
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "tcw") == 0) {
cc->iv_gen_ops = &crypt_iv_tcw_ops;
cc->key_parts += 2; /* IV + whitening */
cc->key_extra_size = cc->iv_size + TCW_WHITENING_SIZE;
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "random") == 0) {
cc->iv_gen_ops = &crypt_iv_random_ops;
/* Need storage space in integrity fields. */
@@ -2893,6 +2926,8 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
return -EINVAL;
}
+ if (unaligned_allowed)
+ cc->dio_alignment = 3;
return 0;
}
@@ -3286,6 +3321,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->key_size = key_size;
cc->sector_size = (1 << SECTOR_SHIFT);
cc->sector_shift = 0;
+ cc->dio_alignment = 0;
ti->private = cc;
@@ -3750,7 +3786,8 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->physical_block_size =
max_t(unsigned int, limits->physical_block_size, cc->sector_size);
limits->io_min = max_t(unsigned int, limits->io_min, cc->sector_size);
- limits->dma_alignment = limits->logical_block_size - 1;
+ limits->dma_alignment = min_not_zero(limits->logical_block_size - 1,
+ cc->dio_alignment);
/*
* For zoned dm-crypt targets, there will be no internal splitting of
--
2.47.3
* [PATCHv2 3/3] dm-crypt: dynamic scatterlist for many segments
2025-11-24 17:09 [PATCHv2 0/2] dm-crypt: support relaxed memory alignment Keith Busch
2025-11-24 17:09 ` [PATCHv2 1/3] block: remove stacking default dma_alignment Keith Busch
2025-11-24 17:09 ` [PATCHv2 2/3] dm-crypt: allow unaligned bio_vecs for direct io Keith Busch
@ 2025-11-24 17:09 ` Keith Busch
2025-11-28 6:15 ` Dan Carpenter
2 siblings, 1 reply; 8+ messages in thread
From: Keith Busch @ 2025-11-24 17:09 UTC
To: dm-devel, linux-block, snitzer, hch, axboe, ebiggers; +Cc: Keith Busch
From: Keith Busch <kbusch@kernel.org>
In the unlikely case where the base bio uses a highly fragmented vector,
the four inline scatterlist elements may not be enough, so allocate a
temporary scatterlist to cover that case.
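The allocation strategy boils down to the inline-first fallback sketched
below; the helper names are illustrative, not the ones the patch adds:
use the fixed four-entry table when it fits, switch to a per-request
heap allocation otherwise, and free only the heap copy afterwards.

#include <linux/scatterlist.h>
#include <linux/slab.h>

#define INLINE_SEGS 4

/* Pick the inline table when it is big enough, else fall back to the heap. */
static struct scatterlist *sgl_get(struct scatterlist *inline_sg,
				   unsigned int segs)
{
	if (segs <= INLINE_SEGS)
		return inline_sg;
	return kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOIO);
}

/* Free only what was heap allocated; kfree(NULL) is a harmless no-op. */
static void sgl_put(struct scatterlist *inline_sg, struct scatterlist *sg)
{
	if (sg != inline_sg)
		kfree(sg);
}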
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
drivers/md/dm-crypt.c | 48 +++++++++++++++++++++++++++++++++++--------
1 file changed, 39 insertions(+), 9 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 0b3f5411695ac..a634881a490ce 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -97,6 +97,8 @@ struct dm_crypt_request {
struct convert_context *ctx;
struct scatterlist sg_in[4];
struct scatterlist sg_out[4];
+ struct scatterlist *__sg_in;
+ struct scatterlist *__sg_out;
u64 iv_sector;
};
@@ -1346,6 +1348,8 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags))
dmreq->iv_sector >>= cc->sector_shift;
dmreq->ctx = ctx;
+ dmreq->__sg_in = NULL;
+ dmreq->__sg_out = NULL;
*org_tag_of_dmreq(cc, dmreq) = tag_offset;
@@ -1425,18 +1429,22 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
return r;
}
-static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
+static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist **psg,
struct bvec_iter *iter, struct bio *bio,
int max_segs)
{
unsigned int bytes = cc->sector_size;
+ struct scatterlist *sg = *psg;
struct bvec_iter tmp = *iter;
int segs, i = 0;
bio_advance_iter(bio, &tmp, bytes);
segs = tmp.bi_idx - iter->bi_idx + !!tmp.bi_bvec_done;
- if (segs > max_segs)
- return -EIO;
+ if (segs > max_segs) {
+ sg = kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOIO);
+ if (!sg)
+ return -ENOMEM;
+ }
sg_init_table(sg, segs);
do {
@@ -1446,7 +1454,7 @@ static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
/* Reject unexpected unaligned bio. */
if (unlikely((len | bv.bv_offset) &
bdev_dma_alignment(cc->dev->bdev)))
- return -EIO;
+ goto error;
sg_set_page(&sg[i++], bv.bv_page, len, bv.bv_offset);
bio_advance_iter_single(bio, iter, len);
@@ -1454,8 +1462,13 @@ static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
} while (bytes);
if (WARN_ON_ONCE(i != segs))
- return -EINVAL;
+ goto error;
+ *psg = sg;
return 0;
+error:
+ if (sg != *psg)
+ kfree(sg);
+ return -EIO;
}
static int crypt_convert_block_skcipher(struct crypt_config *cc,
@@ -1484,18 +1497,26 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
sector = org_sector_of_dmreq(cc, dmreq);
*sector = cpu_to_le64(ctx->cc_sector - cc->iv_offset);
+ dmreq->__sg_in = NULL;
+ dmreq->__sg_out = NULL;
sg_in = &dmreq->sg_in[0];
sg_out = &dmreq->sg_out[0];
- r = crypt_build_sgl(cc, sg_in, &ctx->iter_in, ctx->bio_in,
+ r = crypt_build_sgl(cc, &sg_in, &ctx->iter_in, ctx->bio_in,
ARRAY_SIZE(dmreq->sg_in));
if (r < 0)
return r;
+ else if (sg_in != dmreq->sg_in)
+ dmreq->__sg_in = sg_in;
- r = crypt_build_sgl(cc, sg_out, &ctx->iter_out, ctx->bio_out,
+ r = crypt_build_sgl(cc, &sg_out, &ctx->iter_out, ctx->bio_out,
ARRAY_SIZE(dmreq->sg_out));
- if (r < 0)
+ if (r < 0) {
+ kfree(dmreq->__sg_in);
return r;
+ } else if (sg_out != dmreq->sg_out) {
+ dmreq->__sg_out = sg_out;
+ }
if (cc->iv_gen_ops) {
/* For READs use IV stored in integrity metadata */
@@ -1504,7 +1525,7 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
} else {
r = cc->iv_gen_ops->generator(cc, org_iv, dmreq);
if (r < 0)
- return r;
+ goto out;
/* Data can be already preprocessed in generator */
if (test_bit(CRYPT_ENCRYPT_PREPROCESS, &cc->cipher_flags))
sg_in = sg_out;
@@ -1526,6 +1547,13 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
r = cc->iv_gen_ops->post(cc, org_iv, dmreq);
+out:
+ if (r != -EINPROGRESS && r != -EBUSY) {
+ kfree(dmreq->__sg_in);
+ kfree(dmreq->__sg_out);
+ dmreq->__sg_in = NULL;
+ dmreq->__sg_out = NULL;
+ }
return r;
}
@@ -2301,6 +2329,8 @@ static void kcryptd_async_done(void *data, int error)
} else if (error < 0)
io->error = BLK_STS_IOERR;
+ kfree(dmreq->__sg_in);
+ kfree(dmreq->__sg_out);
crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
if (!atomic_dec_and_test(&ctx->cc_pending))
--
2.47.3