* [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io
@ 2026-03-16 15:02 Keith Busch
2026-03-16 15:09 ` [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments Keith Busch
` (2 more replies)
0 siblings, 3 replies; 16+ messages in thread
From: Keith Busch @ 2026-03-16 15:02 UTC (permalink / raw)
To: dm-devel; +Cc: linux-block, snitzer, Keith Busch
From: Keith Busch <kbusch@kernel.org>
Many storage devices can handle DMA for data that is not aligned to the
sector block size. The block and filesystem layers have introduced
updates to allow that kind of memory alignment flexibility when
possible.
dm-crypt, however, currently constrains itself to aligned memory because
it sends a single scatterlist element for the in/out list to the encrypt
and decrypt algorithms. This forces applications that have unaligned
data to copy through a bounce buffer, increasing CPU and memory
utilization.
Use multiple scatterlist elements to relax the memory alignment
requirement. To keep this simple, this more flexible constraint is
enabled only for certain encryption and initialization vector types,
specifically the ones that don't have additional use for the request
base scatterlist elements beyond holding decrypted data.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
drivers/md/dm-crypt.c | 79 +++++++++++++++++++++++++++++++++----------
drivers/md/dm-table.c | 1 +
2 files changed, 62 insertions(+), 18 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 54823341c9fda..bbb4346d0127f 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -150,6 +150,7 @@ enum cipher_flags {
CRYPT_IV_LARGE_SECTORS, /* Calculate IV from sector_size, not 512B sectors */
CRYPT_ENCRYPT_PREPROCESS, /* Must preprocess data for encryption (elephant) */
CRYPT_KEY_MAC_SIZE_SET, /* The integrity_key_size option was used */
+ CRYPT_DISCONTIGUOUS_SEGS, /* Can use partial sector segments */
};
/*
@@ -215,6 +216,7 @@ struct crypt_config {
unsigned int key_extra_size; /* additional keys length */
unsigned int key_mac_size; /* MAC key size for authenc(...) */
+ unsigned int io_alignment;
unsigned int integrity_tag_size;
unsigned int integrity_iv_size;
unsigned int used_tag_size;
@@ -1384,22 +1386,48 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
return r;
}
+static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
+ struct bvec_iter *iter, struct bio *bio,
+ int max_segs)
+{
+ unsigned int bytes = cc->sector_size;
+ struct bvec_iter tmp = *iter;
+ int segs, i = 0;
+
+ bio_advance_iter(bio, &tmp, bytes);
+ segs = tmp.bi_idx - iter->bi_idx + !!tmp.bi_bvec_done;
+ if (segs > max_segs)
+ return -EIO;
+
+ sg_init_table(sg, segs);
+ do {
+ struct bio_vec bv = mp_bvec_iter_bvec(bio->bi_io_vec, *iter);
+ int len = min(bytes, bv.bv_len);
+
+ /* Reject unexpected unaligned bio. */
+ if (unlikely((len | bv.bv_offset) & cc->io_alignment))
+ return -EIO;
+
+ sg_set_page(&sg[i++], bv.bv_page, len, bv.bv_offset);
+ bio_advance_iter_single(bio, iter, len);
+ bytes -= len;
+ } while (bytes);
+
+ if (WARN_ON_ONCE(i != segs))
+ return -EIO;
+ return 0;
+}
+
static int crypt_convert_block_skcipher(struct crypt_config *cc,
struct convert_context *ctx,
struct skcipher_request *req,
unsigned int tag_offset)
{
- struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
- struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
struct scatterlist *sg_in, *sg_out;
struct dm_crypt_request *dmreq;
u8 *iv, *org_iv, *tag_iv;
__le64 *sector;
- int r = 0;
-
- /* Reject unexpected unaligned bio. */
- if (unlikely(bv_in.bv_len & (cc->sector_size - 1)))
- return -EIO;
+ int r;
dmreq = dmreq_of_req(cc, req);
dmreq->iv_sector = ctx->cc_sector;
@@ -1416,15 +1444,18 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
sector = org_sector_of_dmreq(cc, dmreq);
*sector = cpu_to_le64(ctx->cc_sector - cc->iv_offset);
- /* For skcipher we use only the first sg item */
sg_in = &dmreq->sg_in[0];
sg_out = &dmreq->sg_out[0];
- sg_init_table(sg_in, 1);
- sg_set_page(sg_in, bv_in.bv_page, cc->sector_size, bv_in.bv_offset);
+ r = crypt_build_sgl(cc, sg_in, &ctx->iter_in, ctx->bio_in,
+ ARRAY_SIZE(dmreq->sg_in));
+ if (r < 0)
+ return r;
- sg_init_table(sg_out, 1);
- sg_set_page(sg_out, bv_out.bv_page, cc->sector_size, bv_out.bv_offset);
+ r = crypt_build_sgl(cc, sg_out, &ctx->iter_out, ctx->bio_out,
+ ARRAY_SIZE(dmreq->sg_out));
+ if (r < 0)
+ return r;
if (cc->iv_gen_ops) {
/* For READs use IV stored in integrity metadata */
@@ -1455,9 +1486,6 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
r = cc->iv_gen_ops->post(cc, org_iv, dmreq);
- bio_advance_iter(ctx->bio_in, &ctx->iter_in, cc->sector_size);
- bio_advance_iter(ctx->bio_out, &ctx->iter_out, cc->sector_size);
-
return r;
}
@@ -2788,10 +2816,12 @@ static void crypt_dtr(struct dm_target *ti)
static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
{
struct crypt_config *cc = ti->private;
+ bool unaligned_allowed = true;
- if (crypt_integrity_aead(cc))
+ if (crypt_integrity_aead(cc)) {
cc->iv_size = crypto_aead_ivsize(any_tfm_aead(cc));
- else
+ unaligned_allowed = false;
+ } else
cc->iv_size = crypto_skcipher_ivsize(any_tfm(cc));
if (cc->iv_size)
@@ -2827,6 +2857,7 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
if (cc->key_extra_size > ELEPHANT_MAX_KEY_SIZE)
return -EINVAL;
set_bit(CRYPT_ENCRYPT_PREPROCESS, &cc->cipher_flags);
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "lmk") == 0) {
cc->iv_gen_ops = &crypt_iv_lmk_ops;
/*
@@ -2839,10 +2870,12 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
cc->key_parts++;
cc->key_extra_size = cc->key_size / cc->key_parts;
}
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "tcw") == 0) {
cc->iv_gen_ops = &crypt_iv_tcw_ops;
cc->key_parts += 2; /* IV + whitening */
cc->key_extra_size = cc->iv_size + TCW_WHITENING_SIZE;
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "random") == 0) {
cc->iv_gen_ops = &crypt_iv_random_ops;
/* Need storage space in integrity fields. */
@@ -2852,6 +2885,12 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
return -EINVAL;
}
+ if (!unaligned_allowed) {
+ cc->io_alignment = cc->sector_size - 1;
+ } else {
+ set_bit(CRYPT_DISCONTIGUOUS_SEGS, &cc->cipher_flags);
+ cc->io_alignment = 3;
+ }
return 0;
}
@@ -3722,7 +3761,11 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->physical_block_size =
max_t(unsigned int, limits->physical_block_size, cc->sector_size);
limits->io_min = max_t(unsigned int, limits->io_min, cc->sector_size);
- limits->dma_alignment = limits->logical_block_size - 1;
+
+ if (test_bit(CRYPT_DISCONTIGUOUS_SEGS, &cc->cipher_flags))
+ limits->dma_alignment = cc->io_alignment;
+ else
+ limits->dma_alignment = limits->logical_block_size - 1;
/*
* For zoned dm-crypt targets, there will be no internal splitting of
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index dc2eff6b739df..aecb19a6913db 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1767,6 +1767,7 @@ int dm_calculate_queue_limits(struct dm_table *t,
bool zoned = false;
dm_set_stacking_limits(limits);
+ limits->dma_alignment = 0;
t->integrity_supported = true;
for (unsigned int i = 0; i < t->num_targets; i++) {
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments
2026-03-16 15:02 [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Keith Busch
@ 2026-03-16 15:09 ` Keith Busch
2026-03-18 16:34 ` Mikulas Patocka
2026-03-16 15:09 ` [RESEND PATCHv3 0/2] dm-crypt: support relaxed memory alignment Keith Busch
2026-03-18 16:19 ` [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Mikulas Patocka
2 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2026-03-16 15:09 UTC (permalink / raw)
To: dm-devel; +Cc: linux-block, snitzer, Keith Busch
From: Keith Busch <kbusch@kernel.org>
In the unlikely case where the base bio uses a highly fragmented vector,
the four inline scatterlist elements may not be enough, so allocate a
temporary scatterlist when needed.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
drivers/md/dm-crypt.c | 51 +++++++++++++++++++++++++++++++++----------
1 file changed, 40 insertions(+), 11 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bbb4346d0127f..0a5bf6df17caf 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -100,6 +100,8 @@ struct dm_crypt_request {
struct scatterlist sg_in[4];
struct scatterlist sg_out[4];
u64 iv_sector;
+ struct scatterlist *__sg_in;
+ struct scatterlist *__sg_out;
};
struct crypt_config;
@@ -1386,18 +1388,32 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
return r;
}
-static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
+static void crypt_free_sgls(struct dm_crypt_request *dmreq)
+{
+ if (dmreq->__sg_in != dmreq->sg_in)
+ kfree(dmreq->__sg_in);
+ if (dmreq->__sg_out != dmreq->sg_out)
+ kfree(dmreq->__sg_out);
+ dmreq->__sg_in = NULL;
+ dmreq->__sg_out = NULL;
+}
+
+static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist **psg,
struct bvec_iter *iter, struct bio *bio,
int max_segs)
{
unsigned int bytes = cc->sector_size;
+ struct scatterlist *sg = *psg;
struct bvec_iter tmp = *iter;
int segs, i = 0;
bio_advance_iter(bio, &tmp, bytes);
segs = tmp.bi_idx - iter->bi_idx + !!tmp.bi_bvec_done;
- if (segs > max_segs)
- return -EIO;
+ if (segs > max_segs) {
+ sg = kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOIO);
+ if (!sg)
+ return -ENOMEM;
+ }
sg_init_table(sg, segs);
do {
@@ -1406,7 +1422,7 @@ static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
/* Reject unexpected unaligned bio. */
if (unlikely((len | bv.bv_offset) & cc->io_alignment))
- return -EIO;
+ goto error;
sg_set_page(&sg[i++], bv.bv_page, len, bv.bv_offset);
bio_advance_iter_single(bio, iter, len);
@@ -1414,8 +1430,13 @@ static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
} while (bytes);
if (WARN_ON_ONCE(i != segs))
- return -EIO;
+ goto error;
+ *psg = sg;
return 0;
+error:
+ if (sg != *psg)
+ kfree(sg);
+ return -EIO;
}
static int crypt_convert_block_skcipher(struct crypt_config *cc,
@@ -1444,18 +1465,21 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
sector = org_sector_of_dmreq(cc, dmreq);
*sector = cpu_to_le64(ctx->cc_sector - cc->iv_offset);
- sg_in = &dmreq->sg_in[0];
- sg_out = &dmreq->sg_out[0];
+ dmreq->__sg_in = &dmreq->sg_in[0];
+ dmreq->__sg_out = &dmreq->sg_out[0];
- r = crypt_build_sgl(cc, sg_in, &ctx->iter_in, ctx->bio_in,
+ r = crypt_build_sgl(cc, &dmreq->__sg_in, &ctx->iter_in, ctx->bio_in,
ARRAY_SIZE(dmreq->sg_in));
if (r < 0)
return r;
- r = crypt_build_sgl(cc, sg_out, &ctx->iter_out, ctx->bio_out,
+ r = crypt_build_sgl(cc, &dmreq->__sg_out, &ctx->iter_out, ctx->bio_out,
ARRAY_SIZE(dmreq->sg_out));
if (r < 0)
- return r;
+ goto out;
+
+ sg_in = dmreq->__sg_in;
+ sg_out = dmreq->__sg_out;
if (cc->iv_gen_ops) {
/* For READs use IV stored in integrity metadata */
@@ -1464,7 +1488,7 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
} else {
r = cc->iv_gen_ops->generator(cc, org_iv, dmreq);
if (r < 0)
- return r;
+ goto out;
/* Data can be already preprocessed in generator */
if (test_bit(CRYPT_ENCRYPT_PREPROCESS, &cc->cipher_flags))
sg_in = sg_out;
@@ -1485,6 +1509,9 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
r = cc->iv_gen_ops->post(cc, org_iv, dmreq);
+out:
+ if (r != -EINPROGRESS && r != -EBUSY)
+ crypt_free_sgls(dmreq);
return r;
}
@@ -1550,7 +1577,9 @@ static void crypt_free_req_skcipher(struct crypt_config *cc,
struct skcipher_request *req, struct bio *base_bio)
{
struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
+ struct dm_crypt_request *dmreq = dmreq_of_req(cc, req);
+ crypt_free_sgls(dmreq);
if ((struct skcipher_request *)(io + 1) != req)
mempool_free(req, &cc->req_pool);
}
--
2.52.0
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments
2026-03-16 15:09 ` [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments Keith Busch
@ 2026-03-18 16:34 ` Mikulas Patocka
2026-03-18 17:01 ` Keith Busch
0 siblings, 1 reply; 16+ messages in thread
From: Mikulas Patocka @ 2026-03-18 16:34 UTC (permalink / raw)
To: Keith Busch; +Cc: dm-devel, linux-block, snitzer, Keith Busch
On Mon, 16 Mar 2026, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> In the unlikely case where the base bio uses a highly fragmented vector,
> the four inline scatterlist elements may not be enough, so allocate a
> temporary scatterlist when needed.
>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
> +static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist **psg,
> struct bvec_iter *iter, struct bio *bio,
> int max_segs)
> {
> unsigned int bytes = cc->sector_size;
> + struct scatterlist *sg = *psg;
> struct bvec_iter tmp = *iter;
> int segs, i = 0;
>
> bio_advance_iter(bio, &tmp, bytes);
> segs = tmp.bi_idx - iter->bi_idx + !!tmp.bi_bvec_done;
> - if (segs > max_segs)
> - return -EIO;
> + if (segs > max_segs) {
> + sg = kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOIO);
> + if (!sg)
> + return -ENOMEM;
> + }
>
> sg_init_table(sg, segs);
> do {
GFP_NOIO allocations may be unavailable when you are swapping to the
dm-crypt device and the machine runs out of memory temporarily. There
should be:
sg = kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOWAIT | __GFP_NOMEMALLOC);
and if it fails, allocate "sg" from a mempool with GFP_NOIO (mempool_alloc
with GFP_NOIO can't fail, it waits until someone frees some entries into
the mempool).
As there are two consecutive calls to crypt_build_sgl, there should be two
mempools, one for the first call and the second for the second call. The
mempools should be sized for the worst possible fragmentation.
Mikulas
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments
2026-03-18 16:34 ` Mikulas Patocka
@ 2026-03-18 17:01 ` Keith Busch
2026-03-18 17:40 ` Mikulas Patocka
0 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2026-03-18 17:01 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, Mar 18, 2026 at 05:34:47PM +0100, Mikulas Patocka wrote:
> On Mon, 16 Mar 2026, Keith Busch wrote:
> > +static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist **psg,
> > struct bvec_iter *iter, struct bio *bio,
> > int max_segs)
> > {
> > unsigned int bytes = cc->sector_size;
> > + struct scatterlist *sg = *psg;
> > struct bvec_iter tmp = *iter;
> > int segs, i = 0;
> >
> > bio_advance_iter(bio, &tmp, bytes);
> > segs = tmp.bi_idx - iter->bi_idx + !!tmp.bi_bvec_done;
> > - if (segs > max_segs)
> > - return -EIO;
> > + if (segs > max_segs) {
> > + sg = kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOIO);
> > + if (!sg)
> > + return -ENOMEM;
> > + }
> >
> > sg_init_table(sg, segs);
> > do {
>
> GFP_NOIO allocations may be unavailable when you are swapping to the
> dm-crypt device and the machine runs out of memory temporarily. There
> should be:
>
> sg = kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOWAIT | __GFP_NOMEMALLOC);
>
> and if it fails, allocate "sg" from a mempool with GFP_NOIO (mempool_alloc
> with GFP_NOIO can't fail, it waits until someone frees some entries into
> the mempool).
Thanks for the suggestion, this sounds good. Just to note, the use case
for swap always writes out pages, so it's always aligned and would never
take this path. The use case in mind where this path could happen is
just for zero-copy direct io applications.
But even then, the only thing I know of that really wants this has an
offset that straddles two pages per block, so I never need more than 2
segments, and the inline scatterlist has four. There's just currently no
way for the block layer to report a max-segments-per-block limit, so I'm
including this patch to be consistent with the reportable limits.
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments
2026-03-18 17:01 ` Keith Busch
@ 2026-03-18 17:40 ` Mikulas Patocka
2026-03-18 17:53 ` Keith Busch
0 siblings, 1 reply; 16+ messages in thread
From: Mikulas Patocka @ 2026-03-18 17:40 UTC (permalink / raw)
To: Keith Busch; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, 18 Mar 2026, Keith Busch wrote:
> On Wed, Mar 18, 2026 at 05:34:47PM +0100, Mikulas Patocka wrote:
> > On Mon, 16 Mar 2026, Keith Busch wrote:
> > > +static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist **psg,
> > > struct bvec_iter *iter, struct bio *bio,
> > > int max_segs)
> > > {
> > > unsigned int bytes = cc->sector_size;
> > > + struct scatterlist *sg = *psg;
> > > struct bvec_iter tmp = *iter;
> > > int segs, i = 0;
> > >
> > > bio_advance_iter(bio, &tmp, bytes);
> > > segs = tmp.bi_idx - iter->bi_idx + !!tmp.bi_bvec_done;
> > > - if (segs > max_segs)
> > > - return -EIO;
> > > + if (segs > max_segs) {
> > > + sg = kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOIO);
> > > + if (!sg)
> > > + return -ENOMEM;
> > > + }
> > >
> > > sg_init_table(sg, segs);
> > > do {
> >
> > GFP_NOIO allocations may be unavailable when you are swapping to the
> > dm-crypt device and the machine runs out of memory temporarily. There
> > should be:
> >
> > sg = kmalloc_array(segs, sizeof(struct scatterlist), GFP_NOWAIT | __GFP_NOMEMALLOC);
> >
> > and if it fails, allocate "sg" from a mempool with GFP_NOIO (mempool_alloc
> > with GFP_NOIO can't fail, it waits until someone frees some entries into
> > the mempool).
>
> Thanks for the suggestion, this sounds good. Just to note, the use case
> for swap always writes out pages, so it's always aligned and would never
> take this path. The use case in mind where this path could happen is
> just for zero-copy direct io applications.
That's true, but I'd rather like to have the code clean and not depend on
some implicit assumptions like "swap never uses split sglists".
> But even then, the only thing I know of that really wants this has an
> offset that straddles two pages per block, so I never need more than 2
> segments, and the inline scatterlist has four. There's just currently no
> way for the block layer to report a max-segments-per-block limit, so I'm
> including this patch to be consistent with the reportable limits.
Can userspace use preadv/pwritev with many small iovecs on a file opened
for direct I/O?
Mikulas
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments
2026-03-18 17:40 ` Mikulas Patocka
@ 2026-03-18 17:53 ` Keith Busch
2026-03-18 18:16 ` Mikulas Patocka
0 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2026-03-18 17:53 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, Mar 18, 2026 at 06:40:52PM +0100, Mikulas Patocka wrote:
> On Wed, 18 Mar 2026, Keith Busch wrote:
>
> > But even then, the only thing I know of that really wants this has an
> > offset that straddles two pages per block, so I never need more than 2
> > segments, and the inline scatterlist has four. There's just currently no
> > way for the block layer to report a max-segments-per-block limit, so I'm
> > including this patch to be consistent with the reportable limits.
>
> Can userspace use preadv/pwritev with many small iovecs on a file opened
> for direct I/O?
Yes, I have several patches that were accepted last year to block and
iomap that align direct-io memory requirements to the backing device's
dma constraints. You can do dword aligned and sized vectored IO to a
block device today if the block limits say its okay.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments
2026-03-18 17:53 ` Keith Busch
@ 2026-03-18 18:16 ` Mikulas Patocka
2026-03-18 18:32 ` Keith Busch
0 siblings, 1 reply; 16+ messages in thread
From: Mikulas Patocka @ 2026-03-18 18:16 UTC (permalink / raw)
To: Keith Busch; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, 18 Mar 2026, Keith Busch wrote:
> On Wed, Mar 18, 2026 at 06:40:52PM +0100, Mikulas Patocka wrote:
> > On Wed, 18 Mar 2026, Keith Busch wrote:
> >
> > > But even then, the only thing I know of that really wants this has an
> > > offset that straddles two pages per block, so I never need more than 2
> > > segments, and the inline scatterlist has four. There's just currently no
> > > way for the block layer to report a max-segments-per-block limit, so I'm
> > > including this patch to be consistent with the reportable limits.
> >
> > Can userspace use preadv/pwritev with many small iovecs on a file opened
> > for direct I/O?
>
> Yes, I have several patches that were accepted last year to block and
> iomap that align direct-io memory requirements to the backing device's
> dma constraints. You can do dword aligned and sized vectored IO to a
> block device today if the block limits say its okay.
So, we must support worst-case scenario - "cc->sector_size / 4" segments.
sector_size can be up to 4096, so there may be up to 1024 segments. Each
segment consumes sizeof(struct scatterlist) bytes - that's 32 bytes. So,
we allocate up to 32768 bytes.
kmalloc with size greater than 32768 is unreliable - so we are just on
this limit.
Mikulas
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments
2026-03-18 18:16 ` Mikulas Patocka
@ 2026-03-18 18:32 ` Keith Busch
2026-03-18 18:41 ` Mikulas Patocka
0 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2026-03-18 18:32 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, Mar 18, 2026 at 07:16:15PM +0100, Mikulas Patocka wrote:
> On Wed, 18 Mar 2026, Keith Busch wrote:
> > On Wed, Mar 18, 2026 at 06:40:52PM +0100, Mikulas Patocka wrote:
> > > On Wed, 18 Mar 2026, Keith Busch wrote:
> > >
> > > > But even then, the only thing I know of that really wants this has an
> > > > offset that straddles two pages per block, so I never need more than 2
> > > > segments, and the inline scatterlist has four. There's just currently no
> > > > way for the block layer to report a max-segments-per-block limit, so I'm
> > > > including this patch to be consistent with the reportable limits.
> > >
> > > Can userspace use preadv/pwritev with many small iovecs on a file opened
> > > for direct I/O?
> >
> > Yes, I have several patches that were accepted last year to block and
> > iomap that align direct-io memory requirements to the backing device's
> > dma constraints. You can do dword aligned and sized vectored IO to a
> > block device today if the block limits say its okay.
>
> So, we must support worst-case scenario - "cc->sector_size / 4" segments.
>
> sector_size can be up to 4096, so there may be up to 1024 segments. Each
> segment consumes sizeof(struct scatterlist) bytes - that's 32 bytes. So,
> we allocate up to 32768 bytes.
>
> kmalloc with size greater than 32768 is unreliable - so we are just on
> this limit.
Direct-io hits the BIO_VEC_MAX limit (256) first, so 8k for the
scatterlist (not great, but less terrible). You can have some 4-byte
segments in the user request, but the average segment size would have
to be larger. The important part is that any segment can start at any
memory address dword alignment offset.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments
2026-03-18 18:32 ` Keith Busch
@ 2026-03-18 18:41 ` Mikulas Patocka
0 siblings, 0 replies; 16+ messages in thread
From: Mikulas Patocka @ 2026-03-18 18:41 UTC (permalink / raw)
To: Keith Busch; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, 18 Mar 2026, Keith Busch wrote:
> On Wed, Mar 18, 2026 at 07:16:15PM +0100, Mikulas Patocka wrote:
> > On Wed, 18 Mar 2026, Keith Busch wrote:
> > > On Wed, Mar 18, 2026 at 06:40:52PM +0100, Mikulas Patocka wrote:
> > > > On Wed, 18 Mar 2026, Keith Busch wrote:
> > > >
> > > > > But even then, the only thing I know of that really wants this has an
> > > > > offset that straddles two pages per block, so I never need more than 2
> > > > > segments, and the inline scatterlist has four. There's just currently no
> > > > > way for the block layer to report a max-segments-per-block limit, so I'm
> > > > > including this patch to be consistent with the reportable limits.
> > > >
> > > > Can userspace use preadv/pwritev with many small iovecs on a file opened
> > > > for direct I/O?
> > >
> > > Yes, I have several patches that were accepted last year to block and
> > > iomap that align direct-io memory requirements to the backing device's
> > > dma constraints. You can do dword aligned and sized vectored IO to a
> > > block device today if the block limits say its okay.
> >
> > So, we must support worst-case scenario - "cc->sector_size / 4" segments.
> >
> > sector_size can be up to 4096, so there may be up to 1024 segments. Each
> > segment consumes sizeof(struct scatterlist) bytes - that's 32 bytes. So,
> > we allocate up to 32768 bytes.
> >
> > kmalloc with size greater than 32768 is unreliable - so we are just on
> > this limit.
>
> Direct-io hits the BIO_VEC_MAX limit (256) first, so 8k for the
> scatterlist (not great, but less terrible). You can have some 4-byte
> segments in the user request, but the average segement size would have
> to be larger. The important part is that any segment can start at any
> memory address dword alignment offset.
Yes, that makes sense. So, you can preallocate two mempools (for the two
sglists) with 8k-sized objects each.
Mikulas
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RESEND PATCHv3 0/2] dm-crypt: support relaxed memory alignment
2026-03-16 15:02 [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Keith Busch
2026-03-16 15:09 ` [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments Keith Busch
@ 2026-03-16 15:09 ` Keith Busch
2026-03-18 16:19 ` [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Mikulas Patocka
2 siblings, 0 replies; 16+ messages in thread
From: Keith Busch @ 2026-03-16 15:09 UTC (permalink / raw)
To: dm-devel; +Cc: linux-block, snitzer, Keith Busch
From: Keith Busch <kbusch@kernel.org>
Direct-io can support any memory alignment the hardware allows. Device
mappers don't need to impose any software constraints on memory
alignment, so this series removes one of those limitations from the
dm-crypt mapper.
Keith Busch (2):
dm-crypt: allow unaligned bio_vecs for direct io
dm-crypt: dynamic scatterlist for many segments
drivers/md/dm-crypt.c | 114 ++++++++++++++++++++++++++++++++++--------
drivers/md/dm-table.c | 1 +
2 files changed, 94 insertions(+), 21 deletions(-)
--
2.52.0
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io
2026-03-16 15:02 [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Keith Busch
2026-03-16 15:09 ` [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments Keith Busch
2026-03-16 15:09 ` [RESEND PATCHv3 0/2] dm-crypt: support relaxed memory alignment Keith Busch
@ 2026-03-18 16:19 ` Mikulas Patocka
2026-03-18 17:40 ` Keith Busch
2 siblings, 1 reply; 16+ messages in thread
From: Mikulas Patocka @ 2026-03-18 16:19 UTC (permalink / raw)
To: Keith Busch; +Cc: dm-devel, linux-block, snitzer, Keith Busch
Hi
On Mon, 16 Mar 2026, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
>
> Many storage devices can handle DMA for data that is not aligned to the
> sector block size. The block and filesystem layers have introduced
> updates to allow that kind of memory alignment flexibility when
> possible.
>
> dm-crypt, however, currently constrains itself to aligned memory because
> it sends a single scatterlist element for the in/out list to the encrypt
> and decrypt algorithms. This forces applications that have unaligned
> data to copy through a bounce buffer, increasing CPU and memory
> utilization.
>
> Use multiple scatterlist elements to relax the memory alignment
> requirement. To keep this simple, this more flexible constraint is
> enabled only for certain encryption and initialization vector types,
> specifically the ones that don't have additional use for the request
> base scatterlist elements beyond holding decrypted data.
>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
>
> + if (!unaligned_allowed) {
> + cc->io_alignment = cc->sector_size - 1;
> + } else {
> + set_bit(CRYPT_DISCONTIGUOUS_SEGS, &cc->cipher_flags);
> + cc->io_alignment = 3;
> + }
> return 0;
> }
Why is "3" there? Should there be the dma_alignment of the underlying
block device instead?
> @@ -3722,7 +3761,11 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
> limits->physical_block_size =
> max_t(unsigned int, limits->physical_block_size, cc->sector_size);
> limits->io_min = max_t(unsigned int, limits->io_min, cc->sector_size);
> - limits->dma_alignment = limits->logical_block_size - 1;
> +
> + if (test_bit(CRYPT_DISCONTIGUOUS_SEGS, &cc->cipher_flags))
> + limits->dma_alignment = cc->io_alignment;
> + else
> + limits->dma_alignment = limits->logical_block_size - 1;
>
> /*
> * For zoned dm-crypt targets, there will be no internal splitting of
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index dc2eff6b739df..aecb19a6913db 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -1767,6 +1767,7 @@ int dm_calculate_queue_limits(struct dm_table *t,
> bool zoned = false;
>
> dm_set_stacking_limits(limits);
> + limits->dma_alignment = 0;
>
> t->integrity_supported = true;
> for (unsigned int i = 0; i < t->num_targets; i++) {
dm-integrity doesn't set dma_alignment if it is using 512-byte sector size
(assuming that there is default 511). This should be fixed in dm-integrity
before making this change.
Other device mapper targets should also be reviewed to make sure that this
change doesn't break them.
Mikulas
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io
2026-03-18 16:19 ` [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Mikulas Patocka
@ 2026-03-18 17:40 ` Keith Busch
2026-03-18 18:06 ` Mikulas Patocka
0 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2026-03-18 17:40 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, Mar 18, 2026 at 05:19:39PM +0100, Mikulas Patocka wrote:
> On Mon, 16 Mar 2026, Keith Busch wrote:
> > + if (!unaligned_allowed) {
> > + cc->io_alignment = cc->sector_size - 1;
> > + } else {
> > + set_bit(CRYPT_DISCONTIGUOUS_SEGS, &cc->cipher_flags);
> > + cc->io_alignment = 3;
> > + }
> > return 0;
> > }
>
> Why is "3" there? Should there be the dma_alignment of the underlying
> block device instead?
It doesn't matter what the underlying block device reports because
dm-crypt bounces the incoming plain text buffers to something aligned to
the backing device for the ciphertext.
Why 3? This can theoretically do 0, but you can't express that as a
block limit; block layer will assume you didn't set anything and use a
511 default, which is exactly what I don't want. So why not 1 instead of
3? Three is the smallest non-zero cra_alignmask currently reported, and
it's inefficient to do byte aligned direct io anyway.
> > --- a/drivers/md/dm-table.c
> > +++ b/drivers/md/dm-table.c
> > @@ -1767,6 +1767,7 @@ int dm_calculate_queue_limits(struct dm_table *t,
> > bool zoned = false;
> >
> > dm_set_stacking_limits(limits);
> > + limits->dma_alignment = 0;
> >
> > t->integrity_supported = true;
> > for (unsigned int i = 0; i < t->num_targets; i++) {
>
> dm-integrity doesn't set dma_alignment if it is using 512-byte sector size
> (assuming that there is default 511). This should be fixed in dm-integrity
> before making this change.
>
> Other device mapper targets should also be reviewed to make sure that this
> change doesn't break them.
The previous call to dm_set_stacking_limits() sets it to 511, which sets
an unnecessary minimum. Setting it to 0 here means that the stacked
device will inherit whatever the mapper overrides it to, or go back to
511 if it decides not to override it.
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io
2026-03-18 17:40 ` Keith Busch
@ 2026-03-18 18:06 ` Mikulas Patocka
2026-03-18 18:35 ` Keith Busch
2026-03-25 18:34 ` Keith Busch
0 siblings, 2 replies; 16+ messages in thread
From: Mikulas Patocka @ 2026-03-18 18:06 UTC (permalink / raw)
To: Keith Busch; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, 18 Mar 2026, Keith Busch wrote:
> On Wed, Mar 18, 2026 at 05:19:39PM +0100, Mikulas Patocka wrote:
> > On Mon, 16 Mar 2026, Keith Busch wrote:
> > > + if (!unaligned_allowed) {
> > > + cc->io_alignment = cc->sector_size - 1;
> > > + } else {
> > > + set_bit(CRYPT_DISCONTIGUOUS_SEGS, &cc->cipher_flags);
> > > + cc->io_alignment = 3;
> > > + }
> > > return 0;
> > > }
> >
> > Why is "3" there? Should there be the dma_alignment of the underlying
> > block device instead?
>
> It doesn't matter what the underlying block device reports because
> dm-crypt bounces the incoming plaintext buffers to something aligned to
> the backing device for the ciphertext.
dm-crypt bounces writes, but doesn't bounce reads. When doing reads, it
reads the data into the target buffer and decrypts it in place.
So, it must respect the alignment provided by the underlying device.
> Why 3? This can theoretically do 0, but you can't express that as a
> block limit; block layer will assume you didn't set anything and use a
> 511 default, which is exactly what I don't want. So why not 1 instead of
> 3? Three is the smallest non-zero cra_alignmask currently reported, and
> it's inefficient to do byte-aligned direct io anyway.
>
> > > --- a/drivers/md/dm-table.c
> > > +++ b/drivers/md/dm-table.c
> > > @@ -1767,6 +1767,7 @@ int dm_calculate_queue_limits(struct dm_table *t,
> > > bool zoned = false;
> > >
> > > dm_set_stacking_limits(limits);
> > > + limits->dma_alignment = 0;
> > >
> > > t->integrity_supported = true;
> > > for (unsigned int i = 0; i < t->num_targets; i++) {
> >
> > dm-integrity doesn't set dma_alignment if it is using 512-byte sector size
> > (assuming that there is default 511). This should be fixed in dm-integrity
> > before making this change.
> >
> > Other device mapper targets should also be reviewed to make sure that this
> > change doens't break them.
>
> The previous call to dm_set_stacking_limits() sets it to 511, which sets
> an unnecessary minimum. Setting it to 0 here means that the stacked
> device will inherit whatever the mapper overrides it to, or go back to
> 511 if it decides not to override it.
If we have dm-integrity with 512-byte sector size on the top of a device
that has dma_alignment less than 512, the code would set
limits->dma_alignment to the value of the underlying block device and
break dm-integrity (which assumes sector-aligned bios).
But that's a problem of dm-integrity - this "if (ic->sectors_per_block >
1)" condition in dm_integrity_io_hints is weird, it should be dropped and
the limits should be properly merged.
Mikulas
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io
2026-03-18 18:06 ` Mikulas Patocka
@ 2026-03-18 18:35 ` Keith Busch
2026-03-25 18:34 ` Keith Busch
1 sibling, 0 replies; 16+ messages in thread
From: Keith Busch @ 2026-03-18 18:35 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, Mar 18, 2026 at 07:06:40PM +0100, Mikulas Patocka wrote:
> > It doesn't matter what the underlying block device reports because
> > dm-crypt bounces the incoming plain text buffers to something aligned to
> > the backing device for the cypher text.
>
> dm-crypt bounces writes, but doesn't bounce reads. When doing reads, it
> reads the data into the target buffer and decrypts it in place.
Oh, thanks. It's been a while since I developed this, so I'll need a
moment to double-check for any mistaken assumptions. It may be I just used
backing devices that also had similar alignment constraints and missed
something.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io
2026-03-18 18:06 ` Mikulas Patocka
2026-03-18 18:35 ` Keith Busch
@ 2026-03-25 18:34 ` Keith Busch
1 sibling, 0 replies; 16+ messages in thread
From: Keith Busch @ 2026-03-25 18:34 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: Keith Busch, dm-devel, linux-block, snitzer
On Wed, Mar 18, 2026 at 07:06:40PM +0100, Mikulas Patocka wrote:
>
> If we have dm-integrity with 512-byte sector size on the top of a device
> that has dma_alignment less than 512, the code would set
> limits->dma_alignment to the value of the underlying block device and
> break dm-integrity (which assumes sector-aligned bios).
>
> But that's a problem of dm-integrity - this "if (ic->sectors_per_block >
> 1)" condition in dm_integrity_io_hints is weird, it should be dropped and
> the limits should be properly merged.
Looks like dm-integrity is a bit weird for other reasons, too. If a user
creates a dm-integrity device with block size of 1k or 2k, but the
backing device is 4k logical size, we'll have a mismatch from what the
dm-integrity block device reports compared to what it can actually do.
The pattern needed to get it right is repeated several times, so a
helper is probably warranted. I'll put a series together as a prep and
fix up.
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RESEND PATCHv3 0/2] dm-crypt: support relaxed memory alignment
@ 2026-01-14 15:49 Keith Busch
2026-01-14 15:49 ` [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Keith Busch
0 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2026-01-14 15:49 UTC (permalink / raw)
To: dm-devel, snitzer, hch, ebiggers; +Cc: linux-block, axboe, Keith Busch
From: Keith Busch <kbusch@kernel.org>
Resending as the previous send was bad timing with the merge window.
Direct-io can support any memory alignment the hardware allows. Device
mappers don't need to impose any software constraints on memory
alignment, so this series removes one of those limitations the dm-crypt
mapper.
Changes from v2:
* Don't change the default stacking limit to allow the relaxed memory
alignment requirements; have the caller do it instead.
* Fixed scatterlist memory leaks when handling the case that can't
use the inline scatterlist.
* Fixed segment boundary check to use the crypt_config rather than the
lower level block device's dma_alignment, which may not be the same
size as the cc->sector_size which was used before, or the newly
enabled 4-byte alignment this patch set allows in certain
circumstances.
Keith Busch (2):
dm-crypt: allow unaligned bio_vecs for direct io
dm-crypt: dynamic scatterlist for many segments
drivers/md/dm-crypt.c | 114 ++++++++++++++++++++++++++++++++++--------
drivers/md/dm-table.c | 1 +
2 files changed, 94 insertions(+), 21 deletions(-)
--
2.47.3
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io
2026-01-14 15:49 [RESEND PATCHv3 0/2] dm-crypt: support relaxed memory alignment Keith Busch
@ 2026-01-14 15:49 ` Keith Busch
0 siblings, 0 replies; 16+ messages in thread
From: Keith Busch @ 2026-01-14 15:49 UTC (permalink / raw)
To: dm-devel, snitzer, hch, ebiggers; +Cc: linux-block, axboe, Keith Busch
From: Keith Busch <kbusch@kernel.org>
Many storage devices can handle DMA for data that is not aligned to the
sector block size. The block and filesystem layers have introduced
updates to allow that kind of memory alignment flexibility when
possible.
dm-crypt, however, currently constrains itself to aligned memory because
it sends a single scatterlist element for the in/out list to the encrypt
and decrypt algorithms. This forces applications that have unaligned
data to copy through a bounce buffer, increasing CPU and memory
utilization.
Use multiple scatterlist elements to relax the memory alignment
requirement. To keep this simple, this more flexible constraint is
enabled only for certain encryption and initialization vector types,
specifically the ones that don't have additional use for the request
base scatterlist elements beyond holding decrypted data.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
drivers/md/dm-crypt.c | 79 +++++++++++++++++++++++++++++++++----------
drivers/md/dm-table.c | 1 +
2 files changed, 62 insertions(+), 18 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 5ef43231fe77f..5304954b4574b 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -149,6 +149,7 @@ enum cipher_flags {
CRYPT_IV_LARGE_SECTORS, /* Calculate IV from sector_size, not 512B sectors */
CRYPT_ENCRYPT_PREPROCESS, /* Must preprocess data for encryption (elephant) */
CRYPT_KEY_MAC_SIZE_SET, /* The integrity_key_size option was used */
+ CRYPT_DISCONTIGUOUS_SEGS, /* Can use partial sector segments */
};
/*
@@ -214,6 +215,7 @@ struct crypt_config {
unsigned int key_extra_size; /* additional keys length */
unsigned int key_mac_size; /* MAC key size for authenc(...) */
+ unsigned int io_alignment;
unsigned int integrity_tag_size;
unsigned int integrity_iv_size;
unsigned int used_tag_size;
@@ -1424,22 +1426,48 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
return r;
}
+static int crypt_build_sgl(struct crypt_config *cc, struct scatterlist *sg,
+ struct bvec_iter *iter, struct bio *bio,
+ int max_segs)
+{
+ unsigned int bytes = cc->sector_size;
+ struct bvec_iter tmp = *iter;
+ int segs, i = 0;
+
+ bio_advance_iter(bio, &tmp, bytes);
+ segs = tmp.bi_idx - iter->bi_idx + !!tmp.bi_bvec_done;
+ if (segs > max_segs)
+ return -EIO;
+
+ sg_init_table(sg, segs);
+ do {
+ struct bio_vec bv = mp_bvec_iter_bvec(bio->bi_io_vec, *iter);
+ int len = min(bytes, bv.bv_len);
+
+ /* Reject unexpected unaligned bio. */
+ if (unlikely((len | bv.bv_offset) & cc->io_alignment))
+ return -EIO;
+
+ sg_set_page(&sg[i++], bv.bv_page, len, bv.bv_offset);
+ bio_advance_iter_single(bio, iter, len);
+ bytes -= len;
+ } while (bytes);
+
+ if (WARN_ON_ONCE(i != segs))
+ return -EIO;
+ return 0;
+}
+
static int crypt_convert_block_skcipher(struct crypt_config *cc,
struct convert_context *ctx,
struct skcipher_request *req,
unsigned int tag_offset)
{
- struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
- struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
struct scatterlist *sg_in, *sg_out;
struct dm_crypt_request *dmreq;
u8 *iv, *org_iv, *tag_iv;
__le64 *sector;
- int r = 0;
-
- /* Reject unexpected unaligned bio. */
- if (unlikely(bv_in.bv_len & (cc->sector_size - 1)))
- return -EIO;
+ int r;
dmreq = dmreq_of_req(cc, req);
dmreq->iv_sector = ctx->cc_sector;
@@ -1456,15 +1484,18 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
sector = org_sector_of_dmreq(cc, dmreq);
*sector = cpu_to_le64(ctx->cc_sector - cc->iv_offset);
- /* For skcipher we use only the first sg item */
sg_in = &dmreq->sg_in[0];
sg_out = &dmreq->sg_out[0];
- sg_init_table(sg_in, 1);
- sg_set_page(sg_in, bv_in.bv_page, cc->sector_size, bv_in.bv_offset);
+ r = crypt_build_sgl(cc, sg_in, &ctx->iter_in, ctx->bio_in,
+ ARRAY_SIZE(dmreq->sg_in));
+ if (r < 0)
+ return r;
- sg_init_table(sg_out, 1);
- sg_set_page(sg_out, bv_out.bv_page, cc->sector_size, bv_out.bv_offset);
+ r = crypt_build_sgl(cc, sg_out, &ctx->iter_out, ctx->bio_out,
+ ARRAY_SIZE(dmreq->sg_out));
+ if (r < 0)
+ return r;
if (cc->iv_gen_ops) {
/* For READs use IV stored in integrity metadata */
@@ -1495,9 +1526,6 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
r = cc->iv_gen_ops->post(cc, org_iv, dmreq);
- bio_advance_iter(ctx->bio_in, &ctx->iter_in, cc->sector_size);
- bio_advance_iter(ctx->bio_out, &ctx->iter_out, cc->sector_size);
-
return r;
}
@@ -2829,10 +2857,12 @@ static void crypt_dtr(struct dm_target *ti)
static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
{
struct crypt_config *cc = ti->private;
+ bool unaligned_allowed = true;
- if (crypt_integrity_aead(cc))
+ if (crypt_integrity_aead(cc)) {
cc->iv_size = crypto_aead_ivsize(any_tfm_aead(cc));
- else
+ unaligned_allowed = false;
+ } else
cc->iv_size = crypto_skcipher_ivsize(any_tfm(cc));
if (cc->iv_size)
@@ -2868,6 +2898,7 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
if (cc->key_extra_size > ELEPHANT_MAX_KEY_SIZE)
return -EINVAL;
set_bit(CRYPT_ENCRYPT_PREPROCESS, &cc->cipher_flags);
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "lmk") == 0) {
cc->iv_gen_ops = &crypt_iv_lmk_ops;
/*
@@ -2880,10 +2911,12 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
cc->key_parts++;
cc->key_extra_size = cc->key_size / cc->key_parts;
}
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "tcw") == 0) {
cc->iv_gen_ops = &crypt_iv_tcw_ops;
cc->key_parts += 2; /* IV + whitening */
cc->key_extra_size = cc->iv_size + TCW_WHITENING_SIZE;
+ unaligned_allowed = false;
} else if (strcmp(ivmode, "random") == 0) {
cc->iv_gen_ops = &crypt_iv_random_ops;
/* Need storage space in integrity fields. */
@@ -2893,6 +2926,12 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode)
return -EINVAL;
}
+ if (!unaligned_allowed) {
+ cc->io_alignment = cc->sector_size - 1;
+ } else {
+ set_bit(CRYPT_DISCONTIGUOUS_SEGS, &cc->cipher_flags);
+ cc->io_alignment = 3;
+ }
return 0;
}
@@ -3750,7 +3789,11 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->physical_block_size =
max_t(unsigned int, limits->physical_block_size, cc->sector_size);
limits->io_min = max_t(unsigned int, limits->io_min, cc->sector_size);
- limits->dma_alignment = limits->logical_block_size - 1;
+
+ if (test_bit(CRYPT_DISCONTIGUOUS_SEGS, &cc->cipher_flags))
+ limits->dma_alignment = cc->io_alignment;
+ else
+ limits->dma_alignment = limits->logical_block_size - 1;
/*
* For zoned dm-crypt targets, there will be no internal splitting of
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ad0a60a07b935..da0d090675103 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1789,6 +1789,7 @@ int dm_calculate_queue_limits(struct dm_table *t,
bool zoned = false;
dm_set_stacking_limits(limits);
+ limits->dma_alignment = 0;
t->integrity_supported = true;
for (unsigned int i = 0; i < t->num_targets; i++) {
--
2.47.3
^ permalink raw reply related [flat|nested] 16+ messages in thread
end of thread, other threads:[~2026-03-25 18:34 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-16 15:02 [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Keith Busch
2026-03-16 15:09 ` [RESEND PATCHv3 2/2] dm-crypt: dynamic scatterlist for many segments Keith Busch
2026-03-18 16:34 ` Mikulas Patocka
2026-03-18 17:01 ` Keith Busch
2026-03-18 17:40 ` Mikulas Patocka
2026-03-18 17:53 ` Keith Busch
2026-03-18 18:16 ` Mikulas Patocka
2026-03-18 18:32 ` Keith Busch
2026-03-18 18:41 ` Mikulas Patocka
2026-03-16 15:09 ` [RESEND PATCHv3 0/2] dm-crypt: support relaxed memory alignment Keith Busch
2026-03-18 16:19 ` [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Mikulas Patocka
2026-03-18 17:40 ` Keith Busch
2026-03-18 18:06 ` Mikulas Patocka
2026-03-18 18:35 ` Keith Busch
2026-03-25 18:34 ` Keith Busch
-- strict thread matches above, loose matches on Subject: below --
2026-01-14 15:49 [RESEND PATCHv3 0/2] dm-crypt: support relaxed memory alignment Keith Busch
2026-01-14 15:49 ` [RESEND PATCHv3 1/2] dm-crypt: allow unaligned bio_vecs for direct io Keith Busch
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox