* [PATCH 0/9] dm-crypt patches
@ 2014-04-05 18:04 Mikulas Patocka
2014-04-05 18:04 ` [PATCH 1/9] dm-crypt: remove per-cpu structure Mikulas Patocka
` (2 more replies)
0 siblings, 3 replies; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:04 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
Hi
I fixed the bug that Ondrej found (it happened only under memory pressure,
that's why I couldn't reproduce it), here I'm resending the dm-crypt
patches.
Mikulas
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 1/9] dm-crypt: remove per-cpu structure
2014-04-05 18:04 [PATCH 0/9] dm-crypt patches Mikulas Patocka
@ 2014-04-05 18:04 ` Mikulas Patocka
2014-04-05 18:05 ` [PATCH 2/9] bio: use kmalloc alignment for bio slab Mikulas Patocka
2014-04-07 1:01 ` [PATCH 0/9] dm-crypt patches Mike Snitzer
2014-04-08 15:25 ` Ondrej Kozina
2 siblings, 1 reply; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:04 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
Dm-crypt used per-cpu structures to hold pointers to ablkcipher_request.
The code assumed that the work item keeps executing on a single CPU, so it
used no synchronization when accessing this structure.
When we disable a CPU by writing zero to
/sys/devices/system/cpu/cpu*/online, the work item could be moved to
another CPU. This causes crashes in dm-crypt because the code starts using
a wrong ablkcipher_request.
This patch fixes this bug by removing the percpu definition. The structure
ablkcipher_request is accessed via a pointer from convert_context.
Consequently, if the work item is rescheduled to a different CPU, the
thread still uses the same ablkcipher_request.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
---
drivers/md/dm-crypt.c | 61 +++++++++-----------------------------------------
1 file changed, 12 insertions(+), 49 deletions(-)
Index: linux-3.14-rc1/drivers/md/dm-crypt.c
===================================================================
--- linux-3.14-rc1.orig/drivers/md/dm-crypt.c 2014-02-03 19:18:23.000000000 +0100
+++ linux-3.14-rc1/drivers/md/dm-crypt.c 2014-02-03 19:21:35.000000000 +0100
@@ -19,7 +19,6 @@
#include <linux/crypto.h>
#include <linux/workqueue.h>
#include <linux/backing-dev.h>
-#include <linux/percpu.h>
#include <linux/atomic.h>
#include <linux/scatterlist.h>
#include <asm/page.h>
@@ -43,6 +42,7 @@ struct convert_context {
struct bvec_iter iter_out;
sector_t cc_sector;
atomic_t cc_pending;
+ struct ablkcipher_request *req;
};
/*
@@ -111,15 +111,7 @@ struct iv_tcw_private {
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
/*
- * Duplicated per-CPU state for cipher.
- */
-struct crypt_cpu {
- struct ablkcipher_request *req;
-};
-
-/*
- * The fields in here must be read only after initialization,
- * changing state should be in crypt_cpu.
+ * The fields in here must be read only after initialization.
*/
struct crypt_config {
struct dm_dev *dev;
@@ -150,12 +142,6 @@ struct crypt_config {
sector_t iv_offset;
unsigned int iv_size;
- /*
- * Duplicated per cpu state. Access through
- * per_cpu_ptr() only.
- */
- struct crypt_cpu __percpu *cpu;
-
/* ESSIV: struct crypto_cipher *essiv_tfm */
void *iv_private;
struct crypto_ablkcipher **tfms;
@@ -192,11 +178,6 @@ static void clone_init(struct dm_crypt_i
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq);
-static struct crypt_cpu *this_crypt_config(struct crypt_config *cc)
-{
- return this_cpu_ptr(cc->cpu);
-}
-
/*
* Use this to access cipher attributes that are the same for each CPU.
*/
@@ -903,16 +884,15 @@ static void kcryptd_async_done(struct cr
static void crypt_alloc_req(struct crypt_config *cc,
struct convert_context *ctx)
{
- struct crypt_cpu *this_cc = this_crypt_config(cc);
unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
- if (!this_cc->req)
- this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+ if (!ctx->req)
+ ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
- ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]);
- ablkcipher_request_set_callback(this_cc->req,
+ ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
+ ablkcipher_request_set_callback(ctx->req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- kcryptd_async_done, dmreq_of_req(cc, this_cc->req));
+ kcryptd_async_done, dmreq_of_req(cc, ctx->req));
}
/*
@@ -921,7 +901,6 @@ static void crypt_alloc_req(struct crypt
static int crypt_convert(struct crypt_config *cc,
struct convert_context *ctx)
{
- struct crypt_cpu *this_cc = this_crypt_config(cc);
int r;
atomic_set(&ctx->cc_pending, 1);
@@ -932,7 +911,7 @@ static int crypt_convert(struct crypt_co
atomic_inc(&ctx->cc_pending);
- r = crypt_convert_block(cc, ctx, this_cc->req);
+ r = crypt_convert_block(cc, ctx, ctx->req);
switch (r) {
/* async */
@@ -941,7 +920,7 @@ static int crypt_convert(struct crypt_co
reinit_completion(&ctx->restart);
/* fall through*/
case -EINPROGRESS:
- this_cc->req = NULL;
+ ctx->req = NULL;
ctx->cc_sector++;
continue;
@@ -1040,6 +1019,7 @@ static struct dm_crypt_io *crypt_io_allo
io->sector = sector;
io->error = 0;
io->base_io = NULL;
+ io->ctx.req = NULL;
atomic_set(&io->io_pending, 0);
return io;
@@ -1065,6 +1045,8 @@ static void crypt_dec_pending(struct dm_
if (!atomic_dec_and_test(&io->io_pending))
return;
+ if (io->ctx.req)
+ mempool_free(io->ctx.req, cc->req_pool);
mempool_free(io, cc->io_pool);
if (likely(!base_io))
@@ -1492,8 +1474,6 @@ static int crypt_wipe_key(struct crypt_c
static void crypt_dtr(struct dm_target *ti)
{
struct crypt_config *cc = ti->private;
- struct crypt_cpu *cpu_cc;
- int cpu;
ti->private = NULL;
@@ -1505,13 +1485,6 @@ static void crypt_dtr(struct dm_target *
if (cc->crypt_queue)
destroy_workqueue(cc->crypt_queue);
- if (cc->cpu)
- for_each_possible_cpu(cpu) {
- cpu_cc = per_cpu_ptr(cc->cpu, cpu);
- if (cpu_cc->req)
- mempool_free(cpu_cc->req, cc->req_pool);
- }
-
crypt_free_tfms(cc);
if (cc->bs)
@@ -1530,9 +1503,6 @@ static void crypt_dtr(struct dm_target *
if (cc->dev)
dm_put_device(ti, cc->dev);
- if (cc->cpu)
- free_percpu(cc->cpu);
-
kzfree(cc->cipher);
kzfree(cc->cipher_string);
@@ -1588,13 +1558,6 @@ static int crypt_ctr_cipher(struct dm_ta
if (tmp)
DMWARN("Ignoring unexpected additional cipher options");
- cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)),
- __alignof__(struct crypt_cpu));
- if (!cc->cpu) {
- ti->error = "Cannot allocate per cpu state";
- goto bad_mem;
- }
-
/*
* For compatibility with the original dm-crypt mapping format, if
* only the cipher name is supplied, use cbc-plain.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 2/9] bio: use kmalloc alignment for bio slab
2014-04-05 18:04 ` [PATCH 1/9] dm-crypt: remove per-cpu structure Mikulas Patocka
@ 2014-04-05 18:05 ` Mikulas Patocka
2014-04-05 18:05 ` [PATCH 3/9] dm-crypt: use per-bio data Mikulas Patocka
0 siblings, 1 reply; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:05 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
Various subsystems can ask the bio subsystem to create a bio slab cache
with some free space before the bio. This free space can be used for any
purpose. Device mapper uses this feature to place some target-specific and
device-mapper specific data before the bio, so that the target-specific
data doesn't have to be allocated separatedly.
This mechanism is used in place of kmalloc, so we need that the allocated
slab have the same memory alignment as memory allocated with kmalloc.
This patch changes the function bio_find_or_create_slab so that it uses
ARCH_KMALLOC_MINALIGN alignment when creating the slab cache. This patch
is needed so that dm-crypt can use per-bio data for encryption - the
crypto subsystem assumes that these data have the same alignment as
kmallocated memory.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
fs/bio.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
Index: linux-3.14-rc3/fs/bio.c
===================================================================
--- linux-3.14-rc3.orig/fs/bio.c 2014-02-23 23:53:50.000000000 +0100
+++ linux-3.14-rc3/fs/bio.c 2014-02-23 23:55:00.000000000 +0100
@@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_cr
bslab = &bio_slabs[entry];
snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
- slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
+ slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
+ SLAB_HWCACHE_ALIGN, NULL);
if (!slab)
goto out_unlock;
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 3/9] dm-crypt: use per-bio data
2014-04-05 18:05 ` [PATCH 2/9] bio: use kmalloc alignment for bio slab Mikulas Patocka
@ 2014-04-05 18:05 ` Mikulas Patocka
2014-04-05 18:06 ` [PATCH 4/9] dm-crypt: use unbound workqueue for request processing Mikulas Patocka
0 siblings, 1 reply; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:05 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
This patch changes dm-crypt so that it uses auxiliary data allocated with
the bio.
Dm-crypt requires two allocations per request - struct dm_crypt_io and
struct ablkcipher_request (with other data appended to it). It used
mempool for the allocation.
Some requests may require more dm_crypt_ios and ablkcipher_requests,
however most requests need just one of each of these two structures to
complete.
This patch changes it so that the first dm_crypt_io and ablkcipher_request
and allocated with the bio (using target per_bio_data_size option). If the
request needs additional values, they are allocated from the mempool.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 40 ++++++++++++++++++++++++++--------------
1 file changed, 26 insertions(+), 14 deletions(-)
Index: linux-3.14-rc4/drivers/md/dm-crypt.c
===================================================================
--- linux-3.14-rc4.orig/drivers/md/dm-crypt.c 2014-02-27 17:48:31.000000000 +0100
+++ linux-3.14-rc4/drivers/md/dm-crypt.c 2014-02-27 17:48:31.000000000 +0100
@@ -59,7 +59,7 @@ struct dm_crypt_io {
int error;
sector_t sector;
struct dm_crypt_io *base_io;
-};
+} CRYPTO_MINALIGN_ATTR;
struct dm_crypt_request {
struct convert_context *ctx;
@@ -162,6 +162,8 @@ struct crypt_config {
*/
unsigned int dmreq_start;
+ unsigned int per_bio_data_size;
+
unsigned long flags;
unsigned int key_size;
unsigned int key_parts; /* independent parts in key buffer */
@@ -895,6 +897,14 @@ static void crypt_alloc_req(struct crypt
kcryptd_async_done, dmreq_of_req(cc, ctx->req));
}
+static void crypt_free_req(struct crypt_config *cc,
+ struct ablkcipher_request *req, struct bio *base_bio)
+{
+ struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
+ if ((struct ablkcipher_request *)(io + 1) != req)
+ mempool_free(req, cc->req_pool);
+}
+
/*
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
@@ -1008,12 +1018,9 @@ static void crypt_free_buffer_pages(stru
}
}
-static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
- struct bio *bio, sector_t sector)
+static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
+ struct bio *bio, sector_t sector)
{
- struct dm_crypt_io *io;
-
- io = mempool_alloc(cc->io_pool, GFP_NOIO);
io->cc = cc;
io->base_bio = bio;
io->sector = sector;
@@ -1021,8 +1028,6 @@ static struct dm_crypt_io *crypt_io_allo
io->base_io = NULL;
io->ctx.req = NULL;
atomic_set(&io->io_pending, 0);
-
- return io;
}
static void crypt_inc_pending(struct dm_crypt_io *io)
@@ -1046,8 +1051,9 @@ static void crypt_dec_pending(struct dm_
return;
if (io->ctx.req)
- mempool_free(io->ctx.req, cc->req_pool);
- mempool_free(io, cc->io_pool);
+ crypt_free_req(cc, io->ctx.req, base_bio);
+ if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
+ mempool_free(io, cc->io_pool);
if (likely(!base_io))
bio_endio(base_bio, error);
@@ -1255,8 +1261,8 @@ static void kcryptd_crypt_write_convert(
* between fragments, so switch to a new dm_crypt_io structure.
*/
if (unlikely(!crypt_finished && remaining)) {
- new_io = crypt_io_alloc(io->cc, io->base_bio,
- sector);
+ new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
+ crypt_io_init(new_io, io->cc, io->base_bio, sector);
crypt_inc_pending(new_io);
crypt_convert_init(cc, &new_io->ctx, NULL,
io->base_bio, sector);
@@ -1325,7 +1331,7 @@ static void kcryptd_async_done(struct cr
if (error < 0)
io->error = -EIO;
- mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
+ crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
if (!atomic_dec_and_test(&ctx->cc_pending))
return;
@@ -1728,6 +1734,10 @@ static int crypt_ctr(struct dm_target *t
goto bad;
}
+ cc->per_bio_data_size = ti->per_bio_data_size =
+ sizeof(struct dm_crypt_io) + cc->dmreq_start +
+ sizeof(struct dm_crypt_request) + cc->iv_size;
+
cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool";
@@ -1824,7 +1834,9 @@ static int crypt_map(struct dm_target *t
return DM_MAPIO_REMAPPED;
}
- io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+ io = dm_per_bio_data(bio, cc->per_bio_data_size);
+ crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+ io->ctx.req = (struct ablkcipher_request *)(io + 1);
if (bio_data_dir(io->base_bio) == READ) {
if (kcryptd_io_read(io, GFP_NOWAIT))
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 4/9] dm-crypt: use unbound workqueue for request processing
2014-04-05 18:05 ` [PATCH 3/9] dm-crypt: use per-bio data Mikulas Patocka
@ 2014-04-05 18:06 ` Mikulas Patocka
2014-04-05 18:07 ` [PATCH 5/9] dm-crypt: don't allocate pages for a partial request Mikulas Patocka
0 siblings, 1 reply; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:06 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
Use unbound workqueue so that work is automatically ballanced between
available CPUs.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.14-rc8/drivers/md/dm-crypt.c
===================================================================
--- linux-3.14-rc8.orig/drivers/md/dm-crypt.c 2014-03-25 22:57:00.000000000 +0100
+++ linux-3.14-rc8/drivers/md/dm-crypt.c 2014-03-28 17:09:14.000000000 +0100
@@ -1800,7 +1800,7 @@ static int crypt_ctr(struct dm_target *t
}
cc->crypt_queue = alloc_workqueue("kcryptd",
- WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
goto bad;
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 5/9] dm-crypt: don't allocate pages for a partial request
2014-04-05 18:06 ` [PATCH 4/9] dm-crypt: use unbound workqueue for request processing Mikulas Patocka
@ 2014-04-05 18:07 ` Mikulas Patocka
2014-04-05 18:07 ` [PATCH 6/9] dm-crypt: avoid deadlock in mempools Mikulas Patocka
0 siblings, 1 reply; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:07 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
This patch changes crypt_alloc_buffer so that it always allocates pages for
a full request.
This change enables further simplification and removing of one refcounts
in the next patches.
Note: the next patch is needed to fix a theoretical deadlock
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 139 ++++++++++----------------------------------------
1 file changed, 30 insertions(+), 109 deletions(-)
Index: linux-3.14/drivers/md/dm-crypt.c
===================================================================
--- linux-3.14.orig/drivers/md/dm-crypt.c 2014-04-04 20:48:50.000000000 +0200
+++ linux-3.14/drivers/md/dm-crypt.c 2014-04-04 20:57:36.000000000 +0200
@@ -58,7 +58,6 @@ struct dm_crypt_io {
atomic_t io_pending;
int error;
sector_t sector;
- struct dm_crypt_io *base_io;
} CRYPTO_MINALIGN_ATTR;
struct dm_crypt_request {
@@ -172,7 +171,6 @@ struct crypt_config {
};
#define MIN_IOS 16
-#define MIN_POOL_PAGES 32
static struct kmem_cache *_crypt_io_pool;
@@ -951,14 +949,13 @@ static int crypt_convert(struct crypt_co
return 0;
}
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
+
/*
* Generate a new unfragmented bio with the given size
* This should never violate the device limitations
- * May return a smaller bio when running out of pages, indicated by
- * *out_of_pages set to 1.
*/
-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
- unsigned *out_of_pages)
+static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
{
struct crypt_config *cc = io->cc;
struct bio *clone;
@@ -966,41 +963,27 @@ static struct bio *crypt_alloc_buffer(st
gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
unsigned i, len;
struct page *page;
+ struct bio_vec *bvec;
clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
if (!clone)
return NULL;
clone_init(io, clone);
- *out_of_pages = 0;
for (i = 0; i < nr_iovecs; i++) {
page = mempool_alloc(cc->page_pool, gfp_mask);
- if (!page) {
- *out_of_pages = 1;
- break;
- }
-
- /*
- * If additional pages cannot be allocated without waiting,
- * return a partially-allocated bio. The caller will then try
- * to allocate more bios while submitting this partial bio.
- */
- gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
- if (!bio_add_page(clone, page, len, 0)) {
- mempool_free(page, cc->page_pool);
- break;
- }
+ bvec = &clone->bi_io_vec[clone->bi_vcnt++];
+ bvec->bv_page = page;
+ bvec->bv_len = len;
+ bvec->bv_offset = 0;
- size -= len;
- }
+ clone->bi_iter.bi_size += len;
- if (!clone->bi_iter.bi_size) {
- bio_put(clone);
- return NULL;
+ size -= len;
}
return clone;
@@ -1025,7 +1008,6 @@ static void crypt_io_init(struct dm_cryp
io->base_bio = bio;
io->sector = sector;
io->error = 0;
- io->base_io = NULL;
io->ctx.req = NULL;
atomic_set(&io->io_pending, 0);
}
@@ -1038,13 +1020,11 @@ static void crypt_inc_pending(struct dm_
/*
* One of the bios was finished. Check for completion of
* the whole request and correctly clean up the buffer.
- * If base_io is set, wait for the last fragment to complete.
*/
static void crypt_dec_pending(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
struct bio *base_bio = io->base_bio;
- struct dm_crypt_io *base_io = io->base_io;
int error = io->error;
if (!atomic_dec_and_test(&io->io_pending))
@@ -1055,13 +1035,7 @@ static void crypt_dec_pending(struct dm_
if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
mempool_free(io, cc->io_pool);
- if (likely(!base_io))
- bio_endio(base_bio, error);
- else {
- if (error && !base_io->error)
- base_io->error = error;
- crypt_dec_pending(base_io);
- }
+ bio_endio(base_bio, error);
}
/*
@@ -1197,10 +1171,7 @@ static void kcryptd_crypt_write_convert(
{
struct crypt_config *cc = io->cc;
struct bio *clone;
- struct dm_crypt_io *new_io;
int crypt_finished;
- unsigned out_of_pages = 0;
- unsigned remaining = io->base_bio->bi_iter.bi_size;
sector_t sector = io->sector;
int r;
@@ -1210,80 +1181,30 @@ static void kcryptd_crypt_write_convert(
crypt_inc_pending(io);
crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
- /*
- * The allocated buffers can be smaller than the whole bio,
- * so repeat the whole process until all the data can be handled.
- */
- while (remaining) {
- clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
- if (unlikely(!clone)) {
- io->error = -ENOMEM;
- break;
- }
-
- io->ctx.bio_out = clone;
- io->ctx.iter_out = clone->bi_iter;
-
- remaining -= clone->bi_iter.bi_size;
- sector += bio_sectors(clone);
-
- crypt_inc_pending(io);
-
- r = crypt_convert(cc, &io->ctx);
- if (r < 0)
- io->error = -EIO;
-
- crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
-
- /* Encryption was already finished, submit io now */
- if (crypt_finished) {
- kcryptd_crypt_write_io_submit(io, 0);
-
- /*
- * If there was an error, do not try next fragments.
- * For async, error is processed in async handler.
- */
- if (unlikely(r < 0))
- break;
+ clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+ if (unlikely(!clone)) {
+ io->error = -EIO;
+ goto dec;
+ }
- io->sector = sector;
- }
+ io->ctx.bio_out = clone;
+ io->ctx.iter_out = clone->bi_iter;
- /*
- * Out of memory -> run queues
- * But don't wait if split was due to the io size restriction
- */
- if (unlikely(out_of_pages))
- congestion_wait(BLK_RW_ASYNC, HZ/100);
+ sector += bio_sectors(clone);
- /*
- * With async crypto it is unsafe to share the crypto context
- * between fragments, so switch to a new dm_crypt_io structure.
- */
- if (unlikely(!crypt_finished && remaining)) {
- new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
- crypt_io_init(new_io, io->cc, io->base_bio, sector);
- crypt_inc_pending(new_io);
- crypt_convert_init(cc, &new_io->ctx, NULL,
- io->base_bio, sector);
- new_io->ctx.iter_in = io->ctx.iter_in;
-
- /*
- * Fragments after the first use the base_io
- * pending count.
- */
- if (!io->base_io)
- new_io->base_io = io;
- else {
- new_io->base_io = io->base_io;
- crypt_inc_pending(io->base_io);
- crypt_dec_pending(io);
- }
+ crypt_inc_pending(io);
+ r = crypt_convert(cc, &io->ctx);
+ if (r)
+ io->error = -EIO;
+ crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
- io = new_io;
- }
+ /* Encryption was already finished, submit io now */
+ if (crypt_finished) {
+ kcryptd_crypt_write_io_submit(io, 0);
+ io->sector = sector;
}
+dec:
crypt_dec_pending(io);
}
@@ -1738,7 +1659,7 @@ static int crypt_ctr(struct dm_target *t
sizeof(struct dm_crypt_io) + cc->dmreq_start +
sizeof(struct dm_crypt_request) + cc->iv_size;
- cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
+ cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool";
goto bad;
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 6/9] dm-crypt: avoid deadlock in mempools
2014-04-05 18:07 ` [PATCH 5/9] dm-crypt: don't allocate pages for a partial request Mikulas Patocka
@ 2014-04-05 18:07 ` Mikulas Patocka
2014-04-05 18:08 ` [PATCH 7/9] dm-crypt: remove io_pool Mikulas Patocka
0 siblings, 1 reply; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:07 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
This patch fixes a theoretical deadlock introduced in the previous patch.
The function crypt_alloc_buffer may be called concurrently. If we allocate
from the mempool concurrently, there is a possibility of deadlock.
For example, if we have mempool of 256 pages, two processes, each wanting 256,
pages allocate from the mempool concurrently, it may deadlock in a situation
where both processes have allocated 128 pages and the mempool is exhausted.
In order to avoid this scenarios, we allocate the pages under a mutex.
In order to not degrade performance with excessive locking, we try
non-blocking allocations without a mutex first and if it fails, we fallback
to a blocking allocation with a mutex.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 41 ++++++++++++++++++++++++++++++++++++-----
1 file changed, 36 insertions(+), 5 deletions(-)
Index: linux-3.14/drivers/md/dm-crypt.c
===================================================================
--- linux-3.14.orig/drivers/md/dm-crypt.c 2014-04-04 20:59:46.000000000 +0200
+++ linux-3.14/drivers/md/dm-crypt.c 2014-04-04 21:04:40.000000000 +0200
@@ -124,6 +124,7 @@ struct crypt_config {
mempool_t *req_pool;
mempool_t *page_pool;
struct bio_set *bs;
+ struct mutex bio_alloc_lock;
struct workqueue_struct *io_queue;
struct workqueue_struct *crypt_queue;
@@ -954,27 +955,51 @@ static void crypt_free_buffer_pages(stru
/*
* Generate a new unfragmented bio with the given size
* This should never violate the device limitations
+ *
+ * This function may be called concurrently. If we allocate from the mempool
+ * concurrently, there is a possibility of deadlock. For example, if we have
+ * mempool of 256 pages, two processes, each wanting 256, pages allocate from
+ * the mempool concurrently, it may deadlock in a situation where both processes
+ * have allocated 128 pages and the mempool is exhausted.
+ *
+ * In order to avoid this scenarios, we allocate the pages under a mutex.
+ *
+ * In order to not degrade performance with excessive locking, we try
+ * non-blocking allocations without a mutex first and if it fails, we fallback
+ * to a blocking allocation with a mutex.
*/
static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
{
struct crypt_config *cc = io->cc;
struct bio *clone;
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
- unsigned i, len;
+ gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+ unsigned i, len, remaining_size;
struct page *page;
struct bio_vec *bvec;
+retry:
+ if (unlikely(gfp_mask & __GFP_WAIT))
+ mutex_lock(&cc->bio_alloc_lock);
+
clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
if (!clone)
- return NULL;
+ goto return_clone;
clone_init(io, clone);
+ remaining_size = size;
+
for (i = 0; i < nr_iovecs; i++) {
page = mempool_alloc(cc->page_pool, gfp_mask);
+ if (!page) {
+ crypt_free_buffer_pages(cc, clone);
+ bio_put(clone);
+ gfp_mask |= __GFP_WAIT;
+ goto retry;
+ }
- len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+ len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
bvec = &clone->bi_io_vec[clone->bi_vcnt++];
bvec->bv_page = page;
@@ -983,9 +1008,13 @@ static struct bio *crypt_alloc_buffer(st
clone->bi_iter.bi_size += len;
- size -= len;
+ remaining_size -= len;
}
+return_clone:
+ if (unlikely(gfp_mask & __GFP_WAIT))
+ mutex_unlock(&cc->bio_alloc_lock);
+
return clone;
}
@@ -1671,6 +1700,8 @@ static int crypt_ctr(struct dm_target *t
goto bad;
}
+ mutex_init(&cc->bio_alloc_lock);
+
ret = -EINVAL;
if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
ti->error = "Invalid iv_offset sector";
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 7/9] dm-crypt: remove io_pool
2014-04-05 18:07 ` [PATCH 6/9] dm-crypt: avoid deadlock in mempools Mikulas Patocka
@ 2014-04-05 18:08 ` Mikulas Patocka
2014-04-05 18:08 ` [PATCH 8/9] dm-crypt: offload writes to thread Mikulas Patocka
0 siblings, 1 reply; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:08 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
Remove io_pool and _crypt_io_pool because they are unused.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 21 +--------------------
1 file changed, 1 insertion(+), 20 deletions(-)
Index: linux-3.14/drivers/md/dm-crypt.c
===================================================================
--- linux-3.14.orig/drivers/md/dm-crypt.c 2014-04-04 21:04:40.000000000 +0200
+++ linux-3.14/drivers/md/dm-crypt.c 2014-04-04 21:05:40.000000000 +0200
@@ -120,7 +120,6 @@ struct crypt_config {
* pool for per bio private data, crypto requests and
* encryption requeusts/buffer pages
*/
- mempool_t *io_pool;
mempool_t *req_pool;
mempool_t *page_pool;
struct bio_set *bs;
@@ -173,8 +172,6 @@ struct crypt_config {
#define MIN_IOS 16
-static struct kmem_cache *_crypt_io_pool;
-
static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq);
@@ -1061,8 +1058,6 @@ static void crypt_dec_pending(struct dm_
if (io->ctx.req)
crypt_free_req(cc, io->ctx.req, base_bio);
- if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
- mempool_free(io, cc->io_pool);
bio_endio(base_bio, error);
}
@@ -1450,8 +1445,6 @@ static void crypt_dtr(struct dm_target *
mempool_destroy(cc->page_pool);
if (cc->req_pool)
mempool_destroy(cc->req_pool);
- if (cc->io_pool)
- mempool_destroy(cc->io_pool);
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
@@ -1664,19 +1657,13 @@ static int crypt_ctr(struct dm_target *t
if (ret < 0)
goto bad;
- ret = -ENOMEM;
- cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
- if (!cc->io_pool) {
- ti->error = "Cannot allocate crypt io mempool";
- goto bad;
- }
-
cc->dmreq_start = sizeof(struct ablkcipher_request);
cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment());
cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) &
~(crypto_tfm_ctx_alignment() - 1);
+ ret = -ENOMEM;
cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
sizeof(struct dm_crypt_request) + cc->iv_size);
if (!cc->req_pool) {
@@ -1938,14 +1925,9 @@ static int __init dm_crypt_init(void)
{
int r;
- _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
- if (!_crypt_io_pool)
- return -ENOMEM;
-
r = dm_register_target(&crypt_target);
if (r < 0) {
DMERR("register failed %d", r);
- kmem_cache_destroy(_crypt_io_pool);
}
return r;
@@ -1954,7 +1936,6 @@ static int __init dm_crypt_init(void)
static void __exit dm_crypt_exit(void)
{
dm_unregister_target(&crypt_target);
- kmem_cache_destroy(_crypt_io_pool);
}
module_init(dm_crypt_init);
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 8/9] dm-crypt: offload writes to thread
2014-04-05 18:08 ` [PATCH 7/9] dm-crypt: remove io_pool Mikulas Patocka
@ 2014-04-05 18:08 ` Mikulas Patocka
2014-04-05 18:09 ` [PATCH 9/9] dm-crypt: sort writes Mikulas Patocka
0 siblings, 1 reply; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:08 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
Submitting write bios directly in the encryption thread caused serious
performance degradation. On multiprocessor machine encryption requests
finish in a different order than they were submitted in. Consequently, write
requests would be submitted in a different order and it could cause severe
performance degradation.
This patch moves submitting write requests to a separate thread so that
the requests can be sorted before submitting.
Sorting is implemented in the next patch.
Note: it is required that a previous patch "dm-crypt: don't allocate pages
for a partial request." is applied before applying this patch. Without
that, this patch could introduce a crash.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 120 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 97 insertions(+), 23 deletions(-)
Index: linux-3.14/drivers/md/dm-crypt.c
===================================================================
--- linux-3.14.orig/drivers/md/dm-crypt.c 2014-04-04 21:05:40.000000000 +0200
+++ linux-3.14/drivers/md/dm-crypt.c 2014-04-04 21:06:22.000000000 +0200
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/crypto.h>
#include <linux/workqueue.h>
+#include <linux/kthread.h>
#include <linux/backing-dev.h>
#include <linux/atomic.h>
#include <linux/scatterlist.h>
@@ -58,6 +59,8 @@ struct dm_crypt_io {
atomic_t io_pending;
int error;
sector_t sector;
+
+ struct list_head list;
} CRYPTO_MINALIGN_ATTR;
struct dm_crypt_request {
@@ -128,6 +131,10 @@ struct crypt_config {
struct workqueue_struct *io_queue;
struct workqueue_struct *crypt_queue;
+ struct task_struct *write_thread;
+ wait_queue_head_t write_thread_wait;
+ struct list_head write_thread_list;
+
char *cipher;
char *cipher_string;
@@ -1141,37 +1148,89 @@ static int kcryptd_io_read(struct dm_cry
return 0;
}
+static void kcryptd_io_read_work(struct work_struct *work)
+{
+ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+ crypt_inc_pending(io);
+ if (kcryptd_io_read(io, GFP_NOIO))
+ io->error = -ENOMEM;
+ crypt_dec_pending(io);
+}
+
+static void kcryptd_queue_read(struct dm_crypt_io *io)
+{
+ struct crypt_config *cc = io->cc;
+
+ INIT_WORK(&io->work, kcryptd_io_read_work);
+ queue_work(cc->io_queue, &io->work);
+}
+
static void kcryptd_io_write(struct dm_crypt_io *io)
{
struct bio *clone = io->ctx.bio_out;
+
generic_make_request(clone);
}
-static void kcryptd_io(struct work_struct *work)
+static int dmcrypt_write(void *data)
{
- struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+ struct crypt_config *cc = data;
+ while (1) {
+ struct list_head local_list;
+ struct blk_plug plug;
- if (bio_data_dir(io->base_bio) == READ) {
- crypt_inc_pending(io);
- if (kcryptd_io_read(io, GFP_NOIO))
- io->error = -ENOMEM;
- crypt_dec_pending(io);
- } else
- kcryptd_io_write(io);
-}
+ DECLARE_WAITQUEUE(wait, current);
-static void kcryptd_queue_io(struct dm_crypt_io *io)
-{
- struct crypt_config *cc = io->cc;
+ spin_lock_irq(&cc->write_thread_wait.lock);
+continue_locked:
- INIT_WORK(&io->work, kcryptd_io);
- queue_work(cc->io_queue, &io->work);
+ if (!list_empty(&cc->write_thread_list))
+ goto pop_from_list;
+
+ __set_current_state(TASK_INTERRUPTIBLE);
+ __add_wait_queue(&cc->write_thread_wait, &wait);
+
+ spin_unlock_irq(&cc->write_thread_wait.lock);
+
+ if (unlikely(kthread_should_stop())) {
+ set_task_state(current, TASK_RUNNING);
+ remove_wait_queue(&cc->write_thread_wait, &wait);
+ break;
+ }
+
+ schedule();
+
+ set_task_state(current, TASK_RUNNING);
+ spin_lock_irq(&cc->write_thread_wait.lock);
+ __remove_wait_queue(&cc->write_thread_wait, &wait);
+ goto continue_locked;
+
+pop_from_list:
+ local_list = cc->write_thread_list;
+ local_list.next->prev = &local_list;
+ local_list.prev->next = &local_list;
+ INIT_LIST_HEAD(&cc->write_thread_list);
+
+ spin_unlock_irq(&cc->write_thread_wait.lock);
+
+ blk_start_plug(&plug);
+ do {
+ struct dm_crypt_io *io = container_of(local_list.next,
+ struct dm_crypt_io, list);
+ list_del(&io->list);
+ kcryptd_io_write(io);
+ } while (!list_empty(&local_list));
+ blk_finish_plug(&plug);
+ }
+ return 0;
}
-static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
+static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io)
{
struct bio *clone = io->ctx.bio_out;
struct crypt_config *cc = io->cc;
+ unsigned long flags;
if (unlikely(io->error < 0)) {
crypt_free_buffer_pages(cc, clone);
@@ -1185,10 +1244,10 @@ static void kcryptd_crypt_write_io_submi
clone->bi_iter.bi_sector = cc->start + io->sector;
- if (async)
- kcryptd_queue_io(io);
- else
- generic_make_request(clone);
+ spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+ list_add_tail(&io->list, &cc->write_thread_list);
+ wake_up_locked(&cc->write_thread_wait);
+ spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
}
static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
@@ -1224,7 +1283,7 @@ static void kcryptd_crypt_write_convert(
/* Encryption was already finished, submit io now */
if (crypt_finished) {
- kcryptd_crypt_write_io_submit(io, 0);
+ kcryptd_crypt_write_io_submit(io);
io->sector = sector;
}
@@ -1284,7 +1343,7 @@ static void kcryptd_async_done(struct cr
if (bio_data_dir(io->base_bio) == READ)
kcryptd_crypt_read_done(io);
else
- kcryptd_crypt_write_io_submit(io, 1);
+ kcryptd_crypt_write_io_submit(io);
}
static void kcryptd_crypt(struct work_struct *work)
@@ -1431,6 +1490,9 @@ static void crypt_dtr(struct dm_target *
if (!cc)
return;
+ if (cc->write_thread)
+ kthread_stop(cc->write_thread);
+
if (cc->io_queue)
destroy_workqueue(cc->io_queue);
if (cc->crypt_queue)
@@ -1745,6 +1807,18 @@ static int crypt_ctr(struct dm_target *t
goto bad;
}
+ init_waitqueue_head(&cc->write_thread_wait);
+ INIT_LIST_HEAD(&cc->write_thread_list);
+
+ cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+ if (IS_ERR(cc->write_thread)) {
+ ret = PTR_ERR(cc->write_thread);
+ cc->write_thread = NULL;
+ ti->error = "Couldn't spawn write thread";
+ goto bad;
+ }
+ wake_up_process(cc->write_thread);
+
ti->num_flush_bios = 1;
ti->discard_zeroes_data_unsupported = true;
@@ -1779,7 +1853,7 @@ static int crypt_map(struct dm_target *t
if (bio_data_dir(io->base_bio) == READ) {
if (kcryptd_io_read(io, GFP_NOWAIT))
- kcryptd_queue_io(io);
+ kcryptd_queue_read(io);
} else
kcryptd_queue_crypt(io);
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 9/9] dm-crypt: sort writes
2014-04-05 18:08 ` [PATCH 8/9] dm-crypt: offload writes to thread Mikulas Patocka
@ 2014-04-05 18:09 ` Mikulas Patocka
0 siblings, 0 replies; 15+ messages in thread
From: Mikulas Patocka @ 2014-04-05 18:09 UTC (permalink / raw)
To: Mike Snitzer, Ondrej Kozina; +Cc: dm-devel
Write requests are sorted in a red-black tree structure and are submitted
in the sorted order.
In theory the sorting should be performed by the underlying disk scheduler,
however, in practice the disk scheduler accepts and sorts only 128 requests.
In order to sort more requests, we need to implement our own sorting.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 50 +++++++++++++++++++++++++++++++++++---------------
1 file changed, 35 insertions(+), 15 deletions(-)
Index: linux-3.14/drivers/md/dm-crypt.c
===================================================================
--- linux-3.14.orig/drivers/md/dm-crypt.c 2014-04-04 21:06:22.000000000 +0200
+++ linux-3.14/drivers/md/dm-crypt.c 2014-04-04 21:06:55.000000000 +0200
@@ -22,6 +22,7 @@
#include <linux/backing-dev.h>
#include <linux/atomic.h>
#include <linux/scatterlist.h>
+#include <linux/rbtree.h>
#include <asm/page.h>
#include <asm/unaligned.h>
#include <crypto/hash.h>
@@ -60,7 +61,7 @@ struct dm_crypt_io {
int error;
sector_t sector;
- struct list_head list;
+ struct rb_node rb_node;
} CRYPTO_MINALIGN_ATTR;
struct dm_crypt_request {
@@ -133,7 +134,7 @@ struct crypt_config {
struct task_struct *write_thread;
wait_queue_head_t write_thread_wait;
- struct list_head write_thread_list;
+ struct rb_root write_tree;
char *cipher;
char *cipher_string;
@@ -1177,7 +1178,7 @@ static int dmcrypt_write(void *data)
{
struct crypt_config *cc = data;
while (1) {
- struct list_head local_list;
+ struct rb_root write_tree;
struct blk_plug plug;
DECLARE_WAITQUEUE(wait, current);
@@ -1185,7 +1186,7 @@ static int dmcrypt_write(void *data)
spin_lock_irq(&cc->write_thread_wait.lock);
continue_locked:
- if (!list_empty(&cc->write_thread_list))
+ if (!RB_EMPTY_ROOT(&cc->write_tree))
goto pop_from_list;
__set_current_state(TASK_INTERRUPTIBLE);
@@ -1207,20 +1208,23 @@ continue_locked:
goto continue_locked;
pop_from_list:
- local_list = cc->write_thread_list;
- local_list.next->prev = &local_list;
- local_list.prev->next = &local_list;
- INIT_LIST_HEAD(&cc->write_thread_list);
-
+ write_tree = cc->write_tree;
+ cc->write_tree = RB_ROOT;
spin_unlock_irq(&cc->write_thread_wait.lock);
+ BUG_ON(rb_parent(write_tree.rb_node));
+
+ /*
+ * Note: we cannot walk the tree here with rb_next because
+ * the structures may be freed when kcryptd_io_write is called.
+ */
blk_start_plug(&plug);
do {
- struct dm_crypt_io *io = container_of(local_list.next,
- struct dm_crypt_io, list);
- list_del(&io->list);
+ struct dm_crypt_io *io = rb_entry(rb_first(&write_tree),
+ struct dm_crypt_io, rb_node);
+ rb_erase(&io->rb_node, &write_tree);
kcryptd_io_write(io);
- } while (!list_empty(&local_list));
+ } while (!RB_EMPTY_ROOT(&write_tree));
blk_finish_plug(&plug);
}
return 0;
@@ -1231,6 +1235,8 @@ static void kcryptd_crypt_write_io_submi
struct bio *clone = io->ctx.bio_out;
struct crypt_config *cc = io->cc;
unsigned long flags;
+ sector_t sector;
+ struct rb_node **p, *parent;
if (unlikely(io->error < 0)) {
crypt_free_buffer_pages(cc, clone);
@@ -1245,7 +1251,21 @@ static void kcryptd_crypt_write_io_submi
clone->bi_iter.bi_sector = cc->start + io->sector;
spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
- list_add_tail(&io->list, &cc->write_thread_list);
+ p = &cc->write_tree.rb_node;
+ parent = NULL;
+ sector = io->sector;
+ while (*p) {
+ parent = *p;
+#define io_node rb_entry(parent, struct dm_crypt_io, rb_node)
+ if (sector < io_node->sector)
+ p = &io_node->rb_node.rb_left;
+ else
+ p = &io_node->rb_node.rb_right;
+#undef io_node
+ }
+ rb_link_node(&io->rb_node, parent, p);
+ rb_insert_color(&io->rb_node, &cc->write_tree);
+
wake_up_locked(&cc->write_thread_wait);
spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
}
@@ -1808,7 +1828,7 @@ static int crypt_ctr(struct dm_target *t
}
init_waitqueue_head(&cc->write_thread_wait);
- INIT_LIST_HEAD(&cc->write_thread_list);
+ cc->write_tree = RB_ROOT;
cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
if (IS_ERR(cc->write_thread)) {
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] dm-crypt patches
2014-04-05 18:04 [PATCH 0/9] dm-crypt patches Mikulas Patocka
2014-04-05 18:04 ` [PATCH 1/9] dm-crypt: remove per-cpu structure Mikulas Patocka
@ 2014-04-07 1:01 ` Mike Snitzer
2014-04-08 15:25 ` Ondrej Kozina
2 siblings, 0 replies; 15+ messages in thread
From: Mike Snitzer @ 2014-04-07 1:01 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: Ondrej Kozina, Mike Snitzer, dm-devel
On Sat, Apr 05 2014 at 2:04pm -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:
> Hi
>
> I fixed the bug that Ondrej found (it happened only under memory pressure,
> that's why I couldn't reproduce it), here I'm resending the dm-crypt
> patches.
I already staged your original patchset in linux-dm.git's 'for-next'
branch (linux-next). So your re-post of these patches threw away patch
header edits, my Signed-off-by;s and also my cleanu-up of the rb-tree
code in patch 9.
Anyway, I folded the following fix from your new patch series into patch
5 (I saw no reason why 'remaining_size' couldn't be applied in patch 5
rather than patch 6). All rebased patches are now staged in linux-next
(Ondrej please test, if we can verify performance I'm still open to
sending to Linus for 3.15, but must be by Thursday at the latest with no
further changes to this code):
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index c74c779..bbc1043 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -979,8 +979,9 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
struct bio *clone;
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
- unsigned i, len;
+ unsigned i, len, remaining_size;
struct page *page;
+ struct bio_vec *bvec;
retry:
if (unlikely(gfp_mask & __GFP_WAIT))
@@ -992,6 +993,8 @@ retry:
clone_init(io, clone);
+ remaining_size = size;
+
for (i = 0; i < nr_iovecs; i++) {
page = mempool_alloc(cc->page_pool, gfp_mask);
if (!page) {
@@ -1001,18 +1004,16 @@ retry:
goto retry;
}
- len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+ len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
- if (!bio_add_page(clone, page, len, 0)) {
- DMERR("bio_add_page failed for page %d: the underlying device has stricter limits than dm-crypt target", i);
- mempool_free(page, cc->page_pool);
- crypt_free_buffer_pages(cc, clone);
- bio_put(clone);
- clone = NULL;
- goto return_clone;
- }
+ bvec = &clone->bi_io_vec[clone->bi_vcnt++];
+ bvec->bv_page = page;
+ bvec->bv_len = len;
+ bvec->bv_offset = 0;
+
+ clone->bi_iter.bi_size += len;
- size -= len;
+ remaining_size -= len;
}
return_clone:
Also note, I also folded this into patch 6 (BUG_ON conveys intent that
mempool_alloc should _never_ fail once __GFP_WAIT is set in
gfp_mask.. if it did we'd deadlock trying to re-acquire the mutex,
rather than BUG_ON but...):
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 56784ed..53cbfdf 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -967,10 +967,10 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
* the mempool concurrently, it may deadlock in a situation where both processes
* have allocated 128 pages and the mempool is exhausted.
*
- * In order to avoid this scenarios, we allocate the pages under a mutex.
+ * In order to avoid this scenario we allocate the pages under a mutex.
*
* In order to not degrade performance with excessive locking, we try
- * non-blocking allocations without a mutex first and if it fails, we fallback
+ * non-blocking allocation without a mutex first and if it fails, we fallback
* to a blocking allocation with a mutex.
*/
static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
@@ -998,6 +998,7 @@ retry:
for (i = 0; i < nr_iovecs; i++) {
page = mempool_alloc(cc->page_pool, gfp_mask);
if (!page) {
+ BUG_ON(gfp_mask & __GFP_WAIT);
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
gfp_mask |= __GFP_WAIT;
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] dm-crypt patches
2014-04-05 18:04 [PATCH 0/9] dm-crypt patches Mikulas Patocka
2014-04-05 18:04 ` [PATCH 1/9] dm-crypt: remove per-cpu structure Mikulas Patocka
2014-04-07 1:01 ` [PATCH 0/9] dm-crypt patches Mike Snitzer
@ 2014-04-08 15:25 ` Ondrej Kozina
2014-04-08 15:44 ` Mike Snitzer
2014-04-08 21:29 ` Marian Csontos
2 siblings, 2 replies; 15+ messages in thread
From: Ondrej Kozina @ 2014-04-08 15:25 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: device-mapper development, Mike Snitzer
Hi,
I ran all tests again and with new version and everything passed even on
raid5 back end, so it's right time to paste here some performance results:
Requirements:
------------
http://okozina.fedorapeople.org/dmcrypt-tests.tar.xz
compiled fio package
Performance tests are based on script in subdirectory 1/ in
http://okozina.fedorapeople.org/dmcrypt-tests.tar.xz
The result tables were aggregated from output of following fio command:
fio --output=log --latency-log=log --bandwidth-log=log \
--name=global --rw=$3 --size=1G --bsrange=1k-128k \
--filename=$1 \
--name=job1 --name=job2 --name=job3 --name=job4 \
--end_fsync=1
$1 - test device
$3 - read/write mode see explanation of TEST NAME column below
HW:
---
2-socket machine Intel(R) Xeon(TM) CPU 3.40GHz (4 logical CPUs, without
HW acceleration for AES!)
4GB RAM
4x SEAGATE ST336753LC (rotational drive)
One disk was used for system data, the rest for testing
Explanation for tables below:
-----------------------------
In column TEST NAME:
- "disk_nocrypt" stands for pure device w/o any encryption in effect
(it's single device or raid5 device using md)
- "zero" stands for encryption over dm-zero target. Basically this test
is trying to measure the performance of dm-crypt core alone.
- "disk" is real device (same as in "disk_nocrypt") with encryption set
up over it.
following suffixes after dash in TABLE NAME column:
(all correspond to permissible values for fio --rw option)
- read = sequential reads
- write = sequential writes
- randread = random reads
- randwrite = random writes
- rw = sequential mixed reads and writes
- randrw = random mixed reads and writes
Any mixed option above has separate line for aggregated READs and WRITEs
operations.
For encryption I used aes-xts-plain64 cipher, but please note that the
machine doesn't provide HW acceleration for AES algorithm.
The column AGGRB_A stands for results with vanilla 3.14
The column AGGRB_B stands for results with all Mikulas's patches applied.
The interesting results are prefixed with "zero-" and "disk-". The
"disk_nocrypt-" results are just for reference to see real limits of HW.
The "zero-" prefixed lines doesn't require to be published multiple
times but it can serve to demonstrate scatter of results even on idling
machine (except for running tests).
The single device tests:
TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
------------------------- --------- ------------ ------------ ---------
disk_nocrypt-read READ 260289 259339 -0.36 %
disk_nocrypt-write WRITE 242950 242922 -0.01 %
disk_nocrypt-randread READ 53095 52195 -1.70 %
disk_nocrypt-randwrite WRITE 193665 194482 0.42 %
disk_nocrypt-rw READ 64050 64037 -0.02 %
disk_nocrypt-rw WRITE 64581 64567 -0.02 %
disk_nocrypt-randrw READ 16792 17235 2.64 %
disk_nocrypt-randrw WRITE 16796 17239 2.64 %
zero-read READ 187614 309748 65.10 %
zero-write WRITE 240871 332301 37.96 %
zero-randread READ 299024 325476 8.85 %
zero-randwrite WRITE 108506 123805 14.10 %
zero-rw READ 81694 106583 30.47 %
zero-rw WRITE 82370 107465 30.47 %
zero-randrw READ 60794 85574 40.76 %
zero-randrw WRITE 60810 85597 40.76 %
disk-read READ 232603 238258 2.43 %
disk-write WRITE 119649 218601 82.70 %
disk-randread READ 52578 52249 -0.63 %
disk-randwrite WRITE 153878 173487 12.74 %
disk-rw READ 61341 64629 5.36 %
disk-rw WRITE 61849 65164 5.36 %
disk-randrw READ 16251 15229 -6.29 %
disk-randrw WRITE 16255 15233 -6.29 %
md raid5 with 64KiB chunk size as basking device:
TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
------------------------- --------- ------------ ------------ ---------
disk_nocrypt-read READ 391405 390276 -0.29 %
disk_nocrypt-write WRITE 209736 210272 0.26 %
disk_nocrypt-randread READ 84644 84319 -0.38 %
disk_nocrypt-randwrite WRITE 30651 29174 -4.82 %
disk_nocrypt-rw READ 64533 64472 -0.09 %
disk_nocrypt-rw WRITE 65068 65005 -0.10 %
disk_nocrypt-randrw READ 18586 18827 1.30 %
disk_nocrypt-randrw WRITE 18591 18832 1.30 %
zero-read READ 190641 308042 61.58 %
zero-write WRITE 343936 306533 -10.87 %
zero-randread READ 298322 326007 9.28 %
zero-randwrite WRITE 115462 122193 5.83 %
zero-rw READ 82092 114980 40.06 %
zero-rw WRITE 82771 115932 40.06 %
zero-randrw READ 69730 90008 29.08 %
zero-randrw WRITE 69748 90032 29.08 %
disk-read READ 283207 264641 -6.56 %
disk-write WRITE 152088 176431 16.01 %
disk-randread READ 82021 81720 -0.37 %
disk-randwrite WRITE 20553 23880 16.19 %
disk-rw READ 56833 59059 3.92 %
disk-rw WRITE 57303 59548 3.92 %
disk-randrw READ 15819 16726 5.73 %
disk-randrw WRITE 15823 16730 5.73 %
md raid5 with 512KiB chunk size as basking device:
TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
------------------------- --------- ------------ ------------ ---------
disk_nocrypt-read READ 463151 447822 -3.31 %
disk_nocrypt-write WRITE 149476 149866 0.26 %
disk_nocrypt-randread READ 99205 98393 -0.82 %
disk_nocrypt-randwrite WRITE 34422 34535 0.33 %
disk_nocrypt-rw READ 75898 77132 1.63 %
disk_nocrypt-rw WRITE 76527 77770 1.62 %
disk_nocrypt-randrw READ 21886 21928 0.19 %
disk_nocrypt-randrw WRITE 21892 21934 0.19 %
zero-read READ 189735 305173 60.84 %
zero-write WRITE 289402 289082 -0.11 %
zero-randread READ 300373 317061 5.56 %
zero-randwrite WRITE 128128 123838 -3.35 %
zero-rw READ 81956 114558 39.78 %
zero-rw WRITE 82635 115506 39.78 %
zero-randrw READ 74406 86589 16.37 %
zero-randrw WRITE 74426 86612 16.37 %
disk-read READ 333251 399153 19.78 %
disk-write WRITE 139703 158574 13.51 %
disk-randread READ 93669 92408 -1.35 %
disk-randwrite WRITE 22994 26520 15.33 %
disk-rw READ 63573 73389 15.44 %
disk-rw WRITE 64099 73996 15.44 %
disk-randrw READ 19045 19617 3.00 %
disk-randrw WRITE 19050 19622 3.00 %
Ondrej
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] dm-crypt patches
2014-04-08 15:25 ` Ondrej Kozina
@ 2014-04-08 15:44 ` Mike Snitzer
2014-04-09 8:46 ` Ondrej Kozina
2014-04-08 21:29 ` Marian Csontos
1 sibling, 1 reply; 15+ messages in thread
From: Mike Snitzer @ 2014-04-08 15:44 UTC (permalink / raw)
To: Ondrej Kozina; +Cc: device-mapper development, Mikulas Patocka
On Tue, Apr 08 2014 at 11:25am -0400,
Ondrej Kozina <okozina@redhat.com> wrote:
> Hi,
>
> I ran all tests again and with new version and everything passed
> even on raid5 back end, so it's right time to paste here some
> performance results:
Thanks for sharing the results. The system is a pretty low-end machine,
certainly doesn't showcase the intended configuration (improved CPU
scalability on a system with many cores).
BUT it is a useful data point to have. Because it reflects that the
improvements are realized even on systems without many cores or AES.
> Requirements:
> ------------
> http://okozina.fedorapeople.org/dmcrypt-tests.tar.xz
> compiled fio package
>
> Performance tests are based on script in subdirectory 1/ in
> http://okozina.fedorapeople.org/dmcrypt-tests.tar.xz
>
> The result tables were aggregated from output of following fio command:
>
> fio --output=log --latency-log=log --bandwidth-log=log \
> --name=global --rw=$3 --size=1G --bsrange=1k-128k \
> --filename=$1 \
> --name=job1 --name=job2 --name=job3 --name=job4 \
> --end_fsync=1
>
> $1 - test device
> $3 - read/write mode see explanation of TEST NAME column below
How is it that you've extracted these results from the test run? I'd be
open to rerunning the same on my testbed (1 Intel Xeon E5649 Westmere-EP
with 12 logical cores, AES, and 6 SAS devices for RAID, 1 STEC PCIe SSD)
I could also yield this testbed to you if that'd be easier than you
telling me how to extract the test results.
BTW, it seems odd to me that the tests run all the fsx and dt tests yet
only the FIO tests are what you're reporting.. would save time to just
disable the non-fio tests wouldn't it?
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] dm-crypt patches
2014-04-08 15:25 ` Ondrej Kozina
2014-04-08 15:44 ` Mike Snitzer
@ 2014-04-08 21:29 ` Marian Csontos
1 sibling, 0 replies; 15+ messages in thread
From: Marian Csontos @ 2014-04-08 21:29 UTC (permalink / raw)
To: device-mapper development, Mikulas Patocka; +Cc: Mike Snitzer
On 04/08/2014 05:25 PM, Ondrej Kozina wrote:
> Hi,
>
> I ran all tests again and with new version and everything passed even on
> raid5 back end, so it's right time to paste here some performance results:
I have found few strange numbers there I have difficulties to understand
and wonder what maybe the cause of these discrepancies. Could you have a
look?
>
>
> Requirements:
> ------------
> http://okozina.fedorapeople.org/dmcrypt-tests.tar.xz
> compiled fio package
>
> Performance tests are based on script in subdirectory 1/ in
> http://okozina.fedorapeople.org/dmcrypt-tests.tar.xz
>
> The result tables were aggregated from output of following fio command:
>
> fio --output=log --latency-log=log --bandwidth-log=log \
> --name=global --rw=$3 --size=1G --bsrange=1k-128k \
> --filename=$1 \
> --name=job1 --name=job2 --name=job3 --name=job4 \
> --end_fsync=1
>
> $1 - test device
> $3 - read/write mode see explanation of TEST NAME column below
>
> HW:
> ---
> 2-socket machine Intel(R) Xeon(TM) CPU 3.40GHz (4 logical CPUs, without
> HW acceleration for AES!)
> 4GB RAM
> 4x SEAGATE ST336753LC (rotational drive)
>
> One disk was used for system data, the rest for testing
>
> Explanation for tables below:
> -----------------------------
> In column TEST NAME:
>
> - "disk_nocrypt" stands for pure device w/o any encryption in effect
> (it's single device or raid5 device using md)
> - "zero" stands for encryption over dm-zero target. Basically this test
> is trying to measure the performance of dm-crypt core alone.
> - "disk" is real device (same as in "disk_nocrypt") with encryption set
> up over it.
>
> following suffixes after dash in TABLE NAME column:
> (all correspond to permissible values for fio --rw option)
>
> - read = sequential reads
> - write = sequential writes
> - randread = random reads
> - randwrite = random writes
> - rw = sequential mixed reads and writes
> - randrw = random mixed reads and writes
>
> Any mixed option above has separate line for aggregated READs and WRITEs
> operations.
>
> For encryption I used aes-xts-plain64 cipher, but please note that the
> machine doesn't provide HW acceleration for AES algorithm.
>
> The column AGGRB_A stands for results with vanilla 3.14
> The column AGGRB_B stands for results with all Mikulas's patches applied.
>
> The interesting results are prefixed with "zero-" and "disk-". The
> "disk_nocrypt-" results are just for reference to see real limits of HW.
>
> The "zero-" prefixed lines doesn't require to be published multiple
> times but it can serve to demonstrate scatter of results even on idling
> machine (except for running tests).
>
> The single device tests:
>
> TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
> ------------------------- --------- ------------ ------------ ---------
> disk_nocrypt-read READ 260289 259339 -0.36 %
> disk_nocrypt-write WRITE 242950 242922 -0.01 %
> disk_nocrypt-randread READ 53095 52195 -1.70 %
> disk_nocrypt-randwrite WRITE 193665 194482 0.42 %
> disk_nocrypt-rw READ 64050 64037 -0.02 %
> disk_nocrypt-rw WRITE 64581 64567 -0.02 %
> disk_nocrypt-randrw READ 16792 17235 2.64 %
> disk_nocrypt-randrw WRITE 16796 17239 2.64 %
> zero-read READ 187614 309748 65.10 %
> zero-write WRITE 240871 332301 37.96 %
> zero-randread READ 299024 325476 8.85 %
^ Wow! Randread so much faster
than sequential that I wonder if it is correct...
> zero-randwrite WRITE 108506 123805 14.10 %
> zero-rw READ 81694 106583 30.47 %
> zero-rw WRITE 82370 107465 30.47 %
> zero-randrw READ 60794 85574 40.76 %
> zero-randrw WRITE 60810 85597 40.76 %
> disk-read READ 232603 238258 2.43 %
> disk-write WRITE 119649 218601 82.70 %
> disk-randread READ 52578 52249 -0.63 %
> disk-randwrite WRITE 153878 173487 12.74 %
^ And disk randwrite faster than
sequential. And three times faster than randread suggesting it may be
writing to cache.
> disk-rw READ 61341 64629 5.36 %
> disk-rw WRITE 61849 65164 5.36 %
> disk-randrw READ 16251 15229 -6.29 %
> disk-randrw WRITE 16255 15233 -6.29 %
>
>
> md raid5 with 64KiB chunk size as basking device:
>
> TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
> ------------------------- --------- ------------ ------------ ---------
> disk_nocrypt-read READ 391405 390276 -0.29 %
> disk_nocrypt-write WRITE 209736 210272 0.26 %
> disk_nocrypt-randread READ 84644 84319 -0.38 %
> disk_nocrypt-randwrite WRITE 30651 29174 -4.82 %
> disk_nocrypt-rw READ 64533 64472 -0.09 %
> disk_nocrypt-rw WRITE 65068 65005 -0.10 %
> disk_nocrypt-randrw READ 18586 18827 1.30 %
> disk_nocrypt-randrw WRITE 18591 18832 1.30 %
> zero-read READ 190641 308042 61.58 %
> zero-write WRITE 343936 306533 -10.87 %
^ Here the write is much much
faster than reads and the -10% looks more like an error than performance
regression
> zero-randread READ 298322 326007 9.28 %
^ Again randread much faster than
sequential
> zero-randwrite WRITE 115462 122193 5.83 %
> zero-rw READ 82092 114980 40.06 %
> zero-rw WRITE 82771 115932 40.06 %
> zero-randrw READ 69730 90008 29.08 %
> zero-randrw WRITE 69748 90032 29.08 %
> disk-read READ 283207 264641 -6.56 %
^ Disk read being so faster than
dm-zero looks odd...
> disk-write WRITE 152088 176431 16.01 %
> disk-randread READ 82021 81720 -0.37 %
> disk-randwrite WRITE 20553 23880 16.19 %
> disk-rw READ 56833 59059 3.92 %
> disk-rw WRITE 57303 59548 3.92 %
> disk-randrw READ 15819 16726 5.73 %
> disk-randrw WRITE 15823 16730 5.73 %
>
> md raid5 with 512KiB chunk size as basking device:
>
> TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
> ------------------------- --------- ------------ ------------ ---------
> disk_nocrypt-read READ 463151 447822 -3.31 %
> disk_nocrypt-write WRITE 149476 149866 0.26 %
> disk_nocrypt-randread READ 99205 98393 -0.82 %
> disk_nocrypt-randwrite WRITE 34422 34535 0.33 %
> disk_nocrypt-rw READ 75898 77132 1.63 %
> disk_nocrypt-rw WRITE 76527 77770 1.62 %
> disk_nocrypt-randrw READ 21886 21928 0.19 %
> disk_nocrypt-randrw WRITE 21892 21934 0.19 %
> zero-read READ 189735 305173 60.84 %
> zero-write WRITE 289402 289082 -0.11 %
> zero-randread READ 300373 317061 5.56 %
> zero-randwrite WRITE 128128 123838 -3.35 %
> zero-rw READ 81956 114558 39.78 %
> zero-rw WRITE 82635 115506 39.78 %
> zero-randrw READ 74406 86589 16.37 %
> zero-randrw WRITE 74426 86612 16.37 %
> disk-read READ 333251 399153 19.78 %
^ It looks odd - this is so much
faster than dm-zero and I wonder whether dm-zero is not fast enough or
the disk tests are hitting cache instead of disk and whether all the
zero-* tests are meaningful
> disk-write WRITE 139703 158574 13.51 %
^ Here dm-crypt is
faster than raw disk
> disk-randread READ 93669 92408 -1.35 %
> disk-randwrite WRITE 22994 26520 15.33 %
> disk-rw READ 63573 73389 15.44 %
> disk-rw WRITE 64099 73996 15.44 %
> disk-randrw READ 19045 19617 3.00 %
> disk-randrw WRITE 19050 19622 3.00 %
>
> Ondrej
>
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] dm-crypt patches
2014-04-08 15:44 ` Mike Snitzer
@ 2014-04-09 8:46 ` Ondrej Kozina
0 siblings, 0 replies; 15+ messages in thread
From: Ondrej Kozina @ 2014-04-09 8:46 UTC (permalink / raw)
To: Mike Snitzer; +Cc: device-mapper development, Mikulas Patocka
On 04/08/2014 05:44 PM, Mike Snitzer wrote:
> How is it that you've extracted these results from the test run? I'd be
> open to rerunning the same on my testbed (1 Intel Xeon E5649 Westmere-EP
> with 12 logical cores, AES, and 6 SAS devices for RAID, 1 STEC PCIe SSD)
>
> I could also yield this testbed to you if that'd be easier than you
> telling me how to extract the test results.
Thanks Mike!
Pasting new results for loaned machine and things are getting more
interesting (with respect to Marian's reply):
single (rotational) device:
TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
------------------------- --------- ------------ ------------ ---------
disk_nocrypt-read READ 479129 478528 -0.13 %
disk_nocrypt-write WRITE 225040 225718 0.30 %
disk_nocrypt-randread READ 30150 30193 0.14 %
disk_nocrypt-randwrite WRITE 39059 39546 1.25 %
disk_nocrypt-rw READ 92477 94021 1.67 %
disk_nocrypt-rw WRITE 93242 94800 1.67 %
disk_nocrypt-randrw READ 14426 14902 3.30 %
disk_nocrypt-randrw WRITE 14388 14862 3.29 %
zero-read READ 272889 354069 29.75 %
zero-write WRITE 145721 306175 110.11 %
zero-randread READ 482888 599366 24.12 %
zero-randwrite WRITE 151175 346728 129.36 %
zero-rw READ 91280 131950 44.56 %
zero-rw WRITE 92036 133042 44.55 %
zero-randrw READ 72625 142476 96.18 %
zero-randrw WRITE 72433 142099 96.18 %
disk-read READ 193161 210156 8.80 %
disk-write WRITE 33845 182790 440.08 %
disk-randread READ 29746 29640 -0.36 %
disk-randwrite WRITE 7783 31689 307.16 %
disk-rw READ 23954 69686 190.92 %
disk-rw WRITE 24152 70263 190.92 %
disk-randrw READ 6770 12523 84.98 %
disk-randrw WRITE 6753 12490 84.95 %
single PCIe SSD device:
(Staring to immense difference between sequential reads and random
reads. Can it be there some sort of raid inside SSD card?)
TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
------------------------- --------- ------------ ------------ ---------
disk_nocrypt-read READ 1072537 1145446 6.80 %
disk_nocrypt-write WRITE 843924 828586 -1.82 %
disk_nocrypt-randread READ 2091110 2108825 0.85 %
disk_nocrypt-randwrite WRITE 1149542 1137971 -1.01 %
disk_nocrypt-rw READ 425704 426661 0.22 %
disk_nocrypt-rw WRITE 429228 430193 0.22 %
disk_nocrypt-randrw READ 534206 532311 -0.35 %
disk_nocrypt-randrw WRITE 532791 530900 -0.35 %
zero-read READ 271388 351871 29.66 %
zero-write WRITE 143748 300000 108.70 %
zero-randread READ 487264 600310 23.20 %
zero-randwrite WRITE 148958 344308 131.14 %
zero-rw READ 90927 131659 44.80 %
zero-rw WRITE 91679 132748 44.80 %
zero-randrw READ 71325 137468 92.73 %
zero-randrw WRITE 71136 137104 92.74 %
disk-read READ 243854 269678 10.59 %
disk-write WRITE 179773 300408 67.10 %
disk-randread READ 518848 647679 24.83 %
disk-randwrite WRITE 195314 348398 78.38 %
disk-rw READ 103381 131270 26.98 %
disk-rw WRITE 104237 132356 26.98 %
disk-randrw READ 93681 145719 55.55 %
disk-randrw WRITE 93433 145333 55.55 %
4 rotational devices forming md raid5, 64KiB chunk size:
TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
------------------------- --------- ------------ ------------ ---------
disk_nocrypt-read READ 560436 547773 -2.26 %
disk_nocrypt-write WRITE 360552 359039 -0.42 %
disk_nocrypt-randread READ 62256 62118 -0.22 %
disk_nocrypt-randwrite WRITE 51591 52289 1.35 %
disk_nocrypt-rw READ 59680 58955 -1.21 %
disk_nocrypt-rw WRITE 60174 59443 -1.21 %
disk_nocrypt-randrw READ 18328 18219 -0.59 %
disk_nocrypt-randrw WRITE 18280 18171 -0.60 %
zero-read READ 274712 353651 28.74 %
zero-write WRITE 146469 300904 105.44 %
zero-randread READ 498736 603246 20.95 %
zero-randwrite WRITE 150131 355003 136.46 %
zero-rw READ 92481 132359 43.12 %
zero-rw WRITE 93246 133455 43.12 %
zero-randrw READ 72278 141164 95.31 %
zero-randrw WRITE 72086 140790 95.31 %
disk-read READ 177154 203054 14.62 %
disk-write WRITE 71057 241913 240.45 %
disk-randread READ 58757 58988 0.39 %
disk-randwrite WRITE 14453 28966 100.42 %
disk-rw READ 59772 67560 13.03 %
disk-rw WRITE 60267 68120 13.03 %
disk-randrw READ 9634 12815 33.02 %
disk-randrw WRITE 9608 12781 33.02 %
4 rotational devices forming md raid5, 512KiB chunk size:
(I'll repeat this one since there's strange increase in random read
performance on raw raid5 device, see line "disk_nocrypt-randread"):
TEST NAME OPERATION AGGRB_A KB/s AGGRB_B KB/s DIFF A->B
------------------------- --------- ------------ ------------ ---------
disk_nocrypt-read READ 734811 730206 -0.63 %
disk_nocrypt-write WRITE 264474 259773 -1.78 %
disk_nocrypt-randread READ 64387 83218 29.25 %
disk_nocrypt-randwrite WRITE 52671 51898 -1.47 %
disk_nocrypt-rw READ 75785 73469 -3.06 %
disk_nocrypt-rw WRITE 76413 74077 -3.06 %
disk_nocrypt-randrw READ 17664 17690 0.15 %
disk_nocrypt-randrw WRITE 17617 17643 0.15 %
zero-read READ 270967 354099 30.68 %
zero-write WRITE 145277 307230 111.48 %
zero-randread READ 478481 585805 22.43 %
zero-randwrite WRITE 149734 349997 133.75 %
zero-rw READ 91742 127760 39.26 %
zero-rw WRITE 92501 128818 39.26 %
zero-randrw READ 78925 145437 84.27 %
zero-randrw WRITE 78716 145051 84.27 %
disk-read READ 203705 237220 16.45 %
disk-write WRITE 64068 230051 259.07 %
disk-randread READ 60698 77793 28.16 %
disk-randwrite WRITE 18535 32227 73.87 %
disk-rw READ 49023 65775 34.17 %
disk-rw WRITE 49429 66320 34.17 %
disk-randrw READ 10740 13084 21.82 %
disk-randrw WRITE 10712 13049 21.82 %
Now I would like to work on simulating different workloads (these tests
were simulating 4 jobs in fio on otherwise idling machine)
Ondrej
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2014-04-09 8:46 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-04-05 18:04 [PATCH 0/9] dm-crypt patches Mikulas Patocka
2014-04-05 18:04 ` [PATCH 1/9] dm-crypt: remove per-cpu structure Mikulas Patocka
2014-04-05 18:05 ` [PATCH 2/9] bio: use kmalloc alignment for bio slab Mikulas Patocka
2014-04-05 18:05 ` [PATCH 3/9] dm-crypt: use per-bio data Mikulas Patocka
2014-04-05 18:06 ` [PATCH 4/9] dm-crypt: use unbound workqueue for request processing Mikulas Patocka
2014-04-05 18:07 ` [PATCH 5/9] dm-crypt: don't allocate pages for a partial request Mikulas Patocka
2014-04-05 18:07 ` [PATCH 6/9] dm-crypt: avoid deadlock in mempools Mikulas Patocka
2014-04-05 18:08 ` [PATCH 7/9] dm-crypt: remove io_pool Mikulas Patocka
2014-04-05 18:08 ` [PATCH 8/9] dm-crypt: offload writes to thread Mikulas Patocka
2014-04-05 18:09 ` [PATCH 9/9] dm-crypt: sort writes Mikulas Patocka
2014-04-07 1:01 ` [PATCH 0/9] dm-crypt patches Mike Snitzer
2014-04-08 15:25 ` Ondrej Kozina
2014-04-08 15:44 ` Mike Snitzer
2014-04-09 8:46 ` Ondrej Kozina
2014-04-08 21:29 ` Marian Csontos
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.