dm-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
* [PATCH] DM-CRYPT: Scale to multiple CPUs v3
@ 2010-10-10 11:59 Andi Kleen
  2010-10-10 12:38 ` [dm-devel] " Milan Broz
  0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2010-10-10 11:59 UTC (permalink / raw)
  To: pedrib, linux-kernel, dm-devel, ak

DM-CRYPT: Scale to multiple CPUs v3

[Due to popular demand this is a port of the dm-crypt scalability
patch to 2.6.36-rc7.  The 2.6.35 and .32 patches were widely used by
lots of users with good results. 

This also updates the work queue calls for the
new workqueue frame work in 2.6.36; the crypto worker tasks
as marked CPU intensive now. -Andi]

---

Currently dm-crypt does all encryption work per dmcrypt mapping in a
single workqueue. This does not scale well when multiple CPUs
are submitting IO at a high rate. The single CPU running the single
thread cannot keep up with the encryption and encrypted IO performance
tanks.

This patch changes the crypto workqueue to be per CPU. This means
that as long as the IO submitter (or the interrupt target CPUs
for reads) runs on different CPUs the encryption work will be also
parallel.

To avoid a bottleneck on the IO worker I also changed those to be
per CPU threads.

There is still some shared data, so I suspect some bouncing
cache lines. But I haven't done a detailed study on that yet.

All the threads are global, not per CPU. That is to avoid a thread
explosion on systems with a large number of CPUs and a larger
number of dm-crypt mappings. The code takes care to avoid problems
with nested mappings.

v2: per cpu improvements from Eric Dumazet
v3: Port to 2.6.36-rc7. Mark per CPU crypto work queues as CPU intensive.

Signed-off-by: Andi Kleen <ak@linux.intel.com>

---
 drivers/md/dm-crypt.c |  325 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 251 insertions(+), 74 deletions(-)

Index: linux-2.6.36-rc7-ak/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.36-rc7-ak.orig/drivers/md/dm-crypt.c
+++ linux-2.6.36-rc7-ak/drivers/md/dm-crypt.c
@@ -18,6 +18,7 @@
 #include <linux/crypto.h>
 #include <linux/workqueue.h>
 #include <linux/backing-dev.h>
+#include <linux/percpu.h>
 #include <asm/atomic.h>
 #include <linux/scatterlist.h>
 #include <asm/page.h>
@@ -77,11 +78,15 @@ struct crypt_iv_operations {
 };
 
 struct iv_essiv_private {
-	struct crypto_cipher *tfm;
 	struct crypto_hash *hash_tfm;
 	u8 *salt;
 };
 
+/* Duplicated per CPU state for cipher */
+struct iv_essiv_private_cpu {
+	struct crypto_cipher *tfm;
+};
+
 struct iv_benbi_private {
 	int shift;
 };
@@ -91,6 +96,18 @@ struct iv_benbi_private {
  * and encrypts / decrypts at the same time.
  */
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+
+/* Duplicated per CPU state for cipher */
+struct crypt_cpu {
+	struct ablkcipher_request *req;
+	struct crypto_ablkcipher *tfm;
+	struct iv_essiv_private_cpu ie;
+};
+
+/*
+ * The fields in here must be read only after initialization,
+ * changing state should be in crypt_cpu.
+ */
 struct crypt_config {
 	struct dm_dev *dev;
 	sector_t start;
@@ -104,9 +121,6 @@ struct crypt_config {
 	mempool_t *page_pool;
 	struct bio_set *bs;
 
-	struct workqueue_struct *io_queue;
-	struct workqueue_struct *crypt_queue;
-
 	char *cipher;
 	char *cipher_mode;
 
@@ -119,6 +133,12 @@ struct crypt_config {
 	unsigned int iv_size;
 
 	/*
+	 * Duplicated per cpu state. Access through
+	 * per_cpu_ptr() only.
+	 */
+	struct crypt_cpu __percpu *cpu;
+
+	/*
 	 * Layout of each crypto request:
 	 *
 	 *   struct ablkcipher_request
@@ -132,23 +152,42 @@ struct crypt_config {
 	 * correctly aligned.
 	 */
 	unsigned int dmreq_start;
-	struct ablkcipher_request *req;
 
-	struct crypto_ablkcipher *tfm;
 	unsigned long flags;
 	unsigned int key_size;
 	u8 key[0];
 };
 
+/* RED-PEN scale with number of cpus? */
 #define MIN_IOS        16
 #define MIN_POOL_PAGES 32
 #define MIN_BIO_PAGES  8
 
+/* Protect creation of a new crypt queue */
+static DEFINE_MUTEX(queue_setup_lock);
+static struct workqueue_struct *crypt_workqueue;
+static struct workqueue_struct *io_workqueue;
+static DEFINE_PER_CPU(struct task_struct *, io_wq_cpu);
+
 static struct kmem_cache *_crypt_io_pool;
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
 
+static struct crypt_cpu *crypt_me(struct crypt_config *cc)
+{
+	return this_cpu_ptr(cc->cpu);
+}
+
+/* Use this for cipher attributes that are the same for all cpus */
+static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc)
+{
+	struct crypto_ablkcipher *tfm;
+	/* cpu doesn't matter, output is always the same */
+	tfm = __this_cpu_ptr(cc->cpu)->tfm;
+	return tfm;
+}
+
 /*
  * Different IV generation algorithms:
  *
@@ -195,7 +234,7 @@ static int crypt_iv_essiv_init(struct cr
 	struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
 	struct hash_desc desc;
 	struct scatterlist sg;
-	int err;
+	int err, n, cpu;
 
 	sg_init_one(&sg, cc->key, cc->key_size);
 	desc.tfm = essiv->hash_tfm;
@@ -205,8 +244,18 @@ static int crypt_iv_essiv_init(struct cr
 	if (err)
 		return err;
 
-	return crypto_cipher_setkey(essiv->tfm, essiv->salt,
+	for_each_possible_cpu (cpu) {
+		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
+
+		n = crypto_cipher_setkey(cs->ie.tfm, essiv->salt,
 				    crypto_hash_digestsize(essiv->hash_tfm));
+		if (n) {
+			err = n;
+			break;
+		}
+	}
+
+	return err;
 }
 
 /* Wipe salt and reset key derived from volume key */
@@ -214,24 +263,70 @@ static int crypt_iv_essiv_wipe(struct cr
 {
 	struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
 	unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm);
+	int cpu, err, n;
 
 	memset(essiv->salt, 0, salt_size);
 
-	return crypto_cipher_setkey(essiv->tfm, essiv->salt, salt_size);
+	err = 0;
+	for_each_possible_cpu (cpu) {
+		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
+		n = crypto_cipher_setkey(cs->ie.tfm, essiv->salt, salt_size);
+		if (n)
+			err = n;
+	}
+	return err;
+}
+
+/* Set up per cpu cipher state */
+static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc,
+					     struct dm_target *ti,
+					     u8 *salt, unsigned saltsize)
+{
+	struct crypto_cipher *essiv_tfm;
+	int err;
+
+	/* Setup the essiv_tfm with the given salt */
+	essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(essiv_tfm)) {
+		ti->error = "Error allocating crypto tfm for ESSIV";
+		return essiv_tfm;
+	}
+
+	if (crypto_cipher_blocksize(essiv_tfm) !=
+	    crypto_ablkcipher_ivsize(any_tfm(cc))) {
+		ti->error = "Block size of ESSIV cipher does "
+			    "not match IV size of block cipher";
+		crypto_free_cipher(essiv_tfm);
+		return ERR_PTR(-EINVAL);
+	}
+	err = crypto_cipher_setkey(essiv_tfm, salt, saltsize);
+	if (err) {
+		ti->error = "Failed to set key for ESSIV cipher";
+		crypto_free_cipher(essiv_tfm);
+		return ERR_PTR(err);
+	}
+
+	return essiv_tfm;
 }
 
 static void crypt_iv_essiv_dtr(struct crypt_config *cc)
 {
+	int cpu;
 	struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
 
-	crypto_free_cipher(essiv->tfm);
-	essiv->tfm = NULL;
-
 	crypto_free_hash(essiv->hash_tfm);
 	essiv->hash_tfm = NULL;
 
 	kzfree(essiv->salt);
 	essiv->salt = NULL;
+
+	for_each_possible_cpu (cpu) {
+		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
+		if (cs->ie.tfm) {
+			crypto_free_cipher(cs->ie.tfm);
+			cs->ie.tfm = NULL;
+		}
+	}
 }
 
 static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
@@ -241,6 +336,7 @@ static int crypt_iv_essiv_ctr(struct cry
 	struct crypto_hash *hash_tfm = NULL;
 	u8 *salt = NULL;
 	int err;
+	int cpu;
 
 	if (!opts) {
 		ti->error = "Digest algorithm missing for ESSIV mode";
@@ -262,30 +358,22 @@ static int crypt_iv_essiv_ctr(struct cry
 		goto bad;
 	}
 
-	/* Allocate essiv_tfm */
-	essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(essiv_tfm)) {
-		ti->error = "Error allocating crypto tfm for ESSIV";
-		err = PTR_ERR(essiv_tfm);
-		goto bad;
-	}
-	if (crypto_cipher_blocksize(essiv_tfm) !=
-	    crypto_ablkcipher_ivsize(cc->tfm)) {
-		ti->error = "Block size of ESSIV cipher does "
-			    "not match IV size of block cipher";
-		err = -EINVAL;
-		goto bad;
-	}
-
 	cc->iv_gen_private.essiv.salt = salt;
-	cc->iv_gen_private.essiv.tfm = essiv_tfm;
 	cc->iv_gen_private.essiv.hash_tfm = hash_tfm;
 
+	for_each_possible_cpu (cpu) {
+		essiv_tfm = setup_essiv_cpu(cc, ti, salt,
+					crypto_hash_digestsize(hash_tfm));
+		if (IS_ERR(essiv_tfm)) {
+			kfree(salt);
+			crypt_iv_essiv_dtr(cc);
+			return PTR_ERR(essiv_tfm);
+		}
+		per_cpu_ptr(cc->cpu, cpu)->ie.tfm = essiv_tfm;
+	}
 	return 0;
 
 bad:
-	if (essiv_tfm && !IS_ERR(essiv_tfm))
-		crypto_free_cipher(essiv_tfm);
 	if (hash_tfm && !IS_ERR(hash_tfm))
 		crypto_free_hash(hash_tfm);
 	kfree(salt);
@@ -296,14 +384,14 @@ static int crypt_iv_essiv_gen(struct cry
 {
 	memset(iv, 0, cc->iv_size);
 	*(u64 *)iv = cpu_to_le64(sector);
-	crypto_cipher_encrypt_one(cc->iv_gen_private.essiv.tfm, iv, iv);
+	crypto_cipher_encrypt_one(crypt_me(cc)->ie.tfm, iv, iv);
 	return 0;
 }
 
 static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti,
 			      const char *opts)
 {
-	unsigned bs = crypto_ablkcipher_blocksize(cc->tfm);
+	unsigned bs = crypto_ablkcipher_blocksize(any_tfm(cc));
 	int log = ilog2(bs);
 
 	/* we need to calculate how far we must shift the sector count
@@ -412,7 +500,7 @@ static int crypt_convert_block(struct cr
 
 	dmreq = dmreq_of_req(cc, req);
 	iv = (u8 *)ALIGN((unsigned long)(dmreq + 1),
-			 crypto_ablkcipher_alignmask(cc->tfm) + 1);
+			 crypto_ablkcipher_alignmask(any_tfm(cc)) + 1);
 
 	dmreq->ctx = ctx;
 	sg_init_table(&dmreq->sg_in, 1);
@@ -457,13 +545,14 @@ static void kcryptd_async_done(struct cr
 static void crypt_alloc_req(struct crypt_config *cc,
 			    struct convert_context *ctx)
 {
-	if (!cc->req)
-		cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
-	ablkcipher_request_set_tfm(cc->req, cc->tfm);
-	ablkcipher_request_set_callback(cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+	struct crypt_cpu *cs = crypt_me(cc);
+	if (!cs->req)
+		cs->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+	ablkcipher_request_set_tfm(cs->req, cs->tfm);
+	ablkcipher_request_set_callback(cs->req, CRYPTO_TFM_REQ_MAY_BACKLOG |
 					CRYPTO_TFM_REQ_MAY_SLEEP,
 					kcryptd_async_done,
-					dmreq_of_req(cc, cc->req));
+					dmreq_of_req(cc, cs->req));
 }
 
 /*
@@ -472,6 +561,7 @@ static void crypt_alloc_req(struct crypt
 static int crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
+	struct crypt_cpu *cs = crypt_me(cc);
 	int r;
 
 	atomic_set(&ctx->pending, 1);
@@ -483,7 +573,7 @@ static int crypt_convert(struct crypt_co
 
 		atomic_inc(&ctx->pending);
 
-		r = crypt_convert_block(cc, ctx, cc->req);
+		r = crypt_convert_block(cc, ctx, cs->req);
 
 		switch (r) {
 		/* async */
@@ -492,7 +582,7 @@ static int crypt_convert(struct crypt_co
 			INIT_COMPLETION(ctx->restart);
 			/* fall through*/
 		case -EINPROGRESS:
-			cc->req = NULL;
+			cs->req = NULL;
 			ctx->sector++;
 			continue;
 
@@ -651,6 +741,9 @@ static void crypt_dec_pending(struct dm_
  * They must be separated as otherwise the final stages could be
  * starved by new requests which can block in the first stages due
  * to memory allocation.
+ *
+ * The work is done per CPU global for all dmcrypt instances.
+ * They should not depend on each other and do not block.
  */
 static void crypt_endio(struct bio *clone, int error)
 {
@@ -732,6 +825,7 @@ static void kcryptd_io(struct work_struc
 {
 	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
 
+	__this_cpu_write(io_wq_cpu, current);
 	if (bio_data_dir(io->base_bio) == READ)
 		kcryptd_io_read(io);
 	else
@@ -740,10 +834,23 @@ static void kcryptd_io(struct work_struc
 
 static void kcryptd_queue_io(struct dm_crypt_io *io)
 {
-	struct crypt_config *cc = io->target->private;
+	int cpu;
+
+	/*
+	 * Since we only have a single worker per CPU in extreme
+	 * cases there might be nesting (dm-crypt on another dm-crypt)
+	 * To avoid deadlock run the work directly then.
+	 */
+	cpu = get_cpu();
+	if (per_cpu(io_wq_cpu, cpu) == current && !in_interrupt()) {
+		put_cpu();
+		kcryptd_io(&io->work);
+		return;
+	}
 
 	INIT_WORK(&io->work, kcryptd_io);
-	queue_work(cc->io_queue, &io->work);
+	queue_work(io_workqueue, &io->work);
+	put_cpu();
 }
 
 static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
@@ -924,10 +1031,8 @@ static void kcryptd_crypt(struct work_st
 
 static void kcryptd_queue_crypt(struct dm_crypt_io *io)
 {
-	struct crypt_config *cc = io->target->private;
-
 	INIT_WORK(&io->work, kcryptd_crypt);
-	queue_work(cc->crypt_queue, &io->work);
+	queue_work(crypt_workqueue, &io->work);
 }
 
 /*
@@ -971,6 +1076,21 @@ static void crypt_encode_key(char *hex,
 	}
 }
 
+static int crypt_setkey_allcpus(struct crypt_config *cc)
+{
+	int cpu, n, err;
+
+	err = 0;
+	for_each_possible_cpu (cpu) {
+		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
+		n = crypto_ablkcipher_setkey(cs->tfm, cc->key, cc->key_size);
+		if (n)
+			err = n;
+	}
+	return err;
+}
+
+
 static int crypt_set_key(struct crypt_config *cc, char *key)
 {
 	unsigned key_size = strlen(key) >> 1;
@@ -986,29 +1106,68 @@ static int crypt_set_key(struct crypt_co
 
 	set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
 
-	return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size);
+	return crypt_setkey_allcpus(cc);
 }
 
 static int crypt_wipe_key(struct crypt_config *cc)
 {
 	clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
 	memset(&cc->key, 0, cc->key_size * sizeof(u8));
-	return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size);
+	return crypt_setkey_allcpus(cc);
+}
+
+/* Use a global per-CPU encryption workqueue for all mounts */
+static int crypt_create_workqueues(void)
+{
+	int ret = 0;
+
+	/* Module unload cleans up on error */
+	mutex_lock(&queue_setup_lock);
+	if (!crypt_workqueue) {
+		crypt_workqueue = alloc_workqueue("dmcrypt",
+						  WQ_CPU_INTENSIVE|WQ_RESCUER,
+						  1);
+		if (!crypt_workqueue)
+			ret = -ENOMEM;
+	}
+	if (!io_workqueue) {
+		io_workqueue = create_workqueue("dmcrypt-io");
+		if (!io_workqueue)
+			ret = -ENOMEM;
+	}
+	mutex_unlock(&queue_setup_lock);
+	return ret;
+}
+
+static void crypt_dtr_cpus(struct crypt_config *cc)
+{
+	int cpu;
+
+	for_each_possible_cpu (cpu) {
+		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
+		if (cs->tfm) {
+			crypto_free_ablkcipher(cs->tfm);
+			cs->tfm = NULL;
+		}
+	}
 }
 
 static void crypt_dtr(struct dm_target *ti)
 {
 	struct crypt_config *cc = ti->private;
+	int cpu;
 
 	ti->private = NULL;
 
 	if (!cc)
 		return;
 
-	if (cc->io_queue)
-		destroy_workqueue(cc->io_queue);
-	if (cc->crypt_queue)
-		destroy_workqueue(cc->crypt_queue);
+	for_each_possible_cpu (cpu) {
+		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
+		if (cs->req)
+			mempool_free(cs->req, cc->req_pool);
+	}
+
 
 	if (cc->bs)
 		bioset_free(cc->bs);
@@ -1023,12 +1182,14 @@ static void crypt_dtr(struct dm_target *
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
 
-	if (cc->tfm && !IS_ERR(cc->tfm))
-		crypto_free_ablkcipher(cc->tfm);
-
 	if (cc->dev)
 		dm_put_device(ti, cc->dev);
 
+	crypt_dtr_cpus(cc);
+
+	if (cc->cpu)
+		free_percpu(cc->cpu);
+
 	kzfree(cc->cipher);
 	kzfree(cc->cipher_mode);
 
@@ -1040,9 +1201,11 @@ static int crypt_ctr_cipher(struct dm_ta
 			    char *cipher_in, char *key)
 {
 	struct crypt_config *cc = ti->private;
+	struct crypto_ablkcipher *tfm;
 	char *tmp, *cipher, *chainmode, *ivmode, *ivopts;
 	char *cipher_api = NULL;
 	int ret = -EINVAL;
+	int cpu;
 
 	/* Convert to crypto api definition? */
 	if (strchr(cipher_in, '(')) {
@@ -1074,6 +1237,12 @@ static int crypt_ctr_cipher(struct dm_ta
 	if (tmp)
 		DMWARN("Ignoring unexpected additional cipher options");
 
+	cc->cpu = alloc_percpu(struct crypt_cpu);
+	if (!cc->cpu) {
+		ti->error = "Cannot allocate per cpu state";
+		goto bad_mem;
+	}
+
 	/* Compatibility mode for old dm-crypt mappings */
 	if (!chainmode || (!strcmp(chainmode, "plain") && !ivmode)) {
 		kfree(cc->cipher_mode);
@@ -1099,12 +1268,16 @@ static int crypt_ctr_cipher(struct dm_ta
 	}
 
 	/* Allocate cipher */
-	cc->tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0);
-	if (IS_ERR(cc->tfm)) {
-		ret = PTR_ERR(cc->tfm);
-		ti->error = "Error allocating crypto tfm";
-		goto bad;
-	}
+	for_each_possible_cpu (cpu) {
+		tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0);
+		if (IS_ERR(tfm)) {
+			ret = PTR_ERR(tfm);
+			ti->error = "Error allocating crypto tfm";
+			goto bad;
+		}
+		per_cpu_ptr(cc->cpu, cpu)->tfm = tfm;
+ 	}
+	tfm = any_tfm(cc);
 
 	/* Initialize and set key */
 	ret = crypt_set_key(cc, key);
@@ -1114,7 +1287,7 @@ static int crypt_ctr_cipher(struct dm_ta
 	}
 
 	/* Initialize IV */
-	cc->iv_size = crypto_ablkcipher_ivsize(cc->tfm);
+	cc->iv_size = crypto_ablkcipher_ivsize(tfm);
 	if (cc->iv_size)
 		/* at least a 64 bit sector number should fit in our buffer */
 		cc->iv_size = max(cc->iv_size,
@@ -1181,6 +1354,7 @@ static int crypt_ctr(struct dm_target *t
 	unsigned int key_size;
 	unsigned long long tmpll;
 	int ret;
+	int cpu;
 
 	if (argc != 5) {
 		ti->error = "Not enough arguments";
@@ -1208,9 +1382,9 @@ static int crypt_ctr(struct dm_target *t
 	}
 
 	cc->dmreq_start = sizeof(struct ablkcipher_request);
-	cc->dmreq_start += crypto_ablkcipher_reqsize(cc->tfm);
+	cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
 	cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment());
-	cc->dmreq_start += crypto_ablkcipher_alignmask(cc->tfm) &
+	cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) &
 			   ~(crypto_tfm_ctx_alignment() - 1);
 
 	cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
@@ -1219,7 +1393,6 @@ static int crypt_ctr(struct dm_target *t
 		ti->error = "Cannot allocate crypt request mempool";
 		goto bad;
 	}
-	cc->req = NULL;
 
 	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
 	if (!cc->page_pool) {
@@ -1233,6 +1406,14 @@ static int crypt_ctr(struct dm_target *t
 		goto bad;
 	}
 
+	for_each_possible_cpu (cpu) {
+		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
+		if (crypto_ablkcipher_setkey(cs->tfm, cc->key, key_size) < 0) {
+			ti->error = "Error setting key";
+			goto bad;
+		}
+	}
+
 	ret = -EINVAL;
 	if (sscanf(argv[2], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid iv_offset sector";
@@ -1251,16 +1432,8 @@ static int crypt_ctr(struct dm_target *t
 	}
 	cc->start = tmpll;
 
-	ret = -ENOMEM;
-	cc->io_queue = create_singlethread_workqueue("kcryptd_io");
-	if (!cc->io_queue) {
-		ti->error = "Couldn't create kcryptd io queue";
-		goto bad;
-	}
-
-	cc->crypt_queue = create_singlethread_workqueue("kcryptd");
-	if (!cc->crypt_queue) {
-		ti->error = "Couldn't create kcryptd queue";
+	if (crypt_create_workqueues() < 0) {
+		ti->error = "Couldn't create kcrypt work queues";
 		goto bad;
 	}
 
@@ -1456,6 +1629,10 @@ static void __exit dm_crypt_exit(void)
 {
 	dm_unregister_target(&crypt_target);
 	kmem_cache_destroy(_crypt_io_pool);
+	if (crypt_workqueue)
+		destroy_workqueue(crypt_workqueue);
+	if (io_workqueue)
+		destroy_workqueue(io_workqueue);
 }
 
 module_init(dm_crypt_init);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 11:59 [PATCH] DM-CRYPT: Scale to multiple CPUs v3 Andi Kleen
@ 2010-10-10 12:38 ` Milan Broz
  2010-10-10 12:53   ` Milan Broz
  2010-10-10 13:08   ` Andi Kleen
  0 siblings, 2 replies; 24+ messages in thread
From: Milan Broz @ 2010-10-10 12:38 UTC (permalink / raw)
  To: Andi Kleen
  Cc: device-mapper development, Andi Kleen, pedrib, linux-kernel,
	Alasdair G Kergon, Mike Snitzer


On 10/10/2010 01:59 PM, Andi Kleen wrote:
> DM-CRYPT: Scale to multiple CPUs v3
> 
> [Due to popular demand this is a port of the dm-crypt scalability
> patch to 2.6.36-rc7.  The 2.6.35 and .32 patches were widely used by
> lots of users with good results. 
> 

Hi Andi,

please can you check split patches in
http://mbroz.fedorapeople.org/dm-crypt/2.6.36-devel/

is there some change in your new version?

Can I send this to dm-devel instead? 
(It is better for review.)

I know that I fixed some small bug there and these are heavily
tested by me.

Alasdair, _please_ can you include it in dm-tree?
I asked you at least 5 times already, my last info is that
you are planning this for 2.6.37, right?


>  static void kcryptd_queue_io(struct dm_crypt_io *io)
>  {
> -	struct crypt_config *cc = io->target->private;
> +	int cpu;
> +
> +	/*
> +	 * Since we only have a single worker per CPU in extreme
> +	 * cases there might be nesting (dm-crypt on another dm-crypt)
> +	 * To avoid deadlock run the work directly then.
> +	 */
> +	cpu = get_cpu();
> +	if (per_cpu(io_wq_cpu, cpu) == current && !in_interrupt()) {
> +		put_cpu();
> +		kcryptd_io(&io->work);
> +		return;
> +	}

This is only place where I see problem - if running in crypto async mode,
callback is called in interrupt mode (please correct me if I am wrong).

So with async crypto and nested dm-crypt mapping this deadlock
prevention doesn't work - so is there still possibility of deadlock?

(I think we can ignore it for now, I tried create some "real world" deadlocky
mappings some time ago and was not able to catch it even on high memory pressure.)

Milan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 12:38 ` [dm-devel] " Milan Broz
@ 2010-10-10 12:53   ` Milan Broz
  2010-10-10 13:09     ` Andi Kleen
  2010-10-10 13:08   ` Andi Kleen
  1 sibling, 1 reply; 24+ messages in thread
From: Milan Broz @ 2010-10-10 12:53 UTC (permalink / raw)
  To: Andi Kleen
  Cc: device-mapper development, Mike Snitzer, linux-kernel, Andi Kleen,
	pedrib, Alasdair G Kergon

On 10/10/2010 02:38 PM, Milan Broz wrote:

> is there some change in your new version?
Of course I mean except the changes you mentioned which are easy
to change in my patchset:-)

(There was requirement from agk to have several simple
bisectable patches.)
 
> Can I send this to dm-devel instead? 
(I mean with the v3 CPU intensive workqueue changes of course)

In any case, it should be merged ASAP
to allow more people to test it.

Milan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 12:38 ` [dm-devel] " Milan Broz
  2010-10-10 12:53   ` Milan Broz
@ 2010-10-10 13:08   ` Andi Kleen
  2010-10-10 15:34     ` Milan Broz
  1 sibling, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2010-10-10 13:08 UTC (permalink / raw)
  To: Milan Broz
  Cc: Andi Kleen, device-mapper development, Andi Kleen, pedrib,
	linux-kernel, Alasdair G Kergon, Mike Snitzer

On Sun, Oct 10, 2010 at 02:38:17PM +0200, Milan Broz wrote:
> 
> On 10/10/2010 01:59 PM, Andi Kleen wrote:
> > DM-CRYPT: Scale to multiple CPUs v3
> > 
> > [Due to popular demand this is a port of the dm-crypt scalability
> > patch to 2.6.36-rc7.  The 2.6.35 and .32 patches were widely used by
> > lots of users with good results. 
> > 
> 
> Hi Andi,
> 
> please can you check split patches in
> http://mbroz.fedorapeople.org/dm-crypt/2.6.36-devel/
> 
> is there some change in your new version?

Yes I updated the work queue setup and ported it to
all these changes in 2.6.36.

See the changelog in the description.

I did it intentionally not split up because a split up is unlikely
to be bisectable. I think there is no need for any splitups.

> Can I send this to dm-devel instead? 
> (It is better for review.)

What review? The code has been ready since last merge window at least
and was revied back then is my understanding.

The various versions I posted have been extensively tested
by lots of people, including me.

-Andi

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 12:53   ` Milan Broz
@ 2010-10-10 13:09     ` Andi Kleen
  0 siblings, 0 replies; 24+ messages in thread
From: Andi Kleen @ 2010-10-10 13:09 UTC (permalink / raw)
  To: Milan Broz
  Cc: Andi Kleen, device-mapper development, Mike Snitzer, linux-kernel,
	Andi Kleen, pedrib, Alasdair G Kergon

On Sun, Oct 10, 2010 at 02:53:54PM +0200, Milan Broz wrote:
> On 10/10/2010 02:38 PM, Milan Broz wrote:
> 
> > is there some change in your new version?
> Of course I mean except the changes you mentioned which are easy
> to change in my patchset:-)
> 
> (There was requirement from agk to have several simple
> bisectable patches.)

Well my patch is bisectable.

I don't see how it can be split up without being not functional
in the middle.

Either you have per cpu data or you don't.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 13:08   ` Andi Kleen
@ 2010-10-10 15:34     ` Milan Broz
  2010-10-10 16:06       ` Andi Kleen
  2010-10-10 16:22       ` Mike Snitzer
  0 siblings, 2 replies; 24+ messages in thread
From: Milan Broz @ 2010-10-10 15:34 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andi Kleen, device-mapper development, pedrib, linux-kernel,
	Alasdair G Kergon, Mike Snitzer

On 10/10/2010 03:08 PM, Andi Kleen wrote:
> I did it intentionally not split up because a split up is unlikely
> to be bisectable. I think there is no need for any splitups.

Shrug. The main encryption thread and ESSIV per-cpu are two separate
things from my point of view.

Anyway, the ball is on DM maintainer's playground now.

Milan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 15:34     ` Milan Broz
@ 2010-10-10 16:06       ` Andi Kleen
  2010-10-10 16:22       ` Mike Snitzer
  1 sibling, 0 replies; 24+ messages in thread
From: Andi Kleen @ 2010-10-10 16:06 UTC (permalink / raw)
  To: Milan Broz
  Cc: Andi Kleen, Andi Kleen, device-mapper development, pedrib,
	linux-kernel, Alasdair G Kergon, Mike Snitzer

On Sun, Oct 10, 2010 at 05:34:50PM +0200, Milan Broz wrote:
> On 10/10/2010 03:08 PM, Andi Kleen wrote:
> > I did it intentionally not split up because a split up is unlikely
> > to be bisectable. I think there is no need for any splitups.
> 
> Shrug. The main encryption thread and ESSIV per-cpu are two separate
> things from my point of view.

Is not obvious to me.

> 
> Anyway, the ball is on DM maintainer's playground now.

Well I was told already for 2.6.35 it was ready to merge
(and also I already have an active user base, people are using
it because it's solving a wide spread problem) 

I am frankly somewhat surprised things should be suddenly 
completely open now again and such contortions as you
did to the patch should be needed.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 15:34     ` Milan Broz
  2010-10-10 16:06       ` Andi Kleen
@ 2010-10-10 16:22       ` Mike Snitzer
  2010-10-10 16:41         ` Milan Broz
  2010-10-10 17:01         ` Alasdair G Kergon
  1 sibling, 2 replies; 24+ messages in thread
From: Mike Snitzer @ 2010-10-10 16:22 UTC (permalink / raw)
  To: Milan Broz
  Cc: Andi Kleen, Andi Kleen, device-mapper development, pedrib,
	linux-kernel, Alasdair G Kergon

Hi Milan,

I'll help cut through this as best I can.  I'm new to this work so I
first need to get up to speed.

I'm just providing my early thoughts below...

On Sun, Oct 10 2010 at 11:34am -0400,
Milan Broz <mbroz@redhat.com> wrote:

> On 10/10/2010 03:08 PM, Andi Kleen wrote:
> > I did it intentionally not split up because a split up is unlikely
> > to be bisectable. I think there is no need for any splitups.
> 
> Shrug. The main encryption thread and ESSIV per-cpu are two separate
> things from my point of view.

Milan, are your split patches equivalent to Andi's new single v3 patch
here?: https://patchwork.kernel.org/patch/244031/

I'd imagine there may be some differences.

> Anyway, the ball is on DM maintainer's playground now.

If there are differences then seems its not for DM maintainers to sort
this out quite yet.  You have 4 patches yet you say conceptually there
are 2 distinct changes.

At a minimum I think your patch 1 and 2 need to be merged if patch 1 on
its own results in "using one tfm is not safe, fixed by foollowing
patch."

If in the end the split patches can be made identical to Andi's v3
patch, then I'm inclined to agree that splitting the single v3 patch
makes sense: if it really is doing multiple distinct changes in one.

But any of Andi's changes in his v3 patch need to be folded back into
your split patchset.  And then your (3?) split patches need to be posted
to dm-devel with both Andi's and your Signed-off-by.

If you feel you shouldn't be doing any more to your split patches then
I'll review all of this closer tomorrow.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 16:22       ` Mike Snitzer
@ 2010-10-10 16:41         ` Milan Broz
  2010-10-10 17:07           ` Mike Snitzer
  2010-10-10 17:01         ` Alasdair G Kergon
  1 sibling, 1 reply; 24+ messages in thread
From: Milan Broz @ 2010-10-10 16:41 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: Andi Kleen, Andi Kleen, device-mapper development, pedrib,
	linux-kernel, Alasdair G Kergon

On 10/10/2010 06:22 PM, Mike Snitzer wrote:
> If you feel you shouldn't be doing any more to your split patches then
> I'll review all of this closer tomorrow.

I think there was small bugfix in my patchset (some missing free in error path)
and I change to use generic per-cpu IV struct (not ESSIV only) -
see patch already sent here https://www.redhat.com/archives/dm-devel/2010-July/msg00118.html

Others are just small code shuffle changes (which I know agk is doing to all patches;-)

I'll send patch on top of Andi's v3 if it helps something. (When back to my devel machine).

Milan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 16:22       ` Mike Snitzer
  2010-10-10 16:41         ` Milan Broz
@ 2010-10-10 17:01         ` Alasdair G Kergon
  2010-10-10 17:44           ` Andi Kleen
  1 sibling, 1 reply; 24+ messages in thread
From: Alasdair G Kergon @ 2010-10-10 17:01 UTC (permalink / raw)
  To: Milan Broz
  Cc: Andi Kleen, Mike Snitzer, Andi Kleen, device-mapper development,
	pedrib, linux-kernel

On Sun, Oct 10, 2010 at 12:22:57PM -0400, Mike Snitzer wrote:
> this out quite yet.  You have 4 patches yet you say conceptually there
> are 2 distinct changes.
 
IOW should we end up with 2 bisectable patches here?

And the potentially-broken/poorly-performing stacked async should be
explained in comments inline perhaps if we're choosing to ignore this
apparent regression.

Alasdair

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 16:41         ` Milan Broz
@ 2010-10-10 17:07           ` Mike Snitzer
  2010-10-10 18:56             ` [PATCH] Fix double free and use generic private pointer in per-cpu struct Milan Broz
  0 siblings, 1 reply; 24+ messages in thread
From: Mike Snitzer @ 2010-10-10 17:07 UTC (permalink / raw)
  To: Milan Broz
  Cc: Andi Kleen, Andi Kleen, device-mapper development, pedrib,
	linux-kernel, Alasdair G Kergon

On Sun, Oct 10 2010 at 12:41pm -0400,
Milan Broz <mbroz@redhat.com> wrote:

> On 10/10/2010 06:22 PM, Mike Snitzer wrote:
> > If you feel you shouldn't be doing any more to your split patches then
> > I'll review all of this closer tomorrow.
> 
> I think there was small bugfix in my patchset (some missing free in error path)
> and I change to use generic per-cpu IV struct (not ESSIV only) -
> see patch already sent here https://www.redhat.com/archives/dm-devel/2010-July/msg00118.html

Sure, but Andi's v3 was adjusted with "Mark per CPU crypto work queues
as CPU intensive."  Do your split patches do that?  I haven't looked at
the actual changes closely yet, just patch headers...
 
> Others are just small code shuffle changes (which I know agk is doing to all patches;-)

If this area is being actively changed behind the scenes then it is
pointless to try to make sense of this now.  As you said, Alasdair needs
to delegate and remove any doubt about where this patchset stands.

> I'll send patch on top of Andi's v3 if it helps something. (When back to my devel machine).

That'll be helpful (I'm sure Andi is interested).  In addition, updating
your split patches like I suggested in my previous mail would be great
too:
1) combine patch 1 and 2 so result bisect safe
2) fold Andi's v3 changes into the appropriate patch(es) of your split
   patchset

In the end I'd expect your (3?) split patches to be functionally
identical to Andi's v3 patch (only differences being the missing free
fix and your using per-cpu of a more generic IV struct).

So expressing your differences as a follow-on patch ontop of Andi's v3
patch would be great.  If Andi agrees with those changes then we can use
your split patches for the final commit sequence.

All patches would have both Andi's and your Signed-off-by.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 17:01         ` Alasdair G Kergon
@ 2010-10-10 17:44           ` Andi Kleen
  2010-10-10 18:17             ` [dm-devel] " Alasdair G Kergon
  0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2010-10-10 17:44 UTC (permalink / raw)
  To: Milan Broz, Andi Kleen, Mike Snitzer, Andi Kleen,
	device-mapper development

On Sun, Oct 10, 2010 at 06:01:51PM +0100, Alasdair G Kergon wrote:
> On Sun, Oct 10, 2010 at 12:22:57PM -0400, Mike Snitzer wrote:
> > this out quite yet.  You have 4 patches yet you say conceptually there
> > are 2 distinct changes.
>  
> IOW should we end up with 2 bisectable patches here?
> 
> And the potentially-broken/poorly-performing stacked async should be
> explained in comments inline perhaps if we're choosing to ignore this
> apparent regression.

It's not broken AFAIK and it will not perform worse than the original
single thread work queue.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 17:44           ` Andi Kleen
@ 2010-10-10 18:17             ` Alasdair G Kergon
  2010-10-10 18:48               ` Alasdair G Kergon
  2010-10-10 18:51               ` [dm-devel] " Andi Kleen
  0 siblings, 2 replies; 24+ messages in thread
From: Alasdair G Kergon @ 2010-10-10 18:17 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Milan Broz, Mike Snitzer, Andi Kleen, device-mapper development,
	pedrib, linux-kernel

On Sun, Oct 10, 2010 at 07:44:54PM +0200, Andi Kleen wrote:
> It's not broken AFAIK and it will not perform worse than the original
> single thread work queue.
 
This is the contentious code:

+       /*
+        * Since we only have a single worker per CPU in extreme
+        * cases there might be nesting (dm-crypt on another dm-crypt)
+        * To avoid deadlock run the work directly then.
+        */
+       cpu = get_cpu();   
+       if (per_cpu(io_wq_cpu, cpu) == current && !in_interrupt()) {
+               put_cpu();
+               kcryptd_io(&io->work);   
+               return;
+       }

AFAIK Nested dm-crypt is not extreme for some folk who have routine
configurations using it.  But if in_interrupt is set how does this code
work for them now compared to how it worked before?

Alasdair

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 18:17             ` [dm-devel] " Alasdair G Kergon
@ 2010-10-10 18:48               ` Alasdair G Kergon
  2010-10-10 18:51               ` [dm-devel] " Andi Kleen
  1 sibling, 0 replies; 24+ messages in thread
From: Alasdair G Kergon @ 2010-10-10 18:48 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andi Kleen, Mike Snitzer, linux-kernel, device-mapper development,
	pedrib, Milan Broz

On Sun, Oct 10, 2010 at 07:17:36PM +0100, Alasdair G Kergon wrote:
> AFAIK Nested dm-crypt is not extreme for some folk who have routine
> configurations using it.  But if in_interrupt is set how does this code
> work for them now compared to how it worked before?
 
And indeed, even if in_interrupt is not set, is there any performance
impact for this (minority) class of users?

Alasdair

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 18:17             ` [dm-devel] " Alasdair G Kergon
  2010-10-10 18:48               ` Alasdair G Kergon
@ 2010-10-10 18:51               ` Andi Kleen
  2010-10-10 19:07                 ` Alasdair G Kergon
  1 sibling, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2010-10-10 18:51 UTC (permalink / raw)
  To: Andi Kleen, Milan Broz, Mike Snitzer, Andi Kleen,
	device-mapper development

> AFAIK Nested dm-crypt is not extreme for some folk who have routine
> configurations using it.  But if in_interrupt is set how does this code
> work for them now compared to how it worked before?

Previously they will all pile up on the single worker thread.

Now in the nested case the nested work is run in their own
context. This should actually scale better because you
get parallelism too, not contention on a single resource.

The only difference to the non nested case is that the wq 
is not used.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] Fix double free and use generic private pointer in per-cpu struct
  2010-10-10 17:07           ` Mike Snitzer
@ 2010-10-10 18:56             ` Milan Broz
  2010-10-14 19:26               ` [dm-devel] " Milan Broz
  0 siblings, 1 reply; 24+ messages in thread
From: Milan Broz @ 2010-10-10 18:56 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: Andi Kleen, Andi Kleen, device-mapper development, pedrib,
	linux-kernel, Alasdair G Kergon



On 10/10/2010 07:07 PM, Mike Snitzer wrote:

> I'll send patch on top of Andi's v3 if it helps something. (When back to my devel machine).
> That'll be helpful (I'm sure Andi is interested).
ok here are my changes on top of v3 - one bugfix and generic IV pointer.
If you merge it to v4 and it appears in git as one patch I have no problem with that.

Thanks,
Milan


[PATCH] Use generic private pointer in per-cpu struct

If an IV needs to use a per-cpu struct, it should allocate
it in its constructor and free it in its destructor.
(There will possibly be more compatibility IVs which need a per-cpu struct.)

For ESSIV, only tfm pointer is needed so use iv_private directly.

Also fix double free of salt in ESSIV IV constructor.

Signed-off-by: Milan Broz <mbroz@redhat.com>

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 8802e1d..88a2a05 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -82,11 +82,6 @@ struct iv_essiv_private {
 	u8 *salt;
 };
 
-/* Duplicated per CPU state for cipher */
-struct iv_essiv_private_cpu {
-	struct crypto_cipher *tfm;
-};
-
 struct iv_benbi_private {
 	int shift;
 };
@@ -101,7 +96,9 @@ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
 struct crypt_cpu {
 	struct ablkcipher_request *req;
 	struct crypto_ablkcipher *tfm;
-	struct iv_essiv_private_cpu ie;
+
+	/* ESSIV: struct crypto_cipher *essiv_tfm */
+	void *iv_private;
 };
 
 /*
@@ -234,6 +231,8 @@ static int crypt_iv_essiv_init(struct crypt_config *cc)
 	struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
 	struct hash_desc desc;
 	struct scatterlist sg;
+	struct crypt_cpu *cs;
+	struct crypto_cipher *essiv_tfm;
 	int err, n, cpu;
 
 	sg_init_one(&sg, cc->key, cc->key_size);
@@ -245,9 +244,10 @@ static int crypt_iv_essiv_init(struct crypt_config *cc)
 		return err;
 
 	for_each_possible_cpu (cpu) {
-		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
+		cs = per_cpu_ptr(cc->cpu, cpu);
+		essiv_tfm = cs->iv_private;
 
-		n = crypto_cipher_setkey(cs->ie.tfm, essiv->salt,
+		n = crypto_cipher_setkey(essiv_tfm, essiv->salt,
 				    crypto_hash_digestsize(essiv->hash_tfm));
 		if (n) {
 			err = n;
@@ -263,14 +263,17 @@ static int crypt_iv_essiv_wipe(struct crypt_config *cc)
 {
 	struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
 	unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm);
+	struct crypt_cpu *cs;
+	struct crypto_cipher *essiv_tfm;
 	int cpu, err, n;
 
 	memset(essiv->salt, 0, salt_size);
 
 	err = 0;
 	for_each_possible_cpu (cpu) {
-		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
-		n = crypto_cipher_setkey(cs->ie.tfm, essiv->salt, salt_size);
+		cs = per_cpu_ptr(cc->cpu, cpu);
+		essiv_tfm = cs->iv_private;
+		n = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size);
 		if (n)
 			err = n;
 	}
@@ -312,6 +315,8 @@ static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc,
 static void crypt_iv_essiv_dtr(struct crypt_config *cc)
 {
 	int cpu;
+	struct crypt_cpu *cs;
+	struct crypto_cipher *essiv_tfm;
 	struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
 
 	crypto_free_hash(essiv->hash_tfm);
@@ -321,11 +326,11 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc)
 	essiv->salt = NULL;
 
 	for_each_possible_cpu (cpu) {
-		struct crypt_cpu *cs = per_cpu_ptr(cc->cpu, cpu);
-		if (cs->ie.tfm) {
-			crypto_free_cipher(cs->ie.tfm);
-			cs->ie.tfm = NULL;
-		}
+		cs = per_cpu_ptr(cc->cpu, cpu);
+		essiv_tfm = cs->iv_private;
+		if (essiv_tfm)
+			crypto_free_cipher(essiv_tfm);
+		cs->iv_private = NULL;
 	}
 }
 
@@ -365,11 +370,10 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 		essiv_tfm = setup_essiv_cpu(cc, ti, salt,
 					crypto_hash_digestsize(hash_tfm));
 		if (IS_ERR(essiv_tfm)) {
-			kfree(salt);
 			crypt_iv_essiv_dtr(cc);
 			return PTR_ERR(essiv_tfm);
 		}
-		per_cpu_ptr(cc->cpu, cpu)->ie.tfm = essiv_tfm;
+		per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm;
 	}
 	return 0;
 
@@ -382,9 +386,11 @@ bad:
 
 static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
 {
+	struct crypto_cipher *essiv_tfm = crypt_me(cc)->iv_private;
+
 	memset(iv, 0, cc->iv_size);
 	*(u64 *)iv = cpu_to_le64(sector);
-	crypto_cipher_encrypt_one(crypt_me(cc)->ie.tfm, iv, iv);
+	crypto_cipher_encrypt_one(essiv_tfm, iv, iv);
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [dm-devel] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 18:51               ` [dm-devel] " Andi Kleen
@ 2010-10-10 19:07                 ` Alasdair G Kergon
  2010-10-10 19:16                   ` Andi Kleen
  0 siblings, 1 reply; 24+ messages in thread
From: Alasdair G Kergon @ 2010-10-10 19:07 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Milan Broz, Mike Snitzer, Andi Kleen, device-mapper development,
	pedrib, linux-kernel

> > AFAIK Nested dm-crypt is not extreme for some folk who have routine
> > configurations using it.  But if in_interrupt is set how does this code
                                     ^^^^^^^^^^^
> > work for them now compared to how it worked before?

On Sun, Oct 10, 2010 at 08:51:00PM +0200, Andi Kleen wrote:
> Previously they will all pile up on the single worker thread.
> Now in the nested case the nested work is run in their own
> context. This should actually scale better because you
> get parallelism too, not contention on a single resource.
 
Not if in_interrupt is set though?
+       if (per_cpu(io_wq_cpu, cpu) == current && !in_interrupt()) {

What I am missing here?

(And assume there is only 1 CPU too for worst case behaviour, presumably.)

Alasdair

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 19:07                 ` Alasdair G Kergon
@ 2010-10-10 19:16                   ` Andi Kleen
  2010-10-10 19:31                     ` Milan Broz
  0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2010-10-10 19:16 UTC (permalink / raw)
  To: Andi Kleen, Milan Broz, Mike Snitzer, Andi Kleen,
	device-mapper development

> Not if in_interrupt is set though?
> +       if (per_cpu(io_wq_cpu, cpu) == current && !in_interrupt()) {
> 
> What I am missing here?

The interrupt doesn't block on the task.

Actually most likely that check isn't needed anyways because
that should not happen, was just pure paranoia from my side.

> 
> (And assume there is only 1 CPU too for worst case behaviour, presumably.)

One per process, previously it was always one per CPU.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 19:16                   ` Andi Kleen
@ 2010-10-10 19:31                     ` Milan Broz
  2010-10-10 20:20                       ` Andi Kleen
  0 siblings, 1 reply; 24+ messages in thread
From: Milan Broz @ 2010-10-10 19:31 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Mike Snitzer, Andi Kleen, device-mapper development, pedrib,
	linux-kernel

On 10/10/2010 09:16 PM, Andi Kleen wrote:
>> Not if in_interrupt is set though?
>> +       if (per_cpu(io_wq_cpu, cpu) == current && !in_interrupt()) {
>>
>> What I am missing here?
> 
> The interrupt doesn't block on the task.
> 
> Actually most likely that check isn't needed anyways because
> that should not happen, was just pure paranoia from my side.

I don't think so. If you run crypto in async mode, you get an asynchronous
callback (kcryptd_async_done() here).

AFAIK this callback is called in interrupt context. This callback
decreases pending counter and if it reach zero it calls
kcryptd_crypt_write_io_submit() -> kcryptd_queue_io().

You cannot call direct encryption if it is called from async callback,
so the IO must be always queued to IO workqueue for later.

So the in_interrupt() is IMHO equivalent of async flag and it is
properly placed there.

But previously, there were threads per device, so if one IO thread blocks,
others stacked mappings can continue
Now I see possibility for deadlock there because we have one io thread now
(assuming that 1 CPU situation Alasdair mentioned).

Or is there a mistake in my analysis?

> 
>>
>> (And assume there is only 1 CPU too for worst case behaviour, presumably.)
> 
> One per process, previously it was always one per CPU.

Nope, one singlethread per crypt device (resp. two: io + crypt).

Milan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 19:31                     ` Milan Broz
@ 2010-10-10 20:20                       ` Andi Kleen
  2010-10-11  9:32                         ` Milan Broz
  0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2010-10-10 20:20 UTC (permalink / raw)
  To: Milan Broz
  Cc: Andi Kleen, Mike Snitzer, Andi Kleen, device-mapper development,
	pedrib, linux-kernel

> But previously, there were threads per device, so if one IO thread blocks,
> others stacked mappings can continue
> Now I see possibility for deadlock there because we have one io thread now
> (assuming that 1 CPU situation Alasdair mentioned).

That path calls the crypto worker thread, not the IO worker thread?
crypto worker should be fine here, only IO worker would be a problem
I think because crypto doesn't really block on nested IO.

-Andi

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-10 20:20                       ` Andi Kleen
@ 2010-10-11  9:32                         ` Milan Broz
  0 siblings, 0 replies; 24+ messages in thread
From: Milan Broz @ 2010-10-11  9:32 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Mike Snitzer, Andi Kleen, device-mapper development, pedrib,
	linux-kernel

On 10/10/2010 10:20 PM, Andi Kleen wrote:
>> But previously, there were threads per device, so if one IO thread blocks,
>> others stacked mappings can continue
>> Now I see possibility for deadlock there because we have one io thread now
>> (assuming that 1 CPU situation Alasdair mentioned).
> 
> That path calls the crypto worker thread, not the IO worker thread?
> crypto worker should be fine here, only IO worker would be a problem
> I think because crypto doesn't really block on nested IO.

Well, the crypt thread can block, which is surely not what we want in an async callback,
and the IO thread cannot call generic_make_request() from this context either...

But the reads are not problem - system queues IO, then after completion
it calls async crypt. When async crypt is done, kcryptd_crypt_read_done()
is called - and it is safe, there is just bio_endio().

The problem is write path - it allocates bio clone, run async crypto on it
and the final callback queues cloned bio to underlying device.
Because code cannot call generic_make_request() directly here (assuming
it still runs in interrupt mode), it submits new work to io thread.

So there the code behaves the same as before - just instead of queueing into
separate per-device io workqueue we have now just one common queue...
So question is, if this is safe in all stacked situations and cannot deadlock.

Imagine you have stacked one dm-crypt device, which uses an alg
in sync mode, over another one, which runs in async mode.
So the common io thread runs both encryption and make_request...
If it can lock itself here, it is regression from previous version here.

(Note I am not blocking the patch - I think this can be solved later somehow,
but we should either know about this problem or prove that it is safe.
In normal (sync) mode this path is not used at all - and this is the most
common situation.)

Milan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] Fix double free and use generic private pointer in per-cpu struct
  2010-10-10 18:56             ` [PATCH] Fix double free and use generic private pointer in per-cpu struct Milan Broz
@ 2010-10-14 19:26               ` Milan Broz
  2010-10-20 14:20                 ` [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3 Milan Broz
  0 siblings, 1 reply; 24+ messages in thread
From: Milan Broz @ 2010-10-14 19:26 UTC (permalink / raw)
  To: Andi Kleen
  Cc: device-mapper development, Mike Snitzer, linux-kernel, Andi Kleen,
	pedrib, Alasdair G Kergon

On 10/10/2010 08:56 PM, Milan Broz wrote:
> On 10/10/2010 07:07 PM, Mike Snitzer wrote:
> 
>> I'll send patch on top of Andi's v3 if it helps something. (When back to my devel machine).
>> That'll be helpful (I'm sure Andi is interested).
> ok here are my changes on top of v3 - one bugfix and generic IV pointer.
> If you merge it to v4 and it appears in git as one patch I have no problem with that.
> 
> Thanks,
> Milan
> 
> 
> [PATCH] Use generic private pointer in per-cpu struct

Hi Andi,

any progress here?
I would like to achieve some consensus and merge some version
of patch to dm tree...
Maybe with known problems if there is no better solution for now.

Thanks,
Milan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-14 19:26               ` [dm-devel] " Milan Broz
@ 2010-10-20 14:20                 ` Milan Broz
  2010-10-20 17:32                   ` Alasdair G Kergon
  0 siblings, 1 reply; 24+ messages in thread
From: Milan Broz @ 2010-10-20 14:20 UTC (permalink / raw)
  To: Andi Kleen
  Cc: device-mapper development, Mike Snitzer, linux-kernel, Andi Kleen,
	pedrib, Alasdair G Kergon



> any progress here?
> I would like to achieve some consensus and merge some version
> of patch to dm tree...
> Maybe with known problems if there is no better solution for now. 

I have to reply to myself:
this patch introduces apparent regressions in async crypto mode.

- Without per-cpu patch applied, the test script runs fine even
with >16 levels stacked devices, both in sync/forced async crypto

- With the per-cpu patch it seems to deadlock if the devices are
stacked more than 16 levels deep, even in crypto sync mode

- With added force async patch below it deadlocks even for
10 level stacked device (maybe even less)

Tested on VM with 256M RAM and Linus' tree from today...

But it is possible that there is another related problem.

You can easily test it using this patch, which forces dmcrypt to use
cryptd (thus async crypto):

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 88a2a05..1d8ddc1 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1267,7 +1267,7 @@ static int crypt_ctr_cipher(struct dm_target *ti,
 		goto bad_mem;
 
 	ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME,
-		       "%s(%s)", chainmode, cipher);
+		       "cryptd(%s(%s-generic))", chainmode, cipher);
 	if (ret < 0) {
 		kfree(cipher_api);
 		goto bad_mem;


And this test script (maybe some parameters mangling needed):

#!/bin/bash
# Repro script: builds a chain of stacked dm-crypt mappings and writes
# through the topmost one, to exercise nested dm-crypt (deadlock test
# for the per-CPU workqueue patch).

# Backing block device for the bottom mapping; NUM extra layers are
# stacked on top of the first one (adjust for the machine under test).
DEVICE=/dev/sde
NUM=10

# Create the base mapping CTEST1 on the raw device, then stack NUM
# further mappings, each one layered on the previous /dev/mapper node.
echo xxx | cryptsetup create -c aes-xts-plain64 -s 256 CTEST1 $DEVICE
for i in $(seq 1 $NUM); do
	j=$i
	i=$(($i + 1))
	echo xxx | cryptsetup create -c aes-xts-plain64 -s 256 CTEST$i /dev/mapper/CTEST$j
done

# Topmost mapping is CTEST$((original NUM + 1)).
NUM=$(($NUM + 1))

# Write through the top of the stack; every block passes through all
# stacked crypt layers (this is where the deadlock reproduces).
dd if=/dev/zero of=/dev/mapper/CTEST$NUM bs=512 count=102400

# Tear the stack down from the top so each layer's dependency is
# removed before the layer beneath it.
for i in $(seq $NUM -1 1); do
	dmsetup remove CTEST$i
done


Milan

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3
  2010-10-20 14:20                 ` [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3 Milan Broz
@ 2010-10-20 17:32                   ` Alasdair G Kergon
  0 siblings, 0 replies; 24+ messages in thread
From: Alasdair G Kergon @ 2010-10-20 17:32 UTC (permalink / raw)
  To: Milan Broz
  Cc: Andi Kleen, Mike Snitzer, linux-kernel, device-mapper development,
	Andi Kleen, pedrib

On Wed, Oct 20, 2010 at 04:20:44PM +0200, Milan Broz wrote:
> this patch introduces apparent regressions in async crypto mode.
 
OK, thanks for performing those tests.

This patch is stalled until these problems are understood properly and
solutions are proposed.  I was prepared to overlook the regression in
stacked 'async' provided there was an inline FIXME explaining clearly
how to fix it in future should we need to, but I can't support a patch that
apparently breaks stacked sync encryption.

One solution might be to make the proposed changes only for non-stacked
configurations and leave stacks to work as they do today - either
detecting that automatically or having userspace supply a target
parameter to say whether or not to turn it on.  (For that, would need
tests to show a stack of many layers where only one is a crypt target
continues to work after the change.)

Alasdair

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2010-10-20 17:32 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-10-10 11:59 [PATCH] DM-CRYPT: Scale to multiple CPUs v3 Andi Kleen
2010-10-10 12:38 ` [dm-devel] " Milan Broz
2010-10-10 12:53   ` Milan Broz
2010-10-10 13:09     ` Andi Kleen
2010-10-10 13:08   ` Andi Kleen
2010-10-10 15:34     ` Milan Broz
2010-10-10 16:06       ` Andi Kleen
2010-10-10 16:22       ` Mike Snitzer
2010-10-10 16:41         ` Milan Broz
2010-10-10 17:07           ` Mike Snitzer
2010-10-10 18:56             ` [PATCH] Fix double free and use generic private pointer in per-cpu struct Milan Broz
2010-10-14 19:26               ` [dm-devel] " Milan Broz
2010-10-20 14:20                 ` [dm-devel] [PATCH] DM-CRYPT: Scale to multiple CPUs v3 Milan Broz
2010-10-20 17:32                   ` Alasdair G Kergon
2010-10-10 17:01         ` Alasdair G Kergon
2010-10-10 17:44           ` Andi Kleen
2010-10-10 18:17             ` [dm-devel] " Alasdair G Kergon
2010-10-10 18:48               ` Alasdair G Kergon
2010-10-10 18:51               ` [dm-devel] " Andi Kleen
2010-10-10 19:07                 ` Alasdair G Kergon
2010-10-10 19:16                   ` Andi Kleen
2010-10-10 19:31                     ` Milan Broz
2010-10-10 20:20                       ` Andi Kleen
2010-10-11  9:32                         ` Milan Broz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).