All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff Garzik <jgarzik@pobox.com>
To: Christophe Saout <christophe@saout.de>
Cc: Christoph Hellwig <hch@infradead.org>,
	Joe Thornber <thornber@redhat.com>,
	Mike Christie <mikenc@us.ibm.com>, Andrew Morton <akpm@osdl.org>,
	linux-kernel@vger.kernel.org
Subject: Re: dm-crypt using kthread
Date: Sun, 15 Feb 2004 21:10:20 -0500	[thread overview]
Message-ID: <4030268C.6050701@pobox.com> (raw)
In-Reply-To: <20040216014433.GA5430@leto.cs.pocnet.net>

Christophe Saout wrote:
> diff -Nur linux-2.6.3-rc3-mm1.orig/drivers/md/dm-crypt.c linux-2.6.3-rc3-mm1/drivers/md/dm-crypt.c
> --- linux-2.6.3-rc3-mm1.orig/drivers/md/dm-crypt.c	1970-01-01 01:00:00.000000000 +0100
> +++ linux-2.6.3-rc3-mm1/drivers/md/dm-crypt.c	2004-02-16 01:47:37.000000000 +0100

Overall it looks pretty nice and clean, but some issues...



> +#define MIN_IOS        256
> +#define MIN_POOL_PAGES 32
> +#define MIN_BIO_PAGES  8

> +static kmem_cache_t *_crypt_io_pool;

> +static struct bio *
> +crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
> +                   struct bio *base_bio, int *bio_vec_idx)
> +{
> +	struct bio *bio;
> +	int nr_iovecs = dm_div_up(size, PAGE_SIZE);
> +	int gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
> +	int flags = current->flags;
> +	int i;
> +
> +	/*
> +	 * Tell VM to act less aggressively and fail earlier.
> +	 * This is not necessary but increases throughput.
> +	 * FIXME: Is this really intelligent?
> +	 */
> +	current->flags &= ~PF_MEMALLOC;
> +
> +	if (base_bio)
> +		bio = bio_clone(base_bio, GFP_NOIO);
> +	else
> +		bio = bio_alloc(GFP_NOIO, nr_iovecs);

Ewww.  Somebody (not you) should be thwapped for the ordering of the 
gfp_mask argument in each bio_xxx function.



> +		/*
> +		 * if additional pages cannot be allocated without waiting,
> +		 * return a partially allocated bio, the caller will then try
> +		 * to allocate additional bios while submitting this partial bio
> +		 */
> +		if ((i - bio->bi_idx) == (MIN_BIO_PAGES - 1))
> +			gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;

If the caller said they can wait, why not wait?



> +static void dec_pending(struct crypt_io *io, int error)
> +{
> +	struct crypt_config *cc = (struct crypt_config *) io->target->private;
> +
> +	if (error < 0)
> +		io->error = error;
> +
> +	if (!atomic_dec_and_test(&io->pending))
> +		return;
> +
> +	if (io->first_clone)
> +		bio_put(io->first_clone);

> +	if (io->bio)
> +		bio_endio(io->bio, io->bio->bi_size, io->error);

when does io->bio==NULL ?



> +static spinlock_t _kcryptd_lock = SPIN_LOCK_UNLOCKED;
> +static struct bio_list _kcryptd_bios;
> +
> +static struct task_struct *_kcryptd;
> +
> +/*
> + * Fetch a list of the complete bios.
> + */
> +static struct bio *kcryptd_get_bios(void)
> +{
> +	struct bio *bio;
> +
> +	spin_lock_irq(&_kcryptd_lock);
> +	bio = bio_list_get(&_kcryptd_bios);
> +	spin_unlock_irq(&_kcryptd_lock);
> +
> +	return bio;
> +}
> +
> +/*
> + * Append bio to work queue
> + */
> +static void kcryptd_queue_bio(struct bio *bio)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&_kcryptd_lock, flags);
> +	bio_list_add(&_kcryptd_bios, bio);
> +	spin_unlock_irqrestore(&_kcryptd_lock, flags);
> +}
> +
> +static int kcryptd_do_work(void *data)
> +{
> +	current->flags |= PF_IOTHREAD;
> +
> +	for(;;) {
> +		struct bio *bio;
> +
> +		set_task_state(current, TASK_INTERRUPTIBLE);
> +		while (!(bio = kcryptd_get_bios())) {
> +			schedule();
> +			if (signal_pending(current))
> +				return 0;
> +		}
> +		set_task_state(current, TASK_RUNNING);

You just keep calling schedule() rapid-fire until you get a bio?  That's 
a bit sub-optimal.

This seems like a semaphore is needed.


> +		while (bio) {
> +			struct crypt_io *io;
> +			struct crypt_config *cc;
> +			struct convert_context ctx;
> +			struct bio *next_bio;
> +			int r;
> +
> +			io = (struct crypt_io *) bio->bi_private;
> +			cc = (struct crypt_config *) io->target->private;
> +
> +			crypt_convert_init(cc, &ctx, io->bio, io->bio,
> +				io->bio->bi_sector - io->target->begin, 0);
> +			r = crypt_convert(cc, &ctx);
> +
> +			next_bio = bio->bi_next;
> +			bio->bi_next = NULL;
> +
> +			bio_put(bio);
> +			dec_pending(io, r);
> +
> +			bio = next_bio;
> +		}
> +	}
> +}
> +
> +/*
> + * Encode key into its hex representation
> + */
> +static void crypt_encode_key(char *hex, u8 *key, int size)
> +{
> +	static char hex_digits[] = "0123456789abcdef";

static const


> +/*
> + * Construct an encryption mapping:
> + * <cipher> <key> <iv_offset> <dev_path> <start>
> + */
> +static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> +{
> +	struct crypt_config *cc;
> +	struct crypto_tfm *tfm;
> +	char *tmp;
> +	char *cipher;
> +	char *mode;
> +	int crypto_flags;
> +	int key_size;
> +
> +	if (argc != 5) {
> +		ti->error = "dm-crypt: Not enough arguments";
> +		return -EINVAL;
> +	}
> +
> +	tmp = argv[0];
> +	cipher = strsep(&tmp, "-");
> +	mode = strsep(&tmp, "-");
> +
> +	if (tmp)
> +		DMWARN("dm-crypt: Unexpected additional cipher options");
> +
> +	key_size = strlen(argv[1]) >> 1;
> +
> +	cc = kmalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
> +	if (cc == NULL) {
> +		ti->error =
> +			"dm-crypt: Cannot allocate transparent encryption context";
> +		return -ENOMEM;
> +	}
> +
> +	if (!mode || strcmp(mode, "plain") == 0)
> +		cc->iv_generator = crypt_iv_plain;
> +	else if (strcmp(mode, "ecb") == 0)
> +		cc->iv_generator = NULL;
> +	else {
> +		ti->error = "dm-crypt: Invalid chaining mode";
> +		return -EINVAL;
> +	}

memory leak on error


> +	if (cc->iv_generator)
> +		crypto_flags = CRYPTO_TFM_MODE_CBC;
> +	else
> +		crypto_flags = CRYPTO_TFM_MODE_ECB;
> +
> +	tfm = crypto_alloc_tfm(cipher, crypto_flags);
> +	if (!tfm) {
> +		ti->error = "dm-crypt: Error allocating crypto tfm";
> +		goto bad1;
> +	}
> +
> +	if (tfm->crt_u.cipher.cit_decrypt_iv && tfm->crt_u.cipher.cit_encrypt_iv)
> +		/* at least a 32 bit sector number should fit in our buffer */
> +		cc->iv_size = max(crypto_tfm_alg_ivsize(tfm), 
> +		                  (unsigned int)(sizeof(u32) / sizeof(u8)));
> +	else {
> +		cc->iv_size = 0;
> +		if (cc->iv_generator) {
> +			DMWARN("dm-crypt: Selected cipher does not support IVs");
> +			cc->iv_generator = NULL;
> +		}
> +	}
> +
> +	cc->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
> +				     mempool_free_slab, _crypt_io_pool);
> +	if (!cc->io_pool) {
> +		ti->error = "dm-crypt: Cannot allocate crypt io mempool";
> +		goto bad2;
> +	}
> +
> +	cc->page_pool = mempool_create(MIN_POOL_PAGES, mempool_alloc_page,
> +				       mempool_free_page, NULL);
> +	if (!cc->page_pool) {
> +		ti->error = "dm-crypt: Cannot allocate page mempool";
> +		goto bad3;
> +	}
> +
> +	cc->tfm = tfm;
> +	cc->key_size = key_size;
> +	if ((key_size == 0 && strcmp(argv[1], "-") != 0)
> +	    || crypt_decode_key(cc->key, argv[1], key_size) < 0) {
> +		ti->error = "dm-crypt: Error decoding key";
> +		goto bad4;
> +	}
> +
> +	if (tfm->crt_u.cipher.cit_setkey(tfm, cc->key, key_size) < 0) {
> +		ti->error = "dm-crypt: Error setting key";
> +		goto bad4;
> +	}
> +
> +	if (sscanf(argv[2], SECTOR_FORMAT, &cc->iv_offset) != 1) {
> +		ti->error = "dm-crypt: Invalid iv_offset sector";
> +		goto bad4;
> +	}
> +
> +	if (sscanf(argv[4], SECTOR_FORMAT, &cc->start) != 1) {
> +		ti->error = "dm-crypt: Invalid device sector";
> +		goto bad4;
> +	}
> +
> +	if (dm_get_device(ti, argv[3], cc->start, ti->len,
> +	                  dm_table_get_mode(ti->table), &cc->dev)) {
> +		ti->error = "dm-crypt: Device lookup failed";
> +		goto bad4;
> +	}
> +
> +	ti->private = cc;
> +	return 0;
> +
> +bad4:
> +	mempool_destroy(cc->page_pool);
> +bad3:
> +	mempool_destroy(cc->io_pool);
> +bad2:
> +	crypto_free_tfm(tfm);
> +bad1:
> +	kfree(cc);
> +	return -EINVAL;
> +}
> +
> +static void crypt_dtr(struct dm_target *ti)
> +{
> +	struct crypt_config *cc = (struct crypt_config *) ti->private;
> +
> +	mempool_destroy(cc->page_pool);
> +	mempool_destroy(cc->io_pool);
> +
> +	crypto_free_tfm(cc->tfm);
> +	dm_put_device(ti, cc->dev);
> +	kfree(cc);
> +}
> +
> +static int crypt_endio(struct bio *bio, unsigned int done, int error)
> +{
> +	struct crypt_io *io = (struct crypt_io *) bio->bi_private;
> +	struct crypt_config *cc = (struct crypt_config *) io->target->private;
> +
> +	if (bio_rw(bio) == WRITE) {
> +		/*
> +		 * free the processed pages, even if
> +		 * it's only a partially completed write
> +		 */
> +		crypt_free_buffer_pages(cc, bio, done);
> +	}
> +
> +	if (bio->bi_size)
> +		return 1;

dumb question, for my own knowledge:  what does this 'if' test do?


> +static int crypt_map(struct dm_target *ti, struct bio *bio,
> +                     union map_info *map_context)
> +{
> +	struct crypt_config *cc = (struct crypt_config *) ti->private;
> +	struct crypt_io *io = mempool_alloc(cc->io_pool, GFP_NOIO);
> +	struct bio *clone = NULL;
> +	struct convert_context ctx;
> +	unsigned int remaining = bio->bi_size;
> +	sector_t sector = bio->bi_sector - ti->begin;
> +	int bio_vec_idx = 0;
> +	int r = 0;
> +
> +	io->target = ti;
> +	io->bio = bio;
> +	io->first_clone = NULL;
> +	io->error = 0;
> +	atomic_set(&io->pending, 1); /* hold a reference */
> +
> +	if (bio_rw(bio) == WRITE)
> +		crypt_convert_init(cc, &ctx, NULL, bio, sector, 1);
> +
> +	/*
> +	 * The allocated buffers can be smaller than the whole bio,
> +	 * so repeat the whole process until all the data can be handled.
> +	 */
> +	while (remaining) {
> +		if (bio_rw(bio) == WRITE) {
> +			clone = crypt_alloc_buffer(cc, bio->bi_size,
> +			                           io->first_clone,
> +			                           &bio_vec_idx);
> +			if (clone) {
> +				ctx.bio_out = clone;
> +				r = crypt_convert(cc, &ctx);
> +				if (r < 0) {
> +					crypt_free_buffer_pages(cc, clone,
> +					                        clone->bi_size);
> +					bio_put(clone);
> +					goto cleanup;
> +				}
> +			}
> +		} else
> +			clone = bio_clone(bio, GFP_NOIO);
> +
> +		if (!clone) {
> +			r = -ENOMEM;
> +			goto cleanup;
> +		}
> +
> +		if (!io->first_clone) {
> +			/*
> +			 * hold a reference to the first clone, because it
> +			 * holds the bio_vec array and that can't be freed
> +			 * before all other clones are released
> +			 */
> +			bio_get(clone);
> +			io->first_clone = clone;
> +		}
> +		atomic_inc(&io->pending);
> +
> +		clone->bi_private = io;
> +		clone->bi_end_io = crypt_endio;
> +		clone->bi_bdev = cc->dev->bdev;
> +		clone->bi_sector = cc->start + sector;
> +		clone->bi_rw = bio->bi_rw;
> +
> +		remaining -= clone->bi_size;
> +		sector += bio_sectors(clone);

would be nice to move this code into a separate "create and init my 
clone" function, simply to be ease review and make things a bit more 
clear...


> +		generic_make_request(clone);
> +	}
> +
> +	/* drop reference, clones could have returned before we reach this */
> +	dec_pending(io, 0);
> +	return 0;
> +
> +cleanup:
> +	if (io->first_clone) {
> +		dec_pending(io, r);
> +		return 0;
> +	}
> +
> +	/* if no bio has been dispatched yet, we can directly return the error */
> +	mempool_free(io, cc->io_pool);
> +	return r;
> +}
> +
> +static int crypt_status(struct dm_target *ti, status_type_t type,
> +			char *result, unsigned int maxlen)
> +{
> +	struct crypt_config *cc = (struct crypt_config *) ti->private;
> +	char buffer[32];
> +	const char *cipher;
> +	const char *mode = NULL;
> +	int offset;
> +
> +	switch (type) {
> +	case STATUSTYPE_INFO:
> +		result[0] = '\0';
> +		break;
> +
> +	case STATUSTYPE_TABLE:
> +		cipher = crypto_tfm_alg_name(cc->tfm);
> +
> +		switch(cc->tfm->crt_u.cipher.cit_mode) {
> +		case CRYPTO_TFM_MODE_CBC:
> +			mode = "plain";
> +			break;
> +		case CRYPTO_TFM_MODE_ECB:
> +			mode = "ecb";
> +			break;
> +		default:
> +			BUG();
> +		}
> +
> +		snprintf(result, maxlen, "%s-%s ", cipher, mode);
> +		offset = strlen(result);
> +
> +		if (cc->key_size > 0) {
> +			if ((maxlen - offset) < ((cc->key_size << 1) + 1))
> +				return -ENOMEM;
> +
> +			crypt_encode_key(result + offset, cc->key, cc->key_size);
> +			offset += cc->key_size << 1;
> +		} else {
> +			if (offset >= maxlen)
> +				return -ENOMEM;
> +			result[offset++] = '-';
> +		}
> +
> +		format_dev_t(buffer, cc->dev->bdev->bd_dev);
> +		snprintf(result + offset, maxlen - offset, " " SECTOR_FORMAT
> +		         " %s " SECTOR_FORMAT, cc->iv_offset,
> +		         buffer, cc->start);
> +		break;
> +	}
> +	return 0;
> +}
> +
> +static struct target_type crypt_target = {
> +	.name   = "crypt",
> +	.module = THIS_MODULE,
> +	.ctr    = crypt_ctr,
> +	.dtr    = crypt_dtr,
> +	.map    = crypt_map,
> +	.status = crypt_status,
> +};
> +
> +static int __init dm_crypt_init(void)
> +{
> +	int r;
> +
> +	_crypt_io_pool = kmem_cache_create("dm-crypt_io",
> +	                                   sizeof(struct crypt_io),
> +	                                   0, 0, NULL, NULL);
> +	if (!_crypt_io_pool)
> +		return -ENOMEM;
> +
> +	_kcryptd = kthread_create(kcryptd_do_work, NULL, "kcryptd");
> +	if (IS_ERR(_kcryptd)) {
> +		r = PTR_ERR(_kcryptd);
> +		DMERR("couldn't create kcryptd: %d", r);
> +		kmem_cache_destroy(_crypt_io_pool);
> +		return r;
> +	}

> +	r = dm_register_target(&crypt_target);
> +	if (r < 0) {
> +		DMERR("crypt: register failed %d", r);
> +		kthread_stop(_kcryptd);
> +		kmem_cache_destroy(_crypt_io_pool);
> +		return r;
> +	}

might want to use the standard 'goto' style exception handling here too, 
instead of duplicating the error unwind code.

Overall, needs a tiny bit of work, but I like it.

	Jeff




  parent reply	other threads:[~2004-02-16  2:10 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-02-11 15:33 Oopsing cryptoapi (or loop device?) on 2.6.* Michal Kwolek
2004-02-11 18:41 ` Jari Ruusu
2004-02-15  2:35   ` Jan Rychter
2004-02-15 14:51     ` Jari Ruusu
2004-02-15 16:38       ` Jari Ruusu
2004-02-16  0:26       ` James Morris
2004-02-18 14:07         ` Bill Davidsen
2004-02-16 12:22       ` Jan Rychter
2004-02-17 14:09         ` Jari Ruusu
2004-02-17 19:14           ` Jan Rychter
2004-02-18 14:06             ` Jari Ruusu
2004-02-18 21:40               ` Jan Rychter
2004-02-19 13:34                 ` Jari Ruusu
2004-02-11 22:54 ` bill davidsen
2004-02-15 17:34 ` Christophe Saout
2004-02-15 18:02   ` Christoph Hellwig
2004-02-15 18:42     ` Christophe Saout
2004-02-15 18:53       ` Christoph Hellwig
2004-02-15 19:36         ` Christophe Saout
2004-02-15 19:46           ` Christoph Hellwig
2004-02-15 20:24             ` kthread vs. dm-daemon (was: Oopsing cryptoapi (or loop device?) on 2.6.*) Christophe Saout
2004-02-15 22:13               ` kthread vs. dm-daemon Mike Christie
2004-02-16  0:04                 ` Christophe Saout
2004-02-16  1:04                   ` Mike Christie
2004-02-16  1:29                     ` Christophe Saout
2004-02-16  3:02               ` kthread vs. dm-daemon (was: Oopsing cryptoapi (or loop device?) on 2.6.*) Rusty Russell
2004-02-16 13:27                 ` Christophe Saout
2004-02-16 16:42                   ` Christophe Saout
2004-02-16 13:48                 ` Joe Thornber
2004-02-16  1:44             ` dm-crypt using kthread " Christophe Saout
2004-02-16  1:53               ` Andrew Morton
2004-02-16  2:07                 ` Grzegorz Kulewski
2004-02-16  3:03                   ` Christophe Saout
2004-02-16  3:22                     ` Grzegorz Kulewski
2004-02-16  4:05                       ` dm-crypt using kthread Jeff Garzik
2004-02-16  4:14                         ` Grzegorz Kulewski
2004-02-16 10:15                           ` Christophe Saout
2004-02-16  9:54                       ` dm-crypt using kthread (was: Oopsing cryptoapi (or loop device?) on 2.6.*) Christophe Saout
2004-03-01 22:18                     ` Matthias Urlichs
2004-03-01 22:51                       ` Christophe Saout
2004-03-01 23:22                         ` Matthias Urlichs
2004-02-16  2:58                 ` Christophe Saout
2004-02-16  7:28                   ` David Wagner
2004-02-16 10:11                     ` Christophe Saout
2004-02-18 14:15                 ` dm-crypt using kthread Bill Davidsen
2004-02-16  2:07               ` dm-crypt using kthread (was: Oopsing cryptoapi (or loop device?) on 2.6.*) Andrew Morton
2004-02-16  2:17                 ` dm-crypt using kthread Jeff Garzik
2004-02-16  2:53                 ` dm-crypt using kthread (was: Oopsing cryptoapi (or loop device?) on 2.6.*) Christophe Saout
2004-02-16  2:10               ` Jeff Garzik [this message]
2004-02-16  2:40                 ` dm-crypt using kthread Christophe Saout
2004-02-16  2:58                   ` Jeff Garzik
2004-02-16  3:10                     ` Christophe Saout
2004-02-16 13:04                     ` Christophe Saout
2004-02-16 19:09                       ` Jeff Garzik
     [not found] <1o4ML-V4-5@gated-at.bofh.it>
     [not found] ` <1pypu-2eR-25@gated-at.bofh.it>
     [not found]   ` <1pySs-2Hu-13@gated-at.bofh.it>
     [not found]     ` <1pzve-3a8-21@gated-at.bofh.it>
     [not found]       ` <1pzEW-3hA-11@gated-at.bofh.it>
     [not found]         ` <1pAhG-3RJ-29@gated-at.bofh.it>
     [not found]           ` <1pArj-3ZE-31@gated-at.bofh.it>
     [not found]             ` <1pG3C-kZ-9@gated-at.bofh.it>
     [not found]               ` <1pGdl-qX-11@gated-at.bofh.it>
     [not found]                 ` <1pGn1-E8-23@gated-at.bofh.it>
     [not found]                   ` <1pHj1-1q2-11@gated-at.bofh.it>
2004-02-16  3:58                     ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4030268C.6050701@pobox.com \
    --to=jgarzik@pobox.com \
    --cc=akpm@osdl.org \
    --cc=christophe@saout.de \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mikenc@us.ibm.com \
    --cc=thornber@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.