public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] loop.c patches, take two
  2003-11-02 20:46           ` Ben Slusky
@ 2003-12-21 19:55             ` Ben Slusky
  2003-12-21 20:07               ` Ben Slusky
                                 ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Ben Slusky @ 2003-12-21 19:55 UTC (permalink / raw)
  To: linux-kernel; +Cc: Andrew Morton, jariruusu

[-- Attachment #1: Type: text/plain, Size: 1185 bytes --]

Hi everybody,

Well, it appears that neither my loop.c patches nor Andrew's were merged
into 2.6.0... I'd request that my patches be merged into mainline,
since Jari Ruusu has pointed out that Andrew's patch (which removes the
separate code path for block-backed loop devices) will break journaling
filesystems on loop devices. Right now, journaling FS's on file-backed
loop devices are kinda iffy (they will work only if the underlying FS is
also journaled, with the correct journal options) but journaling FS's
on block-backed loop devices work perfectly. Andrew's patches would
break this.

This first patch improves the memory allocation technique used by
block-backed loop devices. Specifically the loop device will make
efficient use of whatever pages it can get, rather than throwing them all
back in case it didn't get as many as it wanted -- this fixes Bugzilla
bug #1198.

Taken together, this patch and the following patch make loop devices
safe for use as swap. Please apply.

-
-Ben


-- 
Ben Slusky                      | "I have a cunning plan! 
sluskyb@paranoiacs.org          |  RUN!"
sluskyb@stwing.org              |       -Spider Jerusalem
PGP keyID ADA44B3B      

[-- Attachment #2: loop-recycle-mk2.diff --]
[-- Type: text/plain, Size: 7985 bytes --]

diff -pu linux-2.6.0/drivers/block/loop.c-orig linux-2.6.0/drivers/block/loop.c
--- linux-2.6.0/drivers/block/loop.c-orig	2003-12-20 21:46:53.430260696 -0500
+++ linux-2.6.0/drivers/block/loop.c	2003-12-20 21:47:11.743476664 -0500
@@ -247,12 +247,10 @@ fail:
 static int
 lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
-	unsigned vecnr;
-	int ret = 0;
-
-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
+	struct bio_vec *bvec;
+	int i, ret = 0;
 
+	bio_for_each_segment(bvec, bio, i) {
 		ret = do_lo_send(lo, bvec, bsize, pos);
 		if (ret < 0)
 			break;
@@ -318,12 +316,10 @@ do_lo_receive(struct loop_device *lo,
 static int
 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
-	unsigned vecnr;
-	int ret = 0;
-
-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
+	struct bio_vec *bvec;
+	int i, ret = 0;
 
+	bio_for_each_segment(bvec, bio, i) {
 		ret = do_lo_receive(lo, bvec, bsize, pos);
 		if (ret < 0)
 			break;
@@ -353,10 +349,12 @@ static void loop_put_buffer(struct bio *
 	 * check bi_end_io, may just be a remapped bio
 	 */
 	if (bio && bio->bi_end_io == loop_end_io_transfer) {
+		struct bio_vec *bv;
 		int i;
 
-		for (i = 0; i < bio->bi_vcnt; i++)
-			__free_page(bio->bi_io_vec[i].bv_page);
+		if (!bio_flagged(bio, BIO_CLONED))
+			bio_for_each_segment(bv, bio, i)
+				__free_page(bv->bv_page);
 
 		bio_put(bio);
 	}
@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
 	if (bio->bi_size)
 		return 1;
 
-	if (err || bio_rw(bio) == WRITE) {
+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
 		bio_endio(rbh, rbh->bi_size, err);
 		if (atomic_dec_and_test(&lo->lo_pending))
 			up(&lo->lo_bh_mutex);
@@ -430,35 +428,41 @@ static struct bio *loop_copy_bio(struct 
 	struct bio_vec *bv;
 	int i;
 
-	bio = bio_alloc(__GFP_NOWARN, rbh->bi_vcnt);
+	if (bio_rw(rbh) != WRITE) {
+		bio = bio_clone(rbh, GFP_NOIO);
+		return bio;
+	}
+
+	bio = bio_alloc(GFP_NOIO, rbh->bi_vcnt);
 	if (!bio)
 		return NULL;
 
 	/*
 	 * iterate iovec list and alloc pages
 	 */
-	__bio_for_each_segment(bv, rbh, i, 0) {
+	bio_for_each_segment(bv, rbh, i) {
 		struct bio_vec *bbv = &bio->bi_io_vec[i];
 
-		bbv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
+		/* We need one page; the rest we can live without */
+		bbv->bv_page = alloc_page((bio->bi_vcnt ? __GFP_NOWARN : GFP_NOIO) | __GFP_HIGHMEM);
 		if (bbv->bv_page == NULL)
-			goto oom;
+			break;
 
-		bbv->bv_len = bv->bv_len;
 		bbv->bv_offset = bv->bv_offset;
+		bio->bi_size += (bbv->bv_len = bv->bv_len);
+		bio->bi_vcnt++;
 	}
 
-	bio->bi_vcnt = rbh->bi_vcnt;
-	bio->bi_size = rbh->bi_size;
-
-	return bio;
+	/* Can't get anything done if we didn't get any pages */
+	if (unlikely(!bio->bi_vcnt)) {
+		bio_put(bio);
+		return NULL;
+	}
 
-oom:
-	while (--i >= 0)
-		__free_page(bio->bi_io_vec[i].bv_page);
+	bio->bi_vcnt += (bio->bi_idx = rbh->bi_idx);
+	bio->bi_rw = rbh->bi_rw;
 
-	bio_put(bio);
-	return NULL;
+	return bio;
 }
 
 static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh)
@@ -479,19 +483,85 @@ static struct bio *loop_get_buffer(struc
 		if (flags & PF_MEMALLOC)
 			current->flags |= PF_MEMALLOC;
 
-		if (bio == NULL)
+		if (unlikely(bio == NULL)) {
+			printk("loop: alloc failed\n");
 			blk_congestion_wait(WRITE, HZ/10);
+		}
 	} while (bio == NULL);
 
 	bio->bi_end_io = loop_end_io_transfer;
 	bio->bi_private = rbh;
 	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
-	bio->bi_rw = rbh->bi_rw;
 	bio->bi_bdev = lo->lo_device;
 
 	return bio;
 }
 
+static void loop_recycle_buffer(struct loop_device *lo, struct bio *bio)
+{
+	struct bio *rbh = bio->bi_private;
+	struct bio_vec *bv, *bbv, *rbv;
+	int i, flags, nvecs = bio->bi_vcnt - bio->bi_idx;
+
+	/*
+	 * Comments in ll_rw_blk.c reserve for generic_make_request the right to
+	 * "change bi_dev and bi_sector for remaps as it sees fit." Doh!
+	 * Workaround: reset the bi_bdev and recompute the starting sector for
+	 * the next write.
+	 */
+	bio->bi_bdev = lo->lo_device;
+	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
+	/* Clean up the flags too */
+	bio->bi_flags &= (~(BIO_POOL_MASK - 1) | (1 << BIO_UPTODATE));
+
+	/*
+	 * Move the allocated pages into position to transfer more data.
+	 */
+	__bio_for_each_segment(bv, bio, i, rbh->bi_idx) {
+		rbv = &rbh->bi_io_vec[i];
+		bbv = bv + nvecs;
+
+		/* Workaround -- see note above */
+		bio->bi_sector += rbv->bv_len >> 9;
+		if (i < bio->bi_idx)
+			continue;
+
+		if (i + nvecs < rbh->bi_vcnt) {
+			bbv->bv_page = bv->bv_page;
+			bbv->bv_offset = rbv->bv_offset;
+			bio->bi_size += (bbv->bv_len = rbv->bv_len);
+		} else
+			__free_page(bv->bv_page);
+		memset(bv, 0, sizeof(*bv));
+	}
+
+	bio->bi_idx = bio->bi_vcnt;
+	bio->bi_vcnt += nvecs;
+	bio->bi_vcnt = min(bio->bi_vcnt, rbh->bi_vcnt);
+
+	/*
+	 * If we need more pages, try to get some.
+	 * Clear PF_MEMALLOC to avoid consuming all available memory.
+	 */
+	flags = current->flags;
+	current->flags &= ~PF_MEMALLOC;
+
+	__bio_for_each_segment(rbv, rbh, i, bio->bi_vcnt) {
+		bv = &bio->bi_io_vec[i];
+
+		bv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
+		if (bv->bv_page == NULL)
+			break;
+
+		bv->bv_offset = rbv->bv_offset;
+		bio->bi_size += (bv->bv_len = rbv->bv_len);
+		bio->bi_vcnt++;
+	}
+
+	if (flags & PF_MEMALLOC)
+		current->flags |= PF_MEMALLOC;
+}
+
 static int loop_transfer_bio(struct loop_device *lo,
 			     struct bio *to_bio, struct bio *from_bio)
 {
@@ -502,17 +572,19 @@ static int loop_transfer_bio(struct loop
 
 	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
 
-	__bio_for_each_segment(from_bvec, from_bio, i, 0) {
-		to_bvec = &to_bio->bi_io_vec[i];
+	__bio_for_each_segment(to_bvec, to_bio, i, from_bio->bi_idx) {
+		from_bvec = &from_bio->bi_io_vec[i];
 
-		kmap(from_bvec->bv_page);
-		kmap(to_bvec->bv_page);
-		vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
-		vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
-		ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
-					from_bvec->bv_len, IV);
-		kunmap(from_bvec->bv_page);
-		kunmap(to_bvec->bv_page);
+		if (i >= to_bio->bi_idx) {
+			kmap(from_bvec->bv_page);
+			kmap(to_bvec->bv_page);
+			vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
+			vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
+			ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
+						from_bvec->bv_len, IV);
+			kunmap(from_bvec->bv_page);
+			kunmap(to_bvec->bv_page);
+		}
 		IV += from_bvec->bv_len >> 9;
 	}
 
@@ -579,16 +651,30 @@ inactive:
 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 {
 	int ret;
+	struct bio *rbh;
 
-	/*
-	 * For block backed loop, we know this is a READ
-	 */
 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
 		ret = do_bio_filebacked(lo, bio);
 		bio_endio(bio, bio->bi_size, ret);
-	} else {
-		struct bio *rbh = bio->bi_private;
+	} else if (bio_rw(bio) == WRITE) {
+		/*
+		 * Write complete, but more pages remain;
+		 * encrypt and write some more pages
+		 */
+		loop_recycle_buffer(lo, bio);
 
+		rbh = bio->bi_private;
+		if ((ret = loop_transfer_bio(lo, bio, rbh))) {
+			bio_endio(bio, bio->bi_size, ret);
+			return;
+		}
+
+		generic_make_request(bio);
+	} else {
+		/*
+		 * Read complete; do decryption now
+		 */
+		rbh = bio->bi_private;
 		ret = loop_transfer_bio(lo, bio, rbh);
 
 		bio_endio(rbh, rbh->bi_size, ret);
@@ -606,6 +692,7 @@ static int loop_thread(void *data)
 {
 	struct loop_device *lo = data;
 	struct bio *bio;
+	int x;
 
 	daemonize("loop%d", lo->lo_number);
 
@@ -640,7 +727,11 @@ static int loop_thread(void *data)
 			printk("loop: missing bio\n");
 			continue;
 		}
+
+		x = (lo->lo_flags & LO_FLAGS_DO_BMAP) || bio_rw(bio) != WRITE;
 		loop_handle_bio(lo, bio);
+		if (!x)
+			continue;
 
 		/*
 		 * upped both for pending work and tear-down, lo_pending

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
@ 2003-12-21 20:07               ` Ben Slusky
  2003-12-21 20:49               ` Mika Penttilä
                                 ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Ben Slusky @ 2003-12-21 20:07 UTC (permalink / raw)
  To: linux-kernel; +Cc: Andrew Morton, jariruusu

[-- Attachment #1: Type: text/plain, Size: 640 bytes --]

This patch will localize (and atomicize) the kmaps in loop.c, moving
them from do_lo_send, do_lo_receive, and loop_transfer_bio into the
transform functions. The effect is to eliminate the page,offset ->
vaddr -> page,offset -> vaddr back-and-forth in cryptoloop devices.

I've incorporated Andrew's corrections.

Taken together, the preceding patch and this patch make loop devices
safe for use as swap. Please apply.

-
-Ben


-- 
Ben Slusky                      | Integrity is the key. Once you
sluskyb@paranoiacs.org          | can fake that...
sluskyb@stwing.org              |                -Simon Travaglia
PGP keyID ADA44B3B      

[-- Attachment #2: loop-highmem-mk2.diff --]
[-- Type: text/plain, Size: 12398 bytes --]

diff -pu linux-2.6.0/include/linux/loop.h-orig linux-2.6.0/include/linux/loop.h
--- linux-2.6.0/include/linux/loop.h-orig	2003-12-20 21:36:18.579772640 -0500
+++ linux-2.6.0/include/linux/loop.h	2003-12-20 21:37:22.086118208 -0500
@@ -34,8 +34,9 @@ struct loop_device {
 	loff_t		lo_sizelimit;
 	int		lo_flags;
 	int		(*transfer)(struct loop_device *, int cmd,
-				    char *raw_buf, char *loop_buf, int size,
-				    sector_t real_block);
+				    struct page *raw_page, unsigned raw_off,
+				    struct page *loop_page, unsigned loop_off,
+				    int size, sector_t real_block);
 	char		lo_file_name[LO_NAME_SIZE];
 	char		lo_crypt_name[LO_NAME_SIZE];
 	char		lo_encrypt_key[LO_KEY_SIZE];
@@ -128,8 +129,10 @@ struct loop_info64 {
 /* Support for loadable transfer modules */
 struct loop_func_table {
 	int number;	/* filter type */ 
-	int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf,
-			char *loop_buf, int size, sector_t real_block);
+	int (*transfer)(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t real_block);
 	int (*init)(struct loop_device *, const struct loop_info64 *); 
 	/* release is called from loop_unregister_transfer or clr_fd */
 	int (*release)(struct loop_device *); 
diff -pu linux-2.6.0/drivers/block/loop.c-orig linux-2.6.0/drivers/block/loop.c
--- linux-2.6.0/drivers/block/loop.c-orig	2003-12-20 21:36:18.614767320 -0500
+++ linux-2.6.0/drivers/block/loop.c	2003-12-20 21:37:42.488016648 -0500
@@ -75,24 +75,34 @@ static struct gendisk **disks;
 /*
  * Transfer functions
  */
-static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
-			 char *loop_buf, int size, sector_t real_block)
+static int transfer_none(struct loop_device *lo, int cmd,
+			 struct page *raw_page, unsigned raw_off,
+			 struct page *loop_page, unsigned loop_off,
+			 int size, sector_t real_block)
 {
-	if (raw_buf != loop_buf) {
-		if (cmd == READ)
-			memcpy(loop_buf, raw_buf, size);
-		else
-			memcpy(raw_buf, loop_buf, size);
-	}
+	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
 
+	if (cmd == READ)
+		memcpy(loop_buf, raw_buf, size);
+	else
+		memcpy(raw_buf, loop_buf, size);
+
+	kunmap_atomic(raw_buf, KM_USER0);
+	kunmap_atomic(loop_buf, KM_USER1);
+	cond_resched();
 	return 0;
 }
 
-static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
-			char *loop_buf, int size, sector_t real_block)
-{
-	char	*in, *out, *key;
-	int	i, keysize;
+static int transfer_xor(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t real_block)
+{
+	char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+	char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+	char *in, *out, *key;
+	int i, keysize;
 
 	if (cmd == READ) {
 		in = raw_buf;
@@ -106,6 +116,10 @@ static int transfer_xor(struct loop_devi
 	keysize = lo->lo_encrypt_key_size;
 	for (i = 0; i < size; i++)
 		*out++ = *in++ ^ key[(i & 511) % keysize];
+
+	kunmap_atomic(raw_buf, KM_USER0);
+	kunmap_atomic(loop_buf, KM_USER1);
+	cond_resched();
 	return 0;
 }
 
@@ -162,13 +176,15 @@ figure_loop_size(struct loop_device *lo)
 }
 
 static inline int
-lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
-	       char *lbuf, int size, sector_t rblock)
+lo_do_transfer(struct loop_device *lo, int cmd,
+	       struct page *rpage, unsigned roffs,
+	       struct page *lpage, unsigned loffs,
+	       int size, sector_t rblock)
 {
 	if (!lo->transfer)
 		return 0;
 
-	return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock);
+	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
 }
 
 static int
@@ -178,16 +194,15 @@ do_lo_send(struct loop_device *lo, struc
 	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
 	struct address_space_operations *aops = mapping->a_ops;
 	struct page *page;
-	char *kaddr, *data;
 	pgoff_t index;
-	unsigned size, offset;
+	unsigned size, offset, bv_offs;
 	int len;
 	int ret = 0;
 
 	down(&mapping->host->i_sem);
 	index = pos >> PAGE_CACHE_SHIFT;
 	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
-	data = kmap(bvec->bv_page) + bvec->bv_offset;
+	bv_offs = bvec->bv_offset;
 	len = bvec->bv_len;
 	while (len > 0) {
 		sector_t IV;
@@ -204,25 +219,28 @@ do_lo_send(struct loop_device *lo, struc
 			goto fail;
 		if (aops->prepare_write(file, page, offset, offset+size))
 			goto unlock;
-		kaddr = kmap(page);
-		transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset,
-						 data, size, IV);
+		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
+						 bvec->bv_page, bv_offs,
+						 size, IV);
 		if (transfer_result) {
+			char *kaddr;
+
 			/*
 			 * The transfer failed, but we still write the data to
 			 * keep prepare/commit calls balanced.
 			 */
 			printk(KERN_ERR "loop: transfer error block %llu\n",
 			       (unsigned long long)index);
+			kaddr = kmap_atomic(page, KM_USER0);
 			memset(kaddr + offset, 0, size);
+			kunmap_atomic(kaddr, KM_USER0);
 		}
 		flush_dcache_page(page);
-		kunmap(page);
 		if (aops->commit_write(file, page, offset, offset+size))
 			goto unlock;
 		if (transfer_result)
 			goto unlock;
-		data += size;
+		bv_offs += size;
 		len -= size;
 		offset = 0;
 		index++;
@@ -232,7 +250,6 @@ do_lo_send(struct loop_device *lo, struc
 	}
 	up(&mapping->host->i_sem);
 out:
-	kunmap(bvec->bv_page);
 	return ret;
 
 unlock:
@@ -261,7 +278,8 @@ lo_send(struct loop_device *lo, struct b
 
 struct lo_read_data {
 	struct loop_device *lo;
-	char *data;
+	struct page *page;
+	unsigned offset;
 	int bsize;
 };
 
@@ -269,7 +287,6 @@ static int
 lo_read_actor(read_descriptor_t *desc, struct page *page,
 	      unsigned long offset, unsigned long size)
 {
-	char *kaddr;
 	unsigned long count = desc->count;
 	struct lo_read_data *p = (struct lo_read_data*)desc->buf;
 	struct loop_device *lo = p->lo;
@@ -280,18 +297,16 @@ lo_read_actor(read_descriptor_t *desc, s
 	if (size > count)
 		size = count;
 
-	kaddr = kmap(page);
-	if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
+	if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
 		size = 0;
 		printk(KERN_ERR "loop: transfer error block %ld\n",
 		       page->index);
 		desc->error = -EINVAL;
 	}
-	kunmap(page);
 	
 	desc->count = count - size;
 	desc->written += size;
-	p->data += size;
+	p->offset += size;
 	return size;
 }
 
@@ -304,12 +319,12 @@ do_lo_receive(struct loop_device *lo,
 	int retval;
 
 	cookie.lo = lo;
-	cookie.data = kmap(bvec->bv_page) + bvec->bv_offset;
+	cookie.page = bvec->bv_page;
+	cookie.offset = bvec->bv_offset;
 	cookie.bsize = bsize;
 	file = lo->lo_backing_file;
 	retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
 			lo_read_actor, &cookie);
-	kunmap(bvec->bv_page);
 	return (retval < 0)? retval: 0;
 }
 
@@ -567,23 +582,17 @@ static int loop_transfer_bio(struct loop
 {
 	sector_t IV;
 	struct bio_vec *from_bvec, *to_bvec;
-	char *vto, *vfrom;
 	int ret = 0, i;
 
 	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
 
 	__bio_for_each_segment(to_bvec, to_bio, i, from_bio->bi_idx) {
 		from_bvec = &from_bio->bi_io_vec[i];
-
 		if (i >= to_bio->bi_idx) {
-			kmap(from_bvec->bv_page);
-			kmap(to_bvec->bv_page);
-			vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
-			vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
-			ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
-						from_bvec->bv_len, IV);
-			kunmap(from_bvec->bv_page);
-			kunmap(to_bvec->bv_page);
+			ret |= lo_do_transfer(lo, bio_data_dir(to_bio),
+				      to_bvec->bv_page, to_bvec->bv_offset,
+				      from_bvec->bv_page, from_bvec->bv_offset,
+				      from_bvec->bv_len, IV);
 		}
 		IV += from_bvec->bv_len >> 9;
 	}
diff -pu linux-2.6.0/drivers/block/cryptoloop.c-orig linux-2.6.0/drivers/block/cryptoloop.c
--- linux-2.6.0/drivers/block/cryptoloop.c-orig	2003-12-20 21:36:18.630764888 -0500
+++ linux-2.6.0/drivers/block/cryptoloop.c	2003-12-20 21:37:22.130111520 -0500
@@ -87,43 +87,49 @@ typedef int (*encdec_ecb_t)(struct crypt
 
 
 static int
-cryptoloop_transfer_ecb(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer_ecb(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	struct scatterlist sg_out = { 0, };
 	struct scatterlist sg_in = { 0, };
 
 	encdec_ecb_t encdecfunc;
-	char const *in;
-	char *out;
+	struct page *in_page, *out_page;
+	unsigned in_offs, out_offs;
 
 	if (cmd == READ) {
-		in = raw_buf;
-		out = loop_buf;
+		in_page = raw_page;
+		in_offs = raw_off;
+		out_page = loop_page;
+		out_offs = loop_off;
 		encdecfunc = tfm->crt_u.cipher.cit_decrypt;
 	} else {
-		in = loop_buf;
-		out = raw_buf;
+		in_page = loop_page;
+		in_offs = loop_off;
+		out_page = raw_page;
+		out_offs = raw_off;
 		encdecfunc = tfm->crt_u.cipher.cit_encrypt;
 	}
 
 	while (size > 0) {
 		const int sz = min(size, LOOP_IV_SECTOR_SIZE);
 
-		sg_in.page = virt_to_page(in);
-		sg_in.offset = (unsigned long)in & ~PAGE_MASK;
+		sg_in.page = in_page;
+		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = virt_to_page(out);
-		sg_out.offset = (unsigned long)out & ~PAGE_MASK;
+		sg_out.page = out_page;
+		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
 		encdecfunc(tfm, &sg_out, &sg_in, sz);
 
 		size -= sz;
-		in += sz;
-		out += sz;
+		in_offs += sz;
+		out_offs += sz;
 	}
 
 	return 0;
@@ -135,24 +141,30 @@ typedef int (*encdec_cbc_t)(struct crypt
 			unsigned int nsg, u8 *iv);
 
 static int
-cryptoloop_transfer_cbc(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer_cbc(struct loop_device *lo, int cmd,
+			struct page *raw_page, unsigned raw_off,
+			struct page *loop_page, unsigned loop_off,
+			int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	struct scatterlist sg_out = { 0, };
 	struct scatterlist sg_in = { 0, };
 
 	encdec_cbc_t encdecfunc;
-	char const *in;
-	char *out;
+	struct page *in_page, *out_page;
+	unsigned in_offs, out_offs;
 
 	if (cmd == READ) {
-		in = raw_buf;
-		out = loop_buf;
+		in_page = raw_page;
+		in_offs = raw_off;
+		out_page = loop_page;
+		out_offs = loop_off;
 		encdecfunc = tfm->crt_u.cipher.cit_decrypt_iv;
 	} else {
-		in = loop_buf;
-		out = raw_buf;
+		in_page = loop_page;
+		in_offs = loop_off;
+		out_page = raw_page;
+		out_offs = raw_off;
 		encdecfunc = tfm->crt_u.cipher.cit_encrypt_iv;
 	}
 
@@ -161,39 +173,43 @@ cryptoloop_transfer_cbc(struct loop_devi
 		u32 iv[4] = { 0, };
 		iv[0] = cpu_to_le32(IV & 0xffffffff);
 
-		sg_in.page = virt_to_page(in);
-		sg_in.offset = offset_in_page(in);
+		sg_in.page = in_page;
+		sg_in.offset = in_offs;
 		sg_in.length = sz;
 
-		sg_out.page = virt_to_page(out);
-		sg_out.offset = offset_in_page(out);
+		sg_out.page = out_page;
+		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
 		encdecfunc(tfm, &sg_out, &sg_in, sz, (u8 *)iv);
 
 		IV++;
 		size -= sz;
-		in += sz;
-		out += sz;
+		in_offs += sz;
+		out_offs += sz;
 	}
 
 	return 0;
 }
 
 static int
-cryptoloop_transfer(struct loop_device *lo, int cmd, char *raw_buf,
-		     char *loop_buf, int size, sector_t IV)
+cryptoloop_transfer(struct loop_device *lo, int cmd,
+		    struct page *raw_page, unsigned raw_off,
+		    struct page *loop_page, unsigned loop_off,
+		    int size, sector_t IV)
 {
 	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
 	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB)
 	{
 		lo->transfer = cryptoloop_transfer_ecb;
-		return cryptoloop_transfer_ecb(lo, cmd, raw_buf, loop_buf, size, IV);
+		return cryptoloop_transfer_ecb(lo, cmd, raw_page, raw_off,
+					       loop_page, loop_off, size, IV);
 	}	
 	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_CBC)
 	{	
 		lo->transfer = cryptoloop_transfer_cbc;
-		return cryptoloop_transfer_cbc(lo, cmd, raw_buf, loop_buf, size, IV);
+		return cryptoloop_transfer_cbc(lo, cmd, raw_page, raw_off,
+					       loop_page, loop_off, size, IV);
 	}
 	
 	/*  This is not supposed to happen */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
  2003-12-21 20:07               ` Ben Slusky
@ 2003-12-21 20:49               ` Mika Penttilä
  2003-12-21 21:12                 ` Ben Slusky
  2003-12-21 22:01                 ` Christophe Saout
  2003-12-22  6:33               ` Andrew Morton
  2003-12-22 23:32               ` Ben Slusky
  3 siblings, 2 replies; 12+ messages in thread
From: Mika Penttilä @ 2003-12-21 20:49 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel, Andrew Morton, jariruusu

Yet another Big Loop Patch... :)

It's not obvious which parts are bug fixes, and which performance 
improvements. What exactly breaks loops on journalling filesystems, and 
how do you solve it?

What's the clone_bio() business? Why on reads only?

--Mika


Ben Slusky wrote:

>Hi everybody,
>
>Well, it appears that neither my loop.c patches nor Andrew's were merged
>into 2.6.0... I'd request that my patches be merged into mainline,
>since Jari Ruusu has pointed out that Andrew's patch (which removes the
>separate code path for block-backed loop devices) will break journaling
>filesystems on loop devices. Right now, journaling FS's on file-backed
>loop devices are kinda iffy (they will work only if the underlying FS is
>also journaled, with the correct journal options) but journaling FS's
>on block-backed loop devices work perfectly. Andrew's patches would
>break this.
>
>This first patch improves the memory allocation technique used by
>block-backed loop devices. Specifically the loop device will make
>efficient use of whatever pages it can get, rather than throwing them all
>back in case it didn't get as many as it wanted -- this fixes Bugzilla
>bug #1198.
>
>Taken together, this patch and the following patch make loop devices
>safe for use as swap. Please apply.
>
>-
>-Ben
>
>
>  
>
>------------------------------------------------------------------------
>
>diff -pu linux-2.6.0/drivers/block/loop.c-orig linux-2.6.0/drivers/block/loop.c
>--- linux-2.6.0/drivers/block/loop.c-orig	2003-12-20 21:46:53.430260696 -0500
>+++ linux-2.6.0/drivers/block/loop.c	2003-12-20 21:47:11.743476664 -0500
>@@ -247,12 +247,10 @@ fail:
> static int
> lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
> {
>-	unsigned vecnr;
>-	int ret = 0;
>-
>-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
>-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
>+	struct bio_vec *bvec;
>+	int i, ret = 0;
> 
>+	bio_for_each_segment(bvec, bio, i) {
> 		ret = do_lo_send(lo, bvec, bsize, pos);
> 		if (ret < 0)
> 			break;
>@@ -318,12 +316,10 @@ do_lo_receive(struct loop_device *lo,
> static int
> lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
> {
>-	unsigned vecnr;
>-	int ret = 0;
>-
>-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
>-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
>+	struct bio_vec *bvec;
>+	int i, ret = 0;
> 
>+	bio_for_each_segment(bvec, bio, i) {
> 		ret = do_lo_receive(lo, bvec, bsize, pos);
> 		if (ret < 0)
> 			break;
>@@ -353,10 +349,12 @@ static void loop_put_buffer(struct bio *
> 	 * check bi_end_io, may just be a remapped bio
> 	 */
> 	if (bio && bio->bi_end_io == loop_end_io_transfer) {
>+		struct bio_vec *bv;
> 		int i;
> 
>-		for (i = 0; i < bio->bi_vcnt; i++)
>-			__free_page(bio->bi_io_vec[i].bv_page);
>+		if (!bio_flagged(bio, BIO_CLONED))
>+			bio_for_each_segment(bv, bio, i)
>+				__free_page(bv->bv_page);
> 
> 		bio_put(bio);
> 	}
>@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
> 	if (bio->bi_size)
> 		return 1;
> 
>-	if (err || bio_rw(bio) == WRITE) {
>+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
> 		bio_endio(rbh, rbh->bi_size, err);
> 		if (atomic_dec_and_test(&lo->lo_pending))
> 			up(&lo->lo_bh_mutex);
>@@ -430,35 +428,41 @@ static struct bio *loop_
>

>_bio(struct 
> 	struct bio_vec *bv;
> 	int i;
> 
>-	bio = bio_alloc(__GFP_NOWARN, rbh->bi_vcnt);
>+	if (bio_rw(rbh) != WRITE) {
>+		bio = bio_clone(rbh, GFP_NOIO);
>+		return bio;
>+	}
>+
>+	bio = bio_alloc(GFP_NOIO, rbh->bi_vcnt);
> 	if (!bio)
> 		return NULL;
> 
> 	/*
> 	 * iterate iovec list and alloc pages
> 	 */
>-	__bio_for_each_segment(bv, rbh, i, 0) {
>+	bio_for_each_segment(bv, rbh, i) {
> 		struct bio_vec *bbv = &bio->bi_io_vec[i];
> 
>-		bbv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
>+		/* We need one page; the rest we can live without */
>+		bbv->bv_page = alloc_page((bio->bi_vcnt ? __GFP_NOWARN : GFP_NOIO) | __GFP_HIGHMEM);
> 		if (bbv->bv_page == NULL)
>-			goto oom;
>+			break;
> 
>-		bbv->bv_len = bv->bv_len;
> 		bbv->bv_offset = bv->bv_offset;
>+		bio->bi_size += (bbv->bv_len = bv->bv_len);
>+		bio->bi_vcnt++;
> 	}
> 
>-	bio->bi_vcnt = rbh->bi_vcnt;
>-	bio->bi_size = rbh->bi_size;
>-
>-	return bio;
>+	/* Can't get anything done if we didn't get any pages */
>+	if (unlikely(!bio->bi_vcnt)) {
>+		bio_put(bio);
>+		return NULL;
>+	}
> 
>-oom:
>-	while (--i >= 0)
>-		__free_page(bio->bi_io_vec[i].bv_page);
>+	bio->bi_vcnt += (bio->bi_idx = rbh->bi_idx);
>+	bio->bi_rw = rbh->bi_rw;
> 
>-	bio_put(bio);
>-	return NULL;
>+	return bio;
> }
> 
> static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh)
>@@ -479,19 +483,85 @@ static struct bio *loop_get_buffer(struc
> 		if (flags & PF_MEMALLOC)
> 			current->flags |= PF_MEMALLOC;
> 
>-		if (bio == NULL)
>+		if (unlikely(bio == NULL)) {
>+			printk("loop: alloc failed\n");
> 			blk_congestion_wait(WRITE, HZ/10);
>+		}
> 	} while (bio == NULL);
> 
> 	bio->bi_end_io = loop_end_io_transfer;
> 	bio->bi_private = rbh;
> 	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
>-	bio->bi_rw = rbh->bi_rw;
> 	bio->bi_bdev = lo->lo_device;
> 
> 	return bio;
> }
> 
>+static void loop_recycle_buffer(struct loop_device *lo, struct bio *bio)
>+{
>+	struct bio *rbh = bio->bi_private;
>+	struct bio_vec *bv, *bbv, *rbv;
>+	int i, flags, nvecs = bio->bi_vcnt - bio->bi_idx;
>+
>+	/*
>+	 * Comments in ll_rw_blk.c reserve for generic_make_request the right to
>+	 * "change bi_dev and bi_sector for remaps as it sees fit." Doh!
>+	 * Workaround: reset the bi_bdev and recompute the starting sector for
>+	 * the next write.
>+	 */
>+	bio->bi_bdev = lo->lo_device;
>+	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
>+	/* Clean up the flags too */
>+	bio->bi_flags &= (~(BIO_POOL_MASK - 1) | (1 << BIO_UPTODATE));
>+
>+	/*
>+	 * Move the allocated pages into position to transfer more data.
>+	 */
>+	__bio_for_each_segment(bv, bio, i, rbh->bi_idx) {
>+		rbv = &rbh->bi_io_vec[i];
>+		bbv = bv + nvecs;
>+
>+		/* Workaround -- see note above */
>+		bio->bi_sector += rbv->bv_len >> 9;
>+		if (i < bio->bi_idx)
>+			continue;
>+
>+		if (i + nvecs < rbh->bi_vcnt) {
>+			bbv->bv_page = bv->bv_page;
>+			bbv->bv_offset = rbv->bv_offset;
>+			bio->bi_size += (bbv->bv_len = rbv->bv_len);
>+		} else
>+			__free_page(bv->bv_page);
>+		memset(bv, 0, sizeof(*bv));
>+	}
>+
>+	bio->bi_idx = bio->bi_vcnt;
>+	bio->bi_vcnt += nvecs;
>+	bio->bi_vcnt = min(bio->bi_vcnt, rbh->bi_vcnt);
>+
>+	/*
>+	 * If we need more pages, try to get some.
>+	 * Clear PF_MEMALLOC to avoid consuming all available memory.
>+	 */
>+	flags = current->flags;
>+	current->flags &= ~PF_MEMALLOC;
>+
>+	__bio_for_each_segment(rbv, rbh, i, bio->bi_vcnt) {
>+		bv = &bio->bi_io_vec[i];
>+
>+		bv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
>+		if (bv->bv_page == NULL)
>+			break;
>+
>+		bv->bv_offset = rbv->bv_offset;
>+		bio->bi_size += (bv->bv_len = rbv->bv_len);
>+		bio->bi_vcnt++;
>+	}
>+
>+	if (flags & PF_MEMALLOC)
>+		current->flags |= PF_MEMALLOC;
>+}
>+
> static int loop_transfer_bio(struct loop_device *lo,
> 			     struct bio *to_bio, struct bio *from_bio)
> {
>@@ -502,17 +572,19 @@ static int loop_transfer_bio(struct loop
> 
> 	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
> 
>-	__bio_for_each_segment(from_bvec, from_bio, i, 0) {
>-		to_bvec = &to_bio->bi_io_vec[i];
>+	__bio_for_each_segment(to_bvec, to_bio, i, from_bio->bi_idx) {
>+		from_bvec = &from_bio->bi_io_vec[i];
> 
>-		kmap(from_bvec->bv_page);
>-		kmap(to_bvec->bv_page);
>-		vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
>-		vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
>-		ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
>-					from_bvec->bv_len, IV);
>-		kunmap(from_bvec->bv_page);
>-		kunmap(to_bvec->bv_page);
>+		if (i >= to_bio->bi_idx) {
>+			kmap(from_bvec->bv_page);
>+			kmap(to_bvec->bv_page);
>+			vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
>+			vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
>+			ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
>+						from_bvec->bv_len, IV);
>+			kunmap(from_bvec->bv_page);
>+			kunmap(to_bvec->bv_page);
>+		}
> 		IV += from_bvec->bv_len >> 9;
> 	}
> 
>@@ -579,16 +651,30 @@ inactive:
> static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
> {
> 	int ret;
>+	struct bio *rbh;
> 
>-	/*
>-	 * For block backed loop, we know this is a READ
>-	 */
> 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
> 		ret = do_bio_filebacked(lo, bio);
> 		bio_endio(bio, bio->bi_size, ret);
>-	} else {
>-		struct bio *rbh = bio->bi_private;
>+	} else if (bio_rw(bio) == WRITE) {
>+		/*
>+		 * Write complete, but more pages remain;
>+		 * encrypt and write some more pages
>+		 */
>+		loop_recycle_buffer(lo, bio);
> 
>+		rbh = bio->bi_private;
>+		if ((ret = loop_transfer_bio(lo, bio, rbh))) {
>+			bio_endio(bio, bio->bi_size, ret);
>+			return;
>+		}
>+
>+		generic_make_request(bio);
>+	} else {
>+		/*
>+		 * Read complete; do decryption now
>+		 */
>+		rbh = bio->bi_private;
> 		ret = loop_transfer_bio(lo, bio, rbh);
> 
> 		bio_endio(rbh, rbh->bi_size, ret);
>@@ -606,6 +692,7 @@ static int loop_thread(void *data)
> {
> 	struct loop_device *lo = data;
> 	struct bio *bio;
>+	int x;
> 
> 	daemonize("loop%d", lo->lo_number);
> 
>@@ -640,7 +727,11 @@ static int loop_thread(void *data)
> 			printk("loop: missing bio\n");
> 			continue;
> 		}
>+
>+		x = (lo->lo_flags & LO_FLAGS_DO_BMAP) || bio_rw(bio) != WRITE;
> 		loop_handle_bio(lo, bio);
>+		if (!x)
>+			continue;
> 
> 		/*
> 		 * upped both for pending work and tear-down, lo_pending
>  
>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 20:49               ` Mika Penttilä
@ 2003-12-21 21:12                 ` Ben Slusky
  2003-12-21 23:00                   ` Mika Penttilä
  2003-12-21 22:01                 ` Christophe Saout
  1 sibling, 1 reply; 12+ messages in thread
From: Ben Slusky @ 2003-12-21 21:12 UTC (permalink / raw)
  To: Mika Penttil?; +Cc: linux-kernel, Andrew Morton, jariruusu

On Sun, 21 Dec 2003 22:49:47 +0200, Mika Penttil? wrote:
> Yet another Big Loop Patch... :)
> 
> It's not obvious which parts are bug fixes, and which performance 
> improvements. What exactly breaks loops on journalling filesystems, and 
> how do you solve it?

See <URL:http://www.ussg.iu.edu/hypermail/linux/kernel/0310.3/1151.html>...
I'd submitted these patches a while back. Andrew observed that the
problems they fixed did not manifest in file-backed loops, so his
solution (which was merged into -mm but not mainstream) was to cut out
the block-backed code path entirely. THAT is what breaks journaling
filesystems on loops (note: not vice versa).

> What's the clone_bio() business? Why on reads only?

There's no need to allocate memory for a second copy of the data on
a read. This is a performance improvenment but I'd say it's closely
related to the main point of the patch (i.e. take what pages you can get
and recycle them); I'm making the block-backed code path significantly
more complex and at the same time having reads take a shortcut. But I
could split that into a separate patch if desired.

-- 
Ben Slusky                      | Yakka foob mog. Grug pubbawup
sluskyb@paranoiacs.org          | zink wattoom gazork. Chumble
sluskyb@stwing.org              | spuzz.
PGP keyID ADA44B3B              |               -Calvin

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 20:49               ` Mika Penttilä
  2003-12-21 21:12                 ` Ben Slusky
@ 2003-12-21 22:01                 ` Christophe Saout
  1 sibling, 0 replies; 12+ messages in thread
From: Christophe Saout @ 2003-12-21 22:01 UTC (permalink / raw)
  To: Mika Penttilä; +Cc: Ben Slusky, linux-kernel, Andrew Morton, jariruusu

Am So, den 21.12.2003 schrieb Mika Penttilä um 21:49:

> Yet another Big Loop Patch... :)
> 
> It's not obvious which parts are bug fixes, and which performance 
> improvements. What exactly breaks loops on journalling filesystems, and 
> how do you solve it?

What about dropping block device backed support for the loop driver
altogether? We now have a nice device mapper in the 2.6 kernel. I don't
like the schizophrenic way the loop driver handles these things (and
you'll always run into similar trouble when trying to fix or resolve
this issue). I know this is kind of radical... but everyone loves
radical ideas. ;)

There also a new device mapper target dm-crypt I've written some time
ago, which does basically the same cryptoloop is doing (or tries to do,
whatever ;)) and which seems to work quite well.

I've never had any feedback from the kernel maintainers about including
it into the main kernel. Andrew?

--
Christophe Saout <christophe@saout.de>
Please avoid sending me Word or PowerPoint attachments.
See http://www.fsf.org/philosophy/no-word-attachments.html


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 21:12                 ` Ben Slusky
@ 2003-12-21 23:00                   ` Mika Penttilä
  2003-12-21 23:05                     ` Ben Slusky
  0 siblings, 1 reply; 12+ messages in thread
From: Mika Penttilä @ 2003-12-21 23:00 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel, Andrew Morton, jariruusu

 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 {
 	int ret;
+	struct bio *rbh;
 
-	/*
-	 * For block backed loop, we know this is a READ
-	 */
 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
 		ret = do_bio_filebacked(lo, bio);
 		bio_endio(bio, bio->bi_size, ret);
-	} else {
-		struct bio *rbh = bio->bi_private;
+	} else if (bio_rw(bio) == WRITE) {
+		/*


AFAICS, this code path is never taken. You don't queue block device writes for the loop thread.


+		 * Write complete, but more pages remain;
+		 * encrypt and write some more pages
+		 */
+		loop_recycle_buffer(lo, bio);



--Mika




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 23:00                   ` Mika Penttilä
@ 2003-12-21 23:05                     ` Ben Slusky
  2003-12-21 23:51                       ` Mika Penttilä
  0 siblings, 1 reply; 12+ messages in thread
From: Ben Slusky @ 2003-12-21 23:05 UTC (permalink / raw)
  To: Mika Penttil?; +Cc: linux-kernel, Andrew Morton, jariruusu

On Mon, 22 Dec 2003 01:00:39 +0200, Mika Penttil? wrote:
> AFAICS, this code path is never taken. You don't queue block device writes 
> for the loop thread.

Yes I do, in loop_end_io_transfer. If we allocated fewer pages for the copy
bio than contained in the original bio, then those pages are recycled for
the next write.

@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
 	if (bio->bi_size)
 		return 1;
 
-	if (err || bio_rw(bio) == WRITE) {
+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
 		bio_endio(rbh, rbh->bi_size, err);
 		if (atomic_dec_and_test(&lo->lo_pending))
 			up(&lo->lo_bh_mutex);

-- 
Ben Slusky                | "Looks like yet another megatrend
sluskyb@paranoiacs.org    |  has come and gone without affecting
sluskyb@stwing.org        |  me in any way whatsoever."
PGP keyID ADA44B3B        |                     www.theonion.com

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 23:05                     ` Ben Slusky
@ 2003-12-21 23:51                       ` Mika Penttilä
  0 siblings, 0 replies; 12+ messages in thread
From: Mika Penttilä @ 2003-12-21 23:51 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel, Andrew Morton, jariruusu



Ben Slusky wrote:

>On Mon, 22 Dec 2003 01:00:39 +0200, Mika Penttil? wrote:
>  
>
>>AFAICS, this code path is never taken. You don't queue block device writes 
>>for the loop thread.
>>    
>>
>
>Yes I do, in loop_end_io_transfer. If we allocated fewer pages for the copy
>bio than contained in the original bio, then those pages are recycled for
>the next write.
>
>@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
> 	if (bio->bi_size)
> 		return 1;
> 
>-	if (err || bio_rw(bio) == WRITE) {
>+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
> 		bio_endio(rbh, rbh->bi_size, err);
> 		if (atomic_dec_and_test(&lo->lo_pending))
> 			up(&lo->lo_bh_mutex);
>  
>
I see, subtle...

--Mika



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
       [not found]                 ` <15kfk-vj-1@gated-at.bofh.it>
@ 2003-12-22  1:21                   ` Andi Kleen
  2003-12-22  1:30                     ` Christophe Saout
  0 siblings, 1 reply; 12+ messages in thread
From: Andi Kleen @ 2003-12-22  1:21 UTC (permalink / raw)
  To: Christophe Saout; +Cc: linux-kernel

Christophe Saout <christophe@saout.de> writes:

> Am So, den 21.12.2003 schrieb Mika Penttilä um 21:49:
>
>> Yet another Big Loop Patch... :)
>> 
>> It's not obvious which parts are bug fixes, and which performance 
>> improvements. What exactly breaks loops on journalling filesystems, and 
>> how do you solve it?
>
> What about dropping block device backed support for the loop driver
> altogether? We now have a nice device mapper in the 2.6 kernel. I don't

Device Mapper doesn't support cryptographic transformations.

-Andi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-22  1:21                   ` [PATCH] loop.c patches, take two Andi Kleen
@ 2003-12-22  1:30                     ` Christophe Saout
  0 siblings, 0 replies; 12+ messages in thread
From: Christophe Saout @ 2003-12-22  1:30 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Am Mo, den 22.12.2003 schrieb Andi Kleen um 02:21:

> > What about dropping block device backed support for the loop driver
> > altogether? We now have a nice device mapper in the 2.6 kernel. I don't
> 
> Device Mapper doesn't support cryptographic transformations.

I know. Not yet.

As I've written some lines below, there is a patch:
http://www.saout.de/misc/dm-crypt.diff that I've written some time ago.

I posted it some time ago but never got feedback from any "VIP" (except
for Joe Thornber who helped developing the target and thinks it looks
good). Some people tested it though and it worked for them (and better
than cryptoloop around test4).

--
Christophe Saout <christophe@saout.de>
Please avoid sending me Word or PowerPoint attachments.
See http://www.fsf.org/philosophy/no-word-attachments.html


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
  2003-12-21 20:07               ` Ben Slusky
  2003-12-21 20:49               ` Mika Penttilä
@ 2003-12-22  6:33               ` Andrew Morton
  2003-12-22 23:32               ` Ben Slusky
  3 siblings, 0 replies; 12+ messages in thread
From: Andrew Morton @ 2003-12-22  6:33 UTC (permalink / raw)
  To: Ben Slusky; +Cc: linux-kernel, jariruusu

Ben Slusky <sluskyb@paranoiacs.org> wrote:
>
> Well, it appears that neither my loop.c patches nor Andrew's were merged
>  into 2.6.0... I'd request that my patches be merged into mainline,
>  since Jari Ruusu has pointed out that Andrew's patch (which removes the
>  separate code path for block-backed loop devices) will break journaling
>  filesystems on loop devices. Right now, journaling FS's on file-backed
>  loop devices are kinda iffy (they will work only if the underlying FS is
>  also journaled, with the correct journal options) but journaling FS's
>  on block-backed loop devices work perfectly. Andrew's patches would
>  break this.

I'm not sure how important this is?

Remember that one of the reasons for dropping the block-backed special case
was that it ran like crap under heavy load.  It locked up, iirc.  Has that
been fixed here?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] loop.c patches, take two
  2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
                                 ` (2 preceding siblings ...)
  2003-12-22  6:33               ` Andrew Morton
@ 2003-12-22 23:32               ` Ben Slusky
  3 siblings, 0 replies; 12+ messages in thread
From: Ben Slusky @ 2003-12-22 23:32 UTC (permalink / raw)
  To: linux-kernel

On Mon, Dec 22 2003 01:36:29 -0500, Andrew Morton <akpm@osdl.org> wrote:
> I'm not sure how important this is?
> 
> Remember that one of the reasons for dropping the block-backed special case
> was that it ran like crap under heavy load. It locked up, iirc. Has that
> been fixed here?

Yes, the old block-backed code was prone to deadlock when trying to
allocate memory under heavy I/O. This patch fixes it.

As to how important this is, it fixes this bug and makes loop devices
usable for swap, i.e. enables encrypted swap. Everybody likes bugfixes
and encrypted swap, right?

-
-Ben


-- 
Ben Slusky                      | The human race never solves
sluskyb@paranoiacs.org          | any of its problems. It merely
sluskyb@stwing.org              | outlives them.
PGP keyID ADA44B3B              |               -David Gerrold

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2003-12-22 23:32 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <MllE.6qa.7@gated-at.bofh.it>
     [not found] ` <MpyW.3Ub.9@gated-at.bofh.it>
     [not found]   ` <MsGq.8cN.1@gated-at.bofh.it>
     [not found]     ` <MvO6.47g.7@gated-at.bofh.it>
     [not found]       ` <MEf3.8oB.13@gated-at.bofh.it>
     [not found]         ` <MROA.319.5@gated-at.bofh.it>
     [not found]           ` <NxkP.4kY.17@gated-at.bofh.it>
     [not found]             ` <15hUp-58e-21@gated-at.bofh.it>
     [not found]               ` <15iGH-6hd-17@gated-at.bofh.it>
     [not found]                 ` <15kfk-vj-1@gated-at.bofh.it>
2003-12-22  1:21                   ` [PATCH] loop.c patches, take two Andi Kleen
2003-12-22  1:30                     ` Christophe Saout
2003-10-30 13:41 [PATCH] remove useless highmem bounce from loop/cryptoloop Ben Slusky
2003-10-30 18:14 ` Jari Ruusu
2003-10-30 21:30   ` Andrew Morton
2003-10-31  0:52     ` Ben Slusky
2003-10-31  9:55       ` Andrew Morton
2003-11-01  0:26         ` Ben Slusky
2003-11-02 20:46           ` Ben Slusky
2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
2003-12-21 20:07               ` Ben Slusky
2003-12-21 20:49               ` Mika Penttilä
2003-12-21 21:12                 ` Ben Slusky
2003-12-21 23:00                   ` Mika Penttilä
2003-12-21 23:05                     ` Ben Slusky
2003-12-21 23:51                       ` Mika Penttilä
2003-12-21 22:01                 ` Christophe Saout
2003-12-22  6:33               ` Andrew Morton
2003-12-22 23:32               ` Ben Slusky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox