All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Mika Penttilä" <mika.penttila@kolumbus.fi>
To: Ben Slusky <sluskyb@paranoiacs.org>
Cc: linux-kernel@vger.kernel.org, Andrew Morton <akpm@osdl.org>,
	jariruusu@users.sourceforge.net
Subject: Re: [PATCH] loop.c patches, take two
Date: Sun, 21 Dec 2003 22:49:47 +0200	[thread overview]
Message-ID: <3FE6076B.3090908@kolumbus.fi> (raw)
In-Reply-To: <20031221195534.GA4721@fukurou.paranoiacs.org>

Yet another Big Loop Patch... :)

It's not obvious which parts are bug fixes, and which performance 
improvements. What exactly breaks loops on journalling filesystems, and 
how do you solve it?

What's the clone_bio() business? Why on reads only?

--Mika


Ben Slusky wrote:

>Hi everybody,
>
>Well, it appears that neither my loop.c patches nor Andrew's were merged
>into 2.6.0... I'd request that my patches be merged into mainline,
>since Jari Ruusu has pointed out that Andrew's patch (which removes the
>separate code path for block-backed loop devices) will break journaling
>filesystems on loop devices. Right now, journaling FS's on file-backed
>loop devices are kinda iffy (they will work only if the underlying FS is
>also journaled, with the correct journal options) but journaling FS's
>on block-backed loop devices work perfectly. Andrew's patches would
>break this.
>
>This first patch improves the memory allocation technique used by
>block-backed loop devices. Specifically the loop device will make
>efficient use of whatever pages it can get, rather than throwing them all
>back in case it didn't get as many as it wanted -- this fixes Bugzilla
>bug #1198.
>
>Taken together, this patch and the following patch make loop devices
>safe for use as swap. Please apply.
>
>-
>-Ben
>
>
>  
>
>------------------------------------------------------------------------
>
>diff -pu linux-2.6.0/drivers/block/loop.c-orig linux-2.6.0/drivers/block/loop.c
>--- linux-2.6.0/drivers/block/loop.c-orig	2003-12-20 21:46:53.430260696 -0500
>+++ linux-2.6.0/drivers/block/loop.c	2003-12-20 21:47:11.743476664 -0500
>@@ -247,12 +247,10 @@ fail:
> static int
> lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
> {
>-	unsigned vecnr;
>-	int ret = 0;
>-
>-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
>-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
>+	struct bio_vec *bvec;
>+	int i, ret = 0;
> 
>+	bio_for_each_segment(bvec, bio, i) {
> 		ret = do_lo_send(lo, bvec, bsize, pos);
> 		if (ret < 0)
> 			break;
>@@ -318,12 +316,10 @@ do_lo_receive(struct loop_device *lo,
> static int
> lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
> {
>-	unsigned vecnr;
>-	int ret = 0;
>-
>-	for (vecnr = 0; vecnr < bio->bi_vcnt; vecnr++) {
>-		struct bio_vec *bvec = &bio->bi_io_vec[vecnr];
>+	struct bio_vec *bvec;
>+	int i, ret = 0;
> 
>+	bio_for_each_segment(bvec, bio, i) {
> 		ret = do_lo_receive(lo, bvec, bsize, pos);
> 		if (ret < 0)
> 			break;
>@@ -353,10 +349,12 @@ static void loop_put_buffer(struct bio *
> 	 * check bi_end_io, may just be a remapped bio
> 	 */
> 	if (bio && bio->bi_end_io == loop_end_io_transfer) {
>+		struct bio_vec *bv;
> 		int i;
> 
>-		for (i = 0; i < bio->bi_vcnt; i++)
>-			__free_page(bio->bi_io_vec[i].bv_page);
>+		if (!bio_flagged(bio, BIO_CLONED))
>+			bio_for_each_segment(bv, bio, i)
>+				__free_page(bv->bv_page);
> 
> 		bio_put(bio);
> 	}
>@@ -413,7 +411,7 @@ static int loop_end_io_transfer(struct b
> 	if (bio->bi_size)
> 		return 1;
> 
>-	if (err || bio_rw(bio) == WRITE) {
>+	if (err || (bio_rw(bio) == WRITE && bio->bi_vcnt == rbh->bi_vcnt)) {
> 		bio_endio(rbh, rbh->bi_size, err);
> 		if (atomic_dec_and_test(&lo->lo_pending))
> 			up(&lo->lo_bh_mutex);
>@@ -430,35 +428,41 @@ static struct bio *loop_
>

>_bio(struct 
> 	struct bio_vec *bv;
> 	int i;
> 
>-	bio = bio_alloc(__GFP_NOWARN, rbh->bi_vcnt);
>+	if (bio_rw(rbh) != WRITE) {
>+		bio = bio_clone(rbh, GFP_NOIO);
>+		return bio;
>+	}
>+
>+	bio = bio_alloc(GFP_NOIO, rbh->bi_vcnt);
> 	if (!bio)
> 		return NULL;
> 
> 	/*
> 	 * iterate iovec list and alloc pages
> 	 */
>-	__bio_for_each_segment(bv, rbh, i, 0) {
>+	bio_for_each_segment(bv, rbh, i) {
> 		struct bio_vec *bbv = &bio->bi_io_vec[i];
> 
>-		bbv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
>+		/* We need one page; the rest we can live without */
>+		bbv->bv_page = alloc_page((bio->bi_vcnt ? __GFP_NOWARN : GFP_NOIO) | __GFP_HIGHMEM);
> 		if (bbv->bv_page == NULL)
>-			goto oom;
>+			break;
> 
>-		bbv->bv_len = bv->bv_len;
> 		bbv->bv_offset = bv->bv_offset;
>+		bio->bi_size += (bbv->bv_len = bv->bv_len);
>+		bio->bi_vcnt++;
> 	}
> 
>-	bio->bi_vcnt = rbh->bi_vcnt;
>-	bio->bi_size = rbh->bi_size;
>-
>-	return bio;
>+	/* Can't get anything done if we didn't get any pages */
>+	if (unlikely(!bio->bi_vcnt)) {
>+		bio_put(bio);
>+		return NULL;
>+	}
> 
>-oom:
>-	while (--i >= 0)
>-		__free_page(bio->bi_io_vec[i].bv_page);
>+	bio->bi_vcnt += (bio->bi_idx = rbh->bi_idx);
>+	bio->bi_rw = rbh->bi_rw;
> 
>-	bio_put(bio);
>-	return NULL;
>+	return bio;
> }
> 
> static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh)
>@@ -479,19 +483,85 @@ static struct bio *loop_get_buffer(struc
> 		if (flags & PF_MEMALLOC)
> 			current->flags |= PF_MEMALLOC;
> 
>-		if (bio == NULL)
>+		if (unlikely(bio == NULL)) {
>+			printk("loop: alloc failed\n");
> 			blk_congestion_wait(WRITE, HZ/10);
>+		}
> 	} while (bio == NULL);
> 
> 	bio->bi_end_io = loop_end_io_transfer;
> 	bio->bi_private = rbh;
> 	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
>-	bio->bi_rw = rbh->bi_rw;
> 	bio->bi_bdev = lo->lo_device;
> 
> 	return bio;
> }
> 
>+static void loop_recycle_buffer(struct loop_device *lo, struct bio *bio)
>+{
>+	struct bio *rbh = bio->bi_private;
>+	struct bio_vec *bv, *bbv, *rbv;
>+	int i, flags, nvecs = bio->bi_vcnt - bio->bi_idx;
>+
>+	/*
>+	 * Comments in ll_rw_blk.c reserve for generic_make_request the right to
>+	 * "change bi_dev and bi_sector for remaps as it sees fit." Doh!
>+	 * Workaround: reset the bi_bdev and recompute the starting sector for
>+	 * the next write.
>+	 */
>+	bio->bi_bdev = lo->lo_device;
>+	bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9);
>+	/* Clean up the flags too */
>+	bio->bi_flags &= (~(BIO_POOL_MASK - 1) | (1 << BIO_UPTODATE));
>+
>+	/*
>+	 * Move the allocated pages into position to transfer more data.
>+	 */
>+	__bio_for_each_segment(bv, bio, i, rbh->bi_idx) {
>+		rbv = &rbh->bi_io_vec[i];
>+		bbv = bv + nvecs;
>+
>+		/* Workaround -- see note above */
>+		bio->bi_sector += rbv->bv_len >> 9;
>+		if (i < bio->bi_idx)
>+			continue;
>+
>+		if (i + nvecs < rbh->bi_vcnt) {
>+			bbv->bv_page = bv->bv_page;
>+			bbv->bv_offset = rbv->bv_offset;
>+			bio->bi_size += (bbv->bv_len = rbv->bv_len);
>+		} else
>+			__free_page(bv->bv_page);
>+		memset(bv, 0, sizeof(*bv));
>+	}
>+
>+	bio->bi_idx = bio->bi_vcnt;
>+	bio->bi_vcnt += nvecs;
>+	bio->bi_vcnt = min(bio->bi_vcnt, rbh->bi_vcnt);
>+
>+	/*
>+	 * If we need more pages, try to get some.
>+	 * Clear PF_MEMALLOC to avoid consuming all available memory.
>+	 */
>+	flags = current->flags;
>+	current->flags &= ~PF_MEMALLOC;
>+
>+	__bio_for_each_segment(rbv, rbh, i, bio->bi_vcnt) {
>+		bv = &bio->bi_io_vec[i];
>+
>+		bv->bv_page = alloc_page(__GFP_NOWARN|__GFP_HIGHMEM);
>+		if (bv->bv_page == NULL)
>+			break;
>+
>+		bv->bv_offset = rbv->bv_offset;
>+		bio->bi_size += (bv->bv_len = rbv->bv_len);
>+		bio->bi_vcnt++;
>+	}
>+
>+	if (flags & PF_MEMALLOC)
>+		current->flags |= PF_MEMALLOC;
>+}
>+
> static int loop_transfer_bio(struct loop_device *lo,
> 			     struct bio *to_bio, struct bio *from_bio)
> {
>@@ -502,17 +572,19 @@ static int loop_transfer_bio(struct loop
> 
> 	IV = from_bio->bi_sector + (lo->lo_offset >> 9);
> 
>-	__bio_for_each_segment(from_bvec, from_bio, i, 0) {
>-		to_bvec = &to_bio->bi_io_vec[i];
>+	__bio_for_each_segment(to_bvec, to_bio, i, from_bio->bi_idx) {
>+		from_bvec = &from_bio->bi_io_vec[i];
> 
>-		kmap(from_bvec->bv_page);
>-		kmap(to_bvec->bv_page);
>-		vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
>-		vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
>-		ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
>-					from_bvec->bv_len, IV);
>-		kunmap(from_bvec->bv_page);
>-		kunmap(to_bvec->bv_page);
>+		if (i >= to_bio->bi_idx) {
>+			kmap(from_bvec->bv_page);
>+			kmap(to_bvec->bv_page);
>+			vfrom = page_address(from_bvec->bv_page) + from_bvec->bv_offset;
>+			vto = page_address(to_bvec->bv_page) + to_bvec->bv_offset;
>+			ret |= lo_do_transfer(lo, bio_data_dir(to_bio), vto, vfrom,
>+						from_bvec->bv_len, IV);
>+			kunmap(from_bvec->bv_page);
>+			kunmap(to_bvec->bv_page);
>+		}
> 		IV += from_bvec->bv_len >> 9;
> 	}
> 
>@@ -579,16 +651,30 @@ inactive:
> static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
> {
> 	int ret;
>+	struct bio *rbh;
> 
>-	/*
>-	 * For block backed loop, we know this is a READ
>-	 */
> 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
> 		ret = do_bio_filebacked(lo, bio);
> 		bio_endio(bio, bio->bi_size, ret);
>-	} else {
>-		struct bio *rbh = bio->bi_private;
>+	} else if (bio_rw(bio) == WRITE) {
>+		/*
>+		 * Write complete, but more pages remain;
>+		 * encrypt and write some more pages
>+		 */
>+		loop_recycle_buffer(lo, bio);
> 
>+		rbh = bio->bi_private;
>+		if ((ret = loop_transfer_bio(lo, bio, rbh))) {
>+			bio_endio(bio, bio->bi_size, ret);
>+			return;
>+		}
>+
>+		generic_make_request(bio);
>+	} else {
>+		/*
>+		 * Read complete; do decryption now
>+		 */
>+		rbh = bio->bi_private;
> 		ret = loop_transfer_bio(lo, bio, rbh);
> 
> 		bio_endio(rbh, rbh->bi_size, ret);
>@@ -606,6 +692,7 @@ static int loop_thread(void *data)
> {
> 	struct loop_device *lo = data;
> 	struct bio *bio;
>+	int x;
> 
> 	daemonize("loop%d", lo->lo_number);
> 
>@@ -640,7 +727,11 @@ static int loop_thread(void *data)
> 			printk("loop: missing bio\n");
> 			continue;
> 		}
>+
>+		x = (lo->lo_flags & LO_FLAGS_DO_BMAP) || bio_rw(bio) != WRITE;
> 		loop_handle_bio(lo, bio);
>+		if (!x)
>+			continue;
> 
> 		/*
> 		 * upped both for pending work and tear-down, lo_pending
>  
>


  parent reply	other threads:[~2003-12-21 20:45 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-10-30 13:41 [PATCH] remove useless highmem bounce from loop/cryptoloop Ben Slusky
2003-10-30 18:14 ` Jari Ruusu
2003-10-30 21:30   ` Andrew Morton
2003-10-31  0:52     ` Ben Slusky
2003-10-31  9:55       ` Andrew Morton
2003-10-31  9:55         ` Andrew Morton
2003-10-31  9:58           ` Andrew Morton
2003-11-13  3:06             ` Ben Slusky
2003-11-01  0:26         ` Ben Slusky
2003-11-02 20:46           ` Ben Slusky
2003-12-21 19:55             ` [PATCH] loop.c patches, take two Ben Slusky
2003-12-21 20:07               ` Ben Slusky
2003-12-21 20:49               ` Mika Penttilä [this message]
2003-12-21 21:12                 ` Ben Slusky
2003-12-21 23:00                   ` Mika Penttilä
2003-12-21 23:05                     ` Ben Slusky
2003-12-21 23:51                       ` Mika Penttilä
2003-12-21 22:01                 ` Christophe Saout
2003-12-22  6:33               ` Andrew Morton
2003-12-22 23:32               ` Ben Slusky
2003-10-31 12:15     ` [PATCH] remove useless highmem bounce from loop/cryptoloop Jari Ruusu
2003-10-31 13:02       ` Jens Axboe
     [not found] <MllE.6qa.7@gated-at.bofh.it>
     [not found] ` <MpyW.3Ub.9@gated-at.bofh.it>
     [not found]   ` <MsGq.8cN.1@gated-at.bofh.it>
     [not found]     ` <MvO6.47g.7@gated-at.bofh.it>
     [not found]       ` <MEf3.8oB.13@gated-at.bofh.it>
     [not found]         ` <MROA.319.5@gated-at.bofh.it>
     [not found]           ` <NxkP.4kY.17@gated-at.bofh.it>
     [not found]             ` <15hUp-58e-21@gated-at.bofh.it>
     [not found]               ` <15iGH-6hd-17@gated-at.bofh.it>
     [not found]                 ` <15kfk-vj-1@gated-at.bofh.it>
2003-12-22  1:21                   ` [PATCH] loop.c patches, take two Andi Kleen
2003-12-22  1:30                     ` Christophe Saout

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3FE6076B.3090908@kolumbus.fi \
    --to=mika.penttila@kolumbus.fi \
    --cc=akpm@osdl.org \
    --cc=jariruusu@users.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sluskyb@paranoiacs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.