All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 14/14] barriers
@ 2009-02-23 19:23 Mikulas Patocka
  2009-02-24 10:11 ` Nikanth K
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Mikulas Patocka @ 2009-02-23 19:23 UTC (permalink / raw)
  To: dm-devel

Barrier support.

Barriers are submitted to a worker thread that issues them in-order.

__process_bio functions is modified that when it sees a barrier request,
it waits for all pending IO before the request, then submits the barrier
and waits for it.

DM_ENDIO_REQUEUE doesn't work and I doubt that it can be made work with
barriers.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm.c |   75 +++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 14 deletions(-)

Index: linux-2.6.29-rc6-devel/drivers/md/dm.c
===================================================================
--- linux-2.6.29-rc6-devel.orig/drivers/md/dm.c	2009-02-23 17:55:37.000000000 +0100
+++ linux-2.6.29-rc6-devel/drivers/md/dm.c	2009-02-23 17:55:45.000000000 +0100
@@ -125,6 +125,11 @@ struct mapped_device {
 	spinlock_t deferred_lock;
 
 	/*
+	 * An error from the barrier request currently being processed.
+	 */
+	int barrier_error;
+
+	/*
 	 * Processing queue (flush/barriers)
 	 */
 	struct workqueue_struct *wq;
@@ -425,6 +430,10 @@ static void end_io_acct(struct dm_io *io
 	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
 	part_stat_unlock();
 
+	/*
+	 * after this is decremented, the bio must not be touched if it is
+	 * barrier bio
+	 */
 	dm_disk(md)->part0.in_flight = pending =
 		atomic_dec_return(&md->pending);
 
@@ -525,19 +534,29 @@ static void dec_pending(struct dm_io *io
 			 */
 			spin_lock_irqsave(&io->md->deferred_lock, flags);
 			if (__noflush_suspending(io->md))
-				bio_list_add(&io->md->deferred, io->bio);
+				bio_list_add_head(&io->md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
 				io->error = -EIO;
 			spin_unlock_irqrestore(&io->md->deferred_lock, flags);
 		}
 
-		end_io_acct(io);
+		if (bio_barrier(io->bio)) {
+			/*
+			 * There could be just one barrier request, so we use
+			 * per-device variable for error reporting is OK.
+			 * Note that you can't touch the bio after end_io_acct
+			 */
+			io->md->barrier_error = io->error;
+			end_io_acct(io);
+		} else {
+			end_io_acct(io);
 
-		if (io->error != DM_ENDIO_REQUEUE) {
-			trace_block_bio_complete(io->md->queue, io->bio);
+			if (io->error != DM_ENDIO_REQUEUE) {
+				trace_block_bio_complete(io->md->queue, io->bio);
 
-			bio_endio(io->bio, io->error);
+				bio_endio(io->bio, io->error);
+			}
 		}
 
 		free_io(io->md, io);
@@ -682,7 +701,7 @@ static struct bio *split_bvec(struct bio
 
 	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw;
+	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
@@ -703,6 +722,7 @@ static struct bio *clone_bio(struct bio 
 
 	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
 	__bio_clone(clone, bio);
+	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
 	clone->bi_destructor = dm_bio_destructor;
 	clone->bi_sector = sector;
 	clone->bi_idx = idx;
@@ -823,7 +843,10 @@ static void __process_bio(struct mapped_
 
 	ci.map = dm_get_table(md);
 	if (unlikely(!ci.map)) {
-		bio_io_error(bio);
+		if (!bio_barrier(bio))
+			bio_io_error(bio);
+		else
+			md->barrier_error = -EIO;
 		return;
 	}
 
@@ -911,11 +934,6 @@ static int dm_request(struct request_que
 	 * There is no use in forwarding any barrier request since we can't
 	 * guarantee it is (or can be) handled by the targets correctly.
 	 */
-	if (unlikely(bio_barrier(bio))) {
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
-
 	down_read(&md->io_lock);
 
 	cpu = part_stat_lock();
@@ -927,7 +945,8 @@ static int dm_request(struct request_que
 	 * If we're suspended we have to queue
 	 * this io for later.
 	 */
-	if (unlikely(test_bit(DMF_BLOCK_IO, &md->flags))) {
+	if (unlikely(test_bit(DMF_BLOCK_IO, &md->flags)) ||
+	    unlikely(bio_barrier(bio))) {
 		up_read(&md->io_lock);
 
 		if (unlikely(test_bit(DMF_BLOCK_FOR_SUSPEND, &md->flags)) &&
@@ -1392,6 +1411,12 @@ static int dm_wait_for_completion(struct
 	return r;
 }
 
+static int dm_flush(struct mapped_device *md)
+{
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+	return 0;
+}
+
 /*
  * Process the deferred bios
  */
@@ -1413,8 +1438,30 @@ static void dm_wq_work(struct work_struc
 		}
 		up_write(&md->io_lock);
 
-		__process_bio(md, c);
+		if (!bio_barrier(c))
+			__process_bio(md, c);
+		else {
+			int error = dm_flush(md);
+			if (unlikely(error)) {
+				bio_endio(c, error);
+				goto next_bio;
+			}
+			if (bio_empty_barrier(c)) {
+				bio_endio(c, 0);
+				goto next_bio;
+			}
+
+			__process_bio(md, c);
+
+			error = dm_flush(md);
+			if (!error && md->barrier_error)
+				error = md->barrier_error;
+
+			if (md->barrier_error != DM_ENDIO_REQUEUE)
+				bio_endio(c, error);
+		}
 
+next_bio:
 		down_write(&md->io_lock);
 	}
 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 14/14] barriers
  2009-02-23 19:23 [PATCH 14/14] barriers Mikulas Patocka
@ 2009-02-24 10:11 ` Nikanth K
  2009-03-24 14:24 ` Alasdair G Kergon
  2009-03-27  6:11 ` Mikulas Patocka
  2 siblings, 0 replies; 10+ messages in thread
From: Nikanth K @ 2009-02-24 10:11 UTC (permalink / raw)
  To: device-mapper development

On Tue, Feb 24, 2009 at 12:53 AM, Mikulas Patocka <mpatocka@redhat.com> wrote:

<snip>

>
> +static int dm_flush(struct mapped_device *md)
> +{
> +       dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
> +       return 0;
> +}
> +

Always returns zero! Why not void?

<snip>

Thanks
Nikanth

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 14/14] barriers
  2009-02-23 19:23 [PATCH 14/14] barriers Mikulas Patocka
  2009-02-24 10:11 ` Nikanth K
@ 2009-03-24 14:24 ` Alasdair G Kergon
  2009-03-24 14:30   ` Mikulas Patocka
  2009-03-27  6:11 ` Mikulas Patocka
  2 siblings, 1 reply; 10+ messages in thread
From: Alasdair G Kergon @ 2009-03-24 14:24 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel

On Mon, Feb 23, 2009 at 02:23:00PM -0500, Mikulas Patocka wrote:
> DM_ENDIO_REQUEUE doesn't work and I doubt that it can be made work with
> barriers.
 
Didn't we discuss this before?
We *require* DM_ENDIO_REQUEUE functionality - I can't apply a patch that
breaks it.  But it is not something that can be used arbitrarily - it is
only used in situations where core dm is firmly in control (and I/O cannot get
through to the device e.g. all paths down in multipath) and I don't see why it
can't continue to work with barriers.

I hope it's just the comment that is wrong here - or does the patch need
updating?

Alasdair
-- 
agk@redhat.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 14/14] barriers
  2009-03-24 14:24 ` Alasdair G Kergon
@ 2009-03-24 14:30   ` Mikulas Patocka
  0 siblings, 0 replies; 10+ messages in thread
From: Mikulas Patocka @ 2009-03-24 14:30 UTC (permalink / raw)
  To: Alasdair G Kergon; +Cc: dm-devel

On Tue, 24 Mar 2009, Alasdair G Kergon wrote:

> On Mon, Feb 23, 2009 at 02:23:00PM -0500, Mikulas Patocka wrote:
> > DM_ENDIO_REQUEUE doesn't work and I doubt that it can be made work with
> > barriers.
>  
> Didn't we discuss this before?
> We *require* DM_ENDIO_REQUEUE functionality - I can't apply a patch that
> breaks it.  But it is not something that can be used arbitrarily - it is
> only used in situations where core dm is firmly in control (and I/O cannot get
> through to the device e.g. all paths down in multipath) and I don't see why it
> can't continue to work with barriers.
> 
> I hope it's just the comment that is wrong here - or does the patch need
> updating?

Oh, sorry, that is old comment for the old version of that patch. I fixed 
the code but forgot to remove the comment. The code in the patch should be 
correct w.r.t. DM_ENDIO_REQUEUE.

Mikulas

> Alasdair
> -- 
> agk@redhat.com
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 14/14] barriers
  2009-02-23 19:23 [PATCH 14/14] barriers Mikulas Patocka
  2009-02-24 10:11 ` Nikanth K
  2009-03-24 14:24 ` Alasdair G Kergon
@ 2009-03-27  6:11 ` Mikulas Patocka
  2009-03-30  6:36   ` Kiyoshi Ueda
  2009-04-03 18:01   ` Alasdair G Kergon
  2 siblings, 2 replies; 10+ messages in thread
From: Mikulas Patocka @ 2009-03-27  6:11 UTC (permalink / raw)
  To: dm-devel; +Cc: Alasdair G Kergon

Barrier support.

Barriers are submitted to a worker thread that issues them in-order.

__split_and_process_bio function is modified that when it sees a barrier
request, it waits for all pending IO before the request, then submits
the barrier and waits for it (we must wait, otherwise it could be
intermixed with following requests).

Errors from the barrier request are recorded in per-device barrier_error
variable. There may be only one barrier request in progress, so using
a per-device variable is correct.

The barrier request is converted to non-barrier request when sending it
to the underlying device.

This patch guarantees correct barrier behavior if the underlying device
doesn't perform write-back caching. The same requirement existed before
barriers were supported in dm.

Bottom layer barrier support (sending barriers by target drivers) and
handling devices with write-back caches will be done in further patches.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm.c |   79 +++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 61 insertions(+), 18 deletions(-)

Index: linux-2.6.29-rc8-devel/drivers/md/dm.c
===================================================================
--- linux-2.6.29-rc8-devel.orig/drivers/md/dm.c	2009-03-27 06:51:29.000000000 +0100
+++ linux-2.6.29-rc8-devel/drivers/md/dm.c	2009-03-27 06:51:33.000000000 +0100
@@ -125,6 +125,11 @@ struct mapped_device {
 	spinlock_t deferred_lock;
 
 	/*
+	 * An error from the barrier request currently being processed.
+	 */
+	int barrier_error;
+
+	/*
 	 * Processing queue (flush/barriers)
 	 */
 	struct workqueue_struct *wq;
@@ -425,6 +430,10 @@ static void end_io_acct(struct dm_io *io
 	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
 	part_stat_unlock();
 
+	/*
+	 * after this is decremented, the bio must not be touched if it is
+	 * barrier bio
+	 */
 	dm_disk(md)->part0.in_flight = pending =
 		atomic_dec_return(&md->pending);
 
@@ -525,19 +534,29 @@ static void dec_pending(struct dm_io *io
 			 */
 			spin_lock_irqsave(&io->md->deferred_lock, flags);
 			if (__noflush_suspending(io->md))
-				bio_list_add(&io->md->deferred, io->bio);
+				bio_list_add_head(&io->md->deferred, io->bio);
 			else
 				/* noflush suspend was interrupted. */
 				io->error = -EIO;
 			spin_unlock_irqrestore(&io->md->deferred_lock, flags);
 		}
 
-		end_io_acct(io);
+		if (bio_barrier(io->bio)) {
+			/*
+			 * There could be just one barrier request, so we use
+			 * per-device variable for error reporting is OK.
+			 * Note that you can't touch the bio after end_io_acct
+			 */
+			io->md->barrier_error = io->error;
+			end_io_acct(io);
+		} else {
+			end_io_acct(io);
 
-		if (io->error != DM_ENDIO_REQUEUE) {
-			trace_block_bio_complete(io->md->queue, io->bio);
+			if (io->error != DM_ENDIO_REQUEUE) {
+				trace_block_bio_complete(io->md->queue, io->bio);
 
-			bio_endio(io->bio, io->error);
+				bio_endio(io->bio, io->error);
+			}
 		}
 
 		free_io(io->md, io);
@@ -682,7 +701,7 @@ static struct bio *split_bvec(struct bio
 
 	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw;
+	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
@@ -703,6 +722,7 @@ static struct bio *clone_bio(struct bio 
 
 	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
 	__bio_clone(clone, bio);
+	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
 	clone->bi_destructor = dm_bio_destructor;
 	clone->bi_sector = sector;
 	clone->bi_idx = idx;
@@ -823,7 +843,10 @@ static void __split_and_process_bio(stru
 
 	ci.map = dm_get_table(md);
 	if (unlikely(!ci.map)) {
-		bio_io_error(bio);
+		if (!bio_barrier(bio))
+			bio_io_error(bio);
+		else
+			md->barrier_error = -EIO;
 		return;
 	}
 
@@ -907,15 +930,6 @@ static int dm_request(struct request_que
 	struct mapped_device *md = q->queuedata;
 	int cpu;
 
-	/*
-	 * There is no use in forwarding any barrier request since we can't
-	 * guarantee it is (or can be) handled by the targets correctly.
-	 */
-	if (unlikely(bio_barrier(bio))) {
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
-
 	down_read(&md->io_lock);
 
 	cpu = part_stat_lock();
@@ -927,7 +941,8 @@ static int dm_request(struct request_que
 	 * If we're suspended or the thread is processing barriers
 	 * we have to queue this io for later.
 	 */
-	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags))) {
+	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags)) ||
+	    unlikely(bio_barrier(bio))) {
 		up_read(&md->io_lock);
 
 		if (unlikely(test_bit(DMF_BLOCK_FOR_SUSPEND, &md->flags)) &&
@@ -1393,6 +1408,12 @@ static int dm_wait_for_completion(struct
 	return r;
 }
 
+static int dm_flush(struct mapped_device *md)
+{
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+	return 0;
+}
+
 /*
  * Process the deferred bios
  */
@@ -1415,8 +1436,30 @@ static void dm_wq_work(struct work_struc
 			break;
 		}
 
-		__split_and_process_bio(md, c);
+		if (!bio_barrier(c))
+			__split_and_process_bio(md, c);
+		else {
+			int error = dm_flush(md);
+			if (unlikely(error)) {
+				bio_endio(c, error);
+				goto next_bio;
+			}
+			if (bio_empty_barrier(c)) {
+				bio_endio(c, 0);
+				goto next_bio;
+			}
+
+			__split_and_process_bio(md, c);
+
+			error = dm_flush(md);
+			if (!error && md->barrier_error)
+				error = md->barrier_error;
+
+			if (md->barrier_error != DM_ENDIO_REQUEUE)
+				bio_endio(c, error);
+		}
 
+next_bio:
 		down_write(&md->io_lock);
 	}
 }

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 14/14] barriers
  2009-03-27  6:11 ` Mikulas Patocka
@ 2009-03-30  6:36   ` Kiyoshi Ueda
  2009-03-30 14:02     ` Mikulas Patocka
  2009-04-03 18:01   ` Alasdair G Kergon
  1 sibling, 1 reply; 10+ messages in thread
From: Kiyoshi Ueda @ 2009-03-30  6:36 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: device-mapper development

Hi Mikulas,

This patch doesn't apply to 2.6.29.
Probably you are using a pretty older kernel?

Thanks,
Kiyoshi Ueda


On 2009/03/27 15:11 +0900, Mikulas Patocka wrote:
> Barrier support.
> 
> Barriers are submitted to a worker thread that issues them in-order.
> 
> __split_and_process_bio function is modified that when it sees a barrier
> request, it waits for all pending IO before the request, then submits
> the barrier and waits for it (we must wait, otherwise it could be
> intermixed with following requests).
> 
> Errors from the barrier request are recorded in per-device barrier_error
> variable. There may be only one barrier request in progress, so using
> a per-device variable is correct.
> 
> The barrier request is converted to non-barrier request when sending it
> to the underlying device.
> 
> This patch guarantees correct barrier behavior if the underlying device
> doesn't perform write-back caching. The same requirement existed before
> barriers were supported in dm.
> 
> Bottom layer barrier support (sending barriers by target drivers) and
> handling devices with write-back caches will be done in further patches.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> 
> ---
>  drivers/md/dm.c |   79 +++++++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 61 insertions(+), 18 deletions(-)
> 
> Index: linux-2.6.29-rc8-devel/drivers/md/dm.c
> ===================================================================
> --- linux-2.6.29-rc8-devel.orig/drivers/md/dm.c	2009-03-27 06:51:29.000000000 +0100
> +++ linux-2.6.29-rc8-devel/drivers/md/dm.c	2009-03-27 06:51:33.000000000 +0100
> @@ -125,6 +125,11 @@ struct mapped_device {
>  	spinlock_t deferred_lock;
>  
>  	/*
> +	 * An error from the barrier request currently being processed.
> +	 */
> +	int barrier_error;
> +
> +	/*
>  	 * Processing queue (flush/barriers)
>  	 */
>  	struct workqueue_struct *wq;
> @@ -425,6 +430,10 @@ static void end_io_acct(struct dm_io *io
>  	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
>  	part_stat_unlock();
>  
> +	/*
> +	 * after this is decremented, the bio must not be touched if it is
> +	 * barrier bio
> +	 */
>  	dm_disk(md)->part0.in_flight = pending =
>  		atomic_dec_return(&md->pending);
>  
> @@ -525,19 +534,29 @@ static void dec_pending(struct dm_io *io
>  			 */
>  			spin_lock_irqsave(&io->md->deferred_lock, flags);
>  			if (__noflush_suspending(io->md))
> -				bio_list_add(&io->md->deferred, io->bio);
> +				bio_list_add_head(&io->md->deferred, io->bio);
>  			else
>  				/* noflush suspend was interrupted. */
>  				io->error = -EIO;
>  			spin_unlock_irqrestore(&io->md->deferred_lock, flags);
>  		}
>  
> -		end_io_acct(io);
> +		if (bio_barrier(io->bio)) {
> +			/*
> +			 * There could be just one barrier request, so we use
> +			 * per-device variable for error reporting is OK.
> +			 * Note that you can't touch the bio after end_io_acct
> +			 */
> +			io->md->barrier_error = io->error;
> +			end_io_acct(io);
> +		} else {
> +			end_io_acct(io);
>  
> -		if (io->error != DM_ENDIO_REQUEUE) {
> -			trace_block_bio_complete(io->md->queue, io->bio);
> +			if (io->error != DM_ENDIO_REQUEUE) {
> +				trace_block_bio_complete(io->md->queue, io->bio);
>  
> -			bio_endio(io->bio, io->error);
> +				bio_endio(io->bio, io->error);
> +			}
>  		}
>  
>  		free_io(io->md, io);
> @@ -682,7 +701,7 @@ static struct bio *split_bvec(struct bio
>  
>  	clone->bi_sector = sector;
>  	clone->bi_bdev = bio->bi_bdev;
> -	clone->bi_rw = bio->bi_rw;
> +	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
>  	clone->bi_vcnt = 1;
>  	clone->bi_size = to_bytes(len);
>  	clone->bi_io_vec->bv_offset = offset;
> @@ -703,6 +722,7 @@ static struct bio *clone_bio(struct bio 
>  
>  	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
>  	__bio_clone(clone, bio);
> +	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
>  	clone->bi_destructor = dm_bio_destructor;
>  	clone->bi_sector = sector;
>  	clone->bi_idx = idx;
> @@ -823,7 +843,10 @@ static void __split_and_process_bio(stru
>  
>  	ci.map = dm_get_table(md);
>  	if (unlikely(!ci.map)) {
> -		bio_io_error(bio);
> +		if (!bio_barrier(bio))
> +			bio_io_error(bio);
> +		else
> +			md->barrier_error = -EIO;
>  		return;
>  	}
>  
> @@ -907,15 +930,6 @@ static int dm_request(struct request_que
>  	struct mapped_device *md = q->queuedata;
>  	int cpu;
>  
> -	/*
> -	 * There is no use in forwarding any barrier request since we can't
> -	 * guarantee it is (or can be) handled by the targets correctly.
> -	 */
> -	if (unlikely(bio_barrier(bio))) {
> -		bio_endio(bio, -EOPNOTSUPP);
> -		return 0;
> -	}
> -
>  	down_read(&md->io_lock);
>  
>  	cpu = part_stat_lock();
> @@ -927,7 +941,8 @@ static int dm_request(struct request_que
>  	 * If we're suspended or the thread is processing barriers
>  	 * we have to queue this io for later.
>  	 */
> -	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags))) {
> +	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags)) ||
> +	    unlikely(bio_barrier(bio))) {
>  		up_read(&md->io_lock);
>  
>  		if (unlikely(test_bit(DMF_BLOCK_FOR_SUSPEND, &md->flags)) &&
> @@ -1393,6 +1408,12 @@ static int dm_wait_for_completion(struct
>  	return r;
>  }
>  
> +static int dm_flush(struct mapped_device *md)
> +{
> +	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
> +	return 0;
> +}
> +
>  /*
>   * Process the deferred bios
>   */
> @@ -1415,8 +1436,30 @@ static void dm_wq_work(struct work_struc
>  			break;
>  		}
>  
> -		__split_and_process_bio(md, c);
> +		if (!bio_barrier(c))
> +			__split_and_process_bio(md, c);
> +		else {
> +			int error = dm_flush(md);
> +			if (unlikely(error)) {
> +				bio_endio(c, error);
> +				goto next_bio;
> +			}
> +			if (bio_empty_barrier(c)) {
> +				bio_endio(c, 0);
> +				goto next_bio;
> +			}
> +
> +			__split_and_process_bio(md, c);
> +
> +			error = dm_flush(md);
> +			if (!error && md->barrier_error)
> +				error = md->barrier_error;
> +
> +			if (md->barrier_error != DM_ENDIO_REQUEUE)
> +				bio_endio(c, error);
> +		}
>  
> +next_bio:
>  		down_write(&md->io_lock);
>  	}
>  }
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 14/14] barriers
  2009-03-30  6:36   ` Kiyoshi Ueda
@ 2009-03-30 14:02     ` Mikulas Patocka
  2009-04-01  5:48       ` Kiyoshi Ueda
  0 siblings, 1 reply; 10+ messages in thread
From: Mikulas Patocka @ 2009-03-30 14:02 UTC (permalink / raw)
  To: Kiyoshi Ueda; +Cc: device-mapper development

Hi

You must apply the whole series of patches from patch 1 (that I sent long 
time before). This patch is an update to the series.

Mikulas

> Hi Mikulas,
> 
> This patch doesn't apply to 2.6.29.
> Probably you are using a pretty older kernel?
> 
> Thanks,
> Kiyoshi Ueda
> 
> 
> On 2009/03/27 15:11 +0900, Mikulas Patocka wrote:
> > Barrier support.
> > 
> > Barriers are submitted to a worker thread that issues them in-order.
> > 
> > __split_and_process_bio function is modified that when it sees a barrier
> > request, it waits for all pending IO before the request, then submits
> > the barrier and waits for it (we must wait, otherwise it could be
> > intermixed with following requests).
> > 
> > Errors from the barrier request are recorded in per-device barrier_error
> > variable. There may be only one barrier request in progress, so using
> > a per-device variable is correct.
> > 
> > The barrier request is converted to non-barrier request when sending it
> > to the underlying device.
> > 
> > This patch guarantees correct barrier behavior if the underlying device
> > doesn't perform write-back caching. The same requirement existed before
> > barriers were supported in dm.
> > 
> > Bottom layer barrier support (sending barriers by target drivers) and
> > handling devices with write-back caches will be done in further patches.
> > 
> > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > 
> > ---
> >  drivers/md/dm.c |   79 +++++++++++++++++++++++++++++++++++++++++++-------------
> >  1 file changed, 61 insertions(+), 18 deletions(-)
> > 
> > Index: linux-2.6.29-rc8-devel/drivers/md/dm.c
> > ===================================================================
> > --- linux-2.6.29-rc8-devel.orig/drivers/md/dm.c	2009-03-27 06:51:29.000000000 +0100
> > +++ linux-2.6.29-rc8-devel/drivers/md/dm.c	2009-03-27 06:51:33.000000000 +0100
> > @@ -125,6 +125,11 @@ struct mapped_device {
> >  	spinlock_t deferred_lock;
> >  
> >  	/*
> > +	 * An error from the barrier request currently being processed.
> > +	 */
> > +	int barrier_error;
> > +
> > +	/*
> >  	 * Processing queue (flush/barriers)
> >  	 */
> >  	struct workqueue_struct *wq;
> > @@ -425,6 +430,10 @@ static void end_io_acct(struct dm_io *io
> >  	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
> >  	part_stat_unlock();
> >  
> > +	/*
> > +	 * after this is decremented, the bio must not be touched if it is
> > +	 * barrier bio
> > +	 */
> >  	dm_disk(md)->part0.in_flight = pending =
> >  		atomic_dec_return(&md->pending);
> >  
> > @@ -525,19 +534,29 @@ static void dec_pending(struct dm_io *io
> >  			 */
> >  			spin_lock_irqsave(&io->md->deferred_lock, flags);
> >  			if (__noflush_suspending(io->md))
> > -				bio_list_add(&io->md->deferred, io->bio);
> > +				bio_list_add_head(&io->md->deferred, io->bio);
> >  			else
> >  				/* noflush suspend was interrupted. */
> >  				io->error = -EIO;
> >  			spin_unlock_irqrestore(&io->md->deferred_lock, flags);
> >  		}
> >  
> > -		end_io_acct(io);
> > +		if (bio_barrier(io->bio)) {
> > +			/*
> > +			 * There could be just one barrier request, so we use
> > +			 * per-device variable for error reporting is OK.
> > +			 * Note that you can't touch the bio after end_io_acct
> > +			 */
> > +			io->md->barrier_error = io->error;
> > +			end_io_acct(io);
> > +		} else {
> > +			end_io_acct(io);
> >  
> > -		if (io->error != DM_ENDIO_REQUEUE) {
> > -			trace_block_bio_complete(io->md->queue, io->bio);
> > +			if (io->error != DM_ENDIO_REQUEUE) {
> > +				trace_block_bio_complete(io->md->queue, io->bio);
> >  
> > -			bio_endio(io->bio, io->error);
> > +				bio_endio(io->bio, io->error);
> > +			}
> >  		}
> >  
> >  		free_io(io->md, io);
> > @@ -682,7 +701,7 @@ static struct bio *split_bvec(struct bio
> >  
> >  	clone->bi_sector = sector;
> >  	clone->bi_bdev = bio->bi_bdev;
> > -	clone->bi_rw = bio->bi_rw;
> > +	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
> >  	clone->bi_vcnt = 1;
> >  	clone->bi_size = to_bytes(len);
> >  	clone->bi_io_vec->bv_offset = offset;
> > @@ -703,6 +722,7 @@ static struct bio *clone_bio(struct bio 
> >  
> >  	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
> >  	__bio_clone(clone, bio);
> > +	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
> >  	clone->bi_destructor = dm_bio_destructor;
> >  	clone->bi_sector = sector;
> >  	clone->bi_idx = idx;
> > @@ -823,7 +843,10 @@ static void __split_and_process_bio(stru
> >  
> >  	ci.map = dm_get_table(md);
> >  	if (unlikely(!ci.map)) {
> > -		bio_io_error(bio);
> > +		if (!bio_barrier(bio))
> > +			bio_io_error(bio);
> > +		else
> > +			md->barrier_error = -EIO;
> >  		return;
> >  	}
> >  
> > @@ -907,15 +930,6 @@ static int dm_request(struct request_que
> >  	struct mapped_device *md = q->queuedata;
> >  	int cpu;
> >  
> > -	/*
> > -	 * There is no use in forwarding any barrier request since we can't
> > -	 * guarantee it is (or can be) handled by the targets correctly.
> > -	 */
> > -	if (unlikely(bio_barrier(bio))) {
> > -		bio_endio(bio, -EOPNOTSUPP);
> > -		return 0;
> > -	}
> > -
> >  	down_read(&md->io_lock);
> >  
> >  	cpu = part_stat_lock();
> > @@ -927,7 +941,8 @@ static int dm_request(struct request_que
> >  	 * If we're suspended or the thread is processing barriers
> >  	 * we have to queue this io for later.
> >  	 */
> > -	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags))) {
> > +	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags)) ||
> > +	    unlikely(bio_barrier(bio))) {
> >  		up_read(&md->io_lock);
> >  
> >  		if (unlikely(test_bit(DMF_BLOCK_FOR_SUSPEND, &md->flags)) &&
> > @@ -1393,6 +1408,12 @@ static int dm_wait_for_completion(struct
> >  	return r;
> >  }
> >  
> > +static int dm_flush(struct mapped_device *md)
> > +{
> > +	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
> > +	return 0;
> > +}
> > +
> >  /*
> >   * Process the deferred bios
> >   */
> > @@ -1415,8 +1436,30 @@ static void dm_wq_work(struct work_struc
> >  			break;
> >  		}
> >  
> > -		__split_and_process_bio(md, c);
> > +		if (!bio_barrier(c))
> > +			__split_and_process_bio(md, c);
> > +		else {
> > +			int error = dm_flush(md);
> > +			if (unlikely(error)) {
> > +				bio_endio(c, error);
> > +				goto next_bio;
> > +			}
> > +			if (bio_empty_barrier(c)) {
> > +				bio_endio(c, 0);
> > +				goto next_bio;
> > +			}
> > +
> > +			__split_and_process_bio(md, c);
> > +
> > +			error = dm_flush(md);
> > +			if (!error && md->barrier_error)
> > +				error = md->barrier_error;
> > +
> > +			if (md->barrier_error != DM_ENDIO_REQUEUE)
> > +				bio_endio(c, error);
> > +		}
> >  
> > +next_bio:
> >  		down_write(&md->io_lock);
> >  	}
> >  }
> > 
> > --
> > dm-devel mailing list
> > dm-devel@redhat.com
> > https://www.redhat.com/mailman/listinfo/dm-devel
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 14/14] barriers
  2009-03-30 14:02     ` Mikulas Patocka
@ 2009-04-01  5:48       ` Kiyoshi Ueda
  2009-04-01 12:24         ` Alasdair G Kergon
  0 siblings, 1 reply; 10+ messages in thread
From: Kiyoshi Ueda @ 2009-04-01  5:48 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: device-mapper development

Hi Mikulas,

Sorry for my less explanation.

On 2009/03/30 23:02 +0900, Mikulas Patocka wrote:
> Hi
> 
> You must apply the whole series of patches from patch 1 (that I sent long 
> time before). This patch is an update to the series.

Yes, I know that.
But the Milan's patch (http://marc.info/?l=dm-devel&m=123695241224620&w=2)
changed around dec_pending() in dm.c and it has already been merged into
2.6.29 (after 2.6.29-rc8).
Your patch conflicts against the change and is rejected, when I try to
apply your whole patch-set on top of 2.6.29.

The conflict may be just cosmetic reasons, but I'm not sure.
So I noticed that to you, so that I can review this patch correctly
for my request-based dm work.

Thanks,
Kiyoshi Ueda


> Mikulas
> 
>> Hi Mikulas,
>>
>> This patch doesn't apply to 2.6.29.
>> Probably you are using a pretty older kernel?
>>
>> Thanks,
>> Kiyoshi Ueda
>>
>>
>> On 2009/03/27 15:11 +0900, Mikulas Patocka wrote:
>>> Barrier support.
>>>
>>> Barriers are submitted to a worker thread that issues them in-order.
>>>
>>> __split_and_process_bio function is modified that when it sees a barrier
>>> request, it waits for all pending IO before the request, then submits
>>> the barrier and waits for it (we must wait, otherwise it could be
>>> intermixed with following requests).
>>>
>>> Errors from the barrier request are recorded in per-device barrier_error
>>> variable. There may be only one barrier request in progress, so using
>>> a per-device variable is correct.
>>>
>>> The barrier request is converted to non-barrier request when sending it
>>> to the underlying device.
>>>
>>> This patch guarantees correct barrier behavior if the underlying device
>>> doesn't perform write-back caching. The same requirement existed before
>>> barriers were supported in dm.
>>>
>>> Bottom layer barrier support (sending barriers by target drivers) and
>>> handling devices with write-back caches will be done in further patches.
>>>
>>> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
>>>
>>> ---
>>>  drivers/md/dm.c |   79 +++++++++++++++++++++++++++++++++++++++++++-------------
>>>  1 file changed, 61 insertions(+), 18 deletions(-)
>>>
>>> Index: linux-2.6.29-rc8-devel/drivers/md/dm.c
>>> ===================================================================
>>> --- linux-2.6.29-rc8-devel.orig/drivers/md/dm.c	2009-03-27 06:51:29.000000000 +0100
>>> +++ linux-2.6.29-rc8-devel/drivers/md/dm.c	2009-03-27 06:51:33.000000000 +0100
>>> @@ -125,6 +125,11 @@ struct mapped_device {
>>>  	spinlock_t deferred_lock;
>>>  
>>>  	/*
>>> +	 * An error from the barrier request currently being processed.
>>> +	 */
>>> +	int barrier_error;
>>> +
>>> +	/*
>>>  	 * Processing queue (flush/barriers)
>>>  	 */
>>>  	struct workqueue_struct *wq;
>>> @@ -425,6 +430,10 @@ static void end_io_acct(struct dm_io *io
>>>  	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
>>>  	part_stat_unlock();
>>>  
>>> +	/*
>>> +	 * after this is decremented, the bio must not be touched if it is
>>> +	 * barrier bio
>>> +	 */
>>>  	dm_disk(md)->part0.in_flight = pending =
>>>  		atomic_dec_return(&md->pending);
>>>  
>>> @@ -525,19 +534,29 @@ static void dec_pending(struct dm_io *io
>>>  			 */
>>>  			spin_lock_irqsave(&io->md->deferred_lock, flags);
>>>  			if (__noflush_suspending(io->md))
>>> -				bio_list_add(&io->md->deferred, io->bio);
>>> +				bio_list_add_head(&io->md->deferred, io->bio);
>>>  			else
>>>  				/* noflush suspend was interrupted. */
>>>  				io->error = -EIO;
>>>  			spin_unlock_irqrestore(&io->md->deferred_lock, flags);
>>>  		}
>>>  
>>> -		end_io_acct(io);
>>> +		if (bio_barrier(io->bio)) {
>>> +			/*
>>> +			 * There could be just one barrier request, so we use
>>> +			 * per-device variable for error reporting is OK.
>>> +			 * Note that you can't touch the bio after end_io_acct
>>> +			 */
>>> +			io->md->barrier_error = io->error;
>>> +			end_io_acct(io);
>>> +		} else {
>>> +			end_io_acct(io);
>>>  
>>> -		if (io->error != DM_ENDIO_REQUEUE) {
>>> -			trace_block_bio_complete(io->md->queue, io->bio);
>>> +			if (io->error != DM_ENDIO_REQUEUE) {
>>> +				trace_block_bio_complete(io->md->queue, io->bio);
>>>  
>>> -			bio_endio(io->bio, io->error);
>>> +				bio_endio(io->bio, io->error);
>>> +			}
>>>  		}
>>>  
>>>  		free_io(io->md, io);
>>> @@ -682,7 +701,7 @@ static struct bio *split_bvec(struct bio
>>>  
>>>  	clone->bi_sector = sector;
>>>  	clone->bi_bdev = bio->bi_bdev;
>>> -	clone->bi_rw = bio->bi_rw;
>>> +	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
>>>  	clone->bi_vcnt = 1;
>>>  	clone->bi_size = to_bytes(len);
>>>  	clone->bi_io_vec->bv_offset = offset;
>>> @@ -703,6 +722,7 @@ static struct bio *clone_bio(struct bio 
>>>  
>>>  	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
>>>  	__bio_clone(clone, bio);
>>> +	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
>>>  	clone->bi_destructor = dm_bio_destructor;
>>>  	clone->bi_sector = sector;
>>>  	clone->bi_idx = idx;
>>> @@ -823,7 +843,10 @@ static void __split_and_process_bio(stru
>>>  
>>>  	ci.map = dm_get_table(md);
>>>  	if (unlikely(!ci.map)) {
>>> -		bio_io_error(bio);
>>> +		if (!bio_barrier(bio))
>>> +			bio_io_error(bio);
>>> +		else
>>> +			md->barrier_error = -EIO;
>>>  		return;
>>>  	}
>>>  
>>> @@ -907,15 +930,6 @@ static int dm_request(struct request_que
>>>  	struct mapped_device *md = q->queuedata;
>>>  	int cpu;
>>>  
>>> -	/*
>>> -	 * There is no use in forwarding any barrier request since we can't
>>> -	 * guarantee it is (or can be) handled by the targets correctly.
>>> -	 */
>>> -	if (unlikely(bio_barrier(bio))) {
>>> -		bio_endio(bio, -EOPNOTSUPP);
>>> -		return 0;
>>> -	}
>>> -
>>>  	down_read(&md->io_lock);
>>>  
>>>  	cpu = part_stat_lock();
>>> @@ -927,7 +941,8 @@ static int dm_request(struct request_que
>>>  	 * If we're suspended or the thread is processing barriers
>>>  	 * we have to queue this io for later.
>>>  	 */
>>> -	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags))) {
>>> +	if (unlikely(test_bit(DMF_QUEUE_IO_FOR_THREAD, &md->flags)) ||
>>> +	    unlikely(bio_barrier(bio))) {
>>>  		up_read(&md->io_lock);
>>>  
>>>  		if (unlikely(test_bit(DMF_BLOCK_FOR_SUSPEND, &md->flags)) &&
>>> @@ -1393,6 +1408,12 @@ static int dm_wait_for_completion(struct
>>>  	return r;
>>>  }
>>>  
>>> +static int dm_flush(struct mapped_device *md)
>>> +{
>>> +	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
>>> +	return 0;
>>> +}
>>> +
>>>  /*
>>>   * Process the deferred bios
>>>   */
>>> @@ -1415,8 +1436,30 @@ static void dm_wq_work(struct work_struc
>>>  			break;
>>>  		}
>>>  
>>> -		__split_and_process_bio(md, c);
>>> +		if (!bio_barrier(c))
>>> +			__split_and_process_bio(md, c);
>>> +		else {
>>> +			int error = dm_flush(md);
>>> +			if (unlikely(error)) {
>>> +				bio_endio(c, error);
>>> +				goto next_bio;
>>> +			}
>>> +			if (bio_empty_barrier(c)) {
>>> +				bio_endio(c, 0);
>>> +				goto next_bio;
>>> +			}
>>> +
>>> +			__split_and_process_bio(md, c);
>>> +
>>> +			error = dm_flush(md);
>>> +			if (!error && md->barrier_error)
>>> +				error = md->barrier_error;
>>> +
>>> +			if (md->barrier_error != DM_ENDIO_REQUEUE)
>>> +				bio_endio(c, error);
>>> +		}
>>>  
>>> +next_bio:
>>>  		down_write(&md->io_lock);
>>>  	}
>>>  }
>>>
>>> --
>>> dm-devel mailing list
>>> dm-devel@redhat.com
>>> https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 14/14] barriers
  2009-04-01  5:48       ` Kiyoshi Ueda
@ 2009-04-01 12:24         ` Alasdair G Kergon
  0 siblings, 0 replies; 10+ messages in thread
From: Alasdair G Kergon @ 2009-04-01 12:24 UTC (permalink / raw)
  To: Kiyoshi Ueda; +Cc: device-mapper development, Mikulas Patocka

I don't think it's significant: use the version I've pushed to:

http://www.kernel.org/pub/linux/kernel/people/agk/patches/2.6/editing/series.html
dm-implement-basic-barrier-support.patch

Alasdair
-- 
agk@redhat.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 14/14] barriers
  2009-03-27  6:11 ` Mikulas Patocka
  2009-03-30  6:36   ` Kiyoshi Ueda
@ 2009-04-03 18:01   ` Alasdair G Kergon
  1 sibling, 0 replies; 10+ messages in thread
From: Alasdair G Kergon @ 2009-04-03 18:01 UTC (permalink / raw)
  To: dm-devel

On Fri, Mar 27, 2009 at 02:11:04AM -0400, Mikulas Patocka wrote:
> Barrier support.
 
Queued for linux-next.

> +		else {
> +			int error = dm_flush(md);
> +			if (unlikely(error)) {
> +				bio_endio(c, error);
> +				goto next_bio;
> +			}
> +			if (bio_empty_barrier(c)) {
> +				bio_endio(c, 0);
> +				goto next_bio;
> +			}
> +
> +			__split_and_process_bio(md, c);
> +
> +			error = dm_flush(md);
> +			if (!error && md->barrier_error)
> +				error = md->barrier_error;
> +
> +			if (md->barrier_error != DM_ENDIO_REQUEUE)
> +				bio_endio(c, error);
> +		}
  
I made that a separate function.

Alasdair
-- 
agk@redhat.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2009-04-03 18:01 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-23 19:23 [PATCH 14/14] barriers Mikulas Patocka
2009-02-24 10:11 ` Nikanth K
2009-03-24 14:24 ` Alasdair G Kergon
2009-03-24 14:30   ` Mikulas Patocka
2009-03-27  6:11 ` Mikulas Patocka
2009-03-30  6:36   ` Kiyoshi Ueda
2009-03-30 14:02     ` Mikulas Patocka
2009-04-01  5:48       ` Kiyoshi Ueda
2009-04-01 12:24         ` Alasdair G Kergon
2009-04-03 18:01   ` Alasdair G Kergon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.