- * [PATCH v2 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list()
  2016-09-14 16:29 [PATCH for-4.9 v2 0/6] [PATCH for-4.9 v2 0/3] allow delayed requeue of blk-mq requests Mike Snitzer
@ 2016-09-14 16:29 ` Mike Snitzer
  2016-09-14 16:34   ` Jens Axboe
  2016-09-15  6:10   ` [PATCH v2 " Hannes Reinecke
  2016-09-14 16:29 ` [PATCH v2 2/6] dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests Mike Snitzer
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 17+ messages in thread
From: Mike Snitzer @ 2016-09-14 16:29 UTC (permalink / raw)
  To: axboe, dm-devel; +Cc: linux-block
blk_mq_delay_kick_requeue_list() provides the ability to kick the
q->requeue_list after a specified time.  To do this the request_queue's
'requeue_work' member was changed to a delayed_work.
blk_mq_delay_kick_requeue_list() allows DM to defer processing requeued
requests while it doesn't make sense to immediately requeue them
(e.g. when all paths in a DM multipath have failed).
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 block/blk-mq.c         | 15 +++++++++++----
 include/linux/blk-mq.h |  1 +
 include/linux/blkdev.h |  2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 13f5a6c..4ff96b7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -501,7 +501,7 @@ EXPORT_SYMBOL(blk_mq_requeue_request);
 static void blk_mq_requeue_work(struct work_struct *work)
 {
 	struct request_queue *q =
-		container_of(work, struct request_queue, requeue_work);
+		container_of(work, struct request_queue, requeue_work.work);
 	LIST_HEAD(rq_list);
 	struct request *rq, *next;
 	unsigned long flags;
@@ -556,16 +556,23 @@ EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
 void blk_mq_cancel_requeue_work(struct request_queue *q)
 {
-	cancel_work_sync(&q->requeue_work);
+	cancel_delayed_work_sync(&q->requeue_work);
 }
 EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
 
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
-	kblockd_schedule_work(&q->requeue_work);
+	kblockd_schedule_delayed_work(&q->requeue_work, 0);
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
+void blk_mq_delay_kick_requeue_list(struct request_queue *q,
+				    unsigned long delay)
+{
+	kblockd_schedule_delayed_work(&q->requeue_work, delay);
+}
+EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
+
 void blk_mq_abort_requeue_list(struct request_queue *q)
 {
 	unsigned long flags;
@@ -2082,7 +2089,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	q->sg_reserved_size = INT_MAX;
 
-	INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
+	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
 	INIT_LIST_HEAD(&q->requeue_list);
 	spin_lock_init(&q->requeue_lock);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e43bbff..bac01bb 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -232,6 +232,7 @@ void blk_mq_requeue_request(struct request *rq);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
 void blk_mq_cancel_requeue_work(struct request_queue *q);
 void blk_mq_kick_requeue_list(struct request_queue *q);
+void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long delay);
 void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq, int error);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e79055c..b0a6189 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -449,7 +449,7 @@ struct request_queue {
 
 	struct list_head	requeue_list;
 	spinlock_t		requeue_lock;
-	struct work_struct	requeue_work;
+	struct delayed_work	requeue_work;
 
 	struct mutex		sysfs_lock;
 
-- 
2.7.4 (Apple Git-66)
^ permalink raw reply related	[flat|nested] 17+ messages in thread
- * Re: [PATCH v2 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list()
  2016-09-14 16:29 ` [PATCH v2 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list() Mike Snitzer
@ 2016-09-14 16:34   ` Jens Axboe
  2016-09-14 17:28     ` [PATCH v3 " Mike Snitzer
  2016-09-15  6:10   ` [PATCH v2 " Hannes Reinecke
  1 sibling, 1 reply; 17+ messages in thread
From: Jens Axboe @ 2016-09-14 16:34 UTC (permalink / raw)
  To: Mike Snitzer, dm-devel; +Cc: linux-block
On 09/14/2016 10:29 AM, Mike Snitzer wrote:
> +void blk_mq_delay_kick_requeue_list(struct request_queue *q,
> +				    unsigned long delay)
> +{
> +	kblockd_schedule_delayed_work(&q->requeue_work, delay);
> +}
> +EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
Leave the interface as taking msecs, but do msecs_to_jiffies() before
calling into kblockd_schedule_delayed_work().
-- 
Jens Axboe
^ permalink raw reply	[flat|nested] 17+ messages in thread
- * [PATCH v3 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list()
  2016-09-14 16:34   ` Jens Axboe
@ 2016-09-14 17:28     ` Mike Snitzer
  2016-09-14 17:49       ` Jens Axboe
  0 siblings, 1 reply; 17+ messages in thread
From: Mike Snitzer @ 2016-09-14 17:28 UTC (permalink / raw)
  To: Jens Axboe; +Cc: dm-devel, linux-block
blk_mq_delay_kick_requeue_list() provides the ability to kick the
q->requeue_list after a specified time.  To do this the request_queue's
'requeue_work' member was changed to a delayed_work.
blk_mq_delay_kick_requeue_list() allows DM to defer processing requeued
requests while it doesn't make sense to immediately requeue them
(e.g. when all paths in a DM multipath have failed).
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 block/blk-mq.c         | 16 ++++++++++++----
 include/linux/blk-mq.h |  1 +
 include/linux/blkdev.h |  2 +-
 3 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 13f5a6c..844c2f3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -501,7 +501,7 @@ EXPORT_SYMBOL(blk_mq_requeue_request);
 static void blk_mq_requeue_work(struct work_struct *work)
 {
 	struct request_queue *q =
-		container_of(work, struct request_queue, requeue_work);
+		container_of(work, struct request_queue, requeue_work.work);
 	LIST_HEAD(rq_list);
 	struct request *rq, *next;
 	unsigned long flags;
@@ -556,16 +556,24 @@ EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
 void blk_mq_cancel_requeue_work(struct request_queue *q)
 {
-	cancel_work_sync(&q->requeue_work);
+	cancel_delayed_work_sync(&q->requeue_work);
 }
 EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
 
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
-	kblockd_schedule_work(&q->requeue_work);
+	kblockd_schedule_delayed_work(&q->requeue_work, 0);
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
+void blk_mq_delay_kick_requeue_list(struct request_queue *q,
+				    unsigned long msecs)
+{
+	kblockd_schedule_delayed_work(&q->requeue_work,
+				      msecs_to_jiffies(msecs));
+}
+EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
+
 void blk_mq_abort_requeue_list(struct request_queue *q)
 {
 	unsigned long flags;
@@ -2082,7 +2090,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	q->sg_reserved_size = INT_MAX;
 
-	INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
+	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
 	INIT_LIST_HEAD(&q->requeue_list);
 	spin_lock_init(&q->requeue_lock);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e43bbff..ecec4b8 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -232,6 +232,7 @@ void blk_mq_requeue_request(struct request *rq);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
 void blk_mq_cancel_requeue_work(struct request_queue *q);
 void blk_mq_kick_requeue_list(struct request_queue *q);
+void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq, int error);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e79055c..b0a6189 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -449,7 +449,7 @@ struct request_queue {
 
 	struct list_head	requeue_list;
 	spinlock_t		requeue_lock;
-	struct work_struct	requeue_work;
+	struct delayed_work	requeue_work;
 
 	struct mutex		sysfs_lock;
 
-- 
2.7.4 (Apple Git-66)
^ permalink raw reply related	[flat|nested] 17+ messages in thread
- * Re: [PATCH v3 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list()
  2016-09-14 17:28     ` [PATCH v3 " Mike Snitzer
@ 2016-09-14 17:49       ` Jens Axboe
  0 siblings, 0 replies; 17+ messages in thread
From: Jens Axboe @ 2016-09-14 17:49 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, linux-block
On 09/14/2016 11:28 AM, Mike Snitzer wrote:
> blk_mq_delay_kick_requeue_list() provides the ability to kick the
> q->requeue_list after a specified time.  To do this the request_queue's
> 'requeue_work' member was changed to a delayed_work.
>
> blk_mq_delay_kick_requeue_list() allows DM to defer processing requeued
> requests while it doesn't make sense to immediately requeue them
> (e.g. when all paths in a DM multipath have failed).
Applied for 4.9.
-- 
Jens Axboe
^ permalink raw reply	[flat|nested] 17+ messages in thread 
 
 
- * Re: [PATCH v2 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list()
  2016-09-14 16:29 ` [PATCH v2 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list() Mike Snitzer
  2016-09-14 16:34   ` Jens Axboe
@ 2016-09-15  6:10   ` Hannes Reinecke
  1 sibling, 0 replies; 17+ messages in thread
From: Hannes Reinecke @ 2016-09-15  6:10 UTC (permalink / raw)
  To: Mike Snitzer, axboe, dm-devel; +Cc: linux-block
On 09/14/2016 06:29 PM, Mike Snitzer wrote:
> blk_mq_delay_kick_requeue_list() provides the ability to kick the
> q->requeue_list after a specified time.  To do this the request_queue's
> 'requeue_work' member was changed to a delayed_work.
> 
> blk_mq_delay_kick_requeue_list() allows DM to defer processing requeued
> requests while it doesn't make sense to immediately requeue them
> (e.g. when all paths in a DM multipath have failed).
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  block/blk-mq.c         | 15 +++++++++++----
>  include/linux/blk-mq.h |  1 +
>  include/linux/blkdev.h |  2 +-
>  3 files changed, 13 insertions(+), 5 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cheers,
Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
^ permalink raw reply	[flat|nested] 17+ messages in thread 
 
- * [PATCH v2 2/6] dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
  2016-09-14 16:29 [PATCH for-4.9 v2 0/6] [PATCH for-4.9 v2 0/3] allow delayed requeue of blk-mq requests Mike Snitzer
  2016-09-14 16:29 ` [PATCH v2 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list() Mike Snitzer
@ 2016-09-14 16:29 ` Mike Snitzer
  2016-09-15  6:14   ` Hannes Reinecke
  2016-09-14 16:29 ` [PATCH v2 3/6] dm rq: reduce arguments passed to map_request() and dm_requeue_original_request() Mike Snitzer
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 17+ messages in thread
From: Mike Snitzer @ 2016-09-14 16:29 UTC (permalink / raw)
  To: axboe, dm-devel; +Cc: linux-block
Otherwise blk-mq will immediately dispatch requests that are requeued
via a BLK_MQ_RQ_QUEUE_BUSY return from blk_mq_ops .queue_rq.
Delayed requeue is implemented using blk_mq_delay_kick_requeue_list()
with a delay of 5 secs.  In the context of DM multipath (all paths down)
it doesn't make any sense to requeue more quickly.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-rq.c            | 32 ++++++++++++++++++--------------
 include/linux/device-mapper.h |  1 +
 2 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 0d301d5..9eebc8d 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -336,20 +336,21 @@ static void dm_old_requeue_request(struct request *rq)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void dm_mq_requeue_request(struct request *rq)
+static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
 	blk_mq_requeue_request(rq);
+
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (!blk_queue_stopped(q))
-		blk_mq_kick_requeue_list(q);
+		blk_mq_delay_kick_requeue_list(q, msecs_to_jiffies(msecs));
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
 static void dm_requeue_original_request(struct mapped_device *md,
-					struct request *rq)
+					struct request *rq, bool delay_requeue)
 {
 	int rw = rq_data_dir(rq);
 
@@ -359,7 +360,7 @@ static void dm_requeue_original_request(struct mapped_device *md,
 	if (!rq->q->mq_ops)
 		dm_old_requeue_request(rq);
 	else
-		dm_mq_requeue_request(rq);
+		dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0);
 
 	rq_completed(md, rw, false);
 }
@@ -389,7 +390,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
 		return;
 	else if (r == DM_ENDIO_REQUEUE)
 		/* The target wants to requeue the I/O */
-		dm_requeue_original_request(tio->md, tio->orig);
+		dm_requeue_original_request(tio->md, tio->orig, false);
 	else {
 		DMWARN("unimplemented target endio return value: %d", r);
 		BUG();
@@ -629,8 +630,8 @@ static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
 
 /*
  * Returns:
- * 0                : the request has been processed
- * DM_MAPIO_REQUEUE : the original request needs to be requeued
+ * DM_MAPIO_*       : the request has been processed as indicated
+ * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
  * < 0              : the request was completed due to failure
  */
 static int map_request(struct dm_rq_target_io *tio, struct request *rq,
@@ -643,6 +644,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 	if (tio->clone) {
 		clone = tio->clone;
 		r = ti->type->map_rq(ti, clone, &tio->info);
+		if (r == DM_MAPIO_DELAY_REQUEUE)
+			return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
 	} else {
 		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
 		if (r < 0) {
@@ -650,9 +653,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 			dm_kill_unmapped_request(rq, r);
 			return r;
 		}
-		if (r != DM_MAPIO_REMAPPED)
-			return r;
-		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
+		if (r == DM_MAPIO_REMAPPED &&
+		    setup_clone(clone, rq, tio, GFP_ATOMIC)) {
 			/* -ENOMEM */
 			ti->type->release_clone_rq(clone);
 			return DM_MAPIO_REQUEUE;
@@ -671,7 +673,10 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 		break;
 	case DM_MAPIO_REQUEUE:
 		/* The target wants to requeue the I/O */
-		dm_requeue_original_request(md, tio->orig);
+		break;
+	case DM_MAPIO_DELAY_REQUEUE:
+		/* The target wants to requeue the I/O after a delay */
+		dm_requeue_original_request(md, tio->orig, true);
 		break;
 	default:
 		if (r > 0) {
@@ -681,10 +686,9 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 
 		/* The target wants to complete the I/O */
 		dm_kill_unmapped_request(rq, r);
-		return r;
 	}
 
-	return 0;
+	return r;
 }
 
 static void dm_start_request(struct mapped_device *md, struct request *orig)
@@ -727,7 +731,7 @@ static void map_tio_request(struct kthread_work *work)
 	struct mapped_device *md = tio->md;
 
 	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-		dm_requeue_original_request(md, rq);
+		dm_requeue_original_request(md, rq, false);
 }
 
 ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 91acfce..ef7962e 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -590,6 +590,7 @@ extern struct ratelimit_state dm_ratelimit_state;
 #define DM_MAPIO_SUBMITTED	0
 #define DM_MAPIO_REMAPPED	1
 #define DM_MAPIO_REQUEUE	DM_ENDIO_REQUEUE
+#define DM_MAPIO_DELAY_REQUEUE	3
 
 #define dm_sector_div64(x, y)( \
 { \
-- 
2.7.4 (Apple Git-66)
^ permalink raw reply related	[flat|nested] 17+ messages in thread
- * Re: [PATCH v2 2/6] dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
  2016-09-14 16:29 ` [PATCH v2 2/6] dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests Mike Snitzer
@ 2016-09-15  6:14   ` Hannes Reinecke
  2016-09-15 12:54     ` Mike Snitzer
  0 siblings, 1 reply; 17+ messages in thread
From: Hannes Reinecke @ 2016-09-15  6:14 UTC (permalink / raw)
  To: Mike Snitzer, axboe, dm-devel; +Cc: linux-block
On 09/14/2016 06:29 PM, Mike Snitzer wrote:
> Otherwise blk-mq will immediately dispatch requests that are requeued
> via a BLK_MQ_RQ_QUEUE_BUSY return from blk_mq_ops .queue_rq.
> 
> Delayed requeue is implemented using blk_mq_delay_kick_requeue_list()
> with a delay of 5 secs.  In the context of DM multipath (all paths down)
> it doesn't make any sense to requeue more quickly.
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/md/dm-rq.c            | 32 ++++++++++++++++++--------------
>  include/linux/device-mapper.h |  1 +
>  2 files changed, 19 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index 0d301d5..9eebc8d 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
[..]
> @@ -671,7 +673,10 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
>  		break;
>  	case DM_MAPIO_REQUEUE:
>  		/* The target wants to requeue the I/O */
> -		dm_requeue_original_request(md, tio->orig);
> +		break;
> +	case DM_MAPIO_DELAY_REQUEUE:
> +		/* The target wants to requeue the I/O after a delay */
> +		dm_requeue_original_request(md, tio->orig, true);
>  		break;
>  	default:
>  		if (r > 0) {
Hmm? What happened here?
Don't we need to requeue the request for DM_MAPIO_REQUEUE?
> @@ -681,10 +686,9 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
>  
>  		/* The target wants to complete the I/O */
>  		dm_kill_unmapped_request(rq, r);
> -		return r;
>  	}
>  
> -	return 0;
> +	return r;
>  }
>  
>  static void dm_start_request(struct mapped_device *md, struct request *orig)
[ .. ]
Cheers,
Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
^ permalink raw reply	[flat|nested] 17+ messages in thread
- * Re: [PATCH v2 2/6] dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
  2016-09-15  6:14   ` Hannes Reinecke
@ 2016-09-15 12:54     ` Mike Snitzer
  0 siblings, 0 replies; 17+ messages in thread
From: Mike Snitzer @ 2016-09-15 12:54 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: axboe, dm-devel, linux-block
On Thu, Sep 15 2016 at  2:14am -0400,
Hannes Reinecke <hare@suse.de> wrote:
> On 09/14/2016 06:29 PM, Mike Snitzer wrote:
> > Otherwise blk-mq will immediately dispatch requests that are requeued
> > via a BLK_MQ_RQ_QUEUE_BUSY return from blk_mq_ops .queue_rq.
> > 
> > Delayed requeue is implemented using blk_mq_delay_kick_requeue_list()
> > with a delay of 5 secs.  In the context of DM multipath (all paths down)
> > it doesn't make any sense to requeue more quickly.
> > 
> > Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> > ---
> >  drivers/md/dm-rq.c            | 32 ++++++++++++++++++--------------
> >  include/linux/device-mapper.h |  1 +
> >  2 files changed, 19 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> > index 0d301d5..9eebc8d 100644
> > --- a/drivers/md/dm-rq.c
> > +++ b/drivers/md/dm-rq.c
> [..]
> > @@ -671,7 +673,10 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
> >  		break;
> >  	case DM_MAPIO_REQUEUE:
> >  		/* The target wants to requeue the I/O */
> > -		dm_requeue_original_request(md, tio->orig);
> > +		break;
> > +	case DM_MAPIO_DELAY_REQUEUE:
> > +		/* The target wants to requeue the I/O after a delay */
> > +		dm_requeue_original_request(md, tio->orig, true);
> >  		break;
> >  	default:
> >  		if (r > 0) {
> Hmm? What happened here?
> Don't we need to requeue the request for DM_MAPIO_REQUEUE?
Yes, as always, the caller will perform the immediate requeue -- this is
a requirement for blk-mq but I made .request_fn do the same just for
consistency in the request-based DM code.
In the case of blk-mq it is done in terms of a BLK_MQ_RQ_QUEUE_BUSY
return from .queue_rq (which is the most immediate requeue there is
since blk-mq just puts the request back on its dispatch list for the
very next queue run).
(it is so quick that it causes excessive load when all paths are down,
hence this patchset to only use immediate requeue when it makes sense)
Mike
^ permalink raw reply	[flat|nested] 17+ messages in thread
 
 
- * [PATCH v2 3/6] dm rq: reduce arguments passed to map_request() and dm_requeue_original_request()
  2016-09-14 16:29 [PATCH for-4.9 v2 0/6] [PATCH for-4.9 v2 0/3] allow delayed requeue of blk-mq requests Mike Snitzer
  2016-09-14 16:29 ` [PATCH v2 1/6] blk-mq: introduce blk_mq_delay_kick_requeue_list() Mike Snitzer
  2016-09-14 16:29 ` [PATCH v2 2/6] dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests Mike Snitzer
@ 2016-09-14 16:29 ` Mike Snitzer
  2016-09-15  6:15   ` Hannes Reinecke
  2016-09-14 16:29 ` [PATCH v2 4/6] dm rq: introduce dm_mq_kick_requeue_list() Mike Snitzer
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 17+ messages in thread
From: Mike Snitzer @ 2016-09-14 16:29 UTC (permalink / raw)
  To: axboe, dm-devel; +Cc: linux-block
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-rq.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 9eebc8d..bdbfe05 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -349,9 +349,10 @@ static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void dm_requeue_original_request(struct mapped_device *md,
-					struct request *rq, bool delay_requeue)
+static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
 {
+	struct mapped_device *md = tio->md;
+	struct request *rq = tio->orig;
 	int rw = rq_data_dir(rq);
 
 	rq_end_stats(md, rq);
@@ -390,7 +391,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
 		return;
 	else if (r == DM_ENDIO_REQUEUE)
 		/* The target wants to requeue the I/O */
-		dm_requeue_original_request(tio->md, tio->orig, false);
+		dm_requeue_original_request(tio, false);
 	else {
 		DMWARN("unimplemented target endio return value: %d", r);
 		BUG();
@@ -634,11 +635,12 @@ static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
  * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
  * < 0              : the request was completed due to failure
  */
-static int map_request(struct dm_rq_target_io *tio, struct request *rq,
-		       struct mapped_device *md)
+static int map_request(struct dm_rq_target_io *tio)
 {
 	int r;
 	struct dm_target *ti = tio->ti;
+	struct mapped_device *md = tio->md;
+	struct request *rq = tio->orig;
 	struct request *clone = NULL;
 
 	if (tio->clone) {
@@ -676,7 +678,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 		break;
 	case DM_MAPIO_DELAY_REQUEUE:
 		/* The target wants to requeue the I/O after a delay */
-		dm_requeue_original_request(md, tio->orig, true);
+		dm_requeue_original_request(tio, true);
 		break;
 	default:
 		if (r > 0) {
@@ -727,11 +729,9 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
 static void map_tio_request(struct kthread_work *work)
 {
 	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
-	struct request *rq = tio->orig;
-	struct mapped_device *md = tio->md;
 
-	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-		dm_requeue_original_request(md, rq, false);
+	if (map_request(tio) == DM_MAPIO_REQUEUE)
+		dm_requeue_original_request(tio, false);
 }
 
 ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
@@ -917,7 +917,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	tio->ti = ti;
 
 	/* Direct call is fine since .queue_rq allows allocations */
-	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+	if (map_request(tio) == DM_MAPIO_REQUEUE) {
 		/* Undo dm_start_request() before requeuing */
 		rq_end_stats(md, rq);
 		rq_completed(md, rq_data_dir(rq), false);
-- 
2.7.4 (Apple Git-66)
^ permalink raw reply related	[flat|nested] 17+ messages in thread
- * Re: [PATCH v2 3/6] dm rq: reduce arguments passed to map_request() and dm_requeue_original_request()
  2016-09-14 16:29 ` [PATCH v2 3/6] dm rq: reduce arguments passed to map_request() and dm_requeue_original_request() Mike Snitzer
@ 2016-09-15  6:15   ` Hannes Reinecke
  0 siblings, 0 replies; 17+ messages in thread
From: Hannes Reinecke @ 2016-09-15  6:15 UTC (permalink / raw)
  To: Mike Snitzer, axboe, dm-devel; +Cc: linux-block
On 09/14/2016 06:29 PM, Mike Snitzer wrote:
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/md/dm-rq.c | 22 +++++++++++-----------
>  1 file changed, 11 insertions(+), 11 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cheers,
Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
^ permalink raw reply	[flat|nested] 17+ messages in thread 
 
- * [PATCH v2 4/6] dm rq: introduce dm_mq_kick_requeue_list()
  2016-09-14 16:29 [PATCH for-4.9 v2 0/6] [PATCH for-4.9 v2 0/3] allow delayed requeue of blk-mq requests Mike Snitzer
                   ` (2 preceding siblings ...)
  2016-09-14 16:29 ` [PATCH v2 3/6] dm rq: reduce arguments passed to map_request() and dm_requeue_original_request() Mike Snitzer
@ 2016-09-14 16:29 ` Mike Snitzer
  2016-09-15  6:16   ` Hannes Reinecke
  2016-09-14 16:29 ` [PATCH v2 5/6] dm mpath: use dm_mq_kick_requeue_list() Mike Snitzer
  2016-09-14 16:29 ` [PATCH v2 6/6] dm mpath: delay the requeue of blk-mq requests while all paths down Mike Snitzer
  5 siblings, 1 reply; 17+ messages in thread
From: Mike Snitzer @ 2016-09-14 16:29 UTC (permalink / raw)
  To: axboe, dm-devel; +Cc: linux-block
Make it possible for a request-based target to kick the DM device's
blk-mq request_queue's requeue_list.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-rq.c | 17 +++++++++++++----
 drivers/md/dm-rq.h |  2 ++
 2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index bdbfe05..726cb3b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -336,19 +336,28 @@ static void dm_old_requeue_request(struct request *rq)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
+static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
 {
-	struct request_queue *q = rq->q;
 	unsigned long flags;
 
-	blk_mq_requeue_request(rq);
-
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (!blk_queue_stopped(q))
 		blk_mq_delay_kick_requeue_list(q, msecs_to_jiffies(msecs));
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+void dm_mq_kick_requeue_list(struct mapped_device *md)
+{
+	__dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
+}
+EXPORT_SYMBOL(dm_mq_kick_requeue_list);
+
+static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
+{
+	blk_mq_requeue_request(rq);
+	__dm_mq_kick_requeue_list(rq->q, msecs);
+}
+
 static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
 {
 	struct mapped_device *md = tio->md;
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index 9e6f0a3..4da06ca 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -55,6 +55,8 @@ void dm_mq_cleanup_mapped_device(struct mapped_device *md);
 void dm_start_queue(struct request_queue *q);
 void dm_stop_queue(struct request_queue *q);
 
+void dm_mq_kick_requeue_list(struct mapped_device *md);
+
 unsigned dm_get_reserved_rq_based_ios(void);
 
 ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
-- 
2.7.4 (Apple Git-66)
^ permalink raw reply related	[flat|nested] 17+ messages in thread
- * Re: [PATCH v2 4/6] dm rq: introduce dm_mq_kick_requeue_list()
  2016-09-14 16:29 ` [PATCH v2 4/6] dm rq: introduce dm_mq_kick_requeue_list() Mike Snitzer
@ 2016-09-15  6:16   ` Hannes Reinecke
  0 siblings, 0 replies; 17+ messages in thread
From: Hannes Reinecke @ 2016-09-15  6:16 UTC (permalink / raw)
  To: Mike Snitzer, axboe, dm-devel; +Cc: linux-block
On 09/14/2016 06:29 PM, Mike Snitzer wrote:
> Make it possible for a request-based target to kick the DM device's
> blk-mq request_queue's requeue_list.
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/md/dm-rq.c | 17 +++++++++++++----
>  drivers/md/dm-rq.h |  2 ++
>  2 files changed, 15 insertions(+), 4 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cheers,
Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 N�rnberg
GF: J. Hawn, J. Guild, F. Imend�rffer, HRB 16746 (AG N�rnberg)
^ permalink raw reply	[flat|nested] 17+ messages in thread 
 
- * [PATCH v2 5/6] dm mpath: use dm_mq_kick_requeue_list()
  2016-09-14 16:29 [PATCH for-4.9 v2 0/6] [PATCH for-4.9 v2 0/3] allow delayed requeue of blk-mq requests Mike Snitzer
                   ` (3 preceding siblings ...)
  2016-09-14 16:29 ` [PATCH v2 4/6] dm rq: introduce dm_mq_kick_requeue_list() Mike Snitzer
@ 2016-09-14 16:29 ` Mike Snitzer
  2016-09-15  6:16   ` Hannes Reinecke
  2016-09-14 16:29 ` [PATCH v2 6/6] dm mpath: delay the requeue of blk-mq requests while all paths down Mike Snitzer
  5 siblings, 1 reply; 17+ messages in thread
From: Mike Snitzer @ 2016-09-14 16:29 UTC (permalink / raw)
  To: axboe, dm-devel; +Cc: linux-block
When reinstating a path the blk-mq request_queue's requeue_list should
get kicked.  It makes sense to kick the requeue_list as part of the
existing hook (previously only used by bio-based support).
Rename process_queued_bios_list to process_queued_io_list.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c777d38..f69715b 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -680,9 +680,11 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
 	return __multipath_map_bio(m, bio, mpio);
 }
 
-static void process_queued_bios_list(struct multipath *m)
+static void process_queued_io_list(struct multipath *m)
 {
-	if (m->queue_mode == DM_TYPE_BIO_BASED)
+	if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+		dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
+	else if (m->queue_mode == DM_TYPE_BIO_BASED)
 		queue_work(kmultipathd, &m->process_queued_bios);
 }
 
@@ -752,7 +754,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
 
 	if (!queue_if_no_path) {
 		dm_table_run_md_queue_async(m->ti->table);
-		process_queued_bios_list(m);
+		process_queued_io_list(m);
 	}
 
 	return 0;
@@ -1304,7 +1306,7 @@ out:
 	spin_unlock_irqrestore(&m->lock, flags);
 	if (run_queue) {
 		dm_table_run_md_queue_async(m->ti->table);
-		process_queued_bios_list(m);
+		process_queued_io_list(m);
 	}
 
 	return r;
@@ -1502,7 +1504,7 @@ static void pg_init_done(void *data, int errors)
 	}
 	clear_bit(MPATHF_QUEUE_IO, &m->flags);
 
-	process_queued_bios_list(m);
+	process_queued_io_list(m);
 
 	/*
 	 * Wake up any thread waiting to suspend.
@@ -1937,7 +1939,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
 		if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
 			pg_init_all_paths(m);
 		dm_table_run_md_queue_async(m->ti->table);
-		process_queued_bios_list(m);
+		process_queued_io_list(m);
 	}
 
 	/*
-- 
2.7.4 (Apple Git-66)
^ permalink raw reply related	[flat|nested] 17+ messages in thread
- * Re: [PATCH v2 5/6] dm mpath: use dm_mq_kick_requeue_list()
  2016-09-14 16:29 ` [PATCH v2 5/6] dm mpath: use dm_mq_kick_requeue_list() Mike Snitzer
@ 2016-09-15  6:16   ` Hannes Reinecke
  0 siblings, 0 replies; 17+ messages in thread
From: Hannes Reinecke @ 2016-09-15  6:16 UTC (permalink / raw)
  To: Mike Snitzer, axboe, dm-devel; +Cc: linux-block
On 09/14/2016 06:29 PM, Mike Snitzer wrote:
> When reinstating a path the blk-mq request_queue's requeue_list should
> get kicked.  It makes sense to kick the requeue_list as part of the
> existing hook (previously only used by bio-based support).
> 
> Rename process_queued_bios_list to process_queued_io_list.
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/md/dm-mpath.c | 14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cheers,
Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
^ permalink raw reply	[flat|nested] 17+ messages in thread 
 
- * [PATCH v2 6/6] dm mpath: delay the requeue of blk-mq requests while all paths down
  2016-09-14 16:29 [PATCH for-4.9 v2 0/6] [PATCH for-4.9 v2 0/3] allow delayed requeue of blk-mq requests Mike Snitzer
                   ` (4 preceding siblings ...)
  2016-09-14 16:29 ` [PATCH v2 5/6] dm mpath: use dm_mq_kick_requeue_list() Mike Snitzer
@ 2016-09-14 16:29 ` Mike Snitzer
  2016-09-15  6:18   ` Hannes Reinecke
  5 siblings, 1 reply; 17+ messages in thread
From: Mike Snitzer @ 2016-09-14 16:29 UTC (permalink / raw)
  To: axboe, dm-devel; +Cc: linux-block
Return DM_MAPIO_DELAY_REQUEUE from .clone_and_map_rq.  Also, return
false from .busy, if all paths are down, so that blk-mq requests get
mapped via .clone_and_map_rq -- which results in DM_MAPIO_DELAY_REQUEUE
being returned to dm-rq.
This change allows for a noticeable reduction in cpu utilization
(reduced kworker load) while all paths are down, e.g.:
system CPU idleness (as measured by fio's --idle-prof=system):
before: system: 86.58%
after:  system: 98.60%
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f69715b..f31fa13 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -550,9 +550,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 		pgpath = choose_pgpath(m, nr_bytes);
 
 	if (!pgpath) {
-		if (!must_push_back_rq(m))
-			r = -EIO;	/* Failed */
-		return r;
+		if (must_push_back_rq(m))
+			return DM_MAPIO_DELAY_REQUEUE;
+		return -EIO;	/* Failed */
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 		pg_init_all_paths(m);
@@ -1992,11 +1992,14 @@ static int multipath_busy(struct dm_target *ti)
 	struct priority_group *pg, *next_pg;
 	struct pgpath *pgpath;
 
-	/* pg_init in progress or no paths available */
-	if (atomic_read(&m->pg_init_in_progress) ||
-	    (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)))
+	/* pg_init in progress */
+	if (atomic_read(&m->pg_init_in_progress))
 		return true;
 
+	/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
+	if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+		return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+
 	/* Guess which priority_group will be used at next mapping time */
 	pg = lockless_dereference(m->current_pg);
 	next_pg = lockless_dereference(m->next_pg);
-- 
2.7.4 (Apple Git-66)
^ permalink raw reply related	[flat|nested] 17+ messages in thread
- * Re: [PATCH v2 6/6] dm mpath: delay the requeue of blk-mq requests while all paths down
  2016-09-14 16:29 ` [PATCH v2 6/6] dm mpath: delay the requeue of blk-mq requests while all paths down Mike Snitzer
@ 2016-09-15  6:18   ` Hannes Reinecke
  0 siblings, 0 replies; 17+ messages in thread
From: Hannes Reinecke @ 2016-09-15  6:18 UTC (permalink / raw)
  To: Mike Snitzer, axboe, dm-devel; +Cc: linux-block
On 09/14/2016 06:29 PM, Mike Snitzer wrote:
> Return DM_MAPIO_DELAY_REQUEUE from .clone_and_map_rq.  Also, return
> false from .busy, if all paths are down, so that blk-mq requests get
> mapped via .clone_and_map_rq -- which results in DM_MAPIO_DELAY_REQUEUE
> being returned to dm-rq.
> 
> This change allows for a noticeable reduction in cpu utilization
> (reduced kworker load) while all paths are down, e.g.:
> 
> system CPU idleness (as measured by fio's --idle-prof=system):
> before: system: 86.58%
> after:  system: 98.60%
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/md/dm-mpath.c | 15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cheers,
Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
^ permalink raw reply	[flat|nested] 17+ messages in thread