All of lore.kernel.org
 help / color / mirror / Atom feed
* Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
@ 2023-11-29  2:54 Sasha Levin
  2023-11-29  8:28 ` Mikulas Patocka
  0 siblings, 1 reply; 9+ messages in thread
From: Sasha Levin @ 2023-11-29  2:54 UTC (permalink / raw)
  To: stable-commits, christian.loehle
  Cc: Alasdair Kergon, Mike Snitzer, Mikulas Patocka, dm-devel

This is a note to let you know that I've just added the patch titled

    dm delay: for short delays, use kthread instead of timers and wq

to the 6.6-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     dm-delay-for-short-delays-use-kthread-instead-of-tim.patch
and it can be found in the queue-6.6 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.



commit 976fd593415e170a8ed5db68683b280d5876982d
Author: Christian Loehle <christian.loehle@arm.com>
Date:   Fri Oct 20 12:46:05 2023 +0100

    dm delay: for short delays, use kthread instead of timers and wq
    
    [ Upstream commit 70bbeb29fab09d6ea6cfe64109db60a97d84d739 ]
    
    DM delay's current design of using timers and wq to realize the delays
    is insufficient for delays below ~50ms.
    
    This commit enhances the design to use a kthread to flush the expired
    delays, trading some CPU time (in some cases) for better delay
    accuracy and delays closer to what the user requested for smaller
    delays. The new design is chosen as long as all the delays are below
    50ms.
    
    Since bios can't be completed in interrupt context using a kthread
    is probably the most reasonable way to approach this.
    
    Testing with
    echo "0 2097152 zero" | dmsetup create dm-zeros
    for i in $(seq 0 20);
    do
      echo "0 2097152 delay /dev/mapper/dm-zeros 0 $i" | dmsetup create dm-delay-${i}ms;
    done
    
    Some performance numbers for comparison, on beaglebone black (single
    core) CONFIG_HZ_1000=y:
    
    fio --name=1msread --rw=randread --bs=4k --runtime=60 --time_based \
        --filename=/dev/mapper/dm-delay-1ms
    Theoretical maximum: 1000 IOPS
    Previous: 250 IOPS
    Kthread: 500 IOPS
    
    fio --name=10msread --rw=randread --bs=4k --runtime=60 --time_based \
        --filename=/dev/mapper/dm-delay-10ms
    Theoretical maximum: 100 IOPS
    Previous: 45 IOPS
    Kthread: 50 IOPS
    
    fio --name=1mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \
        --time_based --filename=/dev/mapper/dm-delay-1ms
    Theoretical maximum: 1000 IOPS
    Previous: 498 IOPS
    Kthread: 1000 IOPS
    
    fio --name=10mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \
        --time_based --filename=/dev/mapper/dm-delay-10ms
    Theoretical maximum: 100 IOPS
    Previous: 90 IOPS
    Kthread: 100 IOPS
    
    (This one is just to prove the new design isn't impacting throughput,
    not really about delays):
    fio --name=10mswriteasync --rw=randwrite --direct=1 --bs=4k \
        --runtime=60 --time_based --filename=/dev/mapper/dm-delay-10ms \
        --numjobs=32 --iodepth=64 --ioengine=libaio --group_reporting
    Previous: 13.3k IOPS
    Kthread: 13.3k IOPS
    
    Signed-off-by: Christian Loehle <christian.loehle@arm.com>
    [Harshit: kthread_create error handling fix in delay_ctr]
    Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
    Signed-off-by: Mike Snitzer <snitzer@kernel.org>
    Stable-dep-of: 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and delay_bio")
    Signed-off-by: Sasha Levin <sashal@kernel.org>

diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 7433525e59856..efd510984e259 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -13,6 +13,7 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/kthread.h>
 
 #include <linux/device-mapper.h>
 
@@ -31,6 +32,7 @@ struct delay_c {
 	struct workqueue_struct *kdelayd_wq;
 	struct work_struct flush_expired_bios;
 	struct list_head delayed_bios;
+	struct task_struct *worker;
 	atomic_t may_delay;
 
 	struct delay_class read;
@@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires)
 	mutex_unlock(&dc->timer_lock);
 }
 
+static inline bool delay_is_fast(struct delay_c *dc)
+{
+	return !!dc->worker;
+}
+
+static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
+{
+	struct dm_delay_info *delayed, *next;
+
+	mutex_lock(&delayed_bios_lock);
+	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
+			struct bio *bio = dm_bio_from_per_bio_data(delayed,
+						sizeof(struct dm_delay_info));
+			list_del(&delayed->list);
+			dm_submit_bio_remap(bio, NULL);
+			delayed->class->ops--;
+		}
+	}
+	mutex_unlock(&delayed_bios_lock);
+}
+
+static int flush_worker_fn(void *data)
+{
+	struct delay_c *dc = data;
+
+	while (1) {
+		flush_delayed_bios_fast(dc, false);
+		if (unlikely(list_empty(&dc->delayed_bios))) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+		} else
+			cond_resched();
+	}
+
+	return 0;
+}
+
 static void flush_bios(struct bio *bio)
 {
 	struct bio *n;
@@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio)
 	}
 }
 
-static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
+static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
 {
 	struct dm_delay_info *delayed, *next;
 	unsigned long next_expires = 0;
@@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work)
 	struct delay_c *dc;
 
 	dc = container_of(work, struct delay_c, flush_expired_bios);
-	flush_bios(flush_delayed_bios(dc, 0));
+	if (delay_is_fast(dc))
+		flush_delayed_bios_fast(dc, false);
+	else
+		flush_bios(flush_delayed_bios(dc, false));
 }
 
 static void delay_dtr(struct dm_target *ti)
@@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti)
 		dm_put_device(ti, dc->write.dev);
 	if (dc->flush.dev)
 		dm_put_device(ti, dc->flush.dev);
+	if (dc->worker)
+		kthread_stop(dc->worker);
 
-	mutex_destroy(&dc->timer_lock);
+	if (!delay_is_fast(dc))
+		mutex_destroy(&dc->timer_lock);
 
 	kfree(dc);
 }
@@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct delay_c *dc;
 	int ret;
+	unsigned int max_delay;
 
 	if (argc != 3 && argc != 6 && argc != 9) {
 		ti->error = "Requires exactly 3, 6 or 9 arguments";
@@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ti->private = dc;
-	timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
-	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
 	INIT_LIST_HEAD(&dc->delayed_bios);
-	mutex_init(&dc->timer_lock);
 	atomic_set(&dc->may_delay, 1);
 	dc->argc = argc;
 
 	ret = delay_class_ctr(ti, &dc->read, argv);
 	if (ret)
 		goto bad;
+	max_delay = dc->read.delay;
 
 	if (argc == 3) {
 		ret = delay_class_ctr(ti, &dc->write, argv);
@@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		ret = delay_class_ctr(ti, &dc->flush, argv);
 		if (ret)
 			goto bad;
+		max_delay = max(max_delay, dc->write.delay);
+		max_delay = max(max_delay, dc->flush.delay);
 		goto out;
 	}
 
@@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
 		if (ret)
 			goto bad;
+		max_delay = max(max_delay, dc->flush.delay);
 		goto out;
 	}
 
 	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
 	if (ret)
 		goto bad;
+	max_delay = max(max_delay, dc->flush.delay);
 
 out:
-	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
-	if (!dc->kdelayd_wq) {
-		ret = -EINVAL;
-		DMERR("Couldn't start kdelayd");
-		goto bad;
+	if (max_delay < 50) {
+		/*
+		 * In case of small requested delays, use kthread instead of
+		 * timers and workqueue to achieve better latency.
+		 */
+		dc->worker = kthread_create(&flush_worker_fn, dc,
+					    "dm-delay-flush-worker");
+		if (IS_ERR(dc->worker)) {
+			ret = PTR_ERR(dc->worker);
+			goto bad;
+		}
+	} else {
+		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
+		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
+		mutex_init(&dc->timer_lock);
+		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
+		if (!dc->kdelayd_wq) {
+			ret = -EINVAL;
+			DMERR("Couldn't start kdelayd");
+			goto bad;
+		}
 	}
 
 	ti->num_flush_bios = 1;
@@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
 	list_add_tail(&delayed->list, &dc->delayed_bios);
 	mutex_unlock(&delayed_bios_lock);
 
-	queue_timeout(dc, expires);
+	if (delay_is_fast(dc))
+		wake_up_process(dc->worker);
+	else
+		queue_timeout(dc, expires);
 
 	return DM_MAPIO_SUBMITTED;
 }
@@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti)
 	struct delay_c *dc = ti->private;
 
 	atomic_set(&dc->may_delay, 0);
-	del_timer_sync(&dc->delay_timer);
-	flush_bios(flush_delayed_bios(dc, 1));
+
+	if (delay_is_fast(dc))
+		flush_delayed_bios_fast(dc, true);
+	else {
+		del_timer_sync(&dc->delay_timer);
+		flush_bios(flush_delayed_bios(dc, true));
+	}
 }
 
 static void delay_resume(struct dm_target *ti)
@@ -356,7 +429,7 @@ static int delay_iterate_devices(struct dm_target *ti,
 
 static struct target_type delay_target = {
 	.name	     = "delay",
-	.version     = {1, 3, 0},
+	.version     = {1, 4, 0},
 	.features    = DM_TARGET_PASSES_INTEGRITY,
 	.module      = THIS_MODULE,
 	.ctr	     = delay_ctr,

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
  2023-11-29  2:54 Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree Sasha Levin
@ 2023-11-29  8:28 ` Mikulas Patocka
  2023-11-29 10:02   ` Christian Loehle
  0 siblings, 1 reply; 9+ messages in thread
From: Mikulas Patocka @ 2023-11-29  8:28 UTC (permalink / raw)
  To: Sasha Levin
  Cc: stable-commits, stable, christian.loehle, Alasdair Kergon,
	Mike Snitzer, dm-devel

Hi

This patch doesn't fix any bug (and introduces several serious bugs), so 
it shouldn't be backported at all.

Mikulas


On Tue, 28 Nov 2023, Sasha Levin wrote:

> This is a note to let you know that I've just added the patch titled
> 
>     dm delay: for short delays, use kthread instead of timers and wq
> 
> to the 6.6-stable tree which can be found at:
>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
> 
> The filename of the patch is:
>      dm-delay-for-short-delays-use-kthread-instead-of-tim.patch
> and it can be found in the queue-6.6 subdirectory.
> 
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable@vger.kernel.org> know about it.
> 
> 
> 
> commit 976fd593415e170a8ed5db68683b280d5876982d
> Author: Christian Loehle <christian.loehle@arm.com>
> Date:   Fri Oct 20 12:46:05 2023 +0100
> 
>     dm delay: for short delays, use kthread instead of timers and wq
>     
>     [ Upstream commit 70bbeb29fab09d6ea6cfe64109db60a97d84d739 ]
>     
>     DM delay's current design of using timers and wq to realize the delays
>     is insufficient for delays below ~50ms.
>     
>     This commit enhances the design to use a kthread to flush the expired
>     delays, trading some CPU time (in some cases) for better delay
>     accuracy and delays closer to what the user requested for smaller
>     delays. The new design is chosen as long as all the delays are below
>     50ms.
>     
>     Since bios can't be completed in interrupt context using a kthread
>     is probably the most reasonable way to approach this.
>     
>     Testing with
>     echo "0 2097152 zero" | dmsetup create dm-zeros
>     for i in $(seq 0 20);
>     do
>       echo "0 2097152 delay /dev/mapper/dm-zeros 0 $i" | dmsetup create dm-delay-${i}ms;
>     done
>     
>     Some performance numbers for comparison, on beaglebone black (single
>     core) CONFIG_HZ_1000=y:
>     
>     fio --name=1msread --rw=randread --bs=4k --runtime=60 --time_based \
>         --filename=/dev/mapper/dm-delay-1ms
>     Theoretical maximum: 1000 IOPS
>     Previous: 250 IOPS
>     Kthread: 500 IOPS
>     
>     fio --name=10msread --rw=randread --bs=4k --runtime=60 --time_based \
>         --filename=/dev/mapper/dm-delay-10ms
>     Theoretical maximum: 100 IOPS
>     Previous: 45 IOPS
>     Kthread: 50 IOPS
>     
>     fio --name=1mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \
>         --time_based --filename=/dev/mapper/dm-delay-1ms
>     Theoretical maximum: 1000 IOPS
>     Previous: 498 IOPS
>     Kthread: 1000 IOPS
>     
>     fio --name=10mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \
>         --time_based --filename=/dev/mapper/dm-delay-10ms
>     Theoretical maximum: 100 IOPS
>     Previous: 90 IOPS
>     Kthread: 100 IOPS
>     
>     (This one is just to prove the new design isn't impacting throughput,
>     not really about delays):
>     fio --name=10mswriteasync --rw=randwrite --direct=1 --bs=4k \
>         --runtime=60 --time_based --filename=/dev/mapper/dm-delay-10ms \
>         --numjobs=32 --iodepth=64 --ioengine=libaio --group_reporting
>     Previous: 13.3k IOPS
>     Kthread: 13.3k IOPS
>     
>     Signed-off-by: Christian Loehle <christian.loehle@arm.com>
>     [Harshit: kthread_create error handling fix in delay_ctr]
>     Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
>     Signed-off-by: Mike Snitzer <snitzer@kernel.org>
>     Stable-dep-of: 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and delay_bio")
>     Signed-off-by: Sasha Levin <sashal@kernel.org>
> 
> diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
> index 7433525e59856..efd510984e259 100644
> --- a/drivers/md/dm-delay.c
> +++ b/drivers/md/dm-delay.c
> @@ -13,6 +13,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/bio.h>
>  #include <linux/slab.h>
> +#include <linux/kthread.h>
>  
>  #include <linux/device-mapper.h>
>  
> @@ -31,6 +32,7 @@ struct delay_c {
>  	struct workqueue_struct *kdelayd_wq;
>  	struct work_struct flush_expired_bios;
>  	struct list_head delayed_bios;
> +	struct task_struct *worker;
>  	atomic_t may_delay;
>  
>  	struct delay_class read;
> @@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires)
>  	mutex_unlock(&dc->timer_lock);
>  }
>  
> +static inline bool delay_is_fast(struct delay_c *dc)
> +{
> +	return !!dc->worker;
> +}
> +
> +static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
> +{
> +	struct dm_delay_info *delayed, *next;
> +
> +	mutex_lock(&delayed_bios_lock);
> +	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
> +		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
> +			struct bio *bio = dm_bio_from_per_bio_data(delayed,
> +						sizeof(struct dm_delay_info));
> +			list_del(&delayed->list);
> +			dm_submit_bio_remap(bio, NULL);
> +			delayed->class->ops--;
> +		}
> +	}
> +	mutex_unlock(&delayed_bios_lock);
> +}
> +
> +static int flush_worker_fn(void *data)
> +{
> +	struct delay_c *dc = data;
> +
> +	while (1) {
> +		flush_delayed_bios_fast(dc, false);
> +		if (unlikely(list_empty(&dc->delayed_bios))) {
> +			set_current_state(TASK_INTERRUPTIBLE);
> +			schedule();
> +		} else
> +			cond_resched();
> +	}
> +
> +	return 0;
> +}
> +
>  static void flush_bios(struct bio *bio)
>  {
>  	struct bio *n;
> @@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio)
>  	}
>  }
>  
> -static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
> +static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
>  {
>  	struct dm_delay_info *delayed, *next;
>  	unsigned long next_expires = 0;
> @@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work)
>  	struct delay_c *dc;
>  
>  	dc = container_of(work, struct delay_c, flush_expired_bios);
> -	flush_bios(flush_delayed_bios(dc, 0));
> +	if (delay_is_fast(dc))
> +		flush_delayed_bios_fast(dc, false);
> +	else
> +		flush_bios(flush_delayed_bios(dc, false));
>  }
>  
>  static void delay_dtr(struct dm_target *ti)
> @@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti)
>  		dm_put_device(ti, dc->write.dev);
>  	if (dc->flush.dev)
>  		dm_put_device(ti, dc->flush.dev);
> +	if (dc->worker)
> +		kthread_stop(dc->worker);
>  
> -	mutex_destroy(&dc->timer_lock);
> +	if (!delay_is_fast(dc))
> +		mutex_destroy(&dc->timer_lock);
>  
>  	kfree(dc);
>  }
> @@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  {
>  	struct delay_c *dc;
>  	int ret;
> +	unsigned int max_delay;
>  
>  	if (argc != 3 && argc != 6 && argc != 9) {
>  		ti->error = "Requires exactly 3, 6 or 9 arguments";
> @@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  	}
>  
>  	ti->private = dc;
> -	timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
> -	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
>  	INIT_LIST_HEAD(&dc->delayed_bios);
> -	mutex_init(&dc->timer_lock);
>  	atomic_set(&dc->may_delay, 1);
>  	dc->argc = argc;
>  
>  	ret = delay_class_ctr(ti, &dc->read, argv);
>  	if (ret)
>  		goto bad;
> +	max_delay = dc->read.delay;
>  
>  	if (argc == 3) {
>  		ret = delay_class_ctr(ti, &dc->write, argv);
> @@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  		ret = delay_class_ctr(ti, &dc->flush, argv);
>  		if (ret)
>  			goto bad;
> +		max_delay = max(max_delay, dc->write.delay);
> +		max_delay = max(max_delay, dc->flush.delay);
>  		goto out;
>  	}
>  
> @@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
>  		if (ret)
>  			goto bad;
> +		max_delay = max(max_delay, dc->flush.delay);
>  		goto out;
>  	}
>  
>  	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
>  	if (ret)
>  		goto bad;
> +	max_delay = max(max_delay, dc->flush.delay);
>  
>  out:
> -	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
> -	if (!dc->kdelayd_wq) {
> -		ret = -EINVAL;
> -		DMERR("Couldn't start kdelayd");
> -		goto bad;
> +	if (max_delay < 50) {
> +		/*
> +		 * In case of small requested delays, use kthread instead of
> +		 * timers and workqueue to achieve better latency.
> +		 */
> +		dc->worker = kthread_create(&flush_worker_fn, dc,
> +					    "dm-delay-flush-worker");
> +		if (IS_ERR(dc->worker)) {
> +			ret = PTR_ERR(dc->worker);
> +			goto bad;
> +		}
> +	} else {
> +		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
> +		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
> +		mutex_init(&dc->timer_lock);
> +		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
> +		if (!dc->kdelayd_wq) {
> +			ret = -EINVAL;
> +			DMERR("Couldn't start kdelayd");
> +			goto bad;
> +		}
>  	}
>  
>  	ti->num_flush_bios = 1;
> @@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
>  	list_add_tail(&delayed->list, &dc->delayed_bios);
>  	mutex_unlock(&delayed_bios_lock);
>  
> -	queue_timeout(dc, expires);
> +	if (delay_is_fast(dc))
> +		wake_up_process(dc->worker);
> +	else
> +		queue_timeout(dc, expires);
>  
>  	return DM_MAPIO_SUBMITTED;
>  }
> @@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti)
>  	struct delay_c *dc = ti->private;
>  
>  	atomic_set(&dc->may_delay, 0);
> -	del_timer_sync(&dc->delay_timer);
> -	flush_bios(flush_delayed_bios(dc, 1));
> +
> +	if (delay_is_fast(dc))
> +		flush_delayed_bios_fast(dc, true);
> +	else {
> +		del_timer_sync(&dc->delay_timer);
> +		flush_bios(flush_delayed_bios(dc, true));
> +	}
>  }
>  
>  static void delay_resume(struct dm_target *ti)
> @@ -356,7 +429,7 @@ static int delay_iterate_devices(struct dm_target *ti,
>  
>  static struct target_type delay_target = {
>  	.name	     = "delay",
> -	.version     = {1, 3, 0},
> +	.version     = {1, 4, 0},
>  	.features    = DM_TARGET_PASSES_INTEGRITY,
>  	.module      = THIS_MODULE,
>  	.ctr	     = delay_ctr,
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
  2023-11-29  8:28 ` Mikulas Patocka
@ 2023-11-29 10:02   ` Christian Loehle
  2023-11-29 17:28     ` Mikulas Patocka
  0 siblings, 1 reply; 9+ messages in thread
From: Christian Loehle @ 2023-11-29 10:02 UTC (permalink / raw)
  To: Mikulas Patocka, Sasha Levin
  Cc: stable-commits, stable, Alasdair Kergon, Mike Snitzer, dm-devel

Hi Mikulas,
Agreed and thanks for fixing.
Has this been selected for stable because of:
6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and delay_bio")
If so, I would volunteer do the backports for that for you at least.

BR,
Christian

On 29/11/2023 08:28, Mikulas Patocka wrote:
> Hi
> 
> This patch doesn't fix any bug (and introduces several serious bugs), so 
> it shouldn't be backported at all.
> 
> Mikulas
> 
> 
> On Tue, 28 Nov 2023, Sasha Levin wrote:
> 
>> This is a note to let you know that I've just added the patch titled
>>
>>     dm delay: for short delays, use kthread instead of timers and wq
>>
>> to the 6.6-stable tree which can be found at:
>>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
>>
>> The filename of the patch is:
>>      dm-delay-for-short-delays-use-kthread-instead-of-tim.patch
>> and it can be found in the queue-6.6 subdirectory.
>>
>> If you, or anyone else, feels it should not be added to the stable tree,
>> please let <stable@vger.kernel.org> know about it.
>>
>>
>>
>> commit 976fd593415e170a8ed5db68683b280d5876982d
>> Author: Christian Loehle <christian.loehle@arm.com>
>> Date:   Fri Oct 20 12:46:05 2023 +0100
>>
>>     dm delay: for short delays, use kthread instead of timers and wq
>>     
>>     [ Upstream commit 70bbeb29fab09d6ea6cfe64109db60a97d84d739 ]
>>     
>>     DM delay's current design of using timers and wq to realize the delays
>>     is insufficient for delays below ~50ms.
>>     
>>     This commit enhances the design to use a kthread to flush the expired
>>     delays, trading some CPU time (in some cases) for better delay
>>     accuracy and delays closer to what the user requested for smaller
>>     delays. The new design is chosen as long as all the delays are below
>>     50ms.
>>     
>>     Since bios can't be completed in interrupt context using a kthread
>>     is probably the most reasonable way to approach this.
>>     
>>     Testing with
>>     echo "0 2097152 zero" | dmsetup create dm-zeros
>>     for i in $(seq 0 20);
>>     do
>>       echo "0 2097152 delay /dev/mapper/dm-zeros 0 $i" | dmsetup create dm-delay-${i}ms;
>>     done
>>     
>>     Some performance numbers for comparison, on beaglebone black (single
>>     core) CONFIG_HZ_1000=y:
>>     
>>     fio --name=1msread --rw=randread --bs=4k --runtime=60 --time_based \
>>         --filename=/dev/mapper/dm-delay-1ms
>>     Theoretical maximum: 1000 IOPS
>>     Previous: 250 IOPS
>>     Kthread: 500 IOPS
>>     
>>     fio --name=10msread --rw=randread --bs=4k --runtime=60 --time_based \
>>         --filename=/dev/mapper/dm-delay-10ms
>>     Theoretical maximum: 100 IOPS
>>     Previous: 45 IOPS
>>     Kthread: 50 IOPS
>>     
>>     fio --name=1mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \
>>         --time_based --filename=/dev/mapper/dm-delay-1ms
>>     Theoretical maximum: 1000 IOPS
>>     Previous: 498 IOPS
>>     Kthread: 1000 IOPS
>>     
>>     fio --name=10mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \
>>         --time_based --filename=/dev/mapper/dm-delay-10ms
>>     Theoretical maximum: 100 IOPS
>>     Previous: 90 IOPS
>>     Kthread: 100 IOPS
>>     
>>     (This one is just to prove the new design isn't impacting throughput,
>>     not really about delays):
>>     fio --name=10mswriteasync --rw=randwrite --direct=1 --bs=4k \
>>         --runtime=60 --time_based --filename=/dev/mapper/dm-delay-10ms \
>>         --numjobs=32 --iodepth=64 --ioengine=libaio --group_reporting
>>     Previous: 13.3k IOPS
>>     Kthread: 13.3k IOPS
>>     
>>     Signed-off-by: Christian Loehle <christian.loehle@arm.com>
>>     [Harshit: kthread_create error handling fix in delay_ctr]
>>     Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
>>     Signed-off-by: Mike Snitzer <snitzer@kernel.org>
>>     Stable-dep-of: 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and delay_bio")
>>     Signed-off-by: Sasha Levin <sashal@kernel.org>
>>
>> diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
>> index 7433525e59856..efd510984e259 100644
>> --- a/drivers/md/dm-delay.c
>> +++ b/drivers/md/dm-delay.c
>> @@ -13,6 +13,7 @@
>>  #include <linux/blkdev.h>
>>  #include <linux/bio.h>
>>  #include <linux/slab.h>
>> +#include <linux/kthread.h>
>>  
>>  #include <linux/device-mapper.h>
>>  
>> @@ -31,6 +32,7 @@ struct delay_c {
>>  	struct workqueue_struct *kdelayd_wq;
>>  	struct work_struct flush_expired_bios;
>>  	struct list_head delayed_bios;
>> +	struct task_struct *worker;
>>  	atomic_t may_delay;
>>  
>>  	struct delay_class read;
>> @@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires)
>>  	mutex_unlock(&dc->timer_lock);
>>  }
>>  
>> +static inline bool delay_is_fast(struct delay_c *dc)
>> +{
>> +	return !!dc->worker;
>> +}
>> +
>> +static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
>> +{
>> +	struct dm_delay_info *delayed, *next;
>> +
>> +	mutex_lock(&delayed_bios_lock);
>> +	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
>> +		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
>> +			struct bio *bio = dm_bio_from_per_bio_data(delayed,
>> +						sizeof(struct dm_delay_info));
>> +			list_del(&delayed->list);
>> +			dm_submit_bio_remap(bio, NULL);
>> +			delayed->class->ops--;
>> +		}
>> +	}
>> +	mutex_unlock(&delayed_bios_lock);
>> +}
>> +
>> +static int flush_worker_fn(void *data)
>> +{
>> +	struct delay_c *dc = data;
>> +
>> +	while (1) {
>> +		flush_delayed_bios_fast(dc, false);
>> +		if (unlikely(list_empty(&dc->delayed_bios))) {
>> +			set_current_state(TASK_INTERRUPTIBLE);
>> +			schedule();
>> +		} else
>> +			cond_resched();
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>  static void flush_bios(struct bio *bio)
>>  {
>>  	struct bio *n;
>> @@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio)
>>  	}
>>  }
>>  
>> -static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
>> +static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
>>  {
>>  	struct dm_delay_info *delayed, *next;
>>  	unsigned long next_expires = 0;
>> @@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work)
>>  	struct delay_c *dc;
>>  
>>  	dc = container_of(work, struct delay_c, flush_expired_bios);
>> -	flush_bios(flush_delayed_bios(dc, 0));
>> +	if (delay_is_fast(dc))
>> +		flush_delayed_bios_fast(dc, false);
>> +	else
>> +		flush_bios(flush_delayed_bios(dc, false));
>>  }
>>  
>>  static void delay_dtr(struct dm_target *ti)
>> @@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti)
>>  		dm_put_device(ti, dc->write.dev);
>>  	if (dc->flush.dev)
>>  		dm_put_device(ti, dc->flush.dev);
>> +	if (dc->worker)
>> +		kthread_stop(dc->worker);
>>  
>> -	mutex_destroy(&dc->timer_lock);
>> +	if (!delay_is_fast(dc))
>> +		mutex_destroy(&dc->timer_lock);
>>  
>>  	kfree(dc);
>>  }
>> @@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>>  {
>>  	struct delay_c *dc;
>>  	int ret;
>> +	unsigned int max_delay;
>>  
>>  	if (argc != 3 && argc != 6 && argc != 9) {
>>  		ti->error = "Requires exactly 3, 6 or 9 arguments";
>> @@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>>  	}
>>  
>>  	ti->private = dc;
>> -	timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
>> -	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
>>  	INIT_LIST_HEAD(&dc->delayed_bios);
>> -	mutex_init(&dc->timer_lock);
>>  	atomic_set(&dc->may_delay, 1);
>>  	dc->argc = argc;
>>  
>>  	ret = delay_class_ctr(ti, &dc->read, argv);
>>  	if (ret)
>>  		goto bad;
>> +	max_delay = dc->read.delay;
>>  
>>  	if (argc == 3) {
>>  		ret = delay_class_ctr(ti, &dc->write, argv);
>> @@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>>  		ret = delay_class_ctr(ti, &dc->flush, argv);
>>  		if (ret)
>>  			goto bad;
>> +		max_delay = max(max_delay, dc->write.delay);
>> +		max_delay = max(max_delay, dc->flush.delay);
>>  		goto out;
>>  	}
>>  
>> @@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>>  		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
>>  		if (ret)
>>  			goto bad;
>> +		max_delay = max(max_delay, dc->flush.delay);
>>  		goto out;
>>  	}
>>  
>>  	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
>>  	if (ret)
>>  		goto bad;
>> +	max_delay = max(max_delay, dc->flush.delay);
>>  
>>  out:
>> -	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
>> -	if (!dc->kdelayd_wq) {
>> -		ret = -EINVAL;
>> -		DMERR("Couldn't start kdelayd");
>> -		goto bad;
>> +	if (max_delay < 50) {
>> +		/*
>> +		 * In case of small requested delays, use kthread instead of
>> +		 * timers and workqueue to achieve better latency.
>> +		 */
>> +		dc->worker = kthread_create(&flush_worker_fn, dc,
>> +					    "dm-delay-flush-worker");
>> +		if (IS_ERR(dc->worker)) {
>> +			ret = PTR_ERR(dc->worker);
>> +			goto bad;
>> +		}
>> +	} else {
>> +		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
>> +		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
>> +		mutex_init(&dc->timer_lock);
>> +		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
>> +		if (!dc->kdelayd_wq) {
>> +			ret = -EINVAL;
>> +			DMERR("Couldn't start kdelayd");
>> +			goto bad;
>> +		}
>>  	}
>>  
>>  	ti->num_flush_bios = 1;
>> @@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
>>  	list_add_tail(&delayed->list, &dc->delayed_bios);
>>  	mutex_unlock(&delayed_bios_lock);
>>  
>> -	queue_timeout(dc, expires);
>> +	if (delay_is_fast(dc))
>> +		wake_up_process(dc->worker);
>> +	else
>> +		queue_timeout(dc, expires);
>>  
>>  	return DM_MAPIO_SUBMITTED;
>>  }
>> @@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti)
>>  	struct delay_c *dc = ti->private;
>>  
>>  	atomic_set(&dc->may_delay, 0);
>> -	del_timer_sync(&dc->delay_timer);
>> -	flush_bios(flush_delayed_bios(dc, 1));
>> +
>> +	if (delay_is_fast(dc))
>> +		flush_delayed_bios_fast(dc, true);
>> +	else {
>> +		del_timer_sync(&dc->delay_timer);
>> +		flush_bios(flush_delayed_bios(dc, true));
>> +	}
>>  }
>>  
>>  static void delay_resume(struct dm_target *ti)
>> @@ -356,7 +429,7 @@ static int delay_iterate_devices(struct dm_target *ti,
>>  
>>  static struct target_type delay_target = {
>>  	.name	     = "delay",
>> -	.version     = {1, 3, 0},
>> +	.version     = {1, 4, 0},
>>  	.features    = DM_TARGET_PASSES_INTEGRITY,
>>  	.module      = THIS_MODULE,
>>  	.ctr	     = delay_ctr,
>>
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
  2023-11-29 10:02   ` Christian Loehle
@ 2023-11-29 17:28     ` Mikulas Patocka
  2023-11-29 17:38       ` Sasha Levin
  0 siblings, 1 reply; 9+ messages in thread
From: Mikulas Patocka @ 2023-11-29 17:28 UTC (permalink / raw)
  To: Christian Loehle
  Cc: Sasha Levin, stable-commits, stable, Alasdair Kergon,
	Mike Snitzer, dm-devel



On Wed, 29 Nov 2023, Christian Loehle wrote:

> Hi Mikulas,
> Agreed and thanks for fixing.
> Has this been selected for stable because of:
> 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and delay_bio")
> If so, I would volunteer do the backports for that for you at least.

I wouldn't backport this patch - it is an enhancement, not a bugfix, so it 
doesn't qualify for the stable kernel backports.

Mikulas

> BR,
> Christian
> 
> On 29/11/2023 08:28, Mikulas Patocka wrote:
> > Hi
> > 
> > This patch doesn't fix any bug (and introduces several serious bugs), so 
> > it shouldn't be backported at all.
> > 
> > Mikulas
> > 
> > 
> > On Tue, 28 Nov 2023, Sasha Levin wrote:
> > 
> >> This is a note to let you know that I've just added the patch titled
> >>
> >>     dm delay: for short delays, use kthread instead of timers and wq
> >>
> >> to the 6.6-stable tree which can be found at:
> >>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
> >>
> >> The filename of the patch is:
> >>      dm-delay-for-short-delays-use-kthread-instead-of-tim.patch
> >> and it can be found in the queue-6.6 subdirectory.
> >>
> >> If you, or anyone else, feels it should not be added to the stable tree,
> >> please let <stable@vger.kernel.org> know about it.
> >>
> >>
> >>
> >> commit 976fd593415e170a8ed5db68683b280d5876982d
> >> Author: Christian Loehle <christian.loehle@arm.com>
> >> Date:   Fri Oct 20 12:46:05 2023 +0100
> >>
> >>     dm delay: for short delays, use kthread instead of timers and wq
> >>     
> >>     [ Upstream commit 70bbeb29fab09d6ea6cfe64109db60a97d84d739 ]
> >>     
> >>     DM delay's current design of using timers and wq to realize the delays
> >>     is insufficient for delays below ~50ms.
> >>     
> >>     This commit enhances the design to use a kthread to flush the expired
> >>     delays, trading some CPU time (in some cases) for better delay
> >>     accuracy and delays closer to what the user requested for smaller
> >>     delays. The new design is chosen as long as all the delays are below
> >>     50ms.
> >>     
> >>     Since bios can't be completed in interrupt context using a kthread
> >>     is probably the most reasonable way to approach this.
> >>     
> >>     Testing with
> >>     echo "0 2097152 zero" | dmsetup create dm-zeros
> >>     for i in $(seq 0 20);
> >>     do
> >>       echo "0 2097152 delay /dev/mapper/dm-zeros 0 $i" | dmsetup create dm-delay-${i}ms;
> >>     done
> >>     
> >>     Some performance numbers for comparison, on beaglebone black (single
> >>     core) CONFIG_HZ_1000=y:
> >>     
> >>     fio --name=1msread --rw=randread --bs=4k --runtime=60 --time_based \
> >>         --filename=/dev/mapper/dm-delay-1ms
> >>     Theoretical maximum: 1000 IOPS
> >>     Previous: 250 IOPS
> >>     Kthread: 500 IOPS
> >>     
> >>     fio --name=10msread --rw=randread --bs=4k --runtime=60 --time_based \
> >>         --filename=/dev/mapper/dm-delay-10ms
> >>     Theoretical maximum: 100 IOPS
> >>     Previous: 45 IOPS
> >>     Kthread: 50 IOPS
> >>     
> >>     fio --name=1mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \
> >>         --time_based --filename=/dev/mapper/dm-delay-1ms
> >>     Theoretical maximum: 1000 IOPS
> >>     Previous: 498 IOPS
> >>     Kthread: 1000 IOPS
> >>     
> >>     fio --name=10mswrite --rw=randwrite --direct=1 --bs=4k --runtime=60 \
> >>         --time_based --filename=/dev/mapper/dm-delay-10ms
> >>     Theoretical maximum: 100 IOPS
> >>     Previous: 90 IOPS
> >>     Kthread: 100 IOPS
> >>     
> >>     (This one is just to prove the new design isn't impacting throughput,
> >>     not really about delays):
> >>     fio --name=10mswriteasync --rw=randwrite --direct=1 --bs=4k \
> >>         --runtime=60 --time_based --filename=/dev/mapper/dm-delay-10ms \
> >>         --numjobs=32 --iodepth=64 --ioengine=libaio --group_reporting
> >>     Previous: 13.3k IOPS
> >>     Kthread: 13.3k IOPS
> >>     
> >>     Signed-off-by: Christian Loehle <christian.loehle@arm.com>
> >>     [Harshit: kthread_create error handling fix in delay_ctr]
> >>     Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
> >>     Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> >>     Stable-dep-of: 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and delay_bio")
> >>     Signed-off-by: Sasha Levin <sashal@kernel.org>
> >>
> >> diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
> >> index 7433525e59856..efd510984e259 100644
> >> --- a/drivers/md/dm-delay.c
> >> +++ b/drivers/md/dm-delay.c
> >> @@ -13,6 +13,7 @@
> >>  #include <linux/blkdev.h>
> >>  #include <linux/bio.h>
> >>  #include <linux/slab.h>
> >> +#include <linux/kthread.h>
> >>  
> >>  #include <linux/device-mapper.h>
> >>  
> >> @@ -31,6 +32,7 @@ struct delay_c {
> >>  	struct workqueue_struct *kdelayd_wq;
> >>  	struct work_struct flush_expired_bios;
> >>  	struct list_head delayed_bios;
> >> +	struct task_struct *worker;
> >>  	atomic_t may_delay;
> >>  
> >>  	struct delay_class read;
> >> @@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires)
> >>  	mutex_unlock(&dc->timer_lock);
> >>  }
> >>  
> >> +static inline bool delay_is_fast(struct delay_c *dc)
> >> +{
> >> +	return !!dc->worker;
> >> +}
> >> +
> >> +static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
> >> +{
> >> +	struct dm_delay_info *delayed, *next;
> >> +
> >> +	mutex_lock(&delayed_bios_lock);
> >> +	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
> >> +		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
> >> +			struct bio *bio = dm_bio_from_per_bio_data(delayed,
> >> +						sizeof(struct dm_delay_info));
> >> +			list_del(&delayed->list);
> >> +			dm_submit_bio_remap(bio, NULL);
> >> +			delayed->class->ops--;
> >> +		}
> >> +	}
> >> +	mutex_unlock(&delayed_bios_lock);
> >> +}
> >> +
> >> +static int flush_worker_fn(void *data)
> >> +{
> >> +	struct delay_c *dc = data;
> >> +
> >> +	while (1) {
> >> +		flush_delayed_bios_fast(dc, false);
> >> +		if (unlikely(list_empty(&dc->delayed_bios))) {
> >> +			set_current_state(TASK_INTERRUPTIBLE);
> >> +			schedule();
> >> +		} else
> >> +			cond_resched();
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >>  static void flush_bios(struct bio *bio)
> >>  {
> >>  	struct bio *n;
> >> @@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio)
> >>  	}
> >>  }
> >>  
> >> -static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
> >> +static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
> >>  {
> >>  	struct dm_delay_info *delayed, *next;
> >>  	unsigned long next_expires = 0;
> >> @@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work)
> >>  	struct delay_c *dc;
> >>  
> >>  	dc = container_of(work, struct delay_c, flush_expired_bios);
> >> -	flush_bios(flush_delayed_bios(dc, 0));
> >> +	if (delay_is_fast(dc))
> >> +		flush_delayed_bios_fast(dc, false);
> >> +	else
> >> +		flush_bios(flush_delayed_bios(dc, false));
> >>  }
> >>  
> >>  static void delay_dtr(struct dm_target *ti)
> >> @@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti)
> >>  		dm_put_device(ti, dc->write.dev);
> >>  	if (dc->flush.dev)
> >>  		dm_put_device(ti, dc->flush.dev);
> >> +	if (dc->worker)
> >> +		kthread_stop(dc->worker);
> >>  
> >> -	mutex_destroy(&dc->timer_lock);
> >> +	if (!delay_is_fast(dc))
> >> +		mutex_destroy(&dc->timer_lock);
> >>  
> >>  	kfree(dc);
> >>  }
> >> @@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> >>  {
> >>  	struct delay_c *dc;
> >>  	int ret;
> >> +	unsigned int max_delay;
> >>  
> >>  	if (argc != 3 && argc != 6 && argc != 9) {
> >>  		ti->error = "Requires exactly 3, 6 or 9 arguments";
> >> @@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> >>  	}
> >>  
> >>  	ti->private = dc;
> >> -	timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
> >> -	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
> >>  	INIT_LIST_HEAD(&dc->delayed_bios);
> >> -	mutex_init(&dc->timer_lock);
> >>  	atomic_set(&dc->may_delay, 1);
> >>  	dc->argc = argc;
> >>  
> >>  	ret = delay_class_ctr(ti, &dc->read, argv);
> >>  	if (ret)
> >>  		goto bad;
> >> +	max_delay = dc->read.delay;
> >>  
> >>  	if (argc == 3) {
> >>  		ret = delay_class_ctr(ti, &dc->write, argv);
> >> @@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> >>  		ret = delay_class_ctr(ti, &dc->flush, argv);
> >>  		if (ret)
> >>  			goto bad;
> >> +		max_delay = max(max_delay, dc->write.delay);
> >> +		max_delay = max(max_delay, dc->flush.delay);
> >>  		goto out;
> >>  	}
> >>  
> >> @@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> >>  		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
> >>  		if (ret)
> >>  			goto bad;
> >> +		max_delay = max(max_delay, dc->flush.delay);
> >>  		goto out;
> >>  	}
> >>  
> >>  	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
> >>  	if (ret)
> >>  		goto bad;
> >> +	max_delay = max(max_delay, dc->flush.delay);
> >>  
> >>  out:
> >> -	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
> >> -	if (!dc->kdelayd_wq) {
> >> -		ret = -EINVAL;
> >> -		DMERR("Couldn't start kdelayd");
> >> -		goto bad;
> >> +	if (max_delay < 50) {
> >> +		/*
> >> +		 * In case of small requested delays, use kthread instead of
> >> +		 * timers and workqueue to achieve better latency.
> >> +		 */
> >> +		dc->worker = kthread_create(&flush_worker_fn, dc,
> >> +					    "dm-delay-flush-worker");
> >> +		if (IS_ERR(dc->worker)) {
> >> +			ret = PTR_ERR(dc->worker);
> >> +			goto bad;
> >> +		}
> >> +	} else {
> >> +		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
> >> +		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
> >> +		mutex_init(&dc->timer_lock);
> >> +		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
> >> +		if (!dc->kdelayd_wq) {
> >> +			ret = -EINVAL;
> >> +			DMERR("Couldn't start kdelayd");
> >> +			goto bad;
> >> +		}
> >>  	}
> >>  
> >>  	ti->num_flush_bios = 1;
> >> @@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
> >>  	list_add_tail(&delayed->list, &dc->delayed_bios);
> >>  	mutex_unlock(&delayed_bios_lock);
> >>  
> >> -	queue_timeout(dc, expires);
> >> +	if (delay_is_fast(dc))
> >> +		wake_up_process(dc->worker);
> >> +	else
> >> +		queue_timeout(dc, expires);
> >>  
> >>  	return DM_MAPIO_SUBMITTED;
> >>  }
> >> @@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti)
> >>  	struct delay_c *dc = ti->private;
> >>  
> >>  	atomic_set(&dc->may_delay, 0);
> >> -	del_timer_sync(&dc->delay_timer);
> >> -	flush_bios(flush_delayed_bios(dc, 1));
> >> +
> >> +	if (delay_is_fast(dc))
> >> +		flush_delayed_bios_fast(dc, true);
> >> +	else {
> >> +		del_timer_sync(&dc->delay_timer);
> >> +		flush_bios(flush_delayed_bios(dc, true));
> >> +	}
> >>  }
> >>  
> >>  static void delay_resume(struct dm_target *ti)
> >> @@ -356,7 +429,7 @@ static int delay_iterate_devices(struct dm_target *ti,
> >>  
> >>  static struct target_type delay_target = {
> >>  	.name	     = "delay",
> >> -	.version     = {1, 3, 0},
> >> +	.version     = {1, 4, 0},
> >>  	.features    = DM_TARGET_PASSES_INTEGRITY,
> >>  	.module      = THIS_MODULE,
> >>  	.ctr	     = delay_ctr,
> >>
> > 
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
  2023-11-29 17:28     ` Mikulas Patocka
@ 2023-11-29 17:38       ` Sasha Levin
  2023-11-29 18:16         ` Mikulas Patocka
  0 siblings, 1 reply; 9+ messages in thread
From: Sasha Levin @ 2023-11-29 17:38 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Christian Loehle, stable-commits, stable, Alasdair Kergon,
	Mike Snitzer, dm-devel

On Wed, Nov 29, 2023 at 06:28:16PM +0100, Mikulas Patocka wrote:
>
>
>On Wed, 29 Nov 2023, Christian Loehle wrote:
>
>> Hi Mikulas,
>> Agreed and thanks for fixing.
>> Has this been selected for stable because of:
>> 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and delay_bio")
>> If so, I would volunteer do the backports for that for you at least.
>
>I wouldn't backport this patch - it is an enhancement, not a bugfix, so it
>doesn't qualify for the stable kernel backports.

Right - this watch was selected as a dependency for 6fc45b6ed921
("dm-delay: fix a race between delay_presuspend and delay_bio").

In general, unless it's impractical, we'd rather take a dependency chain
rather than deal with a non-trivial backport as those tend to have
issues longer term.

-- 
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
  2023-11-29 17:38       ` Sasha Levin
@ 2023-11-29 18:16         ` Mikulas Patocka
  2023-11-29 18:52           ` Sasha Levin
  0 siblings, 1 reply; 9+ messages in thread
From: Mikulas Patocka @ 2023-11-29 18:16 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Christian Loehle, stable-commits, stable, Alasdair Kergon,
	Mike Snitzer, dm-devel



On Wed, 29 Nov 2023, Sasha Levin wrote:

> On Wed, Nov 29, 2023 at 06:28:16PM +0100, Mikulas Patocka wrote:
> >
> >
> >On Wed, 29 Nov 2023, Christian Loehle wrote:
> >
> >> Hi Mikulas,
> >> Agreed and thanks for fixing.
> >> Has this been selected for stable because of:
> >> 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and
> >> delay_bio")
> >> If so, I would volunteer do the backports for that for you at least.
> >
> >I wouldn't backport this patch - it is an enhancement, not a bugfix, so it
> >doesn't qualify for the stable kernel backports.
> 
> Right - this watch was selected as a dependency for 6fc45b6ed921
> ("dm-delay: fix a race between delay_presuspend and delay_bio").
> 
> In general, unless it's impractical, we'd rather take a dependency chain
> rather than deal with a non-trivial backport as those tend to have
> issues longer term.
> 
> -- 
> Thanks,
> Sasha

The patch 70bbeb29fab0 ("dm delay: for short delays, use kthread instead 
of timers and wq") changes behavior of dm-delay from using timers to 
polling, so it may cause problems to people running legacy kernels - the 
polling consumes more CPU time than the timers - so I think it shouldn't 
go to the stable kernels where users expect that there will be no 
functional change.

Here I'm submitting the patch 6fc45b6ed921 backported for 6.6.3.

Mikulas


From: Mikulas Patocka <mpatocka@redhat.com>

dm-delay: fix a race between delay_presuspend and delay_bio

In delay_presuspend, we set the atomic variable may_delay and then stop
the timer and flush pending bios. The intention here is to prevent the
delay target from re-arming the timer again.

However, this test is racy. Suppose that one thread goes to delay_bio,
sees that dc->may_delay is one and proceeds; now, another theread executes
delay_presuspend, it sets, dc->may_delay to zero, deletes the timer and
flushes pending bios. Now, the first thread continues and adds the bio to
delayed->list despite the fact that dc->may_delay is false.

In order to fix this bug, we change may_delay's type from atomic_t to bool
and we read and write it only while holding the delayed_bios_lock mutex.
Note that we don't have to grab the mutex in delay_resume because there
are no bios in flight at this point.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org

---
 drivers/md/dm-delay.c |   17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

Index: linux-stable/drivers/md/dm-delay.c
===================================================================
--- linux-stable.orig/drivers/md/dm-delay.c	2023-11-29 19:03:03.000000000 +0100
+++ linux-stable/drivers/md/dm-delay.c	2023-11-29 19:03:03.000000000 +0100
@@ -31,7 +31,7 @@ struct delay_c {
 	struct workqueue_struct *kdelayd_wq;
 	struct work_struct flush_expired_bios;
 	struct list_head delayed_bios;
-	atomic_t may_delay;
+	bool may_delay;
 
 	struct delay_class read;
 	struct delay_class write;
@@ -192,7 +192,7 @@ static int delay_ctr(struct dm_target *t
 	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
 	INIT_LIST_HEAD(&dc->delayed_bios);
 	mutex_init(&dc->timer_lock);
-	atomic_set(&dc->may_delay, 1);
+	dc->may_delay = true;
 	dc->argc = argc;
 
 	ret = delay_class_ctr(ti, &dc->read, argv);
@@ -247,7 +247,7 @@ static int delay_bio(struct delay_c *dc,
 	struct dm_delay_info *delayed;
 	unsigned long expires = 0;
 
-	if (!c->delay || !atomic_read(&dc->may_delay))
+	if (!c->delay)
 		return DM_MAPIO_REMAPPED;
 
 	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -256,6 +256,10 @@ static int delay_bio(struct delay_c *dc,
 	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
 
 	mutex_lock(&delayed_bios_lock);
+	if (unlikely(!dc->may_delay)) {
+		mutex_unlock(&delayed_bios_lock);
+		return DM_MAPIO_REMAPPED;
+	}
 	c->ops++;
 	list_add_tail(&delayed->list, &dc->delayed_bios);
 	mutex_unlock(&delayed_bios_lock);
@@ -269,7 +273,10 @@ static void delay_presuspend(struct dm_t
 {
 	struct delay_c *dc = ti->private;
 
-	atomic_set(&dc->may_delay, 0);
+	mutex_lock(&delayed_bios_lock);
+	dc->may_delay = false;
+	mutex_unlock(&delayed_bios_lock);
+
 	del_timer_sync(&dc->delay_timer);
 	flush_bios(flush_delayed_bios(dc, 1));
 }
@@ -278,7 +285,7 @@ static void delay_resume(struct dm_targe
 {
 	struct delay_c *dc = ti->private;
 
-	atomic_set(&dc->may_delay, 1);
+	dc->may_delay = true;
 }
 
 static int delay_map(struct dm_target *ti, struct bio *bio)


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
  2023-11-29 18:16         ` Mikulas Patocka
@ 2023-11-29 18:52           ` Sasha Levin
  2023-11-29 19:02             ` Mikulas Patocka
  0 siblings, 1 reply; 9+ messages in thread
From: Sasha Levin @ 2023-11-29 18:52 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Christian Loehle, stable-commits, stable, Alasdair Kergon,
	Mike Snitzer, dm-devel

On Wed, Nov 29, 2023 at 07:16:52PM +0100, Mikulas Patocka wrote:
>
>
>On Wed, 29 Nov 2023, Sasha Levin wrote:
>
>> On Wed, Nov 29, 2023 at 06:28:16PM +0100, Mikulas Patocka wrote:
>> >
>> >
>> >On Wed, 29 Nov 2023, Christian Loehle wrote:
>> >
>> >> Hi Mikulas,
>> >> Agreed and thanks for fixing.
>> >> Has this been selected for stable because of:
>> >> 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and
>> >> delay_bio")
>> >> If so, I would volunteer do the backports for that for you at least.
>> >
>> >I wouldn't backport this patch - it is an enhancement, not a bugfix, so it
>> >doesn't qualify for the stable kernel backports.
>>
>> Right - this watch was selected as a dependency for 6fc45b6ed921
>> ("dm-delay: fix a race between delay_presuspend and delay_bio").
>>
>> In general, unless it's impractical, we'd rather take a dependency chain
>> rather than deal with a non-trivial backport as those tend to have
>> issues longer term.
>>
>> --
>> Thanks,
>> Sasha
>
>The patch 70bbeb29fab0 ("dm delay: for short delays, use kthread instead
>of timers and wq") changes behavior of dm-delay from using timers to
>polling, so it may cause problems to people running legacy kernels - the
>polling consumes more CPU time than the timers - so I think it shouldn't
>go to the stable kernels where users expect that there will be no
>functional change.
>
>Here I'm submitting the patch 6fc45b6ed921 backported for 6.6.3.

Is this okay for 6.1 too?

-- 
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
  2023-11-29 18:52           ` Sasha Levin
@ 2023-11-29 19:02             ` Mikulas Patocka
  2023-11-29 20:00               ` Sasha Levin
  0 siblings, 1 reply; 9+ messages in thread
From: Mikulas Patocka @ 2023-11-29 19:02 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Christian Loehle, stable-commits, stable, Alasdair Kergon,
	Mike Snitzer, dm-devel



On Wed, 29 Nov 2023, Sasha Levin wrote:

> On Wed, Nov 29, 2023 at 07:16:52PM +0100, Mikulas Patocka wrote:
> >
> >
> >On Wed, 29 Nov 2023, Sasha Levin wrote:
> >
> >> On Wed, Nov 29, 2023 at 06:28:16PM +0100, Mikulas Patocka wrote:
> >> >
> >> >
> >> >On Wed, 29 Nov 2023, Christian Loehle wrote:
> >> >
> >> >> Hi Mikulas,
> >> >> Agreed and thanks for fixing.
> >> >> Has this been selected for stable because of:
> >> >> 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and
> >> >> delay_bio")
> >> >> If so, I would volunteer do the backports for that for you at least.
> >> >
> >> >I wouldn't backport this patch - it is an enhancement, not a bugfix, so it
> >> >doesn't qualify for the stable kernel backports.
> >>
> >> Right - this watch was selected as a dependency for 6fc45b6ed921
> >> ("dm-delay: fix a race between delay_presuspend and delay_bio").
> >>
> >> In general, unless it's impractical, we'd rather take a dependency chain
> >> rather than deal with a non-trivial backport as those tend to have
> >> issues longer term.
> >>
> >> --
> >> Thanks,
> >> Sasha
> >
> >The patch 70bbeb29fab0 ("dm delay: for short delays, use kthread instead
> >of timers and wq") changes behavior of dm-delay from using timers to
> >polling, so it may cause problems to people running legacy kernels - the
> >polling consumes more CPU time than the timers - so I think it shouldn't
> >go to the stable kernels where users expect that there will be no
> >functional change.
> >
> >Here I'm submitting the patch 6fc45b6ed921 backported for 6.6.3.
> 
> Is this okay for 6.1 too?

Yes, it is. It applies to kernels as old as 4.19.

Mikulas


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree
  2023-11-29 19:02             ` Mikulas Patocka
@ 2023-11-29 20:00               ` Sasha Levin
  0 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2023-11-29 20:00 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Christian Loehle, stable-commits, stable, Alasdair Kergon,
	Mike Snitzer, dm-devel

On Wed, Nov 29, 2023 at 08:02:23PM +0100, Mikulas Patocka wrote:
>
>
>On Wed, 29 Nov 2023, Sasha Levin wrote:
>
>> On Wed, Nov 29, 2023 at 07:16:52PM +0100, Mikulas Patocka wrote:
>> >
>> >
>> >On Wed, 29 Nov 2023, Sasha Levin wrote:
>> >
>> >> On Wed, Nov 29, 2023 at 06:28:16PM +0100, Mikulas Patocka wrote:
>> >> >
>> >> >
>> >> >On Wed, 29 Nov 2023, Christian Loehle wrote:
>> >> >
>> >> >> Hi Mikulas,
>> >> >> Agreed and thanks for fixing.
>> >> >> Has this been selected for stable because of:
>> >> >> 6fc45b6ed921 ("dm-delay: fix a race between delay_presuspend and
>> >> >> delay_bio")
>> >> >> If so, I would volunteer do the backports for that for you at least.
>> >> >
>> >> >I wouldn't backport this patch - it is an enhancement, not a bugfix, so it
>> >> >doesn't qualify for the stable kernel backports.
>> >>
>> >> Right - this watch was selected as a dependency for 6fc45b6ed921
>> >> ("dm-delay: fix a race between delay_presuspend and delay_bio").
>> >>
>> >> In general, unless it's impractical, we'd rather take a dependency chain
>> >> rather than deal with a non-trivial backport as those tend to have
>> >> issues longer term.
>> >>
>> >> --
>> >> Thanks,
>> >> Sasha
>> >
>> >The patch 70bbeb29fab0 ("dm delay: for short delays, use kthread instead
>> >of timers and wq") changes behavior of dm-delay from using timers to
>> >polling, so it may cause problems to people running legacy kernels - the
>> >polling consumes more CPU time than the timers - so I think it shouldn't
>> >go to the stable kernels where users expect that there will be no
>> >functional change.
>> >
>> >Here I'm submitting the patch 6fc45b6ed921 backported for 6.6.3.
>>
>> Is this okay for 6.1 too?
>
>Yes, it is. It applies to kernels as old as 4.19.

Great, applied all the way back to 4.19. Thanks!

-- 
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-11-29 20:00 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-11-29  2:54 Patch "dm delay: for short delays, use kthread instead of timers and wq" has been added to the 6.6-stable tree Sasha Levin
2023-11-29  8:28 ` Mikulas Patocka
2023-11-29 10:02   ` Christian Loehle
2023-11-29 17:28     ` Mikulas Patocka
2023-11-29 17:38       ` Sasha Levin
2023-11-29 18:16         ` Mikulas Patocka
2023-11-29 18:52           ` Sasha Levin
2023-11-29 19:02             ` Mikulas Patocka
2023-11-29 20:00               ` Sasha Levin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.