linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH -next 0/8] iocost bugfix
@ 2022-11-28 15:44 Li Nan
  2022-11-28 15:44 ` [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write() Li Nan
                   ` (7 more replies)
  0 siblings, 8 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

This patchset fixes problem in iocost. It contains Yu Kuai's patchset he
sent before because of conflicts.

Li Nan (4):
  blk-iocost: fix divide by 0 error in calc_lcoefs()
  blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in
    ioc_refresh_params()
  blk-iocost: fix possible UAF in ioc_pd_free
  block: fix null-pointer dereference in ioc_pd_init

Yu Kuai (4):
  blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write()
  blk-iocost: improve hanlder of match_u64()
  blk-iocost: don't allow to configure bio based device
  blk-iocost: read params inside lock in sysfs apis

 block/blk-iocost.c | 114 ++++++++++++++++++++++++++++-----------------
 block/genhd.c      |   2 +-
 2 files changed, 73 insertions(+), 43 deletions(-)

-- 
2.31.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write()
  2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
@ 2022-11-28 15:44 ` Li Nan
  2022-11-28 15:44 ` [PATCH -next 2/8] blk-iocost: improve hanlder of match_u64() Li Nan
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

From: Yu Kuai <yukuai3@huawei.com>

There are no functional changes, just to make the code a litter cleaner
and follow up patches easier.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Li Nan <linan122@huawei.com>
---
 block/blk-iocost.c | 62 +++++++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 37 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index f01359906c83..fd495e823db2 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3185,7 +3185,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	if (!ioc) {
 		ret = blk_iocost_init(disk);
 		if (ret)
-			goto err;
+			goto out;
 		ioc = q_to_ioc(disk->queue);
 	}
 
@@ -3197,6 +3197,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	enable = ioc->enabled;
 	user = ioc->user_qos_params;
 
+	ret = -EINVAL;
 	while ((p = strsep(&input, " \t\n"))) {
 		substring_t args[MAX_OPT_ARGS];
 		char buf[32];
@@ -3218,7 +3219,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 			else if (!strcmp(buf, "user"))
 				user = true;
 			else
-				goto einval;
+				goto out_unlock;
 			continue;
 		}
 
@@ -3228,39 +3229,39 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 		case QOS_WPPM:
 			if (match_strlcpy(buf, &args[0], sizeof(buf)) >=
 			    sizeof(buf))
-				goto einval;
+				goto out_unlock;
 			if (cgroup_parse_float(buf, 2, &v))
-				goto einval;
+				goto out_unlock;
 			if (v < 0 || v > 10000)
-				goto einval;
+				goto out_unlock;
 			qos[tok] = v * 100;
 			break;
 		case QOS_RLAT:
 		case QOS_WLAT:
 			if (match_u64(&args[0], &v))
-				goto einval;
+				goto out_unlock;
 			qos[tok] = v;
 			break;
 		case QOS_MIN:
 		case QOS_MAX:
 			if (match_strlcpy(buf, &args[0], sizeof(buf)) >=
 			    sizeof(buf))
-				goto einval;
+				goto out_unlock;
 			if (cgroup_parse_float(buf, 2, &v))
-				goto einval;
+				goto out_unlock;
 			if (v < 0)
-				goto einval;
+				goto out_unlock;
 			qos[tok] = clamp_t(s64, v * 100,
 					   VRATE_MIN_PPM, VRATE_MAX_PPM);
 			break;
 		default:
-			goto einval;
+			goto out_unlock;
 		}
 		user = true;
 	}
 
 	if (qos[QOS_MIN] > qos[QOS_MAX])
-		goto einval;
+		goto out_unlock;
 
 	if (enable) {
 		blk_stat_enable_accounting(disk->queue);
@@ -3281,21 +3282,14 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	}
 
 	ioc_refresh_params(ioc, true);
-	spin_unlock_irq(&ioc->lock);
+	ret = nbytes;
 
-	blk_mq_unquiesce_queue(disk->queue);
-	blk_mq_unfreeze_queue(disk->queue);
-
-	blkdev_put_no_open(bdev);
-	return nbytes;
-einval:
+out_unlock:
 	spin_unlock_irq(&ioc->lock);
-
 	blk_mq_unquiesce_queue(disk->queue);
 	blk_mq_unfreeze_queue(disk->queue);
 
-	ret = -EINVAL;
-err:
+out:
 	blkdev_put_no_open(bdev);
 	return ret;
 }
@@ -3364,7 +3358,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 	if (!ioc) {
 		ret = blk_iocost_init(bdev->bd_disk);
 		if (ret)
-			goto err;
+			goto out;
 		ioc = q_to_ioc(q);
 	}
 
@@ -3375,6 +3369,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 	memcpy(u, ioc->params.i_lcoefs, sizeof(u));
 	user = ioc->user_cost_model;
 
+	ret = -EINVAL;
 	while ((p = strsep(&input, " \t\n"))) {
 		substring_t args[MAX_OPT_ARGS];
 		char buf[32];
@@ -3392,20 +3387,20 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 			else if (!strcmp(buf, "user"))
 				user = true;
 			else
-				goto einval;
+				goto out_unlock;
 			continue;
 		case COST_MODEL:
 			match_strlcpy(buf, &args[0], sizeof(buf));
 			if (strcmp(buf, "linear"))
-				goto einval;
+				goto out_unlock;
 			continue;
 		}
 
 		tok = match_token(p, i_lcoef_tokens, args);
 		if (tok == NR_I_LCOEFS)
-			goto einval;
+			goto out_unlock;
 		if (match_u64(&args[0], &v))
-			goto einval;
+			goto out_unlock;
 		u[tok] = v;
 		user = true;
 	}
@@ -3416,23 +3411,16 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 	} else {
 		ioc->user_cost_model = false;
 	}
-	ioc_refresh_params(ioc, true);
-	spin_unlock_irq(&ioc->lock);
 
-	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q);
-
-	blkdev_put_no_open(bdev);
-	return nbytes;
+	ioc_refresh_params(ioc, true);
+	ret = nbytes;
 
-einval:
+out_unlock:
 	spin_unlock_irq(&ioc->lock);
-
 	blk_mq_unquiesce_queue(q);
 	blk_mq_unfreeze_queue(q);
 
-	ret = -EINVAL;
-err:
+out:
 	blkdev_put_no_open(bdev);
 	return ret;
 }
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH -next 2/8] blk-iocost: improve hanlder of match_u64()
  2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
  2022-11-28 15:44 ` [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write() Li Nan
@ 2022-11-28 15:44 ` Li Nan
  2022-11-28 15:44 ` [PATCH -next 3/8] blk-iocost: don't allow to configure bio based device Li Nan
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

From: Yu Kuai <yukuai3@huawei.com>

1) There are one place that return value of match_u64() is not checked.
2) If match_u64() failed, return value is set to -EINVAL despite that
   there are other possible errnos.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Li Nan <linan122@huawei.com>
---
 block/blk-iocost.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index fd495e823db2..c532129a1456 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3202,6 +3202,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 		substring_t args[MAX_OPT_ARGS];
 		char buf[32];
 		int tok;
+		int err;
 		s64 v;
 
 		if (!*p)
@@ -3209,7 +3210,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 
 		switch (match_token(p, qos_ctrl_tokens, args)) {
 		case QOS_ENABLE:
-			match_u64(&args[0], &v);
+			err = match_u64(&args[0], &v);
+			if (err) {
+				ret = err;
+				goto out_unlock;
+			}
+
 			enable = v;
 			continue;
 		case QOS_CTRL:
@@ -3238,8 +3244,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 			break;
 		case QOS_RLAT:
 		case QOS_WLAT:
-			if (match_u64(&args[0], &v))
+			err = match_u64(&args[0], &v);
+			if (err) {
+				ret = err;
 				goto out_unlock;
+			}
+
 			qos[tok] = v;
 			break;
 		case QOS_MIN:
@@ -3374,6 +3384,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 		substring_t args[MAX_OPT_ARGS];
 		char buf[32];
 		int tok;
+		int err;
 		u64 v;
 
 		if (!*p)
@@ -3399,8 +3410,13 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 		tok = match_token(p, i_lcoef_tokens, args);
 		if (tok == NR_I_LCOEFS)
 			goto out_unlock;
-		if (match_u64(&args[0], &v))
+
+		err = match_u64(&args[0], &v);
+		if (err) {
+			ret = err;
 			goto out_unlock;
+		}
+
 		u[tok] = v;
 		user = true;
 	}
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH -next 3/8] blk-iocost: don't allow to configure bio based device
  2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
  2022-11-28 15:44 ` [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write() Li Nan
  2022-11-28 15:44 ` [PATCH -next 2/8] blk-iocost: improve hanlder of match_u64() Li Nan
@ 2022-11-28 15:44 ` Li Nan
  2022-11-28 15:44 ` [PATCH -next 4/8] blk-iocost: read params inside lock in sysfs apis Li Nan
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

From: Yu Kuai <yukuai3@huawei.com>

iocost is based on rq_qos, which can only work for request based device,
thus it doesn't make sense to configure iocost for bio based device.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Li Nan <linan122@huawei.com>
---
 block/blk-iocost.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index c532129a1456..2bfecc511dd9 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3181,6 +3181,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 		return PTR_ERR(bdev);
 
 	disk = bdev->bd_disk;
+	if (!queue_is_mq(disk->queue)) {
+		ret = -EPERM;
+		goto out;
+	}
+
 	ioc = q_to_ioc(disk->queue);
 	if (!ioc) {
 		ret = blk_iocost_init(disk);
@@ -3364,6 +3369,11 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 		return PTR_ERR(bdev);
 
 	q = bdev_get_queue(bdev);
+	if (!queue_is_mq(q)) {
+		ret = -EPERM;
+		goto out;
+	}
+
 	ioc = q_to_ioc(q);
 	if (!ioc) {
 		ret = blk_iocost_init(bdev->bd_disk);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH -next 4/8] blk-iocost: read params inside lock in sysfs apis
  2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
                   ` (2 preceding siblings ...)
  2022-11-28 15:44 ` [PATCH -next 3/8] blk-iocost: don't allow to configure bio based device Li Nan
@ 2022-11-28 15:44 ` Li Nan
  2022-11-28 15:44 ` [PATCH -next 5/8] blk-iocost: fix divide by 0 error in calc_lcoefs() Li Nan
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

From: Yu Kuai <yukuai3@huawei.com>

Otherwise, user might get abnormal values if params is updated
concurrently.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Li Nan <linan122@huawei.com>
---
 block/blk-iocost.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 2bfecc511dd9..a645184aba4a 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3125,6 +3125,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
 	if (!dname)
 		return 0;
 
+	spin_lock_irq(&ioc->lock);
 	seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
 		   dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
 		   ioc->params.qos[QOS_RPPM] / 10000,
@@ -3137,6 +3138,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
 		   ioc->params.qos[QOS_MIN] % 10000 / 100,
 		   ioc->params.qos[QOS_MAX] / 10000,
 		   ioc->params.qos[QOS_MAX] % 10000 / 100);
+	spin_unlock_irq(&ioc->lock);
 	return 0;
 }
 
@@ -3319,12 +3321,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
 	if (!dname)
 		return 0;
 
+	spin_lock_irq(&ioc->lock);
 	seq_printf(sf, "%s ctrl=%s model=linear "
 		   "rbps=%llu rseqiops=%llu rrandiops=%llu "
 		   "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
 		   dname, ioc->user_cost_model ? "user" : "auto",
 		   u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
 		   u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
+	spin_unlock_irq(&ioc->lock);
 	return 0;
 }
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH -next 5/8] blk-iocost: fix divide by 0 error in calc_lcoefs()
  2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
                   ` (3 preceding siblings ...)
  2022-11-28 15:44 ` [PATCH -next 4/8] blk-iocost: read params inside lock in sysfs apis Li Nan
@ 2022-11-28 15:44 ` Li Nan
  2022-11-28 15:44 ` [PATCH -next 6/8] blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in ioc_refresh_params() Li Nan
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

echo max of u64 to cost.model can cause divide by 0 error.

  # echo 8:0 rbps=18446744073709551615 > /sys/fs/cgroup/io.cost.model

  divide error: 0000 [#1] PREEMPT SMP
  RIP: 0010:calc_lcoefs+0x4c/0xc0
  Call Trace:
   <TASK>
   ioc_refresh_params+0x2b3/0x4f0
   ioc_cost_model_write+0x3cb/0x4c0
   ? _copy_from_iter+0x6d/0x6c0
   ? kernfs_fop_write_iter+0xfc/0x270
   cgroup_file_write+0xa0/0x200
   kernfs_fop_write_iter+0x17d/0x270
   vfs_write+0x414/0x620
   ksys_write+0x73/0x160
   __x64_sys_write+0x1e/0x30
   do_syscall_64+0x35/0x80
   entry_SYSCALL_64_after_hwframe+0x63/0xcd

calc_lcoefs() uses the input value of cost.model in DIV_ROUND_UP_ULL,
overflow would happen if bps plus IOC_PAGE_SIZE is greater than
ULLONG_MAX, it can cause divide by 0 error.I_LCOEF_MAX is introduced to
prevent it.

Signed-off-by: Li Nan <linan122@huawei.com>
---
 block/blk-iocost.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a645184aba4a..a4fa17ebb2fa 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -306,6 +306,9 @@ enum {
 	IOC_PAGE_SIZE		= 1 << IOC_PAGE_SHIFT,
 	IOC_SECT_TO_PAGE_SHIFT	= IOC_PAGE_SHIFT - SECTOR_SHIFT,
 
+	/* avoid overflow */
+	I_LCOEF_MAX		= ULLONG_MAX - IOC_PAGE_SIZE,
+
 	/* if apart further than 16M, consider randio for linear model */
 	LCOEF_RANDIO_PAGES	= 4096,
 };
@@ -3431,6 +3434,8 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 			goto out_unlock;
 		}
 
+		if (v > I_LCOEF_MAX)
+			goto out_unlock;
 		u[tok] = v;
 		user = true;
 	}
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH -next 6/8] blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in ioc_refresh_params()
  2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
                   ` (4 preceding siblings ...)
  2022-11-28 15:44 ` [PATCH -next 5/8] blk-iocost: fix divide by 0 error in calc_lcoefs() Li Nan
@ 2022-11-28 15:44 ` Li Nan
  2022-11-28 15:44 ` [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free Li Nan
  2022-11-28 15:44 ` [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init Li Nan
  7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

vrate_min is calculated by DIV64_U64_ROUND_UP, but vrate_max is calculated
by div64_u64. Vrate_min may be 1 greater than vrate_max if the input
values min and max of cost.qos are equal.

Signed-off-by: Li Nan <linan122@huawei.com>
---
 block/blk-iocost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a4fa17ebb2fa..03977385449f 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -926,7 +926,7 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force)
 
 	ioc->vrate_min = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MIN] *
 					    VTIME_PER_USEC, MILLION);
-	ioc->vrate_max = div64_u64((u64)ioc->params.qos[QOS_MAX] *
+	ioc->vrate_max = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MAX] *
 				   VTIME_PER_USEC, MILLION);
 
 	return true;
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free
  2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
                   ` (5 preceding siblings ...)
  2022-11-28 15:44 ` [PATCH -next 6/8] blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in ioc_refresh_params() Li Nan
@ 2022-11-28 15:44 ` Li Nan
  2022-11-29 12:18   ` Yu Kuai
  2022-11-28 15:44 ` [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init Li Nan
  7 siblings, 1 reply; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

Our test found the following problem in kernel 5.10, and the same problem
should exist in mainline:

  BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x71/0xe0
  Write of size 4 at addr ffff8881432000e0 by task swapper/4/0
  ...
  Call Trace:
   <IRQ>
   dump_stack+0x9c/0xd3
   print_address_description.constprop.0+0x19/0x170
   __kasan_report.cold+0x6c/0x84
   kasan_report+0x3a/0x50
   check_memory_region+0xfd/0x1f0
   _raw_spin_lock_irqsave+0x71/0xe0
   ioc_pd_free+0x9d/0x250
   blkg_free.part.0+0x80/0x100
   __blkg_release+0xf3/0x1c0
   rcu_do_batch+0x292/0x700
   rcu_core+0x270/0x2d0
   __do_softirq+0xfd/0x402
    </IRQ>
   asm_call_irq_on_stack+0x12/0x20
   do_softirq_own_stack+0x37/0x50
   irq_exit_rcu+0x134/0x1a0
   sysvec_apic_timer_interrupt+0x36/0x80
   asm_sysvec_apic_timer_interrupt+0x12/0x20

   Freed by task 57:
   kfree+0xba/0x680
   rq_qos_exit+0x5a/0x80
   blk_cleanup_queue+0xce/0x1a0
   virtblk_remove+0x77/0x130 [virtio_blk]
   virtio_dev_remove+0x56/0xe0
   __device_release_driver+0x2ba/0x450
   device_release_driver+0x29/0x40
   bus_remove_device+0x1d8/0x2c0
   device_del+0x333/0x7e0
   device_unregister+0x27/0x90
   unregister_virtio_device+0x22/0x40
   virtio_pci_remove+0x53/0xb0
   pci_device_remove+0x7a/0x130
   __device_release_driver+0x2ba/0x450
   device_release_driver+0x29/0x40
   pci_stop_bus_device+0xcf/0x100
   pci_stop_and_remove_bus_device+0x16/0x20
   disable_slot+0xa1/0x110
   acpiphp_disable_and_eject_slot+0x35/0xe0
   hotplug_event+0x1b8/0x3c0
   acpiphp_hotplug_notify+0x37/0x70
   acpi_device_hotplug+0xee/0x320
   acpi_hotplug_work_fn+0x69/0x80
   process_one_work+0x3c5/0x730
   worker_thread+0x93/0x650
   kthread+0x1ba/0x210
   ret_from_fork+0x22/0x30

It happened as follow:

	T1		    T2			T3
  //rmdir cgroup
  blkcg_destroy_blkgs
   blkg_destroy
    percpu_ref_kill
     blkg_release
      call_rcu
			//delete device
			del_gendisk
			 rq_qos_exit
			  ioc_rqos_exit
			   kfree(ioc)
					   __blkg_release
					    blkg_free
					     blkg_free_workfn
					      pd_free_fn
					       ioc_pd_free
						spin_lock_irqsave
						 ->ioc is freed

Fix the problem by moving the operation on ioc in ioc_pd_free() to
ioc_pd_offline(), and just free resource in ioc_pd_free() like iolatency
and throttle.

Signed-off-by: Li Nan <linan122@huawei.com>
---
 block/blk-iocost.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 03977385449f..1b855babfc35 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2978,7 +2978,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
 	spin_unlock_irqrestore(&ioc->lock, flags);
 }
 
-static void ioc_pd_free(struct blkg_policy_data *pd)
+static void ioc_pd_offline(struct blkg_policy_data *pd)
 {
 	struct ioc_gq *iocg = pd_to_iocg(pd);
 	struct ioc *ioc = iocg->ioc;
@@ -3002,6 +3002,12 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
 
 		hrtimer_cancel(&iocg->waitq_timer);
 	}
+}
+
+static void ioc_pd_free(struct blkg_policy_data *pd)
+{
+	struct ioc_gq *iocg = pd_to_iocg(pd);
+
 	free_percpu(iocg->pcpu_stat);
 	kfree(iocg);
 }
@@ -3488,6 +3494,7 @@ static struct blkcg_policy blkcg_policy_iocost = {
 	.cpd_free_fn	= ioc_cpd_free,
 	.pd_alloc_fn	= ioc_pd_alloc,
 	.pd_init_fn	= ioc_pd_init,
+	.pd_offline_fn	= ioc_pd_offline,
 	.pd_free_fn	= ioc_pd_free,
 	.pd_stat_fn	= ioc_pd_stat,
 };
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init
  2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
                   ` (6 preceding siblings ...)
  2022-11-28 15:44 ` [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free Li Nan
@ 2022-11-28 15:44 ` Li Nan
  2022-11-29 14:25   ` Christoph Hellwig
  7 siblings, 1 reply; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
  To: tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang

Remove block device when iocost is initializing may cause
null-pointer dereference:

	CPU1				   CPU2
  ioc_qos_write
   blkcg_conf_open_bdev
    blkdev_get_no_open
     kobject_get_unless_zero
    blk_iocost_init
     rq_qos_add
  					del_gendisk
  					 rq_qos_exit
  					  q->rq_qos = rqos->next
  					   //iocost is removed from q->roqs
      blkcg_activate_policy
       pd_init_fn
        ioc_pd_init
  	 ioc = q_to_ioc(blkg->q)
 	  //cant find iocost and return null

Fix problem by moving rq_qos_exit() to disk_release(). ioc_qos_write() get
bd_device.kobj in blkcg_conf_open_bdev(), so disk_release will not be
actived until iocost initialization is complited.

Signed-off-by: Li Nan <linan122@huawei.com>
---
 block/genhd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/genhd.c b/block/genhd.c
index dcf200bcbd3e..c264da49eaaa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -656,7 +656,6 @@ void del_gendisk(struct gendisk *disk)
 		elevator_exit(q);
 		mutex_unlock(&q->sysfs_lock);
 	}
-	rq_qos_exit(q);
 	blk_mq_unquiesce_queue(q);
 
 	/*
@@ -1168,6 +1167,7 @@ static void disk_release(struct device *dev)
 	    !test_bit(GD_ADDED, &disk->state))
 		blk_mq_exit_queue(disk->queue);
 
+	rq_qos_exit(q);
 	blkcg_exit_disk(disk);
 
 	bioset_exit(&disk->bio_split);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free
  2022-11-28 15:44 ` [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free Li Nan
@ 2022-11-29 12:18   ` Yu Kuai
  0 siblings, 0 replies; 13+ messages in thread
From: Yu Kuai @ 2022-11-29 12:18 UTC (permalink / raw)
  To: Li Nan, tj, josef, axboe
  Cc: cgroups, linux-block, linux-kernel, yi.zhang, yukuai (C)



在 2022/11/28 23:44, Li Nan 写道:
> Our test found the following problem in kernel 5.10, and the same problem
> should exist in mainline:
> 
>    BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x71/0xe0
>    Write of size 4 at addr ffff8881432000e0 by task swapper/4/0
>    ...
>    Call Trace:
>     <IRQ>
>     dump_stack+0x9c/0xd3
>     print_address_description.constprop.0+0x19/0x170
>     __kasan_report.cold+0x6c/0x84
>     kasan_report+0x3a/0x50
>     check_memory_region+0xfd/0x1f0
>     _raw_spin_lock_irqsave+0x71/0xe0
>     ioc_pd_free+0x9d/0x250
>     blkg_free.part.0+0x80/0x100
>     __blkg_release+0xf3/0x1c0
>     rcu_do_batch+0x292/0x700
>     rcu_core+0x270/0x2d0
>     __do_softirq+0xfd/0x402
>      </IRQ>
>     asm_call_irq_on_stack+0x12/0x20
>     do_softirq_own_stack+0x37/0x50
>     irq_exit_rcu+0x134/0x1a0
>     sysvec_apic_timer_interrupt+0x36/0x80
>     asm_sysvec_apic_timer_interrupt+0x12/0x20
> 
>     Freed by task 57:
>     kfree+0xba/0x680
>     rq_qos_exit+0x5a/0x80
>     blk_cleanup_queue+0xce/0x1a0
>     virtblk_remove+0x77/0x130 [virtio_blk]
>     virtio_dev_remove+0x56/0xe0
>     __device_release_driver+0x2ba/0x450
>     device_release_driver+0x29/0x40
>     bus_remove_device+0x1d8/0x2c0
>     device_del+0x333/0x7e0
>     device_unregister+0x27/0x90
>     unregister_virtio_device+0x22/0x40
>     virtio_pci_remove+0x53/0xb0
>     pci_device_remove+0x7a/0x130
>     __device_release_driver+0x2ba/0x450
>     device_release_driver+0x29/0x40
>     pci_stop_bus_device+0xcf/0x100
>     pci_stop_and_remove_bus_device+0x16/0x20
>     disable_slot+0xa1/0x110
>     acpiphp_disable_and_eject_slot+0x35/0xe0
>     hotplug_event+0x1b8/0x3c0
>     acpiphp_hotplug_notify+0x37/0x70
>     acpi_device_hotplug+0xee/0x320
>     acpi_hotplug_work_fn+0x69/0x80
>     process_one_work+0x3c5/0x730
>     worker_thread+0x93/0x650
>     kthread+0x1ba/0x210
>     ret_from_fork+0x22/0x30
> 
> It happened as follow:
> 
> 	T1		    T2			T3
>    //rmdir cgroup
>    blkcg_destroy_blkgs
>     blkg_destroy
>      percpu_ref_kill
>       blkg_release
>        call_rcu
> 			//delete device
> 			del_gendisk

del_gendisk will synchronize_rcu, hence this is wrong.

call_rcu from blkcg_destroy_blkgs should be called after
synchronize_rcu.

Thanks,
Kuai
> 			 rq_qos_exit
> 			  ioc_rqos_exit
> 			   kfree(ioc)
> 					   __blkg_release
> 					    blkg_free
> 					     blkg_free_workfn
> 					      pd_free_fn
> 					       ioc_pd_free
> 						spin_lock_irqsave
> 						 ->ioc is freed
> 
> Fix the problem by moving the operation on ioc in ioc_pd_free() to
> ioc_pd_offline(), and just free resource in ioc_pd_free() like iolatency
> and throttle.
> 
> Signed-off-by: Li Nan <linan122@huawei.com>
> ---
>   block/blk-iocost.c | 9 ++++++++-
>   1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-iocost.c b/block/blk-iocost.c
> index 03977385449f..1b855babfc35 100644
> --- a/block/blk-iocost.c
> +++ b/block/blk-iocost.c
> @@ -2978,7 +2978,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
>   	spin_unlock_irqrestore(&ioc->lock, flags);
>   }
>   
> -static void ioc_pd_free(struct blkg_policy_data *pd)
> +static void ioc_pd_offline(struct blkg_policy_data *pd)
>   {
>   	struct ioc_gq *iocg = pd_to_iocg(pd);
>   	struct ioc *ioc = iocg->ioc;
> @@ -3002,6 +3002,12 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
>   
>   		hrtimer_cancel(&iocg->waitq_timer);
>   	}
> +}
> +
> +static void ioc_pd_free(struct blkg_policy_data *pd)
> +{
> +	struct ioc_gq *iocg = pd_to_iocg(pd);
> +
>   	free_percpu(iocg->pcpu_stat);
>   	kfree(iocg);
>   }
> @@ -3488,6 +3494,7 @@ static struct blkcg_policy blkcg_policy_iocost = {
>   	.cpd_free_fn	= ioc_cpd_free,
>   	.pd_alloc_fn	= ioc_pd_alloc,
>   	.pd_init_fn	= ioc_pd_init,
> +	.pd_offline_fn	= ioc_pd_offline,
>   	.pd_free_fn	= ioc_pd_free,
>   	.pd_stat_fn	= ioc_pd_stat,
>   };
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init
  2022-11-28 15:44 ` [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init Li Nan
@ 2022-11-29 14:25   ` Christoph Hellwig
  2022-11-30  1:32     ` Yu Kuai
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2022-11-29 14:25 UTC (permalink / raw)
  To: Li Nan
  Cc: tj, josef, axboe, cgroups, linux-block, linux-kernel, yukuai3,
	yi.zhang

On Mon, Nov 28, 2022 at 11:44:34PM +0800, Li Nan wrote:
> Fix problem by moving rq_qos_exit() to disk_release().

No, that now means it is removed to later.  You need to add proper
synchronization.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init
  2022-11-29 14:25   ` Christoph Hellwig
@ 2022-11-30  1:32     ` Yu Kuai
  2022-11-30 15:59       ` Christoph Hellwig
  0 siblings, 1 reply; 13+ messages in thread
From: Yu Kuai @ 2022-11-30  1:32 UTC (permalink / raw)
  To: Christoph Hellwig, Li Nan
  Cc: tj, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
	yukuai (C)

Hi,

在 2022/11/29 22:25, Christoph Hellwig 写道:
> On Mon, Nov 28, 2022 at 11:44:34PM +0800, Li Nan wrote:
>> Fix problem by moving rq_qos_exit() to disk_release().
> 
> No, that now means it is removed to later.  You need to add proper
> synchronization.
> .
> 

Can you explain a bit more? Maybe I'm being noob, here disk is about to
be freed, and I can think of any contention.

Thanks,
Kuai


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init
  2022-11-30  1:32     ` Yu Kuai
@ 2022-11-30 15:59       ` Christoph Hellwig
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2022-11-30 15:59 UTC (permalink / raw)
  To: Yu Kuai
  Cc: Christoph Hellwig, Li Nan, tj, josef, axboe, cgroups, linux-block,
	linux-kernel, yi.zhang, yukuai (C)

On Wed, Nov 30, 2022 at 09:32:58AM +0800, Yu Kuai wrote:
> > No, that now means it is removed to later.  You need to add proper
> > synchronization.
> > .
> > 
> 
> Can you explain a bit more? Maybe I'm being noob, here disk is about to
> be freed, and I can think of any contention.

Right now we need synchronization with e.g. open_mutex and a check
for a dead disk, which I suggst to add insted of creating a lifetime
imbalance.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-11-30 15:59 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
2022-11-28 15:44 ` [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write() Li Nan
2022-11-28 15:44 ` [PATCH -next 2/8] blk-iocost: improve hanlder of match_u64() Li Nan
2022-11-28 15:44 ` [PATCH -next 3/8] blk-iocost: don't allow to configure bio based device Li Nan
2022-11-28 15:44 ` [PATCH -next 4/8] blk-iocost: read params inside lock in sysfs apis Li Nan
2022-11-28 15:44 ` [PATCH -next 5/8] blk-iocost: fix divide by 0 error in calc_lcoefs() Li Nan
2022-11-28 15:44 ` [PATCH -next 6/8] blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in ioc_refresh_params() Li Nan
2022-11-28 15:44 ` [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free Li Nan
2022-11-29 12:18   ` Yu Kuai
2022-11-28 15:44 ` [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init Li Nan
2022-11-29 14:25   ` Christoph Hellwig
2022-11-30  1:32     ` Yu Kuai
2022-11-30 15:59       ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).