* [PATCH -next 0/8] iocost bugfix
@ 2022-11-28 15:44 Li Nan
2022-11-28 15:44 ` [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write() Li Nan
` (7 more replies)
0 siblings, 8 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
This patchset fixes problem in iocost. It contains Yu Kuai's patchset he
sent before because of conflicts.
Li Nan (4):
blk-iocost: fix divide by 0 error in calc_lcoefs()
blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in
ioc_refresh_params()
blk-iocost: fix possible UAF in ioc_pd_free
block: fix null-pointer dereference in ioc_pd_init
Yu Kuai (4):
blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write()
blk-iocost: improve hanlder of match_u64()
blk-iocost: don't allow to configure bio based device
blk-iocost: read params inside lock in sysfs apis
block/blk-iocost.c | 114 ++++++++++++++++++++++++++++-----------------
block/genhd.c | 2 +-
2 files changed, 73 insertions(+), 43 deletions(-)
--
2.31.1
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write()
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
@ 2022-11-28 15:44 ` Li Nan
2022-11-28 15:44 ` [PATCH -next 2/8] blk-iocost: improve hanlder of match_u64() Li Nan
` (6 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
From: Yu Kuai <yukuai3@huawei.com>
There are no functional changes, just to make the code a litter cleaner
and follow up patches easier.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Li Nan <linan122@huawei.com>
---
block/blk-iocost.c | 62 +++++++++++++++++++---------------------------
1 file changed, 25 insertions(+), 37 deletions(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index f01359906c83..fd495e823db2 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3185,7 +3185,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
if (!ioc) {
ret = blk_iocost_init(disk);
if (ret)
- goto err;
+ goto out;
ioc = q_to_ioc(disk->queue);
}
@@ -3197,6 +3197,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
enable = ioc->enabled;
user = ioc->user_qos_params;
+ ret = -EINVAL;
while ((p = strsep(&input, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
@@ -3218,7 +3219,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
else if (!strcmp(buf, "user"))
user = true;
else
- goto einval;
+ goto out_unlock;
continue;
}
@@ -3228,39 +3229,39 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
case QOS_WPPM:
if (match_strlcpy(buf, &args[0], sizeof(buf)) >=
sizeof(buf))
- goto einval;
+ goto out_unlock;
if (cgroup_parse_float(buf, 2, &v))
- goto einval;
+ goto out_unlock;
if (v < 0 || v > 10000)
- goto einval;
+ goto out_unlock;
qos[tok] = v * 100;
break;
case QOS_RLAT:
case QOS_WLAT:
if (match_u64(&args[0], &v))
- goto einval;
+ goto out_unlock;
qos[tok] = v;
break;
case QOS_MIN:
case QOS_MAX:
if (match_strlcpy(buf, &args[0], sizeof(buf)) >=
sizeof(buf))
- goto einval;
+ goto out_unlock;
if (cgroup_parse_float(buf, 2, &v))
- goto einval;
+ goto out_unlock;
if (v < 0)
- goto einval;
+ goto out_unlock;
qos[tok] = clamp_t(s64, v * 100,
VRATE_MIN_PPM, VRATE_MAX_PPM);
break;
default:
- goto einval;
+ goto out_unlock;
}
user = true;
}
if (qos[QOS_MIN] > qos[QOS_MAX])
- goto einval;
+ goto out_unlock;
if (enable) {
blk_stat_enable_accounting(disk->queue);
@@ -3281,21 +3282,14 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
}
ioc_refresh_params(ioc, true);
- spin_unlock_irq(&ioc->lock);
+ ret = nbytes;
- blk_mq_unquiesce_queue(disk->queue);
- blk_mq_unfreeze_queue(disk->queue);
-
- blkdev_put_no_open(bdev);
- return nbytes;
-einval:
+out_unlock:
spin_unlock_irq(&ioc->lock);
-
blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);
- ret = -EINVAL;
-err:
+out:
blkdev_put_no_open(bdev);
return ret;
}
@@ -3364,7 +3358,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
if (!ioc) {
ret = blk_iocost_init(bdev->bd_disk);
if (ret)
- goto err;
+ goto out;
ioc = q_to_ioc(q);
}
@@ -3375,6 +3369,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
user = ioc->user_cost_model;
+ ret = -EINVAL;
while ((p = strsep(&input, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
@@ -3392,20 +3387,20 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
else if (!strcmp(buf, "user"))
user = true;
else
- goto einval;
+ goto out_unlock;
continue;
case COST_MODEL:
match_strlcpy(buf, &args[0], sizeof(buf));
if (strcmp(buf, "linear"))
- goto einval;
+ goto out_unlock;
continue;
}
tok = match_token(p, i_lcoef_tokens, args);
if (tok == NR_I_LCOEFS)
- goto einval;
+ goto out_unlock;
if (match_u64(&args[0], &v))
- goto einval;
+ goto out_unlock;
u[tok] = v;
user = true;
}
@@ -3416,23 +3411,16 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
} else {
ioc->user_cost_model = false;
}
- ioc_refresh_params(ioc, true);
- spin_unlock_irq(&ioc->lock);
- blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
-
- blkdev_put_no_open(bdev);
- return nbytes;
+ ioc_refresh_params(ioc, true);
+ ret = nbytes;
-einval:
+out_unlock:
spin_unlock_irq(&ioc->lock);
-
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
- ret = -EINVAL;
-err:
+out:
blkdev_put_no_open(bdev);
return ret;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH -next 2/8] blk-iocost: improve hanlder of match_u64()
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
2022-11-28 15:44 ` [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write() Li Nan
@ 2022-11-28 15:44 ` Li Nan
2022-11-28 15:44 ` [PATCH -next 3/8] blk-iocost: don't allow to configure bio based device Li Nan
` (5 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
From: Yu Kuai <yukuai3@huawei.com>
1) There are one place that return value of match_u64() is not checked.
2) If match_u64() failed, return value is set to -EINVAL despite that
there are other possible errnos.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Li Nan <linan122@huawei.com>
---
block/blk-iocost.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index fd495e823db2..c532129a1456 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3202,6 +3202,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
+ int err;
s64 v;
if (!*p)
@@ -3209,7 +3210,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
switch (match_token(p, qos_ctrl_tokens, args)) {
case QOS_ENABLE:
- match_u64(&args[0], &v);
+ err = match_u64(&args[0], &v);
+ if (err) {
+ ret = err;
+ goto out_unlock;
+ }
+
enable = v;
continue;
case QOS_CTRL:
@@ -3238,8 +3244,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
break;
case QOS_RLAT:
case QOS_WLAT:
- if (match_u64(&args[0], &v))
+ err = match_u64(&args[0], &v);
+ if (err) {
+ ret = err;
goto out_unlock;
+ }
+
qos[tok] = v;
break;
case QOS_MIN:
@@ -3374,6 +3384,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
+ int err;
u64 v;
if (!*p)
@@ -3399,8 +3410,13 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
tok = match_token(p, i_lcoef_tokens, args);
if (tok == NR_I_LCOEFS)
goto out_unlock;
- if (match_u64(&args[0], &v))
+
+ err = match_u64(&args[0], &v);
+ if (err) {
+ ret = err;
goto out_unlock;
+ }
+
u[tok] = v;
user = true;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH -next 3/8] blk-iocost: don't allow to configure bio based device
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
2022-11-28 15:44 ` [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write() Li Nan
2022-11-28 15:44 ` [PATCH -next 2/8] blk-iocost: improve hanlder of match_u64() Li Nan
@ 2022-11-28 15:44 ` Li Nan
2022-11-28 15:44 ` [PATCH -next 4/8] blk-iocost: read params inside lock in sysfs apis Li Nan
` (4 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
From: Yu Kuai <yukuai3@huawei.com>
iocost is based on rq_qos, which can only work for request based device,
thus it doesn't make sense to configure iocost for bio based device.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Li Nan <linan122@huawei.com>
---
block/blk-iocost.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index c532129a1456..2bfecc511dd9 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3181,6 +3181,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
return PTR_ERR(bdev);
disk = bdev->bd_disk;
+ if (!queue_is_mq(disk->queue)) {
+ ret = -EPERM;
+ goto out;
+ }
+
ioc = q_to_ioc(disk->queue);
if (!ioc) {
ret = blk_iocost_init(disk);
@@ -3364,6 +3369,11 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
return PTR_ERR(bdev);
q = bdev_get_queue(bdev);
+ if (!queue_is_mq(q)) {
+ ret = -EPERM;
+ goto out;
+ }
+
ioc = q_to_ioc(q);
if (!ioc) {
ret = blk_iocost_init(bdev->bd_disk);
--
2.31.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH -next 4/8] blk-iocost: read params inside lock in sysfs apis
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
` (2 preceding siblings ...)
2022-11-28 15:44 ` [PATCH -next 3/8] blk-iocost: don't allow to configure bio based device Li Nan
@ 2022-11-28 15:44 ` Li Nan
2022-11-28 15:44 ` [PATCH -next 5/8] blk-iocost: fix divide by 0 error in calc_lcoefs() Li Nan
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
From: Yu Kuai <yukuai3@huawei.com>
Otherwise, user might get abnormal values if params is updated
concurrently.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Li Nan <linan122@huawei.com>
---
block/blk-iocost.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 2bfecc511dd9..a645184aba4a 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3125,6 +3125,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
if (!dname)
return 0;
+ spin_lock_irq(&ioc->lock);
seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
ioc->params.qos[QOS_RPPM] / 10000,
@@ -3137,6 +3138,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
ioc->params.qos[QOS_MIN] % 10000 / 100,
ioc->params.qos[QOS_MAX] / 10000,
ioc->params.qos[QOS_MAX] % 10000 / 100);
+ spin_unlock_irq(&ioc->lock);
return 0;
}
@@ -3319,12 +3321,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
if (!dname)
return 0;
+ spin_lock_irq(&ioc->lock);
seq_printf(sf, "%s ctrl=%s model=linear "
"rbps=%llu rseqiops=%llu rrandiops=%llu "
"wbps=%llu wseqiops=%llu wrandiops=%llu\n",
dname, ioc->user_cost_model ? "user" : "auto",
u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
+ spin_unlock_irq(&ioc->lock);
return 0;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH -next 5/8] blk-iocost: fix divide by 0 error in calc_lcoefs()
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
` (3 preceding siblings ...)
2022-11-28 15:44 ` [PATCH -next 4/8] blk-iocost: read params inside lock in sysfs apis Li Nan
@ 2022-11-28 15:44 ` Li Nan
2022-11-28 15:44 ` [PATCH -next 6/8] blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in ioc_refresh_params() Li Nan
` (2 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
echo max of u64 to cost.model can cause divide by 0 error.
# echo 8:0 rbps=18446744073709551615 > /sys/fs/cgroup/io.cost.model
divide error: 0000 [#1] PREEMPT SMP
RIP: 0010:calc_lcoefs+0x4c/0xc0
Call Trace:
<TASK>
ioc_refresh_params+0x2b3/0x4f0
ioc_cost_model_write+0x3cb/0x4c0
? _copy_from_iter+0x6d/0x6c0
? kernfs_fop_write_iter+0xfc/0x270
cgroup_file_write+0xa0/0x200
kernfs_fop_write_iter+0x17d/0x270
vfs_write+0x414/0x620
ksys_write+0x73/0x160
__x64_sys_write+0x1e/0x30
do_syscall_64+0x35/0x80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
calc_lcoefs() uses the input value of cost.model in DIV_ROUND_UP_ULL,
overflow would happen if bps plus IOC_PAGE_SIZE is greater than
ULLONG_MAX, it can cause divide by 0 error.I_LCOEF_MAX is introduced to
prevent it.
Signed-off-by: Li Nan <linan122@huawei.com>
---
block/blk-iocost.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a645184aba4a..a4fa17ebb2fa 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -306,6 +306,9 @@ enum {
IOC_PAGE_SIZE = 1 << IOC_PAGE_SHIFT,
IOC_SECT_TO_PAGE_SHIFT = IOC_PAGE_SHIFT - SECTOR_SHIFT,
+ /* avoid overflow */
+ I_LCOEF_MAX = ULLONG_MAX - IOC_PAGE_SIZE,
+
/* if apart further than 16M, consider randio for linear model */
LCOEF_RANDIO_PAGES = 4096,
};
@@ -3431,6 +3434,8 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
goto out_unlock;
}
+ if (v > I_LCOEF_MAX)
+ goto out_unlock;
u[tok] = v;
user = true;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH -next 6/8] blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in ioc_refresh_params()
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
` (4 preceding siblings ...)
2022-11-28 15:44 ` [PATCH -next 5/8] blk-iocost: fix divide by 0 error in calc_lcoefs() Li Nan
@ 2022-11-28 15:44 ` Li Nan
2022-11-28 15:44 ` [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free Li Nan
2022-11-28 15:44 ` [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init Li Nan
7 siblings, 0 replies; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
vrate_min is calculated by DIV64_U64_ROUND_UP, but vrate_max is calculated
by div64_u64. Vrate_min may be 1 greater than vrate_max if the input
values min and max of cost.qos are equal.
Signed-off-by: Li Nan <linan122@huawei.com>
---
block/blk-iocost.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index a4fa17ebb2fa..03977385449f 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -926,7 +926,7 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force)
ioc->vrate_min = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MIN] *
VTIME_PER_USEC, MILLION);
- ioc->vrate_max = div64_u64((u64)ioc->params.qos[QOS_MAX] *
+ ioc->vrate_max = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MAX] *
VTIME_PER_USEC, MILLION);
return true;
--
2.31.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
` (5 preceding siblings ...)
2022-11-28 15:44 ` [PATCH -next 6/8] blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in ioc_refresh_params() Li Nan
@ 2022-11-28 15:44 ` Li Nan
2022-11-29 12:18 ` Yu Kuai
2022-11-28 15:44 ` [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init Li Nan
7 siblings, 1 reply; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
Our test found the following problem in kernel 5.10, and the same problem
should exist in mainline:
BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x71/0xe0
Write of size 4 at addr ffff8881432000e0 by task swapper/4/0
...
Call Trace:
<IRQ>
dump_stack+0x9c/0xd3
print_address_description.constprop.0+0x19/0x170
__kasan_report.cold+0x6c/0x84
kasan_report+0x3a/0x50
check_memory_region+0xfd/0x1f0
_raw_spin_lock_irqsave+0x71/0xe0
ioc_pd_free+0x9d/0x250
blkg_free.part.0+0x80/0x100
__blkg_release+0xf3/0x1c0
rcu_do_batch+0x292/0x700
rcu_core+0x270/0x2d0
__do_softirq+0xfd/0x402
</IRQ>
asm_call_irq_on_stack+0x12/0x20
do_softirq_own_stack+0x37/0x50
irq_exit_rcu+0x134/0x1a0
sysvec_apic_timer_interrupt+0x36/0x80
asm_sysvec_apic_timer_interrupt+0x12/0x20
Freed by task 57:
kfree+0xba/0x680
rq_qos_exit+0x5a/0x80
blk_cleanup_queue+0xce/0x1a0
virtblk_remove+0x77/0x130 [virtio_blk]
virtio_dev_remove+0x56/0xe0
__device_release_driver+0x2ba/0x450
device_release_driver+0x29/0x40
bus_remove_device+0x1d8/0x2c0
device_del+0x333/0x7e0
device_unregister+0x27/0x90
unregister_virtio_device+0x22/0x40
virtio_pci_remove+0x53/0xb0
pci_device_remove+0x7a/0x130
__device_release_driver+0x2ba/0x450
device_release_driver+0x29/0x40
pci_stop_bus_device+0xcf/0x100
pci_stop_and_remove_bus_device+0x16/0x20
disable_slot+0xa1/0x110
acpiphp_disable_and_eject_slot+0x35/0xe0
hotplug_event+0x1b8/0x3c0
acpiphp_hotplug_notify+0x37/0x70
acpi_device_hotplug+0xee/0x320
acpi_hotplug_work_fn+0x69/0x80
process_one_work+0x3c5/0x730
worker_thread+0x93/0x650
kthread+0x1ba/0x210
ret_from_fork+0x22/0x30
It happened as follow:
T1 T2 T3
//rmdir cgroup
blkcg_destroy_blkgs
blkg_destroy
percpu_ref_kill
blkg_release
call_rcu
//delete device
del_gendisk
rq_qos_exit
ioc_rqos_exit
kfree(ioc)
__blkg_release
blkg_free
blkg_free_workfn
pd_free_fn
ioc_pd_free
spin_lock_irqsave
->ioc is freed
Fix the problem by moving the operation on ioc in ioc_pd_free() to
ioc_pd_offline(), and just free resource in ioc_pd_free() like iolatency
and throttle.
Signed-off-by: Li Nan <linan122@huawei.com>
---
block/blk-iocost.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 03977385449f..1b855babfc35 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2978,7 +2978,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
spin_unlock_irqrestore(&ioc->lock, flags);
}
-static void ioc_pd_free(struct blkg_policy_data *pd)
+static void ioc_pd_offline(struct blkg_policy_data *pd)
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
@@ -3002,6 +3002,12 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
hrtimer_cancel(&iocg->waitq_timer);
}
+}
+
+static void ioc_pd_free(struct blkg_policy_data *pd)
+{
+ struct ioc_gq *iocg = pd_to_iocg(pd);
+
free_percpu(iocg->pcpu_stat);
kfree(iocg);
}
@@ -3488,6 +3494,7 @@ static struct blkcg_policy blkcg_policy_iocost = {
.cpd_free_fn = ioc_cpd_free,
.pd_alloc_fn = ioc_pd_alloc,
.pd_init_fn = ioc_pd_init,
+ .pd_offline_fn = ioc_pd_offline,
.pd_free_fn = ioc_pd_free,
.pd_stat_fn = ioc_pd_stat,
};
--
2.31.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
` (6 preceding siblings ...)
2022-11-28 15:44 ` [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free Li Nan
@ 2022-11-28 15:44 ` Li Nan
2022-11-29 14:25 ` Christoph Hellwig
7 siblings, 1 reply; 13+ messages in thread
From: Li Nan @ 2022-11-28 15:44 UTC (permalink / raw)
To: tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, linan122, yukuai3, yi.zhang
Remove block device when iocost is initializing may cause
null-pointer dereference:
CPU1 CPU2
ioc_qos_write
blkcg_conf_open_bdev
blkdev_get_no_open
kobject_get_unless_zero
blk_iocost_init
rq_qos_add
del_gendisk
rq_qos_exit
q->rq_qos = rqos->next
//iocost is removed from q->roqs
blkcg_activate_policy
pd_init_fn
ioc_pd_init
ioc = q_to_ioc(blkg->q)
//cant find iocost and return null
Fix problem by moving rq_qos_exit() to disk_release(). ioc_qos_write() get
bd_device.kobj in blkcg_conf_open_bdev(), so disk_release will not be
actived until iocost initialization is complited.
Signed-off-by: Li Nan <linan122@huawei.com>
---
block/genhd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/genhd.c b/block/genhd.c
index dcf200bcbd3e..c264da49eaaa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -656,7 +656,6 @@ void del_gendisk(struct gendisk *disk)
elevator_exit(q);
mutex_unlock(&q->sysfs_lock);
}
- rq_qos_exit(q);
blk_mq_unquiesce_queue(q);
/*
@@ -1168,6 +1167,7 @@ static void disk_release(struct device *dev)
!test_bit(GD_ADDED, &disk->state))
blk_mq_exit_queue(disk->queue);
+ rq_qos_exit(q);
blkcg_exit_disk(disk);
bioset_exit(&disk->bio_split);
--
2.31.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free
2022-11-28 15:44 ` [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free Li Nan
@ 2022-11-29 12:18 ` Yu Kuai
0 siblings, 0 replies; 13+ messages in thread
From: Yu Kuai @ 2022-11-29 12:18 UTC (permalink / raw)
To: Li Nan, tj, josef, axboe
Cc: cgroups, linux-block, linux-kernel, yi.zhang, yukuai (C)
在 2022/11/28 23:44, Li Nan 写道:
> Our test found the following problem in kernel 5.10, and the same problem
> should exist in mainline:
>
> BUG: KASAN: use-after-free in _raw_spin_lock_irqsave+0x71/0xe0
> Write of size 4 at addr ffff8881432000e0 by task swapper/4/0
> ...
> Call Trace:
> <IRQ>
> dump_stack+0x9c/0xd3
> print_address_description.constprop.0+0x19/0x170
> __kasan_report.cold+0x6c/0x84
> kasan_report+0x3a/0x50
> check_memory_region+0xfd/0x1f0
> _raw_spin_lock_irqsave+0x71/0xe0
> ioc_pd_free+0x9d/0x250
> blkg_free.part.0+0x80/0x100
> __blkg_release+0xf3/0x1c0
> rcu_do_batch+0x292/0x700
> rcu_core+0x270/0x2d0
> __do_softirq+0xfd/0x402
> </IRQ>
> asm_call_irq_on_stack+0x12/0x20
> do_softirq_own_stack+0x37/0x50
> irq_exit_rcu+0x134/0x1a0
> sysvec_apic_timer_interrupt+0x36/0x80
> asm_sysvec_apic_timer_interrupt+0x12/0x20
>
> Freed by task 57:
> kfree+0xba/0x680
> rq_qos_exit+0x5a/0x80
> blk_cleanup_queue+0xce/0x1a0
> virtblk_remove+0x77/0x130 [virtio_blk]
> virtio_dev_remove+0x56/0xe0
> __device_release_driver+0x2ba/0x450
> device_release_driver+0x29/0x40
> bus_remove_device+0x1d8/0x2c0
> device_del+0x333/0x7e0
> device_unregister+0x27/0x90
> unregister_virtio_device+0x22/0x40
> virtio_pci_remove+0x53/0xb0
> pci_device_remove+0x7a/0x130
> __device_release_driver+0x2ba/0x450
> device_release_driver+0x29/0x40
> pci_stop_bus_device+0xcf/0x100
> pci_stop_and_remove_bus_device+0x16/0x20
> disable_slot+0xa1/0x110
> acpiphp_disable_and_eject_slot+0x35/0xe0
> hotplug_event+0x1b8/0x3c0
> acpiphp_hotplug_notify+0x37/0x70
> acpi_device_hotplug+0xee/0x320
> acpi_hotplug_work_fn+0x69/0x80
> process_one_work+0x3c5/0x730
> worker_thread+0x93/0x650
> kthread+0x1ba/0x210
> ret_from_fork+0x22/0x30
>
> It happened as follow:
>
> T1 T2 T3
> //rmdir cgroup
> blkcg_destroy_blkgs
> blkg_destroy
> percpu_ref_kill
> blkg_release
> call_rcu
> //delete device
> del_gendisk
del_gendisk will synchronize_rcu, hence this is wrong.
call_rcu from blkcg_destroy_blkgs should be called after
synchronize_rcu.
Thanks,
Kuai
> rq_qos_exit
> ioc_rqos_exit
> kfree(ioc)
> __blkg_release
> blkg_free
> blkg_free_workfn
> pd_free_fn
> ioc_pd_free
> spin_lock_irqsave
> ->ioc is freed
>
> Fix the problem by moving the operation on ioc in ioc_pd_free() to
> ioc_pd_offline(), and just free resource in ioc_pd_free() like iolatency
> and throttle.
>
> Signed-off-by: Li Nan <linan122@huawei.com>
> ---
> block/blk-iocost.c | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/block/blk-iocost.c b/block/blk-iocost.c
> index 03977385449f..1b855babfc35 100644
> --- a/block/blk-iocost.c
> +++ b/block/blk-iocost.c
> @@ -2978,7 +2978,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
> spin_unlock_irqrestore(&ioc->lock, flags);
> }
>
> -static void ioc_pd_free(struct blkg_policy_data *pd)
> +static void ioc_pd_offline(struct blkg_policy_data *pd)
> {
> struct ioc_gq *iocg = pd_to_iocg(pd);
> struct ioc *ioc = iocg->ioc;
> @@ -3002,6 +3002,12 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
>
> hrtimer_cancel(&iocg->waitq_timer);
> }
> +}
> +
> +static void ioc_pd_free(struct blkg_policy_data *pd)
> +{
> + struct ioc_gq *iocg = pd_to_iocg(pd);
> +
> free_percpu(iocg->pcpu_stat);
> kfree(iocg);
> }
> @@ -3488,6 +3494,7 @@ static struct blkcg_policy blkcg_policy_iocost = {
> .cpd_free_fn = ioc_cpd_free,
> .pd_alloc_fn = ioc_pd_alloc,
> .pd_init_fn = ioc_pd_init,
> + .pd_offline_fn = ioc_pd_offline,
> .pd_free_fn = ioc_pd_free,
> .pd_stat_fn = ioc_pd_stat,
> };
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init
2022-11-28 15:44 ` [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init Li Nan
@ 2022-11-29 14:25 ` Christoph Hellwig
2022-11-30 1:32 ` Yu Kuai
0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2022-11-29 14:25 UTC (permalink / raw)
To: Li Nan
Cc: tj, josef, axboe, cgroups, linux-block, linux-kernel, yukuai3,
yi.zhang
On Mon, Nov 28, 2022 at 11:44:34PM +0800, Li Nan wrote:
> Fix problem by moving rq_qos_exit() to disk_release().
No, that now means it is removed to later. You need to add proper
synchronization.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init
2022-11-29 14:25 ` Christoph Hellwig
@ 2022-11-30 1:32 ` Yu Kuai
2022-11-30 15:59 ` Christoph Hellwig
0 siblings, 1 reply; 13+ messages in thread
From: Yu Kuai @ 2022-11-30 1:32 UTC (permalink / raw)
To: Christoph Hellwig, Li Nan
Cc: tj, josef, axboe, cgroups, linux-block, linux-kernel, yi.zhang,
yukuai (C)
Hi,
在 2022/11/29 22:25, Christoph Hellwig 写道:
> On Mon, Nov 28, 2022 at 11:44:34PM +0800, Li Nan wrote:
>> Fix problem by moving rq_qos_exit() to disk_release().
>
> No, that now means it is removed to later. You need to add proper
> synchronization.
> .
>
Can you explain a bit more? Maybe I'm being noob, here disk is about to
be freed, and I can think of any contention.
Thanks,
Kuai
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init
2022-11-30 1:32 ` Yu Kuai
@ 2022-11-30 15:59 ` Christoph Hellwig
0 siblings, 0 replies; 13+ messages in thread
From: Christoph Hellwig @ 2022-11-30 15:59 UTC (permalink / raw)
To: Yu Kuai
Cc: Christoph Hellwig, Li Nan, tj, josef, axboe, cgroups, linux-block,
linux-kernel, yi.zhang, yukuai (C)
On Wed, Nov 30, 2022 at 09:32:58AM +0800, Yu Kuai wrote:
> > No, that now means it is removed to later. You need to add proper
> > synchronization.
> > .
> >
>
> Can you explain a bit more? Maybe I'm being noob, here disk is about to
> be freed, and I can think of any contention.
Right now we need synchronization with e.g. open_mutex and a check
for a dead disk, which I suggst to add insted of creating a lifetime
imbalance.
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2022-11-30 15:59 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-11-28 15:44 [PATCH -next 0/8] iocost bugfix Li Nan
2022-11-28 15:44 ` [PATCH -next 1/8] blk-iocost: cleanup ioc_qos_write() and ioc_cost_model_write() Li Nan
2022-11-28 15:44 ` [PATCH -next 2/8] blk-iocost: improve hanlder of match_u64() Li Nan
2022-11-28 15:44 ` [PATCH -next 3/8] blk-iocost: don't allow to configure bio based device Li Nan
2022-11-28 15:44 ` [PATCH -next 4/8] blk-iocost: read params inside lock in sysfs apis Li Nan
2022-11-28 15:44 ` [PATCH -next 5/8] blk-iocost: fix divide by 0 error in calc_lcoefs() Li Nan
2022-11-28 15:44 ` [PATCH -next 6/8] blk-iocost: change div64_u64 to DIV64_U64_ROUND_UP in ioc_refresh_params() Li Nan
2022-11-28 15:44 ` [PATCH -next 7/8] blk-iocost: fix possible UAF in ioc_pd_free Li Nan
2022-11-29 12:18 ` Yu Kuai
2022-11-28 15:44 ` [PATCH -next 8/8] block: fix null-pointer dereference in ioc_pd_init Li Nan
2022-11-29 14:25 ` Christoph Hellwig
2022-11-30 1:32 ` Yu Kuai
2022-11-30 15:59 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).