* [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl
@ 2023-04-03 0:41 Irvin Cote
2023-04-03 10:22 ` Sagi Grimberg
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Irvin Cote @ 2023-04-03 0:41 UTC (permalink / raw)
To: hch; +Cc: kbusch, axboe, sagi, linux-nvme
Correcting the de-reference count for
the controller in the core layer to avoid
a memory leak. Also cleaning a bit the
teardown logic of nvme_init_ctrl.
Signed-off-by: Irvin Cote <irvincoteg@gmail.com>
---
drivers/nvme/host/core.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8698410aeb84..3d7aca7d0f2a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -5137,14 +5137,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
PAGE_SIZE);
ctrl->discard_page = alloc_page(GFP_KERNEL);
- if (!ctrl->discard_page) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!ctrl->discard_page)
+ return -ENOMEM;
ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
if (ret < 0)
- goto out;
+ goto out_free_page;
ctrl->instance = ret;
device_initialize(&ctrl->ctrl_device);
@@ -5191,10 +5189,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
nvme_put_ctrl(ctrl);
kfree_const(ctrl->device->kobj.name);
out_release_instance:
+ nvme_put_ctrl(ctrl);
ida_free(&nvme_instance_ida, ctrl->instance);
-out:
- if (ctrl->discard_page)
- __free_page(ctrl->discard_page);
+out_free_page:
+ __free_page(ctrl->discard_page);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
--
2.37.2
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl
2023-04-03 0:41 [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl Irvin Cote
@ 2023-04-03 10:22 ` Sagi Grimberg
2023-04-03 18:27 ` irvin cote
2023-04-05 15:20 ` Christoph Hellwig
2023-04-13 9:20 ` Chaitanya Kulkarni
2 siblings, 1 reply; 8+ messages in thread
From: Sagi Grimberg @ 2023-04-03 10:22 UTC (permalink / raw)
To: Irvin Cote, hch; +Cc: kbusch, axboe, linux-nvme
On 4/3/23 03:41, Irvin Cote wrote:
> Correcting the de-reference count for
> the controller in the core layer to avoid
> a memory leak.
Is this a regression?
> Also cleaning a bit the teardown logic of nvme_init_ctrl.
This is a good cleanup.
>
> Signed-off-by: Irvin Cote <irvincoteg@gmail.com>
> ---
> drivers/nvme/host/core.c | 14 ++++++--------
> 1 file changed, 6 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8698410aeb84..3d7aca7d0f2a 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -5137,14 +5137,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
> PAGE_SIZE);
> ctrl->discard_page = alloc_page(GFP_KERNEL);
> - if (!ctrl->discard_page) {
> - ret = -ENOMEM;
> - goto out;
> - }
> + if (!ctrl->discard_page)
> + return -ENOMEM;
>
> ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
> if (ret < 0)
> - goto out;
> + goto out_free_page;
> ctrl->instance = ret;
>
> device_initialize(&ctrl->ctrl_device);
> @@ -5191,10 +5189,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> nvme_put_ctrl(ctrl);
> kfree_const(ctrl->device->kobj.name);
> out_release_instance:
> + nvme_put_ctrl(ctrl);
> ida_free(&nvme_instance_ida, ctrl->instance);
> -out:
> - if (ctrl->discard_page)
> - __free_page(ctrl->discard_page);
> +out_free_page:
> + __free_page(ctrl->discard_page);
> return ret;
> }
> EXPORT_SYMBOL_GPL(nvme_init_ctrl);
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl
2023-04-03 10:22 ` Sagi Grimberg
@ 2023-04-03 18:27 ` irvin cote
2023-04-03 22:37 ` Sagi Grimberg
0 siblings, 1 reply; 8+ messages in thread
From: irvin cote @ 2023-04-03 18:27 UTC (permalink / raw)
To: Sagi Grimberg; +Cc: hch, kbusch, axboe, linux-nvme
The thing is that after device_initialize is called the reference
count for the device is equal to 1. Now the function also calls
nvme_get_ctrl which increases the ref-count to 2.
However the teardown path only accounts for 1 decrement. This means
that if an error were to occur during nvme_init_ctrl, we would return
from nvme_probe without having freed the resources of the controller.
On Mon, 3 Apr 2023 at 07:22, Sagi Grimberg <sagi@grimberg.me> wrote:
>
>
>
> On 4/3/23 03:41, Irvin Cote wrote:
> > Correcting the de-reference count for
> > the controller in the core layer to avoid
> > a memory leak.
>
> Is this a regression?
>
> > Also cleaning a bit the teardown logic of nvme_init_ctrl.
>
> This is a good cleanup.
>
> >
> > Signed-off-by: Irvin Cote <irvincoteg@gmail.com>
> > ---
> > drivers/nvme/host/core.c | 14 ++++++--------
> > 1 file changed, 6 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 8698410aeb84..3d7aca7d0f2a 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -5137,14 +5137,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> > BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
> > PAGE_SIZE);
> > ctrl->discard_page = alloc_page(GFP_KERNEL);
> > - if (!ctrl->discard_page) {
> > - ret = -ENOMEM;
> > - goto out;
> > - }
> > + if (!ctrl->discard_page)
> > + return -ENOMEM;
> >
> > ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
> > if (ret < 0)
> > - goto out;
> > + goto out_free_page;
> > ctrl->instance = ret;
> >
> > device_initialize(&ctrl->ctrl_device);
> > @@ -5191,10 +5189,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> > nvme_put_ctrl(ctrl);
> > kfree_const(ctrl->device->kobj.name);
> > out_release_instance:
> > + nvme_put_ctrl(ctrl);
> > ida_free(&nvme_instance_ida, ctrl->instance);
> > -out:
> > - if (ctrl->discard_page)
> > - __free_page(ctrl->discard_page);
> > +out_free_page:
> > + __free_page(ctrl->discard_page);
> > return ret;
> > }
> > EXPORT_SYMBOL_GPL(nvme_init_ctrl);
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl
2023-04-03 18:27 ` irvin cote
@ 2023-04-03 22:37 ` Sagi Grimberg
2023-04-04 21:18 ` irvin cote
2023-04-05 10:06 ` Maurizio Lombardi
0 siblings, 2 replies; 8+ messages in thread
From: Sagi Grimberg @ 2023-04-03 22:37 UTC (permalink / raw)
To: irvin cote; +Cc: hch, kbusch, axboe, linux-nvme
> The thing is that after device_initialize is called the reference
> count for the device is equal to 1. Now the function also calls
> nvme_get_ctrl which increases the ref-count to 2.
> However the teardown path only accounts for 1 decrement. This means
> that if an error were to occur during nvme_init_ctrl, we would return
> from nvme_probe without having freed the resources of the controller.
Not arguing the bug, just want to understand what Fixes tag it needs,
so that this can go as far as possible to stable kernels.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl
2023-04-03 22:37 ` Sagi Grimberg
@ 2023-04-04 21:18 ` irvin cote
2023-04-05 10:06 ` Maurizio Lombardi
1 sibling, 0 replies; 8+ messages in thread
From: irvin cote @ 2023-04-04 21:18 UTC (permalink / raw)
To: Sagi Grimberg; +Cc: hch, kbusch, axboe, linux-nvme
Oh sorry I had not understood the term, and I am not too familiar with
fix tags yet, but let's go with regression if it is a good fit.
On Mon, 3 Apr 2023 at 19:37, Sagi Grimberg <sagi@grimberg.me> wrote:
>
>
> > The thing is that after device_initialize is called the reference
> > count for the device is equal to 1. Now the function also calls
> > nvme_get_ctrl which increases the ref-count to 2.
> > However the teardown path only accounts for 1 decrement. This means
> > that if an error were to occur during nvme_init_ctrl, we would return
> > from nvme_probe without having freed the resources of the controller.
>
> Not arguing the bug, just want to understand what Fixes tag it needs,
> so that this can go as far as possible to stable kernels.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl
2023-04-03 22:37 ` Sagi Grimberg
2023-04-04 21:18 ` irvin cote
@ 2023-04-05 10:06 ` Maurizio Lombardi
1 sibling, 0 replies; 8+ messages in thread
From: Maurizio Lombardi @ 2023-04-05 10:06 UTC (permalink / raw)
To: Sagi Grimberg; +Cc: irvin cote, hch, kbusch, axboe, linux-nvme
út 4. 4. 2023 v 0:38 odesílatel Sagi Grimberg <sagi@grimberg.me> napsal:
> Not arguing the bug, just want to understand what Fixes tag it needs,
> so that this can go as far as possible to stable kernels.
>
I think the correct one is:
Fixes: d22524a4782a943bb02a9cf68 ("nvme: switch controller refcounting
to use struct device")
It calls device_initialize() but doesn't add nvme_put_ctrl() in the error path.
Maurizio
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl
2023-04-03 0:41 [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl Irvin Cote
2023-04-03 10:22 ` Sagi Grimberg
@ 2023-04-05 15:20 ` Christoph Hellwig
2023-04-13 9:20 ` Chaitanya Kulkarni
2 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2023-04-05 15:20 UTC (permalink / raw)
To: Irvin Cote; +Cc: hch, kbusch, axboe, sagi, linux-nvme
Thanks.
I've applied this after splitting the cleanup into a second patch
and rewriting the commit log to describe the changes a bit better.
In the future please try to split fixes and cleanups, or generally
separate changes yourself.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl
2023-04-03 0:41 [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl Irvin Cote
2023-04-03 10:22 ` Sagi Grimberg
2023-04-05 15:20 ` Christoph Hellwig
@ 2023-04-13 9:20 ` Chaitanya Kulkarni
2 siblings, 0 replies; 8+ messages in thread
From: Chaitanya Kulkarni @ 2023-04-13 9:20 UTC (permalink / raw)
To: Irvin Cote
Cc: kbusch@kernel.org, axboe@fb.com, hch@lst.de, sagi@grimberg.me,
linux-nvme@lists.infradead.org
On 4/2/23 17:41, Irvin Cote wrote:
> Correcting the de-reference count for
> the controller in the core layer to avoid
> a memory leak. Also cleaning a bit the
> teardown logic of nvme_init_ctrl.
>
> Signed-off-by: Irvin Cote <irvincoteg@gmail.com>
> ---
> drivers/nvme/host/core.c | 14 ++++++--------
> 1 file changed, 6 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8698410aeb84..3d7aca7d0f2a 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -5137,14 +5137,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
> PAGE_SIZE);
> ctrl->discard_page = alloc_page(GFP_KERNEL);
> - if (!ctrl->discard_page) {
> - ret = -ENOMEM;
> - goto out;
> - }
> + if (!ctrl->discard_page)
> + return -ENOMEM;
>
> ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
> if (ret < 0)
> - goto out;
> + goto out_free_page;
> ctrl->instance = ret;
>
> device_initialize(&ctrl->ctrl_device);
> @@ -5191,10 +5189,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> nvme_put_ctrl(ctrl);
> kfree_const(ctrl->device->kobj.name);
> out_release_instance:
> + nvme_put_ctrl(ctrl);
> ida_free(&nvme_instance_ida, ctrl->instance);
> -out:
> - if (ctrl->discard_page)
> - __free_page(ctrl->discard_page);
> +out_free_page:
> + __free_page(ctrl->discard_page);
> return ret;
> }
> EXPORT_SYMBOL_GPL(nvme_init_ctrl);
blktests nvme/044 is failing, potential problem commit by just eye
inspection,
3250e769508b59b1ca54d9892bd8683afc684c86
("nvme: fix and error path leak in nvme_init_ctr") :-
<4>[ 549.551854] ------------[ cut here ]------------
<4>[ 549.551855] ida_free called for id=1 which is not allocated.
<4>[ 549.551870] WARNING: CPU: 18 PID: 3593 at lib/idr.c:525
ida_free+0xec/0x130
<4>[ 549.551917] CPU: 18 PID: 3593 Comm: nvme Tainted: G W
O N 6.3.0-rc2nvme+ #1
<4>[ 549.551919] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
<4>[ 549.551920] RIP: 0010:ida_free+0xec/0x130
<4>[ 549.551926] RSP: 0018:ffffc900014abce0 EFLAGS: 00010286
<4>[ 549.551928] RAX: 0000000000000000 RBX: 0000000000000001 RCX:
0000000000000000
<4>[ 549.551932] RDX: 0000000000000002 RSI: ffffffff8272f833 RDI:
00000000ffffffff
<4>[ 549.551937] RBP: 0000000000000001 R08: ffff88983ff3cba8 R09:
00000000ffffbfff
<4>[ 549.551938] R10: ffff8897df0a0000 R11: ffff88983fedcbc0 R12:
0000000000000001
<4>[ 549.551939] R13: 0000000000000202 R14: ffff8881044000b0 R15:
ffff88810477c000
<4>[ 549.551942] FS: 00007f1cb198ab80(0000) GS:ffff8897df680000(0000)
knlGS:0000000000000000
<4>[ 549.551944] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[ 549.551945] CR2: 0000000000b7b0c8 CR3: 0000000162a1c000 CR4:
0000000000350ee0
<4>[ 549.551948] DR0: ffffffff84379428 DR1: ffffffff84379429 DR2:
ffffffff8437942a
<4>[ 549.551950] DR3: ffffffff8437942b DR6: 00000000ffff0ff0 DR7:
0000000000000600
<4>[ 549.551951] Call Trace:
<4>[ 549.551952] <TASK>
<4>[ 549.551955] nvme_init_ctrl+0x3a3/0x530 [nvme_core]
<4>[ 549.551970] nvme_loop_create_ctrl+0xa3/0x3f0 [nvme_loop]
<4>[ 549.551976] nvmf_dev_write+0x5db/0xe80 [nvme_fabrics]
<4>[ 549.551983] ? inode_security+0x22/0x60
<4>[ 549.551985] ? selinux_file_permission+0x108/0x150
<4>[ 549.551987] vfs_write+0xc5/0x3c0
<4>[ 549.551990] ? _raw_spin_unlock+0x15/0x30
<4>[ 549.551992] ? preempt_count_add+0x4d/0xa0
<4>[ 549.551994] ? fd_install+0x5c/0xe0
<4>[ 549.551997] ksys_write+0x5f/0xe0
<4>[ 549.551999] do_syscall_64+0x3b/0x90
<4>[ 549.552000] entry_SYSCALL_64_after_hwframe+0x72/0xdc
<4>[ 549.552002] RIP: 0033:0x7f1cb1aa07a7
<4>[ 549.552006] RSP: 002b:00007ffdc6b16898 EFLAGS: 00000246 ORIG_RAX:
0000000000000001
<4>[ 549.552007] RAX: ffffffffffffffda RBX: 0000000000b7a090 RCX:
00007f1cb1aa07a7
<4>[ 549.552008] RDX: 0000000000000139 RSI: 0000000000b7a090 RDI:
0000000000000004
<4>[ 549.552008] RBP: 0000000000000004 R08: 0000000000000139 R09:
0000000000b7a090
<4>[ 549.552009] R10: 00007f1cb19c4118 R11: 0000000000000246 R12:
0000000000b787e0
<4>[ 549.552010] R13: 0000000000000139 R14: 00007f1cb1bcd11d R15:
00007f1cb1bcd02b
<4>[ 549.552011] </TASK>
<4>[ 549.552014] ---[ end trace 0000000000000000 ]---
<6>[ 549.552021] nvme_init_ctrl 5202
<6>[ 549.552022] nvme_init_ctrl 5204
<4>[ 549.552022] page:00000000a1e1cf34 refcount:0 mapcount:0
mapping:0000000000000000 index:0x0 pfn:0x15975a
<4>[ 549.552025] flags:
0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
<4>[ 549.552027] raw: 0017ffffc0000000 ffffea0004889dc8
ffff8897df6b3158 0000000000000000
<4>[ 549.552028] raw: 0000000000000000 0000000000000000
00000000ffffffff 0000000000000000
<4>[ 549.552029] page dumped because:
VM_BUG_ON_PAGE(page_ref_count(page) == 0)
<4>[ 549.552033] ------------[ cut here ]------------
<2>[ 549.552034] kernel BUG at include/linux/mm.h:875!
[18]kdb>
* with the following fix its working :-
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 518c759346f0..b59570e50657 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -5157,7 +5157,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct
device *dev,
dev_set_drvdata(ctrl->device, ctrl);
ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
if (ret)
- goto out_put_ctrl;
+ goto out_release_instance;
nvme_get_ctrl(ctrl);
cdev_init(&ctrl->cdev, &nvme_dev_fops);
@@ -5186,8 +5186,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct
device *dev,
out_free_name:
nvme_put_ctrl(ctrl);
kfree_const(ctrl->device->kobj.name);
-out_put_ctrl:
- nvme_put_ctrl(ctrl);
+out_release_instance:
ida_free(&nvme_instance_ida, ctrl->instance);
out_free_page:
__free_page(ctrl->discard_page);
Perhaps we can drop original change for now and reconsider later ...
-ck
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2023-04-13 9:20 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-04-03 0:41 [PATCH] nvme-core: Fixing ctrl de-referencing in nvme_init_ctrl Irvin Cote
2023-04-03 10:22 ` Sagi Grimberg
2023-04-03 18:27 ` irvin cote
2023-04-03 22:37 ` Sagi Grimberg
2023-04-04 21:18 ` irvin cote
2023-04-05 10:06 ` Maurizio Lombardi
2023-04-05 15:20 ` Christoph Hellwig
2023-04-13 9:20 ` Chaitanya Kulkarni
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox