* [PATCH rdma-rc] RDMA/mlx5: Don't access ib_qp fields in internal destroy QP path
@ 2020-06-17 13:01 Leon Romanovsky
2020-06-18 18:05 ` Jason Gunthorpe
0 siblings, 1 reply; 2+ messages in thread
From: Leon Romanovsky @ 2020-06-17 13:01 UTC (permalink / raw)
To: Doug Ledford, Jason Gunthorpe; +Cc: Leon Romanovsky, linux-rdma, Maor Gottlieb
From: Leon Romanovsky <leonro@mellanox.com>
destroy_qp_common is called for flows where QP is already created
by HW. While it is called from IB/core, the ibqp.* fields will be
fully initialized, but it is not the case if this function is called
during QP creation.
Don't rely on ibqp fields as much as possible and initialize
send_cq/recv_cq as temporal solution till all drivers will be
converted to IB/core QP allocation scheme.
------------[ cut here ]------------
refcount_t: underflow; use-after-free.
WARNING: CPU: 1 PID: 5372 at lib/refcount.c:28 refcount_warn_saturate+0xfe/0x1a0
Kernel panic - not syncing: panic_on_warn set ...
CPU: 1 PID: 5372 Comm: syz-executor.2 Not tainted 5.5.0-rc5 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
Call Trace:
dump_stack+0x94/0xce
panic+0x234/0x56f
__warn+0x1cc/0x1e1
? refcount_warn_saturate+0xfe/mlx: ERR-GENERAL [mlx5/mlx5_cmdif_cq.c:213] mlx5/mlx5_cmdif_cq.c:213:mlx5_opcode_CREATE_CQ: assertion failed (MLX5_ST_SZ_BYTES(create_cq_in) + (npas * MLX5_PHYSICAL_ADDRESS_SIZE) == in_size): (0x00000118 == 0x00000120)
report_bug+0x200/0x310
fixup_bug.part.11+0x32/0x80
do_error_trap+0xd3/0x100
do_invalid_op+0x31/0x40
invalid_op+0mlx: ERR-GENERAL [mlx5/mlx5_cmdif_cq.c:213] mlx5/mlx5_cmdif_cq.c:213:mlx5_opcode_CREATE_CQ: assertion failed (MLX5_ST_SZ_BYTES(create_cq_in) + (npas * MLX5_PHYSICAL_ADDRESS_SIZE) == in_size): (0x00000118 == 0x00000120)
RIP: 0010:refcount_warn_saturate+0xfe/0x1a0
Code: 0f 0b eb 9b e8 53 e2 6d ff 80 3d 8c c6 39 03 00 75 8d e8 45 e2 6d ff 48 c7 c7 80 07 15 84 c6 05 77 c6 39 03 01 e8 b2 44 49 ff <0f> 0b e9 6e ff ff ff e8 26 e2 6d ff 80 3d 62 c6 39 03 00 0f 85 5c
RSP: 0018:ffffc9000b5976f0 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff88811478f134 RCX: ffffffff81248f69
RDX: 0000000000015258 RSI: ffffc900022b1000 RDI: ffff88811b1283cc
RBP: 0000000000000003 R08: ffffed10236260e3 R09: ffffed10236260e3
R13: 0000000000000246 R14: 0000000000000000 R15: 0000000000000000
mlx5_core_put_rsc+0x70/0x80
destroy_resource_common+0x8e/0xb0
mlx5_core_destroy_qp+0xaf/0x1d0
mlx5_ib_destroy_qp+0xeb0/0x1460
ib_destroy_qp_user+0x2d5/0x7d0
create_qp+0xed3/0x2130
ib_uverbs_create_qp+0x13e/0x190
? ib_uverbs_ex_create_qp+0mlx: ERR-GENERAL [mlx5/mlx5_cmdif_cq.c:213] mlx5/mlx5_cmdif_cq.c:213:mlx5_opcode_CREATE_CQ: assertion failed (MLX5_ST_SZ_BYTES(create_cq_in) + (npas * MLX5_PHYSICAL_ADDRESS_SIZE) == in_size): (0x00000118 == 0x00000120)
ib_uverbs_write+0xaa5/0xdf0
__vfs_write+0x7c/0x100
ksys_write+0xc8/0x200
do_syscall_64+0x9c/0x390
entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x465b49
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 mlx: ERR-GENERAL [mlx5/mlx5_cmdif_cq.c:213] mlx5/mlx5_cmdif_cq.c:213:mlx5_opcode_CREATE_CQ: assertion failed (MLX5_ST_SZ_BYTES(create_cq_in) + (npas * MLX5_PHYSICAL_ADDRESS_SIZE) == in_size): (0x00000118 == 0x00000120)
RSP: 002b:00007f0984e16c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000465b49
RDX: 0000000000000068 RSI: 0000000020000380 RDI: 0000000000000004
RBP: 00007f0984e16c70 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f0984e176bc
R13: 00000000004ca2ec R14: 000000000070ded0 R15: 0000000000000008
Dumping ftrace buffer:
(ftrace buffer empty)
Kernel Offset: disabled
Fixes: 08d53976609a ("RDMA/mlx5: Copy response to the user in one place")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/hw/mlx5/qp.c | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 805d1c03938d..aff412b513ae 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2341,18 +2341,18 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
unsigned long flags;
int err;
- if (qp->ibqp.rwq_ind_tbl) {
+ if (qp->is_rss) {
destroy_rss_raw_qp_tir(dev, qp);
return;
}
- base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ base = (qp->type == IB_QPT_RAW_PACKET ||
qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
- &qp->raw_packet_qp.rq.base :
- &qp->trans_qp.base;
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
- if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
+ if (qp->type != IB_QPT_RAW_PACKET &&
!(qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp, NULL);
@@ -2368,8 +2368,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
base->mqp.qpn);
}
- get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
- &send_cq, &recv_cq);
+ get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq,
+ &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
@@ -2391,7 +2391,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ if (qp->type == IB_QPT_RAW_PACKET ||
qp->flags & IB_QP_CREATE_SOURCE_QPN) {
destroy_raw_packet_qp(dev, qp);
} else {
@@ -3002,10 +3002,18 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
return &qp->ibqp;
destroy_qp:
- if (qp->type == MLX5_IB_QPT_DCT)
+ if (qp->type == MLX5_IB_QPT_DCT) {
mlx5_ib_destroy_dct(qp);
- else
+ } else {
+ /*
+ * The two lines below are temp solution till QP allocation
+ * will be moved to be under IB/core responsiblity.
+ */
+ qp->ibqp.send_cq = attr->send_cq;
+ qp->ibqp.recv_cq = attr->recv_cq;
destroy_qp_common(dev, qp, udata);
+ }
+
qp = NULL;
free_qp:
kfree(qp);
--
2.26.2
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH rdma-rc] RDMA/mlx5: Don't access ib_qp fields in internal destroy QP path
2020-06-17 13:01 [PATCH rdma-rc] RDMA/mlx5: Don't access ib_qp fields in internal destroy QP path Leon Romanovsky
@ 2020-06-18 18:05 ` Jason Gunthorpe
0 siblings, 0 replies; 2+ messages in thread
From: Jason Gunthorpe @ 2020-06-18 18:05 UTC (permalink / raw)
To: Leon Romanovsky; +Cc: Doug Ledford, Leon Romanovsky, linux-rdma, Maor Gottlieb
On Wed, Jun 17, 2020 at 04:01:48PM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
>
> destroy_qp_common is called for flows where QP is already created
> by HW. While it is called from IB/core, the ibqp.* fields will be
> fully initialized, but it is not the case if this function is called
> during QP creation.
>
> Don't rely on ibqp fields as much as possible and initialize
> send_cq/recv_cq as temporal solution till all drivers will be
> converted to IB/core QP allocation scheme.
>
> ------------[ cut here ]------------
> refcount_t: underflow; use-after-free.
> WARNING: CPU: 1 PID: 5372 at lib/refcount.c:28 refcount_warn_saturate+0xfe/0x1a0
> Kernel panic - not syncing: panic_on_warn set ...
> CPU: 1 PID: 5372 Comm: syz-executor.2 Not tainted 5.5.0-rc5 #2
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
> Call Trace:
> dump_stack+0x94/0xce
> panic+0x234/0x56f
> __warn+0x1cc/0x1e1
> ? refcount_warn_saturate+0xfe/mlx: ERR-GENERAL [mlx5/mlx5_cmdif_cq.c:213] mlx5/mlx5_cmdif_cq.c:213:mlx5_opcode_CREATE_CQ: assertion failed (MLX5_ST_SZ_BYTES(create_cq_in) + (npas * MLX5_PHYSICAL_ADDRESS_SIZE) == in_size): (0x00000118 == 0x00000120)
> report_bug+0x200/0x310
> fixup_bug.part.11+0x32/0x80
> do_error_trap+0xd3/0x100
> do_invalid_op+0x31/0x40
> invalid_op+0mlx: ERR-GENERAL [mlx5/mlx5_cmdif_cq.c:213] mlx5/mlx5_cmdif_cq.c:213:mlx5_opcode_CREATE_CQ: assertion failed (MLX5_ST_SZ_BYTES(create_cq_in) + (npas * MLX5_PHYSICAL_ADDRESS_SIZE) == in_size): (0x00000118 == 0x00000120)
> RIP: 0010:refcount_warn_saturate+0xfe/0x1a0
> Code: 0f 0b eb 9b e8 53 e2 6d ff 80 3d 8c c6 39 03 00 75 8d e8 45 e2 6d ff 48 c7 c7 80 07 15 84 c6 05 77 c6 39 03 01 e8 b2 44 49 ff <0f> 0b e9 6e ff ff ff e8 26 e2 6d ff 80 3d 62 c6 39 03 00 0f 85 5c
> RSP: 0018:ffffc9000b5976f0 EFLAGS: 00010286
> RAX: 0000000000000000 RBX: ffff88811478f134 RCX: ffffffff81248f69
> RDX: 0000000000015258 RSI: ffffc900022b1000 RDI: ffff88811b1283cc
> RBP: 0000000000000003 R08: ffffed10236260e3 R09: ffffed10236260e3
> R13: 0000000000000246 R14: 0000000000000000 R15: 0000000000000000
> mlx5_core_put_rsc+0x70/0x80
> destroy_resource_common+0x8e/0xb0
> mlx5_core_destroy_qp+0xaf/0x1d0
> mlx5_ib_destroy_qp+0xeb0/0x1460
> ib_destroy_qp_user+0x2d5/0x7d0
> create_qp+0xed3/0x2130
> ib_uverbs_create_qp+0x13e/0x190
> ? ib_uverbs_ex_create_qp+0mlx: ERR-GENERAL [mlx5/mlx5_cmdif_cq.c:213] mlx5/mlx5_cmdif_cq.c:213:mlx5_opcode_CREATE_CQ: assertion failed (MLX5_ST_SZ_BYTES(create_cq_in) + (npas * MLX5_PHYSICAL_ADDRESS_SIZE) == in_size): (0x00000118 == 0x00000120)
> ib_uverbs_write+0xaa5/0xdf0
> __vfs_write+0x7c/0x100
> ksys_write+0xc8/0x200
> do_syscall_64+0x9c/0x390
> entry_SYSCALL_64_after_hwframe+0x44/0xa9
> RIP: 0033:0x465b49
> Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 mlx: ERR-GENERAL [mlx5/mlx5_cmdif_cq.c:213] mlx5/mlx5_cmdif_cq.c:213:mlx5_opcode_CREATE_CQ: assertion failed (MLX5_ST_SZ_BYTES(create_cq_in) + (npas * MLX5_PHYSICAL_ADDRESS_SIZE) == in_size): (0x00000118 == 0x00000120)
> RSP: 002b:00007f0984e16c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000465b49
> RDX: 0000000000000068 RSI: 0000000020000380 RDI: 0000000000000004
> RBP: 00007f0984e16c70 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 00007f0984e176bc
> R13: 00000000004ca2ec R14: 000000000070ded0 R15: 0000000000000008
> Dumping ftrace buffer:
> (ftrace buffer empty)
> Kernel Offset: disabled
>
> Fixes: 08d53976609a ("RDMA/mlx5: Copy response to the user in one place")
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> ---
> drivers/infiniband/hw/mlx5/qp.c | 28 ++++++++++++++++++----------
> 1 file changed, 18 insertions(+), 10 deletions(-)
applied to for-rc
Thanks,
Jason
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-06-18 18:05 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-06-17 13:01 [PATCH rdma-rc] RDMA/mlx5: Don't access ib_qp fields in internal destroy QP path Leon Romanovsky
2020-06-18 18:05 ` Jason Gunthorpe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).