* [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device
@ 2024-12-12 15:18 Bernard Metzler
2024-12-14 12:37 ` Zhu Yanjun
2024-12-19 10:19 ` Leon Romanovsky
0 siblings, 2 replies; 6+ messages in thread
From: Bernard Metzler @ 2024-12-12 15:18 UTC (permalink / raw)
To: linux-rdma
Cc: jgg, leon, linux-kernel, netdev, syzkaller-bugs, zyjzyj2000,
Bernard Metzler, syzbot+4b87489410b4efd181bf
Do not manage a per device direct link to net_device. Rely
on associated ib_devices net_device management, not doubling
the effort locally. A badly managed local link to net_device
was causing a 'KASAN: slab-use-after-free' exception during
siw_query_port() call.
Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
drivers/infiniband/sw/siw/siw.h | 7 +++---
drivers/infiniband/sw/siw/siw_cm.c | 31 +++++++++++++++++++-----
drivers/infiniband/sw/siw/siw_main.c | 15 +-----------
drivers/infiniband/sw/siw/siw_verbs.c | 35 ++++++++++++++++++---------
4 files changed, 53 insertions(+), 35 deletions(-)
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 86d4d6a2170e..ea5eee50dc39 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -46,6 +46,9 @@
*/
#define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
+/* There is always only a port 1 per siw device */
+#define SIW_PORT 1
+
struct siw_dev_cap {
int max_qp;
int max_qp_wr;
@@ -69,16 +72,12 @@ struct siw_pd {
struct siw_device {
struct ib_device base_dev;
- struct net_device *netdev;
struct siw_dev_cap attrs;
u32 vendor_part_id;
int numa_node;
char raw_gid[ETH_ALEN];
- /* physical port state (only one port per device) */
- enum ib_port_state state;
-
spinlock_t lock;
struct xarray qp_xa;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 86323918a570..b157bd01e70b 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
{
struct socket *s;
struct siw_cep *cep = NULL;
+ struct net_device *ndev = NULL;
struct siw_device *sdev = to_siw_dev(id->device);
int addr_family = id->local_addr.ss_family;
int rv = 0;
@@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
/* For wildcard addr, limit binding to current device only */
- if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
- s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
-
+ if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
+ ndev = ib_device_get_netdev(id->device, SIW_PORT);
+ if (ndev) {
+ s->sk->sk_bound_dev_if = ndev->ifindex;
+ } else {
+ rv = -ENODEV;
+ goto error;
+ }
+ }
rv = s->ops->bind(s, (struct sockaddr *)laddr,
sizeof(struct sockaddr_in));
} else {
@@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
}
/* For wildcard addr, limit binding to current device only */
- if (ipv6_addr_any(&laddr->sin6_addr))
- s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
-
+ if (ipv6_addr_any(&laddr->sin6_addr)) {
+ ndev = ib_device_get_netdev(id->device, SIW_PORT);
+ if (ndev) {
+ s->sk->sk_bound_dev_if = ndev->ifindex;
+ } else {
+ rv = -ENODEV;
+ goto error;
+ }
+ }
rv = s->ops->bind(s, (struct sockaddr *)laddr,
sizeof(struct sockaddr_in6));
}
@@ -1861,6 +1874,9 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
cep->state = SIW_EPSTATE_LISTENING;
+ if (ndev)
+ dev_put(ndev);
+
siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
return 0;
@@ -1880,6 +1896,9 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
}
sock_release(s);
+ if (ndev)
+ dev_put(ndev);
+
return rv;
}
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 17abef48abcd..14d3103aee6f 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
return NULL;
base_dev = &sdev->base_dev;
- sdev->netdev = netdev;
if (netdev->addr_len) {
memcpy(sdev->raw_gid, netdev->dev_addr,
@@ -381,12 +380,10 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
switch (event) {
case NETDEV_UP:
- sdev->state = IB_PORT_ACTIVE;
siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
break;
case NETDEV_DOWN:
- sdev->state = IB_PORT_DOWN;
siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
break;
@@ -407,12 +404,8 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
break;
/*
- * Todo: Below netdev events are currently not handled.
+ * All other events are not handled
*/
- case NETDEV_CHANGEMTU:
- case NETDEV_CHANGE:
- break;
-
default:
break;
}
@@ -442,12 +435,6 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev)
sdev = siw_device_create(netdev);
if (sdev) {
dev_dbg(&netdev->dev, "siw: new device\n");
-
- if (netif_running(netdev) && netif_carrier_ok(netdev))
- sdev->state = IB_PORT_ACTIVE;
- else
- sdev->state = IB_PORT_DOWN;
-
ib_mark_name_assigned_by_user(&sdev->base_dev);
rv = siw_device_register(sdev, basedev_name);
if (rv)
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 986666c19378..7ca0297d68a4 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -171,21 +171,29 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
int siw_query_port(struct ib_device *base_dev, u32 port,
struct ib_port_attr *attr)
{
- struct siw_device *sdev = to_siw_dev(base_dev);
+ struct net_device *ndev;
int rv;
memset(attr, 0, sizeof(*attr));
rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
&attr->active_width);
+ if (rv)
+ return rv;
+
+ ndev = ib_device_get_netdev(base_dev, SIW_PORT);
+ if (!ndev)
+ return -ENODEV;
+
attr->gid_tbl_len = 1;
attr->max_msg_sz = -1;
- attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
- attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
- attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
+ attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
+ attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
+ attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
+ attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
- attr->state = sdev->state;
/*
* All zero
*
@@ -199,6 +207,7 @@ int siw_query_port(struct ib_device *base_dev, u32 port,
* attr->subnet_timeout = 0;
* attr->init_type_repy = 0;
*/
+ dev_put(ndev);
return rv;
}
@@ -505,21 +514,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
struct siw_qp *qp;
- struct siw_device *sdev;
+ struct net_device *ndev;
- if (base_qp && qp_attr && qp_init_attr) {
+ if (base_qp && qp_attr && qp_init_attr)
qp = to_siw_qp(base_qp);
- sdev = to_siw_dev(base_qp->device);
- } else {
+ else
return -EINVAL;
- }
+
+ ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
+ if (!ndev)
+ return -ENODEV;
+
qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
qp_attr->cap.max_send_wr = qp->attrs.sq_size;
qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
- qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
+ qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
qp_attr->max_rd_atomic = qp->attrs.irq_size;
qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
@@ -534,6 +546,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
+ dev_put(ndev);
return 0;
}
--
2.38.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device
2024-12-12 15:18 [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device Bernard Metzler
@ 2024-12-14 12:37 ` Zhu Yanjun
2024-12-15 17:37 ` Zhu Yanjun
2024-12-19 10:19 ` Leon Romanovsky
1 sibling, 1 reply; 6+ messages in thread
From: Zhu Yanjun @ 2024-12-14 12:37 UTC (permalink / raw)
To: Bernard Metzler, linux-rdma
Cc: jgg, leon, linux-kernel, netdev, syzkaller-bugs, zyjzyj2000,
syzbot+4b87489410b4efd181bf
在 2024/12/12 16:18, Bernard Metzler 写道:
> Do not manage a per device direct link to net_device. Rely
> on associated ib_devices net_device management, not doubling
> the effort locally. A badly managed local link to net_device
> was causing a 'KASAN: slab-use-after-free' exception during
> siw_query_port() call.
>
> Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
> Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> ---
> drivers/infiniband/sw/siw/siw.h | 7 +++---
> drivers/infiniband/sw/siw/siw_cm.c | 31 +++++++++++++++++++-----
> drivers/infiniband/sw/siw/siw_main.c | 15 +-----------
> drivers/infiniband/sw/siw/siw_verbs.c | 35 ++++++++++++++++++---------
> 4 files changed, 53 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
> index 86d4d6a2170e..ea5eee50dc39 100644
> --- a/drivers/infiniband/sw/siw/siw.h
> +++ b/drivers/infiniband/sw/siw/siw.h
> @@ -46,6 +46,9 @@
> */
> #define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
>
> +/* There is always only a port 1 per siw device */
> +#define SIW_PORT 1
> +
> struct siw_dev_cap {
> int max_qp;
> int max_qp_wr;
> @@ -69,16 +72,12 @@ struct siw_pd {
>
> struct siw_device {
> struct ib_device base_dev;
> - struct net_device *netdev;
> struct siw_dev_cap attrs;
>
> u32 vendor_part_id;
> int numa_node;
> char raw_gid[ETH_ALEN];
>
> - /* physical port state (only one port per device) */
> - enum ib_port_state state;
> -
> spinlock_t lock;
>
> struct xarray qp_xa;
> diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
> index 86323918a570..b157bd01e70b 100644
> --- a/drivers/infiniband/sw/siw/siw_cm.c
> +++ b/drivers/infiniband/sw/siw/siw_cm.c
> @@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
> {
> struct socket *s;
> struct siw_cep *cep = NULL;
> + struct net_device *ndev = NULL;
> struct siw_device *sdev = to_siw_dev(id->device);
> int addr_family = id->local_addr.ss_family;
> int rv = 0;
> @@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
> struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
>
> /* For wildcard addr, limit binding to current device only */
> - if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
> - s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> -
> + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
> + ndev = ib_device_get_netdev(id->device, SIW_PORT);
> + if (ndev) {
> + s->sk->sk_bound_dev_if = ndev->ifindex;
> + } else {
> + rv = -ENODEV;
> + goto error;
> + }
> + }
> rv = s->ops->bind(s, (struct sockaddr *)laddr,
> sizeof(struct sockaddr_in));
> } else {
> @@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
> }
>
> /* For wildcard addr, limit binding to current device only */
> - if (ipv6_addr_any(&laddr->sin6_addr))
> - s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> -
> + if (ipv6_addr_any(&laddr->sin6_addr)) {
> + ndev = ib_device_get_netdev(id->device, SIW_PORT);
> + if (ndev) {
> + s->sk->sk_bound_dev_if = ndev->ifindex;
> + } else {
> + rv = -ENODEV;
> + goto error;
> + }
> + }
> rv = s->ops->bind(s, (struct sockaddr *)laddr,
> sizeof(struct sockaddr_in6));
> }
> @@ -1861,6 +1874,9 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
> list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
> cep->state = SIW_EPSTATE_LISTENING;
>
> + if (ndev)
> + dev_put(ndev);
> +
<...>
> siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
>
> return 0;
> @@ -1880,6 +1896,9 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
> }
> sock_release(s);
>
> + if (ndev)
> + dev_put(ndev);
> +
dev_put will invoke netdev_put. In netdev_put, dev is checked.
Thus, no need to check ndev before dev_put function?
static inline void netdev_put(struct net_device *dev,
netdevice_tracker *tracker)
{
if (dev) {
netdev_tracker_free(dev, tracker);
__dev_put(dev);
}
}
> return rv;
> }
>
> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
> index 17abef48abcd..14d3103aee6f 100644
> --- a/drivers/infiniband/sw/siw/siw_main.c
> +++ b/drivers/infiniband/sw/siw/siw_main.c
> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
> return NULL;
>
> base_dev = &sdev->base_dev;
> - sdev->netdev = netdev;
>
> if (netdev->addr_len) {
> memcpy(sdev->raw_gid, netdev->dev_addr,
> @@ -381,12 +380,10 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
>
> switch (event) {
> case NETDEV_UP:
> - sdev->state = IB_PORT_ACTIVE;
> siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
> break;
>
> case NETDEV_DOWN:
> - sdev->state = IB_PORT_DOWN;
> siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
> break;
>
> @@ -407,12 +404,8 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
> siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
> break;
> /*
> - * Todo: Below netdev events are currently not handled.
> + * All other events are not handled
> */
> - case NETDEV_CHANGEMTU:
> - case NETDEV_CHANGE:
> - break;
> -
> default:
> break;
> }
> @@ -442,12 +435,6 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev)
> sdev = siw_device_create(netdev);
> if (sdev) {
> dev_dbg(&netdev->dev, "siw: new device\n");
> -
> - if (netif_running(netdev) && netif_carrier_ok(netdev))
> - sdev->state = IB_PORT_ACTIVE;
> - else
> - sdev->state = IB_PORT_DOWN;
> -
> ib_mark_name_assigned_by_user(&sdev->base_dev);
> rv = siw_device_register(sdev, basedev_name);
> if (rv)
> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
> index 986666c19378..7ca0297d68a4 100644
> --- a/drivers/infiniband/sw/siw/siw_verbs.c
> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
> @@ -171,21 +171,29 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
> int siw_query_port(struct ib_device *base_dev, u32 port,
> struct ib_port_attr *attr)
> {
> - struct siw_device *sdev = to_siw_dev(base_dev);
> + struct net_device *ndev;
> int rv;
>
> memset(attr, 0, sizeof(*attr));
>
> rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
> &attr->active_width);
> + if (rv)
> + return rv;
> +
> + ndev = ib_device_get_netdev(base_dev, SIW_PORT);
> + if (!ndev)
> + return -ENODEV;
> +
> attr->gid_tbl_len = 1;
> attr->max_msg_sz = -1;
> - attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> - attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> - attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
> + attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
> + attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
> + attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
> IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
> + attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
> + IB_PORT_ACTIVE : IB_PORT_DOWN;
> attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
> - attr->state = sdev->state;
> /*
> * All zero
> *
> @@ -199,6 +207,7 @@ int siw_query_port(struct ib_device *base_dev, u32 port,
> * attr->subnet_timeout = 0;
> * attr->init_type_repy = 0;
> */
> + dev_put(ndev);
> return rv;
> }
>
> @@ -505,21 +514,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
> int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
> {
> struct siw_qp *qp;
> - struct siw_device *sdev;
> + struct net_device *ndev;
>
> - if (base_qp && qp_attr && qp_init_attr) {
> + if (base_qp && qp_attr && qp_init_attr)
> qp = to_siw_qp(base_qp);
> - sdev = to_siw_dev(base_qp->device);
> - } else {
> + else
> return -EINVAL;
> - }
> +
> + ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
> + if (!ndev)
> + return -ENODEV;
> +
> qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
> qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
> qp_attr->cap.max_send_wr = qp->attrs.sq_size;
> qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
> qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
> qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
> - qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> + qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
> qp_attr->max_rd_atomic = qp->attrs.irq_size;
> qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
>
> @@ -534,6 +546,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
>
> qp_init_attr->cap = qp_attr->cap;
>
> + dev_put(ndev);
> return 0;
> }
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device
2024-12-14 12:37 ` Zhu Yanjun
@ 2024-12-15 17:37 ` Zhu Yanjun
2024-12-16 12:31 ` Bernard Metzler
0 siblings, 1 reply; 6+ messages in thread
From: Zhu Yanjun @ 2024-12-15 17:37 UTC (permalink / raw)
To: Bernard Metzler, linux-rdma
Cc: jgg, leon, linux-kernel, netdev, syzkaller-bugs, zyjzyj2000,
syzbot+4b87489410b4efd181bf
在 2024/12/14 13:37, Zhu Yanjun 写道:
> 在 2024/12/12 16:18, Bernard Metzler 写道:
>> Do not manage a per device direct link to net_device. Rely
>> on associated ib_devices net_device management, not doubling
>> the effort locally. A badly managed local link to net_device
>> was causing a 'KASAN: slab-use-after-free' exception during
>> siw_query_port() call.
>>
>> Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
>> Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
>> Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
>> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
>> ---
>> drivers/infiniband/sw/siw/siw.h | 7 +++---
>> drivers/infiniband/sw/siw/siw_cm.c | 31 +++++++++++++++++++-----
>> drivers/infiniband/sw/siw/siw_main.c | 15 +-----------
>> drivers/infiniband/sw/siw/siw_verbs.c | 35 ++++++++++++++++++---------
>> 4 files changed, 53 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/
>> siw/siw.h
>> index 86d4d6a2170e..ea5eee50dc39 100644
>> --- a/drivers/infiniband/sw/siw/siw.h
>> +++ b/drivers/infiniband/sw/siw/siw.h
>> @@ -46,6 +46,9 @@
>> */
>> #define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
>> +/* There is always only a port 1 per siw device */
>> +#define SIW_PORT 1
>> +
>> struct siw_dev_cap {
>> int max_qp;
>> int max_qp_wr;
>> @@ -69,16 +72,12 @@ struct siw_pd {
>> struct siw_device {
>> struct ib_device base_dev;
>> - struct net_device *netdev;
>> struct siw_dev_cap attrs;
>> u32 vendor_part_id;
>> int numa_node;
>> char raw_gid[ETH_ALEN];
>> - /* physical port state (only one port per device) */
>> - enum ib_port_state state;
>> -
>> spinlock_t lock;
>> struct xarray qp_xa;
>> diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/
>> sw/siw/siw_cm.c
>> index 86323918a570..b157bd01e70b 100644
>> --- a/drivers/infiniband/sw/siw/siw_cm.c
>> +++ b/drivers/infiniband/sw/siw/siw_cm.c
>> @@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int
>> backlog)
>> {
>> struct socket *s;
>> struct siw_cep *cep = NULL;
>> + struct net_device *ndev = NULL;
>> struct siw_device *sdev = to_siw_dev(id->device);
>> int addr_family = id->local_addr.ss_family;
>> int rv = 0;
>> @@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int
>> backlog)
>> struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
>> /* For wildcard addr, limit binding to current device only */
>> - if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
>> - s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
>> -
>> + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
>> + ndev = ib_device_get_netdev(id->device, SIW_PORT);
>> + if (ndev) {
>> + s->sk->sk_bound_dev_if = ndev->ifindex;
>> + } else {
>> + rv = -ENODEV;
>> + goto error;
>> + }
>> + }
>> rv = s->ops->bind(s, (struct sockaddr *)laddr,
>> sizeof(struct sockaddr_in));
>> } else {
>> @@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int
>> backlog)
>> }
>> /* For wildcard addr, limit binding to current device only */
>> - if (ipv6_addr_any(&laddr->sin6_addr))
>> - s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
>> -
>> + if (ipv6_addr_any(&laddr->sin6_addr)) {
>> + ndev = ib_device_get_netdev(id->device, SIW_PORT);
>> + if (ndev) {
>> + s->sk->sk_bound_dev_if = ndev->ifindex;
>> + } else {
>> + rv = -ENODEV;
>> + goto error;
>> + }
>> + }
>> rv = s->ops->bind(s, (struct sockaddr *)laddr,
>> sizeof(struct sockaddr_in6));
>> }
>> @@ -1861,6 +1874,9 @@ int siw_create_listen(struct iw_cm_id *id, int
>> backlog)
>> list_add_tail(&cep->listenq, (struct list_head *)id-
>> >provider_data);
>> cep->state = SIW_EPSTATE_LISTENING;
>> + if (ndev)
>> + dev_put(ndev);
>> +
>
> <...>
>
>> siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
>> return 0;
>> @@ -1880,6 +1896,9 @@ int siw_create_listen(struct iw_cm_id *id, int
>> backlog)
>> }
>> sock_release(s);
>> + if (ndev)
>> + dev_put(ndev);
>> +
>
> dev_put will invoke netdev_put. In netdev_put, dev is checked.
> Thus, no need to check ndev before dev_put function?
Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Zhu Yanjun
>
> static inline void netdev_put(struct net_device *dev,
> netdevice_tracker *tracker)
> {
> if (dev) {
> netdev_tracker_free(dev, tracker);
> __dev_put(dev);
> }
> }
>
>> return rv;
>> }
>> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/
>> infiniband/sw/siw/siw_main.c
>> index 17abef48abcd..14d3103aee6f 100644
>> --- a/drivers/infiniband/sw/siw/siw_main.c
>> +++ b/drivers/infiniband/sw/siw/siw_main.c
>> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct
>> net_device *netdev)
>> return NULL;
>> base_dev = &sdev->base_dev;
>> - sdev->netdev = netdev;
>> if (netdev->addr_len) {
>> memcpy(sdev->raw_gid, netdev->dev_addr,
>> @@ -381,12 +380,10 @@ static int siw_netdev_event(struct
>> notifier_block *nb, unsigned long event,
>> switch (event) {
>> case NETDEV_UP:
>> - sdev->state = IB_PORT_ACTIVE;
>> siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
>> break;
>> case NETDEV_DOWN:
>> - sdev->state = IB_PORT_DOWN;
>> siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
>> break;
>> @@ -407,12 +404,8 @@ static int siw_netdev_event(struct notifier_block
>> *nb, unsigned long event,
>> siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
>> break;
>> /*
>> - * Todo: Below netdev events are currently not handled.
>> + * All other events are not handled
>> */
>> - case NETDEV_CHANGEMTU:
>> - case NETDEV_CHANGE:
>> - break;
>> -
>> default:
>> break;
>> }
>> @@ -442,12 +435,6 @@ static int siw_newlink(const char *basedev_name,
>> struct net_device *netdev)
>> sdev = siw_device_create(netdev);
>> if (sdev) {
>> dev_dbg(&netdev->dev, "siw: new device\n");
>> -
>> - if (netif_running(netdev) && netif_carrier_ok(netdev))
>> - sdev->state = IB_PORT_ACTIVE;
>> - else
>> - sdev->state = IB_PORT_DOWN;
>> -
>> ib_mark_name_assigned_by_user(&sdev->base_dev);
>> rv = siw_device_register(sdev, basedev_name);
>> if (rv)
>> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/
>> infiniband/sw/siw/siw_verbs.c
>> index 986666c19378..7ca0297d68a4 100644
>> --- a/drivers/infiniband/sw/siw/siw_verbs.c
>> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
>> @@ -171,21 +171,29 @@ int siw_query_device(struct ib_device *base_dev,
>> struct ib_device_attr *attr,
>> int siw_query_port(struct ib_device *base_dev, u32 port,
>> struct ib_port_attr *attr)
>> {
>> - struct siw_device *sdev = to_siw_dev(base_dev);
>> + struct net_device *ndev;
>> int rv;
>> memset(attr, 0, sizeof(*attr));
>> rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
>> &attr->active_width);
>> + if (rv)
>> + return rv;
>> +
>> + ndev = ib_device_get_netdev(base_dev, SIW_PORT);
>> + if (!ndev)
>> + return -ENODEV;
>> +
>> attr->gid_tbl_len = 1;
>> attr->max_msg_sz = -1;
>> - attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
>> - attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
>> - attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
>> + attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
>> + attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
>> + attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
>> IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
>> + attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
>> + IB_PORT_ACTIVE : IB_PORT_DOWN;
>> attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
>> - attr->state = sdev->state;
>> /*
>> * All zero
>> *
>> @@ -199,6 +207,7 @@ int siw_query_port(struct ib_device *base_dev, u32
>> port,
>> * attr->subnet_timeout = 0;
>> * attr->init_type_repy = 0;
>> */
>> + dev_put(ndev);
>> return rv;
>> }
>> @@ -505,21 +514,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct
>> ib_qp_attr *qp_attr,
>> int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
>> {
>> struct siw_qp *qp;
>> - struct siw_device *sdev;
>> + struct net_device *ndev;
>> - if (base_qp && qp_attr && qp_init_attr) {
>> + if (base_qp && qp_attr && qp_init_attr)
>> qp = to_siw_qp(base_qp);
>> - sdev = to_siw_dev(base_qp->device);
>> - } else {
>> + else
>> return -EINVAL;
>> - }
>> +
>> + ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
>> + if (!ndev)
>> + return -ENODEV;
>> +
>> qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
>> qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
>> qp_attr->cap.max_send_wr = qp->attrs.sq_size;
>> qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
>> qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
>> qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
>> - qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
>> + qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
>> qp_attr->max_rd_atomic = qp->attrs.irq_size;
>> qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
>> @@ -534,6 +546,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct
>> ib_qp_attr *qp_attr,
>> qp_init_attr->cap = qp_attr->cap;
>> + dev_put(ndev);
>> return 0;
>> }
>
--
Best Regards,
Yanjun.Zhu
^ permalink raw reply [flat|nested] 6+ messages in thread
* RE: [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device
2024-12-15 17:37 ` Zhu Yanjun
@ 2024-12-16 12:31 ` Bernard Metzler
2024-12-19 10:23 ` Leon Romanovsky
0 siblings, 1 reply; 6+ messages in thread
From: Bernard Metzler @ 2024-12-16 12:31 UTC (permalink / raw)
To: Zhu Yanjun, linux-rdma@vger.kernel.org; +Cc: jgg@ziepe.ca, leon@kernel.org
> -----Original Message-----
> From: Zhu Yanjun <yanjun.zhu@linux.dev>
> Sent: Sunday, December 15, 2024 6:38 PM
> To: Bernard Metzler <BMT@zurich.ibm.com>; linux-rdma@vger.kernel.org
> Cc: jgg@ziepe.ca; leon@kernel.org; linux-kernel@vger.kernel.org;
> netdev@vger.kernel.org; syzkaller-bugs@googlegroups.com;
> zyjzyj2000@gmail.com; syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> Subject: [EXTERNAL] Re: [PATCH RESEND v2] RDMA/siw: Remove direct link to
> net_device
>
>
>
> 在 2024/12/14 13:37, Zhu Yanjun 写道:
> > 在 2024/12/12 16:18, Bernard Metzler 写道:
> >> Do not manage a per device direct link to net_device. Rely
> >> on associated ib_devices net_device management, not doubling
> >> the effort locally. A badly managed local link to net_device
> >> was causing a 'KASAN: slab-use-after-free' exception during
> >> siw_query_port() call.
> >>
> >> Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
> >> Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> >> Link: https%
> 3A__syzkaller.appspot.com_bug-3Fextid-
> 3D4b87489410b4efd181bf&d=DwIDaQ&c=BSDicqBQBDjDI9RkVyTcHQ&r=4ynb4Sj_4MUcZXbh
> vovE4tYSbqxyOwdSiLedP4yO55g&m=Sr4DcK0Wb4iQYxeAGdwnJVj231gGpXbdjE0vjQXbpMgNG
> MrUQnjp4I9ZuzuThnlu&s=JMDawq7uiJd4vTvguvjXj0pC2okvGwSJ-mB05JlZJX4&e=
> >> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> >> ---
> >> drivers/infiniband/sw/siw/siw.h | 7 +++---
> >> drivers/infiniband/sw/siw/siw_cm.c | 31 +++++++++++++++++++-----
> >> drivers/infiniband/sw/siw/siw_main.c | 15 +-----------
> >> drivers/infiniband/sw/siw/siw_verbs.c | 35 ++++++++++++++++++---------
> >> 4 files changed, 53 insertions(+), 35 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/
> >> siw/siw.h
> >> index 86d4d6a2170e..ea5eee50dc39 100644
> >> --- a/drivers/infiniband/sw/siw/siw.h
> >> +++ b/drivers/infiniband/sw/siw/siw.h
> >> @@ -46,6 +46,9 @@
> >> */
> >> #define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
> >> +/* There is always only a port 1 per siw device */
> >> +#define SIW_PORT 1
> >> +
> >> struct siw_dev_cap {
> >> int max_qp;
> >> int max_qp_wr;
> >> @@ -69,16 +72,12 @@ struct siw_pd {
> >> struct siw_device {
> >> struct ib_device base_dev;
> >> - struct net_device *netdev;
> >> struct siw_dev_cap attrs;
> >> u32 vendor_part_id;
> >> int numa_node;
> >> char raw_gid[ETH_ALEN];
> >> - /* physical port state (only one port per device) */
> >> - enum ib_port_state state;
> >> -
> >> spinlock_t lock;
> >> struct xarray qp_xa;
> >> diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/
> >> sw/siw/siw_cm.c
> >> index 86323918a570..b157bd01e70b 100644
> >> --- a/drivers/infiniband/sw/siw/siw_cm.c
> >> +++ b/drivers/infiniband/sw/siw/siw_cm.c
> >> @@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int
> >> backlog)
> >> {
> >> struct socket *s;
> >> struct siw_cep *cep = NULL;
> >> + struct net_device *ndev = NULL;
> >> struct siw_device *sdev = to_siw_dev(id->device);
> >> int addr_family = id->local_addr.ss_family;
> >> int rv = 0;
> >> @@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int
> >> backlog)
> >> struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
> >> /* For wildcard addr, limit binding to current device only */
> >> - if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
> >> - s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> >> -
> >> + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
> >> + ndev = ib_device_get_netdev(id->device, SIW_PORT);
> >> + if (ndev) {
> >> + s->sk->sk_bound_dev_if = ndev->ifindex;
> >> + } else {
> >> + rv = -ENODEV;
> >> + goto error;
> >> + }
> >> + }
> >> rv = s->ops->bind(s, (struct sockaddr *)laddr,
> >> sizeof(struct sockaddr_in));
> >> } else {
> >> @@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int
> >> backlog)
> >> }
> >> /* For wildcard addr, limit binding to current device only */
> >> - if (ipv6_addr_any(&laddr->sin6_addr))
> >> - s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> >> -
> >> + if (ipv6_addr_any(&laddr->sin6_addr)) {
> >> + ndev = ib_device_get_netdev(id->device, SIW_PORT);
> >> + if (ndev) {
> >> + s->sk->sk_bound_dev_if = ndev->ifindex;
> >> + } else {
> >> + rv = -ENODEV;
> >> + goto error;
> >> + }
> >> + }
> >> rv = s->ops->bind(s, (struct sockaddr *)laddr,
> >> sizeof(struct sockaddr_in6));
> >> }
> >> @@ -1861,6 +1874,9 @@ int siw_create_listen(struct iw_cm_id *id, int
> >> backlog)
> >> list_add_tail(&cep->listenq, (struct list_head *)id-
> >> >provider_data);
> >> cep->state = SIW_EPSTATE_LISTENING;
> >> + if (ndev)
> >> + dev_put(ndev);
> >> +
> >
> > <...>
> >
> >> siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
> >> return 0;
> >> @@ -1880,6 +1896,9 @@ int siw_create_listen(struct iw_cm_id *id, int
> >> backlog)
> >> }
> >> sock_release(s);
> >> + if (ndev)
> >> + dev_put(ndev);
> >> +
> >
> > dev_put will invoke netdev_put. In netdev_put, dev is checked.
> > Thus, no need to check ndev before dev_put function?
>
Thanks for the review.
Sending this to RDMA only for now. Since kernel 5.10,
this NULL check has been introduced. siw is in Linux
since 5.4. Shall I care about back porting,
or just follow Zhu's recommendation with a corrected
resend?
Thanks,
Bernard.
> Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
>
> Zhu Yanjun
>
> >
> > static inline void netdev_put(struct net_device *dev,
> > netdevice_tracker *tracker)
> > {
> > if (dev) {
> > netdev_tracker_free(dev, tracker);
> > __dev_put(dev);
> > }
> > }
> >
> >> return rv;
> >> }
> >> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/
> >> infiniband/sw/siw/siw_main.c
> >> index 17abef48abcd..14d3103aee6f 100644
> >> --- a/drivers/infiniband/sw/siw/siw_main.c
> >> +++ b/drivers/infiniband/sw/siw/siw_main.c
> >> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct
> >> net_device *netdev)
> >> return NULL;
> >> base_dev = &sdev->base_dev;
> >> - sdev->netdev = netdev;
> >> if (netdev->addr_len) {
> >> memcpy(sdev->raw_gid, netdev->dev_addr,
> >> @@ -381,12 +380,10 @@ static int siw_netdev_event(struct
> >> notifier_block *nb, unsigned long event,
> >> switch (event) {
> >> case NETDEV_UP:
> >> - sdev->state = IB_PORT_ACTIVE;
> >> siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
> >> break;
> >> case NETDEV_DOWN:
> >> - sdev->state = IB_PORT_DOWN;
> >> siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
> >> break;
> >> @@ -407,12 +404,8 @@ static int siw_netdev_event(struct notifier_block
> >> *nb, unsigned long event,
> >> siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
> >> break;
> >> /*
> >> - * Todo: Below netdev events are currently not handled.
> >> + * All other events are not handled
> >> */
> >> - case NETDEV_CHANGEMTU:
> >> - case NETDEV_CHANGE:
> >> - break;
> >> -
> >> default:
> >> break;
> >> }
> >> @@ -442,12 +435,6 @@ static int siw_newlink(const char *basedev_name,
> >> struct net_device *netdev)
> >> sdev = siw_device_create(netdev);
> >> if (sdev) {
> >> dev_dbg(&netdev->dev, "siw: new device\n");
> >> -
> >> - if (netif_running(netdev) && netif_carrier_ok(netdev))
> >> - sdev->state = IB_PORT_ACTIVE;
> >> - else
> >> - sdev->state = IB_PORT_DOWN;
> >> -
> >> ib_mark_name_assigned_by_user(&sdev->base_dev);
> >> rv = siw_device_register(sdev, basedev_name);
> >> if (rv)
> >> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/
> >> infiniband/sw/siw/siw_verbs.c
> >> index 986666c19378..7ca0297d68a4 100644
> >> --- a/drivers/infiniband/sw/siw/siw_verbs.c
> >> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
> >> @@ -171,21 +171,29 @@ int siw_query_device(struct ib_device *base_dev,
> >> struct ib_device_attr *attr,
> >> int siw_query_port(struct ib_device *base_dev, u32 port,
> >> struct ib_port_attr *attr)
> >> {
> >> - struct siw_device *sdev = to_siw_dev(base_dev);
> >> + struct net_device *ndev;
> >> int rv;
> >> memset(attr, 0, sizeof(*attr));
> >> rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
> >> &attr->active_width);
> >> + if (rv)
> >> + return rv;
> >> +
> >> + ndev = ib_device_get_netdev(base_dev, SIW_PORT);
> >> + if (!ndev)
> >> + return -ENODEV;
> >> +
> >> attr->gid_tbl_len = 1;
> >> attr->max_msg_sz = -1;
> >> - attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> >> - attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> >> - attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
> >> + attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
> >> + attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
> >> + attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev))
> ?
> >> IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
> >> + attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
> >> + IB_PORT_ACTIVE : IB_PORT_DOWN;
> >> attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
> >> - attr->state = sdev->state;
> >> /*
> >> * All zero
> >> *
> >> @@ -199,6 +207,7 @@ int siw_query_port(struct ib_device *base_dev, u32
> >> port,
> >> * attr->subnet_timeout = 0;
> >> * attr->init_type_repy = 0;
> >> */
> >> + dev_put(ndev);
> >> return rv;
> >> }
> >> @@ -505,21 +514,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct
> >> ib_qp_attr *qp_attr,
> >> int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
> >> {
> >> struct siw_qp *qp;
> >> - struct siw_device *sdev;
> >> + struct net_device *ndev;
> >> - if (base_qp && qp_attr && qp_init_attr) {
> >> + if (base_qp && qp_attr && qp_init_attr)
> >> qp = to_siw_qp(base_qp);
> >> - sdev = to_siw_dev(base_qp->device);
> >> - } else {
> >> + else
> >> return -EINVAL;
> >> - }
> >> +
> >> + ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
> >> + if (!ndev)
> >> + return -ENODEV;
> >> +
> >> qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
> >> qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
> >> qp_attr->cap.max_send_wr = qp->attrs.sq_size;
> >> qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
> >> qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
> >> qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
> >> - qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> >> + qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
> >> qp_attr->max_rd_atomic = qp->attrs.irq_size;
> >> qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
> >> @@ -534,6 +546,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct
> >> ib_qp_attr *qp_attr,
> >> qp_init_attr->cap = qp_attr->cap;
> >> + dev_put(ndev);
> >> return 0;
> >> }
> >
>
> --
> Best Regards,
> Yanjun.Zhu
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device
2024-12-12 15:18 [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device Bernard Metzler
2024-12-14 12:37 ` Zhu Yanjun
@ 2024-12-19 10:19 ` Leon Romanovsky
1 sibling, 0 replies; 6+ messages in thread
From: Leon Romanovsky @ 2024-12-19 10:19 UTC (permalink / raw)
To: linux-rdma, Bernard Metzler
Cc: jgg, linux-kernel, netdev, syzkaller-bugs, zyjzyj2000,
syzbot+4b87489410b4efd181bf
On Thu, 12 Dec 2024 16:18:48 +0100, Bernard Metzler wrote:
> Do not manage a per device direct link to net_device. Rely
> on associated ib_devices net_device management, not doubling
> the effort locally. A badly managed local link to net_device
> was causing a 'KASAN: slab-use-after-free' exception during
> siw_query_port() call.
>
>
> [...]
Applied, thanks!
[1/1] RDMA/siw: Remove direct link to net_device
https://git.kernel.org/rdma/rdma/c/16b87037b48889
Best regards,
--
Leon Romanovsky <leon@kernel.org>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device
2024-12-16 12:31 ` Bernard Metzler
@ 2024-12-19 10:23 ` Leon Romanovsky
0 siblings, 0 replies; 6+ messages in thread
From: Leon Romanovsky @ 2024-12-19 10:23 UTC (permalink / raw)
To: Bernard Metzler; +Cc: Zhu Yanjun, linux-rdma@vger.kernel.org, jgg@ziepe.ca
On Mon, Dec 16, 2024 at 12:31:18PM +0000, Bernard Metzler wrote:
>
>
> > -----Original Message-----
> > From: Zhu Yanjun <yanjun.zhu@linux.dev>
> > Sent: Sunday, December 15, 2024 6:38 PM
> > To: Bernard Metzler <BMT@zurich.ibm.com>; linux-rdma@vger.kernel.org
> > Cc: jgg@ziepe.ca; leon@kernel.org; linux-kernel@vger.kernel.org;
> > netdev@vger.kernel.org; syzkaller-bugs@googlegroups.com;
> > zyjzyj2000@gmail.com; syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> > Subject: [EXTERNAL] Re: [PATCH RESEND v2] RDMA/siw: Remove direct link to
> > net_device
> >
> >
> >
> > 在 2024/12/14 13:37, Zhu Yanjun 写道:
> > > 在 2024/12/12 16:18, Bernard Metzler 写道:
> > >> Do not manage a per device direct link to net_device. Rely
> > >> on associated ib_devices net_device management, not doubling
> > >> the effort locally. A badly managed local link to net_device
> > >> was causing a 'KASAN: slab-use-after-free' exception during
> > >> siw_query_port() call.
> > >>
> > >> Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
> > >> Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> > >> Link: https%
> > 3A__syzkaller.appspot.com_bug-3Fextid-
> > 3D4b87489410b4efd181bf&d=DwIDaQ&c=BSDicqBQBDjDI9RkVyTcHQ&r=4ynb4Sj_4MUcZXbh
> > vovE4tYSbqxyOwdSiLedP4yO55g&m=Sr4DcK0Wb4iQYxeAGdwnJVj231gGpXbdjE0vjQXbpMgNG
> > MrUQnjp4I9ZuzuThnlu&s=JMDawq7uiJd4vTvguvjXj0pC2okvGwSJ-mB05JlZJX4&e=
> > >> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> > >> ---
> > >> drivers/infiniband/sw/siw/siw.h | 7 +++---
> > >> drivers/infiniband/sw/siw/siw_cm.c | 31 +++++++++++++++++++-----
> > >> drivers/infiniband/sw/siw/siw_main.c | 15 +-----------
> > >> drivers/infiniband/sw/siw/siw_verbs.c | 35 ++++++++++++++++++---------
> > >> 4 files changed, 53 insertions(+), 35 deletions(-)
> > >>
> > >> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/
> > >> siw/siw.h
> > >> index 86d4d6a2170e..ea5eee50dc39 100644
> > >> --- a/drivers/infiniband/sw/siw/siw.h
> > >> +++ b/drivers/infiniband/sw/siw/siw.h
> > >> @@ -46,6 +46,9 @@
> > >> */
> > >> #define SIW_IRQ_MAXBURST_SQ_ACTIVE 4
> > >> +/* There is always only a port 1 per siw device */
> > >> +#define SIW_PORT 1
> > >> +
> > >> struct siw_dev_cap {
> > >> int max_qp;
> > >> int max_qp_wr;
> > >> @@ -69,16 +72,12 @@ struct siw_pd {
> > >> struct siw_device {
> > >> struct ib_device base_dev;
> > >> - struct net_device *netdev;
> > >> struct siw_dev_cap attrs;
> > >> u32 vendor_part_id;
> > >> int numa_node;
> > >> char raw_gid[ETH_ALEN];
> > >> - /* physical port state (only one port per device) */
> > >> - enum ib_port_state state;
> > >> -
> > >> spinlock_t lock;
> > >> struct xarray qp_xa;
> > >> diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/
> > >> sw/siw/siw_cm.c
> > >> index 86323918a570..b157bd01e70b 100644
> > >> --- a/drivers/infiniband/sw/siw/siw_cm.c
> > >> +++ b/drivers/infiniband/sw/siw/siw_cm.c
> > >> @@ -1759,6 +1759,7 @@ int siw_create_listen(struct iw_cm_id *id, int
> > >> backlog)
> > >> {
> > >> struct socket *s;
> > >> struct siw_cep *cep = NULL;
> > >> + struct net_device *ndev = NULL;
> > >> struct siw_device *sdev = to_siw_dev(id->device);
> > >> int addr_family = id->local_addr.ss_family;
> > >> int rv = 0;
> > >> @@ -1779,9 +1780,15 @@ int siw_create_listen(struct iw_cm_id *id, int
> > >> backlog)
> > >> struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
> > >> /* For wildcard addr, limit binding to current device only */
> > >> - if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
> > >> - s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> > >> -
> > >> + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) {
> > >> + ndev = ib_device_get_netdev(id->device, SIW_PORT);
> > >> + if (ndev) {
> > >> + s->sk->sk_bound_dev_if = ndev->ifindex;
> > >> + } else {
> > >> + rv = -ENODEV;
> > >> + goto error;
> > >> + }
> > >> + }
> > >> rv = s->ops->bind(s, (struct sockaddr *)laddr,
> > >> sizeof(struct sockaddr_in));
> > >> } else {
> > >> @@ -1797,9 +1804,15 @@ int siw_create_listen(struct iw_cm_id *id, int
> > >> backlog)
> > >> }
> > >> /* For wildcard addr, limit binding to current device only */
> > >> - if (ipv6_addr_any(&laddr->sin6_addr))
> > >> - s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> > >> -
> > >> + if (ipv6_addr_any(&laddr->sin6_addr)) {
> > >> + ndev = ib_device_get_netdev(id->device, SIW_PORT);
> > >> + if (ndev) {
> > >> + s->sk->sk_bound_dev_if = ndev->ifindex;
> > >> + } else {
> > >> + rv = -ENODEV;
> > >> + goto error;
> > >> + }
> > >> + }
> > >> rv = s->ops->bind(s, (struct sockaddr *)laddr,
> > >> sizeof(struct sockaddr_in6));
> > >> }
> > >> @@ -1861,6 +1874,9 @@ int siw_create_listen(struct iw_cm_id *id, int
> > >> backlog)
> > >> list_add_tail(&cep->listenq, (struct list_head *)id-
> > >> >provider_data);
> > >> cep->state = SIW_EPSTATE_LISTENING;
> > >> + if (ndev)
> > >> + dev_put(ndev);
> > >> +
> > >
> > > <...>
> > >
> > >> siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
> > >> return 0;
> > >> @@ -1880,6 +1896,9 @@ int siw_create_listen(struct iw_cm_id *id, int
> > >> backlog)
> > >> }
> > >> sock_release(s);
> > >> + if (ndev)
> > >> + dev_put(ndev);
> > >> +
> > >
> > > dev_put will invoke netdev_put. In netdev_put, dev is checked.
> > > Thus, no need to check ndev before dev_put function?
> >
>
> Thanks for the review.
>
> Sending this to RDMA only for now. Since kernel 5.10,
> this NULL check has been introduced. siw is in Linux
> since 5.4. Shall I care about back porting,
> or just follow Zhu's recommendation with a corrected
> resend?
We prefer do not make backporter's life harder without reasons,
so I don't like patches which add churn just for the sake of churn.
However new added and/or fixed code should follow up-to-the date
kernel coding standards and practices.
I fixed it locally, no need to resend.
Thanks
>
> Thanks,
> Bernard.
>
>
> > Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
> >
> > Zhu Yanjun
> >
> > >
> > > static inline void netdev_put(struct net_device *dev,
> > > netdevice_tracker *tracker)
> > > {
> > > if (dev) {
> > > netdev_tracker_free(dev, tracker);
> > > __dev_put(dev);
> > > }
> > > }
> > >
> > >> return rv;
> > >> }
> > >> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/
> > >> infiniband/sw/siw/siw_main.c
> > >> index 17abef48abcd..14d3103aee6f 100644
> > >> --- a/drivers/infiniband/sw/siw/siw_main.c
> > >> +++ b/drivers/infiniband/sw/siw/siw_main.c
> > >> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct
> > >> net_device *netdev)
> > >> return NULL;
> > >> base_dev = &sdev->base_dev;
> > >> - sdev->netdev = netdev;
> > >> if (netdev->addr_len) {
> > >> memcpy(sdev->raw_gid, netdev->dev_addr,
> > >> @@ -381,12 +380,10 @@ static int siw_netdev_event(struct
> > >> notifier_block *nb, unsigned long event,
> > >> switch (event) {
> > >> case NETDEV_UP:
> > >> - sdev->state = IB_PORT_ACTIVE;
> > >> siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
> > >> break;
> > >> case NETDEV_DOWN:
> > >> - sdev->state = IB_PORT_DOWN;
> > >> siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
> > >> break;
> > >> @@ -407,12 +404,8 @@ static int siw_netdev_event(struct notifier_block
> > >> *nb, unsigned long event,
> > >> siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
> > >> break;
> > >> /*
> > >> - * Todo: Below netdev events are currently not handled.
> > >> + * All other events are not handled
> > >> */
> > >> - case NETDEV_CHANGEMTU:
> > >> - case NETDEV_CHANGE:
> > >> - break;
> > >> -
> > >> default:
> > >> break;
> > >> }
> > >> @@ -442,12 +435,6 @@ static int siw_newlink(const char *basedev_name,
> > >> struct net_device *netdev)
> > >> sdev = siw_device_create(netdev);
> > >> if (sdev) {
> > >> dev_dbg(&netdev->dev, "siw: new device\n");
> > >> -
> > >> - if (netif_running(netdev) && netif_carrier_ok(netdev))
> > >> - sdev->state = IB_PORT_ACTIVE;
> > >> - else
> > >> - sdev->state = IB_PORT_DOWN;
> > >> -
> > >> ib_mark_name_assigned_by_user(&sdev->base_dev);
> > >> rv = siw_device_register(sdev, basedev_name);
> > >> if (rv)
> > >> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/
> > >> infiniband/sw/siw/siw_verbs.c
> > >> index 986666c19378..7ca0297d68a4 100644
> > >> --- a/drivers/infiniband/sw/siw/siw_verbs.c
> > >> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
> > >> @@ -171,21 +171,29 @@ int siw_query_device(struct ib_device *base_dev,
> > >> struct ib_device_attr *attr,
> > >> int siw_query_port(struct ib_device *base_dev, u32 port,
> > >> struct ib_port_attr *attr)
> > >> {
> > >> - struct siw_device *sdev = to_siw_dev(base_dev);
> > >> + struct net_device *ndev;
> > >> int rv;
> > >> memset(attr, 0, sizeof(*attr));
> > >> rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
> > >> &attr->active_width);
> > >> + if (rv)
> > >> + return rv;
> > >> +
> > >> + ndev = ib_device_get_netdev(base_dev, SIW_PORT);
> > >> + if (!ndev)
> > >> + return -ENODEV;
> > >> +
> > >> attr->gid_tbl_len = 1;
> > >> attr->max_msg_sz = -1;
> > >> - attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> > >> - attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> > >> - attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
> > >> + attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
> > >> + attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
> > >> + attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev))
> > ?
> > >> IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
> > >> + attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
> > >> + IB_PORT_ACTIVE : IB_PORT_DOWN;
> > >> attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
> > >> - attr->state = sdev->state;
> > >> /*
> > >> * All zero
> > >> *
> > >> @@ -199,6 +207,7 @@ int siw_query_port(struct ib_device *base_dev, u32
> > >> port,
> > >> * attr->subnet_timeout = 0;
> > >> * attr->init_type_repy = 0;
> > >> */
> > >> + dev_put(ndev);
> > >> return rv;
> > >> }
> > >> @@ -505,21 +514,24 @@ int siw_query_qp(struct ib_qp *base_qp, struct
> > >> ib_qp_attr *qp_attr,
> > >> int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
> > >> {
> > >> struct siw_qp *qp;
> > >> - struct siw_device *sdev;
> > >> + struct net_device *ndev;
> > >> - if (base_qp && qp_attr && qp_init_attr) {
> > >> + if (base_qp && qp_attr && qp_init_attr)
> > >> qp = to_siw_qp(base_qp);
> > >> - sdev = to_siw_dev(base_qp->device);
> > >> - } else {
> > >> + else
> > >> return -EINVAL;
> > >> - }
> > >> +
> > >> + ndev = ib_device_get_netdev(base_qp->device, SIW_PORT);
> > >> + if (!ndev)
> > >> + return -ENODEV;
> > >> +
> > >> qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
> > >> qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
> > >> qp_attr->cap.max_send_wr = qp->attrs.sq_size;
> > >> qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
> > >> qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
> > >> qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
> > >> - qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> > >> + qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
> > >> qp_attr->max_rd_atomic = qp->attrs.irq_size;
> > >> qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
> > >> @@ -534,6 +546,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct
> > >> ib_qp_attr *qp_attr,
> > >> qp_init_attr->cap = qp_attr->cap;
> > >> + dev_put(ndev);
> > >> return 0;
> > >> }
> > >
> >
> > --
> > Best Regards,
> > Yanjun.Zhu
>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-12-19 10:23 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-12 15:18 [PATCH RESEND v2] RDMA/siw: Remove direct link to net_device Bernard Metzler
2024-12-14 12:37 ` Zhu Yanjun
2024-12-15 17:37 ` Zhu Yanjun
2024-12-16 12:31 ` Bernard Metzler
2024-12-19 10:23 ` Leon Romanovsky
2024-12-19 10:19 ` Leon Romanovsky
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).