public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] openvswitch: fix kernel panic from oversized vport upcall PID arrays
@ 2026-04-11  5:59 Weiming Shi
  2026-04-11 12:23 ` David Laight
  0 siblings, 1 reply; 2+ messages in thread
From: Weiming Shi @ 2026-04-11  5:59 UTC (permalink / raw)
  To: Aaron Conole, Eelco Chaudron, Ilya Maximets, David S . Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Pravin B Shelar, Flavio Leitner, Mark Gray, netdev,
	dev, linux-kernel, Xiang Mei, Weiming Shi

The vport netlink reply helpers allocate a fixed-size skb with
nlmsg_new(NLMSG_DEFAULT_SIZE, ...) but serialize the full upcall PID
array via ovs_vport_get_upcall_portids(). Since
ovs_vport_set_upcall_portids() accepts any non-zero multiple of
sizeof(u32) with no upper bound, a CAP_NET_ADMIN user can install a
PID array large enough to overflow the reply buffer. When the
subsequent nla_put() fails with -EMSGSIZE, the BUG_ON() check that follows
(there are five such call sites) fires and panics the kernel. On systems
with unprivileged user namespaces enabled (e.g., Ubuntu default), this is
reachable via unshare -Urn.

 kernel BUG at net/openvswitch/datapath.c:2414!
 Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
 CPU: 1 UID: 0 PID: 65 Comm: poc Not tainted 7.0.0-rc7-00195-geb216e422044 #1
 RIP: 0010:ovs_vport_cmd_set (net/openvswitch/datapath.c:2414 (discriminator 1))
 Call Trace:
  <TASK>
  genl_family_rcv_msg_doit (net/netlink/genetlink.c:1116)
  genl_rcv_msg (net/netlink/genetlink.c:1194 net/netlink/genetlink.c:1209)
  netlink_rcv_skb (net/netlink/af_netlink.c:2550)
  genl_rcv (net/netlink/genetlink.c:1219)
  netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344)
  netlink_sendmsg (net/netlink/af_netlink.c:1894)
  __sys_sendto (net/socket.c:2206 (discriminator 1))
  __x64_sys_sendto (net/socket.c:2209)
  do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1))
  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
  </TASK>
 Kernel panic - not syncing: Fatal exception

Dynamically compute the reply skb size based on the vport's actual PID
array length instead of using a fixed NLMSG_DEFAULT_SIZE, and replace
the BUG_ON() calls with WARN_ON_ONCE() plus graceful error returns.

Fixes: b83d23a2a38b ("openvswitch: Introduce per-cpu upcall dispatch")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
---
 net/openvswitch/datapath.c | 95 ++++++++++++++++++++++++++------------
 1 file changed, 66 insertions(+), 29 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index e209099218b4..3649a1f2a3f5 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2184,9 +2184,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 	return err;
 }
 
-static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+/* Must be called with ovs_mutex or rcu_read_lock. */
+static size_t ovs_vport_cmd_msg_size(const struct vport *vport)
 {
-	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	const struct vport_portids *ids;
+	size_t msgsize = NLMSG_DEFAULT_SIZE;
+
+	ids = rcu_dereference_ovsl(vport->upcall_portids);
+	if (ids && (vport->dp->user_features & OVS_DP_F_VPORT_PIDS))
+		msgsize += ids->n_ids * sizeof(u32);
+
+	return msgsize;
 }
 
 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
@@ -2196,13 +2204,16 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
 	struct sk_buff *skb;
 	int retval;
 
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	skb = nlmsg_new(ovs_vport_cmd_msg_size(vport), GFP_KERNEL);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
 	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
 					 GFP_KERNEL);
-	BUG_ON(retval < 0);
+	if (WARN_ON_ONCE(retval < 0)) {
+		kfree_skb(skb);
+		return ERR_PTR(retval);
+	}
 
 	return skb;
 }
@@ -2303,7 +2314,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (port_no >= DP_MAX_PORTS)
 		return -EFBIG;
 
-	reply = ovs_vport_cmd_alloc_info();
+	reply = genlmsg_new(NLMSG_DEFAULT_SIZE +
+			    nla_len(a[OVS_VPORT_ATTR_UPCALL_PID]), GFP_KERNEL);
 	if (!reply)
 		return -ENOMEM;
 
@@ -2358,7 +2370,9 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	else
 		netdev_set_rx_headroom(vport->dev, dp->max_headroom);
 
-	BUG_ON(err < 0);
+	if (WARN_ON_ONCE(err < 0))
+		goto exit_unlock_free;
+
 	ovs_unlock();
 
 	ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2377,49 +2391,52 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct vport *vport;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
-	if (!reply)
-		return -ENOMEM;
-
 	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock_free;
+		goto exit_unlock;
 
 	if (a[OVS_VPORT_ATTR_TYPE] &&
 	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
 		err = -EINVAL;
-		goto exit_unlock_free;
+		goto exit_unlock;
 	}
 
 	if (a[OVS_VPORT_ATTR_OPTIONS]) {
 		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
 		if (err)
-			goto exit_unlock_free;
+			goto exit_unlock;
 	}
 
-
 	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
 		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
 
 		err = ovs_vport_set_upcall_portids(vport, ids);
 		if (err)
-			goto exit_unlock_free;
+			goto exit_unlock;
+	}
+
+	reply = genlmsg_new(ovs_vport_cmd_msg_size(vport), GFP_KERNEL);
+	if (!reply) {
+		err = -ENOMEM;
+		goto exit_unlock;
 	}
 
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_SET, GFP_KERNEL);
-	BUG_ON(err < 0);
+	if (WARN_ON_ONCE(err < 0)) {
+		kfree_skb(reply);
+		goto exit_unlock;
+	}
 
 	ovs_unlock();
 	ovs_notify(&dp_vport_genl_family, reply, info);
 	return 0;
 
-exit_unlock_free:
+exit_unlock:
 	ovs_unlock();
-	kfree_skb(reply);
 	return err;
 }
 
@@ -2433,25 +2450,30 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	unsigned int new_headroom;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
-	if (!reply)
-		return -ENOMEM;
-
 	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
-		goto exit_unlock_free;
+		goto exit_unlock;
 
 	if (vport->port_no == OVSP_LOCAL) {
 		err = -EINVAL;
-		goto exit_unlock_free;
+		goto exit_unlock;
+	}
+
+	reply = genlmsg_new(ovs_vport_cmd_msg_size(vport), GFP_KERNEL);
+	if (!reply) {
+		err = -ENOMEM;
+		goto exit_unlock;
 	}
 
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
-	BUG_ON(err < 0);
+	if (WARN_ON_ONCE(err < 0)) {
+		kfree_skb(reply);
+		goto exit_unlock;
+	}
 
 	/* the vport deletion may trigger dp headroom update */
 	dp = vport->dp;
@@ -2472,9 +2494,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	ovs_notify(&dp_vport_genl_family, reply, info);
 	return 0;
 
-exit_unlock_free:
+exit_unlock:
 	ovs_unlock();
-	kfree_skb(reply);
 	return err;
 }
 
@@ -2484,9 +2505,20 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = genl_info_userhdr(info);
 	struct sk_buff *reply;
 	struct vport *vport;
+	size_t msg_size;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
+	rcu_read_lock();
+	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport)) {
+		rcu_read_unlock();
+		return err;
+	}
+	msg_size = ovs_vport_cmd_msg_size(vport);
+	rcu_read_unlock();
+
+	reply = genlmsg_new(msg_size, GFP_KERNEL);
 	if (!reply)
 		return -ENOMEM;
 
@@ -2495,12 +2527,17 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
 		goto exit_unlock_free;
+
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
-	BUG_ON(err < 0);
 	rcu_read_unlock();
 
+	if (err < 0) {
+		kfree_skb(reply);
+		return err;
+	}
+
 	return genlmsg_reply(reply, info);
 
 exit_unlock_free:
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH net] openvswitch: fix kernel panic from oversized vport upcall PID arrays
  2026-04-11  5:59 [PATCH net] openvswitch: fix kernel panic from oversized vport upcall PID arrays Weiming Shi
@ 2026-04-11 12:23 ` David Laight
  0 siblings, 0 replies; 2+ messages in thread
From: David Laight @ 2026-04-11 12:23 UTC (permalink / raw)
  To: Weiming Shi
  Cc: Aaron Conole, Eelco Chaudron, Ilya Maximets, David S . Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Pravin B Shelar, Flavio Leitner, Mark Gray, netdev, dev,
	linux-kernel, Xiang Mei

On Fri, 10 Apr 2026 22:59:16 -0700
Weiming Shi <bestswngs@gmail.com> wrote:

> The vport netlink reply helpers allocate a fixed-size skb with
> nlmsg_new(NLMSG_DEFAULT_SIZE, ...) but serialize the full upcall PID
> array via ovs_vport_get_upcall_portids(). Since
> ovs_vport_set_upcall_portids() accepts any non-zero multiple of
> sizeof(u32) with no upper bound, a CAP_NET_ADMIN user can install a
> PID array large enough to overflow the reply buffer. When the
> subsequent nla_put() fails with -EMSGSIZE, five BUG_ON(err < 0) sites
> fire and panic the kernel. On systems with unprivileged user namespaces
> enabled (e.g., Ubuntu default), this is reachable via unshare -Urn.
> 
>  kernel BUG at net/openvswitch/datapath.c:2414!
>  Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
>  CPU: 1 UID: 0 PID: 65 Comm: poc Not tainted 7.0.0-rc7-00195-geb216e422044 #1
>  RIP: 0010:ovs_vport_cmd_set (net/openvswitch/datapath.c:2414 (discriminator 1))
>  Call Trace:
>   <TASK>
>   genl_family_rcv_msg_doit (net/netlink/genetlink.c:1116)
>   genl_rcv_msg (net/netlink/genetlink.c:1194 net/netlink/genetlink.c:1209)
>   netlink_rcv_skb (net/netlink/af_netlink.c:2550)
>   genl_rcv (net/netlink/genetlink.c:1219)
>   netlink_unicast (net/netlink/af_netlink.c:1319 net/netlink/af_netlink.c:1344)
>   netlink_sendmsg (net/netlink/af_netlink.c:1894)
>   __sys_sendto (net/socket.c:2206 (discriminator 1))
>   __x64_sys_sendto (net/socket.c:2209)
>   do_syscall_64 (arch/x86/entry/syscall_64.c:63 (discriminator 1))
>   entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
>   </TASK>
>  Kernel panic - not syncing: Fatal exception
> 
> Dynamically compute the reply skb size based on the vport's actual PID
> array length instead of using a fixed NLMSG_DEFAULT_SIZE, and replace
> the BUG_ON() calls with WARN_ON_ONCE() plus graceful error returns.

IIRC WARN_ON_ONCE() will still panic any system that has panic_on_warn set.

	David

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-04-11 12:23 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-11  5:59 [PATCH net] openvswitch: fix kernel panic from oversized vport upcall PID arrays Weiming Shi
2026-04-11 12:23 ` David Laight

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox