[PATCH v2 net] openvswitch: fix vport netlink reply size for large upcall PID arrays

public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed

From: Weiming Shi <bestswngs@gmail.com>
To: Aaron Conole <aconole@redhat.com>,
	Eelco Chaudron <echaudro@redhat.com>,
	Ilya Maximets <i.maximets@ovn.org>
Cc: "David S . Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Simon Horman <horms@kernel.org>,
	Pravin B Shelar <pshelar@ovn.org>,
	Flavio Leitner <fbl@sysclose.org>,
	Mark Gray <mark.d.gray@redhat.com>,
	netdev@vger.kernel.org, Xiang Mei <xmei5@asu.edu>,
	Weiming Shi <bestswngs@gmail.com>
Subject: [PATCH v2 net] openvswitch: fix vport netlink reply size for large upcall PID arrays
Date: Sat, 11 Apr 2026 07:14:50 -0700	[thread overview]
Message-ID: <20260411141448.1479933-3-bestswngs@gmail.com> (raw)
In-Reply-To: <v1-message-id>

The vport netlink reply helpers allocate a fixed-size skb with
nlmsg_new(NLMSG_DEFAULT_SIZE, ...) but serialize the full upcall PID
array via ovs_vport_get_upcall_portids(). Since
ovs_vport_set_upcall_portids() accepts any non-zero multiple of
sizeof(u32) with no upper bound, a CAP_NET_ADMIN user can install a PID
array large enough to overflow the reply buffer. On systems with
unprivileged user namespaces enabled (e.g., Ubuntu default), this is
reachable via unshare -Urn since all OVS vport genl operations use
GENL_UNS_ADMIN_PERM.

When the subsequent nla_put() fails with -EMSGSIZE, five BUG_ON(err < 0)
sites fire and panic the kernel:

 kernel BUG at net/openvswitch/datapath.c:2414!
 Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
 CPU: 1 UID: 0 PID: 65 Comm: poc Not tainted 7.0.0-rc7-00195-geb216e422044 #1
 RIP: 0010:ovs_vport_cmd_set+0x34c/0x400
 Call Trace:
  <TASK>
  genl_family_rcv_msg_doit (net/netlink/genetlink.c:1116)
  genl_rcv_msg (net/netlink/genetlink.c:1194)
  netlink_rcv_skb (net/netlink/af_netlink.c:2550)
  genl_rcv (net/netlink/genetlink.c:1219)
  netlink_unicast (net/netlink/af_netlink.c:1344)
  netlink_sendmsg (net/netlink/af_netlink.c:1894)
  __sys_sendto (net/socket.c:2206)
  __x64_sys_sendto (net/socket.c:2209)
  do_syscall_64 (arch/x86/entry/syscall_64.c:63)
  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
  </TASK>
 Kernel panic - not syncing: Fatal exception

Fix this by dynamically sizing the reply skb to account for the actual
PID array length, and replace the BUG_ON() calls with graceful error
returns.

Fixes: b83d23a2a38b ("openvswitch: Introduce per-cpu upcall dispatch")
Reported-by: Xiang Mei <xmei5@asu.edu>
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
---
Changes in v2:
 - Dynamically size reply skb instead of using fixed NLMSG_DEFAULT_SIZE.
 - Drop WARN_ON_ONCE (still panics with panic_on_warn); use plain error
   returns instead.

 net/openvswitch/datapath.c | 102 +++++++++++++++++++++++++------------
 net/openvswitch/vport.c    |  12 +++++
 net/openvswitch/vport.h    |   1 +
 3 files changed, 82 insertions(+), 33 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index e209099218b4..72e27a38d3a7 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2184,9 +2184,11 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 	return err;
 }
 
-static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+static struct sk_buff *ovs_vport_cmd_alloc_info(struct vport *vport)
 {
-	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	return nlmsg_new(NLMSG_DEFAULT_SIZE +
+			 ovs_vport_get_upcall_portids_size(vport),
+			 GFP_KERNEL);
 }
 
 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
@@ -2196,13 +2198,16 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
 	struct sk_buff *skb;
 	int retval;
 
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	skb = ovs_vport_cmd_alloc_info(vport);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
 	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
 					 GFP_KERNEL);
-	BUG_ON(retval < 0);
+	if (retval < 0) {
+		kfree_skb(skb);
+		return ERR_PTR(retval);
+	}
 
 	return skb;
 }
@@ -2303,10 +2308,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (port_no >= DP_MAX_PORTS)
 		return -EFBIG;
 
-	reply = ovs_vport_cmd_alloc_info();
-	if (!reply)
-		return -ENOMEM;
-
 	ovs_lock();
 restart:
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -2347,6 +2348,13 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock_free;
 	}
 
+	reply = ovs_vport_cmd_alloc_info(vport);
+	if (!reply) {
+		ovs_dp_detach_port(vport);
+		err = -ENOMEM;
+		goto exit_unlock_free;
+	}
+
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
@@ -2358,7 +2366,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	else
 		netdev_set_rx_headroom(vport->dev, dp->max_headroom);
 
-	BUG_ON(err < 0);
+	if (err < 0) {
+		ovs_unlock();
+		kfree_skb(reply);
+		return err;
+	}
 	ovs_unlock();
 
 	ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2366,7 +2378,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 exit_unlock_free:
 	ovs_unlock();
-	kfree_skb(reply);
 	return err;
 }
 
@@ -2377,10 +2388,6 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct vport *vport;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
-	if (!reply)
-		return -ENOMEM;
-
 	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
 	err = PTR_ERR(vport);
@@ -2399,7 +2406,6 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 			goto exit_unlock_free;
 	}
 
-
 	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
 		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
 
@@ -2408,10 +2414,20 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 			goto exit_unlock_free;
 	}
 
+	reply = ovs_vport_cmd_alloc_info(vport);
+	if (!reply) {
+		err = -ENOMEM;
+		goto exit_unlock_free;
+	}
+
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_SET, GFP_KERNEL);
-	BUG_ON(err < 0);
+	if (err < 0) {
+		ovs_unlock();
+		kfree_skb(reply);
+		return err;
+	}
 
 	ovs_unlock();
 	ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2419,7 +2435,6 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 
 exit_unlock_free:
 	ovs_unlock();
-	kfree_skb(reply);
 	return err;
 }
 
@@ -2433,10 +2448,6 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	unsigned int new_headroom;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
-	if (!reply)
-		return -ENOMEM;
-
 	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
 	err = PTR_ERR(vport);
@@ -2448,10 +2459,20 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock_free;
 	}
 
+	reply = ovs_vport_cmd_alloc_info(vport);
+	if (!reply) {
+		err = -ENOMEM;
+		goto exit_unlock_free;
+	}
+
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
-	BUG_ON(err < 0);
+	if (err < 0) {
+		ovs_unlock();
+		kfree_skb(reply);
+		return err;
+	}
 
 	/* the vport deletion may trigger dp headroom update */
 	dp = vport->dp;
@@ -2474,7 +2495,6 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 
 exit_unlock_free:
 	ovs_unlock();
-	kfree_skb(reply);
 	return err;
 }
 
@@ -2484,29 +2504,45 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = genl_info_userhdr(info);
 	struct sk_buff *reply;
 	struct vport *vport;
+	size_t portids_size;
 	int err;
 
-	reply = ovs_vport_cmd_alloc_info();
+	/* Get portids size under RCU, then allocate outside RCU
+	 * since nlmsg_new(GFP_KERNEL) may sleep.
+	 */
+	rcu_read_lock();
+	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		rcu_read_unlock();
+		return err;
+	}
+	portids_size = ovs_vport_get_upcall_portids_size(vport);
+	rcu_read_unlock();
+
+	reply = nlmsg_new(NLMSG_DEFAULT_SIZE + portids_size, GFP_KERNEL);
 	if (!reply)
 		return -ENOMEM;
 
 	rcu_read_lock();
 	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
-	err = PTR_ERR(vport);
-	if (IS_ERR(vport))
-		goto exit_unlock_free;
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		rcu_read_unlock();
+		kfree_skb(reply);
+		return err;
+	}
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
-	BUG_ON(err < 0);
 	rcu_read_unlock();
 
-	return genlmsg_reply(reply, info);
+	if (err < 0) {
+		kfree_skb(reply);
+		return err;
+	}
 
-exit_unlock_free:
-	rcu_read_unlock();
-	kfree_skb(reply);
-	return err;
+	return genlmsg_reply(reply, info);
 }
 
 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 23f629e94a36..57a6df7d6829 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -424,6 +424,18 @@ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
 	return 0;
 }
 
+int ovs_vport_get_upcall_portids_size(const struct vport *vport)
+{
+	struct vport_portids *ids;
+
+	ids = rcu_dereference_ovsl(vport->upcall_portids);
+
+	if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
+		return nla_total_size(ids->n_ids * sizeof(u32));
+	else
+		return nla_total_size(sizeof(u32));
+}
+
 /**
  *	ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
  *
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 9f67b9dd49f9..ee674d59a9c6 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -38,6 +38,7 @@ int ovs_vport_set_options(struct vport *, struct nlattr *options);
 int ovs_vport_get_options(const struct vport *, struct sk_buff *);
 
 int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
+int ovs_vport_get_upcall_portids_size(const struct vport *vport);
 int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
 u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
 
-- 
2.43.0

          parent reply	other threads:[~2026-04-11 14:15 UTC|newest]

Thread overview: expand[flat|nested]  mbox.gz  Atom feed
 [parent not found: <v1-message-id>]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e209099218b dfblob:72e27a38d3a dfblob:23f629e94a3
dfblob:57a6df7d682 dfblob:9f67b9dd49f dfblob:ee674d59a9c )
 OR (
bs:"[PATCH v2 net] openvswitch: fix vport netlink reply size for large upcall PID arrays" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260411141448.1479933-3-bestswngs@gmail.com \
    --to=bestswngs@gmail.com \
    --cc=aconole@redhat.com \
    --cc=davem@davemloft.net \
    --cc=echaudro@redhat.com \
    --cc=edumazet@google.com \
    --cc=fbl@sysclose.org \
    --cc=horms@kernel.org \
    --cc=i.maximets@ovn.org \
    --cc=kuba@kernel.org \
    --cc=mark.d.gray@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=pshelar@ovn.org \
    --cc=xmei5@asu.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox