netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v3 0/4] netdevsim: link and forward skbs between ports
@ 2023-12-14 21:24 David Wei
  2023-12-14 21:24 ` [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected David Wei
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: David Wei @ 2023-12-14 21:24 UTC (permalink / raw)
  To: Jakub Kicinski, Jiri Pirko, Sabrina Dubroca, netdev
  Cc: David S. Miller, Eric Dumazet, Paolo Abeni

This patchset adds the ability to link two netdevsim ports together and
forward skbs between them, similar to veth. The goal is to use netdevsim
for testing features e.g. zero copy Rx using io_uring.

This feature was tested locally on QEMU, and a selftest is included.

---
v2->v3:
- take lock when traversing nsim_bus_dev_list
- take device ref when getting a nsim_bus_dev
- return 0 if nsim_dev_peer_read cannot find the port
- address code formatting
- do not hard code values in selftests
- add Makefile for selftests

v1->v2:
- renamed debugfs file from "link" to "peer"
- replaced strstep() with sscanf() for consistency
- increased char[] buf sz to 22 for copying id + port from user
- added err msg w/ expected fmt when linking as a hint to user
- prevent linking port to itself
- protect peer ptr using RCU

David Wei (4):
  netdevsim: allow two netdevsim ports to be connected
  netdevsim: forward skbs from one connected port to another
  netdevsim: add selftest for forwarding skb between connected ports
  netdevsim: add Makefile for selftests

 MAINTAINERS                                   |   1 +
 drivers/net/netdevsim/bus.c                   |  17 +++
 drivers/net/netdevsim/dev.c                   |  88 +++++++++++++
 drivers/net/netdevsim/netdev.c                |  29 ++++-
 drivers/net/netdevsim/netdevsim.h             |   3 +
 .../selftests/drivers/net/netdevsim/Makefile  |  18 +++
 .../selftests/drivers/net/netdevsim/peer.sh   | 123 ++++++++++++++++++
 7 files changed, 274 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/drivers/net/netdevsim/Makefile
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/peer.sh

-- 
2.39.3


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected
  2023-12-14 21:24 [PATCH net-next v3 0/4] netdevsim: link and forward skbs between ports David Wei
@ 2023-12-14 21:24 ` David Wei
  2023-12-15 11:11   ` Jiri Pirko
  2023-12-14 21:24 ` [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another David Wei
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: David Wei @ 2023-12-14 21:24 UTC (permalink / raw)
  To: Jakub Kicinski, Jiri Pirko, Sabrina Dubroca, netdev
  Cc: David S. Miller, Eric Dumazet, Paolo Abeni

Add a debugfs file in
/sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer

Writing "M B" to this file will link port A of netdevsim N with port B of
netdevsim M.

Reading this file will return the linked netdevsim id and port, if any.

Signed-off-by: David Wei <dw@davidwei.uk>
---
 drivers/net/netdevsim/bus.c       | 17 ++++++
 drivers/net/netdevsim/dev.c       | 88 +++++++++++++++++++++++++++++++
 drivers/net/netdevsim/netdev.c    |  6 +++
 drivers/net/netdevsim/netdevsim.h |  3 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index bcbc1e19edde..1ef95661a3f5 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -323,6 +323,23 @@ static struct device_driver nsim_driver = {
 	.owner		= THIS_MODULE,
 };
 
+struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id)
+{
+	struct nsim_bus_dev *nsim_bus_dev;
+
+	mutex_lock(&nsim_bus_dev_list_lock);
+	list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) {
+		if (nsim_bus_dev->dev.id == id) {
+			get_device(&nsim_bus_dev->dev);
+			mutex_unlock(&nsim_bus_dev_list_lock);
+			return nsim_bus_dev;
+		}
+	}
+	mutex_unlock(&nsim_bus_dev_list_lock);
+
+	return NULL;
+}
+
 int nsim_bus_init(void)
 {
 	int err;
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index b4d3b9cde8bd..034145ba1861 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
 	.owner = THIS_MODULE,
 };
 
+static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
+				  size_t count, loff_t *ppos)
+{
+	struct nsim_dev_port *nsim_dev_port;
+	struct netdevsim *peer;
+	unsigned int id, port;
+	char buf[23];
+	ssize_t len;
+
+	nsim_dev_port = file->private_data;
+	rcu_read_lock();
+	peer = rcu_dereference(nsim_dev_port->ns->peer);
+	if (!peer) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	id = peer->nsim_bus_dev->dev.id;
+	port = peer->nsim_dev_port->port_index;
+	len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
+
+	rcu_read_unlock();
+	return simple_read_from_buffer(data, count, ppos, buf, len);
+}
+
+static ssize_t nsim_dev_peer_write(struct file *file,
+				   const char __user *data,
+				   size_t count, loff_t *ppos)
+{
+	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
+	struct nsim_bus_dev *peer_bus_dev;
+	struct nsim_dev *peer_dev;
+	unsigned int id, port;
+	char buf[22];
+	ssize_t ret;
+
+	if (count >= sizeof(buf))
+		return -ENOSPC;
+
+	ret = copy_from_user(buf, data, count);
+	if (ret)
+		return -EFAULT;
+	buf[count] = '\0';
+
+	ret = sscanf(buf, "%u %u", &id, &port);
+	if (ret != 2) {
+		pr_err("Format for adding a peer is \"id port\" (uint uint)");
+		return -EINVAL;
+	}
+
+	/* invalid netdevsim id */
+	peer_bus_dev = nsim_bus_dev_get(id);
+	if (!peer_bus_dev)
+		return -EINVAL;
+
+	ret = -EINVAL;
+	/* cannot link to self */
+	nsim_dev_port = file->private_data;
+	if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev &&
+	    nsim_dev_port->port_index == port)
+		goto out;
+
+	peer_dev = dev_get_drvdata(&peer_bus_dev->dev);
+	list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) {
+		if (peer_dev_port->port_index != port)
+			continue;
+		rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
+		rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
+		ret = count;
+		goto out;
+	}
+
+out:
+	put_device(&peer_bus_dev->dev);
+	return ret;
+}
+
+static const struct file_operations nsim_dev_peer_fops = {
+	.open = simple_open,
+	.read = nsim_dev_peer_read,
+	.write = nsim_dev_peer_write,
+	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
+};
+
 static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
 				      struct nsim_dev_port *nsim_dev_port)
 {
@@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
 	}
 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
 
+	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
+			    nsim_dev_port, &nsim_dev_peer_fops);
+
 	return 0;
 }
 
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index aecaf5f44374..e290c54b0e70 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 	ns->nsim_dev = nsim_dev;
 	ns->nsim_dev_port = nsim_dev_port;
 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
+	RCU_INIT_POINTER(ns->peer, NULL);
 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
 	nsim_ethtool_init(ns);
@@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 void nsim_destroy(struct netdevsim *ns)
 {
 	struct net_device *dev = ns->netdev;
+	struct netdevsim *peer;
 
 	rtnl_lock();
+	peer = rtnl_dereference(ns->peer);
+	RCU_INIT_POINTER(ns->peer, NULL);
 	unregister_netdevice(dev);
+	if (peer)
+		RCU_INIT_POINTER(peer->peer, NULL);
 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
 		nsim_macsec_teardown(ns);
 		nsim_ipsec_teardown(ns);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 028c825b86db..61ac3a80cf9a 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -125,6 +125,7 @@ struct netdevsim {
 	} udp_ports;
 
 	struct nsim_ethtool ethtool;
+	struct netdevsim __rcu *peer;
 };
 
 struct netdevsim *
@@ -415,5 +416,7 @@ struct nsim_bus_dev {
 	bool init;
 };
 
+struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id);
+
 int nsim_bus_init(void);
 void nsim_bus_exit(void);
-- 
2.39.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another
  2023-12-14 21:24 [PATCH net-next v3 0/4] netdevsim: link and forward skbs between ports David Wei
  2023-12-14 21:24 ` [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected David Wei
@ 2023-12-14 21:24 ` David Wei
  2023-12-15 10:45   ` Jiri Pirko
  2023-12-14 21:24 ` [PATCH net-next v3 3/4] netdevsim: add selftest for forwarding skb between connected ports David Wei
  2023-12-14 21:24 ` [PATCH net-next v3 4/4] netdevsim: add Makefile for selftests David Wei
  3 siblings, 1 reply; 14+ messages in thread
From: David Wei @ 2023-12-14 21:24 UTC (permalink / raw)
  To: Jakub Kicinski, Jiri Pirko, Sabrina Dubroca, netdev
  Cc: David S. Miller, Eric Dumazet, Paolo Abeni

Forward skbs sent from one netdevsim port to its connected netdevsim
port using dev_forward_skb, in a spirit similar to veth.

Signed-off-by: David Wei <dw@davidwei.uk>
---
 drivers/net/netdevsim/netdev.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index e290c54b0e70..c5f53b1dbdcc 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -29,19 +29,33 @@
 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct netdevsim *ns = netdev_priv(dev);
+	struct netdevsim *peer_ns;
+	int ret = NETDEV_TX_OK;
 
+	rcu_read_lock();
 	if (!nsim_ipsec_tx(ns, skb))
-		goto out;
+		goto err;
 
 	u64_stats_update_begin(&ns->syncp);
 	ns->tx_packets++;
 	ns->tx_bytes += skb->len;
 	u64_stats_update_end(&ns->syncp);
 
-out:
-	dev_kfree_skb(skb);
+	peer_ns = rcu_dereference(ns->peer);
+	if (!peer_ns)
+		goto err;
+
+	skb_tx_timestamp(skb);
+	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
+		ret = NET_XMIT_DROP;
 
-	return NETDEV_TX_OK;
+	rcu_read_unlock();
+	return ret;
+
+err:
+	rcu_read_unlock();
+	dev_kfree_skb(skb);
+	return ret;
 }
 
 static void nsim_set_rx_mode(struct net_device *dev)
@@ -302,7 +316,6 @@ static void nsim_setup(struct net_device *dev)
 	eth_hw_addr_random(dev);
 
 	dev->tx_queue_len = 0;
-	dev->flags |= IFF_NOARP;
 	dev->flags &= ~IFF_MULTICAST;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
 			   IFF_NO_QUEUE;
-- 
2.39.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next v3 3/4] netdevsim: add selftest for forwarding skb between connected ports
  2023-12-14 21:24 [PATCH net-next v3 0/4] netdevsim: link and forward skbs between ports David Wei
  2023-12-14 21:24 ` [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected David Wei
  2023-12-14 21:24 ` [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another David Wei
@ 2023-12-14 21:24 ` David Wei
  2023-12-14 21:24 ` [PATCH net-next v3 4/4] netdevsim: add Makefile for selftests David Wei
  3 siblings, 0 replies; 14+ messages in thread
From: David Wei @ 2023-12-14 21:24 UTC (permalink / raw)
  To: Jakub Kicinski, Jiri Pirko, Sabrina Dubroca, netdev
  Cc: David S. Miller, Eric Dumazet, Paolo Abeni

Connect two netdevsim ports in different namespaces together, then send
packets between them using socat.

Signed-off-by: David Wei <dw@davidwei.uk>
---
 .../selftests/drivers/net/netdevsim/peer.sh   | 123 ++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/peer.sh

diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
new file mode 100755
index 000000000000..942526640d59
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+NSIM_DEV_1_ID=$((RANDOM % 1024))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_1_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((RANDOM % 1024))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+NSIM_DEV_2_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+
+socat_check()
+{
+	if [ ! -x "$(command -v socat)" ]; then
+		echo "socat command not found. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+setup_ns()
+{
+	set -e
+	ip netns add nssv
+	ip netns add nscl
+
+	NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+	NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+	ip link set $NSIM_DEV_1_NAME netns nssv
+	ip link set $NSIM_DEV_2_NAME netns nscl
+
+	ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+	ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+	ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+	ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+	set +e
+}
+
+cleanup_ns()
+{
+	ip netns del nscl
+	ip netns del nssv
+}
+
+###
+### Code start
+###
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+
+echo "$NSIM_DEV_2_ID 0" > ${NSIM_DEV_1_DFS}/ports/0/peer 2>/dev/null
+if [ $? -eq 0 ]; then
+	echo "linking with non-existent netdevsim should fail"
+	exit 1
+fi
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+
+echo "$NSIM_DEV_2_ID 0" > ${NSIM_DEV_1_DFS}/ports/0/peer
+if [ $? -ne 0 ]; then
+	echo "linking netdevsim1 port0 with netdevsim2 port0 should succeed"
+	exit 1
+fi
+
+# argument error checking
+
+echo "$NSIM_DEV_2_ID 1" > ${NSIM_DEV_1_DFS}/ports/0/peer 2>/dev/null
+if [ $? -eq 0 ]; then
+	echo "linking with non-existent port in a netdevsim should fail"
+	exit 1
+fi
+
+echo "$NSIM_DEV_1_ID 0" > ${NSIM_DEV_1_DFS}/ports/0/peer 2>/dev/null
+if [ $? -eq 0 ]; then
+	echo "linking with self should fail"
+	exit 1
+fi
+
+echo "$NSIM_DEV_2_ID a" > ${NSIM_DEV_1_DFS}/ports/0/peer 2>/dev/null
+if [ $? -eq 0 ]; then
+	echo "invalid arg should fail"
+	exit 1
+fi
+
+# send/recv packets
+
+socat_check || exit 4
+
+setup_ns
+
+tmp_file=$(mktemp)
+ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
+pid=$!
+
+echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234
+
+count=$(cat $tmp_file | wc -c)
+if [[ $count -ne 3 ]]; then
+	echo "expected 3 bytes, got $count"
+	exit 1
+fi
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+kill $pid
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit 0
-- 
2.39.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH net-next v3 4/4] netdevsim: add Makefile for selftests
  2023-12-14 21:24 [PATCH net-next v3 0/4] netdevsim: link and forward skbs between ports David Wei
                   ` (2 preceding siblings ...)
  2023-12-14 21:24 ` [PATCH net-next v3 3/4] netdevsim: add selftest for forwarding skb between connected ports David Wei
@ 2023-12-14 21:24 ` David Wei
  3 siblings, 0 replies; 14+ messages in thread
From: David Wei @ 2023-12-14 21:24 UTC (permalink / raw)
  To: Jakub Kicinski, Jiri Pirko, Sabrina Dubroca, netdev
  Cc: David S. Miller, Eric Dumazet, Paolo Abeni

Add a Makefile for netdevsim selftests and add selftests path to
MAINTAINERS

# make run_tests
...
# timeout set to 45
# selftests: drivers/net/netdevsim: peer.sh
# modprobe: FATAL: Module netdevsim is builtin.
ok 11 selftests: drivers/net/netdevsim: peer.sh

Signed-off-by: David Wei <dw@davidwei.uk>
---
 MAINTAINERS                                    |  1 +
 .../selftests/drivers/net/netdevsim/Makefile   | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 tools/testing/selftests/drivers/net/netdevsim/Makefile

diff --git a/MAINTAINERS b/MAINTAINERS
index a151988646fe..cfa0804772b5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14899,6 +14899,7 @@ NETDEVSIM
 M:	Jakub Kicinski <kuba@kernel.org>
 S:	Maintained
 F:	drivers/net/netdevsim/*
+F:	tools/testing/selftests/drivers/net/netdevsim/*
 
 NETEM NETWORK EMULATOR
 M:	Stephen Hemminger <stephen@networkplumber.org>
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
new file mode 100644
index 000000000000..5bace0b7fb57
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = devlink.sh \
+	devlink_in_netns.sh \
+	devlink_trap.sh \
+	ethtool-coalesce.sh \
+	ethtool-fec.sh \
+	ethtool-pause.sh \
+	ethtool-ring.sh \
+	fib.sh \
+	hw_stats_l3.sh \
+	nexthop.sh \
+	peer.sh \
+	psample.sh \
+	tc-mq-visibility.sh \
+	udp_tunnel_nic.sh \
+
+include ../../../lib.mk
-- 
2.39.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another
  2023-12-14 21:24 ` [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another David Wei
@ 2023-12-15 10:45   ` Jiri Pirko
  2023-12-15 18:31     ` David Wei
  0 siblings, 1 reply; 14+ messages in thread
From: Jiri Pirko @ 2023-12-15 10:45 UTC (permalink / raw)
  To: David Wei
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

Thu, Dec 14, 2023 at 10:24:41PM CET, dw@davidwei.uk wrote:
>Forward skbs sent from one netdevsim port to its connected netdevsim
>port using dev_forward_skb, in a spirit similar to veth.

Perhaps better to write "dev_forward_skb()" to make obvious you talk
about function.


>
>Signed-off-by: David Wei <dw@davidwei.uk>
>---
> drivers/net/netdevsim/netdev.c | 23 ++++++++++++++++++-----
> 1 file changed, 18 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>index e290c54b0e70..c5f53b1dbdcc 100644
>--- a/drivers/net/netdevsim/netdev.c
>+++ b/drivers/net/netdevsim/netdev.c
>@@ -29,19 +29,33 @@
> static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
> {
> 	struct netdevsim *ns = netdev_priv(dev);
>+	struct netdevsim *peer_ns;
>+	int ret = NETDEV_TX_OK;
> 
>+	rcu_read_lock();

Why do you need to be in rcu read locked section here?


> 	if (!nsim_ipsec_tx(ns, skb))
>-		goto out;
>+		goto err;

Not sure why you need to rename the label. Why "out" is not okay?

> 
> 	u64_stats_update_begin(&ns->syncp);
> 	ns->tx_packets++;
> 	ns->tx_bytes += skb->len;
> 	u64_stats_update_end(&ns->syncp);
> 
>-out:
>-	dev_kfree_skb(skb);
>+	peer_ns = rcu_dereference(ns->peer);
>+	if (!peer_ns)
>+		goto err;

This is definitelly not an error path, "err" label name is misleading.


>+
>+	skb_tx_timestamp(skb);
>+	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
>+		ret = NET_XMIT_DROP;

Hmm, can't you track dropped packets in ns->tx_dropped and expose in
nsim_get_stats64() ?


> 
>-	return NETDEV_TX_OK;
>+	rcu_read_unlock();
>+	return ret;
>+
>+err:
>+	rcu_read_unlock();
>+	dev_kfree_skb(skb);
>+	return ret;
> }
> 
> static void nsim_set_rx_mode(struct net_device *dev)
>@@ -302,7 +316,6 @@ static void nsim_setup(struct net_device *dev)
> 	eth_hw_addr_random(dev);
> 
> 	dev->tx_queue_len = 0;
>-	dev->flags |= IFF_NOARP;
> 	dev->flags &= ~IFF_MULTICAST;
> 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
> 			   IFF_NO_QUEUE;
>-- 
>2.39.3
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected
  2023-12-14 21:24 ` [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected David Wei
@ 2023-12-15 11:11   ` Jiri Pirko
  2023-12-15 19:13     ` David Wei
  0 siblings, 1 reply; 14+ messages in thread
From: Jiri Pirko @ 2023-12-15 11:11 UTC (permalink / raw)
  To: David Wei
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

Thu, Dec 14, 2023 at 10:24:40PM CET, dw@davidwei.uk wrote:
>Add a debugfs file in
>/sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer
>
>Writing "M B" to this file will link port A of netdevsim N with port B of
>netdevsim M.
>
>Reading this file will return the linked netdevsim id and port, if any.
>
>Signed-off-by: David Wei <dw@davidwei.uk>
>---
> drivers/net/netdevsim/bus.c       | 17 ++++++
> drivers/net/netdevsim/dev.c       | 88 +++++++++++++++++++++++++++++++
> drivers/net/netdevsim/netdev.c    |  6 +++
> drivers/net/netdevsim/netdevsim.h |  3 ++
> 4 files changed, 114 insertions(+)
>
>diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
>index bcbc1e19edde..1ef95661a3f5 100644
>--- a/drivers/net/netdevsim/bus.c
>+++ b/drivers/net/netdevsim/bus.c
>@@ -323,6 +323,23 @@ static struct device_driver nsim_driver = {
> 	.owner		= THIS_MODULE,
> };
> 
>+struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id)

This sounds definitelly incorrect. You should not need to touch bus.c
code. It arranges the bus and devices on it. The fact that a device is
probed or not is parallel to this.

I think you need to maintain a separate list/xarray of netdevsim devices
probed by nsim_drv_probe()


>+{
>+	struct nsim_bus_dev *nsim_bus_dev;
>+
>+	mutex_lock(&nsim_bus_dev_list_lock);
>+	list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) {
>+		if (nsim_bus_dev->dev.id == id) {
>+			get_device(&nsim_bus_dev->dev);
>+			mutex_unlock(&nsim_bus_dev_list_lock);
>+			return nsim_bus_dev;
>+		}
>+	}
>+	mutex_unlock(&nsim_bus_dev_list_lock);
>+
>+	return NULL;
>+}
>+
> int nsim_bus_init(void)
> {
> 	int err;
>diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>index b4d3b9cde8bd..034145ba1861 100644
>--- a/drivers/net/netdevsim/dev.c
>+++ b/drivers/net/netdevsim/dev.c
>@@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
> 	.owner = THIS_MODULE,
> };
> 
>+static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
>+				  size_t count, loff_t *ppos)
>+{
>+	struct nsim_dev_port *nsim_dev_port;
>+	struct netdevsim *peer;
>+	unsigned int id, port;
>+	char buf[23];
>+	ssize_t len;
>+
>+	nsim_dev_port = file->private_data;
>+	rcu_read_lock();
>+	peer = rcu_dereference(nsim_dev_port->ns->peer);
>+	if (!peer) {
>+		rcu_read_unlock();
>+		return 0;
>+	}
>+
>+	id = peer->nsim_bus_dev->dev.id;
>+	port = peer->nsim_dev_port->port_index;
>+	len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
>+
>+	rcu_read_unlock();
>+	return simple_read_from_buffer(data, count, ppos, buf, len);
>+}
>+
>+static ssize_t nsim_dev_peer_write(struct file *file,
>+				   const char __user *data,
>+				   size_t count, loff_t *ppos)
>+{
>+	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>+	struct nsim_bus_dev *peer_bus_dev;
>+	struct nsim_dev *peer_dev;
>+	unsigned int id, port;
>+	char buf[22];
>+	ssize_t ret;
>+
>+	if (count >= sizeof(buf))
>+		return -ENOSPC;
>+
>+	ret = copy_from_user(buf, data, count);
>+	if (ret)
>+		return -EFAULT;
>+	buf[count] = '\0';
>+
>+	ret = sscanf(buf, "%u %u", &id, &port);
>+	if (ret != 2) {
>+		pr_err("Format for adding a peer is \"id port\" (uint uint)");
>+		return -EINVAL;
>+	}
>+
>+	/* invalid netdevsim id */
>+	peer_bus_dev = nsim_bus_dev_get(id);
>+	if (!peer_bus_dev)
>+		return -EINVAL;
>+
>+	ret = -EINVAL;
>+	/* cannot link to self */
>+	nsim_dev_port = file->private_data;
>+	if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev &&
>+	    nsim_dev_port->port_index == port)
>+		goto out;
>+
>+	peer_dev = dev_get_drvdata(&peer_bus_dev->dev);

Again, no bus touching should be needed. (btw, this could be null is dev
is not probed)


>+	list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) {
>+		if (peer_dev_port->port_index != port)
>+			continue;
>+		rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>+		rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);

What is stopping another cpu from setting different peer for the same
port here, making a mess?


>+		ret = count;
>+		goto out;
>+	}
>+
>+out:
>+	put_device(&peer_bus_dev->dev);
>+	return ret;
>+}
>+
>+static const struct file_operations nsim_dev_peer_fops = {
>+	.open = simple_open,
>+	.read = nsim_dev_peer_read,
>+	.write = nsim_dev_peer_write,
>+	.llseek = generic_file_llseek,
>+	.owner = THIS_MODULE,
>+};
>+
> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
> 				      struct nsim_dev_port *nsim_dev_port)
> {
>@@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
> 	}
> 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
> 
>+	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
>+			    nsim_dev_port, &nsim_dev_peer_fops);
>+
> 	return 0;
> }
> 
>diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>index aecaf5f44374..e290c54b0e70 100644
>--- a/drivers/net/netdevsim/netdev.c
>+++ b/drivers/net/netdevsim/netdev.c
>@@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
> 	ns->nsim_dev = nsim_dev;
> 	ns->nsim_dev_port = nsim_dev_port;
> 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
>+	RCU_INIT_POINTER(ns->peer, NULL);
> 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
> 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
> 	nsim_ethtool_init(ns);
>@@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
> void nsim_destroy(struct netdevsim *ns)
> {
> 	struct net_device *dev = ns->netdev;
>+	struct netdevsim *peer;
> 
> 	rtnl_lock();
>+	peer = rtnl_dereference(ns->peer);
>+	RCU_INIT_POINTER(ns->peer, NULL);
> 	unregister_netdevice(dev);
>+	if (peer)
>+		RCU_INIT_POINTER(peer->peer, NULL);

What is stopping the another CPU from setting this back to this "ns"?
Or what is stopping another netdevsim port from setting this ns while
going away?

Do you rely on RTNL_LOCK in any way (other then synchronize_net() in
unlock())? If yes, looks wrong.

This ns->peer update locking looks very broken to me :/




> 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
> 		nsim_macsec_teardown(ns);
> 		nsim_ipsec_teardown(ns);
>diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>index 028c825b86db..61ac3a80cf9a 100644
>--- a/drivers/net/netdevsim/netdevsim.h
>+++ b/drivers/net/netdevsim/netdevsim.h
>@@ -125,6 +125,7 @@ struct netdevsim {
> 	} udp_ports;
> 
> 	struct nsim_ethtool ethtool;
>+	struct netdevsim __rcu *peer;
> };
> 
> struct netdevsim *
>@@ -415,5 +416,7 @@ struct nsim_bus_dev {
> 	bool init;
> };
> 
>+struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id);
>+
> int nsim_bus_init(void);
> void nsim_bus_exit(void);
>-- 
>2.39.3
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another
  2023-12-15 10:45   ` Jiri Pirko
@ 2023-12-15 18:31     ` David Wei
  2023-12-16  9:22       ` Jiri Pirko
  0 siblings, 1 reply; 14+ messages in thread
From: David Wei @ 2023-12-15 18:31 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

On 2023-12-15 02:45, Jiri Pirko wrote:
> Thu, Dec 14, 2023 at 10:24:41PM CET, dw@davidwei.uk wrote:
>> Forward skbs sent from one netdevsim port to its connected netdevsim
>> port using dev_forward_skb, in a spirit similar to veth.
> 
> Perhaps better to write "dev_forward_skb()" to make obvious you talk
> about function.

Sorry, it's a bad habit at this point :)

> 
> 
>>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>> drivers/net/netdevsim/netdev.c | 23 ++++++++++++++++++-----
>> 1 file changed, 18 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>> index e290c54b0e70..c5f53b1dbdcc 100644
>> --- a/drivers/net/netdevsim/netdev.c
>> +++ b/drivers/net/netdevsim/netdev.c
>> @@ -29,19 +29,33 @@
>> static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
>> {
>> 	struct netdevsim *ns = netdev_priv(dev);
>> +	struct netdevsim *peer_ns;
>> +	int ret = NETDEV_TX_OK;
>>
>> +	rcu_read_lock();
> 
> Why do you need to be in rcu read locked section here?

So the RCU protected pointer `peer` does not change during the critical
section. Veth does something similar in its xmit() for its peer.

> 
> 
>> 	if (!nsim_ipsec_tx(ns, skb))
>> -		goto out;
>> +		goto err;
> 
> Not sure why you need to rename the label. Why "out" is not okay?
> 
>>
>> 	u64_stats_update_begin(&ns->syncp);
>> 	ns->tx_packets++;
>> 	ns->tx_bytes += skb->len;
>> 	u64_stats_update_end(&ns->syncp);
>>
>> -out:
>> -	dev_kfree_skb(skb);
>> +	peer_ns = rcu_dereference(ns->peer);
>> +	if (!peer_ns)
>> +		goto err;
> 
> This is definitelly not an error path, "err" label name is misleading.

That's fair, I can change it back. Lots has changed since my original
intentions.

> 
> 
>> +
>> +	skb_tx_timestamp(skb);
>> +	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
>> +		ret = NET_XMIT_DROP;
> 
> Hmm, can't you track dropped packets in ns->tx_dropped and expose in
> nsim_get_stats64() ?

I can add this.

> 
> 
>>
>> -	return NETDEV_TX_OK;
>> +	rcu_read_unlock();
>> +	return ret;
>> +
>> +err:
>> +	rcu_read_unlock();
>> +	dev_kfree_skb(skb);
>> +	return ret;
>> }
>>
>> static void nsim_set_rx_mode(struct net_device *dev)
>> @@ -302,7 +316,6 @@ static void nsim_setup(struct net_device *dev)
>> 	eth_hw_addr_random(dev);
>>
>> 	dev->tx_queue_len = 0;
>> -	dev->flags |= IFF_NOARP;
>> 	dev->flags &= ~IFF_MULTICAST;
>> 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
>> 			   IFF_NO_QUEUE;
>> -- 
>> 2.39.3
>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected
  2023-12-15 11:11   ` Jiri Pirko
@ 2023-12-15 19:13     ` David Wei
  2023-12-16  9:21       ` Jiri Pirko
  0 siblings, 1 reply; 14+ messages in thread
From: David Wei @ 2023-12-15 19:13 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

On 2023-12-15 03:11, Jiri Pirko wrote:
> Thu, Dec 14, 2023 at 10:24:40PM CET, dw@davidwei.uk wrote:
>> Add a debugfs file in
>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer
>>
>> Writing "M B" to this file will link port A of netdevsim N with port B of
>> netdevsim M.
>>
>> Reading this file will return the linked netdevsim id and port, if any.
>>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>> drivers/net/netdevsim/bus.c       | 17 ++++++
>> drivers/net/netdevsim/dev.c       | 88 +++++++++++++++++++++++++++++++
>> drivers/net/netdevsim/netdev.c    |  6 +++
>> drivers/net/netdevsim/netdevsim.h |  3 ++
>> 4 files changed, 114 insertions(+)
>>
>> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
>> index bcbc1e19edde..1ef95661a3f5 100644
>> --- a/drivers/net/netdevsim/bus.c
>> +++ b/drivers/net/netdevsim/bus.c
>> @@ -323,6 +323,23 @@ static struct device_driver nsim_driver = {
>> 	.owner		= THIS_MODULE,
>> };
>>
>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id)
> 
> This sounds definitelly incorrect. You should not need to touch bus.c
> code. It arranges the bus and devices on it. The fact that a device is
> probed or not is parallel to this.
> 
> I think you need to maintain a separate list/xarray of netdevsim devices
> probed by nsim_drv_probe()

There is a 1:1 relationship between bus devices (nsim_bus_dev) and nsim devices
(nsim_dev). Adding a separate list for nsim devices seemed redundant to me when
there is already a list for bus devices.

> 
> 
>> +{
>> +	struct nsim_bus_dev *nsim_bus_dev;
>> +
>> +	mutex_lock(&nsim_bus_dev_list_lock);
>> +	list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) {
>> +		if (nsim_bus_dev->dev.id == id) {
>> +			get_device(&nsim_bus_dev->dev);
>> +			mutex_unlock(&nsim_bus_dev_list_lock);
>> +			return nsim_bus_dev;
>> +		}
>> +	}
>> +	mutex_unlock(&nsim_bus_dev_list_lock);
>> +
>> +	return NULL;
>> +}
>> +
>> int nsim_bus_init(void)
>> {
>> 	int err;
>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>> index b4d3b9cde8bd..034145ba1861 100644
>> --- a/drivers/net/netdevsim/dev.c
>> +++ b/drivers/net/netdevsim/dev.c
>> @@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
>> 	.owner = THIS_MODULE,
>> };
>>
>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
>> +				  size_t count, loff_t *ppos)
>> +{
>> +	struct nsim_dev_port *nsim_dev_port;
>> +	struct netdevsim *peer;
>> +	unsigned int id, port;
>> +	char buf[23];
>> +	ssize_t len;
>> +
>> +	nsim_dev_port = file->private_data;
>> +	rcu_read_lock();
>> +	peer = rcu_dereference(nsim_dev_port->ns->peer);
>> +	if (!peer) {
>> +		rcu_read_unlock();
>> +		return 0;
>> +	}
>> +
>> +	id = peer->nsim_bus_dev->dev.id;
>> +	port = peer->nsim_dev_port->port_index;
>> +	len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
>> +
>> +	rcu_read_unlock();
>> +	return simple_read_from_buffer(data, count, ppos, buf, len);
>> +}
>> +
>> +static ssize_t nsim_dev_peer_write(struct file *file,
>> +				   const char __user *data,
>> +				   size_t count, loff_t *ppos)
>> +{
>> +	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>> +	struct nsim_bus_dev *peer_bus_dev;
>> +	struct nsim_dev *peer_dev;
>> +	unsigned int id, port;
>> +	char buf[22];
>> +	ssize_t ret;
>> +
>> +	if (count >= sizeof(buf))
>> +		return -ENOSPC;
>> +
>> +	ret = copy_from_user(buf, data, count);
>> +	if (ret)
>> +		return -EFAULT;
>> +	buf[count] = '\0';
>> +
>> +	ret = sscanf(buf, "%u %u", &id, &port);
>> +	if (ret != 2) {
>> +		pr_err("Format for adding a peer is \"id port\" (uint uint)");
>> +		return -EINVAL;
>> +	}
>> +
>> +	/* invalid netdevsim id */
>> +	peer_bus_dev = nsim_bus_dev_get(id);
>> +	if (!peer_bus_dev)
>> +		return -EINVAL;
>> +
>> +	ret = -EINVAL;
>> +	/* cannot link to self */
>> +	nsim_dev_port = file->private_data;
>> +	if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev &&
>> +	    nsim_dev_port->port_index == port)
>> +		goto out;
>> +
>> +	peer_dev = dev_get_drvdata(&peer_bus_dev->dev);
> 
> Again, no bus touching should be needed. (btw, this could be null is dev
> is not probed)

That's fair, I can do a null check.

> 
> 
>> +	list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) {
>> +		if (peer_dev_port->port_index != port)
>> +			continue;
>> +		rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>> +		rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
> 
> What is stopping another cpu from setting different peer for the same
> port here, making a mess?

Looking into RCU a bit more, you're right that it does not protect from
multiple writers. Would adding a lock (spinlock?) to nsim_dev and taking that
be sufficient here?

Or what if I took rtnl_lock()?

> 
> 
>> +		ret = count;
>> +		goto out;
>> +	}
>> +
>> +out:
>> +	put_device(&peer_bus_dev->dev);
>> +	return ret;
>> +}
>> +
>> +static const struct file_operations nsim_dev_peer_fops = {
>> +	.open = simple_open,
>> +	.read = nsim_dev_peer_read,
>> +	.write = nsim_dev_peer_write,
>> +	.llseek = generic_file_llseek,
>> +	.owner = THIS_MODULE,
>> +};
>> +
>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>> 				      struct nsim_dev_port *nsim_dev_port)
>> {
>> @@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>> 	}
>> 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
>>
>> +	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
>> +			    nsim_dev_port, &nsim_dev_peer_fops);
>> +
>> 	return 0;
>> }
>>
>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>> index aecaf5f44374..e290c54b0e70 100644
>> --- a/drivers/net/netdevsim/netdev.c
>> +++ b/drivers/net/netdevsim/netdev.c
>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>> 	ns->nsim_dev = nsim_dev;
>> 	ns->nsim_dev_port = nsim_dev_port;
>> 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
>> +	RCU_INIT_POINTER(ns->peer, NULL);
>> 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
>> 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
>> 	nsim_ethtool_init(ns);
>> @@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>> void nsim_destroy(struct netdevsim *ns)
>> {
>> 	struct net_device *dev = ns->netdev;
>> +	struct netdevsim *peer;
>>
>> 	rtnl_lock();
>> +	peer = rtnl_dereference(ns->peer);
>> +	RCU_INIT_POINTER(ns->peer, NULL);
>> 	unregister_netdevice(dev);
>> +	if (peer)
>> +		RCU_INIT_POINTER(peer->peer, NULL);
> 
> What is stopping the another CPU from setting this back to this "ns"?
> Or what is stopping another netdevsim port from setting this ns while
> going away?
> 
> Do you rely on RTNL_LOCK in any way (other then synchronize_net() in
> unlock())? If yes, looks wrong.
> 
> This ns->peer update locking looks very broken to me :/

As above, would a spinlock on nsim_dev or taking rtnl_lock() in
nsim_dev_peer_write() resolve this?

> 
> 
> 
> 
>> 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
>> 		nsim_macsec_teardown(ns);
>> 		nsim_ipsec_teardown(ns);
>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>> index 028c825b86db..61ac3a80cf9a 100644
>> --- a/drivers/net/netdevsim/netdevsim.h
>> +++ b/drivers/net/netdevsim/netdevsim.h
>> @@ -125,6 +125,7 @@ struct netdevsim {
>> 	} udp_ports;
>>
>> 	struct nsim_ethtool ethtool;
>> +	struct netdevsim __rcu *peer;
>> };
>>
>> struct netdevsim *
>> @@ -415,5 +416,7 @@ struct nsim_bus_dev {
>> 	bool init;
>> };
>>
>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id);
>> +
>> int nsim_bus_init(void);
>> void nsim_bus_exit(void);
>> -- 
>> 2.39.3
>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected
  2023-12-15 19:13     ` David Wei
@ 2023-12-16  9:21       ` Jiri Pirko
  2023-12-16 20:52         ` David Wei
  0 siblings, 1 reply; 14+ messages in thread
From: Jiri Pirko @ 2023-12-16  9:21 UTC (permalink / raw)
  To: David Wei
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

Fri, Dec 15, 2023 at 08:13:45PM CET, dw@davidwei.uk wrote:
>On 2023-12-15 03:11, Jiri Pirko wrote:
>> Thu, Dec 14, 2023 at 10:24:40PM CET, dw@davidwei.uk wrote:
>>> Add a debugfs file in
>>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer
>>>
>>> Writing "M B" to this file will link port A of netdevsim N with port B of
>>> netdevsim M.
>>>
>>> Reading this file will return the linked netdevsim id and port, if any.
>>>
>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>> ---
>>> drivers/net/netdevsim/bus.c       | 17 ++++++
>>> drivers/net/netdevsim/dev.c       | 88 +++++++++++++++++++++++++++++++
>>> drivers/net/netdevsim/netdev.c    |  6 +++
>>> drivers/net/netdevsim/netdevsim.h |  3 ++
>>> 4 files changed, 114 insertions(+)
>>>
>>> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
>>> index bcbc1e19edde..1ef95661a3f5 100644
>>> --- a/drivers/net/netdevsim/bus.c
>>> +++ b/drivers/net/netdevsim/bus.c
>>> @@ -323,6 +323,23 @@ static struct device_driver nsim_driver = {
>>> 	.owner		= THIS_MODULE,
>>> };
>>>
>>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id)
>> 
>> This sounds definitelly incorrect. You should not need to touch bus.c
>> code. It arranges the bus and devices on it. The fact that a device is
>> probed or not is parallel to this.
>> 
>> I think you need to maintain a separate list/xarray of netdevsim devices
>> probed by nsim_drv_probe()
>
>There is a 1:1 relationship between bus devices (nsim_bus_dev) and nsim devices

Of course it is not. I thought I exaplained that. If you unbind (or not
bind at all), it is still in this list, however not probed.


>(nsim_dev). Adding a separate list for nsim devices seemed redundant to me when
>there is already a list for bus devices.

Again, please don't call into bus.c here.


>
>> 
>> 
>>> +{
>>> +	struct nsim_bus_dev *nsim_bus_dev;
>>> +
>>> +	mutex_lock(&nsim_bus_dev_list_lock);
>>> +	list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) {
>>> +		if (nsim_bus_dev->dev.id == id) {
>>> +			get_device(&nsim_bus_dev->dev);
>>> +			mutex_unlock(&nsim_bus_dev_list_lock);
>>> +			return nsim_bus_dev;
>>> +		}
>>> +	}
>>> +	mutex_unlock(&nsim_bus_dev_list_lock);
>>> +
>>> +	return NULL;
>>> +}
>>> +
>>> int nsim_bus_init(void)
>>> {
>>> 	int err;
>>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>>> index b4d3b9cde8bd..034145ba1861 100644
>>> --- a/drivers/net/netdevsim/dev.c
>>> +++ b/drivers/net/netdevsim/dev.c
>>> @@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
>>> 	.owner = THIS_MODULE,
>>> };
>>>
>>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
>>> +				  size_t count, loff_t *ppos)
>>> +{
>>> +	struct nsim_dev_port *nsim_dev_port;
>>> +	struct netdevsim *peer;
>>> +	unsigned int id, port;
>>> +	char buf[23];
>>> +	ssize_t len;
>>> +
>>> +	nsim_dev_port = file->private_data;
>>> +	rcu_read_lock();
>>> +	peer = rcu_dereference(nsim_dev_port->ns->peer);
>>> +	if (!peer) {
>>> +		rcu_read_unlock();
>>> +		return 0;
>>> +	}
>>> +
>>> +	id = peer->nsim_bus_dev->dev.id;
>>> +	port = peer->nsim_dev_port->port_index;
>>> +	len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
>>> +
>>> +	rcu_read_unlock();
>>> +	return simple_read_from_buffer(data, count, ppos, buf, len);
>>> +}
>>> +
>>> +static ssize_t nsim_dev_peer_write(struct file *file,
>>> +				   const char __user *data,
>>> +				   size_t count, loff_t *ppos)
>>> +{
>>> +	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>>> +	struct nsim_bus_dev *peer_bus_dev;
>>> +	struct nsim_dev *peer_dev;
>>> +	unsigned int id, port;
>>> +	char buf[22];
>>> +	ssize_t ret;
>>> +
>>> +	if (count >= sizeof(buf))
>>> +		return -ENOSPC;
>>> +
>>> +	ret = copy_from_user(buf, data, count);
>>> +	if (ret)
>>> +		return -EFAULT;
>>> +	buf[count] = '\0';
>>> +
>>> +	ret = sscanf(buf, "%u %u", &id, &port);
>>> +	if (ret != 2) {
>>> +		pr_err("Format for adding a peer is \"id port\" (uint uint)");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	/* invalid netdevsim id */
>>> +	peer_bus_dev = nsim_bus_dev_get(id);
>>> +	if (!peer_bus_dev)
>>> +		return -EINVAL;
>>> +
>>> +	ret = -EINVAL;
>>> +	/* cannot link to self */
>>> +	nsim_dev_port = file->private_data;
>>> +	if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev &&
>>> +	    nsim_dev_port->port_index == port)
>>> +		goto out;
>>> +
>>> +	peer_dev = dev_get_drvdata(&peer_bus_dev->dev);
>> 
>> Again, no bus touching should be needed. (btw, this could be null is dev
>> is not probed)
>
>That's fair, I can do a null check.
>
>> 
>> 
>>> +	list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) {
>>> +		if (peer_dev_port->port_index != port)
>>> +			continue;
>>> +		rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>>> +		rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
>> 
>> What is stopping another cpu from setting different peer for the same
>> port here, making a mess?
>
>Looking into RCU a bit more, you're right that it does not protect from
>multiple writers. Would adding a lock (spinlock?) to nsim_dev and taking that
>be sufficient here?
>
>Or what if I took rtnl_lock()?

You have multiple choices how to handle this.

>
>> 
>> 
>>> +		ret = count;
>>> +		goto out;
>>> +	}
>>> +
>>> +out:
>>> +	put_device(&peer_bus_dev->dev);
>>> +	return ret;
>>> +}
>>> +
>>> +static const struct file_operations nsim_dev_peer_fops = {
>>> +	.open = simple_open,
>>> +	.read = nsim_dev_peer_read,
>>> +	.write = nsim_dev_peer_write,
>>> +	.llseek = generic_file_llseek,
>>> +	.owner = THIS_MODULE,
>>> +};
>>> +
>>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>>> 				      struct nsim_dev_port *nsim_dev_port)
>>> {
>>> @@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>>> 	}
>>> 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
>>>
>>> +	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
>>> +			    nsim_dev_port, &nsim_dev_peer_fops);
>>> +
>>> 	return 0;
>>> }
>>>
>>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>>> index aecaf5f44374..e290c54b0e70 100644
>>> --- a/drivers/net/netdevsim/netdev.c
>>> +++ b/drivers/net/netdevsim/netdev.c
>>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>>> 	ns->nsim_dev = nsim_dev;
>>> 	ns->nsim_dev_port = nsim_dev_port;
>>> 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
>>> +	RCU_INIT_POINTER(ns->peer, NULL);
>>> 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
>>> 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
>>> 	nsim_ethtool_init(ns);
>>> @@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>>> void nsim_destroy(struct netdevsim *ns)
>>> {
>>> 	struct net_device *dev = ns->netdev;
>>> +	struct netdevsim *peer;
>>>
>>> 	rtnl_lock();
>>> +	peer = rtnl_dereference(ns->peer);
>>> +	RCU_INIT_POINTER(ns->peer, NULL);
>>> 	unregister_netdevice(dev);
>>> +	if (peer)
>>> +		RCU_INIT_POINTER(peer->peer, NULL);
>> 
>> What is stopping the another CPU from setting this back to this "ns"?
>> Or what is stopping another netdevsim port from setting this ns while
>> going away?
>> 
>> Do you rely on RTNL_LOCK in any way (other then synchronize_net() in
>> unlock())? If yes, looks wrong.
>> 
>> This ns->peer update locking looks very broken to me :/
>
>As above, would a spinlock on nsim_dev or taking rtnl_lock() in
>nsim_dev_peer_write() resolve this?
>
>> 
>> 
>> 
>> 
>>> 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
>>> 		nsim_macsec_teardown(ns);
>>> 		nsim_ipsec_teardown(ns);
>>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>>> index 028c825b86db..61ac3a80cf9a 100644
>>> --- a/drivers/net/netdevsim/netdevsim.h
>>> +++ b/drivers/net/netdevsim/netdevsim.h
>>> @@ -125,6 +125,7 @@ struct netdevsim {
>>> 	} udp_ports;
>>>
>>> 	struct nsim_ethtool ethtool;
>>> +	struct netdevsim __rcu *peer;
>>> };
>>>
>>> struct netdevsim *
>>> @@ -415,5 +416,7 @@ struct nsim_bus_dev {
>>> 	bool init;
>>> };
>>>
>>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id);
>>> +
>>> int nsim_bus_init(void);
>>> void nsim_bus_exit(void);
>>> -- 
>>> 2.39.3
>>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another
  2023-12-15 18:31     ` David Wei
@ 2023-12-16  9:22       ` Jiri Pirko
  2023-12-17  2:59         ` David Wei
  0 siblings, 1 reply; 14+ messages in thread
From: Jiri Pirko @ 2023-12-16  9:22 UTC (permalink / raw)
  To: David Wei
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

Fri, Dec 15, 2023 at 07:31:42PM CET, dw@davidwei.uk wrote:
>On 2023-12-15 02:45, Jiri Pirko wrote:
>> Thu, Dec 14, 2023 at 10:24:41PM CET, dw@davidwei.uk wrote:
>>> Forward skbs sent from one netdevsim port to its connected netdevsim
>>> port using dev_forward_skb, in a spirit similar to veth.
>> 
>> Perhaps better to write "dev_forward_skb()" to make obvious you talk
>> about function.
>
>Sorry, it's a bad habit at this point :)
>
>> 
>> 
>>>
>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>> ---
>>> drivers/net/netdevsim/netdev.c | 23 ++++++++++++++++++-----
>>> 1 file changed, 18 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>>> index e290c54b0e70..c5f53b1dbdcc 100644
>>> --- a/drivers/net/netdevsim/netdev.c
>>> +++ b/drivers/net/netdevsim/netdev.c
>>> @@ -29,19 +29,33 @@
>>> static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
>>> {
>>> 	struct netdevsim *ns = netdev_priv(dev);
>>> +	struct netdevsim *peer_ns;
>>> +	int ret = NETDEV_TX_OK;
>>>
>>> +	rcu_read_lock();
>> 
>> Why do you need to be in rcu read locked section here?
>
>So the RCU protected pointer `peer` does not change during the critical
>section. Veth does something similar in its xmit() for its peer.

RCU does not work like this. Please check out the documentation.


>
>> 
>> 
>>> 	if (!nsim_ipsec_tx(ns, skb))
>>> -		goto out;
>>> +		goto err;
>> 
>> Not sure why you need to rename the label. Why "out" is not okay?
>> 
>>>
>>> 	u64_stats_update_begin(&ns->syncp);
>>> 	ns->tx_packets++;
>>> 	ns->tx_bytes += skb->len;
>>> 	u64_stats_update_end(&ns->syncp);
>>>
>>> -out:
>>> -	dev_kfree_skb(skb);
>>> +	peer_ns = rcu_dereference(ns->peer);
>>> +	if (!peer_ns)
>>> +		goto err;
>> 
>> This is definitelly not an error path, "err" label name is misleading.
>
>That's fair, I can change it back. Lots has changed since my original
>intentions.
>
>> 
>> 
>>> +
>>> +	skb_tx_timestamp(skb);
>>> +	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
>>> +		ret = NET_XMIT_DROP;
>> 
>> Hmm, can't you track dropped packets in ns->tx_dropped and expose in
>> nsim_get_stats64() ?
>
>I can add this.
>
>> 
>> 
>>>
>>> -	return NETDEV_TX_OK;
>>> +	rcu_read_unlock();
>>> +	return ret;
>>> +
>>> +err:
>>> +	rcu_read_unlock();
>>> +	dev_kfree_skb(skb);
>>> +	return ret;
>>> }
>>>
>>> static void nsim_set_rx_mode(struct net_device *dev)
>>> @@ -302,7 +316,6 @@ static void nsim_setup(struct net_device *dev)
>>> 	eth_hw_addr_random(dev);
>>>
>>> 	dev->tx_queue_len = 0;
>>> -	dev->flags |= IFF_NOARP;
>>> 	dev->flags &= ~IFF_MULTICAST;
>>> 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
>>> 			   IFF_NO_QUEUE;
>>> -- 
>>> 2.39.3
>>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected
  2023-12-16  9:21       ` Jiri Pirko
@ 2023-12-16 20:52         ` David Wei
  0 siblings, 0 replies; 14+ messages in thread
From: David Wei @ 2023-12-16 20:52 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

On 2023-12-16 01:21, Jiri Pirko wrote:
> Fri, Dec 15, 2023 at 08:13:45PM CET, dw@davidwei.uk wrote:
>> On 2023-12-15 03:11, Jiri Pirko wrote:
>>> Thu, Dec 14, 2023 at 10:24:40PM CET, dw@davidwei.uk wrote:
>>>> Add a debugfs file in
>>>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer
>>>>
>>>> Writing "M B" to this file will link port A of netdevsim N with port B of
>>>> netdevsim M.
>>>>
>>>> Reading this file will return the linked netdevsim id and port, if any.
>>>>
>>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>>> ---
>>>> drivers/net/netdevsim/bus.c       | 17 ++++++
>>>> drivers/net/netdevsim/dev.c       | 88 +++++++++++++++++++++++++++++++
>>>> drivers/net/netdevsim/netdev.c    |  6 +++
>>>> drivers/net/netdevsim/netdevsim.h |  3 ++
>>>> 4 files changed, 114 insertions(+)
>>>>
>>>> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
>>>> index bcbc1e19edde..1ef95661a3f5 100644
>>>> --- a/drivers/net/netdevsim/bus.c
>>>> +++ b/drivers/net/netdevsim/bus.c
>>>> @@ -323,6 +323,23 @@ static struct device_driver nsim_driver = {
>>>> 	.owner		= THIS_MODULE,
>>>> };
>>>>
>>>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id)
>>>
>>> This sounds definitelly incorrect. You should not need to touch bus.c
>>> code. It arranges the bus and devices on it. The fact that a device is
>>> probed or not is parallel to this.
>>>
>>> I think you need to maintain a separate list/xarray of netdevsim devices
>>> probed by nsim_drv_probe()
>>
>> There is a 1:1 relationship between bus devices (nsim_bus_dev) and nsim devices
> 
> Of course it is not. I thought I exaplained that. If you unbind (or not
> bind at all), it is still in this list, however not probed.

Right, I understand now thanks. The 1:1 relationship is initially true, but
devices can be removed by unbinding their drivers. I tried manually unbinding
using /sys/bus/netdevsim/drivers/netdevsim/unbind which removes a device but
keeps the bus device.

The current implementation is not resilient to this and I can trigger crashes
e.g. by unbinding then trying to add a port.

> 
> 
>> (nsim_dev). Adding a separate list for nsim devices seemed redundant to me when
>> there is already a list for bus devices.
> 
> Again, please don't call into bus.c here.

Got it, I will maintain a separate list of bound devices.

> 
> 
>>
>>>
>>>
>>>> +{
>>>> +	struct nsim_bus_dev *nsim_bus_dev;
>>>> +
>>>> +	mutex_lock(&nsim_bus_dev_list_lock);
>>>> +	list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) {
>>>> +		if (nsim_bus_dev->dev.id == id) {
>>>> +			get_device(&nsim_bus_dev->dev);
>>>> +			mutex_unlock(&nsim_bus_dev_list_lock);
>>>> +			return nsim_bus_dev;
>>>> +		}
>>>> +	}
>>>> +	mutex_unlock(&nsim_bus_dev_list_lock);
>>>> +
>>>> +	return NULL;
>>>> +}
>>>> +
>>>> int nsim_bus_init(void)
>>>> {
>>>> 	int err;
>>>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>>>> index b4d3b9cde8bd..034145ba1861 100644
>>>> --- a/drivers/net/netdevsim/dev.c
>>>> +++ b/drivers/net/netdevsim/dev.c
>>>> @@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = {
>>>> 	.owner = THIS_MODULE,
>>>> };
>>>>
>>>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data,
>>>> +				  size_t count, loff_t *ppos)
>>>> +{
>>>> +	struct nsim_dev_port *nsim_dev_port;
>>>> +	struct netdevsim *peer;
>>>> +	unsigned int id, port;
>>>> +	char buf[23];
>>>> +	ssize_t len;
>>>> +
>>>> +	nsim_dev_port = file->private_data;
>>>> +	rcu_read_lock();
>>>> +	peer = rcu_dereference(nsim_dev_port->ns->peer);
>>>> +	if (!peer) {
>>>> +		rcu_read_unlock();
>>>> +		return 0;
>>>> +	}
>>>> +
>>>> +	id = peer->nsim_bus_dev->dev.id;
>>>> +	port = peer->nsim_dev_port->port_index;
>>>> +	len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port);
>>>> +
>>>> +	rcu_read_unlock();
>>>> +	return simple_read_from_buffer(data, count, ppos, buf, len);
>>>> +}
>>>> +
>>>> +static ssize_t nsim_dev_peer_write(struct file *file,
>>>> +				   const char __user *data,
>>>> +				   size_t count, loff_t *ppos)
>>>> +{
>>>> +	struct nsim_dev_port *nsim_dev_port, *peer_dev_port;
>>>> +	struct nsim_bus_dev *peer_bus_dev;
>>>> +	struct nsim_dev *peer_dev;
>>>> +	unsigned int id, port;
>>>> +	char buf[22];
>>>> +	ssize_t ret;
>>>> +
>>>> +	if (count >= sizeof(buf))
>>>> +		return -ENOSPC;
>>>> +
>>>> +	ret = copy_from_user(buf, data, count);
>>>> +	if (ret)
>>>> +		return -EFAULT;
>>>> +	buf[count] = '\0';
>>>> +
>>>> +	ret = sscanf(buf, "%u %u", &id, &port);
>>>> +	if (ret != 2) {
>>>> +		pr_err("Format for adding a peer is \"id port\" (uint uint)");
>>>> +		return -EINVAL;
>>>> +	}
>>>> +
>>>> +	/* invalid netdevsim id */
>>>> +	peer_bus_dev = nsim_bus_dev_get(id);
>>>> +	if (!peer_bus_dev)
>>>> +		return -EINVAL;
>>>> +
>>>> +	ret = -EINVAL;
>>>> +	/* cannot link to self */
>>>> +	nsim_dev_port = file->private_data;
>>>> +	if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev &&
>>>> +	    nsim_dev_port->port_index == port)
>>>> +		goto out;
>>>> +
>>>> +	peer_dev = dev_get_drvdata(&peer_bus_dev->dev);
>>>
>>> Again, no bus touching should be needed. (btw, this could be null is dev
>>> is not probed)
>>
>> That's fair, I can do a null check.
>>
>>>
>>>
>>>> +	list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) {
>>>> +		if (peer_dev_port->port_index != port)
>>>> +			continue;
>>>> +		rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns);
>>>> +		rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns);
>>>
>>> What is stopping another cpu from setting different peer for the same
>>> port here, making a mess?
>>
>> Looking into RCU a bit more, you're right that it does not protect from
>> multiple writers. Would adding a lock (spinlock?) to nsim_dev and taking that
>> be sufficient here?
>>
>> Or what if I took rtnl_lock()?
> 
> You have multiple choices how to handle this.

I'll start with a spinlock but it would be good to know what the 'best' option
is in this case. I don't have a well calibrated sense for it yet.

> 
>>
>>>
>>>
>>>> +		ret = count;
>>>> +		goto out;
>>>> +	}
>>>> +
>>>> +out:
>>>> +	put_device(&peer_bus_dev->dev);
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static const struct file_operations nsim_dev_peer_fops = {
>>>> +	.open = simple_open,
>>>> +	.read = nsim_dev_peer_read,
>>>> +	.write = nsim_dev_peer_write,
>>>> +	.llseek = generic_file_llseek,
>>>> +	.owner = THIS_MODULE,
>>>> +};
>>>> +
>>>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>>>> 				      struct nsim_dev_port *nsim_dev_port)
>>>> {
>>>> @@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
>>>> 	}
>>>> 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
>>>>
>>>> +	debugfs_create_file("peer", 0600, nsim_dev_port->ddir,
>>>> +			    nsim_dev_port, &nsim_dev_peer_fops);
>>>> +
>>>> 	return 0;
>>>> }
>>>>
>>>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>>>> index aecaf5f44374..e290c54b0e70 100644
>>>> --- a/drivers/net/netdevsim/netdev.c
>>>> +++ b/drivers/net/netdevsim/netdev.c
>>>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>>>> 	ns->nsim_dev = nsim_dev;
>>>> 	ns->nsim_dev_port = nsim_dev_port;
>>>> 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
>>>> +	RCU_INIT_POINTER(ns->peer, NULL);
>>>> 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
>>>> 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
>>>> 	nsim_ethtool_init(ns);
>>>> @@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
>>>> void nsim_destroy(struct netdevsim *ns)
>>>> {
>>>> 	struct net_device *dev = ns->netdev;
>>>> +	struct netdevsim *peer;
>>>>
>>>> 	rtnl_lock();
>>>> +	peer = rtnl_dereference(ns->peer);
>>>> +	RCU_INIT_POINTER(ns->peer, NULL);
>>>> 	unregister_netdevice(dev);
>>>> +	if (peer)
>>>> +		RCU_INIT_POINTER(peer->peer, NULL);
>>>
>>> What is stopping the another CPU from setting this back to this "ns"?
>>> Or what is stopping another netdevsim port from setting this ns while
>>> going away?
>>>
>>> Do you rely on RTNL_LOCK in any way (other then synchronize_net() in
>>> unlock())? If yes, looks wrong.
>>>
>>> This ns->peer update locking looks very broken to me :/
>>
>> As above, would a spinlock on nsim_dev or taking rtnl_lock() in
>> nsim_dev_peer_write() resolve this?
>>
>>>
>>>
>>>
>>>
>>>> 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
>>>> 		nsim_macsec_teardown(ns);
>>>> 		nsim_ipsec_teardown(ns);
>>>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>>>> index 028c825b86db..61ac3a80cf9a 100644
>>>> --- a/drivers/net/netdevsim/netdevsim.h
>>>> +++ b/drivers/net/netdevsim/netdevsim.h
>>>> @@ -125,6 +125,7 @@ struct netdevsim {
>>>> 	} udp_ports;
>>>>
>>>> 	struct nsim_ethtool ethtool;
>>>> +	struct netdevsim __rcu *peer;
>>>> };
>>>>
>>>> struct netdevsim *
>>>> @@ -415,5 +416,7 @@ struct nsim_bus_dev {
>>>> 	bool init;
>>>> };
>>>>
>>>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id);
>>>> +
>>>> int nsim_bus_init(void);
>>>> void nsim_bus_exit(void);
>>>> -- 
>>>> 2.39.3
>>>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another
  2023-12-16  9:22       ` Jiri Pirko
@ 2023-12-17  2:59         ` David Wei
  2023-12-17 11:36           ` Jiri Pirko
  0 siblings, 1 reply; 14+ messages in thread
From: David Wei @ 2023-12-17  2:59 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

On 2023-12-16 01:22, Jiri Pirko wrote:
> Fri, Dec 15, 2023 at 07:31:42PM CET, dw@davidwei.uk wrote:
>> On 2023-12-15 02:45, Jiri Pirko wrote:
>>> Thu, Dec 14, 2023 at 10:24:41PM CET, dw@davidwei.uk wrote:
>>>> Forward skbs sent from one netdevsim port to its connected netdevsim
>>>> port using dev_forward_skb, in a spirit similar to veth.
>>>
>>> Perhaps better to write "dev_forward_skb()" to make obvious you talk
>>> about function.
>>
>> Sorry, it's a bad habit at this point :)
>>
>>>
>>>
>>>>
>>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>>> ---
>>>> drivers/net/netdevsim/netdev.c | 23 ++++++++++++++++++-----
>>>> 1 file changed, 18 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>>>> index e290c54b0e70..c5f53b1dbdcc 100644
>>>> --- a/drivers/net/netdevsim/netdev.c
>>>> +++ b/drivers/net/netdevsim/netdev.c
>>>> @@ -29,19 +29,33 @@
>>>> static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
>>>> {
>>>> 	struct netdevsim *ns = netdev_priv(dev);
>>>> +	struct netdevsim *peer_ns;
>>>> +	int ret = NETDEV_TX_OK;
>>>>
>>>> +	rcu_read_lock();
>>>
>>> Why do you need to be in rcu read locked section here?
>>
>> So the RCU protected pointer `peer` does not change during the critical
>> section. Veth does something similar in its xmit() for its peer.
> 
> RCU does not work like this. Please check out the documentation.

When destroying a netdevsim in nsim_destroy(), rtnl_lock is held which prevents
concurrent destruction of netdevsims. Then, unregister_netdevice() will
eventually call synchronize_rcu_expedited().

Let's say we have two netdevsims, A linked with B, where A->peer is B and
B->peer is A.

If we're destroying B in nsim_destroy(), then we first do
rcu_assign_pointer(A->peer, NULL). Of course, any read-side critical sections
that dereferenced a non-NULL A->peer won't be affected by this update.

Then B's nsim_destroy() calls unregister_netdevice(), followed eventually by
synchronize_rcu_expedited(). As I understand RCU, this will wait for one RCU
grace period, or any nsim_start_xmit() that started _before_ B's
rcu_assign_pointer(A->peer, NULL) to complete.

RCU docs say that the caller of synchronize_rcu() upon return may be again
concurrent w/ another nsim_start_xmit() reader. But they should see NULL for
A->peer ptr due to the rcu_assign_pointer(A->peer, NULL) update during B's
nsim_destroy(). So after synchronize_rcu() no skb from A should be forwarded to
B anymore.

In fact, it looks like since v5.0 being in a softirq handler serves as an RCU
read-side critical section. So the rcu_read_lock() here in nsim_start_xmit() is
actually redundant.

I believe this is veth's intention too. There is a comment in veth_dellink()
that says the pair of peer devices are guaranteed to be not freed before one
RCU grace period.

As long as adding/removing links is also under rtnl_lock, I think with RCU
guarantees discussed above we will be SMP safe. Does this seem right to you?

> 
> 
>>
>>>
>>>
>>>> 	if (!nsim_ipsec_tx(ns, skb))
>>>> -		goto out;
>>>> +		goto err;
>>>
>>> Not sure why you need to rename the label. Why "out" is not okay?
>>>
>>>>
>>>> 	u64_stats_update_begin(&ns->syncp);
>>>> 	ns->tx_packets++;
>>>> 	ns->tx_bytes += skb->len;
>>>> 	u64_stats_update_end(&ns->syncp);
>>>>
>>>> -out:
>>>> -	dev_kfree_skb(skb);
>>>> +	peer_ns = rcu_dereference(ns->peer);
>>>> +	if (!peer_ns)
>>>> +		goto err;
>>>
>>> This is definitelly not an error path, "err" label name is misleading.
>>
>> That's fair, I can change it back. Lots has changed since my original
>> intentions.
>>
>>>
>>>
>>>> +
>>>> +	skb_tx_timestamp(skb);
>>>> +	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
>>>> +		ret = NET_XMIT_DROP;
>>>
>>> Hmm, can't you track dropped packets in ns->tx_dropped and expose in
>>> nsim_get_stats64() ?
>>
>> I can add this.
>>
>>>
>>>
>>>>
>>>> -	return NETDEV_TX_OK;
>>>> +	rcu_read_unlock();
>>>> +	return ret;
>>>> +
>>>> +err:
>>>> +	rcu_read_unlock();
>>>> +	dev_kfree_skb(skb);
>>>> +	return ret;
>>>> }
>>>>
>>>> static void nsim_set_rx_mode(struct net_device *dev)
>>>> @@ -302,7 +316,6 @@ static void nsim_setup(struct net_device *dev)
>>>> 	eth_hw_addr_random(dev);
>>>>
>>>> 	dev->tx_queue_len = 0;
>>>> -	dev->flags |= IFF_NOARP;
>>>> 	dev->flags &= ~IFF_MULTICAST;
>>>> 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
>>>> 			   IFF_NO_QUEUE;
>>>> -- 
>>>> 2.39.3
>>>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another
  2023-12-17  2:59         ` David Wei
@ 2023-12-17 11:36           ` Jiri Pirko
  0 siblings, 0 replies; 14+ messages in thread
From: Jiri Pirko @ 2023-12-17 11:36 UTC (permalink / raw)
  To: David Wei
  Cc: Jakub Kicinski, Sabrina Dubroca, netdev, David S. Miller,
	Eric Dumazet, Paolo Abeni

Sun, Dec 17, 2023 at 03:59:53AM CET, dw@davidwei.uk wrote:
>On 2023-12-16 01:22, Jiri Pirko wrote:
>> Fri, Dec 15, 2023 at 07:31:42PM CET, dw@davidwei.uk wrote:
>>> On 2023-12-15 02:45, Jiri Pirko wrote:
>>>> Thu, Dec 14, 2023 at 10:24:41PM CET, dw@davidwei.uk wrote:
>>>>> Forward skbs sent from one netdevsim port to its connected netdevsim
>>>>> port using dev_forward_skb, in a spirit similar to veth.
>>>>
>>>> Perhaps better to write "dev_forward_skb()" to make obvious you talk
>>>> about function.
>>>
>>> Sorry, it's a bad habit at this point :)
>>>
>>>>
>>>>
>>>>>
>>>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>>>> ---
>>>>> drivers/net/netdevsim/netdev.c | 23 ++++++++++++++++++-----
>>>>> 1 file changed, 18 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
>>>>> index e290c54b0e70..c5f53b1dbdcc 100644
>>>>> --- a/drivers/net/netdevsim/netdev.c
>>>>> +++ b/drivers/net/netdevsim/netdev.c
>>>>> @@ -29,19 +29,33 @@
>>>>> static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
>>>>> {
>>>>> 	struct netdevsim *ns = netdev_priv(dev);
>>>>> +	struct netdevsim *peer_ns;
>>>>> +	int ret = NETDEV_TX_OK;
>>>>>
>>>>> +	rcu_read_lock();
>>>>
>>>> Why do you need to be in rcu read locked section here?
>>>
>>> So the RCU protected pointer `peer` does not change during the critical
>>> section. Veth does something similar in its xmit() for its peer.
>> 
>> RCU does not work like this. Please check out the documentation.
>
>When destroying a netdevsim in nsim_destroy(), rtnl_lock is held which prevents
>concurrent destruction of netdevsims. Then, unregister_netdevice() will
>eventually call synchronize_rcu_expedited().
>
>Let's say we have two netdevsims, A linked with B, where A->peer is B and
>B->peer is A.
>
>If we're destroying B in nsim_destroy(), then we first do
>rcu_assign_pointer(A->peer, NULL). Of course, any read-side critical sections
>that dereferenced a non-NULL A->peer won't be affected by this update.
>
>Then B's nsim_destroy() calls unregister_netdevice(), followed eventually by
>synchronize_rcu_expedited(). As I understand RCU, this will wait for one RCU
>grace period, or any nsim_start_xmit() that started _before_ B's
>rcu_assign_pointer(A->peer, NULL) to complete.
>
>RCU docs say that the caller of synchronize_rcu() upon return may be again
>concurrent w/ another nsim_start_xmit() reader. But they should see NULL for
>A->peer ptr due to the rcu_assign_pointer(A->peer, NULL) update during B's
>nsim_destroy(). So after synchronize_rcu() no skb from A should be forwarded to
>B anymore.
>
>In fact, it looks like since v5.0 being in a softirq handler serves as an RCU
>read-side critical section. So the rcu_read_lock() here in nsim_start_xmit() is
>actually redundant.
>
>I believe this is veth's intention too. There is a comment in veth_dellink()
>that says the pair of peer devices are guaranteed to be not freed before one
>RCU grace period.
>
>As long as adding/removing links is also under rtnl_lock, I think with RCU
>guarantees discussed above we will be SMP safe. Does this seem right to you?
>
>> 
>> 
>>>
>>>>
>>>>
>>>>> 	if (!nsim_ipsec_tx(ns, skb))
>>>>> -		goto out;
>>>>> +		goto err;
>>>>
>>>> Not sure why you need to rename the label. Why "out" is not okay?
>>>>
>>>>>
>>>>> 	u64_stats_update_begin(&ns->syncp);
>>>>> 	ns->tx_packets++;
>>>>> 	ns->tx_bytes += skb->len;
>>>>> 	u64_stats_update_end(&ns->syncp);
>>>>>
>>>>> -out:
>>>>> -	dev_kfree_skb(skb);


My point is, why don't take rcu_read_lock() here.


>>>>> +	peer_ns = rcu_dereference(ns->peer);
>>>>> +	if (!peer_ns)
>>>>> +		goto err;
>>>>
>>>> This is definitelly not an error path, "err" label name is misleading.
>>>
>>> That's fair, I can change it back. Lots has changed since my original
>>> intentions.
>>>
>>>>
>>>>
>>>>> +
>>>>> +	skb_tx_timestamp(skb);
>>>>> +	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
>>>>> +		ret = NET_XMIT_DROP;
>>>>
>>>> Hmm, can't you track dropped packets in ns->tx_dropped and expose in
>>>> nsim_get_stats64() ?
>>>
>>> I can add this.
>>>
>>>>
>>>>
>>>>>
>>>>> -	return NETDEV_TX_OK;
>>>>> +	rcu_read_unlock();
>>>>> +	return ret;
>>>>> +
>>>>> +err:
>>>>> +	rcu_read_unlock();
>>>>> +	dev_kfree_skb(skb);
>>>>> +	return ret;
>>>>> }
>>>>>
>>>>> static void nsim_set_rx_mode(struct net_device *dev)
>>>>> @@ -302,7 +316,6 @@ static void nsim_setup(struct net_device *dev)
>>>>> 	eth_hw_addr_random(dev);
>>>>>
>>>>> 	dev->tx_queue_len = 0;
>>>>> -	dev->flags |= IFF_NOARP;
>>>>> 	dev->flags &= ~IFF_MULTICAST;
>>>>> 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
>>>>> 			   IFF_NO_QUEUE;
>>>>> -- 
>>>>> 2.39.3
>>>>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2023-12-17 11:36 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-12-14 21:24 [PATCH net-next v3 0/4] netdevsim: link and forward skbs between ports David Wei
2023-12-14 21:24 ` [PATCH net-next v3 1/4] netdevsim: allow two netdevsim ports to be connected David Wei
2023-12-15 11:11   ` Jiri Pirko
2023-12-15 19:13     ` David Wei
2023-12-16  9:21       ` Jiri Pirko
2023-12-16 20:52         ` David Wei
2023-12-14 21:24 ` [PATCH net-next v3 2/4] netdevsim: forward skbs from one connected port to another David Wei
2023-12-15 10:45   ` Jiri Pirko
2023-12-15 18:31     ` David Wei
2023-12-16  9:22       ` Jiri Pirko
2023-12-17  2:59         ` David Wei
2023-12-17 11:36           ` Jiri Pirko
2023-12-14 21:24 ` [PATCH net-next v3 3/4] netdevsim: add selftest for forwarding skb between connected ports David Wei
2023-12-14 21:24 ` [PATCH net-next v3 4/4] netdevsim: add Makefile for selftests David Wei

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).