* [PATCH net-next v13 1/4] netdevsim: allow two netdevsim ports to be connected
2024-02-22 5:08 [PATCH net-next v13 0/4] netdevsim: link and forward skbs between ports David Wei
@ 2024-02-22 5:08 ` David Wei
2024-02-24 0:44 ` Jakub Kicinski
2024-02-22 5:08 ` [PATCH net-next v13 2/4] netdevsim: forward skbs from one connected port to another David Wei
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: David Wei @ 2024-02-22 5:08 UTC (permalink / raw)
To: Jakub Kicinski, Jiri Pirko, Sabrina Dubroca, maciek, horms,
netdev
Cc: David S. Miller, Eric Dumazet, Paolo Abeni
Add two netdevsim bus attribute to sysfs:
/sys/bus/netdevsim/link_device
/sys/bus/netdevsim/unlink_device
Writing "A M B N" to link_device will link netdevsim M in netnsid A with
netdevsim N in netnsid B.
Writing "A M" to unlink_device will unlink netdevsim M in netnsid A from
its peer, if any.
rtnl_lock is taken to ensure nothing changes during the linking.
Signed-off-by: David Wei <dw@davidwei.uk>
---
drivers/net/netdevsim/bus.c | 135 ++++++++++++++++++++++++++++++
drivers/net/netdevsim/netdev.c | 10 +++
drivers/net/netdevsim/netdevsim.h | 2 +
3 files changed, 147 insertions(+)
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 0c5aff63d242..202fa35716db 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -232,9 +232,144 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
}
static BUS_ATTR_WO(del_device);
+static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+ struct netdevsim *nsim_a, *nsim_b, *peer;
+ struct net_device *dev_a, *dev_b;
+ unsigned int ifidx_a, ifidx_b;
+ int netnsfd_a, netnsfd_b, err;
+ struct net *ns_a, *ns_b;
+
+ err = sscanf(buf, "%d:%u %d:%u", &netnsfd_a, &ifidx_a, &netnsfd_b, &ifidx_b);
+ if (err != 4) {
+ pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n");
+ return -EINVAL;
+ }
+
+ ns_a = get_net_ns_by_fd(netnsfd_a);
+ if (IS_ERR(ns_a)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd_a);
+ return -EINVAL;
+ }
+
+ ns_b = get_net_ns_by_fd(netnsfd_b);
+ if (IS_ERR(ns_b)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd_b);
+ put_net(ns_a);
+ return -EINVAL;
+ }
+
+ err = -EINVAL;
+ rtnl_lock();
+ dev_a = __dev_get_by_index(ns_a, ifidx_a);
+ if (!dev_a) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n", ifidx_a, netnsfd_a);
+ goto out_err;
+ }
+
+ if (!netdev_is_nsim(dev_a)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", ifidx_a, netnsfd_a);
+ goto out_err;
+ }
+
+ dev_b = __dev_get_by_index(ns_b, ifidx_b);
+ if (!dev_b) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n", ifidx_b, netnsfd_b);
+ goto out_err;
+ }
+
+ if (!netdev_is_nsim(dev_b)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", ifidx_b, netnsfd_b);
+ goto out_err;
+ }
+
+ if (dev_a == dev_b) {
+ pr_err("Cannot link a netdevsim to itself\n");
+ goto out_err;
+ }
+
+ err = 0;
+ nsim_a = netdev_priv(dev_a);
+ peer = rtnl_dereference(nsim_a->peer);
+ if (peer) {
+ pr_err("Netdevsim %d:%u is already linked\n", netnsfd_a, ifidx_a);
+ goto out_err;
+ }
+
+ nsim_b = netdev_priv(dev_b);
+ peer = rtnl_dereference(nsim_b->peer);
+ if (peer) {
+ pr_err("Netdevsim %d:%u is already linked\n", netnsfd_b, ifidx_b);
+ goto out_err;
+ }
+
+ rcu_assign_pointer(nsim_a->peer, nsim_b);
+ rcu_assign_pointer(nsim_b->peer, nsim_a);
+
+out_err:
+ put_net(ns_b);
+ put_net(ns_a);
+ rtnl_unlock();
+
+ return !err ? count : err;
+}
+static BUS_ATTR_WO(link_device);
+
+static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+ struct netdevsim *nsim, *peer;
+ struct net_device *dev;
+ unsigned int ifidx;
+ int netnsfd, err;
+ struct net *ns;
+
+ err = sscanf(buf, "%u:%u", &netnsfd, &ifidx);
+ if (err != 2) {
+ pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n");
+ return -EINVAL;
+ }
+
+ ns = get_net_ns_by_fd(netnsfd);
+ if (IS_ERR(ns)) {
+ pr_err("Could not find netns with fd: %d\n", netnsfd);
+ return -EINVAL;
+ }
+
+ err = -EINVAL;
+ rtnl_lock();
+ dev = __dev_get_by_index(ns, ifidx);
+ if (!dev) {
+ pr_err("Could not find device with ifindex %u in netnsfd %d\n", ifidx, netnsfd);
+ goto out_put_netns;
+ }
+
+ if (!netdev_is_nsim(dev)) {
+ pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", ifidx, netnsfd);
+ goto out_put_netns;
+ }
+
+ err = 0;
+ nsim = netdev_priv(dev);
+ peer = rtnl_dereference(nsim->peer);
+ if (!peer)
+ goto out_put_netns;
+
+ RCU_INIT_POINTER(nsim->peer, NULL);
+ RCU_INIT_POINTER(peer->peer, NULL);
+
+out_put_netns:
+ put_net(ns);
+ rtnl_unlock();
+
+ return !err ? count : err;
+}
+static BUS_ATTR_WO(unlink_device);
+
static struct attribute *nsim_bus_attrs[] = {
&bus_attr_new_device.attr,
&bus_attr_del_device.attr,
+ &bus_attr_link_device.attr,
+ &bus_attr_unlink_device.attr,
NULL
};
ATTRIBUTE_GROUPS(nsim_bus);
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 77e8250282a5..9063f4f2971b 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -413,8 +413,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
void nsim_destroy(struct netdevsim *ns)
{
struct net_device *dev = ns->netdev;
+ struct netdevsim *peer;
rtnl_lock();
+ peer = rtnl_dereference(ns->peer);
+ if (peer)
+ RCU_INIT_POINTER(peer->peer, NULL);
+ RCU_INIT_POINTER(ns->peer, NULL);
unregister_netdevice(dev);
if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
nsim_macsec_teardown(ns);
@@ -427,6 +432,11 @@ void nsim_destroy(struct netdevsim *ns)
free_netdev(dev);
}
+bool netdev_is_nsim(struct net_device *dev)
+{
+ return dev->netdev_ops == &nsim_netdev_ops;
+}
+
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 028c825b86db..c8b45b0d955e 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -125,11 +125,13 @@ struct netdevsim {
} udp_ports;
struct nsim_ethtool ethtool;
+ struct netdevsim __rcu *peer;
};
struct netdevsim *
nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
void nsim_destroy(struct netdevsim *ns);
+bool netdev_is_nsim(struct net_device *dev);
void nsim_ethtool_init(struct netdevsim *ns);
--
2.39.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH net-next v13 2/4] netdevsim: forward skbs from one connected port to another
2024-02-22 5:08 [PATCH net-next v13 0/4] netdevsim: link and forward skbs between ports David Wei
2024-02-22 5:08 ` [PATCH net-next v13 1/4] netdevsim: allow two netdevsim ports to be connected David Wei
@ 2024-02-22 5:08 ` David Wei
2024-02-22 5:08 ` [PATCH net-next v13 3/4] netdevsim: add selftest for forwarding skb between connected ports David Wei
2024-02-22 5:08 ` [PATCH net-next v13 4/4] netdevsim: fix rtnetlink.sh selftest David Wei
3 siblings, 0 replies; 8+ messages in thread
From: David Wei @ 2024-02-22 5:08 UTC (permalink / raw)
To: Jakub Kicinski, Jiri Pirko, Sabrina Dubroca, maciek, horms,
netdev
Cc: David S. Miller, Eric Dumazet, Paolo Abeni
Forward skbs sent from one netdevsim port to its connected netdevsim
port using dev_forward_skb, in a spirit similar to veth.
Add a tx_dropped variable to struct netdevsim, tracking the number of
skbs that could not be forwarded using dev_forward_skb().
The xmit() function accessing the peer ptr is protected by an RCU read
critical section. The rcu_read_lock() is functionally redundant as since
v5.0 all softirqs are implicitly RCU read critical sections; but it is
useful for human readers.
If another CPU is concurrently in nsim_destroy(), then it will first set
the peer ptr to NULL. This does not affect any existing readers that
dereferenced a non-NULL peer. Then, in unregister_netdevice(), there is
a synchronize_rcu() before the netdev is actually unregistered and
freed. This ensures that any readers i.e. xmit() that got a non-NULL
peer will complete before the netdev is freed.
Any readers after the RCU_INIT_POINTER() but before synchronize_rcu()
will dereference NULL, making it safe.
The codepath to nsim_destroy() and nsim_create() takes both the newly
added nsim_dev_list_lock and rtnl_lock. This makes it safe with
concurrent calls to linking two netdevsims together.
Signed-off-by: David Wei <dw@davidwei.uk>
---
drivers/net/netdevsim/netdev.c | 30 +++++++++++++++++++++++++-----
drivers/net/netdevsim/netdevsim.h | 1 +
2 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 9063f4f2971b..d151859fa2c0 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -29,19 +29,39 @@
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct netdevsim *ns = netdev_priv(dev);
+ unsigned int len = skb->len;
+ struct netdevsim *peer_ns;
+ int ret = NETDEV_TX_OK;
if (!nsim_ipsec_tx(ns, skb))
goto out;
+ rcu_read_lock();
+ peer_ns = rcu_dereference(ns->peer);
+ if (!peer_ns) {
+ dev_kfree_skb(skb);
+ goto out_stats;
+ }
+
+ skb_tx_timestamp(skb);
+ if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
+ ret = NET_XMIT_DROP;
+
+out_stats:
+ rcu_read_unlock();
u64_stats_update_begin(&ns->syncp);
- ns->tx_packets++;
- ns->tx_bytes += skb->len;
+ if (ret == NET_XMIT_DROP) {
+ ns->tx_dropped++;
+ } else {
+ ns->tx_packets++;
+ ns->tx_bytes += len;
+ }
u64_stats_update_end(&ns->syncp);
+ return ret;
out:
dev_kfree_skb(skb);
-
- return NETDEV_TX_OK;
+ return ret;
}
static void nsim_set_rx_mode(struct net_device *dev)
@@ -70,6 +90,7 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
start = u64_stats_fetch_begin(&ns->syncp);
stats->tx_bytes = ns->tx_bytes;
stats->tx_packets = ns->tx_packets;
+ stats->tx_dropped = ns->tx_dropped;
} while (u64_stats_fetch_retry(&ns->syncp, start));
}
@@ -302,7 +323,6 @@ static void nsim_setup(struct net_device *dev)
eth_hw_addr_random(dev);
dev->tx_queue_len = 0;
- dev->flags |= IFF_NOARP;
dev->flags &= ~IFF_MULTICAST;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
IFF_NO_QUEUE;
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index c8b45b0d955e..553c4b9b4f63 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -98,6 +98,7 @@ struct netdevsim {
u64 tx_packets;
u64 tx_bytes;
+ u64 tx_dropped;
struct u64_stats_sync syncp;
struct nsim_bus_dev *nsim_bus_dev;
--
2.39.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH net-next v13 3/4] netdevsim: add selftest for forwarding skb between connected ports
2024-02-22 5:08 [PATCH net-next v13 0/4] netdevsim: link and forward skbs between ports David Wei
2024-02-22 5:08 ` [PATCH net-next v13 1/4] netdevsim: allow two netdevsim ports to be connected David Wei
2024-02-22 5:08 ` [PATCH net-next v13 2/4] netdevsim: forward skbs from one connected port to another David Wei
@ 2024-02-22 5:08 ` David Wei
2024-02-22 5:08 ` [PATCH net-next v13 4/4] netdevsim: fix rtnetlink.sh selftest David Wei
3 siblings, 0 replies; 8+ messages in thread
From: David Wei @ 2024-02-22 5:08 UTC (permalink / raw)
To: Jakub Kicinski, Jiri Pirko, Sabrina Dubroca, maciek, horms,
netdev
Cc: David S. Miller, Eric Dumazet, Paolo Abeni
Connect two netdevsim ports in different namespaces together, then send
packets between them using socat.
Signed-off-by: David Wei <dw@davidwei.uk>
---
.../selftests/drivers/net/netdevsim/Makefile | 1 +
.../selftests/drivers/net/netdevsim/peer.sh | 143 ++++++++++++++++++
2 files changed, 144 insertions(+)
create mode 100755 tools/testing/selftests/drivers/net/netdevsim/peer.sh
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
index 7a29a05bea8b..5bace0b7fb57 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/Makefile
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -10,6 +10,7 @@ TEST_PROGS = devlink.sh \
fib.sh \
hw_stats_l3.sh \
nexthop.sh \
+ peer.sh \
psample.sh \
tc-mq-visibility.sh \
udp_tunnel_nic.sh \
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
new file mode 100755
index 000000000000..aed62d9e6c0a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ../../../net/net_helper.sh
+
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+ NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_DEV_1_NAME netns nssv
+ ip link set $NSIM_DEV_2_NAME netns nscl
+
+ ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+ ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+ ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+socat_check || exit 4
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_DEV_1_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_1_FD}</var/run/netns/nssv
+NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)
+
+NSIM_DEV_2_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_2_FD}</var/run/netns/nscl
+NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netdevsim should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netnsid should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with self should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+# argument error checking
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "invalid arg should fail"
+ cleanup_ns
+ exit 1
+fi
+
+# send/recv packets
+
+tmp_file=$(mktemp)
+ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
+pid=$!
+res=0
+
+wait_local_port_listen nssv 1234 tcp
+
+echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234
+
+count=$(cat $tmp_file | wc -c)
+if [[ $count -ne 3 ]]; then
+ echo "expected 3 bytes, got $count"
+ res=1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+kill $pid
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit $res
--
2.39.3
^ permalink raw reply related [flat|nested] 8+ messages in thread