netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge
  2025-11-05 16:07 [PATCH net-next v2 00/14] " Dmitry Skorodumov
@ 2025-11-05 16:07 ` Dmitry Skorodumov
  0 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:07 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

To make IPv6 work with the learnable L2 bridge, the TX path
must be processed:
* replace the Source link-layer address in ndisc Solicitations,
* replace the Target link-layer address in ndisc Advertisements.

Nothing needs to be done in the RX path.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 129 +++++++++++++++++++++++++++----
 1 file changed, 115 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 547016e3ca8c..659aed8fc4ff 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -4,6 +4,7 @@
 
 #include <net/flow.h>
 #include <net/ip.h>
+#include <net/ip6_checksum.h>
 
 #include "ipvlan.h"
 
@@ -769,13 +770,122 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 	return ipvlan_process_outbound(skb);
 }
 
+static void ipvlan_macnat_patch_tx_arp(struct ipvl_dev *ipvlan,
+				       struct sk_buff *skb)
+{
+	struct arphdr *arph;
+	int addr_type = -1;
+
+	arph = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
+	if (arph && addr_type == IPVL_ARP) /* sender hw addr follows arphdr */
+		ether_addr_copy((u8 *)(arph + 1), ipvlan->phy_dev->dev_addr);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static u8 *ipvlan_search_icmp6_ll_addr(struct sk_buff *skb, u8 icmp_option)
+{
+	/* The caller made the whole IPv6 payload (payload_len) pullable. */
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct icmp6hdr *icmph;
+	int ndsize, curr_off;
+
+	icmph = (struct icmp6hdr *)(ip6h + 1);
+	ndsize = ntohs(ip6h->payload_len);
+	curr_off = sizeof(*icmph);
+
+	/* Non-RS messages carry an in6_addr ahead of the options. */
+	if (icmph->icmp6_type != NDISC_ROUTER_SOLICITATION)
+		curr_off += sizeof(struct in6_addr);
+
+	/* Walk the options; data[1] is the length in 8-octet units. */
+	while ((curr_off + 2) < ndsize) {
+		u8 *data = (u8 *)icmph + curr_off;
+		int opt_len = data[1] << 3;
+
+		if (unlikely(opt_len == 0))
+			return NULL;
+
+		if (data[0] != icmp_option) {
+			curr_off += opt_len;
+			continue;
+		}
+
+		/* Must hold a MAC address and fit inside the payload. */
+		if (unlikely(opt_len < ETH_ALEN + 2 ||
+			     curr_off + opt_len > ndsize))
+			return NULL;
+		return data + 2;
+	}
+
+	return NULL;
+}
+
+static void ipvlan_macnat_patch_tx_ipv6(struct ipvl_dev *ipvlan,
+					struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6h;
+	struct icmp6hdr *icmph;
+	u8 icmp_option;
+	u8 *lladdr;
+	u16 ndsize;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
+		return;
+
+	if (ipv6_hdr(skb)->nexthdr != NEXTHDR_ICMP)
+		return;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
+		return;
+
+	ip6h = ipv6_hdr(skb);
+	icmph = (struct icmp6hdr *)(ip6h + 1);
+
+	/* Patch Source-LL for solicitation, Target-LL for advertisement */
+	if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+	    icmph->icmp6_type == NDISC_ROUTER_SOLICITATION)
+		icmp_option = ND_OPT_SOURCE_LL_ADDR;
+	else if (icmph->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)
+		icmp_option = ND_OPT_TARGET_LL_ADDR;
+	else
+		return;
+
+	ndsize = ntohs(ip6h->payload_len);
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + ndsize)))
+		return;
+
+	lladdr = ipvlan_search_icmp6_ll_addr(skb, icmp_option);
+	if (!lladdr)
+		return;
+
+	ether_addr_copy(lladdr, ipvlan->phy_dev->dev_addr);
+
+	/* Headers may have moved in pskb_may_pull(): reload them. The payload
+	 * changed, so recompute the ICMPv6 checksum (nothing to offload).
+	 */
+	ip6h = ipv6_hdr(skb);
+	icmph = (struct icmp6hdr *)(ip6h + 1);
+	icmph->icmp6_cksum = 0;
+	icmph->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					     ndsize, IPPROTO_ICMPV6,
+					     csum_partial(icmph, ndsize, 0));
+	skb->ip_summed = CHECKSUM_NONE;
+}
+#else
+static void ipvlan_macnat_patch_tx_ipv6(struct ipvl_dev *ipvlan,
+					struct sk_buff *skb)
+{
+}
+#endif
+
 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipvl_dev *ipvlan;
 	struct ipvl_addr *addr;
 	struct ethhdr *eth;
 	bool same_mac_addr;
-	int addr_type;
+	int addr_type = -1;
 	void *lyr3h;
 
 	ipvlan = netdev_priv(dev);
@@ -862,8 +972,6 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		}
 	} else {
 		/* Packet to outside on learnable. Fix source eth-addr. */
-		struct sk_buff *orig_skb = skb;
-
 		skb = skb_unshare(skb, GFP_ATOMIC);
 		if (!skb)
 			return NET_XMIT_DROP;
@@ -872,17 +980,10 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		ether_addr_copy(skb_eth_hdr(skb)->h_source,
 				ipvlan->phy_dev->dev_addr);
 
-		/* ToDo: Handle ICMPv6 for neighbours discovery.*/
-		if (lyr3h && addr_type == IPVL_ARP) {
-			struct arphdr *arph;
-			/* must reparse new skb */
-			if (skb != orig_skb && lyr3h && addr_type == IPVL_ARP)
-				lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb,
-							  &addr_type);
-			arph = (struct arphdr *)lyr3h;
-			ether_addr_copy((u8 *)(arph + 1),
-					ipvlan->phy_dev->dev_addr);
-		}
+		if (addr_type == IPVL_ARP)
+			ipvlan_macnat_patch_tx_arp(ipvlan, skb);
+		else if (addr_type == IPVL_ICMPV6 || addr_type == IPVL_IPV6)
+			ipvlan_macnat_patch_tx_ipv6(ipvlan, skb);
 	}
 
 tx_phy_dev:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next v3 00/14] ipvlan: support mac-nat mode
@ 2025-11-05 16:14 Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat Dmitry Skorodumov
                   ` (14 more replies)
  0 siblings, 15 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev; +Cc: andrey.bokhanko, Dmitry Skorodumov

ipvlan: Add support of MAC-NAT translation in L2-bridge

Make it possible to create a link in L2_MACNAT mode: a learnable
bridge with MAC Address Translation. The IPs and MAC addresses are learned
from the TX packets of child interfaces.

Also, dev_add_pack() protocol is attached to the main port
to support communication from main to child interfaces.

This mode is intended for the desktop virtual machines, for
bridging to Wireless interfaces.

The mode should be specified while creating first child interface.
It is not possible to change it after this.

This functionality is quite often requested by users.

diff from v2:
- forgotten patches (10..14) added

diff from v1:

- changed name of the mode to be L2_MACNAT
- Fixed use of uninitialized variable, found by Intel CI/CD
- Fixed style problems with lines longer than 80 chars
- Try to use xmastree style of vars declarations
- Fixed broken intermediate compilation
- Added check, that child-ip doesn't use IP of the main port
- Added patch to ignore PACKET_LOOPBACK in handle_mode_l2()
- Some patches with style-refactoring of addr-event notifications

Dmitry Skorodumov (14):
  ipvlan: Preparation to support mac-nat
  ipvlan: Send mcasts out directly in ipvlan_xmit_mode_l2()
  ipvlan: Handle rx mcast-ip and unicast eth
  ipvlan: Added some kind of MAC NAT
  ipvlan: Forget all IP when device goes down
  ipvlan: Support GSO for port -> ipvlan
  ipvlan: Support IPv6 for learnable l2-bridge
  ipvlan: Make the addrs_lock be per port
  ipvlan: Take addr_lock in ipvlan_open()
  ipvlan: Don't allow children to use IPs of main
  ipvlan: const-specifier for functions that use iaddr
  ipvlan: Common code from v6/v4 validator_event
  ipvlan: common code to handle ipv6/ipv4 address events
  ipvlan: Ignore PACKET_LOOPBACK in handle_mode_l2()

 Documentation/networking/ipvlan.rst |  11 +
 drivers/net/ipvlan/ipvlan.h         |  45 ++-
 drivers/net/ipvlan/ipvlan_core.c    | 516 ++++++++++++++++++++++++---
 drivers/net/ipvlan/ipvlan_main.c    | 521 ++++++++++++++++++++++------
 include/uapi/linux/if_link.h        |   1 +
 5 files changed, 925 insertions(+), 169 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 23:32   ` Bagas Sanjaya
  2025-11-06 23:30   ` kernel test robot
  2025-11-05 16:14 ` [PATCH net-next 02/14] ipvlan: Send mcasts out directly in ipvlan_xmit_mode_l2() Dmitry Skorodumov
                   ` (13 subsequent siblings)
  14 siblings, 2 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, Simon Horman, linux-doc, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Jonathan Corbet, Andrew Lunn

Now it is possible to create link in L2_MACNAT mode. In this patch
it is just a learnable bridge: the IPs of slaves are learned
from TX-packets of child interfaces. But in later patches
it will be extended also to collect MAC-addresses for translation.

Also, dev_add_pack() protocol is attached to the main port
to support communication from main to child interfaces.

This mode is intended for the desktop virtual machines, for
bridging to Wireless interfaces.

The mode should be specified while creating first child interface.
It is not possible to change it after this.

The maximum number of addresses on a child interface is limited:
at most IPVLAN_MAX_MACNAT_ADDRS of each type (IPv4/IPv6).

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 Documentation/networking/ipvlan.rst |  11 ++
 drivers/net/ipvlan/ipvlan.h         |  25 ++++
 drivers/net/ipvlan/ipvlan_core.c    | 196 +++++++++++++++++++++++++---
 drivers/net/ipvlan/ipvlan_main.c    | 138 +++++++++++++++++---
 include/uapi/linux/if_link.h        |   1 +
 5 files changed, 337 insertions(+), 34 deletions(-)

diff --git a/Documentation/networking/ipvlan.rst b/Documentation/networking/ipvlan.rst
index 895d0ccfd596..e1a15ae87bdf 100644
--- a/Documentation/networking/ipvlan.rst
+++ b/Documentation/networking/ipvlan.rst
@@ -90,6 +90,17 @@ works in this mode and hence it is L3-symmetric (L3s). This will have slightly l
 performance but that shouldn't matter since you are choosing this mode over plain-L3
 mode to make conn-tracking work.
 
+4.4 L2_MACNAT mode:
+-------------------
+
+This mode is an extension of the L2 mode. It is primarily intended for
+desktop virtual machines, for bridging to wireless interfaces. In plain L2
+mode you have to configure IPs on the slave interfaces to make it possible
+to mux frames between slaves and master. In L2_MACNAT mode, ipvlan learns
+the IPv4/IPv6 addresses itself from outgoing packets. Moreover, a
+dev_add_pack() handler is installed on the master interface to capture
+outgoing frames and mux them to slave interfaces, if needed.
+
 5. Mode flags:
 ==============
 
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 50de3ee204db..9db92ee11999 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -39,6 +39,8 @@
 
 #define IPVLAN_QBACKLOG_LIMIT	1000
 
+#define IPVLAN_MAX_MACNAT_ADDRS	4
+
 typedef enum {
 	IPVL_IPV6 = 0,
 	IPVL_ICMPV6,
@@ -83,6 +85,7 @@ struct ipvl_addr {
 	struct hlist_node	hlnode;  /* Hash-table linkage */
 	struct list_head	anode;   /* logical-interface linkage */
 	ipvl_hdr_type		atype;
+	u64			tstamp;
 	struct rcu_head		rcu;
 };
 
@@ -91,6 +94,7 @@ struct ipvl_port {
 	possible_net_t		pnet;
 	struct hlist_head	hlhead[IPVLAN_HASH_SIZE];
 	struct list_head	ipvlans;
+	struct packet_type	ipvl_ptype;
 	u16			mode;
 	u16			flags;
 	u16			dev_id_start;
@@ -103,6 +107,7 @@ struct ipvl_port {
 
 struct ipvl_skb_cb {
 	bool tx_pkt;
+	void *mark;
 };
 #define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0]))
 
@@ -151,12 +156,32 @@ static inline void ipvlan_clear_vepa(struct ipvl_port *port)
 	port->flags &= ~IPVLAN_F_VEPA;
 }
 
+static inline bool ipvlan_is_macnat(struct ipvl_port *port)
+{
+	return port->mode == IPVLAN_MODE_L2_MACNAT;
+}
+
+static inline void ipvlan_mark_skb(struct sk_buff *skb, struct net_device *dev)
+{
+	IPVL_SKB_CB(skb)->mark = dev;
+}
+
+static inline bool ipvlan_is_skb_marked(struct sk_buff *skb,
+					struct net_device *dev)
+{
+	return (IPVL_SKB_CB(skb)->mark == dev);
+}
+
 void ipvlan_init_secret(void);
 unsigned int ipvlan_mac_hash(const unsigned char *addr);
 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb);
+void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev);
 void ipvlan_process_multicast(struct work_struct *work);
+void ipvlan_multicast_enqueue(struct ipvl_port *port,
+			      struct sk_buff *skb, bool tx_pkt);
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
+int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6);
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 				   const void *iaddr, bool is_v6);
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index d7e3ddbcab6f..06c1c4fdc4f6 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -284,6 +284,18 @@ void ipvlan_process_multicast(struct work_struct *work)
 		rcu_read_unlock();
 
 		if (tx_pkt) {
+			if (ipvlan_is_macnat(port)) {
+				/* Inject packet to main dev */
+				nskb = skb_clone(skb, GFP_ATOMIC);
+				if (nskb) {
+					local_bh_disable();
+					nskb->pkt_type = pkt_type;
+					nskb->dev = port->dev;
+					dev_forward_skb(port->dev, nskb);
+					local_bh_enable();
+				}
+			}
+
 			/* If the packet originated here, send it out. */
 			skb->dev = port->dev;
 			skb->pkt_type = pkt_type;
@@ -299,7 +311,7 @@ void ipvlan_process_multicast(struct work_struct *work)
 	}
 }
 
-static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
+void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
 {
 	bool xnet = true;
 
@@ -414,6 +426,107 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
 	return addr;
 }
 
+static bool is_ipv4_usable(__be32 addr)
+{
+	return !ipv4_is_lbcast(addr) && !ipv4_is_multicast(addr) &&
+	       !ipv4_is_zeronet(addr);
+}
+
+static bool is_ipv6_usable(const struct in6_addr *addr)
+{
+	return !ipv6_addr_is_multicast(addr) && !ipv6_addr_loopback(addr) &&
+	       !ipv6_addr_any(addr);
+}
+
+static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6)
+{
+	const ipvl_hdr_type atype = is_v6 ? IPVL_IPV6 : IPVL_IPV4;
+	struct ipvl_addr *ipvladdr, *oldest = NULL;
+	unsigned int naddrs = 0;
+
+	spin_lock_bh(&ipvlan->addrs_lock);
+	if (ipvlan_addr_busy(ipvlan->port, addr, is_v6))
+		goto out_unlock;
+
+	/* Writer side: addrs_lock is held, so no RCU iterator is needed. */
+	list_for_each_entry(ipvladdr, &ipvlan->addrs, anode) {
+		if (ipvladdr->atype != atype)
+			continue;
+		naddrs++;
+		if (!oldest || time_before64(ipvladdr->tstamp, oldest->tstamp))
+			oldest = ipvladdr;
+	}
+
+	if (naddrs < IPVLAN_MAX_MACNAT_ADDRS) {
+		oldest = NULL;
+	} else {
+		/* Per-type limit reached: evict the oldest entry. */
+		ipvlan_ht_addr_del(oldest);
+		list_del_rcu(&oldest->anode);
+	}
+
+	ipvlan_add_addr(ipvlan, addr, is_v6);
+out_unlock:
+	spin_unlock_bh(&ipvlan->addrs_lock);
+	if (oldest)
+		kfree_rcu(oldest, rcu);
+}
+
+static void ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
+			      int addr_type)
+{
+	void *addr = NULL;
+	bool is_v6;
+
+	switch (addr_type) {
+#if IS_ENABLED(CONFIG_IPV6)
+	/* No need to handle IPVL_ICMPV6, it never has valid src-address. */
+	case IPVL_IPV6: {
+		struct ipv6hdr *ip6h;
+
+		ip6h = (struct ipv6hdr *)lyr3h;
+		if (!is_ipv6_usable(&ip6h->saddr))
+			return;
+		is_v6 = true;
+		addr = &ip6h->saddr;
+		break;
+	}
+#endif
+	case IPVL_IPV4: {
+		struct iphdr *ip4h;
+		__be32 *i4addr;
+
+		ip4h = (struct iphdr *)lyr3h;
+		i4addr = &ip4h->saddr;
+		if (!is_ipv4_usable(*i4addr))
+			return;
+		is_v6 = false;
+		addr = i4addr;
+		break;
+	}
+	case IPVL_ARP: {
+		struct arphdr *arph = (struct arphdr *)lyr3h;
+		unsigned char *arp_ptr;
+		__be32 sip;
+
+		/* Sender IP follows the hw address and may be unaligned. */
+		arp_ptr = (unsigned char *)(arph + 1);
+		arp_ptr += ipvlan->port->dev->addr_len;
+		memcpy(&sip, arp_ptr, sizeof(sip));
+		if (!is_ipv4_usable(sip))
+			return;
+		is_v6 = false;
+		addr = arp_ptr;
+		break;
+	}
+	default:
+		return;
+	}
+
+	if (!ipvlan_ht_addr_lookup(ipvlan->port, addr, is_v6))
+		__ipvlan_addr_learn(ipvlan, addr, is_v6);
+}
+
 static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -561,8 +674,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
 	return ret;
 }
 
-static void ipvlan_multicast_enqueue(struct ipvl_port *port,
-				     struct sk_buff *skb, bool tx_pkt)
+void ipvlan_multicast_enqueue(struct ipvl_port *port,
+			      struct sk_buff *skb, bool tx_pkt)
 {
 	if (skb->protocol == htons(ETH_P_PAUSE)) {
 		kfree_skb(skb);
@@ -618,15 +731,61 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 
 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 {
-	const struct ipvl_dev *ipvlan = netdev_priv(dev);
-	struct ethhdr *eth = skb_eth_hdr(skb);
+	struct ipvl_dev *ipvlan;
 	struct ipvl_addr *addr;
-	void *lyr3h;
+	struct ethhdr *eth;
+	bool same_mac_addr;
 	int addr_type;
+	void *lyr3h;
+
+	ipvlan = netdev_priv(dev);
+	eth = skb_eth_hdr(skb);
+	if (ipvlan_is_macnat(ipvlan->port) &&
+	    ether_addr_equal(eth->h_source, dev->dev_addr)) {
+		/* ignore tx-packets from host */
+		goto out_drop;
+	}
+
+	same_mac_addr = ether_addr_equal(eth->h_dest, eth->h_source);
 
-	if (!ipvlan_is_vepa(ipvlan->port) &&
-	    ether_addr_equal(eth->h_dest, eth->h_source)) {
+	lyr3h = NULL;
+	if (!ipvlan_is_vepa(ipvlan->port)) {
 		lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
+
+		if (ipvlan_is_macnat(ipvlan->port)) {
+			if (lyr3h)
+				ipvlan_addr_learn(ipvlan, lyr3h, addr_type);
+			/* Mark SKB in advance */
+			skb = skb_share_check(skb, GFP_ATOMIC);
+			if (!skb)
+				return NET_XMIT_DROP;
+			ipvlan_mark_skb(skb, ipvlan->phy_dev);
+		}
+	}
+
+	if (is_multicast_ether_addr(eth->h_dest)) {
+		skb_reset_mac_header(skb);
+		ipvlan_skb_crossing_ns(skb, NULL);
+		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
+		return NET_XMIT_SUCCESS;
+	}
+
+	if (ipvlan_is_vepa(ipvlan->port))
+		goto tx_phy_dev;
+
+	if (!same_mac_addr &&
+	    ether_addr_equal(eth->h_dest, ipvlan->phy_dev->dev_addr)) {
+		/* It is a packet from child with destination to main port.
+		 * Pass it to main.
+		 */
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (!skb)
+			return NET_XMIT_DROP;
+		skb->pkt_type = PACKET_HOST;
+		skb->dev = ipvlan->phy_dev;
+		dev_forward_skb(ipvlan->phy_dev, skb);
+		return NET_XMIT_SUCCESS;
+	} else if (same_mac_addr) {
 		if (lyr3h) {
 			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
 			if (addr) {
@@ -649,16 +808,14 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		 */
 		dev_forward_skb(ipvlan->phy_dev, skb);
 		return NET_XMIT_SUCCESS;
-
-	} else if (is_multicast_ether_addr(eth->h_dest)) {
-		skb_reset_mac_header(skb);
-		ipvlan_skb_crossing_ns(skb, NULL);
-		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
-		return NET_XMIT_SUCCESS;
 	}
 
+tx_phy_dev:
 	skb->dev = ipvlan->phy_dev;
 	return dev_queue_xmit(skb);
+out_drop:
+	consume_skb(skb);
+	return NET_XMIT_DROP;
 }
 
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -674,6 +831,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	switch(port->mode) {
 	case IPVLAN_MODE_L2:
+	case IPVLAN_MODE_L2_MACNAT:
 		return ipvlan_xmit_mode_l2(skb, dev);
 	case IPVLAN_MODE_L3:
 #ifdef CONFIG_IPVLAN_L3S
@@ -737,17 +895,22 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 	struct ethhdr *eth = eth_hdr(skb);
 	rx_handler_result_t ret = RX_HANDLER_PASS;
 
+	/* Ignore already seen packets. */
+	if (ipvlan_is_skb_marked(skb, port->dev))
+		return RX_HANDLER_PASS;
+
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		if (ipvlan_external_frame(skb, port)) {
-			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
 			/* External frames are queued for device local
 			 * distribution, but a copy is given to master
 			 * straight away to avoid sending duplicates later
 			 * when work-queue processes this frame. This is
 			 * achieved by returning RX_HANDLER_PASS.
 			 */
+			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
 			if (nskb) {
+				ipvlan_mark_skb(skb, port->dev);
 				ipvlan_skb_crossing_ns(nskb, NULL);
 				ipvlan_multicast_enqueue(port, nskb, false);
 			}
@@ -770,6 +933,7 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
 
 	switch (port->mode) {
 	case IPVLAN_MODE_L2:
+	case IPVLAN_MODE_L2_MACNAT:
 		return ipvlan_handle_mode_l2(pskb, port);
 	case IPVLAN_MODE_L3:
 		return ipvlan_handle_mode_l3(pskb, port);
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 660f3db11766..4535a9ab50da 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -16,6 +16,15 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,
 
 	ASSERT_RTNL();
 	if (port->mode != nval) {
+		/* Don't allow switch off the learnable bridge mode.
+		 * Flags also must be set from the first port-link setup.
+		 */
+		if (port->mode == IPVLAN_MODE_L2_MACNAT ||
+		    (nval == IPVLAN_MODE_L2_MACNAT && port->count > 1)) {
+			netdev_err(port->dev, "MACNAT mode cannot be changed.\n");
+			return -EINVAL;
+		}
+
 		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
 			flags = ipvlan->dev->flags;
 			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
@@ -40,7 +49,10 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,
 			ipvlan_l3s_unregister(port);
 		}
 		port->mode = nval;
+		if (port->mode == IPVLAN_MODE_L2_MACNAT)
+			dev_add_pack(&port->ipvl_ptype);
 	}
+
 	return 0;
 
 fail:
@@ -59,6 +71,66 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,
 	return err;
 }
 
+static int ipvlan_port_rcv(struct sk_buff *skb, struct net_device *wdev,
+			   struct packet_type *pt, struct net_device *orig_wdev)
+{
+	struct ipvl_port *port;
+	struct ipvl_addr *addr;
+	struct ethhdr *eth;
+	int addr_type;
+	void *lyr3h;
+
+	port = container_of(pt, struct ipvl_port, ipvl_ptype);
+	/* We are interested only in outgoing packets.
+	 * rx-path is handled in rx_handler().
+	 */
+	if (skb->pkt_type != PACKET_OUTGOING ||
+	    ipvlan_is_skb_marked(skb, port->dev))
+		goto out;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		goto no_mem;
+
+	/* data should point to eth-header */
+	skb_push(skb, skb->data - skb_mac_header(skb));
+	skb->dev = port->dev;
+	eth = eth_hdr(skb);
+
+	if (is_multicast_ether_addr(eth->h_dest)) {
+		ipvlan_skb_crossing_ns(skb, NULL);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+		skb->pkt_type = PACKET_HOST;
+		ipvlan_mark_skb(skb, port->dev);
+		ipvlan_multicast_enqueue(port, skb, false);
+		return NET_RX_SUCCESS;
+	}
+
+	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
+	if (!lyr3h)
+		goto out;
+
+	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+	if (addr) {
+		struct ipvl_dev *ipvlan = addr->master;
+		int ret, len;
+
+		ipvlan_skb_crossing_ns(skb, ipvlan->dev);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+		skb->pkt_type = PACKET_HOST;
+		ipvlan_mark_skb(skb, port->dev);
+		len = skb->len + ETH_HLEN;
+		ret = netif_rx(skb);
+		ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, false);
+		return NET_RX_SUCCESS;
+	}
+
+out:
+	dev_kfree_skb(skb);
+no_mem:
+	return NET_RX_DROP;
+}
+
 static int ipvlan_port_create(struct net_device *dev)
 {
 	struct ipvl_port *port;
@@ -84,6 +156,11 @@ static int ipvlan_port_create(struct net_device *dev)
 	if (err)
 		goto err;
 
+	port->ipvl_ptype.func = ipvlan_port_rcv;
+	port->ipvl_ptype.type = htons(ETH_P_ALL);
+	port->ipvl_ptype.dev = dev;
+	port->ipvl_ptype.list.prev = LIST_POISON2;
+
 	netdev_hold(dev, &port->dev_tracker, GFP_KERNEL);
 	return 0;
 
@@ -100,6 +177,8 @@ static void ipvlan_port_destroy(struct net_device *dev)
 	netdev_put(dev, &port->dev_tracker);
 	if (port->mode == IPVLAN_MODE_L3S)
 		ipvlan_l3s_unregister(port);
+	if (port->ipvl_ptype.list.prev != LIST_POISON2)
+		dev_remove_pack(&port->ipvl_ptype);
 	netdev_rx_handler_unregister(dev);
 	cancel_work_sync(&port->wq);
 	while ((skb = __skb_dequeue(&port->backlog)) != NULL) {
@@ -189,10 +268,13 @@ static int ipvlan_open(struct net_device *dev)
 	else
 		dev->flags &= ~IFF_NOARP;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
-		ipvlan_ht_addr_add(ipvlan, addr);
-	rcu_read_unlock();
+	/* for learnable, addresses will be obtained from tx-packets. */
+	if (!ipvlan_is_macnat(ipvlan->port)) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
+			ipvlan_ht_addr_add(ipvlan, addr);
+		rcu_read_unlock();
+	}
 
 	return 0;
 }
@@ -581,11 +663,21 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params,
 	INIT_LIST_HEAD(&ipvlan->addrs);
 	spin_lock_init(&ipvlan->addrs_lock);
 
-	/* TODO Probably put random address here to be presented to the
-	 * world but keep using the physical-dev address for the outgoing
-	 * packets.
+	/* Flags are per port and latest update overrides. User has
+	 * to be consistent in setting it just like the mode attribute.
 	 */
-	eth_hw_addr_set(dev, phy_dev->dev_addr);
+	if (data && data[IFLA_IPVLAN_MODE])
+		mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
+
+	if (mode != IPVLAN_MODE_L2_MACNAT) {
+		/* TODO Probably put random address here to be presented to the
+		 * world but keep using the physical-dev addr for the outgoing
+		 * packets.
+		 */
+		eth_hw_addr_set(dev, phy_dev->dev_addr);
+	} else {
+		eth_hw_addr_random(dev);
+	}
 
 	dev->priv_flags |= IFF_NO_RX_HANDLER;
 
@@ -597,6 +689,9 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params,
 	port = ipvlan_port_get_rtnl(phy_dev);
 	ipvlan->port = port;
 
+	if (data && data[IFLA_IPVLAN_FLAGS])
+		port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
+
 	/* If the port-id base is at the MAX value, then wrap it around and
 	 * begin from 0x1 again. This may be due to a busy system where lots
 	 * of slaves are getting created and deleted.
@@ -625,19 +720,13 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params,
 	if (err)
 		goto remove_ida;
 
-	/* Flags are per port and latest update overrides. User has
-	 * to be consistent in setting it just like the mode attribute.
-	 */
-	if (data && data[IFLA_IPVLAN_FLAGS])
-		port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]);
-
-	if (data && data[IFLA_IPVLAN_MODE])
-		mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
-
 	err = ipvlan_set_port_mode(port, mode, extack);
 	if (err)
 		goto unlink_netdev;
 
+	if (ipvlan_is_macnat(port))
+		dev_set_allmulti(dev, 1);
+
 	list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
 	netif_stacked_transfer_operstate(phy_dev, dev);
 	return 0;
@@ -657,6 +746,9 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ipvl_addr *addr, *next;
 
+	if (ipvlan_is_macnat(ipvlan->port))
+		dev_set_allmulti(dev, -1);
+
 	spin_lock_bh(&ipvlan->addrs_lock);
 	list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
 		ipvlan_ht_addr_del(addr);
@@ -793,6 +885,9 @@ static int ipvlan_device_event(struct notifier_block *unused,
 		break;
 
 	case NETDEV_CHANGEADDR:
+		if (ipvlan_is_macnat(port))
+			break;
+
 		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
 			eth_hw_addr_set(ipvlan->dev, dev->dev_addr);
 			call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev);
@@ -813,7 +908,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
 }
 
 /* the caller must held the addrs lock */
-static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
+int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
 	struct ipvl_addr *addr;
 
@@ -822,6 +917,7 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 		return -ENOMEM;
 
 	addr->master = ipvlan;
+	addr->tstamp = get_jiffies_64();
 	if (!is_v6) {
 		memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
 		addr->atype = IPVL_IPV4;
@@ -928,6 +1024,9 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 	if (!ipvlan_is_valid_dev(dev))
 		return NOTIFY_DONE;
 
+	if (ipvlan_is_macnat(ipvlan->port))
+		return notifier_from_errno(-EADDRNOTAVAIL);
+
 	switch (event) {
 	case NETDEV_UP:
 		if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) {
@@ -999,6 +1098,9 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused,
 	if (!ipvlan_is_valid_dev(dev))
 		return NOTIFY_DONE;
 
+	if (ipvlan_is_macnat(ipvlan->port))
+		return notifier_from_errno(-EADDRNOTAVAIL);
+
 	switch (event) {
 	case NETDEV_UP:
 		if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) {
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 3b491d96e52e..64ecb1d739d0 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1269,6 +1269,7 @@ enum ipvlan_mode {
 	IPVLAN_MODE_L2 = 0,
 	IPVLAN_MODE_L3,
 	IPVLAN_MODE_L3S,
+	IPVLAN_MODE_L2_MACNAT,
 	IPVLAN_MODE_MAX
 };
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 02/14] ipvlan: Send mcasts out directly in ipvlan_xmit_mode_l2()
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 03/14] ipvlan: Handle rx mcast-ip and unicast eth Dmitry Skorodumov
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

Mcasts are now sent to the external net directly in
ipvlan_xmit_mode_l2(). For tx-packets, ipvlan_process_multicast()
just distributes them to local ifaces.

This makes life a bit easier for further patches, where outgoing
mcasts must be patched with the proper MAC address.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 06c1c4fdc4f6..2ff681de8105 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -285,9 +285,10 @@ void ipvlan_process_multicast(struct work_struct *work)
 
 		if (tx_pkt) {
 			if (ipvlan_is_macnat(port)) {
-				/* Inject packet to main dev */
+				/* Inject as rx-packet to main dev. */
 				nskb = skb_clone(skb, GFP_ATOMIC);
 				if (nskb) {
+					consumed = true;
 					local_bh_disable();
 					nskb->pkt_type = pkt_type;
 					nskb->dev = port->dev;
@@ -295,17 +296,13 @@ void ipvlan_process_multicast(struct work_struct *work)
 					local_bh_enable();
 				}
 			}
-
-			/* If the packet originated here, send it out. */
-			skb->dev = port->dev;
-			skb->pkt_type = pkt_type;
-			dev_queue_xmit(skb);
-		} else {
-			if (consumed)
-				consume_skb(skb);
-			else
-				kfree_skb(skb);
+			/* Tx was done in ipvlan_xmit_mode_l2(). */
 		}
+		if (consumed)
+			consume_skb(skb);
+		else
+			kfree_skb(skb);
+
 		dev_put(dev);
 		cond_resched();
 	}
@@ -764,10 +761,15 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	if (is_multicast_ether_addr(eth->h_dest)) {
-		skb_reset_mac_header(skb);
-		ipvlan_skb_crossing_ns(skb, NULL);
-		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
-		return NET_XMIT_SUCCESS;
+		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+		if (nskb) {
+			skb_reset_mac_header(nskb);
+			ipvlan_skb_crossing_ns(nskb, NULL);
+			ipvlan_multicast_enqueue(ipvlan->port, nskb, true);
+		}
+
+		goto tx_phy_dev;
 	}
 
 	if (ipvlan_is_vepa(ipvlan->port))
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 03/14] ipvlan: Handle rx mcast-ip and unicast eth
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 02/14] ipvlan: Send mcasts out directly in ipvlan_xmit_mode_l2() Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 04/14] ipvlan: Added some kind of MAC NAT Dmitry Skorodumov
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

Some WiFi environments sometimes send mcast packets
with a unicast eth_dst. In this case, forcibly replace eth_dst with
the bcast address if the bridge is in L2E mode.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 60 ++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 2ff681de8105..913b2f2c62fa 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -890,18 +890,69 @@ static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
 	return ret;
 }
 
+static bool ipvlan_is_mcast(struct ipvl_port *port, void *lyr3h, int addr_type)
+{
+	switch (addr_type) {
+#if IS_ENABLED(CONFIG_IPV6)
+	/* No need to handle ICMPv6. This type is used for DAD only. */
+	case IPVL_IPV6:
+		return !is_ipv6_usable(&((struct ipv6hdr *)lyr3h)->daddr);
+#endif
+	case IPVL_IPV4: {
+		/* Treat mcast, bcast and zero as multicast. */
+		__be32 i4addr = ((struct iphdr *)lyr3h)->daddr;
+
+		return !is_ipv4_usable(i4addr);
+	}
+	case IPVL_ARP: {
+		struct arphdr *arph;
+		unsigned char *arp_ptr;
+		__be32 i4addr;
+
+		arph = (struct arphdr *)lyr3h;
+		arp_ptr = (unsigned char *)(arph + 1);
+		arp_ptr += (2 * port->dev->addr_len) + 4;
+		i4addr = *(__be32 *)arp_ptr;
+		return !is_ipv4_usable(i4addr);
+	}
+	}
+	return false;
+}
+
+static bool ipvlan_is_l2_mcast(struct ipvl_port *port, struct sk_buff *skb,
+			       bool *need_eth_fix)
+{
+	int addr_type;
+	void *lyr3h;
+
+	/* In some wifi environments unicast dest address means nothing.
+	 * IP still can be a mcast and frame should be treated as mcast.
+	 */
+	*need_eth_fix = false;
+	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
+		return true;
+
+	if (!ipvlan_is_macnat(port))
+		return false;
+
+	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
+	*need_eth_fix = lyr3h && ipvlan_is_mcast(port, lyr3h, addr_type);
+
+	return *need_eth_fix;
+}
+
 static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 						 struct ipvl_port *port)
 {
 	struct sk_buff *skb = *pskb;
-	struct ethhdr *eth = eth_hdr(skb);
 	rx_handler_result_t ret = RX_HANDLER_PASS;
+	bool need_eth_fix;
 
 	/* Ignore already seen packets. */
 	if (ipvlan_is_skb_marked(skb, port->dev))
 		return RX_HANDLER_PASS;
 
-	if (is_multicast_ether_addr(eth->h_dest)) {
+	if (ipvlan_is_l2_mcast(port, skb, &need_eth_fix)) {
 		if (ipvlan_external_frame(skb, port)) {
 			/* External frames are queued for device local
 			 * distribution, but a copy is given to master
@@ -912,6 +963,11 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
 
 			if (nskb) {
+				if (need_eth_fix) {
+					struct ethhdr *eth = eth_hdr(nskb);
+
+					eth_broadcast_addr(eth->h_dest);
+				}
 				ipvlan_mark_skb(skb, port->dev);
 				ipvlan_skb_crossing_ns(nskb, NULL);
 				ipvlan_multicast_enqueue(port, nskb, false);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 04/14] ipvlan: Added some kind of MAC NAT
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (2 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 03/14] ipvlan: Handle rx mcast-ip and unicast eth Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 05/14] ipvlan: Forget all IP when device goes down Dmitry Skorodumov
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

We remember the SRC MAC address of outgoing packets
together with IP addresses.

On RX, we patch the MAC address with the remembered MAC.

We do patching for both eth_dst and ARPs.

ToDo: support IPv6 Neighbours Discovery.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan.h      |   5 +-
 drivers/net/ipvlan/ipvlan_core.c | 151 +++++++++++++++++++++++--------
 drivers/net/ipvlan/ipvlan_main.c |  11 ++-
 3 files changed, 123 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 9db92ee11999..c690e313ef6b 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -80,6 +80,7 @@ struct ipvl_addr {
 		struct in6_addr	ip6;	 /* IPv6 address on logical interface */
 		struct in_addr	ip4;	 /* IPv4 address on logical interface */
 	} ipu;
+	u8			hwaddr[ETH_ALEN];
 #define ip6addr	ipu.ip6
 #define ip4addr ipu.ip4
 	struct hlist_node	hlnode;  /* Hash-table linkage */
@@ -181,7 +182,9 @@ void ipvlan_multicast_enqueue(struct ipvl_port *port,
 			      struct sk_buff *skb, bool tx_pkt);
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
-int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6);
+int ipvlan_add_addr(struct ipvl_dev *ipvlan,
+		    void *iaddr, bool is_v6, const u8 *hwaddr);
+void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6);
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 				   const void *iaddr, bool is_v6);
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 913b2f2c62fa..547016e3ca8c 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -320,8 +320,36 @@ void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
 		skb->dev = dev;
 }
 
-static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
-			    bool local)
+static int ipvlan_macnat_rx_skb(struct ipvl_addr *addr, int addr_type,
+				struct sk_buff *skb)
+{
+	/* Here we have non-shared skb and free to modify it. */
+	struct ethhdr *eth = eth_hdr(skb);
+
+	if (addr_type == IPVL_ARP) {
+		struct arphdr *arph = arp_hdr(skb);
+		u8 *arp_ptr = (u8 *)(arph + 1);
+		u8 *dsthw = arp_ptr + addr->master->dev->addr_len + sizeof(u32);
+		const u8 *phy_addr = addr->master->phy_dev->dev_addr;
+
+		/* Some access points may do ARP-proxy and answers us back.
+		 * Client may treat this as address-conflict.
+		 */
+		if (ether_addr_equal(eth->h_source, phy_addr) &&
+		    ether_addr_equal(eth->h_dest, phy_addr) &&
+		    is_zero_ether_addr(dsthw)) {
+			return NET_RX_DROP;
+		}
+		if (ether_addr_equal(dsthw, phy_addr))
+			ether_addr_copy(dsthw, addr->hwaddr);
+	}
+
+	ether_addr_copy(eth->h_dest, addr->hwaddr);
+	return NET_RX_SUCCESS;
+}
+
+static int ipvlan_rcv_frame(struct ipvl_addr *addr, int addr_type,
+			    struct sk_buff **pskb, bool local)
 {
 	struct ipvl_dev *ipvlan = addr->master;
 	struct net_device *dev = ipvlan->dev;
@@ -331,10 +359,8 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
 	struct sk_buff *skb = *pskb;
 
 	len = skb->len + ETH_HLEN;
-	/* Only packets exchanged between two local slaves need to have
-	 * device-up check as well as skb-share check.
-	 */
-	if (local) {
+
+	if (local || ipvlan_is_macnat(ipvlan->port)) {
 		if (unlikely(!(dev->flags & IFF_UP))) {
 			kfree_skb(skb);
 			goto out;
@@ -345,6 +371,13 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
 			goto out;
 
 		*pskb = skb;
+		if (!local && ipvlan_is_macnat(ipvlan->port)) {
+			if (ipvlan_macnat_rx_skb(addr, addr_type, skb) !=
+			    NET_RX_SUCCESS) {
+				kfree_skb(skb);
+				goto out;
+			}
+		}
 	}
 
 	if (local) {
@@ -435,7 +468,8 @@ static bool is_ipv6_usable(const struct in6_addr *addr)
 	       !ipv6_addr_any(addr);
 }
 
-static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6)
+static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6,
+				const u8 *hwaddr)
 {
 	const ipvl_hdr_type atype = is_v6 ? IPVL_IPV6 : IPVL_IPV4;
 	struct ipvl_addr *ipvladdr, *oldest = NULL;
@@ -461,7 +495,7 @@ static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6)
 		list_del_rcu(&oldest->anode);
 	}
 
-	ipvlan_add_addr(ipvlan, addr, is_v6);
+	ipvlan_add_addr(ipvlan, addr, is_v6, hwaddr);
 
 out_unlock:
 	spin_unlock_bh(&ipvlan->addrs_lock);
@@ -470,8 +504,9 @@ static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6)
 }
 
 static void ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
-			      int addr_type)
+			      int addr_type, const u8 *hwaddr)
 {
+	struct ipvl_addr *ipvladdr;
 	void *addr = NULL;
 	bool is_v6;
 
@@ -520,8 +555,16 @@ static void ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
 		return;
 	}
 
-	if (!ipvlan_ht_addr_lookup(ipvlan->port, addr, is_v6))
-		__ipvlan_addr_learn(ipvlan, addr, is_v6);
+	/* handle situation when MAC changed, but IP is the same. */
+	ipvladdr = ipvlan_ht_addr_lookup(ipvlan->port, addr, is_v6);
+	if (ipvladdr && !ether_addr_equal(ipvladdr->hwaddr, hwaddr)) {
+		/* del_addr is safe to call, because we are inside xmit. */
+		ipvlan_del_addr(ipvladdr->master, addr, is_v6);
+		ipvladdr = NULL;
+	}
+
+	if (!ipvladdr)
+		__ipvlan_addr_learn(ipvlan, addr, is_v6, hwaddr);
 }
 
 static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
@@ -717,7 +760,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 				consume_skb(skb);
 				return NET_XMIT_DROP;
 			}
-			ipvlan_rcv_frame(addr, &skb, true);
+			ipvlan_rcv_frame(addr, addr_type, &skb, true);
 			return NET_XMIT_SUCCESS;
 		}
 	}
@@ -744,6 +787,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	same_mac_addr = ether_addr_equal(eth->h_dest, eth->h_source);
+	if (same_mac_addr && ipvlan_is_macnat(ipvlan->port))
+		goto out_drop;
 
 	lyr3h = NULL;
 	if (!ipvlan_is_vepa(ipvlan->port)) {
@@ -751,7 +796,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 
 		if (ipvlan_is_macnat(ipvlan->port)) {
 			if (lyr3h)
-				ipvlan_addr_learn(ipvlan, lyr3h, addr_type);
+				ipvlan_addr_learn(ipvlan, lyr3h, addr_type,
+						  eth->h_source);
 			/* Mark SKB in advance */
 			skb = skb_share_check(skb, GFP_ATOMIC);
 			if (!skb)
@@ -769,47 +815,74 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 			ipvlan_multicast_enqueue(ipvlan->port, nskb, true);
 		}
 
-		goto tx_phy_dev;
+		goto tx_frame_out;
 	}
 
 	if (ipvlan_is_vepa(ipvlan->port))
 		goto tx_phy_dev;
 
-	if (!same_mac_addr &&
+	if (ipvlan_is_macnat(ipvlan->port) &&
 	    ether_addr_equal(eth->h_dest, ipvlan->phy_dev->dev_addr)) {
 		/* It is a packet from child with destination to main port.
 		 * Pass it to main.
 		 */
-		skb = skb_share_check(skb, GFP_ATOMIC);
-		if (!skb)
-			return NET_XMIT_DROP;
 		skb->pkt_type = PACKET_HOST;
 		skb->dev = ipvlan->phy_dev;
 		dev_forward_skb(ipvlan->phy_dev, skb);
 		return NET_XMIT_SUCCESS;
-	} else if (same_mac_addr) {
-		if (lyr3h) {
-			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
-			if (addr) {
-				if (ipvlan_is_private(ipvlan->port)) {
-					consume_skb(skb);
-					return NET_XMIT_DROP;
-				}
-				ipvlan_rcv_frame(addr, &skb, true);
-				return NET_XMIT_SUCCESS;
-			}
+	}
+
+	if (lyr3h) {
+		addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
+		if (addr) {
+			if (ipvlan_is_private(ipvlan->port))
+				goto out_drop;
+
+			ipvlan_rcv_frame(addr, addr_type, &skb, true);
+			return NET_XMIT_SUCCESS;
 		}
+	}
+
+tx_frame_out:
+	/* We don't know destination. Now we have to handle case for
+	 * non-learnable bridge and learnable case.
+	 */
+	if (!ipvlan_is_macnat(ipvlan->port)) {
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (!skb)
 			return NET_XMIT_DROP;
+		if (same_mac_addr) {
+			/* Packet definitely does not belong to any of the
+			 * virtual devices, but the dest is local. So forward
+			 * the skb for the main. At the RX side we just return
+			 * RX_PASS for it to be processed further on the stack.
+			 */
+			dev_forward_skb(ipvlan->phy_dev, skb);
+			return NET_XMIT_SUCCESS;
+		}
+	} else {
+		/* Packet to outside on learnable. Fix source eth-addr. */
+		struct sk_buff *orig_skb = skb;
 
-		/* Packet definitely does not belong to any of the
-		 * virtual devices, but the dest is local. So forward
-		 * the skb for the main-dev. At the RX side we just return
-		 * RX_PASS for it to be processed further on the stack.
-		 */
-		dev_forward_skb(ipvlan->phy_dev, skb);
-		return NET_XMIT_SUCCESS;
+		skb = skb_unshare(skb, GFP_ATOMIC);
+		if (!skb)
+			return NET_XMIT_DROP;
+
+		skb_reset_mac_header(skb);
+		ether_addr_copy(skb_eth_hdr(skb)->h_source,
+				ipvlan->phy_dev->dev_addr);
+
+		/* ToDo: Handle ICMPv6 for neighbours discovery.*/
+		if (lyr3h && addr_type == IPVL_ARP) {
+			struct arphdr *arph;
+			/* must reparse new skb */
+			if (skb != orig_skb && lyr3h && addr_type == IPVL_ARP)
+				lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb,
+							  &addr_type);
+			arph = (struct arphdr *)lyr3h;
+			ether_addr_copy((u8 *)(arph + 1),
+					ipvlan->phy_dev->dev_addr);
+		}
 	}
 
 tx_phy_dev:
@@ -884,8 +957,7 @@ static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
 
 	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 	if (addr)
-		ret = ipvlan_rcv_frame(addr, pskb, false);
-
+		ret = ipvlan_rcv_frame(addr, addr_type, pskb, false);
 out:
 	return ret;
 }
@@ -953,7 +1025,8 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 		return RX_HANDLER_PASS;
 
 	if (ipvlan_is_l2_mcast(port, skb, &need_eth_fix)) {
-		if (ipvlan_external_frame(skb, port)) {
+		if (ipvlan_is_macnat(port) ||
+		    ipvlan_external_frame(skb, port)) {
 			/* External frames are queued for device local
 			 * distribution, but a copy is given to master
 			 * straight away to avoid sending duplicates later
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 4535a9ab50da..8ccf35a24e95 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -908,7 +908,8 @@ static int ipvlan_device_event(struct notifier_block *unused,
 }
 
 /* the caller must held the addrs lock */
-int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
+int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6,
+		    const u8 *hwaddr)
 {
 	struct ipvl_addr *addr;
 
@@ -927,6 +928,8 @@ int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 		addr->atype = IPVL_IPV6;
 #endif
 	}
+	if (hwaddr)
+		ether_addr_copy(addr->hwaddr, hwaddr);
 
 	list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
 
@@ -939,7 +942,7 @@ int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 	return 0;
 }
 
-static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
+void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
 	struct ipvl_addr *addr;
 
@@ -980,7 +983,7 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 			  "Failed to add IPv6=%pI6c addr for %s intf\n",
 			  ip6_addr, ipvlan->dev->name);
 	else
-		ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+		ret = ipvlan_add_addr(ipvlan, ip6_addr, true, NULL);
 	spin_unlock_bh(&ipvlan->addrs_lock);
 	return ret;
 }
@@ -1051,7 +1054,7 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 			  "Failed to add IPv4=%pI4 on %s intf.\n",
 			  ip4_addr, ipvlan->dev->name);
 	else
-		ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+		ret = ipvlan_add_addr(ipvlan, ip4_addr, false, NULL);
 	spin_unlock_bh(&ipvlan->addrs_lock);
 	return ret;
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 05/14] ipvlan: Forget all IP when device goes down
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (3 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 04/14] ipvlan: Added some kind of MAC NAT Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan Dmitry Skorodumov
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

When ipvlan interface goes down, forget all learned addresses.

This is a way to clean up addresses when the master dev switches to
another network.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_main.c | 49 ++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 8ccf35a24e95..18a69b4fb58c 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -741,14 +741,10 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params,
 }
 EXPORT_SYMBOL_GPL(ipvlan_link_new);
 
-void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
+static void ipvlan_addrs_forget_all(struct ipvl_dev *ipvlan)
 {
-	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ipvl_addr *addr, *next;
 
-	if (ipvlan_is_macnat(ipvlan->port))
-		dev_set_allmulti(dev, -1);
-
 	spin_lock_bh(&ipvlan->addrs_lock);
 	list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
 		ipvlan_ht_addr_del(addr);
@@ -756,6 +752,16 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
 		kfree_rcu(addr, rcu);
 	}
 	spin_unlock_bh(&ipvlan->addrs_lock);
+}
+
+void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (ipvlan_is_macnat(ipvlan->port))
+		dev_set_allmulti(dev, -1);
+
+	ipvlan_addrs_forget_all(ipvlan);
 
 	ida_free(&ipvlan->port->ida, dev->dev_id);
 	list_del_rcu(&ipvlan->pnode);
@@ -813,6 +819,19 @@ int ipvlan_link_register(struct rtnl_link_ops *ops)
 }
 EXPORT_SYMBOL_GPL(ipvlan_link_register);
 
+static bool ipvlan_is_valid_dev(const struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (!netif_is_ipvlan(dev))
+		return false;
+
+	if (!ipvlan || !ipvlan->port)
+		return false;
+
+	return true;
+}
+
 static int ipvlan_device_event(struct notifier_block *unused,
 			       unsigned long event, void *ptr)
 {
@@ -824,6 +843,13 @@ static int ipvlan_device_event(struct notifier_block *unused,
 	LIST_HEAD(lst_kill);
 	int err;
 
+	if (event == NETDEV_DOWN && ipvlan_is_valid_dev(dev)) {
+		struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+		ipvlan_addrs_forget_all(ipvlan);
+		return NOTIFY_DONE;
+	}
+
 	if (!netif_is_ipvlan_port(dev))
 		return NOTIFY_DONE;
 
@@ -959,19 +985,6 @@ void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 	kfree_rcu(addr, rcu);
 }
 
-static bool ipvlan_is_valid_dev(const struct net_device *dev)
-{
-	struct ipvl_dev *ipvlan = netdev_priv(dev);
-
-	if (!netif_is_ipvlan(dev))
-		return false;
-
-	if (!ipvlan || !ipvlan->port)
-		return false;
-
-	return true;
-}
-
 #if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (4 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 05/14] ipvlan: Forget all IP when device goes down Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:29   ` Eric Dumazet
  2025-11-05 16:14 ` [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge Dmitry Skorodumov
                   ` (8 subsequent siblings)
  14 siblings, 1 reply; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

If the main port interface supports GSO, we need to manually segment
the skb before forwarding it to the ipvlan interface.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_main.c | 51 ++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 18a69b4fb58c..ec53cc0ada3b 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -4,6 +4,7 @@
 
 #include <linux/ethtool.h>
 #include <net/netdev_lock.h>
+#include <net/gso.h>
 
 #include "ipvlan.h"
 
@@ -71,6 +72,41 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval,
 	return err;
 }
 
+static int ipvlan_receive(struct ipvl_dev *ipvlan, struct sk_buff *skb)
+{
+	struct sk_buff *segs;
+	struct sk_buff *nskb;
+	ssize_t mac_hdr_size;
+	int ret, len;
+
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, skb->dev);
+	ipvlan_skb_crossing_ns(skb, ipvlan->dev);
+	ipvlan_mark_skb(skb, ipvlan->phy_dev);
+	if (skb_shinfo(skb)->gso_size == 0) {
+		len = skb->len + ETH_HLEN;
+		ret = netif_rx(skb);
+		ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, false);
+		return ret;
+	}
+
+	mac_hdr_size = skb->network_header - skb->mac_header;
+	__skb_push(skb, mac_hdr_size);
+	segs = skb_gso_segment(skb, 0);
+	dev_kfree_skb(skb);
+	if (IS_ERR(segs))
+		return NET_RX_DROP;
+
+	skb_list_walk_safe(segs, segs, nskb) {
+		skb_mark_not_on_list(segs);
+		__skb_pull(segs, mac_hdr_size);
+		len = segs->len + ETH_HLEN;
+		ret = netif_rx(segs);
+		ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, false);
+	}
+	return ret;
+}
+
 static int ipvlan_port_rcv(struct sk_buff *skb, struct net_device *wdev,
 			   struct packet_type *pt, struct net_device *orig_wdev)
 {
@@ -111,19 +147,8 @@ static int ipvlan_port_rcv(struct sk_buff *skb, struct net_device *wdev,
 		goto out;
 
 	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
-	if (addr) {
-		struct ipvl_dev *ipvlan = addr->master;
-		int ret, len;
-
-		ipvlan_skb_crossing_ns(skb, ipvlan->dev);
-		skb->protocol = eth_type_trans(skb, skb->dev);
-		skb->pkt_type = PACKET_HOST;
-		ipvlan_mark_skb(skb, port->dev);
-		len = skb->len + ETH_HLEN;
-		ret = netif_rx(skb);
-		ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, false);
-		return NET_RX_SUCCESS;
-	}
+	if (addr)
+		return ipvlan_receive(addr->master, skb);
 
 out:
 	dev_kfree_skb(skb);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (5 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-11  0:33   ` kernel test robot
  2025-11-05 16:14 ` [PATCH net-next 08/14] ipvlan: Make the addrs_lock be per port Dmitry Skorodumov
                   ` (7 subsequent siblings)
  14 siblings, 1 reply; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

To make IPv6 work with the learnable l2-bridge, the TX-path
needs to be processed:
* Replace Source-ll-addr in Solicitation ndisc,
* Replace Target-ll-addr in Advertisement ndisc

No need to do anything in RX-path

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 129 +++++++++++++++++++++++++++----
 1 file changed, 115 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 547016e3ca8c..659aed8fc4ff 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -4,6 +4,7 @@
 
 #include <net/flow.h>
 #include <net/ip.h>
+#include <net/ip6_checksum.h>
 
 #include "ipvlan.h"
 
@@ -769,13 +770,122 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 	return ipvlan_process_outbound(skb);
 }
 
+static void ipvlan_macnat_patch_tx_arp(struct ipvl_dev *ipvlan,
+				       struct sk_buff *skb)
+{
+	struct arphdr *arph;
+	int addr_type;
+
+	arph = (struct arphdr *)ipvlan_get_L3_hdr(ipvlan->port, skb,
+						 &addr_type);
+	ether_addr_copy((u8 *)(arph + 1), ipvlan->phy_dev->dev_addr);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static u8 *ipvlan_search_icmp6_ll_addr(struct sk_buff *skb, u8 icmp_option)
+{
+	/* skb is ensured to pullable for all ipv6 payload_len by caller */
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct icmp6hdr *icmph;
+	int ndsize, curr_off;
+
+	icmph = (struct icmp6hdr *)(ip6h + 1);
+	ndsize = (int)htons(ip6h->payload_len);
+	curr_off = sizeof(*icmph);
+
+	if (icmph->icmp6_type != NDISC_ROUTER_SOLICITATION)
+		curr_off += sizeof(struct in6_addr);
+
+	while ((curr_off + 2) < ndsize) {
+		u8  *data = (u8 *)icmph + curr_off;
+		u32 opt_len = data[1] << 3;
+
+		if (unlikely(opt_len == 0))
+			return NULL;
+
+		if (data[0] != icmp_option) {
+			curr_off += opt_len;
+			continue;
+		}
+
+		if (unlikely(opt_len < ETH_ALEN + 2))
+			return NULL;
+
+		if (unlikely(curr_off + opt_len > ndsize))
+			return NULL;
+
+		return data + 2;
+	}
+
+	return NULL;
+}
+
+static void ipvlan_macnat_patch_tx_ipv6(struct ipvl_dev *ipvlan,
+					struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6h;
+	struct icmp6hdr *icmph;
+	u8 icmp_option;
+	u8 *lladdr;
+	u16 ndsize;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
+		return;
+
+	if (ipv6_hdr(skb)->nexthdr != NEXTHDR_ICMP)
+		return;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
+		return;
+
+	ip6h = ipv6_hdr(skb);
+	icmph = (struct icmp6hdr *)(ip6h + 1);
+
+	/* Patch Source-LL for solicitation, Target-LL for advertisement */
+	if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+	    icmph->icmp6_type == NDISC_ROUTER_SOLICITATION)
+		icmp_option = ND_OPT_SOURCE_LL_ADDR;
+	else if (icmph->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)
+		icmp_option = ND_OPT_TARGET_LL_ADDR;
+	else
+		return;
+
+	ndsize = (int)htons(ip6h->payload_len);
+	if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + ndsize)))
+		return;
+
+	lladdr = ipvlan_search_icmp6_ll_addr(skb, icmp_option);
+	if (!lladdr)
+		return;
+
+	ether_addr_copy(lladdr, ipvlan->phy_dev->dev_addr);
+
+	ip6h = ipv6_hdr(skb);
+	icmph = (struct icmp6hdr *)(ip6h + 1);
+	icmph->icmp6_cksum = 0;
+	icmph->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					     ndsize,
+					     IPPROTO_ICMPV6,
+					     csum_partial(icmph,
+							  ndsize,
+							  0));
+	skb->ip_summed = CHECKSUM_COMPLETE;
+}
+#else
+static void ipvlan_macnat_patch_tx_ipv6(struct ipvl_dev *ipvlan,
+					struct sk_buff *skb)
+{
+}
+#endif
+
 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ipvl_dev *ipvlan;
 	struct ipvl_addr *addr;
 	struct ethhdr *eth;
 	bool same_mac_addr;
-	int addr_type;
+	int addr_type = -1;
 	void *lyr3h;
 
 	ipvlan = netdev_priv(dev);
@@ -862,8 +972,6 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		}
 	} else {
 		/* Packet to outside on learnable. Fix source eth-addr. */
-		struct sk_buff *orig_skb = skb;
-
 		skb = skb_unshare(skb, GFP_ATOMIC);
 		if (!skb)
 			return NET_XMIT_DROP;
@@ -872,17 +980,10 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		ether_addr_copy(skb_eth_hdr(skb)->h_source,
 				ipvlan->phy_dev->dev_addr);
 
-		/* ToDo: Handle ICMPv6 for neighbours discovery.*/
-		if (lyr3h && addr_type == IPVL_ARP) {
-			struct arphdr *arph;
-			/* must reparse new skb */
-			if (skb != orig_skb && lyr3h && addr_type == IPVL_ARP)
-				lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb,
-							  &addr_type);
-			arph = (struct arphdr *)lyr3h;
-			ether_addr_copy((u8 *)(arph + 1),
-					ipvlan->phy_dev->dev_addr);
-		}
+		if (addr_type == IPVL_ARP)
+			ipvlan_macnat_patch_tx_arp(ipvlan, skb);
+		else if (addr_type == IPVL_ICMPV6 || addr_type == IPVL_IPV6)
+			ipvlan_macnat_patch_tx_ipv6(ipvlan, skb);
 	}
 
 tx_phy_dev:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 08/14] ipvlan: Make the addrs_lock be per port
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (6 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 09/14] ipvlan: Take addr_lock in ipvlan_open() Dmitry Skorodumov
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Paolo Abeni, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski

Make the addrs_lock be per port, not per ipvlan dev.

This appears to be a very minor problem, though,
since it's highly unlikely that ipvlan_add_addr() will
be called on 2 CPUs simultaneously. Nevertheless,
this may cause:

1. False-negative of ipvlan_addr_busy(): one interface
iterates through all port->ipvlans + ipvlan->addrs
under some ipvlan spinlock, while another adds an IP
under its own lock. This is only possible
for IPv6 though, since it looks like only ipvlan_addr6_event() can be
called without rtnl_lock.

2. Race since ipvlan_ht_addr_add(port) is called under
different ipvlan->addrs_lock locks

This should not affect performance, since add/remove IP
is a rare situation and spinlock is not locked on fast
paths.

Also, it's quite convenient to have addrs_lock on
ipvl_port, to dynamically prevent conflict of IPs
with addresses on main port.

CC: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan.h      |  2 +-
 drivers/net/ipvlan/ipvlan_core.c |  4 ++--
 drivers/net/ipvlan/ipvlan_main.c | 20 ++++++++++----------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index c690e313ef6b..0ab1797c6128 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -71,7 +71,6 @@ struct ipvl_dev {
 	DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
 	netdev_features_t	sfeatures;
 	u32			msg_enable;
-	spinlock_t		addrs_lock;
 };
 
 struct ipvl_addr {
@@ -94,6 +93,7 @@ struct ipvl_port {
 	struct net_device	*dev;
 	possible_net_t		pnet;
 	struct hlist_head	hlhead[IPVLAN_HASH_SIZE];
+	spinlock_t		addrs_lock; /* guards hash-table and addrs */
 	struct list_head	ipvlans;
 	struct packet_type	ipvl_ptype;
 	u16			mode;
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 659aed8fc4ff..a952a257a791 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -476,7 +476,7 @@ static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6,
 	struct ipvl_addr *ipvladdr, *oldest = NULL;
 	unsigned int naddrs = 0;
 
-	spin_lock_bh(&ipvlan->addrs_lock);
+	spin_lock_bh(&ipvlan->port->addrs_lock);
 
 	if (ipvlan_addr_busy(ipvlan->port, addr, is_v6))
 		goto out_unlock;
@@ -499,7 +499,7 @@ static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6,
 	ipvlan_add_addr(ipvlan, addr, is_v6, hwaddr);
 
 out_unlock:
-	spin_unlock_bh(&ipvlan->addrs_lock);
+	spin_unlock_bh(&ipvlan->port->addrs_lock);
 	if (oldest)
 		kfree_rcu(oldest, rcu);
 }
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index ec53cc0ada3b..56f65ac8ecef 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -172,6 +172,7 @@ static int ipvlan_port_create(struct net_device *dev)
 	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
 		INIT_HLIST_HEAD(&port->hlhead[idx]);
 
+	spin_lock_init(&port->addrs_lock);
 	skb_queue_head_init(&port->backlog);
 	INIT_WORK(&port->wq, ipvlan_process_multicast);
 	ida_init(&port->ida);
@@ -686,7 +687,6 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params,
 	if (!tb[IFLA_MTU])
 		ipvlan_adjust_mtu(ipvlan, phy_dev);
 	INIT_LIST_HEAD(&ipvlan->addrs);
-	spin_lock_init(&ipvlan->addrs_lock);
 
 	/* Flags are per port and latest update overrides. User has
 	 * to be consistent in setting it just like the mode attribute.
@@ -770,13 +770,13 @@ static void ipvlan_addrs_forget_all(struct ipvl_dev *ipvlan)
 {
 	struct ipvl_addr *addr, *next;
 
-	spin_lock_bh(&ipvlan->addrs_lock);
+	spin_lock_bh(&ipvlan->port->addrs_lock);
 	list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
 		ipvlan_ht_addr_del(addr);
 		list_del_rcu(&addr->anode);
 		kfree_rcu(addr, rcu);
 	}
-	spin_unlock_bh(&ipvlan->addrs_lock);
+	spin_unlock_bh(&ipvlan->port->addrs_lock);
 }
 
 void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
@@ -997,16 +997,16 @@ void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
 	struct ipvl_addr *addr;
 
-	spin_lock_bh(&ipvlan->addrs_lock);
+	spin_lock_bh(&ipvlan->port->addrs_lock);
 	addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
 	if (!addr) {
-		spin_unlock_bh(&ipvlan->addrs_lock);
+		spin_unlock_bh(&ipvlan->port->addrs_lock);
 		return;
 	}
 
 	ipvlan_ht_addr_del(addr);
 	list_del_rcu(&addr->anode);
-	spin_unlock_bh(&ipvlan->addrs_lock);
+	spin_unlock_bh(&ipvlan->port->addrs_lock);
 	kfree_rcu(addr, rcu);
 }
 
@@ -1015,14 +1015,14 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
 	int ret = -EINVAL;
 
-	spin_lock_bh(&ipvlan->addrs_lock);
+	spin_lock_bh(&ipvlan->port->addrs_lock);
 	if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
 		netif_err(ipvlan, ifup, ipvlan->dev,
 			  "Failed to add IPv6=%pI6c addr for %s intf\n",
 			  ip6_addr, ipvlan->dev->name);
 	else
 		ret = ipvlan_add_addr(ipvlan, ip6_addr, true, NULL);
-	spin_unlock_bh(&ipvlan->addrs_lock);
+	spin_unlock_bh(&ipvlan->port->addrs_lock);
 	return ret;
 }
 
@@ -1086,14 +1086,14 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
 	int ret = -EINVAL;
 
-	spin_lock_bh(&ipvlan->addrs_lock);
+	spin_lock_bh(&ipvlan->port->addrs_lock);
 	if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
 		netif_err(ipvlan, ifup, ipvlan->dev,
 			  "Failed to add IPv4=%pI4 on %s intf.\n",
 			  ip4_addr, ipvlan->dev->name);
 	else
 		ret = ipvlan_add_addr(ipvlan, ip4_addr, false, NULL);
-	spin_unlock_bh(&ipvlan->addrs_lock);
+	spin_unlock_bh(&ipvlan->port->addrs_lock);
 	return ret;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 09/14] ipvlan: Take addr_lock in ipvlan_open()
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (7 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 08/14] ipvlan: Make the addrs_lock be per port Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 10/14] ipvlan: Don't allow children to use IPs of main Dmitry Skorodumov
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

The addrs lock is not taken in ipvlan_open().

It seems the code was initially written under the assumption
that any address change occurs under rtnl_lock(). But
that's not true for the IPv6 case. So, we have to
take addrs_lock in ipvlan_open().

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_main.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 56f65ac8ecef..b888c2ef77ca 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -286,20 +286,20 @@ static void ipvlan_uninit(struct net_device *dev)
 static int ipvlan_open(struct net_device *dev)
 {
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ipvl_port *port = ipvlan->port;
 	struct ipvl_addr *addr;
 
-	if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
-	    ipvlan->port->mode == IPVLAN_MODE_L3S)
+	if (port->mode == IPVLAN_MODE_L3 || port->mode == IPVLAN_MODE_L3S)
 		dev->flags |= IFF_NOARP;
 	else
 		dev->flags &= ~IFF_NOARP;
 
 	/* for learnable, addresses will be obtained from tx-packets. */
-	if (!ipvlan_is_macnat(ipvlan->port)) {
-		rcu_read_lock();
+	if (!ipvlan_is_macnat(port)) {
+		spin_lock_bh(&port->addrs_lock);
 		list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
 			ipvlan_ht_addr_add(ipvlan, addr);
-		rcu_read_unlock();
+		spin_unlock_bh(&port->addrs_lock);
 	}
 
 	return 0;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 10/14] ipvlan: Don't allow children to use IPs of main
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (8 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 09/14] ipvlan: Take addr_lock in ipvlan_open() Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 11/14] ipvlan: const-specifier for functions that use iaddr Dmitry Skorodumov
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

Remember all ip-addresses on main iface and check
in ipvlan_addr_busy() that addr is not used on main.

Store the IPs in a separate list. Remember the IP addresses at
port creation and listen for addr-change events. Don't allow
addresses of main to be configured on children.

In learning mode, a child may not learn an address if
it is used on main.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan.h      |  13 ++
 drivers/net/ipvlan/ipvlan_core.c |  39 ++++--
 drivers/net/ipvlan/ipvlan_main.c | 196 +++++++++++++++++++++++++++++++
 3 files changed, 235 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 0ab1797c6128..faba1308c135 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -89,10 +89,21 @@ struct ipvl_addr {
 	struct rcu_head		rcu;
 };
 
+struct ipvl_port_addr {
+	union {
+		struct in6_addr	ip6;
+		struct in_addr	ip4;
+	} ipu;
+	ipvl_hdr_type		atype;
+	struct list_head	anode;
+	struct rcu_head		rcu;
+};
+
 struct ipvl_port {
 	struct net_device	*dev;
 	possible_net_t		pnet;
 	struct hlist_head	hlhead[IPVLAN_HASH_SIZE];
+	struct list_head	port_addrs; /* addresses of main iface.*/
 	spinlock_t		addrs_lock; /* guards hash-table and addrs */
 	struct list_head	ipvlans;
 	struct packet_type	ipvl_ptype;
@@ -199,6 +210,8 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params,
 void ipvlan_link_delete(struct net_device *dev, struct list_head *head);
 void ipvlan_link_setup(struct net_device *dev);
 int ipvlan_link_register(struct rtnl_link_ops *ops);
+struct ipvl_port_addr *ipvlan_port_find_addr(struct ipvl_port *port,
+					     const void *iaddr, bool is_v6);
 #ifdef CONFIG_IPVLAN_L3S
 int ipvlan_l3s_register(struct ipvl_port *port);
 void ipvlan_l3s_unregister(struct ipvl_port *port);
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index a952a257a791..cba1378cc920 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -133,6 +133,8 @@ bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
 			break;
 		}
 	}
+	if (!ret)
+		ret = !!ipvlan_port_find_addr(port, iaddr, is_v6);
 	rcu_read_unlock();
 	return ret;
 }
@@ -469,17 +471,21 @@ static bool is_ipv6_usable(const struct in6_addr *addr)
 	       !ipv6_addr_any(addr);
 }
 
-static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6,
-				const u8 *hwaddr)
+static int __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6,
+			       const u8 *hwaddr)
 {
 	const ipvl_hdr_type atype = is_v6 ? IPVL_IPV6 : IPVL_IPV4;
 	struct ipvl_addr *ipvladdr, *oldest = NULL;
 	unsigned int naddrs = 0;
+	int ret = -1;
 
 	spin_lock_bh(&ipvlan->port->addrs_lock);
 
+	if (ipvlan_port_find_addr(ipvlan->port, addr, is_v6))
+		goto out_unlock; /* used by main. */
+
 	if (ipvlan_addr_busy(ipvlan->port, addr, is_v6))
-		goto out_unlock;
+		goto out_unlock; /* used by other ipvlan. */
 
 	list_for_each_entry_rcu(ipvladdr, &ipvlan->addrs, anode) {
 		if (ipvladdr->atype != atype)
@@ -497,15 +503,19 @@ static void __ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *addr, bool is_v6,
 	}
 
 	ipvlan_add_addr(ipvlan, addr, is_v6, hwaddr);
+	ret = 0;
 
 out_unlock:
 	spin_unlock_bh(&ipvlan->port->addrs_lock);
 	if (oldest)
 		kfree_rcu(oldest, rcu);
+
+	return ret;
 }
 
-static void ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
-			      int addr_type, const u8 *hwaddr)
+/* return -1 if frame should be dropped. */
+static int ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
+			     int addr_type, const u8 *hwaddr)
 {
 	struct ipvl_addr *ipvladdr;
 	void *addr = NULL;
@@ -519,7 +529,7 @@ static void ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
 
 		ip6h = (struct ipv6hdr *)lyr3h;
 		if (!is_ipv6_usable(&ip6h->saddr))
-			return;
+			return 0;
 		is_v6 = true;
 		addr = &ip6h->saddr;
 		break;
@@ -532,7 +542,7 @@ static void ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
 		ip4h = (struct iphdr *)lyr3h;
 		i4addr = &ip4h->saddr;
 		if (!is_ipv4_usable(*i4addr))
-			return;
+			return 0;
 		is_v6 = false;
 		addr = i4addr;
 		break;
@@ -547,17 +557,18 @@ static void ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
 		arp_ptr += ipvlan->port->dev->addr_len;
 		i4addr = (__be32 *)arp_ptr;
 		if (!is_ipv4_usable(*i4addr))
-			return;
+			return 0;
 		is_v6 = false;
 		addr = i4addr;
 		break;
 	}
 	default:
-		return;
+		return 0;
 	}
 
 	/* handle situation when MAC changed, but IP is the same. */
 	ipvladdr = ipvlan_ht_addr_lookup(ipvlan->port, addr, is_v6);
+
 	if (ipvladdr && !ether_addr_equal(ipvladdr->hwaddr, hwaddr)) {
 		/* del_addr is safe to call, because we are inside xmit. */
 		ipvlan_del_addr(ipvladdr->master, addr, is_v6);
@@ -565,7 +576,9 @@ static void ipvlan_addr_learn(struct ipvl_dev *ipvlan, void *lyr3h,
 	}
 
 	if (!ipvladdr)
-		__ipvlan_addr_learn(ipvlan, addr, is_v6, hwaddr);
+		return __ipvlan_addr_learn(ipvlan, addr, is_v6, hwaddr);
+
+	return 0;
 }
 
 static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
@@ -905,9 +918,9 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
 
 		if (ipvlan_is_macnat(ipvlan->port)) {
-			if (lyr3h)
-				ipvlan_addr_learn(ipvlan, lyr3h, addr_type,
-						  eth->h_source);
+			if (lyr3h && ipvlan_addr_learn(ipvlan, lyr3h, addr_type,
+						       eth->h_source) < 0)
+				goto out_drop;
 			/* Mark SKB in advance */
 			skb = skb_share_check(skb, GFP_ATOMIC);
 			if (!skb)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index b888c2ef77ca..18b49f74dc35 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -156,6 +156,115 @@ static int ipvlan_port_rcv(struct sk_buff *skb, struct net_device *wdev,
 	return NET_RX_DROP;
 }
 
+static int ipvlan_port_add_addr(struct ipvl_port *port, const void *iaddr,
+				bool is_v6)
+{
+	struct ipvl_port_addr *addr;
+
+	addr = kzalloc(sizeof(*addr), GFP_KERNEL);
+	if (!addr)
+		return -ENOMEM;
+	if (!is_v6) {
+		memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
+		addr->atype = IPVL_IPV4;
+	} else {
+		memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
+		addr->atype = IPVL_IPV6;
+	}
+
+	spin_lock_bh(&port->addrs_lock);
+	list_add_tail_rcu(&addr->anode, &port->port_addrs);
+	spin_unlock_bh(&port->addrs_lock);
+
+	return 0;
+}
+
+static bool portaddr_equal(bool is_v6, const struct ipvl_port_addr *addr,
+			   const void *iaddr)
+{
+	if (!is_v6 && addr->atype == IPVL_IPV4) {
+		struct in_addr *i4addr = (struct in_addr *)iaddr;
+
+		return addr->ip4addr.s_addr == i4addr->s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (is_v6 && addr->atype == IPVL_IPV6) {
+		struct in6_addr *i6addr = (struct in6_addr *)iaddr;
+
+		return ipv6_addr_equal(&addr->ip6addr, i6addr);
+#endif
+	}
+
+	return false;
+}
+
+struct ipvl_port_addr *ipvlan_port_find_addr(struct ipvl_port *port,
+					     const void *iaddr, bool is_v6)
+{
+	struct ipvl_port_addr *addr;
+
+	list_for_each_entry_rcu(addr, &port->port_addrs, anode)
+		if (portaddr_equal(is_v6, addr, iaddr))
+			return addr;
+	return NULL;
+}
+
+static void ipvlan_port_del_addr(struct ipvl_port *port, const void *iaddr,
+				 bool is_v6)
+{
+	struct ipvl_port_addr *addr;
+
+	spin_lock_bh(&port->addrs_lock);
+	addr = ipvlan_port_find_addr(port, iaddr, is_v6);
+	if (addr)
+		list_del_rcu(&addr->anode);
+	spin_unlock_bh(&port->addrs_lock);
+
+	if (addr)
+		kfree_rcu(addr, rcu);
+}
+
+static int ipvlan_port_enum_addrs(struct ipvl_port *port)
+{
+	const struct inet6_dev *in6_dev __maybe_unused;
+	const struct inet6_ifaddr *ifa6 __maybe_unused;
+	const struct in_device *in_dev;
+	const struct in_ifaddr *ifa;
+	int r = 0;
+
+	ASSERT_RTNL();
+
+	in_dev = __in_dev_get_rcu(port->dev);
+	if (in_dev)
+		in_dev_for_each_ifa_rcu(ifa, in_dev) {
+			r = ipvlan_port_add_addr(port, &ifa->ifa_local, false);
+			if (r < 0)
+				return r;
+		}
+
+#if IS_ENABLED(CONFIG_IPV6)
+	in6_dev = __in6_dev_get(port->dev);
+	if (in6_dev)
+		list_for_each_entry_rcu(ifa6, &in6_dev->addr_list, if_list) {
+			r = ipvlan_port_add_addr(port, &ifa6->addr, true);
+			if (r < 0)
+				return r;
+		}
+#endif
+	return r;
+}
+
+static void ipvlan_port_free_port_addrs(struct ipvl_port *port)
+{
+	struct ipvl_port_addr *addr, *next;
+
+	ASSERT_RTNL();
+
+	list_for_each_entry_safe(addr, next, &port->port_addrs, anode) {
+		list_del_rcu(&addr->anode);
+		kfree_rcu(addr, rcu);
+	}
+}
+
 static int ipvlan_port_create(struct net_device *dev)
 {
 	struct ipvl_port *port;
@@ -172,12 +281,15 @@ static int ipvlan_port_create(struct net_device *dev)
 	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
 		INIT_HLIST_HEAD(&port->hlhead[idx]);
 
+	INIT_LIST_HEAD(&port->port_addrs);
 	spin_lock_init(&port->addrs_lock);
 	skb_queue_head_init(&port->backlog);
 	INIT_WORK(&port->wq, ipvlan_process_multicast);
 	ida_init(&port->ida);
 	port->dev_id_start = 1;
 
+	ipvlan_port_enum_addrs(port);
+
 	err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
 	if (err)
 		goto err;
@@ -191,6 +303,7 @@ static int ipvlan_port_create(struct net_device *dev)
 	return 0;
 
 err:
+	ipvlan_port_free_port_addrs(port);
 	kfree(port);
 	return err;
 }
@@ -212,6 +325,7 @@ static void ipvlan_port_destroy(struct net_device *dev)
 		kfree_skb(skb);
 	}
 	ida_destroy(&port->ida);
+	ipvlan_port_free_port_addrs(port);
 	kfree(port);
 }
 
@@ -1010,6 +1124,50 @@ void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 	kfree_rcu(addr, rcu);
 }
 
+static void ipvlan_port_del_addr_ipvlans(struct ipvl_port *port,
+					 const void *iaddr, bool is_v6)
+{
+	struct ipvl_addr *addr = NULL;
+	struct ipvl_dev *ipvlan;
+
+	list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+		spin_lock_bh(&port->addrs_lock);
+		addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
+		if (addr) {
+			ipvlan_ht_addr_del(addr);
+			list_del_rcu(&addr->anode);
+			spin_unlock_bh(&port->addrs_lock);
+			break;
+		}
+		spin_unlock_bh(&port->addrs_lock);
+	}
+
+	if (addr)
+		kfree_rcu(addr, rcu);
+}
+
+static int ipvlan_port_add_addr_event(struct ipvl_port *port,
+				      const void *iaddr, bool is_v6)
+{
+	int r;
+
+	r = ipvlan_port_add_addr(port, iaddr, is_v6);
+	if (r < 0)
+		return r;
+
+	ipvlan_port_del_addr_ipvlans(port, iaddr, is_v6);
+
+	return NOTIFY_OK;
+}
+
+static int ipvlan_port_del_addr_event(struct ipvl_port *port,
+				      const void *iaddr, bool is_v6)
+{
+	ipvlan_port_del_addr(port, iaddr, is_v6);
+
+	return NOTIFY_OK;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
@@ -1038,6 +1196,24 @@ static int ipvlan_addr6_event(struct notifier_block *unused,
 	struct net_device *dev = (struct net_device *)if6->idev->dev;
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 
+	if (netif_is_ipvlan_port(dev)) {
+		struct ipvl_port *port = ipvlan_port_get_rcu(dev);
+
+		if (!ipvlan_is_macnat(port))
+			return NOTIFY_DONE;
+
+		switch (event) {
+		case NETDEV_UP:
+			return ipvlan_port_add_addr_event(port, &if6->addr,
+							  true);
+		case NETDEV_DOWN:
+			return ipvlan_port_del_addr_event(port, &if6->addr,
+							  true);
+		default:
+			return NOTIFY_OK;
+		}
+	}
+
 	if (!ipvlan_is_valid_dev(dev))
 		return NOTIFY_DONE;
 
@@ -1110,6 +1286,26 @@ static int ipvlan_addr4_event(struct notifier_block *unused,
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct in_addr ip4_addr;
 
+	if (netif_is_ipvlan_port(dev)) {
+		struct ipvl_port *port = ipvlan_port_get_rcu(dev);
+
+		if (!ipvlan_is_macnat(port))
+			return NOTIFY_DONE;
+
+		switch (event) {
+		case NETDEV_UP:
+			return ipvlan_port_add_addr_event(port,
+							  &if4->ifa_address,
+							  false);
+		case NETDEV_DOWN:
+			return ipvlan_port_del_addr_event(port,
+							  &if4->ifa_address,
+							  false);
+		default:
+			return NOTIFY_OK;
+		}
+	}
+
 	if (!ipvlan_is_valid_dev(dev))
 		return NOTIFY_DONE;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 11/14] ipvlan: const-specifier for functions that use iaddr
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (9 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 10/14] ipvlan: Don't allow children to use IPs of main Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 12/14] ipvlan: Common code from v6/v4 validator_event Dmitry Skorodumov
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

Add the const qualifier to the "void *iaddr" parameter of the
functions that accept it.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan.h      | 6 +++---
 drivers/net/ipvlan/ipvlan_core.c | 2 +-
 drivers/net/ipvlan/ipvlan_main.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index faba1308c135..be2bc2d33ddb 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -194,11 +194,11 @@ void ipvlan_multicast_enqueue(struct ipvl_port *port,
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
 int ipvlan_add_addr(struct ipvl_dev *ipvlan,
-		    void *iaddr, bool is_v6, const u8 *hwaddr);
-void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6);
+		    const void *iaddr, bool is_v6, const u8 *hwaddr);
+void ipvlan_del_addr(struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6);
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 				   const void *iaddr, bool is_v6);
-bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
+bool ipvlan_addr_busy(struct ipvl_port *port, const void *iaddr, bool is_v6);
 void ipvlan_ht_addr_del(struct ipvl_addr *addr);
 struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
 				     int addr_type, bool use_dest);
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index cba1378cc920..b38ce991e832 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -121,7 +121,7 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 	return ret;
 }
 
-bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
+bool ipvlan_addr_busy(struct ipvl_port *port, const void *iaddr, bool is_v6)
 {
 	struct ipvl_dev *ipvlan;
 	bool ret = false;
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 18b49f74dc35..d20fc473b4e1 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -1073,7 +1073,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
 }
 
 /* the caller must held the addrs lock */
-int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6,
+int ipvlan_add_addr(struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6,
 		    const u8 *hwaddr)
 {
 	struct ipvl_addr *addr;
@@ -1107,7 +1107,7 @@ int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6,
 	return 0;
 }
 
-void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
+void ipvlan_del_addr(struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6)
 {
 	struct ipvl_addr *addr;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 12/14] ipvlan: Common code from v6/v4 validator_event
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (10 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 11/14] ipvlan: const-specifier for functions that use iaddr Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 13/14] ipvlan: common code to handle ipv6/ipv4 address events Dmitry Skorodumov
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

Extract the common code of ipvlan_addr4_validator_event()/
ipvlan_addr6_validator_event() into its own function.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_main.c | 67 +++++++++++++++-----------------
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index d20fc473b4e1..5b4bfd00544b 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -1168,6 +1168,33 @@ static int ipvlan_port_del_addr_event(struct ipvl_port *port,
 	return NOTIFY_OK;
 }
 
+static int ipvlan_addr_validator_event(struct net_device *dev,
+				       unsigned long event,
+				       struct netlink_ext_ack *extack,
+				       const void *iaddr,
+				       bool is_v6)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (!ipvlan_is_valid_dev(dev))
+		return NOTIFY_DONE;
+
+	if (ipvlan_is_macnat(ipvlan->port))
+		return notifier_from_errno(-EADDRNOTAVAIL);
+
+	switch (event) {
+	case NETDEV_UP:
+		if (ipvlan_addr_busy(ipvlan->port, iaddr, is_v6)) {
+			NL_SET_ERR_MSG(extack,
+				       "Address already assigned to an ipvlan device");
+			return notifier_from_errno(-EADDRINUSE);
+		}
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
@@ -1236,25 +1263,9 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 {
 	struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr;
 	struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
-	struct ipvl_dev *ipvlan = netdev_priv(dev);
-
-	if (!ipvlan_is_valid_dev(dev))
-		return NOTIFY_DONE;
-
-	if (ipvlan_is_macnat(ipvlan->port))
-		return notifier_from_errno(-EADDRNOTAVAIL);
 
-	switch (event) {
-	case NETDEV_UP:
-		if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) {
-			NL_SET_ERR_MSG(i6vi->extack,
-				       "Address already assigned to an ipvlan device");
-			return notifier_from_errno(-EADDRINUSE);
-		}
-		break;
-	}
-
-	return NOTIFY_OK;
+	return ipvlan_addr_validator_event(dev, event, i6vi->extack,
+					   &i6vi->i6vi_addr, true);
 }
 #endif
 
@@ -1330,25 +1341,9 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused,
 {
 	struct in_validator_info *ivi = (struct in_validator_info *)ptr;
 	struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev;
-	struct ipvl_dev *ipvlan = netdev_priv(dev);
-
-	if (!ipvlan_is_valid_dev(dev))
-		return NOTIFY_DONE;
-
-	if (ipvlan_is_macnat(ipvlan->port))
-		return notifier_from_errno(-EADDRNOTAVAIL);
 
-	switch (event) {
-	case NETDEV_UP:
-		if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) {
-			NL_SET_ERR_MSG(ivi->extack,
-				       "Address already assigned to an ipvlan device");
-			return notifier_from_errno(-EADDRINUSE);
-		}
-		break;
-	}
-
-	return NOTIFY_OK;
+	return ipvlan_addr_validator_event(dev, event, ivi->extack,
+					   &ivi->ivi_addr, false);
 }
 
 static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 13/14] ipvlan: common code to handle ipv6/ipv4 address events
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (11 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 12/14] ipvlan: Common code from v6/v4 validator_event Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-05 16:14 ` [PATCH net-next 14/14] ipvlan: Ignore PACKET_LOOPBACK in handle_mode_l2() Dmitry Skorodumov
  2025-11-06  9:03 ` [syzbot ci] Re: ipvlan: support mac-nat mode syzbot ci
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

The IPv4 and IPv6 addr-event functions are very similar. Refactor
them to use common functions.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_main.c | 117 ++++++++++---------------------
 1 file changed, 37 insertions(+), 80 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 5b4bfd00544b..bc6db32f59bf 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -1195,33 +1195,39 @@ static int ipvlan_addr_validator_event(struct net_device *dev,
 	return NOTIFY_OK;
 }
 
-#if IS_ENABLED(CONFIG_IPV6)
-static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+static int ipvlan_add_addr_event(struct ipvl_dev *ipvlan, const void *iaddr,
+				 bool is_v6)
 {
 	int ret = -EINVAL;
 
 	spin_lock_bh(&ipvlan->port->addrs_lock);
-	if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
-		netif_err(ipvlan, ifup, ipvlan->dev,
-			  "Failed to add IPv6=%pI6c addr for %s intf\n",
-			  ip6_addr, ipvlan->dev->name);
-	else
-		ret = ipvlan_add_addr(ipvlan, ip6_addr, true, NULL);
+	if (ipvlan_addr_busy(ipvlan->port, iaddr, is_v6)) {
+		if (is_v6) {
+			netif_err(ipvlan, ifup, ipvlan->dev,
+				  "Failed to add IPv6=%pI6c on %s intf.\n",
+				  iaddr, ipvlan->dev->name);
+		} else {
+			netif_err(ipvlan, ifup, ipvlan->dev,
+				  "Failed to add IPv4=%pI4 on %s intf.\n",
+				  iaddr, ipvlan->dev->name);
+		}
+	} else {
+		ret = ipvlan_add_addr(ipvlan, iaddr, is_v6, NULL);
+	}
 	spin_unlock_bh(&ipvlan->port->addrs_lock);
 	return ret;
 }
 
-static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
+static void ipvlan_del_addr_event(struct ipvl_dev *ipvlan, const void *iaddr,
+				  bool is_v6)
 {
-	return ipvlan_del_addr(ipvlan, ip6_addr, true);
+	return ipvlan_del_addr(ipvlan, iaddr, is_v6);
 }
 
-static int ipvlan_addr6_event(struct notifier_block *unused,
-			      unsigned long event, void *ptr)
+static int ipvlan_addr_event(struct net_device *dev, unsigned long event,
+			     const void *iaddr, bool is_v6)
 {
-	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
-	struct net_device *dev = (struct net_device *)if6->idev->dev;
-	struct ipvl_dev *ipvlan = netdev_priv(dev);
+	struct ipvl_dev *ipvlan;
 
 	if (netif_is_ipvlan_port(dev)) {
 		struct ipvl_port *port = ipvlan_port_get_rcu(dev);
@@ -1231,11 +1237,9 @@ static int ipvlan_addr6_event(struct notifier_block *unused,
 
 		switch (event) {
 		case NETDEV_UP:
-			return ipvlan_port_add_addr_event(port, &if6->addr,
-							  true);
+			return ipvlan_port_add_addr_event(port, iaddr, is_v6);
 		case NETDEV_DOWN:
-			return ipvlan_port_del_addr_event(port, &if6->addr,
-							  true);
+			return ipvlan_port_del_addr_event(port, iaddr, is_v6);
 		default:
 			return NOTIFY_OK;
 		}
@@ -1244,20 +1248,31 @@ static int ipvlan_addr6_event(struct notifier_block *unused,
 	if (!ipvlan_is_valid_dev(dev))
 		return NOTIFY_DONE;
 
+	ipvlan = netdev_priv(dev);
 	switch (event) {
 	case NETDEV_UP:
-		if (ipvlan_add_addr6(ipvlan, &if6->addr))
+		if (ipvlan_add_addr_event(ipvlan, iaddr, is_v6))
 			return NOTIFY_BAD;
 		break;
 
 	case NETDEV_DOWN:
-		ipvlan_del_addr6(ipvlan, &if6->addr);
+		ipvlan_del_addr_event(ipvlan, iaddr, is_v6);
 		break;
 	}
 
 	return NOTIFY_OK;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static int ipvlan_addr6_event(struct notifier_block *unused,
+			      unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
+	struct net_device *dev = (struct net_device *)if6->idev->dev;
+
+	return ipvlan_addr_event(dev, event, &if6->addr, true);
+}
+
 static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 					unsigned long event, void *ptr)
 {
@@ -1269,71 +1284,13 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 }
 #endif
 
-static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
-{
-	int ret = -EINVAL;
-
-	spin_lock_bh(&ipvlan->port->addrs_lock);
-	if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
-		netif_err(ipvlan, ifup, ipvlan->dev,
-			  "Failed to add IPv4=%pI4 on %s intf.\n",
-			  ip4_addr, ipvlan->dev->name);
-	else
-		ret = ipvlan_add_addr(ipvlan, ip4_addr, false, NULL);
-	spin_unlock_bh(&ipvlan->port->addrs_lock);
-	return ret;
-}
-
-static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
-{
-	return ipvlan_del_addr(ipvlan, ip4_addr, false);
-}
-
 static int ipvlan_addr4_event(struct notifier_block *unused,
 			      unsigned long event, void *ptr)
 {
 	struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
 	struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
-	struct ipvl_dev *ipvlan = netdev_priv(dev);
-	struct in_addr ip4_addr;
-
-	if (netif_is_ipvlan_port(dev)) {
-		struct ipvl_port *port = ipvlan_port_get_rcu(dev);
-
-		if (!ipvlan_is_macnat(port))
-			return NOTIFY_DONE;
-
-		switch (event) {
-		case NETDEV_UP:
-			return ipvlan_port_add_addr_event(port,
-							  &if4->ifa_address,
-							  false);
-		case NETDEV_DOWN:
-			return ipvlan_port_del_addr_event(port,
-							  &if4->ifa_address,
-							  false);
-		default:
-			return NOTIFY_OK;
-		}
-	}
-
-	if (!ipvlan_is_valid_dev(dev))
-		return NOTIFY_DONE;
 
-	switch (event) {
-	case NETDEV_UP:
-		ip4_addr.s_addr = if4->ifa_address;
-		if (ipvlan_add_addr4(ipvlan, &ip4_addr))
-			return NOTIFY_BAD;
-		break;
-
-	case NETDEV_DOWN:
-		ip4_addr.s_addr = if4->ifa_address;
-		ipvlan_del_addr4(ipvlan, &ip4_addr);
-		break;
-	}
-
-	return NOTIFY_OK;
+	return ipvlan_addr_event(dev, event, &if4->ifa_address, false);
 }
 
 static int ipvlan_addr4_validator_event(struct notifier_block *unused,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH net-next 14/14] ipvlan: Ignore PACKET_LOOPBACK in handle_mode_l2()
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (12 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 13/14] ipvlan: common code to handle ipv6/ipv4 address events Dmitry Skorodumov
@ 2025-11-05 16:14 ` Dmitry Skorodumov
  2025-11-06  9:03 ` [syzbot ci] Re: ipvlan: support mac-nat mode syzbot ci
  14 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:14 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

Packets with pkt_type == PACKET_LOOPBACK are captured by the
handle_frame() function, but they don't have an L2 header.
We should not process them in handle_mode_l2().

This doesn't affect the old L2 functionality, since the handling
of these packets was incorrect anyway.

Handle them the same way as in br_handle_frame():
just pass the skb.

To observe the invalid behaviour, just start "ping -b" on the broadcast
address of the port interface.

Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry@huawei.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index b38ce991e832..cb89fb7213c9 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -1134,6 +1134,9 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 	rx_handler_result_t ret = RX_HANDLER_PASS;
 	bool need_eth_fix;
 
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+		return RX_HANDLER_PASS;
+
 	/* Ignore already seen packets. */
 	if (ipvlan_is_skb_marked(skb, port->dev))
 		return RX_HANDLER_PASS;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan
  2025-11-05 16:14 ` [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan Dmitry Skorodumov
@ 2025-11-05 16:29   ` Eric Dumazet
  2025-11-05 16:58     ` Dmitry Skorodumov
  0 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2025-11-05 16:29 UTC (permalink / raw)
  To: Dmitry Skorodumov
  Cc: netdev, linux-kernel, andrey.bokhanko, Andrew Lunn,
	David S. Miller, Jakub Kicinski, Paolo Abeni

On Wed, Nov 5, 2025 at 8:15 AM Dmitry Skorodumov
<skorodumov.dmitry@huawei.com> wrote:
>
> If main port interface supports GSO, we need manually segment
> the skb before forwarding it to ipvlan interface.

Why ?

I think you need to explain much more than a neutral sentence,

Also I do not see any tests, for the whole series ?

I have not seen the cover letter.

Also you sent the series twice today :/

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan
  2025-11-05 16:29   ` Eric Dumazet
@ 2025-11-05 16:58     ` Dmitry Skorodumov
  2025-11-06 15:41       ` Dmitry Skorodumov
  0 siblings, 1 reply; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-05 16:58 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, linux-kernel, andrey.bokhanko, Andrew Lunn,
	David S. Miller, Jakub Kicinski, Paolo Abeni


On 05.11.2025 19:29, Eric Dumazet wrote:
> On Wed, Nov 5, 2025 at 8:15 AM Dmitry Skorodumov
> <skorodumov.dmitry@huawei.com> wrote:
>> If main port interface supports GSO, we need manually segment
>> the skb before forwarding it to ipvlan interface.
> Why ?
>
> I think you need to explain much more than a neutral sentence,

Ok, got it. I will resend the patch with a more detailed description; I expect there will be a v4 anyway.

The reason is that this function is a protocol handler installed on the main port (with dev_add_pack()), so if the main port supports GSO/checksum offload, the OS will send us big/non-checksummed packets (tested with scp to the IP of some child port). Such a packet is forwarded to the child. I believe we cannot expect the child ipvlan interface to be prepared to receive a big packet without a checksum. But I agree that I should investigate the behaviour in more detail. Maybe I missed something and it is possible to force the corresponding TAP to somehow do this.

> Also I do not see any tests, for the whole series ?
Ok, if modules like this have some kind of unit tests, I should study them and provide some. I haven't seen this as a common practice for most of the modules here. So far all testing has been done manually (this should probably be described anyway).
>
> I have not seen the cover letter.
The cover letter was sent to netdev@vger.kernel.org. I wasn't sure whether it is good practice to CC every maintainer on each email of the series.
> Also you sent the series twice today :/

Well, I sent just 000* by mistake, and immediately resent the patches (as v3 in the cover letter) after noticing this.

Dmitry



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat
  2025-11-05 16:14 ` [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat Dmitry Skorodumov
@ 2025-11-05 23:32   ` Bagas Sanjaya
  2025-11-06 23:30   ` kernel test robot
  1 sibling, 0 replies; 26+ messages in thread
From: Bagas Sanjaya @ 2025-11-05 23:32 UTC (permalink / raw)
  To: Dmitry Skorodumov, netdev, Simon Horman, linux-doc, linux-kernel
  Cc: andrey.bokhanko, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Jonathan Corbet, Andrew Lunn

[-- Attachment #1: Type: text/plain, Size: 245 bytes --]

On Wed, Nov 05, 2025 at 07:14:37PM +0300, Dmitry Skorodumov wrote:
> +4.4 L2_MACNAT mode:
> +-------------

Please match section underline length to the heading text.

Thanks.

-- 
An old man doll... just what I always wanted! - Clara

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [syzbot ci] Re: ipvlan: support mac-nat mode
  2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
                   ` (13 preceding siblings ...)
  2025-11-05 16:14 ` [PATCH net-next 14/14] ipvlan: Ignore PACKET_LOOPBACK in handle_mode_l2() Dmitry Skorodumov
@ 2025-11-06  9:03 ` syzbot ci
  14 siblings, 0 replies; 26+ messages in thread
From: syzbot ci @ 2025-11-06  9:03 UTC (permalink / raw)
  To: andrew, andrey.bokhanko, corbet, davem, edumazet, horms, kuba,
	linux-doc, linux-kernel, netdev, pabeni, skorodumov.dmitry
  Cc: syzbot, syzkaller-bugs

syzbot ci has tested the following series

[v3] ipvlan: support mac-nat mode
https://lore.kernel.org/all/20251105161450.1730216-1-skorodumov.dmitry@huawei.com
* [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat
* [PATCH net-next 02/14] ipvlan: Send mcasts out directly in ipvlan_xmit_mode_l2()
* [PATCH net-next 03/14] ipvlan: Handle rx mcast-ip and unicast eth
* [PATCH net-next 04/14] ipvlan: Added some kind of MAC NAT
* [PATCH net-next 05/14] ipvlan: Forget all IP when device goes down
* [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan
* [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge
* [PATCH net-next 08/14] ipvlan: Make the addrs_lock be per port
* [PATCH net-next 09/14] ipvlan: Take addr_lock in ipvlan_open()
* [PATCH net-next 10/14] ipvlan: Don't allow children to use IPs of main
* [PATCH net-next 11/14] ipvlan: const-specifier for functions that use iaddr
* [PATCH net-next 12/14] ipvlan: Common code from v6/v4 validator_event
* [PATCH net-next 13/14] ipvlan: common code to handle ipv6/ipv4 address events
* [PATCH net-next 14/14] ipvlan: Ignore PACKET_LOOPBACK in handle_mode_l2()

and found the following issue:
WARNING: suspicious RCU usage in ipvlan_init

Full report is available here:
https://ci.syzbot.org/series/349ca33e-4ae2-4720-9a69-17a2a9e17107

***

WARNING: suspicious RCU usage in ipvlan_init

tree:      net-next
URL:       https://kernel.googlesource.com/pub/scm/linux/kernel/git/netdev/net-next.git
base:      01cc760632b875c4ad0d8fec0b0c01896b8a36d4
arch:      amd64
compiler:  Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8
config:    https://ci.syzbot.org/builds/d6598a0d-2fcb-499d-95fc-30c5096555dc/config

batman_adv: batadv0: Not using interface batadv_slave_1 (retrying later): interface not active
hsr_slave_0: entered promiscuous mode
hsr_slave_1: entered promiscuous mode
=============================
WARNING: suspicious RCU usage
syzkaller #0 Not tainted
-----------------------------
./include/linux/inetdevice.h:239 suspicious rcu_dereference_check() usage!

other info that might help us debug this:


rcu_scheduler_active = 2, debug_locks = 1
2 locks held by syz-executor/6496:
 #0: ffffffff8ea2f980 (&ops->srcu#2){.+.+}-{0:0}, at: rtnl_link_ops_get+0x23/0x250
 #1: ffffffff8f2cb3c8 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x8e9/0x1c80

stack backtrace:
CPU: 1 UID: 0 PID: 6496 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250
 lockdep_rcu_suspicious+0x140/0x1d0
 ipvlan_init+0xff2/0x1260
 register_netdevice+0x6bf/0x1ae0
 ipvlan_link_new+0x57a/0xc70
 rtnl_newlink_create+0x310/0xb00
 rtnl_newlink+0x16e4/0x1c80
 rtnetlink_rcv_msg+0x7cf/0xb70
 netlink_rcv_skb+0x208/0x470
 netlink_unicast+0x82f/0x9e0
 netlink_sendmsg+0x805/0xb30
 __sock_sendmsg+0x21c/0x270
 __sys_sendto+0x3bd/0x520
 __x64_sys_sendto+0xde/0x100
 do_syscall_64+0xfa/0xfa0
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f114c590e03
Code: 64 89 02 48 c7 c0 ff ff ff ff eb b7 66 2e 0f 1f 84 00 00 00 00 00 90 80 3d 61 77 22 00 00 41 89 ca 74 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 55 48 83 ec 30 44 89 4c 24
RSP: 002b:00007ffecaf08958 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00007f114d314620 RCX: 00007f114c590e03
RDX: 0000000000000058 RSI: 00007f114d314670 RDI: 0000000000000003
RBP: 0000000000000001 R08: 00007ffecaf08974 R09: 000000000000000c
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000003
R13: 0000000000000000 R14: 00007f114d314670 R15: 0000000000000000
 </TASK>

=============================
WARNING: suspicious RCU usage
syzkaller #0 Not tainted
-----------------------------
drivers/net/ipvlan/ipvlan_main.c:238 suspicious rcu_dereference_check() usage!

other info that might help us debug this:


rcu_scheduler_active = 2, debug_locks = 1
2 locks held by syz-executor/6496:
 #0: ffffffff8ea2f980 (&ops->srcu#2){.+.+}-{0:0}, at: rtnl_link_ops_get+0x23/0x250
 #1: ffffffff8f2cb3c8 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0x8e9/0x1c80

stack backtrace:
CPU: 0 UID: 0 PID: 6496 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250
 lockdep_rcu_suspicious+0x140/0x1d0
 ipvlan_init+0x1025/0x1260
 register_netdevice+0x6bf/0x1ae0
 ipvlan_link_new+0x57a/0xc70
 rtnl_newlink_create+0x310/0xb00
 rtnl_newlink+0x16e4/0x1c80
 rtnetlink_rcv_msg+0x7cf/0xb70
 netlink_rcv_skb+0x208/0x470
 netlink_unicast+0x82f/0x9e0
 netlink_sendmsg+0x805/0xb30
 __sock_sendmsg+0x21c/0x270
 __sys_sendto+0x3bd/0x520
 __x64_sys_sendto+0xde/0x100
 do_syscall_64+0xfa/0xfa0
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f114c590e03
Code: 64 89 02 48 c7 c0 ff ff ff ff eb b7 66 2e 0f 1f 84 00 00 00 00 00 90 80 3d 61 77 22 00 00 41 89 ca 74 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 55 48 83 ec 30 44 89 4c 24
RSP: 002b:00007ffecaf08958 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00007f114d314620 RCX: 00007f114c590e03
RDX: 0000000000000058 RSI: 00007f114d314670 RDI: 0000000000000003
RBP: 0000000000000001 R08: 00007ffecaf08974 R09: 000000000000000c
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000003
R13: 0000000000000000 R14: 00007f114d314670 R15: 0000000000000000
 </TASK>


***

If these findings have caused you to resend the series or submit a
separate fix, please add the following tag to your commit message:
  Tested-by: syzbot@syzkaller.appspotmail.com

---
This report is generated by a bot. It may contain errors.
syzbot ci engineers can be reached at syzkaller@googlegroups.com.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan
  2025-11-05 16:58     ` Dmitry Skorodumov
@ 2025-11-06 15:41       ` Dmitry Skorodumov
  2025-11-06 15:56         ` Eric Dumazet
  2025-11-06 16:19         ` Dmitry Skorodumov
  0 siblings, 2 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-06 15:41 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, linux-kernel, andrey.bokhanko, Andrew Lunn,
	David S. Miller, Jakub Kicinski, Paolo Abeni


On 05.11.2025 19:58, Dmitry Skorodumov wrote:
> On 05.11.2025 19:29, Eric Dumazet wrote:
>> On Wed, Nov 5, 2025 at 8:15 AM Dmitry Skorodumov
>> <skorodumov.dmitry@huawei.com> wrote:
>>> If main port interface supports GSO, we need manually segment
>>> the skb before forwarding it to ipvlan interface.
>> Why ?

Hm, indeed, this patch is not needed at all: tap_handle_frame() already does everything needed. It looks like I had a different problem and this patch was an attempt to fix it.

>> Also I do not see any tests, for the whole series ?
> Ok, If modules like this have some kind of unit-tests, I should study it and provide it. I haven't seen this as a common practice for most of the modules here. So far all testing is made manually (likely this should be described anyway)

I see that currently there are no tests for this ipvlan module (maybe I missed something). Do you have any ideas about tests? I'm a bit confused at the moment: designing tests from scratch might be a bit tricky.

Or is it enough to just describe the test cases I checked manually (in some of the patches of the series)?

Dmitry



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan
  2025-11-06 15:41       ` Dmitry Skorodumov
@ 2025-11-06 15:56         ` Eric Dumazet
  2025-11-07 11:26           ` Dmitry Skorodumov
  2025-11-06 16:19         ` Dmitry Skorodumov
  1 sibling, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2025-11-06 15:56 UTC (permalink / raw)
  To: Dmitry Skorodumov
  Cc: netdev, linux-kernel, andrey.bokhanko, Andrew Lunn,
	David S. Miller, Jakub Kicinski, Paolo Abeni

On Thu, Nov 6, 2025 at 7:41 AM Dmitry Skorodumov
<skorodumov.dmitry@huawei.com> wrote:
>
>
> On 05.11.2025 19:58, Dmitry Skorodumov wrote:
> > On 05.11.2025 19:29, Eric Dumazet wrote:
> >> On Wed, Nov 5, 2025 at 8:15 AM Dmitry Skorodumov
> >> <skorodumov.dmitry@huawei.com> wrote:
> >>> If main port interface supports GSO, we need manually segment
> >>> the skb before forwarding it to ipvlan interface.
> >> Why ?
>
> Hm, really, this patch is not needed at all. tap_handle_frame() already does everything needed. Looks like I had another trouble and this patch was an attempt to fix it.
>
> >> Also I do not see any tests, for the whole series ?
> > Ok, If modules like this have some kind of unit-tests, I should study it and provide it. I haven't seen this as a common practice for most of the modules here. So far all testing is made manually (likely this should be described anyway)
>
> I see that currently there is no any tests for this ipvlan module (may be I missed something).. Do you have any ideas about tests? I'm a bit  confused at the moment: designing tests from scratch - this might be a bit tricky.
>
> Or it is enough just describe test-cases I checked manually (in some of the patches of the series)?

I am having a hard time figuring out why you are changing ipvlan
with some features that seem quite unrelated.

ipvlan is heavily used by Google, I am quite reluctant to see a huge
chunk of changes that I do not understand, without spending hours on
it.

The MAC-NAT keyword seems more related to a bridge.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan
  2025-11-06 15:41       ` Dmitry Skorodumov
  2025-11-06 15:56         ` Eric Dumazet
@ 2025-11-06 16:19         ` Dmitry Skorodumov
  1 sibling, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-06 16:19 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, linux-kernel, andrey.bokhanko, Andrew Lunn,
	David S. Miller, Jakub Kicinski, Paolo Abeni


On 06.11.2025 18:41, Dmitry Skorodumov wrote:
> I see that currently there is no any tests for this ipvlan module (may be I missed something).. Do you have any ideas about tests? I'm a bit  confused at the moment: designing tests from scratch - this might be a bit tricky.
>
> Or it is enough just describe test-cases I checked manually (in some of the patches of the series)?
>
I just got a few ideas on how to implement tests. I think I'll provide some tests, at least for the new functionality.

Dmitry


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat
  2025-11-05 16:14 ` [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat Dmitry Skorodumov
  2025-11-05 23:32   ` Bagas Sanjaya
@ 2025-11-06 23:30   ` kernel test robot
  1 sibling, 0 replies; 26+ messages in thread
From: kernel test robot @ 2025-11-06 23:30 UTC (permalink / raw)
  To: Dmitry Skorodumov, netdev, Simon Horman, linux-doc, linux-kernel
  Cc: oe-kbuild-all, andrey.bokhanko, Dmitry Skorodumov, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Jonathan Corbet, Andrew Lunn

Hi Dmitry,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Dmitry-Skorodumov/ipvlan-Preparation-to-support-mac-nat/20251106-004449
base:   net-next/main
patch link:    https://lore.kernel.org/r/20251105161450.1730216-2-skorodumov.dmitry%40huawei.com
patch subject: [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat
config: s390-randconfig-001-20251107 (https://download.01.org/0day-ci/archive/20251107/202511070917.SA9qQyy5-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 8.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251107/202511070917.SA9qQyy5-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511070917.SA9qQyy5-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/net/ipvlan/ipvlan_core.c:435:13: warning: 'is_ipv6_usable' defined but not used [-Wunused-function]
    static bool is_ipv6_usable(const struct in6_addr *addr)
                ^~~~~~~~~~~~~~

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for OF_GPIO
   Depends on [n]: GPIOLIB [=y] && OF [=y] && HAS_IOMEM [=n]
   Selected by [m]:
   - REGULATOR_RT5133 [=m] && REGULATOR [=y] && I2C [=m] && GPIOLIB [=y] && OF [=y]


vim +/is_ipv6_usable +435 drivers/net/ipvlan/ipvlan_core.c

   434	
 > 435	static bool is_ipv6_usable(const struct in6_addr *addr)
   436	{
   437		return !ipv6_addr_is_multicast(addr) && !ipv6_addr_loopback(addr) &&
   438		       !ipv6_addr_any(addr);
   439	}
   440	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan
  2025-11-06 15:56         ` Eric Dumazet
@ 2025-11-07 11:26           ` Dmitry Skorodumov
  0 siblings, 0 replies; 26+ messages in thread
From: Dmitry Skorodumov @ 2025-11-07 11:26 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, linux-kernel, andrey.bokhanko, Andrew Lunn,
	David S. Miller, Jakub Kicinski, Paolo Abeni

>> I see that currently there is no any tests for this ipvlan module (may be I missed something).. Do you have any ideas about tests? I'm a bit  confused at the moment: designing tests from scratch - this might be a bit tricky.
>>
>> Or it is enough just describe test-cases I checked manually (in some of the patches of the series)?
> I have some hard time to figure out why you are changing ipvlan, with
> some features that seem quite unrelated.
Sorry! I should have sent a more descriptive cover letter with all maintainers in CC.
> ipvlan is heavily used by Google, I am quite reluctant to see a huge
> chunk of changes that I do not understand, without spending hours on
> it.
>
> The MAC-NAT keyword seems more related to a bridge.
>
I tried to make sure the new functionality does not affect any existing code. The only place that changes behavior is "[patch 2] Send mcasts out directly in ipvlan_xmit_mode_l2". Maybe I should spend some time and find a way to not change behavior at all. All other places should be under "if (ipvlan_is_macnat(port))".

Now I'd also like to implement some tests and try to ensure that the existing functionality continues to work well. I hope that after review and tests, there will be no bugs.

> The MAC-NAT keyword seems more related to a bridge.

At the start of work on this feature, I saw three options: 1) modify IPVLan, 2) modify net/bridge, 3) clone IPVLan into a new module and extend it.

But net/bridge is already bloated, and I believe it is better not to touch it. And IPVlan already has all the required infrastructure functions. Actually, all the new functionality is about 600 lines of diff (patches 1 and 4). IPVLan is essentially a "bridge" in its functionality, so extending it to learn IPs and do mac-nat is easy. All other diffs are just improvements (like improved handling of IP conflicts and refactored handling of validator/address events).

And... I saw that a lot of people are already using IPVLan to bridge to WiFi, though with a lot of limitations and trouble.

Here is slightly rewritten documentation (AI also suggests server use-case scenarios, but I'm skeptical about them):

+4.4 L2_MACNAT mode:
+-------------------
+
+This mode extends the L2 mode and is primarily designed for desktop virtual
+machines that need to bridge to wireless interfaces. In standard L2 mode,
+you must configure IP addresses on slave interfaces to enable frame
+multiplexing between slaves and the master.
+
+In L2_MACNAT mode, IPVLAN automatically learns IPv4/IPv6 and MAC addresses
+from outgoing packets. For transmitted packets, the source MAC address
+is replaced with the MAC address of the main interface. Received packets
+are routed to the interface that previously used the destination address,
+and the destination MAC is replaced with the learned MAC address.
+
+This enables slave interfaces to automatically obtain IP addresses
+via DHCP and IPv6 autoconfiguration.
+
+Additionally, dev_add_pack() is configured on the master interface to capture
+outgoing frames and multiplex them to slave interfaces when necessary.

Dmitry



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge
  2025-11-05 16:14 ` [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge Dmitry Skorodumov
@ 2025-11-11  0:33   ` kernel test robot
  0 siblings, 0 replies; 26+ messages in thread
From: kernel test robot @ 2025-11-11  0:33 UTC (permalink / raw)
  To: Dmitry Skorodumov, netdev, linux-kernel
  Cc: oe-kbuild-all, andrey.bokhanko, Dmitry Skorodumov, Andrew Lunn,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni

Hi Dmitry,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Dmitry-Skorodumov/ipvlan-Preparation-to-support-mac-nat/20251106-004449
base:   net-next/main
patch link:    https://lore.kernel.org/r/20251105161450.1730216-8-skorodumov.dmitry%40huawei.com
patch subject: [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge
config: um-randconfig-r123-20251110 (https://download.01.org/0day-ci/archive/20251111/202511110823.oBrdGTfa-lkp@intel.com/config)
compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project 93d445cba39f4dd3dcda4fa1433eca825cf8fc09)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251111/202511110823.oBrdGTfa-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511110823.oBrdGTfa-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
   drivers/net/ipvlan/ipvlan_core.c:56:36: sparse: sparse: incorrect type in argument 1 (different base types) @@     expected unsigned int [usertype] a @@     got restricted __be32 const [usertype] s_addr @@
   drivers/net/ipvlan/ipvlan_core.c:56:36: sparse:     expected unsigned int [usertype] a
   drivers/net/ipvlan/ipvlan_core.c:56:36: sparse:     got restricted __be32 const [usertype] s_addr
>> drivers/net/ipvlan/ipvlan_core.c:794:23: sparse: sparse: incorrect type in argument 1 (different base types) @@     expected unsigned short [usertype] val @@     got restricted __be16 [usertype] payload_len @@
   drivers/net/ipvlan/ipvlan_core.c:794:23: sparse:     expected unsigned short [usertype] val
   drivers/net/ipvlan/ipvlan_core.c:794:23: sparse:     got restricted __be16 [usertype] payload_len
   drivers/net/ipvlan/ipvlan_core.c:794:23: sparse: sparse: cast from restricted __be16
   drivers/net/ipvlan/ipvlan_core.c:794:23: sparse: sparse: cast from restricted __be16
   drivers/net/ipvlan/ipvlan_core.c:794:19: sparse: sparse: cast from restricted __be16
   drivers/net/ipvlan/ipvlan_core.c:854:23: sparse: sparse: incorrect type in argument 1 (different base types) @@     expected unsigned short [usertype] val @@     got restricted __be16 [usertype] payload_len @@
   drivers/net/ipvlan/ipvlan_core.c:854:23: sparse:     expected unsigned short [usertype] val
   drivers/net/ipvlan/ipvlan_core.c:854:23: sparse:     got restricted __be16 [usertype] payload_len
   drivers/net/ipvlan/ipvlan_core.c:854:23: sparse: sparse: cast from restricted __be16
   drivers/net/ipvlan/ipvlan_core.c:854:23: sparse: sparse: cast from restricted __be16
   drivers/net/ipvlan/ipvlan_core.c:854:19: sparse: sparse: cast from restricted __be16

vim +794 drivers/net/ipvlan/ipvlan_core.c

   785	
   786	static u8 *ipvlan_search_icmp6_ll_addr(struct sk_buff *skb, u8 icmp_option)
   787	{
   788		/* skb is ensured to pullable for all ipv6 payload_len by caller */
   789		struct ipv6hdr *ip6h = ipv6_hdr(skb);
   790		struct icmp6hdr *icmph;
   791		int ndsize, curr_off;
   792	
   793		icmph = (struct icmp6hdr *)(ip6h + 1);
 > 794		ndsize = (int)htons(ip6h->payload_len);
   795		curr_off = sizeof(*icmph);
   796	
   797		if (icmph->icmp6_type != NDISC_ROUTER_SOLICITATION)
   798			curr_off += sizeof(struct in6_addr);
   799	
   800		while ((curr_off + 2) < ndsize) {
   801			u8  *data = (u8 *)icmph + curr_off;
   802			u32 opt_len = data[1] << 3;
   803	
   804			if (unlikely(opt_len == 0))
   805				return NULL;
   806	
   807			if (data[0] != icmp_option) {
   808				curr_off += opt_len;
   809				continue;
   810			}
   811	
   812			if (unlikely(opt_len < ETH_ALEN + 2))
   813				return NULL;
   814	
   815			if (unlikely(curr_off + opt_len > ndsize))
   816				return NULL;
   817	
   818			return data + 2;
   819		}
   820	
   821		return NULL;
   822	}
   823	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2025-11-11  0:34 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-05 16:14 [PATCH net-next v3 00/14] ipvlan: support mac-nat mode Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 01/14] ipvlan: Preparation to support mac-nat Dmitry Skorodumov
2025-11-05 23:32   ` Bagas Sanjaya
2025-11-06 23:30   ` kernel test robot
2025-11-05 16:14 ` [PATCH net-next 02/14] ipvlan: Send mcasts out directly in ipvlan_xmit_mode_l2() Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 03/14] ipvlan: Handle rx mcast-ip and unicast eth Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 04/14] ipvlan: Added some kind of MAC NAT Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 05/14] ipvlan: Forget all IP when device goes down Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 06/14] ipvlan: Support GSO for port -> ipvlan Dmitry Skorodumov
2025-11-05 16:29   ` Eric Dumazet
2025-11-05 16:58     ` Dmitry Skorodumov
2025-11-06 15:41       ` Dmitry Skorodumov
2025-11-06 15:56         ` Eric Dumazet
2025-11-07 11:26           ` Dmitry Skorodumov
2025-11-06 16:19         ` Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge Dmitry Skorodumov
2025-11-11  0:33   ` kernel test robot
2025-11-05 16:14 ` [PATCH net-next 08/14] ipvlan: Make the addrs_lock be per port Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 09/14] ipvlan: Take addr_lock in ipvlan_open() Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 10/14] ipvlan: Don't allow children to use IPs of main Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 11/14] ipvlan: const-specifier for functions that use iaddr Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 12/14] ipvlan: Common code from v6/v4 validator_event Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 13/14] ipvlan: common code to handle ipv6/ipv4 address events Dmitry Skorodumov
2025-11-05 16:14 ` [PATCH net-next 14/14] ipvlan: Ignore PACKET_LOOPBACK in handle_mode_l2() Dmitry Skorodumov
2025-11-06  9:03 ` [syzbot ci] Re: ipvlan: support mac-nat mode syzbot ci
  -- strict thread matches above, loose matches on Subject: below --
2025-11-05 16:07 [PATCH net-next v2 00/14] " Dmitry Skorodumov
2025-11-05 16:07 ` [PATCH net-next 07/14] ipvlan: Support IPv6 for learnable l2-bridge Dmitry Skorodumov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).