From: Ralf Lici <ralf@mandelbit.com>
To: netdev@vger.kernel.org
Cc: "Daniel Gröber" <dxld@darkboxed.org>,
"Ralf Lici" <ralf@mandelbit.com>,
"Antonio Quartulli" <antonio@mandelbit.com>,
"Andrew Lunn" <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>,
"Eric Dumazet" <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
"Paolo Abeni" <pabeni@redhat.com>,
linux-kernel@vger.kernel.org
Subject: [RFC net-next 08/15] ipxlat: add translation engine and dispatch core
Date: Thu, 19 Mar 2026 16:12:17 +0100 [thread overview]
Message-ID: <20260319151230.655687-9-ralf@mandelbit.com> (raw)
In-Reply-To: <20260319151230.655687-1-ralf@mandelbit.com>
Introduce the core start_xmit processing flow: validate, select action,
translate, and forward. Centralize action resolution in the dispatch
layer and keep per-direction translation logic separate from device
glue. The result is a single data-path entry point with explicit control
over drop/forward/emit behavior.
Signed-off-by: Ralf Lici <ralf@mandelbit.com>
---
drivers/net/ipxlat/Makefile | 4 +
drivers/net/ipxlat/dispatch.c | 104 +++++++++++++++
drivers/net/ipxlat/dispatch.h | 71 +++++++++++
drivers/net/ipxlat/main.c | 6 +-
drivers/net/ipxlat/packet.c | 1 +
drivers/net/ipxlat/translate_46.c | 198 +++++++++++++++++++++++++++++
drivers/net/ipxlat/translate_46.h | 73 +++++++++++
drivers/net/ipxlat/translate_64.c | 205 ++++++++++++++++++++++++++++++
drivers/net/ipxlat/translate_64.h | 56 ++++++++
drivers/net/ipxlat/transport.c | 11 ++
drivers/net/ipxlat/transport.h | 5 +
11 files changed, 732 insertions(+), 2 deletions(-)
create mode 100644 drivers/net/ipxlat/dispatch.c
create mode 100644 drivers/net/ipxlat/dispatch.h
create mode 100644 drivers/net/ipxlat/translate_46.c
create mode 100644 drivers/net/ipxlat/translate_46.h
create mode 100644 drivers/net/ipxlat/translate_64.c
create mode 100644 drivers/net/ipxlat/translate_64.h
diff --git a/drivers/net/ipxlat/Makefile b/drivers/net/ipxlat/Makefile
index 90dbc0489fa2..d7b7097aee5f 100644
--- a/drivers/net/ipxlat/Makefile
+++ b/drivers/net/ipxlat/Makefile
@@ -7,3 +7,7 @@ obj-$(CONFIG_IPXLAT) := ipxlat.o
ipxlat-objs += main.o
ipxlat-objs += address.o
ipxlat-objs += packet.o
+ipxlat-objs += transport.o
+ipxlat-objs += dispatch.o
+ipxlat-objs += translate_46.o
+ipxlat-objs += translate_64.o
diff --git a/drivers/net/ipxlat/dispatch.c b/drivers/net/ipxlat/dispatch.c
new file mode 100644
index 000000000000..133d30859f49
--- /dev/null
+++ b/drivers/net/ipxlat/dispatch.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* IPXLAT - Stateless IP/ICMP Translation (SIIT) virtual device driver
+ *
+ * Copyright (C) 2024- Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Copyright (C) 2026- Mandelbit SRL
+ * Copyright (C) 2026- Daniel Gröber <dxld@darkboxed.org>
+ *
+ * Author: Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Antonio Quartulli <antonio@mandelbit.com>
+ * Daniel Gröber <dxld@darkboxed.org>
+ * Ralf Lici <ralf@mandelbit.com>
+ */
+
+#include <net/ip.h>
+
+#include "dispatch.h"
+#include "packet.h"
+#include "translate_46.h"
+#include "translate_64.h"
+
+static enum ipxlat_action
+ipxlat_resolve_failed_action(const struct sk_buff *skb)
+{
+ return IPXLAT_ACT_DROP; /* every failure maps to a plain drop; @skb is not inspected */
+}
+
+/* Validate and translate @skb according to its L3 protocol. Validation runs
+ * first and translation is attempted only when it succeeds; anything that is
+ * neither IPv4 nor IPv6 is dropped.
+ */
+enum ipxlat_action ipxlat_translate(struct ipxlat_priv *ipxlat,
+				    struct sk_buff *skb)
+{
+	int err;
+
+	/* start every packet from a zeroed control block */
+	memset(skb->cb, 0, sizeof(struct ipxlat_cb));
+
+	switch (ntohs(skb->protocol)) {
+	case ETH_P_IPV6:
+		err = ipxlat_v6_validate_skb(skb);
+		if (likely(!err))
+			err = ipxlat_64_translate(ipxlat, skb);
+		break;
+	case ETH_P_IP:
+		err = ipxlat_v4_validate_skb(ipxlat, skb);
+		if (likely(!err))
+			err = ipxlat_46_translate(ipxlat, skb);
+		break;
+	default:
+		return IPXLAT_ACT_DROP;
+	}
+
+	if (unlikely(err))
+		return ipxlat_resolve_failed_action(skb);
+
+	return IPXLAT_ACT_FWD;
+}
+
+/* mark current skb as drop-with-icmp and cache type/code/info for dispatch */
+void ipxlat_mark_icmp_drop(struct sk_buff *skb, u8 type, u8 code, u32 info)
+{
+ struct ipxlat_cb *cb = ipxlat_skb_cb(skb);
+
+ cb->emit_icmp_err = true;
+ cb->icmp_err.type = type;
+ cb->icmp_err.code = code;
+ cb->icmp_err.info = info;
+}
+
+/* Hand a translated packet back to the stack through the device gro_cells
+ * and account it as received on success.
+ */
+static void ipxlat_forward_pkt(struct ipxlat_priv *ipxlat, struct sk_buff *skb)
+{
+	/* read the length up front: the skb is handed to gro_cells below */
+	const unsigned int rx_bytes = skb->len;
+
+	/* reinject as a fresh packet with scrubbed metadata */
+	skb_set_queue_mapping(skb, 0);
+	skb_scrub_packet(skb, false);
+
+	/* on failure gro_cells updates rx drop stats internally */
+	if (unlikely(gro_cells_receive(&ipxlat->gro_cells, skb) !=
+		     NET_RX_SUCCESS))
+		return;
+
+	dev_dstats_rx_add(ipxlat->dev, rx_bytes);
+}
+
+/* Run one packet through translation and act on the result: forward it on
+ * success, otherwise count a TX drop and free it. @skb is consumed on every
+ * path.
+ */
+int ipxlat_process_skb(struct ipxlat_priv *ipxlat, struct sk_buff *skb,
+		       bool allow_pre_frag)
+{
+	/* not consulted by the actions handled here */
+	(void)allow_pre_frag;
+
+	switch (ipxlat_translate(ipxlat, skb)) {
+	case IPXLAT_ACT_FWD:
+		/* account TX before the skb is handed off for reinjection */
+		dev_dstats_tx_add(ipxlat->dev, skb->len);
+		ipxlat_forward_pkt(ipxlat, skb);
+		return 0;
+	case IPXLAT_ACT_DROP:
+		break;
+	default:
+		/* an action this dispatcher does not handle is dropped too */
+		DEBUG_NET_WARN_ON_ONCE(1);
+		break;
+	}
+
+	dev_dstats_tx_dropped(ipxlat->dev);
+	kfree_skb(skb);
+	return -EINVAL;
+}
diff --git a/drivers/net/ipxlat/dispatch.h b/drivers/net/ipxlat/dispatch.h
new file mode 100644
index 000000000000..fa6fafea656b
--- /dev/null
+++ b/drivers/net/ipxlat/dispatch.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* IPXLAT - Stateless IP/ICMP Translation (SIIT) virtual device driver
+ *
+ * Copyright (C) 2024- Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Copyright (C) 2026- Mandelbit SRL
+ * Copyright (C) 2026- Daniel Gröber <dxld@darkboxed.org>
+ *
+ * Author: Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Antonio Quartulli <antonio@mandelbit.com>
+ * Daniel Gröber <dxld@darkboxed.org>
+ * Ralf Lici <ralf@mandelbit.com>
+ */
+
+#ifndef _NET_IPXLAT_DISPATCH_H_
+#define _NET_IPXLAT_DISPATCH_H_
+
+#include "ipxlpriv.h"
+
+struct sk_buff;
+
+/**
+ * enum ipxlat_action - result of packet translation dispatch
+ * @IPXLAT_ACT_DROP: drop the packet; ipxlat_process_skb() counts a TX drop
+ * and frees the skb. Also returned for packets that are neither IPv4 nor
+ * IPv6 and for any validation/translation failure.
+ * @IPXLAT_ACT_FWD: packet translated and ready for forward reinjection
+ * @IPXLAT_ACT_PRE_FRAG: packet must be fragmented before 4->6 translation
+ * @IPXLAT_ACT_ICMP_ERR: drop packet and emit translator-generated ICMP error
+ *
+ * Returned by ipxlat_translate() and consumed by ipxlat_process_skb().
+ */
+enum ipxlat_action {
+ IPXLAT_ACT_DROP,
+ IPXLAT_ACT_FWD,
+ IPXLAT_ACT_PRE_FRAG,
+ IPXLAT_ACT_ICMP_ERR,
+};
+
+/**
+ * ipxlat_mark_icmp_drop - cache translator-generated ICMP action in skb cb
+ * @skb: packet being rejected
+ * @type: ICMP type to emit
+ * @code: ICMP code to emit
+ * @info: ICMP auxiliary info (pointer/MTU), host-endian
+ *
+ * This does not emit immediately; dispatch consumes the mark later and sends
+ * the ICMP error through the appropriate address family path.
+ */
+void ipxlat_mark_icmp_drop(struct sk_buff *skb, u8 type, u8 code, u32 info);
+
+/**
+ * ipxlat_translate - validate/translate one packet and return next action
+ * @ipxlat: translator private context
+ * @skb: packet to process
+ *
+ * Return: one of &enum ipxlat_action.
+ */
+enum ipxlat_action ipxlat_translate(struct ipxlat_priv *ipxlat,
+ struct sk_buff *skb);
+
+/**
+ * ipxlat_process_skb - top-level packet handler for ndo_start_xmit/reinjection
+ * @ipxlat: translator private context
+ * @skb: packet to process
+ * @allow_pre_frag: allow 4->6 pre-fragment action for this invocation
+ *
+ * The function always consumes @skb directly or through fragmentation
+ * callback/reinjection paths.
+ *
+ * Return: 0 on success, negative errno on processing failure.
+ */
+int ipxlat_process_skb(struct ipxlat_priv *ipxlat, struct sk_buff *skb,
+ bool allow_pre_frag);
+
+#endif /* _NET_IPXLAT_DISPATCH_H_ */
diff --git a/drivers/net/ipxlat/main.c b/drivers/net/ipxlat/main.c
index 26b7f5b6ff20..a1b4bcd39478 100644
--- a/drivers/net/ipxlat/main.c
+++ b/drivers/net/ipxlat/main.c
@@ -15,6 +15,7 @@
#include <net/ip.h>
+#include "dispatch.h"
#include "ipxlpriv.h"
#include "main.h"
@@ -56,8 +57,9 @@ static void ipxlat_dev_uninit(struct net_device *dev)
static int ipxlat_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- dev_dstats_tx_dropped(dev);
- kfree_skb(skb);
+ struct ipxlat_priv *ipxlat = netdev_priv(dev);
+
+ ipxlat_process_skb(ipxlat, skb, true);
return NETDEV_TX_OK;
}
diff --git a/drivers/net/ipxlat/packet.c b/drivers/net/ipxlat/packet.c
index b9a9af1b3adb..b37a3e55aff8 100644
--- a/drivers/net/ipxlat/packet.c
+++ b/drivers/net/ipxlat/packet.c
@@ -13,6 +13,7 @@
#include <linux/icmp.h>
+#include "dispatch.h"
#include "packet.h"
/* Shift cached skb cb offsets by the L3 header delta after in-place rewrite.
diff --git a/drivers/net/ipxlat/translate_46.c b/drivers/net/ipxlat/translate_46.c
new file mode 100644
index 000000000000..aec8500db2c2
--- /dev/null
+++ b/drivers/net/ipxlat/translate_46.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/* IPXLAT - Stateless IP/ICMP Translation (SIIT) virtual device driver
+ *
+ * Copyright (C) 2024- Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Copyright (C) 2026- Mandelbit SRL
+ * Copyright (C) 2026- Daniel Gröber <dxld@darkboxed.org>
+ *
+ * Author: Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Antonio Quartulli <antonio@mandelbit.com>
+ * Daniel Gröber <dxld@darkboxed.org>
+ * Ralf Lici <ralf@mandelbit.com>
+ */
+
+#include <net/ip6_route.h>
+
+#include "address.h"
+#include "packet.h"
+#include "transport.h"
+#include "translate_46.h"
+
+/* ICMP is the only protocol number that changes; the rest pass through */
+u8 ipxlat_46_map_proto_to_nexthdr(u8 protocol)
+{
+	if (protocol == IPPROTO_ICMP)
+		return NEXTHDR_ICMP;
+
+	return protocol;
+}
+
+/* Fill @fh6 from the fragmentation fields of @hdr4: mapped next header,
+ * converted offset plus "more fragments" flag, and the 16-bit IPv4 ID
+ * widened to the 32-bit IPv6 identification.
+ */
+void ipxlat_46_build_frag_hdr(struct frag_hdr *fh6, const struct iphdr *hdr4,
+			      u8 l4_proto)
+{
+	const bool more_frags = !!(be16_to_cpu(hdr4->frag_off) & IP_MF);
+	const u16 ident4 = be16_to_cpu(hdr4->id);
+
+	fh6->nexthdr = ipxlat_46_map_proto_to_nexthdr(l4_proto);
+	fh6->reserved = 0;
+	fh6->frag_off = ipxlat_build_frag6_offset(ipxlat_get_frag4_offset(hdr4),
+						  more_frags);
+	fh6->identification = cpu_to_be32(ident4);
+}
+
+/* Write every IPv6 base header field except the addresses, which are left
+ * untouched here.
+ */
+void ipxlat_46_build_l3(struct ipv6hdr *iph6, const struct iphdr *iph4,
+			unsigned int payload_len, u8 nexthdr, u8 hop_limit)
+{
+	const u8 tos = iph4->tos;
+
+	/* TOS becomes the traffic class: high nibble in priority, low nibble
+	 * in the upper half of flow_lbl[0]; the flow label itself is zero
+	 */
+	iph6->version = 6;
+	iph6->priority = tos >> 4;
+	iph6->flow_lbl[0] = (tos & 0x0F) << 4;
+	iph6->flow_lbl[1] = 0;
+	iph6->flow_lbl[2] = 0;
+
+	iph6->payload_len = htons(payload_len);
+	iph6->nexthdr = nexthdr;
+	iph6->hop_limit = hop_limit;
+}
+
+/* Resolve the IPv6 path MTU a 4->6 translated packet would see.
+ * The route is looked up for the mapped source/destination pair; when the
+ * lookup reports an error the translator device MTU is used instead, and a
+ * successful result is never allowed to exceed that device MTU.
+ */
+unsigned int ipxlat_46_lookup_pmtu6(struct ipxlat_priv *ipxlat,
+				    const struct sk_buff *skb,
+				    const struct iphdr *in4)
+{
+	const unsigned int dev_mtu = READ_ONCE(ipxlat->dev->mtu);
+	struct flowi6 fl6 = {};
+	struct dst_entry *dst;
+	unsigned int mtu6;
+
+	/* describe the translated flow: mapped endpoints plus the skb mark */
+	fl6.flowi6_mark = skb->mark;
+	ipxlat_46_convert_addr(&ipxlat->xlat_prefix6, in4->saddr, &fl6.saddr);
+	ipxlat_46_convert_addr(&ipxlat->xlat_prefix6, in4->daddr, &fl6.daddr);
+
+	dst = ip6_route_output(dev_net(ipxlat->dev), NULL, &fl6);
+	if (likely(!dst->error)) {
+		/* Route lookup can return a very large MTU (eg, local/loopback
+		 * style routes) that does not reflect the translator egress
+		 * constraint. Clamp with the translator device MTU so DF
+		 * decisions are stable and pre-fragment planning never targets
+		 * packets larger than what this interface can hand to the next
+		 * stages.
+		 */
+		mtu6 = min_t(unsigned int, dst_mtu(dst), dev_mtu);
+	} else {
+		mtu6 = dev_mtu;
+	}
+
+	dst_release(dst);
+	return mtu6;
+}
+
+/**
+ * ipxlat_46_translate - translate one validated packet from IPv4 to IPv6
+ * @ipxlat: translator private context
+ * @skb: packet to translate
+ *
+ * Rewrites outer L3 in place, rebases cached offsets and translates L4 on
+ * first fragments only. The IPv6 hop limit is the IPv4 TTL minus one, and an
+ * IPv6 Fragment Header is emitted only when the IPv4 packet was a fragment.
+ *
+ * The skb is not freed by this function itself. On failure it may already be
+ * partially rewritten (the L3 swap happens before the L4 step), so the
+ * caller must not treat it as the original IPv4 packet anymore.
+ *
+ * Return: 0 on success, negative errno on translation failure (-ENOMEM when
+ * headroom or writable transport bytes cannot be obtained; otherwise the
+ * error reported by the offset rebase, the L4 helper or offload
+ * finalization).
+ */
+int ipxlat_46_translate(struct ipxlat_priv *ipxlat, struct sk_buff *skb)
+{
+ unsigned int min_l4_len, old_l3_len, new_l3_len;
+ struct ipxlat_cb *cb = ipxlat_skb_cb(skb);
+ const struct iphdr outer4 = *ip_hdr(skb);
+ const u8 in_l4_proto = cb->l4_proto;
+ bool has_frag, first_frag;
+ struct frag_hdr *fh6;
+ struct ipv6hdr *iph6;
+ int l3_delta, err;
+ u8 out_l4_proto;
+
+ /* snapshot the original IPv4 header fields before skb layout changes;
+ * outer4 is a by-value copy, so it stays valid after the header area
+ * is overwritten below
+ */
+ has_frag = ip_is_fragment(&outer4);
+ first_frag = ipxlat_is_first_frag4(&outer4);
+ out_l4_proto = ipxlat_46_map_proto_to_nexthdr(in_l4_proto);
+
+ old_l3_len = cb->l3_hdr_len;
+ new_l3_len = sizeof(struct ipv6hdr) +
+ (has_frag ? sizeof(struct frag_hdr) : 0);
+ l3_delta = (int)new_l3_len - (int)old_l3_len;
+
+ /* make room for the new hdrs; extra headroom is requested only when
+ * the header grows (positive l3_delta)
+ */
+ if (unlikely(skb_cow_head(skb, max_t(int, 0, l3_delta))))
+ return -ENOMEM;
+
+ /* replace outer L3 area: drop IPv4 hdr, reserve IPv6(+Frag) hdr */
+ skb_pull(skb, old_l3_len);
+ skb_push(skb, new_l3_len);
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, new_l3_len);
+ skb->protocol = htons(ETH_P_IPV6);
+
+ /* build outer IPv6 base hdr from translated IPv4 fields */
+ iph6 = ipv6_hdr(skb);
+ ipxlat_46_build_l3(iph6, &outer4, skb->len - sizeof(*iph6),
+ out_l4_proto, outer4.ttl - 1);
+
+ /* translate IPv4 endpoints into IPv6 addresses using xlat_prefix6 */
+ ipxlat_46_convert_addrs(&ipxlat->xlat_prefix6, &outer4, iph6);
+
+ /* add IPv6 fragment hdr when the IPv4 packet carried fragmentation */
+ if (unlikely(has_frag)) {
+ iph6->nexthdr = NEXTHDR_FRAGMENT;
+
+ fh6 = (struct frag_hdr *)(iph6 + 1);
+ ipxlat_46_build_frag_hdr(fh6, &outer4, in_l4_proto);
+ cb->fragh_off = sizeof(struct ipv6hdr);
+ }
+
+ /* Rebase cached offsets after L3 size delta.
+ * For outer 4->6 translation this should not underflow: cached offsets
+ * were built from l3_off + ip4_len(+...) and delta = ip6_len - ip4_len,
+ * so ip4_len cancels out after rebasing. A failure here means internal
+ * metadata inconsistency, not a packet validation outcome.
+ */
+ err = ipxlat_cb_rebase_offsets(cb, l3_delta);
+ if (unlikely(err)) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return err;
+ }
+
+ cb->l3_hdr_len = new_l3_len;
+ cb->l4_proto = out_l4_proto;
+ DEBUG_NET_WARN_ON_ONCE(!ipxlat_cb_offsets_valid(cb));
+
+ /* non-first fragments have no transport header to translate */
+ if (unlikely(!first_frag))
+ goto out;
+
+ /* ensure transport bytes are writable before L4 csum/proto rewrites */
+ min_l4_len = ipxlat_l4_min_len(in_l4_proto);
+ if (unlikely(skb_ensure_writable(skb, skb_transport_offset(skb) +
+ min_l4_len)))
+ return -ENOMEM;
+
+ /* translate transport hdr and pseudohdr dependent checksums;
+ * protocols other than TCP/UDP/ICMP are passed through untouched
+ */
+ switch (in_l4_proto) {
+ case IPPROTO_TCP:
+ err = ipxlat_46_outer_tcp(skb, &outer4);
+ break;
+ case IPPROTO_UDP:
+ err = ipxlat_46_outer_udp(skb, &outer4);
+ break;
+ case IPPROTO_ICMP:
+ err = ipxlat_46_icmp(ipxlat, skb);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+ if (unlikely(err))
+ return err;
+
+out:
+ /* normalize checksum/offload metadata for the translated frame */
+ return ipxlat_finalize_offload(skb, in_l4_proto, has_frag,
+ SKB_GSO_TCPV4, SKB_GSO_TCPV6);
+}
diff --git a/drivers/net/ipxlat/translate_46.h b/drivers/net/ipxlat/translate_46.h
new file mode 100644
index 000000000000..75def10d0cad
--- /dev/null
+++ b/drivers/net/ipxlat/translate_46.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* IPXLAT - Stateless IP/ICMP Translation (SIIT) virtual device driver
+ *
+ * Copyright (C) 2024- Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Copyright (C) 2026- Mandelbit SRL
+ * Copyright (C) 2026- Daniel Gröber <dxld@darkboxed.org>
+ *
+ * Author: Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Antonio Quartulli <antonio@mandelbit.com>
+ * Daniel Gröber <dxld@darkboxed.org>
+ * Ralf Lici <ralf@mandelbit.com>
+ */
+
+#ifndef _NET_IPXLAT_TRANSLATE_46_H_
+#define _NET_IPXLAT_TRANSLATE_46_H_
+
+#include "ipxlpriv.h"
+
+struct iphdr;
+struct ipv6hdr;
+struct frag_hdr;
+struct sk_buff;
+
+/**
+ * ipxlat_46_map_proto_to_nexthdr - map IPv4 L4 protocol to IPv6 nexthdr
+ * @protocol: IPv4 L4 protocol
+ *
+ * Return: IPv6 next-header value corresponding to @protocol.
+ */
+u8 ipxlat_46_map_proto_to_nexthdr(u8 protocol);
+
+/**
+ * ipxlat_46_build_frag_hdr - build IPv6 Fragment Header from IPv4 fragment info
+ * @fh6: output IPv6 fragment header
+ * @hdr4: source IPv4 header
+ * @l4_proto: original IPv4 L4 protocol
+ */
+void ipxlat_46_build_frag_hdr(struct frag_hdr *fh6, const struct iphdr *hdr4,
+ u8 l4_proto);
+
+/**
+ * ipxlat_46_build_l3 - build translated outer IPv6 header from IPv4 metadata
+ * @iph6: output IPv6 header
+ * @iph4: source IPv4 header
+ * @payload_len: IPv6 payload length
+ * @nexthdr: resulting IPv6 nexthdr
+ * @hop_limit: resulting IPv6 hop limit
+ */
+void ipxlat_46_build_l3(struct ipv6hdr *iph6, const struct iphdr *iph4,
+ unsigned int payload_len, u8 nexthdr, u8 hop_limit);
+
+/**
+ * ipxlat_46_lookup_pmtu6 - lookup post-translation IPv6 PMTU for a 4->6 packet
+ * @ipxlat: translator private context
+ * @skb: packet being translated
+ * @in4: source IPv4 header snapshot
+ *
+ * Return: effective PMTU clamped against translator device MTU.
+ */
+unsigned int ipxlat_46_lookup_pmtu6(struct ipxlat_priv *ipxlat,
+ const struct sk_buff *skb,
+ const struct iphdr *in4);
+
+/**
+ * ipxlat_46_translate - translate outer packet from IPv4 to IPv6 in place
+ * @ipxlat: translator private context
+ * @skb: packet to translate
+ *
+ * Return: 0 on success, negative errno on translation failure.
+ */
+int ipxlat_46_translate(struct ipxlat_priv *ipxlat, struct sk_buff *skb);
+
+#endif /* _NET_IPXLAT_TRANSLATE_46_H_ */
diff --git a/drivers/net/ipxlat/translate_64.c b/drivers/net/ipxlat/translate_64.c
new file mode 100644
index 000000000000..50a95fb75f9d
--- /dev/null
+++ b/drivers/net/ipxlat/translate_64.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/* IPXLAT - Stateless IP/ICMP Translation (SIIT) virtual device driver
+ *
+ * Copyright (C) 2024- Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Copyright (C) 2026- Mandelbit SRL
+ * Copyright (C) 2026- Daniel Gröber <dxld@darkboxed.org>
+ *
+ * Author: Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Antonio Quartulli <antonio@mandelbit.com>
+ * Daniel Gröber <dxld@darkboxed.org>
+ * Ralf Lici <ralf@mandelbit.com>
+ */
+
+#include <linux/icmpv6.h>
+#include <net/ip.h>
+
+#include "translate_64.h"
+#include "address.h"
+#include "packet.h"
+#include "transport.h"
+
+/* ICMPv6 is the only next-header value that changes; the rest pass through */
+u8 ipxlat_64_map_nexthdr_proto(u8 nexthdr)
+{
+	if (nexthdr == NEXTHDR_ICMP)
+		return IPPROTO_ICMP;
+
+	return nexthdr;
+}
+
+/* Write a complete option-less IPv4 header (ihl = 5) into @iph4, taking the
+ * TOS from the traffic class of @iph6 and everything else from the
+ * arguments, then compute the header checksum.
+ */
+void ipxlat_64_build_l3(struct iphdr *iph4, const struct ipv6hdr *iph6,
+			unsigned int tot_len, __be16 frag_off, u8 protocol,
+			__be32 saddr, __be32 daddr, u8 ttl, __be16 id)
+{
+	iph4->version = 4;
+	iph4->ihl = 5;
+	iph4->tos = ipxlat_get_ipv6_tclass(iph6);
+	iph4->tot_len = cpu_to_be16(tot_len);
+	iph4->id = id;
+	iph4->frag_off = frag_off;
+	iph4->ttl = ttl;
+	iph4->protocol = protocol;
+	iph4->saddr = saddr;
+	iph4->daddr = daddr;
+
+	/* checksum goes last, computed with the check field zeroed */
+	iph4->check = 0;
+	iph4->check = ip_fast_csum(iph4, iph4->ihl);
+}
+
+/* Derive the IPv4 frag_off field (flags + offset) for a 6->4 translated
+ * packet. @frag6 is the snapshot of the IPv6 Fragment Header, or NULL when
+ * the packet had none.
+ */
+static __be16 ipxlat_64_build_frag_off(const struct sk_buff *skb,
+				       const struct frag_hdr *frag6,
+				       u8 l4_proto)
+{
+	bool set_df;
+
+	/* preserve real IPv6 fragmentation state with a Fragment Header */
+	if (frag6) {
+		const bool more = !!(be16_to_cpu(frag6->frag_off) & IP6_MF);
+		const u16 offset = ipxlat_get_frag6_offset(frag6);
+
+		return ipxlat_build_frag4_offset(false, more, offset);
+	}
+
+	/* frag_list implies segmented payload emitted as fragments */
+	if (skb_has_frag_list(skb))
+		return ipxlat_build_frag4_offset(false, false, 0);
+
+	if (!skb_is_gso(skb)) {
+		/* plain packet: DF only when it exceeds the size threshold */
+		set_df = skb->len > (IPV6_MIN_MTU - sizeof(struct iphdr));
+	} else {
+		/* GSO frames are one datagram here; set DF only for TCP
+		 * when later segmentation exceeds IPv6 minimum MTU
+		 */
+		set_df = (l4_proto == IPPROTO_TCP) &&
+			 (ipxlat_skb_cb(skb)->payload_off +
+			  skb_shinfo(skb)->gso_size >
+			  (IPV6_MIN_MTU - sizeof(struct iphdr)));
+	}
+
+	return ipxlat_build_frag4_offset(set_df, false, 0);
+}
+
+/**
+ * ipxlat_64_translate - translate one validated packet from IPv6 to IPv4
+ * @ipxlat: translator private context
+ * @skb: packet to translate
+ *
+ * Rewrites outer L3 in place, rebases cached offsets and translates L4 on
+ * first fragments only. The IPv4 TTL is the IPv6 hop limit minus one. When
+ * the IPv6 packet carried a Fragment Header, the low 16 bits of its
+ * identification become the IPv4 ID; otherwise a fresh IPv4 ID is selected.
+ *
+ * The skb is not freed by this function itself. On failure it may already be
+ * partially rewritten, so the caller must not treat it as the original IPv6
+ * packet anymore.
+ *
+ * Return: 0 on success, negative errno on translation failure.
+ */
+int ipxlat_64_translate(struct ipxlat_priv *ipxlat, struct sk_buff *skb)
+{
+	unsigned int min_l4_len, old_l3_len, new_l3_len;
+	struct ipxlat_cb *cb = ipxlat_skb_cb(skb);
+	struct ipv6hdr outer6 = *ipv6_hdr(skb);
+	bool is_icmp_err, has_frag, first_frag;
+	u8 in_l4_proto, out_l4_proto;
+	struct frag_hdr frag_copy;
+	struct frag_hdr *frag6;
+	__be32 saddr, daddr;
+	__be16 frag_off, id;
+	struct iphdr *iph4;
+	int l3_delta, err;
+	bool out_is_frag;
+
+	/* snapshot original outer IPv6 fields before L3 rewrite */
+	frag6 = cb->fragh_off ? (struct frag_hdr *)(skb->data + cb->fragh_off) :
+				NULL;
+	has_frag = !!frag6;
+	in_l4_proto = cb->l4_proto;
+	is_icmp_err = cb->is_icmp_err;
+	out_l4_proto = ipxlat_64_map_nexthdr_proto(in_l4_proto);
+
+	old_l3_len = cb->l3_hdr_len;
+	new_l3_len = sizeof(struct iphdr);
+	l3_delta = (int)new_l3_len - (int)old_l3_len;
+
+	/* the Fragment Header lives in the area about to be overwritten, so
+	 * keep a by-value copy
+	 */
+	if (unlikely(has_frag))
+		frag_copy = *frag6;
+	first_frag = ipxlat_is_first_frag6(has_frag ? &frag_copy : NULL);
+
+	/* a packet flagged as ICMP error must actually carry ICMPv6 */
+	if (unlikely(is_icmp_err)) {
+		if (unlikely(in_l4_proto != NEXTHDR_ICMP))
+			return -EINVAL;
+	}
+
+	/* derive translated IPv4 endpoints */
+	err = ipxlat_64_convert_addrs(&ipxlat->xlat_prefix6, &outer6,
+				      is_icmp_err, &saddr, &daddr);
+	if (unlikely(err))
+		return err;
+
+	/* replace outer IPv6 hdr with IPv4 hdr in-place
+	 * NOTE(review): unlike the 4->6 path there is no skb_cow_head() before
+	 * the header is rewritten; presumably validation already guarantees a
+	 * private header area - confirm.
+	 */
+	skb_pull(skb, old_l3_len);
+	skb_push(skb, new_l3_len);
+	skb_reset_network_header(skb);
+	skb_set_transport_header(skb, new_l3_len);
+	skb->protocol = htons(ETH_P_IP);
+
+	/* Rebase cached offsets after L3 size delta.
+	 * For outer 6->4 translation this should not underflow: cached offsets
+	 * were built from l3_off + ip6_len (+ ...), and
+	 * delta = sizeof(struct iphdr) - ip6_len, so ip6_len cancels out after
+	 * rebasing. A failure here means internal metadata inconsistency, not
+	 * a packet validation outcome.
+	 */
+	err = ipxlat_cb_rebase_offsets(cb, l3_delta);
+	if (unlikely(err)) {
+		DEBUG_NET_WARN_ON_ONCE(1);
+		return err;
+	}
+
+	cb->l3_hdr_len = sizeof(struct iphdr);
+	cb->fragh_off = 0;
+	cb->l4_proto = out_l4_proto;
+	DEBUG_NET_WARN_ON_ONCE(!ipxlat_cb_offsets_valid(cb));
+
+	/* build outer IPv4 base hdr from translated IPv6 fields */
+	iph4 = ip_hdr(skb);
+	frag_off = ipxlat_64_build_frag_off(skb, has_frag ? &frag_copy : NULL,
+					    out_l4_proto);
+	/* when source had Fragment Header we preserve the low 16 bits of its
+	 * identification; otherwise the header is built with ID 0 and a fresh
+	 * ID is selected right below
+	 */
+	id = has_frag ? cpu_to_be16(be32_to_cpu(frag_copy.identification)) : 0;
+	ipxlat_64_build_l3(iph4, &outer6, skb->len, frag_off,
+			   out_l4_proto, saddr, daddr,
+			   outer6.hop_limit - 1, id);
+
+	if (likely(!has_frag)) {
+		iph4->id = 0;
+		__ip_select_ident(dev_net(ipxlat->dev), iph4, 1);
+		/* the ID changed after the checksum was computed: redo it */
+		iph4->check = 0;
+		iph4->check = ip_fast_csum(iph4, iph4->ihl);
+	}
+
+	/* Snapshot the outgoing fragmentation state while iph4 is known to be
+	 * valid: skb_ensure_writable() below (and possibly the L4 helpers) can
+	 * reallocate the skb head, after which iph4 must not be dereferenced.
+	 */
+	out_is_frag = ip_is_fragment(iph4);
+
+	/* non-first fragments have no transport header to translate */
+	if (unlikely(!first_frag))
+		goto out;
+
+	/* ensure transport bytes are writable before L4 csum/proto rewrites */
+	min_l4_len = ipxlat_l4_min_len(out_l4_proto);
+	if (unlikely(skb_ensure_writable(skb, skb_transport_offset(skb) +
+					 min_l4_len)))
+		return -ENOMEM;
+
+	/* translate transport hdr and pseudohdr dependent checksums;
+	 * protocols other than TCP/UDP/ICMP are passed through untouched
+	 */
+	switch (out_l4_proto) {
+	case IPPROTO_TCP:
+		err = ipxlat_64_outer_tcp(skb, &outer6);
+		break;
+	case IPPROTO_UDP:
+		err = ipxlat_64_outer_udp(skb, &outer6);
+		break;
+	case IPPROTO_ICMP:
+		err = ipxlat_64_icmp(ipxlat, skb, &outer6);
+		break;
+	default:
+		err = 0;
+		break;
+	}
+	if (unlikely(err))
+		return err;
+
+out:
+	/* normalize checksum/offload metadata for the translated frame */
+	return ipxlat_finalize_offload(skb, out_l4_proto, out_is_frag,
+				       SKB_GSO_TCPV6, SKB_GSO_TCPV4);
+}
diff --git a/drivers/net/ipxlat/translate_64.h b/drivers/net/ipxlat/translate_64.h
new file mode 100644
index 000000000000..269d1955944f
--- /dev/null
+++ b/drivers/net/ipxlat/translate_64.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* IPXLAT - Stateless IP/ICMP Translation (SIIT) virtual device driver
+ *
+ * Copyright (C) 2024- Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Copyright (C) 2026- Mandelbit SRL
+ * Copyright (C) 2026- Daniel Gröber <dxld@darkboxed.org>
+ *
+ * Author: Alberto Leiva Popper <ydahhrk@gmail.com>
+ * Antonio Quartulli <antonio@mandelbit.com>
+ * Daniel Gröber <dxld@darkboxed.org>
+ * Ralf Lici <ralf@mandelbit.com>
+ */
+
+#ifndef _NET_IPXLAT_TRANSLATE_64_H_
+#define _NET_IPXLAT_TRANSLATE_64_H_
+
+#include "ipxlpriv.h"
+
+struct sk_buff;
+struct iphdr;
+struct ipv6hdr;
+
+/**
+ * ipxlat_64_build_l3 - build translated outer IPv4 header from IPv6 metadata
+ * @iph4: output IPv4 header
+ * @iph6: source IPv6 header
+ * @tot_len: resulting IPv4 total length
+ * @frag_off: resulting IPv4 fragment offset/flags
+ * @protocol: resulting IPv4 L4 protocol
+ * @saddr: resulting IPv4 source address
+ * @daddr: resulting IPv4 destination address
+ * @ttl: resulting IPv4 TTL
+ * @id: resulting IPv4 identification field
+ */
+void ipxlat_64_build_l3(struct iphdr *iph4, const struct ipv6hdr *iph6,
+ unsigned int tot_len, __be16 frag_off, u8 protocol,
+ __be32 saddr, __be32 daddr, u8 ttl, __be16 id);
+
+/**
+ * ipxlat_64_translate - translate outer packet from IPv6 to IPv4 in place
+ * @ipxlat: translator private context
+ * @skb: packet to translate
+ *
+ * Return: 0 on success, negative errno on translation failure.
+ */
+int ipxlat_64_translate(struct ipxlat_priv *ipxlat, struct sk_buff *skb);
+
+/**
+ * ipxlat_64_map_nexthdr_proto - map IPv6 nexthdr to IPv4 L4 protocol
+ * @nexthdr: IPv6 next-header value
+ *
+ * Return: IPv4 protocol value corresponding to @nexthdr.
+ */
+u8 ipxlat_64_map_nexthdr_proto(u8 nexthdr);
+
+#endif /* _NET_IPXLAT_TRANSLATE_64_H_ */
diff --git a/drivers/net/ipxlat/transport.c b/drivers/net/ipxlat/transport.c
index 3aa00c635916..78548d0b8c22 100644
--- a/drivers/net/ipxlat/transport.c
+++ b/drivers/net/ipxlat/transport.c
@@ -338,3 +338,14 @@ int ipxlat_64_inner_udp(struct sk_buff *skb, const struct ipv6hdr *in6,
udp_new->check = CSUM_MANGLED_0;
return 0;
}
+
+int ipxlat_46_icmp(struct ipxlat_priv *ipxlat, struct sk_buff *skb)
+{
+ return -EPROTONOSUPPORT; /* temporary stub: always fails, arguments unused */
+}
+
+int ipxlat_64_icmp(struct ipxlat_priv *ipxlat, struct sk_buff *skb,
+ const struct ipv6hdr *outer6)
+{
+ return -EPROTONOSUPPORT; /* temporary stub: always fails, arguments unused */
+}
diff --git a/drivers/net/ipxlat/transport.h b/drivers/net/ipxlat/transport.h
index 9b6fe422b01f..0e69b98eafd0 100644
--- a/drivers/net/ipxlat/transport.h
+++ b/drivers/net/ipxlat/transport.h
@@ -100,4 +100,9 @@ int ipxlat_64_inner_tcp(struct sk_buff *skb, const struct ipv6hdr *in6,
int ipxlat_64_inner_udp(struct sk_buff *skb, const struct ipv6hdr *in6,
const struct iphdr *out4, struct udphdr *udp_new);
+/* temporary ICMP stubs until ICMP translation support is introduced */
+int ipxlat_46_icmp(struct ipxlat_priv *ipxlat, struct sk_buff *skb);
+int ipxlat_64_icmp(struct ipxlat_priv *ipxlat, struct sk_buff *skb,
+ const struct ipv6hdr *outer6);
+
#endif /* _NET_IPXLAT_TRANSPORT_H_ */
--
2.53.0
next prev parent reply other threads:[~2026-03-19 15:20 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-19 15:12 [RFC net-next 00/15] Introducing ipxlat: a stateless IPv4/IPv6 translation device Ralf Lici
2026-03-19 15:12 ` [RFC net-next 01/15] drivers/net: add ipxlat netdevice skeleton and build plumbing Ralf Lici
2026-03-19 15:12 ` [RFC net-next 02/15] ipxlat: add RFC 6052 address conversion helpers Ralf Lici
2026-03-19 15:12 ` [RFC net-next 03/15] ipxlat: add packet metadata control block helpers Ralf Lici
2026-03-19 15:12 ` [RFC net-next 04/15] ipxlat: add IPv4 packet validation path Ralf Lici
2026-03-19 15:12 ` [RFC net-next 05/15] ipxlat: add IPv6 " Ralf Lici
2026-03-19 15:12 ` [RFC net-next 06/15] ipxlat: add transport checksum and offload helpers Ralf Lici
2026-03-19 15:12 ` [RFC net-next 07/15] ipxlat: add 4to6 and 6to4 TCP/UDP translation helpers Ralf Lici
2026-03-19 15:12 ` Ralf Lici [this message]
2026-03-19 15:12 ` [RFC net-next 09/15] ipxlat: emit translator-generated ICMP errors on drop Ralf Lici
2026-03-19 15:12 ` [RFC net-next 10/15] ipxlat: add 4to6 pre-fragmentation path Ralf Lici
2026-03-19 15:12 ` [RFC net-next 11/15] ipxlat: add ICMP informational translation paths Ralf Lici
2026-03-19 15:12 ` [RFC net-next 12/15] ipxlat: add ICMP error translation and quoted-inner handling Ralf Lici
2026-03-19 15:12 ` [RFC net-next 13/15] ipxlat: add netlink control plane and uapi Ralf Lici
2026-03-19 15:12 ` [RFC net-next 14/15] selftests: net: add ipxlat coverage Ralf Lici
2026-03-19 15:12 ` [RFC net-next 15/15] Documentation: networking: add ipxlat translator guide Ralf Lici
2026-03-19 22:11 ` Jonathan Corbet
2026-03-24 9:55 ` Ralf Lici
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260319151230.655687-9-ralf@mandelbit.com \
--to=ralf@mandelbit.com \
--cc=andrew+netdev@lunn.ch \
--cc=antonio@mandelbit.com \
--cc=davem@davemloft.net \
--cc=dxld@darkboxed.org \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox