public inbox for linux-kselftest@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Add RFC-7597 Section 5.1 PSID support
       [not found] <20210705103959.GG18022@breakpoint.cc>
@ 2021-07-16  0:27 ` Cole Dishington
  2021-07-16  0:27   ` [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
                     ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Cole Dishington @ 2021-07-16  0:27 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington

Thanks for your time reviewing!

Changes in v4:
- Handle special case of no offset bits (a=0 / A=2^16).

Cole Dishington (3):
  net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  net: netfilter: Add RFC-7597 Section 5.1 PSID support
  selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests

 include/uapi/linux/netfilter/nf_nat.h         |   3 +-
 net/netfilter/nf_nat_core.c                   |  39 +++-
 net/netfilter/nf_nat_masquerade.c             |  20 +-
 net/netfilter/xt_MASQUERADE.c                 |  44 ++++-
 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 5 files changed, 276 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

-- 
2.32.0


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  2021-07-16  0:27 ` [PATCH 0/3] Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-07-16  0:27   ` Cole Dishington
  2021-07-16  0:27   ` [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-07-16  0:27   ` [PATCH " Cole Dishington
  2 siblings, 0 replies; 15+ messages in thread
From: Cole Dishington @ 2021-07-16  0:27 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington,
	Anthony Lineham, Scott Parlane, Blair Steven

Add support for revision 2 of xtables masquerade extension.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---
 include/uapi/linux/netfilter/nf_nat.h |  3 +-
 net/netfilter/xt_MASQUERADE.c         | 44 ++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 #define NF_NAT_RANGE_NETMAP			(1 << 6)
+#define NF_NAT_RANGE_PSID			(1 << 7)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
 	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
-	 NF_NAT_RANGE_NETMAP)
+	 NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
index eae05c178336..dc6870ca2b71 100644
--- a/net/netfilter/xt_MASQUERADE.c
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -16,7 +16,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check_v0(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
@@ -31,8 +31,19 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 	return nf_ct_netns_get(par->net, par->family);
 }
 
+static int masquerade_tg_check_v1(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+		pr_debug("bad MAP_IPS.\n");
+		return -EINVAL;
+	}
+	return nf_ct_netns_get(par->net, par->family);
+}
+
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_nat_range2 range;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
@@ -46,6 +57,15 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				      xt_out(par));
 }
 
+static unsigned int
+masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range,
+				      xt_out(par));
+}
+
 static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	nf_ct_netns_put(par->net, par->family);
@@ -73,6 +93,7 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	{
 #if IS_ENABLED(CONFIG_IPV6)
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV6,
 		.target		= masquerade_tg6,
 		.targetsize	= sizeof(struct nf_nat_range),
@@ -84,15 +105,28 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	}, {
 #endif
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV4,
-		.target		= masquerade_tg,
+		.target		= masquerade_tg_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.table		= "nat",
 		.hooks		= 1 << NF_INET_POST_ROUTING,
-		.checkentry	= masquerade_tg_check,
+		.checkentry	= masquerade_tg_check_v0,
 		.destroy	= masquerade_tg_destroy,
 		.me		= THIS_MODULE,
-	}
+	},
+	{
+		.name		= "MASQUERADE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.target		= masquerade_tg_v1,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg_check_v1,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init masquerade_tg_init(void)
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-16  0:27 ` [PATCH 0/3] Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-07-16  0:27   ` [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
@ 2021-07-16  0:27   ` Cole Dishington
  2021-07-16 15:18     ` Florian Westphal
  2021-07-16  0:27   ` [PATCH " Cole Dishington
  2 siblings, 1 reply; 15+ messages in thread
From: Cole Dishington @ 2021-07-16  0:27 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington,
	Anthony Lineham, Scott Parlane, Blair Steven

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing!
    
    Changes in v4:
    - Add support for a=0 (A=2^16) special case.
    - Now that offset=0 is used for the special case of 2^16 (as it cannot fit
      in a u16) the divide by zero issues are no longer present.

 net/netfilter/nf_nat_core.c       | 39 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 20 ++++++++++++++--
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..4a9448684504 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u16 power_j = ntohs(max->all) - ntohs(min->all) + 1;
+		u32 offset = ntohs(base->all);
+		u16 power_a;
+
+		if (offset == 0)
+			offset = 1 << 16;
+
+		power_a = (1 << 16) / offset;
+		offset_mask = (power_a - 1) * offset;
+		psid_mask = ((offset / power_j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return (offset_mask == 0 ||
+				(ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +238,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return (offset_mask == 0 || (ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +266,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +560,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..7e2fb0da344a 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,24 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 off = 0;
+
+		/* If offset=0, port range is in one contiguous block */
+		if (base)
+			off = prandom_u32() % (((1 << 16) / base) - 1);
+
+		newrange.min_proto.all   = htons(min + base * off);
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests
  2021-07-16  0:27 ` [PATCH 0/3] Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-07-16  0:27   ` [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
  2021-07-16  0:27   ` [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-07-16  0:27   ` Cole Dishington
  2 siblings, 0 replies; 15+ messages in thread
From: Cole Dishington @ 2021-07-16  0:27 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington

Add selftests for masquerading into a smaller subset of ports defined by
PSID.

Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing!
    
    Changes in v4:
    - Add tests for a=0 (A=2^16) special case.
    - Update to use offset_length (from iptables change).

 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

diff --git a/tools/testing/selftests/netfilter/nat_masquerade_psid.sh b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
new file mode 100644
index 000000000000..56c1b509caf6
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# <:copyright-gpl
+# Copyright (C) 2021 Allied Telesis Labs NZ
+#
+# check that NAT can masquerade using PSID defined ranges.
+#
+# Setup is:
+#
+# nsclient1(veth0) -> (veth1)nsrouter(veth2) -> (veth0)nsclient2
+# Setup a nat masquerade rule with psid defined ranges.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+ns_all="nsclient1 nsrouter nsclient2"
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+readonly server_port=8080
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iptables tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ipv4() {
+	echo -n 192.168.$1.$2
+}
+
+cleanup() {
+	for n in $ns_all; do ip netns del $n;done
+
+	if [ -f "${outfile}" ]; then
+		rm "$outfile"
+	fi
+	if [ -f "${infile}" ]; then
+		rm "$infile"
+	fi
+}
+
+server_listen() {
+	ip netns exec nsclient2 nc -l -p "$server_port" > "$outfile" &
+	server_pid=$!
+	sleep 0.2
+}
+
+client_connect() {
+	ip netns exec nsclient1 timeout 2 nc -w 1 -p "$port" $(ipv4 2 2) "$server_port" < $infile
+}
+
+verify_data() {
+	local _ret=0
+	wait "$server_pid"
+	cmp "$infile" "$outfile" 2>/dev/null
+	_ret=$?
+	rm "$outfile"
+	return $_ret
+}
+
+test_service() {
+	server_listen
+	client_connect
+	verify_data
+}
+
+check_connection() {
+	local _ret=0
+	entry=$(ip netns exec nsrouter conntrack -p tcp --sport $port -L 2>&1)
+	entry=${entry##*sport=8080 dport=}
+	entry=${entry%% *}
+
+	if [[ "x$(( ($entry & $psid_mask) / $two_power_j ))" != "x$psid" ]]; then
+		_ret=1
+		echo "Failed psid mask check for $offset_len:$psid:$psid_length with port $entry"
+	fi
+
+	if [[ "x$_ret" = "x0" ]] &&
+	   [[ "x$offset_mask" != "x0" -a "x$(( ($entry & $offset_mask) ))" == "x0" ]]; then
+		_ret=1
+		echo "Failed offset mask check for $offset_len:$psid:$psid_length with port $entry"
+	fi
+	return $_ret
+}
+
+run_test() {
+	ip netns exec nsrouter iptables -A FORWARD -i veth1 -j ACCEPT
+	ip netns exec nsrouter iptables -P FORWARD DROP
+	ip netns exec nsrouter iptables -A FORWARD -m state --state ESTABLISHED,RELATED -j ACCEPT
+	ip netns exec nsrouter iptables -t nat --new psid
+	ip netns exec nsrouter iptables -t nat --insert psid -j MASQUERADE \
+		--psid $offset_len:$psid:$psid_length
+	ip netns exec nsrouter iptables -t nat -I POSTROUTING -o veth2 -j psid
+
+	# calculate psid mask
+	offset=$(( 1 << (16 - $offset_len) ))
+	two_power_j=$(( $offset / (1 << $psid_length) ))
+	offset_mask=$(( ( (1 << $offset_len) - 1 ) << (16 - $offset_len) ))
+	psid_mask=$(( ( (1 << $psid_length) - 1) * $two_power_j ))
+
+	# Create file
+	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 >/dev/null 2>&1
+
+	# Test multiple ports
+	for p in 1 2 3 4 5; do
+		port=1080$p
+
+		test_service
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+
+		check_connection
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+	done
+
+	# tidy up test rules
+	ip netns exec nsrouter iptables -F
+	ip netns exec nsrouter iptables -t nat -F
+	ip netns exec nsrouter iptables -t nat -X psid
+}
+
+for n in $ns_all; do
+	ip netns add $n
+	ip -net $n link set lo up
+done
+
+for i in 1 2; do
+	ip link add veth0 netns nsclient$i type veth peer name veth$i netns nsrouter
+
+	ip -net nsclient$i link set veth0 up
+	ip -net nsclient$i addr add $(ipv4 $i 2)/24 dev veth0
+
+	ip -net nsrouter link set veth$i up
+	ip -net nsrouter addr add $(ipv4 $i 1)/24 dev veth$i
+done
+
+ip -net nsclient1 route add default via $(ipv4 1 1)
+ip -net nsclient2 route add default via $(ipv4 2 1)
+
+ip netns exec nsrouter sysctl -q net.ipv4.conf.all.forwarding=1
+
+offset_len=0
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+offset_len=6
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+cleanup
+exit $ret
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-16  0:27   ` [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-07-16 15:18     ` Florian Westphal
  2021-07-19  1:21       ` Cole Dishington
  0 siblings, 1 reply; 15+ messages in thread
From: Florian Westphal @ 2021-07-16 15:18 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, kadlec, fw, davem, kuba, shuah, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
> index 7de595ead06a..4a9448684504 100644
> --- a/net/netfilter/nf_nat_core.c
> +++ b/net/netfilter/nf_nat_core.c
> @@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
>  static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
>  			     enum nf_nat_manip_type maniptype,
>  			     const union nf_conntrack_man_proto *min,
> -			     const union nf_conntrack_man_proto *max)
> +			     const union nf_conntrack_man_proto *max,
> +			     const union nf_conntrack_man_proto *base,
> +			     bool is_psid)
>  {
>  	__be16 port;
> +	u16 psid, psid_mask, offset_mask;
> +
> +	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
> +	if (is_psid) {
> +		u16 power_j = ntohs(max->all) - ntohs(min->all) + 1;
> +		u32 offset = ntohs(base->all);
> +		u16 power_a;
> +
> +		if (offset == 0)
> +			offset = 1 << 16;
> +
> +		power_a = (1 << 16) / offset;

Since the dividie is only needed nat setup and not for each packet I
think its ok.

> +	if (range->flags & NF_NAT_RANGE_PSID) {
> +		u16 base = ntohs(range->base_proto.all);
> +		u16 min =  ntohs(range->min_proto.all);
> +		u16 off = 0;
> +
> +		/* If offset=0, port range is in one contiguous block */
> +		if (base)
> +			off = prandom_u32() % (((1 << 16) / base) - 1);

Bases 32769 > gives 0 for the modulo value, so perhaps compute that
independently.

You could reject > 32769 in the iptables checkentry target.

Also, base of 21846 and above always give 0 result (% 1).

I don't know psid well enough to give a recommendation here.

If such inputs are nonsensical, just reject it when userspace asks for
this and add a 

if (WARN_ON_ONCE(base > bogus))
	return NF_DROP;

with s small coment explaining that xtables is supposed to not provide
such value.

Other than this I think its ok.

I still dislike the 'bool is_psid' in the nat core, but I can't find
a better solution.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-16 15:18     ` Florian Westphal
@ 2021-07-19  1:21       ` Cole Dishington
  2021-07-22  7:17         ` Florian Westphal
  0 siblings, 1 reply; 15+ messages in thread
From: Cole Dishington @ 2021-07-19  1:21 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington,
	Anthony Lineham, Scott Parlane, Blair Steven

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for time reviewing!
    
    Changes in v5:
    - Add WARN_ON_ONCE for invalid value of range->base.

 net/netfilter/nf_nat_core.c       | 39 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 27 +++++++++++++++++++--
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..4a9448684504 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u16 power_j = ntohs(max->all) - ntohs(min->all) + 1;
+		u32 offset = ntohs(base->all);
+		u16 power_a;
+
+		if (offset == 0)
+			offset = 1 << 16;
+
+		power_a = (1 << 16) / offset;
+		offset_mask = (power_a - 1) * offset;
+		psid_mask = ((offset / power_j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return (offset_mask == 0 ||
+				(ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +238,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return (offset_mask == 0 || (ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +266,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +560,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..dea6106f1699 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 off = 0;
+
+		/* xtables should stop base > 2^15 by enforcement of
+		 * 0 <= offset_len < 16 argument, with offset_len=0
+		 * as a special case inwhich base=0.
+		 */
+		if (WARN_ON_ONCE(base > (1 << 15)))
+			return NF_DROP;
+
+		/* If offset=0, port range is in one contiguous block */
+		if (base)
+			off = prandom_u32() % (((1 << 16) / base) - 1);
+
+		newrange.min_proto.all   = htons(min + base * off);
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-19  1:21       ` Cole Dishington
@ 2021-07-22  7:17         ` Florian Westphal
  2021-07-25 23:28           ` Cole Dishington
  0 siblings, 1 reply; 15+ messages in thread
From: Florian Westphal @ 2021-07-22  7:17 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, kadlec, fw, davem, kuba, shuah, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> Adds support for masquerading into a smaller subset of ports -
> defined by the PSID values from RFC-7597 Section 5.1. This is part of
> the support for MAP-E and Lightweight 4over6, which allows multiple
> devices to share an IPv4 address by splitting the L4 port / id into
> ranges.
> 
> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> ---
> +
> +	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
> +	if (is_psid) {
> +		u16 power_j = ntohs(max->all) - ntohs(min->all) + 1;

I think this needs to be 'u32 power_j' to prevent overflow of
65535 + 1 -> 0.

> +		if (base)
> +			off = prandom_u32() % (((1 << 16) / base) - 1);

I think this can use prandom_u32_max(((1 << 16) / base) - 1).

I have no other comments.  Other kernel patches LGTM.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-22  7:17         ` Florian Westphal
@ 2021-07-25 23:28           ` Cole Dishington
  2021-07-26 14:37             ` Florian Westphal
  0 siblings, 1 reply; 15+ messages in thread
From: Cole Dishington @ 2021-07-25 23:28 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Cole Dishington,
	Anthony Lineham, Scott Parlane, Blair Steven

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    Thanks for your time reviewing!
    
    Changes in v6:
    - Use prandom_u32_max() rather than prandom_u32() % max for generating PSID sub-range offset.
    - Use u32 for power_j for the case of a=0,psid_len=0.

 net/netfilter/nf_nat_core.c       | 39 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 27 +++++++++++++++++++--
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..f07a3473aab5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u32 power_j = ntohs(max->all) - ntohs(min->all) + 1;
+		u32 offset = ntohs(base->all);
+		u16 power_a;
+
+		if (offset == 0)
+			offset = 1 << 16;
+
+		power_a = (1 << 16) / offset;
+		offset_mask = (power_a - 1) * offset;
+		psid_mask = ((offset / power_j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return (offset_mask == 0 ||
+				(ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +238,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return (offset_mask == 0 || (ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +266,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +560,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..19a4754cda76 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 off = 0;
+
+		/* xtables should stop base > 2^15 by enforcement of
+		 * 0 <= offset_len < 16 argument, with offset_len=0
+		 * as a special case inwhich base=0.
+		 */
+		if (WARN_ON_ONCE(base > (1 << 15)))
+			return NF_DROP;
+
+		/* If offset=0, port range is in one contiguous block */
+		if (base)
+			off = prandom_u32_max(((1 << 16) / base) - 1);
+
+		newrange.min_proto.all   = htons(min + base * off);
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-07-25 23:28           ` Cole Dishington
@ 2021-07-26 14:37             ` Florian Westphal
  2021-08-09  4:10               ` [PATCH net-next 0/3] " Cole Dishington
  0 siblings, 1 reply; 15+ messages in thread
From: Florian Westphal @ 2021-07-26 14:37 UTC (permalink / raw)
  To: Cole Dishington
  Cc: pablo, kadlec, fw, davem, kuba, shuah, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Cole Dishington <Cole.Dishington@alliedtelesis.co.nz> wrote:
> Adds support for masquerading into a smaller subset of ports -
> defined by the PSID values from RFC-7597 Section 5.1. This is part of
> the support for MAP-E and Lightweight 4over6, which allows multiple
> devices to share an IPv4 address by splitting the L4 port / id into
> ranges.
> 
> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> ---

Thanks for your patience and addressing all the comments.

Reviewed-by: Florian Westphal <fw@strlen.de>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH net-next 0/3] Add RFC-7597 Section 5.1 PSID support
  2021-07-26 14:37             ` Florian Westphal
@ 2021-08-09  4:10               ` Cole Dishington
  2021-08-09  4:10                 ` [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
                                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Cole Dishington @ 2021-08-09  4:10 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, Cole.Dishington, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest

Thanks for your time reviewing!

Changes in v7:
- Added net-next to subject.
- Added Reviewed-by: Florian Westphal <fw@strlen.de> to patch 2/3.

Cole Dishington (3):
  net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  net: netfilter: Add RFC-7597 Section 5.1 PSID support
  selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests

 include/uapi/linux/netfilter/nf_nat.h         |   3 +-
 net/netfilter/nf_nat_core.c                   |  39 +++-
 net/netfilter/nf_nat_masquerade.c             |  27 ++-
 net/netfilter/xt_MASQUERADE.c                 |  44 ++++-
 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 5 files changed, 283 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

-- 
2.32.0


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API
  2021-08-09  4:10               ` [PATCH net-next 0/3] " Cole Dishington
@ 2021-08-09  4:10                 ` Cole Dishington
  2021-08-09  4:10                 ` [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
  2021-08-09  4:10                 ` [PATCH net-next 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests Cole Dishington
  2 siblings, 0 replies; 15+ messages in thread
From: Cole Dishington @ 2021-08-09  4:10 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, Cole.Dishington, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Add support for revision 2 of xtables masquerade extension.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    No changes.

 include/uapi/linux/netfilter/nf_nat.h |  3 +-
 net/netfilter/xt_MASQUERADE.c         | 44 ++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a64586e77b24..660e53ffdb57 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -12,6 +12,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 #define NF_NAT_RANGE_NETMAP			(1 << 6)
+#define NF_NAT_RANGE_PSID			(1 << 7)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -20,7 +21,7 @@
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
 	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
-	 NF_NAT_RANGE_NETMAP)
+	 NF_NAT_RANGE_NETMAP | NF_NAT_RANGE_PSID)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/xt_MASQUERADE.c b/net/netfilter/xt_MASQUERADE.c
index eae05c178336..dc6870ca2b71 100644
--- a/net/netfilter/xt_MASQUERADE.c
+++ b/net/netfilter/xt_MASQUERADE.c
@@ -16,7 +16,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static int masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check_v0(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
@@ -31,8 +31,19 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 	return nf_ct_netns_get(par->net, par->family);
 }
 
+static int masquerade_tg_check_v1(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+		pr_debug("bad MAP_IPS.\n");
+		return -EINVAL;
+	}
+	return nf_ct_netns_get(par->net, par->family);
+}
+
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+masquerade_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_nat_range2 range;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
@@ -46,6 +57,15 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				      xt_out(par));
 }
 
+static unsigned int
+masquerade_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+
+	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), range,
+				      xt_out(par));
+}
+
 static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	nf_ct_netns_put(par->net, par->family);
@@ -73,6 +93,7 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	{
 #if IS_ENABLED(CONFIG_IPV6)
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV6,
 		.target		= masquerade_tg6,
 		.targetsize	= sizeof(struct nf_nat_range),
@@ -84,15 +105,28 @@ static struct xt_target masquerade_tg_reg[] __read_mostly = {
 	}, {
 #endif
 		.name		= "MASQUERADE",
+		.revision	= 0,
 		.family		= NFPROTO_IPV4,
-		.target		= masquerade_tg,
+		.target		= masquerade_tg_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.table		= "nat",
 		.hooks		= 1 << NF_INET_POST_ROUTING,
-		.checkentry	= masquerade_tg_check,
+		.checkentry	= masquerade_tg_check_v0,
 		.destroy	= masquerade_tg_destroy,
 		.me		= THIS_MODULE,
-	}
+	},
+	{
+		.name		= "MASQUERADE",
+		.revision	= 1,
+		.family		= NFPROTO_IPV4,
+		.target		= masquerade_tg_v1,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= 1 << NF_INET_POST_ROUTING,
+		.checkentry	= masquerade_tg_check_v1,
+		.destroy	= masquerade_tg_destroy,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init masquerade_tg_init(void)
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-08-09  4:10               ` [PATCH net-next 0/3] " Cole Dishington
  2021-08-09  4:10                 ` [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
@ 2021-08-09  4:10                 ` Cole Dishington
  2021-08-25 17:05                   ` Pablo Neira Ayuso
  2021-08-09  4:10                 ` [PATCH net-next 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests Cole Dishington
  2 siblings, 1 reply; 15+ messages in thread
From: Cole Dishington @ 2021-08-09  4:10 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, Cole.Dishington, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest,
	Anthony Lineham, Scott Parlane, Blair Steven

Adds support for masquerading into a smaller subset of ports -
defined by the PSID values from RFC-7597 Section 5.1. This is part of
the support for MAP-E and Lightweight 4over6, which allows multiple
devices to share an IPv4 address by splitting the L4 port / id into
ranges.

Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
Reviewed-by: Florian Westphal <fw@strlen.de>
---

Notes:
    Changes:
    - Added Reviewed-by: Florian Westphal <fw@strlen.de>.

 net/netfilter/nf_nat_core.c       | 39 +++++++++++++++++++++++++++----
 net/netfilter/nf_nat_masquerade.c | 27 +++++++++++++++++++--
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..f07a3473aab5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -195,13 +195,36 @@ static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
 			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
+			     const union nf_conntrack_man_proto *max,
+			     const union nf_conntrack_man_proto *base,
+			     bool is_psid)
 {
 	__be16 port;
+	u16 psid, psid_mask, offset_mask;
+
+	/* In this case we are in PSID mode, avoid checking all ranges by computing bitmasks */
+	if (is_psid) {
+		u32 power_j = ntohs(max->all) - ntohs(min->all) + 1;
+		u32 offset = ntohs(base->all);
+		u16 power_a;
+
+		if (offset == 0)
+			offset = 1 << 16;
+
+		power_a = (1 << 16) / offset;
+		offset_mask = (power_a - 1) * offset;
+		psid_mask = ((offset / power_j) << 1) - 1;
+		psid = ntohs(min->all) & psid_mask;
+	}
 
 	switch (tuple->dst.protonum) {
 	case IPPROTO_ICMP:
 	case IPPROTO_ICMPV6:
+		if (is_psid) {
+			return (offset_mask == 0 ||
+				(ntohs(tuple->src.u.icmp.id) & offset_mask) != 0) &&
+				((ntohs(tuple->src.u.icmp.id) & psid_mask) == psid);
+		}
 		return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
 		       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 	case IPPROTO_GRE: /* all fall though */
@@ -215,6 +238,10 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 		else
 			port = tuple->dst.u.all;
 
+		if (is_psid) {
+			return (offset_mask == 0 || (ntohs(port) & offset_mask) != 0) &&
+				((ntohs(port) & psid_mask) == psid);
+		}
 		return ntohs(port) >= ntohs(min->all) &&
 		       ntohs(port) <= ntohs(max->all);
 	default:
@@ -239,7 +266,8 @@ static int in_range(const struct nf_conntrack_tuple *tuple,
 		return 1;
 
 	return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
-				&range->min_proto, &range->max_proto);
+				&range->min_proto, &range->max_proto, &range->base_proto,
+				range->flags & NF_NAT_RANGE_PSID);
 }
 
 static inline int
@@ -532,8 +560,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
 			    l4proto_in_range(tuple, maniptype,
-			          &range->min_proto,
-			          &range->max_proto) &&
+				  &range->min_proto,
+				  &range->max_proto,
+				  &range->base_proto,
+				  range->flags &
+				  NF_NAT_RANGE_PSID) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				return;
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..19a4754cda76 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
 	newrange.min_addr.ip = newsrc;
 	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
+
+	if (range->flags & NF_NAT_RANGE_PSID) {
+		u16 base = ntohs(range->base_proto.all);
+		u16 min =  ntohs(range->min_proto.all);
+		u16 off = 0;
+
+		/* xtables should stop base > 2^15 by enforcement of
+		 * 0 <= offset_len < 16 argument, with offset_len=0
+		 * as a special case inwhich base=0.
+		 */
+		if (WARN_ON_ONCE(base > (1 << 15)))
+			return NF_DROP;
+
+		/* If offset=0, port range is in one contiguous block */
+		if (base)
+			off = prandom_u32_max(((1 << 16) / base) - 1);
+
+		newrange.min_proto.all   = htons(min + base * off);
+		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
+		newrange.base_proto      = range->base_proto;
+		newrange.flags           = newrange.flags | NF_NAT_RANGE_PROTO_SPECIFIED;
+	} else {
+		newrange.min_proto       = range->min_proto;
+		newrange.max_proto       = range->max_proto;
+	}
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests
  2021-08-09  4:10               ` [PATCH net-next 0/3] " Cole Dishington
  2021-08-09  4:10                 ` [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
  2021-08-09  4:10                 ` [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-08-09  4:10                 ` Cole Dishington
  2 siblings, 0 replies; 15+ messages in thread
From: Cole Dishington @ 2021-08-09  4:10 UTC (permalink / raw)
  To: pablo
  Cc: kadlec, fw, davem, kuba, shuah, Cole.Dishington, linux-kernel,
	netfilter-devel, coreteam, netdev, linux-kselftest

Add selftests for masquerading into a smaller subset of ports defined by
PSID.

Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
---

Notes:
    No changes.

 .../netfilter/nat_masquerade_psid.sh          | 182 ++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 tools/testing/selftests/netfilter/nat_masquerade_psid.sh

diff --git a/tools/testing/selftests/netfilter/nat_masquerade_psid.sh b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
new file mode 100644
index 000000000000..56c1b509caf6
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nat_masquerade_psid.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# <:copyright-gpl
+# Copyright (C) 2021 Allied Telesis Labs NZ
+#
+# check that NAT can masquerade using PSID defined ranges.
+#
+# Setup is:
+#
+# nsclient1(veth0) -> (veth1)nsrouter(veth2) -> (veth0)nsclient2
+# Setup a nat masquerade rule with psid defined ranges.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+ns_all="nsclient1 nsrouter nsclient2"
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+readonly server_port=8080
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iptables tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ipv4() {
+	echo -n 192.168.$1.$2
+}
+
+cleanup() {
+	for n in $ns_all; do ip netns del $n;done
+
+	if [ -f "${outfile}" ]; then
+		rm "$outfile"
+	fi
+	if [ -f "${infile}" ]; then
+		rm "$infile"
+	fi
+}
+
+server_listen() {
+	ip netns exec nsclient2 nc -l -p "$server_port" > "$outfile" &
+	server_pid=$!
+	sleep 0.2
+}
+
+client_connect() {
+	ip netns exec nsclient1 timeout 2 nc -w 1 -p "$port" $(ipv4 2 2) "$server_port" < $infile
+}
+
+verify_data() {
+	local _ret=0
+	wait "$server_pid"
+	cmp "$infile" "$outfile" 2>/dev/null
+	_ret=$?
+	rm "$outfile"
+	return $_ret
+}
+
+test_service() {
+	server_listen
+	client_connect
+	verify_data
+}
+
+check_connection() {
+	local _ret=0
+	entry=$(ip netns exec nsrouter conntrack -p tcp --sport $port -L 2>&1)
+	entry=${entry##*sport=8080 dport=}
+	entry=${entry%% *}
+
+	if [[ "x$(( ($entry & $psid_mask) / $two_power_j ))" != "x$psid" ]]; then
+		_ret=1
+		echo "Failed psid mask check for $offset_len:$psid:$psid_length with port $entry"
+	fi
+
+	if [[ "x$_ret" = "x0" ]] &&
+	   [[ "x$offset_mask" != "x0" -a "x$(( ($entry & $offset_mask) ))" == "x0" ]]; then
+		_ret=1
+		echo "Failed offset mask check for $offset_len:$psid:$psid_length with port $entry"
+	fi
+	return $_ret
+}
+
+run_test() {
+	ip netns exec nsrouter iptables -A FORWARD -i veth1 -j ACCEPT
+	ip netns exec nsrouter iptables -P FORWARD DROP
+	ip netns exec nsrouter iptables -A FORWARD -m state --state ESTABLISHED,RELATED -j ACCEPT
+	ip netns exec nsrouter iptables -t nat --new psid
+	ip netns exec nsrouter iptables -t nat --insert psid -j MASQUERADE \
+		--psid $offset_len:$psid:$psid_length
+	ip netns exec nsrouter iptables -t nat -I POSTROUTING -o veth2 -j psid
+
+	# calculate psid mask
+	offset=$(( 1 << (16 - $offset_len) ))
+	two_power_j=$(( $offset / (1 << $psid_length) ))
+	offset_mask=$(( ( (1 << $offset_len) - 1 ) << (16 - $offset_len) ))
+	psid_mask=$(( ( (1 << $psid_length) - 1) * $two_power_j ))
+
+	# Create file
+	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 >/dev/null 2>&1
+
+	# Test multiple ports
+	for p in 1 2 3 4 5; do
+		port=1080$p
+
+		test_service
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+
+		check_connection
+		if [ $? -ne 0 ]; then
+			ret=1
+			break
+		fi
+	done
+
+	# tidy up test rules
+	ip netns exec nsrouter iptables -F
+	ip netns exec nsrouter iptables -t nat -F
+	ip netns exec nsrouter iptables -t nat -X psid
+}
+
+for n in $ns_all; do
+	ip netns add $n
+	ip -net $n link set lo up
+done
+
+for i in 1 2; do
+	ip link add veth0 netns nsclient$i type veth peer name veth$i netns nsrouter
+
+	ip -net nsclient$i link set veth0 up
+	ip -net nsclient$i addr add $(ipv4 $i 2)/24 dev veth0
+
+	ip -net nsrouter link set veth$i up
+	ip -net nsrouter addr add $(ipv4 $i 1)/24 dev veth$i
+done
+
+ip -net nsclient1 route add default via $(ipv4 1 1)
+ip -net nsclient2 route add default via $(ipv4 2 1)
+
+ip netns exec nsrouter sysctl -q net.ipv4.conf.all.forwarding=1
+
+offset_len=0
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+offset_len=6
+psid_length=8
+for psid in 0 52; do
+	run_test
+	if [ $? -ne 0 ]; then
+		break
+	fi
+done
+
+cleanup
+exit $ret
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-08-09  4:10                 ` [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
@ 2021-08-25 17:05                   ` Pablo Neira Ayuso
  2021-08-29 21:30                     ` Cole Dishington
  0 siblings, 1 reply; 15+ messages in thread
From: Pablo Neira Ayuso @ 2021-08-25 17:05 UTC (permalink / raw)
  To: Cole Dishington
  Cc: kadlec, fw, davem, kuba, shuah, linux-kernel, netfilter-devel,
	coreteam, netdev, linux-kselftest, Anthony Lineham, Scott Parlane,
	Blair Steven

Hi,

On Mon, Aug 09, 2021 at 04:10:36PM +1200, Cole Dishington wrote:
> Adds support for masquerading into a smaller subset of ports -
> defined by the PSID values from RFC-7597 Section 5.1. This is part of
> the support for MAP-E and Lightweight 4over6, which allows multiple
> devices to share an IPv4 address by splitting the L4 port / id into
> ranges.
> 
> Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> Reviewed-by: Florian Westphal <fw@strlen.de>
[...]

Looking at the userspace logic:

https://patchwork.ozlabs.org/project/netfilter-devel/patch/20210716002219.30193-1-Cole.Dishington@alliedtelesis.co.nz/

Chunk extracted from void parse_psid(...)

>        offset = (1 << (16 - offset_len));

Assuming offset_len = 6, then you skip 0-1023 ports, OK.

>        psid = psid << (16 - offset_len - psid_len);

This psid calculation is correct? Maybe:

        psid = psid << (16 - offset_len);

instead?

        psid=0  =>      0 << (16 - 6) = 1024
        psid=1  =>      1 << (16 - 6) = 2048

This is implicitly assuming that 64 PSIDs are available, each of them
taking 1024 ports, ie. psid_len is 6 bits. But why are you subtracting
the psid_len above?

>        /* Handle the special case of no offset bits (a=0), so offset loops */
>        min = psid;

OK, this line above is the minimal port in the range

>        if (offset)
>                min += offset;

... which is incremented by the offset (to skip the 0-1023 ports).

>       r->min_proto.all = htons(min);
>       r->max_proto.all = htons(min + ((1 << (16 - offset_len - psid_len)) - 1));

Here, you subtract psid_len again, not sure why.

>       r->base_proto.all = htons(offset);

base is set to offset, ie. 1024.

>       r->flags |= NF_NAT_RANGE_PSID;
>       r->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;

Now looking at the kernel side.

> diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
> index 8e8a65d46345..19a4754cda76 100644
> --- a/net/netfilter/nf_nat_masquerade.c
> +++ b/net/netfilter/nf_nat_masquerade.c
> @@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
>  	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
>  	newrange.min_addr.ip = newsrc;
>  	newrange.max_addr.ip = newsrc;
> -	newrange.min_proto   = range->min_proto;
> -	newrange.max_proto   = range->max_proto;
> +
> +	if (range->flags & NF_NAT_RANGE_PSID) {
> +		u16 base = ntohs(range->base_proto.all);
> +		u16 min =  ntohs(range->min_proto.all);
> +		u16 off = 0;
> +
> +		/* xtables should stop base > 2^15 by enforcement of
> +		 * 0 <= offset_len < 16 argument, with offset_len=0
> +		 * as a special case inwhich base=0.

I don't understand this comment.

> +		 */
> +		if (WARN_ON_ONCE(base > (1 << 15)))
> +			return NF_DROP;
> +
> +		/* If offset=0, port range is in one contiguous block */
> +		if (base)
> +			off = prandom_u32_max(((1 << 16) / base) - 1);

Assuming the example above, base is set to 1024. Then, off is a random
value between UINT16_MAX (you expressed this as 1 << 16) and the base
which is 1024 minus 1.

So this is picking a random off (actually the PSID?) between 0 and 63.
What about clashes? I mean, two different machines behind the NAT
might get the same off.

> +		newrange.min_proto.all   = htons(min + base * off);

min could be 1024, 2048, 3072... you add base which is 1024 * off.

Is this duplicated? Both calculated in user and kernel space?

> +		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);

I'm stopping here, I'm getting lost.

My understanding about this RFC is that you would like to split the
16-bit ports in ranges to uniquely identify the host behind the NAT.

Why don't you just you just select the port range from userspace
utilizing the existing infrastructure? I mean, why do you need this
kernel patch?

Florian already suggested:

> Is it really needed to place all of this in the nat core?
> 
> The only thing that has to be done in the NAT core, afaics, is to
> suppress port reallocation attmepts when NF_NAT_RANGE_PSID is set.
> 
> Is there a reason why nf_nat_masquerade_ipv4/6 can't be changed instead
> to do what you want?
> 
> AFAICS its enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init the
> upper/lower boundaries, i.e. change input given to nf_nat_setup_info().

extracted from:

https://patchwork.ozlabs.org/project/netfilter-devel/patch/20210422023506.4651-1-Cole.Dishington@alliedtelesis.co.nz/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support
  2021-08-25 17:05                   ` Pablo Neira Ayuso
@ 2021-08-29 21:30                     ` Cole Dishington
  0 siblings, 0 replies; 15+ messages in thread
From: Cole Dishington @ 2021-08-29 21:30 UTC (permalink / raw)
  To: Blair Steven, davem@davemloft.net, Anthony Lineham,
	pablo@netfilter.org, shuah@kernel.org, Scott Parlane,
	kadlec@netfilter.org, kuba@kernel.org, fw@strlen.de
  Cc: linux-kselftest@vger.kernel.org, linux-kernel@vger.kernel.org,
	netfilter-devel@vger.kernel.org, coreteam@netfilter.org,
	netdev@vger.kernel.org

Hello,

Thanks for your time reviewing!

On Wed, 2021-08-25 at 19:05 +0200, Pablo Neira Ayuso wrote:
> Hi,
> 
> On Mon, Aug 09, 2021 at 04:10:36PM +1200, Cole Dishington wrote:
> > Adds support for masquerading into a smaller subset of ports -
> > defined by the PSID values from RFC-7597 Section 5.1. This is part of
> > the support for MAP-E and Lightweight 4over6, which allows multiple
> > devices to share an IPv4 address by splitting the L4 port / id into
> > ranges.
> > 
> > Co-developed-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> > Signed-off-by: Anthony Lineham <anthony.lineham@alliedtelesis.co.nz>
> > Co-developed-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> > Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
> > Signed-off-by: Blair Steven <blair.steven@alliedtelesis.co.nz>
> > Signed-off-by: Cole Dishington <Cole.Dishington@alliedtelesis.co.nz>
> > Reviewed-by: Florian Westphal <fw@strlen.de>
> [...]
> 
> Looking at the userspace logic:
> 
> https://scanmail.trustwave.com/?c=20988&d=6vim4fcVLjPkIbLUDqz3Tj2W4gXWNCkYa5llWggBjA&u=https%3a%2f%2fpatchwork%2eozlabs%2eorg%2fproject%2fnetfilter-devel%2fpatch%2f20210716002219%2e30193-1-Cole%2eDishington%40alliedtelesis%2eco%2enz%2f
> 
> Chunk extracted from void parse_psid(...)
> 
> >        offset = (1 << (16 - offset_len));
> 
> Assuming offset_len = 6, then you skip 0-1023 ports, OK.
> 
> >        psid = psid << (16 - offset_len - psid_len);
> 
> This psid calculation is correct? Maybe:
> 
>         psid = psid << (16 - offset_len);

PSID port numbers have the form
[offset|PSID|j]
and
16 = offset_length + PSID_length + j_length.
The PSID calculation above is bit shifting the passed psid up j_length.

The userspace tool accepts the unshifted psid to be consistent with how RFC7597 specified it (see RFC7597 Appendix A. Examples).

> 
> instead?
> 
>         psid=0  =>      0 << (16 - 6) = 1024
>         psid=1  =>      1 << (16 - 6) = 2048
> 
> This is implicitly assuming that 64 PSIDs are available, each of them
> taking 1024 ports, ie. psid_len is 6 bits. But why are you subtracting
> the psid_len above?
> 
> >        /* Handle the special case of no offset bits (a=0), so offset loops */
> >        min = psid;
> 
> OK, this line above is the minimal port in the range
> 
> >        if (offset)
> >                min += offset;
> 
> ... which is incremented by the offset (to skip the 0-1023 ports).
> 
> >       r->min_proto.all = htons(min);
> >       r->max_proto.all = htons(min + ((1 << (16 - offset_len - psid_len)) - 1));
> 
> Here, you subtract psid_len again, not sure why.

Each PSID port range is made up of many smaller contiguous port sub-ranges  (except for the special case of offset_len = 0) e.g. for PSID=0x34,psid_length=8,psid_offset=6 the ranges are 1232-1235, 2256-2259, ..., 63696-63699, 64720-64723 (Taken from rfc7597 Appendix A. Examples).
The above calculation is selecting the first sub-range. Max is computed by finding j_length and filling it with 1's.

> 
> >       r->base_proto.all = htons(offset);
> 
> base is set to offset, ie. 1024.
> 
> >       r->flags |= NF_NAT_RANGE_PSID;
> >       r->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
> 
> Now looking at the kernel side.
> 
> > diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
> > index 8e8a65d46345..19a4754cda76 100644
> > --- a/net/netfilter/nf_nat_masquerade.c
> > +++ b/net/netfilter/nf_nat_masquerade.c
> > @@ -55,8 +55,31 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
> >  	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
> >  	newrange.min_addr.ip = newsrc;
> >  	newrange.max_addr.ip = newsrc;
> > -	newrange.min_proto   = range->min_proto;
> > -	newrange.max_proto   = range->max_proto;
> > +
> > +	if (range->flags & NF_NAT_RANGE_PSID) {
> > +		u16 base = ntohs(range->base_proto.all);
> > +		u16 min =  ntohs(range->min_proto.all);
> > +		u16 off = 0;
> > +
> > +		/* xtables should stop base > 2^15 by enforcement of
> > +		 * 0 <= offset_len < 16 argument, with offset_len=0
> > +		 * as a special case inwhich base=0.
> 
> I don't understand this comment.

This is a sanity check. The userspace tool restricts offset_len to the specified range and since base = 2^(16 - offset_len) (or base = 0 for the special case of offset_len = 16) the below condition should never be true.
However, if base greater than 1<<15 was allowed, a divide by zero error would occur on the block below.

> 
> > +		 */
> > +		if (WARN_ON_ONCE(base > (1 << 15)))
> > +			return NF_DROP;
> > +
> > +		/* If offset=0, port range is in one contiguous block */
> > +		if (base)
> > +			off = prandom_u32_max(((1 << 16) / base) - 1);
> 
> Assuming the example above, base is set to 1024. Then, off is a random
> value between UINT16_MAX (you expressed this as 1 << 16) and the base
> which is 1024 minus 1.
> 
> So this is picking a random off (actually the PSID?) between 0 and 63.
> What about clashes? I mean, two different machines behind the NAT
> might get the same off.
> 
> > +		newrange.min_proto.all   = htons(min + base * off);
> 
> min could be 1024, 2048, 3072... you add base which is 1024 * off.
> 
> Is this duplicated? Both calculated in user and kernel space?

Each PSID value defines many contiguous port sub-ranges. The randomly chosen off selects the ith sub-range for a given PSID e.g. off=1 would select 2256-2259 for rfc7597 Appendix A. Examples.

The userspace tool calculates the min and max of the first sub-range for a given psid, whereas the above randomly selects one of the sub-ranges for a given psid.

j_length determines how large each sub-range will be, so for small j_length values there still is the risk the chosen sub-range will be exhausted.

> 
> > +		newrange.max_proto.all   = htons(ntohs(newrange.min_proto.all) + ntohs(range->max_proto.all) - min);
> 
> I'm stopping here, I'm getting lost.
> 
> My understanding about this RFC is that you would like to split the
> 16-bit ports in ranges to uniquely identify the host behind the NAT.
> 
> Why don't you just you just select the port range from userspace
> utilizing the existing infrastructure? I mean, why do you need this
> kernel patch?

If utilizing existing infrastruture to install PSID port ranges a lot of rules would be required as each PSID port range is made up of many smaller sub-ranges.

e.g. (from rfc7597 Appendix A. Examples)
for psid_length=8,offset_length=6 each PSID would need 63 NF_NAT_RANGE_PROTO_SPECIFIED rules, hence a total of 16128 rules if all the PSIDs were allocated.

> 
> Florian already suggested:
> 
> > Is it really needed to place all of this in the nat core?
> > 
> > The only thing that has to be done in the NAT core, afaics, is to
> > suppress port reallocation attmepts when NF_NAT_RANGE_PSID is set.
> > 
> > Is there a reason why nf_nat_masquerade_ipv4/6 can't be changed instead
> > to do what you want?
> > 
> > AFAICS its enough to set NF_NAT_RANGE_PROTO_SPECIFIED and init the
> > upper/lower boundaries, i.e. change input given to nf_nat_setup_info().
> 
> extracted from:
> 
> https://scanmail.trustwave.com/?c=20988&d=6vim4fcVLjPkIbLUDqz3Tj2W4gXWNCkYa5s0Bg8JjA&u=https%3a%2f%2fpatchwork%2eozlabs%2eorg%2fproject%2fnetfilter-devel%2fpatch%2f20210422023506%2e4651-1-Cole%2eDishington%40alliedtelesis%2eco%2enz%2f







^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-08-29 21:30 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20210705103959.GG18022@breakpoint.cc>
2021-07-16  0:27 ` [PATCH 0/3] Add RFC-7597 Section 5.1 PSID support Cole Dishington
2021-07-16  0:27   ` [PATCH 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
2021-07-16  0:27   ` [PATCH 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
2021-07-16 15:18     ` Florian Westphal
2021-07-19  1:21       ` Cole Dishington
2021-07-22  7:17         ` Florian Westphal
2021-07-25 23:28           ` Cole Dishington
2021-07-26 14:37             ` Florian Westphal
2021-08-09  4:10               ` [PATCH net-next 0/3] " Cole Dishington
2021-08-09  4:10                 ` [PATCH net-next 1/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support xtables API Cole Dishington
2021-08-09  4:10                 ` [PATCH net-next 2/3] net: netfilter: Add RFC-7597 Section 5.1 PSID support Cole Dishington
2021-08-25 17:05                   ` Pablo Neira Ayuso
2021-08-29 21:30                     ` Cole Dishington
2021-08-09  4:10                 ` [PATCH net-next 3/3] selftests: netfilter: Add RFC-7597 Section 5.1 PSID selftests Cole Dishington
2021-07-16  0:27   ` [PATCH " Cole Dishington

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox