All of lore.kernel.org
 help / color / mirror / Atom feed
* netfilter expected behavior for established connections
@ 2025-03-11 23:56 Antonio Ojea
  2025-03-12  0:30 ` imnozi
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Antonio Ojea @ 2025-03-11 23:56 UTC (permalink / raw)
  To: netfilter

[-- Attachment #1: Type: text/plain, Size: 729 bytes --]

Hi,

I'm puzzled by the following behavior and would appreciate any help
understanding how it works.

The setup is like this:  Client --- Router --- Server

- Router DNATs to a Virtual IP and Port of the Server.
- Client establishes a permanent connection to the Virtual IP.
- Router adds a REJECT rule in the FORWARD hook for the Server IP

I expect the REJECT to match the established connection, but the
client keeps reaching the Server using the existing connection.

The packets of the established connection do not show up on the traces
using nftrace.

Is it possible to "DROP/REJECT" the established connection ?

I've created a selftest to reproduce this behavior, please find it attached.

[-- Attachment #2: 0001-selftests-netfilter-conntrack-does-not-shadow-reject.patch --]
[-- Type: application/octet-stream, Size: 9094 bytes --]

From 8f60146397b277c43bf795651e4cd00469c0bbf3 Mon Sep 17 00:00:00 2001
From: Antonio Ojea <aojea@google.com>
Date: Tue, 11 Mar 2025 08:36:56 +0000
Subject: [PATCH] selftests: netfilter: conntrack does not shadow reject rules

Test netfilter behavior specific for established connections.

Signed-off-by: Antonio Ojea <aojea@google.com>
---
 .../testing/selftests/net/netfilter/Makefile  |   1 +
 .../nft_conntrack_reject_established.sh       | 251 ++++++++++++++++++
 2 files changed, 252 insertions(+)
 create mode 100755 tools/testing/selftests/net/netfilter/nft_conntrack_reject_established.sh

diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index ffe161fac8b5..c276b8ac2383 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -21,6 +21,7 @@ TEST_PROGS += nf_nat_edemux.sh
 TEST_PROGS += nft_audit.sh
 TEST_PROGS += nft_concat_range.sh
 TEST_PROGS += nft_conntrack_helper.sh
+TEST_PROGS += nft_conntrack_reject_established.sh
 TEST_PROGS += nft_fib.sh
 TEST_PROGS += nft_flowtable.sh
 TEST_PROGS += nft_meta.sh
diff --git a/tools/testing/selftests/net/netfilter/nft_conntrack_reject_established.sh b/tools/testing/selftests/net/netfilter/nft_conntrack_reject_established.sh
new file mode 100755
index 000000000000..9e2a2f24640e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_conntrack_reject_established.sh
@@ -0,0 +1,251 @@
+#!/bin/bash
+#
+# This tests conntrack on the following scenario:
+#
+#                         +------------+
+# +-------+               |  nsrouter  |                  +-------+
+# |ns1    |.99          .1|            |.1             .99|    ns2|
+# |   eth0|---------------|veth0  veth1|------------------|eth0   |
+# |       |  10.0.1.0/24  |            |   10.0.2.0/24    |       |
+# +-------+  dead:1::/64  |            |   dead:2::/64    +-------+
+#                         +------------+
+#
+# nsrouter implements load balancing using DNAT with a virtual IP
+# 10.0.4.10 - dead:4::a
+# shellcheck disable=SC2162,SC2317
+
+source lib.sh
+ret=0
+
+timeout=15
+
+cleanup()
+{
+	ip netns pids "$ns1" | xargs kill 2>/dev/null
+	ip netns pids "$ns2" | xargs kill 2>/dev/null
+	ip netns pids "$nsrouter" | xargs kill 2>/dev/null
+
+	cleanup_all_ns
+}
+
+checktool "nft --version" "test without nft tool"
+checktool "socat -h" "run test without socat"
+# checktool, setup_ns and cleanup_all_ns are not defined in this script; presumably lib.sh provides them
+trap cleanup EXIT
+setup_ns ns1 ns2 nsrouter
+# veth0 <-> eth0 in ns1 and veth1 <-> eth0 in ns2; skip the test when veth pairs cannot be created
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+# router side: .1 / ::1 on each subnet (nodad skips IPv6 duplicate address detection)
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+# client (ns1) and server (ns2) side: .99 / ::99 with default routes through the router
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+# let the router forward: IPv6 for all interfaces, IPv4 on veth0 and veth1
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+test_ping() {
+  if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+	return 1
+  fi
+
+  if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
+	return 2
+  fi
+
+  return 0
+}
+
+test_ping_router() {
+  if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
+	return 3
+  fi
+
+  if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
+	return 4
+  fi
+
+  return 0
+}
+
+
+listener_ready()
+{
+	local ns="$1"
+	local port="$2"
+	local proto="$3"
+	ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
+}
+
+test_conntrack_reject_established()
+{
+	local ip_proto="$1"
+	# derived variables
+	local testname="test_${ip_proto}_conntrack_reject_established"
+	local socat_ipproto
+	local vip
+	local ns2_ip
+	local ns2_ip_port
+
+	# socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1)
+	case $ip_proto in
+	"ip")
+		socat_ipproto="-4"
+		vip=10.0.4.10
+		ns2_ip=10.0.2.99
+		vip_ip_port="$vip:8080"
+		ns2_ip_port="$ns2_ip:8080"
+	;;
+	"ip6")
+		socat_ipproto="-6"
+		vip=dead:4::a
+		ns2_ip=dead:2::99
+		vip_ip_port="[$vip]:8080"
+		ns2_ip_port="[$ns2_ip]:8080"
+	;;
+	*)
+	echo "FAIL: unsupported protocol"
+	exit 255
+	;;
+	esac
+
+	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet nat {
+	chain kube-proxy {
+		type nat hook prerouting priority 0; policy accept;
+		$ip_proto daddr $vip tcp dport 8080 dnat to $ns2_ip_port
+	}
+}
+EOF
+
+	# set up an echo server
+	timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" tcp-listen:8080,fork PIPE 2>/dev/null &
+	local server2_pid=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-t"
+
+	local result
+	# request from ns1 to ns2 (direct traffic)
+	result=$(echo PING | ip netns exec "$ns1" socat -t 2 -T 2 STDIO tcp:"$ns2_ip_port")
+	if [ "$result" == "PING" ] ;then
+		echo "PASS: $testname: ns1 got reply \"$result\" connecting to ns2"
+	else
+		echo "ERROR: $testname: ns1 got reply \"$result\" connecting to ns2, not \"PING\" as intended"
+		ret=1
+	fi
+
+	# set up a persistent connection through DNAT to ns3
+	rm -f pipe.test
+	timeout "$timeout" ip netns exec "$ns1" socat -v -d "$socat_ipproto" PIPE:pipe.test tcp:"$vip_ip_port" &
+	local client1_pid=$!
+	# create FD 3 for writing and reading to the pipe
+	exec 3<>pipe.test
+
+
+	# request from ns1 to vip (DNAT to ns2)
+	echo PING >&3 && read line <&3
+	if [ "$result" = "PING" ] ;then
+		echo "PASS: $testname: ns1 got reply \"$result\" connecting to vip using persistent connection"
+	else
+		echo "ERROR: $testname: ns1 got reply \"$result\" connecting to vip using persistent connection, not \"PING\" as intended"
+		ret=1
+	fi
+
+	# request from ns1 to vip
+	result=$(echo PING | ip netns exec "$ns1" socat -t 2 -T 2 STDIO tcp:"$vip_ip_port")
+	if [ "$result" == "PING" ] ;then
+		echo "PASS: $testname: ns1 got reply \"$result\" connecting to vip"
+	else
+		echo "ERROR: $testname: ns1 got reply \"$result\" connecting to vip, not \"PING\" as intended"
+		ret=1
+	fi
+
+	# request from ns1 to vip persistent connection (DNAT to ns2)
+	echo PING >&3 && read line <&3
+	if [ "$result" = "PING" ] ;then
+		echo "PASS: $testname: ns1 got reply \"$result\" connecting to vip using persistent connection"
+	else
+		echo "ERROR: $testname: ns1 got reply \"$result\" connecting to vip using persistent connection, not \"PING\" as intended"
+		ret=1
+	fi
+
+	# add a rule to filter traffic to ns2 ip and port (after DNAT)
+	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet filter {
+	chain kube-proxy {
+		type filter hook forward priority 0; policy accept;
+		$ip_proto daddr $ns2_ip tcp dport 8080 counter reject
+	}
+}
+EOF
+
+	# request from ns1 to ns2 (direct traffic)
+	result=$(echo PING | ip netns exec "$ns1" socat -t 2 -T 2 STDIO tcp:"$ns2_ip_port" 2>&1 >/dev/null)
+	if [[ "$result" == *"Connection refused"* ]] ;then
+		echo "PASS: $testname: ns1 got \"Connection refused\" connecting to vip (ns2)"
+	else
+		echo "ERROR: $testname: ns1 got reply \"$result\" connecting to vip, not \"Connection refused\" as intended"
+		ret=1
+	fi
+
+	# request from ns1 to vip (DNAT to ns2)
+	result=$(echo PING | ip netns exec "$ns1" socat -t 2 -T 2 STDIO tcp:"$vip_ip_port" 2>&1 >/dev/null)
+	if [[ "$result" == *"Connection refused"* ]] ;then
+		echo "PASS: $testname: ns1 connection to vip is closed (ns2)"
+	else
+		echo "ERROR: $testname: ns1 got reply \"$result\" connecting to vip, not \"Connection refused\" as intended"
+		ret=1
+	fi
+
+	# request from ns1 to vip (DNAT to ns2) on an existing connection
+	echo PING >&3 && read result <&3
+	if [[ -z "$result" ]] && ! kill -0 "$client1_pid" 2>/dev/null; then
+		echo "PASS: $testname: ns1 got no response and client is closed to vip (ns2)"
+	else
+		echo "ERROR: $testname: ns1 got reply \"$result\" connecting to vip, persistent connection is not closed as intended"
+		ret=1
+	fi
+
+	nft list counters 1>&2
+
+	kill $client1_pid 2>/dev/null
+	kill $server2_pid 2>/dev/null
+}
+
+
+if test_ping; then
+	echo "PASS: ${ns1} can reach ${ns2}"
+else
+	ret=$?	# status of test_ping (1: IPv4 failed, 2: IPv6 failed); ret was still 0 here
+	echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+	exit $ret
+fi
+
+test_conntrack_reject_established "ip"
+test_conntrack_reject_established "ip6"
+
+exit $ret
-- 
2.49.0.rc0.332.g42c0ae87b1-goog


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2025-03-13 23:24 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-11 23:56 netfilter expected behavior for established connections Antonio Ojea
2025-03-12  0:30 ` imnozi
2025-03-12  7:11 ` Florian Westphal
2025-03-12 10:55   ` Antonio Ojea
2025-03-12 12:51     ` Florian Westphal
2025-03-12 13:04       ` Antonio Ojea
2025-03-12 14:17         ` Antonio Ojea
2025-03-12 14:25           ` Florian Westphal
2025-03-12 16:13 ` Florian Westphal
2025-03-12 18:02   ` Antonio Ojea
2025-03-12 18:20     ` Florian Westphal
2025-03-12 18:29       ` Antonio Ojea
2025-03-13 23:23         ` Antonio Ojea

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.