netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 bpf-next 0/2] BPF: support mark in bpf_fib_lookup
@ 2024-03-22 14:02 Anton Protopopov
  2024-03-22 14:02 ` [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup Anton Protopopov
  2024-03-22 14:02 ` [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests Anton Protopopov
  0 siblings, 2 replies; 9+ messages in thread
From: Anton Protopopov @ 2024-03-22 14:02 UTC (permalink / raw)
  To: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Jiri Olsa,
	Martin KaFai Lau, Stanislav Fomichev, bpf
  Cc: Anton Protopopov, Rumen Telbizov, David Ahern, netdev

This patch series adds policy routing support in bpf_fib_lookup.
This is a useful functionality which was missing for a long time,
as without it some networking setups can't be implemented in BPF.
One example can be found here [1].

A while ago there was an attempt to add this functionality [2] by
Rumen Telbizov and David Ahern. I've completely refactored the code,
except that the changes to the struct bpf_fib_lookup were copy-pasted
from the original patch.

The first patch implements the functionality, the second patch adds
a few selftests.

  [1] https://github.com/cilium/cilium/pull/12770
  [2] https://lore.kernel.org/all/20210629185537.78008-2-rumen.telbizov@menlosecurity.com/

Anton Protopopov (2):
  bpf: add support for passing mark with bpf_fib_lookup
  selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests

 include/uapi/linux/bpf.h                      |  20 ++-
 net/core/filter.c                             |  12 +-
 tools/include/uapi/linux/bpf.h                |  20 ++-
 .../selftests/bpf/prog_tests/fib_lookup.c     | 160 ++++++++++++++----
 4 files changed, 176 insertions(+), 36 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup
  2024-03-22 14:02 [PATCH v1 bpf-next 0/2] BPF: support mark in bpf_fib_lookup Anton Protopopov
@ 2024-03-22 14:02 ` Anton Protopopov
  2024-03-24 17:38   ` David Ahern
  2024-03-22 14:02 ` [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests Anton Protopopov
  1 sibling, 1 reply; 9+ messages in thread
From: Anton Protopopov @ 2024-03-22 14:02 UTC (permalink / raw)
  To: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Jiri Olsa,
	Martin KaFai Lau, Stanislav Fomichev, bpf
  Cc: Anton Protopopov, Rumen Telbizov, David Ahern, netdev

Extend the bpf_fib_lookup() helper by making it to utilize mark if
the BPF_FIB_LOOKUP_MARK flag is set. In order to pass the mark the
four bytes of struct bpf_fib_lookup are used, shared with the
output-only smac/dmac fields.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h       | 20 ++++++++++++++++++--
 net/core/filter.c              | 12 +++++++++---
 tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9585f5345353..96d57e483133 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3394,6 +3394,10 @@ union bpf_attr {
  *			for the nexthop. If the src addr cannot be derived,
  *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
  *			case, *params*->dmac and *params*->smac are not set either.
+ *		**BPF_FIB_LOOKUP_MARK**
+ *			Use the mark present in *params*->mark for the fib lookup.
+ *			This option should not be used with BPF_FIB_LOOKUP_DIRECT,
+ *			as it only has meaning for full lookups.
  *
  *		*ctx* is either **struct xdp_md** for XDP programs or
  *		**struct sk_buff** tc cls_act programs.
@@ -7120,6 +7124,7 @@ enum {
 	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
 	BPF_FIB_LOOKUP_TBID    = (1U << 3),
 	BPF_FIB_LOOKUP_SRC     = (1U << 4),
+	BPF_FIB_LOOKUP_MARK    = (1U << 5),
 };
 
 enum {
@@ -7197,8 +7202,19 @@ struct bpf_fib_lookup {
 		__u32	tbid;
 	};
 
-	__u8	smac[6];     /* ETH_ALEN */
-	__u8	dmac[6];     /* ETH_ALEN */
+	union {
+		/* input */
+		struct {
+			__u32	mark;   /* policy routing */
+			/* 2 4-byte holes for input */
+		};
+
+		/* output: source and dest mac */
+		struct {
+			__u8	smac[6];	/* ETH_ALEN */
+			__u8	dmac[6];	/* ETH_ALEN */
+		};
+	};
 };
 
 struct bpf_redir_neigh {
diff --git a/net/core/filter.c b/net/core/filter.c
index 0c66e4a3fc5b..1205dd777dc2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5884,7 +5884,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
 	} else {
-		fl4.flowi4_mark = 0;
+		if (flags & BPF_FIB_LOOKUP_MARK)
+			fl4.flowi4_mark = params->mark;
+		else
+			fl4.flowi4_mark = 0;
 		fl4.flowi4_secid = 0;
 		fl4.flowi4_tun_key.tun_id = 0;
 		fl4.flowi4_uid = sock_net_uid(net, NULL);
@@ -6027,7 +6030,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
 						   strict);
 	} else {
-		fl6.flowi6_mark = 0;
+		if (flags & BPF_FIB_LOOKUP_MARK)
+			fl6.flowi6_mark = params->mark;
+		else
+			fl6.flowi6_mark = 0;
 		fl6.flowi6_secid = 0;
 		fl6.flowi6_tun_key.tun_id = 0;
 		fl6.flowi6_uid = sock_net_uid(net, NULL);
@@ -6105,7 +6111,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
 			     BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
-			     BPF_FIB_LOOKUP_SRC)
+			     BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
 
 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bf80b614c4db..4c9b5bfbd9c6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3393,6 +3393,10 @@ union bpf_attr {
  *			for the nexthop. If the src addr cannot be derived,
  *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
  *			case, *params*->dmac and *params*->smac are not set either.
+ *		**BPF_FIB_LOOKUP_MARK**
+ *			Use the mark present in *params*->mark for the fib lookup.
+ *			This option should not be used with BPF_FIB_LOOKUP_DIRECT,
+ *			as it only has meaning for full lookups.
  *
  *		*ctx* is either **struct xdp_md** for XDP programs or
  *		**struct sk_buff** tc cls_act programs.
@@ -7119,6 +7123,7 @@ enum {
 	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
 	BPF_FIB_LOOKUP_TBID    = (1U << 3),
 	BPF_FIB_LOOKUP_SRC     = (1U << 4),
+	BPF_FIB_LOOKUP_MARK    = (1U << 5),
 };
 
 enum {
@@ -7196,8 +7201,19 @@ struct bpf_fib_lookup {
 		__u32	tbid;
 	};
 
-	__u8	smac[6];     /* ETH_ALEN */
-	__u8	dmac[6];     /* ETH_ALEN */
+	union {
+		/* input */
+		struct {
+			__u32	mark;   /* policy routing */
+			/* 2 4-byte holes for input */
+		};
+
+		/* output: source and dest mac */
+		struct {
+			__u8	smac[6];	/* ETH_ALEN */
+			__u8	dmac[6];	/* ETH_ALEN */
+		};
+	};
 };
 
 struct bpf_redir_neigh {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
  2024-03-22 14:02 [PATCH v1 bpf-next 0/2] BPF: support mark in bpf_fib_lookup Anton Protopopov
  2024-03-22 14:02 ` [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup Anton Protopopov
@ 2024-03-22 14:02 ` Anton Protopopov
  2024-03-23 22:34   ` Martin KaFai Lau
  1 sibling, 1 reply; 9+ messages in thread
From: Anton Protopopov @ 2024-03-22 14:02 UTC (permalink / raw)
  To: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Jiri Olsa,
	Martin KaFai Lau, Stanislav Fomichev, bpf
  Cc: Anton Protopopov, Rumen Telbizov, David Ahern, netdev

This patch extends the fib_lookup test suite by adding a few test
cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
to the bpf_fib_lookup:

  * Test destination IP address selection with and without a mark
    and/or the BPF_FIB_LOOKUP_MARK flag set

To test this functionality another network namespace and a new veth
pair were added to the test.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/prog_tests/fib_lookup.c     | 160 ++++++++++++++----
 1 file changed, 131 insertions(+), 29 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 3379df2d4cf2..a78316431f32 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -10,6 +10,7 @@
 #include "fib_lookup.skel.h"
 
 #define NS_TEST			"fib_lookup_ns"
+#define NS_REMOTE		"remote_ns"
 #define IPV6_IFACE_ADDR		"face::face"
 #define IPV6_IFACE_ADDR_SEC	"cafe::cafe"
 #define IPV6_ADDR_DST		"face::3"
@@ -26,6 +27,17 @@
 #define IPV6_TBID_ADDR		"fd00::FFFF"
 #define IPV6_TBID_NET		"fd00::"
 #define IPV6_TBID_DST		"fd00::2"
+#define MARK_NO_POLICY		33
+#define MARK			42
+#define MARK_TABLE		"200"
+#define IPV4_REMOTE_DST		"1.2.3.4"
+#define IPV4_LOCAL		"10.4.0.3"
+#define IPV4_GW1		"10.4.0.1"
+#define IPV4_GW2		"10.4.0.2"
+#define IPV6_REMOTE_DST		"be:ef::b0:10"
+#define IPV6_LOCAL		"fd01::3"
+#define IPV6_GW1		"fd01::1"
+#define IPV6_GW2		"fd01::2"
 #define DMAC			"11:11:11:11:11:11"
 #define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
 #define DMAC2			"01:01:01:01:01:01"
@@ -36,9 +48,12 @@ struct fib_lookup_test {
 	const char *daddr;
 	int expected_ret;
 	const char *expected_src;
+	const char *expected_dst;
 	int lookup_flags;
 	__u32 tbid;
 	__u8 dmac[6];
+	__u32 mark;
+	const char *ifname;
 };
 
 static const struct fib_lookup_test tests[] = {
@@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
 	  .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
 	  .expected_src = IPV6_IFACE_ADDR_SEC,
 	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+	/* policy routing */
+	{ .desc = "IPv4 policy routing, default",
+	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_dst = IPV4_GW1, .ifname = "veth3",
+	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+	{ .desc = "IPv4 policy routing, mark doesn't point to a policy",
+	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_dst = IPV4_GW1, .ifname = "veth3",
+	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+	  .mark = MARK_NO_POLICY, },
+	{ .desc = "IPv4 policy routing, mark points to a policy",
+	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_dst = IPV4_GW2, .ifname = "veth3",
+	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+	  .mark = MARK, },
+	{ .desc = "IPv4 policy routing, mark points to a policy, but no flag",
+	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_dst = IPV4_GW1, .ifname = "veth3",
+	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+	  .mark = MARK, },
+	{ .desc = "IPv6 policy routing, default",
+	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_dst = IPV6_GW1, .ifname = "veth3",
+	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+	{ .desc = "IPv6 policy routing, mark doesn't point to a policy",
+	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_dst = IPV6_GW1, .ifname = "veth3",
+	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+	  .mark = MARK_NO_POLICY, },
+	{ .desc = "IPv6 policy routing, mark points to a policy",
+	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_dst = IPV6_GW2, .ifname = "veth3",
+	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+	  .mark = MARK, },
+	{ .desc = "IPv6 policy routing, mark points to a policy, but no flag",
+	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .expected_dst = IPV6_GW1, .ifname = "veth3",
+	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+	  .mark = MARK, },
 };
 
-static int ifindex;
-
 static int setup_netns(void)
 {
 	int err;
@@ -144,12 +196,40 @@ static int setup_netns(void)
 	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
 		goto fail;
 
+	/* Setup for policy routing tests */
+	SYS(fail, "ip link add veth3 type veth peer name veth4");
+	SYS(fail, "ip link set dev veth3 up");
+	SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
+
+	SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
+	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
+	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
+	SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
+	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
+	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
+	SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
+	SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
+	SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
+	SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
+	SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+	SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+
+	err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
+	if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
+		goto fail;
+
+	err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
+	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
+		goto fail;
+
 	return 0;
 fail:
 	return -1;
 }
 
-static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test)
+static int set_lookup_params(struct bpf_fib_lookup *params,
+			     const struct fib_lookup_test *test,
+			     int ifindex)
 {
 	int ret;
 
@@ -159,6 +239,9 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
 	params->ifindex = ifindex;
 	params->tbid = test->tbid;
 
+	if (test->lookup_flags & BPF_FIB_LOOKUP_MARK)
+		params->mark = test->mark;
+
 	if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
 		params->family = AF_INET6;
 		if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
@@ -190,40 +273,45 @@ static void mac_str(char *b, const __u8 *mac)
 		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
 }
 
-static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
+static void assert_ip_address(int family, void *addr, const char *expected_str)
 {
+	char str[INET6_ADDRSTRLEN];
+	u8 expected_addr[16];
+	int addr_len = 0;
 	int ret;
-	__u32 src6[4];
-	__be32 src4;
 
-	switch (fib_params->family) {
+	switch (family) {
 	case AF_INET6:
-		ret = inet_pton(AF_INET6, expected_src, src6);
-		ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
-
-		ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
-		if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
-			char str_src6[64];
-
-			inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
-				  sizeof(str_src6));
-			printf("ipv6 expected %s actual %s ", expected_src,
-			       str_src6);
-		}
-
+		ret = inet_pton(AF_INET6, expected_str, expected_addr);
+		ASSERT_EQ(ret, 1, "inet_pton(AF_INET6, expected_str)");
+		addr_len = 16;
 		break;
 	case AF_INET:
-		ret = inet_pton(AF_INET, expected_src, &src4);
-		ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
-
-		ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
-
+		ret = inet_pton(AF_INET, expected_str, expected_addr);
+		ASSERT_EQ(ret, 1, "inet_pton(AF_INET, expected_str)");
+		addr_len = 4;
 		break;
 	default:
-		PRINT_FAIL("invalid addr family: %d", fib_params->family);
+		PRINT_FAIL("invalid address family: %d", family);
+		break;
+	}
+
+	if (memcmp(addr, expected_addr, addr_len)) {
+		inet_ntop(family, addr, str, sizeof(str));
+		PRINT_FAIL("expected %s actual %s ", expected_str, str);
 	}
 }
 
+static void assert_src_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+	assert_ip_address(params->family, params->ipv6_src, expected);
+}
+
+static void assert_dst_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+	assert_ip_address(params->family, params->ipv6_dst, expected);
+}
+
 void test_fib_lookup(void)
 {
 	struct bpf_fib_lookup *fib_params;
@@ -231,6 +319,7 @@ void test_fib_lookup(void)
 	struct __sk_buff skb = { };
 	struct fib_lookup *skel;
 	int prog_fd, err, ret, i;
+	int default_ifindex;
 
 	/* The test does not use the skb->data, so
 	 * use pkt_v6 for both v6 and v4 test.
@@ -248,6 +337,7 @@ void test_fib_lookup(void)
 	prog_fd = bpf_program__fd(skel->progs.fib_lookup);
 
 	SYS(fail, "ip netns add %s", NS_TEST);
+	SYS(fail, "ip netns add %s", NS_REMOTE);
 
 	nstoken = open_netns(NS_TEST);
 	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
@@ -256,15 +346,23 @@ void test_fib_lookup(void)
 	if (setup_netns())
 		goto fail;
 
-	ifindex = if_nametoindex("veth1");
-	skb.ifindex = ifindex;
+	default_ifindex = if_nametoindex("veth1");
+	if (!ASSERT_NEQ(default_ifindex, 0, "if_nametoindex(veth1)"))
+		goto fail;
+
 	fib_params = &skel->bss->fib_params;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		printf("Testing %s ", tests[i].desc);
 
-		if (set_lookup_params(fib_params, &tests[i]))
+		if (tests[i].ifname)
+			skb.ifindex = if_nametoindex(tests[i].ifname);
+		else
+			skb.ifindex = default_ifindex;
+
+		if (set_lookup_params(fib_params, &tests[i], skb.ifindex))
 			continue;
+
 		skel->bss->fib_lookup_ret = -1;
 		skel->bss->lookup_flags = tests[i].lookup_flags;
 
@@ -278,6 +376,9 @@ void test_fib_lookup(void)
 		if (tests[i].expected_src)
 			assert_src_ip(fib_params, tests[i].expected_src);
 
+		if (tests[i].expected_dst)
+			assert_dst_ip(fib_params, tests[i].expected_dst);
+
 		ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
 		if (!ASSERT_EQ(ret, 0, "dmac not match")) {
 			char expected[18], actual[18];
@@ -299,5 +400,6 @@ void test_fib_lookup(void)
 	if (nstoken)
 		close_netns(nstoken);
 	SYS_NOFAIL("ip netns del " NS_TEST);
+	SYS_NOFAIL("ip netns del " NS_REMOTE);
 	fib_lookup__destroy(skel);
 }
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
  2024-03-22 14:02 ` [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests Anton Protopopov
@ 2024-03-23 22:34   ` Martin KaFai Lau
  2024-03-24 15:04     ` Anton Protopopov
  0 siblings, 1 reply; 9+ messages in thread
From: Martin KaFai Lau @ 2024-03-23 22:34 UTC (permalink / raw)
  To: Anton Protopopov
  Cc: Rumen Telbizov, David Ahern, netdev, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Jiri Olsa, Stanislav Fomichev,
	bpf

On 3/22/24 7:02 AM, Anton Protopopov wrote:
> This patch extends the fib_lookup test suite by adding a few test
> cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
> to the bpf_fib_lookup:
> 
>    * Test destination IP address selection with and without a mark
>      and/or the BPF_FIB_LOOKUP_MARK flag set
> 
> To test this functionality another network namespace and a new veth
> pair were added to the test.
> 

[ ... ]

>   static const struct fib_lookup_test tests[] = {
> @@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
>   	  .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>   	  .expected_src = IPV6_IFACE_ADDR_SEC,
>   	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> +	/* policy routing */
> +	{ .desc = "IPv4 policy routing, default",
> +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> +	{ .desc = "IPv4 policy routing, mark doesn't point to a policy",
> +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> +	  .mark = MARK_NO_POLICY, },
> +	{ .desc = "IPv4 policy routing, mark points to a policy",
> +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> +	  .expected_dst = IPV4_GW2, .ifname = "veth3",
> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> +	  .mark = MARK, },
> +	{ .desc = "IPv4 policy routing, mark points to a policy, but no flag",
> +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> +	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> +	  .mark = MARK, },
> +	{ .desc = "IPv6 policy routing, default",
> +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> +	{ .desc = "IPv6 policy routing, mark doesn't point to a policy",
> +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> +	  .mark = MARK_NO_POLICY, },
> +	{ .desc = "IPv6 policy routing, mark points to a policy",
> +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> +	  .expected_dst = IPV6_GW2, .ifname = "veth3",
> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> +	  .mark = MARK, },
> +	{ .desc = "IPv6 policy routing, mark points to a policy, but no flag",
> +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> +	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> +	  .mark = MARK, },
>   };
>   
> -static int ifindex;
> -
>   static int setup_netns(void)
>   {
>   	int err;
> @@ -144,12 +196,40 @@ static int setup_netns(void)
>   	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
>   		goto fail;
>   
> +	/* Setup for policy routing tests */
> +	SYS(fail, "ip link add veth3 type veth peer name veth4");
> +	SYS(fail, "ip link set dev veth3 up");
> +	SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
> +
> +	SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
> +	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
> +	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
> +	SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
> +	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
> +	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);

Trying to see if the setup can be simplified.

Does it need to add another netns and setup a reachable IPV[46]_GW[12] gateway?

The test is not sending any traffic and it is a BPF_FIB_LOOKUP_SKIP_NEIGH test.

> +	SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
> +	SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
> +	SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
> +	SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
> +	SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> +	SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> +
> +	err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
> +	if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
> +		goto fail;
> +
> +	err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
> +	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
> +		goto fail;
> +
>   	return 0;
>   fail:
>   	return -1;
>   }

[ ... ]

> @@ -248,6 +337,7 @@ void test_fib_lookup(void)
>   	prog_fd = bpf_program__fd(skel->progs.fib_lookup);
>   
>   	SYS(fail, "ip netns add %s", NS_TEST);
> +	SYS(fail, "ip netns add %s", NS_REMOTE);



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
  2024-03-23 22:34   ` Martin KaFai Lau
@ 2024-03-24 15:04     ` Anton Protopopov
  2024-03-25 18:12       ` Martin KaFai Lau
  0 siblings, 1 reply; 9+ messages in thread
From: Anton Protopopov @ 2024-03-24 15:04 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Rumen Telbizov, David Ahern, netdev, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Jiri Olsa, Stanislav Fomichev,
	bpf

On Sat, Mar 23, 2024 at 03:34:10PM -0700, Martin KaFai Lau wrote:
> On 3/22/24 7:02 AM, Anton Protopopov wrote:
> > This patch extends the fib_lookup test suite by adding a few test
> > cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
> > to the bpf_fib_lookup:
> > 
> >    * Test destination IP address selection with and without a mark
> >      and/or the BPF_FIB_LOOKUP_MARK flag set
> > 
> > To test this functionality another network namespace and a new veth
> > pair were added to the test.
> > 
> 
> [ ... ]
> 
> >   static const struct fib_lookup_test tests[] = {
> > @@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
> >   	  .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> >   	  .expected_src = IPV6_IFACE_ADDR_SEC,
> >   	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > +	/* policy routing */
> > +	{ .desc = "IPv4 policy routing, default",
> > +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > +	{ .desc = "IPv4 policy routing, mark doesn't point to a policy",
> > +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > +	  .mark = MARK_NO_POLICY, },
> > +	{ .desc = "IPv4 policy routing, mark points to a policy",
> > +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > +	  .expected_dst = IPV4_GW2, .ifname = "veth3",
> > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > +	  .mark = MARK, },
> > +	{ .desc = "IPv4 policy routing, mark points to a policy, but no flag",
> > +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> > +	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> > +	  .mark = MARK, },
> > +	{ .desc = "IPv6 policy routing, default",
> > +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > +	{ .desc = "IPv6 policy routing, mark doesn't point to a policy",
> > +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > +	  .mark = MARK_NO_POLICY, },
> > +	{ .desc = "IPv6 policy routing, mark points to a policy",
> > +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > +	  .expected_dst = IPV6_GW2, .ifname = "veth3",
> > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > +	  .mark = MARK, },
> > +	{ .desc = "IPv6 policy routing, mark points to a policy, but no flag",
> > +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> > +	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> > +	  .mark = MARK, },
> >   };
> > -static int ifindex;
> > -
> >   static int setup_netns(void)
> >   {
> >   	int err;
> > @@ -144,12 +196,40 @@ static int setup_netns(void)
> >   	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
> >   		goto fail;
> > +	/* Setup for policy routing tests */
> > +	SYS(fail, "ip link add veth3 type veth peer name veth4");
> > +	SYS(fail, "ip link set dev veth3 up");
> > +	SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
> > +
> > +	SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
> > +	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
> > +	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
> > +	SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
> > +	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
> > +	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
> 
> Trying to see if the setup can be simplified.
> 
> Does it need to add another netns and setup a reachable IPV[46]_GW[12] gateway?
> 
> The test is not sending any traffic and it is a BPF_FIB_LOOKUP_SKIP_NEIGH test.

I think this will not work without another namespace, as FIB lookup will
return DST="final destination", not DST="gateway", as the gateway is in the
same namespace and can be skipped.

Instead of adding a new namespace I can move the second interface to the
root namespace. This will work, but then we're interfering with the root
namespace.

> > +	SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
> > +	SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
> > +	SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
> > +	SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
> > +	SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> > +	SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> > +
> > +	err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
> > +	if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
> > +		goto fail;
> > +
> > +	err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
> > +	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
> > +		goto fail;
> > +
> >   	return 0;
> >   fail:
> >   	return -1;
> >   }
> 
> [ ... ]
> 
> > @@ -248,6 +337,7 @@ void test_fib_lookup(void)
> >   	prog_fd = bpf_program__fd(skel->progs.fib_lookup);
> >   	SYS(fail, "ip netns add %s", NS_TEST);
> > +	SYS(fail, "ip netns add %s", NS_REMOTE);
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup
  2024-03-22 14:02 ` [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup Anton Protopopov
@ 2024-03-24 17:38   ` David Ahern
  2024-03-25 12:19     ` Anton Protopopov
  0 siblings, 1 reply; 9+ messages in thread
From: David Ahern @ 2024-03-24 17:38 UTC (permalink / raw)
  To: Anton Protopopov, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Jiri Olsa, Martin KaFai Lau, Stanislav Fomichev,
	bpf
  Cc: Rumen Telbizov, netdev

On 3/22/24 8:02 AM, Anton Protopopov wrote:
> Extend the bpf_fib_lookup() helper by making it to utilize mark if
> the BPF_FIB_LOOKUP_MARK flag is set. In order to pass the mark the
> four bytes of struct bpf_fib_lookup are used, shared with the
> output-only smac/dmac fields.
> 
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> ---
>  include/uapi/linux/bpf.h       | 20 ++++++++++++++++++--
>  net/core/filter.c              | 12 +++++++++---
>  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
>  3 files changed, 45 insertions(+), 7 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 9585f5345353..96d57e483133 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3394,6 +3394,10 @@ union bpf_attr {
>   *			for the nexthop. If the src addr cannot be derived,
>   *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
>   *			case, *params*->dmac and *params*->smac are not set either.
> + *		**BPF_FIB_LOOKUP_MARK**
> + *			Use the mark present in *params*->mark for the fib lookup.
> + *			This option should not be used with BPF_FIB_LOOKUP_DIRECT,
> + *			as it only has meaning for full lookups.
>   *
>   *		*ctx* is either **struct xdp_md** for XDP programs or
>   *		**struct sk_buff** tc cls_act programs.
> @@ -7120,6 +7124,7 @@ enum {
>  	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
>  	BPF_FIB_LOOKUP_TBID    = (1U << 3),
>  	BPF_FIB_LOOKUP_SRC     = (1U << 4),
> +	BPF_FIB_LOOKUP_MARK    = (1U << 5),
>  };
>  
>  enum {
> @@ -7197,8 +7202,19 @@ struct bpf_fib_lookup {
>  		__u32	tbid;
>  	};
>  
> -	__u8	smac[6];     /* ETH_ALEN */
> -	__u8	dmac[6];     /* ETH_ALEN */
> +	union {
> +		/* input */
> +		struct {
> +			__u32	mark;   /* policy routing */
> +			/* 2 4-byte holes for input */
> +		};
> +
> +		/* output: source and dest mac */
> +		struct {
> +			__u8	smac[6];	/* ETH_ALEN */
> +			__u8	dmac[6];	/* ETH_ALEN */
> +		};
> +	};
>  };
>  
>  struct bpf_redir_neigh {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 0c66e4a3fc5b..1205dd777dc2 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5884,7 +5884,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  
>  		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
>  	} else {
> -		fl4.flowi4_mark = 0;
> +		if (flags & BPF_FIB_LOOKUP_MARK)
> +			fl4.flowi4_mark = params->mark;
> +		else
> +			fl4.flowi4_mark = 0;
>  		fl4.flowi4_secid = 0;
>  		fl4.flowi4_tun_key.tun_id = 0;
>  		fl4.flowi4_uid = sock_net_uid(net, NULL);
> @@ -6027,7 +6030,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  		err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
>  						   strict);
>  	} else {
> -		fl6.flowi6_mark = 0;
> +		if (flags & BPF_FIB_LOOKUP_MARK)
> +			fl6.flowi6_mark = params->mark;
> +		else
> +			fl6.flowi6_mark = 0;
>  		fl6.flowi6_secid = 0;
>  		fl6.flowi6_tun_key.tun_id = 0;
>  		fl6.flowi6_uid = sock_net_uid(net, NULL);
> @@ -6105,7 +6111,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  
>  #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
>  			     BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
> -			     BPF_FIB_LOOKUP_SRC)
> +			     BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
>  
>  BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
>  	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index bf80b614c4db..4c9b5bfbd9c6 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -3393,6 +3393,10 @@ union bpf_attr {
>   *			for the nexthop. If the src addr cannot be derived,
>   *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
>   *			case, *params*->dmac and *params*->smac are not set either.
> + *		**BPF_FIB_LOOKUP_MARK**
> + *			Use the mark present in *params*->mark for the fib lookup.
> + *			This option should not be used with BPF_FIB_LOOKUP_DIRECT,
> + *			as it only has meaning for full lookups.
>   *
>   *		*ctx* is either **struct xdp_md** for XDP programs or
>   *		**struct sk_buff** tc cls_act programs.
> @@ -7119,6 +7123,7 @@ enum {
>  	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
>  	BPF_FIB_LOOKUP_TBID    = (1U << 3),
>  	BPF_FIB_LOOKUP_SRC     = (1U << 4),
> +	BPF_FIB_LOOKUP_MARK    = (1U << 5),
>  };
>  
>  enum {
> @@ -7196,8 +7201,19 @@ struct bpf_fib_lookup {
>  		__u32	tbid;
>  	};
>  
> -	__u8	smac[6];     /* ETH_ALEN */
> -	__u8	dmac[6];     /* ETH_ALEN */
> +	union {
> +		/* input */
> +		struct {
> +			__u32	mark;   /* policy routing */
> +			/* 2 4-byte holes for input */
> +		};
> +
> +		/* output: source and dest mac */
> +		struct {
> +			__u8	smac[6];	/* ETH_ALEN */
> +			__u8	dmac[6];	/* ETH_ALEN */
> +		};
> +	};
>  };
>  
>  struct bpf_redir_neigh {

It would be good to add

static_assert(sizeof(struct bpf_fib_lookup) == 64, "bpf_fib_lookup size
check");

to ensure this struct never exceeds a cacheline.

The patch itself looks good to me:

Reviewed-by: David Ahern <dsahern@kernel.org>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup
  2024-03-24 17:38   ` David Ahern
@ 2024-03-25 12:19     ` Anton Protopopov
  0 siblings, 0 replies; 9+ messages in thread
From: Anton Protopopov @ 2024-03-25 12:19 UTC (permalink / raw)
  To: David Ahern
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Jiri Olsa,
	Martin KaFai Lau, Stanislav Fomichev, bpf, Rumen Telbizov, netdev

On Sun, Mar 24, 2024 at 11:38:44AM -0600, David Ahern wrote:
> On 3/22/24 8:02 AM, Anton Protopopov wrote:
> > Extend the bpf_fib_lookup() helper by making it to utilize mark if
> > the BPF_FIB_LOOKUP_MARK flag is set. In order to pass the mark the
> > four bytes of struct bpf_fib_lookup are used, shared with the
> > output-only smac/dmac fields.
> > 
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> > ---
> >  include/uapi/linux/bpf.h       | 20 ++++++++++++++++++--
> >  net/core/filter.c              | 12 +++++++++---
> >  tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
> >  3 files changed, 45 insertions(+), 7 deletions(-)
> > 
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 9585f5345353..96d57e483133 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -3394,6 +3394,10 @@ union bpf_attr {
> >   *			for the nexthop. If the src addr cannot be derived,
> >   *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
> >   *			case, *params*->dmac and *params*->smac are not set either.
> > + *		**BPF_FIB_LOOKUP_MARK**
> > + *			Use the mark present in *params*->mark for the fib lookup.
> > + *			This option should not be used with BPF_FIB_LOOKUP_DIRECT,
> > + *			as it only has meaning for full lookups.
> >   *
> >   *		*ctx* is either **struct xdp_md** for XDP programs or
> >   *		**struct sk_buff** tc cls_act programs.
> > @@ -7120,6 +7124,7 @@ enum {
> >  	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
> >  	BPF_FIB_LOOKUP_TBID    = (1U << 3),
> >  	BPF_FIB_LOOKUP_SRC     = (1U << 4),
> > +	BPF_FIB_LOOKUP_MARK    = (1U << 5),
> >  };
> >  
> >  enum {
> > @@ -7197,8 +7202,19 @@ struct bpf_fib_lookup {
> >  		__u32	tbid;
> >  	};
> >  
> > -	__u8	smac[6];     /* ETH_ALEN */
> > -	__u8	dmac[6];     /* ETH_ALEN */
> > +	union {
> > +		/* input */
> > +		struct {
> > +			__u32	mark;   /* policy routing */
> > +			/* 2 4-byte holes for input */
> > +		};
> > +
> > +		/* output: source and dest mac */
> > +		struct {
> > +			__u8	smac[6];	/* ETH_ALEN */
> > +			__u8	dmac[6];	/* ETH_ALEN */
> > +		};
> > +	};
> >  };
> >  
> >  struct bpf_redir_neigh {
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 0c66e4a3fc5b..1205dd777dc2 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -5884,7 +5884,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
> >  
> >  		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
> >  	} else {
> > -		fl4.flowi4_mark = 0;
> > +		if (flags & BPF_FIB_LOOKUP_MARK)
> > +			fl4.flowi4_mark = params->mark;
> > +		else
> > +			fl4.flowi4_mark = 0;
> >  		fl4.flowi4_secid = 0;
> >  		fl4.flowi4_tun_key.tun_id = 0;
> >  		fl4.flowi4_uid = sock_net_uid(net, NULL);
> > @@ -6027,7 +6030,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
> >  		err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
> >  						   strict);
> >  	} else {
> > -		fl6.flowi6_mark = 0;
> > +		if (flags & BPF_FIB_LOOKUP_MARK)
> > +			fl6.flowi6_mark = params->mark;
> > +		else
> > +			fl6.flowi6_mark = 0;
> >  		fl6.flowi6_secid = 0;
> >  		fl6.flowi6_tun_key.tun_id = 0;
> >  		fl6.flowi6_uid = sock_net_uid(net, NULL);
> > @@ -6105,7 +6111,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
> >  
> >  #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
> >  			     BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
> > -			     BPF_FIB_LOOKUP_SRC)
> > +			     BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
> >  
> >  BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
> >  	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
> > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > index bf80b614c4db..4c9b5bfbd9c6 100644
> > --- a/tools/include/uapi/linux/bpf.h
> > +++ b/tools/include/uapi/linux/bpf.h
> > @@ -3393,6 +3393,10 @@ union bpf_attr {
> >   *			for the nexthop. If the src addr cannot be derived,
> >   *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
> >   *			case, *params*->dmac and *params*->smac are not set either.
> > + *		**BPF_FIB_LOOKUP_MARK**
> > + *			Use the mark present in *params*->mark for the fib lookup.
> > + *			This option should not be used with BPF_FIB_LOOKUP_DIRECT,
> > + *			as it only has meaning for full lookups.
> >   *
> >   *		*ctx* is either **struct xdp_md** for XDP programs or
> >   *		**struct sk_buff** tc cls_act programs.
> > @@ -7119,6 +7123,7 @@ enum {
> >  	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
> >  	BPF_FIB_LOOKUP_TBID    = (1U << 3),
> >  	BPF_FIB_LOOKUP_SRC     = (1U << 4),
> > +	BPF_FIB_LOOKUP_MARK    = (1U << 5),
> >  };
> >  
> >  enum {
> > @@ -7196,8 +7201,19 @@ struct bpf_fib_lookup {
> >  		__u32	tbid;
> >  	};
> >  
> > -	__u8	smac[6];     /* ETH_ALEN */
> > -	__u8	dmac[6];     /* ETH_ALEN */
> > +	union {
> > +		/* input */
> > +		struct {
> > +			__u32	mark;   /* policy routing */
> > +			/* 2 4-byte holes for input */
> > +		};
> > +
> > +		/* output: source and dest mac */
> > +		struct {
> > +			__u8	smac[6];	/* ETH_ALEN */
> > +			__u8	dmac[6];	/* ETH_ALEN */
> > +		};
> > +	};
> >  };
> >  
> >  struct bpf_redir_neigh {
> 
> It would be good to add
> 
> static_assert(sizeof(struct bpf_fib_lookup) == 64, "bpf_fib_lookup size
> check");
> 
> to ensure this struct never exceeds a cacheline.

Thanks, added: https://github.com/aspsk/bpf-next/commit/7cd3685e52d5

> 
> The patch itself looks good to me:
> 
> Reviewed-by: David Ahern <dsahern@kernel.org>

Thanks!

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
  2024-03-24 15:04     ` Anton Protopopov
@ 2024-03-25 18:12       ` Martin KaFai Lau
  2024-03-25 20:03         ` Anton Protopopov
  0 siblings, 1 reply; 9+ messages in thread
From: Martin KaFai Lau @ 2024-03-25 18:12 UTC (permalink / raw)
  To: Anton Protopopov
  Cc: Rumen Telbizov, David Ahern, netdev, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Jiri Olsa, Stanislav Fomichev,
	bpf

On 3/24/24 8:04 AM, Anton Protopopov wrote:
> On Sat, Mar 23, 2024 at 03:34:10PM -0700, Martin KaFai Lau wrote:
>> On 3/22/24 7:02 AM, Anton Protopopov wrote:
>>> This patch extends the fib_lookup test suite by adding a few test
>>> cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
>>> to the bpf_fib_lookup:
>>>
>>>     * Test destination IP address selection with and without a mark
>>>       and/or the BPF_FIB_LOOKUP_MARK flag set
>>>
>>> To test this functionality another network namespace and a new veth
>>> pair were added to the test.
>>>
>>
>> [ ... ]
>>
>>>    static const struct fib_lookup_test tests[] = {
>>> @@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
>>>    	  .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>>    	  .expected_src = IPV6_IFACE_ADDR_SEC,
>>>    	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
>>> +	/* policy routing */
>>> +	{ .desc = "IPv4 policy routing, default",
>>> +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
>>> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
>>> +	{ .desc = "IPv4 policy routing, mark doesn't point to a policy",
>>> +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
>>> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> +	  .mark = MARK_NO_POLICY, },
>>> +	{ .desc = "IPv4 policy routing, mark points to a policy",
>>> +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> +	  .expected_dst = IPV4_GW2, .ifname = "veth3",
>>> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> +	  .mark = MARK, },
>>> +	{ .desc = "IPv4 policy routing, mark points to a policy, but no flag",
>>> +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
>>> +	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> +	  .mark = MARK, },
>>> +	{ .desc = "IPv6 policy routing, default",
>>> +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
>>> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
>>> +	{ .desc = "IPv6 policy routing, mark doesn't point to a policy",
>>> +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
>>> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> +	  .mark = MARK_NO_POLICY, },
>>> +	{ .desc = "IPv6 policy routing, mark points to a policy",
>>> +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> +	  .expected_dst = IPV6_GW2, .ifname = "veth3",
>>> +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> +	  .mark = MARK, },
>>> +	{ .desc = "IPv6 policy routing, mark points to a policy, but no flag",
>>> +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
>>> +	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> +	  .mark = MARK, },
>>>    };
>>> -static int ifindex;
>>> -
>>>    static int setup_netns(void)
>>>    {
>>>    	int err;
>>> @@ -144,12 +196,40 @@ static int setup_netns(void)
>>>    	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
>>>    		goto fail;
>>> +	/* Setup for policy routing tests */
>>> +	SYS(fail, "ip link add veth3 type veth peer name veth4");
>>> +	SYS(fail, "ip link set dev veth3 up");
>>> +	SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
>>> +
>>> +	SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
>>> +	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
>>> +	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
>>> +	SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
>>> +	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
>>> +	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
>>
>> Trying to see if the setup can be simplified.
>>
>> Does it need to add another netns and setup a reachable IPV[46]_GW[12] gateway?
>>
>> The test is not sending any traffic and it is a BPF_FIB_LOOKUP_SKIP_NEIGH test.
> 
> I think this will not work without another namespace, as FIB lookup will
> return DST="final destination", not DST="gateway", as the gateway is in the
> same namespace and can be skipped.

hmm... not sure I understand why it would get "final destination". Am I missing something?
To be specific, there is no need to configure the IPV[46]_GW[12] address:

-	SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);

	SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
-	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
-	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
	SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
-	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
-	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
	SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
	SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
	SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
	SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
	SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
	SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);

[root@arch-fb-vm1 ~]# ip netns exec fib_lookup_ns /bin/bash

[root@arch-fb-vm1 ~]# ip -6 rule
0:	from all lookup local
2:	from all fwmark 0x2a lookup 200
32766:	from all lookup main

[root@arch-fb-vm1 ~]# ip -6 route show table main
be:ef::b0:10 via fd01::1 dev veth3 metric 1024 linkdown pref medium

[root@arch-fb-vm1 ~]# ip -6 route show table 200
be:ef::b0:10 via fd01::2 dev veth3 metric 1024 linkdown pref medium

[root@arch-fb-vm1 ~]# ip -6 route get be:ef::b0:10
be:ef::b0:10 from :: via fd01::1 dev veth3 src fd01::3 metric 1024 pref medium

[root@arch-fb-vm1 ~]# ip -6 route get be:ef::b0:10 mark 0x2a
be:ef::b0:10 from :: via fd01::2 dev veth3 table 200 src fd01::3 metric 1024 pref medium

> 
> Instead of adding a new namespace I can move the second interface to the
> root namespace. This will work, but then we're interfering with the root
> namespace.
> 
>>> +	SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
>>> +	SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
>>> +	SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
>>> +	SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
>>> +	SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
>>> +	SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
>>> +
>>> +	err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
>>> +	if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
>>> +		goto fail;
>>> +
>>> +	err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
>>> +	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
>>> +		goto fail;
>>> +
>>>    	return 0;
>>>    fail:
>>>    	return -1;
>>>    }
>>
>> [ ... ]
>>
>>> @@ -248,6 +337,7 @@ void test_fib_lookup(void)
>>>    	prog_fd = bpf_program__fd(skel->progs.fib_lookup);
>>>    	SYS(fail, "ip netns add %s", NS_TEST);
>>> +	SYS(fail, "ip netns add %s", NS_REMOTE);
>>
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
  2024-03-25 18:12       ` Martin KaFai Lau
@ 2024-03-25 20:03         ` Anton Protopopov
  0 siblings, 0 replies; 9+ messages in thread
From: Anton Protopopov @ 2024-03-25 20:03 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Rumen Telbizov, David Ahern, netdev, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Jiri Olsa, Stanislav Fomichev,
	bpf

On Mon, Mar 25, 2024 at 11:12:39AM -0700, Martin KaFai Lau wrote:
> On 3/24/24 8:04 AM, Anton Protopopov wrote:
> > On Sat, Mar 23, 2024 at 03:34:10PM -0700, Martin KaFai Lau wrote:
> > > On 3/22/24 7:02 AM, Anton Protopopov wrote:
> > > > This patch extends the fib_lookup test suite by adding a few test
> > > > cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
> > > > to the bpf_fib_lookup:
> > > > 
> > > >     * Test destination IP address selection with and without a mark
> > > >       and/or the BPF_FIB_LOOKUP_MARK flag set
> > > > 
> > > > To test this functionality another network namespace and a new veth
> > > > pair were added to the test.
> > > > 
> > > 
> > > [ ... ]
> > > 
> > > >    static const struct fib_lookup_test tests[] = {
> > > > @@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
> > > >    	  .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > >    	  .expected_src = IPV6_IFACE_ADDR_SEC,
> > > >    	  .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > > > +	/* policy routing */
> > > > +	{ .desc = "IPv4 policy routing, default",
> > > > +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> > > > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > > > +	{ .desc = "IPv4 policy routing, mark doesn't point to a policy",
> > > > +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> > > > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > +	  .mark = MARK_NO_POLICY, },
> > > > +	{ .desc = "IPv4 policy routing, mark points to a policy",
> > > > +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > +	  .expected_dst = IPV4_GW2, .ifname = "veth3",
> > > > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > +	  .mark = MARK, },
> > > > +	{ .desc = "IPv4 policy routing, mark points to a policy, but no flag",
> > > > +	  .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > +	  .expected_dst = IPV4_GW1, .ifname = "veth3",
> > > > +	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > +	  .mark = MARK, },
> > > > +	{ .desc = "IPv6 policy routing, default",
> > > > +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> > > > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > > > +	{ .desc = "IPv6 policy routing, mark doesn't point to a policy",
> > > > +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> > > > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > +	  .mark = MARK_NO_POLICY, },
> > > > +	{ .desc = "IPv6 policy routing, mark points to a policy",
> > > > +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > +	  .expected_dst = IPV6_GW2, .ifname = "veth3",
> > > > +	  .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > +	  .mark = MARK, },
> > > > +	{ .desc = "IPv6 policy routing, mark points to a policy, but no flag",
> > > > +	  .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > +	  .expected_dst = IPV6_GW1, .ifname = "veth3",
> > > > +	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > +	  .mark = MARK, },
> > > >    };
> > > > -static int ifindex;
> > > > -
> > > >    static int setup_netns(void)
> > > >    {
> > > >    	int err;
> > > > @@ -144,12 +196,40 @@ static int setup_netns(void)
> > > >    	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
> > > >    		goto fail;
> > > > +	/* Setup for policy routing tests */
> > > > +	SYS(fail, "ip link add veth3 type veth peer name veth4");
> > > > +	SYS(fail, "ip link set dev veth3 up");
> > > > +	SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
> > > > +
> > > > +	SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
> > > > +	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
> > > > +	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
> > > > +	SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
> > > > +	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
> > > > +	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
> > > 
> > > Trying to see if the setup can be simplified.
> > > 
> > > Does it need to add another netns and setup a reachable IPV[46]_GW[12] gateway?
> > > 
> > > The test is not sending any traffic and it is a BPF_FIB_LOOKUP_SKIP_NEIGH test.
> > 
> > I think this will not work without another namespace, as FIB lookup will
> > return DST="final destination", not DST="gateway", as the gateway is in the
> > same namespace and can be skipped.
> 
> hmm... not sure I understand why it would get "final destination". Am I missing something?
> To be specific, there is no need to configure the IPV[46]_GW[12] address:
> 
> -	SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
> 
> 	SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
> -	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
> -	SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
> 	SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
> -	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
> -	SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
> 	SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
> 	SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
> 	SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
> 	SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
> 	SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> 	SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> 
> [root@arch-fb-vm1 ~]# ip netns exec fib_lookup_ns /bin/bash
> 
> [root@arch-fb-vm1 ~]# ip -6 rule
> 0:	from all lookup local
> 2:	from all fwmark 0x2a lookup 200
> 32766:	from all lookup main
> 
> [root@arch-fb-vm1 ~]# ip -6 route show table main
> be:ef::b0:10 via fd01::1 dev veth3 metric 1024 linkdown pref medium
> 
> [root@arch-fb-vm1 ~]# ip -6 route show table 200
> be:ef::b0:10 via fd01::2 dev veth3 metric 1024 linkdown pref medium
> 
> [root@arch-fb-vm1 ~]# ip -6 route get be:ef::b0:10
> be:ef::b0:10 from :: via fd01::1 dev veth3 src fd01::3 metric 1024 pref medium
> 
> [root@arch-fb-vm1 ~]# ip -6 route get be:ef::b0:10 mark 0x2a
> be:ef::b0:10 from :: via fd01::2 dev veth3 table 200 src fd01::3 metric 1024 pref medium

Ok, thanks. I will send a v2 with a simplified test

> > 
> > Instead of adding a new namespace I can move the second interface to the
> > root namespace. This will work, but then we're interfering with the root
> > namespace.
> > 
> > > > +	SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
> > > > +	SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
> > > > +	SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
> > > > +	SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
> > > > +	SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> > > > +	SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> > > > +
> > > > +	err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
> > > > +	if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
> > > > +		goto fail;
> > > > +
> > > > +	err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
> > > > +	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
> > > > +		goto fail;
> > > > +
> > > >    	return 0;
> > > >    fail:
> > > >    	return -1;
> > > >    }
> > > 
> > > [ ... ]
> > > 
> > > > @@ -248,6 +337,7 @@ void test_fib_lookup(void)
> > > >    	prog_fd = bpf_program__fd(skel->progs.fib_lookup);
> > > >    	SYS(fail, "ip netns add %s", NS_TEST);
> > > > +	SYS(fail, "ip netns add %s", NS_REMOTE);
> > > 
> > > 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-03-25 20:01 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-22 14:02 [PATCH v1 bpf-next 0/2] BPF: support mark in bpf_fib_lookup Anton Protopopov
2024-03-22 14:02 ` [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup Anton Protopopov
2024-03-24 17:38   ` David Ahern
2024-03-25 12:19     ` Anton Protopopov
2024-03-22 14:02 ` [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests Anton Protopopov
2024-03-23 22:34   ` Martin KaFai Lau
2024-03-24 15:04     ` Anton Protopopov
2024-03-25 18:12       ` Martin KaFai Lau
2024-03-25 20:03         ` Anton Protopopov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).