* [PATCH v1 bpf-next 0/2] BPF: support mark in bpf_fib_lookup
@ 2024-03-22 14:02 Anton Protopopov
2024-03-22 14:02 ` [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup Anton Protopopov
2024-03-22 14:02 ` [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests Anton Protopopov
0 siblings, 2 replies; 9+ messages in thread
From: Anton Protopopov @ 2024-03-22 14:02 UTC (permalink / raw)
To: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Jiri Olsa,
Martin KaFai Lau, Stanislav Fomichev, bpf
Cc: Anton Protopopov, Rumen Telbizov, David Ahern, netdev
This patch series adds policy routing support in bpf_fib_lookup.
This is a useful functionality which was missing for a long time,
as without it some networking setups can't be implemented in BPF.
One example can be found here [1].
A while ago there was an attempt to add this functionality [2] by
Rumen Telbizov and David Ahern. I've completely refactored the code,
except that the changes to the struct bpf_fib_lookup were copy-pasted
from the original patch.
The first patch implements the functionality, the second patch adds
a few selftests.
[1] https://github.com/cilium/cilium/pull/12770
[2] https://lore.kernel.org/all/20210629185537.78008-2-rumen.telbizov@menlosecurity.com/
Anton Protopopov (2):
bpf: add support for passing mark with bpf_fib_lookup
selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
include/uapi/linux/bpf.h | 20 ++-
net/core/filter.c | 12 +-
tools/include/uapi/linux/bpf.h | 20 ++-
.../selftests/bpf/prog_tests/fib_lookup.c | 160 ++++++++++++++----
4 files changed, 176 insertions(+), 36 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup
2024-03-22 14:02 [PATCH v1 bpf-next 0/2] BPF: support mark in bpf_fib_lookup Anton Protopopov
@ 2024-03-22 14:02 ` Anton Protopopov
2024-03-24 17:38 ` David Ahern
2024-03-22 14:02 ` [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests Anton Protopopov
1 sibling, 1 reply; 9+ messages in thread
From: Anton Protopopov @ 2024-03-22 14:02 UTC (permalink / raw)
To: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Jiri Olsa,
Martin KaFai Lau, Stanislav Fomichev, bpf
Cc: Anton Protopopov, Rumen Telbizov, David Ahern, netdev
Extend the bpf_fib_lookup() helper by making it to utilize mark if
the BPF_FIB_LOOKUP_MARK flag is set. In order to pass the mark the
four bytes of struct bpf_fib_lookup are used, shared with the
output-only smac/dmac fields.
Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
net/core/filter.c | 12 +++++++++---
tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
3 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9585f5345353..96d57e483133 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3394,6 +3394,10 @@ union bpf_attr {
* for the nexthop. If the src addr cannot be derived,
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
* case, *params*->dmac and *params*->smac are not set either.
+ * **BPF_FIB_LOOKUP_MARK**
+ * Use the mark present in *params*->mark for the fib lookup.
+ * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
+ * as it only has meaning for full lookups.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -7120,6 +7124,7 @@ enum {
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3),
BPF_FIB_LOOKUP_SRC = (1U << 4),
+ BPF_FIB_LOOKUP_MARK = (1U << 5),
};
enum {
@@ -7197,8 +7202,19 @@ struct bpf_fib_lookup {
__u32 tbid;
};
- __u8 smac[6]; /* ETH_ALEN */
- __u8 dmac[6]; /* ETH_ALEN */
+ union {
+ /* input */
+ struct {
+ __u32 mark; /* policy routing */
+ /* 2 4-byte holes for input */
+ };
+
+ /* output: source and dest mac */
+ struct {
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+ };
+ };
};
struct bpf_redir_neigh {
diff --git a/net/core/filter.c b/net/core/filter.c
index 0c66e4a3fc5b..1205dd777dc2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5884,7 +5884,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else {
- fl4.flowi4_mark = 0;
+ if (flags & BPF_FIB_LOOKUP_MARK)
+ fl4.flowi4_mark = params->mark;
+ else
+ fl4.flowi4_mark = 0;
fl4.flowi4_secid = 0;
fl4.flowi4_tun_key.tun_id = 0;
fl4.flowi4_uid = sock_net_uid(net, NULL);
@@ -6027,7 +6030,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
strict);
} else {
- fl6.flowi6_mark = 0;
+ if (flags & BPF_FIB_LOOKUP_MARK)
+ fl6.flowi6_mark = params->mark;
+ else
+ fl6.flowi6_mark = 0;
fl6.flowi6_secid = 0;
fl6.flowi6_tun_key.tun_id = 0;
fl6.flowi6_uid = sock_net_uid(net, NULL);
@@ -6105,7 +6111,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
- BPF_FIB_LOOKUP_SRC)
+ BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bf80b614c4db..4c9b5bfbd9c6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3393,6 +3393,10 @@ union bpf_attr {
* for the nexthop. If the src addr cannot be derived,
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
* case, *params*->dmac and *params*->smac are not set either.
+ * **BPF_FIB_LOOKUP_MARK**
+ * Use the mark present in *params*->mark for the fib lookup.
+ * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
+ * as it only has meaning for full lookups.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -7119,6 +7123,7 @@ enum {
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3),
BPF_FIB_LOOKUP_SRC = (1U << 4),
+ BPF_FIB_LOOKUP_MARK = (1U << 5),
};
enum {
@@ -7196,8 +7201,19 @@ struct bpf_fib_lookup {
__u32 tbid;
};
- __u8 smac[6]; /* ETH_ALEN */
- __u8 dmac[6]; /* ETH_ALEN */
+ union {
+ /* input */
+ struct {
+ __u32 mark; /* policy routing */
+ /* 2 4-byte holes for input */
+ };
+
+ /* output: source and dest mac */
+ struct {
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+ };
+ };
};
struct bpf_redir_neigh {
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
2024-03-22 14:02 [PATCH v1 bpf-next 0/2] BPF: support mark in bpf_fib_lookup Anton Protopopov
2024-03-22 14:02 ` [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup Anton Protopopov
@ 2024-03-22 14:02 ` Anton Protopopov
2024-03-23 22:34 ` Martin KaFai Lau
1 sibling, 1 reply; 9+ messages in thread
From: Anton Protopopov @ 2024-03-22 14:02 UTC (permalink / raw)
To: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Jiri Olsa,
Martin KaFai Lau, Stanislav Fomichev, bpf
Cc: Anton Protopopov, Rumen Telbizov, David Ahern, netdev
This patch extends the fib_lookup test suite by adding a few test
cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
to the bpf_fib_lookup:
* Test destination IP address selection with and without a mark
and/or the BPF_FIB_LOOKUP_MARK flag set
To test this functionality another network namespace and a new veth
pair were added to the test.
Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
.../selftests/bpf/prog_tests/fib_lookup.c | 160 ++++++++++++++----
1 file changed, 131 insertions(+), 29 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 3379df2d4cf2..a78316431f32 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -10,6 +10,7 @@
#include "fib_lookup.skel.h"
#define NS_TEST "fib_lookup_ns"
+#define NS_REMOTE "remote_ns"
#define IPV6_IFACE_ADDR "face::face"
#define IPV6_IFACE_ADDR_SEC "cafe::cafe"
#define IPV6_ADDR_DST "face::3"
@@ -26,6 +27,17 @@
#define IPV6_TBID_ADDR "fd00::FFFF"
#define IPV6_TBID_NET "fd00::"
#define IPV6_TBID_DST "fd00::2"
+#define MARK_NO_POLICY 33
+#define MARK 42
+#define MARK_TABLE "200"
+#define IPV4_REMOTE_DST "1.2.3.4"
+#define IPV4_LOCAL "10.4.0.3"
+#define IPV4_GW1 "10.4.0.1"
+#define IPV4_GW2 "10.4.0.2"
+#define IPV6_REMOTE_DST "be:ef::b0:10"
+#define IPV6_LOCAL "fd01::3"
+#define IPV6_GW1 "fd01::1"
+#define IPV6_GW2 "fd01::2"
#define DMAC "11:11:11:11:11:11"
#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
#define DMAC2 "01:01:01:01:01:01"
@@ -36,9 +48,12 @@ struct fib_lookup_test {
const char *daddr;
int expected_ret;
const char *expected_src;
+ const char *expected_dst;
int lookup_flags;
__u32 tbid;
__u8 dmac[6];
+ __u32 mark;
+ const char *ifname;
};
static const struct fib_lookup_test tests[] = {
@@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
.daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.expected_src = IPV6_IFACE_ADDR_SEC,
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ /* policy routing */
+ { .desc = "IPv4 policy routing, default",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1, .ifname = "veth3",
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 policy routing, mark doesn't point to a policy",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1, .ifname = "veth3",
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK_NO_POLICY, },
+ { .desc = "IPv4 policy routing, mark points to a policy",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW2, .ifname = "veth3",
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv4 policy routing, mark points to a policy, but no flag",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1, .ifname = "veth3",
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv6 policy routing, default",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1, .ifname = "veth3",
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 policy routing, mark doesn't point to a policy",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1, .ifname = "veth3",
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK_NO_POLICY, },
+ { .desc = "IPv6 policy routing, mark points to a policy",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW2, .ifname = "veth3",
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv6 policy routing, mark points to a policy, but no flag",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1, .ifname = "veth3",
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
};
-static int ifindex;
-
static int setup_netns(void)
{
int err;
@@ -144,12 +196,40 @@ static int setup_netns(void)
if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
goto fail;
+ /* Setup for policy routing tests */
+ SYS(fail, "ip link add veth3 type veth peer name veth4");
+ SYS(fail, "ip link set dev veth3 up");
+ SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
+
+ SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
+ SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
+ SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
+ SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
+ SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
+ SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
+ SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
+ SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
+ SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
+ SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
+ SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+ SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+
+ err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
+ goto fail;
+
+ err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
+ goto fail;
+
return 0;
fail:
return -1;
}
-static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test)
+static int set_lookup_params(struct bpf_fib_lookup *params,
+ const struct fib_lookup_test *test,
+ int ifindex)
{
int ret;
@@ -159,6 +239,9 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
params->ifindex = ifindex;
params->tbid = test->tbid;
+ if (test->lookup_flags & BPF_FIB_LOOKUP_MARK)
+ params->mark = test->mark;
+
if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
params->family = AF_INET6;
if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
@@ -190,40 +273,45 @@ static void mac_str(char *b, const __u8 *mac)
mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
}
-static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
+static void assert_ip_address(int family, void *addr, const char *expected_str)
{
+ char str[INET6_ADDRSTRLEN];
+ u8 expected_addr[16];
+ int addr_len = 0;
int ret;
- __u32 src6[4];
- __be32 src4;
- switch (fib_params->family) {
+ switch (family) {
case AF_INET6:
- ret = inet_pton(AF_INET6, expected_src, src6);
- ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
-
- ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
- if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
- char str_src6[64];
-
- inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
- sizeof(str_src6));
- printf("ipv6 expected %s actual %s ", expected_src,
- str_src6);
- }
-
+ ret = inet_pton(AF_INET6, expected_str, expected_addr);
+ ASSERT_EQ(ret, 1, "inet_pton(AF_INET6, expected_str)");
+ addr_len = 16;
break;
case AF_INET:
- ret = inet_pton(AF_INET, expected_src, &src4);
- ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
-
- ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
-
+ ret = inet_pton(AF_INET, expected_str, expected_addr);
+ ASSERT_EQ(ret, 1, "inet_pton(AF_INET, expected_str)");
+ addr_len = 4;
break;
default:
- PRINT_FAIL("invalid addr family: %d", fib_params->family);
+ PRINT_FAIL("invalid address family: %d", family);
+ break;
+ }
+
+ if (memcmp(addr, expected_addr, addr_len)) {
+ inet_ntop(family, addr, str, sizeof(str));
+ PRINT_FAIL("expected %s actual %s ", expected_str, str);
}
}
+static void assert_src_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+ assert_ip_address(params->family, params->ipv6_src, expected);
+}
+
+static void assert_dst_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+ assert_ip_address(params->family, params->ipv6_dst, expected);
+}
+
void test_fib_lookup(void)
{
struct bpf_fib_lookup *fib_params;
@@ -231,6 +319,7 @@ void test_fib_lookup(void)
struct __sk_buff skb = { };
struct fib_lookup *skel;
int prog_fd, err, ret, i;
+ int default_ifindex;
/* The test does not use the skb->data, so
* use pkt_v6 for both v6 and v4 test.
@@ -248,6 +337,7 @@ void test_fib_lookup(void)
prog_fd = bpf_program__fd(skel->progs.fib_lookup);
SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip netns add %s", NS_REMOTE);
nstoken = open_netns(NS_TEST);
if (!ASSERT_OK_PTR(nstoken, "open_netns"))
@@ -256,15 +346,23 @@ void test_fib_lookup(void)
if (setup_netns())
goto fail;
- ifindex = if_nametoindex("veth1");
- skb.ifindex = ifindex;
+ default_ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(default_ifindex, 0, "if_nametoindex(veth1)"))
+ goto fail;
+
fib_params = &skel->bss->fib_params;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
printf("Testing %s ", tests[i].desc);
- if (set_lookup_params(fib_params, &tests[i]))
+ if (tests[i].ifname)
+ skb.ifindex = if_nametoindex(tests[i].ifname);
+ else
+ skb.ifindex = default_ifindex;
+
+ if (set_lookup_params(fib_params, &tests[i], skb.ifindex))
continue;
+
skel->bss->fib_lookup_ret = -1;
skel->bss->lookup_flags = tests[i].lookup_flags;
@@ -278,6 +376,9 @@ void test_fib_lookup(void)
if (tests[i].expected_src)
assert_src_ip(fib_params, tests[i].expected_src);
+ if (tests[i].expected_dst)
+ assert_dst_ip(fib_params, tests[i].expected_dst);
+
ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
if (!ASSERT_EQ(ret, 0, "dmac not match")) {
char expected[18], actual[18];
@@ -299,5 +400,6 @@ void test_fib_lookup(void)
if (nstoken)
close_netns(nstoken);
SYS_NOFAIL("ip netns del " NS_TEST);
+ SYS_NOFAIL("ip netns del " NS_REMOTE);
fib_lookup__destroy(skel);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
2024-03-22 14:02 ` [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests Anton Protopopov
@ 2024-03-23 22:34 ` Martin KaFai Lau
2024-03-24 15:04 ` Anton Protopopov
0 siblings, 1 reply; 9+ messages in thread
From: Martin KaFai Lau @ 2024-03-23 22:34 UTC (permalink / raw)
To: Anton Protopopov
Cc: Rumen Telbizov, David Ahern, netdev, Alexei Starovoitov,
Andrii Nakryiko, Daniel Borkmann, Jiri Olsa, Stanislav Fomichev,
bpf
On 3/22/24 7:02 AM, Anton Protopopov wrote:
> This patch extends the fib_lookup test suite by adding a few test
> cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
> to the bpf_fib_lookup:
>
> * Test destination IP address selection with and without a mark
> and/or the BPF_FIB_LOOKUP_MARK flag set
>
> To test this functionality another network namespace and a new veth
> pair were added to the test.
>
[ ... ]
> static const struct fib_lookup_test tests[] = {
> @@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
> .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> .expected_src = IPV6_IFACE_ADDR_SEC,
> .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> + /* policy routing */
> + { .desc = "IPv4 policy routing, default",
> + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> + .expected_dst = IPV4_GW1, .ifname = "veth3",
> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> + { .desc = "IPv4 policy routing, mark doesn't point to a policy",
> + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> + .expected_dst = IPV4_GW1, .ifname = "veth3",
> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> + .mark = MARK_NO_POLICY, },
> + { .desc = "IPv4 policy routing, mark points to a policy",
> + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> + .expected_dst = IPV4_GW2, .ifname = "veth3",
> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> + .mark = MARK, },
> + { .desc = "IPv4 policy routing, mark points to a policy, but no flag",
> + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> + .expected_dst = IPV4_GW1, .ifname = "veth3",
> + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> + .mark = MARK, },
> + { .desc = "IPv6 policy routing, default",
> + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> + .expected_dst = IPV6_GW1, .ifname = "veth3",
> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> + { .desc = "IPv6 policy routing, mark doesn't point to a policy",
> + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> + .expected_dst = IPV6_GW1, .ifname = "veth3",
> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> + .mark = MARK_NO_POLICY, },
> + { .desc = "IPv6 policy routing, mark points to a policy",
> + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> + .expected_dst = IPV6_GW2, .ifname = "veth3",
> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> + .mark = MARK, },
> + { .desc = "IPv6 policy routing, mark points to a policy, but no flag",
> + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> + .expected_dst = IPV6_GW1, .ifname = "veth3",
> + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> + .mark = MARK, },
> };
>
> -static int ifindex;
> -
> static int setup_netns(void)
> {
> int err;
> @@ -144,12 +196,40 @@ static int setup_netns(void)
> if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
> goto fail;
>
> + /* Setup for policy routing tests */
> + SYS(fail, "ip link add veth3 type veth peer name veth4");
> + SYS(fail, "ip link set dev veth3 up");
> + SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
> +
> + SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
> + SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
> + SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
> + SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
> + SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
> + SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
Trying to see if the setup can be simplified.
Does it need to add another netns and setup a reachable IPV[46]_GW[12] gateway?
The test is not sending any traffic and it is a BPF_FIB_LOOKUP_SKIP_NEIGH test.
> + SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
> + SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
> + SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
> + SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
> + SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> + SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> +
> + err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
> + if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
> + goto fail;
> +
> + err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
> + if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
> + goto fail;
> +
> return 0;
> fail:
> return -1;
> }
[ ... ]
> @@ -248,6 +337,7 @@ void test_fib_lookup(void)
> prog_fd = bpf_program__fd(skel->progs.fib_lookup);
>
> SYS(fail, "ip netns add %s", NS_TEST);
> + SYS(fail, "ip netns add %s", NS_REMOTE);
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
2024-03-23 22:34 ` Martin KaFai Lau
@ 2024-03-24 15:04 ` Anton Protopopov
2024-03-25 18:12 ` Martin KaFai Lau
0 siblings, 1 reply; 9+ messages in thread
From: Anton Protopopov @ 2024-03-24 15:04 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: Rumen Telbizov, David Ahern, netdev, Alexei Starovoitov,
Andrii Nakryiko, Daniel Borkmann, Jiri Olsa, Stanislav Fomichev,
bpf
On Sat, Mar 23, 2024 at 03:34:10PM -0700, Martin KaFai Lau wrote:
> On 3/22/24 7:02 AM, Anton Protopopov wrote:
> > This patch extends the fib_lookup test suite by adding a few test
> > cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
> > to the bpf_fib_lookup:
> >
> > * Test destination IP address selection with and without a mark
> > and/or the BPF_FIB_LOOKUP_MARK flag set
> >
> > To test this functionality another network namespace and a new veth
> > pair were added to the test.
> >
>
> [ ... ]
>
> > static const struct fib_lookup_test tests[] = {
> > @@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
> > .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > .expected_src = IPV6_IFACE_ADDR_SEC,
> > .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > + /* policy routing */
> > + { .desc = "IPv4 policy routing, default",
> > + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > + .expected_dst = IPV4_GW1, .ifname = "veth3",
> > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > + { .desc = "IPv4 policy routing, mark doesn't point to a policy",
> > + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > + .expected_dst = IPV4_GW1, .ifname = "veth3",
> > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > + .mark = MARK_NO_POLICY, },
> > + { .desc = "IPv4 policy routing, mark points to a policy",
> > + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > + .expected_dst = IPV4_GW2, .ifname = "veth3",
> > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > + .mark = MARK, },
> > + { .desc = "IPv4 policy routing, mark points to a policy, but no flag",
> > + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > + .expected_dst = IPV4_GW1, .ifname = "veth3",
> > + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> > + .mark = MARK, },
> > + { .desc = "IPv6 policy routing, default",
> > + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > + .expected_dst = IPV6_GW1, .ifname = "veth3",
> > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > + { .desc = "IPv6 policy routing, mark doesn't point to a policy",
> > + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > + .expected_dst = IPV6_GW1, .ifname = "veth3",
> > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > + .mark = MARK_NO_POLICY, },
> > + { .desc = "IPv6 policy routing, mark points to a policy",
> > + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > + .expected_dst = IPV6_GW2, .ifname = "veth3",
> > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > + .mark = MARK, },
> > + { .desc = "IPv6 policy routing, mark points to a policy, but no flag",
> > + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > + .expected_dst = IPV6_GW1, .ifname = "veth3",
> > + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> > + .mark = MARK, },
> > };
> > -static int ifindex;
> > -
> > static int setup_netns(void)
> > {
> > int err;
> > @@ -144,12 +196,40 @@ static int setup_netns(void)
> > if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
> > goto fail;
> > + /* Setup for policy routing tests */
> > + SYS(fail, "ip link add veth3 type veth peer name veth4");
> > + SYS(fail, "ip link set dev veth3 up");
> > + SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
> > +
> > + SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
> > + SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
> > + SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
> > + SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
> > + SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
> > + SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
>
> Trying to see if the setup can be simplified.
>
> Does it need to add another netns and setup a reachable IPV[46]_GW[12] gateway?
>
> The test is not sending any traffic and it is a BPF_FIB_LOOKUP_SKIP_NEIGH test.
I think this will not work without another namespace, as FIB lookup will
return DST="final destination", not DST="gateway", as the gateway is in the
same namespace and can be skipped.
Instead of adding a new namespace I can move the second interface to the
root namespace. This will work, but then we're interfering with the root
namespace.
> > + SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
> > + SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
> > + SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
> > + SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
> > + SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> > + SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> > +
> > + err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
> > + if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
> > + goto fail;
> > +
> > + err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
> > + if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
> > + goto fail;
> > +
> > return 0;
> > fail:
> > return -1;
> > }
>
> [ ... ]
>
> > @@ -248,6 +337,7 @@ void test_fib_lookup(void)
> > prog_fd = bpf_program__fd(skel->progs.fib_lookup);
> > SYS(fail, "ip netns add %s", NS_TEST);
> > + SYS(fail, "ip netns add %s", NS_REMOTE);
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup
2024-03-22 14:02 ` [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup Anton Protopopov
@ 2024-03-24 17:38 ` David Ahern
2024-03-25 12:19 ` Anton Protopopov
0 siblings, 1 reply; 9+ messages in thread
From: David Ahern @ 2024-03-24 17:38 UTC (permalink / raw)
To: Anton Protopopov, Alexei Starovoitov, Andrii Nakryiko,
Daniel Borkmann, Jiri Olsa, Martin KaFai Lau, Stanislav Fomichev,
bpf
Cc: Rumen Telbizov, netdev
On 3/22/24 8:02 AM, Anton Protopopov wrote:
> Extend the bpf_fib_lookup() helper by making it to utilize mark if
> the BPF_FIB_LOOKUP_MARK flag is set. In order to pass the mark the
> four bytes of struct bpf_fib_lookup are used, shared with the
> output-only smac/dmac fields.
>
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> ---
> include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
> net/core/filter.c | 12 +++++++++---
> tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
> 3 files changed, 45 insertions(+), 7 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 9585f5345353..96d57e483133 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3394,6 +3394,10 @@ union bpf_attr {
> * for the nexthop. If the src addr cannot be derived,
> * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
> * case, *params*->dmac and *params*->smac are not set either.
> + * **BPF_FIB_LOOKUP_MARK**
> + * Use the mark present in *params*->mark for the fib lookup.
> + * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
> + * as it only has meaning for full lookups.
> *
> * *ctx* is either **struct xdp_md** for XDP programs or
> * **struct sk_buff** tc cls_act programs.
> @@ -7120,6 +7124,7 @@ enum {
> BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
> BPF_FIB_LOOKUP_TBID = (1U << 3),
> BPF_FIB_LOOKUP_SRC = (1U << 4),
> + BPF_FIB_LOOKUP_MARK = (1U << 5),
> };
>
> enum {
> @@ -7197,8 +7202,19 @@ struct bpf_fib_lookup {
> __u32 tbid;
> };
>
> - __u8 smac[6]; /* ETH_ALEN */
> - __u8 dmac[6]; /* ETH_ALEN */
> + union {
> + /* input */
> + struct {
> + __u32 mark; /* policy routing */
> + /* 2 4-byte holes for input */
> + };
> +
> + /* output: source and dest mac */
> + struct {
> + __u8 smac[6]; /* ETH_ALEN */
> + __u8 dmac[6]; /* ETH_ALEN */
> + };
> + };
> };
>
> struct bpf_redir_neigh {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 0c66e4a3fc5b..1205dd777dc2 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5884,7 +5884,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>
> err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
> } else {
> - fl4.flowi4_mark = 0;
> + if (flags & BPF_FIB_LOOKUP_MARK)
> + fl4.flowi4_mark = params->mark;
> + else
> + fl4.flowi4_mark = 0;
> fl4.flowi4_secid = 0;
> fl4.flowi4_tun_key.tun_id = 0;
> fl4.flowi4_uid = sock_net_uid(net, NULL);
> @@ -6027,7 +6030,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
> err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
> strict);
> } else {
> - fl6.flowi6_mark = 0;
> + if (flags & BPF_FIB_LOOKUP_MARK)
> + fl6.flowi6_mark = params->mark;
> + else
> + fl6.flowi6_mark = 0;
> fl6.flowi6_secid = 0;
> fl6.flowi6_tun_key.tun_id = 0;
> fl6.flowi6_uid = sock_net_uid(net, NULL);
> @@ -6105,7 +6111,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>
> #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
> BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
> - BPF_FIB_LOOKUP_SRC)
> + BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
>
> BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
> struct bpf_fib_lookup *, params, int, plen, u32, flags)
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index bf80b614c4db..4c9b5bfbd9c6 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -3393,6 +3393,10 @@ union bpf_attr {
> * for the nexthop. If the src addr cannot be derived,
> * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
> * case, *params*->dmac and *params*->smac are not set either.
> + * **BPF_FIB_LOOKUP_MARK**
> + * Use the mark present in *params*->mark for the fib lookup.
> + * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
> + * as it only has meaning for full lookups.
> *
> * *ctx* is either **struct xdp_md** for XDP programs or
> * **struct sk_buff** tc cls_act programs.
> @@ -7119,6 +7123,7 @@ enum {
> BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
> BPF_FIB_LOOKUP_TBID = (1U << 3),
> BPF_FIB_LOOKUP_SRC = (1U << 4),
> + BPF_FIB_LOOKUP_MARK = (1U << 5),
> };
>
> enum {
> @@ -7196,8 +7201,19 @@ struct bpf_fib_lookup {
> __u32 tbid;
> };
>
> - __u8 smac[6]; /* ETH_ALEN */
> - __u8 dmac[6]; /* ETH_ALEN */
> + union {
> + /* input */
> + struct {
> + __u32 mark; /* policy routing */
> + /* 2 4-byte holes for input */
> + };
> +
> + /* output: source and dest mac */
> + struct {
> + __u8 smac[6]; /* ETH_ALEN */
> + __u8 dmac[6]; /* ETH_ALEN */
> + };
> + };
> };
>
> struct bpf_redir_neigh {
It would be good to add
static_assert(sizeof(struct bpf_fib_lookup) == 64, "bpf_fib_lookup size
check");
to ensure this struct never exceeds a cacheline.
The patch itself looks good to me:
Reviewed-by: David Ahern <dsahern@kernel.org>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup
2024-03-24 17:38 ` David Ahern
@ 2024-03-25 12:19 ` Anton Protopopov
0 siblings, 0 replies; 9+ messages in thread
From: Anton Protopopov @ 2024-03-25 12:19 UTC (permalink / raw)
To: David Ahern
Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann, Jiri Olsa,
Martin KaFai Lau, Stanislav Fomichev, bpf, Rumen Telbizov, netdev
On Sun, Mar 24, 2024 at 11:38:44AM -0600, David Ahern wrote:
> On 3/22/24 8:02 AM, Anton Protopopov wrote:
> > Extend the bpf_fib_lookup() helper by making it to utilize mark if
> > the BPF_FIB_LOOKUP_MARK flag is set. In order to pass the mark the
> > four bytes of struct bpf_fib_lookup are used, shared with the
> > output-only smac/dmac fields.
> >
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> > ---
> > include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
> > net/core/filter.c | 12 +++++++++---
> > tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
> > 3 files changed, 45 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 9585f5345353..96d57e483133 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -3394,6 +3394,10 @@ union bpf_attr {
> > * for the nexthop. If the src addr cannot be derived,
> > * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
> > * case, *params*->dmac and *params*->smac are not set either.
> > + * **BPF_FIB_LOOKUP_MARK**
> > + * Use the mark present in *params*->mark for the fib lookup.
> > + * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
> > + * as it only has meaning for full lookups.
> > *
> > * *ctx* is either **struct xdp_md** for XDP programs or
> > * **struct sk_buff** tc cls_act programs.
> > @@ -7120,6 +7124,7 @@ enum {
> > BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
> > BPF_FIB_LOOKUP_TBID = (1U << 3),
> > BPF_FIB_LOOKUP_SRC = (1U << 4),
> > + BPF_FIB_LOOKUP_MARK = (1U << 5),
> > };
> >
> > enum {
> > @@ -7197,8 +7202,19 @@ struct bpf_fib_lookup {
> > __u32 tbid;
> > };
> >
> > - __u8 smac[6]; /* ETH_ALEN */
> > - __u8 dmac[6]; /* ETH_ALEN */
> > + union {
> > + /* input */
> > + struct {
> > + __u32 mark; /* policy routing */
> > + /* 2 4-byte holes for input */
> > + };
> > +
> > + /* output: source and dest mac */
> > + struct {
> > + __u8 smac[6]; /* ETH_ALEN */
> > + __u8 dmac[6]; /* ETH_ALEN */
> > + };
> > + };
> > };
> >
> > struct bpf_redir_neigh {
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 0c66e4a3fc5b..1205dd777dc2 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -5884,7 +5884,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
> >
> > err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
> > } else {
> > - fl4.flowi4_mark = 0;
> > + if (flags & BPF_FIB_LOOKUP_MARK)
> > + fl4.flowi4_mark = params->mark;
> > + else
> > + fl4.flowi4_mark = 0;
> > fl4.flowi4_secid = 0;
> > fl4.flowi4_tun_key.tun_id = 0;
> > fl4.flowi4_uid = sock_net_uid(net, NULL);
> > @@ -6027,7 +6030,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
> > err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
> > strict);
> > } else {
> > - fl6.flowi6_mark = 0;
> > + if (flags & BPF_FIB_LOOKUP_MARK)
> > + fl6.flowi6_mark = params->mark;
> > + else
> > + fl6.flowi6_mark = 0;
> > fl6.flowi6_secid = 0;
> > fl6.flowi6_tun_key.tun_id = 0;
> > fl6.flowi6_uid = sock_net_uid(net, NULL);
> > @@ -6105,7 +6111,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
> >
> > #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
> > BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
> > - BPF_FIB_LOOKUP_SRC)
> > + BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK)
> >
> > BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
> > struct bpf_fib_lookup *, params, int, plen, u32, flags)
> > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > index bf80b614c4db..4c9b5bfbd9c6 100644
> > --- a/tools/include/uapi/linux/bpf.h
> > +++ b/tools/include/uapi/linux/bpf.h
> > @@ -3393,6 +3393,10 @@ union bpf_attr {
> > * for the nexthop. If the src addr cannot be derived,
> > * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
> > * case, *params*->dmac and *params*->smac are not set either.
> > + * **BPF_FIB_LOOKUP_MARK**
> > + * Use the mark present in *params*->mark for the fib lookup.
> > + * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
> > + * as it only has meaning for full lookups.
> > *
> > * *ctx* is either **struct xdp_md** for XDP programs or
> > * **struct sk_buff** tc cls_act programs.
> > @@ -7119,6 +7123,7 @@ enum {
> > BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
> > BPF_FIB_LOOKUP_TBID = (1U << 3),
> > BPF_FIB_LOOKUP_SRC = (1U << 4),
> > + BPF_FIB_LOOKUP_MARK = (1U << 5),
> > };
> >
> > enum {
> > @@ -7196,8 +7201,19 @@ struct bpf_fib_lookup {
> > __u32 tbid;
> > };
> >
> > - __u8 smac[6]; /* ETH_ALEN */
> > - __u8 dmac[6]; /* ETH_ALEN */
> > + union {
> > + /* input */
> > + struct {
> > + __u32 mark; /* policy routing */
> > + /* 2 4-byte holes for input */
> > + };
> > +
> > + /* output: source and dest mac */
> > + struct {
> > + __u8 smac[6]; /* ETH_ALEN */
> > + __u8 dmac[6]; /* ETH_ALEN */
> > + };
> > + };
> > };
> >
> > struct bpf_redir_neigh {
>
> It would be good to add
>
> static_assert(sizeof(struct bpf_fib_lookup) == 64, "bpf_fib_lookup size
> check");
>
> to ensure this struct never exceeds a cacheline.
Thanks, added: https://github.com/aspsk/bpf-next/commit/7cd3685e52d5
>
> The patch itself looks good to me:
>
> Reviewed-by: David Ahern <dsahern@kernel.org>
Thanks!
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
2024-03-24 15:04 ` Anton Protopopov
@ 2024-03-25 18:12 ` Martin KaFai Lau
2024-03-25 20:03 ` Anton Protopopov
0 siblings, 1 reply; 9+ messages in thread
From: Martin KaFai Lau @ 2024-03-25 18:12 UTC (permalink / raw)
To: Anton Protopopov
Cc: Rumen Telbizov, David Ahern, netdev, Alexei Starovoitov,
Andrii Nakryiko, Daniel Borkmann, Jiri Olsa, Stanislav Fomichev,
bpf
On 3/24/24 8:04 AM, Anton Protopopov wrote:
> On Sat, Mar 23, 2024 at 03:34:10PM -0700, Martin KaFai Lau wrote:
>> On 3/22/24 7:02 AM, Anton Protopopov wrote:
>>> This patch extends the fib_lookup test suite by adding a few test
>>> cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
>>> to the bpf_fib_lookup:
>>>
>>> * Test destination IP address selection with and without a mark
>>> and/or the BPF_FIB_LOOKUP_MARK flag set
>>>
>>> To test this functionality another network namespace and a new veth
>>> pair were added to the test.
>>>
>>
>> [ ... ]
>>
>>> static const struct fib_lookup_test tests[] = {
>>> @@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
>>> .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> .expected_src = IPV6_IFACE_ADDR_SEC,
>>> .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
>>> + /* policy routing */
>>> + { .desc = "IPv4 policy routing, default",
>>> + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> + .expected_dst = IPV4_GW1, .ifname = "veth3",
>>> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
>>> + { .desc = "IPv4 policy routing, mark doesn't point to a policy",
>>> + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> + .expected_dst = IPV4_GW1, .ifname = "veth3",
>>> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> + .mark = MARK_NO_POLICY, },
>>> + { .desc = "IPv4 policy routing, mark points to a policy",
>>> + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> + .expected_dst = IPV4_GW2, .ifname = "veth3",
>>> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> + .mark = MARK, },
>>> + { .desc = "IPv4 policy routing, mark points to a policy, but no flag",
>>> + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> + .expected_dst = IPV4_GW1, .ifname = "veth3",
>>> + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> + .mark = MARK, },
>>> + { .desc = "IPv6 policy routing, default",
>>> + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> + .expected_dst = IPV6_GW1, .ifname = "veth3",
>>> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
>>> + { .desc = "IPv6 policy routing, mark doesn't point to a policy",
>>> + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> + .expected_dst = IPV6_GW1, .ifname = "veth3",
>>> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> + .mark = MARK_NO_POLICY, },
>>> + { .desc = "IPv6 policy routing, mark points to a policy",
>>> + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> + .expected_dst = IPV6_GW2, .ifname = "veth3",
>>> + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> + .mark = MARK, },
>>> + { .desc = "IPv6 policy routing, mark points to a policy, but no flag",
>>> + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
>>> + .expected_dst = IPV6_GW1, .ifname = "veth3",
>>> + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
>>> + .mark = MARK, },
>>> };
>>> -static int ifindex;
>>> -
>>> static int setup_netns(void)
>>> {
>>> int err;
>>> @@ -144,12 +196,40 @@ static int setup_netns(void)
>>> if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
>>> goto fail;
>>> + /* Setup for policy routing tests */
>>> + SYS(fail, "ip link add veth3 type veth peer name veth4");
>>> + SYS(fail, "ip link set dev veth3 up");
>>> + SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
>>> +
>>> + SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
>>> + SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
>>> + SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
>>> + SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
>>> + SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
>>> + SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
>>
>> Trying to see if the setup can be simplified.
>>
>> Does it need to add another netns and setup a reachable IPV[46]_GW[12] gateway?
>>
>> The test is not sending any traffic and it is a BPF_FIB_LOOKUP_SKIP_NEIGH test.
>
> I think this will not work without another namespace, as FIB lookup will
> return DST="final destination", not DST="gateway", as the gateway is in the
> same namespace and can be skipped.
hmm... not sure I understand why it would get "final destination". Am I missing something?
To be specific, there is no need to configure the IPV[46]_GW[12] address:
- SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
- SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
- SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
- SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
- SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
[root@arch-fb-vm1 ~]# ip netns exec fib_lookup_ns /bin/bash
[root@arch-fb-vm1 ~]# ip -6 rule
0: from all lookup local
2: from all fwmark 0x2a lookup 200
32766: from all lookup main
[root@arch-fb-vm1 ~]# ip -6 route show table main
be:ef::b0:10 via fd01::1 dev veth3 metric 1024 linkdown pref medium
[root@arch-fb-vm1 ~]# ip -6 route show table 200
be:ef::b0:10 via fd01::2 dev veth3 metric 1024 linkdown pref medium
[root@arch-fb-vm1 ~]# ip -6 route get be:ef::b0:10
be:ef::b0:10 from :: via fd01::1 dev veth3 src fd01::3 metric 1024 pref medium
[root@arch-fb-vm1 ~]# ip -6 route get be:ef::b0:10 mark 0x2a
be:ef::b0:10 from :: via fd01::2 dev veth3 table 200 src fd01::3 metric 1024 pref medium
>
> Instead of adding a new namespace I can move the second interface to the
> root namespace. This will work, but then we're interfering with the root
> namespace.
>
>>> + SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
>>> + SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
>>> + SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
>>> + SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
>>> + SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
>>> + SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
>>> +
>>> + err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
>>> + if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
>>> + goto fail;
>>> +
>>> + err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
>>> + if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
>>> + goto fail;
>>> +
>>> return 0;
>>> fail:
>>> return -1;
>>> }
>>
>> [ ... ]
>>
>>> @@ -248,6 +337,7 @@ void test_fib_lookup(void)
>>> prog_fd = bpf_program__fd(skel->progs.fib_lookup);
>>> SYS(fail, "ip netns add %s", NS_TEST);
>>> + SYS(fail, "ip netns add %s", NS_REMOTE);
>>
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests
2024-03-25 18:12 ` Martin KaFai Lau
@ 2024-03-25 20:03 ` Anton Protopopov
0 siblings, 0 replies; 9+ messages in thread
From: Anton Protopopov @ 2024-03-25 20:03 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: Rumen Telbizov, David Ahern, netdev, Alexei Starovoitov,
Andrii Nakryiko, Daniel Borkmann, Jiri Olsa, Stanislav Fomichev,
bpf
On Mon, Mar 25, 2024 at 11:12:39AM -0700, Martin KaFai Lau wrote:
> On 3/24/24 8:04 AM, Anton Protopopov wrote:
> > On Sat, Mar 23, 2024 at 03:34:10PM -0700, Martin KaFai Lau wrote:
> > > On 3/22/24 7:02 AM, Anton Protopopov wrote:
> > > > This patch extends the fib_lookup test suite by adding a few test
> > > > cases for each IP family to test the new BPF_FIB_LOOKUP_MARK flag
> > > > to the bpf_fib_lookup:
> > > >
> > > > * Test destination IP address selection with and without a mark
> > > > and/or the BPF_FIB_LOOKUP_MARK flag set
> > > >
> > > > To test this functionality another network namespace and a new veth
> > > > pair were added to the test.
> > > >
> > >
> > > [ ... ]
> > >
> > > > static const struct fib_lookup_test tests[] = {
> > > > @@ -90,10 +105,47 @@ static const struct fib_lookup_test tests[] = {
> > > > .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > .expected_src = IPV6_IFACE_ADDR_SEC,
> > > > .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > > > + /* policy routing */
> > > > + { .desc = "IPv4 policy routing, default",
> > > > + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > + .expected_dst = IPV4_GW1, .ifname = "veth3",
> > > > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > > > + { .desc = "IPv4 policy routing, mark doesn't point to a policy",
> > > > + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > + .expected_dst = IPV4_GW1, .ifname = "veth3",
> > > > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > + .mark = MARK_NO_POLICY, },
> > > > + { .desc = "IPv4 policy routing, mark points to a policy",
> > > > + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > + .expected_dst = IPV4_GW2, .ifname = "veth3",
> > > > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > + .mark = MARK, },
> > > > + { .desc = "IPv4 policy routing, mark points to a policy, but no flag",
> > > > + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > + .expected_dst = IPV4_GW1, .ifname = "veth3",
> > > > + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > + .mark = MARK, },
> > > > + { .desc = "IPv6 policy routing, default",
> > > > + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > + .expected_dst = IPV6_GW1, .ifname = "veth3",
> > > > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
> > > > + { .desc = "IPv6 policy routing, mark doesn't point to a policy",
> > > > + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > + .expected_dst = IPV6_GW1, .ifname = "veth3",
> > > > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > + .mark = MARK_NO_POLICY, },
> > > > + { .desc = "IPv6 policy routing, mark points to a policy",
> > > > + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > + .expected_dst = IPV6_GW2, .ifname = "veth3",
> > > > + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > + .mark = MARK, },
> > > > + { .desc = "IPv6 policy routing, mark points to a policy, but no flag",
> > > > + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
> > > > + .expected_dst = IPV6_GW1, .ifname = "veth3",
> > > > + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
> > > > + .mark = MARK, },
> > > > };
> > > > -static int ifindex;
> > > > -
> > > > static int setup_netns(void)
> > > > {
> > > > int err;
> > > > @@ -144,12 +196,40 @@ static int setup_netns(void)
> > > > if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
> > > > goto fail;
> > > > + /* Setup for policy routing tests */
> > > > + SYS(fail, "ip link add veth3 type veth peer name veth4");
> > > > + SYS(fail, "ip link set dev veth3 up");
> > > > + SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
> > > > +
> > > > + SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
> > > > + SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
> > > > + SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
> > > > + SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
> > > > + SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
> > > > + SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
> > >
> > > Trying to see if the setup can be simplified.
> > >
> > > Does it need to add another netns and setup a reachable IPV[46]_GW[12] gateway?
> > >
> > > The test is not sending any traffic and it is a BPF_FIB_LOOKUP_SKIP_NEIGH test.
> >
> > I think this will not work without another namespace, as FIB lookup will
> > return DST="final destination", not DST="gateway", as the gateway is in the
> > same namespace and can be skipped.
>
> hmm... not sure I understand why it would get "final destination". Am I missing something?
> To be specific, there is no need to configure the IPV[46]_GW[12] address:
>
> - SYS(fail, "ip link set dev veth4 netns %s up", NS_REMOTE);
>
> SYS(fail, "ip addr add %s/24 dev veth3", IPV4_LOCAL);
> - SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW1);
> - SYS(fail, "ip netns exec %s ip addr add %s/24 dev veth4", NS_REMOTE, IPV4_GW2);
> SYS(fail, "ip addr add %s/64 dev veth3 nodad", IPV6_LOCAL);
> - SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW1);
> - SYS(fail, "ip netns exec %s ip addr add %s/64 dev veth4 nodad", NS_REMOTE, IPV6_GW2);
> SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
> SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
> SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
> SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
> SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
>
> [root@arch-fb-vm1 ~]# ip netns exec fib_lookup_ns /bin/bash
>
> [root@arch-fb-vm1 ~]# ip -6 rule
> 0: from all lookup local
> 2: from all fwmark 0x2a lookup 200
> 32766: from all lookup main
>
> [root@arch-fb-vm1 ~]# ip -6 route show table main
> be:ef::b0:10 via fd01::1 dev veth3 metric 1024 linkdown pref medium
>
> [root@arch-fb-vm1 ~]# ip -6 route show table 200
> be:ef::b0:10 via fd01::2 dev veth3 metric 1024 linkdown pref medium
>
> [root@arch-fb-vm1 ~]# ip -6 route get be:ef::b0:10
> be:ef::b0:10 from :: via fd01::1 dev veth3 src fd01::3 metric 1024 pref medium
>
> [root@arch-fb-vm1 ~]# ip -6 route get be:ef::b0:10 mark 0x2a
> be:ef::b0:10 from :: via fd01::2 dev veth3 table 200 src fd01::3 metric 1024 pref medium
Ok, thanks. I will send a v2 with a simplified test
> >
> > Instead of adding a new namespace I can move the second interface to the
> > root namespace. This will work, but then we're interfering with the root
> > namespace.
> >
> > > > + SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
> > > > + SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
> > > > + SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
> > > > + SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
> > > > + SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> > > > + SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
> > > > +
> > > > + err = write_sysctl("/proc/sys/net/ipv4/conf/veth3/forwarding", "1");
> > > > + if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth3.forwarding)"))
> > > > + goto fail;
> > > > +
> > > > + err = write_sysctl("/proc/sys/net/ipv6/conf/veth3/forwarding", "1");
> > > > + if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth3.forwarding)"))
> > > > + goto fail;
> > > > +
> > > > return 0;
> > > > fail:
> > > > return -1;
> > > > }
> > >
> > > [ ... ]
> > >
> > > > @@ -248,6 +337,7 @@ void test_fib_lookup(void)
> > > > prog_fd = bpf_program__fd(skel->progs.fib_lookup);
> > > > SYS(fail, "ip netns add %s", NS_TEST);
> > > > + SYS(fail, "ip netns add %s", NS_REMOTE);
> > >
> > >
>
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2024-03-25 20:01 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-22 14:02 [PATCH v1 bpf-next 0/2] BPF: support mark in bpf_fib_lookup Anton Protopopov
2024-03-22 14:02 ` [PATCH v1 bpf-next 1/2] bpf: add support for passing mark with bpf_fib_lookup Anton Protopopov
2024-03-24 17:38 ` David Ahern
2024-03-25 12:19 ` Anton Protopopov
2024-03-22 14:02 ` [PATCH v1 bpf-next 2/2] selftests/bpf: Add BPF_FIB_LOOKUP_MARK tests Anton Protopopov
2024-03-23 22:34 ` Martin KaFai Lau
2024-03-24 15:04 ` Anton Protopopov
2024-03-25 18:12 ` Martin KaFai Lau
2024-03-25 20:03 ` Anton Protopopov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).