* [PATCH v2 1/2] bpf: add table ID to bpf_fib_lookup BPF helper
2023-05-31 19:38 [PATCH v2 0/2] bpf: utilize table ID in bpf_fib_lookup helper Louis DeLosSantos
@ 2023-05-31 19:38 ` Louis DeLosSantos
2023-05-31 19:38 ` [PATCH v2 2/2] selftests/bpf: test table ID fib lookup " Louis DeLosSantos
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Louis DeLosSantos @ 2023-05-31 19:38 UTC (permalink / raw)
To: bpf, netdev
Cc: Martin KaFai Lau, Stanislav Fomichev, razor, John Fastabend,
Yonghong Song, Louis DeLosSantos
Add ability to specify routing table ID to the `bpf_fib_lookup` BPF
helper.
A new field `tbid` is added to `struct bpf_fib_lookup` used as
parameters to the `bpf_fib_lookup` BPF helper.
When the helper is called with the `BPF_FIB_LOOKUP_DIRECT` and
`BPF_FIB_LOOKUP_TBID` flags the `tbid` field in `struct bpf_fib_lookup`
will be used as the table ID for the fib lookup.
If the `tbid` does not exist the fib lookup will fail with
`BPF_FIB_LKUP_RET_NOT_FWDED`.
The `tbid` field becomes a union over the vlan related output fields in
`struct bpf_fib_lookup` and will be zeroed immediately after usage.
This functionality is useful in containerized environments.
For instance, if a CNI wants to dictate the next-hop for traffic leaving
a container it can create a container-specific routing table and perform
a fib lookup against this table in a "host-net-namespace-side" TC program.
This functionality also allows `ip rule` like functionality at the TC
layer, allowing an eBPF program to pick a routing table based on some
aspect of the sk_buff.
As a concrete use case, this feature will be used in Cilium's SRv6 L3VPN
datapath.
When egress traffic leaves a Pod an eBPF program attached by Cilium will
determine which VRF the egress traffic should target, and then perform a
FIB lookup in a specific table representing this VRF's FIB.
Signed-off-by: Louis DeLosSantos <louis.delos.devel@gmail.com>
---
include/uapi/linux/bpf.h | 21 ++++++++++++++++++---
net/core/filter.c | 14 +++++++++++++-
tools/include/uapi/linux/bpf.h | 21 ++++++++++++++++++---
3 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1bb11a6ee6676..6254eac48b185 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3167,6 +3167,10 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_DIRECT**
* Do a direct table lookup vs full lookup using FIB
* rules.
+ * **BPF_FIB_LOOKUP_TBID**
+ * Used with BPF_FIB_LOOKUP_DIRECT.
+ * Use the routing table ID present in *params*->tbid
+ * for the fib lookup.
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
@@ -6821,6 +6825,7 @@ enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+ BPF_FIB_LOOKUP_TBID = (1U << 3),
};
enum {
@@ -6881,9 +6886,19 @@ struct bpf_fib_lookup {
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
- /* output */
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
+ union {
+ struct {
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ };
+ /* input: when accompanied with the
+ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+ * specific routing table to use for the fib lookup.
+ */
+ __u32 tbid;
+ };
+
__u8 smac[6]; /* ETH_ALEN */
__u8 dmac[6]; /* ETH_ALEN */
};
diff --git a/net/core/filter.c b/net/core/filter.c
index 451b0ec7f2421..6a97a2aeabc10 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5803,6 +5803,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
struct fib_table *tb;
+ if (flags & BPF_FIB_LOOKUP_TBID) {
+ tbid = params->tbid;
+ /* zero out for vlan output */
+ params->tbid = 0;
+ }
+
tb = fib_get_table(net, tbid);
if (unlikely(!tb))
return BPF_FIB_LKUP_RET_NOT_FWDED;
@@ -5936,6 +5942,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
struct fib6_table *tb;
+ if (flags & BPF_FIB_LOOKUP_TBID) {
+ tbid = params->tbid;
+ /* zero out for vlan output */
+ params->tbid = 0;
+ }
+
tb = ipv6_stub->fib6_get_table(net, tbid);
if (unlikely(!tb))
return BPF_FIB_LKUP_RET_NOT_FWDED;
@@ -6008,7 +6020,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
#endif
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
- BPF_FIB_LOOKUP_SKIP_NEIGH)
+ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1bb11a6ee6676..6254eac48b185 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3167,6 +3167,10 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_DIRECT**
* Do a direct table lookup vs full lookup using FIB
* rules.
+ * **BPF_FIB_LOOKUP_TBID**
+ * Used with BPF_FIB_LOOKUP_DIRECT.
+ * Use the routing table ID present in *params*->tbid
+ * for the fib lookup.
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
@@ -6821,6 +6825,7 @@ enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+ BPF_FIB_LOOKUP_TBID = (1U << 3),
};
enum {
@@ -6881,9 +6886,19 @@ struct bpf_fib_lookup {
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
- /* output */
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
+ union {
+ struct {
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ };
+ /* input: when accompanied with the
+ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+ * specific routing table to use for the fib lookup.
+ */
+ __u32 tbid;
+ };
+
__u8 smac[6]; /* ETH_ALEN */
__u8 dmac[6]; /* ETH_ALEN */
};
--
2.40.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH v2 2/2] selftests/bpf: test table ID fib lookup BPF helper
2023-05-31 19:38 [PATCH v2 0/2] bpf: utilize table ID in bpf_fib_lookup helper Louis DeLosSantos
2023-05-31 19:38 ` [PATCH v2 1/2] bpf: add table ID to bpf_fib_lookup BPF helper Louis DeLosSantos
@ 2023-05-31 19:38 ` Louis DeLosSantos
2023-06-01 18:07 ` [PATCH v2 0/2] bpf: utilize table ID in bpf_fib_lookup helper Daniel Borkmann
2023-06-01 18:10 ` patchwork-bot+netdevbpf
3 siblings, 0 replies; 5+ messages in thread
From: Louis DeLosSantos @ 2023-05-31 19:38 UTC (permalink / raw)
To: bpf, netdev
Cc: Martin KaFai Lau, Stanislav Fomichev, razor, John Fastabend,
Yonghong Song, Louis DeLosSantos
Add additional test cases to `fib_lookup.c` prog_test.
These test cases add a new /24 network to the previously unused veth2
device, removes the directly connected route from the main routing table
and moves it to table 100.
The first test case then confirms a fib lookup for a remote address in this
directly connected network, using the main routing table fails.
The second test case ensures the same fib lookup using table 100
succeeds.
An additional pair of tests which function in the same manner are added
for IPv6.
Signed-off-by: Louis DeLosSantos <louis.delos.devel@gmail.com>
---
.../testing/selftests/bpf/prog_tests/fib_lookup.c | 61 +++++++++++++++++++---
1 file changed, 53 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index a1e7121058118..2fd05649bad19 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/rtnetlink.h>
#include <sys/types.h>
#include <net/if.h>
@@ -15,14 +16,23 @@
#define IPV4_IFACE_ADDR "10.0.0.254"
#define IPV4_NUD_FAILED_ADDR "10.0.0.1"
#define IPV4_NUD_STALE_ADDR "10.0.0.2"
+#define IPV4_TBID_ADDR "172.0.0.254"
+#define IPV4_TBID_NET "172.0.0.0"
+#define IPV4_TBID_DST "172.0.0.2"
+#define IPV6_TBID_ADDR "fd00::FFFF"
+#define IPV6_TBID_NET "fd00::"
+#define IPV6_TBID_DST "fd00::2"
#define DMAC "11:11:11:11:11:11"
#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
+#define DMAC2 "01:01:01:01:01:01"
+#define DMAC_INIT2 { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, }
struct fib_lookup_test {
const char *desc;
const char *daddr;
int expected_ret;
int lookup_flags;
+ __u32 tbid;
__u8 dmac[6];
};
@@ -43,6 +53,22 @@ static const struct fib_lookup_test tests[] = {
{ .desc = "IPv4 skip neigh",
.daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 TBID lookup failure",
+ .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+ .tbid = RT_TABLE_MAIN, },
+ { .desc = "IPv4 TBID lookup success",
+ .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+ .dmac = DMAC_INIT2, },
+ { .desc = "IPv6 TBID lookup failure",
+ .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+ .tbid = RT_TABLE_MAIN, },
+ { .desc = "IPv6 TBID lookup success",
+ .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+ .dmac = DMAC_INIT2, },
};
static int ifindex;
@@ -53,6 +79,7 @@ static int setup_netns(void)
SYS(fail, "ip link add veth1 type veth peer name veth2");
SYS(fail, "ip link set dev veth1 up");
+ SYS(fail, "ip link set dev veth2 up");
err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900");
if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)"))
@@ -70,6 +97,17 @@ static int setup_netns(void)
SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
+ /* Setup for tbid lookup tests */
+ SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
+ SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
+ SYS(fail, "ip route add table 100 %s/24 dev veth2", IPV4_TBID_NET);
+ SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV4_TBID_DST, DMAC2);
+
+ SYS(fail, "ip addr add %s/64 dev veth2", IPV6_TBID_ADDR);
+ SYS(fail, "ip -6 route del %s/64 dev veth2", IPV6_TBID_NET);
+ SYS(fail, "ip -6 route add table 100 %s/64 dev veth2", IPV6_TBID_NET);
+ SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV6_TBID_DST, DMAC2);
+
err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1");
if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)"))
goto fail;
@@ -83,7 +121,7 @@ static int setup_netns(void)
return -1;
}
-static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr)
+static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test)
{
int ret;
@@ -91,8 +129,9 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr)
params->l4_protocol = IPPROTO_TCP;
params->ifindex = ifindex;
+ params->tbid = test->tbid;
- if (inet_pton(AF_INET6, daddr, params->ipv6_dst) == 1) {
+ if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
params->family = AF_INET6;
ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
@@ -100,7 +139,7 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr)
return 0;
}
- ret = inet_pton(AF_INET, daddr, ¶ms->ipv4_dst);
+ ret = inet_pton(AF_INET, test->daddr, ¶ms->ipv4_dst);
if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
return -1;
params->family = AF_INET;
@@ -154,13 +193,12 @@ void test_fib_lookup(void)
fib_params = &skel->bss->fib_params;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- printf("Testing %s\n", tests[i].desc);
+ printf("Testing %s ", tests[i].desc);
- if (set_lookup_params(fib_params, tests[i].daddr))
+ if (set_lookup_params(fib_params, &tests[i]))
continue;
skel->bss->fib_lookup_ret = -1;
- skel->bss->lookup_flags = BPF_FIB_LOOKUP_OUTPUT |
- tests[i].lookup_flags;
+ skel->bss->lookup_flags = tests[i].lookup_flags;
err = bpf_prog_test_run_opts(prog_fd, &run_opts);
if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
@@ -175,7 +213,14 @@ void test_fib_lookup(void)
mac_str(expected, tests[i].dmac);
mac_str(actual, fib_params->dmac);
- printf("dmac expected %s actual %s\n", expected, actual);
+ printf("dmac expected %s actual %s ", expected, actual);
+ }
+
+ // ensure tbid is zero'd out after fib lookup.
+ if (tests[i].lookup_flags & BPF_FIB_LOOKUP_DIRECT) {
+ if (!ASSERT_EQ(skel->bss->fib_params.tbid, 0,
+ "expected fib_params.tbid to be zero"))
+ goto fail;
}
}
--
2.40.1
^ permalink raw reply related [flat|nested] 5+ messages in thread