* [PATCH bpf 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings.
@ 2023-04-20 12:31 Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 1/4] bpf: factor out socket lookup functions for the TC hookpoint Gilad Sever
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Gilad Sever @ 2023-04-20 12:31 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
When calling socket lookup from L2 (tc, xdp), VRF boundaries aren't
respected. This patchset fixes this by regarding the incoming device's
VRF attachment when performing the socket lookups from tc/xdp.
The first two patches are coding changes which facilitate this fix by
factoring out the tc helper's logic which was shared with cg/sk_skb
(which operate correctly).
The third patch contains the actual bugfix.
The fourth patch adds bpf tests for these lookup functions.
Gilad Sever (4):
bpf: factor out socket lookup functions for the TC hookpoint.
bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via TC
hookpoint
bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings
selftests/bpf: Add tc_socket_lookup tests
net/core/filter.c | 132 +++++--
.../bpf/prog_tests/tc_socket_lookup.c | 341 ++++++++++++++++++
.../selftests/bpf/progs/tc_socket_lookup.c | 73 ++++
3 files changed, 525 insertions(+), 21 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_socket_lookup.c
create mode 100644 tools/testing/selftests/bpf/progs/tc_socket_lookup.c
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH bpf 1/4] bpf: factor out socket lookup functions for the TC hookpoint.
2023-04-20 12:31 [PATCH bpf 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
@ 2023-04-20 12:31 ` Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 2/4] bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via " Gilad Sever
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Gilad Sever @ 2023-04-20 12:31 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
Change BPF helper socket lookup functions to use TC specific variants:
bpf_tc_sk_lookup_tcp() / bpf_tc_sk_lookup_udp() / bpf_tc_skc_lookup_tcp()
instead of sharing implementation with the cg / sk_skb hooking points.
This allows introducing a separate logic for the TC flow.
The tc functions are identical to the original code.
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Gilad Sever <gilad9366@gmail.com>
---
net/core/filter.c | 63 ++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 60 insertions(+), 3 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 1d6f165923bf..5910956f4e0d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6701,6 +6701,63 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
.arg5_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
+ netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
+ .func = bpf_tc_skc_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
+ netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
+ .func = bpf_tc_sk_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
+ netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
+ .func = bpf_tc_sk_lookup_udp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
if (sk && sk_is_refcounted(sk))
@@ -7954,9 +8011,9 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_tcp:
- return &bpf_sk_lookup_tcp_proto;
+ return &bpf_tc_sk_lookup_tcp_proto;
case BPF_FUNC_sk_lookup_udp:
- return &bpf_sk_lookup_udp_proto;
+ return &bpf_tc_sk_lookup_udp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
case BPF_FUNC_tcp_sock:
@@ -7964,7 +8021,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_listener_sock:
return &bpf_get_listener_sock_proto;
case BPF_FUNC_skc_lookup_tcp:
- return &bpf_skc_lookup_tcp_proto;
+ return &bpf_tc_skc_lookup_tcp_proto;
case BPF_FUNC_tcp_check_syncookie:
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_skb_ecn_set_ce:
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH bpf 2/4] bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via TC hookpoint
2023-04-20 12:31 [PATCH bpf 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 1/4] bpf: factor out socket lookup functions for the TC hookpoint Gilad Sever
@ 2023-04-20 12:31 ` Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 4/4] selftests/bpf: Add tc_socket_lookup tests Gilad Sever
3 siblings, 0 replies; 5+ messages in thread
From: Gilad Sever @ 2023-04-20 12:31 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
skb->dev always exists in the tc flow. There is no need to use
bpf_skc_lookup(), bpf_sk_lookup() from this code path.
This change facilitates fixing the tc flow to be VRF aware.
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Gilad Sever <gilad9366@gmail.com>
---
net/core/filter.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 5910956f4e0d..f43f86fc1235 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6704,8 +6704,12 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
- netns_id, flags);
+ struct net *caller_net = dev_net(skb->dev);
+ int ifindex = skb->dev->ifindex;
+
+ return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
@@ -6723,8 +6727,12 @@ static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
- netns_id, flags);
+ struct net *caller_net = dev_net(skb->dev);
+ int ifindex = skb->dev->ifindex;
+
+ return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
@@ -6742,8 +6750,12 @@ static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
- netns_id, flags);
+ struct net *caller_net = dev_net(skb->dev);
+ int ifindex = skb->dev->ifindex;
+
+ return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
+ ifindex, IPPROTO_UDP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH bpf 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings
2023-04-20 12:31 [PATCH bpf 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 1/4] bpf: factor out socket lookup functions for the TC hookpoint Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 2/4] bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via " Gilad Sever
@ 2023-04-20 12:31 ` Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 4/4] selftests/bpf: Add tc_socket_lookup tests Gilad Sever
3 siblings, 0 replies; 5+ messages in thread
From: Gilad Sever @ 2023-04-20 12:31 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
When calling bpf_sk_lookup_tcp(), bpf_sk_lookup_udp() or
bpf_skc_lookup_tcp() from tc/xdp ingress, VRF socket bindings aren't
respoected, i.e. unbound sockets are returned, and bound sockets aren't
found.
VRF binding is determined by the sdif argument to sk_lookup(), however
when called from tc the IP SKB control block isn't initialized and thus
inet{,6}_sdif() always returns 0.
Fix by calculating sdif for the tc/xdp flows by observing the device's
l3 enslaved state.
The cg/sk_skb hooking points which are expected to support
inet{,6}_sdif() pass sdif=-1 which makes __bpf_skc_lookup() use the
existing logic.
Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Gilad Sever <gilad9366@gmail.com>
---
net/core/filter.c | 63 +++++++++++++++++++++++++++++++----------------
1 file changed, 42 insertions(+), 21 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index f43f86fc1235..d68e14ff1e55 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6529,12 +6529,11 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
static struct sock *
__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
- u64 flags)
+ u64 flags, int sdif)
{
struct sock *sk = NULL;
struct net *net;
u8 family;
- int sdif;
if (len == sizeof(tuple->ipv4))
family = AF_INET;
@@ -6546,10 +6545,12 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
- if (family == AF_INET)
- sdif = inet_sdif(skb);
- else
- sdif = inet6_sdif(skb);
+ if (sdif < 0) {
+ if (family == AF_INET)
+ sdif = inet_sdif(skb);
+ else
+ sdif = inet6_sdif(skb);
+ }
if ((s32)netns_id < 0) {
net = caller_net;
@@ -6569,10 +6570,11 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
static struct sock *
__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
- u64 flags)
+ u64 flags, int sdif)
{
struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
- ifindex, proto, netns_id, flags);
+ ifindex, proto, netns_id, flags,
+ sdif);
if (sk) {
struct sock *sk2 = sk_to_full_sk(sk);
@@ -6612,7 +6614,7 @@ bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
}
return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
- netns_id, flags);
+ netns_id, flags, -1);
}
static struct sock *
@@ -6701,15 +6703,25 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
.arg5_type = ARG_ANYTHING,
};
+static int bpf_l2_sdif(const struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(dev))
+ return dev->ifindex;
+#endif
+ return 0;
+}
+
BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
struct net *caller_net = dev_net(skb->dev);
+ int sdif = bpf_l2_sdif(skb->dev);
int ifindex = skb->dev->ifindex;
return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net,
ifindex, IPPROTO_TCP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
@@ -6728,11 +6740,12 @@ BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
struct net *caller_net = dev_net(skb->dev);
+ int sdif = bpf_l2_sdif(skb->dev);
int ifindex = skb->dev->ifindex;
return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
ifindex, IPPROTO_TCP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
@@ -6751,11 +6764,12 @@ BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
struct net *caller_net = dev_net(skb->dev);
+ int sdif = bpf_l2_sdif(skb->dev);
int ifindex = skb->dev->ifindex;
return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
ifindex, IPPROTO_UDP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
@@ -6788,11 +6802,13 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
- int ifindex = ctx->rxq->dev->ifindex;
+ struct net_device *dev = ctx->rxq->dev;
+ int sdif = bpf_l2_sdif(dev);
+ int ifindex = dev->ifindex;
return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
ifindex, IPPROTO_UDP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
@@ -6811,11 +6827,13 @@ BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
- int ifindex = ctx->rxq->dev->ifindex;
+ struct net_device *dev = ctx->rxq->dev;
+ int sdif = bpf_l2_sdif(dev);
+ int ifindex = dev->ifindex;
return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
ifindex, IPPROTO_TCP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
@@ -6834,11 +6852,13 @@ BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
- int ifindex = ctx->rxq->dev->ifindex;
+ struct net_device *dev = ctx->rxq->dev;
+ int sdif = bpf_l2_sdif(dev);
+ int ifindex = dev->ifindex;
return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
ifindex, IPPROTO_TCP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
@@ -6858,7 +6878,8 @@ BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
{
return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
sock_net(ctx->sk), 0,
- IPPROTO_TCP, netns_id, flags);
+ IPPROTO_TCP, netns_id, flags,
+ -1);
}
static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
@@ -6877,7 +6898,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
{
return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
sock_net(ctx->sk), 0, IPPROTO_TCP,
- netns_id, flags);
+ netns_id, flags, -1);
}
static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
@@ -6896,7 +6917,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
{
return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
sock_net(ctx->sk), 0, IPPROTO_UDP,
- netns_id, flags);
+ netns_id, flags, -1);
}
static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH bpf 4/4] selftests/bpf: Add tc_socket_lookup tests
2023-04-20 12:31 [PATCH bpf 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
` (2 preceding siblings ...)
2023-04-20 12:31 ` [PATCH bpf 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings Gilad Sever
@ 2023-04-20 12:31 ` Gilad Sever
3 siblings, 0 replies; 5+ messages in thread
From: Gilad Sever @ 2023-04-20 12:31 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
Verify that socket lookup via TC with all BPF APIs is VRF aware.
Signed-off-by: Gilad Sever <gilad9366@gmail.com>
---
.../bpf/prog_tests/tc_socket_lookup.c | 341 ++++++++++++++++++
.../selftests/bpf/progs/tc_socket_lookup.c | 73 ++++
2 files changed, 414 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_socket_lookup.c
create mode 100644 tools/testing/selftests/bpf/progs/tc_socket_lookup.c
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_socket_lookup.c b/tools/testing/selftests/bpf/prog_tests/tc_socket_lookup.c
new file mode 100644
index 000000000000..1ddd70cf4a61
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_socket_lookup.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ * NS1 namespace | NS2 namespace
+ * |
+ * +--------------+ | +--------------+
+ * | veth01 |----------| veth10 |
+ * | 172.16.1.100 | | | 172.16.1.200 |
+ * | bpf | | +--------------+
+ * +--------------+ |
+ * server(UDP/TCP) |
+ * +-------------------+ |
+ * | vrf1 | |
+ * | +--------------+ | | +--------------+
+ * | | veth02 |----------| veth20 |
+ * | | 172.16.2.100 | | | | 172.16.2.200 |
+ * | | bpf | | | +--------------+
+ * | +--------------+ | |
+ * | server(UDP/TCP) | |
+ * +-------------------+ |
+ *
+ * Test flow
+ * -----------
+ * The tests verifies that socket lookup via TC is VRF aware:
+ * 1) Creates two veth pairs between NS1 and NS2:
+ * a) veth01 <-> veth10 outside the VRF
+ * b) veth02 <-> veth20 in the VRF
+ * 2) Attaches to veth01 and veth02 a program that calls:
+ * a) bpf_skc_lookup_tcp() with TCP and tcp_skc is true
+ * b) bpf_sk_lookup_tcp() with TCP and tcp_skc is false
+ * c) bpf_sk_lookup_udp() with UDP
+ * The program stores the lookup result in bss->lookup_status.
+ * 3) Creates a socket TCP/UDP server in/outside the VRF.
+ * 4) The test expects lookup_status to be:
+ * a) 0 from device in VRF to server outside VRF
+ * b) 0 from device outside VRF to server in VRF
+ * c) 1 from device in VRF to server in VRF
+ * d) 1 from device outside VRF to server outside VRF
+ */
+
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "tc_socket_lookup.skel.h"
+
+#define NS1 "tc_socket_lookup_1"
+#define NS2 "tc_socket_lookup_2"
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define NON_VRF_PORT 5000
+#define IN_VRF_PORT 5001
+
+#define IO_TIMEOUT_SEC 3
+
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ goto fail; \
+ })
+
+#define SYS_NOFAIL(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ system(cmd); \
+ })
+
+static int make_socket(int sotype, const char *ip, int port,
+ struct sockaddr_storage *addr)
+{
+ struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
+ int err, fd;
+
+ err = make_sockaddr(AF_INET, ip, port, addr, NULL);
+ if (!ASSERT_OK(err, "make_address"))
+ return -1;
+
+ fd = socket(AF_INET, sotype, 0);
+ if (!ASSERT_OK(fd < 0, "socket"))
+ return -1;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+ if (!ASSERT_OK(err, "setsockopt(SO_SNDTIMEO)"))
+ goto fail;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+ if (!ASSERT_OK(err, "setsockopt(SO_RCVTIMEO)"))
+ goto fail;
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int make_server(int sotype, const char *ip, int port, const char *ifname)
+{
+ struct sockaddr_storage addr = {};
+ const int one = 1;
+ int err, fd = -1;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ if (sotype == SOCK_STREAM) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one));
+ if (!ASSERT_OK(err, "setsockopt(SO_REUSEADDR)"))
+ goto fail;
+ }
+
+ if (ifname) {
+ err = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ ifname, strlen(ifname) + 1);
+ if (!ASSERT_OK(err, "setsockopt(SO_BINDTODEVICE)"))
+ goto fail;
+ }
+
+ err = bind(fd, (void *)&addr, sizeof(struct sockaddr_in));
+ if (!ASSERT_OK(err, "bind"))
+ goto fail;
+
+ if (sotype == SOCK_STREAM) {
+ err = listen(fd, SOMAXCONN);
+ if (!ASSERT_OK(err, "listen"))
+ goto fail;
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int prog_fd)
+{
+ LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1,
+ .prog_fd = prog_fd);
+ int ret;
+
+ ret = bpf_tc_hook_create(hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (prog_fd >= 0) {
+ hook->attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete "
+ NS1);
+ SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete "
+ NS2);
+}
+
+static int setup(struct tc_socket_lookup *skel)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ struct nstoken *nstoken = NULL;
+ int ifindex, prog_fd, ret = 0;
+
+ SYS("ip netns add " NS1);
+ SYS("ip netns add " NS2);
+
+ /* NS1 <-> NS2 [veth01 <-> veth10] */
+ SYS("ip link add veth01 netns " NS1 " type veth peer name veth10 netns "
+ NS2);
+ SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS("ip -net " NS1 " link set dev veth01 up");
+ SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS("ip -net " NS2 " link set dev veth10 up");
+
+ /* NS1 <-> NS2 [veth02 <-> veth20] */
+ SYS("ip link add veth02 netns " NS1 " type veth peer name veth20 netns "
+ NS2);
+ SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS("ip -net " NS1 " link set dev veth02 up");
+ SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS("ip -net " NS2 " link set dev veth20 up");
+
+ /* veth02 -> vrf1 */
+ SYS("ip -net " NS1 " link add vrf1 type vrf table 11");
+ SYS("ip -net " NS1 " route add vrf vrf1 unreachable default metric "
+ "4278198272");
+ SYS("ip -net " NS1 " link set vrf1 alias vrf");
+ SYS("ip -net " NS1 " link set vrf1 up");
+ SYS("ip -net " NS1 " link set veth02 master vrf1");
+
+ /* Attach prog to veth devices in NS1 */
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS1))
+ goto fail;
+ prog_fd = bpf_program__fd(skel->progs.test_socket_lookup);
+ if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd"))
+ goto fail;
+
+ ifindex = if_nametoindex("veth01");
+ if (!ASSERT_NEQ(ifindex, 0, "veth01 ifindex"))
+ goto fail;
+ tc_hook.ifindex = ifindex;
+ if (attach_tc_prog(&tc_hook, prog_fd))
+ goto fail;
+
+ ifindex = if_nametoindex("veth02");
+ if (!ASSERT_NEQ(ifindex, 0, "veth02 ifindex"))
+ goto fail;
+ tc_hook.ifindex = ifindex;
+ if (attach_tc_prog(&tc_hook, prog_fd))
+ goto fail;
+ goto close;
+fail:
+ ret = -1;
+close:
+ if (nstoken)
+ close_netns(nstoken);
+ return ret;
+}
+
+static int test_lookup(struct tc_socket_lookup *skel, int sotype,
+ const char *ip, int port, bool tcp_skc,
+ int lookup_status_exp)
+{
+ static const char msg[] = "Hello Server";
+ struct sockaddr_storage addr = {};
+ int fd, ret = 0;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ skel->bss->tcp_skc = tcp_skc;
+ skel->bss->lookup_status = -1;
+
+ if (sotype == SOCK_STREAM)
+ connect(fd, (void *)&addr, sizeof(struct sockaddr_in));
+ else
+ sendto(fd, msg, sizeof(msg), 0, (void *)&addr,
+ sizeof(struct sockaddr_in));
+
+ if (!ASSERT_EQ(skel->bss->lookup_status, lookup_status_exp,
+ "lookup_status"))
+ goto fail;
+
+ goto close;
+
+fail:
+ ret = -1;
+close:
+ close(fd);
+ return ret;
+}
+
+static void _test_tc_socket_lookup(struct tc_socket_lookup *skel, int sotype,
+ bool tcp_skc)
+{
+ int in_vrf_server, non_vrf_server;
+ struct nstoken *nstoken = NULL;
+
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS1))
+ goto done;
+
+ /* Open sockets in and outside VRF */
+ non_vrf_server = make_server(sotype, "0.0.0.0", NON_VRF_PORT, NULL);
+ if (!ASSERT_GE(non_vrf_server, 0, "make_server__outside_vrf_fd"))
+ goto done;
+
+ in_vrf_server = make_server(sotype, "0.0.0.0", IN_VRF_PORT, "veth02");
+ if (!ASSERT_GE(in_vrf_server, 0, "make_server__in_vrf_fd"))
+ goto done;
+
+ /* Perform test from NS2 */
+ close_netns(nstoken);
+ nstoken = open_netns(NS2);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS2))
+ goto done;
+
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, NON_VRF_PORT,
+ tcp_skc, 0), "in_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, IN_VRF_PORT,
+ tcp_skc, 1), "in_to_in"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, NON_VRF_PORT,
+ tcp_skc, 1), "out_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, IN_VRF_PORT,
+ tcp_skc, 0), "out_to_in"))
+ goto done;
+
+done:
+ if (non_vrf_server)
+ close(non_vrf_server);
+ if (in_vrf_server)
+ close(in_vrf_server);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+void test_tc_socket_lookup(void)
+{
+ struct tc_socket_lookup *skel;
+
+ cleanup();
+
+ skel = tc_socket_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tc_socket_lookup__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(setup(skel), "setup"))
+ goto done;
+
+ if (test__start_subtest("tc_socket_lookup_tcp"))
+ _test_tc_socket_lookup(skel, SOCK_STREAM, false);
+ if (test__start_subtest("tc_socket_lookup_tcp_skc"))
+ _test_tc_socket_lookup(skel, SOCK_STREAM, true);
+ if (test__start_subtest("tc_socket_lookup_udp"))
+ _test_tc_socket_lookup(skel, SOCK_DGRAM, false);
+
+done:
+ tc_socket_lookup__destroy(skel);
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/progs/tc_socket_lookup.c b/tools/testing/selftests/bpf/progs/tc_socket_lookup.c
new file mode 100644
index 000000000000..06601eb17807
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tc_socket_lookup.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <stdbool.h>
+
+int lookup_status;
+bool tcp_skc;
+
+#define CUR_NS BPF_F_CURRENT_NETNS
+
+SEC("tc")
+int test_socket_lookup(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple *tp;
+ void *data_end, *data;
+ struct bpf_sock *sk;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ int tplen;
+
+ if (skb->protocol != bpf_htons(ETH_P_IP))
+ return TC_ACT_UNSPEC;
+
+ tplen = sizeof(tp->ipv4);
+
+ if (bpf_skb_pull_data(skb, sizeof(*eth) + sizeof(*iph) + tplen))
+ return TC_ACT_SHOT;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+
+ eth = data;
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ iph = (struct iphdr *)(eth + 1);
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ tp = (struct bpf_sock_tuple *)&iph->saddr;
+ if ((void *)tp + tplen > data_end)
+ return TC_ACT_SHOT;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (tcp_skc)
+ sk = bpf_skc_lookup_tcp(skb, tp, tplen, CUR_NS, 0);
+ else
+ sk = bpf_sk_lookup_tcp(skb, tp, tplen, CUR_NS, 0);
+ break;
+ case IPPROTO_UDP:
+ sk = bpf_sk_lookup_udp(skb, tp, tplen, CUR_NS, 0);
+ break;
+ default:
+ return TC_ACT_SHOT;
+ }
+
+ lookup_status = 0;
+
+ if (sk) {
+ bpf_sk_release(sk);
+ lookup_status = 1;
+ }
+
+ return TC_ACT_UNSPEC;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-04-20 12:33 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-04-20 12:31 [PATCH bpf 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 1/4] bpf: factor out socket lookup functions for the TC hookpoint Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 2/4] bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via " Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings Gilad Sever
2023-04-20 12:31 ` [PATCH bpf 4/4] selftests/bpf: Add tc_socket_lookup tests Gilad Sever
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).