* [PATCH bpf,v5 1/4] bpf: factor out socket lookup functions for the TC hookpoint.
2023-06-08 11:41 [PATCH bpf,v5 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
@ 2023-06-08 11:41 ` Gilad Sever
2023-06-08 11:41 ` [PATCH bpf,v5 2/4] bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via " Gilad Sever
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Gilad Sever @ 2023-06-08 11:41 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
Change BPF helper socket lookup functions to use TC specific variants:
bpf_tc_sk_lookup_tcp() / bpf_tc_sk_lookup_udp() / bpf_tc_skc_lookup_tcp()
instead of sharing implementation with the cg / sk_skb hooking points.
This allows introducing a separate logic for the TC flow.
The tc functions are identical to the original code.
Acked-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Gilad Sever <gilad9366@gmail.com>
---
net/core/filter.c | 63 ++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 60 insertions(+), 3 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index d9ce04ca22ce..57d853460e12 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6727,6 +6727,63 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
.arg5_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
+ netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
+ .func = bpf_tc_skc_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
+ netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
+ .func = bpf_tc_sk_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
+ netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
+ .func = bpf_tc_sk_lookup_udp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
if (sk && sk_is_refcounted(sk))
@@ -7980,9 +8037,9 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_tcp:
- return &bpf_sk_lookup_tcp_proto;
+ return &bpf_tc_sk_lookup_tcp_proto;
case BPF_FUNC_sk_lookup_udp:
- return &bpf_sk_lookup_udp_proto;
+ return &bpf_tc_sk_lookup_udp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
case BPF_FUNC_tcp_sock:
@@ -7990,7 +8047,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_listener_sock:
return &bpf_get_listener_sock_proto;
case BPF_FUNC_skc_lookup_tcp:
- return &bpf_skc_lookup_tcp_proto;
+ return &bpf_tc_skc_lookup_tcp_proto;
case BPF_FUNC_tcp_check_syncookie:
return &bpf_tcp_check_syncookie_proto;
case BPF_FUNC_skb_ecn_set_ce:
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH bpf,v5 2/4] bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via TC hookpoint
2023-06-08 11:41 [PATCH bpf,v5 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
2023-06-08 11:41 ` [PATCH bpf,v5 1/4] bpf: factor out socket lookup functions for the TC hookpoint Gilad Sever
@ 2023-06-08 11:41 ` Gilad Sever
2023-06-08 11:41 ` [PATCH bpf,v5 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings Gilad Sever
2023-06-08 11:41 ` [PATCH bpf,v5 4/4] selftests/bpf: Add vrf_socket_lookup tests Gilad Sever
3 siblings, 0 replies; 6+ messages in thread
From: Gilad Sever @ 2023-06-08 11:41 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
skb->dev always exists in the tc flow. There is no need to use
bpf_skc_lookup(), bpf_sk_lookup() from this code path.
This change facilitates fixing the tc flow to be VRF aware.
Acked-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Gilad Sever <gilad9366@gmail.com>
---
net/core/filter.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 57d853460e12..e06547922edc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6730,8 +6730,12 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
- netns_id, flags);
+ struct net *caller_net = dev_net(skb->dev);
+ int ifindex = skb->dev->ifindex;
+
+ return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
@@ -6749,8 +6753,12 @@ static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
- netns_id, flags);
+ struct net *caller_net = dev_net(skb->dev);
+ int ifindex = skb->dev->ifindex;
+
+ return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
@@ -6768,8 +6776,12 @@ static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
- netns_id, flags);
+ struct net *caller_net = dev_net(skb->dev);
+ int ifindex = skb->dev->ifindex;
+
+ return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
+ ifindex, IPPROTO_UDP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH bpf,v5 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings
2023-06-08 11:41 [PATCH bpf,v5 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
2023-06-08 11:41 ` [PATCH bpf,v5 1/4] bpf: factor out socket lookup functions for the TC hookpoint Gilad Sever
2023-06-08 11:41 ` [PATCH bpf,v5 2/4] bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via " Gilad Sever
@ 2023-06-08 11:41 ` Gilad Sever
2023-06-20 22:30 ` Daniel Borkmann
2023-06-08 11:41 ` [PATCH bpf,v5 4/4] selftests/bpf: Add vrf_socket_lookup tests Gilad Sever
3 siblings, 1 reply; 6+ messages in thread
From: Gilad Sever @ 2023-06-08 11:41 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
When calling bpf_sk_lookup_tcp(), bpf_sk_lookup_udp() or
bpf_skc_lookup_tcp() from tc/xdp ingress, VRF socket bindings aren't
respected, i.e. unbound sockets are returned, and bound sockets aren't
found.
VRF binding is determined by the sdif argument to sk_lookup(), however
when called from tc the IP SKB control block isn't initialized and thus
inet{,6}_sdif() always returns 0.
Fix by calculating sdif for the tc/xdp flows by observing the device's
l3 enslaved state.
The cg/sk_skb hooking points which are expected to support
inet{,6}_sdif() pass sdif=-1 which makes __bpf_skc_lookup() use the
existing logic.
Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
Acked-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Gilad Sever <gilad9366@gmail.com>
---
v5: Use reverse xmas tree indentation
v4: Move dev_sdif() to include/linux/netdevice.h as suggested by Stanislav Fomichev
v3: Rename bpf_l2_sdif() to dev_sdif() as suggested by Stanislav Fomichev
---
include/linux/netdevice.h | 9 +++++++
net/core/filter.c | 54 ++++++++++++++++++++++++---------------
2 files changed, 42 insertions(+), 21 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c2f0c6002a84..db1bfca6b8b4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5093,6 +5093,15 @@ static inline bool netif_is_l3_slave(const struct net_device *dev)
return dev->priv_flags & IFF_L3MDEV_SLAVE;
}
+/**
+ * dev_sdif - secondary device index to pass to socket lookups for @dev
+ * @dev: network device to inspect
+ *
+ * Return: @dev's own ifindex when @dev has IFF_L3MDEV_SLAVE set (see
+ * netif_is_l3_slave()), otherwise 0. Always 0 when
+ * CONFIG_NET_L3_MASTER_DEV is not configured.
+ */
+static inline int dev_sdif(const struct net_device *dev)
+{
+	/* NET_L3_MASTER_DEV is a bool Kconfig symbol, so a plain #ifdef suffices. */
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	if (netif_is_l3_slave(dev))
+		return dev->ifindex;
+#endif
+	return 0;
+}
+
static inline bool netif_is_bridge_master(const struct net_device *dev)
{
return dev->priv_flags & IFF_EBRIDGE;
diff --git a/net/core/filter.c b/net/core/filter.c
index e06547922edc..5f665845ae45 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6555,12 +6555,11 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
static struct sock *
__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
- u64 flags)
+ u64 flags, int sdif)
{
struct sock *sk = NULL;
struct net *net;
u8 family;
- int sdif;
if (len == sizeof(tuple->ipv4))
family = AF_INET;
@@ -6572,10 +6571,12 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
- if (family == AF_INET)
- sdif = inet_sdif(skb);
- else
- sdif = inet6_sdif(skb);
+ if (sdif < 0) {
+ if (family == AF_INET)
+ sdif = inet_sdif(skb);
+ else
+ sdif = inet6_sdif(skb);
+ }
if ((s32)netns_id < 0) {
net = caller_net;
@@ -6595,10 +6596,11 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
static struct sock *
__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
- u64 flags)
+ u64 flags, int sdif)
{
struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
- ifindex, proto, netns_id, flags);
+ ifindex, proto, netns_id, flags,
+ sdif);
if (sk) {
struct sock *sk2 = sk_to_full_sk(sk);
@@ -6638,7 +6640,7 @@ bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
}
return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
- netns_id, flags);
+ netns_id, flags, -1);
}
static struct sock *
@@ -6732,10 +6734,11 @@ BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb,
{
struct net *caller_net = dev_net(skb->dev);
int ifindex = skb->dev->ifindex;
+ int sdif = dev_sdif(skb->dev);
return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net,
ifindex, IPPROTO_TCP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = {
@@ -6755,10 +6758,11 @@ BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb,
{
struct net *caller_net = dev_net(skb->dev);
int ifindex = skb->dev->ifindex;
+ int sdif = dev_sdif(skb->dev);
return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
ifindex, IPPROTO_TCP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
@@ -6778,10 +6782,11 @@ BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
{
struct net *caller_net = dev_net(skb->dev);
int ifindex = skb->dev->ifindex;
+ int sdif = dev_sdif(skb->dev);
return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
ifindex, IPPROTO_UDP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
@@ -6814,11 +6819,13 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
- int ifindex = ctx->rxq->dev->ifindex;
+ struct net_device *dev = ctx->rxq->dev;
+ int ifindex = dev->ifindex;
+ int sdif = dev_sdif(dev);
return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
ifindex, IPPROTO_UDP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
@@ -6837,11 +6844,13 @@ BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
- int ifindex = ctx->rxq->dev->ifindex;
+ struct net_device *dev = ctx->rxq->dev;
+ int ifindex = dev->ifindex;
+ int sdif = dev_sdif(dev);
return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
ifindex, IPPROTO_TCP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
@@ -6860,11 +6869,13 @@ BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
- int ifindex = ctx->rxq->dev->ifindex;
+ struct net_device *dev = ctx->rxq->dev;
+ int ifindex = dev->ifindex;
+ int sdif = dev_sdif(dev);
return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
ifindex, IPPROTO_TCP, netns_id,
- flags);
+ flags, sdif);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
@@ -6884,7 +6895,8 @@ BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
{
return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
sock_net(ctx->sk), 0,
- IPPROTO_TCP, netns_id, flags);
+ IPPROTO_TCP, netns_id, flags,
+ -1);
}
static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
@@ -6903,7 +6915,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
{
return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
sock_net(ctx->sk), 0, IPPROTO_TCP,
- netns_id, flags);
+ netns_id, flags, -1);
}
static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
@@ -6922,7 +6934,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
{
return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
sock_net(ctx->sk), 0, IPPROTO_UDP,
- netns_id, flags);
+ netns_id, flags, -1);
}
static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH bpf,v5 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings
2023-06-08 11:41 ` [PATCH bpf,v5 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings Gilad Sever
@ 2023-06-20 22:30 ` Daniel Borkmann
0 siblings, 0 replies; 6+ messages in thread
From: Daniel Borkmann @ 2023-06-20 22:30 UTC (permalink / raw)
To: Gilad Sever, dsahern, martin.lau, john.fastabend, ast, andrii,
song, yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba,
pabeni, mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest
On 6/8/23 1:41 PM, Gilad Sever wrote:
> When calling bpf_sk_lookup_tcp(), bpf_sk_lookup_udp() or
> bpf_skc_lookup_tcp() from tc/xdp ingress, VRF socket bindings aren't
> respoected, i.e. unbound sockets are returned, and bound sockets aren't
> found.
>
> VRF binding is determined by the sdif argument to sk_lookup(), however
> when called from tc the IP SKB control block isn't initialized and thus
> inet{,6}_sdif() always returns 0.
>
> Fix by calculating sdif for the tc/xdp flows by observing the device's
> l3 enslaved state.
>
> The cg/sk_skb hooking points which are expected to support
> inet{,6}_sdif() pass sdif=-1 which makes __bpf_skc_lookup() use the
> existing logic.
>
> Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
> Acked-by: Stanislav Fomichev <sdf@google.com>
> Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
> Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
> Signed-off-by: Gilad Sever <gilad9366@gmail.com>
Overall this series looks good to go, just small nits which would be great
to still address.
> ---
> v5: Use reverse xmas tree indentation
>
> v4: Move dev_sdif() to include/linux/netdevice.h as suggested by Stanislav Fomichev
>
> v3: Rename bpf_l2_sdif() to dev_sdif() as suggested by Stanislav Fomichev
> ---
> include/linux/netdevice.h | 9 +++++++
> net/core/filter.c | 54 ++++++++++++++++++++++++---------------
> 2 files changed, 42 insertions(+), 21 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index c2f0c6002a84..db1bfca6b8b4 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -5093,6 +5093,15 @@ static inline bool netif_is_l3_slave(const struct net_device *dev)
> return dev->priv_flags & IFF_L3MDEV_SLAVE;
> }
>
> +static inline int dev_sdif(const struct net_device *dev)
> +{
> +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
Why IS_ENABLED? config NET_L3_MASTER_DEV says bool, so #ifdef CONFIG_NET_L3_MASTER_DEV
should suffice?
> + if (netif_is_l3_slave(dev))
> + return dev->ifindex;
> +#endif
> + return 0;
> +}
> +
> static inline bool netif_is_bridge_master(const struct net_device *dev)
> {
> return dev->priv_flags & IFF_EBRIDGE;
[...]
> static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = {
> @@ -6778,10 +6782,11 @@ BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb,
> {
> struct net *caller_net = dev_net(skb->dev);
> int ifindex = skb->dev->ifindex;
> + int sdif = dev_sdif(skb->dev);
>
> return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net,
> ifindex, IPPROTO_UDP, netns_id,
> - flags);
> + flags, sdif);
> }
>
> static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = {
> @@ -6814,11 +6819,13 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
> struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
> {
> struct net *caller_net = dev_net(ctx->rxq->dev);
> - int ifindex = ctx->rxq->dev->ifindex;
> + struct net_device *dev = ctx->rxq->dev;
Why not doing this before the struct net *caller_net and also use it there
for the dev_net()? Same in other XDP places. It would be nice to also do the
same for the tc helpers.
> + int ifindex = dev->ifindex;
> + int sdif = dev_sdif(dev);
>
> return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
> ifindex, IPPROTO_UDP, netns_id,
> - flags);
> + flags, sdif);
> }
>
> static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
> @@ -6837,11 +6844,13 @@ BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
> struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
> {
> struct net *caller_net = dev_net(ctx->rxq->dev);
> - int ifindex = ctx->rxq->dev->ifindex;
> + struct net_device *dev = ctx->rxq->dev;
> + int ifindex = dev->ifindex;
> + int sdif = dev_sdif(dev);
>
> return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
> ifindex, IPPROTO_TCP, netns_id,
> - flags);
> + flags, sdif);
> }
>
> static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH bpf,v5 4/4] selftests/bpf: Add vrf_socket_lookup tests
2023-06-08 11:41 [PATCH bpf,v5 0/4] Socket lookup BPF API from tc/xdp ingress does not respect VRF bindings Gilad Sever
` (2 preceding siblings ...)
2023-06-08 11:41 ` [PATCH bpf,v5 3/4] bpf: fix bpf socket lookup from tc/xdp to respect socket VRF bindings Gilad Sever
@ 2023-06-08 11:41 ` Gilad Sever
3 siblings, 0 replies; 6+ messages in thread
From: Gilad Sever @ 2023-06-08 11:41 UTC (permalink / raw)
To: dsahern, martin.lau, daniel, john.fastabend, ast, andrii, song,
yhs, kpsingh, sdf, haoluo, jolsa, davem, edumazet, kuba, pabeni,
mykolal, shuah, hawk, joe
Cc: eyal.birger, shmulik.ladkani, bpf, netdev, linux-kselftest,
Gilad Sever
Verify that socket lookup via TC/XDP with all BPF APIs is VRF aware.
Acked-by: Stanislav Fomichev <sdf@google.com>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Gilad Sever <gilad9366@gmail.com>
---
v5: - Use reverse xmas tree indentation
v4: - Remove SYS and SYS_NOFAIL duplicate definitions
v3: - Added xdp tests as suggested by Daniel Borkmann
- Use start_server() to avoid duplicate code as suggested by Stanislav Fomichev
v2: Fix build by initializing vars with -1
---
.../bpf/prog_tests/vrf_socket_lookup.c | 312 ++++++++++++++++++
.../selftests/bpf/progs/vrf_socket_lookup.c | 88 +++++
2 files changed, 400 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
create mode 100644 tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
diff --git a/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
new file mode 100644
index 000000000000..2a5e207edad6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ * NS0 namespace | NS1 namespace
+ * |
+ * +--------------+ | +--------------+
+ * | veth01 |----------| veth10 |
+ * | 172.16.1.100 | | | 172.16.1.200 |
+ * | bpf | | +--------------+
+ * +--------------+ |
+ * server(UDP/TCP) |
+ * +-------------------+ |
+ * | vrf1 | |
+ * | +--------------+ | | +--------------+
+ * | | veth02 |----------| veth20 |
+ * | | 172.16.2.100 | | | | 172.16.2.200 |
+ * | | bpf | | | +--------------+
+ * | +--------------+ | |
+ * | server(UDP/TCP) | |
+ * +-------------------+ |
+ *
+ * Test flow
+ * -----------
+ * The test verifies that socket lookup via TC and XDP is VRF aware:
+ * 1) Creates two veth pairs between NS0 and NS1:
+ * a) veth01 <-> veth10 outside the VRF
+ * b) veth02 <-> veth20 in the VRF
+ * 2) Attaches to veth01 and veth02 a program that calls:
+ * a) bpf_skc_lookup_tcp() with TCP and tcp_skc is true
+ * b) bpf_sk_lookup_tcp() with TCP and tcp_skc is false
+ * c) bpf_sk_lookup_udp() with UDP
+ * The program stores the lookup result in bss->lookup_status.
+ * 3) Creates a socket TCP/UDP server in/outside the VRF.
+ * 4) The test expects lookup_status to be:
+ * a) 0 from device in VRF to server outside VRF
+ * b) 0 from device outside VRF to server in VRF
+ * c) 1 from device in VRF to server in VRF
+ * d) 1 from device outside VRF to server outside VRF
+ */
+
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "vrf_socket_lookup.skel.h"
+
+#define NS0 "vrf_socket_lookup_0"
+#define NS1 "vrf_socket_lookup_1"
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define NON_VRF_PORT 5000
+#define IN_VRF_PORT 5001
+
+#define TIMEOUT_MS 3000
+
+static int make_socket(int sotype, const char *ip, int port,
+ struct sockaddr_storage *addr)
+{
+ int err, fd;
+
+ err = make_sockaddr(AF_INET, ip, port, addr, NULL);
+ if (!ASSERT_OK(err, "make_address"))
+ return -1;
+
+ fd = socket(AF_INET, sotype, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ return -1;
+
+ if (!ASSERT_OK(settimeo(fd, TIMEOUT_MS), "settimeo"))
+ goto fail;
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int make_server(int sotype, const char *ip, int port, const char *ifname)
+{
+ int err, fd = -1;
+
+ fd = start_server(AF_INET, sotype, ip, port, TIMEOUT_MS);
+ if (!ASSERT_GE(fd, 0, "start_server"))
+ return -1;
+
+ if (ifname) {
+ err = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ ifname, strlen(ifname) + 1);
+ if (!ASSERT_OK(err, "setsockopt(SO_BINDTODEVICE)"))
+ goto fail;
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int attach_progs(char *ifname, int tc_prog_fd, int xdp_prog_fd)
+{
+ LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1,
+ .prog_fd = tc_prog_fd);
+ int ret, ifindex;
+
+ ifindex = if_nametoindex(ifname);
+ if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex"))
+ return -1;
+ hook.ifindex = ifindex;
+
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ return ret;
+
+ ret = bpf_tc_attach(&hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ ret = bpf_xdp_attach(ifindex, xdp_prog_fd, 0, NULL);
+ if (!ASSERT_OK(ret, "bpf_xdp_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete "
+ NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete "
+ NS1);
+}
+
+static int setup(struct vrf_socket_lookup *skel)
+{
+ int tc_prog_fd, xdp_prog_fd, ret = 0;
+ struct nstoken *nstoken = NULL;
+
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+
+ /* NS0 <-> NS1 [veth01 <-> veth10] */
+ SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10"
+ " netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS(fail, "ip -net " NS0 " link set dev veth01 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS(fail, "ip -net " NS1 " link set dev veth10 up");
+
+ /* NS0 <-> NS1 [veth02 <-> veth20] */
+ SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20"
+ " netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS(fail, "ip -net " NS0 " link set dev veth02 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS(fail, "ip -net " NS1 " link set dev veth20 up");
+
+ /* veth02 -> vrf1 */
+ SYS(fail, "ip -net " NS0 " link add vrf1 type vrf table 11");
+ SYS(fail, "ip -net " NS0 " route add vrf vrf1 unreachable default"
+ " metric 4278198272");
+ SYS(fail, "ip -net " NS0 " link set vrf1 alias vrf");
+ SYS(fail, "ip -net " NS0 " link set vrf1 up");
+ SYS(fail, "ip -net " NS0 " link set veth02 master vrf1");
+
+ /* Attach TC and XDP progs to veth devices in NS0 */
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto fail;
+ tc_prog_fd = bpf_program__fd(skel->progs.tc_socket_lookup);
+ if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__tc_fd"))
+ goto fail;
+ xdp_prog_fd = bpf_program__fd(skel->progs.xdp_socket_lookup);
+ if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__xdp_fd"))
+ goto fail;
+
+ if (attach_progs("veth01", tc_prog_fd, xdp_prog_fd))
+ goto fail;
+
+ if (attach_progs("veth02", tc_prog_fd, xdp_prog_fd))
+ goto fail;
+
+ goto close;
+fail:
+ ret = -1;
+close:
+ if (nstoken)
+ close_netns(nstoken);
+ return ret;
+}
+
+static int test_lookup(struct vrf_socket_lookup *skel, int sotype,
+ const char *ip, int port, bool test_xdp, bool tcp_skc,
+ int lookup_status_exp)
+{
+ static const char msg[] = "Hello Server";
+ struct sockaddr_storage addr = {};
+ int fd, ret = 0;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ skel->bss->test_xdp = test_xdp;
+ skel->bss->tcp_skc = tcp_skc;
+ skel->bss->lookup_status = -1;
+
+ if (sotype == SOCK_STREAM)
+ connect(fd, (void *)&addr, sizeof(struct sockaddr_in));
+ else
+ sendto(fd, msg, sizeof(msg), 0, (void *)&addr,
+ sizeof(struct sockaddr_in));
+
+ if (!ASSERT_EQ(skel->bss->lookup_status, lookup_status_exp,
+ "lookup_status"))
+ goto fail;
+
+ goto close;
+
+fail:
+ ret = -1;
+close:
+ close(fd);
+ return ret;
+}
+
+static void _test_vrf_socket_lookup(struct vrf_socket_lookup *skel, int sotype,
+ bool test_xdp, bool tcp_skc)
+{
+ int in_vrf_server = -1, non_vrf_server = -1;
+ struct nstoken *nstoken = NULL;
+
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto done;
+
+ /* Open sockets in and outside VRF */
+ non_vrf_server = make_server(sotype, "0.0.0.0", NON_VRF_PORT, NULL);
+ if (!ASSERT_GE(non_vrf_server, 0, "make_server__outside_vrf_fd"))
+ goto done;
+
+ in_vrf_server = make_server(sotype, "0.0.0.0", IN_VRF_PORT, "veth02");
+ if (!ASSERT_GE(in_vrf_server, 0, "make_server__in_vrf_fd"))
+ goto done;
+
+ /* Perform test from NS1 */
+ close_netns(nstoken);
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS1))
+ goto done;
+
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, NON_VRF_PORT,
+ test_xdp, tcp_skc, 0), "in_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, IN_VRF_PORT,
+ test_xdp, tcp_skc, 1), "in_to_in"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, NON_VRF_PORT,
+ test_xdp, tcp_skc, 1), "out_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, IN_VRF_PORT,
+ test_xdp, tcp_skc, 0), "out_to_in"))
+ goto done;
+
+done:
+ if (non_vrf_server >= 0)
+ close(non_vrf_server);
+ if (in_vrf_server >= 0)
+ close(in_vrf_server);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+/*
+ * Test entry point: loads the skeleton, builds the NS0/NS1 topology via
+ * setup() and runs one subtest per (hook, lookup helper) combination.
+ *
+ * _test_vrf_socket_lookup() takes (skel, sotype, test_xdp, tcp_skc):
+ *   - sotype:   SOCK_STREAM sends TCP, SOCK_DGRAM sends UDP, which is what
+ *               makes the BPF program reach bpf_sk_lookup_udp();
+ *   - test_xdp: true runs the lookup from the XDP program, false from TC;
+ *   - tcp_skc:  true makes the BPF program call bpf_skc_lookup_tcp()
+ *               instead of bpf_sk_lookup_tcp() for TCP packets.
+ *
+ * The namespaces are removed both before the run (stale leftovers) and
+ * after it.
+ */
+void test_vrf_socket_lookup(void)
+{
+	struct vrf_socket_lookup *skel;
+
+	cleanup();
+
+	skel = vrf_socket_lookup__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "vrf_socket_lookup__open_and_load"))
+		return;
+
+	if (!ASSERT_OK(setup(skel), "setup"))
+		goto done;
+
+	/* TC ingress hook */
+	if (test__start_subtest("tc_socket_lookup_tcp"))
+		_test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+	if (test__start_subtest("tc_socket_lookup_tcp_skc"))
+		_test_vrf_socket_lookup(skel, SOCK_STREAM, false, true);
+	if (test__start_subtest("tc_socket_lookup_udp"))
+		_test_vrf_socket_lookup(skel, SOCK_DGRAM, false, false);
+
+	/* XDP hook */
+	if (test__start_subtest("xdp_socket_lookup_tcp"))
+		_test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+	if (test__start_subtest("xdp_socket_lookup_tcp_skc"))
+		_test_vrf_socket_lookup(skel, SOCK_STREAM, true, true);
+	if (test__start_subtest("xdp_socket_lookup_udp"))
+		_test_vrf_socket_lookup(skel, SOCK_DGRAM, true, false);
+
+done:
+	vrf_socket_lookup__destroy(skel);
+	cleanup();
+}
diff --git a/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
new file mode 100644
index 000000000000..26e07a252585
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <stdbool.h>
+
+int lookup_status;
+bool test_xdp;
+bool tcp_skc;
+
+#define CUR_NS BPF_F_CURRENT_NETNS
+
+static void socket_lookup(void *ctx, void *data_end, void *data)
+{
+ struct ethhdr *eth = data;
+ struct bpf_sock_tuple *tp;
+ struct bpf_sock *sk;
+ struct iphdr *iph;
+ int tplen;
+
+ if (eth + 1 > data_end)
+ return;
+
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return;
+
+ iph = (struct iphdr *)(eth + 1);
+ if (iph + 1 > data_end)
+ return;
+
+ tp = (struct bpf_sock_tuple *)&iph->saddr;
+ tplen = sizeof(tp->ipv4);
+ if ((void *)tp + tplen > data_end)
+ return;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (tcp_skc)
+ sk = bpf_skc_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
+ else
+ sk = bpf_sk_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
+ break;
+ case IPPROTO_UDP:
+ sk = bpf_sk_lookup_udp(ctx, tp, tplen, CUR_NS, 0);
+ break;
+ default:
+ return;
+ }
+
+ lookup_status = 0;
+
+ if (sk) {
+ bpf_sk_release(sk);
+ lookup_status = 1;
+ }
+}
+
+SEC("tc")
+int tc_socket_lookup(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+
+ if (test_xdp)
+ return TC_ACT_UNSPEC;
+
+ socket_lookup(skb, data_end, data);
+ return TC_ACT_UNSPEC;
+}
+
+SEC("xdp")
+int xdp_socket_lookup(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ if (!test_xdp)
+ return XDP_PASS;
+
+ socket_lookup(xdp, data_end, data);
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread