From: Francis Deslauriers <francis.deslauriers@crowdstrike.com>
To: "bpf@vger.kernel.org" <bpf@vger.kernel.org>
Cc: "daniel@iogearbox.net" <daniel@iogearbox.net>,
"maxim@isovalent.com" <maxim@isovalent.com>,
"ast@kernel.org" <ast@kernel.org>
Subject: Recent eBPF verifier rejects program that was accepted by 6.8 eBPF verifier
Date: Fri, 15 Nov 2024 20:59:43 +0000 [thread overview]
Message-ID: <50b7301fcfd0682c9923b3639c1589f3eb37af33.camel@crowdstrike.com> (raw)
Hi all,
I'd like to report what I think is a eBPF verifier regression. I have a eBPF
program that is accepted by the 6.8 eBPF verifier but is rejected on 6.9 and
later. It's my understanding that a eBPF program that is accepted by one
version of the verifier should be accepted on all subsequent versions.
I added a stripped down libbpf reproducer based on the libbpf-bootstrap repo to
showcase this issue (see below). Note that if I change the RULE_LIST_SIZE from
380 to 30, the verifier accepts the program.
I bisected the issue down to this commit:
commit 8ecfc371d829bfed75e0ef2cab45b2290b982f64
Author: Maxim Mikityanskiy <maxim@isovalent.com>
Date: Mon Jan 8 22:52:02 2024 +0200
bpf: Assign ID to scalars on spill
Before commit 8ecfc371d82
$ uname -r
6.7.0-12291-g87e51ac6cb19
$ sudo ./tc
loads succesfully
After commit 8ecfc371d82
$ uname -r
6.7.0-12292-g8ecfc371d829
$ sudo ./tc 2>&1 | grep -A4 "too large"
BPF program is too large. Processed 1000001 insn
processed 1000001 insns (limit 1000000) max_states_per_insn 26 total_states 62473 peak_states 5456 mark_read 162
-- END PROG LOAD LOG --
libbpf: prog 'tc_ingress': failed to load: -7
libbpf: failed to load object 'tc_bpf'
I confirmed the issue affects master as well.
$ uname -r
6.12.0-rc7-00125-gcfaaa7d010d1
$ sudo ./tc 2>&1 | grep -A4 "too large"
BPF program is too large. Processed 1000001 insn
processed 1000001 insns (limit 1000000) max_states_per_insn 26 total_states 62473 peak_states 5456 mark_read 162
-- END PROG LOAD LOG --
libbpf: prog 'tc_ingress': failed to load: -7
libbpf: failed to load object 'tc_bpf'
I'm investigating how this commit affects how instructions are counted
and why it leads to such a drastic change for this particular program.
I wanted to share my findings early in case someone has any hints for me.
To reproduce, use the following file as a drop in replacement of
libbpf-boostrap's examples/c/tc.bpf.c:
--->8---
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
char LICENSE[] SEC("license") = "GPL";
typedef enum : uint8_t {
CONN_DIR_OUTBOUND = 0,
CONN_DIR_INBOUND = 1,
CONN_DIR_NEITHER = 2,
} connection_dir_t;
typedef enum : uint8_t {
ACTION_ALLOW = 0,
ACTION_DENY = 1,
} rule_action_t;
#define TC_ACT_UNSPEC (-1)
#define TC_ACT_OK 0
#define TC_ACT_RECLASSIFY 1
#define TC_ACT_SHOT 2
#define AF_UNSPEC 0
#define AF_INET 2
#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD
#define RULE_LIST_SIZE 380
#define RULE_CONTROLS 0x00000001
struct connection_tuple_v4 {
uint32_t saddr;
uint32_t daddr;
};
typedef struct pkt_info {
struct connection_tuple_v4 ip4;
uint16_t family;
uint16_t ip_protocol;
uint16_t eth_protocol;
rule_action_t action;
bool ip_proto_udp_or_tcp;
uint8_t pad[7];
} pkt_info_t;
typedef struct pkt_info_rule {
pkt_info_t info;
uint32_t pad;
struct connection_tuple_v4 ip4_mask;
} pkt_info_rule_t;
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 2);
__type(key, u32);
__type(value, pkt_info_rule_t[RULE_LIST_SIZE]);
__uint(map_flags, BPF_F_RDONLY_PROG | BPF_F_WRONLY);
} rules_map SEC(".maps");
static __always_inline bool
match_ipaddr_with_mask_ipv4(const uint32_t *rule_addr,
const uint32_t *rule_addr_mask,
const uint32_t *data_packet_addr)
{
if ((*rule_addr == 0) || (*data_packet_addr == 0)) {
return true;
}
else if ((*rule_addr_mask == 0) && (*rule_addr == *data_packet_addr)) {
return true;
}
else if ((*rule_addr_mask != 0) &&
((*rule_addr & *rule_addr_mask) ==
(*data_packet_addr & *rule_addr_mask))) {
return true;
}
return false;
}
static __always_inline bool protocol_match(int rule_protocol,
int packet_protocol)
{
return (rule_protocol == 0 || rule_protocol == packet_protocol);
}
struct match_ipv4_pkt {
uint32_t local_addr;
uint32_t remote_addr;
};
static __always_inline bool match_ipv4(const pkt_info_rule_t *rule,
pkt_info_t *pkt,
const struct match_ipv4_pkt *cmp,
int *output)
{
if (protocol_match(rule->info.ip_protocol, pkt->ip_protocol) &&
(match_ipaddr_with_mask_ipv4(&rule->info.ip4.saddr,
&rule->ip4_mask.saddr,
&cmp->local_addr)) &&
(match_ipaddr_with_mask_ipv4(&rule->info.ip4.daddr,
&rule->ip4_mask.daddr,
&cmp->remote_addr))) {
if (pkt->ip_proto_udp_or_tcp) {
return false;
}
*output = rule->info.action;
return true;
}
return false;
}
static __always_inline int apply_rules_ipv4(pkt_info_t *pkt,
const pkt_info_rule_t *rules)
{
if ((pkt == NULL) || (rules == NULL)) {
return TC_ACT_UNSPEC;
}
int output = ACTION_ALLOW;
struct match_ipv4_pkt dir_cmp;
dir_cmp.local_addr = pkt->ip4.daddr;
dir_cmp.remote_addr = pkt->ip4.saddr;
for (int i = 0; i < RULE_LIST_SIZE; i++) {
const pkt_info_rule_t rule = rules[i];
bool ret;
ret = match_ipv4(&rule, pkt, &dir_cmp, &output);
if (ret) {
break;
}
}
if (output == ACTION_ALLOW) {
return TC_ACT_UNSPEC;
} else {
return TC_ACT_SHOT;
}
}
#define HEADER_BOUNDS_CHECK(hdr, skb) \
((unsigned long)(hdr + 1) > (unsigned long)skb->data_end)
static __always_inline bool get_tuple(const struct __sk_buff *skb,
const struct ethhdr *eth, pkt_info_t *pkt)
{
pkt->eth_protocol = bpf_ntohs(BPF_CORE_READ(eth, h_proto));
if ((pkt->eth_protocol == ETH_P_IP) ||
((skb->protocol == __bpf_constant_htons(ETH_P_IP)))) {
struct iphdr *iph = (struct iphdr *)(skb->data + sizeof(*eth));
if (HEADER_BOUNDS_CHECK(iph, skb)) {
return false;
}
pkt->family = AF_INET;
pkt->ip4.saddr = BPF_CORE_READ(iph, saddr);
pkt->ip4.daddr = BPF_CORE_READ(iph, daddr);
pkt->ip_protocol = BPF_CORE_READ(iph, protocol);
pkt->ip_proto_udp_or_tcp = (pkt->ip_protocol == IPPROTO_TCP) ||
(pkt->ip_protocol == IPPROTO_UDP);
return true;
} else {
pkt->family = AF_UNSPEC;
return false;
}
}
static __always_inline bool populate_pkt_info(const struct __sk_buff *skb,
pkt_info_t *pkt)
{
struct ethhdr *eth = (struct ethhdr *)(unsigned long)(skb->data);
if (HEADER_BOUNDS_CHECK(eth, skb)) {
return false;
}
return get_tuple(skb, eth, pkt);
}
static __always_inline int allow_pkt(pkt_info_t *pkt)
{
uint32_t key = pkt->family;
const pkt_info_rule_t *rules = bpf_map_lookup_elem(&rules_map, &key);
if (rules == NULL) {
return TC_ACT_UNSPEC;
}
switch (pkt->family) {
case AF_INET:
return apply_rules_ipv4(pkt, rules);
default:
return TC_ACT_UNSPEC;
}
}
SEC("tc")
int tc_ingress(struct __sk_buff *skb)
{
pkt_info_t pkt ={0};
bool ret = populate_pkt_info(skb, &pkt);
if (ret != true) {
return TC_ACT_UNSPEC;
}
return allow_pkt(&pkt);
}
--->8---
Thank you,
Francis
next reply other threads:[~2024-11-15 21:06 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-15 20:59 Francis Deslauriers [this message]
2024-11-20 20:33 ` Recent eBPF verifier rejects program that was accepted by 6.8 eBPF verifier Maxim Mikityanskiy
2024-11-20 21:55 ` Francis Deslauriers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=50b7301fcfd0682c9923b3639c1589f3eb37af33.camel@crowdstrike.com \
--to=francis.deslauriers@crowdstrike.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=maxim@isovalent.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox