* [bpf-next 1/3] flow_dissector: implements flow dissector BPF hook
2018-08-30 18:22 [bpf-next 0/3] Introduce eBPF flow dissector Petar Penkov
@ 2018-08-30 18:22 ` Petar Penkov
2018-09-02 21:03 ` Daniel Borkmann
2018-08-30 18:23 ` [bpf-next 2/3] flow_dissector: implements eBPF parser Petar Penkov
2018-08-30 18:23 ` [bpf-next 3/3] selftests/bpf: test bpf flow dissection Petar Penkov
2 siblings, 1 reply; 7+ messages in thread
From: Petar Penkov @ 2018-08-30 18:22 UTC (permalink / raw)
To: netdev
Cc: davem, ast, daniel, simon.horman, ecree, songliubraving, tom,
Petar Penkov, Willem de Bruijn
From: Petar Penkov <ppenkov@google.com>
Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and
attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector
path. The BPF program is per-network namespace.
Signed-off-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
include/linux/bpf.h | 1 +
include/linux/bpf_types.h | 1 +
include/linux/skbuff.h | 7 ++
include/net/net_namespace.h | 3 +
include/net/sch_generic.h | 12 ++-
include/uapi/linux/bpf.h | 25 ++++++
kernel/bpf/syscall.c | 8 ++
kernel/bpf/verifier.c | 33 ++++++++
net/core/filter.c | 67 ++++++++++++++++
net/core/flow_dissector.c | 136 +++++++++++++++++++++++++++++++++
tools/bpf/bpftool/prog.c | 1 +
tools/include/uapi/linux/bpf.h | 25 ++++++
tools/lib/bpf/libbpf.c | 2 +
13 files changed, 318 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..988a00797bcd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -212,6 +212,7 @@ enum bpf_reg_type {
PTR_TO_PACKET_META, /* skb->data - meta_len */
PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
+ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
};
/* The information passed from prog-specific *_is_valid_access
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index cd26c090e7c0..22083712dd18 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
#ifdef CONFIG_INET
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
#endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 17a13e4785fc..ce0e863f02a2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -243,6 +243,8 @@ struct scatterlist;
struct pipe_inode_info;
struct iov_iter;
struct napi_struct;
+struct bpf_prog;
+union bpf_attr;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack {
@@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
const struct flow_dissector_key *key,
unsigned int key_count);
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog);
+
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
+
bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 9b5fdc50519a..99d4148e0f90 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -43,6 +43,7 @@ struct ctl_table_header;
struct net_generic;
struct uevent_sock;
struct netns_ipvs;
+struct bpf_prog;
#define NETDEV_HASHBITS 8
@@ -145,6 +146,8 @@ struct net {
#endif
struct net_generic __rcu *gen;
+ struct bpf_prog __rcu *flow_dissector_prog;
+
/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a6d00093f35e..1b81ba85fd2d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@ struct Qdisc_ops;
struct qdisc_walker;
struct tcf_walker;
struct module;
+struct bpf_flow_keys;
typedef int tc_setup_cb_t(enum tc_setup_type type,
void *type_data, void *cb_priv);
@@ -307,9 +308,14 @@ struct tcf_proto {
};
struct qdisc_skb_cb {
- unsigned int pkt_len;
- u16 slave_dev_queue_mapping;
- u16 tc_classid;
+ union {
+ struct {
+ unsigned int pkt_len;
+ u16 slave_dev_queue_mapping;
+ u16 tc_classid;
+ };
+ struct bpf_flow_keys *flow_keys;
+ };
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
};
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 66917a4eba27..3064706fcaaa 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -152,6 +152,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_SEG6LOCAL,
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_PROG_TYPE_FLOW_DISSECTOR,
};
enum bpf_attach_type {
@@ -172,6 +173,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP4_SENDMSG,
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
+ BPF_FLOW_DISSECTOR,
__MAX_BPF_ATTACH_TYPE
};
@@ -2333,6 +2335,7 @@ struct __sk_buff {
/* ... here. */
__u32 data_meta;
+ __u32 flow_keys;
};
struct bpf_tunnel_key {
@@ -2778,4 +2781,26 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
+struct bpf_flow_keys {
+ __u16 thoff;
+ __u16 addr_proto; /* ETH_P_* of valid addrs */
+ __u8 is_frag;
+ __u8 is_first_frag;
+ __u8 is_encap;
+ __be16 n_proto;
+ __u8 ip_proto;
+ union {
+ struct {
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+ };
+ struct {
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+ };
+ __be16 sport;
+ __be16 dport;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8339d81cba1d..043c1a72e382 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1616,6 +1616,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_LIRC_MODE2:
ptype = BPF_PROG_TYPE_LIRC_MODE2;
break;
+ case BPF_FLOW_DISSECTOR:
+ ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
+ break;
default:
return -EINVAL;
}
@@ -1637,6 +1640,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_PROG_TYPE_LIRC_MODE2:
ret = lirc_prog_attach(attr, prog);
break;
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
+ break;
default:
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
}
@@ -1689,6 +1695,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
case BPF_LIRC_MODE2:
return lirc_prog_detach(attr);
+ case BPF_FLOW_DISSECTOR:
+ return skb_flow_dissector_bpf_prog_detach(attr);
default:
return -EINVAL;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 92246117d2b0..ac9aeed325b5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -261,6 +261,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET] = "pkt",
[PTR_TO_PACKET_META] = "pkt_meta",
[PTR_TO_PACKET_END] = "pkt_end",
+ [PTR_TO_FLOW_KEYS] = "flow_keys",
};
static void print_liveness(struct bpf_verifier_env *env,
@@ -993,6 +994,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
case PTR_TO_PACKET_END:
+ case PTR_TO_FLOW_KEYS:
case CONST_PTR_TO_MAP:
return true;
default:
@@ -1321,6 +1323,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_SK_MSG:
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
if (meta)
return meta->pkt_access;
@@ -1404,6 +1407,18 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
return -EACCES;
}
+static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
+ int size)
+{
+ if (size < 0 || off < 0 ||
+ (u64)off + size > sizeof(struct bpf_flow_keys)) {
+ verbose(env, "invalid access to flow keys off=%d size=%d\n",
+ off, size);
+ return -EACCES;
+ }
+ return 0;
+}
+
static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
@@ -1505,6 +1520,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
* right in front, treat it the very same way.
*/
return check_pkt_ptr_alignment(env, reg, off, size, strict);
+ case PTR_TO_FLOW_KEYS:
+ pointer_desc = "flow keys ";
+ break;
case PTR_TO_MAP_VALUE:
pointer_desc = "value ";
break;
@@ -1778,6 +1796,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_packet_access(env, regno, off, size, false);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_FLOW_KEYS) {
+ if (t == BPF_WRITE && value_regno >= 0 &&
+ is_pointer_value(env, value_regno)) {
+ verbose(env, "R%d leaks addr into flow keys\n",
+ value_regno);
+ return -EACCES;
+ }
+
+ err = check_flow_keys_access(env, off, size);
+ if (!err && t == BPF_READ && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
@@ -1925,6 +1954,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
case PTR_TO_PACKET_META:
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
+ case PTR_TO_FLOW_KEYS:
+ return check_flow_keys_access(env, reg->off, access_size);
case PTR_TO_MAP_VALUE:
return check_map_access(env, regno, reg->off, access_size,
zero_size_allowed);
@@ -3976,6 +4007,7 @@ static bool may_access_skb(enum bpf_prog_type type)
case BPF_PROG_TYPE_SOCKET_FILTER:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
return true;
default:
return false;
@@ -4451,6 +4483,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
case PTR_TO_CTX:
case CONST_PTR_TO_MAP:
case PTR_TO_PACKET_END:
+ case PTR_TO_FLOW_KEYS:
/* Only valid matches are exact, which memcmp() above
* would have accepted
*/
diff --git a/net/core/filter.c b/net/core/filter.c
index c25eb36f1320..0143b9c0c67e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5092,6 +5092,17 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
+static const struct bpf_func_proto *
+flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
static const struct bpf_func_proto *
lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -5207,6 +5218,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
if (size != size_default)
return false;
break;
@@ -5235,6 +5247,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5260,6 +5273,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
}
@@ -5470,6 +5484,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5671,6 +5686,7 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
}
@@ -5730,6 +5746,38 @@ static bool sk_msg_is_valid_access(int off, int size,
return true;
}
+static bool flow_dissector_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
+ info->reg_type = PTR_TO_FLOW_KEYS;
+ break;
+ case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ return false;
+ }
+
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -6024,6 +6072,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct sock_common,
skc_num, 2, target_size));
break;
+
+ case offsetof(struct __sk_buff, flow_keys):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, flow_keys);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct qdisc_skb_cb, flow_keys);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
}
return insn - insn_buf;
@@ -6987,6 +7044,16 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
const struct bpf_prog_ops sk_msg_prog_ops = {
};
+const struct bpf_verifier_ops flow_dissector_verifier_ops = {
+ .get_func_proto = flow_dissector_func_proto,
+ .is_valid_access = flow_dissector_is_valid_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
+ .gen_ld_abs = bpf_gen_ld_abs,
+};
+
+const struct bpf_prog_ops flow_dissector_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index ce9eeeb7c024..a5f7da69911a 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -25,6 +25,9 @@
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
#include <uapi/linux/batadv_packet.h>
+#include <linux/bpf.h>
+
+static DEFINE_MUTEX(flow_dissector_mutex);
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
@@ -62,6 +65,44 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
}
EXPORT_SYMBOL(skb_flow_dissector_init);
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ struct bpf_prog *attached;
+ struct net *net;
+
+ net = current->nsproxy->net_ns;
+ mutex_lock(&flow_dissector_mutex);
+ attached = rcu_dereference_protected(net->flow_dissector_prog,
+ lockdep_is_held(&flow_dissector_mutex));
+ if (attached) {
+ /* Only one BPF program can be attached at a time */
+ mutex_unlock(&flow_dissector_mutex);
+ return -EEXIST;
+ }
+ rcu_assign_pointer(net->flow_dissector_prog, prog);
+ mutex_unlock(&flow_dissector_mutex);
+ return 0;
+}
+
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+ struct bpf_prog *attached;
+ struct net *net;
+
+ net = current->nsproxy->net_ns;
+ mutex_lock(&flow_dissector_mutex);
+ attached = rcu_dereference_protected(net->flow_dissector_prog,
+ lockdep_is_held(&flow_dissector_mutex));
+ if (!attached) {
+ mutex_unlock(&flow_dissector_mutex);
+ return -ENOENT;
+ }
+ bpf_prog_put(attached);
+ RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
+ mutex_unlock(&flow_dissector_mutex);
+ return 0;
+}
/**
* skb_flow_get_be16 - extract be16 entity
* @skb: sk_buff to extract from
@@ -588,6 +629,60 @@ static bool skb_flow_dissect_allowed(int *num_hdrs)
return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
}
+static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_control *key_control;
+ struct flow_dissector_key_basic *key_basic;
+ struct flow_dissector_key_addrs *key_addrs;
+ struct flow_dissector_key_ports *key_ports;
+
+ key_control = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ target_container);
+ key_control->thoff = flow_keys->thoff;
+ if (flow_keys->is_frag)
+ key_control->flags |= FLOW_DIS_IS_FRAGMENT;
+ if (flow_keys->is_first_frag)
+ key_control->flags |= FLOW_DIS_FIRST_FRAG;
+ if (flow_keys->is_encap)
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+
+ key_basic = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ target_container);
+ key_basic->n_proto = flow_keys->n_proto;
+ key_basic->ip_proto = flow_keys->ip_proto;
+
+ if (flow_keys->addr_proto == ETH_P_IP &&
+ dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ target_container);
+ key_addrs->v4addrs.src = flow_keys->ipv4_src;
+ key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ } else if (flow_keys->addr_proto == ETH_P_IPV6 &&
+ dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ target_container);
+ memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
+ sizeof(key_addrs->v6addrs));
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+ key_ports->src = flow_keys->sport;
+ key_ports->dst = flow_keys->dport;
+ }
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -619,6 +714,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_vlan *key_vlan;
enum flow_dissect_ret fdret;
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+ struct bpf_prog *attached;
int num_hdrs = 0;
u8 ip_proto = 0;
bool ret;
@@ -658,6 +754,46 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
+ rcu_read_lock();
+ attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog);
+ if (attached) {
+ /* Note that even though the const qualifier is discarded
+ * throughout the execution of the BPF program, all changes(the
+ * control block) are reverted after the BPF program returns.
+ * Therefore, __skb_flow_dissect does not alter the skb.
+ */
+ struct bpf_flow_keys flow_keys = {};
+ struct qdisc_skb_cb cb_saved;
+ struct qdisc_skb_cb *cb;
+ u16 *pseudo_cb;
+ u32 result;
+
+ cb = qdisc_skb_cb(skb);
+ pseudo_cb = (u16 *)bpf_skb_cb((struct sk_buff *)skb);
+
+ /* Save Control Block */
+ memcpy(&cb_saved, cb, sizeof(cb_saved));
+ memset(cb, 0, sizeof(cb_saved));
+
+ /* Pass parameters to the BPF program */
+ cb->flow_keys = &flow_keys;
+ *pseudo_cb = nhoff;
+
+ bpf_compute_data_pointers((struct sk_buff *)skb);
+ result = BPF_PROG_RUN(attached, skb);
+
+ /* Restore state */
+ memcpy(cb, &cb_saved, sizeof(cb_saved));
+
+ __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+ target_container);
+ key_control->thoff = min_t(u16, key_control->thoff,
+ skb ? skb->len : hlen);
+ rcu_read_unlock();
+ return result == BPF_OK;
+ }
+ rcu_read_unlock();
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index dce960d22106..b1cd3bc8db70 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -74,6 +74,7 @@ static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
};
static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 66917a4eba27..3064706fcaaa 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -152,6 +152,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_SEG6LOCAL,
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_PROG_TYPE_FLOW_DISSECTOR,
};
enum bpf_attach_type {
@@ -172,6 +173,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP4_SENDMSG,
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
+ BPF_FLOW_DISSECTOR,
__MAX_BPF_ATTACH_TYPE
};
@@ -2333,6 +2335,7 @@ struct __sk_buff {
/* ... here. */
__u32 data_meta;
+ __u32 flow_keys;
};
struct bpf_tunnel_key {
@@ -2778,4 +2781,26 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
+struct bpf_flow_keys {
+ __u16 thoff;
+ __u16 addr_proto; /* ETH_P_* of valid addrs */
+ __u8 is_frag;
+ __u8 is_first_frag;
+ __u8 is_encap;
+ __be16 n_proto;
+ __u8 ip_proto;
+ union {
+ struct {
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+ };
+ struct {
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+ };
+ __be16 sport;
+ __be16 dport;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2abd0f112627..0c749ce1b717 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1502,6 +1502,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_SK_REUSEPORT:
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
return false;
case BPF_PROG_TYPE_UNSPEC:
case BPF_PROG_TYPE_KPROBE:
@@ -2121,6 +2122,7 @@ static const struct {
BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2),
+ BPF_PROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR),
BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
--
2.19.0.rc0.228.g281dcd1b4d0-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [bpf-next 3/3] selftests/bpf: test bpf flow dissection
2018-08-30 18:22 [bpf-next 0/3] Introduce eBPF flow dissector Petar Penkov
2018-08-30 18:22 ` [bpf-next 1/3] flow_dissector: implements flow dissector BPF hook Petar Penkov
2018-08-30 18:23 ` [bpf-next 2/3] flow_dissector: implements eBPF parser Petar Penkov
@ 2018-08-30 18:23 ` Petar Penkov
2 siblings, 0 replies; 7+ messages in thread
From: Petar Penkov @ 2018-08-30 18:23 UTC (permalink / raw)
To: netdev
Cc: davem, ast, daniel, simon.horman, ecree, songliubraving, tom,
Petar Penkov, Willem de Bruijn
From: Petar Penkov <ppenkov@google.com>
Adds a test that sends different types of packets over multiple
tunnels and verifies that valid packets are dissected correctly. To do
so, a tc-flower rule is added to drop packets on UDP src port 9, and
packets are sent from ports 8, 9, and 10. Only the packets on port 9
should be dropped. Because tc-flower relies on the flow dissector to
match flows, correct classification demonstrates correct dissection.
Also add support logic to load the BPF program and to inject the test
packets.
Signed-off-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
tools/testing/selftests/bpf/.gitignore | 2 +
tools/testing/selftests/bpf/Makefile | 6 +-
tools/testing/selftests/bpf/config | 1 +
.../selftests/bpf/flow_dissector_load.c | 140 ++++
.../selftests/bpf/test_flow_dissector.c | 782 ++++++++++++++++++
.../selftests/bpf/test_flow_dissector.sh | 115 +++
tools/testing/selftests/bpf/with_addr.sh | 54 ++
tools/testing/selftests/bpf/with_tunnels.sh | 36 +
8 files changed, 1134 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/bpf/flow_dissector_load.c
create mode 100644 tools/testing/selftests/bpf/test_flow_dissector.c
create mode 100755 tools/testing/selftests/bpf/test_flow_dissector.sh
create mode 100755 tools/testing/selftests/bpf/with_addr.sh
create mode 100755 tools/testing/selftests/bpf/with_tunnels.sh
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 49938d72cf63..e61a85ac4b79 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -19,3 +19,5 @@ test_btf
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user
+test_flow_dissector
+flow_dissector_load
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e65f50f9185e..fd3851d5c079 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -47,10 +47,12 @@ TEST_PROGS := test_kmod.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
- test_skb_cgroup_id.sh
+ test_skb_cgroup_id.sh \
+ test_flow_dissector.sh
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
+ flow_dissector_load test_flow_dissector
include ../lib.mk
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index b4994a94968b..3655508f95fd 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -18,3 +18,4 @@ CONFIG_CRYPTO_HMAC=m
CONFIG_CRYPTO_SHA256=m
CONFIG_VXLAN=y
CONFIG_GENEVE=y
+CONFIG_NET_CLS_FLOWER=m
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
new file mode 100644
index 000000000000..d3273b5b3173
--- /dev/null
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
+const char *cfg_map_name = "jmp_table";
+bool cfg_attach = true;
+char *cfg_section_name;
+char *cfg_path_name;
+
+static void load_and_attach_program(void)
+{
+ struct bpf_program *prog, *main_prog;
+ struct bpf_map *prog_array;
+ int i, fd, prog_fd, ret;
+ struct bpf_object *obj;
+ int prog_array_fd;
+
+ ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj,
+ &prog_fd);
+ if (ret)
+ error(1, 0, "bpf_prog_load %s", cfg_path_name);
+
+ main_prog = bpf_object__find_program_by_title(obj, cfg_section_name);
+ if (!main_prog)
+ error(1, 0, "bpf_object__find_program_by_title %s",
+ cfg_section_name);
+
+ prog_fd = bpf_program__fd(main_prog);
+ if (prog_fd < 0)
+ error(1, 0, "bpf_program__fd");
+
+ prog_array = bpf_object__find_map_by_name(obj, cfg_map_name);
+ if (!prog_array)
+ error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name);
+
+ prog_array_fd = bpf_map__fd(prog_array);
+ if (prog_array_fd < 0)
+ error(1, 0, "bpf_map__fd %s", cfg_map_name);
+
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ fd = bpf_program__fd(prog);
+ if (fd < 0)
+ error(1, 0, "bpf_program__fd");
+
+ if (fd != prog_fd) {
+ printf("%d: %s\n", i, bpf_program__title(prog, false));
+ bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY);
+ ++i;
+ }
+ }
+
+ ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0);
+ if (ret)
+ error(1, 0, "bpf_prog_attach %s", cfg_path_name);
+
+ ret = bpf_object__pin(obj, cfg_pin_path);
+ if (ret)
+ error(1, 0, "bpf_object__pin %s", cfg_pin_path);
+
+}
+
+static void detach_program(void)
+{
+ char command[64];
+ int ret;
+
+ ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+ if (ret)
+ error(1, 0, "bpf_prog_detach");
+
+ /* To unpin, it is necessary and sufficient to just remove this dir */
+ sprintf(command, "rm -r %s", cfg_pin_path);
+ ret = system(command);
+ if (ret)
+ error(1, errno, command);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ bool attach = false;
+ bool detach = false;
+ int c;
+
+ while ((c = getopt(argc, argv, "adp:s:")) != -1) {
+ switch (c) {
+ case 'a':
+ if (detach)
+ error(1, 0, "attach/detach are exclusive");
+ attach = true;
+ break;
+ case 'd':
+ if (attach)
+ error(1, 0, "attach/detach are exclusive");
+ detach = true;
+ break;
+ case 'p':
+ if (cfg_path_name)
+ error(1, 0, "only one prog name can be given");
+
+ cfg_path_name = optarg;
+ break;
+ case 's':
+ if (cfg_section_name)
+ error(1, 0, "only one section can be given");
+
+ cfg_section_name = optarg;
+ break;
+ }
+ }
+
+ if (detach)
+ cfg_attach = false;
+
+ if (cfg_attach && !cfg_path_name)
+ error(1, 0, "must provide a path to the BPF program");
+
+ if (cfg_attach && !cfg_section_name)
+ error(1, 0, "must provide a section name");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ if (cfg_attach)
+ load_and_attach_program();
+ else
+ detach_program();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
new file mode 100644
index 000000000000..12b784afba31
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inject packets with all sorts of encapsulation into the kernel.
+ *
+ * IPv4/IPv6 outer layer 3
+ * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
+ * IPv4/IPv6 inner layer 3
+ */
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <netinet/ip.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define CFG_PORT_INNER 8000
+
+/* Add some protocol definitions that do not exist in userspace */
+
+struct grehdr {
+ uint16_t unused;
+ uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hlen:5,
+ control:1,
+ version:2;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 version:2,
+ control:1,
+ hlen:5;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 proto_ctype;
+ __be16 flags;
+ };
+ __be32 word;
+ };
+};
+
+static uint8_t cfg_dsfield_inner;
+static uint8_t cfg_dsfield_outer;
+static uint8_t cfg_encap_proto;
+static bool cfg_expect_failure = false;
+static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */
+static int cfg_l3_inner = AF_UNSPEC;
+static int cfg_l3_outer = AF_UNSPEC;
+static int cfg_num_pkt = 10;
+static int cfg_num_secs = 0;
+static char cfg_payload_char = 'a';
+static int cfg_payload_len = 100;
+static int cfg_port_gue = 6080;
+static bool cfg_only_rx;
+static bool cfg_only_tx;
+static int cfg_src_port = 9;
+
+static char buf[ETH_DATA_LEN];
+
+#define INIT_ADDR4(name, addr4, port) \
+ static struct sockaddr_in name = { \
+ .sin_family = AF_INET, \
+ .sin_port = __constant_htons(port), \
+ .sin_addr.s_addr = __constant_htonl(addr4), \
+ };
+
+#define INIT_ADDR6(name, addr6, port) \
+ static struct sockaddr_in6 name = { \
+ .sin6_family = AF_INET6, \
+ .sin6_port = __constant_htons(port), \
+ .sin6_addr = addr6, \
+ };
+
+INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
+INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
+INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
+INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
+
+INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+
+static unsigned long util_gettime(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void util_printaddr(const char *msg, struct sockaddr *addr)
+{
+ unsigned long off = 0;
+ char nbuf[INET6_ADDRSTRLEN];
+
+ switch (addr->sa_family) {
+ case PF_INET:
+ off = __builtin_offsetof(struct sockaddr_in, sin_addr);
+ break;
+ case PF_INET6:
+ off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
+ break;
+ default:
+ error(1, 0, "printaddr: unsupported family %u\n",
+ addr->sa_family);
+ }
+
+ if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
+ sizeof(nbuf)))
+ error(1, errno, "inet_ntop");
+
+ fprintf(stderr, "%s: %s\n", msg, nbuf);
+}
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_u16; i++)
+ sum += start[i];
+
+ return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+ unsigned long sum)
+{
+ sum += add_csum_hword(start, num_u16);
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return ~sum;
+}
+
+static void build_ipv4_header(void *header, uint8_t proto,
+ uint32_t src, uint32_t dst,
+ int payload_len, uint8_t tos)
+{
+ struct iphdr *iph = header;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->tos = tos;
+ iph->ttl = 8;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = proto;
+ iph->saddr = src;
+ iph->daddr = dst;
+ iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+ uint16_t val, *ptr = (uint16_t *)ip6h;
+
+ val = ntohs(*ptr);
+ val &= 0xF00F;
+ val |= ((uint16_t) dsfield) << 4;
+ *ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+ struct sockaddr_in6 *src,
+ struct sockaddr_in6 *dst,
+ int payload_len, uint8_t dsfield)
+{
+ struct ipv6hdr *ip6h = header;
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = 8;
+ ipv6_set_dsfield(ip6h, dsfield);
+
+ memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+ memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static uint16_t build_udp_v4_csum(const struct iphdr *iph,
+ const struct udphdr *udph,
+ int num_words)
+{
+ unsigned long pseudo_sum;
+ int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */
+
+ pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
+ pseudo_sum += htons(IPPROTO_UDP);
+ pseudo_sum += udph->len;
+ return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
+ const struct udphdr *udph,
+ int num_words)
+{
+ unsigned long pseudo_sum;
+ int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */
+
+ pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
+ pseudo_sum += htons(ip6h->nexthdr);
+ pseudo_sum += ip6h->payload_len;
+ return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static void build_udp_header(void *header, int payload_len,
+ uint16_t dport, int family)
+{
+ struct udphdr *udph = header;
+ int len = sizeof(*udph) + payload_len;
+
+ udph->source = htons(cfg_src_port);
+ udph->dest = htons(dport);
+ udph->len = htons(len);
+ udph->check = 0;
+ if (family == AF_INET)
+ udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+ udph, len >> 1);
+ else
+ udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+ udph, len >> 1);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+ struct guehdr *gueh = header;
+
+ gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+ struct grehdr *greh = header;
+
+ greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+ if (family == AF_INET)
+ return sizeof(struct iphdr);
+ else
+ return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(void)
+{
+ int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+ int el3_len = 0;
+
+ if (cfg_l3_extra)
+ el3_len = l3_length(cfg_l3_extra);
+
+ /* calculate header offsets */
+ if (cfg_encap_proto) {
+ ol3_len = l3_length(cfg_l3_outer);
+
+ if (cfg_encap_proto == IPPROTO_GRE)
+ ol4_len = sizeof(struct grehdr);
+ else if (cfg_encap_proto == IPPROTO_UDP)
+ ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ }
+
+ il3_len = l3_length(cfg_l3_inner);
+ il4_len = sizeof(struct udphdr);
+
+ if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
+ sizeof(buf))
+ error(1, 0, "packet too large\n");
+
+ /*
+ * Fill packet from inside out, to calculate correct checksums.
+ * But create ip before udp headers, as udp uses ip for pseudo-sum.
+ */
+ memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+ cfg_payload_char, cfg_payload_len);
+
+ /* add zero byte for udp csum padding */
+ buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
+
+ switch (cfg_l3_inner) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP,
+ in_saddr4.sin_addr.s_addr,
+ in_daddr4.sin_addr.s_addr,
+ il4_len + cfg_payload_len,
+ cfg_dsfield_inner);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP,
+ &in_saddr6, &in_daddr6,
+ il4_len + cfg_payload_len,
+ cfg_dsfield_inner);
+ break;
+ }
+
+ build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+ cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
+
+ if (!cfg_encap_proto)
+ return il3_len + il4_len + cfg_payload_len;
+
+ switch (cfg_l3_outer) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len, cfg_encap_proto,
+ out_saddr4.sin_addr.s_addr,
+ out_daddr4.sin_addr.s_addr,
+ ol4_len + il3_len + il4_len + cfg_payload_len,
+ cfg_dsfield_outer);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len, cfg_encap_proto,
+ &out_saddr6, &out_daddr6,
+ ol4_len + il3_len + il4_len + cfg_payload_len,
+ cfg_dsfield_outer);
+ break;
+ }
+
+ switch (cfg_encap_proto) {
+ case IPPROTO_UDP:
+ build_gue_header(buf + el3_len + ol3_len + ol4_len -
+ sizeof(struct guehdr),
+ cfg_l3_inner == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6);
+ build_udp_header(buf + el3_len + ol3_len,
+ sizeof(struct guehdr) + il3_len + il4_len +
+ cfg_payload_len,
+ cfg_port_gue, cfg_l3_outer);
+ break;
+ case IPPROTO_GRE:
+ build_gre_header(buf + el3_len + ol3_len,
+ cfg_l3_inner == PF_INET ? ETH_P_IP
+ : ETH_P_IPV6);
+ break;
+ }
+
+ switch (cfg_l3_extra) {
+ case PF_INET:
+ build_ipv4_header(buf,
+ cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6,
+ extra_saddr4.sin_addr.s_addr,
+ extra_daddr4.sin_addr.s_addr,
+ ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len, 0);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf,
+ cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6,
+ &extra_saddr6, &extra_daddr6,
+ ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len, 0);
+ break;
+ }
+
+ return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len;
+}
+
+/* sender transmits encapsulated over RAW or unencap'd over UDP */
+static int setup_tx(void)
+{
+ int family, fd, ret;
+
+ if (cfg_l3_extra)
+ family = cfg_l3_extra;
+ else if (cfg_l3_outer)
+ family = cfg_l3_outer;
+ else
+ family = cfg_l3_inner;
+
+ fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+ if (fd == -1)
+ error(1, errno, "socket tx");
+
+ if (cfg_l3_extra) {
+ if (cfg_l3_extra == PF_INET)
+ ret = connect(fd, (void *) &extra_daddr4,
+ sizeof(extra_daddr4));
+ else
+ ret = connect(fd, (void *) &extra_daddr6,
+ sizeof(extra_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ } else if (cfg_l3_outer) {
+ /* connect to destination if not encapsulated */
+ if (cfg_l3_outer == PF_INET)
+ ret = connect(fd, (void *) &out_daddr4,
+ sizeof(out_daddr4));
+ else
+ ret = connect(fd, (void *) &out_daddr6,
+ sizeof(out_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ } else {
+ /* otherwise using loopback */
+ if (cfg_l3_inner == PF_INET)
+ ret = connect(fd, (void *) &in_daddr4,
+ sizeof(in_daddr4));
+ else
+ ret = connect(fd, (void *) &in_daddr6,
+ sizeof(in_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ }
+
+ return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(void)
+{
+ int fd, ret;
+
+ fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket rx");
+
+ if (cfg_l3_inner == PF_INET)
+ ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
+ else
+ ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
+ if (ret)
+ error(1, errno, "bind rx");
+
+ return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+ int ret;
+
+ ret = write(fd, pkt, len);
+ if (ret == -1)
+ error(1, errno, "send");
+ if (ret != len)
+ error(1, errno, "send: len (%d < %d)\n", ret, len);
+
+ return 1;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.fd = fd;
+ pfd.events = events;
+
+ ret = poll(&pfd, 1, timeout);
+ if (ret == -1)
+ error(1, errno, "poll");
+ if (ret && !(pfd.revents & POLLIN))
+ error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
+
+ return ret;
+}
+
+static int do_rx(int fd)
+{
+ char rbuf;
+ int ret, num = 0;
+
+ while (1) {
+ ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (rbuf != cfg_payload_char)
+ error(1, 0, "recv: payload mismatch");
+ num++;
+ };
+
+ return num;
+}
+
+static int do_main(void)
+{
+ unsigned long tstop, treport, tcur;
+ int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
+
+ if (!cfg_only_tx)
+ fdr = setup_rx();
+ if (!cfg_only_rx)
+ fdt = setup_tx();
+
+ len = build_packet();
+
+ tcur = util_gettime();
+ treport = tcur + 1000;
+ tstop = tcur + (cfg_num_secs * 1000);
+
+ while (1) {
+ if (!cfg_only_rx)
+ tx += do_tx(fdt, buf, len);
+
+ if (!cfg_only_tx)
+ rx += do_rx(fdr);
+
+ if (cfg_num_secs) {
+ tcur = util_gettime();
+ if (tcur >= tstop)
+ break;
+ if (tcur >= treport) {
+ fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+ tx = 0;
+ rx = 0;
+ treport = tcur + 1000;
+ }
+ } else {
+ if (tx == cfg_num_pkt)
+ break;
+ }
+ }
+
+ /* read straggler packets, if any */
+ if (rx < tx) {
+ tstop = util_gettime() + 100;
+ while (rx < tx) {
+ tcur = util_gettime();
+ if (tcur >= tstop)
+ break;
+
+ do_poll(fdr, POLLIN, tstop - tcur);
+ rx += do_rx(fdr);
+ }
+ }
+
+ fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+
+ if (fdr != -1 && close(fdr))
+ error(1, errno, "close rx");
+ if (fdt != -1 && close(fdt))
+ error(1, errno, "close tx");
+
+ /*
+ * success (== 0) only if received all packets
+ * unless failure is expected, in which case none must arrive.
+ */
+ if (cfg_expect_failure)
+ return rx != 0;
+ else
+ return rx != tx;
+}
+
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+ fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
+ "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
+ "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
+ "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
+ filepath);
+ exit(1);
+}
+
+static void parse_addr(int family, void *addr, const char *optarg)
+{
+ int ret;
+
+ ret = inet_pton(family, optarg, addr);
+ if (ret == -1)
+ error(1, errno, "inet_pton");
+ if (ret == 0)
+ error(1, 0, "inet_pton: bad string");
+}
+
+static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
+{
+ parse_addr(AF_INET, &addr->sin_addr, optarg);
+}
+
+static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
+{
+ parse_addr(AF_INET6, &addr->sin6_addr, optarg);
+}
+
+static int parse_protocol_family(const char *filepath, const char *optarg)
+{
+ if (!strcmp(optarg, "4"))
+ return PF_INET;
+ if (!strcmp(optarg, "6"))
+ return PF_INET6;
+
+ usage(filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
+ switch (c) {
+ case 'd':
+ if (cfg_l3_outer == AF_UNSPEC)
+ error(1, 0, "-d must be preceded by -o");
+ if (cfg_l3_outer == AF_INET)
+ parse_addr4(&out_daddr4, optarg);
+ else
+ parse_addr6(&out_daddr6, optarg);
+ break;
+ case 'D':
+ if (cfg_l3_inner == AF_UNSPEC)
+ error(1, 0, "-D must be preceded by -i");
+ if (cfg_l3_inner == AF_INET)
+ parse_addr4(&in_daddr4, optarg);
+ else
+ parse_addr6(&in_daddr6, optarg);
+ break;
+ case 'e':
+ if (!strcmp(optarg, "gre"))
+ cfg_encap_proto = IPPROTO_GRE;
+ else if (!strcmp(optarg, "gue"))
+ cfg_encap_proto = IPPROTO_UDP;
+ else if (!strcmp(optarg, "bare"))
+ cfg_encap_proto = IPPROTO_IPIP;
+ else if (!strcmp(optarg, "none"))
+ cfg_encap_proto = IPPROTO_IP; /* == 0 */
+ else
+ usage(argv[0]);
+ break;
+ case 'f':
+ cfg_src_port = strtol(optarg, NULL, 0);
+ break;
+ case 'F':
+ cfg_expect_failure = true;
+ break;
+ case 'h':
+ usage(argv[0]);
+ break;
+ case 'i':
+ if (!strcmp(optarg, "4"))
+ cfg_l3_inner = PF_INET;
+ else if (!strcmp(optarg, "6"))
+ cfg_l3_inner = PF_INET6;
+ else
+ usage(argv[0]);
+ break;
+ case 'l':
+ cfg_payload_len = strtol(optarg, NULL, 0);
+ break;
+ case 'n':
+ cfg_num_pkt = strtol(optarg, NULL, 0);
+ break;
+ case 'o':
+ cfg_l3_outer = parse_protocol_family(argv[0], optarg);
+ break;
+ case 'O':
+ cfg_l3_extra = parse_protocol_family(argv[0], optarg);
+ break;
+ case 'R':
+ cfg_only_rx = true;
+ break;
+ case 's':
+ if (cfg_l3_outer == AF_INET)
+ parse_addr4(&out_saddr4, optarg);
+ else
+ parse_addr6(&out_saddr6, optarg);
+ break;
+ case 'S':
+ if (cfg_l3_inner == AF_INET)
+ parse_addr4(&in_saddr4, optarg);
+ else
+ parse_addr6(&in_saddr6, optarg);
+ break;
+ case 't':
+ cfg_num_secs = strtol(optarg, NULL, 0);
+ break;
+ case 'T':
+ cfg_only_tx = true;
+ break;
+ case 'x':
+ cfg_dsfield_outer = strtol(optarg, NULL, 0);
+ break;
+ case 'X':
+ cfg_dsfield_inner = strtol(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ if (cfg_only_rx && cfg_only_tx)
+ error(1, 0, "options: cannot combine rx-only and tx-only");
+
+ if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
+ error(1, 0, "options: must specify outer with encap");
+ else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
+ error(1, 0, "options: cannot combine no-encap and outer");
+ else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
+ error(1, 0, "options: cannot combine no-encap and extra");
+
+ if (cfg_l3_inner == AF_UNSPEC)
+ cfg_l3_inner = AF_INET6;
+ if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
+ cfg_encap_proto = IPPROTO_IPV6;
+
+ /* RFC 6040 4.2:
+ * on decap, if outer encountered congestion (CE == 0x3),
+ * but inner cannot encode ECN (NoECT == 0x0), then drop packet.
+ */
+ if (((cfg_dsfield_outer & 0x3) == 0x3) &&
+ ((cfg_dsfield_inner & 0x3) == 0x0))
+ cfg_expect_failure = true;
+}
+
+static void print_opts(void)
+{
+ if (cfg_l3_inner == PF_INET6) {
+ util_printaddr("inner.dest6", (void *) &in_daddr6);
+ util_printaddr("inner.source6", (void *) &in_saddr6);
+ } else {
+ util_printaddr("inner.dest4", (void *) &in_daddr4);
+ util_printaddr("inner.source4", (void *) &in_saddr4);
+ }
+
+ if (!cfg_l3_outer)
+ return;
+
+ fprintf(stderr, "encap proto: %u\n", cfg_encap_proto);
+
+ if (cfg_l3_outer == PF_INET6) {
+ util_printaddr("outer.dest6", (void *) &out_daddr6);
+ util_printaddr("outer.source6", (void *) &out_saddr6);
+ } else {
+ util_printaddr("outer.dest4", (void *) &out_daddr4);
+ util_printaddr("outer.source4", (void *) &out_saddr4);
+ }
+
+ if (!cfg_l3_extra)
+ return;
+
+ if (cfg_l3_outer == PF_INET6) {
+ util_printaddr("extra.dest6", (void *) &extra_daddr6);
+ util_printaddr("extra.source6", (void *) &extra_saddr6);
+ } else {
+ util_printaddr("extra.dest4", (void *) &extra_daddr4);
+ util_printaddr("extra.source4", (void *) &extra_saddr4);
+ }
+
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ print_opts();
+ return do_main();
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
new file mode 100755
index 000000000000..c0fb073b5eab
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Load BPF flow dissector and verify it correctly dissects traffic
+export TESTNAME=test_flow_dissector
+unmount=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+# This test needs to be run in a network namespace with in_netns.sh. Check if
+# this is the case and run it with in_netns.sh if it is being run in the root
+# namespace.
+if [[ -z $(ip netns identify $$) ]]; then
+ ../net/in_netns.sh "$0" "$@"
+ exit $?
+fi
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+ if (( $? == 0 )); then
+ echo "selftests: $TESTNAME [PASS]";
+ else
+ echo "selftests: $TESTNAME [FAILED]";
+ fi
+
+ set +e
+
+ # Cleanup
+ tc filter del dev lo ingress pref 1337 2> /dev/null
+ tc qdisc del dev lo ingress 2> /dev/null
+ ./flow_dissector_load -d 2> /dev/null
+ if [ $unmount -ne 0 ]; then
+ umount bpffs 2> /dev/null
+ fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+# Mount BPF file system
+if /bin/mount | grep /sys/fs/bpf > /dev/null; then
+ echo "bpffs already mounted"
+else
+ echo "bpffs not mounted. Mounting..."
+ unmount=1
+ /bin/mount bpffs /sys/fs/bpf -t bpf
+fi
+
+# Attach BPF program
+./flow_dissector_load -p bpf_flow.o -s dissect
+
+# Setup
+tc qdisc add dev lo ingress
+
+echo "Testing IPv4..."
+# Drops all IP/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+ udp src_port 9 action drop
+
+# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 4 -f 8
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 4 -f 10
+
+echo "Testing IPIP..."
+# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 10
+
+echo "Testing IPv4 + GRE..."
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 10
+
+tc filter del dev lo ingress pref 1337
+
+echo "Testing IPv6..."
+# Drops all IPv6/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
+ udp src_port 9 action drop
+
+# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 6 -f 8
+# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 6 -f 9 -F
+# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 6 -f 10
+
+exit 0
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
new file mode 100755
index 000000000000..ffcd3953f94c
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_addr.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# add private ipv4 and ipv6 addresses to loopback
+
+readonly V6_INNER='100::a/128'
+readonly V4_INNER='192.168.0.1/32'
+
+if getopts ":s" opt; then
+ readonly SIT_DEV_NAME='sixtofourtest0'
+ readonly V6_SIT='2::/64'
+ readonly V4_SIT='172.17.0.1/32'
+ shift
+fi
+
+fail() {
+ echo "error: $*" 1>&2
+ exit 1
+}
+
+setup() {
+ ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
+ ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
+
+ if [[ -n "${V6_SIT}" ]]; then
+ ip link add "${SIT_DEV_NAME}" type sit remote any local any \
+ || fail 'failed to add sit'
+ ip link set dev "${SIT_DEV_NAME}" up \
+ || fail 'failed to bring sit device up'
+ ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
+ || fail 'failed to setup v6 SIT address'
+ ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
+ || fail 'failed to setup v4 SIT address'
+ fi
+
+ sleep 2 # avoid race causing bind to fail
+}
+
+cleanup() {
+ if [[ -n "${V6_SIT}" ]]; then
+ ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
+ ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
+ ip link del "${SIT_DEV_NAME}"
+ fi
+
+ ip -4 addr del "${V4_INNER}" dev lo
+ ip -6 addr del "${V6_INNER}" dev lo
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
new file mode 100755
index 000000000000..e24949ed3a20
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_tunnels.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# setup tunnels for flow dissection test
+
+readonly SUFFIX="test_$(mktemp -u XXXX)"
+CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
+
+setup() {
+ ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
+ ip link add "gre_${SUFFIX}" type gre ${CONFIG}
+ ip link add "sit_${SUFFIX}" type sit ${CONFIG}
+
+ echo "tunnels before test:"
+ ip tunnel show
+
+ ip link set "ipip_${SUFFIX}" up
+ ip link set "gre_${SUFFIX}" up
+ ip link set "sit_${SUFFIX}" up
+}
+
+
+cleanup() {
+ ip tunnel del "ipip_${SUFFIX}"
+ ip tunnel del "gre_${SUFFIX}"
+ ip tunnel del "sit_${SUFFIX}"
+
+ echo "tunnels after test:"
+ ip tunnel show
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
--
2.19.0.rc0.228.g281dcd1b4d0-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread