From: Stanislav Fomichev <sdf@google.com>
To: netdev@vger.kernel.org, bpf@vger.kernel.org
Cc: davem@davemloft.net, ast@kernel.org, daniel@iogearbox.net,
simon.horman@netronome.com, willemb@google.com,
peterpenkov96@gmail.com, Stanislav Fomichev <sdf@google.com>
Subject: [PATCH bpf-next v5 1/6] flow_dissector: switch kernel context to struct bpf_flow_dissector
Date: Mon, 15 Apr 2019 10:37:56 -0700 [thread overview]
Message-ID: <20190415173801.257254-2-sdf@google.com> (raw)
In-Reply-To: <20190415173801.257254-1-sdf@google.com>
struct bpf_flow_dissector has a small subset of sk_buff fields that
flow dissector BPF program is allowed to access and an optional
pointer to real skb. Real skb is used only in bpf_skb_load_bytes
helper to read non-linear data.
The real motivation for this is to be able to call flow dissector
from eth_get_headlen context where we don't have an skb and need
to dissect raw bytes.
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
include/linux/skbuff.h | 4 ++
include/net/flow_dissector.h | 7 +++
include/net/sch_generic.h | 11 ++--
net/bpf/test_run.c | 4 --
net/core/filter.c | 105 +++++++++++++++++++++++++++--------
net/core/flow_dissector.c | 45 +++++++--------
6 files changed, 117 insertions(+), 59 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a06275a618f0..4d2365ffa444 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1273,6 +1273,10 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
}
#endif
+struct bpf_flow_dissector;
+bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+ __be16 proto, int nhoff, int hlen);
+
struct bpf_flow_keys;
bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
const struct sk_buff *skb,
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 2b26979efb48..7c5a8d9a8d2a 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
return ((char *)target_container) + flow_dissector->offset[key_id];
}
+struct bpf_flow_dissector {
+ struct bpf_flow_keys *flow_keys;
+ const struct sk_buff *skb;
+ void *data;
+ void *data_end;
+};
+
#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e8f85cd2afce..21f434f3ac9e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -364,13 +364,10 @@ struct tcf_proto {
};
struct qdisc_skb_cb {
- union {
- struct {
- unsigned int pkt_len;
- u16 slave_dev_queue_mapping;
- u16 tc_classid;
- };
- struct bpf_flow_keys *flow_keys;
+ struct {
+ unsigned int pkt_len;
+ u16 slave_dev_queue_mapping;
+ u16 tc_classid;
};
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2221573dacdb..006ad865f7fb 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -382,7 +382,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
u32 repeat = kattr->test.repeat;
struct bpf_flow_keys flow_keys;
u64 time_start, time_spent = 0;
- struct bpf_skb_data_end *cb;
u32 retval, duration;
struct sk_buff *skb;
struct sock *sk;
@@ -423,9 +422,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
current->nsproxy->net_ns->loopback_dev);
skb_reset_network_header(skb);
- cb = (struct bpf_skb_data_end *)skb->cb;
- cb->qdisc_cb.flow_keys = &flow_keys;
-
if (!repeat)
repeat = 1;
diff --git a/net/core/filter.c b/net/core/filter.c
index 95a27fdf9a40..52c2424226e9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1730,6 +1730,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+BPF_CALL_4(bpf_flow_dissector_load_bytes,
+ const struct bpf_flow_dissector *, ctx, u32, offset,
+ void *, to, u32, len)
+{
+ void *ptr;
+
+ if (unlikely(offset > 0xffff))
+ goto err_clear;
+
+ if (unlikely(!ctx->skb))
+ goto err_clear;
+
+ ptr = skb_header_pointer(ctx->skb, offset, len, to);
+ if (unlikely(!ptr))
+ goto err_clear;
+ if (ptr != to)
+ memcpy(to, ptr, len);
+
+ return 0;
+err_clear:
+ memset(to, 0, len);
+ return -EFAULT;
+}
+
+static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
+ .func = bpf_flow_dissector_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header)
{
@@ -6119,7 +6153,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
- return &bpf_skb_load_bytes_proto;
+ return &bpf_flow_dissector_load_bytes_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6246,9 +6280,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
break;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
- if (size != sizeof(__u64))
- return false;
- break;
+ return false;
case bpf_ctx_range(struct __sk_buff, tstamp):
if (size != sizeof(__u64))
return false;
@@ -6283,7 +6315,6 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
@@ -6310,7 +6341,6 @@ static bool cg_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
case bpf_ctx_range(struct __sk_buff, data):
@@ -6356,7 +6386,6 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
@@ -6599,7 +6628,6 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -6801,7 +6829,6 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
@@ -6875,24 +6902,65 @@ static bool flow_dissector_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
+ if (off < 0 || off >= sizeof(struct __sk_buff))
+ return false;
+
if (type == BPF_WRITE)
return false;
switch (off) {
case bpf_ctx_range(struct __sk_buff, data):
+ if (size != size_default)
+ return false;
info->reg_type = PTR_TO_PACKET;
- break;
+ return true;
case bpf_ctx_range(struct __sk_buff, data_end):
+ if (size != size_default)
+ return false;
info->reg_type = PTR_TO_PACKET_END;
- break;
+ return true;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ if (size != sizeof(__u64))
+ return false;
info->reg_type = PTR_TO_FLOW_KEYS;
- break;
+ return true;
default:
return false;
}
+}
- return bpf_skb_is_valid_access(off, size, type, prog, info);
+static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct __sk_buff, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, data));
+ break;
+
+ case offsetof(struct __sk_buff, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, data_end));
+ break;
+
+ case offsetof(struct __sk_buff, flow_keys):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, flow_keys));
+ break;
+ }
+
+ return insn - insn_buf;
}
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
@@ -7199,15 +7267,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
skc_num, 2, target_size));
break;
- case offsetof(struct __sk_buff, flow_keys):
- off = si->off;
- off -= offsetof(struct __sk_buff, flow_keys);
- off += offsetof(struct sk_buff, cb);
- off += offsetof(struct qdisc_skb_cb, flow_keys);
- *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
- si->src_reg, off);
- break;
-
case offsetof(struct __sk_buff, tstamp):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
@@ -8212,7 +8271,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = {
const struct bpf_verifier_ops flow_dissector_verifier_ops = {
.get_func_proto = flow_dissector_func_proto,
.is_valid_access = flow_dissector_is_valid_access,
- .convert_ctx_access = bpf_convert_ctx_access,
+ .convert_ctx_access = flow_dissector_convert_ctx_access,
};
const struct bpf_prog_ops flow_dissector_prog_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 795449713ba4..ef6d9443cc75 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -688,39 +688,34 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
struct flow_dissector *flow_dissector,
struct bpf_flow_keys *flow_keys)
{
- struct bpf_skb_data_end cb_saved;
- struct bpf_skb_data_end *cb;
- u32 result;
-
- /* Note that even though the const qualifier is discarded
- * throughout the execution of the BPF program, all changes(the
- * control block) are reverted after the BPF program returns.
- * Therefore, __skb_flow_dissect does not alter the skb.
- */
-
- cb = (struct bpf_skb_data_end *)skb->cb;
+ struct bpf_flow_dissector ctx = {
+ .flow_keys = flow_keys,
+ .skb = skb,
+ .data = skb->data,
+ .data_end = skb->data + skb_headlen(skb),
+ };
+
+ return bpf_flow_dissect(prog, &ctx, skb->protocol,
+ skb_network_offset(skb), skb_headlen(skb));
+}
- /* Save Control Block */
- memcpy(&cb_saved, cb, sizeof(cb_saved));
- memset(cb, 0, sizeof(*cb));
+bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+ __be16 proto, int nhoff, int hlen)
+{
+ struct bpf_flow_keys *flow_keys = ctx->flow_keys;
+ u32 result;
/* Pass parameters to the BPF program */
memset(flow_keys, 0, sizeof(*flow_keys));
- cb->qdisc_cb.flow_keys = flow_keys;
- flow_keys->n_proto = skb->protocol;
- flow_keys->nhoff = skb_network_offset(skb);
+ flow_keys->n_proto = proto;
+ flow_keys->nhoff = nhoff;
flow_keys->thoff = flow_keys->nhoff;
- bpf_compute_data_pointers((struct sk_buff *)skb);
- result = BPF_PROG_RUN(prog, skb);
-
- /* Restore state */
- memcpy(cb, &cb_saved, sizeof(cb_saved));
+ result = BPF_PROG_RUN(prog, ctx);
- flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff,
- skb_network_offset(skb), skb->len);
+ flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
- flow_keys->nhoff, skb->len);
+ flow_keys->nhoff, hlen);
return result == BPF_OK;
}
--
2.21.0.392.gf8f6787159e-goog
next prev parent reply other threads:[~2019-04-15 17:38 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-15 17:37 [PATCH bpf-next v5 0/6] net: flow_dissector: trigger BPF hook when called from eth_get_headlen Stanislav Fomichev
2019-04-15 17:37 ` Stanislav Fomichev [this message]
2019-04-15 17:37 ` [PATCH bpf-next v5 2/6] bpf: when doing BPF_PROG_TEST_RUN for flow dissector use no-skb mode Stanislav Fomichev
2019-04-15 17:37 ` [PATCH bpf-next v5 3/6] net: plumb network namespace into __skb_flow_dissect Stanislav Fomichev
2019-04-15 17:37 ` [PATCH bpf-next v5 4/6] flow_dissector: handle no-skb use case Stanislav Fomichev
2019-04-15 17:38 ` [PATCH bpf-next v5 5/6] net: pass net argument to the eth_get_headlen Stanislav Fomichev
2019-04-15 17:38 ` [Intel-wired-lan] " Stanislav Fomichev
2019-04-19 0:28 ` Alexei Starovoitov
2019-04-19 0:28 ` [Intel-wired-lan] " Alexei Starovoitov
2019-04-19 0:43 ` Stanislav Fomichev
2019-04-19 0:43 ` [Intel-wired-lan] " Stanislav Fomichev
2019-04-19 4:50 ` Alexei Starovoitov
2019-04-19 4:50 ` [Intel-wired-lan] " Alexei Starovoitov
2019-04-19 23:29 ` Stanislav Fomichev
2019-04-19 23:29 ` [Intel-wired-lan] " Stanislav Fomichev
2019-04-19 23:37 ` Alexei Starovoitov
2019-04-19 23:37 ` [Intel-wired-lan] " Alexei Starovoitov
2019-04-19 23:47 ` Stanislav Fomichev
2019-04-19 23:47 ` [Intel-wired-lan] " Stanislav Fomichev
2019-04-19 23:50 ` Alexei Starovoitov
2019-04-19 23:50 ` [Intel-wired-lan] " Alexei Starovoitov
2019-04-15 17:38 ` [PATCH bpf-next v5 6/6] selftests/bpf: add flow dissector bpf_skb_load_bytes helper test Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190415173801.257254-2-sdf@google.com \
--to=sdf@google.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=peterpenkov96@gmail.com \
--cc=simon.horman@netronome.com \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.