From: Daniel Borkmann <daniel@iogearbox.net>
To: Lawrence Brakmo <brakmo@fb.com>, netdev <netdev@vger.kernel.org>
Cc: Kernel Team <kernel-team@fb.com>, Blake Matheny <bmatheny@fb.com>,
Alexei Starovoitov <ast@fb.com>,
David Ahern <dsa@cumulusnetworks.com>
Subject: Re: [PATCH net-next v4 01/16] bpf: BPF support for sock_ops
Date: Thu, 29 Jun 2017 11:46:11 +0200 [thread overview]
Message-ID: <5954CC63.7010707@iogearbox.net> (raw)
In-Reply-To: <20170628173124.3299500-2-brakmo@fb.com>
On 06/28/2017 07:31 PM, Lawrence Brakmo wrote:
> Created a new BPF program type, BPF_PROG_TYPE_SOCK_OPS, and a corresponding
> struct that allows BPF programs of this type to access some of the
> socket's fields (such as IP addresses, ports, etc.). It uses the
> existing bpf cgroups infrastructure so the programs can be attached per
> cgroup with full inheritance support. The program will be called at
> appropriate times to set relevant connection parameters such as buffer
> sizes, SYN and SYN-ACK RTOs, etc., based on connection information such
> as IP addresses, port numbers, etc.
[...]
> Currently there are two types of ops. The first type expects the BPF
> program to return a value which is then used by the caller (or a
> negative value to indicate the operation is not supported). The second
> type expects state changes to be done by the BPF program, for example
> through a setsockopt BPF helper function, and they ignore the return
> value.
>
> The reply fields of the bpf_sock_ops struct are there in case a bpf
> program needs to return a value larger than an integer.
>
> Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
For BPF bits:
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> @@ -3379,6 +3409,140 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> return insn - insn_buf;
> }
>
> +static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> + const struct bpf_insn *si,
> + struct bpf_insn *insn_buf,
> + struct bpf_prog *prog)
> +{
> + struct bpf_insn *insn = insn_buf;
> + int off;
> +
> + switch (si->off) {
[...]
> + case offsetof(struct bpf_sock_ops, remote_ip4):
> + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
> +
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + struct bpf_sock_ops_kern, sk),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_sock_ops_kern, sk));
> + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
> + offsetof(struct sock_common, skc_daddr));
> + *insn++ = BPF_ENDIAN(BPF_FROM_BE, si->dst_reg, 32);
> + break;
> +
> + case offsetof(struct bpf_sock_ops, local_ip4):
> + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
> +
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + struct bpf_sock_ops_kern, sk),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_sock_ops_kern, sk));
> + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
> + offsetof(struct sock_common,
> + skc_rcv_saddr));
> + *insn++ = BPF_ENDIAN(BPF_FROM_BE, si->dst_reg, 32);
> + break;
> +
> + case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
> + offsetof(struct bpf_sock_ops, remote_ip6[3]):
> +#if IS_ENABLED(CONFIG_IPV6)
> + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
> + skc_v6_daddr.s6_addr32[0]) != 4);
> +
> + off = si->off;
> + off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + struct bpf_sock_ops_kern, sk),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_sock_ops_kern, sk));
> + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
> + offsetof(struct sock_common,
> + skc_v6_daddr.s6_addr32[0]) +
> + off);
> + *insn++ = BPF_ENDIAN(BPF_FROM_BE, si->dst_reg, 32);
> +#else
> + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
> +#endif
> + break;
> +
> + case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
> + offsetof(struct bpf_sock_ops, local_ip6[3]):
> +#if IS_ENABLED(CONFIG_IPV6)
> + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
> + skc_v6_rcv_saddr.s6_addr32[0]) != 4);
> +
> + off = si->off;
> + off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + struct bpf_sock_ops_kern, sk),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_sock_ops_kern, sk));
> + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
> + offsetof(struct sock_common,
> + skc_v6_rcv_saddr.s6_addr32[0]) +
> + off);
> + *insn++ = BPF_ENDIAN(BPF_FROM_BE, si->dst_reg, 32);
> +#else
> + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
> +#endif
> + break;
> +
> + case offsetof(struct bpf_sock_ops, remote_port):
> + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
> +
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + struct bpf_sock_ops_kern, sk),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_sock_ops_kern, sk));
> + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
> + offsetof(struct sock_common, skc_dport));
> + *insn++ = BPF_ENDIAN(BPF_FROM_BE, si->dst_reg, 16);
> + break;
> +
> + case offsetof(struct bpf_sock_ops, local_port):
> + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
> +
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + struct bpf_sock_ops_kern, sk),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_sock_ops_kern, sk));
> + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
> + offsetof(struct sock_common, skc_num));
That one is indeed in host endianness. Makes sense to have remote_port
and local_port in a consistent representation.
I was wondering though whether we should do all the conversion of
BPF_ENDIAN(BPF_FROM_BE, ...) or just leave it to the user whether
he needs the BPF_ENDIAN(BPF_FROM_BE, ...) or process it in network
byte order as-is. In case the user needs to go and undo again via
BPF_ENDIAN(BPF_TO_BE, ...), e.g., to reconstruct a full v6 addr,
then we have two unneeded insns for each of the remote_ip6[X] /
local_ip6[X]. So, if we do not provide it in host byte order, the user can
still always choose to do a BPF_ENDIAN(BPF_FROM_BE, ...) by himself,
if this representation is preferred. Wdyt?
> + break;
> + }
> + return insn - insn_buf;
> +}
> +
> const struct bpf_verifier_ops sk_filter_prog_ops = {
> .get_func_proto = sk_filter_func_proto,
[...]
next prev parent reply other threads:[~2017-06-29 9:46 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-06-28 17:31 [PATCH net-next v4 00/16] bpf: BPF cgroup support for sock_ops Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 01/16] bpf: BPF " Lawrence Brakmo
2017-06-28 19:53 ` Alexei Starovoitov
2017-06-29 9:46 ` Daniel Borkmann [this message]
2017-06-30 7:27 ` Lawrence Brakmo
2017-06-29 15:57 ` kbuild test robot
2017-06-29 16:21 ` kbuild test robot
2017-06-28 17:31 ` [PATCH net-next v4 02/16] bpf: program to load and attach sock_ops BPF progs Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 03/16] bpf: Support for per connection SYN/SYN-ACK RTOs Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 04/16] bpf: Sample bpf program to set " Lawrence Brakmo
2017-06-29 19:39 ` Jesper Dangaard Brouer
2017-06-29 22:25 ` Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 05/16] bpf: Support for setting initial receive window Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 06/16] bpf: Sample bpf program to set initial window Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 07/16] bpf: Add setsockopt helper function to bpf Lawrence Brakmo
2017-06-29 10:08 ` Daniel Borkmann
2017-06-28 17:31 ` [PATCH net-next v4 08/16] bpf: Add TCP connection BPF callbacks Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 09/16] bpf: Sample BPF program to set buffer sizes Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 10/16] bpf: Add support for changing congestion control Lawrence Brakmo
2017-06-30 12:50 ` kbuild test robot
2017-06-28 17:31 ` [PATCH net-next v4 11/16] bpf: Sample BPF program to set " Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 12/16] bpf: Adds support for setting initial cwnd Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 13/16] bpf: Sample BPF program to set " Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 14/16] bpf: Adds support for setting sndcwnd clamp Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 15/16] bpf: Sample bpf program to set " Lawrence Brakmo
2017-06-28 17:31 ` [PATCH net-next v4 16/16] bpf: update tools/include/uapi/linux/bpf.h Lawrence Brakmo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5954CC63.7010707@iogearbox.net \
--to=daniel@iogearbox.net \
--cc=ast@fb.com \
--cc=bmatheny@fb.com \
--cc=brakmo@fb.com \
--cc=dsa@cumulusnetworks.com \
--cc=kernel-team@fb.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.