From: Lawrence Brakmo <brakmo@fb.com>
To: netdev <netdev@vger.kernel.org>
Cc: Kernel Team <kernel-team@fb.com>, Blake Matheny <bmatheny@fb.com>,
Alexei Starovoitov <ast@fb.com>,
Daniel Borkmann <daniel@iogearbox.net>,
Eric Dumazet <eric.dumazet@gmail.com>,
Neal Cardwell <ncardwell@google.com>,
Yuchung Cheng <ycheng@google.com>
Subject: [PATCH bpf-next v9 06/12] bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock
Date: Wed, 24 Jan 2018 18:37:06 -0800 [thread overview]
Message-ID: <20180125023712.3038949-7-brakmo@fb.com> (raw)
In-Reply-To: <20180125023712.3038949-1-brakmo@fb.com>
Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary
use is to determine if there should be calls to sock_ops bpf program at
various points in the TCP code. The field is initialized to zero,
disabling the calls. A sock_ops BPF program can set it, per connection and
as necessary, when the connection is established.
It also adds support for reading and writting the field within a
sock_ops BPF program. Reading is done by accessing the field directly.
However, writing is done through the helper function
bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program
is trying to set a callback that is not supported in the current kernel
(i.e. running an older kernel). The helper function returns 0 if it was
able to set all of the bits set in the argument, a positive number
containing the bits that could not be set, or -EINVAL if the socket is
not a full TCP socket.
Examples of where one could call the bpf program:
1) When RTO fires
2) When a packet is retransmitted
3) When the connection terminates
4) When a packet is sent
5) When a packet is received
Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
include/linux/tcp.h | 11 +++++++++++
include/uapi/linux/bpf.h | 12 +++++++++++-
include/uapi/linux/tcp.h | 5 +++++
net/core/filter.c | 34 ++++++++++++++++++++++++++++++++++
4 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f095..8f4c549 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -335,6 +335,17 @@ struct tcp_sock {
int linger2;
+
+/* Sock_ops bpf program related variables */
+#ifdef CONFIG_BPF
+ u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs
+ * values defined in uapi/linux/tcp.h
+ */
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
+#else
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
+#endif
+
/* Receiver side RTT estimation */
struct {
u32 rtt_us;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8d5874c..7573f5b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -642,6 +642,14 @@ union bpf_attr {
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -969,6 +978,7 @@ struct bpf_sock_ops {
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
};
/* List of known BPF sock_ops operators.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b4a4f64..d1df2f6 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -268,4 +268,9 @@ struct tcp_diag_md5sig {
__u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN];
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0 /* Mask of all currently
+ * supported cb flags
+ */
+
#endif /* _UAPI_LINUX_TCP_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index c356ec0..6936d19 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3328,6 +3328,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
+ int, argval)
+{
+ struct sock *sk = bpf_sock->sk;
+ int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
+
+ if (!sk_fullsock(sk))
+ return -EINVAL;
+
+#ifdef CONFIG_INET
+ if (val)
+ tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+
+ return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
+#else
+ return -EINVAL;
+#endif
+}
+
+static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
+ .func = bpf_sock_ops_cb_flags_set,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3510,6 +3537,8 @@ static const struct bpf_func_proto *
return &bpf_setsockopt_proto;
case BPF_FUNC_getsockopt:
return &bpf_getsockopt_proto;
+ case BPF_FUNC_sock_ops_cb_flags_set:
+ return &bpf_sock_ops_cb_flags_set_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
@@ -4546,6 +4575,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock_ops, srtt_us):
SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
break;
+
+ case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
+ SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
+ struct tcp_sock);
+ break;
}
return insn - insn_buf;
}
--
2.9.5
next prev parent reply other threads:[~2018-01-25 2:37 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-25 2:37 [PATCH bpf-next v9 00/12] bpf: More sock_ops callbacks Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 01/12] bpf: Only reply field should be writeable Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 02/12] bpf: Make SOCK_OPS_GET_TCP size independent Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 03/12] bpf: Make SOCK_OPS_GET_TCP struct independent Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 04/12] bpf: Add write access to tcp_sock and sock fields Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 05/12] bpf: Support passing args to sock_ops bpf function Lawrence Brakmo
2018-01-25 2:37 ` Lawrence Brakmo [this message]
2018-01-25 2:37 ` [PATCH bpf-next v9 07/12] bpf: Add sock_ops RTO callback Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 08/12] bpf: Add support for reading sk_state and more Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 09/12] bpf: Add sock_ops R/W access to tclass Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 10/12] bpf: Add BPF_SOCK_OPS_RETRANS_CB Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 11/12] bpf: Add BPF_SOCK_OPS_STATE_CB Lawrence Brakmo
2018-01-25 2:37 ` [PATCH bpf-next v9 12/12] bpf: add selftest for tcpbpf Lawrence Brakmo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180125023712.3038949-7-brakmo@fb.com \
--to=brakmo@fb.com \
--cc=ast@fb.com \
--cc=bmatheny@fb.com \
--cc=daniel@iogearbox.net \
--cc=eric.dumazet@gmail.com \
--cc=kernel-team@fb.com \
--cc=ncardwell@google.com \
--cc=netdev@vger.kernel.org \
--cc=ycheng@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).