All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lawrence Brakmo <brakmo@fb.com>
To: netdev <netdev@vger.kernel.org>
Cc: Kernel Team <kernel-team@fb.com>, Blake Matheny <bmatheny@fb.com>,
	Alexei Starovoitov <ast@fb.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	Neal Cardwell <ncardwell@google.com>,
	Yuchung Cheng <ycheng@google.com>
Subject: [PATCH bpf-next v6 05/11] bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock
Date: Fri, 19 Jan 2018 17:45:42 -0800	[thread overview]
Message-ID: <20180120014548.2941040-6-brakmo@fb.com> (raw)
In-Reply-To: <20180120014548.2941040-1-brakmo@fb.com>

Adds field bpf_sock_ops_cb_flags to tcp_sock and bpf_sock_ops. Its primary
use is to determine if there should be calls to the sock_ops BPF program at
various points in the TCP code. The field is initialized to zero,
disabling the calls. A sock_ops BPF program can set it, per connection and
as necessary, when the connection is established.

It also adds support for reading and writing the field within a
sock_ops BPF program. Reading is done by accessing the field directly.
However, writing is done through the helper function
bpf_sock_ops_cb_flags_set, in order to return an error if a BPF program
is trying to set a callback that is not supported in the current kernel
(i.e. running an older kernel). The helper function returns 0 if it was
able to set all of the bits set in the argument, a positive number
containing the bits that could not be set, or -EINVAL if the socket is
not a full TCP socket.

Examples of where one could call the bpf program:

1) When RTO fires
2) When a packet is retransmitted
3) When the connection terminates
4) When a packet is sent
5) When a packet is received

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/linux/tcp.h      | 11 +++++++++++
 include/uapi/linux/bpf.h | 12 +++++++++++-
 include/uapi/linux/tcp.h |  5 +++++
 net/core/filter.c        | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f095..8f4c549 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -335,6 +335,17 @@ struct tcp_sock {
 
 	int			linger2;
 
+
+/* Sock_ops bpf program related variables */
+#ifdef CONFIG_BPF
+	u8	bpf_sock_ops_cb_flags;  /* Control calling BPF programs
+					 * values defined in uapi/linux/tcp.h
+					 */
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
+#else
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
+#endif
+
 /* Receiver side RTT estimation */
 	struct {
 		u32	rtt_us;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8d5874c..7573f5b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -642,6 +642,14 @@ union bpf_attr {
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
  *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ *     Set callback flags for sock_ops
+ *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ *     @flags: flags value
+ *     Return: 0 for no error
+ *             -EINVAL if there is no full tcp socket
+ *             bits in flags that are not supported by current kernel
+ *
  * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
  *     Grow or shrink room in sk_buff.
  *     @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
 	FN(perf_event_read_value),	\
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
-	FN(override_return),
+	FN(override_return),		\
+	FN(sock_ops_cb_flags_set),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -969,6 +978,7 @@ struct bpf_sock_ops {
 				 */
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
+	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
 };
 
 /* List of known BPF sock_ops operators.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b4a4f64..d1df2f6 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -268,4 +268,9 @@ struct tcp_diag_md5sig {
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];
 };
 
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0		/* Mask of all currently
+							 * supported cb flags
+							 */
+
 #endif /* _UAPI_LINUX_TCP_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index 1ff36ca..c9411dc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3324,6 +3324,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto = {
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
+	   int, argval)
+{
+	struct sock *sk = bpf_sock->sk;
+	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
+
+	if (!sk_fullsock(sk))
+		return -EINVAL;
+
+#ifdef CONFIG_INET
+	if (val)
+		tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+
+	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
+#else
+	return -EINVAL;
+#endif
+}
+
+static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
+	.func		= bpf_sock_ops_cb_flags_set,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -3504,6 +3531,8 @@ static const struct bpf_func_proto *
 		return &bpf_setsockopt_proto;
 	case BPF_FUNC_getsockopt:
 		return &bpf_getsockopt_proto;
+	case BPF_FUNC_sock_ops_cb_flags_set:
+		return &bpf_sock_ops_cb_flags_set_proto;
 	case BPF_FUNC_sock_map_update:
 		return &bpf_sock_map_update_proto;
 	default:
@@ -4541,6 +4570,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	case offsetof(struct bpf_sock_ops, srtt_us):
 		SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
 		break;
+
+	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
+		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
+				   struct tcp_sock);
+		break;
 	}
 	return insn - insn_buf;
 }
-- 
2.9.5

  parent reply	other threads:[~2018-01-20  1:45 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-20  1:45 [PATCH bpf-next v6 00/11] bpf: More sock_ops callbacks Lawrence Brakmo
2018-01-20  1:45 ` [PATCH bpf-next v6 01/11] bpf: Make SOCK_OPS_GET_TCP size independent Lawrence Brakmo
2018-01-20  1:45 ` [PATCH bpf-next v6 02/11] bpf: Make SOCK_OPS_GET_TCP struct independent Lawrence Brakmo
2018-01-20  1:45 ` [PATCH bpf-next v6 03/11] bpf: Add write access to tcp_sock and sock fields Lawrence Brakmo
2018-01-20  3:54   ` Alexei Starovoitov
2018-01-20  1:45 ` [PATCH bpf-next v6 04/11] bpf: Support passing args to sock_ops bpf function Lawrence Brakmo
2018-01-24  1:11   ` Daniel Borkmann
2018-01-24  1:30     ` Lawrence Brakmo
2018-01-24  1:34       ` Daniel Borkmann
2018-01-20  1:45 ` Lawrence Brakmo [this message]
2018-01-20  3:52   ` [PATCH bpf-next v6 05/11] bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock Alexei Starovoitov
2018-01-20  7:50     ` Lawrence Brakmo
2018-01-23 17:29     ` Eric Dumazet
2018-01-20  1:45 ` [PATCH bpf-next v6 06/11] bpf: Add sock_ops RTO callback Lawrence Brakmo
2018-01-20  1:45 ` [PATCH bpf-next v6 07/11] bpf: Add support for reading sk_state and more Lawrence Brakmo
2018-01-24  1:05   ` Daniel Borkmann
2018-01-24  1:27     ` Lawrence Brakmo
2018-01-20  1:45 ` [PATCH bpf-next v6 08/11] bpf: Add sock_ops R/W access to tclass & sk_txhash Lawrence Brakmo
2018-01-20  1:45 ` [PATCH bpf-next v6 09/11] bpf: Add BPF_SOCK_OPS_RETRANS_CB Lawrence Brakmo
2018-01-20  1:45 ` [PATCH bpf-next v6 10/11] bpf: Add BPF_SOCK_OPS_STATE_CB Lawrence Brakmo
2018-01-20  1:45 ` [PATCH bpf-next v6 11/11] bpf: add selftest for tcpbpf Lawrence Brakmo
2018-01-20  3:59   ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180120014548.2941040-6-brakmo@fb.com \
    --to=brakmo@fb.com \
    --cc=ast@fb.com \
    --cc=bmatheny@fb.com \
    --cc=daniel@iogearbox.net \
    --cc=eric.dumazet@gmail.com \
    --cc=kernel-team@fb.com \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=ycheng@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.