Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2 3/5] bpf: Split lwt inout verifier structures
From: Mathieu Xhonneux @ 2018-04-24 17:44 UTC (permalink / raw)
  To: netdev; +Cc: dlebrun, alexei.starovoitov
In-Reply-To: <cover.1524591163.git.m.xhonneux@gmail.com>

The new bpf_lwt_push_encap helper should only be accessible within the
LWT BPF IN hook, and not the OUT one, as this may lead to a skb under
panic.

At the moment, both LWT BPF IN and OUT share the same list of helpers,
whose calls are authorized by the verifier. This patch separates the
verifier ops for the IN and OUT hooks, and allows the IN hook to call the
bpf_lwt_push_encap helper.

This patch is also the occasion to put all lwt_*_func_proto functions
together for clarity. At the moment, socks_op_func_proto is in the middle
of lwt_inout_func_proto and lwt_xmit_func_proto.

Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
---
 include/linux/bpf_types.h |  3 +-
 net/core/filter.c         | 83 +++++++++++++++++++++++++++++------------------
 2 files changed, 53 insertions(+), 33 deletions(-)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2b28fcf6f6ae..799c8bb6416f 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -9,8 +9,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e67c423db35..8a2331c47881 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4136,33 +4136,6 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
-static const struct bpf_func_proto *
-lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
-	switch (func_id) {
-	case BPF_FUNC_skb_load_bytes:
-		return &bpf_skb_load_bytes_proto;
-	case BPF_FUNC_skb_pull_data:
-		return &bpf_skb_pull_data_proto;
-	case BPF_FUNC_csum_diff:
-		return &bpf_csum_diff_proto;
-	case BPF_FUNC_get_cgroup_classid:
-		return &bpf_get_cgroup_classid_proto;
-	case BPF_FUNC_get_route_realm:
-		return &bpf_get_route_realm_proto;
-	case BPF_FUNC_get_hash_recalc:
-		return &bpf_get_hash_recalc_proto;
-	case BPF_FUNC_perf_event_output:
-		return &bpf_skb_event_output_proto;
-	case BPF_FUNC_get_smp_processor_id:
-		return &bpf_get_smp_processor_id_proto;
-	case BPF_FUNC_skb_under_cgroup:
-		return &bpf_skb_under_cgroup_proto;
-	default:
-		return bpf_base_func_proto(func_id);
-	}
-}
-
 static const struct bpf_func_proto *
 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -4222,6 +4195,44 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+static const struct bpf_func_proto *
+lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_skb_load_bytes_proto;
+	case BPF_FUNC_skb_pull_data:
+		return &bpf_skb_pull_data_proto;
+	case BPF_FUNC_csum_diff:
+		return &bpf_csum_diff_proto;
+	case BPF_FUNC_get_cgroup_classid:
+		return &bpf_get_cgroup_classid_proto;
+	case BPF_FUNC_get_route_realm:
+		return &bpf_get_route_realm_proto;
+	case BPF_FUNC_get_hash_recalc:
+		return &bpf_get_hash_recalc_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_skb_event_output_proto;
+	case BPF_FUNC_get_smp_processor_id:
+		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_skb_under_cgroup:
+		return &bpf_skb_under_cgroup_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static const struct bpf_func_proto *
+lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_lwt_push_encap:
+		return &bpf_lwt_push_encap_proto;
+	default:
+		return lwt_out_func_proto(func_id, prog);
+	}
+}
+
 static const struct bpf_func_proto *
 lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -4253,7 +4264,7 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_set_hash_invalid:
 		return &bpf_set_hash_invalid_proto;
 	default:
-		return lwt_inout_func_proto(func_id, prog);
+		return lwt_out_func_proto(func_id, prog);
 	}
 }
 
@@ -5782,13 +5793,23 @@ const struct bpf_prog_ops cg_skb_prog_ops = {
 	.test_run		= bpf_prog_test_run_skb,
 };
 
-const struct bpf_verifier_ops lwt_inout_verifier_ops = {
-	.get_func_proto		= lwt_inout_func_proto,
+const struct bpf_verifier_ops lwt_in_verifier_ops = {
+	.get_func_proto		= lwt_in_func_proto,
+	.is_valid_access	= lwt_is_valid_access,
+	.convert_ctx_access	= bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_in_prog_ops = {
+	.test_run		= bpf_prog_test_run_skb,
+};
+
+const struct bpf_verifier_ops lwt_out_verifier_ops = {
+	.get_func_proto		= lwt_out_func_proto,
 	.is_valid_access	= lwt_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 };
 
-const struct bpf_prog_ops lwt_inout_prog_ops = {
+const struct bpf_prog_ops lwt_out_prog_ops = {
 	.test_run		= bpf_prog_test_run_skb,
 };
 
-- 
2.16.1

^ permalink raw reply related

* [PATCH net-next v2 2/5] bpf: Add IPv6 Segment Routing helpers
From: Mathieu Xhonneux @ 2018-04-24 17:44 UTC (permalink / raw)
  To: netdev; +Cc: dlebrun, alexei.starovoitov
In-Reply-To: <cover.1524591163.git.m.xhonneux@gmail.com>

The BPF seg6local hook should be powerful enough to enable users to
implement most of the use-cases one could think of. After some thinking,
we figured out that the following actions should be possible on a SRv6
packet, requiring 3 specific helpers :
    - bpf_lwt_seg6_store_bytes: Modify non-sensitive fields of the SRH
    - bpf_lwt_seg6_adjust_srh: Allow to grow or shrink a SRH
                               (to add/delete TLVs)
    - bpf_lwt_seg6_action: Apply some SRv6 network programming actions
                           (specifically End.X, End.T, End.B6 and
                            End.B6.Encap)

The specifications of these helpers are provided in the patch (see
include/uapi/linux/bpf.h).

The non-sensitive fields of the SRH are the following : flags, tag and
TLVs. The other fields can not be modified, to maintain the SRH
integrity. Flags, tag and TLVs can easily be modified as their validity
can be checked afterwards via seg6_validate_srh. It is not allowed to
modify the segments directly. If one wants to add segments on the path,
he should stack a new SRH using the End.B6 action via
bpf_lwt_seg6_action.

Growing, shrinking or editing TLVs via the helpers will flag the SRH as
invalid, and it will have to be re-validated before re-entering the IPv6
layer. This flag is stored in a per-CPU buffer, along with the current
header length in bytes.

Storing the SRH len in bytes in the control block is mandatory when using
bpf_lwt_seg6_adjust_srh. The Header Ext. Length field contains the SRH
len rounded to 8 bytes (a padding TLV can be inserted to ensure the 8-bytes
boundary). When adding/deleting TLVs within the BPF program, the SRH may
temporary be in an invalid state where its length cannot be rounded to 8
bytes without remainder, hence the need to store the length in bytes
separately. The caller of the BPF program can then ensure that the SRH's
final length is valid using this value. Again, a final SRH modified by a
BPF program which doesn’t respect the 8-bytes boundary will be discarded
as it will be considered as invalid.

Finally, a fourth helper is provided, bpf_lwt_push_encap, which is
available from the LWT BPF IN hook, but not from the seg6local BPF one.
This helper allows to encapsulate a Segment Routing Header (either with
a new outer IPv6 header, or by inlining it directly in the existing IPv6
header) into a non-SRv6 packet. This helper is required if we want to
offer the possibility to dynamically encapsulate a SRH for non-SRv6 packet,
as the BPF seg6local hook only works on traffic already containing a SRH.
This is the BPF equivalent of the seg6 LWT infrastructure, which achieves
the same purpose but with a static SRH per route.

Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
---
 include/net/seg6_local.h |   8 ++
 include/uapi/linux/bpf.h |  57 +++++++++-
 net/core/filter.c        | 267 +++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 308 insertions(+), 24 deletions(-)

diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
index 57498b23085d..661fd5b4d3e0 100644
--- a/include/net/seg6_local.h
+++ b/include/net/seg6_local.h
@@ -15,10 +15,18 @@
 #ifndef _NET_SEG6_LOCAL_H
 #define _NET_SEG6_LOCAL_H
 
+#include <linux/percpu.h>
 #include <linux/net.h>
 #include <linux/ipv6.h>
 
 extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 			       u32 tbl_id);
 
+struct seg6_bpf_srh_state {
+	bool valid;
+	u16 hdrlen;
+};
+
+DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c8383a289f7b..23421dbc29a7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -774,6 +774,51 @@ union bpf_attr {
  *     @xdp_md: pointer to xdp_md
  *     @delta: A negative integer to be added to xdp_md.data_end
  *     Return: 0 on success or negative on error
+ *
+ * int lwt_push_encap(skb, type, hdr, len)
+ *     Push an encapsulation header on top of current packet.
+ *     @type: type of header to push :
+ *          - BPF_LWT_ENCAP_SEG6 (push an IPv6 Segment Routing Header, struct
+ *                    ipv6_sr_hdr, the helper will add the outer IPv6 header)
+ *          - BPF_LWT_ENCAP_SEG6_INLINE (push an IPv6 Segment Routing Header,
+ *                       struct ipv6_sr_hdr, inside the existing IPv6 header)
+ *     @hdr: pointer where to copy the header from
+ *     @len: size of hdr in bytes
+ *     Return: 0 on success or negative error
+ *
+ * int lwt_seg6_store_bytes(skb, offset, from, len)
+ *     Store bytes into the outermost Segment Routing header of an IPv6 header.
+ *     Only the flags, tag and TLVs can be modified.
+ *     @skb: pointer to skb
+ *     @offset: offset within packet from skb->data
+ *     @from: pointer where to copy bytes from
+ *     @len: number of bytes to store into packet
+ *     Return: 0 on success or negative error
+ *
+ * int lwt_seg6_adjust_srh(skb, offset, delta)
+ *     Adjust the size allocated to TLVs in the outermost IPv6 Segment Routing
+ *     Header (grow if delta > 0, else shrink)
+ *     @skb: pointer to skb
+ *     @offset: offset within packet from skb->data where SRH will grow/shrink,
+ *              only offsets after the segments are accepted
+ *     @delta: a positive/negative integer
+ *     Return: 0 on success or negative on error
+ *
+ * int lwt_seg6_action(skb, action, param, param_len)
+ *     Apply a IPv6 Segment Routing action on a packet with an IPv6 Segment
+ *     Routing Header.
+ *     @action:
+ *              - End.X: SEG6_LOCAL_ACTION_END_X
+ *                                           (type of param: struct in6_addr)
+ *              - End.T: SEG6_LOCAL_ACTION_END_T
+ *                                           (type of param: int)
+ *              - End.B6: SEG6_LOCAL_ACTION_END_B6
+ *                                           (type of param: struct ipv6_sr_hdr)
+ *              - End.B6.Encap: SEG6_LOCAL_ACTION_END_B6_ENCAP
+ *                                           (type of param: struct ipv6_sr_hdr)
+ *     @param: pointer to the parameter required by the action
+ *     @param_len: length of param in bytes
+ *     Return: 0 on success or negative error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -841,7 +886,11 @@ union bpf_attr {
 	FN(msg_cork_bytes),		\
 	FN(msg_pull_data),		\
 	FN(bind),			\
-	FN(xdp_adjust_tail),
+	FN(xdp_adjust_tail),		\
+	FN(lwt_push_encap),		\
+	FN(lwt_seg6_store_bytes),	\
+	FN(lwt_seg6_adjust_srh),	\
+	FN(lwt_seg6_action),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -899,6 +948,12 @@ enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
 };
 
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+	BPF_LWT_ENCAP_SEG6,
+	BPF_LWT_ENCAP_SEG6_INLINE
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
diff --git a/net/core/filter.c b/net/core/filter.c
index e25bc4a3aa1a..8e67c423db35 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -58,6 +58,10 @@
 #include <net/busy_poll.h>
 #include <net/tcp.h>
 #include <linux/bpf_trace.h>
+#include <net/ipv6.h>
+#include <linux/seg6_local.h>
+#include <net/seg6.h>
+#include <net/seg6_local.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -3077,28 +3081,6 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
-bool bpf_helper_changes_pkt_data(void *func)
-{
-	if (func == bpf_skb_vlan_push ||
-	    func == bpf_skb_vlan_pop ||
-	    func == bpf_skb_store_bytes ||
-	    func == bpf_skb_change_proto ||
-	    func == bpf_skb_change_head ||
-	    func == bpf_skb_change_tail ||
-	    func == bpf_skb_adjust_room ||
-	    func == bpf_skb_pull_data ||
-	    func == bpf_clone_redirect ||
-	    func == bpf_l3_csum_replace ||
-	    func == bpf_l4_csum_replace ||
-	    func == bpf_xdp_adjust_head ||
-	    func == bpf_xdp_adjust_meta ||
-	    func == bpf_msg_pull_data ||
-	    func == bpf_xdp_adjust_tail)
-		return true;
-
-	return false;
-}
-
 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
 				  unsigned long off, unsigned long len)
 {
@@ -3743,6 +3725,246 @@ static const struct bpf_func_proto bpf_bind_proto = {
 	.arg3_type	= ARG_CONST_SIZE,
 };
 
+int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+{
+	int err;
+	struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
+
+	if (!seg6_validate_srh(srh, len))
+		return -EINVAL;
+
+	switch (type) {
+	case BPF_LWT_ENCAP_SEG6_INLINE:
+		if (skb->protocol != htons(ETH_P_IPV6))
+			return -EBADMSG;
+
+		err = seg6_do_srh_inline(skb, srh);
+		break;
+	case BPF_LWT_ENCAP_SEG6:
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+		err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err)
+		return err;
+
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+	return seg6_lookup_nexthop(skb, NULL, 0);
+}
+
+BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
+	   u32, len)
+{
+	switch (type) {
+	case BPF_LWT_ENCAP_SEG6:
+	case BPF_LWT_ENCAP_SEG6_INLINE:
+		return bpf_push_seg6_encap(skb, type, hdr, len);
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
+	.func		= bpf_lwt_push_encap,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE
+};
+
+DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+
+BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
+	   const void *, from, u32, len)
+{
+	struct seg6_bpf_srh_state *srh_state =
+		this_cpu_ptr(&seg6_bpf_srh_states);
+	void *srh_tlvs, *srh_end, *ptr;
+	struct ipv6_sr_hdr *srh;
+	int srhoff = 0;
+
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+		return -EINVAL;
+
+	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+	srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
+	srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
+
+	ptr = skb->data + offset;
+	if (ptr >= srh_tlvs && ptr + len <= srh_end)
+		srh_state->valid = 0;
+	else if (ptr < (void *)&srh->flags ||
+		 ptr + len > (void *)&srh->segments)
+		return -EFAULT;
+
+	if (unlikely(bpf_try_make_writable(skb, offset + len)))
+		return -EFAULT;
+
+	memcpy(ptr, from, len);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
+	.func		= bpf_lwt_seg6_store_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE
+};
+
+BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
+	   u32, action, void *, param, u32, param_len)
+{
+	struct seg6_bpf_srh_state *srh_state =
+		this_cpu_ptr(&seg6_bpf_srh_states);
+	struct ipv6_sr_hdr *srh;
+	int srhoff = 0;
+	int err;
+
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+		return -EINVAL;
+	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+	if (!srh_state->valid) {
+		if (unlikely((srh_state->hdrlen & 7) != 0))
+			return -EBADMSG;
+
+		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
+		if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
+			return -EBADMSG;
+
+		srh_state->valid = 1;
+	}
+
+	switch (action) {
+	case SEG6_LOCAL_ACTION_END_X:
+		if (param_len != sizeof(struct in6_addr))
+			return -EINVAL;
+		return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
+	case SEG6_LOCAL_ACTION_END_T:
+		if (param_len != sizeof(int))
+			return -EINVAL;
+		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
+	case SEG6_LOCAL_ACTION_END_B6:
+		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
+					  param, param_len);
+		if (!err)
+			srh_state->hdrlen =
+				((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+		return err;
+	case SEG6_LOCAL_ACTION_END_B6_ENCAP:
+		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
+					  param, param_len);
+		if (!err)
+			srh_state->hdrlen =
+				((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+		return err;
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
+	.func		= bpf_lwt_seg6_action,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE
+};
+
+BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
+	   s32, len)
+{
+	struct seg6_bpf_srh_state *srh_state =
+		this_cpu_ptr(&seg6_bpf_srh_states);
+	void *srh_end, *srh_tlvs, *ptr;
+	struct ipv6_sr_hdr *srh;
+	struct ipv6hdr *hdr;
+	int srhoff = 0;
+	int ret;
+
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+		return -EINVAL;
+	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+	srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
+			((srh->first_segment + 1) << 4));
+	srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
+			srh_state->hdrlen);
+	ptr = skb->data + offset;
+
+	if (unlikely(ptr < srh_tlvs || ptr > srh_end))
+		return -EFAULT;
+	if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
+		return -EFAULT;
+
+	if (len > 0) {
+		ret = skb_cow_head(skb, len);
+		if (unlikely(ret < 0))
+			return ret;
+
+		ret = bpf_skb_net_hdr_push(skb, offset, len);
+	} else {
+		ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
+	}
+	if (unlikely(ret < 0))
+		return ret;
+
+	hdr = (struct ipv6hdr *)skb->data;
+	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+	bpf_compute_data_pointers(skb);
+	srh_state->hdrlen += len;
+	srh_state->valid = 0;
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
+	.func		= bpf_lwt_seg6_adjust_srh,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+bool bpf_helper_changes_pkt_data(void *func)
+{
+	if (func == bpf_skb_vlan_push ||
+	    func == bpf_skb_vlan_pop ||
+	    func == bpf_skb_store_bytes ||
+	    func == bpf_skb_change_proto ||
+	    func == bpf_skb_change_head ||
+	    func == bpf_skb_change_tail ||
+	    func == bpf_skb_adjust_room ||
+	    func == bpf_skb_pull_data ||
+	    func == bpf_clone_redirect ||
+	    func == bpf_l3_csum_replace ||
+	    func == bpf_l4_csum_replace ||
+	    func == bpf_xdp_adjust_head ||
+	    func == bpf_xdp_adjust_meta ||
+	    func == bpf_msg_pull_data ||
+	    func == bpf_xdp_adjust_tail ||
+	    func == bpf_lwt_push_encap ||
+	    func == bpf_lwt_seg6_store_bytes ||
+	    func == bpf_lwt_seg6_adjust_srh
+	    )
+		return true;
+
+	return false;
+}
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -4139,7 +4361,6 @@ static bool lwt_is_valid_access(int off, int size,
 	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
-
 /* Attach type specific accesses */
 static bool __sock_filter_check_attach_type(int off,
 					    enum bpf_access_type access_type,
-- 
2.16.1

^ permalink raw reply related

* [PATCH net-next v2 1/5] ipv6: sr: export function lookup_nexthop
From: Mathieu Xhonneux @ 2018-04-24 17:44 UTC (permalink / raw)
  To: netdev; +Cc: dlebrun, alexei.starovoitov
In-Reply-To: <cover.1524591163.git.m.xhonneux@gmail.com>

The function lookup_nexthop is essential to implement most of the seg6local
actions. As we want to provide a BPF helper allowing to apply some of these
actions on the packet being processed, the helper should be able to call
this function, hence the need to make it public.

Moreover, if one argument is incorrect or if the next hop can not be found,
an error should be returned by the BPF helper so the BPF program can adapt
its processing of the packet (return an error, properly force the drop,
...). This patch hence makes this function return dst->error to indicate a
possible error.

Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Acked-by: David Lebrun <dlebrun@google.com>
---
 include/net/seg6.h       |  3 ++-
 include/net/seg6_local.h | 24 ++++++++++++++++++++++++
 net/ipv6/seg6_local.c    | 20 +++++++++++---------
 3 files changed, 37 insertions(+), 10 deletions(-)
 create mode 100644 include/net/seg6_local.h

diff --git a/include/net/seg6.h b/include/net/seg6.h
index 099bad59dc90..f450bc37d196 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -63,5 +63,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
 extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
 			     int proto);
 extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
-
+extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+			       u32 tbl_id);
 #endif
diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
new file mode 100644
index 000000000000..57498b23085d
--- /dev/null
+++ b/include/net/seg6_local.h
@@ -0,0 +1,24 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Authors:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SEG6_LOCAL_H
+#define _NET_SEG6_LOCAL_H
+
+#include <linux/net.h>
+#include <linux/ipv6.h>
+
+extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+			       u32 tbl_id);
+
+#endif
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 45722327375a..e9b23fb924ad 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -30,6 +30,7 @@
 #ifdef CONFIG_IPV6_SEG6_HMAC
 #include <net/seg6_hmac.h>
 #endif
+#include <net/seg6_local.h>
 #include <linux/etherdevice.h>
 
 struct seg6_local_lwt;
@@ -140,8 +141,8 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
 	*daddr = *addr;
 }
 
-static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
-			   u32 tbl_id)
+int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+			u32 tbl_id)
 {
 	struct net *net = dev_net(skb->dev);
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -187,6 +188,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
+	return dst->error;
 }
 
 /* regular endpoint function */
@@ -200,7 +202,7 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 
 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
 
-	lookup_nexthop(skb, NULL, 0);
+	seg6_lookup_nexthop(skb, NULL, 0);
 
 	return dst_input(skb);
 
@@ -220,7 +222,7 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 
 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
 
-	lookup_nexthop(skb, &slwt->nh6, 0);
+	seg6_lookup_nexthop(skb, &slwt->nh6, 0);
 
 	return dst_input(skb);
 
@@ -239,7 +241,7 @@ static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 
 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
 
-	lookup_nexthop(skb, NULL, slwt->table);
+	seg6_lookup_nexthop(skb, NULL, slwt->table);
 
 	return dst_input(skb);
 
@@ -331,7 +333,7 @@ static int input_action_end_dx6(struct sk_buff *skb,
 	if (!ipv6_addr_any(&slwt->nh6))
 		nhaddr = &slwt->nh6;
 
-	lookup_nexthop(skb, nhaddr, 0);
+	seg6_lookup_nexthop(skb, nhaddr, 0);
 
 	return dst_input(skb);
 drop:
@@ -380,7 +382,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto drop;
 
-	lookup_nexthop(skb, NULL, slwt->table);
+	seg6_lookup_nexthop(skb, NULL, slwt->table);
 
 	return dst_input(skb);
 
@@ -406,7 +408,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 
-	lookup_nexthop(skb, NULL, 0);
+	seg6_lookup_nexthop(skb, NULL, 0);
 
 	return dst_input(skb);
 
@@ -438,7 +440,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 
-	lookup_nexthop(skb, NULL, 0);
+	seg6_lookup_nexthop(skb, NULL, 0);
 
 	return dst_input(skb);
 
-- 
2.16.1

^ permalink raw reply related

* [PATCH net-next v2 0/5] ipv6: sr: introduce seg6local End.BPF action
From: Mathieu Xhonneux @ 2018-04-24 17:44 UTC (permalink / raw)
  To: netdev; +Cc: dlebrun, alexei.starovoitov

As of Linux 4.14, it is possible to define advanced local processing for
IPv6 packets with a Segment Routing Header through the seg6local LWT
infrastructure. This LWT implements the network programming principles
defined in the IETF “SRv6 Network Programming” draft.

The implemented operations are generic, and it would be very interesting to
be able to implement user-specific seg6local actions, without having to
modify the kernel directly. To do so, this patchset adds an End.BPF action
to seg6local, powered by some specific Segment Routing-related helpers,
which provide SR functionalities that can be applied on the packet. This
BPF hook would then allow to implement specific actions at native kernel
speed such as OAM features, advanced SR SDN policies, SRv6 actions like
Segment Routing Header (SRH) encapsulation depending on the content of
the packet, etc ... 

This patchset is divided in 5 patches, whose main features are :

- A new seg6local action End.BPF with the corresponding new BPF program
  type BPF_PROG_TYPE_LWT_SEG6LOCAL. Such attached BPF program can be
  passed to the LWT seg6local through netlink, the same way as the LWT
  BPF hook operates.
- 3 new BPF helpers for the seg6local BPF hook, allowing to edit/grow/
  shrink a SRH and apply on a packet some of the generic SRv6 actions.
- 1 new BPF helper for the LWT BPF IN hook, allowing to add a SRH through
  encapsulation (via IPv6 encapsulation or inlining if the packet contains
  already an IPv6 header).

As this patchset adds a new LWT BPF hook, I took into account the result of
the discussions when the LWT BPF infrastructure got merged. Hence, the
seg6local BPF hook doesn’t allow write access to skb->data directly, only
the SRH can be modified through specific helpers, which ensures that the
integrity of the packet is maintained.
More details are available in the related patches messages.

The performances of this BPF hook have been assessed with the BPF JIT
enabled on a Intel Xeon X3440 processors with 4 cores and 8 threads
clocked at 2.53 GHz. No throughput losses are noted with the seg6local
BPF hook when the BPF program does nothing (440kpps). Adding a 8-bytes
TLV (1 call each to bpf_lwt_seg6_adjust_srh and bpf_lwt_seg6_store_bytes)
drops the throughput to 410kpps, and inlining a SRH via
bpf_lwt_seg6_action drops the throughput to 420kpps.
All throughputs are stable.

-------
v2: move the SRH integrity state from skb->cb to a per-cpu buffer

Thanks.


Mathieu Xhonneux (5):
  ipv6: sr: export function lookup_nexthop
  bpf: Add IPv6 Segment Routing helpers
  bpf: Split lwt inout verifier structures
  ipv6: sr: Add seg6local action End.BPF
  selftests/bpf: test for seg6local End.BPF action

 include/linux/bpf_types.h                         |   4 +-
 include/net/seg6.h                                |   3 +-
 include/net/seg6_local.h                          |  32 ++
 include/uapi/linux/bpf.h                          |  58 ++-
 include/uapi/linux/seg6_local.h                   |   3 +
 kernel/bpf/verifier.c                             |   1 +
 net/core/filter.c                                 | 375 +++++++++++++++---
 net/ipv6/seg6_local.c                             | 176 ++++++++-
 tools/include/uapi/linux/bpf.h                    |  59 ++-
 tools/testing/selftests/bpf/Makefile              |   6 +-
 tools/testing/selftests/bpf/bpf_helpers.h         |  13 +-
 tools/testing/selftests/bpf/test_lwt_seg6local.c  | 438 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_lwt_seg6local.sh | 140 +++++++
 13 files changed, 1234 insertions(+), 74 deletions(-)
 create mode 100644 include/net/seg6_local.h
 create mode 100644 tools/testing/selftests/bpf/test_lwt_seg6local.c
 create mode 100755 tools/testing/selftests/bpf/test_lwt_seg6local.sh

-- 
2.16.1

^ permalink raw reply

* Re: [Cake] [PATCH net-next v2] Add Common Applications Kept Enhanced (cake) qdisc
From: Toke Høiland-Jørgensen @ 2018-04-24 15:41 UTC (permalink / raw)
  To: Georgios Amanakis; +Cc: Cake List, netdev
In-Reply-To: <CACvFP_hEv_cEPKFxz+7X5mvwbM+4oTOAxMcW52b51qYUpez1ow@mail.gmail.com>

Georgios Amanakis <gamanakis@gmail.com> writes:

> On Tue, 24 Apr 2018 13:44:06 +0200
> Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>
>> +config NET_SCH_CAKE
>> +       tristate "Common Applications Kept Enhanced (CAKE)"
>> +       help
>> +         Say Y here if you want to use the Common Applications Kept Enhanced
>> +          (CAKE) queue management algorithm.
>> +
>> +         To compile this driver as a module, choose M here: the module
>> +         will be called sch_cake.
>
> In net/sched/Kconfig we should probably add NF_CONNTRACK as a dependency:
> "depends on NF_CONNTRACK"
>
> Otherwise if NET_SCH_CAKE=y and NF_CONNTRACK=m compilation fails with:
>
> net/sched/sch_cake.o: In function `cake_enqueue':
> sch_cake.c:(.text+0x3e10): undefined reference to `nf_ct_get_tuplepr'
> sch_cake.c:(.text+0x3e3a): undefined reference to `nf_conntrack_find_get'
> make: *** [Makefile:1041: vmlinux] Error 1

Hmm we really don't want to have a hard depend on conntrack. We
currently ifdef the conntrack-specific bits thus:
#if IS_ENABLED(CONFIG_NF_CONNTRACK).

Does anyone know if there is a way to do this so the module/builtin
split doesn't bite us?

-Toke

^ permalink raw reply

* Re: [PATCH  1/8] net: ethernet: stmmac: add adaptation for stm32mp157c.
From: Andrew Lunn @ 2018-04-24 15:39 UTC (permalink / raw)
  To: Christophe Roullier
  Cc: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro,
	devicetree, linux-arm-kernel, netdev
In-Reply-To: <1524582120-4451-2-git-send-email-christophe.roullier@st.com>

On Tue, Apr 24, 2018 at 05:01:53PM +0200, Christophe Roullier wrote:

> +	case PHY_INTERFACE_MODE_RGMII:
> +		val = SYSCFG_PMCR_ETH_SEL_RGMII;
> +		if (dwmac->int_phyclk)
> +			val |= SYSCFG_PMCR_ETH_CLK_SEL;
> +		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RGMII\n");
> +		break;

Hi Christophe

What about PHY_INTERFACE_MODE_RGMII_ID, PHY_INTERFACE_MODE_RGMII_RXID
and PHY_INTERFACE_MODE_RGMII_TXID.

    Andrew

^ permalink raw reply

* Re: [Cake] [PATCH iproute2-next v3] Add support for cake qdisc
From: Toke Høiland-Jørgensen @ 2018-04-24 15:38 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, cake
In-Reply-To: <20180424081008.2ad7eaa7@xeon-e3>

Stephen Hemminger <stephen@networkplumber.org> writes:

> On Tue, 24 Apr 2018 16:52:57 +0200
> Toke Høiland-Jørgensen <toke@toke.dk> wrote:
>
>> Well, this is leftover from keeping track of different versions of the
>> out-of-tree patch, and we already broke compatibility pretty thoroughly
>> as a preparation for upstreaming. So I'm fine with dropping the version
>> check; will resend.
>> 
>> That being said, the versioning comes from the XSTATS API, which does
>> not use netlink attributes for its members, but rather passes through as
>> struct. So what is one supposed to do in this case?
>
> If a structure is likely to change, then it should be decomposed into nested
> netlink attributes. Once you send a structure over userspace API in netlink
> it is fixed forever.

Right. Is decomposing stats into netlink attributes actually possible
within the qdisc dump_stats callback? Could we do something like:

nla_start_nest(d->skb, TCA_CAKE_STATS);

from within cake_dump_stats() and read it in cake_print_xstats in tc?

-Toke

^ permalink raw reply

* Attention Please,
From: Mrs Riet Zongo @ 2018-04-24 15:30 UTC (permalink / raw)

In-Reply-To: <756271649.323858.1524583837122.ref@mail.yahoo.com>

Attention Please, 

Dear Friend, ac

My name is Mrs Ariet zongo. I am working with Bank of Africa here in Burkina Faso. Here in this bank existed dormant account for many years, which belong to one of our late foreign customer. 

When I discovered that there had been neither deposits nor withdrawals from this account for this long period, I decided to carry out a system investigation and discovered that none of the family member or relations of the late person are aware of this account, which means nobody will come to take it. The amount in this account stands at $5, 000 000 (Five Million USA Dollars). 

I want a foreign account where the bank will transfer this fund. I will front you in the bank as the Next of Kin to the late customer and back you up with relevant information. What the bank need is proof and information about the late customer which I will assist you on. This is a genuine, risk free and legal business transaction. 

All details shall be sent to you once I hear from you. The information as contained herein be accorded the necessary attention, urgency as well as the secrecy it deserves. If you are really sure of your integrity, trustworthy and confidentiality, reply back to me urgently. 


1. Full name:......... 
2. Current Address:......... 
3. Telephone N°:........... 
4. Occupation:............. 
5. Age:............ 
6. Country:........ 
7. Copy of International Passport Or ID card........... 

Waiting to hear from you soon. 
Regards 
Mrs Ariet zongo

^ permalink raw reply

* Re: [Cake] [PATCH net-next v2] Add Common Applications Kept Enhanced (cake) qdisc
From: Georgios Amanakis @ 2018-04-24 15:32 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: Cake List, netdev
In-Reply-To: <20180424081406.5127f1bc@xeon-e3>

On Tue, 24 Apr 2018 13:44:06 +0200
Toke Høiland-Jørgensen <toke@toke.dk> wrote:

> +config NET_SCH_CAKE
> +       tristate "Common Applications Kept Enhanced (CAKE)"
> +       help
> +         Say Y here if you want to use the Common Applications Kept Enhanced
> +          (CAKE) queue management algorithm.
> +
> +         To compile this driver as a module, choose M here: the module
> +         will be called sch_cake.

In net/sched/Kconfig we should probably add NF_CONNTRACK as a dependency:
"depends on NF_CONNTRACK"

Otherwise if NET_SCH_CAKE=y and NF_CONNTRACK=m compilation fails with:

net/sched/sch_cake.o: In function `cake_enqueue':
sch_cake.c:(.text+0x3e10): undefined reference to `nf_ct_get_tuplepr'
sch_cake.c:(.text+0x3e3a): undefined reference to `nf_conntrack_find_get'
make: *** [Makefile:1041: vmlinux] Error 1

^ permalink raw reply

* Re: [PATCH] kvmalloc: always use vmalloc if CONFIG_DEBUG_VM
From: Mikulas Patocka @ 2018-04-24 15:30 UTC (permalink / raw)
  To: Michal Hocko
  Cc: dm-devel, eric.dumazet, mst, netdev, linux-kernel, Matthew Wilcox,
	virtualization, linux-mm, edumazet, Andrew Morton, David Miller,
	Vlastimil Babka
In-Reply-To: <20180424133146.GG17484@dhcp22.suse.cz>



On Tue, 24 Apr 2018, Michal Hocko wrote:

> On Mon 23-04-18 20:25:15, Mikulas Patocka wrote:
> 
> > Fixing __vmalloc code 
> > is easy and it doesn't require cooperation with maintainers.
> 
> But it is a hack against the intention of the scope api.

It is not! You can fix __vmalloc now and you can convert the kernel to the 
scope API in 4 years. It's not one way or the other.

> It also alows maintainers to not care about their broken code.

Most maintainers don't even know that it's broken. Out of 14 subsystems 
using __vmalloc with GFP_NOIO/NOFS, only 2 realized that its 
implementation is broken and implemented a workaround (me and the XFS 
developers).

Misimplementing a function in a subtle and hard-to-notice way won't drive 
developers away from using it.

> > > > He refuses 15-line patch to fix GFP_NOIO bug because he believes that in 4 
> > > > years, the kernel will be refactored and GFP_NOIO will be eliminated. Why 
> > > > does he have veto over this part of the code? I'd much rather argue with 
> > > > people who have constructive comments about fixing bugs than with him.
> > > 
> > > I didn't NACK the patch AFAIR. I've said it is not a good idea longterm.
> > > I would be much more willing to change my mind if you would back your
> > > patch by a real bug report. Hacks are acceptable when we have a real
> > > issue in hands. But if we want to fix potential issue then better make
> > > it properly.
> > 
> > Developers should fix bugs in advance, not to wait until a crash hapens, 
> > is analyzed and reported.
> 
> I agree. But are those existing users broken in the first place? I have
> seen so many GFP_NOFS abuses that I would dare to guess that most of
> those vmalloc NOFS abusers can be simply turned into GFP_KERNEL. Maybe
> that is the reason we haven't heard any complains in years.

alloc_pages reclaims clean pages and most hard work is done by kswapd, so 
GFP_KERNEL doesn't cause much issues with writeback. But cheating isn't 
justified if you can get away with it. Incorrect GFP flags cause real 
problems with shrinkers - because shrinkers are called from alloc_pages 
and they do respond to GFP flags.

I had reported deadlock due to GFP issues (9d28eb12447). And the worst 
thing about these bug reports is that they are totally unreproducible and 
I get nothing, but a stacktrace in bugzilla. I had to guess what happened 
and I couldn't even test if the patch fixed the bug.

I'm not really happy that you are deliberately leaving these issues behind 
and making excuses.

Mikulas

^ permalink raw reply

* Re: simplify procfs code for seq_file instances
From: Andrew Morton @ 2018-04-24 15:19 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-rtc, Alessandro Zummo, Alexandre Belloni, devel,
	linux-kernel, linux-scsi, Corey Minyard, linux-ide,
	Greg Kroah-Hartman, jfs-discussion, linux-afs, linux-acpi, netdev,
	netfilter-devel, Alexander Viro, Jiri Slaby, linux-ext4,
	Alexey Dobriyan, megaraidlinux.pdl, drbd-dev
In-Reply-To: <20180424142304.GE26136@lst.de>

On Tue, 24 Apr 2018 16:23:04 +0200 Christoph Hellwig <hch@lst.de> wrote:

> On Thu, Apr 19, 2018 at 09:57:50PM +0300, Alexey Dobriyan wrote:
> > >     git://git.infradead.org/users/hch/misc.git proc_create
> > 
> > 
> > I want to ask if it is time to start using poorman function overloading
> > with _b_c_e(). There are millions of allocation functions for example,
> > all slightly difference, and people will add more. Seeing /proc interfaces
> > doubled like this is painful.
> 
> Function overloading is totally unacceptable.
> 
> And I very much disagree with a tradeoff that keeps 5000 lines of 
> code vs a few new helpers.

OK, the curiosity and suspense are killing me.  What the heck is
"function overloading with _b_c_e()"?

^ permalink raw reply

* Re: [Cake] [PATCH net-next v2] Add Common Applications Kept Enhanced (cake) qdisc
From: Stephen Hemminger @ 2018-04-24 15:14 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: netdev, cake
In-Reply-To: <20180424114407.5939-1-toke@toke.dk>

On Tue, 24 Apr 2018 13:44:06 +0200
Toke Høiland-Jørgensen <toke@toke.dk> wrote:

> +static const u8 precedence[] = {0, 0, 0, 0, 0, 0, 0, 0,
> +				1, 1, 1, 1, 1, 1, 1, 1,
> +				2, 2, 2, 2, 2, 2, 2, 2,
> +				3, 3, 3, 3, 3, 3, 3, 3,
> +				4, 4, 4, 4, 4, 4, 4, 4,
> +				5, 5, 5, 5, 5, 5, 5, 5,
> +				6, 6, 6, 6, 6, 6, 6, 6,
> +				7, 7, 7, 7, 7, 7, 7, 7,
> +				};

Minor nit.  The kernel style for initializing array should be used
here.

static const u8 precedence[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1,
...
};

^ permalink raw reply

* Re: [PATCH net-next] Revert "net: init sk_cookie for inet socket"
From: Yafang Shao @ 2018-04-24 15:12 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <29dbd735-5fd7-d4e4-288f-220f2075b645@gmail.com>

On Tue, Apr 24, 2018 at 8:38 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> On 04/24/2018 05:05 AM, Yafang Shao wrote:
>> This revert commit <c6849a3ac17e> ("net: init sk_cookie for inet socket")
>>
>> Per discussion with Eric.
>>
>
> I suggest you include a bit more details, about cache line false sharing.
>

Coud we adjust the struct common to avoid such kind of cache line
false sharing ?
I mean removing "atomic64_t  skc_cookie;" from struct sock_common and
place it in struct inet_sock ?

Thanks
Yafang

^ permalink raw reply

* Re: [Cake] [PATCH net-next v2] Add Common Applications Kept Enhanced (cake) qdisc
From: Stephen Hemminger @ 2018-04-24 15:11 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: netdev, cake
In-Reply-To: <20180424114407.5939-1-toke@toke.dk>

On Tue, 24 Apr 2018 13:44:06 +0200
Toke Høiland-Jørgensen <toke@toke.dk> wrote:

> +struct tc_cake_xstats {
> +	__u16 version;
> +	__u16 tin_stats_size; /* == sizeof(struct tc_cake_tin_stats) */
> +	__u32 capacity_estimate;
> +	__u32 memory_limit;
> +	__u32 memory_used;
> +	__u8  tin_cnt;
> +	__u8  avg_trnoff;
> +	__u16 max_netlen;
> +	__u16 max_adjlen;
> +	__u16 min_netlen;
> +	__u16 min_adjlen;
> +
> +	__u16 spare1;
> +	__u32 spare2;
> +
> +	struct tc_cake_tin_stats tin_stats[0]; /* keep last */
> +};

No versioning allowed in userspace API. You need to drop version and make
it permanent.

^ permalink raw reply

* Re: [PATCH v2 net-next] Revert "net: init sk_cookie for inet socket"
From: Eric Dumazet @ 2018-04-24 15:10 UTC (permalink / raw)
  To: Yafang Shao, eric.dumazet, davem; +Cc: netdev
In-Reply-To: <1524582465-11055-1-git-send-email-laoar.shao@gmail.com>



On 04/24/2018 08:07 AM, Yafang Shao wrote:
> This reverts commit <c6849a3ac17e> ("net: init sk_cookie for inet socket")
> 
> Per discussion with Eric, when update sock_net(sk)->cookie_gen, the
> whole cache cache line will be invalidated, as this cache line is shared
> with all cpus, that may cause great performace hit.
> 
> Bellow is the data form Eric.
> "Performance is reduced from ~5 Mpps to ~3.8 Mpps with 16 RX queues on
> my host" when running synflood test.
> 
> Have to revert it to prevent from cache line false sharing.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

Reviewed-by: Eric Dumazet <edumazet@google.com>

Thanks !

^ permalink raw reply

* Re: [PATCH v2 net-next] Revert "net: init sk_cookie for inet socket"
From: David Miller @ 2018-04-24 15:10 UTC (permalink / raw)
  To: laoar.shao; +Cc: eric.dumazet, netdev
In-Reply-To: <1524582465-11055-1-git-send-email-laoar.shao@gmail.com>

From: Yafang Shao <laoar.shao@gmail.com>
Date: Tue, 24 Apr 2018 23:07:45 +0800

> This reverts commit <c6849a3ac17e> ("net: init sk_cookie for inet socket")
> 
> Per discussion with Eric, when update sock_net(sk)->cookie_gen, the
> whole cache cache line will be invalidated, as this cache line is shared
> with all cpus, that may cause great performace hit.
> 
> Bellow is the data form Eric.
> "Performance is reduced from ~5 Mpps to ~3.8 Mpps with 16 RX queues on
> my host" when running synflood test.
> 
> Have to revert it to prevent from cache line false sharing.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

Applied, thank you.

^ permalink raw reply

* Re: [Cake] [PATCH iproute2-next v3] Add support for cake qdisc
From: Stephen Hemminger @ 2018-04-24 15:10 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: netdev, cake
In-Reply-To: <87efj47j1y.fsf@toke.dk>

On Tue, 24 Apr 2018 16:52:57 +0200
Toke Høiland-Jørgensen <toke@toke.dk> wrote:

> Well, this is leftover from keeping track of different versions of the
> out-of-tree patch, and we already broke compatibility pretty thoroughly
> as a preparation for upstreaming. So I'm fine with dropping the version
> check; will resend.
> 
> That being said, the versioning comes from the XSTATS API, which does
> not use netlink attributes for its members, but rather passes through as
> struct. So what is one supposed to do in this case?

If a structure is likely to change, then it should be decomposed into nested
netlink attributes. Once you send a structure over userspace API in netlink
it is fixed forever.

^ permalink raw reply

* Re: [PATCH] l2tp: fix l2tp_eth_dev_xmit()'s return type
From: Guillaume Nault @ 2018-04-24 15:09 UTC (permalink / raw)
  To: Luc Van Oostenryck
  Cc: linux-kernel, David S. Miller, James Chapman, Dominik Heidler,
	netdev
In-Reply-To: <20180424131855.5618-1-luc.vanoostenryck@gmail.com>

On Tue, Apr 24, 2018 at 03:18:53PM +0200, Luc Van Oostenryck wrote:
> The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
> which is a typedef for an enum type, but the implementation in this
> driver returns an 'int'.
> 
> Fix this by returning 'netdev_tx_t' in this driver too.
> 
> Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
> ---
>  net/l2tp/l2tp_eth.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
> index 5c366ecfa..e027f8b54 100644
> --- a/net/l2tp/l2tp_eth.c
> +++ b/net/l2tp/l2tp_eth.c
> @@ -77,7 +77,7 @@ static void l2tp_eth_dev_uninit(struct net_device *dev)
>  	 */
>  }
>  
> -static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> +static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> 
You could easily keep this line under the 80 columns limit by moving
the net_device parameter on the next line.

^ permalink raw reply

* Re: [PATCH v2 net] sfc: ARFS filter IDs
From: David Miller @ 2018-04-24 15:09 UTC (permalink / raw)
  To: ecree; +Cc: linux-net-drivers, netdev
In-Reply-To: <5b202ff8-e478-3e2d-1b61-7cc91d62d712@solarflare.com>

From: Edward Cree <ecree@solarflare.com>
Date: Tue, 24 Apr 2018 16:06:24 +0100

> On 24/04/18 15:14, Edward Cree wrote:
>> Associate an arbitrary ID with each ARFS filter, allowing to properly query
>>  for expiry.  The association is maintained in a hash table, which is
>>  protected by a spinlock.
>>
>> v2: fixed uninitialised variable (thanks davem and lkp-robot).
>>
>> Fixes: 3af0f34290f6 ("sfc: replace asynchronous filter operations")
>> Signed-off-by: Edward Cree <ecree@solarflare.com>
>> ---
> self-NAK, lkp-robot found another problem that's still present here.

I was just about to mention that, ok.

^ permalink raw reply

* Re: [PATCH net-next V3 0/3] Introduce adaptive TX interrupt moderation to net DIM
From: David Miller @ 2018-04-24 15:08 UTC (permalink / raw)
  To: andrew.gospodarek; +Cc: talgi, netdev, tariqt, saeedm, f.fainelli
In-Reply-To: <20180424150246.GA28159@C02RW35GFVH8.dhcp.broadcom.net>

From: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Date: Tue, 24 Apr 2018 11:02:46 -0400

> There is also an advantage that since this is done a per queue basis one
> queue that may be handling a bulk transfer can have its coalescing
> parameters adjusted while others stay at a setting that keeps traffic
> flowing at low latency.  This is helpful when a system is receiving a
> large amount of traffic on one queue but also sending data on another
> queue and quick processing of acks keeps data flowing at high rate with
> low CPU utilization in both directions.

Ok, that's the missing piece on my end.  My original analysis of this
problem space was on uni-queue NICs, and the problem there is that the
sampling algorithm is exposed to the traffic anomalies of the entire
link rather than a specific sub-class of traffic as is the case with
multiqueue NICs.

^ permalink raw reply

* [PATCH v2 net-next] Revert "net: init sk_cookie for inet socket"
From: Yafang Shao @ 2018-04-24 15:07 UTC (permalink / raw)
  To: eric.dumazet, davem; +Cc: netdev, Yafang Shao

This reverts commit <c6849a3ac17e> ("net: init sk_cookie for inet socket")

Per discussion with Eric, when update sock_net(sk)->cookie_gen, the
whole cache cache line will be invalidated, as this cache line is shared
with all cpus, that may cause great performace hit.

Bellow is the data form Eric.
"Performance is reduced from ~5 Mpps to ~3.8 Mpps with 16 RX queues on
my host" when running synflood test.

Have to revert it to prevent from cache line false sharing.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/linux/sock_diag.h | 9 ---------
 net/ipv4/tcp_input.c      | 8 +-------
 2 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 5c916e6..15fe980 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -25,15 +25,6 @@ struct sock_diag_handler {
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
-static inline
-void sock_init_cookie(struct sock *sk)
-{
-	u64 res;
-
-	res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
-	atomic64_set(&sk->sk_cookie, res);
-}
-
 u64 sock_gen_cookie(struct sock *sk);
 int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
 void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 17b7858..5a17cfc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,7 +78,6 @@
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
-#include <linux/sock_diag.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -6191,15 +6190,10 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 #if IS_ENABLED(CONFIG_IPV6)
 		ireq->pktopts = NULL;
 #endif
+		atomic64_set(&ireq->ir_cookie, 0);
 		ireq->ireq_state = TCP_NEW_SYN_RECV;
 		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
 		ireq->ireq_family = sk_listener->sk_family;
-
-		BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
-					offsetof(struct sock, sk_cookie));
-		BUILD_BUG_ON(offsetof(struct inet_request_sock, ireq_net) !=
-					offsetof(struct sock, sk_net));
-		sock_init_cookie((struct sock *)ireq);
 	}
 
 	return req;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH v2 net] sfc: ARFS filter IDs
From: Edward Cree @ 2018-04-24 15:06 UTC (permalink / raw)
  To: linux-net-drivers, David Miller; +Cc: netdev
In-Reply-To: <0829d4b1-45fd-d4d0-3883-0e59dc699743@solarflare.com>

On 24/04/18 15:14, Edward Cree wrote:
> Associate an arbitrary ID with each ARFS filter, allowing to properly query
>  for expiry.  The association is maintained in a hash table, which is
>  protected by a spinlock.
>
> v2: fixed uninitialised variable (thanks davem and lkp-robot).
>
> Fixes: 3af0f34290f6 ("sfc: replace asynchronous filter operations")
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---
self-NAK, lkp-robot found another problem that's still present here.

^ permalink raw reply

* [PATCH  5/8] ARM: dts: stm32: Add ethernet dwmac on stm32mp1
From: Christophe Roullier @ 2018-04-24 15:01 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier
In-Reply-To: <1524582120-4451-1-git-send-email-christophe.roullier@st.com>

Add Ethernet support (Synopsys MAC IP 4.20a) on stm32mp1 SOC.
Enable feature supported by the stmmac driver, such as TSO.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 arch/arm/boot/dts/stm32mp157c.dtsi | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index f98a0ae..4b40aa5 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -179,5 +179,35 @@
 			clocks = <&rcc USART1_K>;
 			status = "disabled";
 		};
+
+		stmmac_axi_config_0: stmmac-axi-config {
+			snps,wr_osr_lmt = <0x7>;
+			snps,rd_osr_lmt = <0x7>;
+			snps,blen = <0 0 0 0 16 8 4>;
+		};
+
+		ethernet0: ethernet@5800a000 {
+			compatible = "st,stm32mp1-dwmac", "snps,dwmac-4.20a";
+			reg = <0x5800a000 0x2000>;
+			reg-names = "stmmaceth";
+			interrupts-extended = <&intc GIC_SPI 61 IRQ_TYPE_NONE>;
+			interrupt-names = "macirq";
+			clock-names = "stmmaceth",
+				      "mac-clk-tx",
+				      "mac-clk-rx",
+				      "ethstp",
+				      "syscfg-clk";
+			clocks = <&rcc ETHMAC>,
+				 <&rcc ETHTX>,
+				 <&rcc ETHRX>,
+				 <&rcc ETHSTP>,
+				 <&rcc SYSCFG>;
+			st,syscon = <&syscfg 0x4>;
+			snps,mixed-burst;
+			snps,pbl = <2>;
+			snps,axi-config = <&stmmac_axi_config_0>;
+			snps,tso;
+			status = "disabled";
+		};
 	};
 };
-- 
1.9.1

^ permalink raw reply related

* [PATCH  7/8] ARM: dts: stm32: add support of ethernet on stm32mp157c-ev1
From: Christophe Roullier @ 2018-04-24 15:01 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier
In-Reply-To: <1524582120-4451-1-git-send-email-christophe.roullier@st.com>

MAC is connected to a PHY in RGMII mode.

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 arch/arm/boot/dts/stm32mp157c-ev1.dts | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts
index 57e6dbc..a7fee5c 100644
--- a/arch/arm/boot/dts/stm32mp157c-ev1.dts
+++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts
@@ -17,5 +17,25 @@
 
 	aliases {
 		serial0 = &uart4;
+		ethernet0 = &ethernet0;
+	};
+};
+
+&ethernet0 {
+	status = "okay";
+	pinctrl-0 = <&ethernet0_rgmii_pins_a>;
+	pinctrl-1 = <&ethernet0_rgmii_pins_sleep_a>;
+	pinctrl-names = "default", "sleep";
+	phy-mode = "rgmii";
+	max-speed = <1000>;
+	phy-handle = <&phy0>;
+
+	mdio0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "snps,dwmac-mdio";
+		phy0: ethernet-phy@0 {
+			reg = <0>;
+		};
 	};
 };
-- 
1.9.1

^ permalink raw reply related

* [PATCH  6/8] net: stmmac: add dwmac-4.20a compatible
From: Christophe Roullier @ 2018-04-24 15:01 UTC (permalink / raw)
  To: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro
  Cc: devicetree, linux-arm-kernel, netdev, christophe.roullier
In-Reply-To: <1524582120-4451-1-git-send-email-christophe.roullier@st.com>

Manage dwmac-4.20a version from synopsys

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index ebd3e5f..6d141f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -472,7 +472,8 @@ struct plat_stmmacenet_data *
 	}
 
 	if (of_device_is_compatible(np, "snps,dwmac-4.00") ||
-	    of_device_is_compatible(np, "snps,dwmac-4.10a")) {
+	    of_device_is_compatible(np, "snps,dwmac-4.10a") ||
+	    of_device_is_compatible(np, "snps,dwmac-4.20a")) {
 		plat->has_gmac4 = 1;
 		plat->has_gmac = 0;
 		plat->pmt = 1;
-- 
1.9.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox