Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH bpf-next 07/11] bpf: make sure to clear unused fields in tunnel/xfrm state fetch
From: Daniel Borkmann @ 2018-05-28  0:43 UTC (permalink / raw)
  To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>

Since the remaining bits are not filled in struct bpf_tunnel_key
resp. struct bpf_xfrm_state and originate from uninitialized stack
space, we should make sure to clear them before handing control
back to the program.

Also add a padding element to struct bpf_xfrm_state for future use
similar as we have in struct bpf_tunnel_key and clear it as well.

  struct bpf_xfrm_state {
      __u32                      reqid;            /*     0     4 */
      __u32                      spi;              /*     4     4 */
      __u16                      family;           /*     8     2 */

      /* XXX 2 bytes hole, try to pack */

      union {
          __u32              remote_ipv4;          /*           4 */
          __u32              remote_ipv6[4];       /*          16 */
      };                                           /*    12    16 */

      /* size: 28, cachelines: 1, members: 4 */
      /* sum members: 26, holes: 1, sum holes: 2 */
      /* last cacheline: 28 bytes */
  };

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 3 ++-
 net/core/filter.c        | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e2853aa..7108711 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2214,7 +2214,7 @@ struct bpf_tunnel_key {
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
-	__u16 tunnel_ext;
+	__u16 tunnel_ext;	/* Padding, future use. */
 	__u32 tunnel_label;
 };
 
@@ -2225,6 +2225,7 @@ struct bpf_xfrm_state {
 	__u32 reqid;
 	__u32 spi;	/* Stored in network byte order */
 	__u16 family;
+	__u16 ext;	/* Padding, future use. */
 	union {
 		__u32 remote_ipv4;	/* Stored in network byte order */
 		__u32 remote_ipv6[4];	/* Stored in network byte order */
diff --git a/net/core/filter.c b/net/core/filter.c
index 717c740..5ceb5e6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3445,6 +3445,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
 	to->tunnel_id = be64_to_cpu(info->key.tun_id);
 	to->tunnel_tos = info->key.tos;
 	to->tunnel_ttl = info->key.ttl;
+	to->tunnel_ext = 0;
 
 	if (flags & BPF_F_TUNINFO_IPV6) {
 		memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
@@ -3452,6 +3453,8 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
 		to->tunnel_label = be32_to_cpu(info->key.label);
 	} else {
 		to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+		memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
+		to->tunnel_label = 0;
 	}
 
 	if (unlikely(size != sizeof(struct bpf_tunnel_key)))
@@ -4047,11 +4050,14 @@ BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
 	to->reqid = x->props.reqid;
 	to->spi = x->id.spi;
 	to->family = x->props.family;
+	to->ext = 0;
+
 	if (to->family == AF_INET6) {
 		memcpy(to->remote_ipv6, x->props.saddr.a6,
 		       sizeof(to->remote_ipv6));
 	} else {
 		to->remote_ipv4 = x->props.saddr.a4;
+		memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
 	}
 
 	return 0;
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 06/11] bpf: add bpf_skb_cgroup_id helper
From: Daniel Borkmann @ 2018-05-28  0:43 UTC (permalink / raw)
  To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>

Add a new bpf_skb_cgroup_id() helper that allows to retrieve the
cgroup id from the skb's socket. This is useful in particular to
enable bpf_get_cgroup_classid()-like behavior for cgroup v1 in
cgroup v2 by allowing ID based matching on egress. This can in
particular be used in combination with applying policy e.g. from
map lookups, and also complements the older bpf_skb_under_cgroup()
interface. In user space the cgroup id for a given path can be
retrieved through the f_handle as demonstrated in [0] recently.

  [0] https://lkml.org/lkml/2018/5/22/1190

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 17 ++++++++++++++++-
 net/core/filter.c        | 29 +++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9b8c6e3..e2853aa 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2004,6 +2004,20 @@ union bpf_attr {
  * 		direct packet access.
  *	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * 	Description
+ * 		Return the cgroup v2 id of the socket associated with the *skb*.
+ * 		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * 		helper for cgroup v1 by providing a tag resp. identifier that
+ * 		can be matched on or used for map lookups e.g. to implement
+ * 		policy. The cgroup v2 id of a given path in the hierarchy is
+ * 		exposed in user space through the f_handle API in order to get
+ * 		to the same 64-bit id.
+ *
+ * 		This helper can be used on TC egress path, but not on ingress.
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2082,7 +2096,8 @@ union bpf_attr {
 	FN(lwt_push_encap),		\
 	FN(lwt_seg6_store_bytes),	\
 	FN(lwt_seg6_adjust_srh),	\
-	FN(lwt_seg6_action),
+	FN(lwt_seg6_action),		\
+	FN(skb_cgroup_id),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index acf1f4f..717c740 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3661,6 +3661,27 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+#ifdef CONFIG_SOCK_CGROUP_DATA
+BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
+{
+	struct sock *sk = skb_to_full_sk(skb);
+	struct cgroup *cgrp;
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	return cgrp->kn->id.id;
+}
+
+static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
+	.func           = bpf_skb_cgroup_id,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
+#endif
+
 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
 				  unsigned long off, unsigned long len)
 {
@@ -4741,12 +4762,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_cookie_proto;
 	case BPF_FUNC_get_socket_uid:
 		return &bpf_get_socket_uid_proto;
+	case BPF_FUNC_fib_lookup:
+		return &bpf_skb_fib_lookup_proto;
 #ifdef CONFIG_XFRM
 	case BPF_FUNC_skb_get_xfrm_state:
 		return &bpf_skb_get_xfrm_state_proto;
 #endif
-	case BPF_FUNC_fib_lookup:
-		return &bpf_skb_fib_lookup_proto;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	case BPF_FUNC_skb_cgroup_id:
+		return &bpf_skb_cgroup_id_proto;
+#endif
 	default:
 		return bpf_base_func_proto(func_id);
 	}
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 05/11] bpf: avoid retpoline for lookup/update/delete calls on maps
From: Daniel Borkmann @ 2018-05-28  0:43 UTC (permalink / raw)
  To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>

While some of the BPF map lookup helpers provide a ->map_gen_lookup()
callback for inlining the map lookup altogether it is not available
for every map, so the remaining ones have to call bpf_map_lookup_elem()
helper which does a dispatch to map->ops->map_lookup_elem(). In
times of retpolines, this will control and trap speculative execution
rather than letting it do its work for the indirect call and will
therefore cause a slowdown. Likewise, bpf_map_update_elem() and
bpf_map_delete_elem() do not have an inlined version and need to call
into their map->ops->map_update_elem() resp. map->ops->map_delete_elem()
handlers.

Before:

  # bpftool p d x i 1
    0: (bf) r2 = r10
    1: (07) r2 += -8
    2: (7a) *(u64 *)(r2 +0) = 0
    3: (18) r1 = map[id:1]
    5: (85) call __htab_map_lookup_elem#232656
    6: (15) if r0 == 0x0 goto pc+4
    7: (71) r1 = *(u8 *)(r0 +35)
    8: (55) if r1 != 0x0 goto pc+1
    9: (72) *(u8 *)(r0 +35) = 1
   10: (07) r0 += 56
   11: (15) if r0 == 0x0 goto pc+4
   12: (bf) r2 = r0
   13: (18) r1 = map[id:1]
   15: (85) call bpf_map_delete_elem#215008  <-- indirect call via
   16: (95) exit                                 helper

After:

  # bpftool p d x i 1
    0: (bf) r2 = r10
    1: (07) r2 += -8
    2: (7a) *(u64 *)(r2 +0) = 0
    3: (18) r1 = map[id:1]
    5: (85) call __htab_map_lookup_elem#233328
    6: (15) if r0 == 0x0 goto pc+4
    7: (71) r1 = *(u8 *)(r0 +35)
    8: (55) if r1 != 0x0 goto pc+1
    9: (72) *(u8 *)(r0 +35) = 1
   10: (07) r0 += 56
   11: (15) if r0 == 0x0 goto pc+4
   12: (bf) r2 = r0
   13: (18) r1 = map[id:1]
   15: (85) call htab_lru_map_delete_elem#238240  <-- direct call
   16: (95) exit

In all three lookup/update/delete cases however we can use the actual
address of the map callback directly if we find that there's only a
single path with a map pointer leading to the helper call, meaning
when the map pointer has not been poisoned from verifier side.
Example code can be seen above for the delete case.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h |  3 +++
 kernel/bpf/hashtab.c   | 12 ++++++---
 kernel/bpf/verifier.c  | 67 +++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 62 insertions(+), 20 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index b443f70..d407ede 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -301,6 +301,9 @@ struct xdp_buff;
 
 /* Function call */
 
+#define BPF_CAST_CALL(x)					\
+		((u64 (*)(u64, u64, u64, u64, u64))(x))
+
 #define BPF_EMIT_CALL(FUNC)					\
 	((struct bpf_insn) {					\
 		.code  = BPF_JMP | BPF_CALL,			\
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b76828f..3ca2198 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -503,7 +503,9 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
 
-	*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
+		     (void *(*)(struct bpf_map *map, void *key))NULL));
+	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
 				offsetof(struct htab_elem, key) +
@@ -530,7 +532,9 @@ static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
 	const int ret = BPF_REG_0;
 	const int ref_reg = BPF_REG_1;
 
-	*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
+		     (void *(*)(struct bpf_map *map, void *key))NULL));
+	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
 	*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
 			      offsetof(struct htab_elem, lru_node) +
@@ -1369,7 +1373,9 @@ static u32 htab_of_map_gen_lookup(struct bpf_map *map,
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
 
-	*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
+		     (void *(*)(struct bpf_map *map, void *key))NULL));
+	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
 				offsetof(struct htab_elem, key) +
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4f4786e..5684b15 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2421,8 +2421,11 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
 
 	if (func_id != BPF_FUNC_tail_call &&
-	    func_id != BPF_FUNC_map_lookup_elem)
+	    func_id != BPF_FUNC_map_lookup_elem &&
+	    func_id != BPF_FUNC_map_update_elem &&
+	    func_id != BPF_FUNC_map_delete_elem)
 		return 0;
+
 	if (meta->map_ptr == NULL) {
 		verbose(env, "kernel subsystem misconfigured verifier\n");
 		return -EINVAL;
@@ -5586,6 +5589,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 	struct bpf_insn *insn = prog->insnsi;
 	const struct bpf_func_proto *fn;
 	const int insn_cnt = prog->len;
+	const struct bpf_map_ops *ops;
 	struct bpf_insn_aux_data *aux;
 	struct bpf_insn insn_buf[16];
 	struct bpf_prog *new_prog;
@@ -5715,10 +5719,13 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		}
 
 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
-		 * handlers are currently limited to 64 bit only.
+		 * and other inlining handlers are currently limited to 64 bit
+		 * only.
 		 */
 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
-		    insn->imm == BPF_FUNC_map_lookup_elem) {
+		    (insn->imm == BPF_FUNC_map_lookup_elem ||
+		     insn->imm == BPF_FUNC_map_update_elem ||
+		     insn->imm == BPF_FUNC_map_delete_elem)) {
 			aux = &env->insn_aux_data[i + delta];
 			if (bpf_map_ptr_poisoned(aux))
 				goto patch_call_imm;
@@ -5727,23 +5734,49 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			if (!map_ptr->ops->map_gen_lookup)
 				goto patch_call_imm;
 
-			cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
-			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
-				verbose(env, "bpf verifier is misconfigured\n");
-				return -EINVAL;
-			}
+			ops = map_ptr->ops;
+			if (insn->imm == BPF_FUNC_map_lookup_elem &&
+			    ops->map_gen_lookup) {
+				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
+				if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+					verbose(env, "bpf verifier is misconfigured\n");
+					return -EINVAL;
+				}
 
-			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
-						       cnt);
-			if (!new_prog)
-				return -ENOMEM;
+				new_prog = bpf_patch_insn_data(env, i + delta,
+							       insn_buf, cnt);
+				if (!new_prog)
+					return -ENOMEM;
 
-			delta += cnt - 1;
+				delta    += cnt - 1;
+				env->prog = prog = new_prog;
+				insn      = new_prog->insnsi + i + delta;
+				continue;
+			}
 
-			/* keep walking new program and skip insns we just inserted */
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-			continue;
+			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
+				     (void *(*)(struct bpf_map *map, void *key))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
+				     (int (*)(struct bpf_map *map, void *key))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
+				     (int (*)(struct bpf_map *map, void *key, void *value,
+					      u64 flags))NULL));
+			switch (insn->imm) {
+			case BPF_FUNC_map_lookup_elem:
+				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
+					    __bpf_call_base;
+				continue;
+			case BPF_FUNC_map_update_elem:
+				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
+					    __bpf_call_base;
+				continue;
+			case BPF_FUNC_map_delete_elem:
+				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
+					    __bpf_call_base;
+				continue;
+			}
+
+			goto patch_call_imm;
 		}
 
 		if (insn->imm == BPF_FUNC_redirect_map) {
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 08/11] bpf: fix cbpf parser bug for octal numbers
From: Daniel Borkmann @ 2018-05-28  0:43 UTC (permalink / raw)
  To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>

Range is 0-7, not 0-9, otherwise parser silently excludes it from the
strtol() rather than throwing an error.

Reported-by: Marc Boschma <marc@boschma.cx>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpf_exp.l | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpf_exp.l b/tools/bpf/bpf_exp.l
index bd83149..4da8d05 100644
--- a/tools/bpf/bpf_exp.l
+++ b/tools/bpf/bpf_exp.l
@@ -175,7 +175,7 @@ extern void yyerror(const char *str);
 			yylval.number = strtol(yytext, NULL, 10);
 			return number;
 		}
-([0][0-9]+)	{
+([0][0-7]+)	{
 			yylval.number = strtol(yytext + 1, NULL, 8);
 			return number;
 		}
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 09/11] bpf: fix context access in tracing progs on 32 bit archs
From: Daniel Borkmann @ 2018-05-28  0:43 UTC (permalink / raw)
  To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>

Wang reported that all the testcases for BPF_PROG_TYPE_PERF_EVENT
program type in test_verifier report the following errors on x86_32:

  172/p unpriv: spill/fill of different pointers ldx FAIL
  Unexpected error message!
  0: (bf) r6 = r10
  1: (07) r6 += -8
  2: (15) if r1 == 0x0 goto pc+3
  R1=ctx(id=0,off=0,imm=0) R6=fp-8,call_-1 R10=fp0,call_-1
  3: (bf) r2 = r10
  4: (07) r2 += -76
  5: (7b) *(u64 *)(r6 +0) = r2
  6: (55) if r1 != 0x0 goto pc+1
  R1=ctx(id=0,off=0,imm=0) R2=fp-76,call_-1 R6=fp-8,call_-1 R10=fp0,call_-1 fp-8=fp
  7: (7b) *(u64 *)(r6 +0) = r1
  8: (79) r1 = *(u64 *)(r6 +0)
  9: (79) r1 = *(u64 *)(r1 +68)
  invalid bpf_context access off=68 size=8

  378/p check bpf_perf_event_data->sample_period byte load permitted FAIL
  Failed to load prog 'Permission denied'!
  0: (b7) r0 = 0
  1: (71) r0 = *(u8 *)(r1 +68)
  invalid bpf_context access off=68 size=1

  379/p check bpf_perf_event_data->sample_period half load permitted FAIL
  Failed to load prog 'Permission denied'!
  0: (b7) r0 = 0
  1: (69) r0 = *(u16 *)(r1 +68)
  invalid bpf_context access off=68 size=2

  380/p check bpf_perf_event_data->sample_period word load permitted FAIL
  Failed to load prog 'Permission denied'!
  0: (b7) r0 = 0
  1: (61) r0 = *(u32 *)(r1 +68)
  invalid bpf_context access off=68 size=4

  381/p check bpf_perf_event_data->sample_period dword load permitted FAIL
  Failed to load prog 'Permission denied'!
  0: (b7) r0 = 0
  1: (79) r0 = *(u64 *)(r1 +68)
  invalid bpf_context access off=68 size=8

Reason is that struct pt_regs on x86_32 doesn't fully align to 8 byte
boundary due to its size of 68 bytes.

Therefore, bpf_ctx_narrow_access_ok() will then bail out saying that
off & (size_default - 1) which is 68 & 7 doesn't cleanly align in the
case of sample_period access from struct bpf_perf_event_data, hence
verifier wrongly thinks we might be doing an unaligned access here.
Therefore adjust this down to machine size and check the offset for
narrow access on that basis.

We also need to fix pe_prog_is_valid_access(), since we hit the check
for off % size != 0 (e.g. 68 % 8 -> 4) in the first and last test.

Reported-by: Wang YanQing <udknight@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h   | 30 ++++++++++++++++++++++++------
 kernel/trace/bpf_trace.c | 10 ++++++++--
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index d407ede..89903d2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -639,16 +639,34 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 	return prog->type == BPF_PROG_TYPE_UNSPEC;
 }
 
-static inline bool
-bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default)
+static inline u32 bpf_ctx_off_adjust_machine(u32 size)
+{
+	const u32 size_machine = sizeof(unsigned long);
+
+	if (size > size_machine && size % size_machine == 0)
+		size = size_machine;
+
+	return size;
+}
+
+static inline bool bpf_ctx_narrow_align_ok(u32 off, u32 size_access,
+					   u32 size_default)
 {
-	bool off_ok;
+	size_default = bpf_ctx_off_adjust_machine(size_default);
+	size_access  = bpf_ctx_off_adjust_machine(size_access);
+
 #ifdef __LITTLE_ENDIAN
-	off_ok = (off & (size_default - 1)) == 0;
+	return (off & (size_default - 1)) == 0;
 #else
-	off_ok = (off & (size_default - 1)) + size == size_default;
+	return (off & (size_default - 1)) + size_access == size_default;
 #endif
-	return off_ok && size <= size_default && (size & (size - 1)) == 0;
+}
+
+static inline bool
+bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
+{
+	return bpf_ctx_narrow_align_ok(off, size, size_default) &&
+	       size <= size_default && (size & (size - 1)) == 0;
 }
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 81fdf2f..7269530 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -880,8 +880,14 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
 		return false;
 	if (type != BPF_READ)
 		return false;
-	if (off % size != 0)
-		return false;
+	if (off % size != 0) {
+		if (sizeof(unsigned long) != 4)
+			return false;
+		if (size != 8)
+			return false;
+		if (off % size != 4)
+			return false;
+	}
 
 	switch (off) {
 	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 11/11] bpf, doc: add missing patchwork url and libbpf to maintainers
From: Daniel Borkmann @ 2018-05-28  0:43 UTC (permalink / raw)
  To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>

Add missing bits under tools/lib/bpf/ and also Q: entry in order to
make it easier for people to retrieve current patch queue.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f492431..2fd51db 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2722,6 +2722,7 @@ L:	netdev@vger.kernel.org
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
+Q:	https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
 S:	Supported
 F:	arch/x86/net/bpf_jit*
 F:	Documentation/networking/filter.txt
@@ -2740,6 +2741,7 @@ F:	net/sched/act_bpf.c
 F:	net/sched/cls_bpf.c
 F:	samples/bpf/
 F:	tools/bpf/
+F:	tools/lib/bpf/
 F:	tools/testing/selftests/bpf/
 
 BROADCOM B44 10/100 ETHERNET DRIVER
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 10/11] bpf: sync bpf uapi header with tools
From: Daniel Borkmann @ 2018-05-28  0:43 UTC (permalink / raw)
  To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>

Pull in recent changes from include/uapi/linux/bpf.h.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9b8c6e3..7108711 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2004,6 +2004,20 @@ union bpf_attr {
  * 		direct packet access.
  *	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * 	Description
+ * 		Return the cgroup v2 id of the socket associated with the *skb*.
+ * 		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * 		helper for cgroup v1 by providing a tag resp. identifier that
+ * 		can be matched on or used for map lookups e.g. to implement
+ * 		policy. The cgroup v2 id of a given path in the hierarchy is
+ * 		exposed in user space through the f_handle API in order to get
+ * 		to the same 64-bit id.
+ *
+ * 		This helper can be used on TC egress path, but not on ingress.
+ * 	Return
+ * 		The id is returned or 0 in case the id could not be retrieved.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2082,7 +2096,8 @@ union bpf_attr {
 	FN(lwt_push_encap),		\
 	FN(lwt_seg6_store_bytes),	\
 	FN(lwt_seg6_adjust_srh),	\
-	FN(lwt_seg6_action),
+	FN(lwt_seg6_action),		\
+	FN(skb_cgroup_id),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2199,7 +2214,7 @@ struct bpf_tunnel_key {
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
-	__u16 tunnel_ext;
+	__u16 tunnel_ext;	/* Padding, future use. */
 	__u32 tunnel_label;
 };
 
@@ -2210,6 +2225,7 @@ struct bpf_xfrm_state {
 	__u32 reqid;
 	__u32 spi;	/* Stored in network byte order */
 	__u16 family;
+	__u16 ext;	/* Padding, future use. */
 	union {
 		__u32 remote_ipv4;	/* Stored in network byte order */
 		__u32 remote_ipv6[4];	/* Stored in network byte order */
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH v4 net] stmmac: 802.1ad tag stripping support fix
From: Toshiaki Makita @ 2018-05-28  0:44 UTC (permalink / raw)
  To: Elad Nachman, David Miller
  Cc: Jose Abreu, Florian Fainelli, netdev, peppe.cavallaro,
	alexandre.torgue
In-Reply-To: <ca747b06-627a-c40a-2ad2-6c01245af86f@gmail.com>

On 2018/05/27 4:24, Elad Nachman wrote:
> stmmac reception handler calls stmmac_rx_vlan() to strip the vlan before calling napi_gro_receive().
> 
> The function assumes VLAN tagged frames are always tagged with 802.1Q protocol,
> and assigns ETH_P_8021Q to the skb by hard-coding the parameter on call to __vlan_hwaccel_put_tag() .
> 
> This causes packets not to be passed to the VLAN slave if it was created with 802.1AD protocol
> (ip link add link eth0 eth0.100 type vlan proto 802.1ad id 100).
> 
> This fix passes the protocol from the VLAN header into __vlan_hwaccel_put_tag()
> instead of using the hard-coded value of ETH_P_8021Q.
> NETIF_F_HW_VLAN_CTAG_RX check was removed to be in line with the driver actual abilities.
> 
> Signed-off-by: Elad Nachman <eladn@gilat.com>
> 

I might have not been clear enough but you still need this hunk with
this change.

> -	ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
> +	ndev->features |= (NETIF_F_HW_VLAN_CTAG_RX|NETIF_F_HW_VLAN_STAG_RX);

Also I guess spaces are preferred around '|'. Would you check it by
checkpatch.pl?

-- 
Toshiaki Makita

^ permalink raw reply

* Re: [PATCH net] tun: Fix NULL pointer dereference in XDP redirect
From: Jason Wang @ 2018-05-28  2:24 UTC (permalink / raw)
  To: Toshiaki Makita, Toshiaki Makita, David S. Miller
  Cc: netdev, Michael S. Tsirkin
In-Reply-To: <863681d4-92cf-1575-a182-b07f22ec578e@gmail.com>



On 2018年05月25日 21:43, Toshiaki Makita wrote:

[...]

>>> @@ -1917,16 +1923,22 @@ static ssize_t tun_get_user(struct 
>>> tun_struct *tun, struct tun_file *tfile,
>>>           struct bpf_prog *xdp_prog;
>>>           int ret;
>>> +        local_bh_disable();
>>> +        preempt_disable();
>>>           rcu_read_lock();
>>>           xdp_prog = rcu_dereference(tun->xdp_prog);
>>>           if (xdp_prog) {
>>>               ret = do_xdp_generic(xdp_prog, skb);
>>>               if (ret != XDP_PASS) {
>>>                   rcu_read_unlock();
>>> +                preempt_enable();
>>> +                local_bh_enable();
>>>                   return total_len;
>>>               }
>>>           }
>>>           rcu_read_unlock();
>>> +        preempt_enable();
>>> +        local_bh_enable();
>>>       }
>>>       rcu_read_lock();
>>
>> Good catch, thanks.
>>
>> But I think we can just replace preempt_disable()/enable() with 
>> local_bh_disable()/local_bh_enable() ?
>
> I actually thought the same, but noticed this patch.
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9ea4c380066fbe 
>
>
> It looks like they do not think local_bh_disable() implies 
> preempt_disable(). But I'm not sure why..
>
> Toshiaki Makita 

I see, there're probably have some subtle differences and implications 
for e.g scheduler or others.

What we what here is to make sure the process is not moved to another 
CPU and bh is enabled. By checking preemptible() function, preemption 
should be disabled after local_bh_disable(). So I think we're safe here.

Thanks

^ permalink raw reply

* Re: [PATCH] drivers/net/phy/micrel: Fix for PHY KSZ8061 errrata: Potential link-up failure when Ethernet cable is connected slowly
From: Andrew Lunn @ 2018-05-28  3:08 UTC (permalink / raw)
  To: Alexander Onnasch; +Cc: Florian Fainelli, netdev, linux-kernel
In-Reply-To: <1527251853-22218-1-git-send-email-alexander.onnasch@landisgyr.com>

> This e-mail (including any attachments) is confidential and may be
> legally privileged. If you are not an intended recipient or an
> authorized representative of an intended recipient, you are
> prohibited from using, copying or distributing the information in
> this e-mail or its attachments. If you have received this e-mail in
> error, please notify the sender immediately by return e-mail and
> delete all copies of this message and any attachments. Thank you.

Once this has been removed, i will have a comment...

     Andrew

^ permalink raw reply

* Re: [PATCH net] mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG
From: Andrew Lunn @ 2018-05-28  3:55 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: netdev, davem, jiri, petrm, mlxsw
In-Reply-To: <20180527064841.32199-1-idosch@mellanox.com>

> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
> index ca38a30fbe91..adc6ab2cf429 100644
> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
> @@ -4433,6 +4433,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
>  			NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
>  			return -EINVAL;
>  		}
> +		if (is_vlan_dev(upper_dev) &&
> +		    vlan_dev_vlan_id(upper_dev) == 1) {
> +			NL_SET_ERR_MSG_MOD(extack, "Creating a VLAN device with VID 1 is unsupported: VLAN 1 carries untagged traffic");
> +			return -EINVAL;
> +		}

Hi Ido

Would ENOTSUPP be a better return code. VLAN 1 is valid, you just
don't support it.

	Andrew

^ permalink raw reply

* Re: [PATCH bpf-next 0/5] fix test_sockmap
From: Prashant Bhole @ 2018-05-28  4:16 UTC (permalink / raw)
  To: John Fastabend, Alexei Starovoitov, Daniel Borkmann
  Cc: David S . Miller, Shuah Khan, netdev
In-Reply-To: <652cb724-b966-6ce3-7002-624e5e7f747e@gmail.com>

On 5/25/2018 11:01 PM, John Fastabend wrote:
> On 05/25/2018 01:28 AM, Prashant Bhole wrote:
>>
>>
>> On 5/24/2018 1:58 PM, John Fastabend wrote:
>>> On 05/23/2018 09:47 PM, Prashant Bhole wrote:
>>>>
>>>>
>>>> On 5/23/2018 6:44 PM, Prashant Bhole wrote:
>>>>>
>>>>>
>>>>> On 5/22/2018 2:08 AM, John Fastabend wrote:
>>>>>> On 05/20/2018 10:13 PM, Prashant Bhole wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 5/19/2018 1:42 AM, John Fastabend wrote:
>>>>>>>> On 05/18/2018 12:17 AM, Prashant Bhole wrote:
>>>>>>>>> This series fixes bugs in test_sockmap code. They weren't caught
>>>>>>>>> previously because failure in RX/TX thread was not notified to the
>>>>>>>>> main thread.
>>>>>>>>>
>>>>>>>>> Also fixed data verification logic and slightly improved test
>>>>>>>>> output
>>>>>>>>> such that parameters values (cork, apply, start, end) of failed
>>>>>>>>> test
>>>>>>>>> can be easily seen.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Great, this was on my list so thanks for taking care of it.
>>>>>>>>
>>>>>>>>> Note: Even after fixing above problems there are issues with tests
>>>>>>>>> which set cork parameter. Tests fail (RX thread timeout) when cork
>>>>>>>>> value is non-zero and overall data sent by TX thread isn't
>>>>>>>>> multiples
>>>>>>>>> of cork value.
>>>>>>>>
>>>>>>>>
>>>>>>>> This is expected. When 'cork' is set the sender should only xmit
>>>>>>>> the data when 'cork' bytes are available. If the user doesn't
>>>>>>>> provide the N bytes the data is cork'ed waiting for the bytes and
>>>>>>>> if the socket is closed the state is cleaned up. What these tests
>>>>>>>> are testing is the cleanup path when a user doesn't provide the
>>>>>>>> N bytes. In practice this is used to validate headers and prevent
>>>>>>>> users from sending partial headers. We want to keep these tests
>>>>>>>> because
>>>>>>>> they verify a tear-down path in the code.
>>>>>>>
>>>>>>> Ok.
>>>>>>>
>>>>>>>>
>>>>>>>> After your changes do these get reported as failures? If so we
>>>>>>>> need to account for the above in the calculations.
>>>>>>>
>>>>>>> Yes, cork related test are reported as failures because of RX thread
>>>>>>> timeout.
>>>>>>>
>>>>>>> So with your above description, I think we need to differentiate cork
>>>>>>> tests with partial data and full data. In partial data test we can
>>>>>>> have
>>>>>>> something like "timeout_expected" flag. Any other way to fix it?
>>>>>>>
>>>>>>
>>>>>> Adding a flag seems reasonable to me. Lets do this for now. Also I
>>>>>> plan to add more negative tests so we can either use the same
>>>>>> flag or a new one for those cases as well.
>>>>>>
>>>>>
>>>>> John,
>>>>> I worked on this for some time and noticed that the RX-timeout of
>>>>> tests with cork parameter is dependent on various parameters. So we
>>>>> can not set a flag like the way 'drop_expected' flag is set before
>>>>> executing the test.
>>>>>
>>>>> So I decided to write a function which judges all parameters before
>>>>> each test and decides whether a test with cork parameter will
>>>>> timeout or not. Then the conditions in the function became
>>>>> complicated. For example some tests fail if opt->rate < 17 (with
>>>>> some other conditions). Here is 17 is related to FRAGS_PER_SKB.
>>>>> Consider following two examples.
>>>> I'm sorry. Correction: s/FRAGS_PER_SKB/MAX_SKB_FRAGS/
>>>>
>>>>>
>>>>> ./test_sockmap --cgroup /mnt/cgroup2 -r 16 -i 1 -l 30 -t sendpage
>>>>> --txmsg --txmsg_cork 1024   # RX timeout occurs
>>>>>
>>>>> ./test_sockmap --cgroup /mnt/cgroup2 -r 17 -i 1 -l 30 -t sendpage
>>>>> --txmsg --txmsg_cork 1024   # Success!
>>>>>
>>>
>>> Ah yes this hits the buffer limit and flushes the queue. The kernel
>>> side doesn't know how to merge those specific sendpage requests so
>>> it gives each request its own buffer and when the limit is reached
>>> we flush it.
>>>
>>>>> Do we need to keep such tests? if yes, then I will continue with
>>>>> adding such conditions in the function.
>>>>>
>>>
>>> Yes, these tests are needed because they are testing the edge cases.
>>> These are probably the most important tests because my normal usage
>>> will catch any issues in the "good" cases its these types of things
>>> that can go unnoticed (at least for a short while) if we don't have
>>> specific tests for them.
>>
>> I tried but it is difficult to come up with a right set of conditions
>> which lead to test failure.
>>
> 
> Agreed, it can be yes. How about adding your logic for all tests except
> "cork" cases. If there is a flag to set if the timeout is expected we
> can always manually set it in the test invocation. Might not be as
> nice as automatically learning the expected results but possibly easier
> than building some complicated logic to figure it out.
> 
> Would you mind submitting your series again without the "cork" tests
> being tracked? And if you want add a bit to tell if the "cork" tests are
> going to timeout or not setting it per test manually. But I think
> your series can just omit the cork test for now and still be useful.

Ok. I will submit the series again. Without any change in actual 
patches, but cover letter reorganized. Thanks.

-Prashant

^ permalink raw reply

* Re: [PATCH net] tun: Fix NULL pointer dereference in XDP redirect
From: Toshiaki Makita @ 2018-05-28  4:28 UTC (permalink / raw)
  To: Jason Wang; +Cc: Toshiaki Makita, David S. Miller, netdev, Michael S. Tsirkin
In-Reply-To: <e7222fda-a845-e1ac-f7c2-9a019951af12@redhat.com>

On 2018/05/28 11:24, Jason Wang wrote:
> On 2018年05月25日 21:43, Toshiaki Makita wrote:
> 
> [...]
> 
>>>> @@ -1917,16 +1923,22 @@ static ssize_t tun_get_user(struct
>>>> tun_struct *tun, struct tun_file *tfile,
>>>>           struct bpf_prog *xdp_prog;
>>>>           int ret;
>>>> +        local_bh_disable();
>>>> +        preempt_disable();
>>>>           rcu_read_lock();
>>>>           xdp_prog = rcu_dereference(tun->xdp_prog);
>>>>           if (xdp_prog) {
>>>>               ret = do_xdp_generic(xdp_prog, skb);
>>>>               if (ret != XDP_PASS) {
>>>>                   rcu_read_unlock();
>>>> +                preempt_enable();
>>>> +                local_bh_enable();
>>>>                   return total_len;
>>>>               }
>>>>           }
>>>>           rcu_read_unlock();
>>>> +        preempt_enable();
>>>> +        local_bh_enable();
>>>>       }
>>>>       rcu_read_lock();
>>>
>>> Good catch, thanks.
>>>
>>> But I think we can just replace preempt_disable()/enable() with
>>> local_bh_disable()/local_bh_enable() ?
>>
>> I actually thought the same, but noticed this patch.
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9ea4c380066fbe
>>
>>
>> It looks like they do not think local_bh_disable() implies
>> preempt_disable(). But I'm not sure why..
>>
>> Toshiaki Makita 
> 
> I see, there're probably have some subtle differences and implications
> for e.g scheduler or others.
> 
> What we what here is to make sure the process is not moved to another
> CPU and bh is enabled. By checking preemptible() function, preemption
> should be disabled after local_bh_disable(). So I think we're safe here.

OK. I checked retint_kernel which IIUC is the entry point of preemption
process on x86, and confirmed it just checks if __preempt_count is zero.

I haven't checked other archs but I was probably worried too much.
Will send v2.

Thanks,
Toshiaki Makita

^ permalink raw reply

* [PATCH bpf v2 0/5] fix test_sockmap
From: Prashant Bhole @ 2018-05-28  4:37 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, John Fastabend
  Cc: Prashant Bhole, David S . Miller, Shuah Khan, netdev,
	linux-kselftest

This series fixes error handling, timeout and data verification in
test_sockmap. Previously it was not able to detect failure/timeout in
RX/TX thread because error was not notified to the main thread.

Also slightly improved test output by printing parameter values (cork,
apply, start, end) so that parameters for all tests are displayed.

Prashant Bhole (5):
  selftests/bpf: test_sockmap, check test failure
  selftests/bpf: test_sockmap, join cgroup in selftest mode
  selftests/bpf: test_sockmap, fix test timeout
  selftests/bpf: test_sockmap, fix data verification
  selftests/bpf: test_sockmap, print additional test options

 tools/testing/selftests/bpf/test_sockmap.c | 76 +++++++++++++++++-----
 1 file changed, 58 insertions(+), 18 deletions(-)

-- 
2.17.0

^ permalink raw reply

* [PATCH bpf v2 1/5] selftests/bpf: test_sockmap, check test failure
From: Prashant Bhole @ 2018-05-28  4:37 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, John Fastabend
  Cc: Prashant Bhole, David S . Miller, Shuah Khan, netdev,
	linux-kselftest
In-Reply-To: <20180528043803.4824-1-bhole_prashant_q7@lab.ntt.co.jp>

Test failures are not identified because exit code of RX/TX threads
is not checked. Also threads are not returning correct exit code.

- Return exit code from threads depending on test execution status
- In main thread, check the exit code of RX/TX threads

Fixes: 16962b2404ac ("bpf: sockmap, add selftests")
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
---
 tools/testing/selftests/bpf/test_sockmap.c | 25 ++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index eb17fae458e6..34feb74c95c4 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -429,8 +429,8 @@ static int sendmsg_test(struct sockmap_options *opt)
 	struct msg_stats s = {0};
 	int iov_count = opt->iov_count;
 	int iov_buf = opt->iov_length;
+	int rx_status, tx_status;
 	int cnt = opt->rate;
-	int status;
 
 	errno = 0;
 
@@ -442,7 +442,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 	rxpid = fork();
 	if (rxpid == 0) {
 		if (opt->drop_expected)
-			exit(1);
+			exit(0);
 
 		if (opt->sendpage)
 			iov_count = 1;
@@ -463,7 +463,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
 				s.bytes_sent, sent_Bps, sent_Bps/giga,
 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
-		exit(1);
+		exit(err ? 1 : 0);
 	} else if (rxpid == -1) {
 		perror("msg_loop_rx: ");
 		return errno;
@@ -491,14 +491,27 @@ static int sendmsg_test(struct sockmap_options *opt)
 				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
 				s.bytes_sent, sent_Bps, sent_Bps/giga,
 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
-		exit(1);
+		exit(err ? 1 : 0);
 	} else if (txpid == -1) {
 		perror("msg_loop_tx: ");
 		return errno;
 	}
 
-	assert(waitpid(rxpid, &status, 0) == rxpid);
-	assert(waitpid(txpid, &status, 0) == txpid);
+	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
+	assert(waitpid(txpid, &tx_status, 0) == txpid);
+	if (WIFEXITED(rx_status)) {
+		err = WEXITSTATUS(rx_status);
+		if (err) {
+			fprintf(stderr, "rx thread exited with err %d. ", err);
+			goto out;
+		}
+	}
+	if (WIFEXITED(tx_status)) {
+		err = WEXITSTATUS(tx_status);
+		if (err)
+			fprintf(stderr, "tx thread exited with err %d. ", err);
+	}
+out:
 	return err;
 }
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH bpf v2 2/5] selftests/bpf: test_sockmap, join cgroup in selftest mode
From: Prashant Bhole @ 2018-05-28  4:38 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, John Fastabend
  Cc: Prashant Bhole, David S . Miller, Shuah Khan, netdev,
	linux-kselftest
In-Reply-To: <20180528043803.4824-1-bhole_prashant_q7@lab.ntt.co.jp>

In case of selftest mode, temporary cgroup environment is created but
cgroup is not joined. It causes test failures. Fixed by joining the
cgroup

Fixes: 16962b2404ac ("bpf: sockmap, add selftests")
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
---
 tools/testing/selftests/bpf/test_sockmap.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 34feb74c95c4..8a81ea0e9fb6 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1342,6 +1342,11 @@ static int __test_suite(char *bpf_file)
 		return cg_fd;
 	}
 
+	if (join_cgroup(CG_PATH)) {
+		fprintf(stderr, "ERROR: failed to join cgroup\n");
+		return -EINVAL;
+	}
+
 	/* Tests basic commands and APIs with range of iov values */
 	txmsg_start = txmsg_end = 0;
 	err = test_txmsg(cg_fd);
-- 
2.17.0

^ permalink raw reply related

* [PATCH bpf v2 3/5] selftests/bpf: test_sockmap, fix test timeout
From: Prashant Bhole @ 2018-05-28  4:38 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, John Fastabend
  Cc: Prashant Bhole, David S . Miller, Shuah Khan, netdev,
	linux-kselftest
In-Reply-To: <20180528043803.4824-1-bhole_prashant_q7@lab.ntt.co.jp>

In order to reduce runtime of tests, recently timout for select() call
was reduced from 1sec to 10usec. This was causing many tests failures.
It was caught with failure handling commits in this series.

Restoring the timeout from 10usec to 1sec

Fixes: a18fda1a62c3 ("bpf: reduce runtime of test_sockmap tests")
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
---
 tools/testing/selftests/bpf/test_sockmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 8a81ea0e9fb6..f79397513362 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -345,8 +345,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		if (err < 0)
 			perror("recv start time: ");
 		while (s->bytes_recvd < total_bytes) {
-			timeout.tv_sec = 0;
-			timeout.tv_usec = 10;
+			timeout.tv_sec = 1;
+			timeout.tv_usec = 0;
 
 			/* FD sets */
 			FD_ZERO(&w);
-- 
2.17.0

^ permalink raw reply related

* [PATCH bpf v2 4/5] selftests/bpf: test_sockmap, fix data verification
From: Prashant Bhole @ 2018-05-28  4:38 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, John Fastabend
  Cc: Prashant Bhole, David S . Miller, Shuah Khan, netdev,
	linux-kselftest
In-Reply-To: <20180528043803.4824-1-bhole_prashant_q7@lab.ntt.co.jp>

When data verification is enabled, some tests fail because verification is done
incorrectly. Following changes fix it.

- Identify the size of data block to be verified
- Reset verification counter when data block size is reached
- Fixed the value printed in case of verfication failure

Fixes: 16962b2404ac ("bpf: sockmap, add selftests")
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
---
 tools/testing/selftests/bpf/test_sockmap.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index f79397513362..e8faf4596dac 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -337,8 +337,15 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		int fd_flags = O_NONBLOCK;
 		struct timeval timeout;
 		float total_bytes;
+		int bytes_cnt = 0;
+		int chunk_sz;
 		fd_set w;
 
+		if (opt->sendpage)
+			chunk_sz = iov_length * cnt;
+		else
+			chunk_sz = iov_length * iov_count;
+
 		fcntl(fd, fd_flags);
 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
@@ -388,9 +395,14 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 							errno = -EIO;
 							fprintf(stderr,
 								"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
-								i, j, d[j], k - 1, d[j+1], k + 1);
+								i, j, d[j], k - 1, d[j+1], k);
 							goto out_errno;
 						}
+						bytes_cnt++;
+						if (bytes_cnt == chunk_sz) {
+							k = 0;
+							bytes_cnt = 0;
+						}
 						recv--;
 					}
 				}
-- 
2.17.0

^ permalink raw reply related

* [PATCH bpf v2 5/5] selftests/bpf: test_sockmap, print additional test options
From: Prashant Bhole @ 2018-05-28  4:38 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, John Fastabend
  Cc: Prashant Bhole, David S . Miller, Shuah Khan, netdev,
	linux-kselftest
In-Reply-To: <20180528043803.4824-1-bhole_prashant_q7@lab.ntt.co.jp>

Print values of test options like apply, cork, start, end so that
individual failed tests can be identified for manual run

Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
---
 tools/testing/selftests/bpf/test_sockmap.c | 28 +++++++++++++++-------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index e8faf4596dac..7970d48c4acb 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -869,6 +869,8 @@ static char *test_to_str(int test)
 #define OPTSTRING 60
 static void test_options(char *options)
 {
+	char tstr[OPTSTRING];
+
 	memset(options, 0, OPTSTRING);
 
 	if (txmsg_pass)
@@ -881,14 +883,22 @@ static void test_options(char *options)
 		strncat(options, "redir_noisy,", OPTSTRING);
 	if (txmsg_drop)
 		strncat(options, "drop,", OPTSTRING);
-	if (txmsg_apply)
-		strncat(options, "apply,", OPTSTRING);
-	if (txmsg_cork)
-		strncat(options, "cork,", OPTSTRING);
-	if (txmsg_start)
-		strncat(options, "start,", OPTSTRING);
-	if (txmsg_end)
-		strncat(options, "end,", OPTSTRING);
+	if (txmsg_apply) {
+		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_cork) {
+		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_start) {
+		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_end) {
+		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
+		strncat(options, tstr, OPTSTRING);
+	}
 	if (txmsg_ingress)
 		strncat(options, "ingress,", OPTSTRING);
 	if (txmsg_skb)
@@ -897,7 +907,7 @@ static void test_options(char *options)
 
 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
 {
-	char *options = calloc(60, sizeof(char));
+	char *options = calloc(OPTSTRING, sizeof(char));
 	int err;
 
 	if (test == SENDPAGE)
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH v2] Revert "alx: remove WoL support"
From: AceLan Kao @ 2018-05-28  5:06 UTC (permalink / raw)
  To: David Miller
  Cc: James Cliburn, Chris Snook, rakesh, netdev, Emily Chien,
	Andrew Lunn, Linux-Kernel@Vger. Kernel. Org, Johannes Berg,
	Johannes Stezenbach
In-Reply-To: <20180520.231820.868530785962553925.davem@davemloft.net>

Hi all,

Just inform you a news reported by a user who confirmed the wake up
issue can't be reproduce by the new kernel.
https://bugzilla.kernel.org/show_bug.cgi?id=61651#c126

<quote>
Guillaume de Jabrun 2018-05-27 15:12:54 UTC

I am using this patch for a long time. I was experiencing the "wake up
twice" bug, but with recent kernel version, I don't have this issue
anymore.
I can't tell which version actually fix the bug (I don't remember..).
</quote>

Best regards,
AceLan Kao.

2018-05-21 11:18 GMT+08:00 David Miller <davem@davemloft.net>:
> From: AceLan Kao <acelan.kao@canonical.com>
> Date: Mon, 21 May 2018 11:14:00 +0800
>
>> We are willing to fix the issue, but we don't have a machine to
>> reproduce it, and the WoL feature has been removed 5 years ago, it's
>> hard to find those buggy machines.
>
> Have you bothered to ask the person who did the revert?
>
>> WoL is a feature that is only used by a very small group of people,
>> and the wake up issue looks like only happens on some
>> platforms. Which means only small part of the group of people are
>> affected.
>
> One of those people was the wireless networking stack maintainer.
>
>> So, it's not a serious issue worth to remove it from alx driver.
>
> I disagree.
>
> You must fix the regression solved by the revert, before adding
> WoL support back to the driver.
>
> I'm not going to say this again.
>
> Thank you.
>

^ permalink raw reply

* [PATCH net] be2net: Fix error detection logic for BE3
From: Suresh Reddy @ 2018-05-28  5:26 UTC (permalink / raw)
  To: netdev
In-Reply-To: <Suresh.Reddy@broadcom.com>

Check for 0xE00 (RECOVERABLE_ERR) along with ARMFW UE (0x0)
in be_detect_error() to know whether the error is valid error or not

Fixes: 673c96e5a ("be2net: Fix UE detection logic for BE3")
Signed-off-by: Suresh Reddy <suresh.reddy@broadcom.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index c697e79..8f75500 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3309,7 +3309,9 @@ void be_detect_error(struct be_adapter *adapter)
 				if ((val & POST_STAGE_FAT_LOG_START)
 				     != POST_STAGE_FAT_LOG_START &&
 				    (val & POST_STAGE_ARMFW_UE)
-				     != POST_STAGE_ARMFW_UE)
+				     != POST_STAGE_ARMFW_UE &&
+				    (val & POST_STAGE_RECOVERABLE_ERR)
+				     != POST_STAGE_RECOVERABLE_ERR)
 					return;
 			}
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v2 net-next] net:sched: add action inheritdsfield to skbedit
From: Fu, Qiaobin @ 2018-05-28  5:40 UTC (permalink / raw)
  To: davem@davemloft.net
  Cc: netdev@vger.kernel.org, jhs@mojatatu.com, Michel Machado,
	Marcelo Ricardo Leitner, xiyou.wangcong@gmail.com

The new action inheritdsfield copies the field DS of
IPv4 and IPv6 packets into skb->priority. This enables
later classification of packets based on the DS field.

Original idea by Jamal Hadi Salim <jhs@mojatatu.com>

Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
---

Note that the motivation for this patch is found in the following discussion:
https://www.spinics.net/lists/netdev/msg501061.html
---

diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index fbcfe27..432ad2f 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -30,9 +30,11 @@
 #define SKBEDIT_F_MARK			0x4
 #define SKBEDIT_F_PTYPE			0x8
 #define SKBEDIT_F_MASK			0x10
+#define SKBEDIT_F_INHERITDSFIELD	0x20
 
 struct tc_skbedit {
 	tc_gen;
+	__u64 flags;
 };
 
 enum {
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6138d1d7..c3e9d03 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -23,6 +23,9 @@
 #include <linux/rtnetlink.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/dsfield.h>
 
 #include <linux/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_skbedit.h>
@@ -39,8 +42,32 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 	tcf_lastuse_update(&d->tcf_tm);
 	bstats_update(&d->tcf_bstats, skb);
 
-	if (d->flags & SKBEDIT_F_PRIORITY)
-		skb->priority = d->priority;
+	if (d->flags & SKBEDIT_F_INHERITDSFIELD) {
+		int wlen = skb_network_offset(skb);
+
+		switch (tc_skb_protocol(skb)) {
+		case htons(ETH_P_IP):
+			wlen += sizeof(struct iphdr);
+			if (!pskb_may_pull(skb, wlen))
+				goto err;
+			skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+			break;
+
+		case htons(ETH_P_IPV6):
+			wlen += sizeof(struct ipv6hdr);
+			if (!pskb_may_pull(skb, wlen))
+				goto err;
+			skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+			break;
+
+		default:
+			goto default_priority;
+		}
+	} else {
+default_priority:
+		if (d->flags & SKBEDIT_F_PRIORITY)
+			skb->priority = d->priority;
+	}
 	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
 	    skb->dev->real_num_tx_queues > d->queue_mapping)
 		skb_set_queue_mapping(skb, d->queue_mapping);
@@ -53,6 +80,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 
 	spin_unlock(&d->tcf_lock);
 	return d->tcf_action;
+
+err:
+	spin_unlock(&d->tcf_lock);
+	return TC_ACT_SHOT;
 }
 
 static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
@@ -115,6 +146,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	}
 
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
+	if (parm->flags & SKBEDIT_F_INHERITDSFIELD)
+		flags |= SKBEDIT_F_INHERITDSFIELD;
 
 	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)

^ permalink raw reply related

* [PATCH iproute2] net:sched: add action inheritdsfield to skbedit
From: Fu, Qiaobin @ 2018-05-28  5:54 UTC (permalink / raw)
  To: davem@davemloft.net
  Cc: netdev@vger.kernel.org, jhs@mojatatu.com, Michel Machado,
	Marcelo Ricardo Leitner, xiyou.wangcong@gmail.com

The new action inheritdsfield copies the field DS of
IPv4 and IPv6 packets into skb->priority. This enables
later classification of packets based on the DS field.

Original idea by Jamal Hadi Salim <jhs@mojatatu.com>

Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
---

Note that the motivation for this patch is found in the following discussion:
https://www.spinics.net/lists/netdev/msg501061.html
---

diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index fbcfe27..432ad2f 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -30,9 +30,11 @@
 #define SKBEDIT_F_MARK			0x4
 #define SKBEDIT_F_PTYPE			0x8
 #define SKBEDIT_F_MASK			0x10
+#define SKBEDIT_F_INHERITDSFIELD	0x20
 
 struct tc_skbedit {
 	tc_gen;
+	__u64 flags;
 };
 
 enum {
diff --git a/tc/m_skbedit.c b/tc/m_skbedit.c
index db5c64c..7553a40 100644
--- a/tc/m_skbedit.c
+++ b/tc/m_skbedit.c
@@ -30,16 +30,18 @@
 
 static void explain(void)
 {
-	fprintf(stderr, "Usage: ... skbedit <[QM] [PM] [MM] [PT]>\n"
+	fprintf(stderr, "Usage: ... skbedit <[QM] [PM] [MM] [PT] [IF]>\n"
 		"QM = queue_mapping QUEUE_MAPPING\n"
 		"PM = priority PRIORITY\n"
 		"MM = mark MARK\n"
 		"PT = ptype PACKETYPE\n"
+		"IF = inheritdsfield\n"
 		"PACKETYPE = is one of:\n"
 		"  host, otherhost, broadcast, multicast\n"
 		"QUEUE_MAPPING = device transmit queue to use\n"
 		"PRIORITY = classID to assign to priority field\n"
-		"MARK = firewall mark to set\n");
+		"MARK = firewall mark to set\n"
+		"note: inheritdsfield maps DS field to skb->priority\n");
 }
 
 static void
@@ -59,7 +61,7 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
 	struct rtattr *tail;
 	unsigned int tmp;
 	__u16 queue_mapping, ptype;
-	__u32 flags = 0, priority, mark;
+	__u32 priority, mark;
 	struct tc_skbedit sel = { 0 };
 
 	if (matches(*argv, "skbedit") != 0)
@@ -69,7 +71,7 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
 
 	while (argc > 0) {
 		if (matches(*argv, "queue_mapping") == 0) {
-			flags |= SKBEDIT_F_QUEUE_MAPPING;
+			sel.flags |= SKBEDIT_F_QUEUE_MAPPING;
 			NEXT_ARG();
 			if (get_unsigned(&tmp, *argv, 10) || tmp > 65535) {
 				fprintf(stderr, "Illegal queue_mapping\n");
@@ -78,7 +80,7 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
 			queue_mapping = tmp;
 			ok++;
 		} else if (matches(*argv, "priority") == 0) {
-			flags |= SKBEDIT_F_PRIORITY;
+			sel.flags |= SKBEDIT_F_PRIORITY;
 			NEXT_ARG();
 			if (get_tc_classid(&priority, *argv)) {
 				fprintf(stderr, "Illegal priority\n");
@@ -86,7 +88,7 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
 			}
 			ok++;
 		} else if (matches(*argv, "mark") == 0) {
-			flags |= SKBEDIT_F_MARK;
+			sel.flags |= SKBEDIT_F_MARK;
 			NEXT_ARG();
 			if (get_u32(&mark, *argv, 0)) {
 				fprintf(stderr, "Illegal mark\n");
@@ -109,7 +111,10 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
 					*argv);
 				return -1;
 			}
-			flags |= SKBEDIT_F_PTYPE;
+			sel.flags |= SKBEDIT_F_PTYPE;
+			ok++;
+		} else if (matches(*argv, "inheritdsfield") == 0) {
+			sel.flags |= SKBEDIT_F_INHERITDSFIELD;
 			ok++;
 		} else if (matches(*argv, "help") == 0) {
 			usage();
@@ -144,16 +149,16 @@ parse_skbedit(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
 
 	tail = addattr_nest(n, MAX_MSG, tca_id);
 	addattr_l(n, MAX_MSG, TCA_SKBEDIT_PARMS, &sel, sizeof(sel));
-	if (flags & SKBEDIT_F_QUEUE_MAPPING)
+	if (sel.flags & SKBEDIT_F_QUEUE_MAPPING)
 		addattr_l(n, MAX_MSG, TCA_SKBEDIT_QUEUE_MAPPING,
 			  &queue_mapping, sizeof(queue_mapping));
-	if (flags & SKBEDIT_F_PRIORITY)
+	if (sel.flags & SKBEDIT_F_PRIORITY)
 		addattr_l(n, MAX_MSG, TCA_SKBEDIT_PRIORITY,
 			  &priority, sizeof(priority));
-	if (flags & SKBEDIT_F_MARK)
+	if (sel.flags & SKBEDIT_F_MARK)
 		addattr_l(n, MAX_MSG, TCA_SKBEDIT_MARK,
 			  &mark, sizeof(mark));
-	if (flags & SKBEDIT_F_PTYPE)
+	if (sel.flags & SKBEDIT_F_PTYPE)
 		addattr_l(n, MAX_MSG, TCA_SKBEDIT_PTYPE,
 			  &ptype, sizeof(ptype));
 	addattr_nest_end(n, tail);
@@ -211,6 +216,8 @@ static int print_skbedit(struct action_util *au, FILE *f, struct rtattr *arg)
 		else
 			fprintf(f, " ptype %d", *ptype);
 	}
+	if (p->flags & SKBEDIT_F_INHERITDSFIELD)
+		fprintf(f, "inherit DS field ");
 
 	print_action_control(f, " ", p->action, "");

^ permalink raw reply related

* Re: [PATCH 5/7] x86: remove the experimental forcesac boot option
From: Thomas Gleixner @ 2018-05-28  6:07 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Fenghua Yu, Tony Luck, linux-ia64-u79uwXL29TY76Z2rM5mHXA,
	Greg Kroah-Hartman, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, Ingo Molnar,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20180525143512.1466-6-hch-jcswGhMUV9g@public.gmane.org>

On Fri, 25 May 2018, Christoph Hellwig wrote:

x86/pci-dma: ...

Please

> Limiting the dma mask to avoid PCI (pre-PCIe) DAC cycles while paying
> the huge overhead of an IOMMU is rather pointless, and this seriously
> gets in the way of dma mapping work.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Reviewed-by: Thomas Gleixner <tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org>
 

^ permalink raw reply

* Re: [PATCH 6/7] x86: remove the explicit nodac and allowdac option
From: Thomas Gleixner @ 2018-05-28  6:08 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Fenghua Yu, Tony Luck, linux-ia64-u79uwXL29TY76Z2rM5mHXA,
	Greg Kroah-Hartman, x86-DgEjT+Ai2ygdnm+yROfE0A,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, Ingo Molnar,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20180525143512.1466-7-hch-jcswGhMUV9g@public.gmane.org>

On Fri, 25 May 2018, Christoph Hellwig wrote:

x86/pci-dma: ...

Please

> This is something drivers should decide (modulo chipset quirks like
> for VIA), which as far as I can tell is how things have been handled
> for the last 15 years.
> 
> Note that we keep the usedac option for now, as it is used in the wild
> to override the too generic VIA quirk.
> 
> Signed-off-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>

Reviewed-by: Thomas Gleixner <tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org>

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox