Netdev List
 help / color / mirror / Atom feed
* [PATCH bpf-next v3 04/10] bpf: get kernel symbol addresses via syscall
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

This adds new two new fields to struct bpf_prog_info. For
multi-function programs, these fields can be used to pass
a list of kernel symbol addresses for all functions in a
given program to userspace using the bpf system call with
the BPF_OBJ_GET_INFO_BY_FD command.

When bpf_jit_kallsyms is enabled, we can get the address
of the corresponding kernel symbol for a callee function
and resolve the symbol's name. The address is determined
by adding the value of the call instruction's imm field
to __bpf_call_base. This offset gets assigned to the imm
field by the verifier.

For some architectures, such as powerpc64, the imm field
is not large enough to hold this offset.

We resolve this by:

[1] Assigning the subprog id to the imm field of a call
    instruction in the verifier instead of the offset of
    the callee's symbol's address from __bpf_call_base.

[2] Determining the address of a callee's corresponding
    symbol by using the imm field as an index for the
    list of kernel symbol addresses now available from
    the program info.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
v3:
 - Copy addresses to jited_ksyms only if bpf_dump_raw_ok()
   is true.
 - Move new fields to the end of bpf_prog_info to avoid
   breaking userspace.
---
 include/uapi/linux/bpf.h |  2 ++
 kernel/bpf/syscall.c     | 25 +++++++++++++++++++++++++
 kernel/bpf/verifier.c    |  7 +------
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 97446bbe2ca5..c44105f27da9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2205,6 +2205,8 @@ struct bpf_prog_info {
 	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 nr_jited_ksyms;
+	__aligned_u64 jited_ksyms;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bfcde949c7f8..f0ad4b5f0224 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1933,6 +1933,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	if (!capable(CAP_SYS_ADMIN)) {
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
+		info.nr_jited_ksyms = 0;
 		goto done;
 	}
 
@@ -1981,6 +1982,30 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		}
 	}
 
+	ulen = info.nr_jited_ksyms;
+	info.nr_jited_ksyms = prog->aux->func_cnt;
+	if (info.nr_jited_ksyms && ulen) {
+		if (bpf_dump_raw_ok()) {
+			u64 __user *user_ksyms;
+			ulong ksym_addr;
+			u32 i;
+
+			/* copy the address of the kernel symbol
+			 * corresponding to each function
+			 */
+			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
+			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
+			for (i = 0; i < ulen; i++) {
+				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
+				ksym_addr &= PAGE_MASK;
+				if (put_user((u64) ksym_addr, &user_ksyms[i]))
+					return -EFAULT;
+			}
+		} else {
+			info.jited_ksyms = 0;
+		}
+	}
+
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 559cb74ba29e..8c4d9d0fd3ab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5426,17 +5426,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	 * later look the same as if they were interpreted only.
 	 */
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		unsigned long addr;
-
 		if (insn->code != (BPF_JMP | BPF_CALL) ||
 		    insn->src_reg != BPF_PSEUDO_CALL)
 			continue;
 		insn->off = env->insn_aux_data[i].call_imm;
 		subprog = find_subprog(env, i + insn->off + 1);
-		addr  = (unsigned long)func[subprog]->bpf_func;
-		addr &= PAGE_MASK;
-		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
-			    addr - __bpf_call_base;
+		insn->imm = subprog;
 	}
 
 	prog->jited = 1;
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 05/10] tools: bpf: sync bpf uapi header
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

Syncing the bpf.h uapi header with tools so that struct
bpf_prog_info has the two new fields for passing on the
addresses of the kernel symbols corresponding to each
function in a program.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
v3:
 - Move new fields to the end of bpf_prog_info to avoid
   breaking userspace.
---
 tools/include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 97446bbe2ca5..c44105f27da9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2205,6 +2205,8 @@ struct bpf_prog_info {
 	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 nr_jited_ksyms;
+	__aligned_u64 jited_ksyms;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 06/10] tools: bpftool: resolve calls without using imm field
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

Currently, we resolve the callee's address for a JITed function
call by using the imm field of the call instruction as an offset
from __bpf_call_base. If bpf_jit_kallsyms is enabled, we further
use this address to get the callee's kernel symbol's name.

For some architectures, such as powerpc64, the imm field is not
large enough to hold this offset. So, instead of assigning this
offset to the imm field, the verifier now assigns the subprog
id. Also, a list of kernel symbol addresses for all the JITed
functions is provided in the program info. We now use the imm
field as an index for this list to lookup a callee's symbol's
address and resolve its name.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
v3:
 - Avoid using redundant pointers.
 - Fix indentation.

v2:
 - Order variables from longest to shortest.
 - Make sure that ksyms_ptr and ksyms_len are always initialized.
 - Simplify code.
---
 tools/bpf/bpftool/prog.c          | 24 ++++++++++++++++++++++++
 tools/bpf/bpftool/xlated_dumper.c | 10 +++++++++-
 tools/bpf/bpftool/xlated_dumper.h |  2 ++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9bdfdf2d3fbe..e05ab58d39e2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -420,7 +420,9 @@ static int do_show(int argc, char **argv)
 
 static int do_dump(int argc, char **argv)
 {
+	unsigned long *func_ksyms = NULL;
 	struct bpf_prog_info info = {};
+	unsigned int nr_func_ksyms;
 	struct dump_data dd = {};
 	__u32 len = sizeof(info);
 	unsigned int buf_size;
@@ -496,10 +498,22 @@ static int do_dump(int argc, char **argv)
 		return -1;
 	}
 
+	nr_func_ksyms = info.nr_jited_ksyms;
+	if (nr_func_ksyms) {
+		func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
+		if (!func_ksyms) {
+			p_err("mem alloc failed");
+			close(fd);
+			goto err_free;
+		}
+	}
+
 	memset(&info, 0, sizeof(info));
 
 	*member_ptr = ptr_to_u64(buf);
 	*member_len = buf_size;
+	info.jited_ksyms = ptr_to_u64(func_ksyms);
+	info.nr_jited_ksyms = nr_func_ksyms;
 
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	close(fd);
@@ -513,6 +527,11 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if (info.nr_jited_ksyms > nr_func_ksyms) {
+		p_err("too many addresses returned");
+		goto err_free;
+	}
+
 	if ((member_len == &info.jited_prog_len &&
 	     info.jited_prog_insns == 0) ||
 	    (member_len == &info.xlated_prog_len &&
@@ -558,6 +577,9 @@ static int do_dump(int argc, char **argv)
 			dump_xlated_cfg(buf, *member_len);
 	} else {
 		kernel_syms_load(&dd);
+		dd.nr_jited_ksyms = info.nr_jited_ksyms;
+		dd.jited_ksyms = (__u64 *) info.jited_ksyms;
+
 		if (json_output)
 			dump_xlated_json(&dd, buf, *member_len, opcodes);
 		else
@@ -566,10 +588,12 @@ static int do_dump(int argc, char **argv)
 	}
 
 	free(buf);
+	free(func_ksyms);
 	return 0;
 
 err_free:
 	free(buf);
+	free(func_ksyms);
 	return -1;
 }
 
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 7a3173b76c16..efdc8fecf2bb 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd,
 				    unsigned long address,
 				    const struct bpf_insn *insn)
 {
-	if (sym)
+	if (!dd->nr_jited_ksyms)
+		/* Do not show address for interpreted programs */
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			"%+d", insn->off);
+	else if (sym)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "%+d#%s", insn->off, sym->name);
 	else
@@ -203,6 +207,10 @@ static const char *print_call(void *private_data,
 	unsigned long address = dd->address_call_base + insn->imm;
 	struct kernel_sym *sym;
 
+	if (insn->src_reg == BPF_PSEUDO_CALL &&
+	    (__u32) insn->imm < dd->nr_jited_ksyms)
+		address = dd->jited_ksyms[insn->imm];
+
 	sym = kernel_syms_search(dd, address);
 	if (insn->src_reg == BPF_PSEUDO_CALL)
 		return print_call_pcrel(dd, sym, address, insn);
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index b34affa7ef2d..eafbb49c8d0b 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -49,6 +49,8 @@ struct dump_data {
 	unsigned long address_call_base;
 	struct kernel_sym *sym_mapping;
 	__u32 sym_count;
+	__u64 *jited_ksyms;
+	__u32 nr_jited_ksyms;
 	char scratch_buff[SYM_MAX_NAME + 8];
 };
 
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 07/10] bpf: fix multi-function JITed dump obtained via syscall
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

Currently, for multi-function programs, we cannot get the JITed
instructions using the bpf system call's BPF_OBJ_GET_INFO_BY_FD
command. Because of this, userspace tools such as bpftool fail
to identify a multi-function program as being JITed or not.

With the JIT enabled and the test program running, this can be
verified as follows:

  # cat /proc/sys/net/core/bpf_jit_enable
  1

Before applying this patch:

  # bpftool prog list
  1: kprobe  name foo  tag b811aab41a39ad3d  gpl
          loaded_at 2018-05-16T11:43:38+0530  uid 0
          xlated 216B  not jited  memlock 65536B
  ...

  # bpftool prog dump jited id 1
  no instructions returned

After applying this patch:

  # bpftool prog list
  1: kprobe  name foo  tag b811aab41a39ad3d  gpl
          loaded_at 2018-05-16T12:13:01+0530  uid 0
          xlated 216B  jited 308B  memlock 65536B
  ...

  # bpftool prog dump jited id 1
     0:   nop
     4:   nop
     8:   mflr    r0
     c:   std     r0,16(r1)
    10:   stdu    r1,-112(r1)
    14:   std     r31,104(r1)
    18:   addi    r31,r1,48
    1c:   li      r3,10
  ...

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 kernel/bpf/syscall.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f0ad4b5f0224..1c4cba91e523 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1970,13 +1970,43 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	 * for offload.
 	 */
 	ulen = info.jited_prog_len;
-	info.jited_prog_len = prog->jited_len;
+	if (prog->aux->func_cnt) {
+		u32 i;
+
+		info.jited_prog_len = 0;
+		for (i = 0; i < prog->aux->func_cnt; i++)
+			info.jited_prog_len += prog->aux->func[i]->jited_len;
+	} else {
+		info.jited_prog_len = prog->jited_len;
+	}
+
 	if (info.jited_prog_len && ulen) {
 		if (bpf_dump_raw_ok()) {
 			uinsns = u64_to_user_ptr(info.jited_prog_insns);
 			ulen = min_t(u32, info.jited_prog_len, ulen);
-			if (copy_to_user(uinsns, prog->bpf_func, ulen))
-				return -EFAULT;
+
+			/* for multi-function programs, copy the JITed
+			 * instructions for all the functions
+			 */
+			if (prog->aux->func_cnt) {
+				u32 len, free, i;
+				u8 *img;
+
+				free = ulen;
+				for (i = 0; i < prog->aux->func_cnt; i++) {
+					len = prog->aux->func[i]->jited_len;
+					img = (u8 *) prog->aux->func[i]->bpf_func;
+					if (len > free)
+						break;
+					if (copy_to_user(uinsns, img, len))
+						return -EFAULT;
+					uinsns += len;
+					free -= len;
+				}
+			} else {
+				if (copy_to_user(uinsns, prog->bpf_func, ulen))
+					return -EFAULT;
+			}
 		} else {
 			info.jited_prog_insns = 0;
 		}
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 08/10] bpf: get JITed image lengths of functions via syscall
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

This adds new two new fields to struct bpf_prog_info. For
multi-function programs, these fields can be used to pass
a list of the JITed image lengths of each function for a
given program to userspace using the bpf system call with
the BPF_OBJ_GET_INFO_BY_FD command.

This can be used by userspace applications like bpftool
to split up the contiguous JITed dump, also obtained via
the system call, into more relatable chunks corresponding
to each function.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 include/uapi/linux/bpf.h |  2 ++
 kernel/bpf/syscall.c     | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c44105f27da9..8c3109b5d6d3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2206,7 +2206,9 @@ struct bpf_prog_info {
 	__u64 netns_dev;
 	__u64 netns_ino;
 	__u32 nr_jited_ksyms;
+	__u32 nr_jited_func_lens;
 	__aligned_u64 jited_ksyms;
+	__aligned_u64 jited_func_lens;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1c4cba91e523..faadbcd90191 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2036,6 +2036,26 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		}
 	}
 
+	ulen = info.nr_jited_func_lens;
+	info.nr_jited_func_lens = prog->aux->func_cnt;
+	if (info.nr_jited_func_lens && ulen) {
+		if (bpf_dump_raw_ok()) {
+			u32 __user *user_lens;
+			u32 func_len, i;
+
+			/* copy the JITed image lengths for each function */
+			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
+			user_lens = u64_to_user_ptr(info.jited_func_lens);
+			for (i = 0; i < ulen; i++) {
+				func_len = prog->aux->func[i]->jited_len;
+				if (put_user(func_len, &user_lens[i]))
+					return -EFAULT;
+			}
+		} else {
+			info.jited_func_lens = 0;
+		}
+	}
+
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 09/10] tools: bpf: sync bpf uapi header
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

Syncing the bpf.h uapi header with tools so that struct
bpf_prog_info has the two new fields for passing on the
JITed image lengths of each function in a multi-function
program.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 tools/include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c44105f27da9..8c3109b5d6d3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2206,7 +2206,9 @@ struct bpf_prog_info {
 	__u64 netns_dev;
 	__u64 netns_ino;
 	__u32 nr_jited_ksyms;
+	__u32 nr_jited_func_lens;
 	__aligned_u64 jited_ksyms;
+	__aligned_u64 jited_func_lens;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 10/10] tools: bpftool: add delimiters to multi-function JITed dumps
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

This splits up the contiguous JITed dump obtained via the bpf
system call into more relatable chunks for each function in
the program. If the kernel symbols corresponding to these are
known, they are printed in the header for each JIT image dump
otherwise the masked start address is printed.

Before applying this patch:

  # bpftool prog dump jited id 1

     0:   nop
     4:   nop
     8:   mflr    r0
     c:   std     r0,16(r1)
    10:   stdu    r1,-112(r1)
    14:   std     r31,104(r1)
  ...
    a8:   mr      r3,r8
    ac:   blr
    b0:   nop
    b4:   nop
    b8:   mflr    r0
    bc:   std     r0,16(r1)
    c0:   stdu    r1,-112(r1)
    c4:   std     r31,104(r1)
  ...
   138:   mr      r3,r8
   13c:   blr

After applying this patch:

  # echo 0 > /proc/sys/net/core/bpf_jit_kallsyms
  # bpftool prog dump jited id 1

  d00000000acc0000:
     0:   nop
     4:   nop
     8:   mflr    r0
     c:   std     r0,16(r1)
    10:   stdu    r1,-112(r1)
    14:   std     r31,104(r1)
  ...
    a8:   mr      r3,r8
    ac:   blr

  d00000000ad20000:
     0:   nop
     4:   nop
     8:   mflr    r0
     c:   std     r0,16(r1)
    10:   stdu    r1,-112(r1)
    14:   std     r31,104(r1)
  ...
    88:   mr      r3,r8
    8c:   blr

  # echo 1 > /proc/sys/net/core/bpf_jit_kallsyms
  # bpftool prog dump jited id 1

  bpf_prog_8852b2ccb8ec75a7_F:
     0:   nop
     4:   nop
     8:   mflr    r0
     c:   std     r0,16(r1)
    10:   stdu    r1,-112(r1)
    14:   std     r31,104(r1)
  ...
    a8:   mr      r3,r8
    ac:   blr

  bpf_prog_196af774a3477707_F:
     0:   nop
     4:   nop
     8:   mflr    r0
     c:   std     r0,16(r1)
    10:   stdu    r1,-112(r1)
    14:   std     r31,104(r1)
  ...
    88:   mr      r3,r8
    8c:   blr

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 tools/bpf/bpftool/prog.c          | 51 ++++++++++++++++++++++++++++++++++++++-
 tools/bpf/bpftool/xlated_dumper.c |  4 +--
 tools/bpf/bpftool/xlated_dumper.h |  1 +
 3 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e05ab58d39e2..8ab7a683ac67 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -422,7 +422,9 @@ static int do_dump(int argc, char **argv)
 {
 	unsigned long *func_ksyms = NULL;
 	struct bpf_prog_info info = {};
+	unsigned int *func_lens = NULL;
 	unsigned int nr_func_ksyms;
+	unsigned int nr_func_lens;
 	struct dump_data dd = {};
 	__u32 len = sizeof(info);
 	unsigned int buf_size;
@@ -508,12 +510,24 @@ static int do_dump(int argc, char **argv)
 		}
 	}
 
+	nr_func_lens = info.nr_jited_func_lens;
+	if (nr_func_lens) {
+		func_lens = malloc(nr_func_lens * sizeof(__u32));
+		if (!func_lens) {
+			p_err("mem alloc failed");
+			close(fd);
+			goto err_free;
+		}
+	}
+
 	memset(&info, 0, sizeof(info));
 
 	*member_ptr = ptr_to_u64(buf);
 	*member_len = buf_size;
 	info.jited_ksyms = ptr_to_u64(func_ksyms);
 	info.nr_jited_ksyms = nr_func_ksyms;
+	info.jited_func_lens = ptr_to_u64(func_lens);
+	info.nr_jited_func_lens = nr_func_lens;
 
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	close(fd);
@@ -532,6 +546,11 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if (info.nr_jited_func_lens > nr_func_lens) {
+		p_err("too many values returned");
+		goto err_free;
+	}
+
 	if ((member_len == &info.jited_prog_len &&
 	     info.jited_prog_insns == 0) ||
 	    (member_len == &info.xlated_prog_len &&
@@ -569,7 +588,35 @@ static int do_dump(int argc, char **argv)
 				goto err_free;
 		}
 
-		disasm_print_insn(buf, *member_len, opcodes, name);
+		if (info.nr_jited_func_lens && info.jited_func_lens) {
+			struct kernel_sym *sym = NULL;
+			unsigned char *img = buf;
+			__u64 *ksyms = NULL;
+			__u32 *lens;
+			__u32 i;
+
+			if (info.nr_jited_ksyms) {
+				kernel_syms_load(&dd);
+				ksyms = (__u64 *) info.jited_ksyms;
+			}
+
+			lens = (__u32 *) info.jited_func_lens;
+			for (i = 0; i < info.nr_jited_func_lens; i++) {
+				if (ksyms) {
+					sym = kernel_syms_search(&dd, ksyms[i]);
+					if (sym)
+						printf("%s:\n", sym->name);
+					else
+						printf("%016llx:\n", ksyms[i]);
+				}
+
+				disasm_print_insn(img, lens[i], opcodes, name);
+				img += lens[i];
+				printf("\n");
+			}
+		} else {
+			disasm_print_insn(buf, *member_len, opcodes, name);
+		}
 	} else if (visual) {
 		if (json_output)
 			jsonw_null(json_wtr);
@@ -589,11 +636,13 @@ static int do_dump(int argc, char **argv)
 
 	free(buf);
 	free(func_ksyms);
+	free(func_lens);
 	return 0;
 
 err_free:
 	free(buf);
 	free(func_ksyms);
+	free(func_lens);
 	return -1;
 }
 
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index efdc8fecf2bb..b97f1da60dd1 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd)
 	free(dd->sym_mapping);
 }
 
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
-					     unsigned long key)
+struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+				      unsigned long key)
 {
 	struct kernel_sym sym = {
 		.address = key,
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index eafbb49c8d0b..33d86e2b369b 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -56,6 +56,7 @@ struct dump_data {
 
 void kernel_syms_load(struct dump_data *dd);
 void kernel_syms_destroy(struct dump_data *dd);
+struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key);
 void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
 		      bool opcodes);
 void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
-- 
2.14.3

^ permalink raw reply related

* Re: [PATCH V3 8/8] dt-bindings: stm32: add compatible for syscon
From: Rob Herring @ 2018-05-22 17:22 UTC (permalink / raw)
  To: Christophe Roullier
  Cc: mark.rutland, mcoquelin.stm32, alexandre.torgue, peppe.cavallaro,
	devicetree, linux-arm-kernel, netdev, andrew
In-Reply-To: <1526890046-10565-9-git-send-email-christophe.roullier@st.com>

On Mon, May 21, 2018 at 10:07:26AM +0200, Christophe Roullier wrote:
> This patch describes syscon DT bindings.
> 
> Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
> ---
>  Documentation/devicetree/bindings/arm/stm32.txt | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/arm/stm32.txt b/Documentation/devicetree/bindings/arm/stm32.txt
> index 6808ed9..e46ebad 100644
> --- a/Documentation/devicetree/bindings/arm/stm32.txt
> +++ b/Documentation/devicetree/bindings/arm/stm32.txt
> @@ -8,3 +8,8 @@ using one of the following compatible strings:
>    st,stm32f746
>    st,stm32h743
>    st,stm32mp157
> +
> +Required nodes:
> +- syscon: the soc bus node must have a system controller node pointing to the
> +  global control registers, with the compatible string
> +  "st,stm32mp157-syscfg", "syscon";

Please don't mix soc/board bindings with other nodes. So perhaps 
stm32-syscon.txt.

Rob

^ permalink raw reply

* Re: [PATCH v4 0/3] IR decoding using BPF
From: VDR User @ 2018-05-22 17:24 UTC (permalink / raw)
  To: Matthias Reichl, Sean Young, mailing list: linux-media,
	Linux Kernel Mailing List, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Devin Heitmueller,
	Y Song, Quentin Monnet
In-Reply-To: <20180522135020.y3xxmtvhdui2so3t@camel2.lan>

Sean, I'd like to echo Matthias's appreciation for your work with this
BPF project. I'm very much looking forward to the possibility of using
my remotes directly with decoders generated from the existing
lircd.conf's. Excited seeing your work progress!

Cheers,
Derek

On Tue, May 22, 2018 at 6:50 AM, Matthias Reichl <hias@horus.com> wrote:
> Hi Sean,
>
> On Fri, May 18, 2018 at 03:07:27PM +0100, Sean Young wrote:
>> The kernel IR decoders (drivers/media/rc/ir-*-decoder.c) support the most
>> widely used IR protocols, but there are many protocols which are not
>> supported[1]. For example, the lirc-remotes[2] repo has over 2700 remotes,
>> many of which are not supported by rc-core. There is a "long tail" of
>> unsupported IR protocols, for which lircd is need to decode the IR .
>>
>> IR encoding is done in such a way that some simple circuit can decode it;
>> therefore, bpf is ideal.
>>
>> In order to support all these protocols, here we have bpf based IR decoding.
>> The idea is that user-space can define a decoder in bpf, attach it to
>> the rc device through the lirc chardev.
>>
>> Separate work is underway to extend ir-keytable to have an extensive library
>> of bpf-based decoders, and a much expanded library of rc keymaps.
>>
>> Another future application would be to compile IRP[3] to a IR BPF program, and
>> so support virtually every remote without having to write a decoder for each.
>> It might also be possible to support non-button devices such as analog
>> directional pads or air conditioning remote controls and decode the target
>> temperature in bpf, and pass that to an input device.
>
> Thanks a lot, this looks like a very interesting feature to me!
>
> Unfortunately I don't have time to test it ATM, but please keep
> me posted - also on ir-keytable progress - I'm rather excited
> to give it a try.
>
> so long & thanks,
>
> Hias
>
>>
>> Thanks,
>>
>> Sean Young
>>
>> [1] http://www.hifi-remote.com/wiki/index.php?title=DecodeIR
>> [2] https://sourceforge.net/p/lirc-remotes/code/ci/master/tree/remotes/
>> [3] http://www.hifi-remote.com/wiki/index.php?title=IRP_Notation
>>
>> Changes since v3:
>>  - Implemented review comments from Quentin Monnet and Y Song (thanks!)
>>  - More helpful and better formatted bpf helper documentation
>>  - Changed back to bpf_prog_array rather than open-coded implementation
>>  - scancodes can be 64 bit
>>  - bpf gets passed values in microseconds, not nanoseconds.
>>    microseconds is more than than enough (IR receivers support carriers upto
>>    70kHz, at which point a single period is already 14 microseconds). Also,
>>    this makes it much more consistent with lirc mode2.
>>  - Since it looks much more like lirc mode2, rename the program type to
>>    BPF_PROG_TYPE_LIRC_MODE2.
>>  - Rebased on bpf-next
>>
>> Changes since v2:
>>  - Fixed locking issues
>>  - Improved self-test to cover more cases
>>  - Rebased on bpf-next again
>>
>> Changes since v1:
>>  - Code review comments from Y Song <ys114321@gmail.com> and
>>    Randy Dunlap <rdunlap@infradead.org>
>>  - Re-wrote sample bpf to be selftest
>>  - Renamed RAWIR_DECODER -> RAWIR_EVENT (Kconfig, context, bpf prog type)
>>  - Rebase on bpf-next
>>  - Introduced bpf_rawir_event context structure with simpler access checking
>>
>> Sean Young (3):
>>   bpf: bpf_prog_array_copy() should return -ENOENT if exclude_prog not
>>     found
>>   media: rc: introduce BPF_PROG_LIRC_MODE2
>>   bpf: add selftest for lirc_mode2 type program
>>
>>  drivers/media/rc/Kconfig                      |  13 +
>>  drivers/media/rc/Makefile                     |   1 +
>>  drivers/media/rc/bpf-lirc.c                   | 308 ++++++++++++++++++
>>  drivers/media/rc/lirc_dev.c                   |  30 ++
>>  drivers/media/rc/rc-core-priv.h               |  22 ++
>>  drivers/media/rc/rc-ir-raw.c                  |  12 +-
>>  include/linux/bpf_rcdev.h                     |  30 ++
>>  include/linux/bpf_types.h                     |   3 +
>>  include/uapi/linux/bpf.h                      |  53 ++-
>>  kernel/bpf/core.c                             |  11 +-
>>  kernel/bpf/syscall.c                          |   7 +
>>  kernel/trace/bpf_trace.c                      |   2 +
>>  tools/bpf/bpftool/prog.c                      |   1 +
>>  tools/include/uapi/linux/bpf.h                |  53 ++-
>>  tools/include/uapi/linux/lirc.h               | 217 ++++++++++++
>>  tools/lib/bpf/libbpf.c                        |   1 +
>>  tools/testing/selftests/bpf/Makefile          |   8 +-
>>  tools/testing/selftests/bpf/bpf_helpers.h     |   6 +
>>  .../testing/selftests/bpf/test_lirc_mode2.sh  |  28 ++
>>  .../selftests/bpf/test_lirc_mode2_kern.c      |  23 ++
>>  .../selftests/bpf/test_lirc_mode2_user.c      | 154 +++++++++
>>  21 files changed, 974 insertions(+), 9 deletions(-)
>>  create mode 100644 drivers/media/rc/bpf-lirc.c
>>  create mode 100644 include/linux/bpf_rcdev.h
>>  create mode 100644 tools/include/uapi/linux/lirc.h
>>  create mode 100755 tools/testing/selftests/bpf/test_lirc_mode2.sh
>>  create mode 100644 tools/testing/selftests/bpf/test_lirc_mode2_kern.c
>>  create mode 100644 tools/testing/selftests/bpf/test_lirc_mode2_user.c
>>
>> --
>> 2.17.0
>>

^ permalink raw reply

* Re: [PATCH net-next 1/2] tcp: add max_quickacks param to tcp_incr_quickack and tcp_enter_quickack_mode
From: Neal Cardwell @ 2018-05-22 17:27 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Netdev, Van Jacobson, Yuchung Cheng,
	Soheil Hassas Yeganeh, Eric Dumazet
In-Reply-To: <20180521220857.229273-2-edumazet@google.com>

On Mon, May 21, 2018 at 6:09 PM Eric Dumazet <edumazet@google.com> wrote:

> We want to add finer control of the number of ACK packets sent after
> ECN events.

> This patch is not changing current behavior, it only enables following
> change.

> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---

Acked-by: Neal Cardwell <ncardwell@google.com>

Thanks!

neal

^ permalink raw reply

* Re: [RFC PATCH ghak32 V2 13/13] debug audit: read container ID of a process
From: Richard Guy Briggs @ 2018-05-22 17:35 UTC (permalink / raw)
  To: Paul Moore
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, luto-DgEjT+Ai2ygdnm+yROfE0A,
	jlayton-H+wXaHxf7aLQT0dZR+AlfA, carlos-H+wXaHxf7aLQT0dZR+AlfA,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA, LKML,
	dhowells-H+wXaHxf7aLQT0dZR+AlfA,
	linux-audit-H+wXaHxf7aLQT0dZR+AlfA, Eric W. Biederman,
	simo-H+wXaHxf7aLQT0dZR+AlfA, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Eric Paris, Steve Grubb,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn
In-Reply-To: <CAHC9VhQruN88t-R9Qo3e4hwCZ58RAyrmEmH1nY4RR6NZaiBzGQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

On 2018-05-21 16:06, Paul Moore wrote:
> On Mon, May 21, 2018 at 3:19 PM, Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> > Steve Grubb <sgrubb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> writes:
> >> On Friday, March 16, 2018 5:00:40 AM EDT Richard Guy Briggs wrote:
> >>> Add support for reading the container ID from the proc filesystem.
> >>
> >> I think this could be useful in general. Please consider this to be part of
> >> the full patch set and not something merely used to debug the patches.
> >
> > Only with an audit specific name.
> >
> > As it is:
> >
> > Nacked-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> >
> > The truth is the containerid name really stinks and is quite confusing
> > and does not imply that the label applies only to audit.  And little
> > things like this make me extremely uncofortable with it.
> 
> It also makes the audit container ID (notice how I *always* call it
> the *audit* container ID? that is not an accident) available for
> userspace applications to abuse.  Perhaps in the future we can look at
> ways to make this more available to applications, but this patch is
> not the answer.

Do you have a productive suggestion?

> paul moore

- RGB

--
Richard Guy Briggs <rgb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

^ permalink raw reply

* Re: [PATCH net-next 2/2] tcp: do not aggressively quick ack after ECN events
From: Neal Cardwell @ 2018-05-22 17:36 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Netdev, Van Jacobson, Yuchung Cheng,
	Soheil Hassas Yeganeh, Eric Dumazet
In-Reply-To: <20180521220857.229273-3-edumazet@google.com>

On Mon, May 21, 2018 at 6:09 PM Eric Dumazet <edumazet@google.com> wrote:

> ECN signals currently forces TCP to enter quickack mode for
> up to 16 (TCP_MAX_QUICKACKS) following incoming packets.

> We believe this is not needed, and only sending one immediate ack
> for the current packet should be enough.

> This should reduce the extra load noticed in DCTCP environments,
> after congestion events.

> This is part 2 of our effort to reduce pure ACK packets.

> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---

Acked-by: Neal Cardwell <ncardwell@google.com>

Thanks!

neal

^ permalink raw reply

* Re: [PATCH net-next v11 2/5] netvsc: refactor notifier/event handling code to use the failover framework
From: Jiri Pirko @ 2018-05-22 17:38 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Sridhar Samudrala, stephen, davem, netdev, virtualization,
	virtio-dev, jesse.brandeburg, alexander.h.duyck, kubakici,
	jasowang, loseweigh, aaron.f.brown, anjali.singhai
In-Reply-To: <20180522194633-mutt-send-email-mst@kernel.org>

Tue, May 22, 2018 at 06:52:21PM CEST, mst@redhat.com wrote:
>On Tue, May 22, 2018 at 05:45:01PM +0200, Jiri Pirko wrote:
>> Tue, May 22, 2018 at 05:32:30PM CEST, mst@redhat.com wrote:
>> >On Tue, May 22, 2018 at 05:13:43PM +0200, Jiri Pirko wrote:
>> >> Tue, May 22, 2018 at 03:39:33PM CEST, mst@redhat.com wrote:
>> >> >On Tue, May 22, 2018 at 03:26:26PM +0200, Jiri Pirko wrote:
>> >> >> Tue, May 22, 2018 at 03:17:37PM CEST, mst@redhat.com wrote:
>> >> >> >On Tue, May 22, 2018 at 03:14:22PM +0200, Jiri Pirko wrote:
>> >> >> >> Tue, May 22, 2018 at 03:12:40PM CEST, mst@redhat.com wrote:
>> >> >> >> >On Tue, May 22, 2018 at 11:08:53AM +0200, Jiri Pirko wrote:
>> >> >> >> >> Tue, May 22, 2018 at 11:06:37AM CEST, jiri@resnulli.us wrote:
>> >> >> >> >> >Tue, May 22, 2018 at 04:06:18AM CEST, sridhar.samudrala@intel.com wrote:
>> >> >> >> >> >>Use the registration/notification framework supported by the generic
>> >> >> >> >> >>failover infrastructure.
>> >> >> >> >> >>
>> >> >> >> >> >>Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>> >> >> >> >> >
>> >> >> >> >> >In previous patchset versions, the common code did
>> >> >> >> >> >netdev_rx_handler_register() and netdev_upper_dev_link() etc
>> >> >> >> >> >(netvsc_vf_join()). Now, this is still done in netvsc. Why?
>> >> >> >> >> >
>> >> >> >> >> >This should be part of the common "failover" code.
>> >> >> >> >> >
>> >> >> >> >> 
>> >> >> >> >> Also note that in the current patchset you use IFF_FAILOVER flag for
>> >> >> >> >> master, yet for the slave you use IFF_SLAVE. That is wrong.
>> >> >> >> >> IFF_FAILOVER_SLAVE should be used.
>> >> >> >> >
>> >> >> >> >Or drop IFF_FAILOVER_SLAVE and set both IFF_FAILOVER and IFF_SLAVE?
>> >> >> >> 
>> >> >> >> No. IFF_SLAVE is for bonding.
>> >> >> >
>> >> >> >What breaks if we reuse it for failover?
>> >> >> 
>> >> >> This is exposed to userspace. IFF_SLAVE is expected for bonding slaves.
>> >> >> And failover slave is not a bonding slave.
>> >> >
>> >> >That does not really answer the question.  I'd claim it's sufficiently
>> >> >like a bond slave for IFF_SLAVE to make sense.
>> >> >
>> >> >In fact you will find that netvsc already sets IFF_SLAVE, and so
>> >> 
>> >> netvsc does the whole failover thing in a wrong way. This patchset is
>> >> trying to fix it.
>> >
>> >Maybe, but we don't need gratuitous changes either, especially if they
>> >break userspace.
>> 
>> What do you mean by the "break"? It was a mistake to reuse IFF_SLAVE at
>> the first place, lets fix it. If some userspace depends on that flag, it
>> is broken anyway.
>> 
>> 
>> >
>> >> >does e.g. the eql driver.
>> >> >
>> >> >The advantage of using IFF_SLAVE is that userspace knows to skip it.  If
>> >> 
>> >> The userspace should know how to skip other types of slaves - team,
>> >> bridge, ovs, etc.
>> >> The "master link" should be the one to look at.
>> >> 
>> >
>> >How should existing userspace know which ones to skip and which one is
>> >the master?  Right now userspace seems to assume whatever does not have
>> >IFF_SLAVE should be looked at. Are you saying that's not the right thing
>> 
>> Why do you say so? What do you mean by "looked at"? Certainly not.
>> IFLA_MASTER is the attribute that should be looked at, nothing else.
>> 
>> 
>> >to do and userspace should be fixed? What should userspace do in
>> >your opinion that will be forward compatible with future kernels?
>> >
>> >> 
>> >> >we don't set IFF_SLAVE existing userspace tries to use the lowerdev.
>> >> 
>> >> Each master type has a IFF_ master flag and IFF_ slave flag.
>> >
>> >Could you give some examples please?
>> 
>> enum netdev_priv_flags {
>>         IFF_EBRIDGE                     = 1<<1,
>>         IFF_BRIDGE_PORT                 = 1<<9,
>>         IFF_OPENVSWITCH                 = 1<<20,
>>         IFF_OVS_DATAPATH                = 1<<10,
>> 	IFF_L3MDEV_MASTER               = 1<<18,
>>         IFF_L3MDEV_SLAVE                = 1<<21,
>>         IFF_TEAM                        = 1<<22,
>>         IFF_TEAM_PORT                   = 1<<13,
>> };
>
>That's not in uapi, is it?  the comment above that says:

Correct.


>
>These flags are invisible to userspace
>
>
>
>> 
>> >
>> >> In private
>> >> flag. I don't see no reason to break this pattern here.
>> >
>> >Other masters are setup from userspace, this one is set up automatically
>> >by kernel. So the bar is higher, we need an interface that existing
>> >userspace knows about.  We can't just say "oh if userspace set this up
>> >it should know to skip lowerdevs".
>> >
>> >Otherwise multiple interfaces with same mac tend to confuse userspace.
>> 
>> No difference, really.
>> Regardless who does the setup, and independent userspace deamon should
>> react accordingly.
>
>If the deamon does the setup itself, it's reasonable to require that it
>learns about new flags each time we add a new driver.  If it doesn't,
>then I think it's less reasonable.

No need. The "IFLA_MASTER" attr is always there to be looked at. That is
enough.

^ permalink raw reply

* Re: [PATCH net] sctp: fix the issue that flags are ignored when using kernel_connect
From: David Miller @ 2018-05-22 17:40 UTC (permalink / raw)
  To: lucien.xin; +Cc: netdev, linux-sctp, marcelo.leitner, nhorman, mkubecek
In-Reply-To: <4863916c3e574b0d860725466d7d4a2f445fbe5b.1526805550.git.lucien.xin@gmail.com>

From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 20 May 2018 16:39:10 +0800

> Now sctp uses inet_dgram_connect as its proto_ops .connect, and the flags
> param can't be passed into its proto .connect where this flags is really
> needed.
> 
> sctp works around it by getting flags from socket file in __sctp_connect.
> It works for connecting from userspace, as inherently the user sock has
> socket file and it passes f_flags as the flags param into the proto_ops
> .connect.
> 
> However, the sock created by sock_create_kern doesn't have a socket file,
> and it passes the flags (like O_NONBLOCK) by using the flags param in
> kernel_connect, which calls proto_ops .connect later.
> 
> So to fix it, this patch defines a new proto_ops .connect for sctp,
> sctp_inet_connect, which calls __sctp_connect() directly with this
> flags param. After this, the sctp's proto .connect can be removed.
> 
> Note that sctp_inet_connect doesn't need to do some checks that are not
> needed for sctp, which makes thing better than with inet_dgram_connect.
> 
> Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Applied, thank you.

I don't see a Fixes: tag, please give me some guidance me wrt. -stable.

^ permalink raw reply

* [net-next 0/9][pull request] 40GbE Intel Wired LAN Driver Updates 2018-05-22
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann, jogreene

This series contains updates to i40e only.

Jake provides all the changes in this series starting with making it
consistent in how we approach the bit lock.  Fixed the reporting of the
VEB statistics and the queue statistics to always return every queue
even if it is not currently in use.  Use WARN_ONCE() so that the first
time we end up with an incorrect size we will dump a stack trace and a
message to help highlight the issue early in testing.  Folded the fixed
string prefix into the stat string definition.  Instead of using a
separate char *p pointer when copying strings, use the data pointer
directly.  Added code comments for several of the statistic functions to
better explain the number and ordering of statistics.

The following are changes since commit e3bb946cd922b773fdc03252aefbf2472d1d530c:
  Merge branch 'TI-Ethernet-driver-warnings-fixes'
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 40GbE

Jacob Keller (9):
  i40e: free skb after clearing lock in ptp_stop
  i40e: always return VEB stat strings
  i40e: always return all queue stat strings
  i40e: split i40e_get_strings() into smaller functions
  i40e: use WARN_ONCE to replace the commented BUG_ON size check
  i40e: fold prefix strings directly into stat names
  i40e: update data pointer directly when copying to the buffer
  i40e: add function doc headers for ethtool stats functions
  i40e: use the more traditional 'i' loop variable

 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 459 ++++++++++--------
 drivers/net/ethernet/intel/i40e/i40e_ptp.c    |   4 +-
 2 files changed, 264 insertions(+), 199 deletions(-)

-- 
2.17.0

^ permalink raw reply

* [net-next 1/9] i40e: free skb after clearing lock in ptp_stop
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Use the same logic to free the skb after clearing the Tx timestamp bit
lock in i40e_ptp_stop as we use in the other locations. It is not as
important here since we are not racing against a future Tx timestamp
request (as we are disabling PTP at this point). However it is good to
be consistent in how we approach the bit lock so that future callers
don't copy the old anti-pattern.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index d50d84927e6b..35f2866b38c6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -799,9 +799,11 @@ void i40e_ptp_stop(struct i40e_pf *pf)
 	pf->ptp_rx = false;
 
 	if (pf->ptp_tx_skb) {
-		dev_kfree_skb_any(pf->ptp_tx_skb);
+		struct sk_buff *skb = pf->ptp_tx_skb;
+
 		pf->ptp_tx_skb = NULL;
 		clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state);
+		dev_kfree_skb_any(skb);
 	}
 
 	if (pf->ptp_clock) {
-- 
2.17.0

^ permalink raw reply related

* [net-next 2/9] i40e: always return VEB stat strings
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The ethtool API for obtaining device statistics is not intended to allow
runtime changes in the number of statistics reported. It may *appear*
this way, as there is an ability to request the number of stats using
ethtool_get_set_count(). However, it is expected that this must always
return the same value for invocations of the same device.

If we don't satisfy this contract, and allow the number of stats to
change during run time, we could cause invalid memory accesses or report
the stat strings incorrectly. This is because the API for obtaining
stats is to (1) get the size, (2) get the strings and finally (3) get
the stats. Since these are each separate ethtool op commands, it is not
possible to maintain consistency by holding the RTNL lock over the whole
operation. This results in the potential for a race condition to occur
where the size changed between any of the 3 calls.

Avoid this issue by requiring that we always return the same value for
a given device. We can check any values which remain constant for the
life of the device, but must not report different sizes depending on
runtime attributes.

This patch specifically fixes the VEB statistics strings to always be
reported. Other issues will be fixed in future patches.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 52 ++++++++-----------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 329e59eae4a1..de5dad7ff340 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1661,15 +1661,10 @@ static int i40e_get_stats_count(struct net_device *netdev)
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
 
-	if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) {
-		if (pf->lan_veb != I40E_NO_VEB &&
-		    pf->flags & I40E_FLAG_VEB_STATS_ENABLED)
-			return I40E_PF_STATS_LEN(netdev) + I40E_VEB_STATS_TOTAL;
-		else
-			return I40E_PF_STATS_LEN(netdev);
-	} else {
+	if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1)
+		return I40E_PF_STATS_LEN(netdev) + I40E_VEB_STATS_TOTAL;
+	else
 		return I40E_VSI_STATS_LEN(netdev);
-	}
 }
 
 static int i40e_get_sset_count(struct net_device *netdev, int sset)
@@ -1760,6 +1755,8 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 			data[i++] = veb->tc_stats.tc_rx_packets[j];
 			data[i++] = veb->tc_stats.tc_rx_bytes[j];
 		}
+	} else {
+		i += I40E_VEB_STATS_TOTAL;
 	}
 	for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
 		p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
@@ -1816,27 +1813,24 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
 		if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
 			return;
 
-		if ((pf->lan_veb != I40E_NO_VEB) &&
-		    (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) {
-			for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
-				snprintf(p, ETH_GSTRING_LEN, "veb.%s",
-					i40e_gstrings_veb_stats[i].stat_string);
-				p += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-				snprintf(p, ETH_GSTRING_LEN,
-					 "veb.tc_%d_tx_packets", i);
-				p += ETH_GSTRING_LEN;
-				snprintf(p, ETH_GSTRING_LEN,
-					 "veb.tc_%d_tx_bytes", i);
-				p += ETH_GSTRING_LEN;
-				snprintf(p, ETH_GSTRING_LEN,
-					 "veb.tc_%d_rx_packets", i);
-				p += ETH_GSTRING_LEN;
-				snprintf(p, ETH_GSTRING_LEN,
-					 "veb.tc_%d_rx_bytes", i);
-				p += ETH_GSTRING_LEN;
-			}
+		for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "veb.%s",
+				 i40e_gstrings_veb_stats[i].stat_string);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "veb.tc_%u_tx_packets", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "veb.tc_%u_tx_bytes", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "veb.tc_%u_rx_packets", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN,
+				 "veb.tc_%u_rx_bytes", i);
+			p += ETH_GSTRING_LEN;
 		}
 		for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
 			snprintf(p, ETH_GSTRING_LEN, "port.%s",
-- 
2.17.0

^ permalink raw reply related

* [net-next 6/9] i40e: fold prefix strings directly into stat names
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

We always prefix these stats with a fixed string, so just fold this
prefix into the stat string definition. This preparatory work will make
it easier to implement a helper function to copy stats and strings into
the supplied buffers in a future patch.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 137 +++++++++---------
 1 file changed, 69 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 32bcb6a2a590..6b34845d251c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -42,18 +42,18 @@ static const struct i40e_stats i40e_gstrings_net_stats[] = {
 };
 
 static const struct i40e_stats i40e_gstrings_veb_stats[] = {
-	I40E_VEB_STAT("rx_bytes", stats.rx_bytes),
-	I40E_VEB_STAT("tx_bytes", stats.tx_bytes),
-	I40E_VEB_STAT("rx_unicast", stats.rx_unicast),
-	I40E_VEB_STAT("tx_unicast", stats.tx_unicast),
-	I40E_VEB_STAT("rx_multicast", stats.rx_multicast),
-	I40E_VEB_STAT("tx_multicast", stats.tx_multicast),
-	I40E_VEB_STAT("rx_broadcast", stats.rx_broadcast),
-	I40E_VEB_STAT("tx_broadcast", stats.tx_broadcast),
-	I40E_VEB_STAT("rx_discards", stats.rx_discards),
-	I40E_VEB_STAT("tx_discards", stats.tx_discards),
-	I40E_VEB_STAT("tx_errors", stats.tx_errors),
-	I40E_VEB_STAT("rx_unknown_protocol", stats.rx_unknown_protocol),
+	I40E_VEB_STAT("veb.rx_bytes", stats.rx_bytes),
+	I40E_VEB_STAT("veb.tx_bytes", stats.tx_bytes),
+	I40E_VEB_STAT("veb.rx_unicast", stats.rx_unicast),
+	I40E_VEB_STAT("veb.tx_unicast", stats.tx_unicast),
+	I40E_VEB_STAT("veb.rx_multicast", stats.rx_multicast),
+	I40E_VEB_STAT("veb.tx_multicast", stats.tx_multicast),
+	I40E_VEB_STAT("veb.rx_broadcast", stats.rx_broadcast),
+	I40E_VEB_STAT("veb.tx_broadcast", stats.tx_broadcast),
+	I40E_VEB_STAT("veb.rx_discards", stats.rx_discards),
+	I40E_VEB_STAT("veb.tx_discards", stats.tx_discards),
+	I40E_VEB_STAT("veb.tx_errors", stats.tx_errors),
+	I40E_VEB_STAT("veb.rx_unknown_protocol", stats.rx_unknown_protocol),
 };
 
 static const struct i40e_stats i40e_gstrings_misc_stats[] = {
@@ -82,62 +82,63 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
  * is queried on the base PF netdev, not on the VMDq or FCoE netdev.
  */
 static const struct i40e_stats i40e_gstrings_stats[] = {
-	I40E_PF_STAT("rx_bytes", stats.eth.rx_bytes),
-	I40E_PF_STAT("tx_bytes", stats.eth.tx_bytes),
-	I40E_PF_STAT("rx_unicast", stats.eth.rx_unicast),
-	I40E_PF_STAT("tx_unicast", stats.eth.tx_unicast),
-	I40E_PF_STAT("rx_multicast", stats.eth.rx_multicast),
-	I40E_PF_STAT("tx_multicast", stats.eth.tx_multicast),
-	I40E_PF_STAT("rx_broadcast", stats.eth.rx_broadcast),
-	I40E_PF_STAT("tx_broadcast", stats.eth.tx_broadcast),
-	I40E_PF_STAT("tx_errors", stats.eth.tx_errors),
-	I40E_PF_STAT("rx_dropped", stats.eth.rx_discards),
-	I40E_PF_STAT("tx_dropped_link_down", stats.tx_dropped_link_down),
-	I40E_PF_STAT("rx_crc_errors", stats.crc_errors),
-	I40E_PF_STAT("illegal_bytes", stats.illegal_bytes),
-	I40E_PF_STAT("mac_local_faults", stats.mac_local_faults),
-	I40E_PF_STAT("mac_remote_faults", stats.mac_remote_faults),
-	I40E_PF_STAT("tx_timeout", tx_timeout_count),
-	I40E_PF_STAT("rx_csum_bad", hw_csum_rx_error),
-	I40E_PF_STAT("rx_length_errors", stats.rx_length_errors),
-	I40E_PF_STAT("link_xon_rx", stats.link_xon_rx),
-	I40E_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
-	I40E_PF_STAT("link_xon_tx", stats.link_xon_tx),
-	I40E_PF_STAT("link_xoff_tx", stats.link_xoff_tx),
-	I40E_PF_STAT("rx_size_64", stats.rx_size_64),
-	I40E_PF_STAT("rx_size_127", stats.rx_size_127),
-	I40E_PF_STAT("rx_size_255", stats.rx_size_255),
-	I40E_PF_STAT("rx_size_511", stats.rx_size_511),
-	I40E_PF_STAT("rx_size_1023", stats.rx_size_1023),
-	I40E_PF_STAT("rx_size_1522", stats.rx_size_1522),
-	I40E_PF_STAT("rx_size_big", stats.rx_size_big),
-	I40E_PF_STAT("tx_size_64", stats.tx_size_64),
-	I40E_PF_STAT("tx_size_127", stats.tx_size_127),
-	I40E_PF_STAT("tx_size_255", stats.tx_size_255),
-	I40E_PF_STAT("tx_size_511", stats.tx_size_511),
-	I40E_PF_STAT("tx_size_1023", stats.tx_size_1023),
-	I40E_PF_STAT("tx_size_1522", stats.tx_size_1522),
-	I40E_PF_STAT("tx_size_big", stats.tx_size_big),
-	I40E_PF_STAT("rx_undersize", stats.rx_undersize),
-	I40E_PF_STAT("rx_fragments", stats.rx_fragments),
-	I40E_PF_STAT("rx_oversize", stats.rx_oversize),
-	I40E_PF_STAT("rx_jabber", stats.rx_jabber),
-	I40E_PF_STAT("VF_admin_queue_requests", vf_aq_requests),
-	I40E_PF_STAT("arq_overflows", arq_overflows),
-	I40E_PF_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
-	I40E_PF_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped),
-	I40E_PF_STAT("fdir_flush_cnt", fd_flush_cnt),
-	I40E_PF_STAT("fdir_atr_match", stats.fd_atr_match),
-	I40E_PF_STAT("fdir_atr_tunnel_match", stats.fd_atr_tunnel_match),
-	I40E_PF_STAT("fdir_atr_status", stats.fd_atr_status),
-	I40E_PF_STAT("fdir_sb_match", stats.fd_sb_match),
-	I40E_PF_STAT("fdir_sb_status", stats.fd_sb_status),
+	I40E_PF_STAT("port.rx_bytes", stats.eth.rx_bytes),
+	I40E_PF_STAT("port.tx_bytes", stats.eth.tx_bytes),
+	I40E_PF_STAT("port.rx_unicast", stats.eth.rx_unicast),
+	I40E_PF_STAT("port.tx_unicast", stats.eth.tx_unicast),
+	I40E_PF_STAT("port.rx_multicast", stats.eth.rx_multicast),
+	I40E_PF_STAT("port.tx_multicast", stats.eth.tx_multicast),
+	I40E_PF_STAT("port.rx_broadcast", stats.eth.rx_broadcast),
+	I40E_PF_STAT("port.tx_broadcast", stats.eth.tx_broadcast),
+	I40E_PF_STAT("port.tx_errors", stats.eth.tx_errors),
+	I40E_PF_STAT("port.rx_dropped", stats.eth.rx_discards),
+	I40E_PF_STAT("port.tx_dropped_link_down", stats.tx_dropped_link_down),
+	I40E_PF_STAT("port.rx_crc_errors", stats.crc_errors),
+	I40E_PF_STAT("port.illegal_bytes", stats.illegal_bytes),
+	I40E_PF_STAT("port.mac_local_faults", stats.mac_local_faults),
+	I40E_PF_STAT("port.mac_remote_faults", stats.mac_remote_faults),
+	I40E_PF_STAT("port.tx_timeout", tx_timeout_count),
+	I40E_PF_STAT("port.rx_csum_bad", hw_csum_rx_error),
+	I40E_PF_STAT("port.rx_length_errors", stats.rx_length_errors),
+	I40E_PF_STAT("port.link_xon_rx", stats.link_xon_rx),
+	I40E_PF_STAT("port.link_xoff_rx", stats.link_xoff_rx),
+	I40E_PF_STAT("port.link_xon_tx", stats.link_xon_tx),
+	I40E_PF_STAT("port.link_xoff_tx", stats.link_xoff_tx),
+	I40E_PF_STAT("port.rx_size_64", stats.rx_size_64),
+	I40E_PF_STAT("port.rx_size_127", stats.rx_size_127),
+	I40E_PF_STAT("port.rx_size_255", stats.rx_size_255),
+	I40E_PF_STAT("port.rx_size_511", stats.rx_size_511),
+	I40E_PF_STAT("port.rx_size_1023", stats.rx_size_1023),
+	I40E_PF_STAT("port.rx_size_1522", stats.rx_size_1522),
+	I40E_PF_STAT("port.rx_size_big", stats.rx_size_big),
+	I40E_PF_STAT("port.tx_size_64", stats.tx_size_64),
+	I40E_PF_STAT("port.tx_size_127", stats.tx_size_127),
+	I40E_PF_STAT("port.tx_size_255", stats.tx_size_255),
+	I40E_PF_STAT("port.tx_size_511", stats.tx_size_511),
+	I40E_PF_STAT("port.tx_size_1023", stats.tx_size_1023),
+	I40E_PF_STAT("port.tx_size_1522", stats.tx_size_1522),
+	I40E_PF_STAT("port.tx_size_big", stats.tx_size_big),
+	I40E_PF_STAT("port.rx_undersize", stats.rx_undersize),
+	I40E_PF_STAT("port.rx_fragments", stats.rx_fragments),
+	I40E_PF_STAT("port.rx_oversize", stats.rx_oversize),
+	I40E_PF_STAT("port.rx_jabber", stats.rx_jabber),
+	I40E_PF_STAT("port.VF_admin_queue_requests", vf_aq_requests),
+	I40E_PF_STAT("port.arq_overflows", arq_overflows),
+	I40E_PF_STAT("port.tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+	I40E_PF_STAT("port.rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+	I40E_PF_STAT("port.tx_hwtstamp_skipped", tx_hwtstamp_skipped),
+	I40E_PF_STAT("port.fdir_flush_cnt", fd_flush_cnt),
+	I40E_PF_STAT("port.fdir_atr_match", stats.fd_atr_match),
+	I40E_PF_STAT("port.fdir_atr_tunnel_match", stats.fd_atr_tunnel_match),
+	I40E_PF_STAT("port.fdir_atr_status", stats.fd_atr_status),
+	I40E_PF_STAT("port.fdir_sb_match", stats.fd_sb_match),
+	I40E_PF_STAT("port.fdir_sb_status", stats.fd_sb_status),
 
 	/* LPI stats */
-	I40E_PF_STAT("tx_lpi_status", stats.tx_lpi_status),
-	I40E_PF_STAT("rx_lpi_status", stats.rx_lpi_status),
-	I40E_PF_STAT("tx_lpi_count", stats.tx_lpi_count),
-	I40E_PF_STAT("rx_lpi_count", stats.rx_lpi_count),
+	I40E_PF_STAT("port.tx_lpi_status", stats.tx_lpi_status),
+	I40E_PF_STAT("port.rx_lpi_status", stats.rx_lpi_status),
+	I40E_PF_STAT("port.tx_lpi_count", stats.tx_lpi_count),
+	I40E_PF_STAT("port.rx_lpi_count", stats.rx_lpi_count),
 };
 
 /* We use num_tx_queues here as a proxy for the maximum number of queues
@@ -1819,7 +1820,7 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
 		return;
 
 	for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "veb.%s",
+		snprintf(p, ETH_GSTRING_LEN, "%s",
 			 i40e_gstrings_veb_stats[i].stat_string);
 		p += ETH_GSTRING_LEN;
 	}
@@ -1839,7 +1840,7 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
 	}
 
 	for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "port.%s",
+		snprintf(p, ETH_GSTRING_LEN, "%s",
 			 i40e_gstrings_stats[i].stat_string);
 		p += ETH_GSTRING_LEN;
 	}
-- 
2.17.0

^ permalink raw reply related

* [net-next 7/9] i40e: update data pointer directly when copying to the buffer
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

A future patch is going to add a helper function i40e_add_ethtool_stats
that will help lower the amount of boiler plate code in the
i40e_get_ethtool_stats function.

This conversion will take place over many patches, and the helper
function will work by directly updating a reference to the data pointer.

Since this would not work combined with the current method of accessing
data like an array, update all the code that copies stats into the data
buffer to use direct updates to the pointer instead of array accesses.

This will prevent incorrect stat updates for patches in between the
conversion.

Similarly, when copying strings, we used a separate char *p pointer.
Instead, use the data pointer directly as it's already a (u8 *) type
which is the same size.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 117 +++++++++---------
 1 file changed, 58 insertions(+), 59 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 6b34845d251c..44a2803cb1ec 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1699,7 +1699,6 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
 	unsigned int j;
-	int i = 0;
 	char *p;
 	struct rtnl_link_stats64 *net_stats = i40e_get_vsi_stats_struct(vsi);
 	unsigned int start;
@@ -1708,12 +1707,12 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 
 	for (j = 0; j < I40E_NETDEV_STATS_LEN; j++) {
 		p = (char *)net_stats + i40e_gstrings_net_stats[j].stat_offset;
-		data[i++] = (i40e_gstrings_net_stats[j].sizeof_stat ==
+		*(data++) = (i40e_gstrings_net_stats[j].sizeof_stat ==
 			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
 	for (j = 0; j < I40E_MISC_STATS_LEN; j++) {
 		p = (char *)vsi + i40e_gstrings_misc_stats[j].stat_offset;
-		data[i++] = (i40e_gstrings_misc_stats[j].sizeof_stat ==
+		*(data++) = (i40e_gstrings_misc_stats[j].sizeof_stat ==
 			    sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
 	rcu_read_lock();
@@ -1724,29 +1723,29 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 			/* Bump the stat counter to skip these stats, and make
 			 * sure the memory is zero'd
 			 */
-			data[i++] = 0;
-			data[i++] = 0;
-			data[i++] = 0;
-			data[i++] = 0;
+			*(data++) = 0;
+			*(data++) = 0;
+			*(data++) = 0;
+			*(data++) = 0;
 			continue;
 		}
 
 		/* process Tx ring statistics */
 		do {
 			start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
-			data[i] = tx_ring->stats.packets;
-			data[i + 1] = tx_ring->stats.bytes;
+			data[0] = tx_ring->stats.packets;
+			data[1] = tx_ring->stats.bytes;
 		} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
-		i += 2;
+		data += 2;
 
 		/* Rx ring is the 2nd half of the queue pair */
 		rx_ring = &tx_ring[1];
 		do {
 			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
-			data[i] = rx_ring->stats.packets;
-			data[i + 1] = rx_ring->stats.bytes;
+			data[0] = rx_ring->stats.packets;
+			data[1] = rx_ring->stats.bytes;
 		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
-		i += 2;
+		data += 2;
 	}
 	rcu_read_unlock();
 	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
@@ -1759,33 +1758,33 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 		for (j = 0; j < I40E_VEB_STATS_LEN; j++) {
 			p = (char *)veb;
 			p += i40e_gstrings_veb_stats[j].stat_offset;
-			data[i++] = (i40e_gstrings_veb_stats[j].sizeof_stat ==
+			*(data++) = (i40e_gstrings_veb_stats[j].sizeof_stat ==
 				     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 		}
 		for (j = 0; j < I40E_MAX_TRAFFIC_CLASS; j++) {
-			data[i++] = veb->tc_stats.tc_tx_packets[j];
-			data[i++] = veb->tc_stats.tc_tx_bytes[j];
-			data[i++] = veb->tc_stats.tc_rx_packets[j];
-			data[i++] = veb->tc_stats.tc_rx_bytes[j];
+			*(data++) = veb->tc_stats.tc_tx_packets[j];
+			*(data++) = veb->tc_stats.tc_tx_bytes[j];
+			*(data++) = veb->tc_stats.tc_rx_packets[j];
+			*(data++) = veb->tc_stats.tc_rx_bytes[j];
 		}
 	} else {
-		i += I40E_VEB_STATS_TOTAL;
+		data += I40E_VEB_STATS_TOTAL;
 	}
 	for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
 		p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
-		data[i++] = (i40e_gstrings_stats[j].sizeof_stat ==
+		*(data++) = (i40e_gstrings_stats[j].sizeof_stat ==
 			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
 	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
-		data[i++] = pf->stats.priority_xon_tx[j];
-		data[i++] = pf->stats.priority_xoff_tx[j];
+		*(data++) = pf->stats.priority_xon_tx[j];
+		*(data++) = pf->stats.priority_xoff_tx[j];
 	}
 	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
-		data[i++] = pf->stats.priority_xon_rx[j];
-		data[i++] = pf->stats.priority_xoff_rx[j];
+		*(data++) = pf->stats.priority_xon_rx[j];
+		*(data++) = pf->stats.priority_xoff_rx[j];
 	}
 	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++)
-		data[i++] = pf->stats.priority_xon_2_xoff[j];
+		*(data++) = pf->stats.priority_xon_2_xoff[j];
 }
 
 static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
@@ -1797,73 +1796,73 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
 	u8 *p = data;
 
 	for (i = 0; i < I40E_NETDEV_STATS_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "%s",
+		snprintf(data, ETH_GSTRING_LEN, "%s",
 			 i40e_gstrings_net_stats[i].stat_string);
-		p += ETH_GSTRING_LEN;
+		data += ETH_GSTRING_LEN;
 	}
 	for (i = 0; i < I40E_MISC_STATS_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "%s",
+		snprintf(data, ETH_GSTRING_LEN, "%s",
 			 i40e_gstrings_misc_stats[i].stat_string);
-		p += ETH_GSTRING_LEN;
+		data += ETH_GSTRING_LEN;
 	}
 	for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev); i++) {
-		snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
-		p += ETH_GSTRING_LEN;
-		snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
-		p += ETH_GSTRING_LEN;
-		snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
-		p += ETH_GSTRING_LEN;
-		snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
-		p += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
+		data += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
+		data += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
+		data += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
+		data += ETH_GSTRING_LEN;
 	}
 	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
 		return;
 
 	for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "%s",
+		snprintf(data, ETH_GSTRING_LEN, "%s",
 			 i40e_gstrings_veb_stats[i].stat_string);
-		p += ETH_GSTRING_LEN;
+		data += ETH_GSTRING_LEN;
 	}
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-		snprintf(p, ETH_GSTRING_LEN,
+		snprintf(data, ETH_GSTRING_LEN,
 			 "veb.tc_%u_tx_packets", i);
-		p += ETH_GSTRING_LEN;
-		snprintf(p, ETH_GSTRING_LEN,
+		data += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN,
 			 "veb.tc_%u_tx_bytes", i);
-		p += ETH_GSTRING_LEN;
-		snprintf(p, ETH_GSTRING_LEN,
+		data += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN,
 			 "veb.tc_%u_rx_packets", i);
-		p += ETH_GSTRING_LEN;
-		snprintf(p, ETH_GSTRING_LEN,
+		data += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN,
 			 "veb.tc_%u_rx_bytes", i);
-		p += ETH_GSTRING_LEN;
+		data += ETH_GSTRING_LEN;
 	}
 
 	for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "%s",
+		snprintf(data, ETH_GSTRING_LEN, "%s",
 			 i40e_gstrings_stats[i].stat_string);
-		p += ETH_GSTRING_LEN;
+		data += ETH_GSTRING_LEN;
 	}
 	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-		snprintf(p, ETH_GSTRING_LEN,
+		snprintf(data, ETH_GSTRING_LEN,
 			 "port.tx_priority_%u_xon", i);
-		p += ETH_GSTRING_LEN;
-		snprintf(p, ETH_GSTRING_LEN,
+		data += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN,
 			 "port.tx_priority_%u_xoff", i);
-		p += ETH_GSTRING_LEN;
+		data += ETH_GSTRING_LEN;
 	}
 	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-		snprintf(p, ETH_GSTRING_LEN,
+		snprintf(data, ETH_GSTRING_LEN,
 			 "port.rx_priority_%u_xon", i);
-		p += ETH_GSTRING_LEN;
-		snprintf(p, ETH_GSTRING_LEN,
+		data += ETH_GSTRING_LEN;
+		snprintf(data, ETH_GSTRING_LEN,
 			 "port.rx_priority_%u_xoff", i);
-		p += ETH_GSTRING_LEN;
+		data += ETH_GSTRING_LEN;
 	}
 	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-		snprintf(p, ETH_GSTRING_LEN,
+		snprintf(data, ETH_GSTRING_LEN,
 			 "port.rx_priority_%u_xon_2_xoff", i);
-		p += ETH_GSTRING_LEN;
+		data += ETH_GSTRING_LEN;
 	}
 
 	WARN_ONCE(p - data != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN,
-- 
2.17.0

^ permalink raw reply related

* [net-next 3/9] i40e: always return all queue stat strings
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The ethtool API for obtaining device statistics is not intended to allow
runtime changes in the number of statistics reported. It may *appear*
this way, as there is an ability to request the number of stats using
ethtool_get_set_count(). However, it is expected that this must always
return the same value for invocations of the same device.

If we don't satisfy this contract, and allow the number of stats to
change during run time, we could cause invalid memory accesses or report
the stat strings incorrectly. This is because the API for obtaining
stats is to (1) get the size, (2) get the strings and finally (3) get
the stats. Since these are each separate ethtool op commands, it is not
possible to maintain consistency by holding the RTNL lock over the whole
operation. This results in the potential for a race condition to occur
where the size changed between any of the 3 calls.

Avoid this issue by requiring that we always return the same value for
a given device. We can check any values which remain constant for the
life of the device, but must not report different sizes depending on
runtime attributes.

This patch specifically fixes the queue statistics to always return
every queue even if it's not currently in use.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index de5dad7ff340..bacb01b63727 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -140,8 +140,12 @@ static const struct i40e_stats i40e_gstrings_stats[] = {
 	I40E_PF_STAT("rx_lpi_count", stats.rx_lpi_count),
 };
 
-#define I40E_QUEUE_STATS_LEN(n) \
-	(((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs \
+/* We use num_tx_queues here as a proxy for the maximum number of queues
+ * available because we always allocate queues symmetrically.
+ */
+#define I40E_MAX_NUM_QUEUES(n) ((n)->num_tx_queues)
+#define I40E_QUEUE_STATS_LEN(n)                                              \
+	   (I40E_MAX_NUM_QUEUES(n)                                           \
 	    * 2 /* Tx and Rx together */                                     \
 	    * (sizeof(struct i40e_queue_stats) / sizeof(u64)))
 #define I40E_GLOBAL_STATS_LEN	ARRAY_SIZE(i40e_gstrings_stats)
@@ -1712,11 +1716,19 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 			    sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
 	rcu_read_lock();
-	for (j = 0; j < vsi->num_queue_pairs; j++) {
+	for (j = 0; j < I40E_MAX_NUM_QUEUES(netdev) ; j++) {
 		tx_ring = READ_ONCE(vsi->tx_rings[j]);
 
-		if (!tx_ring)
+		if (!tx_ring) {
+			/* Bump the stat counter to skip these stats, and make
+			 * sure the memory is zero'd
+			 */
+			data[i++] = 0;
+			data[i++] = 0;
+			data[i++] = 0;
+			data[i++] = 0;
 			continue;
+		}
 
 		/* process Tx ring statistics */
 		do {
@@ -1800,7 +1812,7 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
 				 i40e_gstrings_misc_stats[i].stat_string);
 			p += ETH_GSTRING_LEN;
 		}
-		for (i = 0; i < vsi->num_queue_pairs; i++) {
+		for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev); i++) {
 			snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_packets", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_bytes", i);
-- 
2.17.0

^ permalink raw reply related

* [net-next 4/9] i40e: split i40e_get_strings() into smaller functions
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Split the statistic strings and private flags strings into their own
separate functions to aid code readability.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 183 ++++++++++--------
 1 file changed, 100 insertions(+), 83 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index bacb01b63727..c50ed2d391e1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1787,8 +1787,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 		data[i++] = pf->stats.priority_xon_2_xoff[j];
 }
 
-static void i40e_get_strings(struct net_device *netdev, u32 stringset,
-			     u8 *data)
+static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
@@ -1796,95 +1795,113 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
 	char *p = (char *)data;
 	unsigned int i;
 
+	for (i = 0; i < I40E_NETDEV_STATS_LEN; i++) {
+		snprintf(p, ETH_GSTRING_LEN, "%s",
+			 i40e_gstrings_net_stats[i].stat_string);
+		p += ETH_GSTRING_LEN;
+	}
+	for (i = 0; i < I40E_MISC_STATS_LEN; i++) {
+		snprintf(p, ETH_GSTRING_LEN, "%s",
+			 i40e_gstrings_misc_stats[i].stat_string);
+		p += ETH_GSTRING_LEN;
+	}
+	for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev); i++) {
+		snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
+		p += ETH_GSTRING_LEN;
+		snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
+		p += ETH_GSTRING_LEN;
+		snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
+		p += ETH_GSTRING_LEN;
+		snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
+		p += ETH_GSTRING_LEN;
+	}
+	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
+		return;
+
+	for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
+		snprintf(p, ETH_GSTRING_LEN, "veb.%s",
+			 i40e_gstrings_veb_stats[i].stat_string);
+		p += ETH_GSTRING_LEN;
+	}
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		snprintf(p, ETH_GSTRING_LEN,
+			 "veb.tc_%u_tx_packets", i);
+		p += ETH_GSTRING_LEN;
+		snprintf(p, ETH_GSTRING_LEN,
+			 "veb.tc_%u_tx_bytes", i);
+		p += ETH_GSTRING_LEN;
+		snprintf(p, ETH_GSTRING_LEN,
+			 "veb.tc_%u_rx_packets", i);
+		p += ETH_GSTRING_LEN;
+		snprintf(p, ETH_GSTRING_LEN,
+			 "veb.tc_%u_rx_bytes", i);
+		p += ETH_GSTRING_LEN;
+	}
+
+	for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
+		snprintf(p, ETH_GSTRING_LEN, "port.%s",
+			 i40e_gstrings_stats[i].stat_string);
+		p += ETH_GSTRING_LEN;
+	}
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		snprintf(p, ETH_GSTRING_LEN,
+			 "port.tx_priority_%u_xon", i);
+		p += ETH_GSTRING_LEN;
+		snprintf(p, ETH_GSTRING_LEN,
+			 "port.tx_priority_%u_xoff", i);
+		p += ETH_GSTRING_LEN;
+	}
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		snprintf(p, ETH_GSTRING_LEN,
+			 "port.rx_priority_%u_xon", i);
+		p += ETH_GSTRING_LEN;
+		snprintf(p, ETH_GSTRING_LEN,
+			 "port.rx_priority_%u_xoff", i);
+		p += ETH_GSTRING_LEN;
+	}
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		snprintf(p, ETH_GSTRING_LEN,
+			 "port.rx_priority_%u_xon_2_xoff", i);
+		p += ETH_GSTRING_LEN;
+	}
+	/* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
+}
+
+static void i40e_get_priv_flag_strings(struct net_device *netdev, u8 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	char *p = (char *)data;
+	unsigned int i;
+
+	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
+		snprintf(p, ETH_GSTRING_LEN, "%s",
+			 i40e_gstrings_priv_flags[i].flag_string);
+		p += ETH_GSTRING_LEN;
+	}
+	if (pf->hw.pf_id != 0)
+		return;
+	for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) {
+		snprintf(p, ETH_GSTRING_LEN, "%s",
+			 i40e_gl_gstrings_priv_flags[i].flag_string);
+		p += ETH_GSTRING_LEN;
+	}
+}
+
+static void i40e_get_strings(struct net_device *netdev, u32 stringset,
+			     u8 *data)
+{
 	switch (stringset) {
 	case ETH_SS_TEST:
 		memcpy(data, i40e_gstrings_test,
 		       I40E_TEST_LEN * ETH_GSTRING_LEN);
 		break;
 	case ETH_SS_STATS:
-		for (i = 0; i < I40E_NETDEV_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 i40e_gstrings_net_stats[i].stat_string);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < I40E_MISC_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 i40e_gstrings_misc_stats[i].stat_string);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev); i++) {
-			snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_bytes", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_bytes", i);
-			p += ETH_GSTRING_LEN;
-		}
-		if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
-			return;
-
-		for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "veb.%s",
-				 i40e_gstrings_veb_stats[i].stat_string);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "veb.tc_%u_tx_packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "veb.tc_%u_tx_bytes", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "veb.tc_%u_rx_packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "veb.tc_%u_rx_bytes", i);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "port.%s",
-				 i40e_gstrings_stats[i].stat_string);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx_priority_%d_xon", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx_priority_%d_xoff", i);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx_priority_%d_xon", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx_priority_%d_xoff", i);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx_priority_%d_xon_2_xoff", i);
-			p += ETH_GSTRING_LEN;
-		}
-		/* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
+		i40e_get_stat_strings(netdev, data);
 		break;
 	case ETH_SS_PRIV_FLAGS:
-		for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 i40e_gstrings_priv_flags[i].flag_string);
-			p += ETH_GSTRING_LEN;
-		}
-		if (pf->hw.pf_id != 0)
-			break;
-		for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 i40e_gl_gstrings_priv_flags[i].flag_string);
-			p += ETH_GSTRING_LEN;
-		}
+		i40e_get_priv_flag_strings(netdev, data);
 		break;
 	default:
 		break;
-- 
2.17.0

^ permalink raw reply related

* [net-next 5/9] i40e: use WARN_ONCE to replace the commented BUG_ON size check
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

We don't really want to use BUG_ON here since that would completely
crash the kernel, thus the reason we commented it out. We *can't* use
BUILD_BUG_ON because at least now (a) the sizes aren't constant (we are
fixing this) and (b) not all compilers are smart enough to understand
that "p - data" is a constant.

Instead, just use a WARN_ONCE so that the first time we end up with an
incorrect size we will dump a stack trace and a message, hopefully
highlighting the issues early in testing.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index c50ed2d391e1..32bcb6a2a590 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1792,8 +1792,8 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
-	char *p = (char *)data;
 	unsigned int i;
+	u8 *p = data;
 
 	for (i = 0; i < I40E_NETDEV_STATS_LEN; i++) {
 		snprintf(p, ETH_GSTRING_LEN, "%s",
@@ -1864,7 +1864,9 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
 			 "port.rx_priority_%u_xon_2_xoff", i);
 		p += ETH_GSTRING_LEN;
 	}
-	/* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
+
+	WARN_ONCE(p - data != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN,
+		  "stat strings count mismatch!");
 }
 
 static void i40e_get_priv_flag_strings(struct net_device *netdev, u8 *data)
-- 
2.17.0

^ permalink raw reply related

* [net-next 8/9] i40e: add function doc headers for ethtool stats functions
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Add documentation for the i40e_get_stats_count, i40e_get_stat_strings
and i40e_get_ethtool_stats explaining that the number and ordering of
statistics must remain constant for a given netdevice.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 44a2803cb1ec..4b5ecba0148c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1660,6 +1660,20 @@ static int i40e_set_ringparam(struct net_device *netdev,
 	return err;
 }
 
+/**
+ * i40e_get_stats_count - return the stats count for a device
+ * @netdev: the netdev to return the count for
+ *
+ * Returns the total number of statistics for this netdev. Note that even
+ * though this is a function, it is required that the count for a specific
+ * netdev must never change. Basing the count on static values such as the
+ * maximum number of queues or the device type is ok. However, the API for
+ * obtaining stats is *not* safe against changes based on non-static
+ * values such as the *current* number of queues, or runtime flags.
+ *
+ * If a statistic is not always enabled, return it as part of the count
+ * anyways, always return its string, and report its value as zero.
+ **/
 static int i40e_get_stats_count(struct net_device *netdev)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
@@ -1691,6 +1705,20 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset)
 	}
 }
 
+/**
+ * i40e_get_ethtool_stats - copy stat values into supplied buffer
+ * @netdev: the netdev to collect stats for
+ * @stats: ethtool stats command structure
+ * @data: ethtool supplied buffer
+ *
+ * Copy the stats values for this netdev into the buffer. Expects data to be
+ * pre-allocated to the size returned by i40e_get_stats_count.. Note that all
+ * statistics must be copied in a static order, and the count must not change
+ * for a given netdev. See i40e_get_stats_count for more details.
+ *
+ * If a statistic is not currently valid (such as a disabled queue), this
+ * function reports its value as zero.
+ **/
 static void i40e_get_ethtool_stats(struct net_device *netdev,
 				   struct ethtool_stats *stats, u64 *data)
 {
@@ -1787,6 +1815,16 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 		*(data++) = pf->stats.priority_xon_2_xoff[j];
 }
 
+/**
+ * i40e_get_stat_strings - copy stat strings into supplied buffer
+ * @netdev: the netdev to collect strings for
+ * @data: supplied buffer to copy strings into
+ *
+ * Copy the strings related to stats for this netdev. Expects data to be
+ * pre-allocated with the size reported by i40e_get_stats_count. Note that the
+ * strings must be copied in a static order and the total count must not
+ * change for a given netdev. See i40e_get_stats_count for more details.
+ **/
 static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
-- 
2.17.0

^ permalink raw reply related

* [net-next 9/9] i40e: use the more traditional 'i' loop variable
From: Jeff Kirsher @ 2018-05-22 17:45 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180522174527.19680-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

Since we no longer use i as an array index for the data variable,
replace the use of 'j' with 'i' so that we match the general loop
variable name.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 56 +++++++++----------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 4b5ecba0148c..6947a2a571cb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1726,26 +1726,26 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 	struct i40e_ring *tx_ring, *rx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
-	unsigned int j;
+	unsigned int i;
 	char *p;
 	struct rtnl_link_stats64 *net_stats = i40e_get_vsi_stats_struct(vsi);
 	unsigned int start;
 
 	i40e_update_stats(vsi);
 
-	for (j = 0; j < I40E_NETDEV_STATS_LEN; j++) {
-		p = (char *)net_stats + i40e_gstrings_net_stats[j].stat_offset;
-		*(data++) = (i40e_gstrings_net_stats[j].sizeof_stat ==
+	for (i = 0; i < I40E_NETDEV_STATS_LEN; i++) {
+		p = (char *)net_stats + i40e_gstrings_net_stats[i].stat_offset;
+		*(data++) = (i40e_gstrings_net_stats[i].sizeof_stat ==
 			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
-	for (j = 0; j < I40E_MISC_STATS_LEN; j++) {
-		p = (char *)vsi + i40e_gstrings_misc_stats[j].stat_offset;
-		*(data++) = (i40e_gstrings_misc_stats[j].sizeof_stat ==
+	for (i = 0; i < I40E_MISC_STATS_LEN; i++) {
+		p = (char *)vsi + i40e_gstrings_misc_stats[i].stat_offset;
+		*(data++) = (i40e_gstrings_misc_stats[i].sizeof_stat ==
 			    sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
 	rcu_read_lock();
-	for (j = 0; j < I40E_MAX_NUM_QUEUES(netdev) ; j++) {
-		tx_ring = READ_ONCE(vsi->tx_rings[j]);
+	for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev) ; i++) {
+		tx_ring = READ_ONCE(vsi->tx_rings[i]);
 
 		if (!tx_ring) {
 			/* Bump the stat counter to skip these stats, and make
@@ -1783,36 +1783,36 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
 	    (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) {
 		struct i40e_veb *veb = pf->veb[pf->lan_veb];
 
-		for (j = 0; j < I40E_VEB_STATS_LEN; j++) {
+		for (i = 0; i < I40E_VEB_STATS_LEN; i++) {
 			p = (char *)veb;
-			p += i40e_gstrings_veb_stats[j].stat_offset;
-			*(data++) = (i40e_gstrings_veb_stats[j].sizeof_stat ==
+			p += i40e_gstrings_veb_stats[i].stat_offset;
+			*(data++) = (i40e_gstrings_veb_stats[i].sizeof_stat ==
 				     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 		}
-		for (j = 0; j < I40E_MAX_TRAFFIC_CLASS; j++) {
-			*(data++) = veb->tc_stats.tc_tx_packets[j];
-			*(data++) = veb->tc_stats.tc_tx_bytes[j];
-			*(data++) = veb->tc_stats.tc_rx_packets[j];
-			*(data++) = veb->tc_stats.tc_rx_bytes[j];
+		for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+			*(data++) = veb->tc_stats.tc_tx_packets[i];
+			*(data++) = veb->tc_stats.tc_tx_bytes[i];
+			*(data++) = veb->tc_stats.tc_rx_packets[i];
+			*(data++) = veb->tc_stats.tc_rx_bytes[i];
 		}
 	} else {
 		data += I40E_VEB_STATS_TOTAL;
 	}
-	for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
-		p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
-		*(data++) = (i40e_gstrings_stats[j].sizeof_stat ==
+	for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
+		p = (char *)pf + i40e_gstrings_stats[i].stat_offset;
+		*(data++) = (i40e_gstrings_stats[i].sizeof_stat ==
 			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
-	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
-		*(data++) = pf->stats.priority_xon_tx[j];
-		*(data++) = pf->stats.priority_xoff_tx[j];
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		*(data++) = pf->stats.priority_xon_tx[i];
+		*(data++) = pf->stats.priority_xoff_tx[i];
 	}
-	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
-		*(data++) = pf->stats.priority_xon_rx[j];
-		*(data++) = pf->stats.priority_xoff_rx[j];
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		*(data++) = pf->stats.priority_xon_rx[i];
+		*(data++) = pf->stats.priority_xoff_rx[i];
 	}
-	for (j = 0; j < I40E_MAX_USER_PRIORITY; j++)
-		*(data++) = pf->stats.priority_xon_2_xoff[j];
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+		*(data++) = pf->stats.priority_xon_2_xoff[i];
 }
 
 /**
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH 1/2] net: fec: ptp: Switch to SPDX identifier
From: David Miller @ 2018-05-22 17:45 UTC (permalink / raw)
  To: festevam; +Cc: fugang.duan, netdev, fabio.estevam
In-Reply-To: <1526835319-17408-1-git-send-email-festevam@gmail.com>

From: Fabio Estevam <festevam@gmail.com>
Date: Sun, 20 May 2018 13:55:18 -0300

> From: Fabio Estevam <fabio.estevam@nxp.com>
> 
> Adopt the SPDX license identifier headers to ease license compliance
> management.
> 
> Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>

Applied.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox