Netdev List
 help / color / mirror / Atom feed
* [PATCH bpf-next v2 1/4] tools: bpftool: ignore make built-in rules for getting kernel version
From: Quentin Monnet @ 2019-08-30 11:00 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: bpf, netdev, oss-drivers, Quentin Monnet, Lorenz Bauer,
	Ilya Leoshkevich, Jakub Kicinski
In-Reply-To: <20190830110040.31257-1-quentin.monnet@netronome.com>

Bpftool calls the toplevel Makefile to get the kernel version for the
sources it is built from. But when the utility is built from the top of
the kernel repository, it may dump the following error message for
certain architectures (including x86):

    $ make tools/bpf
    [...]
    make[3]: *** [checkbin] Error 1
    [...]

This does not prevent bpftool compilation, but may feel disconcerting.
The "checkbin" arch-dependent target is not supposed to be called for
target "kernelversion", which is a simple "echo" of the version number.

It turns out this is caused by the make invocation in tools/bpf/bpftool,
which attempts to find implicit rules to apply. Extract from debug
output:

    Reading makefiles...
    Reading makefile 'Makefile'...
    Reading makefile 'scripts/Kbuild.include' (search path) (no ~ expansion)...
    Reading makefile 'scripts/subarch.include' (search path) (no ~ expansion)...
    Reading makefile 'arch/x86/Makefile' (search path) (no ~ expansion)...
    Reading makefile 'scripts/Makefile.kcov' (search path) (no ~ expansion)...
    Reading makefile 'scripts/Makefile.gcc-plugins' (search path) (no ~ expansion)...
    Reading makefile 'scripts/Makefile.kasan' (search path) (no ~ expansion)...
    Reading makefile 'scripts/Makefile.extrawarn' (search path) (no ~ expansion)...
    Reading makefile 'scripts/Makefile.ubsan' (search path) (no ~ expansion)...
    Updating makefiles....
     Considering target file 'scripts/Makefile.ubsan'.
      Looking for an implicit rule for 'scripts/Makefile.ubsan'.
      Trying pattern rule with stem 'Makefile.ubsan'.
    [...]
      Trying pattern rule with stem 'Makefile.ubsan'.
      Trying implicit prerequisite 'scripts/Makefile.ubsan.o'.
      Looking for a rule with intermediate file 'scripts/Makefile.ubsan.o'.
       Avoiding implicit rule recursion.
       Trying pattern rule with stem 'Makefile.ubsan'.
       Trying rule prerequisite 'prepare'.
       Trying rule prerequisite 'FORCE'.
      Found an implicit rule for 'scripts/Makefile.ubsan'.
        Considering target file 'prepare'.
         File 'prepare' does not exist.
          Considering target file 'prepare0'.
           File 'prepare0' does not exist.
            Considering target file 'archprepare'.
             File 'archprepare' does not exist.
              Considering target file 'archheaders'.
               File 'archheaders' does not exist.
               Finished prerequisites of target file 'archheaders'.
              Must remake target 'archheaders'.
    Putting child 0x55976f4f6980 (archheaders) PID 31743 on the chain.

To avoid that, pass the -r and -R flags to eliminate the use of make
built-in rules (and while at it, built-in variables) when running
command "make kernelversion" from bpftool's Makefile.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index f284c207765a..cd0fc05464e7 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -24,7 +24,7 @@ endif
 
 LIBBPF = $(BPF_PATH)libbpf.a
 
-BPFTOOL_VERSION := $(shell make --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
 
 $(LIBBPF): FORCE
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a
-- 
2.17.1


^ permalink raw reply related

* [PATCH bpf-next v2 4/4] tools: bpftool: do not link twice against libbpf.a in Makefile
From: Quentin Monnet @ 2019-08-30 11:00 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: bpf, netdev, oss-drivers, Quentin Monnet, Lorenz Bauer,
	Ilya Leoshkevich, Jakub Kicinski
In-Reply-To: <20190830110040.31257-1-quentin.monnet@netronome.com>

In bpftool's Makefile, $(LIBS) includes $(LIBBPF), therefore the library
is used twice in the linking command. No need to have $(LIBBPF) (from
$^) on that command, let's do with "$(OBJS) $(LIBS)" (but move $(LIBBPF)
_before_ the -l flags in $(LIBS)).

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 tools/bpf/bpftool/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index b0c5a369f54a..39bc6f0f4f0b 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -55,7 +55,7 @@ ifneq ($(EXTRA_LDFLAGS),)
 LDFLAGS += $(EXTRA_LDFLAGS)
 endif
 
-LIBS = -lelf -lz $(LIBBPF)
+LIBS = $(LIBBPF) -lelf -lz
 
 INSTALL ?= install
 RM ?= rm -f
@@ -117,7 +117,7 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 $(OUTPUT)feature.o: | zdep
 
 $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
 
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
-- 
2.17.1


^ permalink raw reply related

* [PATCH bpf-next v2 3/4] tools: bpf: account for generated feature/ and libbpf/ directories
From: Quentin Monnet @ 2019-08-30 11:00 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: bpf, netdev, oss-drivers, Quentin Monnet, Lorenz Bauer,
	Ilya Leoshkevich, Jakub Kicinski
In-Reply-To: <20190830110040.31257-1-quentin.monnet@netronome.com>

When building "tools/bpf" from the top of the Linux repository, the
build system passes a value for the $(OUTPUT) Makefile variable to
tools/bpf/Makefile and tools/bpf/bpftool/Makefile, which results in
generating "libbpf/" (for bpftool) and "feature/" (bpf and bpftool)
directories inside the tree.

This commit adds such directories to the relevant .gitignore files, and
edits the Makefiles to ensure they are removed on "make clean". The use
of "rm" is also made consistent throughout those Makefiles (relies on
the $(RM) variable, use "--" to prevent interpreting
$(OUTPUT)/$(DESTDIR) as options.

v2:
- New patch.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 tools/bpf/.gitignore         |  1 +
 tools/bpf/Makefile           |  5 +++--
 tools/bpf/bpftool/.gitignore |  2 ++
 tools/bpf/bpftool/Makefile   | 10 ++++++----
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore
index dfe2bd5a4b95..59024197e71d 100644
--- a/tools/bpf/.gitignore
+++ b/tools/bpf/.gitignore
@@ -1,4 +1,5 @@
 FEATURE-DUMP.bpf
+feature
 bpf_asm
 bpf_dbg
 bpf_exp.yacc.*
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 53b60ad452f5..fbf5e4a0cb9c 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -81,10 +81,11 @@ $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
 
 clean: bpftool_clean
 	$(call QUIET_CLEAN, bpf-progs)
-	$(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+	$(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
 	       $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
 	$(call QUIET_CLEAN, core-gen)
-	$(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
+	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf
+	$(Q)$(RM) -r -- $(OUTPUT)feature
 
 install: $(PROGS) bpftool_install
 	$(call QUIET_INSTALL, bpf_jit_disasm)
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 8248b8dd89d4..b13926432b84 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -3,3 +3,5 @@
 bpftool*.8
 bpf-helpers.*
 FEATURE-DUMP.bpftool
+feature
+libbpf
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 3fc82ff9b52c..b0c5a369f54a 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -124,9 +124,11 @@ $(OUTPUT)%.o: %.c
 
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
-	$(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
 	$(call QUIET_CLEAN, core-gen)
-	$(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
+	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
+	$(Q)$(RM) -r -- $(OUTPUT)feature/
 
 install: $(OUTPUT)bpftool
 	$(call QUIET_INSTALL, bpftool)
@@ -137,8 +139,8 @@ install: $(OUTPUT)bpftool
 
 uninstall:
 	$(call QUIET_UNINST, bpftool)
-	$(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool
-	$(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool
+	$(Q)$(RM) -- $(DESTDIR)$(prefix)/sbin/bpftool
+	$(Q)$(RM) -- $(DESTDIR)$(bash_compdir)/bpftool
 
 doc:
 	$(call descend,Documentation)
-- 
2.17.1


^ permalink raw reply related

* [PATCH bpf-next v2 2/4] tools: bpftool: improve and check builds for different make invocations
From: Quentin Monnet @ 2019-08-30 11:00 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: bpf, netdev, oss-drivers, Quentin Monnet, Lorenz Bauer,
	Ilya Leoshkevich, Jakub Kicinski
In-Reply-To: <20190830110040.31257-1-quentin.monnet@netronome.com>

There are a number of alternative "make" invocations that can be used to
compile bpftool. The following invocations are expected to work:

  - through the kbuild system, from the top of the repository
    (make tools/bpf)
  - by telling make to change to the bpftool directory
    (make -C tools/bpf/bpftool)
  - by building the BPF tools from tools/
    (cd tools && make bpf)
  - by running make from bpftool directory
    (cd tools/bpf/bpftool && make)

Additionally, setting the O or OUTPUT variables should tell the build
system to use a custom output path, for each of these alternatives.

The following patch fixes the following invocations:

  $ make tools/bpf
  $ make tools/bpf O=<dir>
  $ make -C tools/bpf/bpftool OUTPUT=<dir>
  $ make -C tools/bpf/bpftool O=<dir>
  $ cd tools/ && make bpf O=<dir>
  $ cd tools/bpf/bpftool && make OUTPUT=<dir>
  $ cd tools/bpf/bpftool && make O=<dir>

After this commit, the build still fails for two variants when passing
the OUTPUT variable:

  $ make tools/bpf OUTPUT=<dir>
  $ cd tools/ && make bpf OUTPUT=<dir>

In order to remember and check what make invocations are supposed to
work, and to document the ones which do not, a new script is added to
the BPF selftests. Note that some invocations require the kernel to be
configured, so the script skips them if no .config file is found.

v2:
- In make_and_clean(), set $ERROR to 1 when "make" returns non-zero,
  even if the binary was produced.
- Run "make clean" from the correct directory (bpf/ instead of bpftool/,
  when relevant).

Reported-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 tools/bpf/bpftool/Makefile                    |  12 +-
 tools/testing/selftests/bpf/Makefile          |   3 +-
 .../selftests/bpf/test_bpftool_build.sh       | 143 ++++++++++++++++++
 3 files changed, 152 insertions(+), 6 deletions(-)
 create mode 100755 tools/testing/selftests/bpf/test_bpftool_build.sh

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index cd0fc05464e7..3fc82ff9b52c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -17,21 +17,23 @@ endif
 BPF_DIR = $(srctree)/tools/lib/bpf/
 
 ifneq ($(OUTPUT),)
-  BPF_PATH = $(OUTPUT)
+  LIBBPF_OUTPUT = $(OUTPUT)/libbpf/
+  LIBBPF_PATH = $(LIBBPF_OUTPUT)
 else
-  BPF_PATH = $(BPF_DIR)
+  LIBBPF_PATH = $(BPF_DIR)
 endif
 
-LIBBPF = $(BPF_PATH)libbpf.a
+LIBBPF = $(LIBBPF_PATH)libbpf.a
 
 BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
 
 $(LIBBPF): FORCE
-	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a
+	$(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a
 
 $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
-	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
 
 prefix ?= /usr/local
 bash_compdir ?= /usr/share/bash-completion/completions
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1faad0c3c3c9..c7595b4ed55d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -63,7 +63,8 @@ TEST_PROGS := test_kmod.sh \
 	test_tcp_check_syncookie.sh \
 	test_tc_tunnel.sh \
 	test_tc_edt.sh \
-	test_xdping.sh
+	test_xdping.sh \
+	test_bpftool_build.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
new file mode 100755
index 000000000000..4ba5a34bff56
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+ERROR=0
+TMPDIR=
+
+# If one build fails, continue but return non-0 on exit.
+return_value() {
+	if [ -d "$TMPDIR" ] ; then
+		rm -rf -- $TMPDIR
+	fi
+	exit $ERROR
+}
+trap return_value EXIT
+
+case $1 in
+	-h|--help)
+		echo -e "$0 [-j <n>]"
+		echo -e "\tTest the different ways of building bpftool."
+		echo -e ""
+		echo -e "\tOptions:"
+		echo -e "\t\t-j <n>:\tPass -j flag to 'make'."
+		exit
+		;;
+esac
+
+J=$*
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+cd $KDIR_ROOT_DIR
+
+check() {
+	local dir=$(realpath $1)
+
+	echo -n "binary:  "
+	# Returns non-null if file is found (and "false" is run)
+	find $dir -type f -executable -name bpftool -print -exec false {} + && \
+		ERROR=1 && printf "FAILURE: Did not find bpftool\n"
+}
+
+make_and_clean() {
+	echo -e "\$PWD:    $PWD"
+	echo -e "command: make -s $* >/dev/null"
+	make $J -s $* >/dev/null
+	if [ $? -ne 0 ] ; then
+		ERROR=1
+	fi
+	if [ $# -ge 1 ] ; then
+		check ${@: -1}
+	else
+		check .
+	fi
+	(
+		if [ $# -ge 1 ] ; then
+			cd ${@: -1}
+		fi
+		make -s clean
+	)
+	echo
+}
+
+make_with_tmpdir() {
+	local ARGS
+
+	TMPDIR=$(mktemp -d)
+	if [ $# -ge 2 ] ; then
+		ARGS=${@:1:(($# - 1))}
+	fi
+	echo -e "\$PWD:    $PWD"
+	echo -e "command: make -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null"
+	make $J -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null
+	if [ $? -ne 0 ] ; then
+		ERROR=1
+	fi
+	check $TMPDIR
+	rm -rf -- $TMPDIR
+	echo
+}
+
+echo "Trying to build bpftool"
+echo -e "... through kbuild\n"
+
+if [ -f ".config" ] ; then
+	make_and_clean tools/bpf
+
+	## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed
+	## down from toplevel Makefile to bpftool's Makefile.
+
+	# make_with_tmpdir tools/bpf OUTPUT
+	echo -e "skip:    make tools/bpf OUTPUT=<dir> (not supported)\n"
+
+	make_with_tmpdir tools/bpf O
+else
+	echo -e "skip:    make tools/bpf (no .config found)\n"
+	echo -e "skip:    make tools/bpf OUTPUT=<dir> (not supported)\n"
+	echo -e "skip:    make tools/bpf O=<dir> (no .config found)\n"
+fi
+
+echo -e "... from kernel source tree\n"
+
+make_and_clean -C tools/bpf/bpftool
+
+make_with_tmpdir -C tools/bpf/bpftool OUTPUT
+
+make_with_tmpdir -C tools/bpf/bpftool O
+
+echo -e "... from tools/\n"
+cd tools/
+
+make_and_clean bpf
+
+## In tools/bpf/Makefile, function "descend" is called and passes $(O) and
+## $(OUTPUT). We would like $(OUTPUT) to have "bpf/bpftool/" appended before
+## calling bpftool's Makefile, but this is not the case as the "descend"
+## function focuses on $(O)/$(subdir). However, in the present case, updating
+## $(O) to have $(OUTPUT) recomputed from it in bpftool's Makefile does not
+## work, because $(O) is not defined from command line and $(OUTPUT) is not
+## updated in tools/scripts/Makefile.include.
+##
+## Workarounds would require to a) edit "descend" or use an alternative way to
+## call bpftool's Makefile, b) modify the conditions to update $(OUTPUT) and
+## other variables in tools/scripts/Makefile.include (at the risk of breaking
+## the build of other tools), or c) append manually the "bpf/bpftool" suffix to
+## $(OUTPUT) in bpf's Makefile, which may break if targets for other directories
+## use "descend" in the future.
+
+# make_with_tmpdir bpf OUTPUT
+echo -e "skip:    make bpf OUTPUT=<dir> (not supported)\n"
+
+make_with_tmpdir bpf O
+
+echo -e "... from bpftool's dir\n"
+cd bpf/bpftool
+
+make_and_clean
+
+make_with_tmpdir OUTPUT
+
+make_with_tmpdir O
-- 
2.17.1


^ permalink raw reply related

* [PATCH bpf-next v2 0/4] tools: bpftool: improve bpftool build experience
From: Quentin Monnet @ 2019-08-30 11:00 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: bpf, netdev, oss-drivers, Quentin Monnet, Lorenz Bauer,
	Ilya Leoshkevich, Jakub Kicinski

Hi,
This set attempts to make it easier to build bpftool, in particular when
passing a specific output directory. This is a follow-up to the
conversation held last month by Lorenz, Ilya and Jakub [0].

The first patch is a minor fix to bpftool's Makefile, regarding the
retrieval of kernel version (which currently prints a non-relevant make
warning on some invocations).

Second patch improves the Makefile commands to support more "make"
invocations, or to fix building with custom output directory. On Jakub's
suggestion, a script is also added to BPF selftests in order to keep track
of the supported build variants.

Building bpftool with "make tools/bpf" from the top of the repository
generates files in "libbpf/" and "feature/" directories under tools/bpf/
and tools/bpf/bpftool/. The third patch ensures such directories are taken
care of on "make clean", and add them to the relevant .gitignore files.

At last, fourth patch is a sligthly modified version of Ilya's fix
regarding libbpf.a appearing twice on the linking command for bpftool.

[0] https://lore.kernel.org/bpf/CACAyw9-CWRHVH3TJ=Tke2x8YiLsH47sLCijdp=V+5M836R9aAA@mail.gmail.com/

v2:
- Return error from check script if one of the make invocations returns
  non-zero (even if binary is successfully produced).
- Run "make clean" from bpf/ and not only bpf/bpftool/ in that same script,
  when relevant.
- Add a patch to clean up generated "feature/" and "libbpf/" directories.

Cc: Lorenz Bauer <lmb@cloudflare.com>
Cc: Ilya Leoshkevich <iii@linux.ibm.com>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>

Quentin Monnet (4):
  tools: bpftool: ignore make built-in rules for getting kernel version
  tools: bpftool: improve and check builds for different make
    invocations
  tools: bpf: account for generated feature/ and libbpf/ directories
  tools: bpftool: do not link twice against libbpf.a in Makefile

 tools/bpf/.gitignore                          |   1 +
 tools/bpf/Makefile                            |   5 +-
 tools/bpf/bpftool/.gitignore                  |   2 +
 tools/bpf/bpftool/Makefile                    |  28 ++--
 tools/testing/selftests/bpf/Makefile          |   3 +-
 .../selftests/bpf/test_bpftool_build.sh       | 143 ++++++++++++++++++
 6 files changed, 167 insertions(+), 15 deletions(-)
 create mode 100755 tools/testing/selftests/bpf/test_bpftool_build.sh

-- 
2.17.1


^ permalink raw reply

* Re: [PATCH bpf-next 2/3] tools: bpftool: improve and check builds for different make invocations
From: Quentin Monnet @ 2019-08-30 10:58 UTC (permalink / raw)
  To: Ilya Leoshkevich
  Cc: Alexei Starovoitov, Daniel Borkmann, bpf, netdev, oss-drivers,
	Lorenz Bauer, Jakub Kicinski
In-Reply-To: <C90F7A80-D2E9-401B-8BFC-47C22A44ADAC@linux.ibm.com>

2019-08-29 18:03 UTC+0200 ~ Ilya Leoshkevich <iii@linux.ibm.com>
>> Am 29.08.2019 um 12:56 schrieb Quentin Monnet <quentin.monnet@netronome.com>:
>>
>> +make_and_clean() {
>> +	echo -e "\$PWD:    $PWD"
>> +	echo -e "command: make -s $* >/dev/null"
>> +	make $J -s $* >/dev/null
> 
> Would it make sense to set ERROR=1 if make produces a bpftool binary,
> but still fails with a non-zero RC for whatever reason?
> 

Hi Ilya,

Generating bpftool being the last thing the Makefile does, I don't know
if this could happen. But sure, that wouldn't hurt, and I will add it to
v2, thanks!

Quentin

^ permalink raw reply

* [PATCH net-next 2/2] devlink: Use switch-case instead of if-else
From: Parav Pandit @ 2019-08-30 10:39 UTC (permalink / raw)
  To: netdev, davem; +Cc: jiri, Parav Pandit
In-Reply-To: <20190830103945.18097-1-parav@mellanox.com>

Make core more readable with switch-case for various port flavours.

Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
---
 net/core/devlink.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index b7091329987a..6e52d639dac6 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -510,32 +510,37 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
 		return 0;
 	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
 		return -EMSGSIZE;
-	if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) {
+	switch (devlink_port->attrs.flavour) {
+	case DEVLINK_PORT_FLAVOUR_PCI_PF:
 		if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
 				attrs->pci_pf.pf))
 			return -EMSGSIZE;
-	} else if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_VF) {
+		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_VF:
 		if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
 				attrs->pci_vf.pf) ||
 		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER,
 				attrs->pci_vf.vf))
 			return -EMSGSIZE;
+		break;
+	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+	case DEVLINK_PORT_FLAVOUR_CPU:
+	case DEVLINK_PORT_FLAVOUR_DSA:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
+				attrs->phys.port_number))
+			return -EMSGSIZE;
+		if (!attrs->split)
+			return 0;
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+				attrs->phys.port_number))
+			return -EMSGSIZE;
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
+				attrs->phys.split_subport_number))
+			return -EMSGSIZE;
+		break;
+	default:
+		break;
 	}
-	if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PHYSICAL &&
-	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
-	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA)
-		return 0;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
-			attrs->phys.port_number))
-		return -EMSGSIZE;
-	if (!attrs->split)
-		return 0;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
-			attrs->phys.port_number))
-		return -EMSGSIZE;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
-			attrs->phys.split_subport_number))
-		return -EMSGSIZE;
 	return 0;
 }
 
-- 
2.19.2


^ permalink raw reply related

* [PATCH net-next 1/2] devlink: Make port index data type as unsigned int
From: Parav Pandit @ 2019-08-30 10:39 UTC (permalink / raw)
  To: netdev, davem; +Cc: jiri, Parav Pandit
In-Reply-To: <20190830103945.18097-1-parav@mellanox.com>

Devlink port index attribute is returned to users as u32 through
netlink response.
Change index data type from 'unsigned' to 'unsigned int' to avoid
below checkpatch.pl warning.

WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
81: FILE: include/net/devlink.h:81:
+       unsigned index;

Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
---
 include/net/devlink.h | 2 +-
 net/core/devlink.c    | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7f43c48f54cd..13523b0a0642 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -75,7 +75,7 @@ struct devlink_port {
 	struct list_head list;
 	struct list_head param_list;
 	struct devlink *devlink;
-	unsigned index;
+	unsigned int index;
 	bool registered;
 	spinlock_t type_lock; /* Protects type and type_dev
 			       * pointer consistency.
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 650f36379203..b7091329987a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -136,7 +136,7 @@ static struct devlink *devlink_get_from_info(struct genl_info *info)
 }
 
 static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
-						      int port_index)
+						      unsigned int port_index)
 {
 	struct devlink_port *devlink_port;
 
@@ -147,7 +147,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
 	return NULL;
 }
 
-static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
+static bool devlink_port_index_exists(struct devlink *devlink,
+				      unsigned int port_index)
 {
 	return devlink_port_get_by_index(devlink, port_index);
 }
-- 
2.19.2


^ permalink raw reply related

* [PATCH net-next 0/2] Minor cleanup in devlink
From: Parav Pandit @ 2019-08-30 10:39 UTC (permalink / raw)
  To: netdev, davem; +Cc: jiri, Parav Pandit

Two minor cleanup in devlink.

Patch-1 Explicitly defines devlink port index as unsigned int
Patch-2 Uses switch-case to handle different port flavours attributes


Parav Pandit (2):
  devlink: Make port index data type as unsigned int
  devlink: Use switch-case instead of if-else

 include/net/devlink.h |  2 +-
 net/core/devlink.c    | 44 ++++++++++++++++++++++++-------------------
 2 files changed, 26 insertions(+), 20 deletions(-)

-- 
2.19.2


^ permalink raw reply

* [PATCH net-next v3 3/3] net: tls: export protocol version, cipher, tx_conf/rx_conf to socket diag
From: Davide Caratti @ 2019-08-30 10:25 UTC (permalink / raw)
  To: borisp, jakub.kicinski, Eric Dumazet
  Cc: aviadye, davejwatson, davem, john.fastabend, Matthieu Baerts,
	netdev
In-Reply-To: <cover.1567158431.git.dcaratti@redhat.com>

When an application configures kernel TLS on top of a TCP socket, it's
now possible for inet_diag_handler() to collect information regarding the
protocol version, the cipher type and TX / RX configuration, in case
INET_DIAG_INFO is requested.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 include/net/tls.h              | 17 +++++++++
 include/uapi/linux/inet_diag.h |  1 +
 include/uapi/linux/tls.h       | 15 ++++++++
 net/tls/tls_main.c             | 64 ++++++++++++++++++++++++++++++++++
 4 files changed, 97 insertions(+)

diff --git a/include/net/tls.h b/include/net/tls.h
index 4997742475cd..ec3c3ed2c6c3 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -431,6 +431,23 @@ static inline bool is_tx_ready(struct tls_sw_context_tx *ctx)
 	return READ_ONCE(rec->tx_ready);
 }
 
+static inline u16 tls_user_config(struct tls_context *ctx, bool tx)
+{
+	u16 config = tx ? ctx->tx_conf : ctx->rx_conf;
+
+	switch (config) {
+	case TLS_BASE:
+		return TLS_CONF_BASE;
+	case TLS_SW:
+		return TLS_CONF_SW;
+	case TLS_HW:
+		return TLS_CONF_HW;
+	case TLS_HW_RECORD:
+		return TLS_CONF_HW_RECORD;
+	}
+	return 0;
+}
+
 struct sk_buff *
 tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
 		      struct sk_buff *skb);
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index e2c6273274f3..a1ff345b3f33 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -162,6 +162,7 @@ enum {
 enum {
 	INET_ULP_INFO_UNSPEC,
 	INET_ULP_INFO_NAME,
+	INET_ULP_INFO_TLS,
 	__INET_ULP_INFO_MAX,
 };
 #define INET_ULP_INFO_MAX (__INET_ULP_INFO_MAX - 1)
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 5b9c26753e46..bcd2869ed472 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -109,4 +109,19 @@ struct tls12_crypto_info_aes_ccm_128 {
 	unsigned char rec_seq[TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE];
 };
 
+enum {
+	TLS_INFO_UNSPEC,
+	TLS_INFO_VERSION,
+	TLS_INFO_CIPHER,
+	TLS_INFO_TXCONF,
+	TLS_INFO_RXCONF,
+	__TLS_INFO_MAX,
+};
+#define TLS_INFO_MAX (__TLS_INFO_MAX - 1)
+
+#define TLS_CONF_BASE 1
+#define TLS_CONF_SW 2
+#define TLS_CONF_HW 3
+#define TLS_CONF_HW_RECORD 4
+
 #endif /* _UAPI_LINUX_TLS_H */
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index f8f2d2c3d627..277f7c209fed 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -39,6 +39,7 @@
 #include <linux/netdevice.h>
 #include <linux/sched/signal.h>
 #include <linux/inetdevice.h>
+#include <linux/inet_diag.h>
 
 #include <net/tls.h>
 
@@ -835,6 +836,67 @@ static void tls_update(struct sock *sk, struct proto *p)
 	}
 }
 
+static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+{
+	u16 version, cipher_type;
+	struct tls_context *ctx;
+	struct nlattr *start;
+	int err;
+
+	start = nla_nest_start_noflag(skb, INET_ULP_INFO_TLS);
+	if (!start)
+		return -EMSGSIZE;
+
+	rcu_read_lock();
+	ctx = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
+	if (!ctx) {
+		err = 0;
+		goto nla_failure;
+	}
+	version = ctx->prot_info.version;
+	if (version) {
+		err = nla_put_u16(skb, TLS_INFO_VERSION, version);
+		if (err)
+			goto nla_failure;
+	}
+	cipher_type = ctx->prot_info.cipher_type;
+	if (cipher_type) {
+		err = nla_put_u16(skb, TLS_INFO_CIPHER, cipher_type);
+		if (err)
+			goto nla_failure;
+	}
+	err = nla_put_u16(skb, TLS_INFO_TXCONF, tls_user_config(ctx, true));
+	if (err)
+		goto nla_failure;
+
+	err = nla_put_u16(skb, TLS_INFO_RXCONF, tls_user_config(ctx, false));
+	if (err)
+		goto nla_failure;
+
+	rcu_read_unlock();
+	nla_nest_end(skb, start);
+	return 0;
+
+nla_failure:
+	rcu_read_unlock();
+	nla_nest_cancel(skb, start);
+	return err;
+}
+
+static size_t tls_get_info_size(const struct sock *sk)
+{
+	size_t size = 0;
+
+	size += nla_total_size(0) +		/* INET_ULP_INFO_TLS */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_VERSION */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_CIPHER */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_RXCONF */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_TXCONF */
+		0;
+
+	return size;
+}
+
 void tls_register_device(struct tls_device *device)
 {
 	spin_lock_bh(&device_spinlock);
@@ -856,6 +918,8 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
 	.owner			= THIS_MODULE,
 	.init			= tls_init,
 	.update			= tls_update,
+	.get_info		= tls_get_info,
+	.get_info_size		= tls_get_info_size,
 };
 
 static int __init tls_register(void)
-- 
2.20.1


^ permalink raw reply related

* [PATCH net-next v3 2/3] tcp: ulp: add functions to dump ulp-specific information
From: Davide Caratti @ 2019-08-30 10:25 UTC (permalink / raw)
  To: borisp, jakub.kicinski, Eric Dumazet
  Cc: aviadye, davejwatson, davem, john.fastabend, Matthieu Baerts,
	netdev
In-Reply-To: <cover.1567158431.git.dcaratti@redhat.com>

currently, only getsockopt(TCP_ULP) can be invoked to know if a ULP is on
top of a TCP socket. Extend idiag_get_aux() and idiag_get_aux_size(),
introduced by commit b37e88407c1d ("inet_diag: allow protocols to provide
additional data"), to report the ULP name and other information that can
be made available by the ULP through optional functions.

Users having CAP_NET_ADMIN privileges will then be able to retrieve this
information through inet_diag_handler, if they specify INET_DIAG_INFO in
the request.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 include/net/tcp.h              |  3 ++
 include/uapi/linux/inet_diag.h |  8 ++++++
 net/ipv4/tcp_diag.c            | 52 +++++++++++++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 77fe87f7a992..c9a3f9688223 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2122,6 +2122,9 @@ struct tcp_ulp_ops {
 	void (*update)(struct sock *sk, struct proto *p);
 	/* cleanup ulp */
 	void (*release)(struct sock *sk);
+	/* diagnostic */
+	int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+	size_t (*get_info_size)(const struct sock *sk);
 
 	char		name[TCP_ULP_NAME_MAX];
 	struct module	*owner;
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index e8baca85bac6..e2c6273274f3 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -153,11 +153,19 @@ enum {
 	INET_DIAG_BBRINFO,	/* request as INET_DIAG_VEGASINFO */
 	INET_DIAG_CLASS_ID,	/* request as INET_DIAG_TCLASS */
 	INET_DIAG_MD5SIG,
+	INET_DIAG_ULP_INFO,
 	__INET_DIAG_MAX,
 };
 
 #define INET_DIAG_MAX (__INET_DIAG_MAX - 1)
 
+enum {
+	INET_ULP_INFO_UNSPEC,
+	INET_ULP_INFO_NAME,
+	__INET_ULP_INFO_MAX,
+};
+#define INET_ULP_INFO_MAX (__INET_ULP_INFO_MAX - 1)
+
 /* INET_DIAG_MEM */
 
 struct inet_diag_meminfo {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index a3a386236d93..babc156deabb 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -81,13 +81,42 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb,
 }
 #endif
 
+static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
+			    const struct tcp_ulp_ops *ulp_ops)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
+	if (!nest)
+		return -EMSGSIZE;
+
+	err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
+	if (err)
+		goto nla_failure;
+
+	if (ulp_ops->get_info)
+		err = ulp_ops->get_info(sk, skb);
+	if (err)
+		goto nla_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_failure:
+	nla_nest_cancel(skb, nest);
+	return err;
+}
+
 static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
 			    struct sk_buff *skb)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int err = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (net_admin) {
 		struct tcp_md5sig_info *md5sig;
-		int err = 0;
 
 		rcu_read_lock();
 		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
@@ -99,11 +128,21 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
 	}
 #endif
 
+	if (net_admin) {
+		const struct tcp_ulp_ops *ulp_ops;
+
+		ulp_ops = icsk->icsk_ulp_ops;
+		if (ulp_ops)
+			err = tcp_diag_put_ulp(skb, sk, ulp_ops);
+		if (err)
+			return err;
+	}
 	return 0;
 }
 
 static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	size_t size = 0;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -124,6 +163,17 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
 	}
 #endif
 
+	if (net_admin) {
+		const struct tcp_ulp_ops *ulp_ops;
+
+		ulp_ops = icsk->icsk_ulp_ops;
+		if (ulp_ops) {
+			size += nla_total_size(0) +
+				nla_total_size(TCP_ULP_NAME_MAX);
+			if (ulp_ops->get_info_size)
+				size += ulp_ops->get_info_size(sk);
+		}
+	}
 	return size;
 }
 
-- 
2.20.1


^ permalink raw reply related

* [PATCH net-next v3 1/3] net/tls: use RCU protection on icsk->icsk_ulp_data
From: Davide Caratti @ 2019-08-30 10:25 UTC (permalink / raw)
  To: borisp, jakub.kicinski, Eric Dumazet
  Cc: aviadye, davejwatson, davem, john.fastabend, Matthieu Baerts,
	netdev
In-Reply-To: <cover.1567158431.git.dcaratti@redhat.com>

From: Jakub Kicinski <jakub.kicinski@netronome.com>

We need to make sure context does not get freed while diag
code is interrogating it. Free struct tls_context with
kfree_rcu().

We add the __rcu annotation directly in icsk, and cast it
away in the datapath accessor. Presumably all ULPs will
do a similar thing.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/inet_connection_sock.h |  2 +-
 include/net/tls.h                  |  9 +++++++--
 net/core/sock_map.c                |  2 +-
 net/tls/tls_device.c               |  2 +-
 net/tls/tls_main.c                 | 26 +++++++++++++++++++-------
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c57d53e7e02c..895546058a20 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -97,7 +97,7 @@ struct inet_connection_sock {
 	const struct tcp_congestion_ops *icsk_ca_ops;
 	const struct inet_connection_sock_af_ops *icsk_af_ops;
 	const struct tcp_ulp_ops  *icsk_ulp_ops;
-	void			  *icsk_ulp_data;
+	void __rcu		  *icsk_ulp_data;
 	void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
 	struct hlist_node         icsk_listen_portaddr_node;
 	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
diff --git a/include/net/tls.h b/include/net/tls.h
index 41b2d41bb1b8..4997742475cd 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -41,6 +41,7 @@
 #include <linux/tcp.h>
 #include <linux/skmsg.h>
 #include <linux/netdevice.h>
+#include <linux/rcupdate.h>
 
 #include <net/tcp.h>
 #include <net/strparser.h>
@@ -290,6 +291,7 @@ struct tls_context {
 
 	struct list_head list;
 	refcount_t refcount;
+	struct rcu_head rcu;
 };
 
 enum tls_offload_ctx_dir {
@@ -348,7 +350,7 @@ struct tls_offload_context_rx {
 #define TLS_OFFLOAD_CONTEXT_SIZE_RX					\
 	(sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX)
 
-void tls_ctx_free(struct tls_context *ctx);
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 int tls_sk_query(struct sock *sk, int optname, char __user *optval,
 		int __user *optlen);
@@ -467,7 +469,10 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	return icsk->icsk_ulp_data;
+	/* Use RCU on icsk_ulp_data only for sock diag code,
+	 * TLS data path doesn't need rcu_dereference().
+	 */
+	return (__force void *)icsk->icsk_ulp_data;
 }
 
 static inline void tls_advance_record_sn(struct sock *sk,
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1330a7442e5b..01998860afaa 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -345,7 +345,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
 		return -EINVAL;
 	if (unlikely(idx >= map->max_entries))
 		return -E2BIG;
-	if (unlikely(icsk->icsk_ulp_data))
+	if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
 		return -EINVAL;
 
 	link = sk_psock_init_link();
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a470df7ffcf9..e188139f0464 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx)
 	if (ctx->rx_conf == TLS_HW)
 		kfree(tls_offload_ctx_rx(ctx));
 
-	tls_ctx_free(ctx);
+	tls_ctx_free(NULL, ctx);
 }
 
 static void tls_device_gc_task(struct work_struct *work)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 43252a801c3f..f8f2d2c3d627 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -251,14 +251,26 @@ static void tls_write_space(struct sock *sk)
 	ctx->sk_write_space(sk);
 }
 
-void tls_ctx_free(struct tls_context *ctx)
+/**
+ * tls_ctx_free() - free TLS ULP context
+ * @sk:  socket to with @ctx is attached
+ * @ctx: TLS context structure
+ *
+ * Free TLS context. If @sk is %NULL caller guarantees that the socket
+ * to which @ctx was attached has no outstanding references.
+ */
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
 {
 	if (!ctx)
 		return;
 
 	memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
 	memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
-	kfree(ctx);
+
+	if (sk)
+		kfree_rcu(ctx, rcu);
+	else
+		kfree(ctx);
 }
 
 static void tls_sk_proto_cleanup(struct sock *sk,
@@ -306,7 +318,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 
 	write_lock_bh(&sk->sk_callback_lock);
 	if (free_ctx)
-		icsk->icsk_ulp_data = NULL;
+		rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
 	sk->sk_prot = ctx->sk_proto;
 	if (sk->sk_write_space == tls_write_space)
 		sk->sk_write_space = ctx->sk_write_space;
@@ -321,7 +333,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	ctx->sk_proto_close(sk, timeout);
 
 	if (free_ctx)
-		tls_ctx_free(ctx);
+		tls_ctx_free(sk, ctx);
 }
 
 static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
@@ -610,7 +622,7 @@ static struct tls_context *create_ctx(struct sock *sk)
 	if (!ctx)
 		return NULL;
 
-	icsk->icsk_ulp_data = ctx;
+	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
 	ctx->setsockopt = sk->sk_prot->setsockopt;
 	ctx->getsockopt = sk->sk_prot->getsockopt;
 	ctx->sk_proto_close = sk->sk_prot->close;
@@ -651,8 +663,8 @@ static void tls_hw_sk_destruct(struct sock *sk)
 
 	ctx->sk_destruct(sk);
 	/* Free ctx */
-	tls_ctx_free(ctx);
-	icsk->icsk_ulp_data = NULL;
+	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+	tls_ctx_free(sk, ctx);
 }
 
 static int tls_hw_prot(struct sock *sk)
-- 
2.20.1


^ permalink raw reply related

* [PATCH net-next v3 0/3] net: tls: add socket diag
From: Davide Caratti @ 2019-08-30 10:25 UTC (permalink / raw)
  To: borisp, jakub.kicinski, Eric Dumazet
  Cc: aviadye, davejwatson, davem, john.fastabend, Matthieu Baerts,
	netdev

The current kernel does not provide any diagnostic tool, except
getsockopt(TCP_ULP), to know more about TCP sockets that have an upper
layer protocol (ULP) on top of them. This series extends the set of
information exported by INET_DIAG_INFO, to include data that are
specific to the ULP (and that might be meaningful for debug/testing
purposes).

patch 1/3 ensures that the control plane reads/updates ULP specific data
using RCU.

patch 2/3 extends INET_DIAG_INFO and allows knowing the ULP name for
each TCP socket that has done setsockopt(TCP_ULP) successfully.

patch 3/3 extends kTLS to let programs like 'ss' know the protocol
version and the cipher in use.

Changes since v2:
- remove unneeded #ifdef and fix reverse christmas tree in
  tls_get_info(), thanks to Jakub Kicinski 

Changes since v1:
- don't worry about grace period when accessing ulp_ops, thanks to
  Jakub Kicinski and Eric Dumazet
- use rcu_dereference() to access ULP data in tls get_info(), and 
  test against NULL value, thanks to Jakub Kicinski
- move RCU protected section inside tls get_info(), thanks to Jakub
  Kicinski

Changes since RFC:
- some coding style fixes, thanks to Jakub Kicinski
- add X_UNSPEC as lowest value of uAPI enums, thanks to Jakub Kicinski
- fix assignment of struct nlattr *start, thanks to Jakub Kicinski
- let tls dump RXCONF and TXCONF, suggested by Jakub Kicinski
- don't dump anything if TLS version or cipher are 0 (but still return a
  constant size in get_aux_size()), thanks to Boris Pismenny
- constify first argument of get_info() and get_size()
- use RCU to access access ulp_ops, like it's done for ca_ops
- add patch 1/3, from Jakub Kicinski

Davide Caratti (2):
  tcp: ulp: add functions to dump ulp-specific information
  net: tls: export protocol version, cipher, tx_conf/rx_conf to socket
    diag

Jakub Kicinski (1):
  net/tls: use RCU protection on icsk->icsk_ulp_data

 include/net/inet_connection_sock.h |  2 +-
 include/net/tcp.h                  |  3 +
 include/net/tls.h                  | 26 ++++++++-
 include/uapi/linux/inet_diag.h     |  9 +++
 include/uapi/linux/tls.h           | 15 +++++
 net/core/sock_map.c                |  2 +-
 net/ipv4/tcp_diag.c                | 52 ++++++++++++++++-
 net/tls/tls_device.c               |  2 +-
 net/tls/tls_main.c                 | 90 +++++++++++++++++++++++++++---
 9 files changed, 188 insertions(+), 13 deletions(-)

-- 
2.20.1


^ permalink raw reply

* [PATCH net-next 1/3] dpaa2-eth: Minor refactoring in ethtool stats
From: Ioana Radulescu @ 2019-08-30 10:20 UTC (permalink / raw)
  To: netdev, davem; +Cc: ioana.ciornei
In-Reply-To: <1567160443-31297-1-git-send-email-ruxandra.radulescu@nxp.com>

As we prepare to read more pages from the DPNI stat counters,
reorganize the code a bit to make it easier to extend.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 93076fe..1c5b54b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -188,6 +188,11 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 	struct dpaa2_eth_drv_stats *extras;
 	struct dpaa2_eth_ch_stats *ch_stats;
+	int dpni_stats_page_size[DPNI_STATISTICS_CNT] = {
+		sizeof(dpni_stats.page_0),
+		sizeof(dpni_stats.page_1),
+		sizeof(dpni_stats.page_2),
+	};
 
 	memset(data, 0,
 	       sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS));
@@ -198,17 +203,8 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 					  j, &dpni_stats);
 		if (err != 0)
 			netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j);
-		switch (j) {
-		case 0:
-			num_cnt = sizeof(dpni_stats.page_0) / sizeof(u64);
-			break;
-		case 1:
-			num_cnt = sizeof(dpni_stats.page_1) / sizeof(u64);
-			break;
-		case 2:
-			num_cnt = sizeof(dpni_stats.page_2) / sizeof(u64);
-			break;
-		}
+
+		num_cnt = dpni_stats_page_size[j] / sizeof(u64);
 		for (k = 0; k < num_cnt; k++)
 			*(data + i++) = dpni_stats.raw.counter[k];
 	}
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 2/3] dpaa2-eth: Add new DPNI statistics counters
From: Ioana Radulescu @ 2019-08-30 10:20 UTC (permalink / raw)
  To: netdev, davem; +Cc: ioana.ciornei
In-Reply-To: <1567160443-31297-1-git-send-email-ruxandra.radulescu@nxp.com>

Recent firmware versions expose more  DPNI counters.
Export relevant ones via ethtool -S.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c   | 21 ++++++++++--
 drivers/net/ethernet/freescale/dpaa2/dpni.c        |  2 +-
 drivers/net/ethernet/freescale/dpaa2/dpni.h        | 40 ++++++++++++++++++++++
 3 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 1c5b54b..c83ee76 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -28,6 +28,11 @@ static char dpaa2_ethtool_stats[][ETH_GSTRING_LEN] = {
 	"[hw] rx nobuffer discards",
 	"[hw] tx discarded frames",
 	"[hw] tx confirmed frames",
+	"[hw] tx dequeued bytes",
+	"[hw] tx dequeued frames",
+	"[hw] tx rejected bytes",
+	"[hw] tx rejected frames",
+	"[hw] tx pending frames",
 };
 
 #define DPAA2_ETH_NUM_STATS	ARRAY_SIZE(dpaa2_ethtool_stats)
@@ -192,17 +197,27 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 		sizeof(dpni_stats.page_0),
 		sizeof(dpni_stats.page_1),
 		sizeof(dpni_stats.page_2),
+		sizeof(dpni_stats.page_3),
+		sizeof(dpni_stats.page_4),
+		sizeof(dpni_stats.page_5),
+		sizeof(dpni_stats.page_6),
 	};
 
 	memset(data, 0,
 	       sizeof(u64) * (DPAA2_ETH_NUM_STATS + DPAA2_ETH_NUM_EXTRA_STATS));
 
 	/* Print standard counters, from DPNI statistics */
-	for (j = 0; j <= 2; j++) {
+	for (j = 0; j <= 6; j++) {
+		/* We're not interested in pages 4 & 5 for now */
+		if (j == 4 || j == 5)
+			continue;
 		err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token,
 					  j, &dpni_stats);
-		if (err != 0)
-			netdev_warn(net_dev, "dpni_get_stats(%d) failed\n", j);
+		/* Report zero for counters of unsupported pages
+		 * (e.g. when using older firmware versions)
+		 */
+		if (err)
+			memset(&dpni_stats, 0, sizeof(dpni_stats));
 
 		num_cnt = dpni_stats_page_size[j] / sizeof(u64);
 		for (k = 0; k < num_cnt; k++)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index 05e3089..dd54e69 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -1470,7 +1470,7 @@ int dpni_get_queue(struct fsl_mc_io *mc_io,
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:	Token of DPNI object
  * @page:	Selects the statistics page to retrieve, see
- *		DPNI_GET_STATISTICS output. Pages are numbered 0 to 2.
+ *		DPNI_GET_STATISTICS output. Pages are numbered 0 to 6.
  * @stat:	Structure containing the statistics
  *
  * Return:	'0' on Success; Error code otherwise.
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index 3e8fc6c..fd583911 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -416,6 +416,26 @@ int dpni_get_tx_data_offset(struct fsl_mc_io	*mc_io,
  *	lack of buffers
  * @page_2.egress_discarded_frames: Egress discarded frame count
  * @page_2.egress_confirmed_frames: Egress confirmed frame count
+ * @page3: Page_3 statistics structure
+ * @page_3.egress_dequeue_bytes: Cumulative count of the number of bytes
+ *	dequeued from egress FQs
+ * @page_3.egress_dequeue_frames: Cumulative count of the number of frames
+ *	dequeued from egress FQs
+ * @page_3.egress_reject_bytes: Cumulative count of the number of bytes in
+ *	egress frames whose enqueue was rejected
+ * @page_3.egress_reject_frames: Cumulative count of the number of egress
+ *	frames whose enqueue was rejected
+ * @page_4: Page_4 statistics structure: congestion points
+ * @page_4.cgr_reject_frames: number of rejected frames due to congestion point
+ * @page_4.cgr_reject_bytes: number of rejected bytes due to congestion point
+ * @page_5: Page_5 statistics structure: policer
+ * @page_5.policer_cnt_red: NUmber of red colored frames
+ * @page_5.policer_cnt_yellow: number of yellow colored frames
+ * @page_5.policer_cnt_green: number of green colored frames
+ * @page_5.policer_cnt_re_red: number of recolored red frames
+ * @page_5.policer_cnt_re_yellow: number of recolored yellow frames
+ * @page_6: Page_6 statistics structure
+ * @page_6.tx_pending_frames: total number of frames pending in egress FQs
  * @raw: raw statistics structure, used to index counters
  */
 union dpni_statistics {
@@ -443,6 +463,26 @@ union dpni_statistics {
 		u64 egress_confirmed_frames;
 	} page_2;
 	struct {
+		u64 egress_dequeue_bytes;
+		u64 egress_dequeue_frames;
+		u64 egress_reject_bytes;
+		u64 egress_reject_frames;
+	} page_3;
+	struct {
+		u64 cgr_reject_frames;
+		u64 cgr_reject_bytes;
+	} page_4;
+	struct {
+		u64 policer_cnt_red;
+		u64 policer_cnt_yellow;
+		u64 policer_cnt_green;
+		u64 policer_cnt_re_red;
+		u64 policer_cnt_re_yellow;
+	} page_5;
+	struct {
+		u64 tx_pending_frames;
+	} page_6;
+	struct {
 		u64 counter[DPNI_STATISTICS_CNT];
 	} raw;
 };
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 3/3] dpaa2-eth: Poll Tx pending frames counter on if down
From: Ioana Radulescu @ 2019-08-30 10:20 UTC (permalink / raw)
  To: netdev, davem; +Cc: ioana.ciornei
In-Reply-To: <1567160443-31297-1-git-send-email-ruxandra.radulescu@nxp.com>

Starting with firmware version MC10.18.0, a new counter for in flight
Tx frames is offered. Use it when bringing down the interface to
determine when all pending Tx frames have been processed by hardware
instead of sleeping a fixed amount of time.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 31 +++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 5402867..162d7d8 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1348,7 +1348,7 @@ static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
 	return total;
 }
 
-static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
+static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv)
 {
 	int retries = 10;
 	u32 pending;
@@ -1360,6 +1360,31 @@ static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
 	} while (pending && --retries);
 }
 
+#define DPNI_TX_PENDING_VER_MAJOR	7
+#define DPNI_TX_PENDING_VER_MINOR	13
+static void wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv)
+{
+	union dpni_statistics stats;
+	int retries = 10;
+	int err;
+
+	if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_TX_PENDING_VER_MAJOR,
+				   DPNI_TX_PENDING_VER_MINOR) < 0)
+		goto out;
+
+	do {
+		err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token, 6,
+					  &stats);
+		if (err)
+			goto out;
+		if (stats.page_6.tx_pending_frames == 0)
+			return;
+	} while (--retries);
+
+out:
+	msleep(500);
+}
+
 static int dpaa2_eth_stop(struct net_device *net_dev)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
@@ -1379,7 +1404,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
 	 * on WRIOP. After it finishes, wait until all remaining frames on Rx
 	 * and Tx conf queues are consumed on NAPI poll.
 	 */
-	msleep(500);
+	wait_for_egress_fq_empty(priv);
 
 	do {
 		dpni_disable(priv->mc_io, 0, priv->mc_token);
@@ -1395,7 +1420,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
 		 */
 	}
 
-	wait_for_fq_empty(priv);
+	wait_for_ingress_fq_empty(priv);
 	disable_ch_napi(priv);
 
 	/* Empty the buffer pool */
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 0/3] dpaa2-eth: Add new statistics counters
From: Ioana Radulescu @ 2019-08-30 10:20 UTC (permalink / raw)
  To: netdev, davem; +Cc: ioana.ciornei

Recent firmware versions offer access to more DPNI statistics
counters. Add the relevant ones to ethtool interface stats.

Also we can now make use of a new counter for in flight egress frames
to avoid sleeping an arbitrary amount of time in the ndo_stop routine.

Ioana Radulescu (3):
  dpaa2-eth: Minor refactoring in ethtool stats
  dpaa2-eth: Add new DPNI statistics counters
  dpaa2-eth: Poll Tx pending frames counter on if down

 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c   | 31 +++++++++++++++--
 .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c   | 39 +++++++++++++--------
 drivers/net/ethernet/freescale/dpaa2/dpni.c        |  2 +-
 drivers/net/ethernet/freescale/dpaa2/dpni.h        | 40 ++++++++++++++++++++++
 4 files changed, 94 insertions(+), 18 deletions(-)

-- 
2.7.4


^ permalink raw reply

* Re: [RFC PATCH v2 net-next 00/15] tc-taprio offload for SJA1105 DSA
From: Vladimir Oltean @ 2019-08-30 10:11 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Florian Fainelli, Vivien Didelot, Andrew Lunn, David S. Miller,
	Vinicius Costa Gomes, vedang.patel, Richard Cochran, weifeng.voon,
	jiri, m-karicheri2, Jose.Abreu, Ilias Apalodimas,
	Jamal Hadi Salim, xiyou.wangcong, netdev
In-Reply-To: <20190829182132.43001706@cakuba.netronome.com>

Hi Jakub,

On Fri, 30 Aug 2019 at 04:21, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
>
> On Fri, 30 Aug 2019 03:46:20 +0300, Vladimir Oltean wrote:
> > - Configuring the switch over SPI cannot apparently be done from this
> >   ndo_setup_tc callback because it runs in atomic context. I also have
> >   some downstream patches to offload tc clsact matchall with mirred
> >   action, but in that case it looks like the atomic context restriction
> >   does not apply.
>
> This sounds really surprising ndo_setup_tc should always be allowed to
> sleep. Can the taprio limitation be lifted somehow?

I need to get more familiar with the taprio internal data structures.
I think you're suggesting to get those updated to a consistent state
while under spin_lock_bh(qdisc_lock(sch)), then call ndo_setup_tc from
outside that critical section?

Also, I just noticed that I introduced a bug in taprio_disable_offload
with my reference counting addition. The qdisc can't just pass stack
memory to the driver, now that it's allowing it to keep it. So
definitely the patch needs more refactoring.

Thanks,
-Vladimir

^ permalink raw reply

* Re: [PATCH v3 1/2] net: core: Notify on changes to dev->promiscuity.
From: Ido Schimmel @ 2019-08-30  9:43 UTC (permalink / raw)
  To: David Miller
  Cc: andrew, jiri, horatiu.vultur, alexandre.belloni, UNGLinuxDriver,
	allan.nielsen, ivecera, f.fainelli, netdev, linux-kernel
In-Reply-To: <20190829.151201.940681219080864052.davem@davemloft.net>

On Thu, Aug 29, 2019 at 03:12:01PM -0700, David Miller wrote:
> From: Ido Schimmel <idosch@idosch.org>
> Date: Thu, 29 Aug 2019 22:36:13 +0300
> 
> > I fully agree that we should make it easy for users to capture offloaded
> > traffic, which is why I suggested patching libpcap. Add a flag to
> > capable netdevs that tells libpcap that in order to capture all the
> > traffic from this interface it needs to add a tc filter with a trap
> > action. That way zero familiarity with tc is required from users.
> 
> Why not just make setting promisc mode on the device do this rather than
> require all of this tc indirection nonsense?
> 
> That's the whole point of this conversation I thought?

As I understand it, the goal of this series is to be able to capture
offloaded traffic.

Currently, the only indication that drivers get when someone is running
tcpdump/tshark/whatever over an interface is that it's is put in promisc
mode. So this patches use it as an indication to trap all the traffic to
the CPU and special case the bridge in order to prevent the driver from
trapping packets when its ports are bridged. The same will have to be
done for OVS and in any other (current or future) cases where promisc
mode is needed to disable Rx filtering.

If there was another indication that these applications are running over
an interface, would we be bothering with this new interpretation of
promisc mode and special casing? I don't think so.

We can instead teach libpcap that in order to capture offloaded traffic
it should use this "tc indirection nonsense". Or turn on a new knob.
This avoids the need to change each driver with this new special case
logic.

Also, what happens when I'm running these application without putting
the interface in promisc mode? On an offloaded interface I would not be
able to even capture packets addressed to my interface's MAC address.
What happens when the interface is already in promisc mode (because of
the bridge) and I'm again running tcpdump with '-p'? I will not be able
to capture offloaded traffic despite the fact that the interface is
already configured in promisc mode.

^ permalink raw reply

* Re: [PATCH v4 1/5] vsock/virtio: limit the memory used per-socket
From: Stefano Garzarella @ 2019-08-30  9:40 UTC (permalink / raw)
  To: Michael S. Tsirkin, Stefan Hajnoczi
  Cc: netdev, linux-kernel, David S. Miller, virtualization, Jason Wang,
	kvm
In-Reply-To: <20190729095956-mutt-send-email-mst@kernel.org>

On Mon, Jul 29, 2019 at 10:04:29AM -0400, Michael S. Tsirkin wrote:
> On Wed, Jul 17, 2019 at 01:30:26PM +0200, Stefano Garzarella wrote:
> > Since virtio-vsock was introduced, the buffers filled by the host
> > and pushed to the guest using the vring, are directly queued in
> > a per-socket list. These buffers are preallocated by the guest
> > with a fixed size (4 KB).
> > 
> > The maximum amount of memory used by each socket should be
> > controlled by the credit mechanism.
> > The default credit available per-socket is 256 KB, but if we use
> > only 1 byte per packet, the guest can queue up to 262144 of 4 KB
> > buffers, using up to 1 GB of memory per-socket. In addition, the
> > guest will continue to fill the vring with new 4 KB free buffers
> > to avoid starvation of other sockets.
> > 
> > This patch mitigates this issue copying the payload of small
> > packets (< 128 bytes) into the buffer of last packet queued, in
> > order to avoid wasting memory.
> > 
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> 
> This is good enough for net-next, but for net I think we
> should figure out how to address the issue completely.
> Can we make the accounting precise? What happens to
> performance if we do?
> 

Since I'm back from holidays, I'm restarting this thread to figure out
how to address the issue completely.

I did a better analysis of the credit mechanism that we implemented in
virtio-vsock to get a clearer view and I'd share it with you:

    This issue affect only the "host->guest" path. In this case, when the
    host wants to send a packet to the guest, it uses a "free" buffer
    allocated by the guest (4KB).
    The "free" buffers available for the host are shared between all
    sockets, instead, the credit mechanism is per-socket, I think to
    avoid the starvation of others sockets.
    The guests re-fill the "free" queue when the available buffers are
    less than half.

    Each peer have these variables in the per-socket state:
       /* local vars */
       buf_alloc        /* max bytes usable by this socket
                           [exposed to the other peer] */
       fwd_cnt          /* increased when RX packet is consumed by the
                           user space [exposed to the other peer] */
       tx_cnt 	        /* increased when TX packet is sent to the other peer */

       /* remote vars  */
       peer_buf_alloc   /* peer's buf_alloc */
       peer_fwd_cnt     /* peer's fwd_cnt */

    When a peer sends a packet, it increases the 'tx_cnt'; when the
    receiver consumes the packet (copy it to the user-space buffer), it
    increases the 'fwd_cnt'.
    Note: increments are made considering the payload length and not the
    buffer length.

    The value of 'buf_alloc' and 'fwd_cnt' are sent to the other peer in
    all packet headers or with an explicit CREDIT_UPDATE packet.

    The local 'buf_alloc' value can be modified by the user space using
    setsockopt() with optname=SO_VM_SOCKETS_BUFFER_SIZE.

    Before to send a packet, the peer checks the space available:
    	credit_available = peer_buf_alloc - (tx_cnt - peer_fwd_cnt)
    and it will send up to credit_available bytes to the other peer.

Possible solutions considering Michael's advice:
1. Use the buffer length instead of the payload length when we increment
   the counters:
  - This approach will account precisely the memory used per socket.
  - This requires changes in both guest and host.
  - It is not compatible with old drivers, so a feature should be negotiated.

2. Decrease the advertised 'buf_alloc' taking count of bytes queued in
   the socket queue but not used. (e.g. 256 byte used on 4K available in
   the buffer)
  - pkt->hdr.buf_alloc = buf_alloc - bytes_not_used.
  - This should be compatible also with old drivers.

Maybe the second is less invasive, but will it be too tricky?
Any other advice or suggestions?

Thanks in advance,
Stefano

^ permalink raw reply

* Re: [PATCH 1/1] forcedeth: use per cpu to collect xmit/recv statistics
From: Eric Dumazet @ 2019-08-30  9:32 UTC (permalink / raw)
  To: Zhu Yanjun, netdev, davem, nan.1986san
In-Reply-To: <1567154111-23315-2-git-send-email-yanjun.zhu@oracle.com>



On 8/30/19 10:35 AM, Zhu Yanjun wrote:
> When testing with a background iperf pushing 1Gbit/sec traffic and running
> both ifconfig and netstat to collect statistics, some deadlocks occurred.
> 

This is quite a heavy patch trying to fix a bug...

I suspect the root cause has nothing to do with stat
collection since on 64bit arches there is no additional synchronization.

(u64_stats_update_begin(), u64_stats_update_end() are nops)

> +static inline void nv_get_stats(int cpu, struct fe_priv *np,
> +				struct rtnl_link_stats64 *storage)
> +{
> +	struct nv_txrx_stats *src = per_cpu_ptr(np->txrx_stats, cpu);
> +	unsigned int syncp_start;
> +
> +	do {
> +		syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp);
> +		storage->rx_packets       += src->stat_rx_packets;
> +		storage->rx_bytes         += src->stat_rx_bytes;
> +		storage->rx_dropped       += src->stat_rx_dropped;
> +		storage->rx_missed_errors += src->stat_rx_missed_errors;
> +	} while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start));
> +
> +	do {
> +		syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp);
> +		storage->tx_packets += src->stat_tx_packets;
> +		storage->tx_bytes   += src->stat_tx_bytes;
> +		storage->tx_dropped += src->stat_tx_dropped;
> +	} while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start));
> +}
> +
>

This is buggy :
If the loops are ever restarted, the storage->fields will have
been modified multiple times.


^ permalink raw reply

* [PATCH v3 net-next 00/15] ioc3-eth improvements
From: Thomas Bogendoerfer @ 2019-08-30  9:25 UTC (permalink / raw)
  To: Ralf Baechle, Paul Burton, James Hogan, David S. Miller,
	linux-mips, linux-kernel, netdev

In my patch series for splitting out the serial code from ioc3-eth
by using a MFD device there was one big patch for ioc3-eth.c,
which wasn't really usefull for reviews. This series contains the
ioc3-eth changes splitted in smaller steps and few more cleanups.
Only the conversion to MFD will be done later in a different series.

Changes in v3:
- no need to check skb == NULL before passing it to dev_kfree_skb_any
- free memory allocated with get_page(s) with free_page(s)
- allocate rx ring with just GFP_KERNEL
- add required alignment for rings in comments

Changes in v2:
- use net_err_ratelimited for printing various ioc3 errors
- added missing clearing of rx buf valid flags into ioc3_alloc_rings
- use __func__ for printing out of memory messages

Thomas Bogendoerfer (15):
  MIPS: SGI-IP27: remove ioc3 ethernet init
  MIPS: SGI-IP27: restructure ioc3 register access
  net: sgi: ioc3-eth: remove checkpatch errors/warning
  net: sgi: ioc3-eth: use defines for constants dealing with desc rings
  net: sgi: ioc3-eth: allocate space for desc rings only once
  net: sgi: ioc3-eth: get rid of ioc3_clean_rx_ring()
  net: sgi: ioc3-eth: separate tx and rx ring handling
  net: sgi: ioc3-eth: introduce chip start function
  net: sgi: ioc3-eth: split ring cleaning/freeing and allocation
  net: sgi: ioc3-eth: refactor rx buffer allocation
  net: sgi: ioc3-eth: use dma-direct for dma allocations
  net: sgi: ioc3-eth: use csum_fold
  net: sgi: ioc3-eth: Fix IPG settings
  net: sgi: ioc3-eth: protect emcr in all cases
  net: sgi: ioc3-eth: no need to stop queue set_multicast_list

 arch/mips/include/asm/sn/ioc3.h     |  357 +++++-------
 arch/mips/sgi-ip27/ip27-console.c   |    5 +-
 arch/mips/sgi-ip27/ip27-init.c      |   13 -
 drivers/net/ethernet/sgi/ioc3-eth.c | 1038 +++++++++++++++++------------------
 4 files changed, 665 insertions(+), 748 deletions(-)

-- 
2.13.7


^ permalink raw reply

* [PATCH v3 net-next 03/15] net: sgi: ioc3-eth: remove checkpatch errors/warning
From: Thomas Bogendoerfer @ 2019-08-30  9:25 UTC (permalink / raw)
  To: Ralf Baechle, Paul Burton, James Hogan, David S. Miller,
	linux-mips, linux-kernel, netdev
In-Reply-To: <20190830092539.24550-1-tbogendoerfer@suse.de>

Before massaging the driver further fix oddities found by checkpatch like
- wrong indention
- comment formatting
- use of printk instead or netdev_xxx/pr_xxx

Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/sgi/ioc3-eth.c | 275 +++++++++++++++++-------------------
 1 file changed, 130 insertions(+), 145 deletions(-)

diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 713d2472cb97..51cc1389e204 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -1,9 +1,5 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Driver for SGI's IOC3 based Ethernet cards as found in the PCI card.
+// SPDX-License-Identifier: GPL-2.0
+/* Driver for SGI's IOC3 based Ethernet cards as found in the PCI card.
  *
  * Copyright (C) 1999, 2000, 01, 03, 06 Ralf Baechle
  * Copyright (C) 1995, 1999, 2000, 2001 by Silicon Graphics, Inc.
@@ -39,6 +35,7 @@
 #include <linux/crc32.h>
 #include <linux/mii.h>
 #include <linux/in.h>
+#include <linux/io.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
@@ -58,21 +55,19 @@
 #include <net/ip.h>
 
 #include <asm/byteorder.h>
-#include <asm/io.h>
 #include <asm/pgtable.h>
 #include <linux/uaccess.h>
 #include <asm/sn/types.h>
 #include <asm/sn/ioc3.h>
 #include <asm/pci/bridge.h>
 
-/*
- * 64 RX buffers.  This is tunable in the range of 16 <= x < 512.  The
+/* 64 RX buffers.  This is tunable in the range of 16 <= x < 512.  The
  * value must be a power of two.
  */
 #define RX_BUFFS 64
 
-#define ETCSR_FD	((17<<ETCSR_IPGR2_SHIFT) | (11<<ETCSR_IPGR1_SHIFT) | 21)
-#define ETCSR_HD	((21<<ETCSR_IPGR2_SHIFT) | (21<<ETCSR_IPGR1_SHIFT) | 21)
+#define ETCSR_FD   ((17 << ETCSR_IPGR2_SHIFT) | (11 << ETCSR_IPGR1_SHIFT) | 21)
+#define ETCSR_HD   ((21 << ETCSR_IPGR2_SHIFT) | (21 << ETCSR_IPGR1_SHIFT) | 21)
 
 /* Private per NIC data of the driver.  */
 struct ioc3_private {
@@ -119,14 +114,15 @@ static inline unsigned long aligned_rx_skb_addr(unsigned long addr)
 	return (~addr + 1) & (IOC3_CACHELINE - 1UL);
 }
 
-static inline struct sk_buff * ioc3_alloc_skb(unsigned long length,
-	unsigned int gfp_mask)
+static inline struct sk_buff *ioc3_alloc_skb(unsigned long length,
+					     unsigned int gfp_mask)
 {
 	struct sk_buff *skb;
 
 	skb = alloc_skb(length + IOC3_CACHELINE - 1, gfp_mask);
 	if (likely(skb)) {
-		int offset = aligned_rx_skb_addr((unsigned long) skb->data);
+		int offset = aligned_rx_skb_addr((unsigned long)skb->data);
+
 		if (offset)
 			skb_reserve(skb, offset);
 	}
@@ -147,15 +143,11 @@ static inline unsigned long ioc3_map(void *ptr, unsigned long vdev)
 }
 
 /* BEWARE: The IOC3 documentation documents the size of rx buffers as
-   1644 while it's actually 1664.  This one was nasty to track down ...  */
+ * 1644 while it's actually 1664.  This one was nasty to track down ...
+ */
 #define RX_OFFSET		10
 #define RX_BUF_ALLOC_SIZE	(1664 + RX_OFFSET + IOC3_CACHELINE)
 
-/* DMA barrier to separate cached and uncached accesses.  */
-#define BARRIER()							\
-	__asm__("sync" ::: "memory")
-
-
 #define IOC3_SIZE 0x100000
 
 static inline u32 mcr_pack(u32 pulse, u32 sample)
@@ -176,7 +168,7 @@ static int nic_wait(u32 __iomem *mcr)
 
 static int nic_reset(u32 __iomem *mcr)
 {
-        int presence;
+	int presence;
 
 	writel(mcr_pack(500, 65), mcr);
 	presence = nic_wait(mcr);
@@ -184,7 +176,7 @@ static int nic_reset(u32 __iomem *mcr)
 	writel(mcr_pack(0, 500), mcr);
 	nic_wait(mcr);
 
-        return presence;
+	return presence;
 }
 
 static inline int nic_read_bit(u32 __iomem *mcr)
@@ -209,8 +201,7 @@ static inline void nic_write_bit(u32 __iomem *mcr, int bit)
 	nic_wait(mcr);
 }
 
-/*
- * Read a byte from an iButton device
+/* Read a byte from an iButton device
  */
 static u32 nic_read_byte(u32 __iomem *mcr)
 {
@@ -223,8 +214,7 @@ static u32 nic_read_byte(u32 __iomem *mcr)
 	return result;
 }
 
-/*
- * Write a byte to an iButton device
+/* Write a byte to an iButton device
  */
 static void nic_write_byte(u32 __iomem *mcr, int byte)
 {
@@ -253,7 +243,7 @@ static u64 nic_find(u32 __iomem *mcr, int *last)
 		b = nic_read_bit(mcr);
 
 		if (a && b) {
-			printk("NIC search failed (not fatal).\n");
+			pr_warn("NIC search failed (not fatal).\n");
 			*last = 0;
 			return 0;
 		}
@@ -264,8 +254,9 @@ static u64 nic_find(u32 __iomem *mcr, int *last)
 			} else if (index > *last) {
 				address &= ~(1UL << index);
 				disc = index;
-			} else if ((address & (1UL << index)) == 0)
+			} else if ((address & (1UL << index)) == 0) {
 				disc = index;
+			}
 			nic_write_bit(mcr, address & (1UL << index));
 			continue;
 		} else {
@@ -293,6 +284,7 @@ static int nic_init(u32 __iomem *mcr)
 
 	while (1) {
 		u64 reg;
+
 		reg = nic_find(mcr, &save);
 
 		switch (reg & 0xff) {
@@ -323,16 +315,15 @@ static int nic_init(u32 __iomem *mcr)
 		break;
 	}
 
-	printk("Found %s NIC", type);
+	pr_info("Found %s NIC", type);
 	if (type != unknown)
-		printk (" registration number %pM, CRC %02x", serial, crc);
-	printk(".\n");
+		pr_cont(" registration number %pM, CRC %02x", serial, crc);
+	pr_cont(".\n");
 
 	return 0;
 }
 
-/*
- * Read the NIC (Number-In-a-Can) device used to store the MAC address on
+/* Read the NIC (Number-In-a-Can) device used to store the MAC address on
  * SN0 / SN00 nodeboards and PCI cards.
  */
 static void ioc3_get_eaddr_nic(struct ioc3_private *ip)
@@ -351,7 +342,7 @@ static void ioc3_get_eaddr_nic(struct ioc3_private *ip)
 	}
 
 	if (tries < 0) {
-		printk("Failed to read MAC address\n");
+		pr_err("Failed to read MAC address\n");
 		return;
 	}
 
@@ -367,8 +358,7 @@ static void ioc3_get_eaddr_nic(struct ioc3_private *ip)
 		ip->dev->dev_addr[i - 2] = nic[i];
 }
 
-/*
- * Ok, this is hosed by design.  It's necessary to know what machine the
+/* Ok, this is hosed by design.  It's necessary to know what machine the
  * NIC is in in order to know how to read the NIC address.  We also have
  * to know if it's a PCI card or a NIC in on the node board ...
  */
@@ -376,7 +366,7 @@ static void ioc3_get_eaddr(struct ioc3_private *ip)
 {
 	ioc3_get_eaddr_nic(ip);
 
-	printk("Ethernet address is %pM.\n", ip->dev->dev_addr);
+	pr_info("Ethernet address is %pM.\n", ip->dev->dev_addr);
 }
 
 static void __ioc3_set_mac_address(struct net_device *dev)
@@ -407,8 +397,7 @@ static int ioc3_set_mac_address(struct net_device *dev, void *addr)
 	return 0;
 }
 
-/*
- * Caller must hold the ioc3_lock ever for MII readers.  This is also
+/* Caller must hold the ioc3_lock ever for MII readers.  This is also
  * used to protect the transmitter side but it's low contention.
  */
 static int ioc3_mdio_read(struct net_device *dev, int phy, int reg)
@@ -450,17 +439,16 @@ static struct net_device_stats *ioc3_get_stats(struct net_device *dev)
 	return &dev->stats;
 }
 
-static void ioc3_tcpudp_checksum(struct sk_buff *skb, uint32_t hwsum, int len)
+static void ioc3_tcpudp_checksum(struct sk_buff *skb, u32 hwsum, int len)
 {
 	struct ethhdr *eh = eth_hdr(skb);
-	uint32_t csum, ehsum;
 	unsigned int proto;
-	struct iphdr *ih;
-	uint16_t *ew;
 	unsigned char *cp;
+	struct iphdr *ih;
+	u32 csum, ehsum;
+	u16 *ew;
 
-	/*
-	 * Did hardware handle the checksum at all?  The cases we can handle
+	/* Did hardware handle the checksum at all?  The cases we can handle
 	 * are:
 	 *
 	 * - TCP and UDP checksums of IPv4 only.
@@ -476,7 +464,7 @@ static void ioc3_tcpudp_checksum(struct sk_buff *skb, uint32_t hwsum, int len)
 	if (eh->h_proto != htons(ETH_P_IP))
 		return;
 
-	ih = (struct iphdr *) ((char *)eh + ETH_HLEN);
+	ih = (struct iphdr *)((char *)eh + ETH_HLEN);
 	if (ip_is_fragment(ih))
 		return;
 
@@ -487,12 +475,12 @@ static void ioc3_tcpudp_checksum(struct sk_buff *skb, uint32_t hwsum, int len)
 	/* Same as tx - compute csum of pseudo header  */
 	csum = hwsum +
 	       (ih->tot_len - (ih->ihl << 2)) +
-	       htons((uint16_t)ih->protocol) +
+	       htons((u16)ih->protocol) +
 	       (ih->saddr >> 16) + (ih->saddr & 0xffff) +
 	       (ih->daddr >> 16) + (ih->daddr & 0xffff);
 
 	/* Sum up ethernet dest addr, src addr and protocol  */
-	ew = (uint16_t *) eh;
+	ew = (u16 *)eh;
 	ehsum = ew[0] + ew[1] + ew[2] + ew[3] + ew[4] + ew[5] + ew[6];
 
 	ehsum = (ehsum & 0xffff) + (ehsum >> 16);
@@ -501,14 +489,15 @@ static void ioc3_tcpudp_checksum(struct sk_buff *skb, uint32_t hwsum, int len)
 	csum += 0xffff ^ ehsum;
 
 	/* In the next step we also subtract the 1's complement
-	   checksum of the trailing ethernet CRC.  */
+	 * checksum of the trailing ethernet CRC.
+	 */
 	cp = (char *)eh + len;	/* points at trailing CRC */
 	if (len & 1) {
-		csum += 0xffff ^ (uint16_t) ((cp[1] << 8) | cp[0]);
-		csum += 0xffff ^ (uint16_t) ((cp[3] << 8) | cp[2]);
+		csum += 0xffff ^ (u16)((cp[1] << 8) | cp[0]);
+		csum += 0xffff ^ (u16)((cp[3] << 8) | cp[2]);
 	} else {
-		csum += 0xffff ^ (uint16_t) ((cp[0] << 8) | cp[1]);
-		csum += 0xffff ^ (uint16_t) ((cp[2] << 8) | cp[3]);
+		csum += 0xffff ^ (u16)((cp[0] << 8) | cp[1]);
+		csum += 0xffff ^ (u16)((cp[2] << 8) | cp[3]);
 	}
 
 	csum = (csum & 0xffff) + (csum >> 16);
@@ -532,7 +521,7 @@ static inline void ioc3_rx(struct net_device *dev)
 	n_entry = ip->rx_pi;
 
 	skb = ip->rx_skbs[rx_entry];
-	rxb = (struct ioc3_erxbuf *) (skb->data - RX_OFFSET);
+	rxb = (struct ioc3_erxbuf *)(skb->data - RX_OFFSET);
 	w0 = be32_to_cpu(rxb->w0);
 
 	while (w0 & ERXBUF_V) {
@@ -545,7 +534,8 @@ static inline void ioc3_rx(struct net_device *dev)
 			new_skb = ioc3_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
 			if (!new_skb) {
 				/* Ouch, drop packet and just recycle packet
-				   to keep the ring filled.  */
+				 * to keep the ring filled.
+				 */
 				dev->stats.rx_dropped++;
 				new_skb = skb;
 				goto next;
@@ -553,7 +543,8 @@ static inline void ioc3_rx(struct net_device *dev)
 
 			if (likely(dev->features & NETIF_F_RXCSUM))
 				ioc3_tcpudp_checksum(skb,
-					w0 & ERXBUF_IPCKSUM_MASK, len);
+						     w0 & ERXBUF_IPCKSUM_MASK,
+						     len);
 
 			netif_rx(skb);
 
@@ -561,15 +552,16 @@ static inline void ioc3_rx(struct net_device *dev)
 
 			/* Because we reserve afterwards. */
 			skb_put(new_skb, (1664 + RX_OFFSET));
-			rxb = (struct ioc3_erxbuf *) new_skb->data;
+			rxb = (struct ioc3_erxbuf *)new_skb->data;
 			skb_reserve(new_skb, RX_OFFSET);
 
 			dev->stats.rx_packets++;		/* Statistics */
 			dev->stats.rx_bytes += len;
 		} else {
 			/* The frame is invalid and the skb never
-			   reached the network layer so we can just
-			   recycle it.  */
+			 * reached the network layer so we can just
+			 * recycle it.
+			 */
 			new_skb = skb;
 			dev->stats.rx_errors++;
 		}
@@ -586,7 +578,7 @@ static inline void ioc3_rx(struct net_device *dev)
 		/* Now go on to the next ring entry.  */
 		rx_entry = (rx_entry + 1) & 511;
 		skb = ip->rx_skbs[rx_entry];
-		rxb = (struct ioc3_erxbuf *) (skb->data - RX_OFFSET);
+		rxb = (struct ioc3_erxbuf *)(skb->data - RX_OFFSET);
 		w0 = be32_to_cpu(rxb->w0);
 	}
 	writel((n_entry << 3) | ERPIR_ARM, &ip->regs->erpir);
@@ -635,8 +627,7 @@ static inline void ioc3_tx(struct net_device *dev)
 	spin_unlock(&ip->ioc3_lock);
 }
 
-/*
- * Deal with fatal IOC3 errors.  This condition might be caused by a hard or
+/* Deal with fatal IOC3 errors.  This condition might be caused by a hard or
  * software problems, so we should try to recover
  * more gracefully if this ever happens.  In theory we might be flooded
  * with such error interrupts if something really goes wrong, so we might
@@ -645,22 +636,21 @@ static inline void ioc3_tx(struct net_device *dev)
 static void ioc3_error(struct net_device *dev, u32 eisr)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
-	unsigned char *iface = dev->name;
 
 	spin_lock(&ip->ioc3_lock);
 
 	if (eisr & EISR_RXOFLO)
-		printk(KERN_ERR "%s: RX overflow.\n", iface);
+		net_err_ratelimited("%s: RX overflow.\n", dev->name);
 	if (eisr & EISR_RXBUFOFLO)
-		printk(KERN_ERR "%s: RX buffer overflow.\n", iface);
+		net_err_ratelimited("%s: RX buffer overflow.\n", dev->name);
 	if (eisr & EISR_RXMEMERR)
-		printk(KERN_ERR "%s: RX PCI error.\n", iface);
+		net_err_ratelimited("%s: RX PCI error.\n", dev->name);
 	if (eisr & EISR_RXPARERR)
-		printk(KERN_ERR "%s: RX SSRAM parity error.\n", iface);
+		net_err_ratelimited("%s: RX SSRAM parity error.\n", dev->name);
 	if (eisr & EISR_TXBUFUFLO)
-		printk(KERN_ERR "%s: TX buffer underflow.\n", iface);
+		net_err_ratelimited("%s: TX buffer underflow.\n", dev->name);
 	if (eisr & EISR_TXMEMERR)
-		printk(KERN_ERR "%s: TX PCI error.\n", iface);
+		net_err_ratelimited("%s: TX PCI error.\n", dev->name);
 
 	ioc3_stop(ip);
 	ioc3_init(dev);
@@ -672,7 +662,8 @@ static void ioc3_error(struct net_device *dev, u32 eisr)
 }
 
 /* The interrupt handler does all of the Rx thread work and cleans up
-   after the Tx thread.  */
+ * after the Tx thread.
+ */
 static irqreturn_t ioc3_interrupt(int irq, void *dev_id)
 {
 	struct ioc3_private *ip = netdev_priv(dev_id);
@@ -684,7 +675,7 @@ static irqreturn_t ioc3_interrupt(int irq, void *dev_id)
 	readl(&regs->eisr);				/* Flush */
 
 	if (eisr & (EISR_RXOFLO | EISR_RXBUFOFLO | EISR_RXMEMERR |
-	            EISR_RXPARERR | EISR_TXBUFUFLO | EISR_TXMEMERR))
+		    EISR_RXPARERR | EISR_TXBUFUFLO | EISR_TXMEMERR))
 		ioc3_error(dev_id, eisr);
 	if (eisr & EISR_RXTIMERINT)
 		ioc3_rx(dev_id);
@@ -716,12 +707,11 @@ static void ioc3_timer(struct timer_list *t)
 	mii_check_media(&ip->mii, 1, 0);
 	ioc3_setup_duplex(ip);
 
-	ip->ioc3_timer.expires = jiffies + ((12 * HZ)/10); /* 1.2s */
+	ip->ioc3_timer.expires = jiffies + ((12 * HZ) / 10); /* 1.2s */
 	add_timer(&ip->ioc3_timer);
 }
 
-/*
- * Try to find a PHY.  There is no apparent relation between the MII addresses
+/* Try to find a PHY.  There is no apparent relation between the MII addresses
  * in the SGI documentation and what we find in reality, so we simply probe
  * for the PHY.  It seems IOC3 PHYs usually live on address 31.  One of my
  * onboard IOC3s has the special oddity that probing doesn't seem to find it
@@ -730,8 +720,8 @@ static void ioc3_timer(struct timer_list *t)
  */
 static int ioc3_mii_init(struct ioc3_private *ip)
 {
-	int i, found = 0, res = 0;
 	int ioc3_phy_workaround = 1;
+	int i, found = 0, res = 0;
 	u16 word;
 
 	for (i = 0; i < 32; i++) {
@@ -744,9 +734,9 @@ static int ioc3_mii_init(struct ioc3_private *ip)
 	}
 
 	if (!found) {
-		if (ioc3_phy_workaround)
+		if (ioc3_phy_workaround) {
 			i = 31;
-		else {
+		} else {
 			ip->mii.phy_id = -1;
 			res = -ENODEV;
 			goto out;
@@ -761,12 +751,13 @@ static int ioc3_mii_init(struct ioc3_private *ip)
 
 static void ioc3_mii_start(struct ioc3_private *ip)
 {
-	ip->ioc3_timer.expires = jiffies + (12 * HZ)/10;  /* 1.2 sec. */
+	ip->ioc3_timer.expires = jiffies + (12 * HZ) / 10;  /* 1.2 sec. */
 	add_timer(&ip->ioc3_timer);
 }
 
 static inline void ioc3_clean_rx_ring(struct ioc3_private *ip)
 {
+	struct ioc3_erxbuf *rxb;
 	struct sk_buff *skb;
 	int i;
 
@@ -777,10 +768,9 @@ static inline void ioc3_clean_rx_ring(struct ioc3_private *ip)
 	ip->rx_pi &= 511;
 	ip->rx_ci &= 511;
 
-	for (i = ip->rx_ci; i != ip->rx_pi; i = (i+1) & 511) {
-		struct ioc3_erxbuf *rxb;
+	for (i = ip->rx_ci; i != ip->rx_pi; i = (i + 1) & 511) {
 		skb = ip->rx_skbs[i];
-		rxb = (struct ioc3_erxbuf *) (skb->data - RX_OFFSET);
+		rxb = (struct ioc3_erxbuf *)(skb->data - RX_OFFSET);
 		rxb->w0 = 0;
 	}
 }
@@ -790,7 +780,7 @@ static inline void ioc3_clean_tx_ring(struct ioc3_private *ip)
 	struct sk_buff *skb;
 	int i;
 
-	for (i=0; i < 128; i++) {
+	for (i = 0; i < 128; i++) {
 		skb = ip->tx_skbs[i];
 		if (skb) {
 			ip->tx_skbs[i] = NULL;
@@ -836,16 +826,17 @@ static void ioc3_alloc_rings(struct net_device *dev)
 	unsigned long *rxr;
 	int i;
 
-	if (ip->rxr == NULL) {
+	if (!ip->rxr) {
 		/* Allocate and initialize rx ring.  4kb = 512 entries  */
-		ip->rxr = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
+		ip->rxr = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
 		rxr = ip->rxr;
 		if (!rxr)
-			printk("ioc3_alloc_rings(): get_zeroed_page() failed!\n");
+			pr_err("%s: get_zeroed_page() failed!\n", __func__);
 
 		/* Now the rx buffers.  The RX ring may be larger but
-		   we only allocate 16 buffers for now.  Need to tune
-		   this for performance and memory later.  */
+		 * we only allocate 16 buffers for now.  Need to tune
+		 * this for performance and memory later.
+		 */
 		for (i = 0; i < RX_BUFFS; i++) {
 			struct sk_buff *skb;
 
@@ -859,7 +850,7 @@ static void ioc3_alloc_rings(struct net_device *dev)
 
 			/* Because we reserve afterwards. */
 			skb_put(skb, (1664 + RX_OFFSET));
-			rxb = (struct ioc3_erxbuf *) skb->data;
+			rxb = (struct ioc3_erxbuf *)skb->data;
 			rxr[i] = cpu_to_be64(ioc3_map(rxb, 1));
 			skb_reserve(skb, RX_OFFSET);
 		}
@@ -867,11 +858,11 @@ static void ioc3_alloc_rings(struct net_device *dev)
 		ip->rx_pi = RX_BUFFS;
 	}
 
-	if (ip->txr == NULL) {
+	if (!ip->txr) {
 		/* Allocate and initialize tx rings.  16kb = 128 bufs.  */
 		ip->txr = (struct ioc3_etxd *)__get_free_pages(GFP_KERNEL, 2);
 		if (!ip->txr)
-			printk("ioc3_alloc_rings(): __get_free_pages() failed!\n");
+			pr_err("%s: __get_free_pages() failed!\n", __func__);
 		ip->tx_pi = 0;
 		ip->tx_ci = 0;
 	}
@@ -964,7 +955,7 @@ static void ioc3_init(struct net_device *dev)
 	ioc3_init_rings(dev);
 
 	ip->emcr |= ((RX_OFFSET / 2) << EMCR_RXOFF_SHIFT) | EMCR_TXDMAEN |
-	             EMCR_TXEN | EMCR_RXDMAEN | EMCR_RXEN | EMCR_PADEN;
+		    EMCR_TXEN | EMCR_RXDMAEN | EMCR_RXEN | EMCR_PADEN;
 	writel(ip->emcr, &regs->emcr);
 	writel(EISR_RXTIMERINT | EISR_RXOFLO | EISR_RXBUFOFLO |
 	       EISR_RXMEMERR | EISR_RXPARERR | EISR_TXBUFUFLO |
@@ -986,7 +977,7 @@ static int ioc3_open(struct net_device *dev)
 	struct ioc3_private *ip = netdev_priv(dev);
 
 	if (request_irq(dev->irq, ioc3_interrupt, IRQF_SHARED, ioc3_str, dev)) {
-		printk(KERN_ERR "%s: Can't get irq %d\n", dev->name, dev->irq);
+		netdev_err(dev, "Can't get irq %d\n", dev->irq);
 
 		return -EAGAIN;
 	}
@@ -1015,8 +1006,7 @@ static int ioc3_close(struct net_device *dev)
 	return 0;
 }
 
-/*
- * MENET cards have four IOC3 chips, which are attached to two sets of
+/* MENET cards have four IOC3 chips, which are attached to two sets of
  * PCI slot resources each: the primary connections are on slots
  * 0..3 and the secondaries are on 4..7
  *
@@ -1033,7 +1023,7 @@ static int ioc3_adjacent_is_ioc3(struct pci_dev *pdev, int slot)
 
 	if (dev) {
 		if (dev->vendor == PCI_VENDOR_ID_SGI &&
-			dev->device == PCI_DEVICE_ID_SGI_IOC3)
+		    dev->device == PCI_DEVICE_ID_SGI_IOC3)
 			ret = 1;
 		pci_dev_put(dev);
 	}
@@ -1043,15 +1033,14 @@ static int ioc3_adjacent_is_ioc3(struct pci_dev *pdev, int slot)
 
 static int ioc3_is_menet(struct pci_dev *pdev)
 {
-	return pdev->bus->parent == NULL &&
+	return !pdev->bus->parent &&
 	       ioc3_adjacent_is_ioc3(pdev, 0) &&
 	       ioc3_adjacent_is_ioc3(pdev, 1) &&
 	       ioc3_adjacent_is_ioc3(pdev, 2);
 }
 
 #ifdef CONFIG_SERIAL_8250
-/*
- * Note about serial ports and consoles:
+/* Note about serial ports and consoles:
  * For console output, everyone uses the IOC3 UARTA (offset 0x178)
  * connected to the master node (look in ip27_setup_console() and
  * ip27prom_console_write()).
@@ -1088,16 +1077,16 @@ static void ioc3_8250_register(struct ioc3_uartregs __iomem *uart)
 #define COSMISC_CONSTANT 6
 
 	struct uart_8250_port port = {
-	        .port = {
+		.port = {
 			.irq		= 0,
 			.flags		= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF,
 			.iotype		= UPIO_MEM,
 			.regshift	= 0,
 			.uartclk	= (22000000 << 1) / COSMISC_CONSTANT,
 
-			.membase	= (unsigned char __iomem *) uart,
-			.mapbase	= (unsigned long) uart,
-                }
+			.membase	= (unsigned char __iomem *)uart,
+			.mapbase	= (unsigned long)uart,
+		}
 	};
 	unsigned char lcr;
 
@@ -1113,8 +1102,7 @@ static void ioc3_serial_probe(struct pci_dev *pdev, struct ioc3 *ioc3)
 {
 	u32 sio_iec;
 
-	/*
-	 * We need to recognice and treat the fourth MENET serial as it
+	/* We need to recognice and treat the fourth MENET serial as it
 	 * does not have an SuperIO chip attached to it, therefore attempting
 	 * to access it will result in bus errors.  We call something an
 	 * MENET if PCI slot 0, 1, 2 and 3 of a master PCI bus all have an IOC3
@@ -1125,8 +1113,7 @@ static void ioc3_serial_probe(struct pci_dev *pdev, struct ioc3 *ioc3)
 	if (ioc3_is_menet(pdev) && PCI_SLOT(pdev->devfn) == 3)
 		return;
 
-	/*
-	 * Switch IOC3 to PIO mode.  It probably already was but let's be
+	/* Switch IOC3 to PIO mode.  It probably already was but let's be
 	 * paranoid
 	 */
 	writel(GPCR_UARTA_MODESEL | GPCR_UARTB_MODESEL, &ioc3->gpcr_s);
@@ -1188,15 +1175,15 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		pci_using_dac = 1;
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 		if (err < 0) {
-			printk(KERN_ERR "%s: Unable to obtain 64 bit DMA "
-			       "for consistent allocations\n", pci_name(pdev));
+			pr_err("%s: Unable to obtain 64 bit DMA for consistent allocations\n",
+			       pci_name(pdev));
 			goto out;
 		}
 	} else {
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
-			printk(KERN_ERR "%s: No usable DMA configuration, "
-			       "aborting.\n", pci_name(pdev));
+			pr_err("%s: No usable DMA configuration, aborting.\n",
+			       pci_name(pdev));
 			goto out;
 		}
 		pci_using_dac = 0;
@@ -1227,9 +1214,9 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	ioc3_base = pci_resource_start(pdev, 0);
 	ioc3_size = pci_resource_len(pdev, 0);
-	ioc3 = (struct ioc3 *) ioremap(ioc3_base, ioc3_size);
+	ioc3 = (struct ioc3 *)ioremap(ioc3_base, ioc3_size);
 	if (!ioc3) {
-		printk(KERN_CRIT "ioc3eth(%s): ioremap failed, goodbye.\n",
+		pr_err("ioc3eth(%s): ioremap failed, goodbye.\n",
 		       pci_name(pdev));
 		err = -ENOMEM;
 		goto out_res;
@@ -1259,7 +1246,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	ioc3_mii_init(ip);
 
 	if (ip->mii.phy_id == -1) {
-		printk(KERN_CRIT "ioc3-eth(%s): Didn't find a PHY, goodbye.\n",
+		pr_err("ioc3-eth(%s): Didn't find a PHY, goodbye.\n",
 		       pci_name(pdev));
 		err = -ENODEV;
 		goto out_stop;
@@ -1289,10 +1276,10 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	vendor = (sw_physid1 << 12) | (sw_physid2 >> 4);
 	model  = (sw_physid2 >> 4) & 0x3f;
 	rev    = sw_physid2 & 0xf;
-	printk(KERN_INFO "%s: Using PHY %d, vendor 0x%x, model %d, "
-	       "rev %d.\n", dev->name, ip->mii.phy_id, vendor, model, rev);
-	printk(KERN_INFO "%s: IOC3 SSRAM has %d kbyte.\n", dev->name,
-	       ip->emcr & EMCR_BUFSIZ ? 128 : 64);
+	netdev_info(dev, "Using PHY %d, vendor 0x%x, model %d, rev %d.\n",
+		    ip->mii.phy_id, vendor, model, rev);
+	netdev_info(dev, "IOC3 SSRAM has %d kbyte.\n",
+		    ip->emcr & EMCR_BUFSIZ ? 128 : 64);
 
 	return 0;
 
@@ -1305,8 +1292,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 out_free:
 	free_netdev(dev);
 out_disable:
-	/*
-	 * We should call pci_disable_device(pdev); here if the IOC3 wasn't
+	/* We should call pci_disable_device(pdev); here if the IOC3 wasn't
 	 * such a weird device ...
 	 */
 out:
@@ -1324,8 +1310,7 @@ static void ioc3_remove_one(struct pci_dev *pdev)
 	iounmap(ip->all_regs);
 	pci_release_regions(pdev);
 	free_netdev(dev);
-	/*
-	 * We should call pci_disable_device(pdev); here if the IOC3 wasn't
+	/* We should call pci_disable_device(pdev); here if the IOC3 wasn't
 	 * such a weird device ...
 	 */
 }
@@ -1349,11 +1334,10 @@ static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ioc3_etxd *desc;
 	unsigned long data;
 	unsigned int len;
-	uint32_t w0 = 0;
 	int produce;
+	u32 w0 = 0;
 
-	/*
-	 * IOC3 has a fairly simple minded checksumming hardware which simply
+	/* IOC3 has a fairly simple minded checksumming hardware which simply
 	 * adds up the 1's complement checksum for the entire packet and
 	 * inserts it at an offset which can be specified in the descriptor
 	 * into the transmit packet.  This means we have to compensate for the
@@ -1364,12 +1348,13 @@ static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		const struct iphdr *ih = ip_hdr(skb);
 		const int proto = ntohs(ih->protocol);
 		unsigned int csoff;
-		uint32_t csum, ehsum;
-		uint16_t *eh;
+		u32 csum, ehsum;
+		u16 *eh;
 
 		/* The MAC header.  skb->mac seem the logic approach
-		   to find the MAC header - except it's a NULL pointer ...  */
-		eh = (uint16_t *) skb->data;
+		 * to find the MAC header - except it's a NULL pointer ...
+		 */
+		eh = (u16 *)skb->data;
 
 		/* Sum up dest addr, src addr and protocol  */
 		ehsum = eh[0] + eh[1] + eh[2] + eh[3] + eh[4] + eh[5] + eh[6];
@@ -1379,10 +1364,11 @@ static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		ehsum = (ehsum & 0xffff) + (ehsum >> 16);
 
 		/* Skip IP header; it's sum is always zero and was
-		   already filled in by ip_output.c */
+		 * already filled in by ip_output.c
+		 */
 		csum = csum_tcpudp_nofold(ih->saddr, ih->daddr,
-		                          ih->tot_len - (ih->ihl << 2),
-		                          proto, 0xffff ^ ehsum);
+					  ih->tot_len - (ih->ihl << 2),
+					  proto, 0xffff ^ ehsum);
 
 		csum = (csum & 0xffff) + (csum >> 16);	/* Fold again */
 		csum = (csum & 0xffff) + (csum >> 16);
@@ -1402,7 +1388,7 @@ static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	spin_lock_irq(&ip->ioc3_lock);
 
-	data = (unsigned long) skb->data;
+	data = (unsigned long)skb->data;
 	len = skb->len;
 
 	produce = ip->tx_pi;
@@ -1424,11 +1410,11 @@ static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		unsigned long s2 = data + len - b2;
 
 		desc->cmd    = cpu_to_be32(len | ETXD_INTWHENDONE |
-		                           ETXD_B1V | ETXD_B2V | w0);
+					   ETXD_B1V | ETXD_B2V | w0);
 		desc->bufcnt = cpu_to_be32((s1 << ETXD_B1CNT_SHIFT) |
-		                           (s2 << ETXD_B2CNT_SHIFT));
+					   (s2 << ETXD_B2CNT_SHIFT));
 		desc->p1     = cpu_to_be64(ioc3_map(skb->data, 1));
-		desc->p2     = cpu_to_be64(ioc3_map((void *) b2, 1));
+		desc->p2     = cpu_to_be64(ioc3_map((void *)b2, 1));
 	} else {
 		/* Normal sized packet that doesn't cross a page boundary. */
 		desc->cmd = cpu_to_be32(len | ETXD_INTWHENDONE | ETXD_B1V | w0);
@@ -1436,7 +1422,7 @@ static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		desc->p1     = cpu_to_be64(ioc3_map(skb->data, 1));
 	}
 
-	BARRIER();
+	mb(); /* make sure all descriptor changes are visible */
 
 	ip->tx_skbs[produce] = skb;			/* Remember skb */
 	produce = (produce + 1) & 127;
@@ -1457,7 +1443,7 @@ static void ioc3_timeout(struct net_device *dev)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
 
-	printk(KERN_ERR "%s: transmit timed out, resetting\n", dev->name);
+	netdev_err(dev, "transmit timed out, resetting\n");
 
 	spin_lock_irq(&ip->ioc3_lock);
 
@@ -1471,16 +1457,14 @@ static void ioc3_timeout(struct net_device *dev)
 	netif_wake_queue(dev);
 }
 
-/*
- * Given a multicast ethernet address, this routine calculates the
+/* Given a multicast ethernet address, this routine calculates the
  * address's bit index in the logical address filter mask
  */
-
 static inline unsigned int ioc3_hash(const unsigned char *addr)
 {
 	unsigned int temp = 0;
-	u32 crc;
 	int bits;
+	u32 crc;
 
 	crc = ether_crc_le(ETH_ALEN, addr);
 
@@ -1494,8 +1478,8 @@ static inline unsigned int ioc3_hash(const unsigned char *addr)
 	return temp;
 }
 
-static void ioc3_get_drvinfo (struct net_device *dev,
-	struct ethtool_drvinfo *info)
+static void ioc3_get_drvinfo(struct net_device *dev,
+			     struct ethtool_drvinfo *info)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
 
@@ -1594,8 +1578,9 @@ static void ioc3_set_multicast_list(struct net_device *dev)
 		if ((dev->flags & IFF_ALLMULTI) ||
 		    (netdev_mc_count(dev) > 64)) {
 			/* Too many for hashing to make sense or we want all
-			   multicast packets anyway,  so skip computing all the
-			   hashes and just accept all packets.  */
+			 * multicast packets anyway,  so skip computing all the
+			 * hashes and just accept all packets.
+			 */
 			ip->ehar_h = 0xffffffff;
 			ip->ehar_l = 0xffffffff;
 		} else {
-- 
2.13.7


^ permalink raw reply related

* [PATCH v3 net-next 01/15] MIPS: SGI-IP27: remove ioc3 ethernet init
From: Thomas Bogendoerfer @ 2019-08-30  9:25 UTC (permalink / raw)
  To: Ralf Baechle, Paul Burton, James Hogan, David S. Miller,
	linux-mips, linux-kernel, netdev
In-Reply-To: <20190830092539.24550-1-tbogendoerfer@suse.de>

Removed not needed disabling of ethernet interrupts in IP27 platform code.

Acked-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 arch/mips/sgi-ip27/ip27-init.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index 066b33f50bcc..59d5375c9021 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -130,17 +130,6 @@ cnodeid_t get_compact_nodeid(void)
 	return NASID_TO_COMPACT_NODEID(get_nasid());
 }
 
-static inline void ioc3_eth_init(void)
-{
-	struct ioc3 *ioc3;
-	nasid_t nid;
-
-	nid = get_nasid();
-	ioc3 = (struct ioc3 *) KL_CONFIG_CH_CONS_INFO(nid)->memory_base;
-
-	ioc3->eier = 0;
-}
-
 extern void ip27_reboot_setup(void);
 
 void __init plat_mem_setup(void)
@@ -182,8 +171,6 @@ void __init plat_mem_setup(void)
 		panic("Kernel compiled for N mode.");
 #endif
 
-	ioc3_eth_init();
-
 	ioport_resource.start = 0;
 	ioport_resource.end = ~0UL;
 	set_io_port_base(IO_BASE);
-- 
2.13.7


^ permalink raw reply related

* [PATCH v3 net-next 07/15] net: sgi: ioc3-eth: separate tx and rx ring handling
From: Thomas Bogendoerfer @ 2019-08-30  9:25 UTC (permalink / raw)
  To: Ralf Baechle, Paul Burton, James Hogan, David S. Miller,
	linux-mips, linux-kernel, netdev
In-Reply-To: <20190830092539.24550-1-tbogendoerfer@suse.de>

After allocation of descriptor memory is now done once in probe
handling of tx ring is completely done by ioc3_clean_tx_ring. So
we remove the remaining tx ring actions out of ioc3_alloc_rings
and ioc3_free_rings and rename it to ioc3_[alloc|free]_rx_bufs
to better describe what they are doing.

Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/sgi/ioc3-eth.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index fe77e4d7732f..29b9e5098052 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -778,12 +778,10 @@ static inline void ioc3_clean_tx_ring(struct ioc3_private *ip)
 	ip->tx_ci = 0;
 }
 
-static void ioc3_free_rings(struct ioc3_private *ip)
+static void ioc3_free_rx_bufs(struct ioc3_private *ip)
 {
 	int rx_entry, n_entry;
 
-	ioc3_clean_tx_ring(ip);
-
 	n_entry = ip->rx_ci;
 	rx_entry = ip->rx_pi;
 
@@ -794,7 +792,7 @@ static void ioc3_free_rings(struct ioc3_private *ip)
 	}
 }
 
-static void ioc3_alloc_rings(struct net_device *dev)
+static void ioc3_alloc_rx_bufs(struct net_device *dev)
 {
 	struct ioc3_private *ip = netdev_priv(dev);
 	struct ioc3_erxbuf *rxb;
@@ -824,9 +822,6 @@ static void ioc3_alloc_rings(struct net_device *dev)
 	}
 	ip->rx_ci = 0;
 	ip->rx_pi = RX_BUFFS;
-
-	ip->tx_pi = 0;
-	ip->tx_ci = 0;
 }
 
 static void ioc3_init_rings(struct net_device *dev)
@@ -835,8 +830,8 @@ static void ioc3_init_rings(struct net_device *dev)
 	struct ioc3_ethregs *regs = ip->regs;
 	unsigned long ring;
 
-	ioc3_free_rings(ip);
-	ioc3_alloc_rings(dev);
+	ioc3_free_rx_bufs(ip);
+	ioc3_alloc_rx_bufs(dev);
 
 	ioc3_clean_tx_ring(ip);
 
@@ -962,7 +957,9 @@ static int ioc3_close(struct net_device *dev)
 	ioc3_stop(ip);
 	free_irq(dev->irq, dev);
 
-	ioc3_free_rings(ip);
+	ioc3_free_rx_bufs(ip);
+	ioc3_clean_tx_ring(ip);
+
 	return 0;
 }
 
@@ -1263,12 +1260,11 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 out_stop:
 	ioc3_stop(ip);
 	del_timer_sync(&ip->ioc3_timer);
-	ioc3_free_rings(ip);
+	ioc3_free_rx_bufs(ip);
 	if (ip->rxr)
 		free_page((unsigned long)ip->rxr);
 	if (ip->txr)
 		free_pages((unsigned long)ip->txr, 2);
-	kfree(ip->txr);
 out_res:
 	pci_release_regions(pdev);
 out_free:
-- 
2.13.7


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox