Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v7 net-next 3/6] samples: bpf: add userspace example for modifying sk_bound_dev_if
From: David Ahern @ 2016-12-01 16:48 UTC (permalink / raw)
  To: netdev; +Cc: daniel, ast, daniel, maheshb, tgraf, David Ahern
In-Reply-To: <1480610888-31082-1-git-send-email-dsa@cumulusnetworks.com>

Add a simple program to demonstrate the ability to attach a bpf program
to a cgroup that sets sk_bound_dev_if for AF_INET{6} sockets when they
are created.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v7
- no change

v6
- added conversion from device name to index in test program

v5
- changed BPF_CGROUP_INET_SOCK to BPF_CGROUP_INET_SOCK_CREATE

v4
- added test_cgrp2_sock.sh for an automated test

v3
- revert to BPF_PROG_TYPE_CGROUP_SOCK prog type

v2
- removed bpf_sock_store_u32 references
- changed BPF_CGROUP_INET_SOCK_CREATE to BPF_CGROUP_INET_SOCK
- remove BPF_PROG_TYPE_CGROUP_SOCK prog type and add prog_subtype

 samples/bpf/Makefile           |  2 +
 samples/bpf/test_cgrp2_sock.c  | 83 ++++++++++++++++++++++++++++++++++++++++++
 samples/bpf/test_cgrp2_sock.sh | 47 ++++++++++++++++++++++++
 3 files changed, 132 insertions(+)
 create mode 100644 samples/bpf/test_cgrp2_sock.c
 create mode 100755 samples/bpf/test_cgrp2_sock.sh

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 3ceb5a9d86df..a335b218198e 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -23,6 +23,7 @@ hostprogs-y += map_perf_test
 hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += test_cgrp2_attach
+hostprogs-y += test_cgrp2_sock
 hostprogs-y += xdp1
 hostprogs-y += xdp2
 hostprogs-y += test_current_task_under_cgroup
@@ -51,6 +52,7 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
 test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
 test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
 test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
+test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
new file mode 100644
index 000000000000..d467b3c1c55c
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -0,0 +1,83 @@
+/* eBPF example program:
+ *
+ * - Loads eBPF program
+ *
+ *   The eBPF program sets the sk_bound_dev_if index in new AF_INET{6}
+ *   sockets opened by processes in the cgroup.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+static int prog_load(int idx)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+		BPF_MOV64_IMM(BPF_REG_3, idx),
+		BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
+		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
+		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+		BPF_EXIT_INSN(),
+	};
+
+	return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
+			     "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{
+	printf("Usage: %s cg-path device-index\n", argv0);
+	return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+	int cg_fd, prog_fd, ret;
+	unsigned int idx;
+
+	if (argc < 2)
+		return usage(argv[0]);
+
+	idx = if_nametoindex(argv[2]);
+	if (!idx) {
+		printf("Invalid device name\n");
+		return EXIT_FAILURE;
+	}
+
+	cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+	if (cg_fd < 0) {
+		printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	prog_fd = prog_load(idx);
+	printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+	if (prog_fd < 0) {
+		printf("Failed to load prog: '%s'\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
+	if (ret < 0) {
+		printf("Failed to attach prog to cgroup: '%s'\n",
+		       strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
new file mode 100755
index 000000000000..925fd467c7cc
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+function config_device {
+	ip netns add at_ns0
+	ip link add veth0 type veth peer name veth0b
+	ip link set veth0b up
+	ip link set veth0 netns at_ns0
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip link add foo type vrf table 1234
+	ip link set foo up
+	ip addr add 172.16.1.101/24 dev veth0b
+	ip addr add 2401:db00::2/64 dev veth0b nodad
+	ip link set veth0b master foo
+}
+
+function attach_bpf {
+	rm -rf /tmp/cgroupv2
+	mkdir -p /tmp/cgroupv2
+	mount -t cgroup2 none /tmp/cgroupv2
+	mkdir -p /tmp/cgroupv2/foo
+	test_cgrp2_sock /tmp/cgroupv2/foo foo
+	echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+}
+
+function cleanup {
+	set +ex
+	ip netns delete at_ns0
+	ip link del veth0
+	ip link del foo
+	umount /tmp/cgroupv2
+	rm -rf /tmp/cgroupv2
+	set -ex
+}
+
+function do_test {
+	ping -c1 -w1 172.16.1.100
+	ping6 -c1 -w1 2401:db00::1
+}
+
+cleanup 2>/dev/null
+config_device
+attach_bpf
+do_test
+cleanup
+echo "*** PASS ***"
-- 
2.1.4

^ permalink raw reply related

* [PATCH v7 net-next 4/6] bpf: Add support for reading socket family, type, protocol
From: David Ahern @ 2016-12-01 16:48 UTC (permalink / raw)
  To: netdev; +Cc: daniel, ast, daniel, maheshb, tgraf, David Ahern
In-Reply-To: <1480610888-31082-1-git-send-email-dsa@cumulusnetworks.com>

Add socket family, type and protocol to bpf_sock allowing bpf programs
read-only access.

Add __sk_flags_offset[0] to struct sock before the bitfield to
programmtically determine the offset of the unsigned int containing
protocol and type.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v7
- remove convert_sock_access helper and put code inline

v6
- new patch for version 6 of set

 include/net/sock.h       | 15 +++++++++++++++
 include/uapi/linux/bpf.h |  3 +++
 net/core/filter.c        | 21 +++++++++++++++++++++
 3 files changed, 39 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index 442cbb118a07..69afda6bea15 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -389,6 +389,21 @@ struct sock {
 	 * Because of non atomicity rules, all
 	 * changes are protected by socket lock.
 	 */
+	unsigned int		__sk_flags_offset[0];
+#ifdef __BIG_ENDIAN_BITFIELD
+#define SK_FL_PROTO_SHIFT  16
+#define SK_FL_PROTO_MASK   0x00ff0000
+
+#define SK_FL_TYPE_SHIFT   0
+#define SK_FL_TYPE_MASK    0x0000ffff
+#else
+#define SK_FL_PROTO_SHIFT  8
+#define SK_FL_PROTO_MASK   0x0000ff00
+
+#define SK_FL_TYPE_SHIFT   16
+#define SK_FL_TYPE_MASK    0xffff0000
+#endif
+
 	kmemcheck_bitfield_begin(flags);
 	unsigned int		sk_padding : 2,
 				sk_no_check_tx : 1,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 75964e00d947..b47ffd117fd6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -541,6 +541,9 @@ struct bpf_tunnel_key {
 
 struct bpf_sock {
 	__u32 bound_dev_if;
+	__u32 family;
+	__u32 type;
+	__u32 protocol;
 };
 
 /* User return codes for XDP prog type.
diff --git a/net/core/filter.c b/net/core/filter.c
index 5ee722dc097d..efcc22b44ec1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2979,6 +2979,27 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
 				      offsetof(struct sock, sk_bound_dev_if));
 		break;
+
+	case offsetof(struct bpf_sock, family):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sock, sk_family));
+		break;
+
+	case offsetof(struct bpf_sock, type):
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sock, __sk_flags_offset));
+		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT);
+		break;
+
+	case offsetof(struct bpf_sock, protocol):
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sock, __sk_flags_offset));
+		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT);
+		break;
 	}
 
 	return insn - insn_buf;
-- 
2.1.4

^ permalink raw reply related

* [PATCH v7 net-next 6/6] samples/bpf: add userspace example for prohibiting sockets
From: David Ahern @ 2016-12-01 16:48 UTC (permalink / raw)
  To: netdev; +Cc: daniel, ast, daniel, maheshb, tgraf, David Ahern
In-Reply-To: <1480610888-31082-1-git-send-email-dsa@cumulusnetworks.com>

Add examples preventing a process in a cgroup from opening a socket
based family, protocol and type.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v7
- fix header file includes to use symbolic names in sock_flags_kern.c

v6
- new patch for version 6

 samples/bpf/Makefile            |  4 ++
 samples/bpf/sock_flags_kern.c   | 44 ++++++++++++++++++++++
 samples/bpf/test_cgrp2_sock2.c  | 66 +++++++++++++++++++++++++++++++++
 samples/bpf/test_cgrp2_sock2.sh | 81 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 195 insertions(+)
 create mode 100644 samples/bpf/sock_flags_kern.c
 create mode 100644 samples/bpf/test_cgrp2_sock2.c
 create mode 100755 samples/bpf/test_cgrp2_sock2.sh

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index a335b218198e..8df12f9429dc 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -24,6 +24,7 @@ hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += test_cgrp2_attach
 hostprogs-y += test_cgrp2_sock
+hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
 hostprogs-y += xdp2
 hostprogs-y += test_current_task_under_cgroup
@@ -53,6 +54,7 @@ test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
 test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
 test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
 test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o
+test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
@@ -73,6 +75,7 @@ always += tracex3_kern.o
 always += tracex4_kern.o
 always += tracex5_kern.o
 always += tracex6_kern.o
+always += sock_flags_kern.o
 always += test_probe_write_user_kern.o
 always += trace_output_kern.o
 always += tcbpf1_kern.o
@@ -106,6 +109,7 @@ HOSTLOADLIBES_tracex3 += -lelf
 HOSTLOADLIBES_tracex4 += -lelf -lrt
 HOSTLOADLIBES_tracex5 += -lelf
 HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
 HOSTLOADLIBES_test_probe_write_user += -lelf
 HOSTLOADLIBES_trace_output += -lelf -lrt
 HOSTLOADLIBES_lathist += -lelf
diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
new file mode 100644
index 000000000000..533dd11a6baa
--- /dev/null
+++ b/samples/bpf/sock_flags_kern.c
@@ -0,0 +1,44 @@
+#include <uapi/linux/bpf.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/in6.h>
+#include "bpf_helpers.h"
+
+SEC("cgroup/sock1")
+int bpf_prog1(struct bpf_sock *sk)
+{
+	char fmt[] = "socket: family %d type %d protocol %d\n";
+
+	bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
+
+	/* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets
+	 * ie., make ping6 fail
+	 */
+	if (sk->family == PF_INET6 &&
+	    sk->type == SOCK_RAW   &&
+	    sk->protocol == IPPROTO_ICMPV6)
+		return 0;
+
+	return 1;
+}
+
+SEC("cgroup/sock2")
+int bpf_prog2(struct bpf_sock *sk)
+{
+	char fmt[] = "socket: family %d type %d protocol %d\n";
+
+	bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
+
+	/* block PF_INET, SOCK_RAW, IPPROTO_ICMP sockets
+	 * ie., make ping fail
+	 */
+	if (sk->family == PF_INET &&
+	    sk->type == SOCK_RAW  &&
+	    sk->protocol == IPPROTO_ICMP)
+		return 0;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c
new file mode 100644
index 000000000000..455ef0d06e93
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock2.c
@@ -0,0 +1,66 @@
+/* eBPF example program:
+ *
+ * - Loads eBPF program
+ *
+ *   The eBPF program loads a filter from file and attaches the
+ *   program to a cgroup using BPF_PROG_ATTACH
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+static int usage(const char *argv0)
+{
+	printf("Usage: %s cg-path filter-path [filter-id]\n", argv0);
+	return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+	int cg_fd, ret, filter_id = 0;
+
+	if (argc < 3)
+		return usage(argv[0]);
+
+	cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+	if (cg_fd < 0) {
+		printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	if (load_bpf_file(argv[2]))
+		return EXIT_FAILURE;
+
+	printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+	if (argc > 3)
+		filter_id = atoi(argv[3]);
+
+	if (filter_id > prog_cnt) {
+		printf("Invalid program id; program not found in file\n");
+		return EXIT_FAILURE;
+	}
+
+	ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
+			      BPF_CGROUP_INET_SOCK_CREATE);
+	if (ret < 0) {
+		printf("Failed to attach prog to cgroup: '%s'\n",
+		       strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
new file mode 100755
index 000000000000..891f12a0e26f
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+function config_device {
+	ip netns add at_ns0
+	ip link add veth0 type veth peer name veth0b
+	ip link set veth0b up
+	ip link set veth0 netns at_ns0
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add 172.16.1.101/24 dev veth0b
+	ip addr add 2401:db00::2/64 dev veth0b nodad
+}
+
+function config_cgroup {
+	rm -rf /tmp/cgroupv2
+	mkdir -p /tmp/cgroupv2
+	mount -t cgroup2 none /tmp/cgroupv2
+	mkdir -p /tmp/cgroupv2/foo
+	echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+}
+
+
+function attach_bpf {
+	test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
+	[ $? -ne 0 ] && exit 1
+}
+
+function cleanup {
+	ip link del veth0b
+	ip netns delete at_ns0
+	umount /tmp/cgroupv2
+	rm -rf /tmp/cgroupv2
+}
+
+cleanup 2>/dev/null
+
+set -e
+config_device
+config_cgroup
+set +e
+
+#
+# Test 1 - fail ping6
+#
+attach_bpf 0
+ping -c1 -w1 172.16.1.100
+if [ $? -ne 0 ]; then
+	echo "ping failed when it should succeed"
+	cleanup
+	exit 1
+fi
+
+ping6 -c1 -w1 2401:db00::1
+if [ $? -eq 0 ]; then
+	echo "ping6 succeeded when it should not"
+	cleanup
+	exit 1
+fi
+
+#
+# Test 2 - fail ping
+#
+attach_bpf 1
+ping6 -c1 -w1 2401:db00::1
+if [ $? -ne 0 ]; then
+	echo "ping6 failed when it should succeed"
+	cleanup
+	exit 1
+fi
+
+ping -c1 -w1 172.16.1.100
+if [ $? -eq 0 ]; then
+	echo "ping succeeded when it should not"
+	cleanup
+	exit 1
+fi
+
+cleanup
+echo
+echo "*** PASS ***"
-- 
2.1.4

^ permalink raw reply related

* [PATCH v7 net-next 5/6] samples/bpf: Update bpf loader for cgroup section names
From: David Ahern @ 2016-12-01 16:48 UTC (permalink / raw)
  To: netdev; +Cc: daniel, ast, daniel, maheshb, tgraf, David Ahern
In-Reply-To: <1480610888-31082-1-git-send-email-dsa@cumulusnetworks.com>

Add support for section names starting with cgroup/skb and cgroup/sock.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v7
- no change

v6
- new patch for version 6

 samples/bpf/bpf_load.c | 14 +++++++++++---
 samples/bpf/bpf_load.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 62f54d6eb8bf..49b45ccbe153 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -52,6 +52,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
 	bool is_xdp = strncmp(event, "xdp", 3) == 0;
 	bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
+	bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
+	bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
 	enum bpf_prog_type prog_type;
 	char buf[256];
 	int fd, efd, err, id;
@@ -72,6 +74,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		prog_type = BPF_PROG_TYPE_XDP;
 	} else if (is_perf_event) {
 		prog_type = BPF_PROG_TYPE_PERF_EVENT;
+	} else if (is_cgroup_skb) {
+		prog_type = BPF_PROG_TYPE_CGROUP_SKB;
+	} else if (is_cgroup_sk) {
+		prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
 	} else {
 		printf("Unknown event '%s'\n", event);
 		return -1;
@@ -85,7 +91,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
 	prog_fd[prog_cnt++] = fd;
 
-	if (is_xdp || is_perf_event)
+	if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
 		return 0;
 
 	if (is_socket) {
@@ -334,7 +340,8 @@ int load_bpf_file(char *path)
 			    memcmp(shname_prog, "tracepoint/", 11) == 0 ||
 			    memcmp(shname_prog, "xdp", 3) == 0 ||
 			    memcmp(shname_prog, "perf_event", 10) == 0 ||
-			    memcmp(shname_prog, "socket", 6) == 0)
+			    memcmp(shname_prog, "socket", 6) == 0 ||
+			    memcmp(shname_prog, "cgroup/", 7) == 0)
 				load_and_attach(shname_prog, insns, data_prog->d_size);
 		}
 	}
@@ -353,7 +360,8 @@ int load_bpf_file(char *path)
 		    memcmp(shname, "tracepoint/", 11) == 0 ||
 		    memcmp(shname, "xdp", 3) == 0 ||
 		    memcmp(shname, "perf_event", 10) == 0 ||
-		    memcmp(shname, "socket", 6) == 0)
+		    memcmp(shname, "socket", 6) == 0 ||
+		    memcmp(shname, "cgroup/", 7) == 0)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
 
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index dfa57fe65c8e..4adeeef53ad6 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -7,6 +7,7 @@
 extern int map_fd[MAX_MAPS];
 extern int prog_fd[MAX_PROGS];
 extern int event_fd[MAX_PROGS];
+extern int prog_cnt;
 
 /* parses elf file compiled by llvm .c->.o
  * . parses 'maps' section and creates maps via BPF syscall
-- 
2.1.4

^ permalink raw reply related

* Re: DSA vs. SWTICHDEV ?
From: Murali Karicheri @ 2016-12-01 16:50 UTC (permalink / raw)
  To: Joakim Tjernlund, netdev@vger.kernel.org, Roger Quadros,
	Grygorii Strashko
In-Reply-To: <1480495831.3563.135.camel@infinera.com>

On 11/30/2016 03:50 AM, Joakim Tjernlund wrote:
> I am trying to wrap my head around these two "devices" and have a hard time telling them apart.
> We are looking att adding a faily large switch(over PCIe) to our board and from what I can tell
> switchdev is the new way to do it but DSA is still there. Is it possible to just list
> how they differ?
> 
> What can switchdev do that DSA cannot?
> 
> What can DSA do that switchdev cannot?
> 
> 
> Can one enable switchdev and dsa for the same switch device?
> 
>  Jocke 
> 

DSA/Switchdev experts,

Nice to see this discussion as I am trying to evaluat what model
works best for our hardware. From my evaluation so far, DSA can be
used even though there is no tag protocol used between the Host and
Switch. In our hardware, the Host and Switch are part of the SoC.
The Host interface is a shared memory with queues implemented at
hardware. The Phy is attached to the mii ports externally on the board.
Also this hardware is programmable through firmware. More details 
can be seen at http://processors.wiki.ti.com/index.php/PRU-ICSS
PRU can run a firmware to configure the hardware in one of the following:-

1. EMAC mode where it appears as two Ethernet ports
2. Switch mode where it implements a simple Ethernet switch. Currently
   it doesn't have address learning capability, but in future it
   can.
3. Switch with HSR/PRP offload where it provides HSR/PRP protocol
   support and cut through switch.

So a device need to function in one of the modes. A a regular Ethernet
driver that provides two network devices, one per port, and switchdev
for each physical port (in switch mode) will look ideal in this case.
This will allow attaching the associated interfaces to a bridge (where
a L2 offload is possible in the future). This also helps to attach the
interfaces to an HSR device at the top layer like bridge to support
HSR/PRP protocol with offload possible to the PRU Switch.

Using a DSA for this appears to be adding more complexity to the driver
model and may not be ideal. What do you think? 

-- 
Murali Karicheri
Linux Kernel, Keystone

^ permalink raw reply

* Re: [flamebait] xdp, well meaning but pointless
From: Florian Westphal @ 2016-12-01 16:51 UTC (permalink / raw)
  To: David Miller; +Cc: tgraf, fw, netdev
In-Reply-To: <20161201.111947.888676978252329124.davem@davemloft.net>

David Miller <davem@davemloft.net> wrote:
> Saying that ntuple filters can handle the early drop use case doesn't
> take into consideration the nature of the tables (hundreds of
> thousands of "evil" IP addresses),

Thats not what I said.

But Ok, message received. I rest my case.

^ permalink raw reply

* Re: [PATCH v7 net-next 1/6] bpf: Refactor cgroups code in prep for new type
From: Alexei Starovoitov @ 2016-12-01 16:56 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, daniel, maheshb, tgraf
In-Reply-To: <1480610888-31082-2-git-send-email-dsa@cumulusnetworks.com>

On Thu, Dec 01, 2016 at 08:48:03AM -0800, David Ahern wrote:
> Code move and rename only; no functional change intended.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* Re: [PATCH v7 net-next 4/6] bpf: Add support for reading socket family, type, protocol
From: Alexei Starovoitov @ 2016-12-01 16:57 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, daniel, maheshb, tgraf
In-Reply-To: <1480610888-31082-5-git-send-email-dsa@cumulusnetworks.com>

On Thu, Dec 01, 2016 at 08:48:06AM -0800, David Ahern wrote:
> Add socket family, type and protocol to bpf_sock allowing bpf programs
> read-only access.
> 
> Add __sk_flags_offset[0] to struct sock before the bitfield to
> programmtically determine the offset of the unsigned int containing
> protocol and type.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* Re: [PATCH v7 net-next 3/6] samples: bpf: add userspace example for modifying sk_bound_dev_if
From: Alexei Starovoitov @ 2016-12-01 16:57 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, daniel, maheshb, tgraf
In-Reply-To: <1480610888-31082-4-git-send-email-dsa@cumulusnetworks.com>

On Thu, Dec 01, 2016 at 08:48:05AM -0800, David Ahern wrote:
> Add a simple program to demonstrate the ability to attach a bpf program
> to a cgroup that sets sk_bound_dev_if for AF_INET{6} sockets when they
> are created.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* Re: [PATCH v7 net-next 5/6] samples/bpf: Update bpf loader for cgroup section names
From: Alexei Starovoitov @ 2016-12-01 16:58 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, daniel, maheshb, tgraf
In-Reply-To: <1480610888-31082-6-git-send-email-dsa@cumulusnetworks.com>

On Thu, Dec 01, 2016 at 08:48:07AM -0800, David Ahern wrote:
> Add support for section names starting with cgroup/skb and cgroup/sock.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* Re: [PATCH v7 net-next 6/6] samples/bpf: add userspace example for prohibiting sockets
From: Alexei Starovoitov @ 2016-12-01 16:58 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, daniel, maheshb, tgraf
In-Reply-To: <1480610888-31082-7-git-send-email-dsa@cumulusnetworks.com>

On Thu, Dec 01, 2016 at 08:48:08AM -0800, David Ahern wrote:
> Add examples preventing a process in a cgroup from opening a socket
> based family, protocol and type.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* RE: [PATCH V2 net-next] net: hns: Fix to conditionally convey RX checksum flag to stack
From: Salil Mehta @ 2016-12-01 16:59 UTC (permalink / raw)
  To: David Miller
  Cc: Zhuangyuzeng (Yisen), mehta.salil.lnk@gmail.com,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Linuxarm
In-Reply-To: <20161130.142539.1927956259851457047.davem@davemloft.net>

> -----Original Message-----
> From: Salil Mehta
> Sent: Thursday, December 01, 2016 12:09 PM
> To: 'David Miller'
> Cc: Zhuangyuzeng (Yisen); mehta.salil.lnk@gmail.com;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Linuxarm
> Subject: RE: [PATCH V2 net-next] net: hns: Fix to conditionally convey
> RX checksum flag to stack
> 
> > -----Original Message-----
> > From: David Miller [mailto:davem@davemloft.net]
> > Sent: Wednesday, November 30, 2016 7:26 PM
> > To: Salil Mehta
> > Cc: Zhuangyuzeng (Yisen); mehta.salil.lnk@gmail.com;
> > netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Linuxarm
> > Subject: Re: [PATCH V2 net-next] net: hns: Fix to conditionally
> convey
> > RX checksum flag to stack
> >
> > From: Salil Mehta <salil.mehta@huawei.com>
> > Date: Tue, 29 Nov 2016 13:09:45 +0000
> >
> > > +	/* We only support checksum for IPv4,UDP(over IPv4 or IPv6),
> > TCP(over
> > > +	 * IPv4 or IPv6) and SCTP but we support many L3(IPv4, IPv6,
> > MPLS,
> > > +	 * PPPoE etc) and L4(TCP, UDP, GRE, SCTP, IGMP, ICMP etc.)
> > protocols.
> > > +	 * We want to filter out L3 and L4 protocols early on for which
> > checksum
> > > +	 * is not supported.
> >  ...
> > > +	 */
> > > +	l3id = hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S);
> > > +	l4id = hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S);
> > > +	if ((l3id != HNS_RX_FLAG_L3ID_IPV4) &&
> > > +	    ((l3id != HNS_RX_FLAG_L3ID_IPV6) ||
> > > +	     (l4id != HNS_RX_FLAG_L4ID_UDP)) &&
> > > +	    ((l3id != HNS_RX_FLAG_L3ID_IPV6) ||
> > > +	     (l4id != HNS_RX_FLAG_L4ID_TCP)) &&
> > > +	    (l4id != HNS_RX_FLAG_L4ID_SCTP))
> > > +		return;
> >
> > I have a hard time understanding this seemingly overcomplicated
> > check.
> >
> > It looks like if L3 is IPV4 it will accept any underlying L4
> protocol,
> > but is that what is really intended?  That doesn't match what this
> new
> > comment states.
> I agree that it is bit difficult to read. Earlier, I was banking on the
> register(mistakenly, its hardware implementation err ) to de-multiplex
> the checksum error type. The register supported indication of just IPv4
> Header Checksum Error as well (which meant it could carry any L4
> protocol).
> IPv6 does not have similar Header checksum error. Therefore, to check
> if it is just IPv4 Header checksum error or any supported L4 transport
> (UDP or TCP) error over IPv4 or IPv6 I had to bank upon above complex
> check
> 
> Below suggested solution check would have been insufficient for
> example, if packet had IPv4/IGMP and there was a checksum error in IPv4
> header.
> 
> Comment states:
> " We only support checksum for IPv4, UDP(over IPv4 or IPv6),
>   TCP(over IPv4 or IPv6) and SCTP"
>    1) Checksum of IPv4 (IPv4 header)
>    2) UDP(over IPv4 or IPv6)
>    3) TCP(over IPv4 or IPv6)
>    4) SCTP (over IPv4 or IPv6)*
> 
> (*) I should have put IPv4/IPv6 check for SCTP in the code
>     and made it clear in the comment as well?
> 
> >
> > My understanding is that the chip supports checksums for:
> >
> > 	UDP/IPV4
> > 	UDP/IPV6
> > 	TCP/IPV4
> > 	TCP/IPV6
> > 	SCTP/IPV4
> > 	SCTP/IPV6
> 
> Hardware also supports checksum of IPv4 Header.
> 
> >
> > So the simplest thing is to validate each level one at a time:
> >
> > 	if (l3 != IPV4 && l3 != IPV6)
> > 		return;
> > 	if (l4 != UDP && l4 != TCP && l4 != SCTP)
> > 		return;
> I guess above check will fail to catch cases like IPv4/IGMP, when there
> is a bad checksum in The IPv4 header.
> 
> But maybe now since we don't have any method to de-multiplex the kind
> of
> checksum error (cannot depend upon register) we can have below code
> re-arrangement:
> 
> hns_nic_rx_checksum() {
>       /* check supported L3 protocol */
> 	if (l3 != IPV4 && l3 != IPV6)
> 		return;
>       /* check if L3 protocols error */
>       if (l3e)
> 	 	return;
> 
>       /* check if the packets are fragmented */
> 	If (l3frags)
> 		Return;
> 
>       /* check supported L4 protocol */
>  	if (l4 != UDP && l4 != TCP && l4 != SCTP)
>  		return;
Hi David,
This logic might not do as well since IGMP packet with valid IP
Checksum will be bypassed here. We would want to set
CHECKSUM_UNNECESSARY for such IP packets as well.

It looks to me the cumbersome check in the PATCH V2 should
be retained.

>       /* check if any L4 protocol error */
>       if (l3e)
> 	 	return;
> 
>       /* packet with valid checksum - covey to stack */
>       skb->ip_summed = CHECKSUM_UNNECESSARY
> }

> 
> Hope I am not missing something here. Please correct my understanding
> if there is any gap here. Thanks!
> 
> Best regards
> Salil

^ permalink raw reply

* [PATCH net-next 0/3] sfc: defalconisation fixups
From: Edward Cree @ 2016-12-01 16:59 UTC (permalink / raw)
  To: linux-net-drivers, davem; +Cc: bkenward, netdev

A bug fix, the Kconfig change, and cleaning up a bit more unused code.

Edward Cree (3):
  sfc: fix debug message format string in efx_farch_handle_rx_not_ok
  sfc: don't select SFC_FALCON
  sfc: remove RESET_TYPE_RX_RECOVERY

 drivers/net/ethernet/sfc/Kconfig | 1 -
 drivers/net/ethernet/sfc/efx.c   | 1 -
 drivers/net/ethernet/sfc/enum.h  | 5 +----
 drivers/net/ethernet/sfc/farch.c | 2 +-
 4 files changed, 2 insertions(+), 7 deletions(-)

^ permalink raw reply

* Re: [PATCH v7 net-next 0/6] net: Add bpf support for sockets
From: Alexei Starovoitov @ 2016-12-01 16:59 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, daniel, maheshb, tgraf
In-Reply-To: <1480610888-31082-1-git-send-email-dsa@cumulusnetworks.com>

On Thu, Dec 01, 2016 at 08:48:02AM -0800, David Ahern wrote:
> The recently added VRF support in Linux leverages the bind-to-device
> API for programs to specify an L3 domain for a socket. While
> SO_BINDTODEVICE has been around for ages, not every ipv4/ipv6 capable
> program has support for it. Even for those programs that do support it,
> the API requires processes to be started as root (CAP_NET_RAW) which
> is not desirable from a general security perspective.
> 
> This patch set leverages Daniel Mack's work to attach bpf programs to
> a cgroup to provide a capability to set sk_bound_dev_if for all
> AF_INET{6} sockets opened by a process in a cgroup when the sockets
> are allocated.
> 
> For example:
>  1. configure vrf (e.g., using ifupdown2)
>         auto eth0
>         iface eth0 inet dhcp
>             vrf mgmt
> 
>         auto mgmt
>         iface mgmt
>             vrf-table auto
> 
>  2. configure cgroup
>         mount -t cgroup2 none /tmp/cgroupv2
>         mkdir /tmp/cgroupv2/mgmt
>         test_cgrp2_sock /tmp/cgroupv2/mgmt 15
> 
>  3. set shell into cgroup (e.g., can be done at login using pam)
>         echo $$ >> /tmp/cgroupv2/mgmt/cgroup.procs
> 
> At this point all commands run in the shell (e.g, apt) have sockets
> automatically bound to the VRF (see output of ss -ap 'dev == <vrf>'),
> including processes not running as root.
> 
> This capability enables running any program in a VRF context and is key
> to deploying Management VRF, a fundamental configuration for networking
> gear, with any Linux OS installation.
> 
> This patchset also exports the socket family, type and protocol as
> read-only allowing bpf filters to deny a process in a cgroup the ability
> to open specific types of AF_INET or AF_INET6 sockets.
> 
> v7
> - comments from Alexei

Looks great.
In case you need to change something. Please keep my Acks
on patches that were kept as-is.
Thanks

^ permalink raw reply

* [PATCH net-next 1/3] sfc: fix debug message format string in efx_farch_handle_rx_not_ok
From: Edward Cree @ 2016-12-01 17:00 UTC (permalink / raw)
  To: linux-net-drivers, davem; +Cc: bkenward, netdev
In-Reply-To: <c52a0276-e379-7841-8d10-d5a834b81c4e@solarflare.com>

Defalconisation removed one of the string arguments, but missed the
 corresponding %s.

Fixes: 5a6681e22c14 ("sfc: separate out SFC4000 ("Falcon") support into new sfc-falcon driver")

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 drivers/net/ethernet/sfc/farch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index c282d66..91aa3ec 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -913,7 +913,7 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
 	if (rx_ev_other_err && net_ratelimit()) {
 		netif_dbg(efx, rx_err, efx->net_dev,
 			  " RX queue %d unexpected RX event "
-			  EFX_QWORD_FMT "%s%s%s%s%s%s%s%s\n",
+			  EFX_QWORD_FMT "%s%s%s%s%s%s%s\n",
 			  efx_rx_queue_index(rx_queue), EFX_QWORD_VAL(*event),
 			  rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "",
 			  rx_ev_ip_hdr_chksum_err ?

^ permalink raw reply related

* [PATCH net-next 2/3] sfc: don't select SFC_FALCON
From: Edward Cree @ 2016-12-01 17:01 UTC (permalink / raw)
  To: linux-net-drivers, davem; +Cc: bkenward, netdev
In-Reply-To: <c52a0276-e379-7841-8d10-d5a834b81c4e@solarflare.com>

Easy enough for Falcon users to enable it when making oldconfig.

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 drivers/net/ethernet/sfc/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 0e16197..605ebc7 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -6,7 +6,6 @@ config SFC
 	select I2C
 	select I2C_ALGOBIT
 	select PTP_1588_CLOCK
-	select SFC_FALCON
 	---help---
 	  This driver supports 10/40-gigabit Ethernet cards based on
 	  the Solarflare SFC9000-family and SFC9100-family controllers.

^ permalink raw reply related

* [PATCH net-next 3/3] sfc: remove RESET_TYPE_RX_RECOVERY
From: Edward Cree @ 2016-12-01 17:01 UTC (permalink / raw)
  To: linux-net-drivers, davem; +Cc: bkenward, netdev
In-Reply-To: <c52a0276-e379-7841-8d10-d5a834b81c4e@solarflare.com>

It's no longer used now that Falcon is gone.

Also remove a reference in a comment to an ioctl that doesn't exist.

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 drivers/net/ethernet/sfc/efx.c  | 1 -
 drivers/net/ethernet/sfc/enum.h | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 4a234a8..da7028d 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -82,7 +82,6 @@ const char *const efx_reset_type_names[] = {
 	[RESET_TYPE_DISABLE]            = "DISABLE",
 	[RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
 	[RESET_TYPE_INT_ERROR]          = "INT_ERROR",
-	[RESET_TYPE_RX_RECOVERY]        = "RX_RECOVERY",
 	[RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
 	[RESET_TYPE_TX_SKIP]            = "TX_SKIP",
 	[RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h
index c94f562..6fa8242 100644
--- a/drivers/net/ethernet/sfc/enum.h
+++ b/drivers/net/ethernet/sfc/enum.h
@@ -148,7 +148,6 @@ enum efx_loopback_mode {
  * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled
  * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
  * @RESET_TYPE_INT_ERROR: reset due to internal error
- * @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors
  * @RESET_TYPE_DMA_ERROR: DMA error
  * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors
  * @RESET_TYPE_MC_FAILURE: MC reboot/assertion
@@ -166,15 +165,13 @@ enum reset_type {
 	RESET_TYPE_MAX_METHOD,
 	RESET_TYPE_TX_WATCHDOG,
 	RESET_TYPE_INT_ERROR,
-	RESET_TYPE_RX_RECOVERY,
 	RESET_TYPE_DMA_ERROR,
 	RESET_TYPE_TX_SKIP,
 	RESET_TYPE_MC_FAILURE,
 	/* RESET_TYPE_MCDI_TIMEOUT is actually a method, not just a reason, but
 	 * it doesn't fit the scope hierarchy (not well-ordered by inclusion).
 	 * We encode this by having its enum value be greater than
-	 * RESET_TYPE_MAX_METHOD. This also prevents issuing it with
-	 * efx_ioctl_reset.
+	 * RESET_TYPE_MAX_METHOD.
 	 */
 	RESET_TYPE_MCDI_TIMEOUT,
 	RESET_TYPE_MAX,

^ permalink raw reply related

* [PATCH v6 net-next 0/7] Support Armada 37xx SoC (ARMv8 64-bits) in mvneta driver
From: Gregory CLEMENT @ 2016-12-01 17:03 UTC (permalink / raw)
  To: David S. Miller, linux-kernel, netdev
  Cc: Jisheng Zhang, Arnd Bergmann, Jason Cooper, Andrew Lunn,
	Sebastian Hesselbarth, Gregory CLEMENT, Thomas Petazzoni,
	linux-arm-kernel, Nadav Haklai, Marcin Wojtas, Dmitri Epshtein,
	Yelena Krivosheev

Hi,

The Armada 37xx is a new ARMv8 SoC from Marvell using same network
controller as the older Armada 370/38x/XP SoCs. This series adapts the
driver in order to be able to use it on this new SoC. The main changes
are:

- 64-bits support: the first patches allow using the driver on a 64-bit
  architecture.

- MBUS support: the mbus configuration is different on Armada 37xx
  from the older SoCs.

- per cpu interrupt: Armada 37xx do not support per cpu interrupt for
  the NETA IP, the non-per-CPU behavior was added back.

The first patch is an optimization in the rx path in swbm mode.
The second patch remove unnecessary allocation for HWBM.
The first item is solved by patches 4 and 5.
The 2 last items are solved by patch 6.
In patch 7 the dt support is added.

Beside Armada 37xx, this series have been again tested on Armada XP
and Armada 38x (with Hardware Buffer Management and with Software
Buffer Management).

This is the 6th version of the series:
- 1st version:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-November/469588.html

- 2nd version:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-November/470476.html

- 3rd version:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-November/470901.html

- 4th version:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-November/471039.html

- 5th version:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-November/471478.html

Changelog:
v5 -> v6:
 - Added Tested-by from  Marcin Wojtas on the series
 - Added Reviewed-by from Jisheng Zhang on patch 3
 - Fix eth1 phy mode for Armada 3720 DB board on patch 7

v4 -> v5:
 - remove unnecessary cast in patch 3

v3 -> v4:
 - Adding new patch: "net: mvneta: do not allocate buffer in rxq init
   with HWBM"

 - Simplify the HWBM case in patch 3 as suggested by Marcin

v2 -> v3:
 - Adding patch 1 "Optimize rx path for small frame"

 - Fix the kbuild error by moving the "phys_addr += pp->rx_offset_correction;"
  line from patch 2 to patch 3 where rx_offset_correction is introduced.

 - Move the memory allocation of the buf_virt_addr of the rxq to be
   called by the probe function in order to avoid a memory leak.

Thanks,

Gregory

Gregory CLEMENT (5):
  net: mvneta: Optimize rx path for small frame
  net: mvneta: Do not allocate buffer in rxq init with HWBM
  net: mvneta: Use cacheable memory to store the rx buffer virtual address
  net: mvneta: Only disable mvneta_bm for 64-bits
  ARM64: dts: marvell: Add network support for Armada 3700

Marcin Wojtas (2):
  net: mvneta: Convert to be 64 bits compatible
  net: mvneta: Add network support for Armada 3700 SoC

 Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt |   7 +-
 arch/arm64/boot/dts/marvell/armada-3720-db.dts                    |  23 +++++-
 arch/arm64/boot/dts/marvell/armada-37xx.dtsi                      |  23 +++++-
 drivers/net/ethernet/marvell/Kconfig                              |  10 +-
 drivers/net/ethernet/marvell/mvneta.c                             | 344 +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
 5 files changed, 305 insertions(+), 102 deletions(-)

base-commit: 436accebb53021ef7c63535f60bda410aa87c136
-- 
git-series 0.8.10

^ permalink raw reply

* [PATCH v6 net-next 1/7] net: mvneta: Optimize rx path for small frame
From: Gregory CLEMENT @ 2016-12-01 17:03 UTC (permalink / raw)
  To: David S. Miller, linux-kernel, netdev
  Cc: Jisheng Zhang, Arnd Bergmann, Jason Cooper, Andrew Lunn,
	Sebastian Hesselbarth, Gregory CLEMENT, Thomas Petazzoni,
	linux-arm-kernel, Nadav Haklai, Marcin Wojtas, Dmitri Epshtein,
	Yelena Krivosheev
In-Reply-To: <cover.dd374b7aaa358be0211d7ead81129a399fa692f4.1480611779.git-series.gregory.clement@free-electrons.com>

For small frame reuse the phys_addr variable instead of accessing the
uncacheable value in the rx descriptor.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Tested-by: Marcin Wojtas <mw@semihalf.com>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 87274d4ab102..1b84f746d748 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1918,7 +1918,7 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 				goto err_drop_frame;
 
 			dma_sync_single_range_for_cpu(dev->dev.parent,
-						      rx_desc->buf_phys_addr,
+						      phys_addr,
 						      MVNETA_MH_SIZE + NET_SKB_PAD,
 						      rx_bytes,
 						      DMA_FROM_DEVICE);
-- 
git-series 0.8.10

^ permalink raw reply related

* [PATCH v6 net-next 2/7] net: mvneta: Do not allocate buffer in rxq init with HWBM
From: Gregory CLEMENT @ 2016-12-01 17:03 UTC (permalink / raw)
  To: David S. Miller, linux-kernel, netdev
  Cc: Jisheng Zhang, Andrew Lunn, Jason Cooper, Arnd Bergmann,
	Dmitri Epshtein, Nadav Haklai, Yelena Krivosheev, Gregory CLEMENT,
	Marcin Wojtas, Thomas Petazzoni, linux-arm-kernel,
	Sebastian Hesselbarth
In-Reply-To: <cover.dd374b7aaa358be0211d7ead81129a399fa692f4.1480611779.git-series.gregory.clement@free-electrons.com>

For HWBM all buffers are allocated in mvneta_bm_construct() and in runtime
they are put into descriptors by hardware. There is no need to fill them
at this point.

Suggested-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Tested-by: Marcin Wojtas <mw@semihalf.com>
---
 drivers/net/ethernet/marvell/mvneta.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 1b84f746d748..f5319c50f8d9 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2784,14 +2784,14 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
 		mvneta_rxq_buf_size_set(pp, rxq,
 					MVNETA_RX_BUF_SIZE(pp->pkt_size));
 		mvneta_rxq_bm_disable(pp, rxq);
+		mvneta_rxq_fill(pp, rxq, rxq->size);
 	} else {
 		mvneta_rxq_bm_enable(pp, rxq);
 		mvneta_rxq_long_pool_set(pp, rxq);
 		mvneta_rxq_short_pool_set(pp, rxq);
+		mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
 	}
 
-	mvneta_rxq_fill(pp, rxq, rxq->size);
-
 	return 0;
 }
 
-- 
git-series 0.8.10

^ permalink raw reply related

* [PATCH v6 net-next 3/7] net: mvneta: Use cacheable memory to store the rx buffer virtual address
From: Gregory CLEMENT @ 2016-12-01 17:03 UTC (permalink / raw)
  To: David S. Miller, linux-kernel, netdev
  Cc: Jisheng Zhang, Arnd Bergmann, Jason Cooper, Andrew Lunn,
	Sebastian Hesselbarth, Gregory CLEMENT, Thomas Petazzoni,
	linux-arm-kernel, Nadav Haklai, Marcin Wojtas, Dmitri Epshtein,
	Yelena Krivosheev
In-Reply-To: <cover.dd374b7aaa358be0211d7ead81129a399fa692f4.1480611779.git-series.gregory.clement@free-electrons.com>

Until now the virtual address of the received buffer were stored in the
cookie field of the rx descriptor. However, this field is 32-bits only
which prevents to use the driver on a 64-bits architecture.

With this patch the virtual address is stored in an array not shared with
the hardware (no more need to use the DMA API). Thanks to this, it is
possible to use cache contrary to the access of the rx descriptor member.

The change is done in the swbm path only because the hwbm uses the cookie
field, this also means that currently the hwbm is not usable in 64-bits.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Reviewed-by: Jisheng Zhang <jszhang@marvell.com>
Tested-by: Marcin Wojtas <mw@semihalf.com>
---
 drivers/net/ethernet/marvell/mvneta.c | 34 +++++++++++++++++++---------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index f5319c50f8d9..92b9af14c352 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -561,6 +561,9 @@ struct mvneta_rx_queue {
 	u32 pkts_coal;
 	u32 time_coal;
 
+	/* Virtual address of the RX buffer */
+	void  **buf_virt_addr;
+
 	/* Virtual address of the RX DMA descriptors array */
 	struct mvneta_rx_desc *descs;
 
@@ -1573,10 +1576,14 @@ static void mvneta_tx_done_pkts_coal_set(struct mvneta_port *pp,
 
 /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */
 static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc,
-				u32 phys_addr, u32 cookie)
+				u32 phys_addr, void *virt_addr,
+				struct mvneta_rx_queue *rxq)
 {
-	rx_desc->buf_cookie = cookie;
+	int i;
+
 	rx_desc->buf_phys_addr = phys_addr;
+	i = rx_desc - rxq->descs;
+	rxq->buf_virt_addr[i] = virt_addr;
 }
 
 /* Decrement sent descriptors counter */
@@ -1781,7 +1788,8 @@ EXPORT_SYMBOL_GPL(mvneta_frag_free);
 
 /* Refill processing for SW buffer management */
 static int mvneta_rx_refill(struct mvneta_port *pp,
-			    struct mvneta_rx_desc *rx_desc)
+			    struct mvneta_rx_desc *rx_desc,
+			    struct mvneta_rx_queue *rxq)
 
 {
 	dma_addr_t phys_addr;
@@ -1799,7 +1807,7 @@ static int mvneta_rx_refill(struct mvneta_port *pp,
 		return -ENOMEM;
 	}
 
-	mvneta_rx_desc_fill(rx_desc, phys_addr, (u32)data);
+	mvneta_rx_desc_fill(rx_desc, phys_addr, data, rxq);
 	return 0;
 }
 
@@ -1861,7 +1869,7 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
 
 	for (i = 0; i < rxq->size; i++) {
 		struct mvneta_rx_desc *rx_desc = rxq->descs + i;
-		void *data = (void *)rx_desc->buf_cookie;
+		void *data = rxq->buf_virt_addr[i];
 
 		dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
 				 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
@@ -1894,12 +1902,13 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 		unsigned char *data;
 		dma_addr_t phys_addr;
 		u32 rx_status, frag_size;
-		int rx_bytes, err;
+		int rx_bytes, err, index;
 
 		rx_done++;
 		rx_status = rx_desc->status;
 		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
-		data = (unsigned char *)rx_desc->buf_cookie;
+		index = rx_desc - rxq->descs;
+		data = rxq->buf_virt_addr[index];
 		phys_addr = rx_desc->buf_phys_addr;
 
 		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
@@ -1938,7 +1947,7 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 		}
 
 		/* Refill processing */
-		err = mvneta_rx_refill(pp, rx_desc);
+		err = mvneta_rx_refill(pp, rx_desc, rxq);
 		if (err) {
 			netdev_err(dev, "Linux processing - Can't refill\n");
 			rxq->missed++;
@@ -2020,7 +2029,7 @@ static int mvneta_rx_hwbm(struct mvneta_port *pp, int rx_todo,
 		rx_done++;
 		rx_status = rx_desc->status;
 		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
-		data = (unsigned char *)rx_desc->buf_cookie;
+		data = (u8 *)(uintptr_t)rx_desc->buf_cookie;
 		phys_addr = rx_desc->buf_phys_addr;
 		pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
 		bm_pool = &pp->bm_priv->bm_pools[pool_id];
@@ -2716,7 +2725,7 @@ static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 
 	for (i = 0; i < num; i++) {
 		memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
-		if (mvneta_rx_refill(pp, rxq->descs + i) != 0) {
+		if (mvneta_rx_refill(pp, rxq->descs + i, rxq) != 0) {
 			netdev_err(pp->dev, "%s:rxq %d, %d of %d buffs  filled\n",
 				__func__, rxq->id, i, num);
 			break;
@@ -3865,6 +3874,11 @@ static int mvneta_init(struct device *dev, struct mvneta_port *pp)
 		rxq->size = pp->rx_ring_size;
 		rxq->pkts_coal = MVNETA_RX_COAL_PKTS;
 		rxq->time_coal = MVNETA_RX_COAL_USEC;
+		rxq->buf_virt_addr = devm_kmalloc(pp->dev->dev.parent,
+						  rxq->size * sizeof(void *),
+						  GFP_KERNEL);
+		if (!rxq->buf_virt_addr)
+			return -ENOMEM;
 	}
 
 	return 0;
-- 
git-series 0.8.10

^ permalink raw reply related

* [PATCH v6 net-next 4/7] net: mvneta: Convert to be 64 bits compatible
From: Gregory CLEMENT @ 2016-12-01 17:03 UTC (permalink / raw)
  To: David S. Miller, linux-kernel, netdev
  Cc: Jisheng Zhang, Andrew Lunn, Jason Cooper, Arnd Bergmann,
	Dmitri Epshtein, Nadav Haklai, Yelena Krivosheev, Gregory CLEMENT,
	Marcin Wojtas, Thomas Petazzoni, linux-arm-kernel,
	Sebastian Hesselbarth
In-Reply-To: <cover.dd374b7aaa358be0211d7ead81129a399fa692f4.1480611779.git-series.gregory.clement@free-electrons.com>

From: Marcin Wojtas <mw@semihalf.com>

Prepare the mvneta driver in order to be usable on the 64 bits platform
such as the Armada 3700.

[gregory.clement@free-electrons.com]: this patch was extract from a larger
one to ease review and maintenance.

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Tested-by: Marcin Wojtas <mw@semihalf.com>
---
 drivers/net/ethernet/marvell/mvneta.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 92b9af14c352..8ef03fb69bcd 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -296,6 +296,12 @@
 /* descriptor aligned size */
 #define MVNETA_DESC_ALIGNED_SIZE	32
 
+/* Number of bytes to be taken into account by HW when putting incoming data
+ * to the buffers. It is needed in case NET_SKB_PAD exceeds maximum packet
+ * offset supported in MVNETA_RXQ_CONFIG_REG(q) registers.
+ */
+#define MVNETA_RX_PKT_OFFSET_CORRECTION		64
+
 #define MVNETA_RX_PKT_SIZE(mtu) \
 	ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \
 	      ETH_HLEN + ETH_FCS_LEN,			     \
@@ -416,6 +422,7 @@ struct mvneta_port {
 	u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)];
 
 	u32 indir[MVNETA_RSS_LU_TABLE_SIZE];
+	u16 rx_offset_correction;
 };
 
 /* The mvneta_tx_desc and mvneta_rx_desc structures describe the
@@ -1807,6 +1814,7 @@ static int mvneta_rx_refill(struct mvneta_port *pp,
 		return -ENOMEM;
 	}
 
+	phys_addr += pp->rx_offset_correction;
 	mvneta_rx_desc_fill(rx_desc, phys_addr, data, rxq);
 	return 0;
 }
@@ -2782,7 +2790,7 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
 	mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
 
 	/* Set Offset */
-	mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD);
+	mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD - pp->rx_offset_correction);
 
 	/* Set coalescing pkts and time */
 	mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
@@ -4033,6 +4041,13 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	pp->rxq_def = rxq_def;
 
+	/* Set RX packet offset correction for platforms, whose
+	 * NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
+	 * platforms and 0B for 32-bit ones.
+	 */
+	pp->rx_offset_correction =
+		max(0, NET_SKB_PAD - MVNETA_RX_PKT_OFFSET_CORRECTION);
+
 	pp->indir[0] = rxq_def;
 
 	pp->clk = devm_clk_get(&pdev->dev, "core");
-- 
git-series 0.8.10

^ permalink raw reply related

* [PATCH v6 net-next 5/7] net: mvneta: Only disable mvneta_bm for 64-bits
From: Gregory CLEMENT @ 2016-12-01 17:03 UTC (permalink / raw)
  To: David S. Miller, linux-kernel, netdev
  Cc: Jisheng Zhang, Arnd Bergmann, Jason Cooper, Andrew Lunn,
	Sebastian Hesselbarth, Gregory CLEMENT, Thomas Petazzoni,
	linux-arm-kernel, Nadav Haklai, Marcin Wojtas, Dmitri Epshtein,
	Yelena Krivosheev
In-Reply-To: <cover.dd374b7aaa358be0211d7ead81129a399fa692f4.1480611779.git-series.gregory.clement@free-electrons.com>

Actually only the mvneta_bm support is not 64-bits compatible.
The mvneta code itself can run on 64-bits architecture.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Tested-by: Marcin Wojtas <mw@semihalf.com>
---
 drivers/net/ethernet/marvell/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index 66fd9dbb2ca7..2ccea9dd9248 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -44,6 +44,7 @@ config MVMDIO
 config MVNETA_BM_ENABLE
 	tristate "Marvell Armada 38x/XP network interface BM support"
 	depends on MVNETA
+	depends on !64BIT
 	---help---
 	  This driver supports auxiliary block of the network
 	  interface units in the Marvell ARMADA XP and ARMADA 38x SoC
@@ -58,7 +59,6 @@ config MVNETA
 	tristate "Marvell Armada 370/38x/XP network interface support"
 	depends on PLAT_ORION || COMPILE_TEST
 	depends on HAS_DMA
-	depends on !64BIT
 	select MVMDIO
 	select FIXED_PHY
 	---help---
@@ -71,6 +71,7 @@ config MVNETA
 
 config MVNETA_BM
 	tristate
+	depends on !64BIT
 	default y if MVNETA=y && MVNETA_BM_ENABLE!=n
 	default MVNETA_BM_ENABLE
 	select HWBM
-- 
git-series 0.8.10

^ permalink raw reply related

* [PATCH v6 net-next 6/7] net: mvneta: Add network support for Armada 3700 SoC
From: Gregory CLEMENT @ 2016-12-01 17:03 UTC (permalink / raw)
  To: David S. Miller, linux-kernel, netdev
  Cc: Jisheng Zhang, Arnd Bergmann, Jason Cooper, Andrew Lunn,
	Sebastian Hesselbarth, Gregory CLEMENT, Thomas Petazzoni,
	linux-arm-kernel, Nadav Haklai, Marcin Wojtas, Dmitri Epshtein,
	Yelena Krivosheev
In-Reply-To: <cover.dd374b7aaa358be0211d7ead81129a399fa692f4.1480611779.git-series.gregory.clement@free-electrons.com>

From: Marcin Wojtas <mw@semihalf.com>

Armada 3700 is a new ARMv8 SoC from Marvell using same network controller
as older Armada 370/38x/XP. There are however some differences that
needed taking into account when adding support for it:

* open default MBUS window to 4GB of DRAM - Armada 3700 SoC's Mbus
  configuration for network controller has to be done on two levels:
  global and per-port. The first one is inherited from the
  bootloader. The latter can be opened in a default way, leaving
  arbitration to the bus controller.  Hence filled mbus_dram_target_info
  structure is not needed

* make per-CPU operation optional - Recent patches adding RSS and XPS
  support for Armada 38x/XP enabled per-CPU operation of the controller
  by default. Contrary to older SoC's Armada 3700 SoC's network
  controller is not capable of per-CPU processing due to interrupt lines'
  connectivity.  This patch restores non-per-CPU operation, which is now
  optional and depends on neta_armada3700 flag value in mvneta_port
  structure. In order not to complicate the code, separate interrupt
  subroutine is implemented.

For now, on the Armada 3700, RSS is disabled as the current
implementation depend on the per cpu interrupts.

[gregory.clement@free-electrons.com: extract from a larger patch, replace
some ifdef and port to net-next for v4.10]

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Tested-by: Marcin Wojtas <mw@semihalf.com>
---
 Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt |   7 +-
 drivers/net/ethernet/marvell/Kconfig                              |   7 +-
 drivers/net/ethernet/marvell/mvneta.c                             | 287 +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
 3 files changed, 214 insertions(+), 87 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
index 73be8970815e..7aa840c8768d 100644
--- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
+++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
@@ -1,7 +1,10 @@
-* Marvell Armada 370 / Armada XP Ethernet Controller (NETA)
+* Marvell Armada 370 / Armada XP / Armada 3700 Ethernet Controller (NETA)
 
 Required properties:
-- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta".
+- compatible: could be one of the followings
+	"marvell,armada-370-neta"
+	"marvell,armada-xp-neta"
+	"marvell,armada-3700-neta"
 - reg: address and length of the register set for the device.
 - interrupts: interrupt for the device
 - phy: See ethernet.txt file in the same directory.
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index 2ccea9dd9248..3b8f11fe5e13 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -56,14 +56,15 @@ config MVNETA_BM_ENABLE
 	  buffer management.
 
 config MVNETA
-	tristate "Marvell Armada 370/38x/XP network interface support"
-	depends on PLAT_ORION || COMPILE_TEST
+	tristate "Marvell Armada 370/38x/XP/37xx network interface support"
+	depends on ARCH_MVEBU || COMPILE_TEST
 	depends on HAS_DMA
 	select MVMDIO
 	select FIXED_PHY
 	---help---
 	  This driver supports the network interface units in the
-	  Marvell ARMADA XP, ARMADA 370 and ARMADA 38x SoC family.
+	  Marvell ARMADA XP, ARMADA 370, ARMADA 38x and
+	  ARMADA 37xx SoC family.
 
 	  Note that this driver is distinct from the mv643xx_eth
 	  driver, which should be used for the older Marvell SoCs
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 8ef03fb69bcd..ffc0c65068ea 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -397,6 +397,9 @@ struct mvneta_port {
 	spinlock_t lock;
 	bool is_stopped;
 
+	u32 cause_rx_tx;
+	struct napi_struct napi;
+
 	/* Core clock */
 	struct clk *clk;
 	/* AXI clock */
@@ -422,6 +425,9 @@ struct mvneta_port {
 	u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)];
 
 	u32 indir[MVNETA_RSS_LU_TABLE_SIZE];
+
+	/* Flags for special SoC configurations */
+	bool neta_armada3700;
 	u16 rx_offset_correction;
 };
 
@@ -965,14 +971,9 @@ static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize,
 	return 0;
 }
 
-/* Assign and initialize pools for port. In case of fail
- * buffer manager will remain disabled for current port.
- */
-static int mvneta_bm_port_init(struct platform_device *pdev,
-			       struct mvneta_port *pp)
+static  int mvneta_bm_port_mbus_init(struct mvneta_port *pp)
 {
-	struct device_node *dn = pdev->dev.of_node;
-	u32 long_pool_id, short_pool_id, wsize;
+	u32 wsize;
 	u8 target, attr;
 	int err;
 
@@ -991,6 +992,25 @@ static int mvneta_bm_port_init(struct platform_device *pdev,
 		netdev_info(pp->dev, "fail to configure mbus window to BM\n");
 		return err;
 	}
+	return 0;
+}
+
+/* Assign and initialize pools for port. In case of fail
+ * buffer manager will remain disabled for current port.
+ */
+static int mvneta_bm_port_init(struct platform_device *pdev,
+			       struct mvneta_port *pp)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	u32 long_pool_id, short_pool_id;
+
+	if (!pp->neta_armada3700) {
+		int ret;
+
+		ret = mvneta_bm_port_mbus_init(pp);
+		if (ret)
+			return ret;
+	}
 
 	if (of_property_read_u32(dn, "bm,pool-long", &long_pool_id)) {
 		netdev_info(pp->dev, "missing long pool id\n");
@@ -1359,22 +1379,27 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	for_each_present_cpu(cpu) {
 		int rxq_map = 0, txq_map = 0;
 		int rxq, txq;
+		if (!pp->neta_armada3700) {
+			for (rxq = 0; rxq < rxq_number; rxq++)
+				if ((rxq % max_cpu) == cpu)
+					rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
+			for (txq = 0; txq < txq_number; txq++)
+				if ((txq % max_cpu) == cpu)
+					txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
+
+			/* With only one TX queue we configure a special case
+			 * which will allow to get all the irq on a single
+			 * CPU
+			 */
+			if (txq_number == 1)
+				txq_map = (cpu == pp->rxq_def) ?
+					MVNETA_CPU_TXQ_ACCESS(1) : 0;
 
-		for (rxq = 0; rxq < rxq_number; rxq++)
-			if ((rxq % max_cpu) == cpu)
-				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
-
-		for (txq = 0; txq < txq_number; txq++)
-			if ((txq % max_cpu) == cpu)
-				txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
-
-		/* With only one TX queue we configure a special case
-		 * which will allow to get all the irq on a single
-		 * CPU
-		 */
-		if (txq_number == 1)
-			txq_map = (cpu == pp->rxq_def) ?
-				MVNETA_CPU_TXQ_ACCESS(1) : 0;
+		} else {
+			txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+			rxq_map = MVNETA_CPU_RXQ_ACCESS_ALL_MASK;
+		}
 
 		mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
 	}
@@ -2627,6 +2652,17 @@ static void mvneta_set_rx_mode(struct net_device *dev)
 /* Interrupt handling - the callback for request_irq() */
 static irqreturn_t mvneta_isr(int irq, void *dev_id)
 {
+	struct mvneta_port *pp = (struct mvneta_port *)dev_id;
+
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+	napi_schedule(&pp->napi);
+
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handling - the callback for request_percpu_irq() */
+static irqreturn_t mvneta_percpu_isr(int irq, void *dev_id)
+{
 	struct mvneta_pcpu_port *port = (struct mvneta_pcpu_port *)dev_id;
 
 	disable_percpu_irq(port->pp->dev->irq);
@@ -2674,7 +2710,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
 	if (!netif_running(pp->dev)) {
-		napi_complete(&port->napi);
+		napi_complete(napi);
 		return rx_done;
 	}
 
@@ -2703,7 +2739,8 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 	 */
 	rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
 
-	cause_rx_tx |= port->cause_rx_tx;
+	cause_rx_tx |= pp->neta_armada3700 ? pp->cause_rx_tx :
+		port->cause_rx_tx;
 
 	if (rx_queue) {
 		rx_queue = rx_queue - 1;
@@ -2717,11 +2754,27 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 
 	if (budget > 0) {
 		cause_rx_tx = 0;
-		napi_complete(&port->napi);
-		enable_percpu_irq(pp->dev->irq, 0);
+		napi_complete(napi);
+
+		if (pp->neta_armada3700) {
+			unsigned long flags;
+
+			local_irq_save(flags);
+			mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+				    MVNETA_RX_INTR_MASK(rxq_number) |
+				    MVNETA_TX_INTR_MASK(txq_number) |
+				    MVNETA_MISCINTR_INTR_MASK);
+			local_irq_restore(flags);
+		} else {
+			enable_percpu_irq(pp->dev->irq, 0);
+		}
 	}
 
-	port->cause_rx_tx = cause_rx_tx;
+	if (pp->neta_armada3700)
+		pp->cause_rx_tx = cause_rx_tx;
+	else
+		port->cause_rx_tx = cause_rx_tx;
+
 	return rx_done;
 }
 
@@ -2991,11 +3044,16 @@ static void mvneta_start_dev(struct mvneta_port *pp)
 	/* start the Rx/Tx activity */
 	mvneta_port_enable(pp);
 
-	/* Enable polling on the port */
-	for_each_online_cpu(cpu) {
-		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+	if (!pp->neta_armada3700) {
+		/* Enable polling on the port */
+		for_each_online_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
 
-		napi_enable(&port->napi);
+			napi_enable(&port->napi);
+		}
+	} else {
+		napi_enable(&pp->napi);
 	}
 
 	/* Unmask interrupts. It has to be done from each CPU */
@@ -3017,10 +3075,15 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 
 	phy_stop(ndev->phydev);
 
-	for_each_online_cpu(cpu) {
-		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+	if (!pp->neta_armada3700) {
+		for_each_online_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
 
-		napi_disable(&port->napi);
+			napi_disable(&port->napi);
+		}
+	} else {
+		napi_disable(&pp->napi);
 	}
 
 	netif_carrier_off(pp->dev);
@@ -3430,31 +3493,37 @@ static int mvneta_open(struct net_device *dev)
 		goto err_cleanup_rxqs;
 
 	/* Connect to port interrupt line */
-	ret = request_percpu_irq(pp->dev->irq, mvneta_isr,
-				 MVNETA_DRIVER_NAME, pp->ports);
+	if (pp->neta_armada3700)
+		ret = request_irq(pp->dev->irq, mvneta_isr, 0,
+				  dev->name, pp);
+	else
+		ret = request_percpu_irq(pp->dev->irq, mvneta_percpu_isr,
+					 dev->name, pp->ports);
 	if (ret) {
 		netdev_err(pp->dev, "cannot request irq %d\n", pp->dev->irq);
 		goto err_cleanup_txqs;
 	}
 
-	/* Enable per-CPU interrupt on all the CPU to handle our RX
-	 * queue interrupts
-	 */
-	on_each_cpu(mvneta_percpu_enable, pp, true);
+	if (!pp->neta_armada3700) {
+		/* Enable per-CPU interrupt on all the CPU to handle our RX
+		 * queue interrupts
+		 */
+		on_each_cpu(mvneta_percpu_enable, pp, true);
 
-	pp->is_stopped = false;
-	/* Register a CPU notifier to handle the case where our CPU
-	 * might be taken offline.
-	 */
-	ret = cpuhp_state_add_instance_nocalls(online_hpstate,
-					       &pp->node_online);
-	if (ret)
-		goto err_free_irq;
+		pp->is_stopped = false;
+		/* Register a CPU notifier to handle the case where our CPU
+		 * might be taken offline.
+		 */
+		ret = cpuhp_state_add_instance_nocalls(online_hpstate,
+						       &pp->node_online);
+		if (ret)
+			goto err_free_irq;
 
-	ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
-					       &pp->node_dead);
-	if (ret)
-		goto err_free_online_hp;
+		ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						       &pp->node_dead);
+		if (ret)
+			goto err_free_online_hp;
+	}
 
 	/* In default link is down */
 	netif_carrier_off(pp->dev);
@@ -3470,13 +3539,20 @@ static int mvneta_open(struct net_device *dev)
 	return 0;
 
 err_free_dead_hp:
-	cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
-					    &pp->node_dead);
+	if (!pp->neta_armada3700)
+		cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						    &pp->node_dead);
 err_free_online_hp:
-	cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
+	if (!pp->neta_armada3700)
+		cpuhp_state_remove_instance_nocalls(online_hpstate,
+						    &pp->node_online);
 err_free_irq:
-	on_each_cpu(mvneta_percpu_disable, pp, true);
-	free_percpu_irq(pp->dev->irq, pp->ports);
+	if (pp->neta_armada3700) {
+		free_irq(pp->dev->irq, pp);
+	} else {
+		on_each_cpu(mvneta_percpu_disable, pp, true);
+		free_percpu_irq(pp->dev->irq, pp->ports);
+	}
 err_cleanup_txqs:
 	mvneta_cleanup_txqs(pp);
 err_cleanup_rxqs:
@@ -3489,23 +3565,30 @@ static int mvneta_stop(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 
-	/* Inform that we are stopping so we don't want to setup the
-	 * driver for new CPUs in the notifiers. The code of the
-	 * notifier for CPU online is protected by the same spinlock,
-	 * so when we get the lock, the notifer work is done.
-	 */
-	spin_lock(&pp->lock);
-	pp->is_stopped = true;
-	spin_unlock(&pp->lock);
+	if (!pp->neta_armada3700) {
+		/* Inform that we are stopping so we don't want to setup the
+		 * driver for new CPUs in the notifiers. The code of the
+		 * notifier for CPU online is protected by the same spinlock,
+		 * so when we get the lock, the notifer work is done.
+		 */
+		spin_lock(&pp->lock);
+		pp->is_stopped = true;
+		spin_unlock(&pp->lock);
 
-	mvneta_stop_dev(pp);
-	mvneta_mdio_remove(pp);
+		mvneta_stop_dev(pp);
+		mvneta_mdio_remove(pp);
 
 	cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
 	cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
 					    &pp->node_dead);
-	on_each_cpu(mvneta_percpu_disable, pp, true);
-	free_percpu_irq(dev->irq, pp->ports);
+		on_each_cpu(mvneta_percpu_disable, pp, true);
+		free_percpu_irq(dev->irq, pp->ports);
+	} else {
+		mvneta_stop_dev(pp);
+		mvneta_mdio_remove(pp);
+		free_irq(dev->irq, pp);
+	}
+
 	mvneta_cleanup_rxqs(pp);
 	mvneta_cleanup_txqs(pp);
 
@@ -3784,6 +3867,11 @@ static int mvneta_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
 				   const u8 *key, const u8 hfunc)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
+
+	/* Current code for Armada 3700 doesn't support RSS features yet */
+	if (pp->neta_armada3700)
+		return -EOPNOTSUPP;
+
 	/* We require at least one supported parameter to be changed
 	 * and no change in any of the unsupported parameters
 	 */
@@ -3804,6 +3892,10 @@ static int mvneta_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 
+	/* Current code for Armada 3700 doesn't support RSS features yet */
+	if (pp->neta_armada3700)
+		return -EOPNOTSUPP;
+
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
 
@@ -3911,16 +4003,29 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp,
 	win_enable = 0x3f;
 	win_protect = 0;
 
-	for (i = 0; i < dram->num_cs; i++) {
-		const struct mbus_dram_window *cs = dram->cs + i;
-		mvreg_write(pp, MVNETA_WIN_BASE(i), (cs->base & 0xffff0000) |
-			    (cs->mbus_attr << 8) | dram->mbus_dram_target_id);
+	if (dram) {
+		for (i = 0; i < dram->num_cs; i++) {
+			const struct mbus_dram_window *cs = dram->cs + i;
+
+			mvreg_write(pp, MVNETA_WIN_BASE(i),
+				    (cs->base & 0xffff0000) |
+				    (cs->mbus_attr << 8) |
+				    dram->mbus_dram_target_id);
 
-		mvreg_write(pp, MVNETA_WIN_SIZE(i),
-			    (cs->size - 1) & 0xffff0000);
+			mvreg_write(pp, MVNETA_WIN_SIZE(i),
+				    (cs->size - 1) & 0xffff0000);
 
-		win_enable &= ~(1 << i);
-		win_protect |= 3 << (2 * i);
+			win_enable &= ~(1 << i);
+			win_protect |= 3 << (2 * i);
+		}
+	} else {
+		/* For Armada3700 open default 4GB Mbus window, leaving
+		 * arbitration of target/attribute to a different layer
+		 * of configuration.
+		 */
+		mvreg_write(pp, MVNETA_WIN_SIZE(0), 0xffff0000);
+		win_enable &= ~BIT(0);
+		win_protect = 3;
 	}
 
 	mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
@@ -4050,6 +4155,10 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	pp->indir[0] = rxq_def;
 
+	/* Get special SoC configurations */
+	if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+		pp->neta_armada3700 = true;
+
 	pp->clk = devm_clk_get(&pdev->dev, "core");
 	if (IS_ERR(pp->clk))
 		pp->clk = devm_clk_get(&pdev->dev, NULL);
@@ -4117,7 +4226,11 @@ static int mvneta_probe(struct platform_device *pdev)
 	pp->tx_csum_limit = tx_csum_limit;
 
 	dram_target_info = mv_mbus_dram_info();
-	if (dram_target_info)
+	/* Armada3700 requires setting default configuration of Mbus
+	 * windows, however without using filled mbus_dram_target_info
+	 * structure.
+	 */
+	if (dram_target_info || pp->neta_armada3700)
 		mvneta_conf_mbus_windows(pp, dram_target_info);
 
 	pp->tx_ring_size = MVNETA_MAX_TXD;
@@ -4150,11 +4263,20 @@ static int mvneta_probe(struct platform_device *pdev)
 		goto err_netdev;
 	}
 
-	for_each_present_cpu(cpu) {
-		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+	/* Armada3700 network controller does not support per-cpu
+	 * operation, so only single NAPI should be initialized.
+	 */
+	if (pp->neta_armada3700) {
+		netif_napi_add(dev, &pp->napi, mvneta_poll, NAPI_POLL_WEIGHT);
+	} else {
+		for_each_present_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
 
-		netif_napi_add(dev, &port->napi, mvneta_poll, NAPI_POLL_WEIGHT);
-		port->pp = pp;
+			netif_napi_add(dev, &port->napi, mvneta_poll,
+				       NAPI_POLL_WEIGHT);
+			port->pp = pp;
+		}
 	}
 
 	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
@@ -4239,6 +4361,7 @@ static int mvneta_remove(struct platform_device *pdev)
 static const struct of_device_id mvneta_match[] = {
 	{ .compatible = "marvell,armada-370-neta" },
 	{ .compatible = "marvell,armada-xp-neta" },
+	{ .compatible = "marvell,armada-3700-neta" },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, mvneta_match);
-- 
git-series 0.8.10

^ permalink raw reply related

* [PATCH v6 net-next 7/7] ARM64: dts: marvell: Add network support for Armada 3700
From: Gregory CLEMENT @ 2016-12-01 17:03 UTC (permalink / raw)
  To: David S. Miller, linux-kernel, netdev
  Cc: Jisheng Zhang, Arnd Bergmann, Jason Cooper, Andrew Lunn,
	Sebastian Hesselbarth, Gregory CLEMENT, Thomas Petazzoni,
	linux-arm-kernel, Nadav Haklai, Marcin Wojtas, Dmitri Epshtein,
	Yelena Krivosheev
In-Reply-To: <cover.dd374b7aaa358be0211d7ead81129a399fa692f4.1480611779.git-series.gregory.clement@free-electrons.com>

Add neta nodes for network support both in device tree for the SoC and
the board.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 arch/arm64/boot/dts/marvell/armada-3720-db.dts | 23 +++++++++++++++++++-
 arch/arm64/boot/dts/marvell/armada-37xx.dtsi   | 23 +++++++++++++++++++-
 2 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
index 1372e9a6aaa4..a59d36cd6caf 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
@@ -81,3 +81,26 @@
 &pcie0 {
 	status = "okay";
 };
+
+&mdio {
+	status = "okay";
+	phy0: ethernet-phy@0 {
+		reg = <0>;
+	};
+
+	phy1: ethernet-phy@1 {
+		reg = <1>;
+	};
+};
+
+&eth0 {
+	phy-mode = "rgmii-id";
+	phy = <&phy0>;
+	status = "okay";
+};
+
+&eth1 {
+	phy-mode = "sgmii";
+	phy = <&phy1>;
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index e9bd58793464..3b8eb45bdc76 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -140,6 +140,29 @@
 				};
 			};
 
+			eth0: ethernet@30000 {
+				   compatible = "marvell,armada-3700-neta";
+				   reg = <0x30000 0x4000>;
+				   interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+				   clocks = <&sb_periph_clk 8>;
+				   status = "disabled";
+			};
+
+			mdio: mdio@32004 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				compatible = "marvell,orion-mdio";
+				reg = <0x32004 0x4>;
+			};
+
+			eth1: ethernet@40000 {
+				compatible = "marvell,armada-3700-neta";
+				reg = <0x40000 0x4000>;
+				interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&sb_periph_clk 7>;
+				status = "disabled";
+			};
+
 			usb3: usb@58000 {
 				compatible = "marvell,armada3700-xhci",
 				"generic-xhci";
-- 
git-series 0.8.10

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox