Netdev List
 help / color / mirror / Atom feed
* [PATCH iproute2 2/4] utils: add get_be{16,32,64}, use them where possible
From: Sabrina Dubroca @ 2016-04-14 13:01 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Sabrina Dubroca
In-Reply-To: <cover.1460622809.git.sd@queasysnail.net>

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 include/utils.h       |  3 +++
 ip/ipfou.c            |  3 +--
 ip/iplink_vxlan.c     |  8 ++------
 ip/iproute_lwtunnel.c |  8 ++++----
 ip/ipxfrm.c           | 13 +++----------
 ip/xfrm_state.c       | 10 +++-------
 lib/ll_proto.c        |  3 +--
 lib/utils.c           | 36 ++++++++++++++++++++++++++++++++++++
 tc/f_flower.c         |  4 ++--
 tc/f_u32.c            | 10 ++--------
 10 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/include/utils.h b/include/utils.h
index aef28ce732ab..a9aa89162950 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -112,6 +112,9 @@ int get_u16(__u16 *val, const char *arg, int base);
 int get_s16(__s16 *val, const char *arg, int base);
 int get_u8(__u8 *val, const char *arg, int base);
 int get_s8(__s8 *val, const char *arg, int base);
+int get_be64(__be64 *val, const char *arg, int base);
+int get_be32(__be32 *val, const char *arg, int base);
+int get_be16(__be16 *val, const char *arg, int base);
 int get_addr64(__u64 *ap, const char *cp);
 
 char *hexstring_n2a(const __u8 *str, int len, char *buf, int blen);
diff --git a/ip/ipfou.c b/ip/ipfou.c
index 8a86b18fc284..2a6ae1755d3e 100644
--- a/ip/ipfou.c
+++ b/ip/ipfou.c
@@ -55,9 +55,8 @@ static int fou_parse_opt(int argc, char **argv, struct nlmsghdr *n,
 		if (!matches(*argv, "port")) {
 			NEXT_ARG();
 
-			if (get_u16(&port, *argv, 0) || port == 0)
+			if (get_be16(&port, *argv, 0) || port == 0)
 				invarg("invalid port", *argv);
-			port = htons(port);
 			port_set = 1;
 		} else if (!matches(*argv, "ipproto")) {
 			struct protoent *servptr;
diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c
index 49a40befa5d5..7ba68bc14c78 100644
--- a/ip/iplink_vxlan.c
+++ b/ip/iplink_vxlan.c
@@ -172,16 +172,12 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
 				invarg("max addresses", *argv);
 		} else if (!matches(*argv, "port") ||
 			   !matches(*argv, "srcport")) {
-			__u16 minport, maxport;
-
 			NEXT_ARG();
-			if (get_u16(&minport, *argv, 0))
+			if (get_be16(&range.low, *argv, 0))
 				invarg("min port", *argv);
 			NEXT_ARG();
-			if (get_u16(&maxport, *argv, 0))
+			if (get_be16(&range.high, *argv, 0))
 				invarg("max port", *argv);
-			range.low = htons(minport);
-			range.high = htons(maxport);
 		} else if (!matches(*argv, "dstport")) {
 			NEXT_ARG();
 			if (get_u16(&dstport, *argv, 0))
diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c
index 3baac7720816..bdbb15d2b746 100644
--- a/ip/iproute_lwtunnel.c
+++ b/ip/iproute_lwtunnel.c
@@ -190,9 +190,9 @@ static int parse_encap_ip(struct rtattr *rta, size_t len, int *argcp, char ***ar
 			NEXT_ARG();
 			if (id_ok++)
 				duparg2("id", *argv);
-			if (get_u64(&id, *argv, 0))
+			if (get_be64(&id, *argv, 0))
 				invarg("\"id\" value is invalid\n", *argv);
-			rta_addattr64(rta, len, LWTUNNEL_IP_ID, htonll(id));
+			rta_addattr64(rta, len, LWTUNNEL_IP_ID, id);
 		} else if (strcmp(*argv, "dst") == 0) {
 			inet_prefix addr;
 
@@ -267,9 +267,9 @@ static int parse_encap_ip6(struct rtattr *rta, size_t len, int *argcp, char ***a
 			NEXT_ARG();
 			if (id_ok++)
 				duparg2("id", *argv);
-			if (get_u64(&id, *argv, 0))
+			if (get_be64(&id, *argv, 0))
 				invarg("\"id\" value is invalid\n", *argv);
-			rta_addattr64(rta, len, LWTUNNEL_IP6_ID, htonll(id));
+			rta_addattr64(rta, len, LWTUNNEL_IP6_ID, id);
 		} else if (strcmp(*argv, "dst") == 0) {
 			inet_prefix addr;
 
diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index 8741ff3b302a..8d786d1334df 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -1109,15 +1109,10 @@ int xfrm_id_parse(xfrm_address_t *saddr, struct xfrm_id *id, __u16 *family,
 			filter.id_proto_mask = XFRM_FILTER_MASK_FULL;
 
 		} else if (strcmp(*argv, "spi") == 0) {
-			__u32 spi;
-
 			NEXT_ARG();
-			if (get_u32(&spi, *argv, 0))
+			if (get_be32(&id->spi, *argv, 0))
 				invarg("SPI value is invalid", *argv);
 
-			spi = htonl(spi);
-			id->spi = spi;
-
 			filter.id_spi_mask = XFRM_FILTER_MASK_FULL;
 
 		} else {
@@ -1252,9 +1247,8 @@ static int xfrm_selector_upspec_parse(struct xfrm_selector *sel,
 
 			NEXT_ARG();
 
-			if (get_u16(&sel->sport, *argv, 0))
+			if (get_be16(&sel->sport, *argv, 0))
 				invarg("value after \"sport\" is invalid", *argv);
-			sel->sport = htons(sel->sport);
 			if (sel->sport)
 				sel->sport_mask = ~((__u16)0);
 
@@ -1265,9 +1259,8 @@ static int xfrm_selector_upspec_parse(struct xfrm_selector *sel,
 
 			NEXT_ARG();
 
-			if (get_u16(&sel->dport, *argv, 0))
+			if (get_be16(&sel->dport, *argv, 0))
 				invarg("value after \"dport\" is invalid", *argv);
-			sel->dport = htons(sel->dport);
 			if (sel->dport)
 				sel->dport_mask = ~((__u16)0);
 
diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 5e2b641959bf..21ada3647ba4 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -175,11 +175,9 @@ static int xfrm_seq_parse(__u32 *seq, int *argcp, char ***argvp)
 	int argc = *argcp;
 	char **argv = *argvp;
 
-	if (get_u32(seq, *argv, 0))
+	if (get_be32(seq, *argv, 0))
 		invarg("SEQ value is invalid", *argv);
 
-	*seq = htonl(*seq);
-
 	*argcp = argc;
 	*argvp = argv;
 
@@ -359,13 +357,11 @@ static int xfrm_state_modify(int cmd, unsigned int flags, int argc, char **argv)
 			NEXT_ARG();
 			xfrm_encap_type_parse(&encap.encap_type, &argc, &argv);
 			NEXT_ARG();
-			if (get_u16(&encap.encap_sport, *argv, 0))
+			if (get_be16(&encap.encap_sport, *argv, 0))
 				invarg("SPORT value after \"encap\" is invalid", *argv);
-			encap.encap_sport = htons(encap.encap_sport);
 			NEXT_ARG();
-			if (get_u16(&encap.encap_dport, *argv, 0))
+			if (get_be16(&encap.encap_dport, *argv, 0))
 				invarg("DPORT value after \"encap\" is invalid", *argv);
-			encap.encap_dport = htons(encap.encap_dport);
 			NEXT_ARG();
 			get_addr(&oa, *argv, AF_UNSPEC);
 			memcpy(&encap.encap_oa, &oa.data, sizeof(encap.encap_oa));
diff --git a/lib/ll_proto.c b/lib/ll_proto.c
index d8df68c110b0..e094d9f81ccc 100644
--- a/lib/ll_proto.c
+++ b/lib/ll_proto.c
@@ -111,8 +111,7 @@ int ll_proto_a2n(unsigned short *id, const char *buf)
 			 return 0;
 		 }
 	}
-	if (get_u16(id, buf, 0))
+	if (get_be16(id, buf, 0))
 		return -1;
-	*id = htons(*id);
 	return 0;
 }
diff --git a/lib/utils.c b/lib/utils.c
index 9337f57a5bde..50d268066d94 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -353,6 +353,42 @@ int get_s8(__s8 *val, const char *arg, int base)
 	return 0;
 }
 
+int get_be64(__be64 *val, const char *arg, int base)
+{
+	__u64 host;
+	int err = get_u64(&host, arg, base);
+
+	/* Store the big-endian form only on success; *val is untouched on error. */
+	if (!err)
+		*val = htonll(host);
+
+	return err;
+}
+
+int get_be32(__be32 *val, const char *arg, int base)
+{
+	__u32 host;
+	int err = get_u32(&host, arg, base);
+
+	/* Store the big-endian form only on success; *val is untouched on error. */
+	if (!err)
+		*val = htonl(host);
+
+	return err;
+}
+
+int get_be16(__be16 *val, const char *arg, int base)
+{
+	__u16 host;
+	int err = get_u16(&host, arg, base);
+
+	/* Store the big-endian form only on success; *val is untouched on error. */
+	if (!err)
+		*val = htons(host);
+
+	return err;
+}
+
 /* This uses a non-standard parsing (ie not inet_aton, or inet_pton)
  * because of legacy choice to parse 10.8 as 10.8.0.0 not 10.0.0.8
  */
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 306f056c1b66..fd2014b374a1 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -150,11 +150,11 @@ static int flower_parse_port(char *str, __u8 ip_port,
 		return -1;
 	}
 
-	ret = get_u16(&port, str, 10);
+	ret = get_be16(&port, str, 10);
 	if (ret)
 		return -1;
 
-	addattr16(n, MAX_MSG, type, htons(port));
+	addattr16(n, MAX_MSG, type, port);
 
 	return 0;
 }
diff --git a/tc/f_u32.c b/tc/f_u32.c
index 62995153923a..e2d39b3d52fe 100644
--- a/tc/f_u32.c
+++ b/tc/f_u32.c
@@ -766,12 +766,9 @@ static int parse_offset(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
 			}
 			sel->flags |= TC_U32_VAROFFSET;
 		} else if (matches(*argv, "mask") == 0) {
-			__u16 mask;
-
 			NEXT_ARG();
-			if (get_u16(&mask, *argv, 16))
+			if (get_be16(&sel->offmask, *argv, 16))
 				return -1;
-			sel->offmask = htons(mask);
 			sel->flags |= TC_U32_VAROFFSET;
 		} else if (matches(*argv, "shift") == 0) {
 			int shift;
@@ -801,12 +798,9 @@ static int parse_hashkey(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
 
 	while (argc > 0) {
 		if (matches(*argv, "mask") == 0) {
-			__u32 mask;
-
 			NEXT_ARG();
-			if (get_u32(&mask, *argv, 16))
+			if (get_be32(&sel->hmask, *argv, 16))
 				return -1;
-			sel->hmask = htonl(mask);
 		} else if (matches(*argv, "at") == 0) {
 			int num;
 
-- 
2.8.0

^ permalink raw reply related

* [PATCH iproute2 3/4] utils: provide get_hex to read a hex digit from a char
From: Sabrina Dubroca @ 2016-04-14 13:01 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Sabrina Dubroca
In-Reply-To: <cover.1460622809.git.sd@queasysnail.net>

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 include/utils.h |  1 +
 ip/ipl2tp.c     | 15 ++-------------
 lib/ipx_pton.c  | 18 +++---------------
 lib/utils.c     | 12 ++++++++++++
 4 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/include/utils.h b/include/utils.h
index a9aa89162950..27562a1c949c 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -99,6 +99,7 @@ int get_prefix(inet_prefix *dst, char *arg, int family);
 int mask2bits(__u32 netmask);
 int get_addr_ila(__u64 *val, const char *arg);
 
+int get_hex(char c);
 int get_integer(int *val, const char *arg, int base);
 int get_unsigned(unsigned *val, const char *arg, int base);
 int get_time_rtt(unsigned *val, const char *arg, int *raw);
diff --git a/ip/ipl2tp.c b/ip/ipl2tp.c
index 3c8ee9355439..1f84c6149f39 100644
--- a/ip/ipl2tp.c
+++ b/ip/ipl2tp.c
@@ -425,30 +425,19 @@ static int get_tunnel(struct l2tp_data *p)
  * Command parser
  *****************************************************************************/
 
-static int hex(char ch)
-{
-	if ((ch >= 'a') && (ch <= 'f'))
-		return ch - 'a' + 10;
-	if ((ch >= '0') && (ch <= '9'))
-		return ch - '0';
-	if ((ch >= 'A') && (ch <= 'F'))
-		return ch - 'A' + 10;
-	return -1;
-}
-
 static int hex2mem(const char *buf, uint8_t *mem, int count)
 {
 	int i, j;
 	int c;
 
 	for (i = 0, j = 0; i < count; i++, j += 2) {
-		c = hex(buf[j]);
+		c = get_hex(buf[j]);
 		if (c < 0)
 			goto err;
 
 		mem[i] = c << 4;
 
-		c = hex(buf[j + 1]);
+		c = get_hex(buf[j + 1]);
 		if (c < 0)
 			goto err;
 
diff --git a/lib/ipx_pton.c b/lib/ipx_pton.c
index 3dca2713719a..071a775e7437 100644
--- a/lib/ipx_pton.c
+++ b/lib/ipx_pton.c
@@ -6,18 +6,6 @@
 
 #include "utils.h"
 
-static u_int32_t hexget(char c)
-{
-	if (c >= 'A' && c <= 'F')
-		return c - 'A' + 10;
-	if (c >= 'a' && c <= 'f')
-		return c - 'a' + 10;
-	if (c >= '0' && c <= '9')
-		return c - '0';
-
-	return 0xf0;
-}
-
 static int ipx_getnet(u_int32_t *net, const char *str)
 {
 	int i;
@@ -25,7 +13,7 @@ static int ipx_getnet(u_int32_t *net, const char *str)
 
 	for(i = 0; *str && (i < 8); i++) {
 
-		if ((tmp = hexget(*str)) & 0xf0) {
+		if ((tmp = get_hex(*str)) == -1) {
 			if (*str == '.')
 				return 0;
 			else
@@ -49,11 +37,11 @@ static int ipx_getnode(u_int8_t *node, const char *str)
 	u_int32_t tmp;
 
 	for(i = 0; i < 6; i++) {
-		if ((tmp = hexget(*str++)) & 0xf0)
+		if ((tmp = get_hex(*str++)) == -1)
 			return -1;
 		node[i] = (u_int8_t)tmp;
 		node[i] <<= 4;
-		if ((tmp = hexget(*str++)) & 0xf0)
+		if ((tmp = get_hex(*str++)) == -1)
 			return -1;
 		node[i] |= (u_int8_t)tmp;
 		if (*str == ':')
diff --git a/lib/utils.c b/lib/utils.c
index 50d268066d94..591e70cc3450 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -37,6 +37,18 @@
 
 int timestamp_short = 0;
 
+int get_hex(char c)
+{
+	/* Map one ASCII hex digit to its value 0..15; -1 for anything else. */
+	if ('0' <= c && c <= '9')
+		return c - '0';
+	if ('a' <= c && c <= 'f')
+		return 10 + (c - 'a');
+	if ('A' <= c && c <= 'F')
+		return 10 + (c - 'A');
+	return -1;
+}
+
 int get_integer(int *val, const char *arg, int base)
 {
 	long res;
-- 
2.8.0

^ permalink raw reply related

* [PATCH iproute2 4/4] ip: add MACsec support
From: Sabrina Dubroca @ 2016-04-14 13:01 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Sabrina Dubroca
In-Reply-To: <cover.1460622809.git.sd@queasysnail.net>

Extend ip-link to create MACsec devices

  ip link add link <master> <macsec> type macsec [options]

Add an `ip macsec` command to configure receive-side secure channels
and secure associations within a macsec netdevice.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
Changes since v1:
 - use new utils
 - adapt to new netlink API
 - clean up stats printing
 - make commands and output symmetrical

Stephen, this depends on if_macsec.h being picked up from kernel uapi,
but it wasn't part of the recent headers updates.  Did I screw up
something in the kernel's Makefiles?
---
 ip/Makefile           |    2 +-
 ip/ip.c               |    3 +-
 ip/ip_common.h        |    1 +
 ip/ipmacsec.c         | 1231 +++++++++++++++++++++++++++++++++++++++++++++++++
 man/man8/Makefile     |    2 +-
 man/man8/ip-link.8.in |  134 ++++++
 man/man8/ip-macsec.8  |  110 +++++
 7 files changed, 1480 insertions(+), 3 deletions(-)
 create mode 100644 ip/ipmacsec.c
 create mode 100644 man/man8/ip-macsec.8

diff --git a/ip/Makefile b/ip/Makefile
index f3d298739cac..fe59ea8bdc76 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -7,7 +7,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
     iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
     link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
     iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
-    iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o
+    iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o
 
 RTMONOBJ=rtmon.o
 
diff --git a/ip/ip.c b/ip/ip.c
index 123f18133a2e..166ef17499d4 100644
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -51,7 +51,7 @@ static void usage(void)
 "       ip [ -force ] -batch filename\n"
 "where  OBJECT := { link | address | addrlabel | route | rule | neigh | ntable |\n"
 "                   tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n"
-"                   netns | l2tp | fou | tcp_metrics | token | netconf }\n"
+"                   netns | l2tp | fou | macsec | tcp_metrics | token | netconf }\n"
 "       OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
 "                    -h[uman-readable] | -iec |\n"
 "                    -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n"
@@ -84,6 +84,7 @@ static const struct cmd {
 	{ "link",	do_iplink },
 	{ "l2tp",	do_ipl2tp },
 	{ "fou",	do_ipfou },
+	{ "macsec",	do_ipmacsec },
 	{ "tunnel",	do_iptunnel },
 	{ "tunl",	do_iptunnel },
 	{ "tuntap",	do_iptuntap },
diff --git a/ip/ip_common.h b/ip/ip_common.h
index b7361a8fc65d..3a7a2a9d0e00 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -43,6 +43,7 @@ int do_iptunnel(int argc, char **argv);
 int do_ip6tunnel(int argc, char **argv);
 int do_iptuntap(int argc, char **argv);
 int do_iplink(int argc, char **argv);
+int do_ipmacsec(int argc, char **argv);
 int do_ipmonitor(int argc, char **argv);
 int do_multiaddr(int argc, char **argv);
 int do_multiroute(int argc, char **argv);
diff --git a/ip/ipmacsec.c b/ip/ipmacsec.c
new file mode 100644
index 000000000000..70408222fcba
--- /dev/null
+++ b/ip/ipmacsec.c
@@ -0,0 +1,1231 @@
+/*
+ * ipmacsec.c		"ip macsec".
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Sabrina Dubroca <sd@queasysnail.net>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/genetlink.h>
+#include <linux/if_ether.h>
+#include <linux/if_macsec.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+#include "ll_map.h"
+#include "libgenl.h"
+
+static const char *values_on_off[] = { "on", "off" };	/* NOTE(review): index 0 is "on", index 1 is "off" — confirm users of one_of() expect that order */
+
+static const char *VALIDATE_STR[] = {	/* printable names indexed by MACSEC_VALIDATE_* value; read by print_attrs() */
+	[MACSEC_VALIDATE_DISABLED] = "disabled",
+	[MACSEC_VALIDATE_CHECK] = "check",
+	[MACSEC_VALIDATE_STRICT] = "strict",
+};
+
+struct sci {
+	__u64 sci;	/* set by get_sci(), or built by make_sci() from port + abuf */
+	__be16 port;	/* big-endian: filled through get_port(), which takes a __be16 * */
+	char abuf[6];	/* link-layer address bytes written by ll_addr_a2n() */
+};
+
+struct sa_desc {
+	__u8 an;	/* association number 0..3 (see get_an); 0xff = no SA part in the request */
+	__u32 pn;	/* packet number; 0 = not given */
+	__u32 key_id;	/* sent as MACSEC_SA_ATTR_KEYID when a key is present */
+	__u32 key_len;	/* length passed with key[] to addattr_l(); 0 = no key given */
+	__u8 key[MACSEC_MAX_KEY_LEN];
+	__u8 active;	/* 0xff = not specified; otherwise set from "on"/"off" */
+};
+
+struct cipher_args {
+	__u64 id;	/* presumably a cipher suite id as understood by cs_id_to_name() — TODO confirm */
+	__u8 icv_len;	/* presumably range-checked by get_icvlen() — TODO confirm against the caller */
+};
+
+struct txsc_desc {	/* NOTE(review): no user of this struct is visible in this part of the patch */
+	int ifindex;
+	__u64 sci;
+	__be16 port;
+	struct cipher_args cipher;
+	__u32 flags;
+	__u32 flags_set;	/* presumably marks which bits of flags were given explicitly — TODO confirm */
+	__u32 window;
+	enum macsec_validation_type validate;
+	bool validate_set;	/* presumably true once validate was given explicitly — TODO confirm */
+	__u8 encoding_sa;
+};
+
+struct rxsc_desc {
+	int ifindex;	/* device index; do_modify_rxsci() passes it on to do_modify_nl() */
+	__u64 sci;	/* sent as MACSEC_RXSC_ATTR_SCI */
+	__u8 active;	/* 0xff = not specified; otherwise set from "on"/"off" */
+};
+
+#define MACSEC_BUFLEN 1024	/* size handed to MACSEC_GENL_REQ and to every addattr helper below */
+
+
+/* netlink socket */
+static struct rtnl_handle genl_rth;
+static int genl_family = -1;	/* stays negative until init_genl() resolves MACSEC_GENL_NAME */
+
+#define MACSEC_GENL_REQ(_req, _bufsiz, _cmd, _flags) \
+	GENL_REQUEST(_req, _bufsiz, genl_family, 0, MACSEC_GENL_VERSION, _cmd, _flags)
+
+
+static void init_genl(void)
+{	/* Open the generic netlink socket and resolve the MACsec family id, once. */
+	if (genl_family >= 0)	/* already resolved by an earlier call */
+		return;
+
+	if (rtnl_open_byproto(&genl_rth, 0, NETLINK_GENERIC) < 0) {
+		fprintf(stderr, "Cannot open generic netlink socket\n");
+		exit(1);
+	}
+
+	genl_family = genl_resolve_family(&genl_rth, MACSEC_GENL_NAME);
+	if (genl_family < 0)	/* this function prints nothing itself before exiting */
+		exit(1);
+}
+
+static void ipmacsec_usage(void)
+{	/* Print the "ip macsec" synopsis to stderr in one call, then terminate. */
+	fprintf(stderr, "Usage: ip macsec add DEV tx sa { 0..3 } [ OPTS ] key ID KEY\n"
+		"       ip macsec set DEV tx sa { 0..3 } [ OPTS ]\n"
+		"       ip macsec del DEV tx sa { 0..3 }\n"
+		"       ip macsec add DEV rx SCI [ on | off ]\n"
+		"       ip macsec set DEV rx SCI [ on | off ]\n"
+		"       ip macsec del DEV rx SCI\n"
+		"       ip macsec add DEV rx SCI sa { 0..3 } [ OPTS ] key ID KEY\n"
+		"       ip macsec set DEV rx SCI sa { 0..3 } [ OPTS ]\n"
+		"       ip macsec del DEV rx SCI sa { 0..3 }\n"
+		"       ip macsec show\n"
+		"       ip macsec show DEV\n"
+		"where  OPTS := [ pn <u32> ] [ on | off ]\n"
+		"       SCI  := { sci <u64> | port <u16> address <lladdr> }\n");
+
+	exit(-1);	/* never returns to the caller */
+}
+
+static int one_of(const char *msg, const char *realval, const char **list,
+		  size_t len, int *index)
+{	/* Find realval in list via matches(); 0 and *index on a hit, -1 plus a message otherwise. */
+	size_t pos;
+
+	for (pos = 0; pos < len; pos++) {	/* first accepted entry wins */
+		if (matches(realval, list[pos]) != 0)
+			continue;
+		*index = (int)pos;
+		return 0;
+	}
+
+	fprintf(stderr, "Error: argument of \"%s\" must be one of ", msg);
+	for (pos = 0; pos < len; pos++)
+		fprintf(stderr, "\"%s\", ", list[pos]);
+	fprintf(stderr, "not \"%s\"\n", realval);
+	return -1;
+}
+
+static int get_an(__u8 *val, const char *arg)
+{
+	int err = get_u8(val, arg, 0);
+
+	/*
+	 * Parse failures are passed through unchanged; a value that parses
+	 * but lies outside 0..3 is rejected with -1.
+	 */
+	if (err)
+		return err;
+	return *val > 3 ? -1 : 0;
+}
+
+static int get_sci(__u64 *sci, const char *arg)
+{	/* Parse an SCI argument; the text is always read as base 16. */
+	return get_u64(sci, arg, 16);
+}
+
+static int get_port(__be16 *port, const char *arg)
+{	/* Parse a decimal port number; get_be16() stores it big-endian. */
+	return get_be16(port, arg, 10);
+}
+
+#define _STR(a) #a	/* stringify the argument exactly as written */
+#define STR(a) _STR(a)	/* expand the argument first, then stringify it */
+
+static void get_icvlen(__u8 *icvlen, char *arg)
+{	/* Parse a decimal ICV length; both failure cases are reported through invarg(). */
+	int ret = get_u8(icvlen, arg, 10);
+
+	if (ret)
+		invarg("expected ICV length", arg);
+
+	if (*icvlen < MACSEC_MIN_ICV_LEN || *icvlen > MACSEC_MAX_ICV_LEN)
+		invarg("ICV length must be in the range {"
+		       STR(MACSEC_MIN_ICV_LEN) ".." STR(MACSEC_MAX_ICV_LEN)	/* limits are pasted into the message at compile time */
+		       "}", arg);
+}
+
+static bool get_sa(int *argcp, char ***argvp, __u8 *an)
+{	/* Consume "sa <an>" when it is next; otherwise return false and consume nothing. */
+	int argc = *argcp;
+	char **argv = *argvp;
+	int ret;
+
+	if (argc <= 0 || strcmp(*argv, "sa") != 0)
+		return false;
+
+	NEXT_ARG();
+	ret = get_an(an, *argv);
+	if (ret)
+		invarg("expected an { 0..3 }", *argv);
+	argc--; argv++;	/* step past the AN so the caller resumes at the following word */
+
+	*argvp = argv;	/* the caller's cursor is only updated on the success path */
+	*argcp = argc;
+	return true;
+}
+
+static int parse_sa_args(int *argcp, char ***argvp, struct sa_desc *sa)
+{	/* Parse SA options ("pn", "key", "on", "off") until the argument list is exhausted. */
+	int argc = *argcp;
+	char **argv = *argvp;
+	int ret;
+	bool active_set = false;	/* true once either "on" or "off" was seen */
+
+	while (argc > 0) {
+		if (strcmp(*argv, "pn") == 0) {
+			if (sa->pn != 0)	/* a non-zero pn means "pn" was already given */
+				duparg2("pn", "pn");
+			NEXT_ARG();
+			ret = get_u32(&sa->pn, *argv, 0);
+			if (ret)
+				invarg("expected pn", *argv);
+			if (sa->pn == 0)
+				invarg("expected pn != 0", *argv);
+		} else if (strcmp(*argv, "key") == 0) {
+			NEXT_ARG();
+			ret = get_u32(&sa->key_id, *argv, 0);
+			if (ret)
+				invarg("expected key id", *argv);
+			NEXT_ARG();
+			if (!hexstring_a2n(*argv, sa->key, MACSEC_MAX_KEY_LEN, &sa->key_len))	/* NOTE(review): assumes a false-y return means failure and that key_len is filled on success — confirm */
+				invarg("expected key", *argv);
+		} else if (strcmp(*argv, "on") == 0) {
+			if (active_set)
+				duparg2("on/off", "on");
+			sa->active = true;
+			active_set = true;
+		} else if (strcmp(*argv, "off") == 0) {
+			if (active_set)
+				duparg2("on/off", "off");
+			sa->active = false;
+			active_set = true;
+		} else {
+			fprintf(stderr, "macsec: unknown command \"%s\"?\n",
+				*argv);
+			ipmacsec_usage();
+		}
+
+		argv++; argc--;
+	}
+
+	*argvp = argv;
+	*argcp = argc;
+	return 0;	/* the only return: every error path above goes through invarg/duparg2/usage */
+}
+
+static __u64 make_sci(char *addr, __be16 port)
+{
+	__u64 packed;
+	char *out = (char *)&packed;
+
+	memcpy(out, addr, ETH_ALEN);	/* first ETH_ALEN bytes: the address */
+	memcpy(out + ETH_ALEN, &port, sizeof(port));	/* then the port bytes, copied as given */
+	return packed;
+}
+
+static bool sci_complete(bool sci, bool port, bool addr, bool port_only)
+{	/* Complete with an explicit sci, or with a port plus either an address or port_only. */
+	return sci ? true : (port && (addr || port_only));
+}
+
+static int get_sci_portaddr(struct sci *sci, int *argcp, char ***argvp,
+			    bool port_only, bool optional)
+{	/* Parse "sci <hex>", "port <n>" and "address <lladdr>" words into *sci. */
+	int argc = *argcp;
+	char **argv = *argvp;
+	int ret;
+	bool p = false, a = false, s = false;	/* seen so far: port, address, sci */
+
+	while (argc > 0) {
+		if (strcmp(*argv, "sci") == 0) {
+			if (p)	/* "sci" after "port": the user is told an address was expected */
+				invarg("expected address", *argv);
+			if (a)	/* "sci" after "address": the user is told a port was expected */
+				invarg("expected port", *argv);
+			NEXT_ARG();
+			ret = get_sci(&sci->sci, *argv);
+			if (ret)
+				invarg("expected sci", *argv);
+			s = true;
+		} else if (strcmp(*argv, "port") == 0) {
+			NEXT_ARG();
+			ret = get_port(&sci->port, *argv);
+			if (ret)
+				invarg("expected port", *argv);
+			if (sci->port == 0)
+				invarg("expected port != 0", *argv);
+			p = true;
+		} else if (strcmp(*argv, "address") == 0) {
+			NEXT_ARG();
+			ret = ll_addr_a2n(sci->abuf, sizeof(sci->abuf), *argv);
+			if (ret < 0)
+				invarg("expected lladdr", *argv);
+			a = true;
+		} else if (optional) {	/* unknown word: leave it for the caller */
+			break;
+		} else {
+			invarg("expected sci, port, or address", *argv);
+		}
+
+		argv++; argc--;
+
+		if (sci_complete(s, p, a, port_only))	/* stop as soon as the SCI is fully specified */
+			break;
+	}
+
+	if (!optional && !sci_complete(s, p, a, port_only))
+		return -1;	/* mandatory SCI missing or incomplete; *argcp/*argvp are left untouched */
+
+	if (p && a)	/* given as port + address: derive the 64-bit SCI */
+		sci->sci = make_sci(sci->abuf, sci->port);
+
+	*argvp = argv;
+	*argcp = argc;
+
+	return p || a || s;	/* 1 if any of the three was parsed, 0 if nothing was */
+}
+
+static bool parse_rxsci(int *argcp, char ***argvp, struct rxsc_desc *rxsc,
+			struct sa_desc *rxsa)
+{	/* Read the mandatory receive SCI, then an optional "sa <an>"; returns whether an SA followed. */
+	struct sci sci = { 0 };
+
+	if (*argcp == 0 ||
+	    get_sci_portaddr(&sci, argcp, argvp, false, false) < 0) {	/* port_only = false, optional = false */
+		fprintf(stderr, "expected sci\n");
+		ipmacsec_usage();
+	}
+
+	rxsc->sci = sci.sci;
+
+	return get_sa(argcp, argvp, &rxsa->an);
+}
+
+static int parse_rxsci_args(int *argcp, char ***argvp, struct rxsc_desc *rxsc)
+{	/* Accept at most one of "on"/"off" for a receive SC; any other word prints usage. */
+	int argc = *argcp;
+	char **argv = *argvp;
+	bool active_set = false;	/* true once either keyword was seen */
+
+	while (argc > 0) {
+		if (strcmp(*argv, "on") == 0) {
+			if (active_set)
+				duparg2("on/off", "on");
+			rxsc->active = true;
+			active_set = true;
+		} else if (strcmp(*argv, "off") == 0) {
+			if (active_set)
+				duparg2("on/off", "off");
+			rxsc->active = false;
+			active_set = true;
+		} else {
+			fprintf(stderr, "macsec: unknown command \"%s\"?\n",
+				*argv);
+			ipmacsec_usage();
+		}
+
+		argv++; argc--;
+	}
+
+	*argvp = argv;
+	*argcp = argc;
+	return 0;	/* the only return value produced here */
+}
+
+enum cmd {	/* first index into macsec_commands[] */
+	CMD_ADD,
+	CMD_DEL,
+	CMD_UPD,
+	__CMD_MAX	/* number of commands; sizes macsec_commands[] */
+};
+
+static const enum macsec_nl_commands macsec_commands[__CMD_MAX][2][2] = {	/* [cmd][0 = SC only, 1 = SA][0 = tx, 1 = rx] */
+	[CMD_ADD] = {
+		[0] = {-1, MACSEC_CMD_ADD_RXSC},	/* -1 fills the tx slot without an SA; no visible caller reads it */
+		[1] = {MACSEC_CMD_ADD_TXSA, MACSEC_CMD_ADD_RXSA},
+	},
+	[CMD_UPD] = {
+		[0] = {-1, MACSEC_CMD_UPD_RXSC},
+		[1] = {MACSEC_CMD_UPD_TXSA, MACSEC_CMD_UPD_RXSA},
+	},
+	[CMD_DEL] = {
+		[0] = {-1, MACSEC_CMD_DEL_RXSC},
+		[1] = {MACSEC_CMD_DEL_TXSA, MACSEC_CMD_DEL_RXSA},
+	},
+};
+
+static int do_modify_nl(enum cmd c, enum macsec_nl_commands cmd, int ifindex,
+			struct rxsc_desc *rxsc, struct sa_desc *sa)
+{	/* Build one MACsec genetlink request from the descriptors and send it; 0 on success, -2 if rtnl_talk() fails. */
+	struct rtattr *attr_sa;
+
+	MACSEC_GENL_REQ(req, MACSEC_BUFLEN, cmd, NLM_F_REQUEST);
+
+	addattr32(&req.n, MACSEC_BUFLEN, MACSEC_ATTR_IFINDEX, ifindex);
+	if (rxsc) {	/* NULL for tx requests (see do_modify_txsa) */
+		struct rtattr *attr_rxsc = addattr_nest(&req.n, MACSEC_BUFLEN, MACSEC_ATTR_RXSC_CONFIG);
+
+		addattr64(&req.n, MACSEC_BUFLEN, MACSEC_RXSC_ATTR_SCI, rxsc->sci);
+		if (c != CMD_DEL && rxsc->active != 0xff)	/* 0xff = on/off was not given */
+			addattr8(&req.n, MACSEC_BUFLEN, MACSEC_RXSC_ATTR_ACTIVE, rxsc->active);
+
+		addattr_nest_end(&req.n, attr_rxsc);
+	}
+
+	if (sa->an == 0xff)	/* no SA named: send the request without an SA_CONFIG nest */
+		goto talk;
+
+	attr_sa = addattr_nest(&req.n, MACSEC_BUFLEN, MACSEC_ATTR_SA_CONFIG);
+
+	addattr8(&req.n, MACSEC_BUFLEN, MACSEC_SA_ATTR_AN, sa->an);
+
+	if (c != CMD_DEL) {	/* a delete carries only the AN */
+		if (sa->pn)
+			addattr32(&req.n, MACSEC_BUFLEN, MACSEC_SA_ATTR_PN,
+				  sa->pn);
+
+		if (sa->key_len) {	/* key id and key bytes are only sent together */
+			addattr64(&req.n, MACSEC_BUFLEN, MACSEC_SA_ATTR_KEYID,
+				  sa->key_id);
+			addattr_l(&req.n, MACSEC_BUFLEN, MACSEC_SA_ATTR_KEY,
+				  sa->key, sa->key_len);
+		}
+
+		if (sa->active != 0xff)	/* 0xff = on/off was not given */
+			addattr8(&req.n, MACSEC_BUFLEN, MACSEC_SA_ATTR_ACTIVE, sa->active);
+	}
+
+	addattr_nest_end(&req.n, attr_sa);
+
+talk:
+	if (rtnl_talk(&genl_rth, &req.n, NULL, 0) < 0)
+		return -2;
+
+	return 0;
+}
+
+static bool check_sa_args(enum cmd c, struct sa_desc *sa)
+{	/* Returns true when the SA arguments are NOT acceptable for command c (callers treat true as an error). */
+	if (c == CMD_ADD) {
+		if (!sa->key_len) {
+			fprintf(stderr, "cannot create SA without key\n");
+			return true;
+		}
+
+		if (sa->pn == 0) {
+			fprintf(stderr, "must specify a packet number != 0\n");
+			return true;
+		}
+	} else if (c == CMD_UPD) {
+		if (sa->key_len) {	/* a key was supplied on an update */
+			fprintf(stderr, "cannot change key on SA\n");
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static int do_modify_txsa(enum cmd c, int argc, char **argv, int ifindex)
+{	/* Handle the "tx" form: a mandatory "sa <an>" followed by SA options. */
+	struct sa_desc txsa = {0};
+	enum macsec_nl_commands cmd;
+
+	txsa.an = 0xff;	/* 0xff = not set yet */
+	txsa.active = 0xff;
+
+	if (argc == 0 || !get_sa(&argc, &argv, &txsa.an))
+		ipmacsec_usage();
+
+	if (c == CMD_DEL)	/* delete needs only the AN: skip option parsing and checks */
+		goto modify;
+
+	if (parse_sa_args(&argc, &argv, &txsa))
+		return -1;
+
+	if (check_sa_args(c, &txsa))
+		return -1;
+
+modify:
+	cmd = macsec_commands[c][1][0];	/* [1] = SA level, [0] = tx */
+	return do_modify_nl(c, cmd, ifindex, NULL, &txsa);
+}
+
+static int do_modify_rxsci(enum cmd c, int argc, char **argv, int ifindex)
+{	/* Handle the "rx" form: an SCI, then either "sa <an>" with SA options or SC-level on/off. */
+	struct rxsc_desc rxsc = {0};
+	struct sa_desc rxsa = {0};
+	bool sa_set;
+	enum macsec_nl_commands cmd;
+
+	rxsc.ifindex = ifindex;
+	rxsc.active = 0xff;	/* 0xff = not set yet */
+	rxsa.an = 0xff;
+	rxsa.active = 0xff;
+
+	sa_set = parse_rxsci(&argc, &argv, &rxsc, &rxsa);
+
+	if (c == CMD_DEL)	/* delete takes no further options */
+		goto modify;
+
+	if (sa_set && (parse_sa_args(&argc, &argv, &rxsa) ||
+		       check_sa_args(c, &rxsa)))
+		return -1;
+	if (!sa_set && parse_rxsci_args(&argc, &argv, &rxsc))
+		return -1;
+
+modify:
+	cmd = macsec_commands[c][sa_set][1];	/* [sa_set] picks SC vs SA level, [1] = rx */
+	return do_modify_nl(c, cmd, rxsc.ifindex, &rxsc, &rxsa);
+}
+
+static int do_modify(enum cmd c, int argc, char **argv)
+{	/* Resolve DEV to an ifindex, then dispatch on the "tx" / "rx" keyword. */
+	int ifindex;
+
+	if (argc == 0)
+		ipmacsec_usage();
+
+	ifindex = ll_name_to_index(*argv);
+	if (!ifindex) {
+		fprintf(stderr, "Device \"%s\" does not exist.\n", *argv);
+		return -1;
+	}
+	argc--; argv++;
+
+	if (argc == 0)
+		ipmacsec_usage();
+
+	if (strcmp(*argv, "tx") == 0)
+		return do_modify_txsa(c, argc-1, argv+1, ifindex);
+	if (strcmp(*argv, "rx") == 0)
+		return do_modify_rxsci(c, argc-1, argv+1, ifindex);
+
+	ipmacsec_usage();
+	return -1;	/* not reached: ipmacsec_usage() calls exit() */
+}
+
+/* dump/show */
+static struct {	/* NOTE(review): presumably restricts "show" output; its users are not visible in this part of the patch */
+	int ifindex;
+	__u64 sci;
+} filter;
+
+static int validate_dump(struct rtattr **attrs)
+{	/* Non-zero only when every top-level attribute listed below is present. */
+	return attrs[MACSEC_ATTR_IFINDEX] && attrs[MACSEC_ATTR_SECY] &&
+	       attrs[MACSEC_ATTR_TXSA_LIST] && attrs[MACSEC_ATTR_RXSC_LIST] &&
+	       attrs[MACSEC_ATTR_TXSC_STATS] && attrs[MACSEC_ATTR_SECY_STATS];
+
+}
+
+static int validate_secy_dump(struct rtattr **attrs)
+{	/* Non-zero only when every SecY attribute listed below is present. */
+	return attrs[MACSEC_SECY_ATTR_SCI] &&
+	       attrs[MACSEC_SECY_ATTR_ENCODING_SA] &&
+	       attrs[MACSEC_SECY_ATTR_CIPHER_SUITE] &&
+	       attrs[MACSEC_SECY_ATTR_ICV_LEN] &&
+	       attrs[MACSEC_SECY_ATTR_PROTECT] &&
+	       attrs[MACSEC_SECY_ATTR_REPLAY] &&
+	       attrs[MACSEC_SECY_ATTR_OPER] &&
+	       attrs[MACSEC_SECY_ATTR_VALIDATE] &&
+	       attrs[MACSEC_SECY_ATTR_ENCRYPT] &&
+	       attrs[MACSEC_SECY_ATTR_INC_SCI] &&
+	       attrs[MACSEC_SECY_ATTR_ES] &&
+	       attrs[MACSEC_SECY_ATTR_SCB];	/* MACSEC_SECY_ATTR_WINDOW is not required here */
+}
+
+static void print_flag(FILE *f, struct rtattr *attrs[], const char *desc,
+		       int field)
+{	/* Print "<desc> on " or "<desc> off " for a u8 attribute; prints nothing when it is absent. */
+	if (attrs[field])
+		fprintf(f, "%s %s ", desc,
+			rta_getattr_u8(attrs[field]) ? "on" : "off");	/* any non-zero value reads as "on" */
+}
+
+#define DEFAULT_CIPHER_NAME "GCM-AES-128"	/* name printed for both default cipher ids below */
+
+static const char *cs_id_to_name(__u64 cid)
+{	/* Map a cipher suite id to a printable name; unrecognised ids give "(unknown)". */
+	switch (cid) {
+	case DEFAULT_CIPHER_ID:
+	case DEFAULT_CIPHER_ALT:
+		return DEFAULT_CIPHER_NAME;
+	default:
+		return "(unknown)";
+	}
+}
+
+static void print_cipher_suite(const char *prefix, __u64 cid, __u8 icv_len)
+{	/* Print one "<prefix>cipher suite: NAME, using ICV length N" line to stdout. */
+	printf("%scipher suite: %s, using ICV length %d\n", prefix,
+	       cs_id_to_name(cid), icv_len);
+}
+
+static void print_attrs(const char *prefix, struct rtattr *attrs[])
+{	/* Print whichever of the flag, validate, window and cipher attributes are present. */
+	print_flag(stdout, attrs, "protect", MACSEC_SECY_ATTR_PROTECT);
+
+	if (attrs[MACSEC_SECY_ATTR_VALIDATE]) {
+		__u8 v = rta_getattr_u8(attrs[MACSEC_SECY_ATTR_VALIDATE]);
+		/* v comes from the kernel: never index past the name table */
+		printf("validate %s ", v < sizeof(VALIDATE_STR) / sizeof(VALIDATE_STR[0]) ?
+		       VALIDATE_STR[v] : "(unknown)");
+	}
+
+	print_flag(stdout, attrs, "sc", MACSEC_RXSC_ATTR_ACTIVE);
+	print_flag(stdout, attrs, "sa", MACSEC_SA_ATTR_ACTIVE);
+	print_flag(stdout, attrs, "encrypt", MACSEC_SECY_ATTR_ENCRYPT);
+	print_flag(stdout, attrs, "send_sci", MACSEC_SECY_ATTR_INC_SCI);
+	print_flag(stdout, attrs, "end_station", MACSEC_SECY_ATTR_ES);
+	print_flag(stdout, attrs, "scb", MACSEC_SECY_ATTR_SCB);
+
+	print_flag(stdout, attrs, "replay", MACSEC_SECY_ATTR_REPLAY);
+	if (attrs[MACSEC_SECY_ATTR_WINDOW])
+		printf("window %u ",	/* the window is unsigned: %u, not %d */
+		       rta_getattr_u32(attrs[MACSEC_SECY_ATTR_WINDOW]));
+
+	if (attrs[MACSEC_SECY_ATTR_CIPHER_SUITE] && attrs[MACSEC_SECY_ATTR_ICV_LEN]) {
+		printf("\n");
+		print_cipher_suite(prefix,
+			rta_getattr_u64(attrs[MACSEC_SECY_ATTR_CIPHER_SUITE]),
+			rta_getattr_u8(attrs[MACSEC_SECY_ATTR_ICV_LEN]));
+	}
+}
+
+static void print_one_stat(const char **names, struct rtattr **attr, int idx, bool long_stat)
+{	/* Print one counter right-aligned under its column header, or '-' when the attribute is absent. */
+	int pad = strlen(names[idx]) + 1;	/* header width plus the separating space */
+
+	if (attr[idx]) {
+		if (long_stat)	/* cast: __u64 is not unsigned long long on every ABI */
+			printf("%*llu", pad, (unsigned long long)rta_getattr_u64(attr[idx]));
+		else
+			printf("%*u", pad, rta_getattr_u32(attr[idx]));
+	} else {
+		printf("%*c", pad, '-');
+	}
+}
+
+static const char *txsc_stats_names[] = {	/* column headers indexed by MACSEC_TXSC_STATS_ATTR_*; each label matches its counter */
+	[MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED] = "OutPktsProtected",
+	[MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED] = "OutPktsEncrypted",
+	[MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED] = "OutOctetsProtected",
+	[MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED] = "OutOctetsEncrypted",
+};
+
+static void print_txsc_stats(const char *prefix, struct rtattr *attr)
+{	/* Print the TX SC counters as a header row plus a value row; no-op without -s or without attr. */
+	struct rtattr *stats[MACSEC_TXSC_STATS_ATTR_MAX + 1];
+	int i;
+
+	if (!attr || show_stats == 0)
+		return;
+
+	parse_rtattr_nested(stats, MACSEC_TXSC_STATS_ATTR_MAX, attr);	/* max is the highest index, not the array length */
+	printf("%sstats:", prefix);
+
+	for (i = 1; i < NUM_MACSEC_TXSC_STATS_ATTR; i++)	/* starts at 1: index 0 has no label */
+		printf(" %s", txsc_stats_names[i]);
+
+	printf("\n%s      ", prefix);
+
+	for (i = 1; i < NUM_MACSEC_TXSC_STATS_ATTR; i++)
+		print_one_stat(txsc_stats_names, stats, i, true);	/* true = read as 64-bit counters */
+
+	printf("\n");
+}
+
+static const char *secy_stats_names[] = {	/* column headers indexed by MACSEC_SECY_STATS_ATTR_* */
+	[MACSEC_SECY_STATS_ATTR_OUT_PKTS_UNTAGGED] = "OutPktsUntagged",
+	[MACSEC_SECY_STATS_ATTR_IN_PKTS_UNTAGGED] = "InPktsUntagged",
+	[MACSEC_SECY_STATS_ATTR_OUT_PKTS_TOO_LONG] = "OutPktsTooLong",
+	[MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_TAG] = "InPktsNoTag",
+	[MACSEC_SECY_STATS_ATTR_IN_PKTS_BAD_TAG] = "InPktsBadTag",
+	[MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI] = "InPktsUnknownSCI",
+	[MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI] = "InPktsNoSCI",
+	[MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN] = "InPktsOverrun",
+};
+
+static void print_secy_stats(const char *prefix, struct rtattr *attr)
+{
+	struct rtattr *stats[MACSEC_SECY_STATS_ATTR_MAX + 1];
+	int i;
+
+	if (!attr || show_stats == 0)
+		return;
+
+	parse_rtattr_nested(stats, MACSEC_SECY_STATS_ATTR_MAX, attr); /* bound is the highest index; stats[] has MAX + 1 slots */
+	printf("%sstats:", prefix);
+
+	for (i = 1; i < NUM_MACSEC_SECY_STATS_ATTR; i++)
+		printf(" %s", secy_stats_names[i]);
+
+	printf("\n%s      ", prefix);
+
+	for (i = 1; i < NUM_MACSEC_SECY_STATS_ATTR; i++)
+		print_one_stat(secy_stats_names, stats, i, true);
+
+	printf("\n");
+}
+
+static const char *rxsa_stats_names[] = {
+	[MACSEC_SA_STATS_ATTR_IN_PKTS_OK] = "InPktsOK",
+	[MACSEC_SA_STATS_ATTR_IN_PKTS_INVALID] = "InPktsInvalid",
+	[MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_VALID] = "InPktsNotValid",
+	[MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_USING_SA] = "InPktsNotUsingSA",
+	[MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA] = "InPktsUnusedSA",
+};
+
+static void print_rxsa_stats(const char *prefix, struct rtattr *attr)
+{
+	struct rtattr *stats[MACSEC_SA_STATS_ATTR_MAX + 1];
+	int i;
+
+	if (!attr || show_stats == 0)
+		return;
+
+	parse_rtattr_nested(stats, MACSEC_SA_STATS_ATTR_MAX, attr); /* bound is the highest index; stats[] has MAX + 1 slots */
+	printf("%s%s  ", prefix, prefix);
+
+	for (i = 1; i <= MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA; i++)
+		printf(" %s", rxsa_stats_names[i]);
+
+	printf("\n%s%s  ", prefix, prefix);
+
+	for (i = 1; i <= MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA; i++)
+		print_one_stat(rxsa_stats_names, stats, i, false);
+
+	printf("\n");
+}
+
+static const char *txsa_stats_names[] = {
+	[MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED] = "OutPktsProtected",
+	[MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED] = "OutPktsEncrypted",
+};
+
+static void print_txsa_stats(const char *prefix, struct rtattr *attr)
+{
+	struct rtattr *stats[MACSEC_SA_STATS_ATTR_MAX + 1];
+
+	if (!attr || show_stats == 0)
+		return;
+
+	parse_rtattr_nested(stats, MACSEC_SA_STATS_ATTR_MAX, attr); /* bound is the highest index; stats[] has MAX + 1 slots */
+	printf("%s%s   %s %s\n", prefix, prefix,
+	       txsa_stats_names[MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED],
+	       txsa_stats_names[MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED]);
+	printf("%s%s  ", prefix, prefix);
+
+	print_one_stat(txsa_stats_names, stats, MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED, false);
+	print_one_stat(txsa_stats_names, stats, MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED, false);
+	printf("\n");
+}
+
+static void print_tx_sc(const char *prefix, __u64 sci, __u8 encoding_sa,
+			struct rtattr *txsc_stats, struct rtattr *secy_stats,
+			struct rtattr *sa)
+{
+	struct rtattr *sa_attr[MACSEC_SA_ATTR_MAX + 1];
+	struct rtattr *a;
+	int rem;
+
+	printf("%sTXSC: %016llx on SA %d\n", prefix, sci, encoding_sa);
+	print_secy_stats(prefix, secy_stats);
+	print_txsc_stats(prefix, txsc_stats);
+
+	rem = RTA_PAYLOAD(sa);
+	for (a = RTA_DATA(sa); RTA_OK(a, rem); a = RTA_NEXT(a, rem)) {
+		parse_rtattr_nested(sa_attr, MACSEC_SA_ATTR_MAX, a); /* bound is the highest index; sa_attr[] has MAX + 1 slots */
+		printf("%s%s%d: PN %u, state %s, key %llu\n", prefix, prefix,
+			rta_getattr_u8(sa_attr[MACSEC_SA_ATTR_AN]),
+			rta_getattr_u32(sa_attr[MACSEC_SA_ATTR_PN]),
+			rta_getattr_u8(sa_attr[MACSEC_SA_ATTR_ACTIVE]) ? "on" :
+									 "off",
+			rta_getattr_u64(sa_attr[MACSEC_SA_ATTR_KEYID]));
+		print_txsa_stats(prefix, sa_attr[MACSEC_SA_ATTR_STATS]);
+	}
+}
+
+static const char *rxsc_stats_names[] = {
+	[MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED] = "InOctetsValidated",
+	[MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED] = "InOctetsDecrypted",
+	[MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED] = "InPktsUnchecked",
+	[MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED] = "InPktsDelayed",
+	[MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK] = "InPktsOK",
+	[MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID] = "InPktsInvalid",
+	[MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE] = "InPktsLate",
+	[MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID] = "InPktsNotValid",
+	[MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA] = "InPktsNotUsingSA",
+	[MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA] = "InPktsUnusedSA",
+};
+
+static void print_rxsc_stats(const char *prefix, struct rtattr *attr)
+{
+	struct rtattr *stats[MACSEC_RXSC_STATS_ATTR_MAX + 1];
+	int i;
+
+	if (!attr || show_stats == 0)
+		return;
+
+	parse_rtattr_nested(stats, MACSEC_RXSC_STATS_ATTR_MAX, attr); /* bound is the highest index; stats[] has MAX + 1 slots */
+	printf("%sstats:", prefix);
+	for (i = 1; i < NUM_MACSEC_RXSC_STATS_ATTR; i++)
+		printf(" %s", rxsc_stats_names[i]);
+
+	printf("\n%s      ", prefix);
+
+	for (i = 1; i < NUM_MACSEC_RXSC_STATS_ATTR; i++)
+		print_one_stat(rxsc_stats_names, stats, i, true);
+
+	printf("\n");
+}
+
+static void print_rx_sc(const char *prefix, __u64 sci, __u8 active, struct rtattr *rxsc_stats, struct rtattr *sa)
+{
+	struct rtattr *sa_attr[MACSEC_SA_ATTR_MAX + 1];
+	struct rtattr *a;
+	int rem;
+
+	printf("%sRXSC: %016llx, state %s\n", prefix, sci, active ? "on" :
+								    "off");
+	print_rxsc_stats(prefix, rxsc_stats);
+
+	rem = RTA_PAYLOAD(sa);
+	for (a = RTA_DATA(sa); RTA_OK(a, rem); a = RTA_NEXT(a, rem)) {
+		parse_rtattr_nested(sa_attr, MACSEC_SA_ATTR_MAX, a); /* bound is the highest index; sa_attr[] has MAX + 1 slots */
+		printf("%s%s%d: PN %u, state %s, key %llu\n", prefix, prefix,
+			rta_getattr_u8(sa_attr[MACSEC_SA_ATTR_AN]),
+			rta_getattr_u32(sa_attr[MACSEC_SA_ATTR_PN]),
+			rta_getattr_u8(sa_attr[MACSEC_SA_ATTR_ACTIVE]) ? "on" :
+									 "off",
+			rta_getattr_u64(sa_attr[MACSEC_SA_ATTR_KEYID]));
+		print_rxsa_stats(prefix, sa_attr[MACSEC_SA_ATTR_STATS]);
+	}
+}
+
+static int process(const struct sockaddr_nl *who, struct nlmsghdr *n,
+		   void *arg)
+{
+	struct genlmsghdr *ghdr;
+	struct rtattr *attrs[MACSEC_ATTR_MAX + 1], *sc, *c;
+	struct rtattr *attrs_secy[MACSEC_SECY_ATTR_MAX + 1];
+	int len = n->nlmsg_len;
+	int ifindex;
+	__u64 sci;
+	__u8 encoding_sa;
+	int rem;
+
+	if (n->nlmsg_type != genl_family)
+		return -1;
+
+	len -= NLMSG_LENGTH(GENL_HDRLEN);
+	if (len < 0)
+		return -1;
+
+	ghdr = NLMSG_DATA(n);
+	if (ghdr->cmd != MACSEC_CMD_GET_TXSC)
+		return 0;
+
+	parse_rtattr(attrs, MACSEC_ATTR_MAX, (void *) ghdr + GENL_HDRLEN, len);
+	if (!validate_dump(attrs)) {
+		printf("incomplete dump message\n");
+		return -1;
+	}
+
+	ifindex = rta_getattr_u32(attrs[MACSEC_ATTR_IFINDEX]);
+	parse_rtattr_nested(attrs_secy, MACSEC_SECY_ATTR_MAX, attrs[MACSEC_ATTR_SECY]); /* bound is the highest index, as for attrs[] above */
+
+	if (!validate_secy_dump(attrs_secy)) {
+		printf("incomplete dump message\n");
+		return -1;
+	}
+
+	sci = rta_getattr_u64(attrs_secy[MACSEC_SECY_ATTR_SCI]);
+	encoding_sa = rta_getattr_u8(attrs_secy[MACSEC_SECY_ATTR_ENCODING_SA]);
+
+	if (filter.ifindex && ifindex != filter.ifindex)
+		return 0;
+
+	if (filter.sci && sci != filter.sci)
+		return 0;
+
+	printf("%d: %s: ", ifindex, ll_index_to_name(ifindex));
+	print_attrs("    ", attrs_secy);
+
+	print_tx_sc("    ", sci, encoding_sa, attrs[MACSEC_ATTR_TXSC_STATS],
+		    attrs[MACSEC_ATTR_SECY_STATS], attrs[MACSEC_ATTR_TXSA_LIST]);
+
+	if (!attrs[MACSEC_ATTR_RXSC_LIST])
+		return 0;
+
+	sc = attrs[MACSEC_ATTR_RXSC_LIST];
+	rem = RTA_PAYLOAD(sc);
+	for (c = RTA_DATA(sc); RTA_OK(c, rem); c = RTA_NEXT(c, rem)) {
+		struct rtattr *sc_attr[MACSEC_RXSC_ATTR_MAX + 1];
+
+		parse_rtattr_nested(sc_attr, MACSEC_RXSC_ATTR_MAX, c); /* bound is the highest index; sc_attr[] has MAX + 1 slots */
+		print_rx_sc("    ",
+			    rta_getattr_u64(sc_attr[MACSEC_RXSC_ATTR_SCI]),
+			    rta_getattr_u8(sc_attr[MACSEC_RXSC_ATTR_ACTIVE]), /* one-byte flag; print_rx_sc() takes a __u8 */
+			    sc_attr[MACSEC_RXSC_ATTR_STATS],
+			    sc_attr[MACSEC_RXSC_ATTR_SA_LIST]);
+	}
+
+	return 0;
+}
+
+static int do_dump(int ifindex)
+{
+	MACSEC_GENL_REQ(req, MACSEC_BUFLEN, MACSEC_CMD_GET_TXSC,
+			NLM_F_REQUEST | NLM_F_DUMP);
+
+	memset(&filter, 0, sizeof(filter));
+	filter.ifindex = ifindex;
+
+	req.n.nlmsg_seq = genl_rth.dump = ++genl_rth.seq;
+	if (rtnl_send(&genl_rth, &req, req.n.nlmsg_len) < 0) {
+		perror("Failed to send dump request");
+		exit(1);
+	}
+
+	if (rtnl_dump_filter(&genl_rth, process, stdout) < 0) {
+		fprintf(stderr, "Dump terminated\n");
+		exit(1);
+	}
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	int ifindex;
+
+	if (argc == 0)
+		return do_dump(0);
+
+	ifindex = ll_name_to_index(*argv);
+	if (ifindex == 0) {
+		fprintf(stderr, "Device \"%s\" does not exist.\n", *argv);
+		return -1;
+	}
+
+	argc--, argv++;
+	if (argc == 0)
+		return do_dump(ifindex);
+
+	ipmacsec_usage();
+	return -1;
+}
+
+int do_ipmacsec(int argc, char **argv)
+{
+	init_genl();
+
+	if (argc < 1)
+		ipmacsec_usage();
+
+	if (matches(*argv, "help") == 0)
+		ipmacsec_usage();
+
+	if (matches(*argv, "show") == 0)
+		return do_show(argc-1, argv+1);
+
+	if (matches(*argv, "add") == 0)
+		return do_modify(CMD_ADD, argc-1, argv+1);
+	if (matches(*argv, "set") == 0)
+		return do_modify(CMD_UPD, argc-1, argv+1);
+	if (matches(*argv, "delete") == 0)
+		return do_modify(CMD_DEL, argc-1, argv+1);
+
+	fprintf(stderr, "Command \"%s\" is unknown, try \"ip macsec help\".\n",
+		*argv);
+	exit(-1);
+}
+
+/* device creation */
+static void macsec_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
+{
+	if (!tb)
+		return;
+
+	if (tb[IFLA_MACSEC_SCI])
+		fprintf(f, "sci %016llx ", rta_getattr_u64(tb[IFLA_MACSEC_SCI]));
+
+	print_flag(f, tb, "protect", IFLA_MACSEC_PROTECT);
+
+	if (tb[IFLA_MACSEC_CIPHER_SUITE])
+		fprintf(f, "cipher %s ", cs_id_to_name(rta_getattr_u64(tb[IFLA_MACSEC_CIPHER_SUITE])));
+
+	if (tb[IFLA_MACSEC_ICV_LEN])
+		fprintf(f, "icvlen %d ", rta_getattr_u8(tb[IFLA_MACSEC_ICV_LEN]));
+
+	if (tb[IFLA_MACSEC_ENCODING_SA])
+		fprintf(f, "encodingsa %d ", rta_getattr_u8(tb[IFLA_MACSEC_ENCODING_SA]));
+
+	if (tb[IFLA_MACSEC_VALIDATION])
+		fprintf(f, "validate %s ", VALIDATE_STR[rta_getattr_u8(tb[IFLA_MACSEC_VALIDATION])]);
+
+	print_flag(f, tb, "encrypt", IFLA_MACSEC_ENCRYPT);
+	print_flag(f, tb, "send_sci", IFLA_MACSEC_INC_SCI);
+	print_flag(f, tb, "end_station", IFLA_MACSEC_ES);
+	print_flag(f, tb, "scb", IFLA_MACSEC_SCB);
+
+	print_flag(f, tb, "replay", IFLA_MACSEC_REPLAY_PROTECT);
+	if (tb[IFLA_MACSEC_WINDOW])
+		fprintf(f, "window %d ", rta_getattr_u32(tb[IFLA_MACSEC_WINDOW]));
+}
+
+
+static int do_cipher_suite(struct cipher_args *cipher, int *argcp,
+			   char ***argvp)
+{
+	char **argv = *argvp;
+	int argc = *argcp;
+
+	if (argc == 0)
+		return -1;
+
+	if (strcmp(*argv, "default") == 0 ||
+	    strcmp(*argv, "gcm-aes-128") == 0 ||
+	    strcmp(*argv, "GCM-AES-128") == 0)
+		cipher->id = DEFAULT_CIPHER_ID;
+	NEXT_ARG();
+
+	if (strcmp(*argv, "icvlen") == 0) {
+		NEXT_ARG();
+		if (cipher->icv_len != 0)
+			duparg2("icvlen", "icvlen");
+		get_icvlen(&cipher->icv_len, *argv);
+	}
+	*argcp = argc;
+	*argvp = argv;
+
+	return 0;
+}
+
+static bool check_txsc_flags(bool es, bool scb, bool sci)
+{
+	if (sci && (es || scb))
+		return false;
+	if (es && scb)
+		return false;
+	return true;
+}
+
+static void usage(FILE *f)
+{
+	fprintf(f,
+		"Usage: ... macsec [ port PORT | sci SCI ]\n"
+		"                  [ cipher CIPHER_SUITE ]\n"
+		"                  [ encrypt { on | off } ]\n"
+		"                  [ send_sci { on | off } ]\n"
+		"                  [ end_station { on | off } ]\n"
+		"                  [ scb { on | off } ]\n"
+		"                  [ protect { on | off } ]\n"
+		"                  [ replay { on | off} window { 0..2^32-1 } ]\n"
+		"                  [ validate { strict | check | disabled } ]\n"
+		"                  [ encodingsa { 0..3 } ]\n"
+		);
+	fprintf(f, "CIPHER_SUITE := [ default = gcm-aes-128 ] icvlen { 8..32 }\n");
+}
+
+static int macsec_parse_opt(struct link_util *lu, int argc, char **argv,
+			    struct nlmsghdr *hdr)
+{
+	int ret;
+	__u8 encoding_sa = 0xff;
+	__u32 window = -1;
+	struct cipher_args cipher = {0};
+	enum macsec_validation_type validate;
+	bool es = false, scb = false, send_sci = false;
+	int replay_protect = -1;
+	struct sci sci = { 0 };
+
+	ret = get_sci_portaddr(&sci, &argc, &argv, true, true);
+	if (ret < 0) {
+		fprintf(stderr, "expected sci\n");
+		return -1;
+	}
+
+	if (ret > 0) {
+		if (sci.sci)
+			addattr_l(hdr, MACSEC_BUFLEN, IFLA_MACSEC_SCI,
+				  &sci.sci, sizeof(sci.sci));
+		else
+			addattr_l(hdr, MACSEC_BUFLEN, IFLA_MACSEC_PORT,
+				  &sci.port, sizeof(sci.port));
+	}
+
+	while (argc > 0) {
+		if (strcmp(*argv, "cipher") == 0) {
+			if (cipher.id)
+				duparg2("cipher", "cipher");
+			NEXT_ARG();
+			if (do_cipher_suite(&cipher, &argc, &argv))
+				return -1;
+		} else if (strcmp(*argv, "encrypt") == 0) {
+			NEXT_ARG();
+			int i;
+
+			ret = one_of("encrypt", *argv, values_on_off,
+				     ARRAY_SIZE(values_on_off), &i);
+			if (ret != 0)
+				return ret;
+			addattr8(hdr, MACSEC_BUFLEN, IFLA_MACSEC_ENCRYPT, !i);
+		} else if (strcmp(*argv, "send_sci") == 0) {
+			NEXT_ARG();
+			int i;
+
+			ret = one_of("send_sci", *argv, values_on_off,
+				     ARRAY_SIZE(values_on_off), &i);
+			if (ret != 0)
+				return ret;
+			send_sci = !i;
+			addattr8(hdr, MACSEC_BUFLEN, IFLA_MACSEC_INC_SCI, send_sci);
+		} else if (strcmp(*argv, "end_station") == 0) {
+			NEXT_ARG();
+			int i;
+
+			ret = one_of("end_station", *argv, values_on_off,
+				     ARRAY_SIZE(values_on_off), &i);
+			if (ret != 0)
+				return ret;
+			es = !i;
+			addattr8(hdr, MACSEC_BUFLEN, IFLA_MACSEC_ES, es);
+		} else if (strcmp(*argv, "scb") == 0) {
+			NEXT_ARG();
+			int i;
+
+			ret = one_of("scb", *argv, values_on_off,
+				     ARRAY_SIZE(values_on_off), &i);
+			if (ret != 0)
+				return ret;
+			scb = !i;
+			addattr8(hdr, MACSEC_BUFLEN, IFLA_MACSEC_SCB, scb);
+		} else if (strcmp(*argv, "protect") == 0) {
+			NEXT_ARG();
+			int i;
+
+			ret = one_of("protect", *argv, values_on_off,
+				     ARRAY_SIZE(values_on_off), &i);
+			if (ret != 0)
+				return ret;
+			addattr8(hdr, MACSEC_BUFLEN, IFLA_MACSEC_PROTECT, !i);
+		} else if (strcmp(*argv, "replay") == 0) {
+			NEXT_ARG();
+			int i;
+
+			ret = one_of("replay", *argv, values_on_off,
+				     ARRAY_SIZE(values_on_off), &i);
+			if (ret != 0)
+				return ret;
+			replay_protect = !i;
+		} else if (strcmp(*argv, "window") == 0) {
+			NEXT_ARG();
+			ret = get_u32(&window, *argv, 0);
+			if (ret)
+				invarg("expected replay window size", *argv);
+		} else if (strcmp(*argv, "validate") == 0) {
+			NEXT_ARG();
+			ret = one_of("validate", *argv,
+				     VALIDATE_STR, ARRAY_SIZE(VALIDATE_STR),
+				     (int *)&validate);
+			if (ret != 0)
+				return ret;
+			addattr8(hdr, MACSEC_BUFLEN, IFLA_MACSEC_VALIDATION,
+				 validate);
+		} else if (strcmp(*argv, "encodingsa") == 0) {
+			if (encoding_sa != 0xff)
+				duparg2("encodingsa", "encodingsa");
+			NEXT_ARG();
+			ret = get_an(&encoding_sa, *argv);
+			if (ret)
+				invarg("expected an { 0..3 }", *argv);
+		} else {
+			fprintf(stderr, "macsec: unknown command \"%s\"?\n",
+				*argv);
+			usage(stderr);
+			return -1;
+		}
+
+		argv++; argc--;
+	}
+
+	if (!check_txsc_flags(es, scb, send_sci)) {
+		fprintf(stderr, "invalid combination of send_sci/end_station/scb\n");
+		return -1;
+	}
+
+	if (window != -1 && replay_protect == -1) {
+		fprintf(stderr, "replay window set, but replay protection not enabled. did you mean 'replay on window %u'?\n", window);
+		return -1;
+	} else if (window == -1 && replay_protect != -1) {
+		fprintf(stderr, "replay protection enabled, but no window set. did you mean 'replay on window VALUE'?\n");
+		return -1;
+	}
+
+	if (cipher.id) {
+		addattr_l(hdr, MACSEC_BUFLEN, IFLA_MACSEC_CIPHER_SUITE,
+			  &cipher.id, sizeof(cipher.id));
+		addattr_l(hdr, MACSEC_BUFLEN, IFLA_MACSEC_ICV_LEN,
+			  &cipher.icv_len, sizeof(cipher.icv_len));
+	}
+
+	if (replay_protect != -1) {
+		addattr32(hdr, MACSEC_BUFLEN, IFLA_MACSEC_WINDOW, window);
+		addattr8(hdr, MACSEC_BUFLEN, IFLA_MACSEC_REPLAY_PROTECT,
+			 replay_protect);
+	}
+
+	if (encoding_sa != 0xff) {
+		addattr_l(hdr, MACSEC_BUFLEN, IFLA_MACSEC_ENCODING_SA,
+			  &encoding_sa, sizeof(encoding_sa));
+	}
+
+	return 0;
+}
+
+static void macsec_print_help(struct link_util *lu, int argc, char **argv,
+			      FILE *f)
+{
+	usage(f);
+}
+
+struct link_util macsec_link_util = {
+	.id = "macsec",
+	.maxattr = IFLA_MACSEC_MAX,
+	.parse_opt = macsec_parse_opt,
+	.print_help = macsec_print_help,
+	.print_opt = macsec_print_opt,
+	.slave = false,
+};
diff --git a/man/man8/Makefile b/man/man8/Makefile
index d3fdf66ab11d..929826ecb56c 100644
--- a/man/man8/Makefile
+++ b/man/man8/Makefile
@@ -7,7 +7,7 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 rtpr.8 ss.
 	tc-mqprio.8 tc-netem.8 tc-pfifo.8 tc-pfifo_fast.8 tc-prio.8 tc-red.8 \
 	tc-sfb.8 tc-sfq.8 tc-stab.8 tc-tbf.8 \
 	bridge.8 rtstat.8 ctstat.8 nstat.8 routef.8 \
-	ip-addrlabel.8 ip-fou.8 ip-gue.8 ip-l2tp.8 \
+	ip-addrlabel.8 ip-fou.8 ip-gue.8 ip-l2tp.8 ip-macsec.8 \
 	ip-maddress.8 ip-monitor.8 ip-mroute.8 ip-neighbour.8 \
 	ip-netns.8 ip-ntable.8 ip-rule.8 ip-tunnel.8 ip-xfrm.8 \
 	ip-tcp_metrics.8 ip-netconf.8 ip-token.8 \
diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
index 984fb2eb0d63..6b443b27b2af 100644
--- a/man/man8/ip-link.8.in
+++ b/man/man8/ip-link.8.in
@@ -258,6 +258,9 @@ Link types:
 .sp
 .BR geneve
 - GEneric NEtwork Virtualization Encapsulation
+.sp
+.BR macsec
+- Interface for IEEE 802.1AE MAC Security (MACsec)
 .in -8
 
 .TP
@@ -846,6 +849,137 @@ forces the underlying interface into promiscuous mode. Passing the
 using standard tools.
 .in -8
 
+.TP
+MACsec Type Support
+For a link of type
+.I MACsec
+the following additional arguments are supported:
+
+.BI "ip link add link " DEVICE " name " NAME
+.BI type " macsec "
+.R " [ "
+.BI port " PORT "
+.R " | "
+.BI sci " SCI "
+.R " ] [ "
+.BI cipher " CIPHER_SUITE "
+.R " ] ["
+.BI encrypt
+.R " { "
+.BI "on "
+.R " |"
+.BI "off "
+.R " } "
+.R " ] ["
+.BI send_sci
+.R " { "
+.BI "on "
+.R " |"
+.BI "off "
+.R " } "
+.R " ] ["
+.BI end_station
+.R " { "
+.BI "on "
+.R " |"
+.BI "off "
+.R " } "
+.R " ] ["
+.BI scb
+.R " { "
+.BI "on "
+.R " |"
+.BI "off "
+.R " } "
+.R " ] ["
+.BI protect
+.R " { "
+.BI "on "
+.R " |"
+.BI "off "
+.R " } "
+.R " ] ["
+.BI replay
+.R " { "
+.BI "on "
+.R " |"
+.BI "off "
+.R " } "
+.BI window
+.R " { "
+.BI "0..2^32-1 "
+.R " } "
+.R " ] ["
+.BI validate
+.R " { "
+.BI "strict "
+.R " |"
+.BI "check "
+.R " |"
+.BI "disabled "
+.R " } "
+.R " ] ["
+.BI encodingsa
+.R " {"
+.BI "0..3 "
+.R " } "
+.R " ]"
+
+.in +8
+.sp
+.BI  port " PORT "
+- sets the port number for this MACsec device.
+
+.sp
+.BI sci " SCI "
+- sets the SCI for this MACsec device.
+
+.sp
+.BI cipher " CIPHER_SUITE "
+- defines the cipher suite to use.
+
+.sp
+.BR "encrypt on " or " encrypt off"
+- switches between authenticated encryption, or authenticity mode only.
+
+.sp
+.BR "send_sci on " or " send_sci off"
+- specifies whether the SCI is included in every packet, or only when it is necessary.
+
+.sp
+.BR "end_station on " or " end_station off"
+- sets the End Station bit.
+
+.sp
+.BR "scb on " or " scb off"
+- sets the Single Copy Broadcast bit.
+
+.sp
+.BR "protect on " or " protect off"
+- enables MACsec protection on the device.
+
+.sp
+.BR "replay on " or " replay off"
+- enables replay protection on the device.
+
+.in +8
+
+.sp
+.BI window " SIZE "
+- sets the size of the replay window.
+
+.in -8
+
+.sp
+.BR "validate strict " or " validate check " or " validate disabled"
+- sets the validation mode on the device.
+
+.sp
+.BI encodingsa " AN "
+- sets the active secure association for transmission.
+
+.in -8
+
 .SS ip link delete - delete virtual link
 
 .TP
diff --git a/man/man8/ip-macsec.8 b/man/man8/ip-macsec.8
new file mode 100644
index 000000000000..bd150d3b16eb
--- /dev/null
+++ b/man/man8/ip-macsec.8
@@ -0,0 +1,110 @@
+.TH IP\-MACSEC 8 "07 Mar 2016" "iproute" "Linux"
+.SH "NAME"
+ip-macsec \- MACsec device configuration
+.SH "SYNOPSIS"
+.BI "ip link add link " DEVICE " name " NAME
+.BR type " macsec "
+.R " [ "
+.R " [ "
+.B cipher " { " default " | " gcm-aes-128 " } "
+.R " ] "
+.B icvlen
+.I ICVLEN
+.R " ] [ "
+.B encrypt " { " on " | " off " } "
+.R " ] [ "
+.B send_sci " { " on " | " off " } "
+.R " ] [ "
+.B end_station " { " on " | " off " } "
+.R " ] [ "
+.B scb " { " on " | " off " } "
+.R " ] [ "
+.B protect " { " on " | " off " } "
+.R " ] [ "
+.B replay " { " on " | " off " } "
+.R " ] [ "
+.B window
+.I WINDOW
+.R " ] [ "
+.B encodingsa
+.I SA
+.R " ]"
+
+.BR "ip macsec add"
+.IR DEV
+.B "tx sa { 0..3 } [ OPTS ] key ID KEY"
+.br
+.BR "ip macsec set"
+.IR DEV
+.B "tx sa { 0..3 } [ OPTS ]"
+.br
+.BR "ip macsec del"
+.IR DEV
+.B "tx sa { 0..3 }"
+
+.BR "ip macsec add"
+.IR DEV
+.B "rx SCI [ on | off ]"
+.br
+.BR "ip macsec set"
+.IR DEV
+.B "rx SCI [ on | off ]"
+.br
+.BR "ip macsec del"
+.IR DEV
+.B "rx SCI"
+
+.BR "ip macsec add"
+.IR DEV
+.B "rx SCI sa { 0..3 } [ OPTS ] key ID KEY"
+.br
+.BR "ip macsec set"
+.IR DEV
+.B "rx SCI sa { 0..3 } [ OPTS ]"
+.br
+.BR "ip macsec del"
+.IR DEV
+.B "rx SCI sa { 0..3 }"
+
+.BR "ip macsec show [ DEV ]"
+
+.IR OPTS " := [ pn  { 1..2^32-1  } ] [ on | off ]"
+.br
+.IR SCI  " := { sci <u64> | port <u16> address <lladdr> }"
+
+
+.SH "DESCRIPTION"
+The
+.B ip macsec
+commands are used to configure transmit secure associations and receive secure channels and their secure associations on a MACsec device created with the
+.B ip link add
+command using the
+.I macsec
+type.
+
+.SH "EXAMPLES"
+.PP
+.SS Create a MACsec device on link eth0
+.nf
+# ip link add link eth0 macsec0 type macsec port 11 encrypt on
+.PP
+.SS Configure a secure association on that device
+.nf
+# ip macsec add macsec0 tx sa 0 pn 1024 on key 1 81818181818181818181818181818181
+.PP
+.SS Configure a receive channel
+.nf
+# ip macsec add macsec0 rx port 1234 address c6:19:52:8f:e6:a0
+.PP
+.SS Configure a receive association
+.nf
+# ip macsec add macsec0 rx port 1234 address c6:19:52:8f:e6:a0 sa 0 pn 1 on key 0 82828282828282828282828282828282
+.PP
+.SS Display MACsec configuration
+.nf
+# ip macsec show
+.SH "SEE ALSO"
+.br
+.BR ip-link (8)
+.SH "AUTHOR"
+Sabrina Dubroca <sd@queasysnail.net>
-- 
2.8.0

^ permalink raw reply related

* Re: [PATCH v3 0/2] sctp: delay calls to sk_data_ready() as much as possible
From: Neil Horman @ 2016-04-14 13:03 UTC (permalink / raw)
  To: David Miller
  Cc: marcelo.leitner, netdev, vyasevich, linux-sctp, David.Laight,
	jkbs
In-Reply-To: <20160413.230532.676746231426161126.davem@davemloft.net>

On Wed, Apr 13, 2016 at 11:05:32PM -0400, David Miller wrote:
> From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> Date: Fri,  8 Apr 2016 16:41:26 -0300
> 
> > 1st patch is a preparation for the 2nd. The idea is to not call
> > ->sk_data_ready() for every data chunk processed while processing
> > packets but only once before releasing the socket.
> > 
> > v2: patchset re-checked, small changelog fixes
> > v3: on patch 2, make use of local vars to make it more readable
> 
> Applied to net-next, but isn't this reduced overhead coming at the
> expense of latency?  What if that lower latency is important to the
> application and/or consumer?
That's a fair point, but I'd make the counter argument that, as it currently
stands, any latency introduced (or removed) is an artifact of our
implementation rather than a designed feature of it.  That is to say, we make no
guarantees at the application level regarding how long it takes to signal data
readiness from the time we get data off the wire, so I would rather see our
throughput raised if we can, as that's been sctp's more pressing Achilles' heel.


That's not to say I wouldn't like to enable lower latency, but I'd rather have
this now, and start pondering how to design that in.  Perhaps we can convert the
pending flag to a counter to count the number of events we enqueue, and call
sk_data_ready every time we reach a sysctl-defined threshold.

Neil

> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Deleting child qdisc doesn't reset parent to default qdisc?
From: Jiri Kosina @ 2016-04-14 14:44 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: netdev, linux-kernel

Hi,

I've come across the behavior where adding a child qdisc and then deleting 
it again makes the networking dysfunctional (I guess that's because all of 
a sudden there is absolutely no working qdisc on the device, although 
there originally was a default one in the parent).

In a nutshell, is this expected behavior or bug?

=====
jikos:~ # tc qdisc show
qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 
jikos:~ # ping -c 1 nix.cz | head -2
PING nix.cz (195.47.235.3) 56(84) bytes of data.
64 bytes from info.nix.cz (195.47.235.3): icmp_seq=1 ttl=89 time=1.59 ms

jikos:~ # tc qdisc add dev eth0 parent 10:1 sfq
jikos:~ # tc qdisc show
qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 
qdisc sfq 8008: dev eth0 parent 10:1 limit 127p quantum 1514b depth 127 divisor 1024 

jikos:~ # ping -c 1 nix.cz | head -2
PING nix.cz (195.47.235.3) 56(84) bytes of data.
64 bytes from info.nix.cz (195.47.235.3): icmp_seq=1 ttl=89 time=1.67 ms

jikos:~ # tc qdisc del dev eth0 parent 10:1 sfq
jikos:~ # tc qdisc show
qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 
jikos:~ # ping -c 1 nix.cz | head -2
PING nix.cz (195.47.235.3) 56(84) bytes of data.
	[ ... nothing happens ... ]
^C
jikos:~ # tc qdisc add dev eth0 parent 10:1 sfq
jikos:~ # ping -c 1 nix.cz | head -2
PING nix.cz (195.47.235.3) 56(84) bytes of data.
64 bytes from info.nix.cz (195.47.235.3): icmp_seq=1 ttl=89 time=1.66 ms
=====

Thanks,

-- 
Jiri Kosina

^ permalink raw reply

* Re: Deleting child qdisc doesn't reset parent to default qdisc?
From: Jiri Kosina @ 2016-04-14 14:51 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: netdev, linux-kernel
In-Reply-To: <alpine.LNX.2.00.1604141638230.27368@cbobk.fhfr.pm>

On Thu, 14 Apr 2016, Jiri Kosina wrote:

> In a nutshell, is this expected behavior or bug?

Just to clarify: what suggests to me that this is a bug that needs to be 
fixed (but apparently one that has been there for quite a long time) can 
be demonstrated by this:

> 
> =====
> jikos:~ # tc qdisc show
> qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 

The above configuration works.

> jikos:~ # ping -c 1 nix.cz | head -2
> PING nix.cz (195.47.235.3) 56(84) bytes of data.
> 64 bytes from info.nix.cz (195.47.235.3): icmp_seq=1 ttl=89 time=1.59 ms
> 
> jikos:~ # tc qdisc add dev eth0 parent 10:1 sfq
> jikos:~ # tc qdisc show
> qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 
> qdisc sfq 8008: dev eth0 parent 10:1 limit 127p quantum 1514b depth 127 divisor 1024 
> 
> jikos:~ # ping -c 1 nix.cz | head -2
> PING nix.cz (195.47.235.3) 56(84) bytes of data.
> 64 bytes from info.nix.cz (195.47.235.3): icmp_seq=1 ttl=89 time=1.67 ms
> 
> jikos:~ # tc qdisc del dev eth0 parent 10:1 sfq
> jikos:~ # tc qdisc show
> qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 

The above configuration doesn't, although it's identical to the working one 
at the beginning.

> jikos:~ # ping -c 1 nix.cz | head -2
> PING nix.cz (195.47.235.3) 56(84) bytes of data.
> 	[ ... nothing happens ... ]
> ^C

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply

* Re: [PATCH net-next 5/8] dsa: mv88e6xxx: Kill the REG_READ and REG_WRITE macros
From: Vivien Didelot @ 2016-04-14 14:54 UTC (permalink / raw)
  To: Andrew Lunn, David Miller; +Cc: Florian Fainelli, netdev, Andrew Lunn
In-Reply-To: <1460591998-20598-6-git-send-email-andrew@lunn.ch>

Hi Andrew,

Andrew Lunn <andrew@lunn.ch> writes:

<snip>

>  int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr)
>  {
> +	int ret, err;
>  	int i;
> -	int ret;
>  
>  	for (i = 0; i < 6; i++) {
>  		int j;
>  
>  		/* Write the MAC address byte. */
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MAC,
> -			  GLOBAL2_SWITCH_MAC_BUSY | (i << 8) | addr[i]);
> +		err = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SWITCH_MAC,
> +					  GLOBAL2_SWITCH_MAC_BUSY |
> +					  (i << 8) | addr[i]);
> +		if (err)
> +			return err;

Just use the ret variable here.

>  
>  		/* Wait for the write to complete. */
>  		for (j = 0; j < 16; j++) {
> -			ret = REG_READ(REG_GLOBAL2, GLOBAL2_SWITCH_MAC);
> +			ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2,
> +						 GLOBAL2_SWITCH_MAC);
> +			if (ret < 0)
> +				return ret;
> +
> +			if (ret < 0)
> +				return ret;

duplicated condition.

>  			if ((ret & GLOBAL2_SWITCH_MAC_BUSY) == 0)
>  				break;
>  		}

<snip>

> @@ -2697,7 +2732,8 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds)
>  	ps->ds = ds;
>  	mutex_init(&ps->smi_mutex);
>  
> -	ps->id = REG_READ(REG_PORT(0), PORT_SWITCH_ID) & 0xfff0;
> +	ps->id = __mv88e6xxx_reg_read(ps->bus, ps->sw_addr, REG_PORT(0),
> +				      PORT_SWITCH_ID) & 0xfff0;

Here you dropped the error checking, and the (maybe unnecessary?) mutex
locking.

>  
>  	INIT_WORK(&ps->bridge_work, mv88e6xxx_bridge_work);
>  
> @@ -2708,42 +2744,66 @@ EXPORT_SYMBOL_GPL(mv88e6xxx_setup_common);
>  int mv88e6xxx_setup_global(struct dsa_switch *ds)
>  {
>  	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
> -	int ret;
> +	int err;
>  	int i;
>  
>  	/* Set the default address aging time to 5 minutes, and
>  	 * enable address learn messages to be sent to all message
>  	 * ports.
>  	 */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
> -		  0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL);
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_CONTROL,
> +				  0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL);
> +	if (err)
> +		return err;
>  
>  	/* Configure the IP ToS mapping registers. */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff);
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff);
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000);
> +	if (err)
> +		return err;
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000);
> +	if (err)
> +		return err;
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555);
> +	if (err)
> +		return err;
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555);
> +	if (err)
> +		return err;
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa);
> +	if (err)
> +		return err;
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa);
> +	if (err)
> +		return err;
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff);
> +	if (err)
> +		return err;
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff);
> +	if (err)
> +		return err;
>  
>  	/* Configure the IEEE 802.1p priority mapping register. */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41);
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41);
> +	if (err)
> +		return err;
>  
>  	/* Send all frames with destination addresses matching
>  	 * 01:80:c2:00:00:0x to the CPU port.
>  	 */
> -	REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff);
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff);
> +	if (err)
> +		return err;
>  
>  	/* Ignore removed tag data on doubly tagged packets, disable
>  	 * flow control messages, force flow control priority to the
>  	 * highest, and send all special multicast frames to the CPU
>  	 * port at the highest priority.
>  	 */
> -	REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MGMT,
> -		  0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 |
> -		  GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI);
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT,
> +				  0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 |
> +				  GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI);
> +	if (err)
> +		return err;
>  
>  	/* Program the DSA routing table. */
>  	for (i = 0; i < 32; i++) {
> @@ -2753,23 +2813,35 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
>  		    i != ds->index && i < ds->dst->pd->nr_chips)
>  			nexthop = ds->pd->rtable[i] & 0x1f;
>  
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING,
> -			  GLOBAL2_DEVICE_MAPPING_UPDATE |
> -			  (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) |
> -			  nexthop);
> +		err = mv88e6xxx_reg_write(
> +			ds, REG_GLOBAL2,
> +			GLOBAL2_DEVICE_MAPPING,
> +			GLOBAL2_DEVICE_MAPPING_UPDATE |
> +			(i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | nexthop);
> +	if (err)
> +		return err;

Wrong indentation here.

>  	}
>  
>  	/* Clear all trunk masks. */
> -	for (i = 0; i < 8; i++)
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MASK,
> -			  0x8000 | (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) |
> -			  ((1 << ps->num_ports) - 1));
> +	for (i = 0; i < 8; i++) {
> +		err = mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_TRUNK_MASK,
> +					  0x8000 |
> +					  (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) |
> +					  ((1 << ps->num_ports) - 1));
> +		if (err)
> +			return err;
> +	}
>  
>  	/* Clear all trunk mappings. */
> -	for (i = 0; i < 16; i++)
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING,
> -			  GLOBAL2_TRUNK_MAPPING_UPDATE |
> -			  (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT));
> +	for (i = 0; i < 16; i++) {
> +		err = mv88e6xxx_reg_write(
> +			ds, REG_GLOBAL2,
> +			GLOBAL2_TRUNK_MAPPING,
> +			GLOBAL2_TRUNK_MAPPING_UPDATE |
> +			(i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT));
> +		if (err)
> +			return err;
> +	}
>  
>  	if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
>  	    mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
> @@ -2777,17 +2849,27 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
>  		/* Send all frames with destination addresses matching
>  		 * 01:80:c2:00:00:2x to the CPU port.
>  		 */
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, 0xffff);
> +		err = mv88e6xxx_reg_write(ds, REG_GLOBAL2,
> +					  GLOBAL2_MGMT_EN_2X, 0xffff);
> +		if (err)
> +			return err;
>  
>  		/* Initialise cross-chip port VLAN table to reset
>  		 * defaults.
>  		 */
> -		REG_WRITE(REG_GLOBAL2, GLOBAL2_PVT_ADDR, 0x9000);
> +		err = mv88e6xxx_reg_write(ds, REG_GLOBAL2,
> +					  GLOBAL2_PVT_ADDR, 0x9000);
> +		if (err)
> +			return err;
>  
>  		/* Clear the priority override table. */
> -		for (i = 0; i < 16; i++)
> -			REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE,
> -				  0x8000 | (i << 8));
> +		for (i = 0; i < 16; i++) {
> +			err = mv88e6xxx_reg_write(ds, REG_GLOBAL2,
> +						  GLOBAL2_PRIO_OVERRIDE,
> +						  0x8000 | (i << 8));
> +			if (err)
> +				return err;
> +		}
>  	}
>  
>  	if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
> @@ -2798,31 +2880,38 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
>  		 * ingress rate limit registers to their initial
>  		 * state.
>  		 */
> -		for (i = 0; i < ps->num_ports; i++)
> -			REG_WRITE(REG_GLOBAL2, GLOBAL2_INGRESS_OP,
> -				  0x9000 | (i << 8));
> +		for (i = 0; i < ps->num_ports; i++) {
> +			err = mv88e6xxx_reg_write(ds, REG_GLOBAL2,
> +						  GLOBAL2_INGRESS_OP,
> +						  0x9000 | (i << 8));
> +			if (err)
> +				return err;
> +		}
>  	}
>  
>  	/* Clear the statistics counters for all ports */
> -	REG_WRITE(REG_GLOBAL, GLOBAL_STATS_OP, GLOBAL_STATS_OP_FLUSH_ALL);
> +	err = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP,
> +				  GLOBAL_STATS_OP_FLUSH_ALL);
> +	if (err)
> +		return err;
>  
>  	/* Wait for the flush to complete. */
>  	mutex_lock(&ps->smi_mutex);

This function is ugly. As you are rewriting all read/write accesses in it,
please put that mutex_lock at its beginning and use the _mv88e6xxx_reg_*
variants with goto unlock statements instead, so we lock/unlock just once.

> -	ret = _mv88e6xxx_stats_wait(ds);
> -	if (ret < 0)
> +	err = _mv88e6xxx_stats_wait(ds);
> +	if (err < 0)
>  		goto unlock;
>  
>  	/* Clear all ATU entries */
> -	ret = _mv88e6xxx_atu_flush(ds, 0, true);
> -	if (ret < 0)
> +	err = _mv88e6xxx_atu_flush(ds, 0, true);
> +	if (err < 0)
>  		goto unlock;
>  
>  	/* Clear all the VTU and STU entries */
> -	ret = _mv88e6xxx_vtu_stu_flush(ds);
> +	err = _mv88e6xxx_vtu_stu_flush(ds);
>  unlock:
>  	mutex_unlock(&ps->smi_mutex);
>  
> -	return ret;
> +	return err;
>  }
>  EXPORT_SYMBOL_GPL(mv88e6xxx_setup_global);
>  
> @@ -2832,13 +2921,19 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active)
>  	u16 is_reset = (ppu_active ? 0x8800 : 0xc800);
>  	struct gpio_desc *gpiod = ps->reset;
>  	unsigned long timeout;
> -	int ret;
> +	int ret, err;
>  	int i;
>  
>  	/* Set all ports to the disabled state. */
>  	for (i = 0; i < ps->num_ports; i++) {
> -		ret = REG_READ(REG_PORT(i), PORT_CONTROL);
> -		REG_WRITE(REG_PORT(i), PORT_CONTROL, ret & 0xfffc);
> +		ret = mv88e6xxx_reg_read(ds, REG_PORT(i), PORT_CONTROL);
> +		if (ret < 0)
> +			return ret;
> +
> +		err = mv88e6xxx_reg_write(ds, REG_PORT(i), PORT_CONTROL,
> +					  ret & 0xfffc);
> +		if (err)
> +			return err;
>  	}
>  
>  	/* Wait for transmit queues to drain. */
> @@ -2857,14 +2952,19 @@ int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active)
>  	 * through global registers 0x18 and 0x19.
>  	 */
>  	if (ppu_active)
> -		REG_WRITE(REG_GLOBAL, 0x04, 0xc000);
> +		err = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x04, 0xc000);
>  	else
> -		REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
> +		err = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x04, 0xc400);
> +	if (err)
> +		return err;

The ret variable is already there, I'd stick with it instead of adding a
new err variable to the function.

>  
>  	/* Wait up to one second for reset to complete. */
>  	timeout = jiffies + 1 * HZ;
>  	while (time_before(jiffies, timeout)) {
> -		ret = REG_READ(REG_GLOBAL, 0x00);
> +		ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x00);
> +		if (ret < 0)
> +			return ret;
> +
>  		if ((ret & is_reset) == is_reset)
>  			break;
>  		usleep_range(1000, 2000);

This comment is up to you, but as you're adding error conditions to the
function, I'd go with a single mutex_lock/mutex_unlock, use the
_mv88e6xxx_reg_* variants and goto unlock statements here as well.

> diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
> index 739d3ff1bddf..6d1b6207144d 100644
> --- a/drivers/net/dsa/mv88e6xxx.h
> +++ b/drivers/net/dsa/mv88e6xxx.h
> @@ -554,25 +554,6 @@ extern struct dsa_switch_driver mv88e6123_switch_driver;
>  extern struct dsa_switch_driver mv88e6352_switch_driver;
>  extern struct dsa_switch_driver mv88e6171_switch_driver;
>  
> -#define REG_READ(addr, reg)						\
> -	({								\
> -		int __ret;						\
> -									\
> -		__ret = mv88e6xxx_reg_read(ds, addr, reg);		\
> -		if (__ret < 0)						\
> -			return __ret;					\
> -		__ret;							\
> -	})
> -
> -#define REG_WRITE(addr, reg, val)					\
> -	({								\
> -		int __ret;						\
> -									\
> -		__ret = mv88e6xxx_reg_write(ds, addr, reg, val);	\
> -		if (__ret < 0)						\
> -			return __ret;					\
> -	})
> -
>  /**
>   * mv88e6xxx_module_driver() - Helper macro for registering mv88e6xxx drivers
>   *

I'm 200% for this patch. However it is not related to the rest of the
patchset, which I'm not really OK with yet (still reviewing). As this
needs a respin anyway, can you extract and send it as a single patch? It
applies cleanly to net-next once you remove the mv88e6xxx_module_driver
macro. I'll ack it right away.

Thanks,
Vivien

^ permalink raw reply

* Re: [PATCH iproute2 0/4] add MACsec support
From: Phil Sutter @ 2016-04-14 14:59 UTC (permalink / raw)
  To: Sabrina Dubroca; +Cc: netdev, Stephen Hemminger
In-Reply-To: <cover.1460622809.git.sd@queasysnail.net>

On Thu, Apr 14, 2016 at 03:01:06PM +0200, Sabrina Dubroca wrote:
> This series introduces support for MACsec devices, with a new device
> type for `ip link`, and a new `ip macsec` subcommand.
> 
> The first three patches introduce some necessary helper functions.
> 
> Sabrina Dubroca (4):
>   utils: make hexstring_a2n provide the number of hex digits parsed
>   utils: add get_be{16,32,64}, use them where possible
>   utils: provide get_hex to read an hex digit from a char
>   ip: add MACsec support

Apart from the unrelated coding-style fix in patch 1 and the somewhat
intricate man page markup:

Acked-by: Phil Sutter <phil@nwl.cc>

^ permalink raw reply

* Re: Deleting child qdisc doesn't reset parent to default qdisc?
From: Eric Dumazet @ 2016-04-14 15:01 UTC (permalink / raw)
  To: Jiri Kosina; +Cc: Jamal Hadi Salim, netdev, linux-kernel
In-Reply-To: <alpine.LNX.2.00.1604141638230.27368@cbobk.fhfr.pm>

On Thu, 2016-04-14 at 16:44 +0200, Jiri Kosina wrote:
> Hi,
> 
> I've came across the behavior where adding a child qdisc and then deleting 
> it again makes the networking dysfunctional (I guess that's because all of 
> a sudden there is absolutely no working qdisc on the device, although 
> there originally was a default one in the parent).
> 
> In a nutshell, is this expected behavior or bug?

This is the expected behavior.

If the kernel were suddenly doing a 'replace' when you ask for a delete,
then scripts doing a delete, then an add, would break.

tc users are skilled admins ;)

^ permalink raw reply

* [PATCH v3 0/5] net: w5100: add support W5100/W5200 for SPI interface
From: Akinobu Mita @ 2016-04-14 15:11 UTC (permalink / raw)
  To: netdev; +Cc: Akinobu Mita, Mike Sinkovsky, David S. Miller

This series adds support for the Wiznet W5100 and W5200 over the SPI interface.

We can easily find the ethernet modules and shield for Arduino with
these chips for purchase.  I've tested them with BeagleBone.

The Wiznet W5100 with MMIO access is already supported by the w5100 driver.

In order to share the code between MMIO mode and SPI mode, this series
first adds the ability to support another register access interface to
the existing w5100 driver.  This groundwork also requires introducing a
workqueue and a threaded IRQ because SPI transfers, unlike MMIO accesses,
can only be issued from contexts that can sleep.

The latter part of this series adds the w5100-spi driver, which actually
supports the W5100 and W5200 over the SPI interface.  Supporting the W5100
is straightforward because it only requires adding a register access
interface based on SPI transfers.  The W5100 and W5200 have similar memory
maps, which justifies adding W5200 support to the w5100 driver.

* Changes from v2 to v3
- Add comment for reg_lock
- Add ability to allocate ops specific data structure
- Allocate w5200 ops specific data structure to put DMA-safe buffer
- Add missing chip_id assignment for w5100_*_ops

* Changes from v1 to v2
- Use a plain single pointer instead of SKB queue, spotted by David S. Miller
- Correct timeout period in w5100_command
- Use spi_write_then_read instead of spi_write which needs DMA-safe buffer
- Support W5200


Akinobu Mita (5):
  net: w5100: move mmiowb into register access callbacks
  net: w5100: add ability to support other bus interface
  net: w5100: enable to support sleepable register access interface
  net: w5100: support SPI interface mode
  net: w5100: support W5200

 drivers/net/ethernet/wiznet/Kconfig     |  14 +
 drivers/net/ethernet/wiznet/Makefile    |   1 +
 drivers/net/ethernet/wiznet/w5100-spi.c | 300 ++++++++++
 drivers/net/ethernet/wiznet/w5100.c     | 935 ++++++++++++++++++++++----------
 drivers/net/ethernet/wiznet/w5100.h     |  35 ++
 5 files changed, 1004 insertions(+), 281 deletions(-)
 create mode 100644 drivers/net/ethernet/wiznet/w5100-spi.c
 create mode 100644 drivers/net/ethernet/wiznet/w5100.h

Cc: Mike Sinkovsky <msink@permonline.ru>
Cc: David S. Miller <davem@davemloft.net>
-- 
2.5.0

^ permalink raw reply

* [PATCH v3 1/5] net: w5100: move mmiowb into register access callbacks
From: Akinobu Mita @ 2016-04-14 15:11 UTC (permalink / raw)
  To: netdev; +Cc: Akinobu Mita, Mike Sinkovsky, David S. Miller
In-Reply-To: <1460646693-25179-1-git-send-email-akinobu.mita@gmail.com>

Instead of sprinkling mmiowb() over the driver code, move it into the
primary register write callbacks (w5100_write, w5100_write16,
w5100_writebuf).

This is preparation for supporting the SPI interface, which doesn't use
MMIO for accessing the w5100 registers.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Mike Sinkovsky <msink@permonline.ru>
Cc: David S. Miller <davem@davemloft.net>
---
* No changes from v2

 drivers/net/ethernet/wiznet/w5100.c | 44 +++++++++++++------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index 8b282d0..f4b7200 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -122,10 +122,17 @@ static inline u8 w5100_read_direct(struct w5100_priv *priv, u16 addr)
 	return ioread8(priv->base + (addr << CONFIG_WIZNET_BUS_SHIFT));
 }
 
+static inline void __w5100_write_direct(struct w5100_priv *priv, u16 addr,
+					u8 data)
+{
+	iowrite8(data, priv->base + (addr << CONFIG_WIZNET_BUS_SHIFT));
+}
+
 static inline void w5100_write_direct(struct w5100_priv *priv,
 				      u16 addr, u8 data)
 {
-	iowrite8(data, priv->base + (addr << CONFIG_WIZNET_BUS_SHIFT));
+	__w5100_write_direct(priv, addr, data);
+	mmiowb();
 }
 
 static u16 w5100_read16_direct(struct w5100_priv *priv, u16 addr)
@@ -138,8 +145,9 @@ static u16 w5100_read16_direct(struct w5100_priv *priv, u16 addr)
 
 static void w5100_write16_direct(struct w5100_priv *priv, u16 addr, u16 data)
 {
-	w5100_write_direct(priv, addr, data >> 8);
-	w5100_write_direct(priv, addr + 1, data);
+	__w5100_write_direct(priv, addr, data >> 8);
+	__w5100_write_direct(priv, addr + 1, data);
+	mmiowb();
 }
 
 static void w5100_readbuf_direct(struct w5100_priv *priv,
@@ -164,8 +172,9 @@ static void w5100_writebuf_direct(struct w5100_priv *priv,
 	for (i = 0; i < len; i++, addr++) {
 		if (unlikely(addr > W5100_TX_MEM_END))
 			addr = W5100_TX_MEM_START;
-		w5100_write_direct(priv, addr, *buf++);
+		__w5100_write_direct(priv, addr, *buf++);
 	}
+	mmiowb();
 }
 
 /*
@@ -186,7 +195,6 @@ static u8 w5100_read_indirect(struct w5100_priv *priv, u16 addr)
 
 	spin_lock_irqsave(&priv->reg_lock, flags);
 	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	mmiowb();
 	data = w5100_read_direct(priv, W5100_IDM_DR);
 	spin_unlock_irqrestore(&priv->reg_lock, flags);
 
@@ -199,9 +207,7 @@ static void w5100_write_indirect(struct w5100_priv *priv, u16 addr, u8 data)
 
 	spin_lock_irqsave(&priv->reg_lock, flags);
 	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	mmiowb();
 	w5100_write_direct(priv, W5100_IDM_DR, data);
-	mmiowb();
 	spin_unlock_irqrestore(&priv->reg_lock, flags);
 }
 
@@ -212,7 +218,6 @@ static u16 w5100_read16_indirect(struct w5100_priv *priv, u16 addr)
 
 	spin_lock_irqsave(&priv->reg_lock, flags);
 	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	mmiowb();
 	data  = w5100_read_direct(priv, W5100_IDM_DR) << 8;
 	data |= w5100_read_direct(priv, W5100_IDM_DR);
 	spin_unlock_irqrestore(&priv->reg_lock, flags);
@@ -226,10 +231,8 @@ static void w5100_write16_indirect(struct w5100_priv *priv, u16 addr, u16 data)
 
 	spin_lock_irqsave(&priv->reg_lock, flags);
 	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	mmiowb();
-	w5100_write_direct(priv, W5100_IDM_DR, data >> 8);
+	__w5100_write_direct(priv, W5100_IDM_DR, data >> 8);
 	w5100_write_direct(priv, W5100_IDM_DR, data);
-	mmiowb();
 	spin_unlock_irqrestore(&priv->reg_lock, flags);
 }
 
@@ -242,13 +245,11 @@ static void w5100_readbuf_indirect(struct w5100_priv *priv,
 
 	spin_lock_irqsave(&priv->reg_lock, flags);
 	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	mmiowb();
 
 	for (i = 0; i < len; i++, addr++) {
 		if (unlikely(addr > W5100_RX_MEM_END)) {
 			addr = W5100_RX_MEM_START;
 			w5100_write16_direct(priv, W5100_IDM_AR, addr);
-			mmiowb();
 		}
 		*buf++ = w5100_read_direct(priv, W5100_IDM_DR);
 	}
@@ -265,15 +266,13 @@ static void w5100_writebuf_indirect(struct w5100_priv *priv,
 
 	spin_lock_irqsave(&priv->reg_lock, flags);
 	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	mmiowb();
 
 	for (i = 0; i < len; i++, addr++) {
 		if (unlikely(addr > W5100_TX_MEM_END)) {
 			addr = W5100_TX_MEM_START;
 			w5100_write16_direct(priv, W5100_IDM_AR, addr);
-			mmiowb();
 		}
-		w5100_write_direct(priv, W5100_IDM_DR, *buf++);
+		__w5100_write_direct(priv, W5100_IDM_DR, *buf++);
 	}
 	mmiowb();
 	spin_unlock_irqrestore(&priv->reg_lock, flags);
@@ -309,7 +308,6 @@ static int w5100_command(struct w5100_priv *priv, u16 cmd)
 	unsigned long timeout = jiffies + msecs_to_jiffies(100);
 
 	w5100_write(priv, W5100_S0_CR, cmd);
-	mmiowb();
 
 	while (w5100_read(priv, W5100_S0_CR) != 0) {
 		if (time_after(jiffies, timeout))
@@ -327,18 +325,15 @@ static void w5100_write_macaddr(struct w5100_priv *priv)
 
 	for (i = 0; i < ETH_ALEN; i++)
 		w5100_write(priv, W5100_SHAR + i, ndev->dev_addr[i]);
-	mmiowb();
 }
 
 static void w5100_hw_reset(struct w5100_priv *priv)
 {
 	w5100_write_direct(priv, W5100_MR, MR_RST);
-	mmiowb();
 	mdelay(5);
 	w5100_write_direct(priv, W5100_MR, priv->indirect ?
 				  MR_PB | MR_AI | MR_IND :
 				  MR_PB);
-	mmiowb();
 	w5100_write(priv, W5100_IMR, 0);
 	w5100_write_macaddr(priv);
 
@@ -347,23 +342,19 @@ static void w5100_hw_reset(struct w5100_priv *priv)
 	 */
 	w5100_write(priv, W5100_RMSR, 0x03);
 	w5100_write(priv, W5100_TMSR, 0x03);
-	mmiowb();
 }
 
 static void w5100_hw_start(struct w5100_priv *priv)
 {
 	w5100_write(priv, W5100_S0_MR, priv->promisc ?
 			  S0_MR_MACRAW : S0_MR_MACRAW_MF);
-	mmiowb();
 	w5100_command(priv, S0_CR_OPEN);
 	w5100_write(priv, W5100_IMR, IR_S0);
-	mmiowb();
 }
 
 static void w5100_hw_close(struct w5100_priv *priv)
 {
 	w5100_write(priv, W5100_IMR, 0);
-	mmiowb();
 	w5100_command(priv, S0_CR_CLOSE);
 }
 
@@ -447,7 +438,6 @@ static int w5100_start_tx(struct sk_buff *skb, struct net_device *ndev)
 	offset = w5100_read16(priv, W5100_S0_TX_WR);
 	w5100_writebuf(priv, offset, skb->data, skb->len);
 	w5100_write16(priv, W5100_S0_TX_WR, offset + skb->len);
-	mmiowb();
 	ndev->stats.tx_bytes += skb->len;
 	ndev->stats.tx_packets++;
 	dev_kfree_skb(skb);
@@ -488,7 +478,6 @@ static int w5100_napi_poll(struct napi_struct *napi, int budget)
 		skb_put(skb, rx_len);
 		w5100_readbuf(priv, offset + 2, skb->data, rx_len);
 		w5100_write16(priv, W5100_S0_RX_RD, offset + 2 + rx_len);
-		mmiowb();
 		w5100_command(priv, S0_CR_RECV);
 		skb->protocol = eth_type_trans(skb, ndev);
 
@@ -500,7 +489,6 @@ static int w5100_napi_poll(struct napi_struct *napi, int budget)
 	if (rx_count < budget) {
 		napi_complete(napi);
 		w5100_write(priv, W5100_IMR, IR_S0);
-		mmiowb();
 	}
 
 	return rx_count;
@@ -515,7 +503,6 @@ static irqreturn_t w5100_interrupt(int irq, void *ndev_instance)
 	if (!ir)
 		return IRQ_NONE;
 	w5100_write(priv, W5100_S0_IR, ir);
-	mmiowb();
 
 	if (ir & S0_IR_SENDOK) {
 		netif_dbg(priv, tx_done, ndev, "tx done\n");
@@ -525,7 +512,6 @@ static irqreturn_t w5100_interrupt(int irq, void *ndev_instance)
 	if (ir & S0_IR_RECV) {
 		if (napi_schedule_prep(&priv->napi)) {
 			w5100_write(priv, W5100_IMR, 0);
-			mmiowb();
 			__napi_schedule(&priv->napi);
 		}
 	}
-- 
2.5.0

^ permalink raw reply related

* [PATCH v3 2/5] net: w5100: add ability to support other bus interface
From: Akinobu Mita @ 2016-04-14 15:11 UTC (permalink / raw)
  To: netdev; +Cc: Akinobu Mita, Mike Sinkovsky, David S. Miller
In-Reply-To: <1460646693-25179-1-git-send-email-akinobu.mita@gmail.com>

The w5100 driver currently only supports the direct and indirect bus
interface modes, which use MMIO space for accessing the w5100 registers.

In order to support the SPI interface mode, which the W5100 chip also
provides, this makes the bus interface abstraction layer more generic so
that a separate w5100-spi driver can use the w5100 driver as a core module.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Mike Sinkovsky <msink@permonline.ru>
Cc: David S. Miller <davem@davemloft.net>
---
* v3
- Add comment for reg_lock
- Add ability to allocate ops specific data structure

 drivers/net/ethernet/wiznet/w5100.c | 604 ++++++++++++++++++++++++------------
 drivers/net/ethernet/wiznet/w5100.h |  28 ++
 2 files changed, 431 insertions(+), 201 deletions(-)
 create mode 100644 drivers/net/ethernet/wiznet/w5100.h

diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index f4b7200..89cba67 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -27,6 +27,8 @@
 #include <linux/irq.h>
 #include <linux/gpio.h>
 
+#include "w5100.h"
+
 #define DRV_NAME	"w5100"
 #define DRV_VERSION	"2012-04-04"
 
@@ -76,25 +78,16 @@ MODULE_LICENSE("GPL");
 #define W5100_S0_REGS_LEN	0x0040
 
 #define W5100_TX_MEM_START	0x4000
-#define W5100_TX_MEM_END	0x5fff
-#define W5100_TX_MEM_MASK	0x1fff
+#define W5100_TX_MEM_SIZE	0x2000
 #define W5100_RX_MEM_START	0x6000
-#define W5100_RX_MEM_END	0x7fff
-#define W5100_RX_MEM_MASK	0x1fff
+#define W5100_RX_MEM_SIZE	0x2000
 
 /*
  * Device driver private data structure
  */
+
 struct w5100_priv {
-	void __iomem *base;
-	spinlock_t reg_lock;
-	bool indirect;
-	u8   (*read)(struct w5100_priv *priv, u16 addr);
-	void (*write)(struct w5100_priv *priv, u16 addr, u8 data);
-	u16  (*read16)(struct w5100_priv *priv, u16 addr);
-	void (*write16)(struct w5100_priv *priv, u16 addr, u16 data);
-	void (*readbuf)(struct w5100_priv *priv, u16 addr, u8 *buf, int len);
-	void (*writebuf)(struct w5100_priv *priv, u16 addr, u8 *buf, int len);
+	const struct w5100_ops *ops;
 	int irq;
 	int link_irq;
 	int link_gpio;
@@ -111,72 +104,121 @@ struct w5100_priv {
  *
  ***********************************************************************/
 
+struct w5100_mmio_priv {
+	void __iomem *base;
+	/* Serialize access in indirect address mode */
+	spinlock_t reg_lock;
+};
+
+static inline struct w5100_mmio_priv *w5100_mmio_priv(struct net_device *dev)
+{
+	return w5100_ops_priv(dev);
+}
+
+static inline void __iomem *w5100_mmio(struct net_device *ndev)
+{
+	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
+
+	return mmio_priv->base;
+}
+
 /*
  * In direct address mode host system can directly access W5100 registers
  * after mapping to Memory-Mapped I/O space.
  *
  * 0x8000 bytes are required for memory space.
  */
-static inline u8 w5100_read_direct(struct w5100_priv *priv, u16 addr)
+static inline int w5100_read_direct(struct net_device *ndev, u16 addr)
 {
-	return ioread8(priv->base + (addr << CONFIG_WIZNET_BUS_SHIFT));
+	return ioread8(w5100_mmio(ndev) + (addr << CONFIG_WIZNET_BUS_SHIFT));
 }
 
-static inline void __w5100_write_direct(struct w5100_priv *priv, u16 addr,
-					u8 data)
+static inline int __w5100_write_direct(struct net_device *ndev, u16 addr,
+				       u8 data)
 {
-	iowrite8(data, priv->base + (addr << CONFIG_WIZNET_BUS_SHIFT));
+	iowrite8(data, w5100_mmio(ndev) + (addr << CONFIG_WIZNET_BUS_SHIFT));
+
+	return 0;
 }
 
-static inline void w5100_write_direct(struct w5100_priv *priv,
-				      u16 addr, u8 data)
+static inline int w5100_write_direct(struct net_device *ndev, u16 addr, u8 data)
 {
-	__w5100_write_direct(priv, addr, data);
+	__w5100_write_direct(ndev, addr, data);
 	mmiowb();
+
+	return 0;
 }
 
-static u16 w5100_read16_direct(struct w5100_priv *priv, u16 addr)
+static int w5100_read16_direct(struct net_device *ndev, u16 addr)
 {
 	u16 data;
-	data  = w5100_read_direct(priv, addr) << 8;
-	data |= w5100_read_direct(priv, addr + 1);
+	data  = w5100_read_direct(ndev, addr) << 8;
+	data |= w5100_read_direct(ndev, addr + 1);
 	return data;
 }
 
-static void w5100_write16_direct(struct w5100_priv *priv, u16 addr, u16 data)
+static int w5100_write16_direct(struct net_device *ndev, u16 addr, u16 data)
 {
-	__w5100_write_direct(priv, addr, data >> 8);
-	__w5100_write_direct(priv, addr + 1, data);
+	__w5100_write_direct(ndev, addr, data >> 8);
+	__w5100_write_direct(ndev, addr + 1, data);
 	mmiowb();
+
+	return 0;
 }
 
-static void w5100_readbuf_direct(struct w5100_priv *priv,
-				 u16 offset, u8 *buf, int len)
+static int w5100_readbulk_direct(struct net_device *ndev, u16 addr, u8 *buf,
+				 int len)
 {
-	u16 addr = W5100_RX_MEM_START + (offset & W5100_RX_MEM_MASK);
 	int i;
 
-	for (i = 0; i < len; i++, addr++) {
-		if (unlikely(addr > W5100_RX_MEM_END))
-			addr = W5100_RX_MEM_START;
-		*buf++ = w5100_read_direct(priv, addr);
-	}
+	for (i = 0; i < len; i++, addr++)
+		*buf++ = w5100_read_direct(ndev, addr);
+
+	return 0;
 }
 
-static void w5100_writebuf_direct(struct w5100_priv *priv,
-				  u16 offset, u8 *buf, int len)
+static int w5100_writebulk_direct(struct net_device *ndev, u16 addr,
+				  const u8 *buf, int len)
 {
-	u16 addr = W5100_TX_MEM_START + (offset & W5100_TX_MEM_MASK);
 	int i;
 
-	for (i = 0; i < len; i++, addr++) {
-		if (unlikely(addr > W5100_TX_MEM_END))
-			addr = W5100_TX_MEM_START;
-		__w5100_write_direct(priv, addr, *buf++);
-	}
+	for (i = 0; i < len; i++, addr++)
+		__w5100_write_direct(ndev, addr, *buf++);
+
 	mmiowb();
+
+	return 0;
+}
+
+static int w5100_mmio_init(struct net_device *ndev)
+{
+	struct platform_device *pdev = to_platform_device(ndev->dev.parent);
+	struct w5100_priv *priv = netdev_priv(ndev);
+	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
+	struct resource *mem;
+
+	spin_lock_init(&mmio_priv->reg_lock);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mmio_priv->base = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(mmio_priv->base))
+		return PTR_ERR(mmio_priv->base);
+
+	netdev_info(ndev, "at 0x%llx irq %d\n", (u64)mem->start, priv->irq);
+
+	return 0;
 }
 
+static const struct w5100_ops w5100_mmio_direct_ops = {
+	.read = w5100_read_direct,
+	.write = w5100_write_direct,
+	.read16 = w5100_read16_direct,
+	.write16 = w5100_write16_direct,
+	.readbulk = w5100_readbulk_direct,
+	.writebulk = w5100_writebulk_direct,
+	.init = w5100_mmio_init,
+};
+
 /*
  * In indirect address mode host system indirectly accesses registers by
  * using Indirect Mode Address Register (IDM_AR) and Indirect Mode Data
@@ -188,121 +230,276 @@ static void w5100_writebuf_direct(struct w5100_priv *priv,
 #define W5100_IDM_AR		0x01   /* Indirect Mode Address Register */
 #define W5100_IDM_DR		0x03   /* Indirect Mode Data Register */
 
-static u8 w5100_read_indirect(struct w5100_priv *priv, u16 addr)
+static int w5100_read_indirect(struct net_device *ndev, u16 addr)
 {
+	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
 	unsigned long flags;
 	u8 data;
 
-	spin_lock_irqsave(&priv->reg_lock, flags);
-	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	data = w5100_read_direct(priv, W5100_IDM_DR);
-	spin_unlock_irqrestore(&priv->reg_lock, flags);
+	spin_lock_irqsave(&mmio_priv->reg_lock, flags);
+	w5100_write16_direct(ndev, W5100_IDM_AR, addr);
+	data = w5100_read_direct(ndev, W5100_IDM_DR);
+	spin_unlock_irqrestore(&mmio_priv->reg_lock, flags);
 
 	return data;
 }
 
-static void w5100_write_indirect(struct w5100_priv *priv, u16 addr, u8 data)
+static int w5100_write_indirect(struct net_device *ndev, u16 addr, u8 data)
 {
+	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
 	unsigned long flags;
 
-	spin_lock_irqsave(&priv->reg_lock, flags);
-	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	w5100_write_direct(priv, W5100_IDM_DR, data);
-	spin_unlock_irqrestore(&priv->reg_lock, flags);
+	spin_lock_irqsave(&mmio_priv->reg_lock, flags);
+	w5100_write16_direct(ndev, W5100_IDM_AR, addr);
+	w5100_write_direct(ndev, W5100_IDM_DR, data);
+	spin_unlock_irqrestore(&mmio_priv->reg_lock, flags);
+
+	return 0;
 }
 
-static u16 w5100_read16_indirect(struct w5100_priv *priv, u16 addr)
+static int w5100_read16_indirect(struct net_device *ndev, u16 addr)
 {
+	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
 	unsigned long flags;
 	u16 data;
 
-	spin_lock_irqsave(&priv->reg_lock, flags);
-	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	data  = w5100_read_direct(priv, W5100_IDM_DR) << 8;
-	data |= w5100_read_direct(priv, W5100_IDM_DR);
-	spin_unlock_irqrestore(&priv->reg_lock, flags);
+	spin_lock_irqsave(&mmio_priv->reg_lock, flags);
+	w5100_write16_direct(ndev, W5100_IDM_AR, addr);
+	data  = w5100_read_direct(ndev, W5100_IDM_DR) << 8;
+	data |= w5100_read_direct(ndev, W5100_IDM_DR);
+	spin_unlock_irqrestore(&mmio_priv->reg_lock, flags);
 
 	return data;
 }
 
-static void w5100_write16_indirect(struct w5100_priv *priv, u16 addr, u16 data)
+static int w5100_write16_indirect(struct net_device *ndev, u16 addr, u16 data)
 {
+	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
 	unsigned long flags;
 
-	spin_lock_irqsave(&priv->reg_lock, flags);
-	w5100_write16_direct(priv, W5100_IDM_AR, addr);
-	__w5100_write_direct(priv, W5100_IDM_DR, data >> 8);
-	w5100_write_direct(priv, W5100_IDM_DR, data);
-	spin_unlock_irqrestore(&priv->reg_lock, flags);
+	spin_lock_irqsave(&mmio_priv->reg_lock, flags);
+	w5100_write16_direct(ndev, W5100_IDM_AR, addr);
+	__w5100_write_direct(ndev, W5100_IDM_DR, data >> 8);
+	w5100_write_direct(ndev, W5100_IDM_DR, data);
+	spin_unlock_irqrestore(&mmio_priv->reg_lock, flags);
+
+	return 0;
 }
 
-static void w5100_readbuf_indirect(struct w5100_priv *priv,
-				   u16 offset, u8 *buf, int len)
+static int w5100_readbulk_indirect(struct net_device *ndev, u16 addr, u8 *buf,
+				   int len)
 {
-	u16 addr = W5100_RX_MEM_START + (offset & W5100_RX_MEM_MASK);
+	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&priv->reg_lock, flags);
-	w5100_write16_direct(priv, W5100_IDM_AR, addr);
+	spin_lock_irqsave(&mmio_priv->reg_lock, flags);
+	w5100_write16_direct(ndev, W5100_IDM_AR, addr);
+
+	for (i = 0; i < len; i++)
+		*buf++ = w5100_read_direct(ndev, W5100_IDM_DR);
 
-	for (i = 0; i < len; i++, addr++) {
-		if (unlikely(addr > W5100_RX_MEM_END)) {
-			addr = W5100_RX_MEM_START;
-			w5100_write16_direct(priv, W5100_IDM_AR, addr);
-		}
-		*buf++ = w5100_read_direct(priv, W5100_IDM_DR);
-	}
 	mmiowb();
-	spin_unlock_irqrestore(&priv->reg_lock, flags);
+	spin_unlock_irqrestore(&mmio_priv->reg_lock, flags);
+
+	return 0;
 }
 
-static void w5100_writebuf_indirect(struct w5100_priv *priv,
-				    u16 offset, u8 *buf, int len)
+static int w5100_writebulk_indirect(struct net_device *ndev, u16 addr,
+				    const u8 *buf, int len)
 {
-	u16 addr = W5100_TX_MEM_START + (offset & W5100_TX_MEM_MASK);
+	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&priv->reg_lock, flags);
-	w5100_write16_direct(priv, W5100_IDM_AR, addr);
+	spin_lock_irqsave(&mmio_priv->reg_lock, flags);
+	w5100_write16_direct(ndev, W5100_IDM_AR, addr);
+
+	for (i = 0; i < len; i++)
+		__w5100_write_direct(ndev, W5100_IDM_DR, *buf++);
 
-	for (i = 0; i < len; i++, addr++) {
-		if (unlikely(addr > W5100_TX_MEM_END)) {
-			addr = W5100_TX_MEM_START;
-			w5100_write16_direct(priv, W5100_IDM_AR, addr);
-		}
-		__w5100_write_direct(priv, W5100_IDM_DR, *buf++);
-	}
 	mmiowb();
-	spin_unlock_irqrestore(&priv->reg_lock, flags);
+	spin_unlock_irqrestore(&mmio_priv->reg_lock, flags);
+
+	return 0;
+}
+
+static int w5100_reset_indirect(struct net_device *ndev)
+{
+	w5100_write_direct(ndev, W5100_MR, MR_RST);
+	mdelay(5);
+	w5100_write_direct(ndev, W5100_MR, MR_PB | MR_AI | MR_IND);
+
+	return 0;
 }
 
+static const struct w5100_ops w5100_mmio_indirect_ops = {
+	.read = w5100_read_indirect,
+	.write = w5100_write_indirect,
+	.read16 = w5100_read16_indirect,
+	.write16 = w5100_write16_indirect,
+	.readbulk = w5100_readbulk_indirect,
+	.writebulk = w5100_writebulk_indirect,
+	.init = w5100_mmio_init,
+	.reset = w5100_reset_indirect,
+};
+
 #if defined(CONFIG_WIZNET_BUS_DIRECT)
-#define w5100_read	w5100_read_direct
-#define w5100_write	w5100_write_direct
-#define w5100_read16	w5100_read16_direct
-#define w5100_write16	w5100_write16_direct
-#define w5100_readbuf	w5100_readbuf_direct
-#define w5100_writebuf	w5100_writebuf_direct
+
+static int w5100_read(struct w5100_priv *priv, u16 addr)
+{
+	return w5100_read_direct(priv->ndev, addr);
+}
+
+static int w5100_write(struct w5100_priv *priv, u16 addr, u8 data)
+{
+	return w5100_write_direct(priv->ndev, addr, data);
+}
+
+static int w5100_read16(struct w5100_priv *priv, u16 addr)
+{
+	return w5100_read16_direct(priv->ndev, addr);
+}
+
+static int w5100_write16(struct w5100_priv *priv, u16 addr, u16 data)
+{
+	return w5100_write16_direct(priv->ndev, addr, data);
+}
+
+static int w5100_readbulk(struct w5100_priv *priv, u16 addr, u8 *buf, int len)
+{
+	return w5100_readbulk_direct(priv->ndev, addr, buf, len);
+}
+
+static int w5100_writebulk(struct w5100_priv *priv, u16 addr, const u8 *buf,
+			   int len)
+{
+	return w5100_writebulk_direct(priv->ndev, addr, buf, len);
+}
 
 #elif defined(CONFIG_WIZNET_BUS_INDIRECT)
-#define w5100_read	w5100_read_indirect
-#define w5100_write	w5100_write_indirect
-#define w5100_read16	w5100_read16_indirect
-#define w5100_write16	w5100_write16_indirect
-#define w5100_readbuf	w5100_readbuf_indirect
-#define w5100_writebuf	w5100_writebuf_indirect
+
+static int w5100_read(struct w5100_priv *priv, u16 addr)
+{
+	return w5100_read_indirect(priv->ndev, addr);
+}
+
+static int w5100_write(struct w5100_priv *priv, u16 addr, u8 data)
+{
+	return w5100_write_indirect(priv->ndev, addr, data);
+}
+
+static int w5100_read16(struct w5100_priv *priv, u16 addr)
+{
+	return w5100_read16_indirect(priv->ndev, addr);
+}
+
+static int w5100_write16(struct w5100_priv *priv, u16 addr, u16 data)
+{
+	return w5100_write16_indirect(priv->ndev, addr, data);
+}
+
+static int w5100_readbulk(struct w5100_priv *priv, u16 addr, u8 *buf, int len)
+{
+	return w5100_readbulk_indirect(priv->ndev, addr, buf, len);
+}
+
+static int w5100_writebulk(struct w5100_priv *priv, u16 addr, const u8 *buf,
+			   int len)
+{
+	return w5100_writebulk_indirect(priv->ndev, addr, buf, len);
+}
 
 #else /* CONFIG_WIZNET_BUS_ANY */
-#define w5100_read	priv->read
-#define w5100_write	priv->write
-#define w5100_read16	priv->read16
-#define w5100_write16	priv->write16
-#define w5100_readbuf	priv->readbuf
-#define w5100_writebuf	priv->writebuf
+
+static int w5100_read(struct w5100_priv *priv, u16 addr)
+{
+	return priv->ops->read(priv->ndev, addr);
+}
+
+static int w5100_write(struct w5100_priv *priv, u16 addr, u8 data)
+{
+	return priv->ops->write(priv->ndev, addr, data);
+}
+
+static int w5100_read16(struct w5100_priv *priv, u16 addr)
+{
+	return priv->ops->read16(priv->ndev, addr);
+}
+
+static int w5100_write16(struct w5100_priv *priv, u16 addr, u16 data)
+{
+	return priv->ops->write16(priv->ndev, addr, data);
+}
+
+static int w5100_readbulk(struct w5100_priv *priv, u16 addr, u8 *buf, int len)
+{
+	return priv->ops->readbulk(priv->ndev, addr, buf, len);
+}
+
+static int w5100_writebulk(struct w5100_priv *priv, u16 addr, const u8 *buf,
+			   int len)
+{
+	return priv->ops->writebulk(priv->ndev, addr, buf, len);
+}
+
 #endif
 
+static int w5100_readbuf(struct w5100_priv *priv, u16 offset, u8 *buf, int len)
+{
+	u16 addr;
+	int remain = 0;
+	int ret;
+
+	offset %= W5100_RX_MEM_SIZE;
+	addr = W5100_RX_MEM_START + offset;
+
+	if (offset + len > W5100_RX_MEM_SIZE) {
+		remain = (offset + len) % W5100_RX_MEM_SIZE;
+		len = W5100_RX_MEM_SIZE - offset;
+	}
+
+	ret = w5100_readbulk(priv, addr, buf, len);
+	if (ret || !remain)
+		return ret;
+
+	return w5100_readbulk(priv, W5100_RX_MEM_START, buf + len, remain);
+}
+
+static int w5100_writebuf(struct w5100_priv *priv, u16 offset, const u8 *buf,
+			  int len)
+{
+	u16 addr;
+	int ret;
+	int remain = 0;
+
+	offset %= W5100_TX_MEM_SIZE;
+	addr = W5100_TX_MEM_START + offset;
+
+	if (offset + len > W5100_TX_MEM_SIZE) {
+		remain = (offset + len) % W5100_TX_MEM_SIZE;
+		len = W5100_TX_MEM_SIZE - offset;
+	}
+
+	ret = w5100_writebulk(priv, addr, buf, len);
+	if (ret || !remain)
+		return ret;
+
+	return w5100_writebulk(priv, W5100_TX_MEM_START, buf + len, remain);
+}
+
+static int w5100_reset(struct w5100_priv *priv)
+{
+	if (priv->ops->reset)
+		return priv->ops->reset(priv->ndev);
+
+	w5100_write(priv, W5100_MR, MR_RST);
+	mdelay(5);
+	w5100_write(priv, W5100_MR, MR_PB);
+
+	return 0;
+}
+
 static int w5100_command(struct w5100_priv *priv, u16 cmd)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies(100);
@@ -321,19 +518,14 @@ static int w5100_command(struct w5100_priv *priv, u16 cmd)
 static void w5100_write_macaddr(struct w5100_priv *priv)
 {
 	struct net_device *ndev = priv->ndev;
-	int i;
 
-	for (i = 0; i < ETH_ALEN; i++)
-		w5100_write(priv, W5100_SHAR + i, ndev->dev_addr[i]);
+	w5100_writebulk(priv, W5100_SHAR, ndev->dev_addr, ETH_ALEN);
 }
 
 static void w5100_hw_reset(struct w5100_priv *priv)
 {
-	w5100_write_direct(priv, W5100_MR, MR_RST);
-	mdelay(5);
-	w5100_write_direct(priv, W5100_MR, priv->indirect ?
-				  MR_PB | MR_AI | MR_IND :
-				  MR_PB);
+	w5100_reset(priv);
+
 	w5100_write(priv, W5100_IMR, 0);
 	w5100_write_macaddr(priv);
 
@@ -403,17 +595,14 @@ static int w5100_get_regs_len(struct net_device *ndev)
 }
 
 static void w5100_get_regs(struct net_device *ndev,
-			   struct ethtool_regs *regs, void *_buf)
+			   struct ethtool_regs *regs, void *buf)
 {
 	struct w5100_priv *priv = netdev_priv(ndev);
-	u8 *buf = _buf;
-	u16 i;
 
 	regs->version = 1;
-	for (i = 0; i < W5100_COMMON_REGS_LEN; i++)
-		*buf++ = w5100_read(priv, W5100_COMMON_REGS + i);
-	for (i = 0; i < W5100_S0_REGS_LEN; i++)
-		*buf++ = w5100_read(priv, W5100_S0_REGS + i);
+	w5100_readbulk(priv, W5100_COMMON_REGS, buf, W5100_COMMON_REGS_LEN);
+	buf += W5100_COMMON_REGS_LEN;
+	w5100_readbulk(priv, W5100_S0_REGS, buf, W5100_S0_REGS_LEN);
 }
 
 static void w5100_tx_timeout(struct net_device *ndev)
@@ -606,91 +795,68 @@ static const struct net_device_ops w5100_netdev_ops = {
 	.ndo_change_mtu		= eth_change_mtu,
 };
 
-static int w5100_hw_probe(struct platform_device *pdev)
+static int w5100_mmio_probe(struct platform_device *pdev)
 {
 	struct wiznet_platform_data *data = dev_get_platdata(&pdev->dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct w5100_priv *priv = netdev_priv(ndev);
-	const char *name = netdev_name(ndev);
+	u8 *mac_addr = NULL;
 	struct resource *mem;
-	int mem_size;
+	const struct w5100_ops *ops;
 	int irq;
-	int ret;
 
-	if (data && is_valid_ether_addr(data->mac_addr)) {
-		memcpy(ndev->dev_addr, data->mac_addr, ETH_ALEN);
-	} else {
-		eth_hw_addr_random(ndev);
-	}
+	if (data && is_valid_ether_addr(data->mac_addr))
+		mac_addr = data->mac_addr;
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(&pdev->dev, mem);
-	if (IS_ERR(priv->base))
-		return PTR_ERR(priv->base);
-
-	mem_size = resource_size(mem);
-
-	spin_lock_init(&priv->reg_lock);
-	priv->indirect = mem_size < W5100_BUS_DIRECT_SIZE;
-	if (priv->indirect) {
-		priv->read     = w5100_read_indirect;
-		priv->write    = w5100_write_indirect;
-		priv->read16   = w5100_read16_indirect;
-		priv->write16  = w5100_write16_indirect;
-		priv->readbuf  = w5100_readbuf_indirect;
-		priv->writebuf = w5100_writebuf_indirect;
-	} else {
-		priv->read     = w5100_read_direct;
-		priv->write    = w5100_write_direct;
-		priv->read16   = w5100_read16_direct;
-		priv->write16  = w5100_write16_direct;
-		priv->readbuf  = w5100_readbuf_direct;
-		priv->writebuf = w5100_writebuf_direct;
-	}
-
-	w5100_hw_reset(priv);
-	if (w5100_read16(priv, W5100_RTR) != RTR_DEFAULT)
-		return -ENODEV;
+	if (resource_size(mem) < W5100_BUS_DIRECT_SIZE)
+		ops = &w5100_mmio_indirect_ops;
+	else
+		ops = &w5100_mmio_direct_ops;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return irq;
-	ret = request_irq(irq, w5100_interrupt,
-			  IRQ_TYPE_LEVEL_LOW, name, ndev);
-	if (ret < 0)
-		return ret;
-	priv->irq = irq;
 
-	priv->link_gpio = data ? data->link_gpio : -EINVAL;
-	if (gpio_is_valid(priv->link_gpio)) {
-		char *link_name = devm_kzalloc(&pdev->dev, 16, GFP_KERNEL);
-		if (!link_name)
-			return -ENOMEM;
-		snprintf(link_name, 16, "%s-link", name);
-		priv->link_irq = gpio_to_irq(priv->link_gpio);
-		if (request_any_context_irq(priv->link_irq, w5100_detect_link,
-				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-				link_name, priv->ndev) < 0)
-			priv->link_gpio = -EINVAL;
-	}
+	return w5100_probe(&pdev->dev, ops, sizeof(struct w5100_mmio_priv),
+			   mac_addr, irq, data ? data->link_gpio : -EINVAL);
+}
 
-	netdev_info(ndev, "at 0x%llx irq %d\n", (u64)mem->start, irq);
-	return 0;
+static int w5100_mmio_remove(struct platform_device *pdev)
+{
+	return w5100_remove(&pdev->dev);
 }
 
-static int w5100_probe(struct platform_device *pdev)
+void *w5100_ops_priv(const struct net_device *ndev)
+{
+	return netdev_priv(ndev) +
+	       ALIGN(sizeof(struct w5100_priv), NETDEV_ALIGN);
+}
+EXPORT_SYMBOL_GPL(w5100_ops_priv);
+
+int w5100_probe(struct device *dev, const struct w5100_ops *ops,
+		int sizeof_ops_priv, u8 *mac_addr, int irq, int link_gpio)
 {
 	struct w5100_priv *priv;
 	struct net_device *ndev;
 	int err;
+	size_t alloc_size;
+
+	alloc_size = sizeof(*priv);
+	if (sizeof_ops_priv) {
+		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
+		alloc_size += sizeof_ops_priv;
+	}
+	alloc_size += NETDEV_ALIGN - 1;
 
-	ndev = alloc_etherdev(sizeof(*priv));
+	ndev = alloc_etherdev(alloc_size);
 	if (!ndev)
 		return -ENOMEM;
-	SET_NETDEV_DEV(ndev, &pdev->dev);
-	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, dev);
+	dev_set_drvdata(dev, ndev);
 	priv = netdev_priv(ndev);
 	priv->ndev = ndev;
+	priv->ops = ops;
+	priv->irq = irq;
+	priv->link_gpio = link_gpio;
 
 	ndev->netdev_ops = &w5100_netdev_ops;
 	ndev->ethtool_ops = &w5100_ethtool_ops;
@@ -706,22 +872,59 @@ static int w5100_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto err_register;
 
-	err = w5100_hw_probe(pdev);
-	if (err < 0)
-		goto err_hw_probe;
+	if (mac_addr)
+		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+	else
+		eth_hw_addr_random(ndev);
+
+	if (priv->ops->init) {
+		err = priv->ops->init(priv->ndev);
+		if (err)
+			goto err_hw;
+	}
+
+	w5100_hw_reset(priv);
+	if (w5100_read16(priv, W5100_RTR) != RTR_DEFAULT) {
+		err = -ENODEV;
+		goto err_hw;
+	}
+
+	err = request_irq(priv->irq, w5100_interrupt, IRQF_TRIGGER_LOW,
+			  netdev_name(ndev), ndev);
+	if (err)
+		goto err_hw;
+
+	if (gpio_is_valid(priv->link_gpio)) {
+		char *link_name = devm_kzalloc(dev, 16, GFP_KERNEL);
+
+		if (!link_name) {
+			err = -ENOMEM;
+			goto err_gpio;
+		}
+		snprintf(link_name, 16, "%s-link", netdev_name(ndev));
+		priv->link_irq = gpio_to_irq(priv->link_gpio);
+		if (request_any_context_irq(priv->link_irq, w5100_detect_link,
+					    IRQF_TRIGGER_RISING |
+					    IRQF_TRIGGER_FALLING,
+					    link_name, priv->ndev) < 0)
+			priv->link_gpio = -EINVAL;
+	}
 
 	return 0;
 
-err_hw_probe:
+err_gpio:
+	free_irq(priv->irq, ndev);
+err_hw:
 	unregister_netdev(ndev);
 err_register:
 	free_netdev(ndev);
 	return err;
 }
+EXPORT_SYMBOL_GPL(w5100_probe);
 
-static int w5100_remove(struct platform_device *pdev)
+int w5100_remove(struct device *dev)
 {
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct w5100_priv *priv = netdev_priv(ndev);
 
 	w5100_hw_reset(priv);
@@ -733,12 +936,12 @@ static int w5100_remove(struct platform_device *pdev)
 	free_netdev(ndev);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(w5100_remove);
 
 #ifdef CONFIG_PM_SLEEP
 static int w5100_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct w5100_priv *priv = netdev_priv(ndev);
 
 	if (netif_running(ndev)) {
@@ -752,8 +955,7 @@ static int w5100_suspend(struct device *dev)
 
 static int w5100_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct net_device *ndev = dev_get_drvdata(dev);
 	struct w5100_priv *priv = netdev_priv(ndev);
 
 	if (netif_running(ndev)) {
@@ -769,15 +971,15 @@ static int w5100_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-static SIMPLE_DEV_PM_OPS(w5100_pm_ops, w5100_suspend, w5100_resume);
+SIMPLE_DEV_PM_OPS(w5100_pm_ops, w5100_suspend, w5100_resume);
+EXPORT_SYMBOL_GPL(w5100_pm_ops);
 
-static struct platform_driver w5100_driver = {
+static struct platform_driver w5100_mmio_driver = {
 	.driver		= {
 		.name	= DRV_NAME,
 		.pm	= &w5100_pm_ops,
 	},
-	.probe		= w5100_probe,
-	.remove		= w5100_remove,
+	.probe		= w5100_mmio_probe,
+	.remove		= w5100_mmio_remove,
 };
-
-module_platform_driver(w5100_driver);
+module_platform_driver(w5100_mmio_driver);
diff --git a/drivers/net/ethernet/wiznet/w5100.h b/drivers/net/ethernet/wiznet/w5100.h
new file mode 100644
index 0000000..39d452d8
--- /dev/null
+++ b/drivers/net/ethernet/wiznet/w5100.h
@@ -0,0 +1,28 @@
+/*
+ * Ethernet driver for the WIZnet W5100 chip.
+ *
+ * Copyright (C) 2006-2008 WIZnet Co.,Ltd.
+ * Copyright (C) 2012 Mike Sinkovsky <msink@permonline.ru>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+struct w5100_ops {
+	int (*read)(struct net_device *ndev, u16 addr);
+	int (*write)(struct net_device *ndev, u16 addr, u8 data);
+	int (*read16)(struct net_device *ndev, u16 addr);
+	int (*write16)(struct net_device *ndev, u16 addr, u16 data);
+	int (*readbulk)(struct net_device *ndev, u16 addr, u8 *buf, int len);
+	int (*writebulk)(struct net_device *ndev, u16 addr, const u8 *buf,
+			 int len);
+	int (*reset)(struct net_device *ndev);
+	int (*init)(struct net_device *ndev);
+};
+
+void *w5100_ops_priv(const struct net_device *ndev);
+
+int w5100_probe(struct device *dev, const struct w5100_ops *ops,
+		int sizeof_ops_priv, u8 *mac_addr, int irq, int link_gpio);
+int w5100_remove(struct device *dev);
+
+extern const struct dev_pm_ops w5100_pm_ops;
-- 
2.5.0

^ permalink raw reply related

* [PATCH v3 3/5] net: w5100: enable to support sleepable register access interface
From: Akinobu Mita @ 2016-04-14 15:11 UTC (permalink / raw)
  To: netdev; +Cc: Akinobu Mita, Mike Sinkovsky, David S. Miller
In-Reply-To: <1460646693-25179-1-git-send-email-akinobu.mita@gmail.com>

SPI transfer routines are callable only from contexts that can sleep.

This adds the ability to tell the core driver that the interface mode
cannot access w5100 registers in atomic contexts.  In this case, a
workqueue and a threaded irq are required.

This also corrects the timeout period used while waiting for the command
register to be cleared automatically: the timeout now starts after the
command is written, since SPI register access latency can vary.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Mike Sinkovsky <msink@permonline.ru>
Cc: David S. Miller <davem@davemloft.net>
---
* No changes from v2

 drivers/net/ethernet/wiznet/w5100.c | 190 ++++++++++++++++++++++++++++--------
 drivers/net/ethernet/wiznet/w5100.h |   1 +
 2 files changed, 153 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index 89cba67..42a9de4 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -96,6 +96,13 @@ struct w5100_priv {
 	struct net_device *ndev;
 	bool promisc;
 	u32 msg_enable;
+
+	struct workqueue_struct *xfer_wq;
+	struct work_struct rx_work;
+	struct sk_buff *tx_skb;
+	struct work_struct tx_work;
+	struct work_struct setrx_work;
+	struct work_struct restart_work;
 };
 
 /************************************************************************
@@ -502,10 +509,12 @@ static int w5100_reset(struct w5100_priv *priv)
 
 static int w5100_command(struct w5100_priv *priv, u16 cmd)
 {
-	unsigned long timeout = jiffies + msecs_to_jiffies(100);
+	unsigned long timeout;
 
 	w5100_write(priv, W5100_S0_CR, cmd);
 
+	timeout = jiffies + msecs_to_jiffies(100);
+
 	while (w5100_read(priv, W5100_S0_CR) != 0) {
 		if (time_after(jiffies, timeout))
 			return -EIO;
@@ -605,7 +614,7 @@ static void w5100_get_regs(struct net_device *ndev,
 	w5100_readbulk(priv, W5100_S0_REGS, buf, W5100_S0_REGS_LEN);
 }
 
-static void w5100_tx_timeout(struct net_device *ndev)
+static void w5100_restart(struct net_device *ndev)
 {
 	struct w5100_priv *priv = netdev_priv(ndev);
 
@@ -617,12 +626,28 @@ static void w5100_tx_timeout(struct net_device *ndev)
 	netif_wake_queue(ndev);
 }
 
-static int w5100_start_tx(struct sk_buff *skb, struct net_device *ndev)
+static void w5100_restart_work(struct work_struct *work)
+{
+	struct w5100_priv *priv = container_of(work, struct w5100_priv,
+					       restart_work);
+
+	w5100_restart(priv->ndev);
+}
+
+static void w5100_tx_timeout(struct net_device *ndev)
 {
 	struct w5100_priv *priv = netdev_priv(ndev);
-	u16 offset;
 
-	netif_stop_queue(ndev);
+	if (priv->ops->may_sleep)
+		schedule_work(&priv->restart_work);
+	else
+		w5100_restart(ndev);
+}
+
+static void w5100_tx_skb(struct net_device *ndev, struct sk_buff *skb)
+{
+	struct w5100_priv *priv = netdev_priv(ndev);
+	u16 offset;
 
 	offset = w5100_read16(priv, W5100_S0_TX_WR);
 	w5100_writebuf(priv, offset, skb->data, skb->len);
@@ -632,47 +657,98 @@ static int w5100_start_tx(struct sk_buff *skb, struct net_device *ndev)
 	dev_kfree_skb(skb);
 
 	w5100_command(priv, S0_CR_SEND);
+}
+
+static void w5100_tx_work(struct work_struct *work)
+{
+	struct w5100_priv *priv = container_of(work, struct w5100_priv,
+					       tx_work);
+	struct sk_buff *skb = priv->tx_skb;
+
+	priv->tx_skb = NULL;
+
+	if (WARN_ON(!skb))
+		return;
+	w5100_tx_skb(priv->ndev, skb);
+}
+
+static int w5100_start_tx(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct w5100_priv *priv = netdev_priv(ndev);
+
+	netif_stop_queue(ndev);
+
+	if (priv->ops->may_sleep) {
+		WARN_ON(priv->tx_skb);
+		priv->tx_skb = skb;
+		queue_work(priv->xfer_wq, &priv->tx_work);
+	} else {
+		w5100_tx_skb(ndev, skb);
+	}
 
 	return NETDEV_TX_OK;
 }
 
-static int w5100_napi_poll(struct napi_struct *napi, int budget)
+static struct sk_buff *w5100_rx_skb(struct net_device *ndev)
 {
-	struct w5100_priv *priv = container_of(napi, struct w5100_priv, napi);
-	struct net_device *ndev = priv->ndev;
+	struct w5100_priv *priv = netdev_priv(ndev);
 	struct sk_buff *skb;
-	int rx_count;
 	u16 rx_len;
 	u16 offset;
 	u8 header[2];
+	u16 rx_buf_len = w5100_read16(priv, W5100_S0_RX_RSR);
 
-	for (rx_count = 0; rx_count < budget; rx_count++) {
-		u16 rx_buf_len = w5100_read16(priv, W5100_S0_RX_RSR);
-		if (rx_buf_len == 0)
-			break;
+	if (rx_buf_len == 0)
+		return NULL;
 
-		offset = w5100_read16(priv, W5100_S0_RX_RD);
-		w5100_readbuf(priv, offset, header, 2);
-		rx_len = get_unaligned_be16(header) - 2;
-
-		skb = netdev_alloc_skb_ip_align(ndev, rx_len);
-		if (unlikely(!skb)) {
-			w5100_write16(priv, W5100_S0_RX_RD,
-					    offset + rx_buf_len);
-			w5100_command(priv, S0_CR_RECV);
-			ndev->stats.rx_dropped++;
-			return -ENOMEM;
-		}
+	offset = w5100_read16(priv, W5100_S0_RX_RD);
+	w5100_readbuf(priv, offset, header, 2);
+	rx_len = get_unaligned_be16(header) - 2;
 
-		skb_put(skb, rx_len);
-		w5100_readbuf(priv, offset + 2, skb->data, rx_len);
-		w5100_write16(priv, W5100_S0_RX_RD, offset + 2 + rx_len);
+	skb = netdev_alloc_skb_ip_align(ndev, rx_len);
+	if (unlikely(!skb)) {
+		w5100_write16(priv, W5100_S0_RX_RD, offset + rx_buf_len);
 		w5100_command(priv, S0_CR_RECV);
-		skb->protocol = eth_type_trans(skb, ndev);
+		ndev->stats.rx_dropped++;
+		return NULL;
+	}
+
+	skb_put(skb, rx_len);
+	w5100_readbuf(priv, offset + 2, skb->data, rx_len);
+	w5100_write16(priv, W5100_S0_RX_RD, offset + 2 + rx_len);
+	w5100_command(priv, S0_CR_RECV);
+	skb->protocol = eth_type_trans(skb, ndev);
+
+	ndev->stats.rx_packets++;
+	ndev->stats.rx_bytes += rx_len;
+
+	return skb;
+}
+
+static void w5100_rx_work(struct work_struct *work)
+{
+	struct w5100_priv *priv = container_of(work, struct w5100_priv,
+					       rx_work);
+	struct sk_buff *skb;
+
+	while ((skb = w5100_rx_skb(priv->ndev)))
+		netif_rx_ni(skb);
+
+	w5100_write(priv, W5100_IMR, IR_S0);
+}
+
+static int w5100_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct w5100_priv *priv = container_of(napi, struct w5100_priv, napi);
+	int rx_count;
+
+	for (rx_count = 0; rx_count < budget; rx_count++) {
+		struct sk_buff *skb = w5100_rx_skb(priv->ndev);
 
-		netif_receive_skb(skb);
-		ndev->stats.rx_packets++;
-		ndev->stats.rx_bytes += rx_len;
+		if (skb)
+			netif_receive_skb(skb);
+		else
+			break;
 	}
 
 	if (rx_count < budget) {
@@ -699,10 +775,12 @@ static irqreturn_t w5100_interrupt(int irq, void *ndev_instance)
 	}
 
 	if (ir & S0_IR_RECV) {
-		if (napi_schedule_prep(&priv->napi)) {
-			w5100_write(priv, W5100_IMR, 0);
+		w5100_write(priv, W5100_IMR, 0);
+
+		if (priv->ops->may_sleep)
+			queue_work(priv->xfer_wq, &priv->rx_work);
+		else if (napi_schedule_prep(&priv->napi))
 			__napi_schedule(&priv->napi);
-		}
 	}
 
 	return IRQ_HANDLED;
@@ -726,6 +804,14 @@ static irqreturn_t w5100_detect_link(int irq, void *ndev_instance)
 	return IRQ_HANDLED;
 }
 
+static void w5100_setrx_work(struct work_struct *work)
+{
+	struct w5100_priv *priv = container_of(work, struct w5100_priv,
+					       setrx_work);
+
+	w5100_hw_start(priv);
+}
+
 static void w5100_set_rx_mode(struct net_device *ndev)
 {
 	struct w5100_priv *priv = netdev_priv(ndev);
@@ -733,7 +819,11 @@ static void w5100_set_rx_mode(struct net_device *ndev)
 
 	if (priv->promisc != set_promisc) {
 		priv->promisc = set_promisc;
-		w5100_hw_start(priv);
+
+		if (priv->ops->may_sleep)
+			schedule_work(&priv->setrx_work);
+		else
+			w5100_hw_start(priv);
 	}
 }
 
@@ -872,6 +962,17 @@ int w5100_probe(struct device *dev, const struct w5100_ops *ops,
 	if (err < 0)
 		goto err_register;
 
+	priv->xfer_wq = create_workqueue(netdev_name(ndev));
+	if (!priv->xfer_wq) {
+		err = -ENOMEM;
+		goto err_wq;
+	}
+
+	INIT_WORK(&priv->rx_work, w5100_rx_work);
+	INIT_WORK(&priv->tx_work, w5100_tx_work);
+	INIT_WORK(&priv->setrx_work, w5100_setrx_work);
+	INIT_WORK(&priv->restart_work, w5100_restart_work);
+
 	if (mac_addr)
 		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
 	else
@@ -889,8 +990,14 @@ int w5100_probe(struct device *dev, const struct w5100_ops *ops,
 		goto err_hw;
 	}
 
-	err = request_irq(priv->irq, w5100_interrupt, IRQF_TRIGGER_LOW,
-			  netdev_name(ndev), ndev);
+	if (ops->may_sleep) {
+		err = request_threaded_irq(priv->irq, NULL, w5100_interrupt,
+					   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+					   netdev_name(ndev), ndev);
+	} else {
+		err = request_irq(priv->irq, w5100_interrupt,
+				  IRQF_TRIGGER_LOW, netdev_name(ndev), ndev);
+	}
 	if (err)
 		goto err_hw;
 
@@ -915,6 +1022,8 @@ int w5100_probe(struct device *dev, const struct w5100_ops *ops,
 err_gpio:
 	free_irq(priv->irq, ndev);
 err_hw:
+	destroy_workqueue(priv->xfer_wq);
+err_wq:
 	unregister_netdev(ndev);
 err_register:
 	free_netdev(ndev);
@@ -932,6 +1041,11 @@ int w5100_remove(struct device *dev)
 	if (gpio_is_valid(priv->link_gpio))
 		free_irq(priv->link_irq, ndev);
 
+	flush_work(&priv->setrx_work);
+	flush_work(&priv->restart_work);
+	flush_workqueue(priv->xfer_wq);
+	destroy_workqueue(priv->xfer_wq);
+
 	unregister_netdev(ndev);
 	free_netdev(ndev);
 	return 0;
diff --git a/drivers/net/ethernet/wiznet/w5100.h b/drivers/net/ethernet/wiznet/w5100.h
index 39d452d8..69045f0 100644
--- a/drivers/net/ethernet/wiznet/w5100.h
+++ b/drivers/net/ethernet/wiznet/w5100.h
@@ -8,6 +8,7 @@
  */
 
 struct w5100_ops {
+	bool may_sleep;
 	int (*read)(struct net_device *ndev, u16 addr);
 	int (*write)(struct net_device *ndev, u16 addr, u8 data);
 	int (*read16)(struct net_device *ndev, u16 addr);
-- 
2.5.0

^ permalink raw reply related

* [PATCH v3 4/5] net: w5100: support SPI interface mode
From: Akinobu Mita @ 2016-04-14 15:11 UTC (permalink / raw)
  To: netdev; +Cc: Akinobu Mita, Mike Sinkovsky, David S. Miller
In-Reply-To: <1460646693-25179-1-git-send-email-akinobu.mita@gmail.com>

This adds a new w5100-spi driver which shares the bus-interface-independent
code with the existing w5100 driver.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Mike Sinkovsky <msink@permonline.ru>
Cc: David S. Miller <davem@davemloft.net>
---
* No changes from v2

 drivers/net/ethernet/wiznet/Kconfig     |  14 ++++
 drivers/net/ethernet/wiznet/Makefile    |   1 +
 drivers/net/ethernet/wiznet/w5100-spi.c | 136 ++++++++++++++++++++++++++++++++
 3 files changed, 151 insertions(+)
 create mode 100644 drivers/net/ethernet/wiznet/w5100-spi.c

diff --git a/drivers/net/ethernet/wiznet/Kconfig b/drivers/net/ethernet/wiznet/Kconfig
index f98b91d..d1ab353 100644
--- a/drivers/net/ethernet/wiznet/Kconfig
+++ b/drivers/net/ethernet/wiznet/Kconfig
@@ -69,4 +69,18 @@ config WIZNET_BUS_ANY
 	  Performance may decrease compared to explicitly selected bus mode.
 endchoice
 
+config WIZNET_W5100_SPI
+	tristate "WIZnet W5100 Ethernet support for SPI mode"
+	depends on WIZNET_BUS_ANY
+	depends on SPI
+	---help---
+	  In SPI mode host system accesses registers using SPI protocol
+	  (mode 0) on the SPI bus.
+
+	  Performance is lower than in the other bus interface modes.
+	  In W5100 SPI mode, burst READ/WRITE processing is not provided.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called w5100-spi.
+
 endif # NET_VENDOR_WIZNET
diff --git a/drivers/net/ethernet/wiznet/Makefile b/drivers/net/ethernet/wiznet/Makefile
index c614535..1e05e1a 100644
--- a/drivers/net/ethernet/wiznet/Makefile
+++ b/drivers/net/ethernet/wiznet/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_WIZNET_W5100) += w5100.o
+obj-$(CONFIG_WIZNET_W5100_SPI) += w5100-spi.o
 obj-$(CONFIG_WIZNET_W5300) += w5300.o
diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c
new file mode 100644
index 0000000..32f406c
--- /dev/null
+++ b/drivers/net/ethernet/wiznet/w5100-spi.c
@@ -0,0 +1,136 @@
+/*
+ * Ethernet driver for the WIZnet W5100 chip.
+ *
+ * Copyright (C) 2016 Akinobu Mita <akinobu.mita@gmail.com>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/spi/spi.h>
+
+#include "w5100.h"
+
+#define W5100_SPI_WRITE_OPCODE 0xf0
+#define W5100_SPI_READ_OPCODE 0x0f
+
+static int w5100_spi_read(struct net_device *ndev, u16 addr)
+{
+	struct spi_device *spi = to_spi_device(ndev->dev.parent);
+	u8 cmd[3] = { W5100_SPI_READ_OPCODE, addr >> 8, addr & 0xff };
+	u8 data;
+	int ret;
+
+	ret = spi_write_then_read(spi, cmd, sizeof(cmd), &data, 1);
+
+	return ret ? ret : data;
+}
+
+static int w5100_spi_write(struct net_device *ndev, u16 addr, u8 data)
+{
+	struct spi_device *spi = to_spi_device(ndev->dev.parent);
+	u8 cmd[4] = { W5100_SPI_WRITE_OPCODE, addr >> 8, addr & 0xff, data};
+
+	return spi_write_then_read(spi, cmd, sizeof(cmd), NULL, 0);
+}
+
+static int w5100_spi_read16(struct net_device *ndev, u16 addr)
+{
+	u16 data;
+	int ret;
+
+	ret = w5100_spi_read(ndev, addr);
+	if (ret < 0)
+		return ret;
+	data = ret << 8;
+	ret = w5100_spi_read(ndev, addr + 1);
+
+	return ret < 0 ? ret : data | ret;
+}
+
+static int w5100_spi_write16(struct net_device *ndev, u16 addr, u16 data)
+{
+	int ret;
+
+	ret = w5100_spi_write(ndev, addr, data >> 8);
+	if (ret)
+		return ret;
+
+	return w5100_spi_write(ndev, addr + 1, data & 0xff);
+}
+
+static int w5100_spi_readbulk(struct net_device *ndev, u16 addr, u8 *buf,
+			      int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		int ret = w5100_spi_read(ndev, addr + i);
+
+		if (ret < 0)
+			return ret;
+		buf[i] = ret;
+	}
+
+	return 0;
+}
+
+static int w5100_spi_writebulk(struct net_device *ndev, u16 addr, const u8 *buf,
+			       int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		int ret = w5100_spi_write(ndev, addr + i, buf[i]);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static const struct w5100_ops w5100_spi_ops = {
+	.may_sleep = true,
+	.read = w5100_spi_read,
+	.write = w5100_spi_write,
+	.read16 = w5100_spi_read16,
+	.write16 = w5100_spi_write16,
+	.readbulk = w5100_spi_readbulk,
+	.writebulk = w5100_spi_writebulk,
+};
+
+static int w5100_spi_probe(struct spi_device *spi)
+{
+	return w5100_probe(&spi->dev, &w5100_spi_ops, 0, NULL, spi->irq,
+			   -EINVAL);
+}
+
+static int w5100_spi_remove(struct spi_device *spi)
+{
+	return w5100_remove(&spi->dev);
+}
+
+static const struct spi_device_id w5100_spi_ids[] = {
+	{ "w5100", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, w5100_spi_ids);
+
+static struct spi_driver w5100_spi_driver = {
+	.driver		= {
+		.name	= "w5100",
+		.pm	= &w5100_pm_ops,
+	},
+	.probe		= w5100_spi_probe,
+	.remove		= w5100_spi_remove,
+	.id_table	= w5100_spi_ids,
+};
+module_spi_driver(w5100_spi_driver);
+
+MODULE_DESCRIPTION("WIZnet W5100 Ethernet driver for SPI mode");
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
+MODULE_LICENSE("GPL");
-- 
2.5.0

^ permalink raw reply related

* [PATCH v3 5/5] net: w5100: support W5200
From: Akinobu Mita @ 2016-04-14 15:11 UTC (permalink / raw)
  To: netdev; +Cc: Akinobu Mita, Mike Sinkovsky, David S. Miller
In-Reply-To: <1460646693-25179-1-git-send-email-akinobu.mita@gmail.com>

This adds support for W5200 chip.

W5100 and W5200 have similar memory maps, although some of their offsets
are different.  Their register access sequences also differ, but the
w5100 driver has an abstraction layer for different bus interface modes,
so it is easy to add W5200 support to the w5100 and w5100-spi drivers.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Mike Sinkovsky <msink@permonline.ru>
Cc: David S. Miller <davem@davemloft.net>
---
* v3
- Allocate w5200 ops specific data structure to put DMA-safe buffer
- Add missing chip_id assignment for w5100_*_ops

 drivers/net/ethernet/wiznet/Kconfig     |   2 +-
 drivers/net/ethernet/wiznet/w5100-spi.c | 174 +++++++++++++++++++++++++++++++-
 drivers/net/ethernet/wiznet/w5100.c     | 155 ++++++++++++++++++++--------
 drivers/net/ethernet/wiznet/w5100.h     |   6 ++
 4 files changed, 289 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/wiznet/Kconfig b/drivers/net/ethernet/wiznet/Kconfig
index d1ab353..1f15376 100644
--- a/drivers/net/ethernet/wiznet/Kconfig
+++ b/drivers/net/ethernet/wiznet/Kconfig
@@ -70,7 +70,7 @@ config WIZNET_BUS_ANY
 endchoice
 
 config WIZNET_W5100_SPI
-	tristate "WIZnet W5100 Ethernet support for SPI mode"
+	tristate "WIZnet W5100/W5200 Ethernet support for SPI mode"
 	depends on WIZNET_BUS_ANY
 	depends on SPI
 	---help---
diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c
index 32f406c..598a7b0 100644
--- a/drivers/net/ethernet/wiznet/w5100-spi.c
+++ b/drivers/net/ethernet/wiznet/w5100-spi.c
@@ -1,9 +1,13 @@
 /*
- * Ethernet driver for the WIZnet W5100 chip.
+ * Ethernet driver for the WIZnet W5100/W5200 chip.
  *
  * Copyright (C) 2016 Akinobu Mita <akinobu.mita@gmail.com>
  *
  * Licensed under the GPL-2 or later.
+ *
+ * Datasheet:
+ * http://www.wiznet.co.kr/wp-content/uploads/wiznethome/Chip/W5100/Document/W5100_Datasheet_v1.2.6.pdf
+ * http://wiznethome.cafe24.com/wp-content/uploads/wiznethome/Chip/W5200/Documents/W5200_DS_V140E.pdf
  */
 
 #include <linux/kernel.h>
@@ -95,6 +99,7 @@ static int w5100_spi_writebulk(struct net_device *ndev, u16 addr, const u8 *buf,
 
 static const struct w5100_ops w5100_spi_ops = {
 	.may_sleep = true,
+	.chip_id = W5100,
 	.read = w5100_spi_read,
 	.write = w5100_spi_write,
 	.read16 = w5100_spi_read16,
@@ -103,10 +108,168 @@ static const struct w5100_ops w5100_spi_ops = {
 	.writebulk = w5100_spi_writebulk,
 };
 
+#define W5200_SPI_WRITE_OPCODE 0x80
+
+struct w5200_spi_priv {
+	/* Serialize access to cmd_buf */
+	struct mutex cmd_lock;
+
+	/* DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 */
+	u8 cmd_buf[4] ____cacheline_aligned;
+};
+
+static struct w5200_spi_priv *w5200_spi_priv(struct net_device *ndev)
+{
+	return w5100_ops_priv(ndev);
+}
+
+static int w5200_spi_init(struct net_device *ndev)
+{
+	struct w5200_spi_priv *spi_priv = w5200_spi_priv(ndev);
+
+	mutex_init(&spi_priv->cmd_lock);
+
+	return 0;
+}
+
+static int w5200_spi_read(struct net_device *ndev, u16 addr)
+{
+	struct spi_device *spi = to_spi_device(ndev->dev.parent);
+	u8 cmd[4] = { addr >> 8, addr & 0xff, 0, 1 };
+	u8 data;
+	int ret;
+
+	ret = spi_write_then_read(spi, cmd, sizeof(cmd), &data, 1);
+
+	return ret ? ret : data;
+}
+
+static int w5200_spi_write(struct net_device *ndev, u16 addr, u8 data)
+{
+	struct spi_device *spi = to_spi_device(ndev->dev.parent);
+	u8 cmd[5] = { addr >> 8, addr & 0xff, W5200_SPI_WRITE_OPCODE, 1, data };
+
+	return spi_write_then_read(spi, cmd, sizeof(cmd), NULL, 0);
+}
+
+static int w5200_spi_read16(struct net_device *ndev, u16 addr)
+{
+	struct spi_device *spi = to_spi_device(ndev->dev.parent);
+	u8 cmd[4] = { addr >> 8, addr & 0xff, 0, 2 };
+	__be16 data;
+	int ret;
+
+	ret = spi_write_then_read(spi, cmd, sizeof(cmd), &data, sizeof(data));
+
+	return ret ? ret : be16_to_cpu(data);
+}
+
+static int w5200_spi_write16(struct net_device *ndev, u16 addr, u16 data)
+{
+	struct spi_device *spi = to_spi_device(ndev->dev.parent);
+	u8 cmd[6] = {
+		addr >> 8, addr & 0xff,
+		W5200_SPI_WRITE_OPCODE, 2,
+		data >> 8, data & 0xff
+	};
+
+	return spi_write_then_read(spi, cmd, sizeof(cmd), NULL, 0);
+}
+
+static int w5200_spi_readbulk(struct net_device *ndev, u16 addr, u8 *buf,
+			      int len)
+{
+	struct spi_device *spi = to_spi_device(ndev->dev.parent);
+	struct w5200_spi_priv *spi_priv = w5200_spi_priv(ndev);
+	struct spi_transfer xfer[] = {
+		{
+			.tx_buf = spi_priv->cmd_buf,
+			.len = sizeof(spi_priv->cmd_buf),
+		},
+		{
+			.rx_buf = buf,
+			.len = len,
+		},
+	};
+	int ret;
+
+	mutex_lock(&spi_priv->cmd_lock);
+
+	spi_priv->cmd_buf[0] = addr >> 8;
+	spi_priv->cmd_buf[1] = addr;
+	spi_priv->cmd_buf[2] = len >> 8;
+	spi_priv->cmd_buf[3] = len;
+	ret = spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer));
+
+	mutex_unlock(&spi_priv->cmd_lock);
+
+	return ret;
+}
+
+static int w5200_spi_writebulk(struct net_device *ndev, u16 addr, const u8 *buf,
+			       int len)
+{
+	struct spi_device *spi = to_spi_device(ndev->dev.parent);
+	struct w5200_spi_priv *spi_priv = w5200_spi_priv(ndev);
+	struct spi_transfer xfer[] = {
+		{
+			.tx_buf = spi_priv->cmd_buf,
+			.len = sizeof(spi_priv->cmd_buf),
+		},
+		{
+			.tx_buf = buf,
+			.len = len,
+		},
+	};
+	int ret;
+
+	mutex_lock(&spi_priv->cmd_lock);
+
+	spi_priv->cmd_buf[0] = addr >> 8;
+	spi_priv->cmd_buf[1] = addr;
+	spi_priv->cmd_buf[2] = W5200_SPI_WRITE_OPCODE | (len >> 8);
+	spi_priv->cmd_buf[3] = len;
+	ret = spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer));
+
+	mutex_unlock(&spi_priv->cmd_lock);
+
+	return ret;
+}
+
+static const struct w5100_ops w5200_ops = {
+	.may_sleep = true,
+	.chip_id = W5200,
+	.read = w5200_spi_read,
+	.write = w5200_spi_write,
+	.read16 = w5200_spi_read16,
+	.write16 = w5200_spi_write16,
+	.readbulk = w5200_spi_readbulk,
+	.writebulk = w5200_spi_writebulk,
+	.init = w5200_spi_init,
+};
+
 static int w5100_spi_probe(struct spi_device *spi)
 {
-	return w5100_probe(&spi->dev, &w5100_spi_ops, 0, NULL, spi->irq,
-			   -EINVAL);
+	const struct spi_device_id *id = spi_get_device_id(spi);
+	const struct w5100_ops *ops;
+	int priv_size;
+
+	switch (id->driver_data) {
+	case W5100:
+		ops = &w5100_spi_ops;
+		priv_size = 0;
+		break;
+	case W5200:
+		ops = &w5200_ops;
+		priv_size = sizeof(struct w5200_spi_priv);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return w5100_probe(&spi->dev, ops, priv_size, NULL, spi->irq, -EINVAL);
 }
 
 static int w5100_spi_remove(struct spi_device *spi)
@@ -115,7 +278,8 @@ static int w5100_spi_remove(struct spi_device *spi)
 }
 
 static const struct spi_device_id w5100_spi_ids[] = {
-	{ "w5100", 0 },
+	{ "w5100", W5100 },
+	{ "w5200", W5200 },
 	{}
 };
 MODULE_DEVICE_TABLE(spi, w5100_spi_ids);
@@ -131,6 +295,6 @@ static struct spi_driver w5100_spi_driver = {
 };
 module_spi_driver(w5100_spi_driver);
 
-MODULE_DESCRIPTION("WIZnet W5100 Ethernet driver for SPI mode");
+MODULE_DESCRIPTION("WIZnet W5100/W5200 Ethernet driver for SPI mode");
 MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index 42a9de4..09149c9 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -38,7 +38,7 @@ MODULE_ALIAS("platform:"DRV_NAME);
 MODULE_LICENSE("GPL");
 
 /*
- * Registers
+ * W5100 and W5200 common registers
  */
 #define W5100_COMMON_REGS	0x0000
 #define W5100_MR		0x0000 /* Mode Register */
@@ -52,37 +52,69 @@ MODULE_LICENSE("GPL");
 #define   IR_S0			  0x01 /* S0 interrupt */
 #define W5100_RTR		0x0017 /* Retry Time-value Register */
 #define   RTR_DEFAULT		  2000 /* =0x07d0 (2000) */
-#define W5100_RMSR		0x001a /* Receive Memory Size */
-#define W5100_TMSR		0x001b /* Transmit Memory Size */
 #define W5100_COMMON_REGS_LEN	0x0040
 
-#define W5100_S0_REGS		0x0400
-#define W5100_S0_MR		0x0400 /* S0 Mode Register */
+#define W5100_Sn_MR		0x0000 /* Sn Mode Register */
+#define W5100_Sn_CR		0x0001 /* Sn Command Register */
+#define W5100_Sn_IR		0x0002 /* Sn Interrupt Register */
+#define W5100_Sn_SR		0x0003 /* Sn Status Register */
+#define W5100_Sn_TX_FSR		0x0020 /* Sn Transmit free memory size */
+#define W5100_Sn_TX_RD		0x0022 /* Sn Transmit memory read pointer */
+#define W5100_Sn_TX_WR		0x0024 /* Sn Transmit memory write pointer */
+#define W5100_Sn_RX_RSR		0x0026 /* Sn Receive free memory size */
+#define W5100_Sn_RX_RD		0x0028 /* Sn Receive memory read pointer */
+
+#define S0_REGS(priv)		(is_w5200(priv) ? W5200_S0_REGS : W5100_S0_REGS)
+
+#define W5100_S0_MR(priv)	(S0_REGS(priv) + W5100_Sn_MR)
 #define   S0_MR_MACRAW		  0x04 /* MAC RAW mode (promiscuous) */
 #define   S0_MR_MACRAW_MF	  0x44 /* MAC RAW mode (filtered) */
-#define W5100_S0_CR		0x0401 /* S0 Command Register */
+#define W5100_S0_CR(priv)	(S0_REGS(priv) + W5100_Sn_CR)
 #define   S0_CR_OPEN		  0x01 /* OPEN command */
 #define   S0_CR_CLOSE		  0x10 /* CLOSE command */
 #define   S0_CR_SEND		  0x20 /* SEND command */
 #define   S0_CR_RECV		  0x40 /* RECV command */
-#define W5100_S0_IR		0x0402 /* S0 Interrupt Register */
+#define W5100_S0_IR(priv)	(S0_REGS(priv) + W5100_Sn_IR)
 #define   S0_IR_SENDOK		  0x10 /* complete sending */
 #define   S0_IR_RECV		  0x04 /* receiving data */
-#define W5100_S0_SR		0x0403 /* S0 Status Register */
+#define W5100_S0_SR(priv)	(S0_REGS(priv) + W5100_Sn_SR)
 #define   S0_SR_MACRAW		  0x42 /* mac raw mode */
-#define W5100_S0_TX_FSR		0x0420 /* S0 Transmit free memory size */
-#define W5100_S0_TX_RD		0x0422 /* S0 Transmit memory read pointer */
-#define W5100_S0_TX_WR		0x0424 /* S0 Transmit memory write pointer */
-#define W5100_S0_RX_RSR		0x0426 /* S0 Receive free memory size */
-#define W5100_S0_RX_RD		0x0428 /* S0 Receive memory read pointer */
+#define W5100_S0_TX_FSR(priv)	(S0_REGS(priv) + W5100_Sn_TX_FSR)
+#define W5100_S0_TX_RD(priv)	(S0_REGS(priv) + W5100_Sn_TX_RD)
+#define W5100_S0_TX_WR(priv)	(S0_REGS(priv) + W5100_Sn_TX_WR)
+#define W5100_S0_RX_RSR(priv)	(S0_REGS(priv) + W5100_Sn_RX_RSR)
+#define W5100_S0_RX_RD(priv)	(S0_REGS(priv) + W5100_Sn_RX_RD)
+
 #define W5100_S0_REGS_LEN	0x0040
 
+/*
+ * W5100 specific registers
+ */
+#define W5100_RMSR		0x001a /* Receive Memory Size */
+#define W5100_TMSR		0x001b /* Transmit Memory Size */
+
+#define W5100_S0_REGS		0x0400
+
 #define W5100_TX_MEM_START	0x4000
 #define W5100_TX_MEM_SIZE	0x2000
 #define W5100_RX_MEM_START	0x6000
 #define W5100_RX_MEM_SIZE	0x2000
 
 /*
+ * W5200 specific registers
+ */
+#define W5200_S0_REGS		0x4000
+
+#define W5200_Sn_RXMEM_SIZE(n)	(0x401e + (n) * 0x0100) /* Sn RX Memory Size */
+#define W5200_Sn_TXMEM_SIZE(n)	(0x401f + (n) * 0x0100) /* Sn TX Memory Size */
+#define W5200_S0_IMR		0x402c /* S0 Interrupt Mask Register */
+
+#define W5200_TX_MEM_START	0x8000
+#define W5200_TX_MEM_SIZE	0x4000
+#define W5200_RX_MEM_START	0xc000
+#define W5200_RX_MEM_SIZE	0x4000
+
+/*
  * Device driver private data structure
  */
 
@@ -105,6 +137,11 @@ struct w5100_priv {
 	struct work_struct restart_work;
 };
 
+static inline bool is_w5200(struct w5100_priv *priv)
+{
+	return priv->ops->chip_id == W5200;
+}
+
 /************************************************************************
  *
  *  Lowlevel I/O functions
@@ -217,6 +254,7 @@ static int w5100_mmio_init(struct net_device *ndev)
 }
 
 static const struct w5100_ops w5100_mmio_direct_ops = {
+	.chip_id = W5100,
 	.read = w5100_read_direct,
 	.write = w5100_write_direct,
 	.read16 = w5100_read16_direct,
@@ -341,6 +379,7 @@ static int w5100_reset_indirect(struct net_device *ndev)
 }
 
 static const struct w5100_ops w5100_mmio_indirect_ops = {
+	.chip_id = W5100,
 	.read = w5100_read_indirect,
 	.write = w5100_write_indirect,
 	.read16 = w5100_read16_indirect,
@@ -457,20 +496,24 @@ static int w5100_readbuf(struct w5100_priv *priv, u16 offset, u8 *buf, int len)
 	u16 addr;
 	int remain = 0;
 	int ret;
+	const u16 mem_start =
+		is_w5200(priv) ? W5200_RX_MEM_START : W5100_RX_MEM_START;
+	const u16 mem_size =
+		is_w5200(priv) ? W5200_RX_MEM_SIZE : W5100_RX_MEM_SIZE;
 
-	offset %= W5100_RX_MEM_SIZE;
-	addr = W5100_RX_MEM_START + offset;
+	offset %= mem_size;
+	addr = mem_start + offset;
 
-	if (offset + len > W5100_RX_MEM_SIZE) {
-		remain = (offset + len) % W5100_RX_MEM_SIZE;
-		len = W5100_RX_MEM_SIZE - offset;
+	if (offset + len > mem_size) {
+		remain = (offset + len) % mem_size;
+		len = mem_size - offset;
 	}
 
 	ret = w5100_readbulk(priv, addr, buf, len);
 	if (ret || !remain)
 		return ret;
 
-	return w5100_readbulk(priv, W5100_RX_MEM_START, buf + len, remain);
+	return w5100_readbulk(priv, mem_start, buf + len, remain);
 }
 
 static int w5100_writebuf(struct w5100_priv *priv, u16 offset, const u8 *buf,
@@ -479,20 +522,24 @@ static int w5100_writebuf(struct w5100_priv *priv, u16 offset, const u8 *buf,
 	u16 addr;
 	int ret;
 	int remain = 0;
+	const u16 mem_start =
+		is_w5200(priv) ? W5200_TX_MEM_START : W5100_TX_MEM_START;
+	const u16 mem_size =
+		is_w5200(priv) ? W5200_TX_MEM_SIZE : W5100_TX_MEM_SIZE;
 
-	offset %= W5100_TX_MEM_SIZE;
-	addr = W5100_TX_MEM_START + offset;
+	offset %= mem_size;
+	addr = mem_start + offset;
 
-	if (offset + len > W5100_TX_MEM_SIZE) {
-		remain = (offset + len) % W5100_TX_MEM_SIZE;
-		len = W5100_TX_MEM_SIZE - offset;
+	if (offset + len > mem_size) {
+		remain = (offset + len) % mem_size;
+		len = mem_size - offset;
 	}
 
 	ret = w5100_writebulk(priv, addr, buf, len);
 	if (ret || !remain)
 		return ret;
 
-	return w5100_writebulk(priv, W5100_TX_MEM_START, buf + len, remain);
+	return w5100_writebulk(priv, mem_start, buf + len, remain);
 }
 
 static int w5100_reset(struct w5100_priv *priv)
@@ -511,11 +558,11 @@ static int w5100_command(struct w5100_priv *priv, u16 cmd)
 {
 	unsigned long timeout;
 
-	w5100_write(priv, W5100_S0_CR, cmd);
+	w5100_write(priv, W5100_S0_CR(priv), cmd);
 
 	timeout = jiffies + msecs_to_jiffies(100);
 
-	while (w5100_read(priv, W5100_S0_CR) != 0) {
+	while (w5100_read(priv, W5100_S0_CR(priv)) != 0) {
 		if (time_after(jiffies, timeout))
 			return -EIO;
 		cpu_relax();
@@ -531,6 +578,31 @@ static void w5100_write_macaddr(struct w5100_priv *priv)
 	w5100_writebulk(priv, W5100_SHAR, ndev->dev_addr, ETH_ALEN);
 }
 
+static void w5100_memory_configure(struct w5100_priv *priv)
+{
+	/* Configure 16K of internal memory
+	 * as 8K RX buffer and 8K TX buffer
+	 */
+	w5100_write(priv, W5100_RMSR, 0x03);
+	w5100_write(priv, W5100_TMSR, 0x03);
+}
+
+static void w5200_memory_configure(struct w5100_priv *priv)
+{
+	int i;
+
+	/* Configure internal RX memory as 16K RX buffer and
+	 * internal TX memory as 16K TX buffer
+	 */
+	w5100_write(priv, W5200_Sn_RXMEM_SIZE(0), 0x10);
+	w5100_write(priv, W5200_Sn_TXMEM_SIZE(0), 0x10);
+
+	for (i = 1; i < 8; i++) {
+		w5100_write(priv, W5200_Sn_RXMEM_SIZE(i), 0);
+		w5100_write(priv, W5200_Sn_TXMEM_SIZE(i), 0);
+	}
+}
+
 static void w5100_hw_reset(struct w5100_priv *priv)
 {
 	w5100_reset(priv);
@@ -538,16 +610,15 @@ static void w5100_hw_reset(struct w5100_priv *priv)
 	w5100_write(priv, W5100_IMR, 0);
 	w5100_write_macaddr(priv);
 
-	/* Configure 16K of internal memory
-	 * as 8K RX buffer and 8K TX buffer
-	 */
-	w5100_write(priv, W5100_RMSR, 0x03);
-	w5100_write(priv, W5100_TMSR, 0x03);
+	if (is_w5200(priv))
+		w5200_memory_configure(priv);
+	else
+		w5100_memory_configure(priv);
 }
 
 static void w5100_hw_start(struct w5100_priv *priv)
 {
-	w5100_write(priv, W5100_S0_MR, priv->promisc ?
+	w5100_write(priv, W5100_S0_MR(priv), priv->promisc ?
 			  S0_MR_MACRAW : S0_MR_MACRAW_MF);
 	w5100_command(priv, S0_CR_OPEN);
 	w5100_write(priv, W5100_IMR, IR_S0);
@@ -611,7 +682,7 @@ static void w5100_get_regs(struct net_device *ndev,
 	regs->version = 1;
 	w5100_readbulk(priv, W5100_COMMON_REGS, buf, W5100_COMMON_REGS_LEN);
 	buf += W5100_COMMON_REGS_LEN;
-	w5100_readbulk(priv, W5100_S0_REGS, buf, W5100_S0_REGS_LEN);
+	w5100_readbulk(priv, S0_REGS(priv), buf, W5100_S0_REGS_LEN);
 }
 
 static void w5100_restart(struct net_device *ndev)
@@ -649,9 +720,9 @@ static void w5100_tx_skb(struct net_device *ndev, struct sk_buff *skb)
 	struct w5100_priv *priv = netdev_priv(ndev);
 	u16 offset;
 
-	offset = w5100_read16(priv, W5100_S0_TX_WR);
+	offset = w5100_read16(priv, W5100_S0_TX_WR(priv));
 	w5100_writebuf(priv, offset, skb->data, skb->len);
-	w5100_write16(priv, W5100_S0_TX_WR, offset + skb->len);
+	w5100_write16(priv, W5100_S0_TX_WR(priv), offset + skb->len);
 	ndev->stats.tx_bytes += skb->len;
 	ndev->stats.tx_packets++;
 	dev_kfree_skb(skb);
@@ -696,18 +767,18 @@ static struct sk_buff *w5100_rx_skb(struct net_device *ndev)
 	u16 rx_len;
 	u16 offset;
 	u8 header[2];
-	u16 rx_buf_len = w5100_read16(priv, W5100_S0_RX_RSR);
+	u16 rx_buf_len = w5100_read16(priv, W5100_S0_RX_RSR(priv));
 
 	if (rx_buf_len == 0)
 		return NULL;
 
-	offset = w5100_read16(priv, W5100_S0_RX_RD);
+	offset = w5100_read16(priv, W5100_S0_RX_RD(priv));
 	w5100_readbuf(priv, offset, header, 2);
 	rx_len = get_unaligned_be16(header) - 2;
 
 	skb = netdev_alloc_skb_ip_align(ndev, rx_len);
 	if (unlikely(!skb)) {
-		w5100_write16(priv, W5100_S0_RX_RD, offset + rx_buf_len);
+		w5100_write16(priv, W5100_S0_RX_RD(priv), offset + rx_buf_len);
 		w5100_command(priv, S0_CR_RECV);
 		ndev->stats.rx_dropped++;
 		return NULL;
@@ -715,7 +786,7 @@ static struct sk_buff *w5100_rx_skb(struct net_device *ndev)
 
 	skb_put(skb, rx_len);
 	w5100_readbuf(priv, offset + 2, skb->data, rx_len);
-	w5100_write16(priv, W5100_S0_RX_RD, offset + 2 + rx_len);
+	w5100_write16(priv, W5100_S0_RX_RD(priv), offset + 2 + rx_len);
 	w5100_command(priv, S0_CR_RECV);
 	skb->protocol = eth_type_trans(skb, ndev);
 
@@ -764,10 +835,10 @@ static irqreturn_t w5100_interrupt(int irq, void *ndev_instance)
 	struct net_device *ndev = ndev_instance;
 	struct w5100_priv *priv = netdev_priv(ndev);
 
-	int ir = w5100_read(priv, W5100_S0_IR);
+	int ir = w5100_read(priv, W5100_S0_IR(priv));
 	if (!ir)
 		return IRQ_NONE;
-	w5100_write(priv, W5100_S0_IR, ir);
+	w5100_write(priv, W5100_S0_IR(priv), ir);
 
 	if (ir & S0_IR_SENDOK) {
 		netif_dbg(priv, tx_done, ndev, "tx done\n");
diff --git a/drivers/net/ethernet/wiznet/w5100.h b/drivers/net/ethernet/wiznet/w5100.h
index 69045f0..9b1fa23 100644
--- a/drivers/net/ethernet/wiznet/w5100.h
+++ b/drivers/net/ethernet/wiznet/w5100.h
@@ -7,8 +7,14 @@
  * Licensed under the GPL-2 or later.
  */
 
+enum {
+	W5100,
+	W5200,
+};
+
 struct w5100_ops {
 	bool may_sleep;
+	int chip_id;
 	int (*read)(struct net_device *ndev, u16 addr);
 	int (*write)(struct net_device *ndev, u16 addr, u8 data);
 	int (*read16)(struct net_device *ndev, u16 addr);
-- 
2.5.0

^ permalink raw reply related

* [RFC] VLAN aux info for AF_PACKET available only with ETH_P_ALL
From: Peter Palúch @ 2016-04-14 15:17 UTC (permalink / raw)
  To: netdev

Greetings,

When using AF_PACKET sockets with PACKET_AUXDATA socket option to access 
the VLAN TCI information of received frames, I have noticed that the 
VLAN information in struct tpacket_auxdata, namely,

- tp_vlan_tci
- tp_vlan_tpid
- TP_STATUS_VLAN_VALID and TP_STATUS_VLAN_TPID_VALID flags

is filled in only when the socket is bound to htons (ETH_P_ALL). If the 
socket is bound to any specific protocol, the VLAN information fields in 
struct tpacket_auxdata are set to 0 even if the datagram of the specific 
protocol was received in an 802.1Q-tagged frame.

As the VLAN tag is being stripped off the frame soon in the receive 
path, using PACKET_AUXDATA is the only way for an application over an 
AF_PACKET socket to know what VLAN did a particular frame arrive in; 
yet, the current behavior forces the application to listen to all 
received traffic to get the actual VLAN info.

Is this behavior intentional, or is this lack of VLAN info a bug? I am 
running vanilla Linux kernel v4.4.6.

Thanks!

Best regards,
Peter

^ permalink raw reply

* Re: Deleting child qdisc doesn't reset parent to default qdisc?
From: Phil Sutter @ 2016-04-14 15:18 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Jiri Kosina, Jamal Hadi Salim, netdev, linux-kernel
In-Reply-To: <1460646099.10638.44.camel@edumazet-glaptop3.roam.corp.google.com>

On Thu, Apr 14, 2016 at 08:01:39AM -0700, Eric Dumazet wrote:
> On Thu, 2016-04-14 at 16:44 +0200, Jiri Kosina wrote:
> > Hi,
> > 
> > I've came across the behavior where adding a child qdisc and then deleting 
> > it again makes the networking dysfunctional (I guess that's because all of 
> > a sudden there is absolutely no working qdisc on the device, although 
> > there originally was a default one in the parent).
> > 
> > In a nutshell, is this expected behavior or bug?
> 
> This is the expected behavior.

OTOH some qdiscs (CBQ, DRR, DSMARK, HFSC, HTB, QFQ) assign the default
one upon deletion instead of noop_qdisc, hence I would describe
the situation using the words 'inconsistent' and 'accident' rather than
'expected'. :)

Anyhow, the problem with skilled admins is they accept quirks too easily
and just build their scripts around them - the same scripts we then have
to stay compatible with.

Cheers, Phil

^ permalink raw reply

* Re: Deleting child qdisc doesn't reset parent to default qdisc?
From: Jiri Kosina @ 2016-04-14 15:34 UTC (permalink / raw)
  To: Phil Sutter; +Cc: Eric Dumazet, Jamal Hadi Salim, netdev, linux-kernel
In-Reply-To: <20160414151813.GE3715@orbyte.nwl.cc>

On Thu, 14 Apr 2016, Phil Sutter wrote:

> OTOH some qdiscs (CBQ, DRR, DSMARK, HFSC, HTB, QFQ) assign the default
> one upon deletion instead of noop_qdisc, hence I would describe
> the situation using the words 'inconsistent' and 'accident' rather than
> 'expected'. :)

Exactly. I'd again like to stress the fact that this configuration works:

	jikos:~ # tc qdisc show
	qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 

and this (after performing add/delete operation) doesn't:

	jikos:~ # tc qdisc show
	qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 

It's hard to spot a difference (hint: there is none).

Thanks,

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply

* Re: [PATCH net-next V3 00/16] net: fec: cleanup and fixes
From: Troy Kisky @ 2016-04-14 15:39 UTC (permalink / raw)
  To: Holger Schurig
  Cc: netdev, davem, fugang.duan, lznuaa, andrew, stillcompiling, arnd,
	sergei.shtylyov, gerg, fabio.estevam, johannes, l.stach,
	linux-arm-kernel, tremyfr
In-Reply-To: <87h9f4ebvp.fsf@gmail.com>

On 4/14/2016 3:13 AM, Holger Schurig wrote:
> Do you guys that work with the FEC driver ever run with
> CONFIG_DMA_API_DEBUG enabled?
> 
> I ask this Because I get this error when it's turned on when I do some
> "rsync" transfer to my device:
> 
> [   58.420980] ------------[ cut here ]------------
> [   58.425667] WARNING: CPU: 0 PID: 377 at /home/schurig/d/mkarm/linux-4.5/lib/dma-debug.c:1096 check_unmap+0x9d0/0xab8()
> [   58.436405] fec 2188000.ethernet: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=66 bytes]
> [   58.450248] Modules linked in: bnep usbhid imx_sdma flexcan btusb btrtl btbcm btintel smsc95xx usbnet mii bluetooth
> [   58.460882] CPU: 0 PID: 377 Comm: sshd Tainted: G        W       4.5.1 #3
> [   58.467671] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [   58.474199] Backtrace: 
> [   58.476675] [<c0012a24>] (dump_backtrace) from [<c0012c20>] (show_stack+0x18/0x1c)
> [   58.484244]  r6:60000113 r5:c05a96c0 r4:00000000 r3:00000000
> [   58.489964] [<c0012c08>] (show_stack) from [<c01dbc4c>] (dump_stack+0x9c/0xb0)
> [   58.497197] [<c01dbbb0>] (dump_stack) from [<c001f558>] (warn_slowpath_common+0x8c/0xbc)
> [   58.505286]  r6:c01f9c74 r5:00000009 r4:ee9f17f8 r3:c0596da4
> [   58.511002] [<c001f4cc>] (warn_slowpath_common) from [<c001f5c0>] (warn_slowpath_fmt+0x38/0x40)
> [   58.519698]  r8:00000042 r7:00000001 r6:00000000 r5:00000000 r4:c050c020
> [   58.526470] [<c001f58c>] (warn_slowpath_fmt) from [<c01f9c74>] (check_unmap+0x9d0/0xab8)
> [   58.534559]  r3:c0520e6c r2:c050c020
> [   58.538159]  r4:00000000
> [   58.540710] [<c01f92a4>] (check_unmap) from [<c01f9de0>] (debug_dma_unmap_page+0x84/0x8c)
> [   58.548886]  r10:ef2ec000 r9:f09e5fa0 r8:ef0ef810 r7:00000001 r6:00000000 r5:00000042
> [   58.556780]  r4:00000001
> [   58.559336] [<c01f9d5c>] (debug_dma_unmap_page) from [<c02cdf00>] (fec_txq+0x140/0x31c)
> [   58.567338]  r8:ef0ef810 r7:00000000 r6:00000000 r5:00000000 r4:ef2c6000
> [   58.574108] [<c02cddc0>] (fec_txq) from [<c02ce2f4>] (fec_enet_napi_q1+0x98/0xe8)
> [   58.581589]  r10:08000000 r9:ef2ec580 r8:00000000 r7:00000040 r6:00000000 r5:ef2ec000


I think I've already fixed this, but I've only submitted once.

commit 466cb4a2e5583d2e18470f30d5948edcf4b947f5
Author: Troy Kisky <troy.kisky@boundarydevices.com>
Date:   Wed Jan 20 12:52:10 2016 -0700

    net: fec: update dirty_tx even if no skb

    If dirty_tx isn't updated, then dma_unmap_single
    will be called twice.

    Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 452be9c..150a90a 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1243,10 +1243,8 @@ static void fec_txq(struct net_device *ndev, struct fec_enet_priv_tx_q *txq)
                                         fec16_to_cpu(bdp->cbd_datlen),
                                         DMA_TO_DEVICE);
                bdp->cbd_bufaddr = cpu_to_fec32(0);
-               if (!skb) {
-                       bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
-                       continue;
-               }
+               if (!skb)
+                       goto skb_done;

                /* Check for errors. */
                if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -1285,7 +1283,7 @@ static void fec_txq(struct net_device *ndev, struct fec_enet_priv_tx_q *txq)

                /* Free the sk buffer associated with this last transmit */
                dev_kfree_skb_any(skb);
-
+skb_done:
                /* Make sure the update to bdp and tx_skbuff are performed
                 * before dirty_tx
                 */

^ permalink raw reply related

* Re: Deleting child qdisc doesn't reset parent to default qdisc?
From: Eric Dumazet @ 2016-04-14 15:44 UTC (permalink / raw)
  To: Jiri Kosina; +Cc: Phil Sutter, Jamal Hadi Salim, netdev, linux-kernel
In-Reply-To: <alpine.LNX.2.00.1604141733250.27368@cbobk.fhfr.pm>

On Thu, 2016-04-14 at 17:34 +0200, Jiri Kosina wrote:
> On Thu, 14 Apr 2016, Phil Sutter wrote:
> 
> > OTOH some qdiscs (CBQ, DRR, DSMARK, HFSC, HTB, QFQ) assign the default
> > one upon deletion instead of noop_qdisc, hence I would describe
> > the situation using the words 'inconsistent' and 'accident' rather than
> > 'expected'. :)
> 
> Exactly. I'd again like to stress the fact that this configuration works:
> 
> 	jikos:~ # tc qdisc show
> 	qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 
> 
> and this (after performing add/delete operation) doesn't:
> 
> 	jikos:~ # tc qdisc show
> 	qdisc tbf 10: dev eth0 root refcnt 2 rate 800Mbit burst 131000b lat 1.0ms 
> 
> It's hard to spot a difference (hint: there is none).

This is because some qdiscs are not visible in the dump.


qdisc_list_add() uses a single list, so adding too much stuff in it
could slow down fast path (qdisc_lookup(), called from
qdisc_tree_reduce_backlog())

^ permalink raw reply

* Re: [PATCH] qlge: Replace create_singlethread_workqueue with alloc_ordered_workqueue
From: Tejun Heo @ 2016-04-14 15:47 UTC (permalink / raw)
  To: Manish Chopra
  Cc: Amitoj Kaur Chawla, Sudarsana Kalluru, netdev, linux-kernel,
	Dept-Eng Linux Driver, Harish Patil, Dept-GE Linux NIC Dev
In-Reply-To: <BLUPR11MB011498CD02CC919F023545E793970@BLUPR11MB0114.namprd11.prod.outlook.com>

Hello, Manish.

On Thu, Apr 14, 2016 at 07:25:15AM +0000, Manish Chopra wrote:
> Just want to confirm that __WQ_LEGACY flag is not necessary here as this is removed
> with this change ? 

Yeah, that should be fine.  That only affects locking dependency
tracking which can fire spuriously due to workqueues created with the
old interface having WQ_MEM_RECLAIM unconditionally.  In this case, we
actually want WQ_MEM_RECLAIM and thus we want the dependency tracking
too.

Thanks.

-- 
tejun

^ permalink raw reply

* Re: pull-request: mac80211 2016-04-14
From: David Miller @ 2016-04-14 16:01 UTC (permalink / raw)
  To: johannes-cdvu00un1VgdHxzADdlk8Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1460618838-5819-1-git-send-email-johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org>

From: Johannes Berg <johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org>
Date: Thu, 14 Apr 2016 09:27:17 +0200

> Since I didn't get anything else, and this has been pending for a week,
> here's the other part of the nl80211 socket problem fix (the netlink
> family URELEASE was the first part.)
> 
> Let me know if there's any problem.

Pulled, thank you.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: Deleting child qdisc doesn't reset parent to default qdisc?
From: Jiri Kosina @ 2016-04-14 16:08 UTC (permalink / raw)
  To: Phil Sutter; +Cc: Eric Dumazet, Jamal Hadi Salim, netdev, linux-kernel
In-Reply-To: <20160414151813.GE3715@orbyte.nwl.cc>

On Thu, 14 Apr 2016, Phil Sutter wrote:

> > > I've came across the behavior where adding a child qdisc and then deleting 
> > > it again makes the networking dysfunctional (I guess that's because all of 
> > > a sudden there is absolutely no working qdisc on the device, although 
> > > there originally was a default one in the parent).
> > > 
> > > In a nutshell, is this expected behavior or bug?
> > 
> > This is the expected behavior.
> 
> OTOH some qdiscs (CBQ, DRR, DSMARK, HFSC, HTB, QFQ) assign the default
> one upon deletion instead of noop_qdisc, hence I would describe
> the situation using the words 'inconsistent' and 'accident' rather than
> 'expected'. :)

Would a patch that'd unify this, in the sense that all qdiscs would assign
the default one upon deletion, be acceptable?

Thanks,

-- 
Jiri Kosina
SUSE Labs

^ permalink raw reply

* [patch net-next 00/18] devlink + mlxsw: add support for config and control of shared buffers
From: Jiri Pirko @ 2016-04-14 16:19 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, eladr, yotamg, ogerlitz, roopa, nikolay, jhs,
	john.fastabend, rami.rosen, gospo, stephen, sfeldma

From: Jiri Pirko <jiri@mellanox.com>

ASICs implement shared buffer for packet forwarding purposes and enable
flexible partitioning of the shared buffer for different flows and ports,
enabling non-blocking progress of different flows as well as separation
of lossy traffic from loss-less traffic when using Per-Priority Flow
Control (PFC). The shared buffer optimizes the buffer utilization for better
absorption of packet bursts.

This patchset implements API which is based on the model SAI uses. That is
aligned with multiple ASIC vendors so this API should be vendor neutral.

Userspace counterpart patchset for devlink iproute2 tool can be found here:
https://github.com/jpirko/iproute2_mlxsw/tree/devlink_sb

Couple of examples of usage:

switch$ devlink sb help
Usage: devlink sb show [ DEV [ sb SB_INDEX ] ]
       devlink sb pool show [ DEV [ sb SB_INDEX ] pool POOL_INDEX ]
       devlink sb pool set DEV [ sb SB_INDEX ] pool POOL_INDEX
                           size POOL_SIZE thtype { static | dynamic }
       devlink sb port pool show [ DEV/PORT_INDEX [ sb SB_INDEX ]
                                   pool POOL_INDEX ]
       devlink sb port pool set DEV/PORT_INDEX [ sb SB_INDEX ]
                                pool POOL_INDEX th THRESHOLD
       devlink sb tc bind show [ DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX ]
       devlink sb tc bind set DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX
                              type { ingress | egress } pool POOL_INDEX
                              th THRESHOLD
       devlink sb occupancy show { DEV | DEV/PORT_INDEX } [ sb SB_INDEX ]
       devlink sb occupancy snapshot DEV [ sb SB_INDEX ]
       devlink sb occupancy clearmax DEV [ sb SB_INDEX ]

# list available shared buffers
switch$ devlink sb show
pci/0000:03:00.0: sb 0 size 16777216 ing_pools 4 eg_pools 4 ing_tcs 8 eg_tcs 8

# list available pools and their config
switch$ devlink sb pool show
pci/0000:03:00.0: sb 0 pool 0 type ingress size 12400032 thtype dynamic
pci/0000:03:00.0: sb 0 pool 1 type ingress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 2 type ingress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 3 type ingress size 200064 thtype dynamic
pci/0000:03:00.0: sb 0 pool 4 type egress size 13220064 thtype dynamic
pci/0000:03:00.0: sb 0 pool 5 type egress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 6 type egress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 7 type egress size 0 thtype dynamic

# show port-pool setup for port sw0p7
switch$ devlink sb port pool show sw0p7 pool 0
sw0p7: sb 0 pool 0 threshold 16

# change threshold for port sw0p7
switch$ sudo devlink sb port pool set sw0p7 pool 0 th 15

# show port-pool changed setup for port sw0p7
switch$ devlink sb port pool show sw0p7 pool 0
sw0p7: sb 0 pool 0 threshold 15

# show TC binding setup for port sw0p7 ingress TC 0
switch$ devlink sb tc bind show sw0p7 tc 0 type ingress
sw0p7: sb 0 tc 0 type ingress pool 0 threshold 10

# change threshold TC binding setup for port sw0p7 ingress TC 0
switch$ sudo devlink sb tc bind set sw0p7 tc 0 type ingress pool 0 th 9

# show TC binding changed setup for port sw0p7 ingress TC 0
switch$ devlink sb tc bind show sw0p7 tc 0 type ingress
sw0p7: sb 0 tc 0 type ingress pool 0 threshold 9

# make a snapshot of occupancy of shared buffer for device pci/0000:03:00.0
switch$ sudo devlink sb occupancy snapshot pci/0000:03:00.0

# show occupancy for port sw0p7 from the snapshot (current/watermark)
switch$ devlink sb occupancy show sw0p7
sw0p7:
  pool: 0:      82944/3217344 1:          0/0       2:          0/0       3:          0/0      
        4:          0/384     5:          0/0       6:          0/0       7:          0/0      
  itc:  0(0):   96768/3217344 1(0):       0/0       2(0):       0/0       3(0):       0/0      
        4(0):       0/0       5(0):       0/0       6(0):       0/0       7(0):       0/0      
  etc:  0(4):       0/384     1(4):       0/0       2(4):       0/0       3(4):       0/0      
        4(4):       0/0       5(4):       0/0       6(4):       0/0       7(4):       0/0

# clear watermarks for shared buffer of device pci/0000:03:00.0
switch$ sudo devlink sb occupancy clearmax pci/0000:03:00.0

Jiri Pirko (18):
  devlink: add shared buffer configuration
  devlink: implement shared buffer occupancy monitoring interface
  mlxsw: core: Add devlink shared buffer callbacks
  mlxsw: spectrum_buffers: Push out shared buffer register writes
  mlxsw: spectrum_buffers: Push out indexes and direction out of SB
    structs
  mlxsw: spectrum_buffers: Rename "pool" to "pr" in initialization
  mlxsw: spectrum_buffers: Cache shared buffer configuration
  mlxsw: spectrum_buffers: Remove eg pool 3 default init and CPU port TC
    binding to it
  mlxsw: spectrum_buffers: Change initialization of PG 9
  mlxsw: spectrum_buffers: Get max_buff defaults into limits exposed to
    user
  mlxsw: core: Add mlxsw_core_port_driver_priv helper
  mlxsw: spectrum_buffers: Implement shared buffer configuration
  mlxsw: core: Add devlink shared buffer occupancy callbacks
  mlxsw: reg: Add Shared Buffer Status register definition
  mlxsw: reg: Extend SBPM register for occupancy control
  mlxsw: core: Add mlxsw specific workqueue and use it for FDB notif.
    processing
  mlxsw: core: Introduce support for asynchronous EMAD register access
  mlxsw: spectrum_buffers: Implement occupancy monitoring

 drivers/net/ethernet/mellanox/mlxsw/core.c         |  682 ++++++++---
 drivers/net/ethernet/mellanox/mlxsw/core.h         |   56 +
 drivers/net/ethernet/mellanox/mlxsw/reg.h          |  135 ++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |   32 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |   68 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_buffers.c |  974 +++++++++++----
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   |    4 +-
 include/net/devlink.h                              |   59 +
 include/uapi/linux/devlink.h                       |   63 +
 net/core/devlink.c                                 | 1236 ++++++++++++++++++--
 10 files changed, 2787 insertions(+), 522 deletions(-)

-- 
2.5.5

^ permalink raw reply

* [patch net-next 01/18] devlink: add shared buffer configuration
From: Jiri Pirko @ 2016-04-14 16:19 UTC (permalink / raw)
  To: netdev
  Cc: davem, idosch, eladr, yotamg, ogerlitz, roopa, nikolay, jhs,
	john.fastabend, rami.rosen, gospo, stephen, sfeldma
In-Reply-To: <1460650770-19382-1-git-send-email-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Define userspace API and drivers API for configuration of shared
buffers. Four basic objects are defined:
shared buffer - attributes are size, number of pools and TCs
pool - definition of a chunk of the shared buffer; it has a size and
       either a static or a dynamic threshold
port pool threshold - to set per-port threshold for each pool
port tc threshold bind - to bind port and TC to specified pool
                         with threshold.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
---
 include/net/devlink.h        |  47 +++
 include/uapi/linux/devlink.h |  57 +++
 net/core/devlink.c           | 940 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1044 insertions(+)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index c37d257..e4c2747 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -24,6 +24,7 @@ struct devlink_ops;
 struct devlink {
 	struct list_head list;
 	struct list_head port_list;
+	struct list_head sb_list;
 	const struct devlink_ops *ops;
 	struct device *dev;
 	possible_net_t _net;
@@ -42,6 +43,12 @@ struct devlink_port {
 	u32 split_group;
 };
 
+struct devlink_sb_pool_info {
+	enum devlink_sb_pool_type pool_type;
+	u32 size;
+	enum devlink_sb_threshold_type threshold_type;
+};
+
 struct devlink_ops {
 	size_t priv_size;
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -49,6 +56,28 @@ struct devlink_ops {
 	int (*port_split)(struct devlink *devlink, unsigned int port_index,
 			  unsigned int count);
 	int (*port_unsplit)(struct devlink *devlink, unsigned int port_index);
+	int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index,
+			   u16 pool_index,
+			   struct devlink_sb_pool_info *pool_info);
+	int (*sb_pool_set)(struct devlink *devlink, unsigned int sb_index,
+			   u16 pool_index, u32 size,
+			   enum devlink_sb_threshold_type threshold_type);
+	int (*sb_port_pool_get)(struct devlink_port *devlink_port,
+				unsigned int sb_index, u16 pool_index,
+				u32 *p_threshold);
+	int (*sb_port_pool_set)(struct devlink_port *devlink_port,
+				unsigned int sb_index, u16 pool_index,
+				u32 threshold);
+	int (*sb_tc_pool_bind_get)(struct devlink_port *devlink_port,
+				   unsigned int sb_index,
+				   u16 tc_index,
+				   enum devlink_sb_pool_type pool_type,
+				   u16 *p_pool_index, u32 *p_threshold);
+	int (*sb_tc_pool_bind_set)(struct devlink_port *devlink_port,
+				   unsigned int sb_index,
+				   u16 tc_index,
+				   enum devlink_sb_pool_type pool_type,
+				   u16 pool_index, u32 threshold);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
@@ -82,6 +111,11 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port,
 void devlink_port_type_clear(struct devlink_port *devlink_port);
 void devlink_port_split_set(struct devlink_port *devlink_port,
 			    u32 split_group);
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+			u32 size, u16 ingress_pools_count,
+			u16 egress_pools_count, u16 ingress_tc_count,
+			u16 egress_tc_count);
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
 
 #else
 
@@ -135,6 +169,19 @@ static inline void devlink_port_split_set(struct devlink_port *devlink_port,
 {
 }
 
+static inline int
+devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size,
+		    u16 ingress_pools_count, u16 egress_pools_count,
+		    u16 ingress_tc_count, u16 egress_tc_count)
+{
+	return 0; /* stub for the #else build: no-op, reports success */
+}
+
+static inline void devlink_sb_unregister(struct devlink *devlink,
+					 unsigned int sb_index)
+{
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index c9fee57..9c1aa57 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -33,6 +33,26 @@ enum devlink_command {
 	DEVLINK_CMD_PORT_SPLIT,
 	DEVLINK_CMD_PORT_UNSPLIT,
 
+	DEVLINK_CMD_SB_GET,		/* can dump */
+	DEVLINK_CMD_SB_SET,
+	DEVLINK_CMD_SB_NEW,
+	DEVLINK_CMD_SB_DEL,
+
+	DEVLINK_CMD_SB_POOL_GET,	/* can dump */
+	DEVLINK_CMD_SB_POOL_SET,
+	DEVLINK_CMD_SB_POOL_NEW,
+	DEVLINK_CMD_SB_POOL_DEL,
+
+	DEVLINK_CMD_SB_PORT_POOL_GET,	/* can dump */
+	DEVLINK_CMD_SB_PORT_POOL_SET,
+	DEVLINK_CMD_SB_PORT_POOL_NEW,
+	DEVLINK_CMD_SB_PORT_POOL_DEL,
+
+	DEVLINK_CMD_SB_TC_POOL_BIND_GET,	/* can dump */
+	DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+	DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+	DEVLINK_CMD_SB_TC_POOL_BIND_DEL,
+
 	/* add new commands above here */
 
 	__DEVLINK_CMD_MAX,
@@ -46,6 +66,31 @@ enum devlink_port_type {
 	DEVLINK_PORT_TYPE_IB,
 };
 
+enum devlink_sb_pool_type {
+	DEVLINK_SB_POOL_TYPE_INGRESS,
+	DEVLINK_SB_POOL_TYPE_EGRESS,
+};
+
+/* static threshold - limiting the maximum number of bytes.
+ * dynamic threshold - limiting the maximum number of bytes
+ *   based on the currently available free space in the shared buffer pool.
+ *   In this mode, the maximum quota is calculated based
+ *   on the following formula:
+ *     max_quota = alpha / (1 + alpha) * Free_Buffer
+ *   where Free_Buffer is the amount of unoccupied buffer associated with
+ *   the relevant pool.
+ *   The value range which can be passed is 0-20 and serves
+ *   for computation of alpha by following formula:
+ *     alpha = 2 ^ (passed_value - 10)
+ */
+
+enum devlink_sb_threshold_type {
+	DEVLINK_SB_THRESHOLD_TYPE_STATIC,
+	DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC,
+};
+
+#define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -62,6 +107,18 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_IBDEV_NAME,		/* string */
 	DEVLINK_ATTR_PORT_SPLIT_COUNT,		/* u32 */
 	DEVLINK_ATTR_PORT_SPLIT_GROUP,		/* u32 */
+	DEVLINK_ATTR_SB_INDEX,			/* u32 */
+	DEVLINK_ATTR_SB_SIZE,			/* u32 */
+	DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,	/* u16 */
+	DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,	/* u16 */
+	DEVLINK_ATTR_SB_INGRESS_TC_COUNT,	/* u16 */
+	DEVLINK_ATTR_SB_EGRESS_TC_COUNT,	/* u16 */
+	DEVLINK_ATTR_SB_POOL_INDEX,		/* u16 */
+	DEVLINK_ATTR_SB_POOL_TYPE,		/* u8 */
+	DEVLINK_ATTR_SB_POOL_SIZE,		/* u32 */
+	DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,	/* u8 */
+	DEVLINK_ATTR_SB_THRESHOLD,		/* u32 */
+	DEVLINK_ATTR_SB_TC_INDEX,		/* u16 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b84cf0d..aa0b9e1 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -119,8 +119,167 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
 	return devlink_port_get_from_attrs(devlink, info->attrs);
 }
 
+struct devlink_sb {
+	struct list_head list;
+	unsigned int index;
+	u32 size;
+	u16 ingress_pools_count;
+	u16 egress_pools_count;
+	u16 ingress_tc_count;
+	u16 egress_tc_count;
+};
+
+static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
+{
+	return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count;
+}
+
+static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
+						  unsigned int sb_index)
+{
+	struct devlink_sb *devlink_sb;
+
+	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+		if (devlink_sb->index == sb_index)
+			return devlink_sb;
+	}
+	return NULL;
+}
+
+static bool devlink_sb_index_exists(struct devlink *devlink,
+				    unsigned int sb_index)
+{
+	return devlink_sb_get_by_index(devlink, sb_index);
+}
+
+static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
+						    struct nlattr **attrs)
+{
+	if (attrs[DEVLINK_ATTR_SB_INDEX]) {
+		u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
+		struct devlink_sb *devlink_sb;
+
+		devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+		if (!devlink_sb)
+			return ERR_PTR(-ENODEV);
+		return devlink_sb;
+	}
+	return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
+						   struct genl_info *info)
+{
+	return devlink_sb_get_from_attrs(devlink, info->attrs);
+}
+
+static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
+						struct nlattr **attrs,
+						u16 *p_pool_index)
+{
+	u16 val;
+
+	if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX])
+		return -EINVAL;
+
+	val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]);
+	if (val >= devlink_sb_pool_count(devlink_sb))
+		return -EINVAL;
+	*p_pool_index = val;
+	return 0;
+}
+
+static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
+					       struct genl_info *info,
+					       u16 *p_pool_index)
+{
+	return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs,
+						    p_pool_index);
+}
+
+static int
+devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
+				    enum devlink_sb_pool_type *p_pool_type)
+{
+	u8 val;
+
+	if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE])
+		return -EINVAL;
+
+	val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]);
+	if (val != DEVLINK_SB_POOL_TYPE_INGRESS &&
+	    val != DEVLINK_SB_POOL_TYPE_EGRESS)
+		return -EINVAL;
+	*p_pool_type = val;
+	return 0;
+}
+
+static int
+devlink_sb_pool_type_get_from_info(struct genl_info *info,
+				   enum devlink_sb_pool_type *p_pool_type)
+{
+	return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type);
+}
+
+static int
+devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
+				  enum devlink_sb_threshold_type *p_th_type)
+{
+	u8 val;
+
+	if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE])
+		return -EINVAL;
+
+	val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]);
+	if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC &&
+	    val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC)
+		return -EINVAL;
+	*p_th_type = val;
+	return 0;
+}
+
+static int
+devlink_sb_th_type_get_from_info(struct genl_info *info,
+				 enum devlink_sb_threshold_type *p_th_type)
+{
+	return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type);
+}
+
+static int
+devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
+				   struct nlattr **attrs,
+				   enum devlink_sb_pool_type pool_type,
+				   u16 *p_tc_index)
+{
+	u16 val;
+
+	if (!attrs[DEVLINK_ATTR_SB_TC_INDEX])
+		return -EINVAL;
+
+	val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]);
+	if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS &&
+	    val >= devlink_sb->ingress_tc_count)
+		return -EINVAL;
+	if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS &&
+	    val >= devlink_sb->egress_tc_count)
+		return -EINVAL;
+	*p_tc_index = val;
+	return 0;
+}
+
+static int
+devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
+				  struct genl_info *info,
+				  enum devlink_sb_pool_type pool_type,
+				  u16 *p_tc_index)
+{
+	return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs,
+						  pool_type, p_tc_index);
+}
+
 #define DEVLINK_NL_FLAG_NEED_DEVLINK	BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT	BIT(1)
+#define DEVLINK_NL_FLAG_NEED_SB		BIT(2)
 
 static int devlink_nl_pre_doit(const struct genl_ops *ops,
 			       struct sk_buff *skb, struct genl_info *info)
@@ -147,6 +306,18 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 		}
 		info->user_ptr[0] = devlink_port;
 	}
+	if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) {
+		struct devlink_sb *devlink_sb;
+
+		devlink_sb = devlink_sb_get_from_info(devlink, info);
+		if (IS_ERR(devlink_sb)) {
+			if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
+				mutex_unlock(&devlink_port_mutex);
+			mutex_unlock(&devlink_mutex);
+			return PTR_ERR(devlink_sb);
+		}
+		info->user_ptr[1] = devlink_sb;
+	}
 	return 0;
 }
 
@@ -499,12 +670,675 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
 	return devlink_port_unsplit(devlink, port_index);
 }
 
+static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
+			      struct devlink_sb *devlink_sb,
+			      enum devlink_command cmd, u32 portid,
+			      u32 seq, int flags)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
+			devlink_sb->ingress_pools_count))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
+			devlink_sb->egress_pools_count))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
+			devlink_sb->ingress_tc_count))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
+			devlink_sb->egress_tc_count))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
+				      struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_sb *devlink_sb = info->user_ptr[1];
+	struct sk_buff *msg;
+	int err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+				 DEVLINK_CMD_SB_NEW,
+				 info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
+					struct netlink_callback *cb)
+{
+	struct devlink *devlink;
+	struct devlink_sb *devlink_sb;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+						 DEVLINK_CMD_SB_NEW,
+						 NETLINK_CB(cb->skb).portid,
+						 cb->nlh->nlmsg_seq,
+						 NLM_F_MULTI);
+			if (err)
+				goto out;
+			idx++;
+		}
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
+				   struct devlink_sb *devlink_sb,
+				   u16 pool_index, enum devlink_command cmd,
+				   u32 portid, u32 seq, int flags)
+{
+	struct devlink_sb_pool_info pool_info;
+	void *hdr;
+	int err;
+
+	err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
+					pool_index, &pool_info);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size))
+		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
+		       pool_info.threshold_type))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
+					   struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_sb *devlink_sb = info->user_ptr[1];
+	struct sk_buff *msg;
+	u16 pool_index;
+	int err;
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	if (!devlink->ops || !devlink->ops->sb_pool_get)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index,
+				      DEVLINK_CMD_SB_POOL_NEW,
+				      info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+				struct devlink *devlink,
+				struct devlink_sb *devlink_sb,
+				u32 portid, u32 seq)
+{
+	u16 pool_count = devlink_sb_pool_count(devlink_sb);
+	u16 pool_index;
+	int err;
+
+	for (pool_index = 0; pool_index < pool_count; pool_index++) {
+		if (*p_idx < start) {
+			(*p_idx)++;
+			continue;
+		}
+		err = devlink_nl_sb_pool_fill(msg, devlink,
+					      devlink_sb,
+					      pool_index,
+					      DEVLINK_CMD_SB_POOL_NEW,
+					      portid, seq, NLM_F_MULTI);
+		if (err)
+			return err;
+		(*p_idx)++;
+	}
+	return 0;
+}
+
+static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
+					     struct netlink_callback *cb)
+{
+	struct devlink *devlink;
+	struct devlink_sb *devlink_sb;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
+		    !devlink->ops || !devlink->ops->sb_pool_get)
+			continue;
+		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+			err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
+						   devlink_sb,
+						   NETLINK_CB(cb->skb).portid,
+						   cb->nlh->nlmsg_seq);
+			if (err && err != -EOPNOTSUPP)
+				goto out;
+		}
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
+			       u16 pool_index, u32 size,
+			       enum devlink_sb_threshold_type threshold_type)
+
+{
+	const struct devlink_ops *ops = devlink->ops;
+
+	if (ops && ops->sb_pool_set)
+		return ops->sb_pool_set(devlink, sb_index, pool_index,
+					size, threshold_type);
+	return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
+					   struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_sb *devlink_sb = info->user_ptr[1];
+	enum devlink_sb_threshold_type threshold_type;
+	u16 pool_index;
+	u32 size;
+	int err;
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	err = devlink_sb_th_type_get_from_info(info, &threshold_type);
+	if (err)
+		return err;
+
+	if (!info->attrs[DEVLINK_ATTR_SB_POOL_SIZE])
+		return -EINVAL;
+
+	size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
+	return devlink_sb_pool_set(devlink, devlink_sb->index,
+				   pool_index, size, threshold_type);
+}
+
+static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
+					struct devlink *devlink,
+					struct devlink_port *devlink_port,
+					struct devlink_sb *devlink_sb,
+					u16 pool_index,
+					enum devlink_command cmd,
+					u32 portid, u32 seq, int flags)
+{
+	u32 threshold;
+	void *hdr;
+	int err;
+
+	err = devlink->ops->sb_port_pool_get(devlink_port, devlink_sb->index,
+					     pool_index, &threshold);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
+						struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[0];
+	struct devlink *devlink = devlink_port->devlink;
+	struct devlink_sb *devlink_sb = info->user_ptr[1];
+	struct sk_buff *msg;
+	u16 pool_index;
+	int err;
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	if (!devlink->ops || !devlink->ops->sb_port_pool_get)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port,
+					   devlink_sb, pool_index,
+					   DEVLINK_CMD_SB_PORT_POOL_NEW,
+					   info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+				     struct devlink *devlink,
+				     struct devlink_sb *devlink_sb,
+				     u32 portid, u32 seq)
+{
+	struct devlink_port *devlink_port;
+	u16 pool_count = devlink_sb_pool_count(devlink_sb);
+	u16 pool_index;
+	int err;
+
+	list_for_each_entry(devlink_port, &devlink->port_list, list) {
+		for (pool_index = 0; pool_index < pool_count; pool_index++) {
+			if (*p_idx < start) {
+				(*p_idx)++;
+				continue;
+			}
+			err = devlink_nl_sb_port_pool_fill(msg, devlink,
+							   devlink_port,
+							   devlink_sb,
+							   pool_index,
+							   DEVLINK_CMD_SB_PORT_POOL_NEW,
+							   portid, seq,
+							   NLM_F_MULTI);
+			if (err)
+				return err;
+			(*p_idx)++;
+		}
+	}
+	return 0;
+}
+
+static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
+						  struct netlink_callback *cb)
+{
+	struct devlink *devlink;
+	struct devlink_sb *devlink_sb;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink_port_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
+		    !devlink->ops || !devlink->ops->sb_port_pool_get)
+			continue;
+		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+			err = __sb_port_pool_get_dumpit(msg, start, &idx,
+							devlink, devlink_sb,
+							NETLINK_CB(cb->skb).portid,
+							cb->nlh->nlmsg_seq);
+			if (err && err != -EOPNOTSUPP)
+				goto out;
+		}
+	}
+out:
+	mutex_unlock(&devlink_port_mutex);
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
+				    unsigned int sb_index, u16 pool_index,
+				    u32 threshold)
+
+{
+	const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+	if (ops && ops->sb_port_pool_set)
+		return ops->sb_port_pool_set(devlink_port, sb_index,
+					     pool_index, threshold);
+	return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+						struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[0];
+	struct devlink_sb *devlink_sb = info->user_ptr[1];
+	u16 pool_index;
+	u32 threshold;
+	int err;
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD])
+		return -EINVAL;
+
+	threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+	return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
+					pool_index, threshold);
+}
+
+static int
+devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
+				struct devlink_port *devlink_port,
+				struct devlink_sb *devlink_sb, u16 tc_index,
+				enum devlink_sb_pool_type pool_type,
+				enum devlink_command cmd,
+				u32 portid, u32 seq, int flags)
+{
+	u16 pool_index;
+	u32 threshold;
+	void *hdr;
+	int err;
+
+	err = devlink->ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
+						tc_index, pool_type,
+						&pool_index, &threshold);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index))
+		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+						   struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[0];
+	struct devlink *devlink = devlink_port->devlink;
+	struct devlink_sb *devlink_sb = info->user_ptr[1];
+	struct sk_buff *msg;
+	enum devlink_sb_pool_type pool_type;
+	u16 tc_index;
+	int err;
+
+	err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+	if (err)
+		return err;
+
+	err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+						pool_type, &tc_index);
+	if (err)
+		return err;
+
+	if (!devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port,
+					      devlink_sb, tc_index, pool_type,
+					      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+					      info->snd_portid,
+					      info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+					int start, int *p_idx,
+					struct devlink *devlink,
+					struct devlink_sb *devlink_sb,
+					u32 portid, u32 seq)
+{
+	struct devlink_port *devlink_port;
+	u16 tc_index;
+	int err;
+
+	list_for_each_entry(devlink_port, &devlink->port_list, list) {
+		for (tc_index = 0;
+		     tc_index < devlink_sb->ingress_tc_count; tc_index++) {
+			if (*p_idx < start) {
+				(*p_idx)++;
+				continue;
+			}
+			err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+							      devlink_port,
+							      devlink_sb,
+							      tc_index,
+							      DEVLINK_SB_POOL_TYPE_INGRESS,
+							      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+							      portid, seq,
+							      NLM_F_MULTI);
+			if (err)
+				return err;
+			(*p_idx)++;
+		}
+		for (tc_index = 0;
+		     tc_index < devlink_sb->egress_tc_count; tc_index++) {
+			if (*p_idx < start) {
+				(*p_idx)++;
+				continue;
+			}
+			err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+							      devlink_port,
+							      devlink_sb,
+							      tc_index,
+							      DEVLINK_SB_POOL_TYPE_EGRESS,
+							      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+							      portid, seq,
+							      NLM_F_MULTI);
+			if (err)
+				return err;
+			(*p_idx)++;
+		}
+	}
+	return 0;
+}
+
+static int
+devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink *devlink;
+	struct devlink_sb *devlink_sb;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink_port_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
+		    !devlink->ops || !devlink->ops->sb_tc_pool_bind_get)
+			continue;
+		list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+			err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
+							   devlink,
+							   devlink_sb,
+							   NETLINK_CB(cb->skb).portid,
+							   cb->nlh->nlmsg_seq);
+			if (err && err != -EOPNOTSUPP)
+				goto out;
+		}
+	}
+out:
+	mutex_unlock(&devlink_port_mutex);
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
+				       unsigned int sb_index, u16 tc_index,
+				       enum devlink_sb_pool_type pool_type,
+				       u16 pool_index, u32 threshold)
+
+{
+	const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+	if (ops && ops->sb_tc_pool_bind_set)
+		return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
+						tc_index, pool_type,
+						pool_index, threshold);
+	return -EOPNOTSUPP;
+}
+
+static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+						   struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[0];
+	struct devlink_sb *devlink_sb = info->user_ptr[1];
+	enum devlink_sb_pool_type pool_type;
+	u16 tc_index;
+	u16 pool_index;
+	u32 threshold;
+	int err;
+
+	err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+	if (err)
+		return err;
+
+	err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+						pool_type, &tc_index);
+	if (err)
+		return err;
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD])
+		return -EINVAL;
+
+	threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+	return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
+					   tc_index, pool_type,
+					   pool_index, threshold);
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
 	[DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
 	[DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
+	[DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -545,6 +1379,66 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_SB_GET,
+		.doit = devlink_nl_cmd_sb_get_doit,
+		.dumpit = devlink_nl_cmd_sb_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NEED_SB,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_POOL_GET,
+		.doit = devlink_nl_cmd_sb_pool_get_doit,
+		.dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NEED_SB,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_POOL_SET,
+		.doit = devlink_nl_cmd_sb_pool_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NEED_SB,
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+		.doit = devlink_nl_cmd_sb_port_pool_get_doit,
+		.dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+				  DEVLINK_NL_FLAG_NEED_SB,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+		.doit = devlink_nl_cmd_sb_port_pool_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+				  DEVLINK_NL_FLAG_NEED_SB,
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+		.doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
+		.dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+				  DEVLINK_NL_FLAG_NEED_SB,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+		.doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT |
+				  DEVLINK_NL_FLAG_NEED_SB,
+	},
 };
 
 /**
@@ -566,6 +1460,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	devlink->ops = ops;
 	devlink_net_set(devlink, &init_net);
 	INIT_LIST_HEAD(&devlink->port_list);
+	INIT_LIST_HEAD(&devlink->sb_list);
 	return devlink;
 }
 EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -721,6 +1616,51 @@ void devlink_port_split_set(struct devlink_port *devlink_port,
 }
 EXPORT_SYMBOL_GPL(devlink_port_split_set);
 
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+			u32 size, u16 ingress_pools_count,
+			u16 egress_pools_count, u16 ingress_tc_count,
+			u16 egress_tc_count)
+{
+	struct devlink_sb *devlink_sb;
+	int err = 0;
+
+	mutex_lock(&devlink_mutex);
+	if (devlink_sb_index_exists(devlink, sb_index)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
+	if (!devlink_sb) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+	devlink_sb->index = sb_index;
+	devlink_sb->size = size;
+	devlink_sb->ingress_pools_count = ingress_pools_count;
+	devlink_sb->egress_pools_count = egress_pools_count;
+	devlink_sb->ingress_tc_count = ingress_tc_count;
+	devlink_sb->egress_tc_count = egress_tc_count;
+	list_add_tail(&devlink_sb->list, &devlink->sb_list);
+unlock:
+	mutex_unlock(&devlink_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_sb_register);
+
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+	struct devlink_sb *devlink_sb;
+
+	mutex_lock(&devlink_mutex);
+	devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+	WARN_ON(!devlink_sb);
+	list_del(&devlink_sb->list);
+	mutex_unlock(&devlink_mutex);
+	kfree(devlink_sb);
+}
+EXPORT_SYMBOL_GPL(devlink_sb_unregister);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family_with_ops_groups(&devlink_nl_family,
-- 
2.5.5

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox