Netdev List
 help / color / mirror / Atom feed
* [PATCH 7/7] netfilter: nf_socket: Fix out of bounds access in nf_sk_lookup_slow_v{4,6}
From: Pablo Neira Ayuso @ 2018-03-24 20:34 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180324203423.4513-1-pablo@netfilter.org>

From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>

skb_header_pointer will copy data into a buffer if data is non linear,
otherwise it will return a pointer in the linear section of the data.
nf_sk_lookup_slow_v{4,6} always copies data of size udphdr but later
accesses memory within the size of tcphdr (th->doff) in case of TCP
packets. This causes a crash when running with KASAN with the following
call stack -

BUG: KASAN: stack-out-of-bounds in xt_socket_lookup_slow_v4+0x524/0x718
net/netfilter/xt_socket.c:178
Read of size 2 at addr ffffffe3d417a87c by task syz-executor/28971
CPU: 2 PID: 28971 Comm: syz-executor Tainted: G    B   W  O    4.9.65+ #1
Call trace:
[<ffffff9467e8d390>] dump_backtrace+0x0/0x428 arch/arm64/kernel/traps.c:76
[<ffffff9467e8d7e0>] show_stack+0x28/0x38 arch/arm64/kernel/traps.c:226
[<ffffff946842d9b8>] __dump_stack lib/dump_stack.c:15 [inline]
[<ffffff946842d9b8>] dump_stack+0xd4/0x124 lib/dump_stack.c:51
[<ffffff946811d4b0>] print_address_description+0x68/0x258 mm/kasan/report.c:248
[<ffffff946811d8c8>] kasan_report_error mm/kasan/report.c:347 [inline]
[<ffffff946811d8c8>] kasan_report.part.2+0x228/0x2f0 mm/kasan/report.c:371
[<ffffff946811df44>] kasan_report+0x5c/0x70 mm/kasan/report.c:372
[<ffffff946811bebc>] check_memory_region_inline mm/kasan/kasan.c:308 [inline]
[<ffffff946811bebc>] __asan_load2+0x84/0x98 mm/kasan/kasan.c:739
[<ffffff94694d6f04>] __tcp_hdrlen include/linux/tcp.h:35 [inline]
[<ffffff94694d6f04>] xt_socket_lookup_slow_v4+0x524/0x718 net/netfilter/xt_socket.c:178

Fix this by copying data into appropriate size headers based on protocol.

Fixes: a583636a83ea ("inet: refactor inet[6]_lookup functions to take skb")
Signed-off-by: Tejaswi Tanikella <tejaswit@codeaurora.org>
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_socket_ipv4.c | 6 ++++--
 net/ipv6/netfilter/nf_socket_ipv6.c | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index e9293bdebba0..4824b1e183a1 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -108,10 +108,12 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
 	int doff = 0;
 
 	if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
-		struct udphdr _hdr, *hp;
+		struct tcphdr _hdr;
+		struct udphdr *hp;
 
 		hp = skb_header_pointer(skb, ip_hdrlen(skb),
-					sizeof(_hdr), &_hdr);
+					iph->protocol == IPPROTO_UDP ?
+					sizeof(*hp) : sizeof(_hdr), &_hdr);
 		if (hp == NULL)
 			return NULL;
 
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index ebb2bf84232a..f14de4b6d639 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -116,9 +116,11 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
 	}
 
 	if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
-		struct udphdr _hdr, *hp;
+		struct tcphdr _hdr;
+		struct udphdr *hp;
 
-		hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+		hp = skb_header_pointer(skb, thoff, tproto == IPPROTO_UDP ?
+					sizeof(*hp) : sizeof(_hdr), &_hdr);
 		if (hp == NULL)
 			return NULL;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 6/7] netfilter: nf_tables: do not hold reference on netdevice from preparation phase
From: Pablo Neira Ayuso @ 2018-03-24 20:34 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180324203423.4513-1-pablo@netfilter.org>

The netfilter netdevice event handler hold the nfnl_lock mutex, this
avoids races with a device going away while such device is being
attached to hooks from the netlink control plane. Therefore, either
control plane bails out with ENOENT or netdevice event path waits until
the hook that is attached to net_device is registered.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 977d43e00f98..530e12ae52d7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1288,8 +1288,6 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 		free_percpu(basechain->stats);
 		if (basechain->stats)
 			static_branch_dec(&nft_counters_enabled);
-		if (basechain->ops.dev != NULL)
-			dev_put(basechain->ops.dev);
 		kfree(chain->name);
 		kfree(basechain);
 	} else {
@@ -1356,7 +1354,7 @@ static int nft_chain_parse_hook(struct net *net,
 		}
 
 		nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
-		dev = dev_get_by_name(net, ifname);
+		dev = __dev_get_by_name(net, ifname);
 		if (!dev) {
 			module_put(type->owner);
 			return -ENOENT;
@@ -1373,8 +1371,6 @@ static int nft_chain_parse_hook(struct net *net,
 static void nft_chain_release_hook(struct nft_chain_hook *hook)
 {
 	module_put(hook->type->owner);
-	if (hook->dev != NULL)
-		dev_put(hook->dev);
 }
 
 static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
@@ -4948,7 +4944,7 @@ static int nf_tables_parse_devices(const struct nft_ctx *ctx,
 		}
 
 		nla_strlcpy(ifname, tmp, IFNAMSIZ);
-		dev = dev_get_by_name(ctx->net, ifname);
+		dev = __dev_get_by_name(ctx->net, ifname);
 		if (!dev) {
 			err = -ENOENT;
 			goto err1;
@@ -5007,10 +5003,8 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 		return err;
 
 	ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
-	if (!ops) {
-		err = -ENOMEM;
-		goto err1;
-	}
+	if (!ops)
+		return -ENOMEM;
 
 	flowtable->hooknum	= hooknum;
 	flowtable->priority	= priority;
@@ -5028,11 +5022,6 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 							  GFP_KERNEL);
 	}
 
-	err = 0;
-err1:
-	for (i = 0; i < n; i++)
-		dev_put(dev_array[i]);
-
 	return err;
 }
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 5/7] netfilter: nf_tables: cache device name in flowtable object
From: Pablo Neira Ayuso @ 2018-03-24 20:34 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180324203423.4513-1-pablo@netfilter.org>

Devices going away have to grab the nfnl_lock from the netdev event path
to avoid races with control plane updates.

However, netlink dumps in netfilter do not hold nfnl_lock mutex. Cache
the device name into the objects to avoid an use-after-free situation
for a device that is going away.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  4 ++++
 net/netfilter/nf_tables_api.c     | 15 +++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 663b015dace5..30eb0652b025 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1068,6 +1068,8 @@ struct nft_object_ops {
 int nft_register_obj(struct nft_object_type *obj_type);
 void nft_unregister_obj(struct nft_object_type *obj_type);
 
+#define NFT_FLOWTABLE_DEVICE_MAX	8
+
 /**
  *	struct nft_flowtable - nf_tables flow table
  *
@@ -1080,6 +1082,7 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
  *	@genmask: generation mask
  *	@use: number of references to this flow table
  * 	@handle: unique object handle
+ *	@dev_name: array of device names
  *	@data: rhashtable and garbage collector
  * 	@ops: array of hooks
  */
@@ -1093,6 +1096,7 @@ struct nft_flowtable {
 	u32				genmask:2,
 					use:30;
 	u64				handle;
+	char				*dev_name[NFT_FLOWTABLE_DEVICE_MAX];
 	/* runtime data below here */
 	struct nf_hook_ops		*ops ____cacheline_aligned;
 	struct nf_flowtable		data;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 14777c404071..977d43e00f98 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4932,8 +4932,6 @@ nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
        return ERR_PTR(-ENOENT);
 }
 
-#define NFT_FLOWTABLE_DEVICE_MAX	8
-
 static int nf_tables_parse_devices(const struct nft_ctx *ctx,
 				   const struct nlattr *attr,
 				   struct net_device *dev_array[], int *len)
@@ -5006,7 +5004,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 	err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
 				      dev_array, &n);
 	if (err < 0)
-		goto err1;
+		return err;
 
 	ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
 	if (!ops) {
@@ -5026,6 +5024,8 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 		flowtable->ops[i].priv		= &flowtable->data.rhashtable;
 		flowtable->ops[i].hook		= flowtable->data.type->hook;
 		flowtable->ops[i].dev		= dev_array[i];
+		flowtable->dev_name[i]		= kstrdup(dev_array[i]->name,
+							  GFP_KERNEL);
 	}
 
 	err = 0;
@@ -5203,8 +5203,10 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 err5:
 	i = flowtable->ops_len;
 err4:
-	for (k = i - 1; k >= 0; k--)
+	for (k = i - 1; k >= 0; k--) {
+		kfree(flowtable->dev_name[k]);
 		nf_unregister_net_hook(net, &flowtable->ops[k]);
+	}
 
 	kfree(flowtable->ops);
 err3:
@@ -5294,9 +5296,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
 		goto nla_put_failure;
 
 	for (i = 0; i < flowtable->ops_len; i++) {
-		if (flowtable->ops[i].dev &&
+		if (flowtable->dev_name[i][0] &&
 		    nla_put_string(skb, NFTA_DEVICE_NAME,
-				   flowtable->ops[i].dev->name))
+				   flowtable->dev_name[i]))
 			goto nla_put_failure;
 	}
 	nla_nest_end(skb, nest_devs);
@@ -5538,6 +5540,7 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev,
 			continue;
 
 		nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
+		flowtable->dev_name[i][0] = '\0';
 		flowtable->ops[i].dev = NULL;
 		break;
 	}
-- 
2.11.0

^ permalink raw reply related

* [PATCH 4/7] netfilter: drop template ct when conntrack is skipped.
From: Pablo Neira Ayuso @ 2018-03-24 20:34 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180324203423.4513-1-pablo@netfilter.org>

From: Paolo Abeni <pabeni@redhat.com>

The ipv4 nf_ct code currently skips the nf_conntrak_in() call
for fragmented packets. As a results later matches/target can end
up manipulating template ct entry instead of 'real' ones.

Exploiting the above, syzbot found a way to trigger the following
splat:

WARNING: CPU: 1 PID: 4242 at net/netfilter/xt_cluster.c:55
xt_cluster_mt+0x6c1/0x840 net/netfilter/xt_cluster.c:127
Kernel panic - not syncing: panic_on_warn set ...

CPU: 1 PID: 4242 Comm: syzkaller027971 Not tainted 4.16.0-rc2+ #243
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:17 [inline]
  dump_stack+0x194/0x24d lib/dump_stack.c:53
  panic+0x1e4/0x41c kernel/panic.c:183
  __warn+0x1dc/0x200 kernel/panic.c:547
  report_bug+0x211/0x2d0 lib/bug.c:184
  fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178
  fixup_bug arch/x86/kernel/traps.c:247 [inline]
  do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
  do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
  invalid_op+0x58/0x80 arch/x86/entry/entry_64.S:957
RIP: 0010:xt_cluster_hash net/netfilter/xt_cluster.c:55 [inline]
RIP: 0010:xt_cluster_mt+0x6c1/0x840 net/netfilter/xt_cluster.c:127
RSP: 0018:ffff8801d2f6f2d0 EFLAGS: 00010293
RAX: ffff8801af700540 RBX: 0000000000000000 RCX: ffffffff84a2d1e1
RDX: 0000000000000000 RSI: ffff8801d2f6f478 RDI: ffff8801cafd336a
RBP: ffff8801d2f6f2e8 R08: 0000000000000000 R09: 0000000000000001
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801b03b3d18
R13: ffff8801cafd3300 R14: dffffc0000000000 R15: ffff8801d2f6f478
  ipt_do_table+0xa91/0x19b0 net/ipv4/netfilter/ip_tables.c:296
  iptable_filter_hook+0x65/0x80 net/ipv4/netfilter/iptable_filter.c:41
  nf_hook_entry_hookfn include/linux/netfilter.h:120 [inline]
  nf_hook_slow+0xba/0x1a0 net/netfilter/core.c:483
  nf_hook include/linux/netfilter.h:243 [inline]
  NF_HOOK include/linux/netfilter.h:286 [inline]
  raw_send_hdrinc.isra.17+0xf39/0x1880 net/ipv4/raw.c:432
  raw_sendmsg+0x14cd/0x26b0 net/ipv4/raw.c:669
  inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763
  sock_sendmsg_nosec net/socket.c:629 [inline]
  sock_sendmsg+0xca/0x110 net/socket.c:639
  SYSC_sendto+0x361/0x5c0 net/socket.c:1748
  SyS_sendto+0x40/0x50 net/socket.c:1716
  do_syscall_64+0x280/0x940 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x441b49
RSP: 002b:00007ffff5ca8b18 EFLAGS: 00000216 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000441b49
RDX: 0000000000000030 RSI: 0000000020ff7000 RDI: 0000000000000003
RBP: 00000000006cc018 R08: 000000002066354c R09: 0000000000000010
R10: 0000000000000000 R11: 0000000000000216 R12: 0000000000403470
R13: 0000000000403500 R14: 0000000000000000 R15: 0000000000000000
Dumping ftrace buffer:
    (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..

Instead of adding checks for template ct on every target/match
manipulating skb->_nfct, simply drop the template ct when skipping
nf_conntrack_in().

Fixes: 7b4fdf77a450ec ("netfilter: don't track fragmented packets")
Reported-and-tested-by: syzbot+0346441ae0545cfcea3a@syzkaller.appspotmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index b50721d9d30e..9db988f9a4d7 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -154,8 +154,20 @@ static unsigned int ipv4_conntrack_local(void *priv,
 					 struct sk_buff *skb,
 					 const struct nf_hook_state *state)
 {
-	if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+	if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+		enum ip_conntrack_info ctinfo;
+		struct nf_conn *tmpl;
+
+		tmpl = nf_ct_get(skb, &ctinfo);
+		if (tmpl && nf_ct_is_template(tmpl)) {
+			/* when skipping ct, clear templates to avoid fooling
+			 * later targets/matches
+			 */
+			skb->_nfct = 0;
+			nf_ct_put(tmpl);
+		}
 		return NF_ACCEPT;
+	}
 
 	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
 }
-- 
2.11.0

^ permalink raw reply related

* [PATCH 3/7] netfilter: nf_tables: add missing netlink attrs to policies
From: Pablo Neira Ayuso @ 2018-03-24 20:34 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180324203423.4513-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements")
Fixes: f25ad2e907f1 ("netfilter: nf_tables: prepare for expressions associated to set elements")
Fixes: 1a94e38d254b ("netfilter: nf_tables: add NFTA_RULE_ID attribute")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index cc8ca00e6e6e..14777c404071 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1973,6 +1973,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
 	[NFTA_RULE_POSITION]	= { .type = NLA_U64 },
 	[NFTA_RULE_USERDATA]	= { .type = NLA_BINARY,
 				    .len = NFT_USERDATA_MAXLEN },
+	[NFTA_RULE_ID]		= { .type = NLA_U32 },
 };
 
 static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
@@ -3380,6 +3381,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
 	[NFTA_SET_ELEM_TIMEOUT]		= { .type = NLA_U64 },
 	[NFTA_SET_ELEM_USERDATA]	= { .type = NLA_BINARY,
 					    .len = NFT_USERDATA_MAXLEN },
+	[NFTA_SET_ELEM_EXPR]		= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_OBJREF]		= { .type = NLA_STRING },
 };
 
 static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
-- 
2.11.0

^ permalink raw reply related

* [PATCH 2/7] netfilter: nf_tables: permit second nat hook if colliding hook is going away
From: Pablo Neira Ayuso @ 2018-03-24 20:34 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180324203423.4513-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

Sergei Trofimovich reported that restoring an nft ruleset doesn't work
anymore unless old rule content is flushed first.

The problem stems from a recent change designed to prevent multiple nat
hooks at the same hook point locations and nftables transaction model.

A 'flush ruleset' won't take effect until the entire transaction has
completed.

So, if one has a nft.rules file that contains a 'flush ruleset',
followed by a nat hook register request, then 'nft -f file' will work,
but running 'nft -f file' again will fail with -EBUSY.

Reason is that nftables will place the flush/removal requests in the
transaction list, but it will not act on the removal until after all new
rules are in place.

The netfilter core will therefore get request to register a new nat
hook before the old one is removed -- this now fails as the netfilter
core can't know the existing hook is staged for removal.

To fix this, we can search the transaction log when a hook collision
is detected.  The collision is okay if

 1. there is a delete request pending for the nat hook that is already
    registered.
 2. there is no second add request for a matching nat hook.
    This is required to only apply the exception once.

Fixes: f92b40a8b2645 ("netfilter: core: only allow one nat hook per hook point")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 64 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 36f69acaf51f..cc8ca00e6e6e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -74,15 +74,77 @@ static void nft_trans_destroy(struct nft_trans *trans)
 	kfree(trans);
 }
 
+/* removal requests are queued in the commit_list, but not acted upon
+ * until after all new rules are in place.
+ *
+ * Therefore, nf_register_net_hook(net, &nat_hook) runs before pending
+ * nf_unregister_net_hook().
+ *
+ * nf_register_net_hook thus fails if a nat hook is already in place
+ * even if the conflicting hook is about to be removed.
+ *
+ * If collision is detected, search commit_log for DELCHAIN matching
+ * the new nat hooknum; if we find one collision is temporary:
+ *
+ * Either transaction is aborted (new/colliding hook is removed), or
+ * transaction is committed (old hook is removed).
+ */
+static bool nf_tables_allow_nat_conflict(const struct net *net,
+					 const struct nf_hook_ops *ops)
+{
+	const struct nft_trans *trans;
+	bool ret = false;
+
+	if (!ops->nat_hook)
+		return false;
+
+	list_for_each_entry(trans, &net->nft.commit_list, list) {
+		const struct nf_hook_ops *pending_ops;
+		const struct nft_chain *pending;
+
+		if (trans->msg_type != NFT_MSG_NEWCHAIN &&
+		    trans->msg_type != NFT_MSG_DELCHAIN)
+			continue;
+
+		pending = trans->ctx.chain;
+		if (!nft_is_base_chain(pending))
+			continue;
+
+		pending_ops = &nft_base_chain(pending)->ops;
+		if (pending_ops->nat_hook &&
+		    pending_ops->pf == ops->pf &&
+		    pending_ops->hooknum == ops->hooknum) {
+			/* other hook registration already pending? */
+			if (trans->msg_type == NFT_MSG_NEWCHAIN)
+				return false;
+
+			ret = true;
+		}
+	}
+
+	return ret;
+}
+
 static int nf_tables_register_hook(struct net *net,
 				   const struct nft_table *table,
 				   struct nft_chain *chain)
 {
+	struct nf_hook_ops *ops;
+	int ret;
+
 	if (table->flags & NFT_TABLE_F_DORMANT ||
 	    !nft_is_base_chain(chain))
 		return 0;
 
-	return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+	ops = &nft_base_chain(chain)->ops;
+	ret = nf_register_net_hook(net, ops);
+	if (ret == -EBUSY && nf_tables_allow_nat_conflict(net, ops)) {
+		ops->nat_hook = false;
+		ret = nf_register_net_hook(net, ops);
+		ops->nat_hook = true;
+	}
+
+	return ret;
 }
 
 static void nf_tables_unregister_hook(struct net *net,
-- 
2.11.0

^ permalink raw reply related

* [PATCH 1/7] netfilter: nf_tables: meter: pick a set backend that supports updates
From: Pablo Neira Ayuso @ 2018-03-24 20:34 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180324203423.4513-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

in nftables, 'meter' can be used to instantiate a hash-table at run
time:

rule add filter forward iif "internal" meter hostacct { ip saddr counter}
nft list meter ip filter hostacct
table ip filter {
  meter hostacct {
    type ipv4_addr
    elements = { 192.168.0.1 : counter packets 8 bytes 2672, ..

because elemets get added on the fly, the kernel must chose a set
backend type that implements the ->update() function, otherwise
rule insertion fails with EOPNOTSUPP.

Therefore, skip set types that lack ->update, and also
make sure we do not discard a (bad) candidate when we did yet
find any candidate at all.  This could happen when userspace prefers
low memory footprint -- the set implementation currently checked might
not be a fit at all.  Make sure we pick it anyway (!bops).  In
case next candidate is a better fix, it will be chosen instead.

But in case nothing else is found we at least have a non-ideal
match rather than no match at all.

Fixes: 6c03ae210ce3 ("netfilter: nft_set_hash: add non-resizable hashtable implementation")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 5 ++++-
 net/netfilter/nft_set_hash.c  | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c4acc7340eb1..36f69acaf51f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2446,6 +2446,9 @@ EXPORT_SYMBOL_GPL(nft_unregister_set);
 
 static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags)
 {
+	if ((flags & NFT_SET_EVAL) && !ops->update)
+		return false;
+
 	return (flags & ops->features) == (flags & NFT_SET_FEATURES);
 }
 
@@ -2510,7 +2513,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 				if (est.space == best.space &&
 				    est.lookup < best.lookup)
 					break;
-			} else if (est.size < best.size) {
+			} else if (est.size < best.size || !bops) {
 				break;
 			}
 			continue;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index d40591fe1b2f..fc9c6d5d64cd 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -674,7 +674,7 @@ static const struct nft_set_ops *
 nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
 		    u32 flags)
 {
-	if (desc->size && !(flags & NFT_SET_TIMEOUT)) {
+	if (desc->size && !(flags & (NFT_SET_EVAL | NFT_SET_TIMEOUT))) {
 		switch (desc->klen) {
 		case 4:
 			return &nft_hash_fast_ops;
-- 
2.11.0

^ permalink raw reply related

* [PATCH 0/7] Netfilter fixes for net
From: Pablo Neira Ayuso @ 2018-03-24 20:34 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

Hi David,

The following patchset contains Netfilter fixes for your net tree,
they are:

1) Don't pick fixed hash implementation for NFT_SET_EVAL sets, otherwise
   userspace hits EOPNOTSUPP with valid rules using the meter statement,
   from Florian Westphal.

2) If you send a batch that flushes the existing ruleset (that contains
   a NAT chain) and the new ruleset definition comes with a new NAT
   chain, don't bogusly hit EBUSY. Also from Florian.

3) Missing netlink policy attribute validation, from Florian.

4) Detach conntrack template from skbuff if IP_NODEFRAG is set on,
   from Paolo Abeni.

5) Cache device names in flowtable object, otherwise we may end up
   walking over devices going aways given no rtnl_lock is held.

6) Fix incorrect net_device ingress with ingress hooks.

7) Fix crash when trying to read more data than available in UDP
   packets from the nf_socket infrastructure, from Subash.

You can pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git

Thanks!

----------------------------------------------------------------

The following changes since commit 36fe095606f881e6a3c7f9283c986aec6083f3e6:

  Merge branch 'phy-relax-error-checking' (2018-03-19 21:14:27 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git HEAD

for you to fetch changes up to 32c1733f0dd4bd11d6e65512bf4dc337c0452c8e:

  netfilter: nf_socket: Fix out of bounds access in nf_sk_lookup_slow_v{4,6} (2018-03-24 21:17:14 +0100)

----------------------------------------------------------------
Florian Westphal (3):
      netfilter: nf_tables: meter: pick a set backend that supports updates
      netfilter: nf_tables: permit second nat hook if colliding hook is going away
      netfilter: nf_tables: add missing netlink attrs to policies

Pablo Neira Ayuso (2):
      netfilter: nf_tables: cache device name in flowtable object
      netfilter: nf_tables: do not hold reference on netdevice from preparation phase

Paolo Abeni (1):
      netfilter: drop template ct when conntrack is skipped.

Subash Abhinov Kasiviswanathan (1):
      netfilter: nf_socket: Fix out of bounds access in nf_sk_lookup_slow_v{4,6}

 include/net/netfilter/nf_tables.h              |   4 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  14 +++-
 net/ipv4/netfilter/nf_socket_ipv4.c            |   6 +-
 net/ipv6/netfilter/nf_socket_ipv6.c            |   6 +-
 net/netfilter/nf_tables_api.c                  | 106 +++++++++++++++++++------
 net/netfilter/nft_set_hash.c                   |   2 +-
 6 files changed, 109 insertions(+), 29 deletions(-)

^ permalink raw reply

* Re: [PATCH 0/2] sh_eth: unify the SoC feature checks
From: Sergei Shtylyov @ 2018-03-24 20:30 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <d150c423-8e86-822f-a16b-b0fc0d717543@cogentembedded.com>

On 03/24/2018 11:14 PM, Sergei Shtylyov wrote:

> Should have been 0/5 in the subject. Sorry. :-)

   Oh, and I forgot 'net-next' inside [], sorry again! :-)

^ permalink raw reply

* Re: [PATCH 4/5] sh_eth: add sh_eth_cpu_data::no_tx_cntr flag
From: Sergei Shtylyov @ 2018-03-24 20:15 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <a6ddaada-59fa-3a38-7c69-87891bba9cad@cogentembedded.com>

Should read no_tx_cntrs in the subject -- forgot tp update it when
renaming the flag...

^ permalink raw reply

* Re: [PATCH 0/2] sh_eth: unify the SoC feature checks
From: Sergei Shtylyov @ 2018-03-24 20:14 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <b3706a51-27a4-d78b-998d-9440637ac216@cogentembedded.com>

Should have been 0/5 in the subject. Sorry. :-)

^ permalink raw reply

* [net PATCH] net: sched, fix OOO packets with pfifo_fast
From: John Fastabend @ 2018-03-24 20:13 UTC (permalink / raw)
  To: xiyou.wangcong, jiri, davem; +Cc: netdev

After the qdisc lock was dropped in pfifo_fast we allow multiple
enqueue threads and dequeue threads to run in parallel. On the
enqueue side the skb bit ooo_okay is used to ensure all related
skbs are enqueued in-order. On the dequeue side though there is
no similar logic. What we observe is with fewer queues than CPUs
it is possible to re-order packets when two instances of
__qdisc_run() are running in parallel. Each thread will dequeue
a skb and then whichever thread calls the ndo op first will
be sent on the wire. This doesn't typically happen because
qdisc_run() is usually triggered by the same core that did the
enqueue. However, drivers will trigger __netif_schedule()
when queues are transitioning from stopped to awake using the
netif_tx_wake_* APIs. When this happens netif_schedule() calls
qdisc_run() on the same CPU that did the netif_tx_wake_* which
is usually done in the interrupt completion context. This CPU
is selected with the irq affinity which is unrelated to the
enqueue operations.

To resolve this we add a RUNNING bit to the qdisc to ensure
only a single dequeue per qdisc is running. Enqueue and dequeue
operations can still run in parallel and also on multi queue
NICs we can still have a dequeue in-flight per qdisc, which
is typically per CPU.

Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Reported-by: Jakob Unterwurzacher <jakob.unterwurzacher@theobroma-systems.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/net/sch_generic.h |    1 +
 net/sched/sch_generic.c   |   13 ++++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2092d33..8da3267 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -30,6 +30,7 @@ struct qdisc_rate_table {
 enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
+	__QDISC_STATE_RUNNING,
 };
 
 struct qdisc_size_table {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7e3fbe9..29a1b47 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -377,12 +377,17 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
 	struct netdev_queue *txq;
 	struct net_device *dev;
 	struct sk_buff *skb;
-	bool validate;
+	bool more, validate;
 
 	/* Dequeue packet */
+	if (test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
+		return false;
+
 	skb = dequeue_skb(q, &validate, packets);
-	if (unlikely(!skb))
+	if (unlikely(!skb)) {
+		clear_bit(__QDISC_STATE_RUNNING, &q->state);
 		return false;
+	}
 
 	if (!(q->flags & TCQ_F_NOLOCK))
 		root_lock = qdisc_lock(q);
@@ -390,7 +395,9 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
 	dev = qdisc_dev(q);
 	txq = skb_get_tx_queue(dev, skb);
 
-	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+	more = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+	clear_bit(__QDISC_STATE_RUNNING, &q->state);
+	return more;
 }
 
 void __qdisc_run(struct Qdisc *q)

^ permalink raw reply related

* [PATCH 5/5] sh_eth: add sh_eth_cpu_data::cexcr flag
From: Sergei Shtylyov @ 2018-03-24 20:12 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <b3706a51-27a4-d78b-998d-9440637ac216@cogentembedded.com>

GEther controllers have CERCR/CEECR instead of CNDCR on the others.
Currently we are calling sh_eth_is_gether() in order to check for this,
however it would be simpler  to check the new 'cexcr' bitfield in the
'struct sh_eth_cpu_data';  then we'd be able to remove sh_eth_is_gether()
as there would be no callers left...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
 drivers/net/ethernet/renesas/sh_eth.c |   11 +++++------
 drivers/net/ethernet/renesas/sh_eth.h |    1 +
 2 files changed, 6 insertions(+), 6 deletions(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -450,11 +450,6 @@ static u32 sh_eth_tsu_read(struct sh_eth
 	return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]);
 }
 
-static bool sh_eth_is_gether(struct sh_eth_private *mdp)
-{
-	return mdp->reg_offset == sh_eth_offset_gigabit;
-}
-
 static void sh_eth_select_mii(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -661,6 +656,7 @@ static struct sh_eth_cpu_data r8a7740_da
 	.tsu		= 1,
 	.select_mii	= 1,
 	.magic		= 1,
+	.cexcr		= 1,
 };
 
 /* There is CPU dependent code */
@@ -918,6 +914,7 @@ static struct sh_eth_cpu_data sh7757_dat
 	.no_ade		= 1,
 	.xdfar_rw	= 1,
 	.tsu		= 1,
+	.cexcr		= 1,
 	.dual_port	= 1,
 };
 
@@ -959,6 +956,7 @@ static struct sh_eth_cpu_data sh7734_dat
 	.hw_checksum	= 1,
 	.select_mii	= 1,
 	.magic		= 1,
+	.cexcr		= 1,
 };
 
 /* SH7763 */
@@ -997,6 +995,7 @@ static struct sh_eth_cpu_data sh7763_dat
 	.tsu		= 1,
 	.irq_flags	= IRQF_SHARED,
 	.magic		= 1,
+	.cexcr		= 1,
 	.dual_port	= 1,
 };
 
@@ -2540,7 +2539,7 @@ static struct net_device_stats *sh_eth_g
 	sh_eth_update_stat(ndev, &ndev->stats.collisions, CDCR);
 	sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors, LCCR);
 
-	if (sh_eth_is_gether(mdp)) {
+	if (mdp->cd->cexcr) {
 		sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors,
 				   CERCR);
 		sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors,
Index: net-next/drivers/net/ethernet/renesas/sh_eth.h
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.h
+++ net-next/drivers/net/ethernet/renesas/sh_eth.h
@@ -515,6 +515,7 @@ struct sh_eth_cpu_data {
 	unsigned rtrate:1;	/* EtherC has RTRATE register */
 	unsigned magic:1;	/* EtherC has ECMR.MPDE and ECSR.MPD */
 	unsigned no_tx_cntrs:1;	/* EtherC DOES NOT have TX error counters */
+	unsigned cexcr:1;	/* EtherC has CERCR/CEECR */
 	unsigned dual_port:1;	/* Dual EtherC/E-DMAC */
 };
 

^ permalink raw reply

* [PATCH 4/5] sh_eth: add sh_eth_cpu_data::no_tx_cntr flag
From: Sergei Shtylyov @ 2018-03-24 20:11 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <b3706a51-27a4-d78b-998d-9440637ac216@cogentembedded.com>

RZ/A1H (R7S72100) Ether controller doesn't  seem to have the TX counter
registers like TROCR/CDCR/LCCR (or at least they are still undocumented
like some TSU registers), so we bail out of sh_eth_get_stats() early in
this case.  Currently we are calling sh_eth_is_rz_fast_ether() in order
to check for this, but it would be simpler to check the new 'no_tx_cntrs'
bitfield in the 'struct sh_eth_cpu_data'; then we'd be able  to remove
sh_eth_is_rz_fast_ether() as there would be no callers left...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
 drivers/net/ethernet/renesas/sh_eth.c |    8 ++------
 drivers/net/ethernet/renesas/sh_eth.h |    1 +
 2 files changed, 3 insertions(+), 6 deletions(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -455,11 +455,6 @@ static bool sh_eth_is_gether(struct sh_e
 	return mdp->reg_offset == sh_eth_offset_gigabit;
 }
 
-static bool sh_eth_is_rz_fast_ether(struct sh_eth_private *mdp)
-{
-	return mdp->reg_offset == sh_eth_offset_fast_rz;
-}
-
 static void sh_eth_select_mii(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -614,6 +609,7 @@ static struct sh_eth_cpu_data r7s72100_d
 	.xdfar_rw	= 1,
 	.hw_checksum	= 1,
 	.tsu		= 1,
+	.no_tx_cntrs	= 1,
 };
 
 static void sh_eth_chip_reset_r8a7740(struct net_device *ndev)
@@ -2534,7 +2530,7 @@ static struct net_device_stats *sh_eth_g
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 
-	if (sh_eth_is_rz_fast_ether(mdp))
+	if (mdp->cd->no_tx_cntrs)
 		return &ndev->stats;
 
 	if (!mdp->is_opened)
Index: net-next/drivers/net/ethernet/renesas/sh_eth.h
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.h
+++ net-next/drivers/net/ethernet/renesas/sh_eth.h
@@ -514,6 +514,7 @@ struct sh_eth_cpu_data {
 	unsigned rmiimode:1;	/* EtherC has RMIIMODE register */
 	unsigned rtrate:1;	/* EtherC has RTRATE register */
 	unsigned magic:1;	/* EtherC has ECMR.MPDE and ECSR.MPD */
+	unsigned no_tx_cntrs:1;	/* EtherC DOES NOT have TX error counters */
 	unsigned dual_port:1;	/* Dual EtherC/E-DMAC */
 };
 

^ permalink raw reply

* [PATCH 3/5] sh_eth: add sh_eth_cpu_data::xdfar_rw flag
From: Sergei Shtylyov @ 2018-03-24 20:09 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <b3706a51-27a4-d78b-998d-9440637ac216@cogentembedded.com>

The GEther-like controllers have writeable RDFAR/TDFAR, on the others
they are read-only or just absent (on R-Car). Currently we are calling
sh_eth_is_{gether|rz_fast_ether}() in order to check if these registers
can be written to, however it would be simpler to check the new 'xdfar_rw'
bitfield in the 'struct sh_eth_cpu_data'...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
 drivers/net/ethernet/renesas/sh_eth.c |   11 +++++++----
 drivers/net/ethernet/renesas/sh_eth.h |    1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -611,6 +611,7 @@ static struct sh_eth_cpu_data r7s72100_d
 	.rpadir_value   = 2 << 16,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.hw_checksum	= 1,
 	.tsu		= 1,
 };
@@ -659,6 +660,7 @@ static struct sh_eth_cpu_data r8a7740_da
 	.rpadir_value   = 2 << 16,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.hw_checksum	= 1,
 	.tsu		= 1,
 	.select_mii	= 1,
@@ -918,6 +920,7 @@ static struct sh_eth_cpu_data sh7757_dat
 	.rpadir_value   = 2 << 16,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.tsu		= 1,
 	.dual_port	= 1,
 };
@@ -955,6 +958,7 @@ static struct sh_eth_cpu_data sh7734_dat
 	.hw_swap	= 1,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.tsu		= 1,
 	.hw_checksum	= 1,
 	.select_mii	= 1,
@@ -993,6 +997,7 @@ static struct sh_eth_cpu_data sh7763_dat
 	.hw_swap	= 1,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.tsu		= 1,
 	.irq_flags	= IRQF_SHARED,
 	.magic		= 1,
@@ -1301,8 +1306,7 @@ static void sh_eth_ring_format(struct ne
 		/* Rx descriptor address set */
 		if (i == 0) {
 			sh_eth_write(ndev, mdp->rx_desc_dma, RDLAR);
-			if (sh_eth_is_gether(mdp) ||
-			    sh_eth_is_rz_fast_ether(mdp))
+			if (mdp->cd->xdfar_rw)
 				sh_eth_write(ndev, mdp->rx_desc_dma, RDFAR);
 		}
 	}
@@ -1324,8 +1328,7 @@ static void sh_eth_ring_format(struct ne
 		if (i == 0) {
 			/* Tx descriptor address set */
 			sh_eth_write(ndev, mdp->tx_desc_dma, TDLAR);
-			if (sh_eth_is_gether(mdp) ||
-			    sh_eth_is_rz_fast_ether(mdp))
+			if (mdp->cd->xdfar_rw)
 				sh_eth_write(ndev, mdp->tx_desc_dma, TDFAR);
 		}
 	}
Index: net-next/drivers/net/ethernet/renesas/sh_eth.h
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.h
+++ net-next/drivers/net/ethernet/renesas/sh_eth.h
@@ -508,6 +508,7 @@ struct sh_eth_cpu_data {
 	unsigned rpadir:1;	/* E-DMAC have RPADIR */
 	unsigned no_trimd:1;	/* E-DMAC DO NOT have TRIMD */
 	unsigned no_ade:1;	/* E-DMAC DO NOT have ADE bit in EESR */
+	unsigned xdfar_rw:1;	/* E-DMAC has writeable RDFAR/TDFAR */
 	unsigned hw_checksum:1;	/* E-DMAC has CSMR */
 	unsigned select_mii:1;	/* EtherC have RMII_MII (MII select register) */
 	unsigned rmiimode:1;	/* EtherC has RMIIMODE register */

^ permalink raw reply

* [PATCH 2/5] sh_eth: add sh_eth_cpu_data::edtrr_trns value
From: Sergei Shtylyov @ 2018-03-24 20:08 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <b3706a51-27a4-d78b-998d-9440637ac216@cogentembedded.com>

sh_eth_get_edtrr_trns() returns the value to be written to EDTRR in order
to start TX DMA -- this value is different between the GEther-like and
the other controllers. We can replace this function (and thus get rid of
the calls to sh_eth_is_{gether|rz_fast_ether}() by it) with a new field
'edtrr_trns' in the 'struct sh_eth_cpu_data'.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
 drivers/net/ethernet/renesas/sh_eth.c |   27 +++++++++++++++------------
 drivers/net/ethernet/renesas/sh_eth.h |    1 +
 2 files changed, 16 insertions(+), 12 deletions(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -584,6 +584,7 @@ static struct sh_eth_cpu_data r7s72100_d
 
 	.register_type	= SH_ETH_REG_FAST_RZ,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD,
 	.ecsipr_value	= ECSIPR_ICDIP,
 	.eesipr_value	= EESIPR_TWB1IP | EESIPR_TWBIP | EESIPR_TC1IP |
@@ -631,6 +632,7 @@ static struct sh_eth_cpu_data r8a7740_da
 
 	.register_type	= SH_ETH_REG_GIGABIT,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -687,6 +689,7 @@ static struct sh_eth_cpu_data rcar_gen1_
 
 	.register_type	= SH_ETH_REG_FAST_RCAR,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
 	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ADEIP | EESIPR_ECIIP |
@@ -716,6 +719,7 @@ static struct sh_eth_cpu_data rcar_gen2_
 
 	.register_type	= SH_ETH_REG_FAST_RCAR,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP |
 			  ECSIPR_MPDIP,
@@ -765,6 +769,7 @@ static struct sh_eth_cpu_data sh7724_dat
 
 	.register_type	= SH_ETH_REG_FAST_SH4,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
 	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ADEIP | EESIPR_ECIIP |
@@ -809,6 +814,7 @@ static struct sh_eth_cpu_data sh7757_dat
 
 	.register_type	= SH_ETH_REG_FAST_SH4,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
 			  EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
 			  EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -884,6 +890,7 @@ static struct sh_eth_cpu_data sh7757_dat
 
 	.register_type	= SH_ETH_REG_GIGABIT,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -925,6 +932,7 @@ static struct sh_eth_cpu_data sh7734_dat
 
 	.register_type	= SH_ETH_REG_GIGABIT,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -963,6 +971,7 @@ static struct sh_eth_cpu_data sh7763_dat
 
 	.register_type	= SH_ETH_REG_GIGABIT,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -995,6 +1004,7 @@ static struct sh_eth_cpu_data sh7619_dat
 
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
 			  EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
 			  EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -1015,6 +1025,7 @@ static struct sh_eth_cpu_data sh771x_dat
 
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
 			  EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
 			  EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -1094,14 +1105,6 @@ static void read_mac_address(struct net_
 	}
 }
 
-static u32 sh_eth_get_edtrr_trns(struct sh_eth_private *mdp)
-{
-	if (sh_eth_is_gether(mdp) || sh_eth_is_rz_fast_ether(mdp))
-		return EDTRR_TRNS_GETHER;
-	else
-		return EDTRR_TRNS_ETHER;
-}
-
 struct bb_info {
 	void (*set_gate)(void *addr);
 	struct mdiobb_ctrl ctrl;
@@ -1741,9 +1744,9 @@ static void sh_eth_error(struct net_devi
 		sh_eth_tx_free(ndev, true);
 
 		/* SH7712 BUG */
-		if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) {
+		if (edtrr ^ mdp->cd->edtrr_trns) {
 			/* tx dma start */
-			sh_eth_write(ndev, sh_eth_get_edtrr_trns(mdp), EDTRR);
+			sh_eth_write(ndev, mdp->cd->edtrr_trns, EDTRR);
 		}
 		/* wakeup */
 		netif_wake_queue(ndev);
@@ -2502,8 +2505,8 @@ static int sh_eth_start_xmit(struct sk_b
 
 	mdp->cur_tx++;
 
-	if (!(sh_eth_read(ndev, EDTRR) & sh_eth_get_edtrr_trns(mdp)))
-		sh_eth_write(ndev, sh_eth_get_edtrr_trns(mdp), EDTRR);
+	if (!(sh_eth_read(ndev, EDTRR) & mdp->cd->edtrr_trns))
+		sh_eth_write(ndev, mdp->cd->edtrr_trns, EDTRR);
 
 	return NETDEV_TX_OK;
 }
Index: net-next/drivers/net/ethernet/renesas/sh_eth.h
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.h
+++ net-next/drivers/net/ethernet/renesas/sh_eth.h
@@ -479,6 +479,7 @@ struct sh_eth_cpu_data {
 
 	/* mandatory initialize value */
 	int register_type;
+	u32 edtrr_trns;
 	u32 eesipr_value;
 
 	/* optional initialize value */

^ permalink raw reply

* [PATCH 1/5] sh_eth: add sh_eth_cpu_data::soft_reset() method
From: Sergei Shtylyov @ 2018-03-24 20:07 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh
In-Reply-To: <b3706a51-27a4-d78b-998d-9440637ac216@cogentembedded.com>

sh_eth_reset() performs a software reset which is implemented in a
completely different way for the GEther-like controllers vs the other
controllers due to a different layout of EDMR (and other factors) --
it therefore makes sense to convert this function to a mandatory
sh_eth_cpu_data::soft_reset() method and thus get rid of the runtime
controller type check via sh_eth_is_{gether|rz_fast_ether}().

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
 drivers/net/ethernet/renesas/sh_eth.c |  135 ++++++++++++++++++++--------------
 drivers/net/ethernet/renesas/sh_eth.h |    3 
 2 files changed, 83 insertions(+), 55 deletions(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -501,6 +501,62 @@ static void sh_eth_chip_reset(struct net
 	mdelay(1);
 }
 
+static int sh_eth_soft_reset(struct net_device *ndev)
+{
+	sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, EDMR_SRST_ETHER);
+	mdelay(3);
+	sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, 0);
+
+	return 0;
+}
+
+static int sh_eth_check_soft_reset(struct net_device *ndev)
+{
+	int cnt;
+
+	for (cnt = 100; cnt > 0; cnt--) {
+		if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
+			return 0;
+		mdelay(1);
+	}
+
+	netdev_err(ndev, "Device reset failed\n");
+	return -ETIMEDOUT;
+}
+
+static int sh_eth_soft_reset_gether(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+	int ret;
+
+	sh_eth_write(ndev, EDSR_ENALL, EDSR);
+	sh_eth_modify(ndev, EDMR, EDMR_SRST_GETHER, EDMR_SRST_GETHER);
+
+	ret = sh_eth_check_soft_reset(ndev);
+	if (ret)
+		return ret;
+
+	/* Table Init */
+	sh_eth_write(ndev, 0, TDLAR);
+	sh_eth_write(ndev, 0, TDFAR);
+	sh_eth_write(ndev, 0, TDFXR);
+	sh_eth_write(ndev, 0, TDFFR);
+	sh_eth_write(ndev, 0, RDLAR);
+	sh_eth_write(ndev, 0, RDFAR);
+	sh_eth_write(ndev, 0, RDFXR);
+	sh_eth_write(ndev, 0, RDFFR);
+
+	/* Reset HW CRC register */
+	if (mdp->cd->hw_checksum)
+		sh_eth_write(ndev, 0, CSMR);
+
+	/* Select MII mode */
+	if (mdp->cd->select_mii)
+		sh_eth_select_mii(ndev);
+
+	return ret;
+}
+
 static void sh_eth_set_rate_gether(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -521,6 +577,8 @@ static void sh_eth_set_rate_gether(struc
 #ifdef CONFIG_OF
 /* R7S72100 */
 static struct sh_eth_cpu_data r7s72100_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset,
 	.set_duplex	= sh_eth_set_duplex,
 
@@ -565,6 +623,8 @@ static void sh_eth_chip_reset_r8a7740(st
 
 /* R8A7740 */
 static struct sh_eth_cpu_data r8a7740_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset_r8a7740,
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_gether,
@@ -620,6 +680,8 @@ static void sh_eth_set_rate_rcar(struct
 
 /* R-Car Gen1 */
 static struct sh_eth_cpu_data rcar_gen1_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_rcar,
 
@@ -647,6 +709,8 @@ static struct sh_eth_cpu_data rcar_gen1_
 
 /* R-Car Gen2 and RZ/G1 */
 static struct sh_eth_cpu_data rcar_gen2_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_rcar,
 
@@ -694,6 +758,8 @@ static void sh_eth_set_rate_sh7724(struc
 
 /* SH7724 */
 static struct sh_eth_cpu_data sh7724_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_sh7724,
 
@@ -736,6 +802,8 @@ static void sh_eth_set_rate_sh7757(struc
 
 /* SH7757 */
 static struct sh_eth_cpu_data sh7757_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_sh7757,
 
@@ -808,6 +876,8 @@ static void sh_eth_set_rate_giga(struct
 
 /* SH7757(GETHERC) */
 static struct sh_eth_cpu_data sh7757_data_giga = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset_giga,
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_giga,
@@ -847,6 +917,8 @@ static struct sh_eth_cpu_data sh7757_dat
 
 /* SH7734 */
 static struct sh_eth_cpu_data sh7734_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset,
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_gether,
@@ -883,6 +955,8 @@ static struct sh_eth_cpu_data sh7734_dat
 
 /* SH7763 */
 static struct sh_eth_cpu_data sh7763_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset,
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_gether,
@@ -917,6 +991,8 @@ static struct sh_eth_cpu_data sh7763_dat
 };
 
 static struct sh_eth_cpu_data sh7619_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -935,6 +1011,8 @@ static struct sh_eth_cpu_data sh7619_dat
 };
 
 static struct sh_eth_cpu_data sh771x_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -974,59 +1052,6 @@ static void sh_eth_set_default_cpu_data(
 		cd->trscer_err_mask = DEFAULT_TRSCER_ERR_MASK;
 }
 
-static int sh_eth_check_reset(struct net_device *ndev)
-{
-	int cnt;
-
-	for (cnt = 100; cnt > 0; cnt--) {
-		if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
-			return 0;
-		mdelay(1);
-	}
-
-	netdev_err(ndev, "Device reset failed\n");
-	return -ETIMEDOUT;
-}
-
-static int sh_eth_reset(struct net_device *ndev)
-{
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-	int ret = 0;
-
-	if (sh_eth_is_gether(mdp) || sh_eth_is_rz_fast_ether(mdp)) {
-		sh_eth_write(ndev, EDSR_ENALL, EDSR);
-		sh_eth_modify(ndev, EDMR, EDMR_SRST_GETHER, EDMR_SRST_GETHER);
-
-		ret = sh_eth_check_reset(ndev);
-		if (ret)
-			return ret;
-
-		/* Table Init */
-		sh_eth_write(ndev, 0x0, TDLAR);
-		sh_eth_write(ndev, 0x0, TDFAR);
-		sh_eth_write(ndev, 0x0, TDFXR);
-		sh_eth_write(ndev, 0x0, TDFFR);
-		sh_eth_write(ndev, 0x0, RDLAR);
-		sh_eth_write(ndev, 0x0, RDFAR);
-		sh_eth_write(ndev, 0x0, RDFXR);
-		sh_eth_write(ndev, 0x0, RDFFR);
-
-		/* Reset HW CRC register */
-		if (mdp->cd->hw_checksum)
-			sh_eth_write(ndev, 0x0, CSMR);
-
-		/* Select MII mode */
-		if (mdp->cd->select_mii)
-			sh_eth_select_mii(ndev);
-	} else {
-		sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, EDMR_SRST_ETHER);
-		mdelay(3);
-		sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, 0);
-	}
-
-	return ret;
-}
-
 static void sh_eth_set_receive_align(struct sk_buff *skb)
 {
 	uintptr_t reserve = (uintptr_t)skb->data & (SH_ETH_RX_ALIGN - 1);
@@ -1362,7 +1387,7 @@ static int sh_eth_dev_init(struct net_de
 	int ret;
 
 	/* Soft Reset */
-	ret = sh_eth_reset(ndev);
+	ret = mdp->cd->soft_reset(ndev);
 	if (ret)
 		return ret;
 
@@ -1463,7 +1488,7 @@ static void sh_eth_dev_exit(struct net_d
 	 */
 	msleep(2); /* max frame time at 10 Mbps < 1250 us */
 	sh_eth_get_stats(ndev);
-	sh_eth_reset(ndev);
+	mdp->cd->soft_reset(ndev);
 
 	/* Set MAC address again */
 	update_mac_address(ndev);
Index: net-next/drivers/net/ethernet/renesas/sh_eth.h
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.h
+++ net-next/drivers/net/ethernet/renesas/sh_eth.h
@@ -469,6 +469,9 @@ struct sh_eth_rxdesc {
 
 /* This structure is used by each CPU dependency handling. */
 struct sh_eth_cpu_data {
+	/* mandatory functions */
+	int (*soft_reset)(struct net_device *ndev);
+
 	/* optional functions */
 	void (*chip_reset)(struct net_device *ndev);
 	void (*set_duplex)(struct net_device *ndev);

^ permalink raw reply

* Re: [patch net-next RFC 00/12] devlink: introduce port flavours and common phys_port_name generation
From: Florian Fainelli @ 2018-03-24 20:05 UTC (permalink / raw)
  To: Andrew Lunn, Jiri Pirko
  Cc: netdev, davem, idosch, jakub.kicinski, mlxsw, vivien.didelot,
	michael.chan, ganeshgr, saeedm, simon.horman,
	pieter.jansenvanvuuren, john.hurley, dirk.vandermerwe,
	alexander.h.duyck, ogerlitz, dsahern, vijaya.guvva,
	satananda.burla, raghu.vatsavayi, felix.manlunas, gospo,
	sathya.perla, vasundhara-v.volam, tariqt, eranbe,
	jeffrey.t.kirsher
In-Reply-To: <20180324190032.GH31941@lunn.ch>



On 24/03/2018 12:00, Andrew Lunn wrote:
>>>>>>>>> root@zii-devel-b:~# ./iproute2/devlink/devlink port 
>>>>>>>>> mdio_bus/0.1:00/0: type eth netdev lan0 flavour physical number 0
>>>>>>>>> mdio_bus/0.1:00/1: type eth netdev lan1 flavour physical number 1
>>>>>>>>> mdio_bus/0.1:00/2: type eth netdev lan2 flavour physical number 2
>>>>>>>>> mdio_bus/0.1:00/3: type notset
>>>>>>>>> mdio_bus/0.1:00/4: type notset
>>>>>>>>> mdio_bus/0.1:00/5: type notset flavour dsa number 5
>>>>>>>>> mdio_bus/0.1:00/6: type notset flavour cpu number 6
>>>>>>>>> mdio_bus/0.2:00/0: type eth netdev lan3 flavour physical number 0
>>>>>>>>> mdio_bus/0.2:00/1: type eth netdev lan4 flavour physical number 1
>>>>>>>>> mdio_bus/0.2:00/2: type eth netdev lan5 flavour physical number 2
>>>>>>>>> mdio_bus/0.2:00/3: type notset
>>>>>>>>> mdio_bus/0.2:00/4: type notset
>>>>>>>>> mdio_bus/0.2:00/5: type notset flavour dsa number 5
>>>>>>>>> mdio_bus/0.2:00/6: type notset flavour dsa number 6
>>>>>>>>> mdio_bus/0.4:00/0: type eth netdev lan6 flavour physical number 0
>>>>>>>>> mdio_bus/0.4:00/1: type eth netdev lan7 flavour physical number 1
>>>>>>>>> mdio_bus/0.4:00/2: type eth netdev lan8 flavour physical number 2
>>>>>>>>> mdio_bus/0.4:00/3: type eth netdev optical3 flavour physical
> 
>>> What is an appropriate attribute to use to return the physical port number within a given switch?
>>
>> I don't think there's one out there. I tried to add it in this patchset.
> 
> Florian, do you need it to be unique per switch, or DSA cluster. The
> current solution is clearly not unique across a cluster.

It needs to be unique per switch, we have the switch identifier through
phys_switch_id for higher level "uniqueness".
-- 
Florian

^ permalink raw reply

* [PATCH 0/2] sh_eth: unify the SoC feature checks
From: Sergei Shtylyov @ 2018-03-24 20:04 UTC (permalink / raw)
  To: netdev; +Cc: linux-renesas-soc, linux-sh

Hello!

Here's a set of 5 patches against DaveM's 'net-next.git' repo.

The Ether driver sometimes uses the bit fields in 'struct sh_eth_cpu_data'
to check which Ether registers exist in a certain SoC and sometimes it uses
sh_eth_is_{gether|rz_fast_ether}() which basically compares 2 pointers (1 of
them being constant) -- the latter is definitely not a strongest feature of
the RISC CPUs (be it SH or ARM), so I decided to get rid of this type of
the feature checks in favour of the bit fields (I've also made use of a
32-bit value and method pointer where appropriate)...

[1/5] sh_eth: add sh_eth_cpu_data::soft_reset() method
[2/5] sh_eth: add sh_eth_cpu_data::edtrr_trns value
[3/5] sh_eth: add sh_eth_cpu_data::xdfar_rw flag
[4/5] sh_eth: add sh_eth_cpu_data::no_tx_cntr flag
[5/5] sh_eth: add sh_eth_cpu_data::cexcr flag

MBR, Sergei

^ permalink raw reply

* Re: switchdev and dsa
From: Andrew Lunn @ 2018-03-24 19:02 UTC (permalink / raw)
  To: Ran Shalit; +Cc: netdev
In-Reply-To: <CAJ2oMhK8FU8hPL6L0B6-Rujz5TXNGXQm7tF9=OtWtMa4ZZX5Tg@mail.gmail.com>

On Sat, Mar 24, 2018 at 07:12:15PM +0300, Ran Shalit wrote:
> Hello,
> 
> I am new with switchdev and dsa.
> I would please like to ask if configuring a device tree/board file
> with a switch (for example marvell switch) will provide all ports
> behins the switch as valid interface (ethX) ports in userspace
> ifconfig command ?

Hi Ranran

Yes, that is the idea.

https://www.netdevconf.org/2.1/session.html?lunn_didelot_fainelli

	Andrew

^ permalink raw reply

* Re: [patch net-next RFC 00/12] devlink: introduce port flavours and common phys_port_name generation
From: Andrew Lunn @ 2018-03-24 19:00 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Florian Fainelli, netdev, davem, idosch, jakub.kicinski, mlxsw,
	vivien.didelot, michael.chan, ganeshgr, saeedm, simon.horman,
	pieter.jansenvanvuuren, john.hurley, dirk.vandermerwe,
	alexander.h.duyck, ogerlitz, dsahern, vijaya.guvva,
	satananda.burla, raghu.vatsavayi, felix.manlunas, gospo,
	sathya.perla, vasundhara-v.volam, tariqt, eranbe,
	jeffrey.t.kirsher
In-Reply-To: <20180324174248.GH1891@nanopsycho>

> >>>>>> >root@zii-devel-b:~# ./iproute2/devlink/devlink port 
> >>>>>> >mdio_bus/0.1:00/0: type eth netdev lan0 flavour physical number 0
> >>>>>> >mdio_bus/0.1:00/1: type eth netdev lan1 flavour physical number 1
> >>>>>> >mdio_bus/0.1:00/2: type eth netdev lan2 flavour physical number 2
> >>>>>> >mdio_bus/0.1:00/3: type notset
> >>>>>> >mdio_bus/0.1:00/4: type notset
> >>>>>> >mdio_bus/0.1:00/5: type notset flavour dsa number 5
> >>>>>> >mdio_bus/0.1:00/6: type notset flavour cpu number 6
> >>>>>> >mdio_bus/0.2:00/0: type eth netdev lan3 flavour physical number 0
> >>>>>> >mdio_bus/0.2:00/1: type eth netdev lan4 flavour physical number 1
> >>>>>> >mdio_bus/0.2:00/2: type eth netdev lan5 flavour physical number 2
> >>>>>> >mdio_bus/0.2:00/3: type notset
> >>>>>> >mdio_bus/0.2:00/4: type notset
> >>>>>> >mdio_bus/0.2:00/5: type notset flavour dsa number 5
> >>>>>> >mdio_bus/0.2:00/6: type notset flavour dsa number 6
> >>>>>> >mdio_bus/0.4:00/0: type eth netdev lan6 flavour physical number 0
> >>>>>> >mdio_bus/0.4:00/1: type eth netdev lan7 flavour physical number 1
> >>>>>> >mdio_bus/0.4:00/2: type eth netdev lan8 flavour physical number 2
> >>>>>> >mdio_bus/0.4:00/3: type eth netdev optical3 flavour physical

> >What is an appropriate attribute to use to return the physical port number within a given switch?
> 
> I don't think there's one out there. I tried to add it in this patchset.

Florian, do you need it to be unique per switch, or DSA cluster. The
current solution is clearly not unique across a cluster.

	Andrew

^ permalink raw reply

* Re: [PATCH] of_net: Implement of_get_nvmem_mac_address helper
From: Andrew Lunn @ 2018-03-24 18:53 UTC (permalink / raw)
  To: Mike Looijmans
  Cc: Florian Fainelli, netdev, linux-kernel, devicetree, robh+dt,
	frowand.list
In-Reply-To: <cb12ea29-a7b0-cf66-7f5a-c09d36f443f0@topic.nl>

> A quick survey for the of_get_mac_address users learns that most of them do
> a memcpy (or similar) right after it, so for these drivers the
> "of_get_nvmem_mac_address" style signature that performs the memcpy (or
> better, ether_addr_copy) is a better fit, e.g.:
> 
> int of_get_mac_address(struct device_node *np, void *addr)

Hi Mike

This is a nicer solution, but it is quite a lot of work, there are a
lot of users. Maybe Coccinelle can help?

    Andrew

^ permalink raw reply

* Re: [PATCH net-next RFC V1 5/5] net: mdio: Add a driver for InES time stamping IP core.
From: Andrew Lunn @ 2018-03-24 18:48 UTC (permalink / raw)
  To: Richard Cochran
  Cc: netdev, devicetree, David Miller, Florian Fainelli, Mark Rutland,
	Miroslav Lichvar, Rob Herring, Willem de Bruijn
In-Reply-To: <20180324171219.vh6wcxbem3hyhkuu@localhost>

On Sat, Mar 24, 2018 at 10:12:19AM -0700, Richard Cochran wrote:
> On Thu, Mar 22, 2018 at 12:50:07AM +0100, Andrew Lunn wrote:
> > How clever is this device? Can it tell the difference between
> > 1000Base-X and SGMII? Can it figure out that the MAC is repeating
> > every bit 100 times and so has dropped to 10Mbits? Does it understand
> > EEE? Does it need to know if RGMII or RGMII-ID is being used?
> 
> This device isn't configurable at run time for any of those AFAICT.
> Those decisions are made when the IP core is synthesized as part of
> the HW design.

Hi Richard

Should we be designing an API for this specific device, or should we
think of the general case?

The general case would be a device which passes through MII signals,
and has some sort of control interface.

The control interface could be MMIO as here, MDIO, I2C, SPI, etc.

The MII interfaces could be MII, RMII, GMII, RGMII, SGMII, QSGMII,
XGMII, etc. Generally, a device which can do GMII can also do RGMII,
etc.

The device probably needs to know about the MII bus. If it is
synthesized as part of the hardware design, it might be able to get
this information directly from the MAC IP core. An external device
probably needs to be told. Especially when it comes to RGII, where the
clocking is 'interesting'.

As you already said, you need to know link speed. Do we want to be
able to restrict the MAC to specific link speeds? The Marvell MACs can
do a 2.5Gbps SGMII, by speeding up the clock. I can image a PTP device
not supporting this, the MII breaks, but PHY looks happy. To limit
this cleanly, you at least need to mask out the unsupported auto-neg
offered speeds.

With EEE there is probably two cases:

1) The PTP device understands it, but needs to be told it has been enabled.
2) The PTP device breaks when EEE is enabled, so EEE needs to be disabled.

To me, 1) seems pretty unlikely. But 2) is possible. Disabling EEE
again means changing the auto-neg parameters, so that EEE is not
offered.

As far as i can see, you have three basic problems:

1) How do you associate the PTP device to the netdev?
2) How do you get the information you need to configure the PTP device
3) How do you limit the MAC/PHY to what the PTP device can do.

For 1) you need some sort of phandle, either in the MAC, or the PHY
section of device tree. There is currently no generic code for
handling MAC OF nodes. phylib however does do all the generic work for
PHY nodes in DT.

2) you can get some of the information you need via notifiers. e.g,
NETDEV_CHANGE tells you something has happen, up/down, etc. But i'm
not sure it is 100% reliable. A PHY might be able to do 1G->100M
without going down and back up again, so you don't get a NETDEV_CHANGE
event. There is no notification of how the MII bus is being used.  So
i think notifiers is probably not the best bet.

phylib does have all this information. It is phylib that calls the MAC
with link speed information. When the MAC connects to the PHY, it
passes the MII mode, and when the PHY requests the MII mode changes,
phylib knows. The MAC has to call phy_init_eee() to see if the PHY is
EEE capable. phylib also tells the MAC what speeds the PHY is capable
off, so it is in the position to mask out speeds the PTP device does
not support, etc.

So i really think you need to cleanly integrate into phylib and
phylink.

	Andrew

^ permalink raw reply

* [PATCH bpf-next 2/2] bpf: Add bpf_verifier_vlog() and bpf_verifier_log_needed()
From: Martin KaFai Lau @ 2018-03-24 18:44 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180324184423.3053682-1-kafai@fb.com>

The BTF (BPF Type Format) verifier needs to reuse the current
BPF verifier log.  Hence, it requires the following changes:

(1) Expose log_write() in verifier.c for other users.
    Its name is renamed to bpf_verifier_vlog().

(2) The BTF verifier also needs to check
'log->level && log->ubuf && !bpf_verifier_log_full(log);'
independently outside of the current log_write().  It is
because the BTF verifier will do one-check before
making multiple calls to btf_verifier_vlog to log
the details of a type.

Hence, this check is also re-factored to a new function
bpf_verifier_log_needed().  Since it is re-factored,
we can check it before va_start() in the current
bpf_verifier_log_write() and verbose().

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
---
 include/linux/bpf_verifier.h |  7 +++++++
 kernel/bpf/verifier.c        | 19 +++++++++++--------
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c30668414b22..7e61c395fddf 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -166,6 +166,11 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 	return log->len_used >= log->len_total - 1;
 }
 
+static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
+{
+	return log->level && log->ubuf && !bpf_verifier_log_full(log);
+}
+
 #define BPF_MAX_SUBPROGS 256
 
 /* single container for all structs
@@ -192,6 +197,8 @@ struct bpf_verifier_env {
 	u32 subprog_cnt;
 };
 
+void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
+		       va_list args);
 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 					   const char *fmt, ...);
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1e84e02ff733..8acd2207e412 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -168,15 +168,11 @@ struct bpf_call_arg_meta {
 
 static DEFINE_MUTEX(bpf_verifier_lock);
 
-static void log_write(struct bpf_verifier_env *env, const char *fmt,
-		      va_list args)
+void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
+		       va_list args)
 {
-	struct bpf_verifier_log *log = &env->log;
 	unsigned int n;
 
-	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
-		return;
-
 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 
 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
@@ -200,18 +196,25 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 {
 	va_list args;
 
+	if (!bpf_verifier_log_needed(&env->log))
+		return;
+
 	va_start(args, fmt);
-	log_write(env, fmt, args);
+	bpf_verifier_vlog(&env->log, fmt, args);
 	va_end(args);
 }
 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 
 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 {
+	struct bpf_verifier_env *env = private_data;
 	va_list args;
 
+	if (!bpf_verifier_log_needed(&env->log))
+		return;
+
 	va_start(args, fmt);
-	log_write(private_data, fmt, args);
+	bpf_verifier_vlog(&env->log, fmt, args);
 	va_end(args);
 }
 
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 1/2] bpf: Rename bpf_verifer_log
From: Martin KaFai Lau @ 2018-03-24 18:44 UTC (permalink / raw)
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180324184423.3053682-1-kafai@fb.com>

bpf_verifer_log =>
bpf_verifier_log

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
---
 include/linux/bpf_verifier.h | 6 +++---
 kernel/bpf/verifier.c        | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6b66cd1aa0b9..c30668414b22 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -153,7 +153,7 @@ struct bpf_insn_aux_data {
 
 #define BPF_VERIFIER_TMP_LOG_SIZE	1024
 
-struct bpf_verifer_log {
+struct bpf_verifier_log {
 	u32 level;
 	char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
 	char __user *ubuf;
@@ -161,7 +161,7 @@ struct bpf_verifer_log {
 	u32 len_total;
 };
 
-static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log)
+static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 {
 	return log->len_used >= log->len_total - 1;
 }
@@ -185,7 +185,7 @@ struct bpf_verifier_env {
 	bool allow_ptr_leaks;
 	bool seen_direct_write;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
-	struct bpf_verifer_log log;
+	struct bpf_verifier_log log;
 	u32 subprog_starts[BPF_MAX_SUBPROGS];
 	/* computes the stack depth of each bpf function */
 	u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e93a6e48641b..1e84e02ff733 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -171,7 +171,7 @@ static DEFINE_MUTEX(bpf_verifier_lock);
 static void log_write(struct bpf_verifier_env *env, const char *fmt,
 		      va_list args)
 {
-	struct bpf_verifer_log *log = &env->log;
+	struct bpf_verifier_log *log = &env->log;
 	unsigned int n;
 
 	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
@@ -5611,7 +5611,7 @@ static void free_states(struct bpf_verifier_env *env)
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 {
 	struct bpf_verifier_env *env;
-	struct bpf_verifer_log *log;
+	struct bpf_verifier_log *log;
 	int ret = -EINVAL;
 
 	/* no program is valid */
-- 
2.9.5

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox