Netdev List
 help / color / mirror / Atom feed
* [PATCH 13/39] netfilter: conntrack: simplify init/uninit of L4 protocol trackers
From: Pablo Neira Ayuso @ 2016-11-13 22:25 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1479075933-4491-1-git-send-email-pablo@netfilter.org>

From: Davide Caratti <dcaratti@redhat.com>

modify registration and deregistration of layer-4 protocol trackers to
facilitate inclusion of new elements into the current list of builtin
protocols. Both builtin (TCP, UDP, ICMP) and non-builtin (DCCP, GRE, SCTP,
UDPlite) layer-4 protocol trackers usually register/deregister themselves
using consecutive calls to nf_ct_l4proto_{,pernet}_{,un}register(...).
This sequence is interrupted and rolled back in case of error; in order to
simplify addition of builtin protocols, the input of the above functions
has been modified to allow registering/unregistering multiple protocols.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h   | 18 ++++--
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 76 +++++++----------------
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 78 ++++++++---------------
 net/netfilter/nf_conntrack_proto.c             | 85 ++++++++++++++++++++++----
 net/netfilter/nf_conntrack_proto_dccp.c        | 48 ++++-----------
 net/netfilter/nf_conntrack_proto_gre.c         | 11 ++--
 net/netfilter/nf_conntrack_proto_sctp.c        | 50 ++++-----------
 net/netfilter/nf_conntrack_proto_udplite.c     | 50 ++++-----------
 8 files changed, 179 insertions(+), 237 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index de629f1520df..2152b70626d5 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -125,14 +125,24 @@ struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
 void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
 
 /* Protocol pernet registration. */
+int nf_ct_l4proto_pernet_register_one(struct net *net,
+				      struct nf_conntrack_l4proto *proto);
+void nf_ct_l4proto_pernet_unregister_one(struct net *net,
+					 struct nf_conntrack_l4proto *proto);
 int nf_ct_l4proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l4proto *proto);
+				  struct nf_conntrack_l4proto *proto[],
+				  unsigned int num_proto);
 void nf_ct_l4proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l4proto *proto);
+				     struct nf_conntrack_l4proto *proto[],
+				     unsigned int num_proto);
 
 /* Protocol global registration. */
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto);
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto);
+int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto);
+void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto);
+int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[],
+			   unsigned int num_proto);
+void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[],
+			      unsigned int num_proto);
 
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 713c09a74b90..7130ed5dc1fa 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -336,47 +336,34 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
 MODULE_ALIAS("ip_conntrack");
 MODULE_LICENSE("GPL");
 
+static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
+	&nf_conntrack_l4proto_tcp4,
+	&nf_conntrack_l4proto_udp4,
+	&nf_conntrack_l4proto_icmp,
+};
+
 static int ipv4_net_init(struct net *net)
 {
 	int ret = 0;
 
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_tcp4: pernet registration failed\n");
-		goto out_tcp;
-	}
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_udp4: pernet registration failed\n");
-		goto out_udp;
-	}
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
-	if (ret < 0) {
-		pr_err("nf_conntrack_icmp4: pernet registration failed\n");
-		goto out_icmp;
-	}
+	ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+					    ARRAY_SIZE(builtin_l4proto4));
+	if (ret < 0)
+		return ret;
 	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
 	if (ret < 0) {
 		pr_err("nf_conntrack_ipv4: pernet registration failed\n");
-		goto out_ipv4;
+		nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
+						ARRAY_SIZE(builtin_l4proto4));
 	}
-	return 0;
-out_ipv4:
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
-out_icmp:
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
-out_udp:
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
-out_tcp:
 	return ret;
 }
 
 static void ipv4_net_exit(struct net *net)
 {
 	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
+					ARRAY_SIZE(builtin_l4proto4));
 }
 
 static struct pernet_operations ipv4_net_ops = {
@@ -410,37 +397,21 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 		goto cleanup_pernet;
 	}
 
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
+	ret = nf_ct_l4proto_register(builtin_l4proto4,
+				     ARRAY_SIZE(builtin_l4proto4));
+	if (ret < 0)
 		goto cleanup_hooks;
-	}
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
-		goto cleanup_tcp4;
-	}
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
-		goto cleanup_udp4;
-	}
 
 	ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
 	if (ret < 0) {
 		pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
-		goto cleanup_icmpv4;
+		goto cleanup_l4proto;
 	}
 
 	return ret;
- cleanup_icmpv4:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
- cleanup_udp4:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
- cleanup_tcp4:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+cleanup_l4proto:
+	nf_ct_l4proto_unregister(builtin_l4proto4,
+				 ARRAY_SIZE(builtin_l4proto4));
  cleanup_hooks:
 	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
  cleanup_pernet:
@@ -454,9 +425,8 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 {
 	synchronize_net();
 	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+	nf_ct_l4proto_unregister(builtin_l4proto4,
+				 ARRAY_SIZE(builtin_l4proto4));
 	nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
 	unregister_pernet_subsys(&ipv4_net_ops);
 	nf_unregister_sockopt(&so_getorigdst);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 963ee3848675..500be28ff563 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -336,47 +336,35 @@ static struct nf_sockopt_ops so_getorigdst6 = {
 	.owner		= THIS_MODULE,
 };
 
+static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
+	&nf_conntrack_l4proto_tcp6,
+	&nf_conntrack_l4proto_udp6,
+	&nf_conntrack_l4proto_icmpv6,
+};
+
 static int ipv6_net_init(struct net *net)
 {
 	int ret = 0;
 
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_tcp6: pernet registration failed\n");
-		goto out;
-	}
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_udp6: pernet registration failed\n");
-		goto cleanup_tcp6;
-	}
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_icmp6: pernet registration failed\n");
-		goto cleanup_udp6;
-	}
+	ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
+					    ARRAY_SIZE(builtin_l4proto6));
+	if (ret < 0)
+		return ret;
+
 	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
 	if (ret < 0) {
 		pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
-		goto cleanup_icmpv6;
+		nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
+						ARRAY_SIZE(builtin_l4proto6));
 	}
-	return 0;
- cleanup_icmpv6:
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
- cleanup_udp6:
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
- cleanup_tcp6:
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
- out:
 	return ret;
 }
 
 static void ipv6_net_exit(struct net *net)
 {
 	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
+	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
+					ARRAY_SIZE(builtin_l4proto6));
 }
 
 static struct pernet_operations ipv6_net_ops = {
@@ -409,37 +397,20 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 		goto cleanup_pernet;
 	}
 
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n");
+	ret = nf_ct_l4proto_register(builtin_l4proto6,
+				     ARRAY_SIZE(builtin_l4proto6));
+	if (ret < 0)
 		goto cleanup_hooks;
-	}
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n");
-		goto cleanup_tcp6;
-	}
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n");
-		goto cleanup_udp6;
-	}
 
 	ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
 	if (ret < 0) {
 		pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
-		goto cleanup_icmpv6;
+		goto cleanup_l4proto;
 	}
 	return ret;
-
- cleanup_icmpv6:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- cleanup_udp6:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- cleanup_tcp6:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+cleanup_l4proto:
+	nf_ct_l4proto_unregister(builtin_l4proto6,
+				 ARRAY_SIZE(builtin_l4proto6));
  cleanup_hooks:
 	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
  cleanup_pernet:
@@ -453,9 +424,8 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
 {
 	synchronize_net();
 	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+	nf_ct_l4proto_unregister(builtin_l4proto6,
+				 ARRAY_SIZE(builtin_l4proto6));
 	nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
 	unregister_pernet_subsys(&ipv6_net_ops);
 	nf_unregister_sockopt(&so_getorigdst6);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 8d2c7d8c666a..9bd34647225a 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -281,15 +281,15 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
 
 /* FIXME: Allow NULL functions and sub in pointers to generic for
    them. --RR */
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 
 	if (l4proto->l3proto >= PF_MAX)
 		return -EBUSY;
 
-	if ((l4proto->to_nlattr && !l4proto->nlattr_size)
-		|| (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
+	if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
+	    (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
 		return -EINVAL;
 
 	mutex_lock(&nf_ct_proto_mutex);
@@ -307,7 +307,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 		}
 
 		for (i = 0; i < MAX_NF_CT_PROTO; i++)
-			RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
+			RCU_INIT_POINTER(proto_array[i],
+					 &nf_conntrack_l4proto_generic);
 
 		/* Before making proto_array visible to lockless readers,
 		 * we must make sure its content is committed to memory.
@@ -335,10 +336,10 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 	mutex_unlock(&nf_ct_proto_mutex);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
 
-int nf_ct_l4proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_pernet_register_one(struct net *net,
+				      struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 	struct nf_proto_net *pn = NULL;
@@ -361,9 +362,9 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
 
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
 {
 	BUG_ON(l4proto->l3proto >= PF_MAX);
 
@@ -378,10 +379,10 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 
 	synchronize_rcu();
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
 
-void nf_ct_l4proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_pernet_unregister_one(struct net *net,
+					 struct nf_conntrack_l4proto *l4proto)
 {
 	struct nf_proto_net *pn = NULL;
 
@@ -395,6 +396,66 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
 }
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
+
+int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
+			   unsigned int num_proto)
+{
+	int ret = -EINVAL, ver;
+	unsigned int i;
+
+	for (i = 0; i < num_proto; i++) {
+		ret = nf_ct_l4proto_register_one(l4proto[i]);
+		if (ret < 0)
+			break;
+	}
+	if (i != num_proto) {
+		ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
+		pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
+		       ver, l4proto[i]->name, ver);
+		nf_ct_l4proto_unregister(l4proto, i);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
+
+int nf_ct_l4proto_pernet_register(struct net *net,
+				  struct nf_conntrack_l4proto *l4proto[],
+				  unsigned int num_proto)
+{
+	int ret = -EINVAL;
+	unsigned int i;
+
+	for (i = 0; i < num_proto; i++) {
+		ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
+		if (ret < 0)
+			break;
+	}
+	if (i != num_proto) {
+		pr_err("nf_conntrack_%s%d: pernet registration failed\n",
+		       l4proto[i]->name,
+		       l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
+		nf_ct_l4proto_pernet_unregister(net, l4proto, i);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
+
+void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
+			      unsigned int num_proto)
+{
+	while (num_proto-- != 0)
+		nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
+
+void nf_ct_l4proto_pernet_unregister(struct net *net,
+				     struct nf_conntrack_l4proto *l4proto[],
+				     unsigned int num_proto)
+{
+	while (num_proto-- != 0)
+		nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
+}
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
 int nf_conntrack_proto_pernet_init(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index a45bee52dccc..ac8976964975 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -936,30 +936,21 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
 	.init_net		= dccp_init_net,
 };
 
+static struct nf_conntrack_l4proto *dccp_proto[] = {
+	&dccp_proto4,
+	&dccp_proto6,
+};
+
 static __net_init int dccp_net_init(struct net *net)
 {
-	int ret = 0;
-	ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_dccp4: pernet registration failed.\n");
-		goto out;
-	}
-	ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_dccp6: pernet registration failed.\n");
-		goto cleanup_dccp4;
-	}
-	return 0;
-cleanup_dccp4:
-	nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
-out:
-	return ret;
+	return nf_ct_l4proto_pernet_register(net, dccp_proto,
+					     ARRAY_SIZE(dccp_proto));
 }
 
 static __net_exit void dccp_net_exit(struct net *net)
 {
-	nf_ct_l4proto_pernet_unregister(net, &dccp_proto6);
-	nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
+	nf_ct_l4proto_pernet_unregister(net, dccp_proto,
+					ARRAY_SIZE(dccp_proto));
 }
 
 static struct pernet_operations dccp_net_ops = {
@@ -975,29 +966,16 @@ static int __init nf_conntrack_proto_dccp_init(void)
 
 	ret = register_pernet_subsys(&dccp_net_ops);
 	if (ret < 0)
-		goto out_pernet;
-
-	ret = nf_ct_l4proto_register(&dccp_proto4);
-	if (ret < 0)
-		goto out_dccp4;
-
-	ret = nf_ct_l4proto_register(&dccp_proto6);
+		return ret;
+	ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto));
 	if (ret < 0)
-		goto out_dccp6;
-
-	return 0;
-out_dccp6:
-	nf_ct_l4proto_unregister(&dccp_proto4);
-out_dccp4:
-	unregister_pernet_subsys(&dccp_net_ops);
-out_pernet:
+		unregister_pernet_subsys(&dccp_net_ops);
 	return ret;
 }
 
 static void __exit nf_conntrack_proto_dccp_fini(void)
 {
-	nf_ct_l4proto_unregister(&dccp_proto6);
-	nf_ct_l4proto_unregister(&dccp_proto4);
+	nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto));
 	unregister_pernet_subsys(&dccp_net_ops);
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9a715f88b2f1..ff405c9183f1 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -396,7 +396,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 static int proto_gre_net_init(struct net *net)
 {
 	int ret = 0;
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4);
+
+	ret = nf_ct_l4proto_pernet_register_one(net,
+						&nf_conntrack_l4proto_gre4);
 	if (ret < 0)
 		pr_err("nf_conntrack_gre4: pernet registration failed.\n");
 	return ret;
@@ -404,7 +406,7 @@ static int proto_gre_net_init(struct net *net)
 
 static void proto_gre_net_exit(struct net *net)
 {
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4);
+	nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4);
 	nf_ct_gre_keymap_flush(net);
 }
 
@@ -422,8 +424,7 @@ static int __init nf_ct_proto_gre_init(void)
 	ret = register_pernet_subsys(&proto_gre_net_ops);
 	if (ret < 0)
 		goto out_pernet;
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4);
+	ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4);
 	if (ret < 0)
 		goto out_gre4;
 
@@ -436,7 +437,7 @@ static int __init nf_ct_proto_gre_init(void)
 
 static void __exit nf_ct_proto_gre_fini(void)
 {
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+	nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4);
 	unregister_pernet_subsys(&proto_gre_net_ops);
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 982ea62606c7..17c0ade23fd8 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -816,32 +816,21 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.init_net		= sctp_init_net,
 };
 
+static struct nf_conntrack_l4proto *sctp_proto[] = {
+	&nf_conntrack_l4proto_sctp4,
+	&nf_conntrack_l4proto_sctp6,
+};
+
 static int sctp_net_init(struct net *net)
 {
-	int ret = 0;
-
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_sctp4: pernet registration failed.\n");
-		goto out;
-	}
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_sctp6: pernet registration failed.\n");
-		goto cleanup_sctp4;
-	}
-	return 0;
-
-cleanup_sctp4:
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
-out:
-	return ret;
+	return nf_ct_l4proto_pernet_register(net, sctp_proto,
+					     ARRAY_SIZE(sctp_proto));
 }
 
 static void sctp_net_exit(struct net *net)
 {
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6);
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
+	nf_ct_l4proto_pernet_unregister(net, sctp_proto,
+					ARRAY_SIZE(sctp_proto));
 }
 
 static struct pernet_operations sctp_net_ops = {
@@ -857,29 +846,16 @@ static int __init nf_conntrack_proto_sctp_init(void)
 
 	ret = register_pernet_subsys(&sctp_net_ops);
 	if (ret < 0)
-		goto out_pernet;
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4);
-	if (ret < 0)
-		goto out_sctp4;
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6);
+		return ret;
+	ret = nf_ct_l4proto_register(sctp_proto, ARRAY_SIZE(sctp_proto));
 	if (ret < 0)
-		goto out_sctp6;
-
-	return 0;
-out_sctp6:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
-out_sctp4:
-	unregister_pernet_subsys(&sctp_net_ops);
-out_pernet:
+		unregister_pernet_subsys(&sctp_net_ops);
 	return ret;
 }
 
 static void __exit nf_conntrack_proto_sctp_fini(void)
 {
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
+	nf_ct_l4proto_unregister(sctp_proto, ARRAY_SIZE(sctp_proto));
 	unregister_pernet_subsys(&sctp_net_ops);
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 029206e8dec4..8cdb4b1bf933 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -336,32 +336,21 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 	.init_net		= udplite_init_net,
 };
 
+static struct nf_conntrack_l4proto *udplite_proto[] = {
+	&nf_conntrack_l4proto_udplite4,
+	&nf_conntrack_l4proto_udplite6,
+};
+
 static int udplite_net_init(struct net *net)
 {
-	int ret = 0;
-
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_udplite4: pernet registration failed.\n");
-		goto out;
-	}
-	ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_udplite6: pernet registration failed.\n");
-		goto cleanup_udplite4;
-	}
-	return 0;
-
-cleanup_udplite4:
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
-out:
-	return ret;
+	return nf_ct_l4proto_pernet_register(net, udplite_proto,
+					     ARRAY_SIZE(udplite_proto));
 }
 
 static void udplite_net_exit(struct net *net)
 {
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6);
-	nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
+	nf_ct_l4proto_pernet_unregister(net, udplite_proto,
+					ARRAY_SIZE(udplite_proto));
 }
 
 static struct pernet_operations udplite_net_ops = {
@@ -377,29 +366,16 @@ static int __init nf_conntrack_proto_udplite_init(void)
 
 	ret = register_pernet_subsys(&udplite_net_ops);
 	if (ret < 0)
-		goto out_pernet;
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4);
-	if (ret < 0)
-		goto out_udplite4;
-
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6);
+		return ret;
+	ret = nf_ct_l4proto_register(udplite_proto, ARRAY_SIZE(udplite_proto));
 	if (ret < 0)
-		goto out_udplite6;
-
-	return 0;
-out_udplite6:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
-out_udplite4:
-	unregister_pernet_subsys(&udplite_net_ops);
-out_pernet:
+		unregister_pernet_subsys(&udplite_net_ops);
 	return ret;
 }
 
 static void __exit nf_conntrack_proto_udplite_exit(void)
 {
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
+	nf_ct_l4proto_unregister(udplite_proto, ARRAY_SIZE(udplite_proto));
 	unregister_pernet_subsys(&udplite_net_ops);
 }
 
-- 
2.1.4


^ permalink raw reply related

* [PATCH 09/39] netfilter: merge nf_iterate() into nf_hook_slow()
From: Pablo Neira Ayuso @ 2016-11-13 22:25 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1479075933-4491-1-git-send-email-pablo@netfilter.org>

nf_iterate() has become rather simple, we can integrate this code into
nf_hook_slow() to reduce the amount of LOC in the core path.

However, we still need nf_iterate() around for nf_queue packet handling,
so move this function there where we only need it. I think it should be
possible to refactor nf_queue code to get rid of it definitely, but
given this is slow path anyway, let's have a look this later.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/core.c         | 73 +++++++++++++++++---------------------------
 net/netfilter/nf_internals.h |  5 ---
 net/netfilter/nf_queue.c     | 20 ++++++++++++
 3 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index ebece48b8392..bd9272eeccb5 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -302,26 +302,6 @@ void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
 }
 EXPORT_SYMBOL(_nf_unregister_hooks);
 
-unsigned int nf_iterate(struct sk_buff *skb,
-			struct nf_hook_state *state,
-			struct nf_hook_entry **entryp)
-{
-	unsigned int verdict;
-
-	do {
-repeat:
-		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
-		if (verdict != NF_ACCEPT) {
-			if (verdict != NF_REPEAT)
-				return verdict;
-			goto repeat;
-		}
-		*entryp = rcu_dereference((*entryp)->next);
-	} while (*entryp);
-	return NF_ACCEPT;
-}
-
-
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
@@ -330,31 +310,34 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 	unsigned int verdict;
 	int ret;
 
-next_hook:
-	verdict = nf_iterate(skb, state, &entry);
-	switch (verdict & NF_VERDICT_MASK) {
-	case NF_ACCEPT:
-		ret = 1;
-		break;
-	case NF_DROP:
-		kfree_skb(skb);
-		ret = NF_DROP_GETERR(verdict);
-		if (ret == 0)
-			ret = -EPERM;
-		break;
-	case NF_QUEUE:
-		ret = nf_queue(skb, state, &entry, verdict);
-		if (ret == 1 && entry)
-			goto next_hook;
-		/* Fall through. */
-	default:
-		/* Implicit handling for NF_STOLEN, as well as any other non
-		 * conventional verdicts.
-		 */
-		ret = 0;
-		break;
-	}
-	return ret;
+	do {
+		verdict = entry->ops.hook(entry->ops.priv, skb, state);
+		switch (verdict & NF_VERDICT_MASK) {
+		case NF_ACCEPT:
+			entry = rcu_dereference(entry->next);
+			break;
+		case NF_DROP:
+			kfree_skb(skb);
+			ret = NF_DROP_GETERR(verdict);
+			if (ret == 0)
+				ret = -EPERM;
+			return ret;
+		case NF_REPEAT:
+			continue;
+		case NF_QUEUE:
+			ret = nf_queue(skb, state, &entry, verdict);
+			if (ret == 1 && entry)
+				continue;
+			return ret;
+		default:
+			/* Implicit handling for NF_STOLEN, as well as any other
+			 * non conventional verdicts.
+			 */
+			return 0;
+		}
+	} while (entry);
+
+	return 1;
 }
 EXPORT_SYMBOL(nf_hook_slow);
 
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 9fdb655f85bc..c46d214d5323 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -11,11 +11,6 @@
 #define NFDEBUG(format, args...)
 #endif
 
-
-/* core.c */
-unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
-			struct nf_hook_entry **entryp);
-
 /* nf_queue.c */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 	     struct nf_hook_entry **entryp, unsigned int verdict);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 2e39e38ae1c7..77cba9f6ccb6 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -177,6 +177,26 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 	return 0;
 }
 
+static unsigned int nf_iterate(struct sk_buff *skb,
+			       struct nf_hook_state *state,
+			       struct nf_hook_entry **entryp)
+{
+	unsigned int verdict;
+
+	do {
+repeat:
+		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
+		if (verdict != NF_ACCEPT) {
+			if (verdict != NF_REPEAT)
+				return verdict;
+			goto repeat;
+		}
+		*entryp = rcu_dereference((*entryp)->next);
+	} while (*entryp);
+
+	return NF_ACCEPT;
+}
+
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
 	struct nf_hook_entry *hook_entry = entry->hook;
-- 
2.1.4


^ permalink raw reply related

* [PATCH 08/39] netfilter: remove hook_entries field from nf_hook_state
From: Pablo Neira Ayuso @ 2016-11-13 22:25 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1479075933-4491-1-git-send-email-pablo@netfilter.org>

This field is only useful for nf_queue, so store it in the
nf_queue_entry structure instead, away from the core path. Pass
hook_head to nf_hook_slow().

Since we always have a valid entry on the first iteration in
nf_iterate(), we can use 'do { ... } while (entry)' loop instead.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h             | 10 ++++------
 include/linux/netfilter_ingress.h     |  4 ++--
 include/net/netfilter/nf_queue.h      |  1 +
 net/bridge/br_netfilter_hooks.c       |  4 ++--
 net/bridge/netfilter/ebtable_broute.c |  2 +-
 net/netfilter/core.c                  |  9 ++++-----
 net/netfilter/nf_queue.c              | 13 +++++--------
 net/netfilter/nfnetlink_queue.c       |  2 +-
 8 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index e0d000f6c9bf..69230140215b 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -54,7 +54,6 @@ struct nf_hook_state {
 	struct net_device *out;
 	struct sock *sk;
 	struct net *net;
-	struct nf_hook_entry __rcu *hook_entries;
 	int (*okfn)(struct net *, struct sock *, struct sk_buff *);
 };
 
@@ -81,7 +80,6 @@ struct nf_hook_entry {
 };
 
 static inline void nf_hook_state_init(struct nf_hook_state *p,
-				      struct nf_hook_entry *hook_entry,
 				      unsigned int hook,
 				      u_int8_t pf,
 				      struct net_device *indev,
@@ -96,7 +94,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
 	p->out = outdev;
 	p->sk = sk;
 	p->net = net;
-	RCU_INIT_POINTER(p->hook_entries, hook_entry);
 	p->okfn = okfn;
 }
 
@@ -150,7 +147,8 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
 extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 #endif
 
-int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+		 struct nf_hook_entry *entry);
 
 /**
  *	nf_hook - call a netfilter hook
@@ -179,10 +177,10 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 	if (hook_head) {
 		struct nf_hook_state state;
 
-		nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev,
+		nf_hook_state_init(&state, hook, pf, indev, outdev,
 				   sk, net, okfn);
 
-		ret = nf_hook_slow(skb, &state);
+		ret = nf_hook_slow(skb, &state, hook_head);
 	}
 	rcu_read_unlock();
 
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index fd44e4131710..2dc3b49b804a 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -26,10 +26,10 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	if (unlikely(!e))
 		return 0;
 
-	nf_hook_state_init(&state, e, NF_NETDEV_INGRESS,
+	nf_hook_state_init(&state, NF_NETDEV_INGRESS,
 			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
-	return nf_hook_slow(skb, &state);
+	return nf_hook_slow(skb, &state, e);
 }
 
 static inline void nf_hook_ingress_init(struct net_device *dev)
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 2280cfe86c56..09948d10e38e 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -12,6 +12,7 @@ struct nf_queue_entry {
 	unsigned int		id;
 
 	struct nf_hook_state	state;
+	struct nf_hook_entry	*hook;
 	u16			size; /* sizeof(entry) + saved route keys */
 
 	/* extra space to store route keys */
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 7e3645fa6339..8155bd2a5138 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1018,10 +1018,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
-	nf_hook_state_init(&state, elem, hook, NFPROTO_BRIDGE, indev, outdev,
+	nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
 			   sk, net, okfn);
 
-	ret = nf_hook_slow(skb, &state);
+	ret = nf_hook_slow(skb, &state, elem);
 	rcu_read_unlock();
 	if (ret == 1)
 		ret = okfn(net, sk, skb);
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 599679e3498d..8fe36dc3aab2 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb)
 	struct nf_hook_state state;
 	int ret;
 
-	nf_hook_state_init(&state, NULL, NF_BR_BROUTING,
+	nf_hook_state_init(&state, NF_BR_BROUTING,
 			   NFPROTO_BRIDGE, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 64623374bc5f..ebece48b8392 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -308,7 +308,7 @@ unsigned int nf_iterate(struct sk_buff *skb,
 {
 	unsigned int verdict;
 
-	while (*entryp) {
+	do {
 repeat:
 		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
 		if (verdict != NF_ACCEPT) {
@@ -317,20 +317,19 @@ unsigned int nf_iterate(struct sk_buff *skb,
 			goto repeat;
 		}
 		*entryp = rcu_dereference((*entryp)->next);
-	}
+	} while (*entryp);
 	return NF_ACCEPT;
 }
 
 
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
-int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+		 struct nf_hook_entry *entry)
 {
-	struct nf_hook_entry *entry;
 	unsigned int verdict;
 	int ret;
 
-	entry = rcu_dereference(state->hook_entries);
 next_hook:
 	verdict = nf_iterate(skb, state, &entry);
 	switch (verdict & NF_VERDICT_MASK) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 0fb38966e5bf..2e39e38ae1c7 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
 }
 
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
-		      unsigned int queuenum)
+		      struct nf_hook_entry *hook_entry, unsigned int queuenum)
 {
 	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
@@ -136,6 +136,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
 		.state	= *state,
+		.hook	= hook_entry,
 		.size	= sizeof(*entry) + afinfo->route_key_size,
 	};
 
@@ -163,8 +164,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 	struct nf_hook_entry *entry = *entryp;
 	int ret;
 
-	RCU_INIT_POINTER(state->hook_entries, entry);
-	ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
+	ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
 	if (ret < 0) {
 		if (ret == -ESRCH &&
 		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
@@ -179,15 +179,12 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
-	struct nf_hook_entry *hook_entry;
+	struct nf_hook_entry *hook_entry = entry->hook;
+	struct nf_hook_ops *elem = &hook_entry->ops;
 	struct sk_buff *skb = entry->skb;
 	const struct nf_afinfo *afinfo;
-	struct nf_hook_ops *elem;
 	int err;
 
-	hook_entry = rcu_dereference(entry->state.hook_entries);
-	elem = &hook_entry->ops;
-
 	nf_queue_entry_release_refs(entry);
 
 	/* Continue traversal iff userspace said ok... */
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5379f788a372..1e33115b399f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = {
 
 static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
 {
-	return rcu_access_pointer(entry->state.hook_entries) ==
+	return rcu_access_pointer(entry->hook) ==
 		(struct nf_hook_entry *)entry_ptr;
 }
 
-- 
2.1.4


^ permalink raw reply related

* [PATCH 04/39] netfilter: deprecate NF_STOP
From: Pablo Neira Ayuso @ 2016-11-13 22:24 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1479075933-4491-1-git-send-email-pablo@netfilter.org>

NF_STOP is only used by br_netfilter these days, and it can be emulated
with a combination of NF_STOLEN plus explicit call to the ->okfn()
function as Florian suggests.

To retain binary compatibility with userspace nf_queue application, we
have to keep NF_STOP around, so libnetfilter_queue userspace userspace
applications still work if they use NF_STOP for some exotic reason.

Out of tree modules using NF_STOP would break, but we don't care about
those.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter.h  | 2 +-
 net/bridge/br_netfilter_hooks.c | 6 ++++--
 net/netfilter/core.c            | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index d93f949d1d9a..7550e9176a54 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -13,7 +13,7 @@
 #define NF_STOLEN 2
 #define NF_QUEUE 3
 #define NF_REPEAT 4
-#define NF_STOP 5
+#define NF_STOP 5	/* Deprecated, for userspace nf_queue compatibility. */
 #define NF_MAX_VERDICT NF_STOP
 
 /* we overload the higher bits for encoding auxiliary data such as the queue
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index d0d66faebe90..7e3645fa6339 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -845,8 +845,10 @@ static unsigned int ip_sabotage_in(void *priv,
 				   struct sk_buff *skb,
 				   const struct nf_hook_state *state)
 {
-	if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
-		return NF_STOP;
+	if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) {
+		state->okfn(state->net, state->sk, skb);
+		return NF_STOLEN;
+	}
 
 	return NF_ACCEPT;
 }
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index cb0232c11bc8..14f97b624f98 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -333,7 +333,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
 	entry = rcu_dereference(state->hook_entries);
 next_hook:
 	verdict = nf_iterate(skb, state, &entry);
-	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
+	if (verdict == NF_ACCEPT) {
 		ret = 1;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
 		kfree_skb(skb);
-- 
2.1.4


^ permalink raw reply related

* [PATCH 03/39] netfilter: kill NF_HOOK_THRESH() and state->tresh
From: Pablo Neira Ayuso @ 2016-11-13 22:24 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1479075933-4491-1-git-send-email-pablo@netfilter.org>

Patch c5136b15ea36 ("netfilter: bridge: add and use br_nf_hook_thresh")
introduced br_nf_hook_thresh().

Replace NF_HOOK_THRESH() by br_nf_hook_thresh from
br_nf_forward_finish(), so we have no more callers for this macro.

As a result, state->thresh and explicit thresh parameter in the hook
state structure is not required anymore. And we can get rid of
skip-hook-under-thresh loop in nf_iterate() in the core path that is
only used by br_netfilter to search for the filter hook.

Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h             | 50 +++++++++--------------------------
 include/linux/netfilter_ingress.h     |  2 +-
 net/bridge/br_netfilter_hooks.c       |  8 +++---
 net/bridge/netfilter/ebtable_broute.c |  2 +-
 net/netfilter/core.c                  |  4 ---
 net/netfilter/nf_queue.c              |  2 --
 6 files changed, 19 insertions(+), 49 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index abc7fdcb9eb1..e0d000f6c9bf 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -49,7 +49,6 @@ struct sock;
 
 struct nf_hook_state {
 	unsigned int hook;
-	int thresh;
 	u_int8_t pf;
 	struct net_device *in;
 	struct net_device *out;
@@ -84,7 +83,7 @@ struct nf_hook_entry {
 static inline void nf_hook_state_init(struct nf_hook_state *p,
 				      struct nf_hook_entry *hook_entry,
 				      unsigned int hook,
-				      int thresh, u_int8_t pf,
+				      u_int8_t pf,
 				      struct net_device *indev,
 				      struct net_device *outdev,
 				      struct sock *sk,
@@ -92,7 +91,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
 				      int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	p->hook = hook;
-	p->thresh = thresh;
 	p->pf = pf;
 	p->in = indev;
 	p->out = outdev;
@@ -155,20 +153,16 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
 
 /**
- *	nf_hook_thresh - call a netfilter hook
+ *	nf_hook - call a netfilter hook
  *
  *	Returns 1 if the hook has allowed the packet to pass.  The function
  *	okfn must be invoked by the caller in this case.  Any other return
  *	value indicates the packet has been consumed by the hook.
  */
-static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
-				 struct net *net,
-				 struct sock *sk,
-				 struct sk_buff *skb,
-				 struct net_device *indev,
-				 struct net_device *outdev,
-				 int (*okfn)(struct net *, struct sock *, struct sk_buff *),
-				 int thresh)
+static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
+			  struct sock *sk, struct sk_buff *skb,
+			  struct net_device *indev, struct net_device *outdev,
+			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct nf_hook_entry *hook_head;
 	int ret = 1;
@@ -185,8 +179,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 	if (hook_head) {
 		struct nf_hook_state state;
 
-		nf_hook_state_init(&state, hook_head, hook, thresh,
-				   pf, indev, outdev, sk, net, okfn);
+		nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev,
+				   sk, net, okfn);
 
 		ret = nf_hook_slow(skb, &state);
 	}
@@ -195,14 +189,6 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 	return ret;
 }
 
-static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
-			  struct sock *sk, struct sk_buff *skb,
-			  struct net_device *indev, struct net_device *outdev,
-			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
-{
-	return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN);
-}
-                   
 /* Activate hook; either okfn or kfree_skb called, unless a hook
    returns NF_STOLEN (in which case, it's up to the hook to deal with
    the consequences).
@@ -221,19 +207,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 */
 
 static inline int
-NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
-	       struct sk_buff *skb, struct net_device *in,
-	       struct net_device *out,
-	       int (*okfn)(struct net *, struct sock *, struct sk_buff *),
-	       int thresh)
-{
-	int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh);
-	if (ret == 1)
-		ret = okfn(net, sk, skb);
-	return ret;
-}
-
-static inline int
 NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	     struct sk_buff *skb, struct net_device *in, struct net_device *out,
 	     int (*okfn)(struct net *, struct sock *, struct sk_buff *),
@@ -242,7 +215,7 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	int ret;
 
 	if (!cond ||
-	    ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1))
+	    ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1))
 		ret = okfn(net, sk, skb);
 	return ret;
 }
@@ -252,7 +225,10 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
 	struct net_device *in, struct net_device *out,
 	int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
-	return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN);
+	int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
+	if (ret == 1)
+		ret = okfn(net, sk, skb);
+	return ret;
 }
 
 /* Call setsockopt() */
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 33e37fb41d5d..fd44e4131710 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -26,7 +26,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	if (unlikely(!e))
 		return 0;
 
-	nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN,
+	nf_hook_state_init(&state, e, NF_NETDEV_INGRESS,
 			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
 	return nf_hook_slow(skb, &state);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2fe9345c1407..d0d66faebe90 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -561,8 +561,8 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
 	}
 	nf_bridge_push_encap_header(skb);
 
-	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb,
-		       in, skb->dev, br_forward_finish, 1);
+	br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
+			  br_forward_finish);
 	return 0;
 }
 
@@ -1016,8 +1016,8 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
-	nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1,
-			   NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
+	nf_hook_state_init(&state, elem, hook, NFPROTO_BRIDGE, indev, outdev,
+			   sk, net, okfn);
 
 	ret = nf_hook_slow(skb, &state);
 	rcu_read_unlock();
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index ec94c6f1ae88..599679e3498d 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb)
 	struct nf_hook_state state;
 	int ret;
 
-	nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN,
+	nf_hook_state_init(&state, NULL, NF_BR_BROUTING,
 			   NFPROTO_BRIDGE, skb->dev, NULL, NULL,
 			   dev_net(skb->dev), NULL);
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 76014ad72ec5..cb0232c11bc8 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -309,10 +309,6 @@ unsigned int nf_iterate(struct sk_buff *skb,
 	unsigned int verdict;
 
 	while (*entryp) {
-		if (state->thresh > (*entryp)->ops.priority) {
-			*entryp = rcu_dereference((*entryp)->next);
-			continue;
-		}
 repeat:
 		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
 		if (verdict != NF_ACCEPT) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 8f08d759844a..0fb38966e5bf 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -200,8 +200,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 			verdict = NF_DROP;
 	}
 
-	entry->state.thresh = INT_MIN;
-
 	if (verdict == NF_ACCEPT) {
 		hook_entry = rcu_dereference(hook_entry->next);
 		if (hook_entry)
-- 
2.1.4


^ permalink raw reply related

* [PATCH 02/39] netfilter: remove comments that predate rcu days
From: Pablo Neira Ayuso @ 2016-11-13 22:24 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1479075933-4491-1-git-send-email-pablo@netfilter.org>

We cannot block/sleep on nf_iterate because netfilter runs under rcu
read lock these days, where blocking is well-known to be illegal. So
let's remove these old comments.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/core.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 3d4aa96cb219..76014ad72ec5 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -308,18 +308,11 @@ unsigned int nf_iterate(struct sk_buff *skb,
 {
 	unsigned int verdict;
 
-	/*
-	 * The caller must not block between calls to this
-	 * function because of risk of continuing from deleted element.
-	 */
 	while (*entryp) {
 		if (state->thresh > (*entryp)->ops.priority) {
 			*entryp = rcu_dereference((*entryp)->next);
 			continue;
 		}
-
-		/* Optimization: we don't need to hold module
-		   reference here, since function can't sleep. --RR */
 repeat:
 		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
 		if (verdict != NF_ACCEPT) {
-- 
2.1.4


^ permalink raw reply related

* [PATCH 01/39] netfilter: get rid of useless debugging from core
From: Pablo Neira Ayuso @ 2016-11-13 22:24 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1479075933-4491-1-git-send-email-pablo@netfilter.org>

This patch remove compile time code to catch inconventional verdicts.
We have better ways to handle this case these days, eg. pr_debug() but
even though I don't think this is useful at all, so let's remove this.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/core.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 004af030ef1a..3d4aa96cb219 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -323,15 +323,6 @@ unsigned int nf_iterate(struct sk_buff *skb,
 repeat:
 		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
 		if (verdict != NF_ACCEPT) {
-#ifdef CONFIG_NETFILTER_DEBUG
-			if (unlikely((verdict & NF_VERDICT_MASK)
-							> NF_MAX_VERDICT)) {
-				NFDEBUG("Evil return from %p(%u).\n",
-					(*entryp)->ops.hook, state->hook);
-				*entryp = rcu_dereference((*entryp)->next);
-				continue;
-			}
-#endif
 			if (verdict != NF_REPEAT)
 				return verdict;
 			goto repeat;
-- 
2.1.4


^ permalink raw reply related

* Re: [PATCH] net: stmmac: Add support for ethtool::nway_reset
From: kbuild test robot @ 2016-11-13 22:19 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: kbuild-all, netdev, davem, Florian Fainelli, Giuseppe Cavallaro,
	Alexandre Torgue, open list
In-Reply-To: <20161113212444.3777-1-f.fainelli@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1681 bytes --]

Hi Florian,

[auto build test WARNING on net-next/master]
[also build test WARNING on next-20161111]
[cannot apply to v4.9-rc5]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Florian-Fainelli/net-stmmac-Add-support-for-ethtool-nway_reset/20161114-053015
config: x86_64-kexec (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c: In function 'stmmac_nway_reset':
>> drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c:867:22: warning: unused variable 'priv' [-Wunused-variable]
     struct stmmac_priv *priv = netdev_priv(dev);
                         ^~~~

vim +/priv +867 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c

   851		int ret = 0;
   852	
   853		switch (tuna->id) {
   854		case ETHTOOL_RX_COPYBREAK:
   855			priv->rx_copybreak = *(u32 *)data;
   856			break;
   857		default:
   858			ret = -EINVAL;
   859			break;
   860		}
   861	
   862		return ret;
   863	}
   864	
   865	static int stmmac_nway_reset(struct net_device *dev)
   866	{
 > 867		struct stmmac_priv *priv = netdev_priv(dev);
   868	
   869		if (!dev->phydev)
   870			return -ENODEV;
   871	
   872		return genphy_restart_aneg(dev->phydev);
   873	}
   874	
   875	static const struct ethtool_ops stmmac_ethtool_ops = {

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 24265 bytes --]

^ permalink raw reply

* Re: [PATCH] net: stmmac: Add support for ethtool::nway_reset
From: Florian Fainelli @ 2016-11-13 21:35 UTC (permalink / raw)
  To: netdev; +Cc: davem, Giuseppe Cavallaro, Alexandre Torgue, open list
In-Reply-To: <20161113212444.3777-1-f.fainelli@gmail.com>

Le 13/11/2016 à 13:24, Florian Fainelli a écrit :
> If we have a PHY device, just invoke genphy_restart_aneg() to restart
> auto-negotiation.
> 
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>

David, please drop this patch for now, since I have another one pending
which is going to touch the net_device/phydev interaction, this one also
causes a build warning since priv is not used.

Thank you!
-- 
Florian

^ permalink raw reply

* Re: [PATCH] netfilter: x_tables: simplify IS_ERR_OR_NULL to NULL test
From: Pablo Neira Ayuso @ 2016-11-13 21:27 UTC (permalink / raw)
  To: Julia Lawall
  Cc: Florian Westphal, David S. Miller, kernel-janitors, linux-kernel,
	netdev, coreteam, netfilter-devel, Hideaki YOSHIFUJI,
	James Morris, Alexey Kuznetsov, Jozsef Kadlecsik, Patrick McHardy,
	christophe.jaillet
In-Reply-To: <1478867558-21650-1-git-send-email-Julia.Lawall@lip6.fr>

On Fri, Nov 11, 2016 at 01:32:38PM +0100, Julia Lawall wrote:
> Since commit 7926dbfa4bc1 ("netfilter: don't use
> mutex_lock_interruptible()"), the function xt_find_table_lock can only
> return NULL on an error.  Simplify the call sites and update the
> comment before the function.

Applied, thanks Julia!

^ permalink raw reply

* [PATCH] net: stmmac: Add support for ethtool::nway_reset
From: Florian Fainelli @ 2016-11-13 21:24 UTC (permalink / raw)
  To: netdev
  Cc: davem, Florian Fainelli, Giuseppe Cavallaro, Alexandre Torgue,
	open list

If we have a PHY device, just invoke genphy_restart_aneg() to restart
auto-negotiation.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 3fe9340b748f..7a487c9ccdea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -862,6 +862,16 @@ static int stmmac_set_tunable(struct net_device *dev,
 	return ret;
 }
 
+static int stmmac_nway_reset(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	if (!dev->phydev)
+		return -ENODEV;
+
+	return genphy_restart_aneg(dev->phydev);
+}
+
 static const struct ethtool_ops stmmac_ethtool_ops = {
 	.begin = stmmac_check_if_running,
 	.get_drvinfo = stmmac_ethtool_getdrvinfo,
@@ -886,6 +896,7 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
 	.set_tunable = stmmac_set_tunable,
 	.get_link_ksettings = stmmac_ethtool_get_link_ksettings,
 	.set_link_ksettings = stmmac_ethtool_set_link_ksettings,
+	.nway_reset = stmmac_nway_reset,
 };
 
 void stmmac_set_ethtool_ops(struct net_device *netdev)
-- 
2.9.3

^ permalink raw reply related

* Re: [PATCH v2] ip6_output: ensure flow saddr actually belongs to device
From: David Ahern @ 2016-11-13 20:45 UTC (permalink / raw)
  To: Jason A. Donenfeld, Netdev, WireGuard mailing list, LKML,
	YOSHIFUJI Hideaki, Hannes Frederic Sowa
In-Reply-To: <20161113190251.1027-1-Jason@zx2c4.com>

On 11/13/16 12:02 PM, Jason A. Donenfeld wrote:
> This puts the IPv6 routing functions in parity with the IPv4 routing
> functions. Namely, we now check in v6 that if a flowi6 requests an
> saddr, the returned dst actually corresponds to a net device that has
> that saddr. This mirrors the v4 logic with __ip_dev_find in
> __ip_route_output_key_hash. In the event that the returned dst is not
> for a dst with a dev that has the saddr, we return -EINVAL, just like
> v4; this makes it easy to use the same error handlers for both cases.
> 
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> Cc: David Ahern <dsa@cumulusnetworks.com>
> ---
> Changes from v1:
>    This moves the check to the top and now sees if it's a valid address
>    on _any_ device, not just the one in dst.
> 
>  include/net/ipv6.h    |  2 ++
>  net/ipv6/ip6_output.c | 28 ++++++++++++++++++++++++++++
>  2 files changed, 30 insertions(+)
> 
> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> index 8fed1cd..e5dc14f 100644
> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -914,6 +914,8 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
>  					 const struct in6_addr *final_dst);
>  struct dst_entry *ip6_blackhole_route(struct net *net,
>  				      struct dst_entry *orig_dst);
> +struct net_device *__ip6_dev_find(struct net *net, struct in6_addr *addr,
> +				  bool devref);
>  
>  /*
>   *	skb processing functions
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 6001e78..371170b 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -916,6 +916,30 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
>  	return dst;
>  }
>  
> +/**
> + * __ip6_dev_find - find the first device with a given source address.
> + * @net: the net namespace
> + * @addr: the source address
> + * @devref: if true, take a reference on the found device
> + *
> + * If a caller uses devref=false, it should be protected by RCU, or RTNL
> + */
> +struct net_device *__ip6_dev_find(struct net *net, struct in6_addr *addr, bool devref)
> +{
> +	struct net_device *result;
> +
> +	rcu_read_lock();
> +	for_each_netdev_rcu(net, result) {
> +		if (ipv6_chk_addr(net, addr, result, 1))
> +			break;
> +	}
> +	if (result && devref)
> +		dev_hold(result);
> +	rcu_read_unlock();
> +	return result;
> +}
> +EXPORT_SYMBOL(__ip6_dev_find);

You don't need a new function to walk all interfaces; just use ipv6_chk_addr with a dev arg of NULL. IPv6 has a hash table with all unicast addresses -- inet6_addr_lst. ipv6_chk_addr is checking that list for the address in question. The actual device is not relevant for verifying the address is a valid local one (though the device can be returned from ifp->idev->dev if ever needed).

So drop the above ...

> +
>  static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
>  			       struct dst_entry **dst, struct flowi6 *fl6)
>  {
> @@ -926,6 +950,10 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
>  	int err;
>  	int flags = 0;
>  
> +	if (!ipv6_addr_any(&fl6->saddr) &&
> +	    !__ip6_dev_find(net, &fl6->saddr, false))

... and just use ipv6_chk_addr here.

> +		return -EINVAL;
> +
>  	/* The correct way to handle this would be to do
>  	 * ip6_route_get_saddr, and then ip6_route_output; however,
>  	 * the route-specific preferred source forces the
> 

^ permalink raw reply

* Re: [PATCH] ip6_output: ensure flow saddr actually belongs to device
From: David Ahern @ 2016-11-13 20:39 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Netdev, Hannes Frederic Sowa, LKML, WireGuard mailing list,
	YOSHIFUJI Hideaki
In-Reply-To: <CAHmME9rV_Qqj_QNKOARav6vOyyF-__E07saChPYo9MF6wED83A@mail.gmail.com>

On 11/13/16 1:19 PM, Jason A. Donenfeld wrote:
> I gave v2 my best shot. Hopefully it's adequate, but I have a feeling
> it might be best for you to just code up what you have in mind.

nah, you are doing fine. one more comment on v2.

^ permalink raw reply

* Re: [PATCH net 2/2] r8152: rx descriptor check
From: Mark Lord @ 2016-11-13 20:38 UTC (permalink / raw)
  To: David Miller, hayeswang-Rasf1IRRPZFBDgjK7y7TUQ
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, nic_swsd-Rasf1IRRPZFBDgjK7y7TUQ,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <cd36fce8-2f17-debd-d514-e3cbe7ed3762-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>

On 16-11-13 03:34 PM, Mark Lord wrote:
>
> The system I use it with is a 32-bit ppc476, with non-coherent RAM,
> and using 16KB page sizes.
> 
> The dongle instantly becomes a lot more reliable when r8152.c is updated
> to use usb_alloc_coherent() for URB buffers, rather than kmalloc().
> 
> Not sure why that would be though, as the USB stack normally would handle
> kmalloc'd buffers just fine.  It is calling the appropriate routines,
> which boil down to invalidating the dcache lines (for inbound bulk xfers)
> as part of usb_submit_urb(), and yet the problem there persists.
> 
> It could be caused by cache-line sharing with other allocations, but that seems
> unlikely as the kmalloc() size is 16384 bytes per buffer.  Perhaps the driver
> is somehow accessing the buffer space again after doing usb_submit_urb()?
> That would certainly produce this kind of behaviour.
> 
> Or maybe there's just a memory barrier missing somewhere in path.
> 
> The really weird thing is that ASIX-based dongles (which use a different driver)
> don't have this problem, and yet they also use kmalloc'd buffers.
> 
> I have access to the test system only for a day or two a week,
> and it takes a few hours to do a good test as to whether something helps or not.
> I'll continue to poke at it as time and New Ideas permit.

Oh, and the problems did not exist with the 3.14.xx kernels and earlier.
They began to show up when we tried 3.16.xx and all newer kernels.

The difference there is that RX checksums were enabled in hardware as of 3.16.xx,
and thus the network stack began accepting bad packets from the r8152 driver.

I don't know if the ASIX driver uses hardware checksums or just software checksums.
That might explain why it is more reliable here.
-- 
Mark Lord
Real-Time Remedies Inc.
mlord-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net 2/2] r8152: rx descriptor check
From: Mark Lord @ 2016-11-13 20:34 UTC (permalink / raw)
  To: David Miller, hayeswang; +Cc: netdev, nic_swsd, linux-kernel, linux-usb
In-Reply-To: <20161113.123954.2134945576362221851.davem@davemloft.net>

On 16-11-13 12:39 PM, David Miller wrote:
> From: Hayes Wang <hayeswang@realtek.com>
> Date: Fri, 11 Nov 2016 15:15:41 +0800
> 
>> For some platforms, the data in memory is not the same with the one
>> from the device. That is, the data of memory is unbelievable. The
>> check is used to find out this situation.
>>
>> Signed-off-by: Hayes Wang <hayeswang@realtek.com>
> 
> I'm all for adding consistency checks, but I disagree with proceeding
> in this manner for this.
> 
> If you add this patch now, there is a much smaller likelyhood that you
> will work with a high priority to figure out _why_ this is happening.
> 
> For all we know this could be a platform bug in the DMA API for the
> systems in question.
> 
> It could also be a bug elsewhere in the driver, either in setting up
> the descriptor DMA mappings or how the chip is programmed.
> 
> Either way the true cause must be found before we start throwing
> changes like this into the driver.

I agree.

The system I use it with is a 32-bit ppc476, with non-coherent RAM,
and using 16KB page sizes.

The dongle instantly becomes a lot more reliable when r8152.c is updated
to use usb_alloc_coherent() for URB buffers, rather than kmalloc().

Not sure why that would be though, as the USB stack normally would handle
kmalloc'd buffers just fine.  It is calling the appropriate routines,
which boil down to invalidating the dcache lines (for inbound bulk xfers)
as part of usb_submit_urb(), and yet the problem there persists.

It could be caused by cache-line sharing with other allocations, but that seems
unlikely as the kmalloc() size is 16384 bytes per buffer.  Perhaps the driver
is somehow accessing the buffer space again after doing usb_submit_urb()?
That would certainly produce this kind of behaviour.

Or maybe there's just a memory barrier missing somewhere in path.

The really weird thing is that ASIX-based dongles (which use a different driver)
don't have this problem, and yet they also use kmalloc'd buffers.

I have access to the test system only for a day or two a week,
and it takes a few hours to do a good test as to whether something helps or not.
I'll continue to poke at it as time and New Ideas permit.

New Ideas welcome!
-- 
Mark Lord
Real-Time Remedies Inc.
mlord@pobox.com

^ permalink raw reply

* Re: [PATCH net-next 00/11] Start adding support for mv88e6390 family
From: Andrew Lunn @ 2016-11-13 20:24 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, vivien.didelot
In-Reply-To: <20161113.004859.749028544003333521.davem@davemloft.net>

On Sun, Nov 13, 2016 at 12:48:59AM -0500, David Miller wrote:
> From: Andrew Lunn <andrew@lunn.ch>
> Date: Fri, 11 Nov 2016 03:53:32 +0100
> 
> > This is the first patchset implementing support for the mv88e6390
> > family.  This is a new generation of switch devices and has numerous
> > incompatible changes to the registers. These patches allow the switch
> > to the detected during probe, and makes the statistics unit work.
> > 
> > These patches are insufficient to make the mv88e6390 functional. More
> > patches will follow.
> 
> Andrew, this series doesn't apply cleanly to net-next, so you'll
> need to respin.

Hi David

I'm happy to respin, but i'm wondering why the don't apply.

What seems to be the issue is you said you have accepted:

[PATCH net-next 0/2] Fixes for port refactoring
https://marc.info/?l=linux-netdev&m=147880114928996&w=1

Yet i don't see these in net-next. And i based this patchset on a tree
which included the fixes. Hence they are not applying.

Have the fixes really been accepted?

Thanks
	Andrew


 

^ permalink raw reply

* Re: [PATCH] ip6_output: ensure flow saddr actually belongs to device
From: Jason A. Donenfeld @ 2016-11-13 20:19 UTC (permalink / raw)
  To: David Ahern
  Cc: Netdev, WireGuard mailing list, LKML, YOSHIFUJI Hideaki,
	Hannes Frederic Sowa
In-Reply-To: <405b2e79-854d-4c30-07b0-bd524137d2f6@cumulusnetworks.com>

Hi David,

On Sun, Nov 13, 2016 at 5:30 PM, David Ahern <dsa@cumulusnetworks.com> wrote:
> You can't require the address to be on the dst device. e.g., it can be an address from the loopback/vrf device.
>
> This block needs to be done at function entry, and pass dev as NULL to mean is the address assigned to any interface. That gets you the equivalency of the IPv4 check.

I gave v2 my best shot. Hopefully it's adequate, but I have a feeling
it might be best for you to just code up what you have in mind.

Regards,
Jason

^ permalink raw reply

* Re: [patch net v2 0/2] mlxsw: Couple of fixes
From: Jiri Pirko @ 2016-11-13 20:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, yotamg, arkadis, idosch, eladr, nogahf, ogerlitz
In-Reply-To: <20161113.125133.1226481580321868668.davem@davemloft.net>

Sun, Nov 13, 2016 at 06:51:33PM CET, davem@davemloft.net wrote:
>From: Jiri Pirko <jiri@resnulli.us>
>Date: Fri, 11 Nov 2016 16:34:24 +0100
>
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> Please, queue-up both for stable. Thanks!
>
>Just to be clear I did make sure to take v2 rather than
>v1.

Good. Thanks!

^ permalink raw reply

* Re: [PATCH net-next v2] ipv6: sr: fix IPv6 initialization failure without lwtunnels
From: David Lebrun @ 2016-11-13 19:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, lorenzo
In-Reply-To: <20161113.002348.81553025732356797.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 1929 bytes --]

On 11/13/2016 06:23 AM, David Miller wrote:
> This seems like such a huge mess, quite frankly.
> 
> IPV6-SR has so many strange dependencies, a weird Kconfig option that is
> simply controlling what a responsible sysadmin should be allow to do if
> he chooses anyways.
> 
> Every distribution is going to say "¯\_(ツ)_/¯" and just turn the thing
> on in their builds.

Indeed, the issue is that seg6_iptunnel.o was included in obj-y instead
of ipv6-y, triggering the bug when CONFIG_IPV6=m. Fixed with the
following modification to the patch (tested with allyesconfig and
allmodconfig):

diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 8979d53..a233136 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -53,6 +53,6 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o

 ifneq ($(CONFIG_IPV6),)
 obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
-obj-$(CONFIG_LWTUNNEL) += seg6_iptunnel.o
+ipv6-$(CONFIG_LWTUNNEL) += seg6_iptunnel.o
 obj-y += mcast_snoop.o
 endif

I agree with you that the way to combine the dependencies is strange,
even if they are very few. The part of the IPv6-SR patch that is enabled
by default depends on two things: IPV6 and LWTUNNEL. The problem is that
LWTUNNEL does not depend on IPV6 and is not necessarily enabled. To fix
the bug reported by Lorenzo, I propose to select one the three following
solutions:

1. Make LWTUNNEL always enabled (removing the option).
   Pros: remove an option
   Cons: add always-enabled code

2. Create an option IPV6_SEG6_LWTUNNEL, which would select LWTUNNEL and
enable the compilation of seg6_iptunnel.o.
   Pros: logically dissociate the part of IPv6-SR that depends on
LWTUNNEL from the core patch and simplifies compilation
   Cons: add an option

3. Apply the proposed patch with the fix
   Pros: do not modify options
   Cons: weird conditional compilation

What do you think ?

David


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 163 bytes --]

^ permalink raw reply related

* Re: [net-next 1/2] net: ethernet: slicoss: add slicoss gigabit ethernet driver
From: Andrew Lunn @ 2016-11-13 19:55 UTC (permalink / raw)
  To: Lino Sanfilippo
  Cc: davem, charrer, liodot, gregkh, devel, linux-kernel, netdev
In-Reply-To: <1479012453-19410-2-git-send-email-LinoSanfilippo@gmx.de>

> +static const char slic_stats_strings[][ETH_GSTRING_LEN] = {
> +	"rx_packets     ",
> +	"rx_bytes       ",
> +	"rx_multicasts  ",
> +	"rx_errors      ",
> +	"rx_buff_miss   ",
> +	"rx_tp_csum     ",
> +	"rx_tp_oflow    ",
> +	"rx_tp_hlen     ",
> +	"rx_ip_csum     ",
> +	"rx_ip_len      ",

Are there any other drivers which pad the statistics strings?

> +static void slic_set_link_autoneg(struct slic_device *sdev)
> +{
> +	unsigned int subid = sdev->pdev->subsystem_device;
> +	u32 val;
> +
> +	if (sdev->is_fiber) {
> +		/* We've got a fiber gigabit interface, and register 4 is
> +		 * different in fiber mode than in copper mode.
> +		 */
> +		/* advertise FD only @1000 Mb */
> +		val = MII_ADVERTISE << 16 | SLIC_PAR_ADV1000XFD |
> +		      SLIC_PAR_ASYMPAUSE_FIBER;
> +		/* enable PAUSE frames */
> +		slic_write(sdev, SLIC_REG_WPHY, val);
> +		/* reset phy, enable auto-neg  */
> +		val = MII_BMCR << 16 | SLIC_PCR_RESET | SLIC_PCR_AUTONEG |
> +		      SLIC_PCR_AUTONEG_RST;
> +		slic_write(sdev, SLIC_REG_WPHY, val);
> +	} else {	/* copper gigabit */
> +		/* We've got a copper gigabit interface, and register 4 is
> +		 * different in copper mode than in fiber mode.
> +		 */
> +		/* advertise 10/100 Mb modes   */
> +		val = MII_ADVERTISE << 16 | SLIC_PAR_ADV100FD |
> +		      SLIC_PAR_ADV100HD | SLIC_PAR_ADV10FD | SLIC_PAR_ADV10HD;
> +		/* enable PAUSE frames  */
> +		val |= SLIC_PAR_ASYMPAUSE;
> +		/* required by the Cicada PHY  */
> +		val |= SLIC_PAR_802_3;
> +		slic_write(sdev, SLIC_REG_WPHY, val);
> +
> +		/* advertise FD only @1000 Mb  */
> +		val = MII_CTRL1000 << 16 | SLIC_PGC_ADV1000FD;
> +		slic_write(sdev, SLIC_REG_WPHY, val);
> +
> +		if (subid != PCI_SUBDEVICE_ID_ALACRITECH_CICADA) {
> +			 /* if a Marvell PHY enable auto crossover */
> +			val = SLIC_MIICR_REG_16 | SLIC_MRV_REG16_XOVERON;
> +			slic_write(sdev, SLIC_REG_WPHY, val);
> +
> +			/* reset phy, enable auto-neg  */
> +			val = MII_BMCR << 16 | SLIC_PCR_RESET |
> +			      SLIC_PCR_AUTONEG | SLIC_PCR_AUTONEG_RST;
> +			slic_write(sdev, SLIC_REG_WPHY, val);
> +		} else {
> +			/* enable and restart auto-neg (don't reset)  */
> +			val = MII_BMCR << 16 | SLIC_PCR_AUTONEG |
> +			      SLIC_PCR_AUTONEG_RST;
> +			slic_write(sdev, SLIC_REG_WPHY, val);
> +		}
> +	}
> +	sdev->autoneg = true;
> +}

Could this be pulled out into a standard PHY driver? All the SLIC
SLIC_PCR_ defines seems to be the same as those in mii.h. This could
be a standard PHY hidden behind a single register.

   Andrew

^ permalink raw reply

* Re: Debugging Ethernet issues
From: Florian Fainelli @ 2016-11-13 19:55 UTC (permalink / raw)
  To: Mason, Andrew Lunn
  Cc: netdev, Mans Rullgard, Sergei Shtylyov, Tom Lendacky, Zach Brown,
	Shaohui Xie, Tim Beale, Brian Hill, Vince Bridgers,
	Balakumaran Kannan, David S. Miller, Sebastian Frias,
	Kirill Kapranov
In-Reply-To: <5828C452.6050808@free.fr>

Le 13/11/2016 à 11:51, Mason a écrit :
> On 13/11/2016 04:09, Andrew Lunn wrote:
> 
>> Mason wrote:
>>
>>> When connected to a Gigabit switch
>>> 3.4 negotiates a LAN DHCP setup instantly
>>> 4.7 requires over 5 seconds to do so
>>
>> When you run tcpdump on the DHCP server, are you noticing the first
>> request is missing?
>>
>> What can happen is the dhclient gets started immediately and sends out
>> its first request before auto-negotiation has finished. So this first packet
>> gets lost. The retransmit after a few seconds is then successful.
> 
> I will run tcpdump on the server as I run udhcpc on the client
> for Linux 3.4 vs 4.7
> 
> Do you know what would make auto-negotiation fail at 100 Mbps
> on 4.7? (whereas it succeeds on 3.4)
> 
> (Thinking out loud) If the problem were in auto-negotiation,
> then if should work if I hard-code speed and duplex using
> ethtool, right? (IIRC, hard-coding doesn't help.)

I would start with checking basic things:

- does your Ethernet driver get a link UP being reported correctly
(netif_carrier_ok returns 1)?
- if you let the bootloader configure the PHY and utilize the Generic
PHY driver instead of the Atheros PHY driver, does the problem appear as
well?
- what do transmit/receive counters on the Ethernet driver/MAC return?
-- 
Florian

^ permalink raw reply

* Re: Debugging Ethernet issues
From: Mason @ 2016-11-13 19:51 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: netdev, Florian Fainelli, Mans Rullgard, Sergei Shtylyov,
	Tom Lendacky, Zach Brown, Shaohui Xie, Tim Beale, Brian Hill,
	Vince Bridgers, Balakumaran Kannan, David S. Miller,
	Sebastian Frias, Kirill Kapranov
In-Reply-To: <20161113030919.GA2892@lunn.ch>

On 13/11/2016 04:09, Andrew Lunn wrote:

> Mason wrote:
>
>> When connected to a Gigabit switch
>> 3.4 negotiates a LAN DHCP setup instantly
>> 4.7 requires over 5 seconds to do so
> 
> When you run tcpdump on the DHCP server, are you noticing the first
> request is missing?
> 
> What can happen is the dhclient gets started immediately and sends out
> its first request before auto-negotiation has finished. So this first packet
> gets lost. The retransmit after a few seconds is then successful.

I will run tcpdump on the server as I run udhcpc on the client
for Linux 3.4 vs 4.7

Do you know what would make auto-negotiation fail at 100 Mbps
on 4.7? (whereas it succeeds on 3.4)

(Thinking out loud) If the problem were in auto-negotiation,
then if should work if I hard-code speed and duplex using
ethtool, right? (IIRC, hard-coding doesn't help.)

Regards.

^ permalink raw reply

* Re: stmmac/RTL8211F/Meson GXBB: TX throughput problems
From: André Roth @ 2016-11-13 19:20 UTC (permalink / raw)
  To: Jerome Brunet
  Cc: Martin Blumenstingl, Giuseppe CAVALLARO, Johnson Leung, netdev,
	Alexandre Torgue, linux-amlogic
In-Reply-To: <1478190980.6632.26.camel@baylibre.com>


Hi all,

> To everybody having similar issue with their OdroidC2, could you try
> the attached patch and let us know if it changes anything for you ?

I can confirm that this patch removes the problem, I have now constant
~300Mb/s in both directions without any issue ! :)

I used the v4.10/integ branch, which shows the problem without applying
this patch. 

Thanks for your work,

 André 

^ permalink raw reply

* Re: stmmac/RTL8211F/Meson GXBB: TX throughput problems
From: André Roth @ 2016-11-13 19:13 UTC (permalink / raw)
  To: Jerome Brunet
  Cc: Martin Blumenstingl, Johnson Leung, Giuseppe CAVALLARO,
	linux-amlogic, Alexandre Torgue, netdev
In-Reply-To: <1478192276.6632.34.camel@baylibre.com>


> Andre, the 3.14 kernel you are talking, is it this one ? : 
> https://github.com/hardkernel/linux/tree/odroidc2-3.14.y

yes
 
> Because in drivers/net/phy/realtek.c, they disable EEE, but
> also 1000Base-T Full Duplex advertisement ?
> 
> +	/* disable 1000m adv*/
> +	val = phy_read(phydev, 0x9);
> +	phy_write(phydev, 0x9, val&(~(1<<9)));
> 
> If this is the kernel you are running, you should not be able to have
> ethernet at 1000MB/s ? Or is it in half duplex mode ?

ethtool shows 1000Mb/s Full-Duplex and the bandwith is around 300Mb/s
(as measured by scp). kernel version: 3.14.65-73

^ permalink raw reply

* [PATCH v2] ip6_output: ensure flow saddr actually belongs to device
From: Jason A. Donenfeld @ 2016-11-13 19:02 UTC (permalink / raw)
  To: David Ahern, Netdev, WireGuard mailing list, LKML,
	YOSHIFUJI Hideaki, Hannes Frederic Sowa
In-Reply-To: <405b2e79-854d-4c30-07b0-bd524137d2f6@cumulusnetworks.com>

This puts the IPv6 routing functions in parity with the IPv4 routing
functions. Namely, we now check in v6 that if a flowi6 requests an
saddr, the returned dst actually corresponds to a net device that has
that saddr. This mirrors the v4 logic with __ip_dev_find in
__ip_route_output_key_hash. In the event that the returned dst is not
for a dst with a dev that has the saddr, we return -EINVAL, just like
v4; this makes it easy to use the same error handlers for both cases.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: David Ahern <dsa@cumulusnetworks.com>
---
Changes from v1:
   This moves the check to the top and now sees if it's a valid address
   on _any_ device, not just the one in dst.

 include/net/ipv6.h    |  2 ++
 net/ipv6/ip6_output.c | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 8fed1cd..e5dc14f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -914,6 +914,8 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 					 const struct in6_addr *final_dst);
 struct dst_entry *ip6_blackhole_route(struct net *net,
 				      struct dst_entry *orig_dst);
+struct net_device *__ip6_dev_find(struct net *net, struct in6_addr *addr,
+				  bool devref);
 
 /*
  *	skb processing functions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6001e78..371170b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -916,6 +916,30 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 	return dst;
 }
 
+/**
+ * __ip6_dev_find - find the first device with a given source address.
+ * @net: the net namespace
+ * @addr: the source address
+ * @devref: if true, take a reference on the found device
+ *
+ * If a caller uses devref=false, it should be protected by RCU, or RTNL
+ */
+struct net_device *__ip6_dev_find(struct net *net, struct in6_addr *addr, bool devref)
+{
+	struct net_device *result;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(net, result) {
+		if (ipv6_chk_addr(net, addr, result, 1))
+			break;
+	}
+	if (result && devref)
+		dev_hold(result);
+	rcu_read_unlock();
+	return result;
+}
+EXPORT_SYMBOL(__ip6_dev_find);
+
 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 			       struct dst_entry **dst, struct flowi6 *fl6)
 {
@@ -926,6 +950,10 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 	int err;
 	int flags = 0;
 
+	if (!ipv6_addr_any(&fl6->saddr) &&
+	    !__ip6_dev_find(net, &fl6->saddr, false))
+		return -EINVAL;
+
 	/* The correct way to handle this would be to do
 	 * ip6_route_get_saddr, and then ip6_route_output; however,
 	 * the route-specific preferred source forces the
-- 
2.10.2

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox