Netdev List
 help / color / mirror / Atom feed
* [PATCH 11/13] netfilter: nf_conntrack_l4proto_dccp[4,6] cleanup
From: Gao feng @ 2012-06-21 14:36 UTC (permalink / raw)
  To: pablo; +Cc: netdev, netfilter-devel, Gao feng
In-Reply-To: <1340289410-17642-1-git-send-email-gaofeng@cn.fujitsu.com>

Some cleanup of nf_conntrack_l4proto_dccp[4,6]:
make the code clearer and ready for moving the
sysctl code to nf_conntrack_proto_*_sysctl.c to
reduce the ifdef pollution.

Also use nf_proto_net.users to identify whether this is the first
time we use the nf_proto_net. If it is the first time, we
initialize it.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/netfilter/nf_conntrack_proto_dccp.c |   54 +++++++++++++++++--------------
 1 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 52da8f0..6535326 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -387,7 +387,7 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
 /* this module per-net specifics */
 static int dccp_net_id __read_mostly;
 struct dccp_net {
-	struct nf_proto_net np;
+	struct nf_proto_net pn;
 	int dccp_loose;
 	unsigned int dccp_timeout[CT_DCCP_MAX + 1];
 };
@@ -815,16 +815,37 @@ static struct ctl_table dccp_sysctl_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
+static int dccp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+				     struct dccp_net *dn)
+{
+#ifdef CONFIG_SYSCTL
+	if (pn->ctl_table)
+		return 0;
+
+	pn->ctl_table = kmemdup(dccp_sysctl_table,
+				sizeof(dccp_sysctl_table),
+				GFP_KERNEL);
+	if (!pn->ctl_table)
+		return -ENOMEM;
+
+	pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST];
+	pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND];
+	pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN];
+	pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN];
+	pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ];
+	pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
+	pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
+	pn->ctl_table[7].data = &dn->dccp_loose;
+#endif
+	return 0;
+}
+
 static int dccp_init_net(struct net *net, u_int16_t proto)
 {
 	struct dccp_net *dn = dccp_pernet(net);
-	struct nf_proto_net *pn = (struct nf_proto_net *)dn;
+	struct nf_proto_net *pn = &dn->pn;
 
-#ifdef CONFIG_SYSCTL
-	if (!pn->ctl_table) {
-#else
-	if (!pn->users++) {
-#endif
+	if (!pn->users) {
 		/* default values */
 		dn->dccp_loose = 1;
 		dn->dccp_timeout[CT_DCCP_REQUEST]	= 2 * DCCP_MSL;
@@ -834,24 +855,9 @@ static int dccp_init_net(struct net *net, u_int16_t proto)
 		dn->dccp_timeout[CT_DCCP_CLOSEREQ]	= 64 * HZ;
 		dn->dccp_timeout[CT_DCCP_CLOSING]	= 64 * HZ;
 		dn->dccp_timeout[CT_DCCP_TIMEWAIT]	= 2 * DCCP_MSL;
-#ifdef CONFIG_SYSCTL
-		pn->ctl_table = kmemdup(dccp_sysctl_table,
-					sizeof(dccp_sysctl_table),
-					GFP_KERNEL);
-		if (!pn->ctl_table)
-			return -ENOMEM;
-
-		pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST];
-		pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND];
-		pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN];
-		pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN];
-		pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ];
-		pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
-		pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
-		pn->ctl_table[7].data = &dn->dccp_loose;
-#endif
 	}
-	return 0;
+
+	return dccp_kmemdup_sysctl_table(pn, dn);
 }
 
 static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH 09/13] netfilter: merge sctpv[4,6]_net_init into sctp_net_init
From: Gao feng @ 2012-06-21 14:36 UTC (permalink / raw)
  To: pablo; +Cc: netdev, netfilter-devel, Gao feng
In-Reply-To: <1340289410-17642-1-git-send-email-gaofeng@cn.fujitsu.com>

Merge sctpv4_init_net and sctpv6_init_net into sctp_init_net to
reduce redundant code.

Also use nf_proto_net.users to identify whether this is the first
time we use the nf_proto_net. If it is the first time, we
initialize it.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/netfilter/nf_conntrack_proto_sctp.c |   65 ++++++++++--------------------
 1 files changed, 22 insertions(+), 43 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1e7836c..c746d61 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -707,23 +707,10 @@ static struct ctl_table sctp_compat_sysctl_table[] = {
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif
 
-static void sctp_init_net_data(struct sctp_net *sn)
-{
-	int i;
-#ifdef CONFIG_SYSCTL
-	if (!sn->pn.ctl_table) {
-#else
-	if (!sn->pn.users++) {
-#endif
-		for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
-			sn->timeouts[i] = sctp_timeouts[i];
-	}
-}
-
-static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn)
+static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+				     struct sctp_net *sn)
 {
 #ifdef CONFIG_SYSCTL
-	struct sctp_net *sn = (struct sctp_net *)pn;
 	if (pn->ctl_table)
 		return 0;
 
@@ -744,11 +731,11 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn)
 	return 0;
 }
 
-static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
+static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+					    struct sctp_net *sn)
 {
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	struct sctp_net *sn = (struct sctp_net *)pn;
 	pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table,
 				       sizeof(sctp_compat_sysctl_table),
 				       GFP_KERNEL);
@@ -767,41 +754,33 @@ static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
 	return 0;
 }
 
-static int sctpv4_init_net(struct net *net, u_int16_t proto)
+static int sctp_init_net(struct net *net, u_int16_t proto)
 {
 	int ret;
 	struct sctp_net *sn = sctp_pernet(net);
-	struct nf_proto_net *pn = (struct nf_proto_net *)sn;
+	struct nf_proto_net *pn = &sn->pn;
 
-	sctp_init_net_data(sn);
+	if (!pn->users) {
+		int i;
 
-	ret = sctp_kmemdup_compat_sysctl_table(pn);
-	if (ret < 0)
-		return ret;
+		for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
+			sn->timeouts[i] = sctp_timeouts[i];
+	}
 
-	ret = sctp_kmemdup_sysctl_table(pn);
+	if (proto == AF_INET) {
+		ret = sctp_kmemdup_compat_sysctl_table(pn, sn);
+		if (ret < 0)
+			return ret;
 
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	if (ret < 0) {
+		ret = sctp_kmemdup_sysctl_table(pn, sn);
+		if (ret < 0)
+			nf_ct_kfree_compat_sysctl_table(pn);
+	} else
+		ret = sctp_kmemdup_sysctl_table(pn, sn);
 
-		kfree(pn->ctl_compat_table);
-		pn->ctl_compat_table = NULL;
-	}
-#endif
-#endif
 	return ret;
 }
 
-static int sctpv6_init_net(struct net *net, u_int16_t proto)
-{
-	struct sctp_net *sn = sctp_pernet(net);
-	struct nf_proto_net *pn = (struct nf_proto_net *)sn;
-
-	sctp_init_net_data(sn);
-	return sctp_kmemdup_sysctl_table(pn);
-}
-
 static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_SCTP,
@@ -833,7 +812,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	},
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
 	.net_id			= &sctp_net_id,
-	.init_net		= sctpv4_init_net,
+	.init_net		= sctp_init_net,
 };
 
 static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
@@ -867,7 +846,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
 #endif
 	.net_id			= &sctp_net_id,
-	.init_net		= sctpv6_init_net,
+	.init_net		= sctp_init_net,
 };
 
 static int sctp_net_init(struct net *net)
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH 03/13] netfilter: add nf_ct_kfree_compat_sysctl_table to make codes clear
From: Gao feng @ 2012-06-21 14:36 UTC (permalink / raw)
  To: pablo; +Cc: netdev, netfilter-devel, Gao feng
In-Reply-To: <1340289410-17642-1-git-send-email-gaofeng@cn.fujitsu.com>

Add the function nf_ct_kfree_compat_sysctl_table to kfree an l4proto's
compat sysctl table and set the compat sysctl table pointer to NULL.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_l4proto.h |    8 ++++++++
 net/netfilter/nf_conntrack_proto.c           |    3 +--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 5dd60f2..08bb571 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -124,6 +124,14 @@ extern int nf_conntrack_l4proto_register(struct net *net,
 extern void nf_conntrack_l4proto_unregister(struct net *net,
 					    struct nf_conntrack_l4proto *proto);
 
+static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn)
+{
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+	kfree(pn->ctl_compat_table);
+	pn->ctl_compat_table = NULL;
+#endif
+}
+
 /* Generic netlink helpers */
 extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
 				      const struct nf_conntrack_tuple *tuple);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 6f4b6f3..9d6b6ab 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -361,8 +361,7 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
 		if (err == 0)
 			goto out;
 
-		kfree(pn->ctl_compat_table);
-		pn->ctl_compat_table = NULL;
+		nf_ct_kfree_compat_sysctl_table(pn);
 		nf_ct_unregister_sysctl(&pn->ctl_table_header,
 					&pn->ctl_table,
 					&pn->users);
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH 02/13] netfilter: add parameter proto for l4proto.init_net
From: Gao feng @ 2012-06-21 14:36 UTC (permalink / raw)
  To: pablo; +Cc: netdev, netfilter-devel, Gao feng
In-Reply-To: <1340289410-17642-1-git-send-email-gaofeng@cn.fujitsu.com>

There is redundant code in the l4proto init_net functions.
We can use a single init_net function together with the l3proto to
implement the same thing.

So we should add l3proto as a parameter of the init_net function.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 include/net/netfilter/nf_conntrack_l4proto.h   |    2 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |    2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |    2 +-
 net/netfilter/nf_conntrack_proto.c             |    5 +++--
 net/netfilter/nf_conntrack_proto_dccp.c        |    2 +-
 net/netfilter/nf_conntrack_proto_generic.c     |    2 +-
 net/netfilter/nf_conntrack_proto_gre.c         |    2 +-
 net/netfilter/nf_conntrack_proto_sctp.c        |    4 ++--
 net/netfilter/nf_conntrack_proto_tcp.c         |    4 ++--
 net/netfilter/nf_conntrack_proto_udp.c         |    4 ++--
 net/netfilter/nf_conntrack_proto_udplite.c     |    2 +-
 11 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 81c52b5..5dd60f2 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -97,7 +97,7 @@ struct nf_conntrack_l4proto {
 #endif
 	int	*net_id;
 	/* Init l4proto pernet data */
-	int (*init_net)(struct net *net);
+	int (*init_net)(struct net *net, u_int16_t proto);
 
 	/* Protocol name */
 	const char *name;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 041923c..76f7a2f 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -337,7 +337,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = {
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
-static int icmp_init_net(struct net *net)
+static int icmp_init_net(struct net *net, u_int16_t proto)
 {
 	struct nf_icmp_net *in = icmp_pernet(net);
 	struct nf_proto_net *pn = (struct nf_proto_net *)in;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 63ed012..807ae09 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -333,7 +333,7 @@ static struct ctl_table icmpv6_sysctl_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
-static int icmpv6_init_net(struct net *net)
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
 {
 	struct nf_icmp_net *in = icmpv6_pernet(net);
 	struct nf_proto_net *pn = (struct nf_proto_net *)in;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 9bd88aa..6f4b6f3 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -461,7 +461,7 @@ int nf_conntrack_l4proto_register(struct net *net,
 	int ret = 0;
 
 	if (l4proto->init_net) {
-		ret = l4proto->init_net(net);
+		ret = l4proto->init_net(net, l4proto->l3proto);
 		if (ret < 0)
 			return ret;
 	}
@@ -515,7 +515,8 @@ int nf_conntrack_proto_init(struct net *net)
 {
 	unsigned int i;
 	int err;
-	err = nf_conntrack_l4proto_generic.init_net(net);
+	err = nf_conntrack_l4proto_generic.init_net(net,
+					nf_conntrack_l4proto_generic.l3proto);
 	if (err < 0)
 		return err;
 	err = nf_ct_l4proto_register_sysctl(net,
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index c33f76a..52da8f0 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -815,7 +815,7 @@ static struct ctl_table dccp_sysctl_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
-static int dccp_init_net(struct net *net)
+static int dccp_init_net(struct net *net, u_int16_t proto)
 {
 	struct dccp_net *dn = dccp_pernet(net);
 	struct nf_proto_net *pn = (struct nf_proto_net *)dn;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index bb0e74f..d1ed7b4 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -135,7 +135,7 @@ static struct ctl_table generic_compat_sysctl_table[] = {
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
-static int generic_init_net(struct net *net)
+static int generic_init_net(struct net *net, u_int16_t proto)
 {
 	struct nf_generic_net *gn = generic_pernet(net);
 	struct nf_proto_net *pn = (struct nf_proto_net *)gn;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 5cac41c..b09b7af 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -348,7 +348,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
 };
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
 
-static int gre_init_net(struct net *net)
+static int gre_init_net(struct net *net, u_int16_t proto)
 {
 	struct netns_proto_gre *net_gre = gre_pernet(net);
 	int i;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 8fb0582..1e7836c 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -767,7 +767,7 @@ static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
 	return 0;
 }
 
-static int sctpv4_init_net(struct net *net)
+static int sctpv4_init_net(struct net *net, u_int16_t proto)
 {
 	int ret;
 	struct sctp_net *sn = sctp_pernet(net);
@@ -793,7 +793,7 @@ static int sctpv4_init_net(struct net *net)
 	return ret;
 }
 
-static int sctpv6_init_net(struct net *net)
+static int sctpv6_init_net(struct net *net, u_int16_t proto)
 {
 	struct sctp_net *sn = sctp_pernet(net);
 	struct nf_proto_net *pn = (struct nf_proto_net *)sn;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 99caa13..6db9d3c 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1593,7 +1593,7 @@ static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
 	return 0;
 }
 
-static int tcpv4_init_net(struct net *net)
+static int tcpv4_init_net(struct net *net, u_int16_t proto)
 {
 	int i;
 	int ret = 0;
@@ -1631,7 +1631,7 @@ static int tcpv4_init_net(struct net *net)
 	return ret;
 }
 
-static int tcpv6_init_net(struct net *net)
+static int tcpv6_init_net(struct net *net, u_int16_t proto)
 {
 	int i;
 	struct nf_tcp_net *tn = tcp_pernet(net);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a83cf93..2b978e6 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -283,7 +283,7 @@ static void udp_init_net_data(struct nf_udp_net *un)
 	}
 }
 
-static int udpv4_init_net(struct net *net)
+static int udpv4_init_net(struct net *net, u_int16_t proto)
 {
 	int ret;
 	struct nf_udp_net *un = udp_pernet(net);
@@ -307,7 +307,7 @@ static int udpv4_init_net(struct net *net)
 	return ret;
 }
 
-static int udpv6_init_net(struct net *net)
+static int udpv6_init_net(struct net *net, u_int16_t proto)
 {
 	struct nf_udp_net *un = udp_pernet(net);
 	struct nf_proto_net *pn = (struct nf_proto_net *)un;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index b32e700..d33e511 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -234,7 +234,7 @@ static struct ctl_table udplite_sysctl_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
-static int udplite_init_net(struct net *net)
+static int udplite_init_net(struct net *net, u_int16_t proto)
 {
 	int i;
 	struct udplite_net *un = udplite_pernet(net);
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH net] net: qmi_wwan: fix Gobi device probing
From: Bjørn Mork @ 2012-06-21 12:45 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-usb-u79uwXL29TY76Z2rM5mHXA,
	H.Siebmanns-zqRNUXuvxA0b1SvskN2V4Q, Bjørn Mork

Ignoring interfaces with additional descriptors is not a reliable
method for locating the correct interface on Gobi devices.  There
is at least one device where this method fails:
https://bbs.archlinux.org/viewtopic.php?id=143506

The result is that the AT command port (interface #2) is hidden
from qcserial, preventing traditional serial modem usage:

[   15.562552] qmi_wwan 4-1.6:1.0: cdc-wdm0: USB WDM device
[   15.562691] qmi_wwan 4-1.6:1.0: wwan0: register 'qmi_wwan' at usb-0000:00:1d.0-1.6, Qualcomm Gobi wwan/QMI device, 1e:df:3c:3a:4e:3b
[   15.563383] qmi_wwan: probe of 4-1.6:1.1 failed with error -22
[   15.564189] qmi_wwan 4-1.6:1.2: cdc-wdm1: USB WDM device
[   15.564302] qmi_wwan 4-1.6:1.2: wwan1: register 'qmi_wwan' at usb-0000:00:1d.0-1.6, Qualcomm Gobi wwan/QMI device, 1e:df:3c:3a:4e:3b
[   15.564328] qmi_wwan: probe of 4-1.6:1.3 failed with error -22
[   15.569376] qcserial 4-1.6:1.1: Qualcomm USB modem converter detected
[   15.569440] usb 4-1.6: Qualcomm USB modem converter now attached to ttyUSB0
[   15.570372] qcserial 4-1.6:1.3: Qualcomm USB modem converter detected
[   15.570430] usb 4-1.6: Qualcomm USB modem converter now attached to ttyUSB1

Use static interface numbers taken from the interface map in
qcserial for all Gobi devices instead:

	Gobi 1K USB layout:
	0: serial port (doesn't respond)
	1: serial port (doesn't respond)
	2: AT-capable modem port
	3: QMI/net

	Gobi 2K+ USB layout:
	0: QMI/net
	1: DM/DIAG (use libqcdm from ModemManager for communication)
	2: AT-capable modem port
	3: NMEA

This should be more reliable overall, and will also prevent the
noisy "probe failed" messages.  The whitelisting logic is expected
to be replaced by direct interface number matching in 3.6.

Reported-by: Heinrich Siebmanns (Harvey) <H.Siebmanns-zqRNUXuvxA0b1SvskN2V4Q@public.gmane.org>
Cc: <stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org> # v3.4: 0000188 USB: qmi_wwan: Make forced int 4 whitelist generic
Cc: <stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org> # v3.4: f7142e6 USB: qmi_wwan: Add ZTE (Vodafone) K3520-Z
Cc: <stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org> # v3.4
Signed-off-by: Bjørn Mork <bjorn-yOkvZcmFvRU@public.gmane.org>
---
 drivers/net/usb/qmi_wwan.c |   83 +++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 43 deletions(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 3b20678..3767a12 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -257,29 +257,6 @@ err:
 	return rv;
 }
 
-/* Gobi devices uses identical class/protocol codes for all interfaces regardless
- * of function. Some of these are CDC ACM like and have the exact same endpoints
- * we are looking for. This leaves two possible strategies for identifying the
- * correct interface:
- *   a) hardcoding interface number, or
- *   b) use the fact that the wwan interface is the only one lacking additional
- *      (CDC functional) descriptors
- *
- * Let's see if we can get away with the generic b) solution.
- */
-static int qmi_wwan_bind_gobi(struct usbnet *dev, struct usb_interface *intf)
-{
-	int rv = -EINVAL;
-
-	/* ignore any interface with additional descriptors */
-	if (intf->cur_altsetting->extralen)
-		goto err;
-
-	rv = qmi_wwan_bind_shared(dev, intf);
-err:
-	return rv;
-}
-
 static void qmi_wwan_unbind_shared(struct usbnet *dev, struct usb_interface *intf)
 {
 	struct usb_driver *subdriver = (void *)dev->data[0];
@@ -347,15 +324,15 @@ static const struct driver_info	qmi_wwan_shared = {
 	.manage_power	= qmi_wwan_manage_power,
 };
 
-static const struct driver_info	qmi_wwan_gobi = {
-	.description	= "Qualcomm Gobi wwan/QMI device",
+static const struct driver_info	qmi_wwan_force_int0 = {
+	.description	= "Qualcomm WWAN/QMI device",
 	.flags		= FLAG_WWAN,
-	.bind		= qmi_wwan_bind_gobi,
+	.bind		= qmi_wwan_bind_shared,
 	.unbind		= qmi_wwan_unbind_shared,
 	.manage_power	= qmi_wwan_manage_power,
+	.data		= BIT(0), /* interface whitelist bitmap */
 };
 
-/* ZTE suck at making USB descriptors */
 static const struct driver_info	qmi_wwan_force_int1 = {
 	.description	= "Qualcomm WWAN/QMI device",
 	.flags		= FLAG_WWAN,
@@ -365,6 +342,15 @@ static const struct driver_info	qmi_wwan_force_int1 = {
 	.data		= BIT(1), /* interface whitelist bitmap */
 };
 
+static const struct driver_info	qmi_wwan_force_int3 = {
+	.description	= "Qualcomm WWAN/QMI device",
+	.flags		= FLAG_WWAN,
+	.bind		= qmi_wwan_bind_shared,
+	.unbind		= qmi_wwan_unbind_shared,
+	.manage_power	= qmi_wwan_manage_power,
+	.data		= BIT(3), /* interface whitelist bitmap */
+};
+
 static const struct driver_info	qmi_wwan_force_int4 = {
 	.description	= "Qualcomm WWAN/QMI device",
 	.flags		= FLAG_WWAN,
@@ -390,16 +376,23 @@ static const struct driver_info	qmi_wwan_force_int4 = {
 static const struct driver_info	qmi_wwan_sierra = {
 	.description	= "Sierra Wireless wwan/QMI device",
 	.flags		= FLAG_WWAN,
-	.bind		= qmi_wwan_bind_gobi,
+	.bind		= qmi_wwan_bind_shared,
 	.unbind		= qmi_wwan_unbind_shared,
 	.manage_power	= qmi_wwan_manage_power,
 	.data		= BIT(8) | BIT(19), /* interface whitelist bitmap */
 };
 
 #define HUAWEI_VENDOR_ID	0x12D1
+
+/* Gobi 1000 QMI/wwan interface number is 3 according to qcserial */
+#define QMI_GOBI1K_DEVICE(vend, prod) \
+	USB_DEVICE(vend, prod), \
+	.driver_info = (unsigned long)&qmi_wwan_force_int3
+
+/* Gobi 2000 and Gobi 3000 QMI/wwan interface number is 0 according to qcserial */
 #define QMI_GOBI_DEVICE(vend, prod) \
 	USB_DEVICE(vend, prod), \
-	.driver_info = (unsigned long)&qmi_wwan_gobi
+	.driver_info = (unsigned long)&qmi_wwan_force_int0
 
 static const struct usb_device_id products[] = {
 	{	/* Huawei E392, E398 and possibly others sharing both device id and more... */
@@ -510,20 +503,24 @@ static const struct usb_device_id products[] = {
 		.bInterfaceProtocol = 0xff,
 		.driver_info        = (unsigned long)&qmi_wwan_sierra,
 	},
-	{QMI_GOBI_DEVICE(0x05c6, 0x9212)},	/* Acer Gobi Modem Device */
-	{QMI_GOBI_DEVICE(0x03f0, 0x1f1d)},	/* HP un2400 Gobi Modem Device */
-	{QMI_GOBI_DEVICE(0x03f0, 0x371d)},	/* HP un2430 Mobile Broadband Module */
-	{QMI_GOBI_DEVICE(0x04da, 0x250d)},	/* Panasonic Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x413c, 0x8172)},	/* Dell Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x1410, 0xa001)},	/* Novatel Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x0b05, 0x1776)},	/* Asus Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x19d2, 0xfff3)},	/* ONDA Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x05c6, 0x9001)},	/* Generic Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x05c6, 0x9002)},	/* Generic Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x05c6, 0x9202)},	/* Generic Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x05c6, 0x9203)},	/* Generic Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x05c6, 0x9222)},	/* Generic Gobi Modem device */
-	{QMI_GOBI_DEVICE(0x05c6, 0x9009)},	/* Generic Gobi Modem device */
+
+	/* Gobi 1000 devices */
+	{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},	/* Acer Gobi Modem Device */
+	{QMI_GOBI1K_DEVICE(0x03f0, 0x1f1d)},	/* HP un2400 Gobi Modem Device */
+	{QMI_GOBI1K_DEVICE(0x03f0, 0x371d)},	/* HP un2430 Mobile Broadband Module */
+	{QMI_GOBI1K_DEVICE(0x04da, 0x250d)},	/* Panasonic Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x413c, 0x8172)},	/* Dell Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x1410, 0xa001)},	/* Novatel Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x0b05, 0x1776)},	/* Asus Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x19d2, 0xfff3)},	/* ONDA Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x05c6, 0x9001)},	/* Generic Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x05c6, 0x9002)},	/* Generic Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x05c6, 0x9202)},	/* Generic Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x05c6, 0x9203)},	/* Generic Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x05c6, 0x9222)},	/* Generic Gobi Modem device */
+	{QMI_GOBI1K_DEVICE(0x05c6, 0x9009)},	/* Generic Gobi Modem device */
+
+	/* Gobi 2000 and 3000 devices */
 	{QMI_GOBI_DEVICE(0x413c, 0x8186)},	/* Dell Gobi 2000 Modem device (N0218, VU936) */
 	{QMI_GOBI_DEVICE(0x05c6, 0x920b)},	/* Generic Gobi 2000 Modem device */
 	{QMI_GOBI_DEVICE(0x05c6, 0x9225)},	/* Sony Gobi 2000 Modem device (N0279, VU730) */
-- 
1.7.10

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH] bnx2x: fix panic when TX ring is full
From: Dmitry Kravkov @ 2012-06-21 12:19 UTC (permalink / raw)
  To: Tomas Hruby, David Miller
  Cc: Eric Dumazet, netdev@vger.kernel.org, therbert@google.com,
	evansr@google.com, Eilon Greenstein, Merav Sicron, Yaniv Rosner,
	willemb@google.com
In-Reply-To: <CA+yapKM+H0DHA_UotD7v5LRZfYUeanupxb4=r89+fVSTo-Ad4w@mail.gmail.com>

On Mon, 2012-06-18 at 10:18 -0700, Tomas Hruby wrote:
> On Mon, Jun 18, 2012 at 12:38 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Sat, 2012-06-16 at 07:40 +0000, Dmitry Kravkov wrote:
> >> Hi Eric and Tomas
> >>
> >> > From: netdev-owner@vger.kernel.org [mailto:netdev-
> >> > owner@vger.kernel.org] On Behalf Of David Miller
> >> > Sent: Saturday, June 16, 2012 1:31 AM
> >> > To: eric.dumazet@gmail.com
> >> > Cc: netdev@vger.kernel.org; therbert@google.com; evansr@google.com;
> >> > Eilon Greenstein; Merav Sicron; Yaniv Rosner; willemb@google.com;
> >> > thruby@google.com
> >> > Subject: Re: [PATCH] bnx2x: fix panic when TX ring is full
> >> >
> >> > From: Eric Dumazet <eric.dumazet@gmail.com>
> >> > Date: Wed, 13 Jun 2012 21:45:16 +0200
> >> >
> >> > > From: Eric Dumazet <edumazet@google.com>
> >> > >
> >> > > There is a off by one error in the minimal number of BD in
> >> > > bnx2x_start_xmit() and bnx2x_tx_int() before stopping/resuming tx
> >> > queue.
> >> > >
> >> > > A full size GSO packet, with data included in skb->head really needs
> >> > > (MAX_SKB_FRAGS + 4) BDs, because of bnx2x_tx_split()
> >> > >
> >> > > This error triggers if BQL is disabled and heavy TCP transmit traffic
> >> > > occurs.
> >> > >
> >> > > bnx2x_tx_split() definitely can be called, remove a wrong comment.
> >> > >
> >> > > Reported-by: Tomas Hruby <thruby@google.com>
> >> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> >>
> >> Theoretically I can't see how we can reach the case with 4 BDs required apart from frags,
> >> Usually we need 2, when split invoked 3:
> >> 1.Start
> >> 2.Start(split)
> >> 3.Parsing
> >> + Frags
> >>
> >> Next pages descriptors and 2 extras for full indication are not counted as available.
> >>
> >> Practically I'm running the traffic for more than a day without hitting the panic.
> >>
> >> Can you describe in detail the scenario in which you reproduced this? And which code has panicked?
> >
> > Thats pretty immediate.
> 
> yes
> 
> > Disable bql on your NIC.
> >
> > Say you have 4 queues :
> >
> > for q in 0 1 2 3
> > do
> >  echo max >/sys/class/net/eth0/queues/tx-$q/byte_queue_limits/limit_min
> > done
> >
> > Then start 40 netperf
> >
> > for i in `seq 1 40`
> > do
> >  netperf -H 192.168.1.4 &
> > done
> 
> this is enough in my case too, it is perfectly reproducible on
> different machines. Replacing +3 for +4 fixes the problem.

The crash happens with default configuration since
[4acb41903b2f99f3dffd4c3df9acc84ca5942cb2] "net/tcp: Fix tcp memory
limits initialization when !CONFIG_SYSCTL", but it can be hit by
increasing values of tcp_wmem even earlier.

We want to submit a semantic patch into net-next once the two branches are
merged, but the original patch from Eric needs to go -stable.

Thanks.

From: Dmitry Kravkov <dmitry@broadcom.com>
Subject: [PATCH net-next] bnx2x: reservation for NEXT tx BDs

Commit [4acb41903b2f99f3dffd4c3df9acc84ca5942cb2]
"net/tcp: Fix tcp memory limits initialization when !CONFIG_SYSCTL"
provided a new default value for tcp_wmem; since then, heavy TCP
traffic may cause a TSO packet to consume 20 BDs + 1 for the next page
descriptor.
Eric has fixed the reservation in
[bc14786a100cc6a81cd060e8031ec481241b418c]
"bnx2x: fix panic when TX ring is full". This patch provides some
inline explanation for the magic numbers used and fixes the condition
for the statistics increment in a similar way.

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h     |   15 +++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |    9 +++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 7de8241..be75f45 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -616,6 +616,21 @@ struct bnx2x_fastpath {
 #define TX_BD(x)		((x) & MAX_TX_BD)
 #define TX_BD_POFF(x)		((x) & MAX_TX_DESC_CNT)
 
+/* how many NEXT PAGE descriptors may packet occupy */
+#define NEXT_CNT_PER_TX_PKT(bds)	\
+				(((bds) + MAX_TX_DESC_CNT - 1) / \
+				 MAX_TX_DESC_CNT * NEXT_PAGE_TX_DESC_CNT)
+/* max pure data/headers BDs per tx packet:
+ * START_BD		- describes packed
+ * START_BD(splitted)	- includes unpaged data segment for GSO
+ * PARSING_BD		- for TSO and CSUM data
+ * Frag BDs		- decribes pages for frags
+ */
+#define MAX_BDS_PER_TX_PKT	(MAX_SKB_FRAGS + 3)
+/* max BDs per tx packet including next pages */
+#define MAX_DESC_PER_TX_PKT	(MAX_BDS_PER_TX_PKT + \
+				 NEXT_CNT_PER_TX_PKT(MAX_BDS_PER_TX_PKT))
+
 /* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
 #define NUM_RX_RINGS		8
 #define RX_DESC_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 8098eea..c4928ea 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -190,7 +190,7 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
 
 		if ((netif_tx_queue_stopped(txq)) &&
 		    (bp->state == BNX2X_STATE_OPEN) &&
-		    (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4))
+		    (bnx2x_tx_avail(bp, txdata) >= MAX_DESC_PER_TX_PKT))
 			netif_tx_wake_queue(txq);
 
 		__netif_tx_unlock(txq);
@@ -2894,7 +2894,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	   txdata->cid, fp_index, txdata_index, txdata, fp); */
 
 	if (unlikely(bnx2x_tx_avail(bp, txdata) <
-		     (skb_shinfo(skb)->nr_frags + 3))) {
+			skb_shinfo(skb)->nr_frags + 3 +
+			NEXT_CNT_PER_TX_PKT(MAX_BDS_PER_TX_PKT))) {
 		fp->eth_q_stats.driver_xoff++;
 		netif_tx_stop_queue(txq);
 		BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
@@ -3169,7 +3170,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	txdata->tx_bd_prod += nbd;
 
-	if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 4)) {
+	if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_DESC_PER_TX_PKT)) {
 		netif_tx_stop_queue(txq);
 
 		/* paired memory barrier is in bnx2x_tx_int(), we have to keep
@@ -3178,7 +3179,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		smp_mb();
 
 		fp->eth_q_stats.driver_xoff++;
-		if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4)
+		if (bnx2x_tx_avail(bp, txdata) >= MAX_DESC_PER_TX_PKT)
 			netif_tx_wake_queue(txq);
 	}
 	txdata->tx_pkt++;
-- 
1.7.7.2





> T.
> 

^ permalink raw reply related

* [net] ixgbe: simplify padding and length checks
From: Jeff Kirsher @ 2012-06-21 12:15 UTC (permalink / raw)
  To: davem; +Cc: Stephen Hemminger, netdev, gospo, sassmann, Jeff Kirsher

From: Stephen Hemminger <shemminger@vyatta.com>

The check for length <= 0 is bogus because length is unsigned, and network
stack never sends zero length packets (unless it is totally broken).

The check for really small packets can be optimized by using unlikely()
and calling skb_pad() directly.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index cbb05d6..9afe0cb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6383,17 +6383,12 @@ static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb,
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_ring *tx_ring;
 
-	if (skb->len <= 0) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
 	/*
 	 * The minimum packet size for olinfo paylen is 17 so pad the skb
 	 * in order to meet this minimum size requirement.
 	 */
-	if (skb->len < 17) {
-		if (skb_padto(skb, 17))
+	if (unlikely(skb->len < 17)) {
+		if (skb_pad(skb, 17 - skb->len))
 			return NETDEV_TX_OK;
 		skb->len = 17;
 	}
-- 
1.7.10.2

^ permalink raw reply related

* Re: [PATCH] net: dcb: fix small regression in __dcbnl_pg_setcfg()
From: Thomas Graf @ 2012-06-21 10:52 UTC (permalink / raw)
  To: John Fastabend; +Cc: davem, netdev, lucy.liu, alexander.h.duyck
In-Reply-To: <20120621055621.14148.42206.stgit@jf-dev1-dcblab>

On Wed, Jun 20, 2012 at 10:56:21PM -0700, John Fastabend wrote:
> A small regression was introduced in the reply command of
> dcbnl_pg_setcfg(). User space apps may be expecting the
> DCB_ATTR_PG_CFG attribute to be returned; with the patch
> below, TX or RX variants are returned instead.
> 
> commit 7be994138b188387691322921c08e19bddf6d3c5
> Author: Thomas Graf <tgraf@suug.ch>
> Date:   Wed Jun 13 02:54:55 2012 +0000
> 
>     dcbnl: Shorten all command handling functions
> 
> This patch reverts this behavior and returns DCB_ATTR_PG_CFG
> 
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>

Acked-by: Thomas Graf <tgraf@suug.ch>

^ permalink raw reply

* Re: [RFC] tcp: How does SACK or FACK determine the time to start fast retransmition?
From: 李易 @ 2012-06-21 10:17 UTC (permalink / raw)
  To: Vijay Subramanian; +Cc: netdev, kernelnewbies
In-Reply-To: <CAGK4HS_CN53EtQvPm2H7Qn85eXRz7hiEQvs3bU3xXpcNeg4byg@mail.gmail.com>

于 2012/6/21 16:42, Vijay Subramanian 写道:
> With SACK, number of  dupacks does not have much meaning. What matters is
> --how the SACK scoreboard looks like i.e. which packets are tagged
> Lost/Sacked/Retransmitted
> -- Whether FACK is in use (this assumes holes in between sacked
> packets are lost and have left the network and so we can send out more
> packets)
>
> So, stack does not count the number of dupacks that have come in. Only
> SACK blocks matter.
> You can try to track the following path:
> tcp_ack() deals with incoming acks and if it sees a dupack (does not
> matter what number), or incoming packet contains SACK it calls
> tcp_fastretrans_alert() which calls  tcp_xmit_retransmit_queue().
>
> tcp_xmit_retransmit_queue() decides which packets to retransmit. The
> first packet to start retransmitting from is tracked in
> tp->retransmit_skb_hint.
> Note that the dupThresh is actually tracked by tp->reordering which
> measures  the reordering in the network and is not fixed at 3.  So, if
> more than
> tp->reordering packets have been acked above a given packet, this
> packet is a candidate for retransmission. See tcp_mark_head_lost() to
> see how the
> reordering metric is used to mark packets as lost. This corresponds to
> the check you mentioned in the RFC.
>
> So, window permitting, packets are sent as follows;
> (a)-- Packets marked lost as per description above
> (b)-- new packets (if any)
> (c)-- Holes between sacked packets  which are not reliably lost.
>
> choice between (b) and (c) is made in tcp_can_forward_retransmit().
>
> Hope this helps.
> Vijay
>
It is just what I wanted! Thanks for your detailed explanation and kindness.

^ permalink raw reply

* [PATCH 1/4] net/mlx4_en: Set correct port parameters during device initialization
From: Yevgeny Petrilin @ 2012-06-21  9:19 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yevgeny Petrilin
In-Reply-To: <1340270358-19504-1-git-send-email-yevgenyp@mellanox.co.il>

Set valid port parameters: MTU and flow control configuration when
configuring the port during HW device initialization,
prior to the net device open() being called.
Using  invalid parameters (such as all zeros)
could lead to bad firmware behavior.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 926d8aa..a80280e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1204,9 +1204,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
 
 	/* Configure port */
+	mlx4_en_calc_rx_buf(dev);
 	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
-				    MLX4_EN_MIN_MTU,
-				    0, 0, 0, 0);
+				    priv->rx_skb_size + ETH_FCS_LEN,
+				    prof->tx_pause, prof->tx_ppp,
+				    prof->rx_pause, prof->rx_ppp);
 	if (err) {
 		en_err(priv, "Failed setting port general configurations "
 		       "for port %d, with error %d\n", priv->port, err);
-- 
1.7.1

^ permalink raw reply related

* [PATCH 4/4] net/mlx4_en: Use atomic counter to decide when queue is full
From: Yevgeny Petrilin @ 2012-06-21  9:19 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yevgeny Petrilin
In-Reply-To: <1340270358-19504-1-git-send-email-yevgenyp@mellanox.co.il>

The Transmit and transmit completion flows execute from different contexts,
which are not synchronized. Hence naively reading the consumer index might
give a wrong value by the time it is used. That could lead to a transmit timeout.
Fix that by using an atomic variable to maintain that index.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c   |   16 ++++++++--------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h |    1 +
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 019d856..f4b4703 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -165,6 +165,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 	ring->last_nr_txbb = 1;
 	ring->poll_cnt = 0;
 	ring->blocked = 0;
+	atomic_set(&ring->inflight, 0);
 	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
 	memset(ring->buf, 0, ring->buf_size);
 
@@ -364,15 +365,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	wmb();
 	ring->cons += txbbs_skipped;
 	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
+	atomic_sub(txbbs_skipped, &ring->inflight);
 
 	/* Wakeup Tx queue if this ring stopped it */
-	if (unlikely(ring->blocked)) {
-		if ((u32) (ring->prod - ring->cons) <=
-		     ring->size - HEADROOM - MAX_DESC_TXBBS) {
-			ring->blocked = 0;
-			netif_tx_wake_queue(ring->tx_queue);
-			priv->port_stats.wake_queue++;
-		}
+	if (unlikely(ring->blocked && txbbs_skipped > 0)) {
+		ring->blocked = 0;
+		netif_tx_wake_queue(ring->tx_queue);
+		priv->port_stats.wake_queue++;
 	}
 }
 
@@ -588,7 +587,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		vlan_tag = vlan_tx_tag_get(skb);
 
 	/* Check available TXBBs And 2K spare for prefetch */
-	if (unlikely(((int)(ring->prod - ring->cons)) >
+	if (unlikely(atomic_read(&ring->inflight) >
 		     ring->size - HEADROOM - MAX_DESC_TXBBS)) {
 		/* every full Tx ring stops queue */
 		netif_tx_stop_queue(ring->tx_queue);
@@ -710,6 +709,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	ring->prod += nr_txbb;
+	atomic_add(nr_txbb, &ring->inflight);
 
 	/* If we used a bounce buffer then copy descriptor back into place */
 	if (bounce)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 225c20d..6a8a69d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -257,6 +257,7 @@ struct mlx4_en_tx_ring {
 	struct mlx4_bf bf;
 	bool bf_enabled;
 	struct netdev_queue *tx_queue;
+	atomic_t inflight;
 };
 
 struct mlx4_en_rx_desc {
-- 
1.7.1

^ permalink raw reply related

* [PATCH 3/4] net/mlx4_en: Release QP range in free_resources
From: Yevgeny Petrilin @ 2012-06-21  9:19 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yevgeny Petrilin
In-Reply-To: <1340270358-19504-1-git-send-email-yevgenyp@mellanox.co.il>

Add a missing resource release in ring cleanup.
Not doing this leaves a range of QPs that are being reserved,
and no one can use them.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   12 ++++++++----
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |    1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index a80280e..073b85b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -929,15 +929,20 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 		if (priv->rx_cq[i].buf)
 			mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
 	}
+
+	if (priv->base_tx_qpn) {
+		mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num);
+		priv->base_tx_qpn = 0;
+	}
 }
 
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_port_profile *prof = priv->prof;
 	int i;
-	int base_tx_qpn, err;
+	int err;
 
-	err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &base_tx_qpn);
+	err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &priv->base_tx_qpn);
 	if (err) {
 		en_err(priv, "failed reserving range for TX rings\n");
 		return err;
@@ -949,7 +954,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 				      prof->tx_ring_size, i, TX))
 			goto err;
 
-		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], base_tx_qpn + i,
+		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i,
 					   prof->tx_ring_size, TXBB_SIZE))
 			goto err;
 	}
@@ -969,7 +974,6 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 
 err:
 	en_err(priv, "Failed to allocate NIC resources\n");
-	mlx4_qp_release_range(priv->mdev->dev, base_tx_qpn, priv->tx_ring_num);
 	return -ENOMEM;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 6ae3509..225c20d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -495,6 +495,7 @@ struct mlx4_en_priv {
 	int vids[128];
 	bool wol;
 	struct device *ddev;
+	int base_tx_qpn;
 
 #ifdef CONFIG_MLX4_EN_DCB
 	struct ieee_ets ets;
-- 
1.7.1

^ permalink raw reply related

* [PATCH 2/4] net/mlx4: Use single completion vector after NOP failure
From: Yevgeny Petrilin @ 2012-06-21  9:19 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yevgeny Petrilin
In-Reply-To: <1340270358-19504-1-git-send-email-yevgenyp@mellanox.co.il>

Fix a crash at the error flow of NOP command which caused the driver to try and use
a completion vector which wasn't allocated.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/ethernet/mellanox/mlx4/main.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 18c8deb..c91a2b8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1978,6 +1978,8 @@ slave_start:
 	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
 	    !mlx4_is_mfunc(dev)) {
 		dev->flags &= ~MLX4_FLAG_MSI_X;
+		dev->caps.num_comp_vectors = 1;
+		dev->caps.comp_pool	   = 0;
 		pci_disable_msix(pdev);
 		err = mlx4_setup_hca(dev);
 	}
-- 
1.7.1

^ permalink raw reply related

* [PATCH net 0/4] net/mlx4: Bug fixes for the mlx4_en driver
From: Yevgeny Petrilin @ 2012-06-21  9:19 UTC (permalink / raw)
  To: davem; +Cc: netdev

This is a set of 4 bug fixes generated against the net tree:

Yevgeny Petrilin (4):
	net/mlx4_en: Set correct port parameters during device initialization
	net/mlx4: Use single completion vector after NOP failure
	net/mlx4_en: Release QP range in free_resources
	net/mlx4_en: Use atomic counter to decide when queue is full

 en_netdev.c |   18 ++++++++++++------
 en_tx.c     |   16 ++++++++--------
 main.c      |    2 ++
 mlx4_en.h   |    2 ++
 4 files changed, 24 insertions(+), 14 deletions(-)

Thanks,
Yevgeny

^ permalink raw reply

* [PATCH net 0/4] net/mlx4: Bug fixes for the mlx4_en driver
From: Yevgeny Petrilin @ 2012-06-21  9:18 UTC (permalink / raw)
  To: davem; +Cc: netdev, yevgenyp

This is a set of 4 bug fixes generated against the net tree:

Yevgeny Petrilin (4):
	net/mlx4_en: Set correct port parameters during device initialization
	net/mlx4: Use single completion vector after NOP failure
	net/mlx4_en: Release QP range in free_resources
	net/mlx4_en: Use atomic counter to decide when queue is full

 en_netdev.c |   18 ++++++++++++------
 en_tx.c     |   16 ++++++++--------
 main.c      |    2 ++
 mlx4_en.h   |    2 ++
 4 files changed, 24 insertions(+), 14 deletions(-)

Thanks,
Yevgeny

^ permalink raw reply

* RE: IPV6 ndisc::  Bad NIC causing  IPV6 NDP to stop working
From: Eric Dumazet @ 2012-06-21  9:16 UTC (permalink / raw)
  To: Menny_Hamburger; +Cc: netdev
In-Reply-To: <D8C50530D6022F40A817A35C40CC06A70B34DBF05D@DUBX7MCDUB01.EMEA.DELL.COM>

Please don't top post on this list.

On Thu, 2012-06-21 at 09:43 +0100, Menny_Hamburger@Dell.com wrote:
> For high availability reasons, the machines discussed run with a
> number of NICs per subnet, where our own proprietary service fixes up
> routing when a NIC goes wild.
> We schedule a fix in the field but our goal is to eliminate as many
> single points of failure as we can, so that our systems will still run
> properly when something goes wrong.

Even if a NIC does memory corruption or some nasty bug ?
That sounds great :)

> We encountered this issue on some proprietary NICs but also with bnx2,
> where we get "chip not in correct endian mode" errors (This is another
> problem that may require a separate discussion).

Until very recently, we used to orphan skb before giving them to device
transmit. So you probably use a very old kernel.

I guess we could just do a regular alloc_skb(), it makes no sense to
limit in-flight ND skbs, we have Qdisc/device limits anyway.

BTW, I have no idea why ndisc_build_skb() is EXPORTed

 net/ipv6/ndisc.c |   24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)


diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 69a6330..f149d85 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -429,7 +429,6 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
 	int hlen = LL_RESERVED_SPACE(dev);
 	int tlen = dev->needed_tailroom;
 	int len;
-	int err;
 	u8 *opt;
 
 	if (!dev->addr_len)
@@ -439,15 +438,10 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
 	if (llinfo)
 		len += ndisc_opt_addr_space(dev);
 
-	skb = sock_alloc_send_skb(sk,
-				  (MAX_HEADER + sizeof(struct ipv6hdr) +
-				   len + hlen + tlen),
-				  1, &err);
-	if (!skb) {
-		ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n",
-			  __func__, err);
+	skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + len + hlen + tlen,
+			GFP_ATOMIC);
+	if (!skb)
 		return NULL;
-	}
 
 	skb_reserve(skb, hlen);
 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
@@ -1550,16 +1544,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 
 	hlen = LL_RESERVED_SPACE(dev);
 	tlen = dev->needed_tailroom;
-	buff = sock_alloc_send_skb(sk,
-				   (MAX_HEADER + sizeof(struct ipv6hdr) +
-				    len + hlen + tlen),
-				   1, &err);
-	if (buff == NULL) {
-		ND_PRINTK(0, err,
-			  "Redirect: %s failed to allocate an skb, err=%d\n",
-			  __func__, err);
+	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + len + hlen + tlen,
+			 GFP_ATOMIC);
+	if (!buff)
 		goto release;
-	}
 
 	skb_reserve(buff, hlen);
 	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,

^ permalink raw reply related

* Re: [PATCH] net: dcb: fix small regression in __dcbnl_pg_setcfg()
From: David Miller @ 2012-06-21  9:04 UTC (permalink / raw)
  To: tgraf; +Cc: john.r.fastabend, tgraf, netdev, lucy.liu, alexander.h.duyck
In-Reply-To: <20120621081910.GH27921@canuck.infradead.org>

From: Thomas Graf <tgraf@infradead.org>
Date: Thu, 21 Jun 2012 04:19:10 -0400

> ACK

Can I get a real "Acked-by: ..." so that it automatically gets
picked up by patchwork?  Thanks.

^ permalink raw reply

* RE: IPV6 ndisc::  Bad NIC causing  IPV6 NDP to stop working
From: Menny_Hamburger @ 2012-06-21  8:43 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1340266972.4604.4404.camel@edumazet-glaptop>

For high availability reasons, the machines discussed run with a number of NICs per subnet, where our own proprietary service fixes up routing when a NIC goes wild.
We schedule a fix in the field but our goal is to eliminate as many single points of failure as we can, so that our systems will still run properly when something goes wrong.
We encountered this issue on some proprietary NICs but also with bnx2, where we get "chip not in correct endian mode" errors (This is another problem that may require a separate discussion).

-----Original Message-----
From: Eric Dumazet [mailto:eric.dumazet@gmail.com] 
Sent: 21 June, 2012 11:23
To: Hamburger, Menny
Cc: netdev@vger.kernel.org
Subject: Re: IPV6 ndisc:: Bad NIC causing IPV6 NDP to stop working

On Thu, 2012-06-21 at 08:59 +0100, Menny_Hamburger@Dell.com wrote:
> Hi,
> 
> Our machines runs EL5.8 x86_64.
> We have witnessed several cases where we suspect that a bad NIC on the machine caused IPV6 neighbour discovery to stop working on all the other NICs - when this happens ping6 fails on every NIC we try it.
> From looking into the code I see that there is only a single socket assigned for NDP; Does it sound logical to allocate a socket per interface instead of a single global socket.
> I have found the following thread in LKML: https://lkml.org/lkml/2006/11/29/335, and it seems that this allocation issue still exists in EL5 based kernels - could this cause the above problem?
> 

What is a bad NIC, and why not fixing it ?




^ permalink raw reply

* Re: [RFC] tcp: How does SACK or FACK determine the time to start fast retransmition?
From: Vijay Subramanian @ 2012-06-21  8:42 UTC (permalink / raw)
  To: 李易; +Cc: netdev, kernelnewbies
In-Reply-To: <4FE2C03C.6030102@gmail.com>

On 20 June 2012 23:33, 李易 <lovelylich@gmail.com> wrote:
> HI all,
>     When tcp uses reno as its congestion control algothim, it uses
> tp->sacked_out as dup-ack. When the third dup-ack(under default
> condition) comes, tcp will initiate its fast retransmition.
>     But how about sack ?
>     According to kernel source code comments, when sack or fack tcp option
> is enabled, there is no dup-ack counter. See comments for function
> tcp_dupack_heuristics():
> http://lxr.linux.no/linux+v2.6.37/net/ipv4/tcp_input.c#L2300
>     So , how does tcp know the current dup-ack is the last one which
> triggers the fast retransmition?

With SACK, number of  dupacks does not have much meaning. What matters is
--how the SACK scoreboard looks like i.e. which packets are tagged
Lost/Sacked/Retransmitted
-- Whether FACK is in use (this assumes holes in between sacked
packets are lost and have left the network and so we can send out more
packets)

So, stack does not count the number of dupacks that have come in. Only
SACK blocks matter.
You can try to track the following path:
tcp_ack() deals with incoming acks and if it sees a dupack (does not
matter what number), or incoming packet contains SACK it calls
tcp_fastretrans_alert() which calls  tcp_xmit_retransmit_queue().

tcp_xmit_retransmit_queue() decides which packets to retransmit. The
first packet to start retransmitting from is tracked in
tp->retransmit_skb_hint.
Note that the dupThresh is actually tracked by tp->reordering which
measures  the reordering in the network and is not fixed at 3.  So, if
more than
tp->reordering packets have been acked above a given packet, this
packet is a candidate for retransmission. See tcp_mark_head_lost() to
see how the
reordering metric is used to mark packets as lost. This corresponds to
the check you mentioned in the RFC.

So, window permitting, packets are sent as follows;
(a)-- Packets marked lost as per description above
(b)-- new packets (if any)
(c)-- Holes between sacked packets  which are not reliably lost.

choice between (b) and (c) is made in tcp_can_forward_retransmit().

Hope this helps.
Vijay

^ permalink raw reply

* Re: IPV6 ndisc::  Bad NIC causing  IPV6 NDP to stop working
From: Eric Dumazet @ 2012-06-21  8:22 UTC (permalink / raw)
  To: Menny_Hamburger; +Cc: netdev
In-Reply-To: <D8C50530D6022F40A817A35C40CC06A70B34DBEFCF@DUBX7MCDUB01.EMEA.DELL.COM>

On Thu, 2012-06-21 at 08:59 +0100, Menny_Hamburger@Dell.com wrote:
> Hi,
> 
> Our machines runs EL5.8 x86_64.
> We have witnessed several cases where we suspect that a bad NIC on the machine caused IPV6 neighbour discovery to stop working on all the other NICs - when this happens ping6 fails on every NIC we try it.
> From looking into the code I see that there is only a single socket assigned for NDP; Does it sound logical to allocate a socket per interface instead of a single global socket.
> I have found the following thread in LKML: https://lkml.org/lkml/2006/11/29/335, and it seems that this allocation issue still exists in EL5 based kernels - could this cause the above problem?
> 

What is a bad NIC, and why not fix it?

^ permalink raw reply

* Re: [PATCH] net: dcb: fix small regression in __dcbnl_pg_setcfg()
From: Thomas Graf @ 2012-06-21  8:19 UTC (permalink / raw)
  To: John Fastabend; +Cc: tgraf, davem, netdev, lucy.liu, alexander.h.duyck
In-Reply-To: <20120621055621.14148.42206.stgit@jf-dev1-dcblab>

On Wed, Jun 20, 2012 at 10:56:21PM -0700, John Fastabend wrote:
> A small regression was introduced in the reply command of
> dcbnl_pg_setcfg(). User space apps may be expecting the
> DCB_ATTR_PG_CFG attribute to be returned; with the patch
> below, TX or RX variants are returned instead.
> 
> commit 7be994138b188387691322921c08e19bddf6d3c5
> Author: Thomas Graf <tgraf@suug.ch>
> Date:   Wed Jun 13 02:54:55 2012 +0000
> 
>     dcbnl: Shorten all command handling functions
> 
> This patch reverts this behavior and returns DCB_ATTR_PG_CFG
> 
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
> 
>  net/dcb/dcbnl.c |    3 +--
>  1 files changed, 1 insertions(+), 2 deletions(-)
> 
> diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
> index 0a36007..013da86 100644
> --- a/net/dcb/dcbnl.c
> +++ b/net/dcb/dcbnl.c
> @@ -852,8 +852,7 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
>  		}
>  	}
>  
> -	return nla_put_u8(skb,
> -			  (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG), 0);
> +	return nla_put_u8(skb, DCB_ATTR_PG_CFG, 0);
>  }
>  

ACK

Thanks John

^ permalink raw reply

* IPV6 ndisc::  Bad NIC causing  IPV6 NDP to stop working
From: Menny_Hamburger @ 2012-06-21  7:59 UTC (permalink / raw)
  To: netdev

Hi,

Our machines run EL5.8 x86_64.
We have witnessed several cases where we suspect that a bad NIC on the machine caused IPV6 neighbour discovery to stop working on all the other NICs - when this happens ping6 fails on every NIC we try it.
From looking into the code I see that there is only a single socket assigned for NDP; does it sound logical to allocate a socket per interface instead of a single global socket?
I have found the following thread in LKML: https://lkml.org/lkml/2006/11/29/335, and it seems that this allocation issue still exists in EL5 based kernels - could this cause the above problem?

Thanks,
Menny

^ permalink raw reply

* Re: divide by 0 error in igbvf_set_coalesce - ab50a2a
From: Jeff Kirsher @ 2012-06-21  7:35 UTC (permalink / raw)
  To: David Ahern; +Cc: Williams, Mitch A, netdev@vger.kernel.org
In-Reply-To: <4FE24CF1.50603@cisco.com>

[-- Attachment #1: Type: text/plain, Size: 373 bytes --]

On 06/20/2012 03:21 PM, David Ahern wrote:
>
> On 6/18/12 2:45 PM, Williams, Mitch A wrote:
>> Thanks for letting me know, David. I'll look into it and get a patch
>> out soon. Shouldn't be that big of a deal to fix.
>
> Could you CC me on the patch so I know when it's fixed? I have enough
> events to poll. 
I will CC you when I push Mitch's patch upstream.



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 900 bytes --]

^ permalink raw reply

* Re: [RFC] tcp: How does SACK or FACK determine the time to start fast retransmition?
From: Li Yu @ 2012-06-21  7:20 UTC (permalink / raw)
  To: 李易; +Cc: netdev, kernelnewbies
In-Reply-To: <4FE2C03C.6030102@gmail.com>

于 2012年06月21日 14:33, 李易 写道:
> HI all,
> When tcp uses reno as its congestion control algothim, it uses
> tp->sacked_out as dup-ack. When the third dup-ack(under default
> condition) comes, tcp will initiate its fast retransmition.
> But how about sack ?
> According to kernel source code comments, when sack or fack tcp option
> is enabled, there is no dup-ack counter. See comments for function
> tcp_dupack_heuristics():
> http://lxr.linux.no/linux+v2.6.37/net/ipv4/tcp_input.c#L2300
> So , how does tcp know the current dup-ack is the last one which
> triggers the fast retransmition?
>
> According to rfc3517 section 5:
> "Upon the receipt of the first (DupThresh - 1) duplicate ACKs, the
> scoreboard is to be updated as normal."
> "When a TCP sender receives the duplicate ACK corresponding to
> DupThresh ACKs,
> the scoreboard MUST be updated with the new SACK information (via
> Update ()). If no previous loss event has occurred
> on the connection or the cumulative acknowledgment point is beyond
> the last value of RecoveryPoint, a loss recovery phase SHOULD be
> initiated, per the fast retransmit algorithm outlined in [RFC2581]."
>
> But these sentences doesn't describe how tcp knows the current ack
> is the dup-threshold dup-ack.
>
> Accorrding to rfc3517 seciton 4 and isLost(Seqnum) function:
> "The routine returns true when either
> DupThresh discontiguous SACKed sequences have arrived above
> ’SeqNum’ or (DupThresh * SMSS) bytes with sequence numbers greater
> than ’SeqNum’ have been SACKed. Otherwise, the routine returns
> false."
> I think this is just what I am searching for, but I still don't know
> which line of code in Linux tcp protocol does this check.
> Can any one help me ? thks in advance.
>
>

Do you mean you did not locate where FR is triggered in TCP stack ?
I am not a TCP expert, however I think that it may be at
tcp_time_to_recover(), and the "DupThresh" is not a fixed value in
Linux TCP implementation.

Thanks

>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* RE: [RFC net-next 11/14] Fix emulex/benet
From: Sathya.Perla @ 2012-06-21  6:42 UTC (permalink / raw)
  To: yuvalmin, netdev, davem; +Cc: eilong
In-Reply-To: <1340118848-30978-12-git-send-email-yuvalmin@broadcom.com>

Yuval, for be2net, the best place to cap the number of queues to a global default 
value would be be_num_rss_want(). The change would look like:

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/
index fa2a01e..5265b42 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2186,12 +2186,15 @@ static void be_msix_disable(struct be_adapter *adapter)
 
 static uint be_num_rss_want(struct be_adapter *adapter)
 {
+       u32 num = 0;
+
        if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
             !sriov_want(adapter) && be_physfn(adapter) &&
-            !be_is_mc(adapter))
-               return (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
-       else
-               return 0;
+            !be_is_mc(adapter)) {
+               num = (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
+               num = min_t(u32, num, DEFAULT_MAX_NUM_RSS_QUEUES);
+       }
+       return num;
 }
 
 static void be_msix_enable(struct be_adapter *adapter)

thanks,
-Sathya

________________________________________
From: Yuval Mintz [yuvalmin@broadcom.com]

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>

Cc: Sathya Perla <sathya.perla@emulex.com>
Cc: Subbu Seetharaman <subbu.seetharaman@emulex.com>
Cc: Ajit Khaparde <ajit.khaparde@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |    8 +++++---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 5a34503..e42597d 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2153,13 +2153,15 @@ static uint be_num_rss_want(struct be_adapter *adapter)
 static void be_msix_enable(struct be_adapter *adapter)
 {
 #define BE_MIN_MSIX_VECTORS            1
-       int i, status, num_vec, num_roce_vec = 0;
+       int i, status, num_vec, num_roce_vec = 0, ncpu;
+
+       ncpu = min_t(int, num_online_cpus(), DEFAULT_MAX_NUM_RSS_QUEUES);

        /* If RSS queues are not used, need a vec for default RX Q */
-       num_vec = min(be_num_rss_want(adapter), num_online_cpus());
+       num_vec = min(be_num_rss_want(adapter), ncpu);
        if (be_roce_supported(adapter)) {
                num_roce_vec = min_t(u32, MAX_ROCE_MSIX_VECTORS,
-                                       (num_online_cpus() + 1));
+                                    (u32)(ncpu + 1));
                num_roce_vec = min(num_roce_vec, MAX_ROCE_EQS);
                num_vec += num_roce_vec;
                num_vec = min(num_vec, MAX_MSIX_VECTORS);
--
1.7.9.rc2

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox