Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next-2.6 2/7] sctp: cache the ipv6 source after route lookup
From: Wei Yongjun @ 2011-04-26  3:46 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org, lksctp
In-Reply-To: <4DB63F85.2090609@cn.fujitsu.com>

From: Vlad Yasevich <vladislav.yasevich@hp.com>

The ipv6 routing lookup does give us a source address,
but instead of filling it into the dst, it's stored in
the flowi.  We can use that instead of going through the
entire source address selection again.  Now we pass
the flowi around to get at the source for ipv6, but let
ipv4 still deal with dst.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
---
 include/net/sctp/structs.h |   13 ++--
 net/sctp/ipv6.c            |  163 +++++++++++++++++++++-----------------------
 net/sctp/protocol.c        |   50 +++++++-------
 net/sctp/socket.c          |    2 +-
 net/sctp/transport.c       |   15 +++--
 5 files changed, 119 insertions(+), 124 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5c9bada..a98d36f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -566,16 +566,17 @@ struct sctp_af {
 					 int __user *optlen);
 	struct dst_entry *(*get_dst)	(struct sctp_association *asoc,
 					 union sctp_addr *daddr,
-					 union sctp_addr *saddr);
+					 union sctp_addr *saddr,
+					 struct flowi *fl,
+					 struct sock *sk);
 	void		(*get_saddr)	(struct sctp_sock *sk,
-					 struct sctp_association *asoc,
-					 struct dst_entry *dst,
+					 struct sctp_transport *t,
 					 union sctp_addr *daddr,
-					 union sctp_addr *saddr);
+					 struct flowi *fl);
 	void		(*copy_addrlist) (struct list_head *,
 					  struct net_device *);
 	void		(*dst_saddr)	(union sctp_addr *saddr,
-					 struct dst_entry *dst,
+					 void *from,
 					 __be16 port);
 	int		(*cmp_addr)	(const union sctp_addr *addr1,
 					 const union sctp_addr *addr2);
@@ -1061,7 +1062,7 @@ void sctp_transport_set_owner(struct sctp_transport *,
 			      struct sctp_association *);
 void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 			  struct sctp_sock *);
-void sctp_transport_pmtu(struct sctp_transport *);
+void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
 void sctp_transport_free(struct sctp_transport *);
 void sctp_transport_reset_timers(struct sctp_transport *);
 void sctp_transport_hold(struct sctp_transport *);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 5adf585..cc9ea37 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -82,6 +82,10 @@
 
 static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
 					 union sctp_addr *s2);
+static void sctp_v6_dst_saddr(union sctp_addr *addr, void *from,
+			      __be16 port);
+static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
+			    const union sctp_addr *addr2);
 
 /* Event handler for inet6 address addition/deletion events.
  * The sctp_local_addr_list needs to be protocted by a spin lock since
@@ -245,69 +249,97 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
  */
 static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 					 union sctp_addr *daddr,
-					 union sctp_addr *saddr)
+					 union sctp_addr *saddr,
+					 struct flowi *fl,
+					 struct sock *sk)
 {
 	struct dst_entry *dst = NULL;
-	struct flowi6 fl6;
+	struct flowi6 *fl6 = &fl->u.ip6;
 	struct sctp_bind_addr *bp;
 	struct sctp_sockaddr_entry *laddr;
 	union sctp_addr *baddr = NULL;
+	union sctp_addr dst_saddr;
 	__u8 matchlen = 0;
 	__u8 bmatchlen;
 	sctp_scope_t scope;
+	int err = 0;
 
-	memset(&fl6, 0, sizeof(fl6));
-	ipv6_addr_copy(&fl6.daddr, &daddr->v6.sin6_addr);
+	memset(fl6, 0, sizeof(struct flowi6));
+	ipv6_addr_copy(&fl6->daddr, &daddr->v6.sin6_addr);
 	if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		fl6.flowi6_oif = daddr->v6.sin6_scope_id;
+		fl6->flowi6_oif = daddr->v6.sin6_scope_id;
 
 
-	SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6.daddr);
+	SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6->daddr);
 
 	if (saddr) {
-		ipv6_addr_copy(&fl6.saddr, &saddr->v6.sin6_addr);
-		SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6.saddr);
+		ipv6_addr_copy(&fl6->saddr, &saddr->v6.sin6_addr);
+		SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr);
 	}
 
-	dst = ip6_route_output(&init_net, NULL, &fl6);
+	err = ip6_dst_lookup(sk, &dst, fl6);
 	if (!asoc || saddr)
 		goto out;
 
-	if (dst->error) {
-		dst_release(dst);
-		dst = NULL;
-		bp = &asoc->base.bind_addr;
-		scope = sctp_scope(daddr);
-		/* Walk through the bind address list and try to get a dst that
-		 * matches a bind address as the source address.
+	bp = &asoc->base.bind_addr;
+	scope = sctp_scope(daddr);
+	/* ip6_dst_lookup has filled in the fl6->saddr for us.  Check
+	 * to see if we can use it.
+	 */
+	if (!err) {
+		/* Walk through the bind address list and look for a bind
+		 * address that matches the source address of the returned dst.
 		 */
+		sctp_v6_dst_saddr(&dst_saddr, fl6, htons(bp->port));
 		rcu_read_lock();
 		list_for_each_entry_rcu(laddr, &bp->address_list, list) {
-			if (!laddr->valid)
+			if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
 				continue;
-			if ((laddr->state == SCTP_ADDR_SRC) &&
-			    (laddr->a.sa.sa_family == AF_INET6) &&
-			    (scope <= sctp_scope(&laddr->a))) {
-				bmatchlen = sctp_v6_addr_match_len(daddr,
-								   &laddr->a);
-				if (!baddr || (matchlen < bmatchlen)) {
-					baddr = &laddr->a;
-					matchlen = bmatchlen;
-				}
+
+			/* Do not compare against v4 addrs */
+			if ((laddr->a.sa.sa_family == AF_INET6) &&
+			    (sctp_v6_cmp_addr(&dst_saddr, &laddr->a))) {
+				rcu_read_unlock();
+				goto out;
 			}
 		}
 		rcu_read_unlock();
-		if (baddr) {
-			ipv6_addr_copy(&fl6.saddr, &baddr->v6.sin6_addr);
-			dst = ip6_route_output(&init_net, NULL, &fl6);
+		/* None of the bound addresses match the source address of the
+		 * dst. So release it.
+		 */
+		dst_release(dst);
+		dst = NULL;
+	}
+
+	/* Walk through the bind address list and try to get the
+	 * best source address for a given destination.
+	 */
+	rcu_read_lock();
+	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+		if (!laddr->valid)
+			continue;
+		if ((laddr->state == SCTP_ADDR_SRC) &&
+		    (laddr->a.sa.sa_family == AF_INET6) &&
+		    (scope <= sctp_scope(&laddr->a))) {
+			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
+			if (!baddr || (matchlen < bmatchlen)) {
+				baddr = &laddr->a;
+				matchlen = bmatchlen;
+			}
 		}
 	}
+	rcu_read_unlock();
+	if (baddr) {
+		ipv6_addr_copy(&fl6->saddr, &baddr->v6.sin6_addr);
+		err = ip6_dst_lookup(sk, &dst, fl6);
+	}
+
 out:
-	if (!dst->error) {
+	if (dst) {
 		struct rt6_info *rt;
 		rt = (struct rt6_info *)dst;
 		SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n",
-			&rt->rt6i_dst.addr, &rt->rt6i_src.addr);
+			&rt->rt6i_dst.addr, &fl6->saddr);
 		return dst;
 	}
 	SCTP_DEBUG_PRINTK("NO ROUTE\n");
@@ -329,64 +361,21 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
  * and asoc's bind address list.
  */
 static void sctp_v6_get_saddr(struct sctp_sock *sk,
-			      struct sctp_association *asoc,
-			      struct dst_entry *dst,
+			      struct sctp_transport *t,
 			      union sctp_addr *daddr,
-			      union sctp_addr *saddr)
+			      struct flowi *fl)
 {
-	struct sctp_bind_addr *bp;
-	struct sctp_sockaddr_entry *laddr;
-	sctp_scope_t scope;
-	union sctp_addr *baddr = NULL;
-	__u8 matchlen = 0;
-	__u8 bmatchlen;
+	struct flowi6 *fl6 = &fl->u.ip6;
+	union sctp_addr *saddr = &t->saddr;
 
 	SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%pI6 ",
-			  __func__, asoc, dst, &daddr->v6.sin6_addr);
-
-	if (!asoc) {
-		ipv6_dev_get_saddr(sock_net(sctp_opt2sk(sk)),
-				   dst ? ip6_dst_idev(dst)->dev : NULL,
-				   &daddr->v6.sin6_addr,
-				   inet6_sk(&sk->inet.sk)->srcprefs,
-				   &saddr->v6.sin6_addr);
-		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n",
-				  &saddr->v6.sin6_addr);
-		return;
-	}
+			  __func__, t->asoc, t->dst, &daddr->v6.sin6_addr);
 
-	scope = sctp_scope(daddr);
-
-	bp = &asoc->base.bind_addr;
-
-	/* Go through the bind address list and find the best source address
-	 * that matches the scope of the destination address.
-	 */
-	rcu_read_lock();
-	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
-		if (!laddr->valid)
-			continue;
-		if ((laddr->state == SCTP_ADDR_SRC) &&
-		    (laddr->a.sa.sa_family == AF_INET6) &&
-		    (scope <= sctp_scope(&laddr->a))) {
-			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
-			if (!baddr || (matchlen < bmatchlen)) {
-				baddr = &laddr->a;
-				matchlen = bmatchlen;
-			}
-		}
-	}
 
-	if (baddr) {
-		memcpy(saddr, baddr, sizeof(union sctp_addr));
-		SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr);
-	} else {
-		pr_err("%s: asoc:%p Could not find a valid source "
-		       "address for the dest:%pI6\n",
-		       __func__, asoc, &daddr->v6.sin6_addr);
+	if (t->dst) {
+		saddr->v6.sin6_family = AF_INET6;
+		ipv6_addr_copy(&saddr->v6.sin6_addr, &fl6->saddr);
 	}
-
-	rcu_read_unlock();
 }
 
 /* Make a copy of all potential local addresses. */
@@ -508,14 +497,16 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr,
 	return length;
 }
 
-/* Initialize a sctp_addr from a dst_entry. */
-static void sctp_v6_dst_saddr(union sctp_addr *addr, struct dst_entry *dst,
+/* Initialize a sctp_addr from a dst_entry. Problem is that v6 dst
+ * entries do not carry the source, so we pass a flowi instead.
+ */
+static void sctp_v6_dst_saddr(union sctp_addr *addr, void *from,
 			      __be16 port)
 {
-	struct rt6_info *rt = (struct rt6_info *)dst;
+	struct flowi6 *fl6 = (struct flowi6 *)from;
 	addr->sa.sa_family = AF_INET6;
 	addr->v6.sin6_port = port;
-	ipv6_addr_copy(&addr->v6.sin6_addr, &rt->rt6i_src.addr);
+	ipv6_addr_copy(&addr->v6.sin6_addr, &fl6->saddr);
 }
 
 /* Compare addresses exactly.
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d5bf91d..7be5df6 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -339,10 +339,10 @@ static int sctp_v4_to_addr_param(const union sctp_addr *addr,
 }
 
 /* Initialize a sctp_addr from a dst_entry. */
-static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct dst_entry *dst,
+static void sctp_v4_dst_saddr(union sctp_addr *saddr, void *from,
 			      __be16 port)
 {
-	struct rtable *rt = (struct rtable *)dst;
+	struct rtable *rt = (struct rtable *)from;
 	saddr->v4.sin_family = AF_INET;
 	saddr->v4.sin_port = port;
 	saddr->v4.sin_addr.s_addr = rt->rt_src;
@@ -465,33 +465,35 @@ static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
  */
 static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 					 union sctp_addr *daddr,
-					 union sctp_addr *saddr)
+					 union sctp_addr *saddr,
+					 struct flowi *fl,
+					 struct sock *sk)
 {
 	struct rtable *rt;
-	struct flowi4 fl4;
+	struct flowi4 *fl4 = &fl->u.ip4;
 	struct sctp_bind_addr *bp;
 	struct sctp_sockaddr_entry *laddr;
 	struct dst_entry *dst = NULL;
 	union sctp_addr dst_saddr;
 
-	memset(&fl4, 0x0, sizeof(struct flowi4));
-	fl4.daddr  = daddr->v4.sin_addr.s_addr;
-	fl4.fl4_dport = daddr->v4.sin_port;
-	fl4.flowi4_proto = IPPROTO_SCTP;
+	memset(fl4, 0x0, sizeof(struct flowi4));
+	fl4->daddr  = daddr->v4.sin_addr.s_addr;
+	fl4->fl4_dport = daddr->v4.sin_port;
+	fl4->flowi4_proto = IPPROTO_SCTP;
 	if (asoc) {
-		fl4.flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
-		fl4.flowi4_oif = asoc->base.sk->sk_bound_dev_if;
-		fl4.fl4_sport = htons(asoc->base.bind_addr.port);
+		fl4->flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
+		fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
+		fl4->fl4_sport = htons(asoc->base.bind_addr.port);
 	}
 	if (saddr) {
-		fl4.saddr = saddr->v4.sin_addr.s_addr;
-		fl4.fl4_sport = saddr->v4.sin_port;
+		fl4->saddr = saddr->v4.sin_addr.s_addr;
+		fl4->fl4_sport = saddr->v4.sin_port;
 	}
 
 	SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
-			  __func__, &fl4.daddr, &fl4.saddr);
+			  __func__, &fl4->daddr, &fl4->saddr);
 
-	rt = ip_route_output_key(&init_net, &fl4);
+	rt = ip_route_output_key(&init_net, fl4);
 	if (!IS_ERR(rt))
 		dst = &rt->dst;
 
@@ -533,9 +535,9 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 			continue;
 		if ((laddr->state == SCTP_ADDR_SRC) &&
 		    (AF_INET == laddr->a.sa.sa_family)) {
-			fl4.saddr = laddr->a.v4.sin_addr.s_addr;
-			fl4.fl4_sport = laddr->a.v4.sin_port;
-			rt = ip_route_output_key(&init_net, &fl4);
+			fl4->saddr = laddr->a.v4.sin_addr.s_addr;
+			fl4->fl4_sport = laddr->a.v4.sin_port;
+			rt = ip_route_output_key(&init_net, fl4);
 			if (!IS_ERR(rt)) {
 				dst = &rt->dst;
 				goto out_unlock;
@@ -559,19 +561,15 @@ out:
  * to cache it separately and hence this is an empty routine.
  */
 static void sctp_v4_get_saddr(struct sctp_sock *sk,
-			      struct sctp_association *asoc,
-			      struct dst_entry *dst,
+			      struct sctp_transport *t,
 			      union sctp_addr *daddr,
-			      union sctp_addr *saddr)
+			      struct flowi *fl)
 {
-	struct rtable *rt = (struct rtable *)dst;
-
-	if (!asoc)
-		return;
+	union sctp_addr *saddr = &t->saddr;
+	struct rtable *rt = (struct rtable *)t->dst;
 
 	if (rt) {
 		saddr->v4.sin_family = AF_INET;
-		saddr->v4.sin_port = htons(asoc->base.bind_addr.port);
 		saddr->v4.sin_addr.s_addr = rt->rt_src;
 	}
 }
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f694ee1..33d9ee6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2287,7 +2287,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 			trans->param_flags =
 				(trans->param_flags & ~SPP_PMTUD) | pmtud_change;
 			if (update) {
-				sctp_transport_pmtu(trans);
+				sctp_transport_pmtu(trans, sctp_opt2sk(sp));
 				sctp_assoc_sync_pmtu(asoc);
 			}
 		} else if (asoc) {
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d3ae493..2544b9b 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -211,11 +211,15 @@ void sctp_transport_set_owner(struct sctp_transport *transport,
 }
 
 /* Initialize the pmtu of a transport. */
-void sctp_transport_pmtu(struct sctp_transport *transport)
+void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 {
 	struct dst_entry *dst;
+	struct flowi fl;
 
-	dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL);
+	dst = transport->af_specific->get_dst(transport->asoc,
+					      &transport->ipaddr,
+					      &transport->saddr,
+					      &fl, sk);
 
 	if (dst) {
 		transport->pathmtu = dst_mtu(dst);
@@ -272,15 +276,16 @@ void sctp_transport_route(struct sctp_transport *transport,
 	struct sctp_af *af = transport->af_specific;
 	union sctp_addr *daddr = &transport->ipaddr;
 	struct dst_entry *dst;
+	struct flowi fl;
 
-	dst = af->get_dst(asoc, daddr, saddr);
+	dst = af->get_dst(asoc, daddr, saddr, &fl, sctp_opt2sk(opt));
+	transport->dst = dst;
 
 	if (saddr)
 		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
 	else
-		af->get_saddr(opt, asoc, dst, daddr, &transport->saddr);
+		af->get_saddr(opt, transport, daddr, &fl);
 
-	transport->dst = dst;
 	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
 		return;
 	}
-- 
1.6.5.2



^ permalink raw reply related

* [PATCH net-next-2.6 1/7] sctp: fix sctp to work with ipv6 source address routing
From: Wei Yongjun @ 2011-04-26  3:45 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org, lksctp
In-Reply-To: <4DB63F85.2090609@cn.fujitsu.com>

From: Weixing Shi <Weixing.Shi@windriver.com>

In the test case below, which uses source address routing,
sctp does not work.
Node-A
1)ifconfig eth0 inet6 add 2001:1::1/64
2)ip -6 rule add from 2001:1::1 table 100 pref 100
3)ip -6 route add 2001:2::1 dev eth0 table 100
4)sctp_darn -H 2001:1::1 -P 250 -l &
Node-B
1)ifconfig eth0 inet6 add 2001:2::1/64
2)ip -6 rule add from 2001:2::1 table 100 pref 100
3)ip -6 route add 2001:1::1 dev eth0 table 100
4)sctp_darn -H 2001:2::1 -P 250 -h 2001:1::1 -p 250 -s

root cause:
Node-A and Node-B use source address routing. At the
beginning, the source address is NULL, so sctp searches
the routing table by the destination address only. Because
the route lives in the source address routing table, the
lookup fails and the resulting dst_entry will be NULL.

solution:
walk through the bind address list to get the source
address and then lookup the routing table again to get
the correct dst_entry.

Signed-off-by: Weixing Shi <Weixing.Shi@windriver.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
---
 net/sctp/ipv6.c |   47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 321f175..5adf585 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -80,6 +80,9 @@
 
 #include <asm/uaccess.h>
 
+static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
+					 union sctp_addr *s2);
+
 /* Event handler for inet6 address addition/deletion events.
  * The sctp_local_addr_list needs to be protocted by a spin lock since
  * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -244,8 +247,14 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 					 union sctp_addr *daddr,
 					 union sctp_addr *saddr)
 {
-	struct dst_entry *dst;
+	struct dst_entry *dst = NULL;
 	struct flowi6 fl6;
+	struct sctp_bind_addr *bp;
+	struct sctp_sockaddr_entry *laddr;
+	union sctp_addr *baddr = NULL;
+	__u8 matchlen = 0;
+	__u8 bmatchlen;
+	sctp_scope_t scope;
 
 	memset(&fl6, 0, sizeof(fl6));
 	ipv6_addr_copy(&fl6.daddr, &daddr->v6.sin6_addr);
@@ -261,6 +270,39 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 	}
 
 	dst = ip6_route_output(&init_net, NULL, &fl6);
+	if (!asoc || saddr)
+		goto out;
+
+	if (dst->error) {
+		dst_release(dst);
+		dst = NULL;
+		bp = &asoc->base.bind_addr;
+		scope = sctp_scope(daddr);
+		/* Walk through the bind address list and try to get a dst that
+		 * matches a bind address as the source address.
+		 */
+		rcu_read_lock();
+		list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+			if (!laddr->valid)
+				continue;
+			if ((laddr->state == SCTP_ADDR_SRC) &&
+			    (laddr->a.sa.sa_family == AF_INET6) &&
+			    (scope <= sctp_scope(&laddr->a))) {
+				bmatchlen = sctp_v6_addr_match_len(daddr,
+								   &laddr->a);
+				if (!baddr || (matchlen < bmatchlen)) {
+					baddr = &laddr->a;
+					matchlen = bmatchlen;
+				}
+			}
+		}
+		rcu_read_unlock();
+		if (baddr) {
+			ipv6_addr_copy(&fl6.saddr, &baddr->v6.sin6_addr);
+			dst = ip6_route_output(&init_net, NULL, &fl6);
+		}
+	}
+out:
 	if (!dst->error) {
 		struct rt6_info *rt;
 		rt = (struct rt6_info *)dst;
@@ -269,7 +311,8 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 		return dst;
 	}
 	SCTP_DEBUG_PRINTK("NO ROUTE\n");
-	dst_release(dst);
+	if (dst)
+		dst_release(dst);
 	return NULL;
 }
 
-- 
1.6.5.2



^ permalink raw reply related

* [PATCH net-next-2.6 0/7] SCTP updates for net-next-2.6
From: Wei Yongjun @ 2011-04-26  3:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org, lksctp

Hi David

Here is a set of SCTP patches for net-next-2.6, the last part
from Vlad's lksctp-dev tree. They update SCTP IPv6 routing and
address IPsec issues. Please apply.

Vlad Yasevich (4):
      sctp: cache the ipv6 source after route lookup
      sctp: make sctp over IPv6 work with IPsec
      sctp: remove useless arguments from get_saddr() call
      sctp: clean up route lookup calls

Wei Yongjun (2):
      sctp: clean up IPv6 route and XFRM lookups
      sctp: fix IPv6 source address output routing with IPsec

Weixing Shi (1):
      sctp: fix sctp to work with ipv6 source address routing

 include/net/sctp/structs.h |   17 ++--
 net/sctp/ipv6.c            |  186 ++++++++++++++++++++++++++-----------------
 net/sctp/protocol.c        |   57 ++++++-------
 net/sctp/socket.c          |    2 +-
 net/sctp/transport.c       |   28 ++++---
 5 files changed, 162 insertions(+), 128 deletions(-)



^ permalink raw reply

* Re: how to set vlan filter for intel 82599
From: zhou rui @ 2011-04-26  3:39 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev
In-Reply-To: <1303786638.3032.307.camel@localhost>

On Tue, Apr 26, 2011 at 10:57 AM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Tue, 2011-04-26 at 10:19 +0800, zhou rui wrote:
>> hi
>> here is the problem troubles me,how to set vlan filter for intel
>> 82599? for example
>> I want vlan id 0~31 will go to queue 0, vlan id 32-63 will go to queue
>> 1...below is my setting,but doesn't work
>>
>> don't know the exact meanning of the vlan-mask and vlan,how are they calculated?
>>
>> ./ethtool -K eth5 ntuple on
>>
>> ./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
>> dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
>> dst-port-mask 0x0 vlan 0x0000 vlan-mask 0x00E0 user-def 0x0
>> user-def-mask 0x0 action 0
> [...]
>
> This specifies a filter for UDP/IPv4 packets, and the masks are wrong.
> If you actually wanted to filter only UDP/IPv4 packets for VID 0-31 then
> the correct syntax would be:
>
>    ethtool -U eth5 flow-type udp4 vlan 0 vlan-mask 0xf01f
>
> If you don't care about the layer 3/4 protocols then you would need to
> use 'flow-type ether', but no driver implements that yet.  (Well, sfc
> implements the *type*, but not filtering by VID only.)
>
> Ben.
>
> --
> Ben Hutchings, Senior Software Engineer, Solarflare
> Not speaking for my employer; that's the marketing department's job.
> They asked us to note that Solarflare product names are trademarked.
>
>

Hi Ben, thanks for your help. Would you mind telling me the filter for
VIDs 32~63? I still cannot understand the vlan-mask.

^ permalink raw reply

* Re: [PATCH net-next-2.6 v5 5/5] sctp: Add ASCONF operation on the single-homed host
From: Wei Yongjun @ 2011-04-26  3:28 UTC (permalink / raw)
  To: Michio Honda; +Cc: netdev, lksctp-developers
In-Reply-To: <0D29144A-E384-4E7D-AA04-4CC330A2D3AF@sfc.wide.ad.jp>


> SCTP can change the IP address on the single-homed host.  
> In this case, the SCTP association transmits an ASCONF packet including addition of the new IP address and deletion of the old address.  This patch implements this functionality.  
> In this case, the ASCONF chunk is added to the beginning of the queue, because the other chunks cannot be transmitted in this state.  
>
> Signed-off-by: Michio Honda <micchie@sfc.wide.ad.jp>
> ---
> diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
> index c70d8cc..d7a4ee3 100644
> --- a/include/net/sctp/constants.h
> +++ b/include/net/sctp/constants.h
> @@ -441,4 +441,8 @@ enum {
>   */
>  #define SCTP_AUTH_RANDOM_LENGTH 32
>  
> +/* ASCONF PARAMETERS */
> +#define SCTP_ASCONF_V4_PARAM_LEN 16
> +#define SCTP_ASCONF_V6_PARAM_LEN 28

useless defines.

> +
>  #endif /* __sctp_constants_h__ */
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index cc9185c..db4e9d0 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -1901,6 +1901,8 @@ struct sctp_association {
>  	 * after reaching 4294967295.
>  	 */
>  	__u32 addip_serial;
> +	union sctp_addr *asconf_addr_del_pending;
> +	int src_out_of_asoc_ok;
>  
>  	/* SCTP AUTH: list of the endpoint shared keys.  These
>  	 * keys are provided out of band by the user applicaton
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index 6b04287..2082d0a 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -279,6 +279,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
>  	asoc->peer.asconf_capable = 0;
>  	if (sctp_addip_noauth)
>  		asoc->peer.asconf_capable = 1;
> +	asoc->asconf_addr_del_pending = NULL;
> +	asoc->src_out_of_asoc_ok = 0;
>  
>  	/* Create an input queue.  */
>  	sctp_inq_init(&asoc->base.inqueue);
> @@ -443,6 +445,10 @@ void sctp_association_free(struct sctp_association *asoc)
>  
>  	asoc->peer.transport_count = 0;
>  
> +	/* Free pending address space being deleted */
> +	if (asoc->asconf_addr_del_pending != NULL)
> +		kfree(asoc->asconf_addr_del_pending);
> +
>  	/* Free any cached ASCONF_ACK chunk. */
>  	sctp_assoc_free_asconf_acks(asoc);
>  
> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
> index 865ce7b..56c97ce 100644
> --- a/net/sctp/ipv6.c
> +++ b/net/sctp/ipv6.c
> @@ -332,6 +332,13 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
>  				matchlen = bmatchlen;
>  			}
>  		}
> +		if (laddr->state == SCTP_ADDR_NEW && asoc->src_out_of_asoc_ok) {
> +			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
> +			if (!baddr || (matchlen < bmatchlen)) {
> +				baddr = &laddr->a;
> +				matchlen = bmatchlen;
> +			}
> +		}
>  	}
>  
>  	if (baddr) {
> diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
> index 26dc005..28bccde 100644
> --- a/net/sctp/outqueue.c
> +++ b/net/sctp/outqueue.c
> @@ -744,6 +744,16 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
>  	 */
>  
>  	list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
> +		/* RFC 5061, 5.3
> +		 * F1) This means that until such time as the ASCONF
> +		 * containing the add is acknowledged, the sender MUST
> +		 * NOT use the new IP address as a source for ANY SCTP
> +		 * packet except on carrying an ASCONF Chunk.
> +		 */
> +		if (asoc->src_out_of_asoc_ok &&
> +		    chunk->chunk_hdr->type != SCTP_CID_ASCONF)
> +			continue;
> +
>  		list_del_init(&chunk->list);
>  
>  		/* Pick the right transport to use. */
> @@ -871,6 +881,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
>  		}
>  	}
>  
> +	if (q->asoc->src_out_of_asoc_ok)
> +		goto sctp_flush_out;
> +
>  	/* Is it OK to send data chunks?  */
>  	switch (asoc->state) {
>  	case SCTP_STATE_COOKIE_ECHOED:
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index 152976e..0733273 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -510,7 +510,9 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
>  		sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
>  		rcu_read_lock();
>  		list_for_each_entry_rcu(laddr, &bp->address_list, list) {
> -			if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
> +			if (!laddr->valid || (laddr->state == SCTP_ADDR_DEL) ||
> +			    (laddr->state != SCTP_ADDR_SRC &&
> +			    !asoc->src_out_of_asoc_ok))
>  				continue;
>  			if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
>  				goto out_unlock;
> diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
> index de98665..f341ab2 100644
> --- a/net/sctp/sm_make_chunk.c
> +++ b/net/sctp/sm_make_chunk.c
> @@ -2744,6 +2744,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
>  	int			addr_param_len = 0;
>  	int 			totallen = 0;
>  	int 			i;
> +	sctp_addip_param_t del_param; /* 8 Bytes (Type 0xC002, Len and CrrID) */
> +	struct sctp_af *del_af;
> +	int del_addr_param_len = 0;
> +	int del_paramlen = sizeof(sctp_addip_param_t);
> +	union sctp_addr_param del_addr_param; /* (v4) 8 Bytes, (v6) 20 Bytes */
> +	int			del_pickup = 0;
>  
>  	/* Get total length of all the address parameters. */
>  	addr_buf = addrs;
> @@ -2756,6 +2762,17 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
>  		totallen += addr_param_len;
>  
>  		addr_buf += af->sockaddr_len;
> +		if (asoc->asconf_addr_del_pending && !del_pickup) {
> +			if (!sctp_in_scope(asoc->asconf_addr_del_pending,
> +			    sctp_scope(addr)))
> +				continue;
> +			/* reuse the parameter length from the same scope one */
> +			totallen += paramlen;
> +			totallen += addr_param_len;
> +			del_pickup = 1;
> +			asoc->src_out_of_asoc_ok = 1;
> +			SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen);
> +		}
>  	}
>  
>  	/* Create an asconf chunk with the required length. */
> @@ -2778,6 +2795,19 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
>  
>  		addr_buf += af->sockaddr_len;
>  	}
> +	if (flags == SCTP_PARAM_ADD_IP && del_pickup) {
> +		addr = asoc->asconf_addr_del_pending;
> +		del_af = sctp_get_af_specific(addr->v4.sin_family);
> +		del_addr_param_len = del_af->to_addr_param(addr,
> +		    &del_addr_param);
> +		del_param.param_hdr.type = SCTP_PARAM_DEL_IP;
> +		del_param.param_hdr.length = htons(del_paramlen +
> +		    del_addr_param_len);
> +		del_param.crr_id = i;
> +
> +		sctp_addto_chunk(retval, del_paramlen, &del_param);
> +		sctp_addto_chunk(retval, del_addr_param_len, &del_addr_param);
> +	}
>  	return retval;
>  }
>  
> @@ -3193,7 +3223,8 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
>  		local_bh_enable();
>  		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
>  				transports) {
> -			if (transport->state == SCTP_ACTIVE)
> +			if (transport->state == SCTP_ACTIVE &&
> +			    !asoc->src_out_of_asoc_ok)
>  				continue;
>  			dst_release(transport->dst);
>  			sctp_transport_route(transport, NULL,
> @@ -3203,6 +3234,11 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
>  	case SCTP_PARAM_DEL_IP:
>  		local_bh_disable();
>  		sctp_del_bind_addr(bp, &addr);
> +		if (asoc->asconf_addr_del_pending != NULL &&
> +		    sctp_cmp_addr_exact(asoc->asconf_addr_del_pending, &addr)) {
> +			kfree(asoc->asconf_addr_del_pending);
> +			asoc->asconf_addr_del_pending = NULL;
> +		}
>  		local_bh_enable();
>  		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
>  				transports) {
> @@ -3361,6 +3397,9 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
>  		asconf_len -= length;
>  	}
>  
> +	if (no_err && asoc->src_out_of_asoc_ok)
> +		asoc->src_out_of_asoc_ok = 0;
> +
>  	/* Free the cached last sent asconf chunk. */
>  	list_del_init(&asconf->transmitted_list);
>  	sctp_chunk_free(asconf);
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 3951a10..481293d 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -583,10 +583,6 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
>  			goto out;
>  		}
>  
> -		retval = sctp_send_asconf(asoc, chunk);
> -		if (retval)
> -			goto out;
> -
>  		/* Add the new addresses to the bind address list with
>  		 * use_as_src set to 0.
>  		 */
> @@ -599,6 +595,23 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
>  						    SCTP_ADDR_NEW, GFP_ATOMIC);
>  			addr_buf += af->sockaddr_len;
>  		}
> +		if (asoc->src_out_of_asoc_ok) {
> +			struct sctp_transport *trans;
> +
> +			list_for_each_entry(trans,
> +			    &asoc->peer.transport_addr_list, transports) {
> +				/* Clear the source and route cache */
> +				dst_release(trans->dst);
> +				trans->cwnd = min(4*asoc->pathmtu, max_t(__u32,
> +				    2*asoc->pathmtu, 4380));
> +				trans->ssthresh = asoc->peer.i.a_rwnd;
> +				trans->rto = asoc->rto_initial;
> +				trans->rtt = trans->srtt = trans->rttvar = 0;
> +				sctp_transport_route(trans, NULL,
> +				    sctp_sk(asoc->base.sk));
> +			}
> +		}
> +		retval = sctp_send_asconf(asoc, chunk);
>  	}
>  
>  out:
> @@ -711,7 +724,9 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  	struct sctp_sockaddr_entry *saddr;
>  	int 			i;
>  	int 			retval = 0;
> +	int			stored = 0;
>  
> +	chunk = NULL;
>  	if (!sctp_addip_enable)
>  		return retval;
>  
> @@ -762,8 +777,32 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  		bp = &asoc->base.bind_addr;
>  		laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
>  					       addrcnt, sp);
> -		if (!laddr)
> -			continue;
> +		if ((laddr == NULL) && (addrcnt == 1)) {
> +			if (asoc->asconf_addr_del_pending)
> +				continue;
> +			asoc->asconf_addr_del_pending =
> +			    kzalloc(sizeof(union sctp_addr), GFP_ATOMIC);
> +			asoc->asconf_addr_del_pending->sa.sa_family =
> +				    addrs->sa_family;
> +			asoc->asconf_addr_del_pending->v4.sin_port =
> +				    htons(bp->port);
> +			if (addrs->sa_family == AF_INET) {
> +				struct sockaddr_in *sin;
> +
> +				sin = (struct sockaddr_in *)addrs;
> +				asoc->asconf_addr_del_pending->v4.sin_addr.s_addr = sin->sin_addr.s_addr;
> +			} else if (addrs->sa_family == AF_INET6) {
> +				struct sockaddr_in6 *sin6;
> +
> +				sin6 = (struct sockaddr_in6 *)addrs;
> +				ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr);
> +			}
> +			SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ",
> +			    " at %p\n", asoc, asoc->asconf_addr_del_pending,
> +			    asoc->asconf_addr_del_pending);
> +			stored = 1;
> +			goto skip_mkasconf;
> +		}
>  
>  		/* We do not need RCU protection throughout this loop
>  		 * because this is done under a socket lock from the
> @@ -776,6 +815,7 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  			goto out;
>  		}
>  
> +skip_mkasconf:
>  		/* Reset use_as_src flag for the addresses in the bind address
>  		 * list that are to be deleted.
>  		 */
> @@ -801,6 +841,9 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
>  					     sctp_sk(asoc->base.sk));
>  		}
>  
> +		if (stored)
> +			/* We don't need to transmit ASCONF */
> +			continue;
>  		retval = sctp_send_asconf(asoc, chunk);
>  	}
>  out:
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: how to set vlan filter for intel 82599
From: Ben Hutchings @ 2011-04-26  2:57 UTC (permalink / raw)
  To: zhou rui; +Cc: netdev
In-Reply-To: <BANLkTikZcdda5-3NBaXyKmij09f62Da3cQ@mail.gmail.com>

On Tue, 2011-04-26 at 10:19 +0800, zhou rui wrote:
> hi
> here is the problem troubles me,how to set vlan filter for intel
> 82599? for example
> I want vlan id 0~31 will go to queue 0, vlan id 32-63 will go to queue
> 1...below is my setting,but doesn't work
> 
> don't know the exact meanning of the vlan-mask and vlan,how are they calculated?
> 
> ./ethtool -K eth5 ntuple on
> 
> ./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
> dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
> dst-port-mask 0x0 vlan 0x0000 vlan-mask 0x00E0 user-def 0x0
> user-def-mask 0x0 action 0
[...]

This specifies a filter for UDP/IPv4 packets, and the masks are wrong.
If you actually wanted to filter only UDP/IPv4 packets for VID 0-31 then
the correct syntax would be:

    ethtool -U eth5 flow-type udp4 vlan 0 vlan-mask 0xf01f

If you don't care about the layer 3/4 protocols then you would need to
use 'flow-type ether', but no driver implements that yet.  (Well, sfc
implements the *type*, but not filtering by VID only.)

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* how to set vlan filter for intel 82599
From: zhou rui @ 2011-04-26  2:19 UTC (permalink / raw)
  To: netdev

hi
Here is the problem that is troubling me: how do I set a VLAN filter for the
Intel 82599? For example,
I want VLAN IDs 0-31 to go to queue 0, VLAN IDs 32-63 to go to queue
1, and so on. Below are my settings, but they don't work.

I don't know the exact meaning of vlan-mask and vlan; how are they calculated?

./ethtool -K eth5 ntuple on

./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0000 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 0
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0020 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 1
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0040 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 2
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0060 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 3
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x0080 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 4
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x00A0 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 5
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x00C0 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 6
./ethtool -U eth5 flow-type udp4 src-ip 0x0 src-ip-mask 0x0 dst-ip 0x0
dst-ip-mask 0x0 src-port 0x0 src-port-mask 0x0 dst-port 0x0
dst-port-mask 0x0 vlan 0x00E0 vlan-mask 0x00E0 user-def 0x0
user-def-mask 0x0 action 6

thanks
rui

^ permalink raw reply

* Re: [PATCH] netfilter/IPv6:  initialize TOS field in REJECT target module
From: Fernando Luis Vazquez Cao @ 2011-04-26  1:26 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Eric Dumazet, netfilter-devel, netdev, yoshfuji, jengelh, davem
In-Reply-To: <4DB61C2C.7060508@netfilter.org>

On Tue, 2011-04-26 at 03:13 +0200, Pablo Neira Ayuso wrote:
> On 22/04/11 10:37, Eric Dumazet wrote:
> > Le vendredi 22 avril 2011 à 17:11 +0900, Fernando Luis Vazquez Cao a
> > écrit :
> > 
> >> Thank you!
> >>
> >> Should we send these two patches to -stable too?
> > 
> > David takes care of stable submissions for netdev stuff, thanks.
> 
> If the patch follows the netfilter path, we'll take care of sending
> stable submissions.

David, will you take care of these two patches or should they go through
the netfilter tree?

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] netfilter/IPv6:  initialize TOS field in REJECT target module
From: Pablo Neira Ayuso @ 2011-04-26  1:13 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Fernando Luis Vazquez Cao, netfilter-devel, netdev, yoshfuji,
	jengelh, davem
In-Reply-To: <1303461461.3134.15.camel@edumazet-laptop>

On 22/04/11 10:37, Eric Dumazet wrote:
> Le vendredi 22 avril 2011 à 17:11 +0900, Fernando Luis Vazquez Cao a
> écrit :
> 
>> Thank you!
>>
>> Should we send these two patches to -stable too?
> 
> David takes care of stable submissions for netdev stuff, thanks.

If the patch follows the netfilter path, we'll take care of sending
stable submissions.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next-2.6 v5 3/5] sctp: Add socket option operation for Auto-ASCONF
From: Wei Yongjun @ 2011-04-26  0:35 UTC (permalink / raw)
  To: Michio Honda; +Cc: netdev, lksctp-developers
In-Reply-To: <0A966B15-C985-40AC-9402-CAE9BA7F2AC2@sfc.wide.ad.jp>

Hi, Michio

> This patch allows the application to operate Auto-ASCONF on/off behavior via setsockopt() and getsockopt().  

You should update your net-next tree first, and then
create the patch based on the latest source code.

> +#define SCTP_AUTO_ASCONF       29

29 has already been assigned to another socket option.

>  
>

^ permalink raw reply

* [PATCH net-next 0/6] tg3: TSO loopback and EEH support
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patchset implements TSO loopback support into the selftest.  It also
adds EEH support.



^ permalink raw reply

* [PATCH net-next 6/6] tg3: Update version to 3.118
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch updates the tg3 version to 3.118.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 696be59..b20538a 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -64,10 +64,10 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define TG3_MAJ_NUM			3
-#define TG3_MIN_NUM			117
+#define TG3_MIN_NUM			118
 #define DRV_MODULE_VERSION	\
 	__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE	"January 25, 2011"
+#define DRV_MODULE_RELDATE	"April 22, 2011"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 2/6] tg3: Organize loopback test failure flags
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

As more test modes are added to each loopback mode, the need to
organise the results increases.  This patch groups the results by
loopback mode, and then by test mode.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a72d031..88cd231 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -11276,10 +11276,12 @@ out:
 	return err;
 }
 
-#define TG3_MAC_LOOPBACK_FAILED		1
-#define TG3_PHY_LOOPBACK_FAILED		2
-#define TG3_LOOPBACK_FAILED		(TG3_MAC_LOOPBACK_FAILED |	\
-					 TG3_PHY_LOOPBACK_FAILED)
+#define TG3_STD_LOOPBACK_FAILED		1
+#define TG3_JMB_LOOPBACK_FAILED		2
+
+#define TG3_MAC_LOOPBACK_SHIFT		0
+#define TG3_PHY_LOOPBACK_SHIFT		4
+#define TG3_LOOPBACK_FAILED			0x00000033
 
 static int tg3_test_loopback(struct tg3 *tp)
 {
@@ -11338,11 +11340,11 @@ static int tg3_test_loopback(struct tg3 *tp)
 	}
 
 	if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_MAC_LOOPBACK))
-		err |= TG3_MAC_LOOPBACK_FAILED;
+		err |= TG3_STD_LOOPBACK_FAILED << TG3_MAC_LOOPBACK_SHIFT;
 
 	if ((tp->tg3_flags & TG3_FLAG_JUMBO_RING_ENABLE) &&
 	    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_MAC_LOOPBACK))
-		err |= (TG3_MAC_LOOPBACK_FAILED << 2);
+		err |= TG3_JMB_LOOPBACK_FAILED << TG3_MAC_LOOPBACK_SHIFT;
 
 	if (tp->tg3_flags & TG3_FLAG_CPMU_PRESENT) {
 		tw32(TG3_CPMU_CTRL, cpmuctrl);
@@ -11354,10 +11356,12 @@ static int tg3_test_loopback(struct tg3 *tp)
 	if (!(tp->phy_flags & TG3_PHYFLG_PHY_SERDES) &&
 	    !(tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB)) {
 		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_PHY_LOOPBACK))
-			err |= TG3_PHY_LOOPBACK_FAILED;
+			err |= TG3_STD_LOOPBACK_FAILED <<
+			       TG3_PHY_LOOPBACK_SHIFT;
 		if ((tp->tg3_flags & TG3_FLAG_JUMBO_RING_ENABLE) &&
 		    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_PHY_LOOPBACK))
-			err |= (TG3_PHY_LOOPBACK_FAILED << 2);
+			err |= TG3_JMB_LOOPBACK_FAILED <<
+			       TG3_PHY_LOOPBACK_SHIFT;
 	}
 
 	/* Re-enable gphy autopowerdown. */
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 4/6] tg3: Add EEH support
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch adds EEH support to the tg3 driver.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |  147 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 147 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index fb2139a..6bc43ed 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -15395,6 +15395,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 		    pdev->dma_mask == DMA_BIT_MASK(32) ? 32 :
 		    ((u64)pdev->dma_mask) == DMA_BIT_MASK(40) ? 40 : 64);
 
+	pci_save_state(pdev);
+
 	return 0;
 
 err_out_apeunmap:
@@ -15551,11 +15553,156 @@ static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume);
 
 #endif /* CONFIG_PM_SLEEP */
 
+/**
+ * tg3_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
+					      pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+	pci_ers_result_t err = PCI_ERS_RESULT_NEED_RESET;
+
+	netdev_info(netdev, "PCI I/O error detected\n");
+
+	rtnl_lock();
+
+	if (!netif_running(netdev))
+		goto done;
+
+	tg3_phy_stop(tp);
+
+	tg3_netif_stop(tp);
+
+	del_timer_sync(&tp->timer);
+	tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER;
+
+	/* Want to make sure that the reset task doesn't run */
+	cancel_work_sync(&tp->reset_task);
+	tp->tg3_flags  &= ~TG3_FLAG_TX_RECOVERY_PENDING;
+	tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER;
+
+	netif_device_detach(netdev);
+
+	/* Clean up software state, even if MMIO is blocked */
+	tg3_full_lock(tp, 0);
+	tg3_halt(tp, RESET_KIND_SHUTDOWN, 0);
+	tg3_full_unlock(tp);
+
+done:
+	if (state == pci_channel_io_perm_failure)
+		err = PCI_ERS_RESULT_DISCONNECT;
+	else
+		pci_disable_device(pdev);
+
+	rtnl_unlock();
+
+	return err;
+}
+
+/**
+ * tg3_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot.
+ * At this point, the card has experienced a hard reset,
+ * followed by fixups by BIOS, and has its config space
+ * set up identically to what it was at cold boot.
+ */
+static pci_ers_result_t tg3_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+	pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
+	int err;
+
+	rtnl_lock();
+
+	if (pci_enable_device(pdev)) {
+		netdev_err(netdev, "Cannot re-enable PCI device after reset.\n");
+		goto done;
+	}
+
+	pci_set_master(pdev);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+
+	if (!netif_running(netdev)) {
+		rc = PCI_ERS_RESULT_RECOVERED;
+		goto done;
+	}
+
+	err = tg3_power_up(tp);
+	if (err) {
+		netdev_err(netdev, "Failed to restore register access.\n");
+		goto done;
+	}
+
+	rc = PCI_ERS_RESULT_RECOVERED;
+
+done:
+	rtnl_unlock();
+
+	return rc;
+}
+
+/**
+ * tg3_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells
+ * us that its OK to resume normal operation.
+ */
+static void tg3_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+	int err;
+
+	rtnl_lock();
+
+	if (!netif_running(netdev))
+		goto done;
+
+	tg3_full_lock(tp, 0);
+	tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
+	err = tg3_restart_hw(tp, 1);
+	tg3_full_unlock(tp);
+	if (err) {
+		netdev_err(netdev, "Cannot restart hardware after reset.\n");
+		goto done;
+	}
+
+	netif_device_attach(netdev);
+
+	tp->timer.expires = jiffies + tp->timer_offset;
+	add_timer(&tp->timer);
+
+	tg3_netif_start(tp);
+
+	tg3_phy_start(tp);
+
+done:
+	rtnl_unlock();
+}
+
+static struct pci_error_handlers tg3_err_handler = {
+	.error_detected	= tg3_io_error_detected,
+	.slot_reset	= tg3_io_slot_reset,
+	.resume		= tg3_io_resume
+};
+
 static struct pci_driver tg3_driver = {
 	.name		= DRV_MODULE_NAME,
 	.id_table	= tg3_pci_tbl,
 	.probe		= tg3_init_one,
 	.remove		= __devexit_p(tg3_remove_one),
+	.err_handler	= &tg3_err_handler,
 	.driver.pm	= TG3_PM_OPS,
 };
 
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 3/6] tg3: Add TSO loopback test
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson, Benjamin Li, Michael Chan

This patch adds code to exercise the TSO portion of the device through
a phy loopback test.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Benjamin Li <benli@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |  158 +++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 123 insertions(+), 35 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 88cd231..fb2139a 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -11076,11 +11076,35 @@ static int tg3_test_memory(struct tg3 *tp)
 
 #define TG3_MAC_LOOPBACK	0
 #define TG3_PHY_LOOPBACK	1
+#define TG3_TSO_LOOPBACK	2
+
+#define TG3_TSO_MSS		500
+
+#define TG3_TSO_IP_HDR_LEN	20
+#define TG3_TSO_TCP_HDR_LEN	20
+#define TG3_TSO_TCP_OPT_LEN	12
+
+static const u8 tg3_tso_header[] = {
+0x08, 0x00,
+0x45, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x40, 0x00,
+0x40, 0x06, 0x00, 0x00,
+0x0a, 0x00, 0x00, 0x01,
+0x0a, 0x00, 0x00, 0x02,
+0x0d, 0x00, 0xe0, 0x00,
+0x00, 0x00, 0x01, 0x00,
+0x00, 0x00, 0x02, 0x00,
+0x80, 0x10, 0x10, 0x00,
+0x14, 0x09, 0x00, 0x00,
+0x01, 0x01, 0x08, 0x0a,
+0x11, 0x11, 0x11, 0x11,
+0x11, 0x11, 0x11, 0x11,
+};
 
 static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 {
 	u32 mac_mode, rx_start_idx, rx_idx, tx_idx, opaque_key;
-	u32 desc_idx, coal_now;
+	u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val;
 	struct sk_buff *skb, *rx_skb;
 	u8 *tx_data;
 	dma_addr_t map;
@@ -11119,9 +11143,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 		else
 			mac_mode |= MAC_MODE_PORT_MODE_GMII;
 		tw32(MAC_MODE, mac_mode);
-	} else if (loopback_mode == TG3_PHY_LOOPBACK) {
-		u32 val;
-
+	} else {
 		if (tp->phy_flags & TG3_PHYFLG_IS_FET) {
 			tg3_phy_fet_toggle_apd(tp, false);
 			val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED100;
@@ -11169,8 +11191,6 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 				break;
 			mdelay(1);
 		}
-	} else {
-		return -EINVAL;
 	}
 
 	err = -EIO;
@@ -11186,7 +11206,54 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 
 	tw32(MAC_RX_MTU_SIZE, tx_len + ETH_FCS_LEN);
 
-	for (i = 14; i < tx_len; i++)
+	if (loopback_mode == TG3_TSO_LOOPBACK) {
+		struct iphdr *iph = (struct iphdr *)&tx_data[ETH_HLEN];
+
+		u32 hdr_len = TG3_TSO_IP_HDR_LEN + TG3_TSO_TCP_HDR_LEN +
+			      TG3_TSO_TCP_OPT_LEN;
+
+		memcpy(tx_data + ETH_ALEN * 2, tg3_tso_header,
+		       sizeof(tg3_tso_header));
+		mss = TG3_TSO_MSS;
+
+		val = tx_len - ETH_ALEN * 2 - sizeof(tg3_tso_header);
+		num_pkts = DIV_ROUND_UP(val, TG3_TSO_MSS);
+
+		/* Set the total length field in the IP header */
+		iph->tot_len = htons((u16)(mss + hdr_len));
+
+		base_flags = (TXD_FLAG_CPU_PRE_DMA |
+			      TXD_FLAG_CPU_POST_DMA);
+
+		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
+			struct tcphdr *th;
+			val = ETH_HLEN + TG3_TSO_IP_HDR_LEN;
+			th = (struct tcphdr *)&tx_data[val];
+			th->check = 0;
+		} else
+			base_flags |= TXD_FLAG_TCPUDP_CSUM;
+
+		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_3) {
+			mss |= (hdr_len & 0xc) << 12;
+			if (hdr_len & 0x10)
+				base_flags |= 0x00000010;
+			base_flags |= (hdr_len & 0x3e0) << 5;
+		} else if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_2)
+			mss |= hdr_len << 9;
+		else if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO_1) ||
+			 GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) {
+			mss |= (TG3_TSO_TCP_OPT_LEN << 9);
+		} else {
+			base_flags |= (TG3_TSO_TCP_OPT_LEN << 10);
+		}
+
+		data_off = ETH_ALEN * 2 + sizeof(tg3_tso_header);
+	} else {
+		num_pkts = 1;
+		data_off = ETH_HLEN;
+	}
+
+	for (i = data_off; i < tx_len; i++)
 		tx_data[i] = (u8) (i & 0xff);
 
 	map = pci_map_single(tp->pdev, skb->data, tx_len, PCI_DMA_TODEVICE);
@@ -11202,12 +11269,10 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 
 	rx_start_idx = rnapi->hw_status->idx[0].rx_producer;
 
-	num_pkts = 0;
-
-	tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len, 0, 1);
+	tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len,
+		    base_flags, (mss << 1) | 1);
 
 	tnapi->tx_prod++;
-	num_pkts++;
 
 	tw32_tx_mbox(tnapi->prodmbox, tnapi->tx_prod);
 	tr32_mailbox(tnapi->prodmbox);
@@ -11237,38 +11302,56 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 	if (rx_idx != rx_start_idx + num_pkts)
 		goto out;
 
-	desc = &rnapi->rx_rcb[rx_start_idx];
-	desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
-	opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
+	val = data_off;
+	while (rx_idx != rx_start_idx) {
+		desc = &rnapi->rx_rcb[rx_start_idx++];
+		desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
+		opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
 
-	if ((desc->err_vlan & RXD_ERR_MASK) != 0 &&
-	    (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII))
-		goto out;
+		if ((desc->err_vlan & RXD_ERR_MASK) != 0 &&
+		    (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII))
+			goto out;
 
-	rx_len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4;
-	if (rx_len != tx_len)
-		goto out;
+		rx_len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT)
+			 - ETH_FCS_LEN;
 
-	if (pktsz <= TG3_RX_STD_DMA_SZ - ETH_FCS_LEN) {
-		if (opaque_key != RXD_OPAQUE_RING_STD)
-			goto out;
+		if (loopback_mode != TG3_TSO_LOOPBACK) {
+			if (rx_len != tx_len)
+				goto out;
 
-		rx_skb = tpr->rx_std_buffers[desc_idx].skb;
-		map = dma_unmap_addr(&tpr->rx_std_buffers[desc_idx], mapping);
-	} else {
-		if (opaque_key != RXD_OPAQUE_RING_JUMBO)
+			if (pktsz <= TG3_RX_STD_DMA_SZ - ETH_FCS_LEN) {
+				if (opaque_key != RXD_OPAQUE_RING_STD)
+					goto out;
+			} else {
+				if (opaque_key != RXD_OPAQUE_RING_JUMBO)
+					goto out;
+			}
+		} else if ((desc->type_flags & RXD_FLAG_TCPUDP_CSUM) &&
+			   (desc->ip_tcp_csum & RXD_TCPCSUM_MASK)
+			    >> RXD_TCPCSUM_SHIFT == 0xffff) {
 			goto out;
+		}
 
-		rx_skb = tpr->rx_jmb_buffers[desc_idx].skb;
-		map = dma_unmap_addr(&tpr->rx_jmb_buffers[desc_idx], mapping);
-	}
+		if (opaque_key == RXD_OPAQUE_RING_STD) {
+			rx_skb = tpr->rx_std_buffers[desc_idx].skb;
+			map = dma_unmap_addr(&tpr->rx_std_buffers[desc_idx],
+					     mapping);
+		} else if (opaque_key == RXD_OPAQUE_RING_JUMBO) {
+			rx_skb = tpr->rx_jmb_buffers[desc_idx].skb;
+			map = dma_unmap_addr(&tpr->rx_jmb_buffers[desc_idx],
+					     mapping);
+		} else
+			goto out;
 
-	pci_dma_sync_single_for_cpu(tp->pdev, map, rx_len, PCI_DMA_FROMDEVICE);
+		pci_dma_sync_single_for_cpu(tp->pdev, map, rx_len,
+					    PCI_DMA_FROMDEVICE);
 
-	for (i = 14; i < tx_len; i++) {
-		if (*(rx_skb->data + i) != (u8) (i & 0xff))
-			goto out;
+		for (i = data_off; i < rx_len; i++, val++) {
+			if (*(rx_skb->data + i) != (u8) (val & 0xff))
+				goto out;
+		}
 	}
+
 	err = 0;
 
 	/* tg3_free_rings will unmap and free the rx_skb */
@@ -11278,10 +11361,11 @@ out:
 
 #define TG3_STD_LOOPBACK_FAILED		1
 #define TG3_JMB_LOOPBACK_FAILED		2
+#define TG3_TSO_LOOPBACK_FAILED		4
 
 #define TG3_MAC_LOOPBACK_SHIFT		0
 #define TG3_PHY_LOOPBACK_SHIFT		4
-#define TG3_LOOPBACK_FAILED			0x00000033
+#define TG3_LOOPBACK_FAILED		0x00000077
 
 static int tg3_test_loopback(struct tg3 *tp)
 {
@@ -11358,6 +11442,10 @@ static int tg3_test_loopback(struct tg3 *tp)
 		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_PHY_LOOPBACK))
 			err |= TG3_STD_LOOPBACK_FAILED <<
 			       TG3_PHY_LOOPBACK_SHIFT;
+		if ((tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) &&
+		    tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_TSO_LOOPBACK))
+			err |= TG3_TSO_LOOPBACK_FAILED <<
+			       TG3_PHY_LOOPBACK_SHIFT;
 		if ((tp->tg3_flags & TG3_FLAG_JUMBO_RING_ENABLE) &&
 		    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_PHY_LOOPBACK))
 			err |= TG3_JMB_LOOPBACK_FAILED <<
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 5/6] tg3: Whitespace cleanups
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch gets rid of some harmless whitespace errors.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |    9 ++-------
 1 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 6bc43ed..696be59 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -7666,8 +7666,6 @@ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp)
 	return 0;
 }
 
-/* 5705 needs a special version of the TSO firmware.  */
-
 /* tp->lock is held. */
 static int tg3_load_tso_firmware(struct tg3 *tp)
 {
@@ -10179,7 +10177,6 @@ static int tg3_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 		tp->tg3_flags &= ~TG3_FLAG_WOL_ENABLE;
 	spin_unlock_bh(&tp->lock);
 
-
 	return 0;
 }
 
@@ -12925,7 +12922,7 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp)
 done:
 	if (tp->tg3_flags & TG3_FLAG_WOL_CAP)
 		device_set_wakeup_enable(&tp->pdev->dev,
-				 tp->tg3_flags & TG3_FLAG_WOL_ENABLE);
+					 tp->tg3_flags & TG3_FLAG_WOL_ENABLE);
 	else
 		device_set_wakeup_capable(&tp->pdev->dev, false);
 }
@@ -13749,7 +13746,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	    (tp->tg3_flags2 & TG3_FLG2_5780_CLASS))
 		tp->tg3_flags2 |= TG3_FLG2_5750_PLUS;
 
-
 	if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) ||
 	    (tp->tg3_flags2 & TG3_FLG2_5750_PLUS))
 		tp->tg3_flags2 |= TG3_FLG2_5705_PLUS;
@@ -14034,7 +14030,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	    (tp->tg3_flags3 & TG3_FLG3_57765_PLUS))
 		tp->tg3_flags |= TG3_FLAG_CPMU_PRESENT;
 
-	/* Set up tp->grc_local_ctrl before calling tg_power_up().
+	/* Set up tp->grc_local_ctrl before calling tg3_power_up().
 	 * GPIO1 driven high will bring 5700's external PHY out of reset.
 	 * It is also used as eeprom write protect on LOMs.
 	 */
@@ -14829,7 +14825,6 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
 	}
 	if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) !=
 	    DMA_RWCTRL_WRITE_BNDRY_16) {
-
 		/* DMA test passed without adjusting DMA boundary,
 		 * now look for chipsets that are known to expose the
 		 * DMA bug without failing the test.
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 1/6] tg3: Fix int generation hw bug for 5719 / 5720
From: Matt Carlson @ 2011-04-25 22:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

On the 5719 and 5720, there is a bug where the hardware will
misinterpret a status tag update and leave interrupts permanently
disabled.  This patch enables a hardware fix that works around the
issue.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Reviewed-by: Benjamin Li <benli@broadcom.com>
---
 drivers/net/tg3.c |    3 +++
 drivers/net/tg3.h |    1 +
 2 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 693f36e..a72d031 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -8198,6 +8198,9 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 		      ~DMA_RWCTRL_DIS_CACHE_ALIGNMENT;
 		if (tp->pci_chip_rev_id == CHIPREV_ID_57765_A0)
 			val &= ~DMA_RWCTRL_CRDRDR_RDMA_MRRS_MSK;
+		if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_57765 &&
+		    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5717)
+			val |= DMA_RWCTRL_TAGGED_STAT_WA;
 		tw32(TG3PCI_DMA_RW_CTRL, val | tp->dma_rwctrl);
 	} else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5784 &&
 		   GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5761) {
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index eaa7669..6f37d2a 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -188,6 +188,7 @@
 #define   METAL_REV_B2			 0x02
 #define TG3PCI_DMA_RW_CTRL		0x0000006c
 #define  DMA_RWCTRL_DIS_CACHE_ALIGNMENT  0x00000001
+#define  DMA_RWCTRL_TAGGED_STAT_WA	 0x00000080
 #define  DMA_RWCTRL_CRDRDR_RDMA_MRRS_MSK 0x00000380
 #define  DMA_RWCTRL_READ_BNDRY_MASK	 0x00000700
 #define  DMA_RWCTRL_READ_BNDRY_DISAB	 0x00000000
-- 
1.7.3.4



^ permalink raw reply related

* [RFC PATCH] netlink: Increase netlink dump skb message size
From: Greg Rose @ 2011-04-25 22:01 UTC (permalink / raw)
  To: netdev; +Cc: bhutchings, davem

The message size allocated for rtnl info dumps was limited to a single page.
This is not enough for additional interface info available with devices
that support SR-IOV.  Check that the amount of data allocated is sufficient
for the amount of data requested.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
---

 include/linux/rtnetlink.h |    1 +
 net/core/rtnetlink.c      |    6 ++++++
 net/netlink/af_netlink.c  |   37 +++++++++++++++++++++++++++++++------
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index bbad657..d1ff937 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -622,6 +622,7 @@ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
 			      u32 id, u32 ts, u32 tsage, long expires,
 			      u32 error);
+extern size_t rtnl_get_nlmsg_size(const struct net_device *dev);
 
 extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d7c4bb4..001c947 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -764,6 +764,12 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
 	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
 }
 
+size_t rtnl_get_nlmsg_size(const struct net_device *dev)
+{
+	return if_nlmsg_size(dev);
+}
+EXPORT_SYMBOL(rtnl_get_nlmsg_size);
+
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
 {
 	struct nlattr *vf_ports;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c8f35b5..5b1106c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1664,23 +1664,48 @@ static void netlink_destroy_callback(struct netlink_callback *cb)
 static int netlink_dump(struct sock *sk)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	struct net *net = sock_net(sk);
 	struct netlink_callback *cb;
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
+	struct net_device *dev;
+	struct hlist_head *head;
+	struct hlist_node *node;
 	int len, err = -ENOBUFS;
-
-	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
-	if (!skb)
-		goto errout;
+	int h, s_h;
+	int idx = 0, s_idx;
+	size_t alloc_size = NLMSG_GOODSIZE;
 
 	mutex_lock(nlk->cb_mutex);
 
 	cb = nlk->cb;
 	if (cb == NULL) {
 		err = -EINVAL;
-		goto errout_skb;
+		goto errout;
 	}
 
+	s_h = cb->args[0];
+	s_idx = cb->args[1];
+
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry(dev, node, head, index_hlist) {
+			if (idx < s_idx) {
+				idx++;
+				continue;
+			}
+			alloc_size = rtnl_get_nlmsg_size(dev);
+			if (alloc_size < NLMSG_GOODSIZE)
+				alloc_size = NLMSG_GOODSIZE;
+			break;
+		}
+	}
+
+	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
+	if (!skb)
+		goto errout;
+
 	len = cb->dump(skb, cb);
 
 	if (len > 0) {
@@ -1717,9 +1742,9 @@ static int netlink_dump(struct sock *sk)
 	return 0;
 
 errout_skb:
-	mutex_unlock(nlk->cb_mutex);
 	kfree_skb(skb);
 errout:
+	mutex_unlock(nlk->cb_mutex);
 	return err;
 }
 


^ permalink raw reply related

* Re: [Bugme-new] [Bug 33902] New: tcpi_state field in tcp_info structure reports TCP_CLOSE instead of TCP_TIME_WAIT state
From: Andrew Morton @ 2011-04-25 21:34 UTC (permalink / raw)
  To: netdev; +Cc: bugzilla-daemon, bugme-daemon, Dmitry.Izbitsky
In-Reply-To: <bug-33902-10286@https.bugzilla.kernel.org/>


(switched to email.  Please respond via emailed reply-to-all, not via the
bugzilla web interface).

On Mon, 25 Apr 2011 08:08:36 GMT
bugzilla-daemon@bugzilla.kernel.org wrote:

> https://bugzilla.kernel.org/show_bug.cgi?id=33902
> 
>            Summary: tcpi_state field in tcp_info structure reports
>                     TCP_CLOSE instead of TCP_TIME_WAIT state
>            Product: Networking
>            Version: 2.5
>     Kernel Version: 2.6.38
>           Platform: All
>         OS/Version: Linux
>               Tree: Mainline
>             Status: NEW
>           Severity: normal
>           Priority: P1
>          Component: IPV4
>         AssignedTo: shemminger@linux-foundation.org
>         ReportedBy: Dmitry.Izbitsky@oktetlabs.ru
>         Regression: No
> 
> 
> Setup - TCP connection in ESTABLISHED state. Local socket calls
> shutdown(SHUT_RDWR). After that peer calls shutdown(SHUT_RDWR).
> 
> Local socket should now be in TIME_WAIT state (from specification point 
> of view). And it's indeed in TIME_WAIT (TCP_TIME_WAIT) state if we look at 
> /proc/net/tcp (or netstat -t). However, if one tries to get connection state
> via tcp_info (getsockopt(TCP_INFO)) the reported state is CLOSED (TCP_CLOSE).
> 
> Looks like the problem is in tcp_time_wait() function
> (net/ipv4/tcp_minisocks.c).
> It's called with state=TCP_TIME_WAIT, and sets inet_timewait_sock
> *tw->tw_state field to TCP_TIME_WAIT. That's why the state is reported
> correctly when looking into /proc. However, at the end it calls tcp_done(sk),
> which itself calls tcp_set_state(TCP_CLOSE), so sk->sk_state is set to
> TCP_CLOSE instead of TCP_TIME_WAIT. And it's reported this way via TCP_INFO
> socket option.
> 
> Problem is reproduced on 2.6.26, 2.6.38 and is probably observed on earlier
> kernels.


^ permalink raw reply

* [PATCH v4 20/27] HFI: Close window hypervisor call
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/core/hfidd_hcalls.c |   22 ++++++++++++++
 drivers/net/hfi/core/hfidd_proto.h  |    1 +
 drivers/net/hfi/core/hfidd_window.c |   53 +++++++++++++++++++++++++++++++++--
 3 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hfi/core/hfidd_hcalls.c b/drivers/net/hfi/core/hfidd_hcalls.c
index 1915336..4bc6525 100644
--- a/drivers/net/hfi/core/hfidd_hcalls.c
+++ b/drivers/net/hfi/core/hfidd_hcalls.c
@@ -153,6 +153,17 @@ static inline long long h_hfi_open_window(int token,
 	return rc;
 }
 
+static inline long long h_hfi_close_window(int token,
+		u64 HFI_chip_ID,
+		u64 win_num,
+		u64 flag)
+{
+	return plpar_hcall_norets(token,
+		HFI_chip_ID,
+		win_num,
+		flag);
+}
+
 long long hfi_start_nmmu(u64 chip_id, void *nmmu_info)
 {
 	return h_nmmu_start(H_NMMU_START, chip_id, nmmu_info);
@@ -249,6 +260,17 @@ long long hfi_modify_mr(u64 chip_id, u64 request, u64 mr_handle,
 	return hvrc;
 }
 
+long long hfi_close_window(u64 unit_id, u64 win_id, u64 flag)
+{
+	long long hvrc;
+
+	hvrc = h_hfi_close_window(H_HFI_CLOSE_WINDOW,
+			unit_id,
+			win_id,
+			flag);
+			return hvrc;
+}
+
 long long hfi_free_mr(u64 chip_id, u64 res, u64 mr_handle, u64 sub_region_id)
 {
 	long long	hvrc;
diff --git a/drivers/net/hfi/core/hfidd_proto.h b/drivers/net/hfi/core/hfidd_proto.h
index e065d56..f531dcd 100644
--- a/drivers/net/hfi/core/hfidd_proto.h
+++ b/drivers/net/hfi/core/hfidd_proto.h
@@ -94,6 +94,7 @@ long long hfi_modify_mr(u64 chip_id, u64 request, u64 mr_handle,
 		u64 e_addr,
 		u64 l_addr,
 		u64 num_pg_sz);
+long long hfi_close_window(u64 unit_id, u64 win_id, u64 flag);
 long long hfi_free_mr(u64 chip_id, u64 res, u64 mr_handle,
 		u64 sub_region_id);
 long long hfi_hquery_interface(u64 unit_id, u64 subtype, u64 query_p,
diff --git a/drivers/net/hfi/core/hfidd_window.c b/drivers/net/hfi/core/hfidd_window.c
index 3cfe5c3..fd692eb 100644
--- a/drivers/net/hfi/core/hfidd_window.c
+++ b/drivers/net/hfi/core/hfidd_window.c
@@ -459,6 +459,43 @@ static int hfi_hcall_to_open_window(struct hfidd_acs *p_acs,
 	return 0;
 }
 
+/* Call to CLOSE WINDOW hcall */
+static int hfi_hcall_to_close_window(struct hfidd_acs *p_acs,
+		struct hfidd_window *win_p)
+{
+	int	rc = 0;
+	long long hvrc = 0;
+	u64	start_time = get_jiffies_64();
+
+	hvrc = hfi_close_window(p_acs->dds.hfi_id,
+			win_p->index,
+			H_CLOSE);
+
+	/*
+	 * Need to call CLOSE WINDOW with flag H_CHECK_CLOSED
+	 * to check when the window is completely closed
+	 */
+	while (hvrc == H_BUSY) {
+		hvrc = hfi_close_window(p_acs->dds.hfi_id,
+				win_p->index,
+				H_CHECK_CLOSED);
+		if (hvrc != H_BUSY)
+			break;
+		if (hfidd_age_hcall(start_time))
+			break;
+	}
+
+	if (hvrc != H_SUCCESS) {
+		win_p->state = WIN_FAIL_CLOSE;
+		rc = -EIO;
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfi_hcall_to_close_window: CLOSE WINDOW failed, "
+			"hvrc=0x%llx\n", hvrc);
+	}
+
+	return rc;
+}
+
 /*
  * Map the Effective Address pages for Memory Regions.
  * If more than one page, need to setup a page containing
@@ -1005,7 +1042,7 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 		dev_printk(KERN_ERR, p_acs->hfidd_dev,
 			"hfidd_open_window_func: hfi_map_mmio_regs "
 			"failed, rc = 0x%x\n", rc);
-		goto hfidd_open_window_func_err4;
+		goto hfidd_open_window_func_err5;
 	}
 
 	/* tell user the local ISR id */
@@ -1019,7 +1056,7 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 		dev_printk(KERN_ERR, p_acs->hfidd_dev,
 			"hfidd_open_window_func: hfi_copy_to_user "
 			"failed, rc = 0x%x\n", rc);
-		goto hfidd_open_window_func_err5;
+		goto hfidd_open_window_func_err6;
 	}
 
 	spin_lock(&(win_p->win_lock));
@@ -1031,9 +1068,11 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 	kfree(local_p);
 	return rc;
 
-hfidd_open_window_func_err5:
+hfidd_open_window_func_err6:
 	if (is_userspace)
 		hfidd_unmap(local_p->mmio_regs.use.kptr, PAGE_SIZE_64K);
+hfidd_open_window_func_err5:
+	hfi_hcall_to_close_window(p_acs, win_p);
 hfidd_open_window_func_err4:
 	hfi_destroy_window_parm(p_acs, is_userspace, win_p, local_p);
 hfidd_open_window_func_err3:
@@ -1103,6 +1142,14 @@ int hfidd_close_window_internal(struct hfidd_acs *p_acs,
 		goto hfidd_close_window_internal_err0;
 	}
 
+	rc = hfi_hcall_to_close_window(p_acs, win_p);
+	if (rc) {
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfidd_close_window_internal: hfi_hcall_to_close_window "
+			"failed, rc = 0x%x\n", rc);
+		goto hfidd_close_window_internal_err0;
+	}
+
 	hfi_destroy_window_info(p_acs, win_p);
 
 	/* Call hcall to unregister MR in the MMU */
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 26/27] HFI: hfi_ip fifo receive path
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/ip/hf_proto.h    |    2 +
 drivers/net/hfi/ip/hfi_ip_main.c |  326 +++++++++++++++++++++++++++++++++++++-
 include/linux/hfi/hfi_ip.h       |   26 +++-
 3 files changed, 351 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hfi/ip/hf_proto.h b/drivers/net/hfi/ip/hf_proto.h
index b0232ab..022512a 100644
--- a/drivers/net/hfi/ip/hf_proto.h
+++ b/drivers/net/hfi/ip/hf_proto.h
@@ -34,6 +34,8 @@
 #define _HF_PROTO_H_
 
 int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls);
+void hf_construct_hwhdr(struct hf_if *net_if, struct sk_buff *skb,
+			struct base_hdr *b_hdr);
 extern int hfidd_open_window_func(struct hfidd_acs *p_acs,
 		u32 is_userspace,
 		struct hfi_client_info *user_p,
diff --git a/drivers/net/hfi/ip/hfi_ip_main.c b/drivers/net/hfi/ip/hfi_ip_main.c
index 689f92e..6b2ec3f 100644
--- a/drivers/net/hfi/ip/hfi_ip_main.c
+++ b/drivers/net/hfi/ip/hfi_ip_main.c
@@ -154,6 +154,9 @@ static int hf_alloc_rx_resource(struct hf_net *net)
 
 	memset(net_if->rx_fifo.addr, 0, net_if->rx_fifo.size);
 
+	net_if->rx_fslot_debt = 0;
+	net_if->rx_pkt_valid = 1;
+
 	return 0;
 }
 
@@ -209,8 +212,18 @@ static int hf_send_intr_callback(void *parm, u32 win, u32 ext)
 	return 0;
 }
 
+static int hf_recv_intr_callback(void *parm, u32 win, u32 ext)
+{
+	struct hf_net	*net = (struct hf_net *)parm;
+
+	napi_schedule(&(net->napi));
+
+	return 0;
+}
+
 struct hf_events_cb hf_events[HF_EVENT_NUM] = {
 	{HFIDD_SEND,		(void *)hf_send_intr_callback},
+	{HFIDD_RECV,		(void *)hf_recv_intr_callback},
 };
 
 static int hf_register_ip_events(struct hf_net *net,
@@ -357,14 +370,50 @@ static int hf_set_mac_addr(struct net_device *netdev, void *p)
 	return 0;
 }
 
+static void hf_set_recv_intr(struct hf_if *net_if)
+{
+	int			offset;
+	struct hfi_hdr		*rx_pkt;
+
+	/* enable recv intr and set threshold to next packet */
+	offset = net_if->rx_fifo.head;
+
+	hf_mmio_regs_write_then_read(net_if, HFI_RFIFO_INTR_REG,
+		(HF_ENA_RECV_INTR + (offset << HF_RECV_INTR_MATCH_SHIFT)));
+
+	/* check if there is packet received in the mean time */
+	rx_pkt = net_if->rx_fifo.addr + (offset << HFI_CACHE_LINE_SHIFT);
+
+	if ((rx_pkt->id.job_id == HF_IP_JOBID) &&
+		(rx_pkt->base_hdr.pkt_valid == net_if->rx_pkt_valid)) {
+
+		/* force an immediate recv intr */
+		hf_mmio_regs_write(net_if, HFI_RFIFO_INTR_REG,
+		(HF_IMM_RECV_INTR + (offset << HF_RECV_INTR_MATCH_SHIFT)));
+	}
+}
+
 static void hf_init_hw_regs(struct hf_if *net_if)
 {
 	/* setup IP with payload threshold in cache line size */
 	hf_mmio_regs_write(net_if, HFI_IP_RECV_SIZE,
 		(HF_PAYLOAD_RX_THRESHOLD << HF_PAYLOAD_RX_THRESH_SHIFT));
 
+	/* setup recv fifo out of order intr control to disable */
+	hf_mmio_regs_write(net_if, HFI_RFIFO_OUT_EVENT_REG,
+			HF_RFIFO_OUT_CNTL_REARM);
+
+	/* setup recv fifo out of order threshold */
+	hf_mmio_regs_write(net_if, HFI_RFIFO_OUT_TH_REG, HF_RFIFO_OUT_THRESH);
+
 	/* initialize SEND INTR STATUS */
 	hf_mmio_regs_write(net_if, HFI_SINTR_STATUS_REG, 0);
+
+	hf_mmio_regs_write(net_if, HFI_RFIFO_INJ_TH_REG,
+			(HF_RFIFO_CACHE_INJ_TH << HF_RFIFO_CACHE_INJ_TH_SHIFT));
+
+	/* enable and set receive intr */
+	hf_set_recv_intr(net_if);
 }
 
 static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
@@ -402,6 +451,7 @@ static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
 	net_if->state = HF_NET_OPEN;
 	spin_unlock(&(net_if->lock));
 
+	napi_enable(&net->napi);
 	netif_carrier_on(netdev);
 	netif_start_queue(netdev);
 
@@ -488,6 +538,7 @@ static int hf_net_close(struct net_device *netdev)
 
 	spin_lock(&(net_if->lock));
 	if (net_if->state == HF_NET_OPEN) {
+		napi_disable(&net->napi);
 		netif_stop_queue(netdev);
 		netif_carrier_off(netdev);
 
@@ -507,6 +558,245 @@ static int hf_net_close(struct net_device *netdev)
 	return 0;
 }
 
+/* Invalidate the jobid field of each cache line before advancing head.
+ * The first cache line is protected by the valid bit, so we skip it. */
+static inline void hf_advance_rx_head(struct hf_if *net_if, u32 len)
+{
+	int		i, h;
+	u32		*cache_p;
+
+	h = (net_if->rx_fifo.head + 1) & (net_if->rx_fifo.emax);
+
+	for (i = 1; i < len; i++) {
+		cache_p = (u32 *)((char *)(net_if->rx_fifo.addr) +
+				(h << HFI_CACHE_LINE_SHIFT));
+		if (*cache_p == HF_IP_JOBID)
+			*cache_p = 0;
+		h = (h + 1) & (net_if->rx_fifo.emax);
+	}
+
+	if (net_if->rx_fifo.head > h)
+		net_if->rx_pkt_valid ^= 0x1;
+
+	net_if->rx_fifo.head = h;
+}
+
+void hf_construct_hwhdr(struct hf_if *net_if,
+			struct sk_buff *skb,
+			struct base_hdr *b_hdr)
+{
+	struct ethhdr		*hwhdr_p;
+
+	hwhdr_p = (struct ethhdr *)(skb->data);
+
+	/* MAC byte 1, bits6 = 1, locally admin MAC */
+	hwhdr_p->h_dest[0] = 0x2;
+	/* MAC byte 2, bits2-7 = cluster id */
+	hwhdr_p->h_dest[1] = 0x0;
+	*(u16 *)(&(hwhdr_p->h_dest[2])) = (u16)(b_hdr->dst_isr);
+	*(u16 *)(&(hwhdr_p->h_dest[4])) =
+			(u16)hf_get_mac(b_hdr->dst_win);
+
+	hwhdr_p->h_source[0] = 0x2;
+	hwhdr_p->h_source[1] = 0x0;
+	*(u16 *)(&(hwhdr_p->h_source[2])) = (u16)(b_hdr->src_isr);
+	*(u16 *)(&(hwhdr_p->h_source[4])) =
+				(u16)hf_get_mac(b_hdr->src_win);
+
+	hwhdr_p->h_proto = skb->protocol;
+}
+
+static inline int hf_check_hdr_version(struct hf_net *net,
+				struct hf_if_proto_hdr *hf_hdr)
+{
+	if (hf_hdr->version != HF_PROTO_HDR_VERSION) {
+		netdev_err(net->netdev,
+			"hf_check_hdr_version: hdr version 0x%x "
+			"does not match 0x%x\n",
+			hf_hdr->version, HF_PROTO_HDR_VERSION);
+		net->netdev->stats.rx_dropped++;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void hf_recv_ip_with_payload(struct hf_net *net,
+				    struct hfi_ip_with_payload_pkt *pkt,
+				    u32 pkt_len)
+{
+	u32			len, resid;
+	struct hf_if		*net_if = &(net->hfif);
+	struct net_device	*netdev = net->netdev;
+	struct hf_if_proto_hdr	*hf_hdr;
+	struct sk_buff		*skb;
+	void			*src, *dst;
+	u32			cache_ln_num = 0;
+	u16			proto;
+
+	/* retrieve the protocol header pointer */
+	hf_hdr = (struct hf_if_proto_hdr *)(pkt->payload);
+
+	if (hf_check_hdr_version(net, hf_hdr) != 0)
+		return;
+
+	switch (hf_hdr->msg_type) {
+	case HF_IF_ARP:
+		proto = htons(ETH_P_ARP);
+		break;
+
+	case HF_IF_FIFO:
+		proto = htons(ETH_P_IP);
+		break;
+
+	default:
+		netdev_err(net->netdev,
+			"hf_recv_ip_with_payload: unknown msg_type 0x%x\n",
+			hf_hdr->msg_type);
+		netdev->stats.rx_dropped++;
+		return;
+	}
+
+	len = hf_hdr->msg_len - HF_PROTO_LEN;
+
+	skb = netdev_alloc_skb_ip_align(net->netdev,
+				len + ETH_HLEN + HF_ALIGN_PAD);
+	if (!skb) {
+		netdev_err(net->netdev, "hf_recv_ip_with_payload: "
+				"netdev_alloc_skb_ip_align fail\n");
+		netdev->stats.rx_dropped++;
+		BUG();
+		return;
+	}
+
+	skb_reserve(skb, HF_ALIGN_PAD);
+	skb->protocol = proto;
+
+	skb_put(skb, len + ETH_HLEN);
+
+	/* construct ethhdr from base hdr */
+	hf_construct_hwhdr(net_if, skb, &(pkt->hfi_hdr.base_hdr));
+
+	skb_reset_mac_header(skb);
+
+	skb_pull(skb, ETH_HLEN);
+
+	src = (void *)(hf_hdr + 1);
+	dst = (void *)skb->data;
+
+	/* check if the payload wrapped the rx_fifo */
+	if ((net_if->rx_fifo.head + (pkt_len - 1)) > net_if->rx_fifo.emax) {
+		/* Wrapped */
+		cache_ln_num = net_if->rx_fifo.emax - net_if->rx_fifo.head + 1;
+		resid  = cache_ln_num << HFI_CACHE_LINE_SHIFT;
+		resid -= (HF_IP_HDR_LEN + HF_PROTO_LEN);
+
+		/* For netboot, pkt_len maybe larger than len */
+		if (resid > len)
+			resid = len;
+
+		memcpy(dst, src, resid);
+
+		src = (void *)net_if->rx_fifo.addr;
+		dst = (void *)skb->data + resid;
+		len -= resid;
+	}
+
+	/* copy the rest of payload */
+	if (len > 0)
+		memcpy(dst, src, len);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	netdev->stats.rx_packets++;
+	netdev->stats.rx_bytes += skb->len;
+
+	netif_receive_skb(skb);
+}
+
+static void hf_recv_ip_good(struct hf_net *net,
+			    struct hfi_hdr *rx_curr,
+			    u32 pkt_len)
+{
+	switch (rx_curr->type.header_type) {
+
+	case  HFI_IP_WITH_PAYLOAD:
+	case  HFI_IP_MULTICAST_WITH_PAYLOAD:
+		hf_recv_ip_with_payload(net,
+			(struct hfi_ip_with_payload_pkt *)rx_curr, pkt_len);
+		break;
+
+	default:
+		netdev_err(net->netdev, "hf_rx: receive unknown "
+			"headerType = 0x%x, pkt_len = 0x%x\n",
+			rx_curr->type.header_type, pkt_len);
+
+		/* unknown packet, drop it */
+		net->netdev->stats.rx_dropped++;
+		break;
+	}
+}
+
+static int hf_rx(struct hf_net *net, int budget)
+{
+	int		num = 0;
+	struct hf_if	*net_if = &(net->hfif);
+	u32		pkt_len, status;
+	struct hfi_hdr	*rx_curr;
+	u32		job_id, pkt_valid;
+
+	rx_curr = (struct hfi_hdr *) (net_if->rx_fifo.addr +
+			(net_if->rx_fifo.head << HFI_CACHE_LINE_SHIFT));
+
+	while (budget != 0) {
+		job_id = rx_curr->id.job_id;
+		pkt_valid = rx_curr->base_hdr.pkt_valid;
+
+		isync();
+		if ((job_id != HF_IP_JOBID) ||
+		    (pkt_valid != net_if->rx_pkt_valid))
+			break;
+
+		pkt_len = hfi_pktlen_to_cachelines(rx_curr->base_hdr.pkt_len);
+
+		status = rx_curr->base_hdr.status;
+		if (status == HFI_PKT_STATUS_GOOD) {
+			hf_recv_ip_good(net, rx_curr, pkt_len);
+		} else {
+			/* bad packet */
+			netdev_err(net->netdev, "hf_rx: receive bad "
+				"status = 0x%x, pkt_len = 0x%x\n",
+				status, pkt_len);
+
+			net->netdev->stats.rx_dropped++;
+		}
+
+		net->netdev->last_rx = jiffies;
+
+		hf_advance_rx_head(net_if, pkt_len);
+
+		/* Make sure the jobid is invalidated before posting to hw */
+		wmb();
+
+		net_if->rx_fslot_debt += pkt_len;
+		if (net_if->rx_fslot_debt >= HF_INC_FSLOT_WATERMARK) {
+			hf_mmio_regs_write(net_if, HFI_RFIFO_INC_FSLOT_REG,
+					net_if->rx_fslot_debt);
+			net_if->rx_fslot_debt = 0;
+		}
+
+		budget--;
+		num++;
+		rx_curr = net_if->rx_fifo.addr +
+			(net_if->rx_fifo.head << HFI_CACHE_LINE_SHIFT);
+
+	}
+
+	netdev_dbg(net->netdev, "hf_rx: exit, head = 0x%x, recv 0x%x pkts\n",
+			net_if->rx_fifo.head, num);
+
+	return num;
+}
+
 static void hf_tx_recycle(struct hf_if *net_if)
 {
 	u32		head, head_idx, slots_per_blk;
@@ -906,6 +1196,30 @@ static void hf_if_setup(struct net_device *netdev)
 	memcpy(netdev->broadcast, hfi_bcast_addr, ETH_ALEN);
 }
 
+static int hf_poll(struct napi_struct *napi, int budget)
+{
+	int			work_done;
+	struct net_device	*netdev;
+	struct hf_net		*net;
+	struct hf_if		*net_if;
+
+	net	= container_of(napi, struct hf_net, napi);
+	net_if	= &(net->hfif);
+	netdev	= net->netdev;
+
+	work_done = hf_rx(net, budget);
+
+	/* Always assume we have received all available packets */
+	/*  and set recv intr for next packet */
+	if (work_done < budget) {
+		napi_complete(napi);
+		isync();
+		hf_set_recv_intr(net_if);
+	}
+
+	return work_done;
+}
+
 static struct hf_net *hf_init_netdev(int idx, int ai)
 {
 	struct net_device	*netdev;
@@ -924,6 +1238,7 @@ static struct hf_net *hf_init_netdev(int idx, int ai)
 	}
 
 	net = netdev_priv(netdev);
+	netif_napi_add(netdev, &(net->napi), hf_poll, HF_NAPI_WEIGHT);
 	net->netdev = netdev;
 
 	memset(&(net->hfif), 0, sizeof(struct hf_if));
@@ -939,11 +1254,16 @@ static struct hf_net *hf_init_netdev(int idx, int ai)
 		netdev_err(netdev, "hf_init_netdev: "
 				"failed to register netdev=hfi%d:hf%d, "
 				"rc = 0x%x\n", ai, idx, rc);
-		free_netdev(netdev);
-		return ERR_PTR(-ENODEV);
+		goto err_out1;
 	}
 
 	return net;
+
+err_out1:
+	netif_napi_del(&(net->napi));
+	free_netdev(netdev);
+
+	return ERR_PTR(-ENODEV);
 }
 
 static void hf_del_netdev(struct hf_net *net)
@@ -952,6 +1272,8 @@ static void hf_del_netdev(struct hf_net *net)
 
 	unregister_netdev(netdev);
 
+	netif_napi_del(&(net->napi));
+
 	free_netdev(netdev);
 }
 
diff --git a/include/linux/hfi/hfi_ip.h b/include/linux/hfi/hfi_ip.h
index 4e70c14..ec87300 100644
--- a/include/linux/hfi/hfi_ip.h
+++ b/include/linux/hfi/hfi_ip.h
@@ -38,6 +38,7 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
 #include <net/arp.h>
 
 #include <linux/hfi/hfidd_internal.h>
@@ -56,6 +57,12 @@
 #define HF_NAPI_WEIGHT			256
 #define HF_MAX_NAME_LEN			64
 
+/* rfifo intr */
+#define HF_RFIFO_OUT_CNTL_REARM		0	/* 0 to disable interrupt */
+#define HF_IMM_RECV_INTR		0xf0000000	/* bit 32-35 on */
+#define HF_ENA_RECV_INTR		0xc0000000	/* bit 32-33 on */
+#define HF_RECV_INTR_MATCH_SHIFT	7	/* bit 37-56 */
+
 /* sfifo intr: bit 39-55 is threshold */
 /*             bit 34 enable, bit 35 unmask */
 #define HF_SFIFO_INTR_ENABLE		(0x3 << (63 - 35))
@@ -74,11 +81,17 @@
 #define HF_FV_BIT_MAX			31
 #define HF_SEND_ONE			1
 
+#define HF_RFIFO_CACHE_INJ_TH		7ULL
+#define HF_RFIFO_CACHE_INJ_TH_SHIFT	61
+#define HF_RFIFO_OUT_THRESH		0
+
 #define HF_PAYLOAD_MAX			(2048 - HF_IP_HDR_LEN - HF_PROTO_LEN)
 #define HF_NET_MTU			HF_PAYLOAD_MAX
 #define HF_PAYLOAD_RX_THRESHOLD		0x10ULL
 #define HF_PAYLOAD_RX_THRESH_SHIFT	59
 
+#define HF_INC_FSLOT_WATERMARK		(HF_RFIFO_SLOTS >> 3)
+
 struct hfi_ip_extended_hdr {            /* 16B */
 	unsigned int	immediate_len:7;/* In bytes */
 	unsigned int	num_desc:3;     /* number of descriptors */
@@ -99,7 +112,9 @@ struct hfi_ip_with_payload_pkt {
 
 #define HF_IP_HDR_LEN			((sizeof(struct hfi_hdr) + \
 				sizeof(struct hfi_ip_extended_hdr)))
+
 #define HF_ALIGN_PAD			2
+
 #define HF_PROTO_HDR_VERSION		0x1
 /* HFI protocol message type */
 #define	HF_IF_ARP			0xA0
@@ -146,7 +161,10 @@ struct hf_if {
 	u32			sfifo_fv_polarity;
 	u32			sfifo_slots_per_blk;
 	u32			sfifo_packets;
+	u32			rx_pkt_valid;		/* Polarity of recv
+							   packet valid bit */
 	u32			msg_id;
+	u32			rx_fslot_debt;
 	void __iomem		*doorbell;		/* mapped mmio_regs */
 	struct hf_fifo		tx_fifo;
 	struct hf_fifo		rx_fifo;
@@ -159,6 +177,7 @@ struct hf_if {
 /* Private structure for HF inetrface */
 struct hf_net {
 	struct net_device	*netdev;
+	struct napi_struct	napi;
 	struct hf_if		hfif;
 };
 
@@ -172,7 +191,7 @@ struct hf_global_info {
 
 extern struct hf_global_info	hf_ginfo;
 
-#define HF_EVENT_NUM		1
+#define HF_EVENT_NUM		2
 
 struct hf_events_cb {
 	enum hfi_event_type	type;
@@ -182,6 +201,11 @@ struct hf_events_cb {
 #define HF_MAC_HFI_SHIFT	12
 #define HF_HDR_HFI_SHIFT	8
 
+static inline u32 hf_get_mac(u32 w)
+{
+	return ((w >> HF_HDR_HFI_SHIFT) << HF_MAC_HFI_SHIFT) | (w & 0xFF);
+}
+
 static inline u32 hf_get_win(u16 id)
 {
 	return ((id >> HF_MAC_HFI_SHIFT) << HF_HDR_HFI_SHIFT) | (id & 0xFF);
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 27/27] HFI: hfi_ip ethtool support
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/ip/Makefile      |    2 +-
 drivers/net/hfi/ip/hf_ethtool.c  |  136 ++++++++++++++++++++++++++++++++++++++
 drivers/net/hfi/ip/hf_proto.h    |    1 +
 drivers/net/hfi/ip/hfi_ip_main.c |   36 +++++++++-
 include/linux/hfi/hfi_ip.h       |   32 +++++++++-
 5 files changed, 201 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/hfi/ip/hf_ethtool.c

diff --git a/drivers/net/hfi/ip/Makefile b/drivers/net/hfi/ip/Makefile
index 90c7dea..28a4a51 100644
--- a/drivers/net/hfi/ip/Makefile
+++ b/drivers/net/hfi/ip/Makefile
@@ -3,4 +3,4 @@
 #
 obj-$(CONFIG_HFI_IP) += hfi_ip.o
 
-hfi_ip-objs :=	hfi_ip_main.o
+hfi_ip-objs :=	hfi_ip_main.o hf_ethtool.o
diff --git a/drivers/net/hfi/ip/hf_ethtool.c b/drivers/net/hfi/ip/hf_ethtool.c
new file mode 100644
index 0000000..204a1bf
--- /dev/null
+++ b/drivers/net/hfi/ip/hf_ethtool.c
@@ -0,0 +1,136 @@
+/*
+ * hf_ethtool.c
+ *
+ * HF IP driver for IBM System p
+ *
+ *  Authors:
+ *	Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *	William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *	Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *	Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *	Jian Xiao <jian@linux.vnet.ibm.com>
+ *	Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *	Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/ethtool.h>
+
+#include <linux/hfi/hfi_ip.h>
+
+static char hf_ethtool_stats_keys[][ETH_GSTRING_LEN] = {
+	{"sfifo_packets"},
+	{"rdma_packets"},
+	{"tx_timeout"},
+	{"tx_queue_stop"},
+	{"tx_drop"},
+	{"tx_err_headlen"},
+	{"rx_version_mismatch"},
+	{"rx_err_restore"},
+	{"rx_err_cookie"},
+	{"rx_err_skb"},
+	{"rx_err_hdr_type"},
+	{"rx_err_msg_type"},
+	{"rx_err_status"},
+	{"rx_err_bcast_csum"},
+	{"rx_fslot_debt"},
+	{"mmio_rx_inc_avail"},
+	{"mmio_rx_post_desc"},
+	{"payload_sent"},
+	{"desc_sent"},
+	{"large_bcast_sent"},
+	{"super_sent"},
+	{"payload_recv"},
+	{"desc_recv"},
+	{"rdma_write"},
+	{"rdma_write_fail"},
+	{"rdma_cancel"},
+	{"rdma_cancel_fail"},
+	{"rdma_cancel_already"},
+	{"rdma_rndz_request_sent"},
+	{"rdma_rndz_request_fail"},
+	{"rdma_rndz_reply_recv"},
+	{"rdma_rndz_reply_fail"},
+	{"rdma_rndz_request_recv"},
+	{"rdma_rndz_reply_sent"},
+	{"bad_rdma_notification"},
+	{"bad_rdma_first_notification"},
+	{"rdma_src_completion"},
+	{"rdma_sink_completion"},
+	{"rdma_send_timeout"},
+	{"rdma_recv_timeout"},
+	{"sfifo_send_intr_armed"},
+	{"rdma_send_intr_armed"},
+	{"recv_intr_armed"},
+	{"recv_intr_offset"},
+	{"recv_imm_intr_armed"},
+	{"recv_imm_intr_offset"},
+	{"send_intr_fired"},
+	{"recv_intr_fired"},
+	{"in_poll"},
+	{"max_poll_recv"},
+};
+
+static void hf_get_drvinfo(struct net_device *netdev,
+		struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, HF_DRV_NAME, sizeof(info->driver));
+	strlcpy(info->version, HF_DRV_VERSION, sizeof(info->version));
+}
+
+static void hf_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	switch (stringset) {
+	case ETH_SS_STATS:
+		memcpy(data, &hf_ethtool_stats_keys,
+				sizeof(hf_ethtool_stats_keys));
+		break;
+	}
+}
+
+static int hf_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ARRAY_SIZE(hf_ethtool_stats_keys);
+	default:
+		return -EINVAL;
+	}
+}
+
+static void hf_get_ethtool_stats(struct net_device *netdev,
+		struct ethtool_stats *stats, u64 *data)
+{
+	struct hf_net	*net = netdev_priv(netdev);
+	struct hf_if	*net_if = &(net->hfif);
+
+	memcpy(data, &(net_if->eth_stats), sizeof(struct hf_ethtool_stats));
+}
+
+static const struct ethtool_ops hf_ethtool_ops = {
+	.get_drvinfo		= hf_get_drvinfo,
+	.get_strings		= hf_get_strings,
+	.get_sset_count		= hf_get_sset_count,
+	.get_ethtool_stats	= hf_get_ethtool_stats,
+};
+
+void hf_set_ethtool_ops(struct net_device *netdev)
+{
+	SET_ETHTOOL_OPS(netdev, &hf_ethtool_ops);
+}
diff --git a/drivers/net/hfi/ip/hf_proto.h b/drivers/net/hfi/ip/hf_proto.h
index 022512a..3b2b23b 100644
--- a/drivers/net/hfi/ip/hf_proto.h
+++ b/drivers/net/hfi/ip/hf_proto.h
@@ -36,6 +36,7 @@
 int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls);
 void hf_construct_hwhdr(struct hf_if *net_if, struct sk_buff *skb,
 			struct base_hdr *b_hdr);
+void hf_set_ethtool_ops(struct net_device *netdev);
 extern int hfidd_open_window_func(struct hfidd_acs *p_acs,
 		u32 is_userspace,
 		struct hfi_client_info *user_p,
diff --git a/drivers/net/hfi/ip/hfi_ip_main.c b/drivers/net/hfi/ip/hfi_ip_main.c
index 6b2ec3f..4b897d3 100644
--- a/drivers/net/hfi/ip/hfi_ip_main.c
+++ b/drivers/net/hfi/ip/hfi_ip_main.c
@@ -208,6 +208,7 @@ static int hf_send_intr_callback(void *parm, u32 win, u32 ext)
 	mb();
 
 	netif_wake_queue(net->netdev);
+	net->hfif.eth_stats.send_intr_fired++;
 
 	return 0;
 }
@@ -218,6 +219,7 @@ static int hf_recv_intr_callback(void *parm, u32 win, u32 ext)
 
 	napi_schedule(&(net->napi));
 
+	net->hfif.eth_stats.recv_intr_fired++;
 	return 0;
 }
 
@@ -381,6 +383,9 @@ static void hf_set_recv_intr(struct hf_if *net_if)
 	hf_mmio_regs_write_then_read(net_if, HFI_RFIFO_INTR_REG,
 		(HF_ENA_RECV_INTR + (offset << HF_RECV_INTR_MATCH_SHIFT)));
 
+	net_if->eth_stats.recv_intr_offset = offset;
+	net_if->eth_stats.recv_intr_armed++;
+
 	/* check if there is packet received in the mean time */
 	rx_pkt = net_if->rx_fifo.addr + (offset << HFI_CACHE_LINE_SHIFT);
 
@@ -390,6 +395,9 @@ static void hf_set_recv_intr(struct hf_if *net_if)
 		/* force an immediate recv intr */
 		hf_mmio_regs_write(net_if, HFI_RFIFO_INTR_REG,
 		(HF_IMM_RECV_INTR + (offset << HF_RECV_INTR_MATCH_SHIFT)));
+
+		net_if->eth_stats.recv_imm_intr_offset = offset;
+		net_if->eth_stats.recv_imm_intr_armed++;
 	}
 }
 
@@ -507,7 +515,7 @@ static int hf_net_open(struct net_device *netdev)
 	struct hfidd_acs	*p_acs = HF_ACS(net_if);
 
 	memset(&(netdev->stats), 0, sizeof(struct net_device_stats));
-	net_if->sfifo_packets = 0;
+	memset(&(net_if->eth_stats), 0, sizeof(struct hf_ethtool_stats));
 
 	spin_lock(&(net_if->lock));
 	net_if->state = HF_NET_HALF_OPEN;
@@ -614,6 +622,7 @@ static inline int hf_check_hdr_version(struct hf_net *net,
 			"hf_check_hdr_version: hdr version 0x%x "
 			"does not match 0x%x\n",
 			hf_hdr->version, HF_PROTO_HDR_VERSION);
+		net->hfif.eth_stats.rx_version_mismatch++;
 		net->netdev->stats.rx_dropped++;
 		return -EINVAL;
 	}
@@ -710,6 +719,7 @@ static void hf_recv_ip_with_payload(struct hf_net *net,
 	netdev->stats.rx_packets++;
 	netdev->stats.rx_bytes += skb->len;
 
+	net_if->eth_stats.payload_recv++;
 	netif_receive_skb(skb);
 }
 
@@ -717,6 +727,8 @@ static void hf_recv_ip_good(struct hf_net *net,
 			    struct hfi_hdr *rx_curr,
 			    u32 pkt_len)
 {
+	struct hf_if	*net_if = &(net->hfif);
+
 	switch (rx_curr->type.header_type) {
 
 	case  HFI_IP_WITH_PAYLOAD:
@@ -731,6 +743,7 @@ static void hf_recv_ip_good(struct hf_net *net,
 			rx_curr->type.header_type, pkt_len);
 
 		/* unknown packet, drop it */
+		net_if->eth_stats.rx_err_hdr_type++;
 		net->netdev->stats.rx_dropped++;
 		break;
 	}
@@ -767,6 +780,7 @@ static int hf_rx(struct hf_net *net, int budget)
 				"status = 0x%x, pkt_len = 0x%x\n",
 				status, pkt_len);
 
+			net_if->eth_stats.rx_err_status++;
 			net->netdev->stats.rx_dropped++;
 		}
 
@@ -782,6 +796,7 @@ static int hf_rx(struct hf_net *net, int budget)
 			hf_mmio_regs_write(net_if, HFI_RFIFO_INC_FSLOT_REG,
 					net_if->rx_fslot_debt);
 			net_if->rx_fslot_debt = 0;
+			net_if->eth_stats.mmio_rx_inc_avail++;
 		}
 
 		budget--;
@@ -791,6 +806,7 @@ static int hf_rx(struct hf_net *net, int budget)
 
 	}
 
+	net_if->eth_stats.rx_fslot_debt = net_if->rx_fslot_debt;
 	netdev_dbg(net->netdev, "hf_rx: exit, head = 0x%x, recv 0x%x pkts\n",
 			net_if->rx_fifo.head, num);
 
@@ -860,9 +876,10 @@ int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls)
 			u64		intr_thresh;
 
 			netif_stop_queue(netdev);
+			net_if->eth_stats.tx_queue_stop++;
 
 			/* turn on transmit interrupt */
-			intr_thresh = (net_if->sfifo_packets -
+			intr_thresh = (net_if->eth_stats.sfifo_packets -
 			HF_SFIFO_INTR_WATERMARK) & HF_SFIFO_INTR_MASK;
 
 			intr_cntl = HF_SFIFO_INTR_ENABLE |
@@ -871,6 +888,7 @@ int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls)
 			hf_mmio_regs_write_then_read(net_if,
 					HFI_SFIFO_INTR_CNTL, intr_cntl);
 
+			net_if->eth_stats.sfifo_send_intr_armed++;
 			return -EBUSY;
 		}
 	}
@@ -957,6 +975,7 @@ static char *hf_build_payload_hdr(struct hf_net *net,
 			" not supported\n", hwhdr_p->h_proto);
 
 		dev_kfree_skb_any(skb);
+		net_if->eth_stats.tx_drop++;
 		return NULL;
 	}
 
@@ -1051,7 +1070,8 @@ static int hf_payload_tx(struct sk_buff *skb, struct hf_net *net, u32 is_bcast)
 		(net_if->tx_fifo.tail + xmit_cls) & (net_if->tx_fifo.emax);
 	atomic_sub(xmit_cls, &(net_if->tx_fifo.avail));
 
-	net_if->sfifo_packets++;
+	net_if->eth_stats.sfifo_packets++;
+	net_if->eth_stats.payload_sent++;
 	net->netdev->stats.tx_packets++;
 	net->netdev->stats.tx_bytes += msg_len;
 
@@ -1079,6 +1099,7 @@ static int hf_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 		netdev_err(netdev, "hf_start_xmit: invalid skb->len 0x%x\n",
 						skb->len);
 		dev_kfree_skb_any(skb);
+		net_if->eth_stats.tx_drop++;
 		return NETDEV_TX_OK;
 	}
 
@@ -1119,8 +1140,12 @@ static int hf_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 static void hf_tx_timeout(struct net_device *netdev)
 {
+	struct hf_net	*net = netdev_priv(netdev);
+	struct hf_if	*net_if = &(net->hfif);
+
 	netdev_warn(netdev, "hf_tx_timeout: queue_stopped is %d\n",
 			netif_queue_stopped(netdev));
+	net_if->eth_stats.tx_timeout++;
 }
 
 static int hf_change_mtu(struct net_device *netdev, int new_mtu)
@@ -1207,6 +1232,7 @@ static int hf_poll(struct napi_struct *napi, int budget)
 	net_if	= &(net->hfif);
 	netdev	= net->netdev;
 
+	net_if->eth_stats.in_poll++;
 	work_done = hf_rx(net, budget);
 
 	/* Always assume we have received all available packets */
@@ -1215,7 +1241,8 @@ static int hf_poll(struct napi_struct *napi, int budget)
 		napi_complete(napi);
 		isync();
 		hf_set_recv_intr(net_if);
-	}
+	} else
+		net_if->eth_stats.max_poll_recv++;
 
 	return work_done;
 }
@@ -1248,6 +1275,7 @@ static struct hf_net *hf_init_netdev(int idx, int ai)
 	net->hfif.state = HF_NET_CLOSE;
 
 	spin_lock_init(&net->hfif.lock);
+	hf_set_ethtool_ops(netdev);
 
 	rc = register_netdev(netdev);
 	if (rc) {
diff --git a/include/linux/hfi/hfi_ip.h b/include/linux/hfi/hfi_ip.h
index ec87300..d4317ee 100644
--- a/include/linux/hfi/hfi_ip.h
+++ b/include/linux/hfi/hfi_ip.h
@@ -42,6 +42,7 @@
 #include <net/arp.h>
 
 #include <linux/hfi/hfidd_internal.h>
+#include <linux/hfi/hfidd_adpt.h>
 #include <linux/hfi/hfidd_client.h>
 #include <linux/hfi/hfidd_requests.h>
 #include <linux/hfi/hfidd_regs.h>
@@ -150,6 +151,35 @@ struct hf_fifo {
 #define	HF_NET_HALF_OPEN	0xA0
 #define	HF_NET_OPEN		0xA1
 
+/*
+ * Driver event counters, embedded in struct hf_if as eth_stats.
+ * Presumably reported through the ethtool ops installed by
+ * hf_set_ethtool_ops() -- confirm against that implementation.
+ */
+struct hf_ethtool_stats {
+	u64		sfifo_packets;	/* total packets sent through sfifo */
+	u64		tx_timeout;	/* hf_tx_timeout() invocations */
+	u64		tx_queue_stop;
+	u64		tx_drop;	/* skbs dropped in hf_start_xmit() */
+	u64		tx_err_headlen;
+	u64		rx_version_mismatch;
+	u64		rx_err_skb;
+	u64		rx_err_hdr_type;
+	u64		rx_err_msg_type;
+	u64		rx_err_status;
+	u64		rx_err_bcast_csum;
+	u64		rx_fslot_debt;
+	u64		mmio_rx_inc_avail;
+	u64		payload_sent;	/* packets from IP sent with payload
+					   mode */
+	u64		payload_recv;	/* packets delivered to IP with payload
+					   mode */
+	u64		sfifo_send_intr_armed;
+	u64		recv_intr_armed;
+	u64		recv_intr_offset;
+	u64		recv_imm_intr_armed;
+	u64		recv_imm_intr_offset;
+	u64		send_intr_fired;
+	u64		recv_intr_fired;
+	u64		in_poll;	/* hf_poll() invocations */
+	u64		max_poll_recv;	/* hf_poll() calls that did not
+					   complete NAPI */
+};
+
 struct hf_if {
 	u32			idx;			/* 0, 1, 2, 3 ...   */
 	u32			ai;			/* 0=hfi0, 1=hfi1   */
@@ -160,7 +190,6 @@ struct hf_if {
 	spinlock_t		lock;			/* lock for state */
 	u32			sfifo_fv_polarity;
 	u32			sfifo_slots_per_blk;
-	u32			sfifo_packets;
 	u32			rx_pkt_valid;		/* Polarity of recv
 							   packet valid bit */
 	u32			msg_id;
@@ -172,6 +201,7 @@ struct hf_if {
 	struct sk_buff		**tx_skb;		/* array to store tx
 							   2k skb */
 	void			*sfifo_finishvec;
+	struct hf_ethtool_stats eth_stats;
 };
 
 /* Private structure for HF inetrface */
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 25/27] HFI: hfi_ip fifo transmit paths
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/ip/hf_proto.h    |    1 +
 drivers/net/hfi/ip/hfi_ip_main.c |  438 ++++++++++++++++++++++++++++++++++++++
 include/linux/hfi/hfi_ip.h       |   72 ++++++-
 3 files changed, 510 insertions(+), 1 deletions(-)

diff --git a/drivers/net/hfi/ip/hf_proto.h b/drivers/net/hfi/ip/hf_proto.h
index b4133b7..b0232ab 100644
--- a/drivers/net/hfi/ip/hf_proto.h
+++ b/drivers/net/hfi/ip/hf_proto.h
@@ -33,6 +33,7 @@
 #ifndef _HF_PROTO_H_
 #define _HF_PROTO_H_
 
+int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls);
 extern int hfidd_open_window_func(struct hfidd_acs *p_acs,
 		u32 is_userspace,
 		struct hfi_client_info *user_p,
diff --git a/drivers/net/hfi/ip/hfi_ip_main.c b/drivers/net/hfi/ip/hfi_ip_main.c
index 0c1ebd7..689f92e 100644
--- a/drivers/net/hfi/ip/hfi_ip_main.c
+++ b/drivers/net/hfi/ip/hfi_ip_main.c
@@ -185,6 +185,87 @@ alloc_resource_err0:
 	return rc;
 }
 
+/*
+ * Send-fifo interrupt callback; hf_events[] binds it to the HFIDD_SEND event.
+ *
+ * parm is the struct hf_net handed over at registration time; win and ext are
+ * not used here.  Reads the send interrupt status register, writes 0 to the
+ * sfifo interrupt control register when the sfifo event bit is set, and then
+ * wakes the transmit queue.  Always returns 0.
+ */
+static int hf_send_intr_callback(void *parm, u32 win, u32 ext)
+{
+	struct hf_net	*net = (struct hf_net *)parm;
+	struct hf_if	*net_if = &(net->hfif);
+	u64		sintr_status;
+
+	sintr_status = hf_mmio_regs_read(net_if, HFI_SINTR_STATUS_REG);
+
+	/* Terminate the message so it is not merged with the next log line */
+	netdev_info(net->netdev, "hf_send_intr_callback: "
+		"sintr_status 0x%016llx\n", sintr_status);
+
+	/* mask off the interrupt */
+	if (sintr_status & HF_SFIFO_INTR_EVENT)
+		hf_mmio_regs_write(net_if, HFI_SFIFO_INTR_CNTL, 0);
+
+	/*
+	 * Make sure the interrupt is masked before the queue is woken up;
+	 * otherwise the restarted queue could see a stale interrupt.
+	 */
+	mb();
+
+	netif_wake_queue(net->netdev);
+
+	return 0;
+}
+
+struct hf_events_cb hf_events[HF_EVENT_NUM] = {
+	{HFIDD_SEND,		(void *)hf_send_intr_callback},
+};
+
+/*
+ * Register or unregister every callback listed in hf_events[] with the HFI
+ * device driver.
+ *
+ * flag selects the operation: HFIDD_REQ_EVENT_REGISTER registers, any other
+ * value unregisters.  When a registration fails, the callbacks registered so
+ * far are unregistered again and the registration error is returned.  When
+ * unregistering, a failure is logged and the remaining events are still
+ * processed; the first error seen is returned.  Returns 0 on success.
+ */
+static int hf_register_ip_events(struct hf_net *net,
+				 struct hfidd_acs *p_acs,
+				 int flag)
+{
+	struct hf_if		*net_if = &(net->hfif);
+	int			rc, ret = 0, i, j;
+	struct hfi_reg_events	events[HF_EVENT_NUM];
+	int			(*reg_func)(struct hfidd_acs *,
+				struct hfi_reg_events *);
+
+	if (flag == HFIDD_REQ_EVENT_REGISTER)
+		reg_func = hfidd_callback_register;
+	else
+		reg_func = hfidd_callback_unregister;
+
+	for (i = 0; i < HF_EVENT_NUM; i++) {
+		events[i].window = net_if->client.window;
+		events[i].type = FUNCTIONS_FOR_EVENTS;
+		events[i].info.func.index = hf_events[i].type;
+		events[i].info.func.function_p.use.kptr = hf_events[i].func;
+		events[i].info.func.parameter.use.kptr = (void *)(net);
+
+		events[i].hdr.req = flag;
+		events[i].hdr.req_len = sizeof(struct hfi_reg_events);
+		events[i].hdr.result.use.kptr = &(events[i]);
+
+		rc = reg_func(p_acs, &(events[i]));
+		if (rc) {
+			netdev_err(net->netdev, "hf_register_ip_events: "
+				"fail event 0x%x, flag=0x%x rc=0x%x\n",
+				hf_events[i].type, flag, rc);
+
+			/* Remember the first failure for the caller */
+			if (!ret)
+				ret = rc;
+
+			if (flag == HFIDD_REQ_EVENT_REGISTER)
+				goto err_out;
+		}
+	}
+
+	return ret;
+
+err_out:
+	/*
+	 * Undo the registrations that succeeded (entries 0 .. i-1).  The
+	 * rollback result is only logged so that the original registration
+	 * error in ret is what the caller sees.
+	 */
+	for (j = 0; j < i; j++) {
+		events[j].hdr.req = HFIDD_REQ_EVENT_UNREGISTER;
+		rc = hfidd_callback_unregister(p_acs, &(events[j]));
+		if (rc) {
+			netdev_err(net->netdev, "hf_register_ip_events: failed "
+				"to unregister callback event 0x%x, rc=0x%x\n",
+				events[j].info.func.index, rc);
+		}
+	}
+
+	return ret;
+}
+
 static int hf_close_ip_window(struct hf_net *net, struct hfidd_acs *p_acs)
 {
 	struct hf_if *net_if = &(net->hfif);
@@ -276,6 +357,16 @@ static int hf_set_mac_addr(struct net_device *netdev, void *p)
 	return 0;
 }
 
+static void hf_init_hw_regs(struct hf_if *net_if)
+{
+	/* setup IP with payload threshold in cache line size */
+	hf_mmio_regs_write(net_if, HFI_IP_RECV_SIZE,
+		(HF_PAYLOAD_RX_THRESHOLD << HF_PAYLOAD_RX_THRESH_SHIFT));
+
+	/* initialize SEND INTR STATUS */
+	hf_mmio_regs_write(net_if, HFI_SINTR_STATUS_REG, 0);
+}
+
 static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
 {
 	struct net_device	*netdev = (struct net_device *)parm;
@@ -300,13 +391,25 @@ static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
 	if (rc)
 		goto delayed_open_err1;
 
+	rc = hf_register_ip_events(net, p_acs, HFIDD_REQ_EVENT_REGISTER);
+	if (rc)
+		goto delayed_open_err2;
+
 	hf_set_mac_addr(netdev, NULL);
 
+	hf_init_hw_regs(net_if);
+
 	net_if->state = HF_NET_OPEN;
 	spin_unlock(&(net_if->lock));
 
+	netif_carrier_on(netdev);
+	netif_start_queue(netdev);
+
 	return 0;
 
+delayed_open_err2:
+	hf_close_ip_window(net, p_acs);
+
 delayed_open_err1:
 	hf_free_resource(net_if);
 
@@ -385,6 +488,11 @@ static int hf_net_close(struct net_device *netdev)
 
 	spin_lock(&(net_if->lock));
 	if (net_if->state == HF_NET_OPEN) {
+		netif_stop_queue(netdev);
+		netif_carrier_off(netdev);
+
+		hf_register_ip_events(net, p_acs, HFIDD_REQ_EVENT_UNREGISTER);
+
 		hf_close_ip_window(net, p_acs);
 
 		hf_free_resource(net_if);
@@ -399,6 +507,332 @@ static int hf_net_close(struct net_device *netdev)
 	return 0;
 }
 
+static void hf_tx_recycle(struct hf_if *net_if)
+{
+	u32		head, head_idx, slots_per_blk;
+	u32		*fv;
+	int		i;
+	u32		fv_bit;
+	u8		nr;
+
+	head = net_if->tx_fifo.head;
+
+	slots_per_blk = net_if->sfifo_slots_per_blk;
+
+	head_idx = head / slots_per_blk;
+
+	fv = (u32 *)(net_if->sfifo_finishvec);
+
+	while (1) {
+		nr = HF_FV_BIT_MAX - head_idx;
+		fv_bit = BIT(nr) & (ACCESS_ONCE(*fv));
+		fv_bit = fv_bit >> nr;
+
+		if ((fv_bit ^ (net_if->sfifo_fv_polarity)) == 0)
+			break;
+
+		for (i = 0; i < slots_per_blk; i++) {
+			struct sk_buff		*skb;
+
+			skb = net_if->tx_skb[head + i];
+			if (skb != NULL) {
+				dev_kfree_skb_any(skb);
+				net_if->tx_skb[head + i] = NULL;
+			}
+		}
+
+		head = (head + slots_per_blk) & (net_if->tx_fifo.emax);
+
+		atomic_add(slots_per_blk, &(net_if->tx_fifo.avail));
+
+		if (++head_idx == HF_FV_BIT_CNT) {
+			head_idx = 0;
+			net_if->sfifo_fv_polarity ^= 1;
+		}
+	}
+
+	net_if->tx_fifo.head = head;
+
+	return;
+}
+
+/*
+ * Check that at least xmit_cls slots are available in the send fifo.
+ *
+ * If not, finished slots are reclaimed with hf_tx_recycle() and the count is
+ * checked again.  When there is still not enough room, the transmit queue is
+ * stopped, the sfifo interrupt control register is programmed with
+ * HF_SFIFO_INTR_ENABLE plus a threshold derived from sfifo_packets, and
+ * -EBUSY is returned.  Returns 0 when enough slots are available.
+ */
+int hf_tx_check_avail(struct hf_net *net, u32 xmit_cls)
+{
+	struct net_device	*netdev = net->netdev;
+	struct hf_if		*net_if = &(net->hfif);
+
+	if (atomic_read(&net_if->tx_fifo.avail) < xmit_cls) {
+
+		/* Try to reclaim blocks flagged in the finish vector first */
+		hf_tx_recycle(net_if);
+
+		if (atomic_read(&net_if->tx_fifo.avail) < xmit_cls) {
+			u32		intr_cntl;
+			u64		intr_thresh;
+
+			netif_stop_queue(netdev);
+
+			/* turn on transmit interrupt */
+			intr_thresh = (net_if->sfifo_packets -
+			HF_SFIFO_INTR_WATERMARK) & HF_SFIFO_INTR_MASK;
+
+			intr_cntl = HF_SFIFO_INTR_ENABLE |
+			(intr_thresh << HF_SFIFO_INTR_CNT_SHIFT);
+
+			hf_mmio_regs_write_then_read(net_if,
+					HFI_SFIFO_INTR_CNTL, intr_cntl);
+
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
+static inline void hf_fill_route(u16 dst_isr, struct base_hdr *base_hdr_p)
+{
+	base_hdr_p->route_control = HFI_HW_DIRECT_ROUTE;
+}
+
+/*
+ * Copy len bytes of skb data, starting at offset, into the send fifo at dst.
+ *
+ * The fifo is used as a ring: when fewer than len bytes remain between dst
+ * and the end of the fifo, the copy is split and the remainder is written at
+ * the start of the fifo (net_if->tx_fifo.addr).
+ *
+ * Returns 0 on success or the error returned by skb_copy_bits().
+ */
+static int hf_copy_skb_to_fifo(struct hf_net *net,
+				struct sk_buff *skb,
+				char *dst,
+				u32 len,
+				u32 offset)
+{
+	struct hf_if *net_if = &(net->hfif);
+	u64		fifo_end;
+	u32		tail_room;
+	int		rc;
+
+	fifo_end = (u64)(net_if->tx_fifo.addr) + net_if->tx_fifo.size;
+
+	/* Bytes left between dst and the end of the fifo */
+	tail_room = fifo_end - (u64)dst;
+	if (tail_room >= len) {
+		rc = skb_copy_bits(skb, offset, dst, len);
+		if (rc) {
+			netdev_err(net->netdev,
+				"hf_copy_skb_to_fifo: skb_copy_bits "
+				"fail1 offset=0x%x, len=0x%x, rc=0x%x\n",
+				offset, len, rc);
+			return rc;
+		}
+	} else {
+		/* First part fills the fifo up to its end ... */
+		rc = skb_copy_bits(skb, offset, dst, tail_room);
+		if (rc) {
+			netdev_err(net->netdev,
+				"hf_copy_skb_to_fifo: skb_copy_bits "
+				"fail2 offset=0x%x, len=0x%x, rc=0x%x\n",
+				offset, tail_room, rc);
+
+			return rc;
+		}
+		/* ... and the rest wraps around to the start of the fifo */
+		rc = skb_copy_bits(skb, offset + tail_room,
+				net_if->tx_fifo.addr, len - tail_room);
+		if (rc) {
+			netdev_err(net->netdev,
+				"hf_copy_skb_to_fifo: skb_copy_bits "
+				"fail3 offset=0x%x, len=0x%x, rc=0x%x\n",
+				offset + tail_room, len - tail_room, rc);
+
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+/* Build base_hdr and proto_hdr for payload pkt.
+   Return pointer to the end of proto_hdr */
+static char *hf_build_payload_hdr(struct hf_net *net,
+				  struct sk_buff *skb,
+				  u32 msg_len,
+				  u32 xmit_cls,
+				  u32 is_bcast)
+{
+	struct hf_if			*net_if = &(net->hfif);
+	struct hf_if_proto_hdr		*proto_hdr_p;
+	struct hfi_ip_with_payload_pkt	*hdr_p;
+	char				*dst;
+	u8				msg_type, msg_flag;
+	struct ethhdr			*hwhdr_p;
+
+	hwhdr_p = (struct ethhdr *)(skb->data);
+
+	if (hwhdr_p->h_proto == htons(ETH_P_IP))
+		msg_type = HF_IF_FIFO;
+	else if (hwhdr_p->h_proto == htons(ETH_P_ARP))
+		msg_type = HF_IF_ARP;
+	else {
+		netdev_err(net->netdev, "hf_build_payload_hdr: h_proto = 0x%x "
+			" not supported\n", hwhdr_p->h_proto);
+
+		dev_kfree_skb_any(skb);
+		return NULL;
+	}
+
+	dst = net_if->tx_fifo.addr +
+		(net_if->tx_fifo.tail << HFI_CACHE_LINE_SHIFT);
+
+	/* fill in base_hdr + ip_extended_hdr */
+	hdr_p = (struct hfi_ip_with_payload_pkt *)dst;
+
+	/* Do not memset over one cacheline since it might wrap */
+	memset(hdr_p, 0, HF_IP_HDR_LEN);
+
+	hdr_p->hfi_hdr.type.header_type = HFI_IP_WITH_PAYLOAD;
+	hdr_p->hfi_hdr.id.job_id = net_if->client.job_id;
+
+	if (is_bcast) {
+		hdr_p->hfi_hdr.base_hdr.dst_isr = HFIDD_DST_BCST_ISR;
+		hdr_p->hfi_hdr.base_hdr.dst_win = HFIDD_DST_BCST_WIN;
+		hdr_p->hfi_hdr.type.header_type = HFI_IP_MULTICAST_WITH_PAYLOAD;
+
+		msg_flag = HF_IF_BCAST;
+	} else {
+		u16	dst_isr, dst_win;
+
+		hf_get_dst_info(hwhdr_p, &dst_isr, &dst_win);
+		hdr_p->hfi_hdr.base_hdr.dst_isr = dst_isr;
+		hdr_p->hfi_hdr.base_hdr.dst_win = dst_win;
+
+		hf_fill_route(dst_isr, &(hdr_p->hfi_hdr.base_hdr));
+
+		msg_flag = HF_IF_UCAST;
+	}
+
+	netdev_dbg(net->netdev, "hf_build_payload_hdr: dst_isr = 0x%x, "
+			"dst_win = 0x%x, xmit_cls = 0x%x\n",
+			hdr_p->hfi_hdr.base_hdr.dst_isr,
+			hdr_p->hfi_hdr.base_hdr.dst_win, xmit_cls);
+
+	hdr_p->hfi_hdr.base_hdr.pkt_len = hfi_cachelines_to_pktlen(xmit_cls);
+
+	dst += HF_IP_HDR_LEN;
+	proto_hdr_p = (struct hf_if_proto_hdr *)dst;
+
+	proto_hdr_p->version = HF_PROTO_HDR_VERSION;
+	proto_hdr_p->msg_len = msg_len;
+	proto_hdr_p->msg_id = net_if->msg_id;
+	proto_hdr_p->msg_type = msg_type;
+	proto_hdr_p->msg_flag = msg_flag;
+
+	dst += HF_PROTO_LEN;
+
+	return dst;
+}
+
+/*
+ * Queue one skb into the send fifo as an "IP with payload" packet.
+ *
+ * msg_len is the skb length without the ETH_HLEN hardware header, plus the
+ * protocol header; xmit_cls is the fifo space needed, as computed by
+ * hfi_bytes_to_cacheline().  Broadcasts are rounded up to one of two fixed
+ * sizes.
+ *
+ * Returns 0 when the skb was consumed: either queued and freed here, or freed
+ * by hf_build_payload_hdr() for an unsupported protocol.  Otherwise returns
+ * the error from hf_tx_check_avail() or hf_copy_skb_to_fifo(); the skb is
+ * not freed in that case.
+ */
+static int hf_payload_tx(struct sk_buff *skb, struct hf_net *net, u32 is_bcast)
+{
+	struct hf_if		*net_if = &(net->hfif);
+	u32			msg_len, len;
+	u32			xmit_cls;
+	char			*dst;
+	int			rc = 0;
+
+	msg_len = skb->len - ETH_HLEN + HF_PROTO_LEN;
+	xmit_cls = hfi_bytes_to_cacheline(msg_len + HF_IP_HDR_LEN);
+
+	if (is_bcast) {
+		if (xmit_cls <= HF_BCAST_CACHE_LINE_2)
+			xmit_cls = HF_BCAST_CACHE_LINE_2;
+		else
+			xmit_cls = HF_BCAST_CACHE_LINE_16;
+	}
+
+	rc = hf_tx_check_avail(net, xmit_cls);
+	if (rc) {
+		netdev_err(net->netdev, "hf_payload_tx: hf_tx_check_avail find "
+				"no avail slot\n");
+		return rc;
+	}
+
+	dst = hf_build_payload_hdr(net, skb, msg_len, xmit_cls, is_bcast);
+	/*
+	 * NULL means the skb was already freed by hf_build_payload_hdr().
+	 * NOTE(review): returning 0 here lets hf_start_xmit() ring the
+	 * doorbell although nothing was queued -- confirm this is intended.
+	 */
+	if (!dst)
+		return 0;
+
+	/* copy skb data, skipping hwhdr */
+	len = skb->len - ETH_HLEN;
+
+	rc = hf_copy_skb_to_fifo(net, skb, dst, len, ETH_HLEN);
+	if (rc)
+		return rc;
+
+	/* Advance the tail, wrapping with the emax mask */
+	net_if->tx_fifo.tail =
+		(net_if->tx_fifo.tail + xmit_cls) & (net_if->tx_fifo.emax);
+	atomic_sub(xmit_cls, &(net_if->tx_fifo.avail));
+
+	net_if->sfifo_packets++;
+	net->netdev->stats.tx_packets++;
+	net->netdev->stats.tx_bytes += msg_len;
+
+	netdev_dbg(net->netdev, "hf_payload_tx: exit, tx_fifo tail = 0x%x, "
+		"avail = 0x%x, skb->len = 0x%x\n", net_if->tx_fifo.tail,
+		atomic_read(&(net_if->tx_fifo.avail)), skb->len);
+
+	/* Data has been copied into the fifo, the skb is no longer needed */
+	dev_kfree_skb_any(skb);
+	return 0;
+
+}
+
+/*
+ * ndo_start_xmit handler.
+ *
+ * Frames shorter than the hardware header, or with more than HF_PAYLOAD_MAX
+ * bytes after it, are logged, freed and reported as NETDEV_TX_OK (dropped).
+ * Everything else is queued with hf_payload_tx(); if that fails the skb is
+ * left untouched and NETDEV_TX_BUSY is returned.  After a successful queue
+ * the doorbell is rung and, when the fifo runs low, hf_tx_check_avail() is
+ * called to recycle slots or stop the queue.
+ */
+static int hf_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct hf_net	*net = netdev_priv(netdev);
+	struct hf_if	*net_if = &(net->hfif);
+	u32		len, is_bcast;
+	u32		send_cnt = 1;
+
+	/*
+	 * skb->len is unsigned: a frame shorter than the hardware header
+	 * cannot be parsed and would make the subtraction below wrap, so
+	 * reject it before touching the header.
+	 */
+	if (unlikely(skb->len < ETH_HLEN)) {
+		netdev_err(netdev, "hf_start_xmit: invalid skb->len 0x%x\n",
+						skb->len);
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	is_bcast = !memcmp(((struct ethhdr *)(skb->data))->h_dest,
+				netdev->broadcast,
+				netdev->addr_len);
+
+	/* total len to transfer */
+	len = skb->len - ETH_HLEN;
+
+	if (len <= HF_PAYLOAD_MAX) {
+		/* send ip with payload */
+		if (hf_payload_tx(skb, net, is_bcast) < 0) {
+			netdev_err(netdev, "hf_start_xmit: "
+				"hf_payload_tx fail 1\n");
+
+			return NETDEV_TX_BUSY;
+		}
+	} else {
+		netdev_err(netdev, "hf_start_xmit: skb->len 0x%x "
+			"greater than max 0x%x\n",
+			skb->len, (u32)HF_PAYLOAD_MAX);
+
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Make sure all fields are written before ringing hw doorbell */
+	wmb();
+
+	/* ring doorbell */
+	hf_mmio_regs_write(net_if, HFI_SFIFO_DB_REG, send_cnt);
+
+	/* Running low: recycle finished slots or stop the queue early */
+	if (atomic_read(&net_if->tx_fifo.avail) < HF_TX_LOW_WATERMARK)
+		hf_tx_check_avail(net, HF_TX_LOW_WATERMARK);
+
+	net_if->msg_id++;
+	netdev->trans_start = jiffies;
+
+	return NETDEV_TX_OK;
+}
+
+static void hf_tx_timeout(struct net_device *netdev)
+{
+	netdev_warn(netdev, "hf_tx_timeout: queue_stopped is %d\n",
+			netif_queue_stopped(netdev));
+}
+
 static int hf_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	if ((new_mtu <= 68) || (new_mtu > HF_NET_MTU))
@@ -449,6 +883,8 @@ static const struct net_device_ops hf_netdev_ops = {
 	.ndo_open		= hf_net_open,
 	.ndo_stop		= hf_net_close,
 	.ndo_change_mtu		= hf_change_mtu,
+	.ndo_start_xmit		= hf_start_xmit,
+	.ndo_tx_timeout		= hf_tx_timeout,
 	.ndo_set_mac_address	= NULL,
 };
 
@@ -465,6 +901,8 @@ static void hf_if_setup(struct net_device *netdev)
 	netdev->header_ops	= &hf_header_ops;
 	netdev->netdev_ops	= &hf_netdev_ops;
 
+	netdev->watchdog_timeo	= HF_TX_TIMEOUT;
+
 	memcpy(netdev->broadcast, hfi_bcast_addr, ETH_ALEN);
 }
 
diff --git a/include/linux/hfi/hfi_ip.h b/include/linux/hfi/hfi_ip.h
index 6b6a74c..4e70c14 100644
--- a/include/linux/hfi/hfi_ip.h
+++ b/include/linux/hfi/hfi_ip.h
@@ -43,6 +43,7 @@
 #include <linux/hfi/hfidd_internal.h>
 #include <linux/hfi/hfidd_client.h>
 #include <linux/hfi/hfidd_requests.h>
+#include <linux/hfi/hfidd_regs.h>
 #include <linux/hfi/hfidd_pkt_formats.h>
 
 #define HF_DRV_VERSION			"1.0"
@@ -51,16 +52,32 @@
 
 #define MAX_HF_PER_HFI			2
 #define	HF_IP_JOBID			0xFFFFFFF0
+#define HF_TX_TIMEOUT			(500 * HZ)
+#define HF_NAPI_WEIGHT			256
 #define HF_MAX_NAME_LEN			64
 
+/* sfifo intr: bit 39-55 is threshold */
+/*             bit 34 enable, bit 35 unmask */
+#define HF_SFIFO_INTR_ENABLE		(0x3 << (63 - 35))
+#define HF_SFIFO_INTR_MASK		0x1FFFF		/* 17 bits */
+#define HF_SFIFO_INTR_CNT_SHIFT		(63 - 55)
+#define HF_SFIFO_INTR_EVENT		0x00000040 /* bit 57 */
+#define HF_SFIFO_INTR_WATERMARK		(HF_SFIFO_SLOTS - (HF_SFIFO_SLOTS >> 3))
+
 #define HF_SFIFO_SIZE			0x40000	/* 256K */
 #define HF_SFIFO_SLOTS			(HF_SFIFO_SIZE >> HFI_CACHE_LINE_SHIFT)
 #define HF_RFIFO_SIZE			0x1000000	/* 16M */
 #define HF_RFIFO_SLOTS			(HF_RFIFO_SIZE >> HFI_CACHE_LINE_SHIFT)
+#define HF_TX_LOW_WATERMARK		(HF_SFIFO_SLOTS >> 4)
 
 #define HF_FV_BIT_CNT			32
+#define HF_FV_BIT_MAX			31
+#define HF_SEND_ONE			1
 
-#define HF_NET_MTU			(2048 - HF_IP_HDR_LEN - HF_PROTO_LEN)
+#define HF_PAYLOAD_MAX			(2048 - HF_IP_HDR_LEN - HF_PROTO_LEN)
+#define HF_NET_MTU			HF_PAYLOAD_MAX
+#define HF_PAYLOAD_RX_THRESHOLD		0x10ULL
+#define HF_PAYLOAD_RX_THRESH_SHIFT	59
 
 struct hfi_ip_extended_hdr {            /* 16B */
 	unsigned int	immediate_len:7;/* In bytes */
@@ -83,6 +100,14 @@ struct hfi_ip_with_payload_pkt {
 #define HF_IP_HDR_LEN			((sizeof(struct hfi_hdr) + \
 				sizeof(struct hfi_ip_extended_hdr)))
 #define HF_ALIGN_PAD			2
+#define HF_PROTO_HDR_VERSION		0x1
+/* HFI protocol message type */
+#define	HF_IF_ARP			0xA0
+#define	HF_IF_FIFO			0xA1
+
+/* HFI protocol message flag */
+#define	HF_IF_UCAST			0xB0
+#define	HF_IF_BCAST			0xB1
 
 struct hf_if_proto_hdr {
 	u16			version;
@@ -93,6 +118,8 @@ struct hf_if_proto_hdr {
 };
 
 #define HF_PROTO_LEN		sizeof(struct hf_if_proto_hdr)
+#define HF_BCAST_CACHE_LINE_16	16
+#define HF_BCAST_CACHE_LINE_2	2
 
 struct hf_fifo {
 	void			*addr;
@@ -119,6 +146,7 @@ struct hf_if {
 	u32			sfifo_fv_polarity;
 	u32			sfifo_slots_per_blk;
 	u32			sfifo_packets;
+	u32			msg_id;
 	void __iomem		*doorbell;		/* mapped mmio_regs */
 	struct hf_fifo		tx_fifo;
 	struct hf_fifo		rx_fifo;
@@ -144,5 +172,47 @@ struct hf_global_info {
 
 extern struct hf_global_info	hf_ginfo;
 
+#define HF_EVENT_NUM		1
+
+struct hf_events_cb {
+	enum hfi_event_type	type;
+	void			*func;
+};
+
 #define HF_MAC_HFI_SHIFT	12
+#define HF_HDR_HFI_SHIFT	8
+
+static inline u32 hf_get_win(u16 id)
+{
+	return ((id >> HF_MAC_HFI_SHIFT) << HF_HDR_HFI_SHIFT) | (id & 0xFF);
+}
+
+static inline void hf_get_dst_info(struct ethhdr *hwhdr_p,
+				   u16 *d_isr,
+				   u16 *d_win)
+{
+	*d_isr = (*(u16 *)(&(hwhdr_p->h_dest[2]))) & 0xFFF;
+	*d_win = hf_get_win(*(u16 *)(&(hwhdr_p->h_dest[4])));
+}
+
+static inline void hf_mmio_regs_write_then_read(struct hf_if *net_if,
+				int off,
+				u64 data)
+{
+	__raw_writeq(data, net_if->doorbell + off);
+	isync();
+	__raw_readq(net_if->doorbell + off);
+	/* Make sure all received pkt shows up in rfifo */
+	mb();
+}
+
+static inline u64 hf_mmio_regs_read(struct hf_if *net_if, int off)
+{
+	return __raw_readq(net_if->doorbell + off);
+}
+
+static inline void hf_mmio_regs_write(struct hf_if *net_if, int off, u64 data)
+{
+	__raw_writeq(data, net_if->doorbell + off);
+}
 #endif
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 24/27] HFI: hfi_ip network driver
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

It is a separate binary because it is not strictly necessary to use the HFI.
This patch includes module load/unload and the window open/setup with the
hfi device driver.

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/Kconfig              |    1 +
 drivers/net/hfi/Makefile         |    1 +
 drivers/net/hfi/ip/Kconfig       |    9 +
 drivers/net/hfi/ip/Makefile      |    6 +
 drivers/net/hfi/ip/hf_proto.h    |   48 +++
 drivers/net/hfi/ip/hfi_ip_main.c |  613 ++++++++++++++++++++++++++++++++++++++
 include/linux/hfi/hfi_ip.h       |  148 +++++++++
 include/linux/if_arp.h           |    1 +
 8 files changed, 827 insertions(+), 0 deletions(-)
 create mode 100644 drivers/net/hfi/ip/Kconfig
 create mode 100644 drivers/net/hfi/ip/Makefile
 create mode 100644 drivers/net/hfi/ip/hf_proto.h
 create mode 100644 drivers/net/hfi/ip/hfi_ip_main.c
 create mode 100644 include/linux/hfi/hfi_ip.h

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 1abbfd9..ddae700 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -3437,5 +3437,6 @@ config VMXNET3
 	  module will be called vmxnet3.
 
 source "drivers/net/hfi/core/Kconfig"
+source "drivers/net/hfi/ip/Kconfig"
 
 endif # NETDEVICES
diff --git a/drivers/net/hfi/Makefile b/drivers/net/hfi/Makefile
index 0440cbe..768f27c 100644
--- a/drivers/net/hfi/Makefile
+++ b/drivers/net/hfi/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_HFI)                += core/
+obj-$(CONFIG_HFI_IP)             += ip/
diff --git a/drivers/net/hfi/ip/Kconfig b/drivers/net/hfi/ip/Kconfig
new file mode 100644
index 0000000..422782a
--- /dev/null
+++ b/drivers/net/hfi/ip/Kconfig
@@ -0,0 +1,9 @@
+config HFI_IP
+	tristate "IP-over-HFI"
+	depends on NETDEVICES && INET && HFI
+	---help---
+	Support for IP over HFI. It transports IP
+	packets over HFI.
+
+	To compile the driver as a module, choose M here. The module
+	will be called hfi_ip.
diff --git a/drivers/net/hfi/ip/Makefile b/drivers/net/hfi/ip/Makefile
new file mode 100644
index 0000000..90c7dea
--- /dev/null
+++ b/drivers/net/hfi/ip/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the HF IP interface for IBM eServer System p
+#
+obj-$(CONFIG_HFI_IP) += hfi_ip.o
+
+hfi_ip-objs :=	hfi_ip_main.o
diff --git a/drivers/net/hfi/ip/hf_proto.h b/drivers/net/hfi/ip/hf_proto.h
new file mode 100644
index 0000000..b4133b7
--- /dev/null
+++ b/drivers/net/hfi/ip/hf_proto.h
@@ -0,0 +1,48 @@
+/*
+ * hf_proto.h
+ *
+ * HF IP driver for IBM System p
+ *
+ *  Authors:
+ *      Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *      William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *      Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *      Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *      Jian Xiao <jian@linux.vnet.ibm.com>
+ *      Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *      Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _HF_PROTO_H_
+#define _HF_PROTO_H_
+
+extern int hfidd_open_window_func(struct hfidd_acs *p_acs,
+		u32 is_userspace,
+		struct hfi_client_info *user_p,
+		struct hfi_client_info *out_p);
+extern int hfidd_close_window_func(struct hfidd_acs *p_acs,
+		u32 is_userspace,
+		struct hfi_window_info *user_p);
+extern int hfidd_callback_register(struct hfidd_acs *p_acs,
+		struct hfi_reg_events *arg);
+extern int hfidd_callback_unregister(struct hfidd_acs *p_acs,
+		struct hfi_reg_events *arg);
+
+#endif
diff --git a/drivers/net/hfi/ip/hfi_ip_main.c b/drivers/net/hfi/ip/hfi_ip_main.c
new file mode 100644
index 0000000..0c1ebd7
--- /dev/null
+++ b/drivers/net/hfi/ip/hfi_ip_main.c
@@ -0,0 +1,613 @@
+/*
+ * hfi_ip_main.c
+ *
+ * HF IP driver for IBM System p
+ *
+ *  Authors:
+ *	Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *	William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *	Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *	Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *	Jian Xiao <jian@linux.vnet.ibm.com>
+ *	Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *	Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/hfi/hfi_ip.h>
+#include "hf_proto.h"
+
+MODULE_AUTHOR("James Dykman <dykmanj@linux.vnet.ibm.com>, "
+		"Piyush Chaudhary <piyushc@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IP driver v" HF_DRV_VERSION " (" HF_DRV_RELDATE ")"
+		" for IBM eServer HFI for System p");
+MODULE_VERSION(HF_DRV_VERSION);
+MODULE_LICENSE("GPL v2");
+
+struct hf_global_info		hf_ginfo;
+
+static const u8 hfi_bcast_addr[] = {
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+static void hf_free_tx_resource(struct hf_if *net_if)
+{
+	int	i;
+
+	if (net_if->tx_skb) {
+		for (i = 0; i <= net_if->tx_fifo.emax; i++) {
+			if (net_if->tx_skb[i])
+				dev_kfree_skb_any(net_if->tx_skb[i]);
+		}
+
+		free_pages((unsigned long)(net_if->tx_skb),
+				get_order((net_if->tx_fifo.emax + 1) *
+				sizeof(struct sk_buff *)));
+		net_if->tx_skb = 0;
+	}
+	if (net_if->tx_fifo.addr) {
+		free_pages((unsigned long)(net_if->tx_fifo.addr),
+				get_order(net_if->tx_fifo.size + PAGE_SIZE_4K));
+		net_if->tx_fifo.addr = 0;
+	}
+}
+
+static int hf_alloc_tx_resource(struct hf_net *net)
+{
+	struct hf_if *net_if = &(net->hfif);
+	int	i;
+
+	net_if->tx_fifo.size = HF_SFIFO_SIZE;
+	net_if->tx_fifo.head = 0;
+	net_if->tx_fifo.tail = 0;
+	net_if->tx_fifo.emax = HF_SFIFO_SLOTS - 1;
+	atomic_set(&net_if->tx_fifo.avail, HF_SFIFO_SLOTS - 1);
+
+	net_if->tx_fifo.addr =
+		(void *)__get_free_pages(GFP_KERNEL,
+				get_order(net_if->tx_fifo.size + PAGE_SIZE_4K));
+
+	if (net_if->tx_fifo.addr == 0) {
+		netdev_err(net->netdev, "%s: hf_alloc_tx_resource: "
+			"tx_fifo fail, size=0x%x\n",
+			net_if->name, net_if->tx_fifo.size);
+
+		return -ENOMEM;
+	}
+	memset(net_if->tx_fifo.addr, 0, net_if->tx_fifo.size + PAGE_SIZE_4K);
+
+	/* Sfifo finish vector locates at very next page of sfifo */
+	net_if->sfifo_finishvec = net_if->tx_fifo.addr + net_if->tx_fifo.size;
+	net_if->sfifo_fv_polarity = 0;
+	net_if->sfifo_slots_per_blk = HF_SFIFO_SLOTS / HF_FV_BIT_CNT;
+
+	/* allocate array to hold the tx skbs */
+	net_if->tx_skb =
+		(struct sk_buff **)__get_free_pages(GFP_KERNEL,
+		get_order((net_if->tx_fifo.emax + 1) *
+		sizeof(struct sk_buff *)));
+
+	if (net_if->tx_skb == 0) {
+		netdev_err(net->netdev,
+			"%s: hf_alloc_tx_resource: tx_skb failed\n",
+			net_if->name);
+
+		goto err_out;
+	}
+
+	for (i = 0; i <= net_if->tx_fifo.emax; i++)
+		net_if->tx_skb[i] = NULL;
+
+	return 0;
+
+err_out:
+	hf_free_tx_resource(net_if);
+
+	return -ENOMEM;
+}
+
+static void hf_free_rx_resource(struct hf_if *net_if)
+{
+	if (net_if->rx_fifo.addr) {
+		free_pages((unsigned long)(net_if->rx_fifo.addr),
+				get_order(net_if->rx_fifo.size));
+		net_if->rx_fifo.addr = 0;
+	}
+}
+
+static int hf_alloc_rx_resource(struct hf_net *net)
+{
+	struct hf_if *net_if = &(net->hfif);
+
+	net_if->rx_fifo.size = HF_RFIFO_SIZE;
+	net_if->rx_fifo.head = 0;
+	net_if->rx_fifo.tail = 0;
+	net_if->rx_fifo.emax = HF_RFIFO_SLOTS - 1;
+
+	net_if->rx_fifo.addr =
+		(void *)__get_free_pages(GFP_KERNEL,
+				get_order(net_if->rx_fifo.size));
+
+	if (net_if->rx_fifo.addr == 0) {
+		netdev_err(net->netdev,
+			"%s: hf_alloc_rx_resource: fail, size=0x%x\n",
+			net_if->name, net_if->rx_fifo.size);
+
+		return -ENOMEM;
+	}
+
+	memset(net_if->rx_fifo.addr, 0, net_if->rx_fifo.size);
+
+	return 0;
+}
+
+static void hf_free_resource(struct hf_if *net_if)
+{
+	hf_free_rx_resource(net_if);
+
+	hf_free_tx_resource(net_if);
+}
+
+static int hf_alloc_resource(struct hf_net *net)
+{
+	int			rc;
+	struct hf_if		*net_if = &(net->hfif);
+
+	rc = hf_alloc_tx_resource(net);
+	if (rc)
+		goto alloc_resource_err0;
+
+	rc = hf_alloc_rx_resource(net);
+	if (rc)
+		goto alloc_resource_err1;
+
+	return 0;
+
+alloc_resource_err1:
+	hf_free_tx_resource(net_if);
+alloc_resource_err0:
+	return rc;
+}
+
+static int hf_close_ip_window(struct hf_net *net, struct hfidd_acs *p_acs)
+{
+	struct hf_if *net_if = &(net->hfif);
+	int		rc;
+
+	if (net_if->doorbell) {
+		iounmap(net_if->doorbell);
+		net_if->doorbell = NULL;
+	}
+
+	/* Fill in the request structure */
+	net_if->client.hdr.req		   = HFIDD_REQ_CLOSE_WINDOW;
+	net_if->client.hdr.req_len	   = sizeof(struct hfi_window_info);
+	net_if->client.hdr.result.use.kptr = &(net_if->client);
+
+	rc = hfidd_close_window_func(HF_ACS(net_if), 0,
+			(struct hfi_window_info *)(&(net_if->client)));
+	if (rc) {
+		netdev_err(net->netdev,
+			"%s: hf_close_ip_window: fail, rc=0x%x\n",
+			net_if->name, rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Open an IP window on the HFI device and map its MMIO registers.
+ *
+ * Fills net_if->client with the send/receive fifo addresses and sizes, the
+ * finish vector address and the IP job id, then issues an open-window
+ * request through hfidd_open_window_func().  On success the register area
+ * returned in client.mmio_regs is mapped into net_if->doorbell and the local
+ * ISR id is recorded.
+ *
+ * Returns 0 on success, the error from hfidd_open_window_func(), or -ENOMEM
+ * when the registers cannot be mapped (the window is closed again first).
+ */
+static int hf_open_ip_window(struct hf_net *net,
+			     struct hfidd_acs *p_acs)
+{
+	struct hf_if		*net_if = &(net->hfif);
+	int			rc = 0;
+
+	net_if->client.win_type = HFIDD_IP_WIN;
+
+	net_if->client.sfifo.eaddr.use.kptr	 = net_if->tx_fifo.addr;
+	net_if->client.sfifo.size		 = net_if->tx_fifo.size;
+	net_if->client.rfifo.eaddr.use.kptr	 = net_if->rx_fifo.addr;
+	net_if->client.rfifo.size		 = net_if->rx_fifo.size;
+	net_if->client.sfifo_finish_vec.use.kptr = net_if->sfifo_finishvec;
+	net_if->client.job_id			 = HF_IP_JOBID;
+
+	/* Fill in the request structure */
+	net_if->client.hdr.req		   = HFIDD_REQ_OPEN_WINDOW;
+	net_if->client.hdr.req_len	   = sizeof(struct hfi_client_info);
+	net_if->client.hdr.result.use.kptr = &(net_if->client);
+
+	rc = hfidd_open_window_func(p_acs, 0, &(net_if->client),
+			&(net_if->client));
+	if (rc) {
+		netdev_err(net->netdev,
+			"%s: hf_open_ip_window: fail open rc=0x%x\n",
+			net_if->name, rc);
+		return rc;
+	}
+
+	net_if->doorbell = (ioremap(
+		(u64)(net_if->client.mmio_regs.use.kptr), PAGE_SIZE_64K));
+
+	if (unlikely(net_if->doorbell == NULL)) {
+		netdev_err(net->netdev,
+			"%s: hf_open_ip_window: fail to map doorbell\n",
+			net_if->name);
+		hf_close_ip_window(net, p_acs);
+
+		/*
+		 * Report the failure: without a mapped doorbell the caller
+		 * must not mark the interface open.
+		 */
+		return -ENOMEM;
+	}
+
+	net_if->isr_id = net_if->client.local_isrid;
+
+	return 0;
+}
+
+/*
+ * hf_set_mac_addr - derive the interface MAC address from HFI identifiers
+ * @netdev: device whose dev_addr is written
+ * @p:      unused
+ *
+ * Mac address format: 02:ClusterID:ISR:ISR:HFI_WIN:WIN
+ * Always returns 0.
+ */
+static int hf_set_mac_addr(struct net_device *netdev, void *p)
+{
+	struct hf_net		*priv = netdev_priv(netdev);
+	struct hf_if		*hfif = &priv->hfif;
+	u16			*mac16 = (u16 *)netdev->dev_addr;
+	u16			hfi_win;
+
+	/* Adapter index in the bits above HF_MAC_HFI_SHIFT, window below */
+	hfi_win = (u16)((hfif->ai << HF_MAC_HFI_SHIFT) | hfif->client.window);
+
+	/* Locally administered MAC address: bit6=1, bit7=0 */
+	netdev->dev_addr[0] = 0x2;
+	/* cluster id */
+	netdev->dev_addr[1] = 0x0;
+	/* bytes 2-3: ISR id */
+	mac16[1] = (u16)hfif->isr_id;
+	/* bytes 4-5: HFI index and window */
+	mac16[2] = hfi_win;
+
+	return 0;
+}
+
+/*
+ * hf_net_delayed_open - finish opening an interface once the HFI is ready
+ * @parm: the struct net_device registered with the callback
+ * @win:  unused
+ * @ext:  unused
+ *
+ * Registered as the HFIDD_HFI_READY_REG callback.  Under net_if->lock it
+ * checks that the interface is HF_NET_HALF_OPEN, allocates resources, opens
+ * the IP window, sets the MAC address and moves the state to HF_NET_OPEN.
+ *
+ * Returns 0 on success, -EINVAL if the interface is not half open, or the
+ * error from hf_alloc_resource()/hf_open_ip_window().
+ */
+static int hf_net_delayed_open(void *parm, u16 win, u16 ext)
+{
+	struct net_device	*netdev = (struct net_device *)parm;
+	struct hf_net		*priv = netdev_priv(netdev);
+	struct hf_if		*hfif = &priv->hfif;
+	struct hfidd_acs	*acs = HF_ACS(hfif);
+	int			rc;
+
+	spin_lock(&hfif->lock);
+
+	if (hfif->state != HF_NET_HALF_OPEN) {
+		netdev_err(netdev, "hf_net_delayed_open: net_if state=0x%x\n",
+			hfif->state);
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	rc = hf_alloc_resource(priv);
+	if (rc)
+		goto out_unlock;
+
+	rc = hf_open_ip_window(priv, acs);
+	if (rc) {
+		/* Undo the allocation made just above */
+		hf_free_resource(hfif);
+		goto out_unlock;
+	}
+
+	hf_set_mac_addr(netdev, NULL);
+	hfif->state = HF_NET_OPEN;
+
+out_unlock:
+	spin_unlock(&hfif->lock);
+
+	return rc;
+}
+
+/*
+ * hf_register_hfi_ready_callback - (un)register the HFI-ready callback
+ * @netdev: device passed back to the callback as its parameter
+ * @p_acs:  adapter the callback is registered on
+ * @flag:   HFIDD_REQ_EVENT_REGISTER to register; any other value unregisters
+ *
+ * Builds a FUNCTIONS_FOR_EVENTS request that binds hf_net_delayed_open() to
+ * the HFIDD_HFI_READY_REG event.  Returns 0 on success or the error code
+ * from the hfidd callback (un)registration.
+ */
+static int hf_register_hfi_ready_callback(struct net_device *netdev,
+					  struct hfidd_acs *p_acs,
+					  int flag)
+{
+	struct hfi_reg_events	req;
+	int			rc;
+
+	/* Request header */
+	req.hdr.req = flag;
+	req.hdr.req_len = sizeof(struct hfi_reg_events);
+	req.hdr.result.use.kptr = NULL;
+
+	/* Event description: function callback for HFI ready */
+	req.type = FUNCTIONS_FOR_EVENTS;
+	req.info.func.index = HFIDD_HFI_READY_REG;
+	req.info.func.function_p.use.kptr = hf_net_delayed_open;
+	req.info.func.parameter.use.kptr = (void *)(netdev);
+
+	rc = (flag == HFIDD_REQ_EVENT_REGISTER) ?
+		hfidd_callback_register(p_acs, &req) :
+		hfidd_callback_unregister(p_acs, &req);
+
+	if (rc)
+		netdev_err(netdev, "hf_register_hfi_ready_callback: fail"
+			" flag=0x%x rc=0x%x\n", flag, rc);
+
+	return rc;
+}
+
+/*
+ * hf_net_open - ndo_open handler
+ * @netdev: device being opened
+ *
+ * Clears the device statistics and the sfifo packet counter, marks the
+ * interface HF_NET_HALF_OPEN, turns the carrier off and registers the
+ * HFI-ready callback.  If registration fails the state is set back to
+ * HF_NET_CLOSE.
+ *
+ * Returns 0 on success or the error from hf_register_hfi_ready_callback().
+ */
+static int hf_net_open(struct net_device *netdev)
+{
+	struct hf_net		*net = netdev_priv(netdev);
+	struct hf_if		*net_if = &(net->hfif);
+	int			rc = 0;
+	struct hfidd_acs	*p_acs = HF_ACS(net_if);
+
+	memset(&(netdev->stats), 0, sizeof(struct net_device_stats));
+	net_if->sfifo_packets = 0;
+
+	spin_lock(&(net_if->lock));
+	net_if->state = HF_NET_HALF_OPEN;
+	spin_unlock(&(net_if->lock));
+
+	netif_carrier_off(netdev);
+
+	rc = hf_register_hfi_ready_callback(netdev, p_acs,
+			HFIDD_REQ_EVENT_REGISTER);
+	if (rc != 0) {
+		spin_lock(&(net_if->lock));
+		net_if->state = HF_NET_CLOSE;
+		spin_unlock(&(net_if->lock));
+
+		/*
+		 * The two literals are concatenated: keep the separating
+		 * space and terminate the log line with a newline.
+		 */
+		netdev_err(netdev, "hf_net_open: hf_register_hfi_ready_callback"
+			" fail, rc=0x%x, state=0x%x\n", rc, net_if->state);
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * hf_net_close - ndo_stop handler
+ * @netdev: device being stopped
+ *
+ * Under the interface lock: if the interface reached HF_NET_OPEN, closes the
+ * IP window and frees its resources; then unregisters the HFI-ready callback
+ * and marks the interface HF_NET_CLOSE.  Always returns 0.
+ */
+static int hf_net_close(struct net_device *netdev)
+{
+	struct hf_net		*priv = netdev_priv(netdev);
+	struct hf_if		*hfif = &priv->hfif;
+	struct hfidd_acs	*acs = HF_ACS(hfif);
+
+	spin_lock(&hfif->lock);
+
+	if (hfif->state == HF_NET_OPEN) {
+		hf_close_ip_window(priv, acs);
+		hf_free_resource(hfif);
+	}
+
+	hf_register_hfi_ready_callback(netdev, acs,
+			HFIDD_REQ_EVENT_UNREGISTER);
+	hfif->state = HF_NET_CLOSE;
+
+	spin_unlock(&hfif->lock);
+
+	return 0;
+}
+
+/*
+ * hf_change_mtu - ndo_change_mtu handler
+ * @netdev:  device whose MTU is changed
+ * @new_mtu: requested MTU
+ *
+ * Accepts values from 68 (the minimum IPv4 MTU, matching the lower bound
+ * used by eth_change_mtu()) up to and including HF_NET_MTU.
+ *
+ * Returns 0 on success or -ERANGE if new_mtu is outside that range.
+ */
+static int hf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	if ((new_mtu < 68) || (new_mtu > HF_NET_MTU))
+		return -ERANGE;
+
+	netdev->mtu = new_mtu;
+
+	return 0;
+}
+
+/*
+ * hf_hard_header - header_ops.create handler: prepend an Ethernet-style header
+ * @skb:    buffer to prepend the header to
+ * @netdev: device the packet is sent on
+ * @type:   protocol id, stored in network byte order
+ * @daddr:  destination hardware address, or NULL if not known
+ * @saddr:  source hardware address, or NULL to use the device address
+ * @len:    unused
+ *
+ * Returns hard_header_len when the destination was filled in (from @daddr,
+ * or zeroed for IFF_NOARP devices), otherwise -hard_header_len.
+ */
+static int hf_hard_header(struct sk_buff *skb,
+			  struct net_device *netdev,
+			  u16 type,
+			  const void *daddr,
+			  const void *saddr,
+			  u32 len)
+{
+	struct ethhdr		*eth;
+	const void		*src = saddr ? saddr : netdev->dev_addr;
+
+	eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+
+	eth->h_proto = htons(type);
+	memcpy(eth->h_source, src, netdev->addr_len);
+
+	if (daddr)
+		memcpy(eth->h_dest, daddr, netdev->addr_len);
+	else if (netdev->flags & IFF_NOARP)
+		memset(eth->h_dest, 0, netdev->addr_len);
+	else
+		return -netdev->hard_header_len;
+
+	return netdev->hard_header_len;
+}
+
+/* Link-layer header operations; only header creation is provided. */
+static const struct header_ops hf_header_ops = {
+	.create = hf_hard_header,
+};
+
+/*
+ * Net device operations.  This table is also used by hf_inet_event() to
+ * recognise devices that belong to this driver.
+ */
+static const struct net_device_ops hf_netdev_ops = {
+	.ndo_open		= hf_net_open,
+	.ndo_stop		= hf_net_close,
+	.ndo_change_mtu		= hf_change_mtu,
+	.ndo_set_mac_address	= NULL,
+};
+
+/*
+ * hf_if_setup - alloc_netdev() setup callback
+ * @netdev: freshly allocated device to initialise
+ *
+ * Installs the driver's operation tables and sets the link-layer parameters
+ * (Ethernet-sized header and address, HFI ARP hardware type, broadcast
+ * address).
+ */
+static void hf_if_setup(struct net_device *netdev)
+{
+	/* Operation tables */
+	netdev->netdev_ops	= &hf_netdev_ops;
+	netdev->header_ops	= &hf_header_ops;
+
+	/* Link-layer geometry */
+	netdev->type		= ARPHRD_HFI;
+	netdev->hard_header_len	= ETH_HLEN;
+	netdev->addr_len	= ETH_ALEN;
+	netdev->needed_headroom	= 0;
+	memcpy(netdev->broadcast, hfi_bcast_addr, ETH_ALEN);
+
+	/* Defaults */
+	netdev->flags		= IFF_BROADCAST;
+	netdev->mtu		= HF_NET_MTU;
+	netdev->tx_queue_len	= 1000;
+}
+
+/*
+ * hf_init_netdev - allocate and register one HF network interface
+ * @idx: interface number within an HFI
+ * @ai:  adapter (HFI) index
+ *
+ * The interface is named "hf<N>" with N = idx * MAX_HFIS + ai.  The private
+ * hf_if state is zeroed and initialised to HF_NET_CLOSE before the device is
+ * registered.
+ *
+ * Returns the private struct hf_net on success, or an ERR_PTR(): -ENOMEM if
+ * the net_device cannot be allocated, otherwise the error code returned by
+ * register_netdev().
+ */
+static struct hf_net *hf_init_netdev(int idx, int ai)
+{
+	struct net_device	*netdev;
+	struct hf_net		*net;
+	int			ii;
+	int			rc;
+	char			ifname[HF_MAX_NAME_LEN];
+
+	ii = (idx * MAX_HFIS) + ai;
+	/* Bounded formatting: never write past ifname */
+	snprintf(ifname, sizeof(ifname), "hf%d", ii);
+	netdev = alloc_netdev(sizeof(struct hf_net), ifname, hf_if_setup);
+	if (!netdev) {
+		printk(KERN_ERR "hf_init_netdev: "
+				"alloc_netdev for hfi%d:hf%d fail\n", ai, idx);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	net = netdev_priv(netdev);
+	net->netdev = netdev;
+
+	memset(&(net->hfif), 0, sizeof(struct hf_if));
+	net->hfif.idx = ii;	/* interface index */
+	net->hfif.ai  = ai;	/* adapter index */
+	/* strlcpy always NUL-terminates the destination */
+	strlcpy(net->hfif.name, ifname, HF_MAX_NAME_LEN);
+	net->hfif.state = HF_NET_CLOSE;
+
+	spin_lock_init(&net->hfif.lock);
+
+	rc = register_netdev(netdev);
+	if (rc) {
+		netdev_err(netdev, "hf_init_netdev: "
+				"failed to register netdev=hfi%d:hf%d, "
+				"rc = 0x%x\n", ai, idx, rc);
+		free_netdev(netdev);
+		/* Propagate the real failure reason to the caller */
+		return ERR_PTR(rc);
+	}
+
+	return net;
+}
+
+/*
+ * hf_del_netdev - unregister and free one HF network interface
+ * @net: private data of the interface to remove
+ *
+ * The net_device pointer is read out of @net first and used for both calls.
+ */
+static void hf_del_netdev(struct hf_net *net)
+{
+	struct net_device	*dev = net->netdev;
+
+	unregister_netdev(dev);
+	free_netdev(dev);
+}
+
+/*
+ * hf_inet_event - inetaddr notifier callback
+ * @this:  notifier block (unused)
+ * @event: notification type
+ * @ifa:   the struct in_ifaddr the event refers to
+ *
+ * For NETDEV_UP events on a device driven by hf_netdev_ops in init_net,
+ * records the first address on the device's address list, in host byte
+ * order, in the interface's ip_addr.  Always returns NOTIFY_DONE.
+ */
+static int hf_inet_event(struct notifier_block *this,
+			 unsigned long event,
+			 void *ifa)
+{
+	struct in_device	*idev = ((struct in_ifaddr *)ifa)->ifa_dev;
+	struct net_device	*dev = idev->dev;
+	struct hf_net		*priv;
+
+	if (!net_eq(dev_net(dev), &init_net))
+		return NOTIFY_DONE;
+
+	/* Only address-up events on our own devices are of interest */
+	if ((event != NETDEV_UP) || (dev->netdev_ops != &hf_netdev_ops))
+		return NOTIFY_DONE;
+
+	priv = (struct hf_net *)netdev_priv(dev);
+	priv->hfif.ip_addr = ntohl(idev->ifa_list->ifa_address);
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier registered with register_inetaddr_notifier() at module init. */
+static struct notifier_block hf_inet_notifier = {
+	.notifier_call = hf_inet_event,
+};
+
+/*
+ * hf_init_module - module entry point
+ *
+ * Creates MAX_HF_PER_HFI interfaces for each of the MAX_HFIS adapters,
+ * records them in hf_ginfo and registers the inetaddr notifier.  If any
+ * interface fails to initialise, every interface created so far is removed
+ * and the error from hf_init_netdev() is returned.
+ */
+static int __init hf_init_module(void)
+{
+	struct hf_net	*net;
+	u32		idx, ai;
+	int		rc;
+
+	memset(&hf_ginfo, 0, sizeof(struct hf_global_info));
+
+	for (idx = 0; idx < MAX_HF_PER_HFI; idx++) {
+		for (ai = 0; ai < MAX_HFIS; ai++) {
+			net = hf_init_netdev(idx, ai);
+			if (IS_ERR(net)) {
+				rc = PTR_ERR(net);
+				printk(KERN_ERR "hf_init_module: hf_init_netdev"
+						" for idx %d ai %d failed rc"
+						" %ld\n",
+						idx, ai, PTR_ERR(net));
+				goto unwind;
+			}
+
+			hf_ginfo.net[idx][ai] = net;
+		}
+	}
+
+	register_inetaddr_notifier(&hf_inet_notifier);
+
+	printk(KERN_INFO "hfi_ip module loaded\n");
+	return 0;
+
+unwind:
+	/* Tear down whatever was created before the failure */
+	for (idx = 0; idx < MAX_HF_PER_HFI; idx++) {
+		for (ai = 0; ai < MAX_HFIS; ai++) {
+			net = hf_ginfo.net[idx][ai];
+			if (net == NULL)
+				continue;
+
+			hf_del_netdev(net);
+			hf_ginfo.net[idx][ai] = NULL;
+		}
+	}
+
+	return rc;
+}
+
+/*
+ * hf_cleanup_module - module exit point
+ *
+ * Unregisters the inetaddr notifier, then removes every interface recorded
+ * in hf_ginfo and clears its slot.
+ */
+static void __exit hf_cleanup_module(void)
+{
+	struct hf_net	*net;
+	u32		idx, ai;
+
+	unregister_inetaddr_notifier(&hf_inet_notifier);
+
+	for (idx = 0; idx < MAX_HF_PER_HFI; idx++) {
+		for (ai = 0; ai < MAX_HFIS; ai++) {
+			net = hf_ginfo.net[idx][ai];
+			if (net == NULL)
+				continue;
+
+			hf_del_netdev(net);
+			hf_ginfo.net[idx][ai] = NULL;
+		}
+	}
+}
+
+module_init(hf_init_module);
+module_exit(hf_cleanup_module);
diff --git a/include/linux/hfi/hfi_ip.h b/include/linux/hfi/hfi_ip.h
new file mode 100644
index 0000000..6b6a74c
--- /dev/null
+++ b/include/linux/hfi/hfi_ip.h
@@ -0,0 +1,148 @@
+/*
+ * hfi_ip.h
+ *
+ * HF IP driver for IBM System p
+ *
+ *  Authors:
+ *      Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *      William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *      Wen C. Chen <wcchen@linux.vnet.ibm.com>
+ *      Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *      Jian Xiao <jian@linux.vnet.ibm.com>
+ *      Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *      Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _HFI_IP_H_
+#define _HFI_IP_H_
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <net/arp.h>
+
+#include <linux/hfi/hfidd_internal.h>
+#include <linux/hfi/hfidd_client.h>
+#include <linux/hfi/hfidd_requests.h>
+#include <linux/hfi/hfidd_pkt_formats.h>
+
+#define HF_DRV_VERSION			"1.0"
+#define HF_DRV_RELDATE			"July 7, 2010"
+#define HF_DRV_NAME			"hf"
+
+#define MAX_HF_PER_HFI			2
+#define	HF_IP_JOBID			0xFFFFFFF0
+#define HF_MAX_NAME_LEN			64
+
+#define HF_SFIFO_SIZE			0x40000	/* 256K */
+#define HF_SFIFO_SLOTS			(HF_SFIFO_SIZE >> HFI_CACHE_LINE_SHIFT)
+#define HF_RFIFO_SIZE			0x1000000	/* 16M */
+#define HF_RFIFO_SLOTS			(HF_RFIFO_SIZE >> HFI_CACHE_LINE_SHIFT)
+
+#define HF_FV_BIT_CNT			32
+
+#define HF_NET_MTU			(2048 - HF_IP_HDR_LEN - HF_PROTO_LEN)
+
+/*
+ * IP extended header.  It follows struct hfi_hdr in an IP packet (see
+ * struct hfi_ip_with_payload_pkt).  The structure is packed: 16 bits of
+ * bitfields, 12 bytes of service header and a 2-byte checksum.
+ */
+struct hfi_ip_extended_hdr {            /* 16B */
+	unsigned int	immediate_len:7;/* In bytes */
+	unsigned int	num_desc:3;     /* number of descriptors */
+					/* Logical Port ID: */
+	unsigned int	lpid_valid:1;   /* set by sending HFI */
+	unsigned int	lpid:4;         /* set by sending HFI */
+	/* Ethernet Service Header is 113 bits, which is 14 bytes + 1 bit */
+	unsigned int	ethernet_svc_hdr_hi:1;    /* Not used by HFI */
+	char            ethernet_svc_hdr[12];     /* Not used by HFI */
+	__sum16         bcast_csum;
+} __packed;
+
+/*
+ * Packed layout of an IP packet: HFI header, IP extended header, then a
+ * 2016-byte payload area.
+ */
+struct hfi_ip_with_payload_pkt {
+	struct hfi_hdr			hfi_hdr;
+	struct hfi_ip_extended_hdr	ip_ext;
+	char				payload[2016];
+} __packed;
+
+#define HF_IP_HDR_LEN			((sizeof(struct hfi_hdr) + \
+				sizeof(struct hfi_ip_extended_hdr)))
+#define HF_ALIGN_PAD			2
+
+/*
+ * HF interface protocol header.  Its size (HF_PROTO_LEN) is subtracted,
+ * together with HF_IP_HDR_LEN, from 2048 to obtain HF_NET_MTU.
+ */
+struct hf_if_proto_hdr {
+	u16			version;
+	u8			msg_type;
+	u8			msg_flag;
+	u32			msg_len;	/* Include HFI header */
+	u32			msg_id;
+};
+
+#define HF_PROTO_LEN		sizeof(struct hf_if_proto_hdr)
+
+/*
+ * Bookkeeping for one FIFO; struct hf_if holds one instance for transmit
+ * (tx_fifo) and one for receive (rx_fifo).
+ */
+struct hf_fifo {
+	void			*addr;
+	u32			size;		/* total bytes	*/
+	u32			head;
+	u32			tail;
+	u32			emax;		/* power 2 mask */
+	atomic_t		avail;		/* for tx	*/
+	atomic_t		outstanding;	/* for rx	*/
+};
+
+#define	HF_NET_CLOSE		0x00
+#define	HF_NET_HALF_OPEN	0xA0
+#define	HF_NET_OPEN		0xA1
+
+/*
+ * Per-interface driver state, embedded in struct hf_net.  The state field
+ * takes the HF_NET_CLOSE / HF_NET_HALF_OPEN / HF_NET_OPEN values and is
+ * protected by lock.
+ */
+struct hf_if {
+	u32			idx;			/* 0, 1, 2, 3 ...   */
+	u32			ai;			/* 0=hfi0, 1=hfi1   */
+	char			name[HF_MAX_NAME_LEN];
+	u32			isr_id;
+	u32			ip_addr;
+	u32			state;			/* CLOSE, OPEN */
+	spinlock_t		lock;			/* lock for state */
+	u32			sfifo_fv_polarity;
+	u32			sfifo_slots_per_blk;
+	u32			sfifo_packets;
+	void __iomem		*doorbell;		/* mapped mmio_regs */
+	struct hf_fifo		tx_fifo;
+	struct hf_fifo		rx_fifo;
+	struct hfi_client_info	client;
+	struct sk_buff		**tx_skb;		/* array to store tx
+							   2k skb */
+	void			*sfifo_finishvec;
+};
+
+/*
+ * Private structure for HF interface.  It is the netdev_priv() area of the
+ * net_device; netdev points back to the owning device.
+ */
+struct hf_net {
+	struct net_device	*netdev;
+	struct hf_if		hfif;
+};
+
+extern struct hfidd_global	hfidd_global;
+
+#define HF_ACS(net_if)		(hfidd_global.p_acs[(net_if)->ai])
+
+/*
+ * Table of all HF network interfaces, indexed [interface per HFI][adapter].
+ * The adapter dimension is MAX_HFIS because every loop that walks this
+ * table (module init and cleanup) bounds the adapter index by MAX_HFIS.
+ */
+struct hf_global_info {
+	struct hf_net		*net[MAX_HF_PER_HFI][MAX_HFIS];
+};
+
+extern struct hf_global_info	hf_ginfo;
+
+#define HF_MAC_HFI_SHIFT	12
+#endif
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 6d722f4..f2cfdc1 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -41,6 +41,7 @@
 #define	ARPHRD_IEEE1394	24		/* IEEE 1394 IPv4 - RFC 2734	*/
 #define ARPHRD_EUI64	27		/* EUI-64                       */
 #define ARPHRD_INFINIBAND 32		/* InfiniBand			*/
+#define ARPHRD_HFI	37		/* Host Fabric Interface	*/
 
 /* Dummy types for non ARP hardware */
 #define ARPHRD_SLIP	256
-- 
1.7.3.5


^ permalink raw reply related

* [PATCH v4 21/27] HFI: Add send and receive interrupts
From: dykmanj @ 2011-04-25 21:24 UTC (permalink / raw)
  To: netdev
  Cc: Jim Dykman, Piyush Chaudhary, Fu-Chung Chang,  William S. Cadden,
	 Wen C. Chen, Scot Sakolish, Jian Xiao,  Carol L. Soto,
	 Sarah J. Sheppard
In-Reply-To: <1303766647-30156-1-git-send-email-dykmanj@linux.vnet.ibm.com>

From: Jim Dykman <dykmanj@linux.vnet.ibm.com>

Each window has its own interrupt for send interrupts and another for receive
interrupts.

Signed-off-by:  Piyush Chaudhary <piyushc@linux.vnet.ibm.com>
Signed-off-by:  Jim Dykman <dykmanj@linux.vnet.ibm.com>
Signed-off-by:  Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
Signed-off-by:  William S. Cadden <wscadden@linux.vnet.ibm.com>
Signed-off-by:  Wen C. Chen <winstonc@linux.vnet.ibm.com>
Signed-off-by:  Scot Sakolish <sakolish@linux.vnet.ibm.com>
Signed-off-by:  Jian Xiao <jian@linux.vnet.ibm.com>
Signed-off-by:  Carol L. Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by:  Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
---
 drivers/net/hfi/core/Makefile       |    1 +
 drivers/net/hfi/core/hfidd_intr.c   |  127 +++++++++++++++++++++++++++++++++++
 drivers/net/hfi/core/hfidd_proto.h  |    3 +
 drivers/net/hfi/core/hfidd_window.c |   16 ++++-
 include/linux/hfi/hfidd_client.h    |   17 +++++
 include/linux/hfi/hfidd_internal.h  |    2 +
 6 files changed, 165 insertions(+), 1 deletions(-)
 create mode 100644 drivers/net/hfi/core/hfidd_intr.c

diff --git a/drivers/net/hfi/core/Makefile b/drivers/net/hfi/core/Makefile
index 3adf07e..d2ed86f 100644
--- a/drivers/net/hfi/core/Makefile
+++ b/drivers/net/hfi/core/Makefile
@@ -6,5 +6,6 @@ hfi_core-objs:=	hfidd_adpt.o \
 		hfidd_init.o \
 		hfidd_xlat.o \
 		hfidd_map.o \
+		hfidd_intr.o \
 		hfidd_hcalls.o
 obj-$(CONFIG_HFI) += hfi_core.o
diff --git a/drivers/net/hfi/core/hfidd_intr.c b/drivers/net/hfi/core/hfidd_intr.c
new file mode 100644
index 0000000..253de27
--- /dev/null
+++ b/drivers/net/hfi/core/hfidd_intr.c
@@ -0,0 +1,127 @@
+/*
+ * hfidd_intr.c
+ *
+ * HFI device driver for IBM System p
+ *
+ *  Authors:
+ *      Fu-Chung Chang <fcchang@linux.vnet.ibm.com>
+ *      William S. Cadden <wscadden@linux.vnet.ibm.com>
+ *      Wen C. Chen <winstonc@linux.vnet.ibm.com>
+ *      Scot Sakolish <sakolish@linux.vnet.ibm.com>
+ *      Jian Xiao <jian@linux.vnet.ibm.com>
+ *      Carol L. Soto <clsoto@linux.vnet.ibm.com>
+ *      Sarah J. Sheppard <sjsheppa@linux.vnet.ibm.com>
+ *
+ *  (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/hfi/hfidd_internal.h>
+#include "hfidd_proto.h"
+
+/*
+ * send_intr_handler - per-window send interrupt handler
+ * @irq:  interrupt number (unused)
+ * @data: the struct hfidd_window the interrupt was requested for
+ *
+ * Looks up the window's adapter; no further processing is done yet.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t send_intr_handler(int irq, void *data)
+{
+	struct hfidd_window *window = data;
+	struct hfidd_acs *acs = hfidd_global.p_acs[window->ai];
+
+	if (!acs)
+		return IRQ_HANDLED;
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * recv_intr_handler - per-window receive interrupt handler
+ * @irq:  interrupt number (unused)
+ * @data: the struct hfidd_window the interrupt was requested for
+ *
+ * Looks up the window's adapter; no further processing is done yet.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t recv_intr_handler(int irq, void *data)
+{
+	struct hfidd_window *window = data;
+	struct hfidd_acs *acs = hfidd_global.p_acs[window->ai];
+
+	if (!acs)
+		return IRQ_HANDLED;
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Release one window interrupt.  win_p is the same cookie that was passed
+ * to ibmebus_request_irq() when the interrupt was set up.
+ */
+static inline void hfidd_clear_interrupt(unsigned int int_level,
+			struct hfidd_window *win_p)
+{
+	ibmebus_free_irq(int_level, win_p);
+}
+
+/*
+ * hfidd_init_interrupt - request one window interrupt
+ * @p_acs:     adapter, used for error logging
+ * @win_p:     window passed to the handler as its data cookie
+ * @handler:   interrupt handler to install
+ * @name:      name the interrupt is registered under
+ * @int_level: interrupt to request
+ *
+ * Returns 0 on success or the ibmebus_request_irq() error code.
+ */
+static int hfidd_init_interrupt(struct hfidd_acs *p_acs,
+		struct hfidd_window *win_p ,
+		irqreturn_t (*handler)(int, void *),
+		const char *name,
+		unsigned int int_level)
+{
+	int rc = ibmebus_request_irq(int_level, handler, IRQF_DISABLED, name,
+			win_p);
+
+	if (rc != 0)
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfidd_init_interrupt: request_irq failed for "
+			"int_level 0x%x rc %d\n", int_level, rc);
+
+	return rc;
+}
+
+/*
+ * hfidd_init_win_interrupt - set up the send and receive interrupts of a window
+ * @p_acs: adapter the window belongs to
+ * @win_p: window whose send_intr/recv_intr are requested
+ *
+ * The interrupts are named "<HFIDD_DEV_NAME><adapter>-send<window>" and
+ * "...-recv<window>".  If the receive interrupt cannot be requested, the
+ * already requested send interrupt is released again.
+ *
+ * Returns 0 on success or the error from hfidd_init_interrupt().
+ */
+int hfidd_init_win_interrupt(struct hfidd_acs *p_acs,
+		struct hfidd_window *win_p)
+{
+	int rc;
+
+	/* init send interrupt handler */
+	snprintf(win_p->send_name, IRQ_NAME_SIZE - 1, "%s%d-send%d",
+		HFIDD_DEV_NAME, p_acs->index, win_p->index);
+	rc = hfidd_init_interrupt(p_acs, win_p, send_intr_handler,
+		win_p->send_name, win_p->send_intr);
+	if (rc != 0) {
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfidd_init_win_interrupt: send int failed, "
+			"rc = 0x%x\n", rc);
+		return rc;
+	}
+
+	/* init recv interrupt handler */
+	snprintf(win_p->recv_name, IRQ_NAME_SIZE - 1, "%s%d-recv%d",
+		HFIDD_DEV_NAME, p_acs->index, win_p->index);
+	rc = hfidd_init_interrupt(p_acs, win_p, recv_intr_handler,
+		win_p->recv_name, win_p->recv_intr);
+	if (rc == 0)
+		return 0;
+
+	dev_printk(KERN_ERR, p_acs->hfidd_dev,
+		"hfidd_init_win_interrupt: recv int failed, "
+		"rc = 0x%x\n", rc);
+	/* Roll back the send interrupt requested above */
+	hfidd_clear_interrupt(win_p->send_intr, win_p);
+
+	return rc;
+}
+
+/*
+ * hfidd_clear_win_interrupt - release a window's send and receive interrupts
+ * @win_p: window to clean up
+ *
+ * Each interrupt is released only if its number is non-zero, and the number
+ * is reset to 0 afterwards.
+ */
+void hfidd_clear_win_interrupt(struct hfidd_window *win_p)
+{
+	if (win_p->send_intr) {
+		hfidd_clear_interrupt(win_p->send_intr, win_p);
+		win_p->send_intr = 0;
+	}
+
+	if (win_p->recv_intr) {
+		hfidd_clear_interrupt(win_p->recv_intr, win_p);
+		win_p->recv_intr = 0;
+	}
+}
diff --git a/drivers/net/hfi/core/hfidd_proto.h b/drivers/net/hfi/core/hfidd_proto.h
index f531dcd..af88f0b 100644
--- a/drivers/net/hfi/core/hfidd_proto.h
+++ b/drivers/net/hfi/core/hfidd_proto.h
@@ -73,6 +73,9 @@ int hfidd_query_interface(struct hfidd_acs *p_acs, unsigned int subtype,
 int hfidd_start_nmmu(struct hfidd_acs *p_acs);
 int hfidd_start_interface(struct hfidd_acs *p_acs);
 int hfidd_stop_interface(struct hfidd_acs *p_acs, unsigned int hfi_id);
+int hfidd_init_win_interrupt(struct hfidd_acs *p_acs,
+		struct hfidd_window *win_p);
+void hfidd_clear_win_interrupt(struct hfidd_window *win_p);
 long long hfi_start_nmmu(u64 chip_id, void *nmmu_info);
 long long hfi_stop_nmmu(u64 chip_id);
 long long hfi_open_window(u64 unit_id, u64 win_id, u64 flag,
diff --git a/drivers/net/hfi/core/hfidd_window.c b/drivers/net/hfi/core/hfidd_window.c
index fd692eb..6864eae 100644
--- a/drivers/net/hfi/core/hfidd_window.c
+++ b/drivers/net/hfi/core/hfidd_window.c
@@ -1049,6 +1049,15 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 	local_p->local_isrid = p_acs->isr;
 	win_p->client_info.local_isrid = p_acs->isr;
 
+	/* Init the send and recv interrupt handlers */
+	rc = hfidd_init_win_interrupt(p_acs, win_p);
+	if (rc) {
+		dev_printk(KERN_ERR, p_acs->hfidd_dev,
+			"hfidd_open_window_func: hfidd_init_win_interrupt "
+			"failed, rc = 0x%x\n", rc);
+		goto hfidd_open_window_func_err6;
+	}
+
 	/* Copy out the client info back to user */
 	rc = hfi_copy_to_user((void *)out_p, (void *)local_p,
 			is_userspace, sizeof(struct hfi_client_info));
@@ -1056,7 +1065,7 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 		dev_printk(KERN_ERR, p_acs->hfidd_dev,
 			"hfidd_open_window_func: hfi_copy_to_user "
 			"failed, rc = 0x%x\n", rc);
-		goto hfidd_open_window_func_err6;
+		goto hfidd_open_window_func_err7;
 	}
 
 	spin_lock(&(win_p->win_lock));
@@ -1068,6 +1077,8 @@ int hfidd_open_window_func(struct hfidd_acs *p_acs, unsigned int is_userspace,
 	kfree(local_p);
 	return rc;
 
+hfidd_open_window_func_err7:
+	hfidd_clear_win_interrupt(win_p);
 hfidd_open_window_func_err6:
 	if (is_userspace)
 		hfidd_unmap(local_p->mmio_regs.use.kptr, PAGE_SIZE_64K);
@@ -1134,6 +1145,9 @@ int hfidd_close_window_internal(struct hfidd_acs *p_acs,
 	}
 	spin_unlock(&(win_p->win_lock));
 
+	/* Clear the send and recv interrupt handlers */
+	hfidd_clear_win_interrupt(win_p);
+
 	rc = hfi_unmap_mmio_regs(p_acs, win_p, is_userspace);
 	if (rc) {
 		dev_printk(KERN_ERR, p_acs->hfidd_dev,
diff --git a/include/linux/hfi/hfidd_client.h b/include/linux/hfi/hfidd_client.h
index 11c8973..3b2d032 100644
--- a/include/linux/hfi/hfidd_client.h
+++ b/include/linux/hfi/hfidd_client.h
@@ -121,6 +121,23 @@ struct hfi_window_info {
 	unsigned int		window;
 };
 
+/*
+ * Event Notification
+ *
+ * Identifiers of the events a client can register for.
+ * HFIDD_NUM_EVENT_TYPES is the number of event types, not an event itself.
+ */
+enum hfi_event_type {
+	HFIDD_SEND		= 0,
+	HFIDD_RECV		= 1,
+	HFIDD_WIN_ERROR		= 2,
+	HFIDD_HFI_ERROR		= 3,
+	HFIDD_TERMINATE		= 4,
+	HFIDD_RELEASE_WINDOW	= 5,
+	HFIDD_CAU_ERROR		= 6,
+	HFIDD_ICS_ERROR		= 7,
+	HFIDD_HFI_READY_REG	= 8,
+	HFIDD_ROUTE_CHANGE	= 9,
+	HFIDD_IP_TRC_LVL	= 10,	/* IP Window only */
+	HFIDD_POOL_SIZE		= 11,	/* IP Window only */
+	HFIDD_NUM_EVENT_TYPES	= 12
+};
+
 #define MAX_TORRENTS            1
 #define MAX_HFI_PER_TORRENT     2
 #define MAX_HFIS                (MAX_TORRENTS * MAX_HFI_PER_TORRENT)
diff --git a/include/linux/hfi/hfidd_internal.h b/include/linux/hfi/hfidd_internal.h
index 03cac9a..a3f86b7 100644
--- a/include/linux/hfi/hfidd_internal.h
+++ b/include/linux/hfi/hfidd_internal.h
@@ -144,6 +144,8 @@ struct hfidd_global {
 	struct hfidd_acs	*p_acs[MAX_HFIS];
 };
 
+extern struct hfidd_global hfidd_global;
+
 static inline struct hfidd_window *hfi_window(struct hfidd_acs *p,
 		unsigned int idx)
 {
-- 
1.7.3.5


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox