Netdev List
 help / color / mirror / Atom feed
* [PATCH 3/3] IPv6: Complete IPV6_DONTFRAG support
From: Brian Haley @ 2010-04-23 21:26 UTC (permalink / raw)
  To: davem, yoshfuji; +Cc: netdev
In-Reply-To: <1272057969-6526-3-git-send-email-brian.haley@hp.com>

Finally add support to detect a local IPV6_DONTFRAG event
and return the relevant data to the user if they've enabled
IPV6_RECVPATHMTU on the socket.  The next recvmsg() will
return no data, but have an IPV6_PATHMTU as ancillary data.

Signed-off-by: Brian Haley <brian.haley@hp.com>
---
 include/linux/ipv6.h  |    2 +
 include/net/ipv6.h    |    2 +
 net/ipv6/af_inet6.c   |    3 ++
 net/ipv6/datagram.c   |   87 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_output.c |   24 +++++++++----
 net/ipv6/raw.c        |    3 ++
 net/ipv6/udp.c        |    3 ++
 7 files changed, 116 insertions(+), 8 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 1976942..2ab5509 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -257,6 +257,7 @@ struct inet6_skb_parm {
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
+#define IP6CBMTU(skb)	((struct ip6_mtuinfo *)((skb)->cb))
 
 static inline int inet6_iif(const struct sk_buff *skb)
 {
@@ -366,6 +367,7 @@ struct ipv6_pinfo {
 
 	struct ipv6_txoptions	*opt;
 	struct sk_buff		*pktoptions;
+	struct sk_buff		*rxpmtu;
 	struct {
 		struct ipv6_txoptions *opt;
 		u8 hop_limit;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7ab6323..eba5cc0 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -578,9 +578,11 @@ extern int			ip6_datagram_connect(struct sock *sk,
 						     struct sockaddr *addr, int addr_len);
 
 extern int 			ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len);
+extern int 			ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len);
 extern void			ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
 						u32 info, u8 *payload);
 extern void			ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info);
+extern void			ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu);
 
 extern int inet6_release(struct socket *sock);
 extern int inet6_bind(struct socket *sock, struct sockaddr *uaddr, 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3192aa0..d2df314 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -417,6 +417,9 @@ void inet6_destroy_sock(struct sock *sk)
 	if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
 		kfree_skb(skb);
 
+	if ((skb = xchg(&np->rxpmtu, NULL)) != NULL)
+		kfree_skb(skb);
+
 	/* Free flowlabels */
 	fl6_free_socklist(sk);
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f5076d3..5959230 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -278,6 +278,45 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 		kfree_skb(skb);
 }
 
+void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *iph;
+	struct sk_buff *skb;
+	struct ip6_mtuinfo *mtu_info;
+
+	if (!np->rxopt.bits.rxpmtu)
+		return;
+
+	skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
+	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+
+	mtu_info = IP6CBMTU(skb);
+	if (!mtu_info) {
+		kfree_skb(skb);
+		return;
+	}
+
+	mtu_info->ip6m_mtu = mtu;
+	mtu_info->ip6m_addr.sin6_family = AF_INET6;
+	mtu_info->ip6m_addr.sin6_port = 0;
+	mtu_info->ip6m_addr.sin6_flowinfo = 0;
+	mtu_info->ip6m_addr.sin6_scope_id = fl->oif;
+	ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
+
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+	skb_reset_transport_header(skb);
+
+	skb = xchg(&np->rxpmtu, skb);
+	kfree_skb(skb);
+}
+
 /*
  *	Handle MSG_ERRQUEUE
  */
@@ -381,6 +420,54 @@ out:
 	return err;
 }
 
+/*
+ *	Handle IPV6_RECVPATHMTU
+ */
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	struct sockaddr_in6 *sin;
+	struct ip6_mtuinfo mtu_info;
+	int err;
+	int copied;
+
+	err = -EAGAIN;
+	skb = xchg(&np->rxpmtu, NULL);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
+
+	sin = (struct sockaddr_in6 *)msg->msg_name;
+	if (sin) {
+		sin->sin6_family = AF_INET6;
+		sin->sin6_flowinfo = 0;
+		sin->sin6_port = 0;
+		sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
+		ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
+	}
+
+	put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
+
+	err = copied;
+
+out_free_skb:
+	kfree_skb(skb);
+out:
+	return err;
+}
 
 
 int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54d43dd..61e2bef 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1219,15 +1219,23 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	 */
 
 	inet->cork.length += length;
-	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	if (length > mtu) {
+		int proto = sk->sk_protocol;
+		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
+			ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
+			return -EMSGSIZE;
+		}
 
-		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
-					  fragheaderlen, transhdrlen, mtu,
-					  flags);
-		if (err)
-			goto error;
-		return 0;
+		if (proto == IPPROTO_UDP &&
+		    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+			err = ip6_ufo_append_data(sk, getfrag, from, length,
+						  hh_len, fragheaderlen,
+						  transhdrlen, mtu, flags);
+			if (err)
+				goto error;
+			return 0;
+		}
 	}
 
 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 44a84ea..8562738 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -461,6 +461,9 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len);
 
+	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+		return ipv6_recv_rxpmtu(sk, msg, len);
+
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb)
 		goto out;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 39e3665..2850e35 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -335,6 +335,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len);
 
+	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+		return ipv6_recv_rxpmtu(sk, msg, len);
+
 try_again:
 	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
 				  &peeked, &err);
-- 
1.5.4.3


^ permalink raw reply related

* [PATCH 2/3] IPv6: Add dontfrag argument to relevant functions
From: Brian Haley @ 2010-04-23 21:26 UTC (permalink / raw)
  To: davem, yoshfuji; +Cc: netdev
In-Reply-To: <1272057969-6526-2-git-send-email-brian.haley@hp.com>

Add dontfrag argument to relevant functions for
IPV6_DONTFRAG support, as well as allowing the value
to be passed-in via ancillary cmsg data.

Signed-off-by: Brian Haley <brian.haley@hp.com>
---
 include/net/ipv6.h       |    3 ++-
 include/net/transp_v6.h  |    3 ++-
 net/ipv6/datagram.c      |   21 ++++++++++++++++++++-
 net/ipv6/icmp.c          |    5 +++--
 net/ipv6/ip6_flowlabel.c |    3 ++-
 net/ipv6/ip6_output.c    |    2 +-
 net/ipv6/ipv6_sockglue.c |    3 ++-
 net/ipv6/raw.c           |    9 +++++++--
 net/ipv6/udp.c           |    9 +++++++--
 9 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b1d8db9..7ab6323 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -503,7 +503,8 @@ extern int			ip6_append_data(struct sock *sk,
 						struct ipv6_txoptions *opt,
 						struct flowi *fl,
 						struct rt6_info *rt,
-						unsigned int flags);
+						unsigned int flags,
+						int dontfrag);
 
 extern int			ip6_push_pending_frames(struct sock *sk);
 
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index d65381c..42a0eb6 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -44,7 +44,8 @@ extern int			datagram_send_ctl(struct net *net,
 						  struct msghdr *msg,
 						  struct flowi *fl,
 						  struct ipv6_txoptions *opt,
-						  int *hlimit, int *tclass);
+						  int *hlimit, int *tclass,
+						  int *dontfrag);
 
 #define		LOOPBACK4_IPV6		cpu_to_be32(0x7f000006)
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 622dc79..f5076d3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -497,7 +497,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 int datagram_send_ctl(struct net *net,
 		      struct msghdr *msg, struct flowi *fl,
 		      struct ipv6_txoptions *opt,
-		      int *hlimit, int *tclass)
+		      int *hlimit, int *tclass, int *dontfrag)
 {
 	struct in6_pktinfo *src_info;
 	struct cmsghdr *cmsg;
@@ -737,6 +737,25 @@ int datagram_send_ctl(struct net *net,
 
 			break;
 		    }
+
+		case IPV6_DONTFRAG:
+		    {
+			int df;
+
+			err = -EINVAL;
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+				goto exit_f;
+			}
+
+			df = *(int *)CMSG_DATA(cmsg);
+			if (df < 0 || df > 1)
+				goto exit_f;
+
+			err = 0;
+			*dontfrag = df;
+
+			break;
+		    }
 		default:
 			LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
 				       cmsg->cmsg_type);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 12d2fa4..ce79929 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -481,7 +481,7 @@ route_done:
 			      len + sizeof(struct icmp6hdr),
 			      sizeof(struct icmp6hdr), hlimit,
 			      np->tclass, NULL, &fl, (struct rt6_info*)dst,
-			      MSG_DONTWAIT);
+			      MSG_DONTWAIT, np->dontfrag);
 	if (err) {
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
 		ip6_flush_pending_frames(sk);
@@ -561,7 +561,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl,
-				(struct rt6_info*)dst, MSG_DONTWAIT);
+				(struct rt6_info*)dst, MSG_DONTWAIT,
+				np->dontfrag);
 
 	if (err) {
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 14e2321..1365468 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -360,7 +360,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 		msg.msg_control = (void*)(fl->opt+1);
 		flowi.oif = 0;
 
-		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk);
+		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk,
+					&junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 263d4cf..54d43dd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1105,7 +1105,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int offset, int len, int odd, struct sk_buff *skb),
 	void *from, int length, int transhdrlen,
 	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
-	struct rt6_info *rt, unsigned int flags)
+	struct rt6_info *rt, unsigned int flags, int dontfrag)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2bf9eda..bd43f01 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -458,7 +458,8 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk);
+		retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk,
+					 &junk);
 		if (retv)
 			goto done;
 update:
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8763b1a..44a84ea 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -733,6 +733,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int addr_len = msg->msg_namelen;
 	int hlimit = -1;
 	int tclass = -1;
+	int dontfrag = -1;
 	u16 proto;
 	int err;
 
@@ -811,7 +812,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
+					&tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -880,6 +882,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (tclass < 0)
 		tclass = np->tclass;
 
+	if (dontfrag < 0)
+		dontfrag = np->dontfrag;
+
 	if (msg->msg_flags&MSG_CONFIRM)
 		goto do_confirm;
 
@@ -890,7 +895,7 @@ back_from_confirm:
 		lock_sock(sk);
 		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
 			len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
-			msg->msg_flags);
+			msg->msg_flags, dontfrag);
 
 		if (err)
 			ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 92bf903..39e3665 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -919,6 +919,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int ulen = len;
 	int hlimit = -1;
 	int tclass = -1;
+	int dontfrag = -1;
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int err;
 	int connected = 0;
@@ -1049,7 +1050,8 @@ do_udp_sendmsg:
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
+					&tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -1120,6 +1122,9 @@ do_udp_sendmsg:
 	if (tclass < 0)
 		tclass = np->tclass;
 
+	if (dontfrag < 0)
+		dontfrag = np->dontfrag;
+
 	if (msg->msg_flags&MSG_CONFIRM)
 		goto do_confirm;
 back_from_confirm:
@@ -1143,7 +1148,7 @@ do_append_data:
 	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
 		sizeof(struct udphdr), hlimit, tclass, opt, &fl,
 		(struct rt6_info*)dst,
-		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
 	if (err)
 		udp_v6_flush_pending_frames(sk);
 	else if (!corkreq)
-- 
1.5.4.3


^ permalink raw reply related

* [PATCH 1/3] IPv6: data structure changes for new socket options
From: Brian Haley @ 2010-04-23 21:26 UTC (permalink / raw)
  To: davem, yoshfuji; +Cc: netdev
In-Reply-To: <1272057969-6526-1-git-send-email-brian.haley@hp.com>

Add underlying data structure changes and basic setsockopt()
and getsockopt() support for IPV6_RECVPATHMTU, IPV6_PATHMTU,
and IPV6_DONTFRAG.  IPV6_PATHMTU is actually fully functional
at this point.

Signed-off-by: Brian Haley <brian.haley@hp.com>
---
 include/linux/in6.h      |    2 +-
 include/linux/ipv6.h     |   13 ++++++++++---
 net/ipv6/ipv6_sockglue.c |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/include/linux/in6.h b/include/linux/in6.h
index 9b90cb2..c4bf46f 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -221,10 +221,10 @@ struct in6_flowlabel_req {
 #define IPV6_RTHDR		57
 #define IPV6_RECVDSTOPTS	58
 #define IPV6_DSTOPTS		59
-#if 0	/* not yet */
 #define IPV6_RECVPATHMTU	60
 #define IPV6_PATHMTU		61
 #define IPV6_DONTFRAG		62
+#if 0	/* not yet */
 #define IPV6_USE_MIN_MTU	63
 #endif
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 1bdbebf..1976942 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -21,6 +21,10 @@ struct in6_pktinfo {
 	int		ipi6_ifindex;
 };
 
+struct ip6_mtuinfo {
+	struct sockaddr_in6	ip6m_addr;
+	__u32			ip6m_mtu;
+};
 
 struct in6_ifreq {
 	struct in6_addr	ifr6_addr;
@@ -334,22 +338,25 @@ struct ipv6_pinfo {
 				dstopts:1,
 				odstopts:1,
                                 rxflow:1,
-				rxtclass:1;
+				rxtclass:1,
+				rxpmtu:1;
 		} bits;
 		__u16		all;
 	} rxopt;
 
 	/* sockopt flags */
-	__u8			recverr:1,
+	__u16			recverr:1,
 	                        sndflow:1,
 				pmtudisc:2,
 				ipv6only:1,
-				srcprefs:3;	/* 001: prefer temporary address
+				srcprefs:3,	/* 001: prefer temporary address
 						 * 010: prefer public address
 						 * 100: prefer care-of address
 						 */
+				dontfrag:1;
 	__u8			min_hopcount;
 	__u8			tclass;
+	__u8			padding;
 
 	__u32			dst_cookie;
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 92295ad..2bf9eda 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -337,6 +337,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		retv = 0;
 		break;
 
+	case IPV6_RECVPATHMTU:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->rxopt.bits.rxpmtu = valbool;
+		retv = 0;
+		break;
+
 	case IPV6_HOPOPTS:
 	case IPV6_RTHDRDSTOPTS:
 	case IPV6_RTHDR:
@@ -773,6 +780,9 @@ pref_skip_coa:
 		if (val < 0 || val > 255)
 			goto e_inval;
 		np->min_hopcount = val;
+		break;
+	case IPV6_DONTFRAG:
+		np->dontfrag = valbool;
 		retv = 0;
 		break;
 	}
@@ -1063,6 +1073,38 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->rxopt.bits.rxflow;
 		break;
 
+	case IPV6_RECVPATHMTU:
+		val = np->rxopt.bits.rxpmtu;
+		break;
+
+	case IPV6_PATHMTU:
+	{
+		struct dst_entry *dst;
+		struct ip6_mtuinfo mtuinfo;
+
+		if (len < sizeof(mtuinfo))
+			return -EINVAL;
+
+		len = sizeof(mtuinfo);
+		memset(&mtuinfo, 0, sizeof(mtuinfo));
+
+		rcu_read_lock();
+		dst = __sk_dst_get(sk);
+		if (dst)
+			mtuinfo.ip6m_mtu = dst_mtu(dst);
+		rcu_read_unlock();
+		if (!mtuinfo.ip6m_mtu)
+			return -ENOTCONN;
+
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &mtuinfo, len))
+			return -EFAULT;
+
+		return 0;
+		break;
+	}
+
 	case IPV6_UNICAST_HOPS:
 	case IPV6_MULTICAST_HOPS:
 	{
@@ -1128,6 +1170,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->min_hopcount;
 		break;
 
+	case IPV6_DONTFRAG:
+		val = np->dontfrag;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
1.5.4.3


^ permalink raw reply related

* [PATCH 0/3] IPv6: Add IPV6_RECVPATHMTU, IPV6_PATHMTU and IPV6_DONTFRAG support
From: Brian Haley @ 2010-04-23 21:26 UTC (permalink / raw)
  To: davem, yoshfuji; +Cc: netdev

This series adds support for IPV6_RECVPATHMTU, IPV6_PATHMTU, and
IPV6_DONTFRAG socket options as defined in RFC 3542.

 include/linux/in6.h      |    2 +-
 include/linux/ipv6.h     |   15 +++++-
 include/net/ipv6.h       |    5 ++-
 include/net/transp_v6.h  |    3 +-
 net/ipv6/af_inet6.c      |    3 +
 net/ipv6/datagram.c      |  108 +++++++++++++++++++++++++++++++++++++++++++++-
 net/ipv6/icmp.c          |    5 +-
 net/ipv6/ip6_flowlabel.c |    3 +-
 net/ipv6/ip6_output.c    |   26 +++++++----
 net/ipv6/ipv6_sockglue.c |   49 ++++++++++++++++++++-
 net/ipv6/raw.c           |   12 ++++-
 net/ipv6/udp.c           |   12 ++++-
 12 files changed, 219 insertions(+), 24 deletions(-)

^ permalink raw reply

* Re: eSwitch management
From: Anirban Chakraborty @ 2010-04-23 21:08 UTC (permalink / raw)
  To: Chris Wright
  Cc: Scott Feldman, David Miller, netdev@vger.kernel.org,
	Arnd Bergmann, Ameen Rahman, Amit Salecha, Rajesh Borundia,
	shemminger@vyatta.com
In-Reply-To: <20100423194455.GA3843@x200.localdomain>


On Apr 23, 2010, at 12:44 PM, Chris Wright wrote:

> * Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
>> On Apr 23, 2010, at 9:23 AM, Chris Wright wrote:
>>> * Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
>>>> It looks like ifla_vf_info does contain most of the data set. But if I use it, what NETLINK protocol family should I use in my driver to receive netlink messages? Do I need to create a private protocol family?
>>> 
>>> No, you don't need to use netlink in your driver.  You just need to fill
>>> in the relevant net_device_ops in your driver init.  Specifically:
>>> 
>>> *      SR-IOV management functions.
>>> * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
>>> * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
>>> * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
>>> * int (*ndo_get_vf_config)(struct net_device *dev,
>>> *                          int vf, struct ifla_vf_info *ivf);
>>> 
>>> These are all operating on a VF indexed internally w/in the driver, so it's
>>> a little cumbersome to use from userspace.
>> 
>> These are all intended for VFs and are configureable from PF.
> 
> Yes, and while the set of callbacks can change, they are always tied to
> some net_device (typically the PF) that knows how to make hardware
> settings on behalf of a VF.
> 
>> However, in our case, there are multiple physical NIC function on a
>> port which are configureable by the eswitch.
> 
> Is there a PCI function that represents the switch?  Or a special PCI
> NIC function that has VEB mgmt plane access?  And do you have examples
> of configuration that you'll do here?
There is no PCI function that represents the switch. However, one of the NIC functions can act as a privileged function to configure the eswitch. Typically the first NIC function that is enumerated on the bus manages the eswitch. Typical configurations would be to set the tx bandwidth, VLAN ID, MAC address, and promiscuous mode setting for each of these ports at the start of the day. This is useful in a virtualization scenario where we can do PCI passthrough of the functions to the guest, and these settings for the guest are configured via the driver in the host.

<snip>
> 
> One idea that has been discussed in the past is to create essentially
> a pluggable set of bridge_ops.  The first step would be purely internal
> shuffling, to make the existing sw bridge code go through the bridge_ops.
> The second step would be making your driver for whichever PCI function
> you have that supports managing the bridge create a net_device which is
> a bridge during driver init.  And now normal brctl can call into your
> VEB via the bridge_ops callbacks. </handwave>
> 
I liked the idea of iovnl as it works by utilizing port profile. That way the eswitch can be configured with the same port profile that a vswitch in a hypervisor has.

thanks,
Anirban





^ permalink raw reply

* Re: [PATCH] e100: expose broadcast_disabled as a module option
From: Erwan Velu @ 2010-04-23 21:03 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Jeff Kirsher, netdev, David Miller, linux-kernel,
	jesse.brandeburg, bruce.w.allan, alexander.h.duyck,
	peter.p.waskiewicz.jr, john.ronciak
In-Reply-To: <20100423135816.23f5861f@nehalam>

I first tried "ifconfig -broadcast" without any success, so I forced
the driver to unset IFF_BROADCAST; the interface no longer showed
the BROADCAST option with ifconfig, but I didn't notice any reduction
in the number of context switches on my host.

I found the broadcast_disabled option far more effective when considering
the CPU impact.


2010/4/23 Stephen Hemminger <shemminger@vyatta.com>:
> On Fri, 23 Apr 2010 13:22:22 -0700
> Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
>
>> On Fri, Apr 23, 2010 at 13:14, Erwan Velu <erwanaliasr1@gmail.com> wrote:
>> > Hi folks,
>> >
>> > I've been facing a very noisy network where hundreds broadcast packets
>> > were generated every second.
>> > When this traffic can't be controlled at the source, there is a side
>> > effect on some systems.
>> > I was having some idle systems that will never be targeted by this
>> > broadcast traffic that got loaded just by receiving that "flood".
>> > I mean by loaded that this light hardware was generating 300
>> > context/switches per second.
>> >
>> > I was looking for many options to avoid this traffic to disturb this
>> > hosts and I discovered that the e100 driver was featuring a
>> > "broadcast_disabled" configure option.
>> > I realize that this option is not controllable, so I wrote this simple
>> > patch that expose this option as a module option.
>> > This allow me to tell this hosts not to listen anymore this traffic.
>> >
>> > The result is clearly good as my systems are now running at 21
>> > context/switches while being idle.
>> > Hope this patch isn't too bad and could help others that faces the same problem.
>> >
>> > Patch can be downloaded here :
>> > http://konilope.linuxeries.org/e100_broadcast_disabled.patch
>> >
>> > Even if gmail is eating the inlined, patch, at least that make it
>> > easier to read it for humans.
>> > If the patch is acked, the downloaded one will be more clean ;)
>> >
>> > This patch was generated on top of the latest 2.6 torvald's git.
>> > Cheers,
>> > Erwan
>> >
>> > Signed-off-by: Erwan Velu <erwanaliasr1@gmail.com>
>> >
>> > diff --git a/drivers/net/e100.c b/drivers/net/e100.c
>> > index b997e57..2ba582f 100644
>> > --- a/drivers/net/e100.c
>> > +++ b/drivers/net/e100.c
>> > @@ -194,12 +194,15 @@ MODULE_FIRMWARE(FIRMWARE_D102E);
>> >  static int debug = 3;
>> >  static int eeprom_bad_csum_allow = 0;
>> >  static int use_io = 0;
>> > +static int broadcast_disabled = 0;
>> >  module_param(debug, int, 0);
>> >  module_param(eeprom_bad_csum_allow, int, 0);
>> >  module_param(use_io, int, 0);
>> > +module_param(broadcast_disabled, int, 0);
>> >  MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
>> >  MODULE_PARM_DESC(eeprom_bad_csum_allow, "Allow bad eeprom checksums");
>> >  MODULE_PARM_DESC(use_io, "Force use of i/o access mode");
>> > +MODULE_PARM_DESC(broadcast_disabled, "Filter broadcast packets
>> > (0=disabled (default), 1=enabled)");
>> >  #define DPRINTK(nlevel, klevel, fmt, args...) \
>> >        (void)((NETIF_MSG_##nlevel & nic->msg_enable) && \
>> >        printk(KERN_##klevel PFX "%s: %s: " fmt, nic->netdev->name, \
>> > @@ -1131,6 +1134,8 @@ static void e100_configure(struct nic *nic,
>> > struct cb *cb, struct sk_buff *skb)
>> >                config->promiscuous_mode = 0x1;         /* 1=on, 0=off */
>> >        }
>> >
>> > +       config->broadcast_disabled = broadcast_disabled; /* Broadcast filtering */
>> > +
>> >        if (nic->flags & multicast_all)
>> >                config->multicast_all = 0x1;            /* 1=accept, 0=no */
>> > --
>>
>> Adding Netdev...
>>
>
> What is wrong with using existing IFF_BROADCAST flag?
>
>
> --
>

^ permalink raw reply

* Re: [PATCH] e100: expose broadcast_disabled as a module option
From: Stephen Hemminger @ 2010-04-23 20:58 UTC (permalink / raw)
  To: Jeff Kirsher
  Cc: Erwan Velu, netdev, David Miller, linux-kernel, jesse.brandeburg,
	bruce.w.allan, alexander.h.duyck, peter.p.waskiewicz.jr,
	john.ronciak
In-Reply-To: <h2p9929d2391004231322r23528f32z8447a711a29e28ea@mail.gmail.com>

On Fri, 23 Apr 2010 13:22:22 -0700
Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:

> On Fri, Apr 23, 2010 at 13:14, Erwan Velu <erwanaliasr1@gmail.com> wrote:
> > Hi folks,
> >
> > I've been facing a very noisy network where hundreds broadcast packets
> > were generated every second.
> > When this traffic can't be controlled at the source, there is a side
> > effect on some systems.
> > I was having some idle systems that will never be targeted by this
> > broadcast traffic that got loaded just by receiving that "flood".
> > I mean by loaded that this light hardware was generating 300
> > context/switches per second.
> >
> > I was looking for many options to avoid this traffic to disturb this
> > hosts and I discovered that the e100 driver was featuring a
> > "broadcast_disabled" configure option.
> > I realize that this option is not controllable, so I wrote this simple
> > patch that expose this option as a module option.
> > This allow me to tell this hosts not to listen anymore this traffic.
> >
> > The result is clearly good as my systems are now running at 21
> > context/switches while being idle.
> > Hope this patch isn't too bad and could help others that faces the same problem.
> >
> > Patch can be downloaded here :
> > http://konilope.linuxeries.org/e100_broadcast_disabled.patch
> >
> > Even if gmail is eating the inlined, patch, at least that make it
> > easier to read it for humans.
> > If the patch is acked, the downloaded one will be more clean ;)
> >
> > This patch was generated on top of the latest 2.6 torvald's git.
> > Cheers,
> > Erwan
> >
> > Signed-off-by: Erwan Velu <erwanaliasr1@gmail.com>
> >
> > diff --git a/drivers/net/e100.c b/drivers/net/e100.c
> > index b997e57..2ba582f 100644
> > --- a/drivers/net/e100.c
> > +++ b/drivers/net/e100.c
> > @@ -194,12 +194,15 @@ MODULE_FIRMWARE(FIRMWARE_D102E);
> >  static int debug = 3;
> >  static int eeprom_bad_csum_allow = 0;
> >  static int use_io = 0;
> > +static int broadcast_disabled = 0;
> >  module_param(debug, int, 0);
> >  module_param(eeprom_bad_csum_allow, int, 0);
> >  module_param(use_io, int, 0);
> > +module_param(broadcast_disabled, int, 0);
> >  MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
> >  MODULE_PARM_DESC(eeprom_bad_csum_allow, "Allow bad eeprom checksums");
> >  MODULE_PARM_DESC(use_io, "Force use of i/o access mode");
> > +MODULE_PARM_DESC(broadcast_disabled, "Filter broadcast packets
> > (0=disabled (default), 1=enabled)");
> >  #define DPRINTK(nlevel, klevel, fmt, args...) \
> >        (void)((NETIF_MSG_##nlevel & nic->msg_enable) && \
> >        printk(KERN_##klevel PFX "%s: %s: " fmt, nic->netdev->name, \
> > @@ -1131,6 +1134,8 @@ static void e100_configure(struct nic *nic,
> > struct cb *cb, struct sk_buff *skb)
> >                config->promiscuous_mode = 0x1;         /* 1=on, 0=off */
> >        }
> >
> > +       config->broadcast_disabled = broadcast_disabled; /* Broadcast filtering */
> > +
> >        if (nic->flags & multicast_all)
> >                config->multicast_all = 0x1;            /* 1=accept, 0=no */
> > --
> 
> Adding Netdev...
> 

What is wrong with using existing IFF_BROADCAST flag?


-- 

^ permalink raw reply

* Re: DDoS attack causing bad effect on conntrack searches
From: Eric Dumazet @ 2010-04-23 20:57 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: paulmck, Patrick McHardy, Changli Gao, hawk,
	Linux Kernel Network Hackers, Netfilter Developers
In-Reply-To: <Pine.LNX.4.64.1004222213290.10919@ask.diku.dk>

Le jeudi 22 avril 2010 à 22:38 +0200, Jesper Dangaard Brouer a écrit :

> 
> I think its plausable, there is a lot of modification going on.
> Approx 40.000 deletes/sec and 40.000 inserts/sec.
> The hash bucket size is 300032, and with 80000 modifications/sec, we are 
> (potentially) changing 26.6% of the hash chains each second.
> 
> As can be seen from the graphs:
>   http://people.netfilter.org/hawk/DDoS/2010-04-12__001/list.html
> 
> Notice that primarily CPU2 is doing the 40k deletes/sec, while CPU1 is 
> caught searching...
> 
> 
> > maybe hash table has one slot :)
> 
> Guess I have to reproduce the DoS attack in a testlab (I will first have 
> time Tuesday).  So we can determine if its bad hashing or restart of the 
> search loop.
> 
> 
> The traffic pattern was fairly simple:
> 
> 200 bytes UDP packets, comming from approx 60 source IPs, going to one 
> destination IP.  The UDP destination port number was varied in the range 
> of 1 to 6000.   The source UDP port was varied a bit more, some ranging 
> from 32768 to 61000, and some from 1028 to 5000.
> 
> 

Re-reading this, I am not sure there is a real problem on RCU as you
pointed out.

With 800.000 entries in a 300.032-bucket hash table, each lookup hits
about 3 entries (aka 'searches' in conntrack stats).

300.000 packets/second -> 900.000 'searches' per second.

If you have four CPUs all trying to insert/delete entries in parallel, they
all hit the central conntrack lock.

In a DDoS scenario, every packet needs to take this lock twice,
once to free an old conntrack (early drop), once to insert a new entry.

To scale this, the only way would be to have an array of locks, like we have
for TCP/UDP hash tables.

I did some tests here, with a multiqueue card, flooded with 300.000
pack/second, 65.536 source IP, millions of flows, and nothing wrong
happened (but packets drops, of course)

My two cpus were busy 100%, after tweaking smp_affinities, because on
first try, irqbalance put "01" mask on both queues, so only one ksoftirq
was working, other cpu was idle :(




^ permalink raw reply

* Re: [PATCH net-next-2.6] rps: consistent rxhash
From: David Miller @ 2010-04-23 20:44 UTC (permalink / raw)
  To: therbert; +Cc: eric.dumazet, franco, xiaosuo, netdev
In-Reply-To: <g2m65634d661004211212t13714cccyd27936c520515684@mail.gmail.com>

From: Tom Herbert <therbert@google.com>
Date: Wed, 21 Apr 2010 12:12:41 -0700

> On Tue, Apr 20, 2010 at 2:41 PM, David Miller <davem@davemloft.net> wrote:
>> Eric, do you remember that "TCP friends" rough patch I sent you last
>> year that essentially made TCP sockets over loopback behave like
>> AF_UNIX ones and just queue the SKBs directly to the destination
>> socket without doing any protocol work?
>>
> 
> This sounds very interesting!  Could you post a patch? :-)

I'll see if I can find it, I sent it to Eric more than a year
ago...

The basic scheme was pretty simple:

1) Add "struct sock *friend" to struct sk_buff

2) TCP initial handshake SYN and SYN+ACK transmits set "skb->friend =
   sk" and TCP receive path notices this and stores this 'friend'
   socket pointer locally in the newly created connection socket.

   The purpose of skb->friend is to let the receiving socket on
   loopback see that the other end is on the local system and
   can be directly communicated to.

3) TCP sendmsg queues data directly to sk->friend's receive queue
   instead of sending TCP protocol packets.

The only complications come from making sendmsg and recvmsg not
try to do all of the sequence handling and checking, stuff like
that.  Also, URG would need to be dealt with somehow too.

I'm sure someone suitably motivated could get a working patch
going in no time :-)

^ permalink raw reply

* Re: [PATCH] NIU support for skb->rxhash
From: David Miller @ 2010-04-23 20:28 UTC (permalink / raw)
  To: therbert; +Cc: eric.dumazet, netdev
In-Reply-To: <h2z65634d661004230832o4f7a6d35ub207bc301ee8925c@mail.gmail.com>

From: Tom Herbert <therbert@google.com>
Date: Fri, 23 Apr 2010 08:32:02 -0700

>> I looked into implementing this and it doesn't work.  The
>> problem is GRO wants to look into the packet very early
>> and we want to batch GRO a set of packets into a big packet
>> before shooting them over to a remote cpu.
>>
> 
> Can you reconsider? :-)  The majority of our servers see packet loads
> which don't allow for much batching (a lot of small RPC messages), so
> for those GRO is mostly unnecessary overhead and mechanisms that
> improve unbatched packet performance are compelling.  Also, if a
> device already does LRO, I don't see that GRO could add a lot of value
> anyway.

LRO is extremely discouraged, because it has to be disabled
when any form of forwarding or bridging is enabled.  LRO is
done such that the input packet stream cannot be reconstituted
on transmit.

GRO on the other hand, allows proper reconstitution of the input
packet stream so it can be enabled unconditionally.

We are encouraging hardware manufacturers to tweak their receive
batching offload such that it matches the rules imposed by GRO
which allow proper reconstitution on transmit.

The fact is the code path is there and it is going to be enabled all
the time, so we have to cope with it.


^ permalink raw reply

* Re: [PATCH] e100: expose broadcast_disabled as a module option
From: Jeff Kirsher @ 2010-04-23 20:22 UTC (permalink / raw)
  To: Erwan Velu, netdev, David Miller
  Cc: linux-kernel, jesse.brandeburg, bruce.w.allan, alexander.h.duyck,
	peter.p.waskiewicz.jr, john.ronciak
In-Reply-To: <l2nb43bf5491004231314i13503c67yeccfc54bc1cae850@mail.gmail.com>

On Fri, Apr 23, 2010 at 13:14, Erwan Velu <erwanaliasr1@gmail.com> wrote:
> Hi folks,
>
> I've been facing a very noisy network where hundreds of broadcast packets
> were generated every second.
> When this traffic can't be controlled at the source, there is a side
> effect on some systems.
> I was having some idle systems that will never be targeted by this
> broadcast traffic that got loaded just by receiving that "flood".
> I mean by loaded that this light hardware was generating 300
> context/switches per second.
>
> I was looking for many options to avoid this traffic to disturb this
> hosts and I discovered that the e100 driver was featuring a
> "broadcast_disabled" configure option.
> I realize that this option is not controllable, so I wrote this simple
> patch that expose this option as a module option.
> This allow me to tell this hosts not to listen anymore this traffic.
>
> The result is clearly good as my systems are now running at 21
> context/switches while being idle.
> Hope this patch isn't too bad and could help others that faces the same problem.
>
> Patch can be downloaded here :
> http://konilope.linuxeries.org/e100_broadcast_disabled.patch
>
> Even if gmail is eating the inlined, patch, at least that make it
> easier to read it for humans.
> If the patch is acked, the downloaded one will be more clean ;)
>
> This patch was generated on top of the latest 2.6 torvald's git.
> Cheers,
> Erwan
>
> Signed-off-by: Erwan Velu <erwanaliasr1@gmail.com>
>
> diff --git a/drivers/net/e100.c b/drivers/net/e100.c
> index b997e57..2ba582f 100644
> --- a/drivers/net/e100.c
> +++ b/drivers/net/e100.c
> @@ -194,12 +194,15 @@ MODULE_FIRMWARE(FIRMWARE_D102E);
>  static int debug = 3;
>  static int eeprom_bad_csum_allow = 0;
>  static int use_io = 0;
> +static int broadcast_disabled = 0;
>  module_param(debug, int, 0);
>  module_param(eeprom_bad_csum_allow, int, 0);
>  module_param(use_io, int, 0);
> +module_param(broadcast_disabled, int, 0);
>  MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
>  MODULE_PARM_DESC(eeprom_bad_csum_allow, "Allow bad eeprom checksums");
>  MODULE_PARM_DESC(use_io, "Force use of i/o access mode");
> +MODULE_PARM_DESC(broadcast_disabled, "Filter broadcast packets
> (0=disabled (default), 1=enabled)");
>  #define DPRINTK(nlevel, klevel, fmt, args...) \
>        (void)((NETIF_MSG_##nlevel & nic->msg_enable) && \
>        printk(KERN_##klevel PFX "%s: %s: " fmt, nic->netdev->name, \
> @@ -1131,6 +1134,8 @@ static void e100_configure(struct nic *nic,
> struct cb *cb, struct sk_buff *skb)
>                config->promiscuous_mode = 0x1;         /* 1=on, 0=off */
>        }
>
> +       config->broadcast_disabled = broadcast_disabled; /* Broadcast filtering */
> +
>        if (nic->flags & multicast_all)
>                config->multicast_all = 0x1;            /* 1=accept, 0=no */
> --

Adding Netdev...

-- 
Cheers,
Jeff

^ permalink raw reply

* [PATCHv5] add mergeable receiver buffers support to vhost
From: David L Stevens @ 2010-04-23 20:06 UTC (permalink / raw)
  To: mst, rusty, kvm, virtualization; +Cc: netdev

This patch adds mergeable receive buffers support to vhost.

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>

diff -ruNp net-next-v0/drivers/vhost/net.c net-next-v5/drivers/vhost/net.c
--- net-next-v0/drivers/vhost/net.c	2010-04-22 11:31:57.000000000 -0700
+++ net-next-v5/drivers/vhost/net.c	2010-04-22 12:41:17.000000000 -0700
@@ -109,7 +109,7 @@ static void handle_tx(struct vhost_net *
 	};
 	size_t len, total_len = 0;
 	int err, wmem;
-	size_t hdr_size;
+	size_t vhost_hlen;
 	struct socket *sock = rcu_dereference(vq->private_data);
 	if (!sock)
 		return;
@@ -128,13 +128,13 @@ static void handle_tx(struct vhost_net *
 
 	if (wmem < sock->sk->sk_sndbuf / 2)
 		tx_poll_stop(net);
-	hdr_size = vq->hdr_size;
+	vhost_hlen = vq->vhost_hlen;
 
 	for (;;) {
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 NULL, NULL);
+		head = vhost_get_desc(&net->dev, vq, vq->iov,
+				      ARRAY_SIZE(vq->iov),
+				      &out, &in,
+				      NULL, NULL);
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
 			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
@@ -155,20 +155,20 @@ static void handle_tx(struct vhost_net *
 			break;
 		}
 		/* Skip header. TODO: support TSO. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
+		s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, out);
 		msg.msg_iovlen = out;
 		len = iov_length(vq->iov, out);
 		/* Sanity check */
 		if (!len) {
 			vq_err(vq, "Unexpected header len for TX: "
 			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
+			       iov_length(vq->hdr, s), vhost_hlen);
 			break;
 		}
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(NULL, sock, &msg, len);
 		if (unlikely(err < 0)) {
-			vhost_discard_vq_desc(vq);
+			vhost_discard_desc(vq, 1);
 			tx_poll_start(net, sock);
 			break;
 		}
@@ -187,12 +187,25 @@ static void handle_tx(struct vhost_net *
 	unuse_mm(net->dev.mm);
 }
 
+static int vhost_head_len(struct vhost_virtqueue *vq, struct sock *sk)
+{
+	struct sk_buff *head;
+	int len = 0;
+
+	lock_sock(sk);
+	head = skb_peek(&sk->sk_receive_queue);
+	if (head)
+		len = head->len + vq->sock_hlen;
+	release_sock(sk);
+	return len;
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
-	unsigned head, out, in, log, s;
+	unsigned in, log, s;
 	struct vhost_log *vq_log;
 	struct msghdr msg = {
 		.msg_name = NULL,
@@ -203,14 +216,14 @@ static void handle_rx(struct vhost_net *
 		.msg_flags = MSG_DONTWAIT,
 	};
 
-	struct virtio_net_hdr hdr = {
-		.flags = 0,
-		.gso_type = VIRTIO_NET_HDR_GSO_NONE
+	struct virtio_net_hdr_mrg_rxbuf hdr = {
+		.hdr.flags = 0,
+		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
 
 	size_t len, total_len = 0;
-	int err;
-	size_t hdr_size;
+	int err, headcount, datalen;
+	size_t vhost_hlen;
 	struct socket *sock = rcu_dereference(vq->private_data);
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
@@ -218,18 +231,18 @@ static void handle_rx(struct vhost_net *
 	use_mm(net->dev.mm);
 	mutex_lock(&vq->mutex);
 	vhost_disable_notify(vq);
-	hdr_size = vq->hdr_size;
+	vhost_hlen = vq->vhost_hlen;
 
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
 
-	for (;;) {
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 vq_log, &log);
+	while ((datalen = vhost_head_len(vq, sock->sk))) {
+		headcount = vhost_get_desc_n(vq, vq->heads, datalen+vhost_hlen,
+					     &in, vq_log, &log);
+		if (headcount < 0)
+			break;
 		/* OK, now we need to know about added descriptors. */
-		if (head == vq->num) {
+		if (!headcount) {
 			if (unlikely(vhost_enable_notify(vq))) {
 				/* They have slipped one in as we were
 				 * doing that: check again. */
@@ -241,46 +254,54 @@ static void handle_rx(struct vhost_net *
 			break;
 		}
 		/* We don't need to be notified again. */
-		if (out) {
-			vq_err(vq, "Unexpected descriptor format for RX: "
-			       "out %d, int %d\n",
-			       out, in);
-			break;
-		}
-		/* Skip header. TODO: support TSO/mergeable rx buffers. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
+		/* Skip header. TODO: support TSO. */
+		s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
 		msg.msg_iovlen = in;
 		len = iov_length(vq->iov, in);
 		/* Sanity check */
 		if (!len) {
 			vq_err(vq, "Unexpected header len for RX: "
 			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
+			       iov_length(vq->hdr, s), vhost_hlen);
 			break;
 		}
 		err = sock->ops->recvmsg(NULL, sock, &msg,
 					 len, MSG_DONTWAIT | MSG_TRUNC);
 		/* TODO: Check specific error and bomb out unless EAGAIN? */
 		if (err < 0) {
-			vhost_discard_vq_desc(vq);
+			vhost_discard_desc(vq, headcount);
 			break;
 		}
-		/* TODO: Should check and handle checksum. */
-		if (err > len) {
-			pr_err("Discarded truncated rx packet: "
-			       " len %d > %zd\n", err, len);
-			vhost_discard_vq_desc(vq);
+		if (err != datalen) {
+			pr_err("Discarded rx packet: "
+			       " len %d, expected %zd\n", err, datalen);
+			vhost_discard_desc(vq, headcount);
 			continue;
 		}
 		len = err;
-		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
+		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr,
+				     vhost_hlen);
 		if (err) {
 			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
 			       vq->iov->iov_base, err);
 			break;
 		}
-		len += hdr_size;
-		vhost_add_used_and_signal(&net->dev, vq, head, len);
+		/* TODO: Should check and handle checksum. */
+		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) {
+			struct virtio_net_hdr_mrg_rxbuf hdr;
+			struct iovec *iov = vhost_hlen ? vq->hdr : vq->iov;
+
+			if (memcpy_toiovecend(iov, (unsigned char *)&headcount,
+				      offsetof(typeof(hdr), num_buffers),
+				      sizeof(hdr.num_buffers))) {
+				vq_err(vq, "Failed num_buffers write");
+				vhost_discard_desc(vq, headcount);
+				break;
+			}
+		}
+		len += vhost_hlen;
+		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
+					    headcount);
 		if (unlikely(vq_log))
 			vhost_log_write(vq, vq_log, log, len);
 		total_len += len;
@@ -561,9 +582,24 @@ done:
 
 static int vhost_net_set_features(struct vhost_net *n, u64 features)
 {
-	size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ?
-		sizeof(struct virtio_net_hdr) : 0;
+	size_t vhost_hlen;
+	size_t sock_hlen;
 	int i;
+
+	if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
+		/* vhost provides vnet_hdr */
+		vhost_hlen = sizeof(struct virtio_net_hdr);
+		if (features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+			vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		sock_hlen = 0;
+	} else {
+		/* socket provides vnet_hdr */
+		vhost_hlen = 0;
+		if (features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+			sock_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		else
+			sock_hlen = sizeof(struct virtio_net_hdr);
+	}
 	mutex_lock(&n->dev.mutex);
 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
 	    !vhost_log_access_ok(&n->dev)) {
@@ -574,7 +610,8 @@ static int vhost_net_set_features(struct
 	smp_wmb();
 	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
 		mutex_lock(&n->vqs[i].mutex);
-		n->vqs[i].hdr_size = hdr_size;
+		n->vqs[i].vhost_hlen = vhost_hlen;
+		n->vqs[i].sock_hlen = sock_hlen;
 		mutex_unlock(&n->vqs[i].mutex);
 	}
 	vhost_net_flush(n);
diff -ruNp net-next-v0/drivers/vhost/vhost.c net-next-v5/drivers/vhost/vhost.c
--- net-next-v0/drivers/vhost/vhost.c	2010-04-22 11:31:57.000000000 -0700
+++ net-next-v5/drivers/vhost/vhost.c	2010-04-22 12:19:59.000000000 -0700
@@ -114,7 +114,8 @@ static void vhost_vq_reset(struct vhost_
 	vq->used_flags = 0;
 	vq->log_used = false;
 	vq->log_addr = -1ull;
-	vq->hdr_size = 0;
+	vq->vhost_hlen = 0;
+	vq->sock_hlen = 0;
 	vq->private_data = NULL;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
@@ -861,6 +862,53 @@ static unsigned get_indirect(struct vhos
 	return 0;
 }
 
+/* This is a multi-buffer version of vhost_get_vq_desc
+ * @vq		- the relevant virtqueue
+ * datalen	- data length we'll be reading
+ * @iovcount	- returned count of io vectors we fill
+ * @log		- vhost log
+ * @log_num	- log offset
+ *	returns number of buffer heads allocated, negative on error
+ */
+int vhost_get_desc_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+		     int datalen, int *iovcount, struct vhost_log *log,
+		     unsigned int *log_num)
+{
+	int out, in;
+	int seg = 0;		/* iov index */
+	int hc = 0;		/* head count */
+	int rv;
+
+	while (datalen > 0) {
+		if (hc >= VHOST_NET_MAX_SG) {
+			rv = -ENOBUFS;
+			goto err;
+		}
+		heads[hc].id = vhost_get_desc(vq->dev, vq, vq->iov+seg,
+					      ARRAY_SIZE(vq->iov)-seg, &out,
+					      &in, log, log_num);
+		if (heads[hc].id == vq->num) {
+			rv = 0;
+			goto err;
+		}
+		if (out || in <= 0) {
+			vq_err(vq, "unexpected descriptor format for RX: "
+				"out %d, in %d\n", out, in);
+			rv = -EINVAL;
+			goto err;
+		}
+		heads[hc].len = iov_length(vq->iov+seg, in);
+		datalen -= heads[hc].len;
+		hc++;
+		seg += in;
+	}
+	*iovcount = seg;
+	return hc;
+err:
+	vhost_discard_desc(vq, hc);
+	return rv;
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
@@ -868,7 +916,7 @@ static unsigned get_indirect(struct vhos
  *
  * This function returns the descriptor number found, or vq->num (which
  * is never a valid descriptor number) if none was found. */
-unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+unsigned vhost_get_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			   struct iovec iov[], unsigned int iov_size,
 			   unsigned int *out_num, unsigned int *in_num,
 			   struct vhost_log *log, unsigned int *log_num)
@@ -986,9 +1034,9 @@ unsigned vhost_get_vq_desc(struct vhost_
 }
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+void vhost_discard_desc(struct vhost_virtqueue *vq, int n)
 {
-	vq->last_avail_idx--;
+	vq->last_avail_idx -= n;
 }
 
 /* After we've used one of their buffers, we tell them about it.  We'll then
@@ -1017,6 +1065,54 @@ int vhost_add_used(struct vhost_virtqueu
 	if (unlikely(vq->log_used)) {
 		/* Make sure data is seen before log. */
 		smp_wmb();
+		log_write(vq->log_base, vq->log_addr + sizeof *vq->used->ring *
+			  (vq->last_used_idx % vq->num),
+			  sizeof *vq->used->ring);
+		log_write(vq->log_base, vq->log_addr, sizeof *vq->used->ring);
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	vq->last_used_idx++;
+	return 0;
+}
+
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+		   int count)
+{
+	struct vring_used_elem *used;
+	int start, n;
+
+	if (count <= 0)
+		return -EINVAL;
+
+	start = vq->last_used_idx % vq->num;
+	if (vq->num - start < count)
+		n = vq->num - start;
+	else
+		n = count;
+	used = vq->used->ring + start;
+	if (copy_to_user(used, heads, sizeof(heads[0])*n)) {
+		vq_err(vq, "Failed to write used");
+		return -EFAULT;
+	}
+	if (n < count) {	/* wrapped the ring */
+		used = vq->used->ring;
+		if (copy_to_user(used, heads+n, sizeof(heads[0])*(count-n))) {
+			vq_err(vq, "Failed to write used");
+			return -EFAULT;
+		}
+	}
+	/* Make sure buffer is written before we update index. */
+	smp_wmb();
+	if (put_user(vq->last_used_idx+count, &vq->used->idx)) {
+		vq_err(vq, "Failed to increment used idx");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Make sure data is seen before log. */
+		smp_wmb();
 		/* Log used ring entry write. */
 		log_write(vq->log_base,
 			  vq->log_addr +
@@ -1029,7 +1125,7 @@ int vhost_add_used(struct vhost_virtqueu
 		if (vq->log_ctx)
 			eventfd_signal(vq->log_ctx, 1);
 	}
-	vq->last_used_idx++;
+	vq->last_used_idx += count;
 	return 0;
 }
 
@@ -1062,6 +1158,15 @@ void vhost_add_used_and_signal(struct vh
 	vhost_signal(dev, vq);
 }
 
+/* multi-buffer version of vhost_add_used_and_signal */
+void vhost_add_used_and_signal_n(struct vhost_dev *dev,
+				 struct vhost_virtqueue *vq,
+				 struct vring_used_elem *heads, int count)
+{
+	vhost_add_used_n(vq, heads, count);
+	vhost_signal(dev, vq);
+}
+
 /* OK, now we need to know about added descriptors. */
 bool vhost_enable_notify(struct vhost_virtqueue *vq)
 {
@@ -1086,7 +1191,7 @@ bool vhost_enable_notify(struct vhost_vi
 		return false;
 	}
 
-	return avail_idx != vq->last_avail_idx;
+	return avail_idx != vq->avail_idx;
 }
 
 /* We don't need to be notified again. */
diff -ruNp net-next-v0/drivers/vhost/vhost.h net-next-v5/drivers/vhost/vhost.h
--- net-next-v0/drivers/vhost/vhost.h	2010-03-22 12:04:38.000000000 -0700
+++ net-next-v5/drivers/vhost/vhost.h	2010-04-22 11:35:54.000000000 -0700
@@ -84,7 +84,9 @@ struct vhost_virtqueue {
 	struct iovec indirect[VHOST_NET_MAX_SG];
 	struct iovec iov[VHOST_NET_MAX_SG];
 	struct iovec hdr[VHOST_NET_MAX_SG];
-	size_t hdr_size;
+	size_t vhost_hlen;
+	size_t sock_hlen;
+	struct vring_used_elem heads[VHOST_NET_MAX_SG];
 	/* We use a kind of RCU to access private pointer.
 	 * All readers access it from workqueue, which makes it possible to
 	 * flush the workqueue instead of synchronize_rcu. Therefore readers do
@@ -120,16 +122,23 @@ long vhost_dev_ioctl(struct vhost_dev *,
 int vhost_vq_access_ok(struct vhost_virtqueue *vq);
 int vhost_log_access_ok(struct vhost_dev *);
 
-unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+int vhost_get_desc_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
+		     int datalen, int *iovcount, struct vhost_log *log,
+		     unsigned int *log_num);
+unsigned vhost_get_desc(struct vhost_dev *, struct vhost_virtqueue *,
 			   struct iovec iov[], unsigned int iov_count,
 			   unsigned int *out_num, unsigned int *in_num,
 			   struct vhost_log *log, unsigned int *log_num);
-void vhost_discard_vq_desc(struct vhost_virtqueue *);
+void vhost_discard_desc(struct vhost_virtqueue *, int);
 
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
-void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
+int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
+		    int count);
 void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
-			       unsigned int head, int len);
+			       unsigned int id, int len);
+void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
+			       struct vring_used_elem *heads, int count);
+void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
 void vhost_disable_notify(struct vhost_virtqueue *);
 bool vhost_enable_notify(struct vhost_virtqueue *);
 
@@ -149,7 +158,8 @@ enum {
 	VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
 			 (1 << VIRTIO_RING_F_INDIRECT_DESC) |
 			 (1 << VHOST_F_LOG_ALL) |
-			 (1 << VHOST_NET_F_VIRTIO_NET_HDR),
+			 (1 << VHOST_NET_F_VIRTIO_NET_HDR) |
+			 (1 << VIRTIO_NET_F_MRG_RXBUF),
 };
 
 static inline int vhost_has_feature(struct vhost_dev *dev, int bit)



^ permalink raw reply

* Re: eSwitch management
From: Chris Wright @ 2010-04-23 19:44 UTC (permalink / raw)
  To: Anirban Chakraborty
  Cc: Chris Wright, Scott Feldman, David Miller, netdev@vger.kernel.org,
	Arnd Bergmann, Ameen Rahman, Amit Salecha, Rajesh Borundia,
	shemminger
In-Reply-To: <8A4C54B1-B5E5-461D-9699-38526B9CEBF4@qlogic.com>

* Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
> On Apr 23, 2010, at 9:23 AM, Chris Wright wrote:
> > * Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
> >> It looks like ifla_vf_info does contain most of the data set. But if I use it, what NETLINK protocol family should I use in my driver to receive netlink messages? Do I need to create a private protocol family?
> > 
> > No, you don't need to use netlink in your driver.  You just need to fill
> > in the relevant net_device_ops in your driver init.  Specifically:
> > 
> > *      SR-IOV management functions.
> > * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
> > * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
> > * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
> > * int (*ndo_get_vf_config)(struct net_device *dev,
> > *                          int vf, struct ifla_vf_info *ivf);
> > 
> > These are all operating on a VF indexed internally w/in the driver, so it's
> > a little cumbersome to use from userspace.
> 
> These are all intended for VFs and are configureable from PF.

Yes, and while the set of callbacks can change, they are always tied to
some net_device (typically the PF) that knows how to make hardware
settings on behalf of a VF.

> However, in our case, there are multiple physical NIC function on a
> port which are configureable by the eswitch.

Is there a PCI function that represents the switch?  Or a special PCI
NIC function that has VEB mgmt plane access?  And do you have examples
of configuration that you'll do here?

> So, what we are setting
> is essentially switch ports, rather than configuring any setting on the
> physical functions. If netlink doesn't fly, is sysfs going to work?

Before we go to implementation specifics (i.e. netlink vs. sysfs, and my
guess is sysfs isn't going to be the right fit), let's step back and
look at what needs setting.

> If
> we allocate a buffer and fill it up with user space tools that the driver
> grabs it and does the configuration itself?

One idea that has been discussed in the past is to create essentially
a pluggable set of bridge_ops.  The first step would be purely internal
shuffling, to make the existing sw bridge code go through the bridge_ops.
The second step would be making your driver for whichever PCI function
you have that supports managing the bridge create a net_device which is
a bridge during driver init.  And now normal brctl can call into your
VEB via the bridge_ops callbacks. </handwave>

But this too starts w/ looking at what the management requirements are
for your bridge.  Can you enumerate those?

thanks,
-chris

^ permalink raw reply

* Re: [PATCH] RCU: don't turn off lockdep when find suspicious rcu_dereference_check() usage
From: Paul E. McKenney @ 2010-04-23 19:42 UTC (permalink / raw)
  To: Miles Lane
  Cc: Vivek Goyal, Eric Paris, Lai Jiangshan, Ingo Molnar,
	Peter Zijlstra, LKML, nauman, eric.dumazet, netdev, Jens Axboe,
	Gui Jianfeng, Li Zefan
In-Reply-To: <h2xa44ae5cd1004230550uf734c89eo2b1d1945d446068c@mail.gmail.com>

On Fri, Apr 23, 2010 at 08:50:59AM -0400, Miles Lane wrote:
> Hi Paul,
> There has been a bit of back and forth, and I am not sure what patches
> I should test now.
> Could you send me a bundle of whatever needs testing now?

Hello, Miles,

I am posting my set as replies to this message.  There are a couple
of KVM fixes that are going up via Avi's tree, and a number of networking
fixes that are going up via Dave Miller's tree -- a number of these
are against quickly changing code, so it didn't make sense for me to
keep them separately.

I believe that the two splats below are addressed by this patch set
carried in the networking tree:

	https://patchwork.kernel.org/patch/90754/

							Thanx, Paul

> I currently have a build of 2.6.34-rc5-git3 with the same patch I
> tested before applied.
> I notice a few minor differences in the warnings given.  I suspect
> these do not indicate
> new issues, since the trace from <IRQ> through <EOI> is the same as before.
> 
> [   60.174809] [ INFO: suspicious rcu_dereference_check() usage. ]
> [   60.174812] ---------------------------------------------------
> [   60.174816] net/mac80211/sta_info.c:886 invoked
> rcu_dereference_check() without protection!
> [   60.174820]
> [   60.174821] other info that might help us debug this:
> [   60.174822]
> [   60.174825]
> [   60.174826] rcu_scheduler_active = 1, debug_locks = 1
> [   60.174829] no locks held by wpa_supplicant/3973.
> [   60.174832]
> [   60.174833] stack backtrace:
> [   60.174838] Pid: 3973, comm: wpa_supplicant Not tainted 2.6.34-rc5-git3 #19
> [   60.174841] Call Trace:
> [   60.174844]  <IRQ>  [<ffffffff81067faa>] lockdep_rcu_dereference+0x9d/0xa5
> [   60.174873]  [<ffffffffa014e9ae>]
> ieee80211_find_sta_by_hw+0x46/0x10f [mac80211]
> [   60.174886]  [<ffffffffa014ea8e>] ieee80211_find_sta+0x17/0x19 [mac80211]
> [   60.174902]  [<ffffffffa01a60f2>] iwl_tx_queue_reclaim+0xdb/0x1b1 [iwlcore]
> [   60.174909]  [<ffffffff81068417>] ? mark_lock+0x2d/0x235
> [   60.174920]  [<ffffffffa01d5f1c>] iwl5000_rx_reply_tx+0x4a9/0x556 [iwlagn]
> [   60.174927]  [<ffffffff8120a2d3>] ? is_swiotlb_buffer+0x2e/0x3b
> [   60.174936]  [<ffffffffa01cebf4>] iwl_rx_handle+0x163/0x2b5 [iwlagn]
> [   60.174943]  [<ffffffff810688f0>] ? trace_hardirqs_on_caller+0xfa/0x13f
> [   60.174952]  [<ffffffffa01cf3ac>] iwl_irq_tasklet+0x2bb/0x3c0 [iwlagn]
> [   60.174959]  [<ffffffff810411df>] tasklet_action+0xa7/0x10f
> [   60.174965]  [<ffffffff810421f1>] __do_softirq+0x144/0x252
> [   60.174972]  [<ffffffff81003a8c>] call_softirq+0x1c/0x34
> [   60.174977]  [<ffffffff810050e4>] do_softirq+0x38/0x80
> [   60.174982]  [<ffffffff81041cbe>] irq_exit+0x45/0x94
> [   60.174987]  [<ffffffff81004829>] do_IRQ+0xad/0xc4
> [   60.174994]  [<ffffffff813cfb13>] ret_from_intr+0x0/0xf
> [   60.174997]  <EOI>  [<ffffffff810e5114>] ? kmem_cache_alloc+0xa9/0x15f
> [   60.175010]  [<ffffffff81342182>] ? __alloc_skb+0x3d/0x155
> [   60.175016]  [<ffffffff81342182>] __alloc_skb+0x3d/0x155
> [   60.175023]  [<ffffffff8133d237>] sock_alloc_send_pskb+0xc0/0x2e5
> [   60.175030]  [<ffffffff8133d46c>] sock_alloc_send_skb+0x10/0x12
> [   60.175036]  [<ffffffff813b1ab5>] unix_stream_sendmsg+0x117/0x2e2
> [   60.175044]  [<ffffffff811bdca8>] ? avc_has_perm+0x57/0x69
> [   60.175050]  [<ffffffff8133b892>] ? sock_aio_write+0x0/0xcf
> [   60.175056]  [<ffffffff813392c2>] __sock_sendmsg+0x59/0x64
> [   60.175062]  [<ffffffff8133b94d>] sock_aio_write+0xbb/0xcf
> [   60.175069]  [<ffffffff810e98b1>] do_sync_readv_writev+0xbc/0xfb
> [   60.175077]  [<ffffffff811c1726>] ? selinux_file_permission+0xa2/0xaf
> [   60.175082]  [<ffffffff810e9638>] ? copy_from_user+0x2a/0x2c
> [   60.175089]  [<ffffffff811baf85>] ? security_file_permission+0x11/0x13
> [   60.175095]  [<ffffffff810ea64e>] do_readv_writev+0xa2/0x122
> [   60.175101]  [<ffffffff810ead3b>] ? fcheck_files+0x8f/0xc9
> [   60.175107]  [<ffffffff810ea70c>] vfs_writev+0x3e/0x49
> [   60.175113]  [<ffffffff810ea7f2>] sys_writev+0x45/0x8e
> [   60.175119]  [<ffffffff81002b6b>] system_call_fastpath+0x16/0x1b
> 
> [   60.223213] [ INFO: suspicious rcu_dereference_check() usage. ]
> [   60.223216] ---------------------------------------------------
> [   60.223221] net/mac80211/sta_info.c:886 invoked
> rcu_dereference_check() without protection!
> [   60.223224]
> [   60.223225] other info that might help us debug this:
> [   60.223227]
> [   60.223230]
> [   60.223230] rcu_scheduler_active = 1, debug_locks = 1
> [   60.223234] no locks held by udisks-daemon/4398.
> [   60.223236]
> [   60.223237] stack backtrace:
> [   60.223242] Pid: 4398, comm: udisks-daemon Not tainted 2.6.34-rc5-git3 #19
> [   60.223245] Call Trace:
> [   60.223249]  <IRQ>  [<ffffffff81067faa>] lockdep_rcu_dereference+0x9d/0xa5
> [   60.223275]  [<ffffffffa014e9fe>]
> ieee80211_find_sta_by_hw+0x96/0x10f [mac80211]
> [   60.223288]  [<ffffffffa014ea8e>] ieee80211_find_sta+0x17/0x19 [mac80211]
> [   60.223304]  [<ffffffffa01a60f2>] iwl_tx_queue_reclaim+0xdb/0x1b1 [iwlcore]
> [   60.223310]  [<ffffffff81068417>] ? mark_lock+0x2d/0x235
> [   60.223321]  [<ffffffffa01d5f1c>] iwl5000_rx_reply_tx+0x4a9/0x556 [iwlagn]
> [   60.223329]  [<ffffffff8120a2d3>] ? is_swiotlb_buffer+0x2e/0x3b
> [   60.223338]  [<ffffffffa01cebf4>] iwl_rx_handle+0x163/0x2b5 [iwlagn]
> [   60.223344]  [<ffffffff810688f0>] ? trace_hardirqs_on_caller+0xfa/0x13f
> [   60.223353]  [<ffffffffa01cf3ac>] iwl_irq_tasklet+0x2bb/0x3c0 [iwlagn]
> [   60.223360]  [<ffffffff810411df>] tasklet_action+0xa7/0x10f
> [   60.223367]  [<ffffffff810421f1>] __do_softirq+0x144/0x252
> [   60.223374]  [<ffffffff81003a8c>] call_softirq+0x1c/0x34
> [   60.223379]  [<ffffffff810050e4>] do_softirq+0x38/0x80
> [   60.223384]  [<ffffffff81041cbe>] irq_exit+0x45/0x94
> [   60.223389]  [<ffffffff81004829>] do_IRQ+0xad/0xc4
> [   60.223396]  [<ffffffff813cfb13>] ret_from_intr+0x0/0xf
> [   60.223399]  <EOI>  [<ffffffff810e34f1>] ? kmem_cache_free+0xb0/0x134
> [   60.223412]  [<ffffffff810f391a>] ? putname+0x2d/0x36
> [   60.223417]  [<ffffffff810f391a>] putname+0x2d/0x36
> [   60.223423]  [<ffffffff810f5536>] user_path_at+0x5f/0x8e
> [   60.223429]  [<ffffffff81068671>] ? mark_held_locks+0x52/0x70
> [   60.223435]  [<ffffffff810e34ee>] ? kmem_cache_free+0xad/0x134
> [   60.223441]  [<ffffffff8106890a>] ? trace_hardirqs_on_caller+0x114/0x13f
> [   60.223447]  [<ffffffff81068942>] ? trace_hardirqs_on+0xd/0xf
> [   60.223454]  [<ffffffff810ed93f>] vfs_fstatat+0x32/0x5d
> [   60.223460]  [<ffffffff810ed9bb>] vfs_lstat+0x19/0x1b
> [   60.223465]  [<ffffffff810ed9d7>] sys_newlstat+0x1a/0x38
> [   60.223471]  [<ffffffff8106890a>] ? trace_hardirqs_on_caller+0x114/0x13f
> [   60.223477]  [<ffffffff813cec00>] ? trace_hardirqs_on_thunk+0x3a/0x3f
> [   60.223485]  [<ffffffff81002b6b>] system_call_fastpath+0x16/0x1b

^ permalink raw reply

* Re: [RFC 2/2] phylib: Convert MDIO bitbang to new MDIO 45 format
From: Andy Fleming @ 2010-04-23 19:39 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: davem, netdev
In-Reply-To: <1272018128.11697.37.camel@localhost>


On Apr 23, 2010, at 5:22 AM, Ben Hutchings wrote:

> On Thu, 2010-04-22 at 23:38 -0500, Andy Fleming wrote:
>> Now that we've added somewhat more complete MDIO 45 support to the PHY
>> Lib, convert the MDIO bitbang driver to use this new infrastructure.
>> 
>> Signed-off-by: Andy Fleming <afleming@freescale.com>
>> ---
>> drivers/net/phy/mdio-bitbang.c |   23 +++++++++++------------
>> 1 files changed, 11 insertions(+), 12 deletions(-)
>> 
>> diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c
>> index 2f6f02e..4c0c89b 100644
>> --- a/drivers/net/phy/mdio-bitbang.c
>> +++ b/drivers/net/phy/mdio-bitbang.c
> [...]
>> @@ -157,9 +154,10 @@ static int mdiobb_read(struct mii_bus *bus, int phy, int devad, int reg)
>> 	struct mdiobb_ctrl *ctrl = bus->priv;
>> 	int ret, i;
>> 
>> -	if (reg & MII_ADDR_C45) {
>> -		reg = mdiobb_cmd_addr(ctrl, phy, reg);
>> -		mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg);
>> +	/* Clause 22 PHYs only use devad = 0, and Clause 45 only use nonzero */
>> +	if (devad) {
>> +		mdiobb_cmd_addr(ctrl, phy, devad, reg);
>> +		mdiobb_cmd(ctrl, MDIO_C45_READ, phy, devad);
>> 	} else
>> 		mdiobb_cmd(ctrl, MDIO_READ, phy, reg);
>> 
> [...]
> 
> I don't believe there's any protocol requirement in clause 45 that
> devad != 0 (although the address is not allocated).  In the mdio module
> I played safe and defined MDIO_DEVAD_NONE == -1 to indicate a clause 22
> request.


Yeah, best to play it safe.  I'm also realizing that the bus probing code has the implicit assumption that the bus will either support clause 45 and therefore use device addresses, or will not support it, but if we support both on the same bus, the probe will not catch any clause 22 PHYs.

I will fix.

Also, thank you for your work on the mdio code!

Andy

^ permalink raw reply

* pull request: wireless-next-2.6 2010-04-23
From: John W. Linville @ 2010-04-23 19:01 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA

Dave,

Yet another huge batch of updates intended for 2.6.35.  The ath9k driver
in particular gets a lot of attention, and the iwlwifi team continues
its usual strong showing.

Please let me know if there are problems!  Again, this is for the
'for-davem' branch where I have pre-resolved some merge conflicts.

Thanks,

John

---

The following changes since commit c68ed255265968c3948fa2678bf59d15c471b055:
  Tom Herbert (1):
        bnx2x: add support for receive hashing

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next-2.6.git for-davem

Abhijeet Kolekar (1):
      iwlwifi: add debugfs ops to iwlwifi

Benoit Papillault (1):
      ath5k/ath9k: Fix 64 bits TSF reads

Christian Lamparter (1):
      p54pci: fix serious sparse warning

Dan Williams (2):
      libertas: consolidate SDIO firmware wait code
      libertas: Davinci platforms need more time loading helper firmware

Daniel Halperin (1):
      mac80211: fix typo in comments

Daniel Yingqiang Ma (1):
      ath9k: Group Key fix for VAPs

David Kilroy (3):
      orinoco: implement set_wiphy_params
      orinoco: use cfg80211_find_ie
      orinoco: have sparse check endian issues

Felix Fietkau (23):
      ath9k_hw: add silicon revision macros for AR9300
      ath9k_hw: add a macro for abstracting generic timer access
      ath9k_hw: fix a missing hex prefix for a register mask
      ath9k_hw: add simple register abstraction for some AR9300 registers
      ath9k_hw: add support for GPIO differences on AR9003
      ath9k_hw: Add AR9003 PHY register definitions
      ath9k_hw: Set the channel on AR9003
      ath9k_hw: Implement PLL control on AR9003
      ath9k_hw: Implement spur mitigation on AR9003
      ath9k_hw: Split off ANI control to the PHY ops
      ath9k: Add Rx EDMA support
      ath9k_hw: Split out the function for reading the noise floor
      ath9k_hw: move AR9280 PCI EEPROM fix to eeprom_def.c
      ath9k_hw: Update ath9k_hw_set_dma for AR9300
      ath9k: check for specific rx stuck conditions and recover from them
      ath9k: clean up tx buffer handling
      ath9k: update the MCS mask for MCS16 and above
      ath9k: update the ath_max_4ms_framelen table
      ath9k: reduce the bits_per_symbol table size, support more streams
      ath9k: initialize the number of tx/rx streams correctly
      mac80211: add flags for STBC (Space-Time Block Coding)
      ath9k: add support for Tx and Rx STBC
      ath9k: set the STBC flag in rate control if the peer supports it

Grazvydas Ignotas (3):
      wl1251: read default MAC address from EEPROM when available
      wl1251: register platform_device to pass board data
      wl1251: add support for dedicated IRQ line

Helmut Schaa (2):
      rt2x00: add txdesc parameter to write_tx_data
      rt2x00: rt2800pci: fix tx path by not accessing the skb after it was DMA mapped

Holger Schurig (2):
      mac80211: sample survey implementation for mac80211 & hwsim
      ath5k: basic support for survey

Johannes Berg (14):
      iwlwifi: remove scan_bands logic
      iwlwifi: correct atomic bitops usage
      iwlwifi: remove next_scan_jiffies
      iwlwifi: remove scan_pass_start
      iwlwifi: rename priv->scan to priv->scan_cmd
      iwlwifi: trigger scan synchronously
      iwlwifi: make BT coex config a virtual method
      iwlwifi: rename TX_CMD_FLG_BT_DIS_MSK
      iwlwifi: don't check monitor for scanning
      iwlwifi: remove monitor check
      iwlwifi: make scan antenna forcing more generic
      mac80211: fix stopping RX BA session from timer
      mac80211: add missing newline
      radiotap parser: fix endian annotation

John W. Linville (2):
      Merge branch 'wireless-next-2.6' of git://git.kernel.org/.../iwlwifi/iwlwifi-2.6
      Merge branch 'master' into for-davem

Juuso Oikarinen (2):
      mac80211: Prevent running sta_cleanup timer unnecessarily
      mac80211: Fix ieee80211_sta_conn_mon_timer with hw connection monitoring

Larry Finger (1):
      rtl818x: Move configuration details to the rtl818x directory

Luis R. Rodriguez (54):
      ath9k_hw: start building an abstraction layer for hardware routines
      ath9k_hw: AR9003 does not have AR_RC_AHB skip its setting
      ath9k_hw: remove wrapper ath9k_hw_write_regs()
      ath9k_hw: Move some RF ops to the private callbacks
      ath9k_hw: skip PLL initialization on AR9003 on Power-On-Reset
      ath9k_hw: add some comments for ath9k_set_power_network_sleep()
      ath9k_hw: add a private callback for PLL control computation
      ath9k_hw: Add AR9003 PHY support
      ath9k_hw: move init config and default after chip is up
      ath9k_hw: add the AR9003 ar9003_hw_macversion_supported()
      ath9k_hw: disable ANI for AR9003
      ath9k: disable the MIB interrupt if ANI is disabled
      ath9k_hw: add common channel select helpers for ar900[23]
      ath9k_hw: split initvals.h by hardware family
      ath9k_hw: add initvals for the AR9003 hardware family
      ath9k_hw: add helpers for processing the AR9003 INI
      ath9k_hw: add all the AR9003 PHY callbacks
      ath9k_hw: add a helper for Power Amplifier calibration for AR9002
      ath9k_hw: add a helper for the OLC tem compensation for AR9002
      ath9k_hw: rename PA calib for AR9287
      ath9k_hw: shift code for AR9280 OLC temp comp
      ath9k_hw: move the AR9280 OLC temp comp to its own helper
      ath9k_hw: simplify OLC temp compensation for AR9002
      ath9k_hw: rename the PA calib routines to match their families
      ath9k_hw: rename getNoiseFloorThresh() to ath9k_hw_loadnf()
      ath9k_hw: move the cal AR9100 calibration settings
      ath9k_hw: split calib code by hardware families
      ath9k_hw: add the AR9003 ar9003_hw_init_cal callback
      ath9k_hw: add the config_pci_powersave AR9003 callback
      ath9k_hw: split the generic hardware code by hardware family
      ath9k_hw: move the cck channel 14 INI to the AR9002 hw code
      ath9k_hw: move TX/RX gain INI stuff to its own hardware family code
      ath9k_hw: abstract the AR_PHY_AGC_CONTROL register access
      ath9k_hw: abstract loading noisefloor
      ath9k_hw: fill in the callbacks for calibration for AR9003
      ath9k_hw: complete AR9003 calibration
      ath9k_hw: rename eep_AR9287_ops to eep_ar9287_ops
      ath9k_hw: restore mac address reading logic
      ath9k_hw: add OFDM spur mitigation for AR9003
      ath9k_hw: move the RF claim stuff to AR9002 hardware family
      ath9k_hw: add the AR9300 SREV hw name print
      ath9k_hw: add TX/RX gain register initialization for AR9003
      ath9k_hw: skip asynch fifo enablement to AR9003
      ath9k_hw: skip WEP aggregation enable code for AR9003
      ath9k_hw: move AR9002 mac ops to its own file
      ath9k: add RXLP and RXHP to debugfs counters
      ath9k_hw: enable CRC check of descriptors for AR9003
      ath9k_hw: set cwmin and cwmax to 0 for for AR9003 upon txq reset
      mac80211: add LDPC control flag
      ath9k_hw: add LDPC support for AR9003
      ath9k: add LDPC support
      ath9k_hw: add the PCI ID for the first AR9300 device
      ath9k_hw: make two initvals consto for the AR9001 family
      ath9k_hw: make all AR9002 initvals use u32

Nishant Sarmukadam (1):
      cfg80211: Avoid sending IWEVASSOCREQIE and IWEVASSOCRESPIE events with NULL event body

Reinette Chatre (1):
      Merge branch 'wireless-2.6' into wireless-next-2.6

Samuel Ortiz (2):
      iwmc3200wifi: Fix sparse warnings
      iwmc3200wifi: check sparse endianness annotations

Senthil Balasubramanian (5):
      ath9k_hw: Add the PCI IDs for AR9300 and fill up the pci_id_tables
      ath9k_hw: update the chip tests for AR9003
      ath9k_hw: prevent reset control register zeroing on AR9003 reset
      ath9k_hw: the eep_map is used only for AR9280 PCI card ini fixup
      ath9k_hw: Implement AR9003 eeprom callbacks

Shanyu Zhao (2):
      iwlwifi: bring up 6000 Series 2x2 AGN Gen2 adapters
      iwlwifi: remove redundant iwl_dump_lq_cmd()

Stanislaw Gruszka (3):
      iwlwifi: check scan request ie_len
      iwlwifi: initialize iwl_wimax_coex_cmd.flags
      mac80211: document IEEE80211_CONF_CHANGE_QOS

Sujith (10):
      ath9k_htc: Cleanup beacon configuration
      ath: Add buffered register write operations
      ath9k_htc: Implement multiple register write support
      ath9k_hw: Add macros for multiple register writes
      ath9k_hw: Relocate Opmode initialization
      ath9k_hw: Use buffered register writes
      ath9k_htc: Remove GPIO set on unload
      ath9k_htc: Add dropped SKB count to debugfs
      ath9k_htc: Handle WMI timeouts properly
      ath9k_htc: Fix sparse endian warnings

Vasanthakumar Thiagarajan (26):
      ath9k_hw: Add hw cap flag for EDMA for the AR9003 family
      ath9k_hw: Fill few hw cap for edma
      ath9k_hw: Add abstraction for rx enable
      ath9k_hw: Fill rx_enable() for the AR9003 hardware family
      ath9k_hw: Add few routines for rx edma support
      ath9k_hw: Define tx control struct for AR9003
      ath9k_hw: Move code which populates ds_data to ath9k_hw
      ath9k_hw: Add abstraction to set/get link pointer
      ath9k: Use abstraction to get link pointer
      ath9k: Use memcpy in ath_clone_txbuf()
      ath9k: Remove ATH9K_TX_SW_ABORTED and introduce a bool for this purpose
      ath9k: Make bf_desc of ath_buf opaque
      ath9k_hw: Abstract the routine which returns interrupt status
      ath9k_hw: Initialize interrupt mask for AR9003
      ath9k_hw: Fill get_isr() for AR9003
      ath9k_hw: Configure Tx interrupt mitigation timer
      ath9k: Load SW filtered NF values and start NF cal during full reset for AR9003
      ath9k_hw: Define abstraction for tx desc access
      ath9k_hw: Add function to configure tx status ring buffer
      ath9k_hw: Fill descriptor abstrations for AR9003
      ath9k: Setup appropriate tx desc for regular dma and edma
      ath9k: Initialize and configure tx status for EDMA
      ath9k_hw: Compute pointer checksum over the link descriptor
      ath9k: Add Tx EDMA support
      ath9k: Enable TXOK and TXERR interrupts for TX EDMA
      ath9k_hw: Abort rx if hw is not coming out of full sleep in reset

Wey-Yi Guy (7):
      iwlwifi: set correct single/dual stream mask
      iwlwifi: more generic eeprom defines
      iwlwifi: remove duplicated debug functions
      iwlwifi: add hw revision for 6000g2 NIC
      iwlwifi: PA type for 6000g2 series
      iwlwifi: sanity check for turn on aggregation tid
      iwlwifi: more code clean up for agn devices

Xose Vazquez Perez (2):
      wireless: rt2x00: rt2800usb: identify Hawking devices
      wireless: rt2x00: rt2800usb: identify Allwin devices

 drivers/net/wireless/Kconfig                       |   85 +-
 drivers/net/wireless/ath/ath.h                     |   14 +-
 drivers/net/wireless/ath/ath5k/base.c              |   19 +
 drivers/net/wireless/ath/ath5k/pcu.c               |   31 +-
 drivers/net/wireless/ath/ath9k/Makefile            |   16 +-
 drivers/net/wireless/ath/ath9k/ani.c               |  208 +--
 drivers/net/wireless/ath/ath9k/ar5008_initvals.h   |  742 +++++++
 drivers/net/wireless/ath/ath9k/ar5008_phy.c        | 1375 +++++++++++++
 drivers/net/wireless/ath/ath9k/ar9001_initvals.h   | 1254 ++++++++++++
 drivers/net/wireless/ath/ath9k/ar9002_calib.c      | 1000 ++++++++++
 drivers/net/wireless/ath/ath9k/ar9002_hw.c         |  593 ++++++
 .../ath/ath9k/{initvals.h => ar9002_initvals.h}    | 2052 +-------------------
 drivers/net/wireless/ath/ath9k/ar9002_mac.c        |  480 +++++
 drivers/net/wireless/ath/ath9k/ar9002_phy.c        |  539 +++++
 drivers/net/wireless/ath/ath9k/ar9002_phy.h        |  572 ++++++
 drivers/net/wireless/ath/ath9k/ar9003_calib.c      |  802 ++++++++
 drivers/net/wireless/ath/ath9k/ar9003_eeprom.c     | 1856 ++++++++++++++++++
 drivers/net/wireless/ath/ath9k/ar9003_eeprom.h     |  323 +++
 drivers/net/wireless/ath/ath9k/ar9003_hw.c         |  205 ++
 drivers/net/wireless/ath/ath9k/ar9003_initvals.h   | 1793 +++++++++++++++++
 drivers/net/wireless/ath/ath9k/ar9003_mac.c        |  611 ++++++
 drivers/net/wireless/ath/ath9k/ar9003_mac.h        |  120 ++
 drivers/net/wireless/ath/ath9k/ar9003_phy.c        | 1142 +++++++++++
 drivers/net/wireless/ath/ath9k/ar9003_phy.h        |  847 ++++++++
 drivers/net/wireless/ath/ath9k/ath9k.h             |   24 +-
 drivers/net/wireless/ath/ath9k/beacon.c            |    5 +-
 drivers/net/wireless/ath/ath9k/calib.c             | 1089 +----------
 drivers/net/wireless/ath/ath9k/calib.h             |   19 +-
 drivers/net/wireless/ath/ath9k/common.h            |    4 +-
 drivers/net/wireless/ath/ath9k/debug.c             |   22 +-
 drivers/net/wireless/ath/ath9k/debug.h             |    4 +
 drivers/net/wireless/ath/ath9k/eeprom.c            |    9 +-
 drivers/net/wireless/ath/ath9k/eeprom.h            |   22 +-
 drivers/net/wireless/ath/ath9k/eeprom_4k.c         |   17 +-
 drivers/net/wireless/ath/ath9k/eeprom_9287.c       |    9 +-
 drivers/net/wireless/ath/ath9k/eeprom_def.c        |   13 +-
 drivers/net/wireless/ath/ath9k/hif_usb.c           |   13 +-
 drivers/net/wireless/ath/ath9k/htc.h               |   19 +-
 drivers/net/wireless/ath/ath9k/htc_drv_beacon.c    |   29 +-
 drivers/net/wireless/ath/ath9k/htc_drv_init.c      |  104 +-
 drivers/net/wireless/ath/ath9k/htc_drv_main.c      |   20 +-
 drivers/net/wireless/ath/ath9k/htc_drv_txrx.c      |   11 +-
 drivers/net/wireless/ath/ath9k/htc_hst.c           |    8 +-
 drivers/net/wireless/ath/ath9k/htc_hst.h           |   24 +-
 drivers/net/wireless/ath/ath9k/hw-ops.h            |  280 +++
 drivers/net/wireless/ath/ath9k/hw.c                | 1761 ++++-------------
 drivers/net/wireless/ath/ath9k/hw.h                |  253 +++-
 drivers/net/wireless/ath/ath9k/init.c              |   83 +-
 drivers/net/wireless/ath/ath9k/mac.c               |  490 ++---
 drivers/net/wireless/ath/ath9k/mac.h               |   67 +-
 drivers/net/wireless/ath/ath9k/main.c              |   82 +-
 drivers/net/wireless/ath/ath9k/pci.c               |    1 +
 drivers/net/wireless/ath/ath9k/phy.c               |  978 ----------
 drivers/net/wireless/ath/ath9k/phy.h               |  584 +------
 drivers/net/wireless/ath/ath9k/rc.c                |   13 +
 drivers/net/wireless/ath/ath9k/recv.c              |  518 ++++-
 drivers/net/wireless/ath/ath9k/reg.h               |  167 ++-
 drivers/net/wireless/ath/ath9k/wmi.c               |   16 +-
 drivers/net/wireless/ath/ath9k/wmi.h               |   19 +-
 drivers/net/wireless/ath/ath9k/xmit.c              |  488 ++++--
 drivers/net/wireless/iwlwifi/Makefile              |    1 +
 drivers/net/wireless/iwlwifi/iwl-1000.c            |    6 +
 drivers/net/wireless/iwlwifi/iwl-3945.c            |    2 +
 drivers/net/wireless/iwlwifi/iwl-3945.h            |    3 +
 drivers/net/wireless/iwlwifi/iwl-4965.c            |   15 +-
 drivers/net/wireless/iwlwifi/iwl-5000.c            |   11 +
 drivers/net/wireless/iwlwifi/iwl-6000.c            |   34 +-
 drivers/net/wireless/iwlwifi/iwl-agn-debugfs.c     |  834 ++++++++
 drivers/net/wireless/iwlwifi/iwl-agn-debugfs.h     |   56 +
 drivers/net/wireless/iwlwifi/iwl-agn-hcmd.c        |    2 +
 drivers/net/wireless/iwlwifi/iwl-agn-lib.c         |  403 ++++-
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c          |   47 +-
 drivers/net/wireless/iwlwifi/iwl-agn-tx.c          |   42 +-
 drivers/net/wireless/iwlwifi/iwl-agn-ucode.c       |   36 +-
 drivers/net/wireless/iwlwifi/iwl-agn.c             |   50 +-
 drivers/net/wireless/iwlwifi/iwl-agn.h             |    3 +
 drivers/net/wireless/iwlwifi/iwl-commands.h        |    2 +-
 drivers/net/wireless/iwlwifi/iwl-core.c            |   46 +-
 drivers/net/wireless/iwlwifi/iwl-core.h            |   18 +-
 drivers/net/wireless/iwlwifi/iwl-csr.h             |    1 +
 drivers/net/wireless/iwlwifi/iwl-debug.h           |    2 +
 drivers/net/wireless/iwlwifi/iwl-debugfs.c         |  770 +-------
 drivers/net/wireless/iwlwifi/iwl-dev.h             |    9 +-
 drivers/net/wireless/iwlwifi/iwl-eeprom.h          |   32 +-
 drivers/net/wireless/iwlwifi/iwl-prph.h            |   80 +-
 drivers/net/wireless/iwlwifi/iwl-scan.c            |  506 +-----
 drivers/net/wireless/iwlwifi/iwl-sta.c             |   13 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c        |   87 +-
 drivers/net/wireless/iwmc3200wifi/Makefile         |    2 +
 drivers/net/wireless/iwmc3200wifi/rx.c             |    3 +-
 drivers/net/wireless/iwmc3200wifi/trace.h          |    4 +-
 drivers/net/wireless/iwmc3200wifi/tx.c             |    4 +-
 drivers/net/wireless/libertas/if_sdio.c            |  103 +-
 drivers/net/wireless/mac80211_hwsim.c              |   28 +
 drivers/net/wireless/orinoco/Makefile              |    3 +
 drivers/net/wireless/orinoco/cfg.c                 |   88 +-
 drivers/net/wireless/orinoco/hw.c                  |   26 +
 drivers/net/wireless/orinoco/main.h                |   12 -
 drivers/net/wireless/orinoco/orinoco.h             |    2 +
 drivers/net/wireless/orinoco/scan.c                |    4 +-
 drivers/net/wireless/orinoco/wext.c                |  183 +--
 drivers/net/wireless/p54/p54pci.c                  |    2 +-
 drivers/net/wireless/rt2x00/rt2800pci.c            |   34 +-
 drivers/net/wireless/rt2x00/rt2800usb.c            |   26 +-
 drivers/net/wireless/rt2x00/rt2x00.h               |    3 +-
 drivers/net/wireless/rt2x00/rt2x00pci.c            |    3 +-
 drivers/net/wireless/rt2x00/rt2x00pci.h            |    3 +-
 drivers/net/wireless/rt2x00/rt2x00queue.c          |    3 +-
 drivers/net/wireless/rt2x00/rt2x00usb.c            |    3 +-
 drivers/net/wireless/rt2x00/rt2x00usb.h            |    3 +-
 drivers/net/wireless/rtl818x/Kconfig               |   88 +
 drivers/net/wireless/wl12xx/wl1251_main.c          |   63 +
 drivers/net/wireless/wl12xx/wl1251_reg.h           |    7 +
 drivers/net/wireless/wl12xx/wl1251_sdio.c          |   96 +-
 include/linux/ieee80211.h                          |    1 +
 include/linux/spi/wl12xx.h                         |    2 +
 include/net/cfg80211.h                             |    2 +-
 include/net/mac80211.h                             |   17 +-
 net/mac80211/agg-rx.c                              |   18 +-
 net/mac80211/agg-tx.c                              |    2 +-
 net/mac80211/cfg.c                                 |   12 +
 net/mac80211/driver-ops.h                          |    9 +
 net/mac80211/key.c                                 |    1 +
 net/mac80211/mlme.c                                |    5 +
 net/mac80211/sta_info.c                            |   13 +-
 net/mac80211/tx.c                                  |    7 +
 net/wireless/sme.c                                 |   16 +-
 127 files changed, 19021 insertions(+), 8935 deletions(-)
 create mode 100644 drivers/net/wireless/ath/ath9k/ar5008_initvals.h
 create mode 100644 drivers/net/wireless/ath/ath9k/ar5008_phy.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9001_initvals.h
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9002_calib.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9002_hw.c
 rename drivers/net/wireless/ath/ath9k/{initvals.h => ar9002_initvals.h} (78%)
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9002_mac.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9002_phy.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9002_phy.h
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_calib.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_eeprom.h
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_hw.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_initvals.h
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_mac.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_mac.h
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_phy.c
 create mode 100644 drivers/net/wireless/ath/ath9k/ar9003_phy.h
 create mode 100644 drivers/net/wireless/ath/ath9k/hw-ops.h
 delete mode 100644 drivers/net/wireless/ath/ath9k/phy.c
 create mode 100644 drivers/net/wireless/iwlwifi/iwl-agn-debugfs.c
 create mode 100644 drivers/net/wireless/iwlwifi/iwl-agn-debugfs.h
 create mode 100644 drivers/net/wireless/rtl818x/Kconfig

Omnibus patch is available here:

	http://www.kernel.org/pub/linux/kernel/people/linville/wireless-next-2.6-2010-04-23.patch.bz2

-- 
John W. Linville		Someday the world will need a hero, and you
linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org			might be all we have.  Be ready.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: eSwitch management
From: Anirban Chakraborty @ 2010-04-23 19:00 UTC (permalink / raw)
  To: Chris Wright
  Cc: Scott Feldman, David Miller, netdev@vger.kernel.org,
	Arnd Bergmann, Ameen Rahman, Amit Salecha, Rajesh Borundia
In-Reply-To: <20100423162307.GI30693@x200.localdomain>


On Apr 23, 2010, at 9:23 AM, Chris Wright wrote:

> * Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
>> 
>> On Apr 22, 2010, at 6:29 PM, Scott Feldman wrote:
>> 
>>> On 4/22/10 5:47 PM, "Scott Feldman" <scofeldm@cisco.com> wrote:
>>> 
>>>> On 4/22/10 4:16 PM, "Anirban Chakraborty" <anirban.chakraborty@qlogic.com>
>>>> wrote:
>>>> 
>>>>> I am following the discussions on iovnl patch closely. While it is going to
>>>>> take some time for iovnl patch to be reviewed and accepted, what would be the
>>>>> interim approach to manage the eswitch in NIC? We need to add support in
>>>>> qlcnic driver to configure the eswitch in our 10G NIC. Some of the things
>>>>> that
>>>>> we need to set to the switch are setting a port's VLAN, tx bandwidth etc. We
>>>>> would like to set these parameters for a bunch of ports at the start of the
>>>>> day and set it to the eswitch.
>>>> 
>>>> Are any of these settings covered in DCB?  (net/dcb/dcbnl.c).  Maybe you can
>>>> get a start there?  Not sure not knowing your device requirements.
>>> 
>>> Or maybe the RTM_SETLINK IFLA_VF_* ops in include/linux/if_link.h?  Those
>>> seem like what you're looking for.  I'm looking at moving iovnl here as well
>>> for port-profile.
>> 
>> It looks like ifla_vf_info does contain most of the data set. But if I use it, what NETLINK protocol family should I use in my driver to receive netlink messages? Do I need to create a private protocol family?
> 
> No, you don't need to use netlink in your driver.  You just need to fill
> in the relevant net_device_ops in your driver init.  Specifically:
> 
> *      SR-IOV management functions.
> * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
> * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
> * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
> * int (*ndo_get_vf_config)(struct net_device *dev,
> *                          int vf, struct ifla_vf_info *ivf);
> 
> These are all operating on a VF indexed internally w/in the driver, so it's
> a little cumbersome to use from userspace.

These are all intended for VFs and are configurable from the PF. However, in our case, there are multiple physical NIC functions on a port which are configurable by the eswitch. So, what we are setting is essentially switch ports, rather than configuring any setting on the physical functions. If netlink doesn't fly, is sysfs going to work? For example, could we allocate a buffer and fill it up with user space tools, so that the driver grabs it and does the configuration itself?

thanks,
Anirban



^ permalink raw reply

* RESPONSE NEEDED !
From: Dr Raymond Kuo Fung Chien @ 2010-04-23 16:50 UTC (permalink / raw)





Dear Friend,

I am Dr Raymond Kuo Fung CHIEN Executive Director and Chief Financial
Officer of the operations of the Hang Seng Bank Ltd.
Befor the U.S and Iraqi war our client Mr.Fayez A Mohammed a business
merchant made a fixed deposit of USD30 Million for 2Yrs where i was the
only one that knew about his deposits.

Upon maturity during the war in 2003,Fayez,his wife and only daugther died
in a bomb blast that hits His Resident.
Investigations showed that he didnt declear any next of kin.As a
foreigner,I want you to stand as the next of kin to claim the fund because
soon the fund will be claimed by my government if no one comes for it.

I have an attorney that will prepare all the documents to back you up as
the next of kin to Mr.Fayez A.Mohammed.Plz let me know your willingness so
that i can provide you with more details of this transaction.

contact me on email: drfungch111@yahoo.com.hk


1. Full name and Age

2. Occupation

3. Private/office phone number

4. Current residential address

Kind Regards,
Dr Raymond Kuo Fung CHIEN.


^ permalink raw reply

* [PATCH 2/2] gianfar: Fix potential oops during OF address translation
From: Anton Vorontsov @ 2010-04-23 17:12 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sandeep Gopalpet, linuxppc-dev

gianfar driver may pass NULL pointer to the of_translate_address(),
which may lead to a kernel oops. Fix this by using of_iomap(), which
is also much simpler and shorter.

Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>
---
 drivers/net/gianfar.c |    6 +-----
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 080d1ce..df49af3 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -549,12 +549,8 @@ static int gfar_parse_group(struct device_node *np,
 		struct gfar_private *priv, const char *model)
 {
 	u32 *queue_mask;
-	u64 addr, size;
-
-	addr = of_translate_address(np,
-			of_get_address(np, 0, &size, NULL));
-	priv->gfargrp[priv->num_grps].regs = ioremap(addr, size);
 
+	priv->gfargrp[priv->num_grps].regs = of_iomap(np, 0);
 	if (!priv->gfargrp[priv->num_grps].regs)
 		return -ENOMEM;
 
-- 
1.7.0.5

^ permalink raw reply related

* [PATCH 1/2] fsl_pq_mdio: Fix kernel oops during OF address translation
From: Anton Vorontsov @ 2010-04-23 17:12 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sandeep Gopalpet, linuxppc-dev

Old P1020RDB device trees were not specifying tbipa address for
MDIO nodes, which is now causing this kernel oops:

 ...
 eth2: TX BD ring size for Q[6]: 256
 eth2: TX BD ring size for Q[7]: 256
 Unable to handle kernel paging request for data at address 0x00000000
 Faulting instruction address: 0xc0015504
 Oops: Kernel access of bad area, sig: 11 [#1]
 ...
 NIP [c0015504] memcpy+0x3c/0x9c
 LR [c000a9f8] __of_translate_address+0xfc/0x21c
 Call Trace:
 [df839e00] [c000a94c] __of_translate_address+0x50/0x21c (unreliable)
 [df839e50] [c01a33e8] get_gfar_tbipa+0xb0/0xe0
 ...

The old device trees are buggy, though having a dead ethernet is
better than a dead kernel, so fix the issue by using of_iomap().

Also, a somewhat similar issue exists in the probe() routine, though
there the oops is only a possibility. Nonetheless, fix it too.

Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>
---
 drivers/net/fsl_pq_mdio.c |   20 ++++++++++++++------
 1 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/fsl_pq_mdio.c b/drivers/net/fsl_pq_mdio.c
index d5160ed..3acac5f 100644
--- a/drivers/net/fsl_pq_mdio.c
+++ b/drivers/net/fsl_pq_mdio.c
@@ -205,8 +205,6 @@ static int fsl_pq_mdio_find_free(struct mii_bus *new_bus)
 static u32 __iomem *get_gfar_tbipa(struct fsl_pq_mdio __iomem *regs, struct device_node *np)
 {
 	struct gfar __iomem *enet_regs;
-	u32 __iomem *ioremap_tbipa;
-	u64 addr, size;
 
 	/*
 	 * This is mildly evil, but so is our hardware for doing this.
@@ -220,9 +218,7 @@ static u32 __iomem *get_gfar_tbipa(struct fsl_pq_mdio __iomem *regs, struct devi
 		return &enet_regs->tbipa;
 	} else if (of_device_is_compatible(np, "fsl,etsec2-mdio") ||
 			of_device_is_compatible(np, "fsl,etsec2-tbi")) {
-		addr = of_translate_address(np, of_get_address(np, 1, &size, NULL));
-		ioremap_tbipa = ioremap(addr, size);
-		return ioremap_tbipa;
+		return of_iomap(np, 1);
 	} else
 		return NULL;
 }
@@ -279,6 +275,7 @@ static int fsl_pq_mdio_probe(struct of_device *ofdev,
 	u32 __iomem *tbipa;
 	struct mii_bus *new_bus;
 	int tbiaddr = -1;
+	const u32 *addrp;
 	u64 addr = 0, size = 0;
 	int err = 0;
 
@@ -297,8 +294,19 @@ static int fsl_pq_mdio_probe(struct of_device *ofdev,
 	new_bus->priv = priv;
 	fsl_pq_mdio_bus_name(new_bus->id, np);
 
+	addrp = of_get_address(np, 0, &size, NULL);
+	if (!addrp) {
+		err = -EINVAL;
+		goto err_free_bus;
+	}
+
 	/* Set the PHY base address */
-	addr = of_translate_address(np, of_get_address(np, 0, &size, NULL));
+	addr = of_translate_address(np, addrp);
+	if (addr == OF_BAD_ADDR) {
+		err = -EINVAL;
+		goto err_free_bus;
+	}
+
 	map = ioremap(addr, size);
 	if (!map) {
 		err = -ENOMEM;
-- 
1.7.0.5

^ permalink raw reply related

* Re: eSwitch management
From: Chris Wright @ 2010-04-23 16:23 UTC (permalink / raw)
  To: Anirban Chakraborty
  Cc: Scott Feldman, David Miller, netdev@vger.kernel.org,
	chrisw@redhat.com, Arnd Bergmann, Ameen Rahman, Amit Salecha,
	Rajesh Borundia
In-Reply-To: <DD92D5A8-1ECC-4440-BE81-ABDCC6847021@qlogic.com>

* Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
> 
> On Apr 22, 2010, at 6:29 PM, Scott Feldman wrote:
> 
> > On 4/22/10 5:47 PM, "Scott Feldman" <scofeldm@cisco.com> wrote:
> > 
> >> On 4/22/10 4:16 PM, "Anirban Chakraborty" <anirban.chakraborty@qlogic.com>
> >> wrote:
> >> 
> >>> I am following the discussions on iovnl patch closely. While it is going to
> >>> take some time for iovnl patch to be reviewed and accepted, what would be the
> >>> interim approach to manage the eswitch in NIC? We need to add support in
> >>> qlcnic driver to configure the eswitch in our 10G NIC. Some of the things
> >>> that
> >>> we need to set to the switch are setting a port's VLAN, tx bandwidth etc. We
> >>> would like to set these parameters for a bunch of ports at the start of the
> >>> day and set it to the eswitch.
> >> 
> >> Are any of these settings covered in DCB?  (net/dcb/dcbnl.c).  Maybe you can
> >> get a start there?  Not sure not knowing your device requirements.
> > 
> > Or maybe the RTM_SETLINK IFLA_VF_* ops in include/linux/if_link.h?  Those
> > seem like what you're looking for.  I'm looking at moving iovnl here as well
> > for port-profile.
> 
> It looks like ifla_vf_info does contain most of the data set. But if I use it, what NETLINK protocol family should I use in my driver to receive netlink messages? Do I need to create a private protocol family?

No, you don't need to use netlink in your driver.  You just need to fill
in the relevant net_device_ops in your driver init.  Specifically:

 *      SR-IOV management functions.
 * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
 * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
 * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
 * int (*ndo_get_vf_config)(struct net_device *dev,
 *                          int vf, struct ifla_vf_info *ivf);

These are all operating on a VF indexed internally w/in the driver, so it's
a little cumbersome to use from userspace.

thanks,
-chris

^ permalink raw reply

* Re: [PATCH] e100: Fix the TX workqueue race
From: Jeff Garzik @ 2010-04-23 16:20 UTC (permalink / raw)
  To: Alan Cox; +Cc: e1000-devel, netdev
In-Reply-To: <20100423143356.7092.45260.stgit@localhost.localdomain>

On 04/23/2010 10:34 AM, Alan Cox wrote:
> I'd assumed someone would have picked up on this and fixed it using rtnl_lock
> as was suggested but it seems to have fallen through the cracks ?
>
> Anyway this is I assume what was meant ?
>
> ---
>
> Nothing stops the workqueue being left to run in parallel with close or a
> few other operations. This causes double unmaps and the like.
>
> See kerneloops.org #1041230 for an example
>
> Signed-off-by: Alan Cox<alan@linux.intel.com>

Acked-by: Jeff Garzik <jgarzik@redhat.com>

Glad someone finally fixed this, it has bugged me for years...



------------------------------------------------------------------------------
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* Re: [PATCH] NIU support for skb->rxhash
From: Tom Herbert @ 2010-04-23 15:32 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, netdev
In-Reply-To: <20100423.011456.48472321.davem@davemloft.net>

> I looked into implementing this and it doesn't work.  The
> problem is GRO wants to look into the packet very early
> and we want to batch GRO a set of packets into a big packet
> before shooting them over to a remote cpu.
>

Can you reconsider? :-)  The majority of our servers see packet loads
which don't allow for much batching (a lot of small RPC messages), so
for those GRO is mostly unnecessary overhead and mechanisms that
improve unbatched packet performance are compelling.  Also, if a
device already does LRO, I don't see that GRO could add a lot of value
anyway.

Tom

> This reminds me that we can start using ->rxhash as a quick
> mismatch check in the GRO flow matcher.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: [PATCH 1/7] Topcliff GbE: Add The Main code
From: Arnd Bergmann @ 2010-04-23 15:27 UTC (permalink / raw)
  To: Masayuki Ohtake; +Cc: NETDEV, Wang, Yong Y, Wang, Qi, Intel OTC, Andrew

On Friday 23 April 2010, Masayuki Ohtake wrote:
> From: Masayuki Ohtake <masa-korg@dsn.okisemi.com>
> 
> This patch adds the Main code of GbE driver for Topcliff.
> The GbE driver needs all patch[1/7 to 7/7].
> 
> Signed-off-by: Masayuki Ohtake <masa-korg@dsn.okisemi.com>

I already commented on the "Topcliff PHUB: Add The Packet Hub driver"
submission. Many of my comments there apply here as well, but
there are a few more things that you may want to address in
future submissions:

> +static int
> +pch_gbe_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id);
> +static void pch_gbe_remove(struct pci_dev *pdev);
> +static int pch_gbe_suspend(struct pci_dev *pdev, pm_message_t state);
> +static int pch_gbe_resume(struct pci_dev *pdev);

Ideally, static functions are ordered such that the caller is
last, so you can drop all of the forward declarations like these.

> +/*!
> + * @ingroup PCI driver Layer
> + * @struct  pch_gbe_pcidev_id
> + * @brief   PCI Device ID Table
> + * @remarks
> + *  This is an instance of pci_device_id structure defined in linux/pci.h,
> + *  and holds information of the PCI devices that are supported by this
> driver.
> + */
> +static const struct pci_device_id pch_gbe_pcidev_id[3] = {
> + {.vendor = PCI_VENDOR_ID_INTEL,
> +  .device = PCI_DEVICE_ID_INTEL_IOH1_GBE,
> +  .subvendor = PCI_ANY_ID,
> +  .subdevice = PCI_ANY_ID,
> +  .class = (PCI_CLASS_NETWORK_ETHERNET << 8),
> +  .class_mask = (0xFFFF00)
> +  },
> + /* required last entry */
> + {0}
> +};

Your array size above is three, but you only define two members.
Better to make the array automatically sized. Also, it's clearer to
use the PCI_DEVICE_CLASS() helper macro, e.g.

static const struct pci_device_id pch_gbe_pcidev_id[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOH1_GBE) },
	{ 0 },
};

	Arnd

^ permalink raw reply

* Re: [PATCH 1/7] Topcliff GbE: Add The Main code [1/3]
From: Stephen Hemminger @ 2010-04-23 15:26 UTC (permalink / raw)
  To: Masayuki Ohtake; +Cc: NETDEV, Wang, Yong Y, Wang, Qi, Intel OTC, Andrew
In-Reply-To: <002d01cae2dd$1f6e42d0$66f8800a@maildom.okisemi.com>

On Fri, 23 Apr 2010 20:56:25 +0900
"Masayuki Ohtake" <masa-korg@dsn.okisemi.com> wrote:

Even though the patch was sent as an attachment, long lines were
wrapped.

Do you want this to go directly to kernel, or do you want help
fixing coding issues by submitting to staging tree?

The code uses a comment style that is kind of like the existing
docbook comment style; why not convert it to use the official
docbook style? For examples, look at other kernel code:
/**
 *	dev_alloc_skb - allocate an skbuff for receiving
 *	@length: length to allocate
 *
 *	Allocate a new &sk_buff and assign it a usage count of one. The
 *	buffer has unspecified headroom built in. Users should allocate
 *	the headroom they think they need without accounting for the
 *	built in space. The built in space is used for optimisations.
 *
 *	%NULL is returned if there is no free memory. Although this function
 *	allocates memory it can be called from an interrupt.
 */

The code is also indented with a non-standard indentation format.
Please read Documentation/CodingStyle.  Indentation is supposed
to be 8 characters (using tabs, which are 8 characters wide).


The PCI device table could be changed to:

static DEFINE_PCI_DEVICE_TABLE(pch_gbe_pcidev_id) = {
    { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOH1_GBE) },
    { 0 }
};
Also PCI_DEVICE_ID_INTEL_IOH1_GBE is not defined anywhere I can see.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox