Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 11/16] ipv4: Cache input routes in fib_info nexthops.
From: David Miller @ 2012-07-19 21:35 UTC (permalink / raw)
  To: netdev


Caching input routes is slightly simpler than output routes, since we
don't need to be concerned with nexthop exceptions.  (locally
destined, and routed packets, never trigger PMTU events or redirects
that will be processed by us).

However, we have to elide caching for the DIRECTSRC and non-zero itag
cases.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     |    1 +
 net/ipv4/fib_semantics.c |    2 ++
 net/ipv4/route.c         |   55 ++++++++++++++++++++++++++++++++++++----------
 3 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index fb62c59..e69c3a4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -82,6 +82,7 @@ struct fib_nh {
 	__be32			nh_saddr;
 	int			nh_saddr_genid;
 	struct rtable		*nh_rth_output;
+	struct rtable		*nh_rth_input;
 	struct fnhe_hash_bucket	*nh_exceptions;
 };
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 83d0f42..e55171f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -173,6 +173,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
 			free_nh_exceptions(nexthop_nh);
 		if (nexthop_nh->nh_rth_output)
 			dst_release(&nexthop_nh->nh_rth_output->dst);
+		if (nexthop_nh->nh_rth_input)
+			dst_release(&nexthop_nh->nh_rth_input->dst);
 	} endfor_nexthops(fi);
 
 	release_net(fi->fib_net);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8d8b3b2..fe0aad9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1225,6 +1225,9 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
 {
 	struct rtable *orig, *prev, **p = &nh->nh_rth_output;
 
+	if (rt_is_input_route(rt))
+		p = &nh->nh_rth_input;
+
 	orig = *p;
 
 	prev = cmpxchg(p, orig, rt);
@@ -1235,6 +1238,11 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
 	}
 }
 
+static bool rt_cache_valid(struct rtable *rt)
+{
+	return (rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK);
+}
+
 static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			   const struct fib_result *res,
 			   struct fib_nh_exception *fnhe,
@@ -1251,8 +1259,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
-		if (!(rt->dst.flags & DST_HOST) &&
-		    rt_is_output_route(rt))
+		if (!(rt->dst.flags & DST_HOST))
 			rt_cache_route(nh, rt);
 	}
 
@@ -1378,11 +1385,11 @@ static int __mkroute_input(struct sk_buff *skb,
 			   __be32 daddr, __be32 saddr, u32 tos,
 			   struct rtable **result)
 {
-	struct fib_nh_exception *fnhe;
 	struct rtable *rth;
 	int err;
 	struct in_device *out_dev;
 	unsigned int flags = 0;
+	bool do_cache;
 	u32 itag;
 
 	/* get a working reference to the output device */
@@ -1425,13 +1432,21 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
-	fnhe = NULL;
-	if (res->fi)
-		fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+	do_cache = false;
+	if (res->fi) {
+		if (!(flags & RTCF_DIRECTSRC) && !itag) {
+			rth = FIB_RES_NH(*res).nh_rth_input;
+			if (rt_cache_valid(rth)) {
+				dst_use(&rth->dst, jiffies);
+				goto out;
+			}
+			do_cache = true;
+		}
+	}
 
 	rth = rt_dst_alloc(out_dev->dev,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
-			   IN_DEV_CONF_GET(out_dev, NOXFRM), false);
+			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
 	if (!rth) {
 		err = -ENOBUFS;
 		goto cleanup;
@@ -1450,8 +1465,8 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
 
-	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
-
+	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
+out:
 	*result = rth;
 	err = 0;
  cleanup:
@@ -1503,6 +1518,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	struct rtable	*rth;
 	int		err = -EINVAL;
 	struct net    *net = dev_net(dev);
+	bool do_cache;
 
 	/* IP on this device is disabled. */
 
@@ -1516,6 +1532,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
 		goto martian_source;
 
+	res.fi = NULL;
 	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
 		goto brd_input;
 
@@ -1591,8 +1608,20 @@ brd_input:
 	RT_CACHE_STAT_INC(in_brd);
 
 local_input:
+	do_cache = false;
+	if (res.fi) {
+		if (!(flags & RTCF_DIRECTSRC) && !itag) {
+			rth = FIB_RES_NH(res).nh_rth_input;
+			if (rt_cache_valid(rth)) {
+				dst_use(&rth->dst, jiffies);
+				goto set_and_out;
+			}
+			do_cache = true;
+		}
+	}
+
 	rth = rt_dst_alloc(net->loopback_dev,
-			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
+			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
 	if (!rth)
 		goto e_nobufs;
 
@@ -1616,6 +1645,9 @@ local_input:
 		rth->dst.error= -err;
 		rth->rt_flags 	&= ~RTCF_LOCAL;
 	}
+	if (do_cache)
+		rt_cache_route(&FIB_RES_NH(res), rth);
+set_and_out:
 	skb_dst_set(skb, &rth->dst);
 	err = 0;
 	goto out;
@@ -1750,8 +1782,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 		fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
 		if (!fnhe) {
 			rth = FIB_RES_NH(*res).nh_rth_output;
-			if (rth &&
-			    rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) {
+			if (rt_cache_valid(rth)) {
 				dst_use(&rth->dst, jiffies);
 				return rth;
 			}
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 12/16] ipv4: Kill FLOWI_FLAG_RT_NOCACHE and associated code.
From: David Miller @ 2012-07-19 21:36 UTC (permalink / raw)
  To: netdev


Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h                 |    1 -
 include/net/inet_connection_sock.h |    3 +--
 net/dccp/ipv4.c                    |    2 +-
 net/ipv4/inet_connection_sock.c    |    5 +----
 net/ipv4/route.c                   |    3 ---
 net/ipv4/tcp_ipv4.c                |    4 ++--
 6 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/include/net/flow.h b/include/net/flow.h
index ce9cb76..e1dd508 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -21,7 +21,6 @@ struct flowi_common {
 	__u8	flowic_flags;
 #define FLOWI_FLAG_ANYSRC		0x01
 #define FLOWI_FLAG_CAN_SLEEP		0x02
-#define FLOWI_FLAG_RT_NOCACHE		0x04
 	__u32	flowic_secid;
 };
 
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 2cf44b4..5ee66f5 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -250,8 +250,7 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum);
 
 extern struct dst_entry* inet_csk_route_req(struct sock *sk,
 					    struct flowi4 *fl4,
-					    const struct request_sock *req,
-					    bool nocache);
+					    const struct request_sock *req);
 extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk,
 						   struct sock *newsk,
 						   const struct request_sock *req);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ab4f44c..25428d0 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -508,7 +508,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
 	struct dst_entry *dst;
 	struct flowi4 fl4;
 
-	dst = inet_csk_route_req(sk, &fl4, req, false);
+	dst = inet_csk_route_req(sk, &fl4, req);
 	if (dst == NULL)
 		goto out;
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0a290d7..db0cf17 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -368,8 +368,7 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
 
 struct dst_entry *inet_csk_route_req(struct sock *sk,
 				     struct flowi4 *fl4,
-				     const struct request_sock *req,
-				     bool nocache)
+				     const struct request_sock *req)
 {
 	struct rtable *rt;
 	const struct inet_request_sock *ireq = inet_rsk(req);
@@ -377,8 +376,6 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 	struct net *net = sock_net(sk);
 	int flags = inet_sk_flowi_flags(sk);
 
-	if (nocache)
-		flags |= FLOWI_FLAG_RT_NOCACHE;
 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fe0aad9..e5384f1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1830,9 +1830,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 
 	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
 
-	if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
-		rth->dst.flags |= DST_NOCACHE;
-
 	return rth;
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1d8b75a..59110ca 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -824,7 +824,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	struct sk_buff * skb;
 
 	/* First, grab a route. */
-	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL)
+	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
 	skb = tcp_make_synack(sk, dst, req, rvp);
@@ -1378,7 +1378,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (tmp_opt.saw_tstamp &&
 		    tcp_death_row.sysctl_tw_recycle &&
-		    (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
+		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
 		    fl4.daddr == saddr) {
 			if (!tcp_peer_is_proven(req, dst, true)) {
 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 13/16] ipv4: Dirty less cache lines in route caching paths.
From: David Miller @ 2012-07-19 21:36 UTC (permalink / raw)
  To: netdev


Don't bother incrementing dst->__use and setting dst->lastuse,
they are completely pointless and just slow things down.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e5384f1..1d4e317 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1437,7 +1437,7 @@ static int __mkroute_input(struct sk_buff *skb,
 		if (!(flags & RTCF_DIRECTSRC) && !itag) {
 			rth = FIB_RES_NH(*res).nh_rth_input;
 			if (rt_cache_valid(rth)) {
-				dst_use(&rth->dst, jiffies);
+				dst_hold(&rth->dst);
 				goto out;
 			}
 			do_cache = true;
@@ -1613,7 +1613,7 @@ local_input:
 		if (!(flags & RTCF_DIRECTSRC) && !itag) {
 			rth = FIB_RES_NH(res).nh_rth_input;
 			if (rt_cache_valid(rth)) {
-				dst_use(&rth->dst, jiffies);
+				dst_hold(&rth->dst);
 				goto set_and_out;
 			}
 			do_cache = true;
@@ -1783,7 +1783,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 		if (!fnhe) {
 			rth = FIB_RES_NH(*res).nh_rth_output;
 			if (rt_cache_valid(rth)) {
-				dst_use(&rth->dst, jiffies);
+				dst_hold(&rth->dst);
 				return rth;
 			}
 		}
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 14/16] ipv4: Kill rt->rt_oif
From: David Miller @ 2012-07-19 21:36 UTC (permalink / raw)
  To: netdev


Never actually used.

It was being set on output routes to the original OIF specified in the
flow key used for the lookup.

But the only user was in ipmr_rt_fib_lookup() which always runs on an
input route.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h     |    1 -
 net/ipv4/ipmr.c         |    1 -
 net/ipv4/route.c        |    5 -----
 net/ipv4/xfrm4_policy.c |    1 -
 4 files changed, 8 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index 3c1eeab..e789a92 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -50,7 +50,6 @@ struct rtable {
 
 	int			rt_route_iif;
 	int			rt_iif;
-	int			rt_oif;
 
 	/* Info on neighbour */
 	__be32			rt_gateway;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index eee3bf6..fa75f73 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1795,7 +1795,6 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
 		.daddr = iph->daddr,
 		.saddr = iph->saddr,
 		.flowi4_tos = RT_TOS(iph->tos),
-		.flowi4_oif = rt->rt_oif,
 		.flowi4_iif = rt->rt_iif,
 		.flowi4_mark = skb->mark,
 	};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1d4e317..b7e99a6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1326,7 +1326,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_type	= RTN_MULTICAST;
 	rth->rt_route_iif = dev->ifindex;
 	rth->rt_iif	= dev->ifindex;
-	rth->rt_oif	= 0;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
 	rth->fi = NULL;
@@ -1457,7 +1456,6 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_type = res->type;
 	rth->rt_route_iif = in_dev->dev->ifindex;
 	rth->rt_iif 	= in_dev->dev->ifindex;
-	rth->rt_oif 	= 0;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
 	rth->fi = NULL;
@@ -1636,7 +1634,6 @@ local_input:
 	rth->rt_type	= res.type;
 	rth->rt_route_iif = dev->ifindex;
 	rth->rt_iif	= dev->ifindex;
-	rth->rt_oif	= 0;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
 	rth->fi = NULL;
@@ -1802,7 +1799,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	rth->rt_type	= type;
 	rth->rt_route_iif = 0;
 	rth->rt_iif	= orig_oif ? : dev_out->ifindex;
-	rth->rt_oif	= orig_oif;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway = 0;
 	rth->fi = NULL;
@@ -2079,7 +2075,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 
 		rt->rt_route_iif = ort->rt_route_iif;
 		rt->rt_iif = ort->rt_iif;
-		rt->rt_oif = ort->rt_oif;
 		rt->rt_pmtu = ort->rt_pmtu;
 
 		rt->rt_genid = rt_genid(net);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 6074b69..3c99b4c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -81,7 +81,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 
 	xdst->u.rt.rt_route_iif = fl4->flowi4_iif;
 	xdst->u.rt.rt_iif = fl4->flowi4_iif;
-	xdst->u.rt.rt_oif = fl4->flowi4_oif;
 
 	xdst->u.dst.dev = dev;
 	dev_hold(dev);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 15/16] ipv4: Turn rt->rt_route_iif into rt->rt_is_input.
From: David Miller @ 2012-07-19 21:36 UTC (permalink / raw)
  To: netdev


That is this value's only use, as a boolean to indicate whether
a route is an input route or not.

So implement it that way, using a u16 gap present in the struct
already.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h     |    6 +++---
 net/ipv4/route.c        |   10 +++++-----
 net/ipv4/xfrm4_policy.c |    2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index e789a92..4bafe0b 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -47,8 +47,8 @@ struct rtable {
 	int			rt_genid;
 	unsigned int		rt_flags;
 	__u16			rt_type;
+	__u16			rt_is_input;
 
-	int			rt_route_iif;
 	int			rt_iif;
 
 	/* Info on neighbour */
@@ -61,12 +61,12 @@ struct rtable {
 
 static inline bool rt_is_input_route(const struct rtable *rt)
 {
-	return rt->rt_route_iif != 0;
+	return rt->rt_is_input != 0;
 }
 
 static inline bool rt_is_output_route(const struct rtable *rt)
 {
-	return rt->rt_route_iif == 0;
+	return rt->rt_is_input == 0;
 }
 
 static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b7e99a6..9819dfb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1324,7 +1324,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_genid	= rt_genid(dev_net(dev));
 	rth->rt_flags	= RTCF_MULTICAST;
 	rth->rt_type	= RTN_MULTICAST;
-	rth->rt_route_iif = dev->ifindex;
+	rth->rt_is_input= 1;
 	rth->rt_iif	= dev->ifindex;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
@@ -1454,7 +1454,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
 	rth->rt_flags = flags;
 	rth->rt_type = res->type;
-	rth->rt_route_iif = in_dev->dev->ifindex;
+	rth->rt_is_input = 1;
 	rth->rt_iif 	= in_dev->dev->ifindex;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
@@ -1632,7 +1632,7 @@ local_input:
 	rth->rt_genid = rt_genid(net);
 	rth->rt_flags 	= flags|RTCF_LOCAL;
 	rth->rt_type	= res.type;
-	rth->rt_route_iif = dev->ifindex;
+	rth->rt_is_input = 1;
 	rth->rt_iif	= dev->ifindex;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
@@ -1797,7 +1797,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	rth->rt_genid = rt_genid(dev_net(dev_out));
 	rth->rt_flags	= flags;
 	rth->rt_type	= type;
-	rth->rt_route_iif = 0;
+	rth->rt_is_input = 0;
 	rth->rt_iif	= orig_oif ? : dev_out->ifindex;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway = 0;
@@ -2073,7 +2073,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		if (new->dev)
 			dev_hold(new->dev);
 
-		rt->rt_route_iif = ort->rt_route_iif;
+		rt->rt_is_input = ort->rt_is_input;
 		rt->rt_iif = ort->rt_iif;
 		rt->rt_pmtu = ort->rt_pmtu;
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 3c99b4c..c628184 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -79,7 +79,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	struct rtable *rt = (struct rtable *)xdst->route;
 	const struct flowi4 *fl4 = &fl->u.ip4;
 
-	xdst->u.rt.rt_route_iif = fl4->flowi4_iif;
 	xdst->u.rt.rt_iif = fl4->flowi4_iif;
 
 	xdst->u.dst.dev = dev;
@@ -87,6 +86,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 
 	/* Sheit... I remember I did this right. Apparently,
 	 * it was magically lost, so this code needs audit */
+	xdst->u.rt.rt_is_input = rt->rt_is_input;
 	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
 					      RTCF_LOCAL);
 	xdst->u.rt.rt_type = rt->rt_type;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 16/16] ipv4: Kill rt->fi
From: David Miller @ 2012-07-19 21:36 UTC (permalink / raw)
  To: netdev


It's not really needed.

We only grabbed a reference to the fib_info for the sake of fib_info
local metrics.

However, fib_info objects are freed using RCU, as are therefore their
private metrics (if any).

We would have triggered a route cache flush if we eliminated a
reference to a fib_info object in the routing tables.

Therefore, any existing cached routes will first check and see that
they have been invalidated before an errant reference to these
metric values would occur.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h |    1 -
 net/ipv4/route.c    |   32 +-------------------------------
 2 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index 4bafe0b..60d611d 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -56,7 +56,6 @@ struct rtable {
 
 	/* Miscellaneous cached information */
 	u32			rt_pmtu;
-	struct fib_info		*fi; /* for client ref to shared metrics */
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9819dfb..1ee67f0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -141,7 +141,6 @@ static int ip_rt_min_advmss __read_mostly	= 256;
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
 static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
-static void		 ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
@@ -171,7 +170,6 @@ static struct dst_ops ipv4_dst_ops = {
 	.default_advmss =	ipv4_default_advmss,
 	.mtu =			ipv4_mtu,
 	.cow_metrics =		ipv4_cow_metrics,
-	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
 	.link_failure =		ipv4_link_failure,
@@ -1034,17 +1032,6 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 	return dst;
 }
 
-static void ipv4_dst_destroy(struct dst_entry *dst)
-{
-	struct rtable *rt = (struct rtable *) dst;
-
-	if (rt->fi) {
-		fib_info_put(rt->fi);
-		rt->fi = NULL;
-	}
-}
-
-
 static void ipv4_link_failure(struct sk_buff *skb)
 {
 	struct rtable *rt;
@@ -1158,15 +1145,6 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	return mtu;
 }
 
-static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
-{
-	if (fi->fib_metrics != (u32 *) dst_default_metrics) {
-		rt->fi = fi;
-		atomic_inc(&fi->fib_clntref);
-	}
-	dst_init_metrics(&rt->dst, fi->fib_metrics, true);
-}
-
 static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 {
 	struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -1255,7 +1233,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 			rt->rt_gateway = nh->nh_gw;
 		if (unlikely(fnhe))
 			rt_bind_exception(rt, fnhe, daddr);
-		rt_init_metrics(rt, fi);
+		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
 #endif
@@ -1328,7 +1306,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_iif	= dev->ifindex;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
-	rth->fi = NULL;
 	if (our) {
 		rth->dst.input= ip_local_deliver;
 		rth->rt_flags |= RTCF_LOCAL;
@@ -1458,7 +1435,6 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_iif 	= in_dev->dev->ifindex;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
-	rth->fi = NULL;
 
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
@@ -1636,7 +1612,6 @@ local_input:
 	rth->rt_iif	= dev->ifindex;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= 0;
-	rth->fi = NULL;
 	if (res.type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
 		rth->dst.error= -err;
@@ -1801,7 +1776,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	rth->rt_iif	= orig_oif ? : dev_out->ifindex;
 	rth->rt_pmtu	= 0;
 	rth->rt_gateway = 0;
-	rth->fi = NULL;
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2046,7 +2020,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
 static struct dst_ops ipv4_dst_blackhole_ops = {
 	.family			=	AF_INET,
 	.protocol		=	cpu_to_be16(ETH_P_IP),
-	.destroy		=	ipv4_dst_destroy,
 	.check			=	ipv4_blackhole_dst_check,
 	.mtu			=	ipv4_blackhole_mtu,
 	.default_advmss		=	ipv4_default_advmss,
@@ -2081,9 +2054,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
 		rt->rt_gateway = ort->rt_gateway;
-		rt->fi = ort->fi;
-		if (rt->fi)
-			atomic_inc(&rt->fi->fib_clntref);
 
 		dst_free(new);
 	}
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH 10/16] ipv4: Cache output routes in fib_info nexthops.
From: Eric Dumazet @ 2012-07-19 22:09 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20120719.143540.747492487297004222.davem@davemloft.net>

On Thu, 2012-07-19 at 14:35 -0700, David Miller wrote:
> If we have an output route that lacks nexthop exceptions, we can cache
> it in the FIB info nexthop.
> 
> Such routes will have DST_HOST cleared because such routes refer to a
> family of destinations, rather than just one.
> 
> The sequence of the handling of exceptions during route lookup is
> adjusted to make the logic work properly.
> 
> Before we allocate the route, we lookup the exception.
> 
> Then we know if we will cache this route or not, and therefore whether
> DST_HOST should be set on the allocated route.
> 
> Then we use DST_HOST to key off whether we should store the resulting
> route, during rt_set_nexthop(), in the FIB nexthop cache.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>


> +static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
> +{
> +	struct rtable *orig, *prev, **p = &nh->nh_rth_output;
> +
> +	orig = *p;
> +
> +	prev = cmpxchg(p, orig, rt);
> +	if (prev == orig) {
> +		dst_clone(&rt->dst);
> +		if (orig)
> +			dst_release(&orig->dst);
>  	}
>  }
>  

Hmm...

If we find orig not null, what can protect another cpu from reading
nh->nh_rth_output, then we dst_release(orig), and other cpu  does the
dst_use() too late ?

^ permalink raw reply

* Re: [PATCH 10/16] ipv4: Cache output routes in fib_info nexthops.
From: David Miller @ 2012-07-19 22:13 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1342735743.2626.5411.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 20 Jul 2012 00:09:03 +0200

> If we find orig not null, what can protect another cpu from reading
> nh->nh_rth_output, then we dst_release(orig), and other cpu  does the
> dst_use() too late ?

Indeed, we'd have to defer it somehow.

Actually, we might be able to fake this by using __dst_free() on it
or something similar.

But safest would be RCU, whose protection we are still using on the
read side.

Any better ideas?

^ permalink raw reply

* Re: [PATCH 10/16] ipv4: Cache output routes in fib_info nexthops.
From: Eric Dumazet @ 2012-07-19 22:19 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20120719.151312.1882340232448320251.davem@davemloft.net>

On Thu, 2012-07-19 at 15:13 -0700, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 20 Jul 2012 00:09:03 +0200
> 
> > If we find orig not null, what can protect another cpu from reading
> > nh->nh_rth_output, then we dst_release(orig), and other cpu  does the
> > dst_use() too late ?
> 
> Indeed, we'd have to defer it somehow.
> 
> Actually, we might be able to fake this by using __dst_free() on it
> or something similar.
> 
> But safest would be RCU, whose protection we are still using on the
> read side.
> 
> Any better ideas?

Well, its very late for me in France and I really need to sleep.

I hope I really can, now that you gave me the silly idea to work on a
linux patch ;)

RCU seems a good idea at first glance.

^ permalink raw reply

* [PATCH] net: ethernet: davinci_emac: add pm_runtime support
From: Mark A. Greer @ 2012-07-19 22:22 UTC (permalink / raw)
  To: netdev
  Cc: linux-omap, linux-arm-kernel, Mark A. Greer, Sekhar Nori,
	Kevin Hilman

From: "Mark A. Greer" <mgreer@animalcreek.com>

Add pm_runtime support to the TI Davinci EMAC driver.

CC: Sekhar Nori <nsekhar@ti.com>
CC: Kevin Hilman <khilman@ti.com>
Signed-off-by: Mark A. Greer <mgreer@animalcreek.com>
---

a) This patch depends on a patch by Kevin Hilman that has been
   accepted for 3.6 and is waiting in arm-soc/for-next:
   ce9dcb8784611c50974d1c6b600c71f5c0a29308
   (ARM: davinci: add runtime PM support for clock management)

b) Applies on top of current k.o. master:
   3e4b9459fb0e149c6b74c9e89399a8fc39a92b44
   (Merge tag 'md-3.5-fixes' of git://neil.brown.name/md)

 drivers/net/ethernet/ti/davinci_emac.c | 43 +++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 4da93a5..f9153b7 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -57,6 +57,7 @@
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
 #include <linux/davinci_emac.h>
 
 #include <asm/irq.h>
@@ -346,10 +347,6 @@ struct emac_priv {
 	void (*int_disable) (void);
 };
 
-/* clock frequency for EMAC */
-static struct clk *emac_clk;
-static unsigned long emac_bus_frequency;
-
 /* EMAC TX Host Error description strings */
 static char *emac_txhost_errcodes[16] = {
 	"No error", "SOP error", "Ownership bit not set in SOP buffer",
@@ -1535,6 +1532,8 @@ static int emac_dev_open(struct net_device *ndev)
 	int k = 0;
 	struct emac_priv *priv = netdev_priv(ndev);
 
+	pm_runtime_get(&priv->pdev->dev);
+
 	netif_carrier_off(ndev);
 	for (cnt = 0; cnt < ETH_ALEN; cnt++)
 		ndev->dev_addr[cnt] = priv->mac_addr[cnt];
@@ -1604,7 +1603,7 @@ static int emac_dev_open(struct net_device *ndev)
 				priv->phy_id);
 			ret = PTR_ERR(priv->phydev);
 			priv->phydev = NULL;
-			return ret;
+			goto err;
 		}
 
 		priv->link = 0;
@@ -1645,7 +1644,11 @@ rollback:
 		res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k-1);
 		m = res->end;
 	}
-	return -EBUSY;
+
+	ret = -EBUSY;
+err:
+	pm_runtime_put(&priv->pdev->dev);
+	return ret;
 }
 
 /**
@@ -1687,6 +1690,7 @@ static int emac_dev_stop(struct net_device *ndev)
 	if (netif_msg_drv(priv))
 		dev_notice(emac_dev, "DaVinci EMAC: %s stopped\n", ndev->name);
 
+	pm_runtime_put(&priv->pdev->dev);
 	return 0;
 }
 
@@ -1780,6 +1784,9 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev)
 	struct emac_platform_data *pdata;
 	struct device *emac_dev;
 	struct cpdma_params dma_params;
+	struct clk *emac_clk;
+	unsigned long emac_bus_frequency;
+
 
 	/* obtain emac clock from kernel */
 	emac_clk = clk_get(&pdev->dev, NULL);
@@ -1788,12 +1795,14 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 	emac_bus_frequency = clk_get_rate(emac_clk);
+	clk_put(emac_clk);
+
 	/* TODO: Probe PHY here if possible */
 
 	ndev = alloc_etherdev(sizeof(struct emac_priv));
 	if (!ndev) {
 		rc = -ENOMEM;
-		goto free_clk;
+		goto no_ndev;
 	}
 
 	platform_set_drvdata(pdev, ndev);
@@ -1909,15 +1918,13 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev)
 	SET_ETHTOOL_OPS(ndev, &ethtool_ops);
 	netif_napi_add(ndev, &priv->napi, emac_poll, EMAC_POLL_WEIGHT);
 
-	clk_enable(emac_clk);
-
 	/* register the network device */
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 	rc = register_netdev(ndev);
 	if (rc) {
 		dev_err(&pdev->dev, "error in register_netdev\n");
 		rc = -ENODEV;
-		goto netdev_reg_err;
+		goto no_irq_res;
 	}
 
 
@@ -1926,10 +1933,12 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev)
 			   "(regs: %p, irq: %d)\n",
 			   (void *)priv->emac_base_phys, ndev->irq);
 	}
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_resume(&pdev->dev);
+
 	return 0;
 
-netdev_reg_err:
-	clk_disable(emac_clk);
 no_irq_res:
 	if (priv->txchan)
 		cpdma_chan_destroy(priv->txchan);
@@ -1943,8 +1952,7 @@ no_dma:
 
 probe_quit:
 	free_netdev(ndev);
-free_clk:
-	clk_put(emac_clk);
+no_ndev:
 	return rc;
 }
 
@@ -1978,9 +1986,6 @@ static int __devexit davinci_emac_remove(struct platform_device *pdev)
 	iounmap(priv->remap_addr);
 	free_netdev(ndev);
 
-	clk_disable(emac_clk);
-	clk_put(emac_clk);
-
 	return 0;
 }
 
@@ -1992,8 +1997,6 @@ static int davinci_emac_suspend(struct device *dev)
 	if (netif_running(ndev))
 		emac_dev_stop(ndev);
 
-	clk_disable(emac_clk);
-
 	return 0;
 }
 
@@ -2002,8 +2005,6 @@ static int davinci_emac_resume(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct net_device *ndev = platform_get_drvdata(pdev);
 
-	clk_enable(emac_clk);
-
 	if (netif_running(ndev))
 		emac_dev_open(ndev);
 
-- 
1.7.11.2


^ permalink raw reply related

* Re: [GIT] net has been merged into net-next
From: Jeff Kirsher @ 2012-07-19 23:54 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20120719.113140.2043522088096284293.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 180 bytes --]

On Thu, 2012-07-19 at 11:31 -0700, David Miller wrote:
> Jeff, please double check my merge work on the ixgbevf driver, and send
> me any necessary fixups.
> 
> Thanks!

Ok

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [GIT] net has been merged into net-next
From: Jeff Kirsher @ 2012-07-20  0:36 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20120719.113140.2043522088096284293.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 252 bytes --]

On Thu, 2012-07-19 at 11:31 -0700, David Miller wrote:
> Jeff, please double check my merge work on the ixgbevf driver, and send
> me any necessary fixups.
> 
> Thanks!

Your conflict resolution was correct, so no fix-ups needed.  Thanks
Dave.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH 14/16] ipv4: Kill rt->rt_oif
From: Julian Anastasov @ 2012-07-20  1:10 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Patrick McHardy
In-Reply-To: <20120719.143616.1125314165355374962.davem@davemloft.net>


	Hello,

	Added CC to Patrick McHardy as mrule author...

On Thu, 19 Jul 2012, David Miller wrote:

> Never actually used.
> 
> It was being set on output routes to the original OIF specified in the
> flow key used for the lookup.
> 
> But the only user was in ipmr_rt_fib_lookup() which always runs on an
> input route.

	It can also work with output route, it seems
copy of locally originated traffic can be sent to local
mrouter for forwarding to remote recipients. Now we search
this mrouter with rules. We can loopback traffic to
local applications or to local multicast router (even
when no local applications are listening).

> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index eee3bf6..fa75f73 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -1795,7 +1795,6 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
>  		.daddr = iph->daddr,
>  		.saddr = iph->saddr,
>  		.flowi4_tos = RT_TOS(iph->tos),
> -		.flowi4_oif = rt->rt_oif,
>  		.flowi4_iif = rt->rt_iif,
>  		.flowi4_mark = skb->mark,

	But it was wrong at first place to use rt_iif
here. May be we should provide devices to "mrule" just
like we do for "rule", with the only difference that
oif now is possible to match outdev instead of preferred
device, i.e. oif is always set for output routes.

	.flowi4_oif = rt_is_output_route(rt) ?
			skb->dev->ifindex : 0,
	.flowi4_iif = rt_is_output_route(rt) ?
			net->loopback_dev->ifindex :
			skb->dev->ifindex;

	Before now it was risky to use just "mrule iif XXX"
because rt_iif contains the output device for output
routes and we can match output route by mistake.

	With above code it is now safe to use just
"iif eth0" for input routes and "iif lo" for output
routes. Here is what will see the mrule now:

local_app -> mrouter: lo->dev
remote_app -> mrouter: dev->0 (only iif match is possible)

	Let me know if patch is needed

> @@ -1802,7 +1799,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
>  	rth->rt_type	= type;
>  	rth->rt_route_iif = 0;
>  	rth->rt_iif	= orig_oif ? : dev_out->ifindex;
> -	rth->rt_oif	= orig_oif;
>  	rth->rt_pmtu	= 0;
>  	rth->rt_gateway = 0;
>  	rth->fi = NULL;

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* [net-next 0/9][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2012-07-20  1:23 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

This series contains updates to ixgbe.

The following are changes since commit 769162e38b91e1d300752e666260fa6c7b203fbc:
  Merge branch 'net' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next master

Alexander Duyck (9):
  ixgbe: Use VMDq offset to indicate the default pool
  ixgbe: Fix memory leak when SR-IOV VFs are direct assigned
  ixgbe: Drop references to deprecated pci_ DMA api and instead use
    dma_ API
  ixgbe: Cleanup configuration of FCoE registers
  ixgbe: Merge all FCoE percpu values into a single structure
  ixgbe: Make FCoE allocation and configuration closer to how rings
    work
  ixgbe: Correctly set SAN MAC RAR pool to default pool of PF
  ixgbe: Only enable anti-spoof on VF pools
  ixgbe: Enable FCoE FSO and CRC offloads based on CAPABLE instead of
    ENABLED flag

 drivers/net/ethernet/intel/ixgbe/ixgbe.h        |    5 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c  |    4 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |   45 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.h |    1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c   |  378 ++++++++++++-----------
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h   |   15 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |  117 +++----
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c  |   20 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h   |    3 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c   |    4 +
 10 files changed, 344 insertions(+), 248 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* [net-next 1/9] ixgbe: Use VMDq offset to indicate the default pool
From: Jeff Kirsher @ 2012-07-20  1:23 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change makes it so that we can use the VMDq ring feature offset value
to determine the default pool instead of using num_vfs.  The reason for
this change is to avoid issues should we fail to allocate vfinfo but have
pre-existing VFs.  What should happen in this case is that num_vfs will go
to 0, but the VMDq offset will contain the location of the first PF pool.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Sibai Li <Sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       |    2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |   33 +++++++++++-------------
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c |    5 ++++
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index f7f6fe2..c236500 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -113,7 +113,7 @@
 #define IXGBE_MAX_VFTA_ENTRIES          128
 #define MAX_EMULATION_MAC_ADDRS         16
 #define IXGBE_MAX_PF_MACVLANS           15
-#define VMDQ_P(p)   ((p) + adapter->num_vfs)
+#define VMDQ_P(p)   ((p) + adapter->ring_feature[RING_F_VMDQ].offset)
 #define IXGBE_82599_VF_DEVICE_ID        0x10ED
 #define IXGBE_X540_VF_DEVICE_ID         0x1515
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a3dc965..f110e88 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3118,7 +3118,7 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
 		psrtype |= 1 << 29;
 
 	for (p = 0; p < adapter->num_rx_pools; p++)
-		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(adapter->num_vfs + p),
+		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(p)),
 				psrtype);
 }
 
@@ -3135,12 +3135,12 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
 	vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
 	vmdctl |= IXGBE_VMD_CTL_VMDQ_EN;
 	vmdctl &= ~IXGBE_VT_CTL_POOL_MASK;
-	vmdctl |= (adapter->num_vfs << IXGBE_VT_CTL_POOL_SHIFT);
+	vmdctl |= VMDQ_P(0) << IXGBE_VT_CTL_POOL_SHIFT;
 	vmdctl |= IXGBE_VT_CTL_REPLEN;
 	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
 
-	vf_shift = adapter->num_vfs % 32;
-	reg_offset = (adapter->num_vfs >= 32) ? 1 : 0;
+	vf_shift = VMDQ_P(0) % 32;
+	reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0;
 
 	/* Enable only the PF's pool for Tx/Rx */
 	IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), (~0) << vf_shift);
@@ -3150,7 +3150,7 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
 	IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
 
 	/* Map PF MAC address in RAR Entry 0 to first pool following VFs */
-	hw->mac.ops.set_vmdq(hw, 0, adapter->num_vfs);
+	hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0));
 
 	/*
 	 * Set up VF register offsets for selected VT Mode,
@@ -3310,10 +3310,9 @@ static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
-	int pool_ndx = adapter->num_vfs;
 
 	/* add VID to filter table */
-	hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, true);
+	hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), true);
 	set_bit(vid, adapter->active_vlans);
 
 	return 0;
@@ -3323,10 +3322,9 @@ static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
-	int pool_ndx = adapter->num_vfs;
 
 	/* remove VID from filter table */
-	hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, false);
+	hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), false);
 	clear_bit(vid, adapter->active_vlans);
 
 	return 0;
@@ -3444,7 +3442,6 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
-	unsigned int vfn = adapter->num_vfs;
 	unsigned int rar_entries = IXGBE_MAX_PF_MACVLANS;
 	int count = 0;
 
@@ -3462,7 +3459,7 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev)
 			if (!rar_entries)
 				break;
 			hw->mac.ops.set_rar(hw, rar_entries--, ha->addr,
-					    vfn, IXGBE_RAH_AV);
+					    VMDQ_P(0), IXGBE_RAH_AV);
 			count++;
 		}
 	}
@@ -3536,12 +3533,14 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
 		vmolr |= IXGBE_VMOLR_ROPE;
 	}
 
-	if (adapter->num_vfs) {
+	if (adapter->num_vfs)
 		ixgbe_restore_vf_multicasts(adapter);
-		vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(adapter->num_vfs)) &
+
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(VMDQ_P(0))) &
 			 ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE |
 			   IXGBE_VMOLR_ROPE);
-		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr);
+		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(0)), vmolr);
 	}
 
 	/* This is useful for sniffing bad packets. */
@@ -4120,8 +4119,7 @@ void ixgbe_reset(struct ixgbe_adapter *adapter)
 	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
 
 	/* reprogram the RAR[0] in case user changed it. */
-	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, adapter->num_vfs,
-			    IXGBE_RAH_AV);
+	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, VMDQ_P(0), IXGBE_RAH_AV);
 }
 
 /**
@@ -6445,8 +6443,7 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p)
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
 	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
 
-	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, adapter->num_vfs,
-			    IXGBE_RAH_AV);
+	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, VMDQ_P(0), IXGBE_RAH_AV);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index d285443..c7d831d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -225,6 +225,11 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
 	IXGBE_WRITE_FLUSH(hw);
 
+	/* Disable VMDq flag so device will be set in VM mode */
+	if (adapter->ring_feature[RING_F_VMDQ].limit == 1)
+		adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
+	adapter->ring_feature[RING_F_VMDQ].offset = 0;
+
 	/* take a breather then clean up driver data */
 	msleep(100);
 
-- 
1.7.10.4

^ permalink raw reply related

* [net-next 2/9] ixgbe: Fix memory leak when SR-IOV VFs are direct assigned
From: Jeff Kirsher @ 2012-07-20  1:23 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

The VF driver had a memory leak that would occur if VFs were assigned to a
guest.  The amount of leak would vary with the number of VFs but could max
out at about 14K per PF.  To reproduce the leak all you would need to do is
enable all the VFs on the first PF.  Then start a loop of loading and
unloading the driver with max_vfs=63 for the first port.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c |   15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index c7d831d..4f22668 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -208,6 +208,17 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 	u32 vmdctl;
 	int i;
 
+	/* set num VFs to 0 to prevent access to vfinfo */
+	adapter->num_vfs = 0;
+
+	/* free VF control structures */
+	kfree(adapter->vfinfo);
+	adapter->vfinfo = NULL;
+
+	/* free macvlan list */
+	kfree(adapter->mv_list);
+	adapter->mv_list = NULL;
+
 #ifdef CONFIG_PCI_IOV
 	/* disable iov and allow time for transactions to clear */
 	pci_disable_sriov(adapter->pdev);
@@ -238,11 +249,7 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 		if (adapter->vfinfo[i].vfdev)
 			pci_dev_put(adapter->vfinfo[i].vfdev);
 	}
-	kfree(adapter->vfinfo);
-	kfree(adapter->mv_list);
-	adapter->vfinfo = NULL;
 
-	adapter->num_vfs = 0;
 	adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
 }
 
-- 
1.7.10.4

^ permalink raw reply related

* [net-next 3/9] ixgbe: Drop references to deprecated pci_ DMA api and instead use dma_ API
From: Jeff Kirsher @ 2012-07-20  1:24 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

The networking side of the code had already been updated to use dma_ calls
instead of the old pci_ calls. However it looks like the FCoE code was
never updated.  This change goes through and moves everything from the pci
APIs to the dma APIs.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c |   30 ++++++++++++-------------
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h |    4 ++--
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index cc28c44..9b0909f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -104,10 +104,10 @@ int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid)
 			udelay(100);
 	}
 	if (ddp->sgl)
-		pci_unmap_sg(adapter->pdev, ddp->sgl, ddp->sgc,
+		dma_unmap_sg(&adapter->pdev->dev, ddp->sgl, ddp->sgc,
 			     DMA_FROM_DEVICE);
 	if (ddp->pool) {
-		pci_pool_free(ddp->pool, ddp->udl, ddp->udp);
+		dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
 		ddp->pool = NULL;
 	}
 
@@ -144,7 +144,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	unsigned int thislen = 0;
 	u32 fcbuff, fcdmarw, fcfltrw, fcrxctl;
 	dma_addr_t addr = 0;
-	struct pci_pool *pool;
+	struct dma_pool *pool;
 	unsigned int cpu;
 
 	if (!netdev || !sgl)
@@ -176,7 +176,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	ixgbe_fcoe_clear_ddp(ddp);
 
 	/* setup dma from scsi command sgl */
-	dmacount = pci_map_sg(adapter->pdev, sgl, sgc, DMA_FROM_DEVICE);
+	dmacount = dma_map_sg(&adapter->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
 	if (dmacount == 0) {
 		e_err(drv, "xid 0x%x DMA map error\n", xid);
 		return 0;
@@ -185,7 +185,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	/* alloc the udl from per cpu ddp pool */
 	cpu = get_cpu();
 	pool = *per_cpu_ptr(fcoe->pool, cpu);
-	ddp->udl = pci_pool_alloc(pool, GFP_ATOMIC, &ddp->udp);
+	ddp->udl = dma_pool_alloc(pool, GFP_ATOMIC, &ddp->udp);
 	if (!ddp->udl) {
 		e_err(drv, "failed allocated ddp context\n");
 		goto out_noddp_unmap;
@@ -293,11 +293,11 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	return 1;
 
 out_noddp_free:
-	pci_pool_free(pool, ddp->udl, ddp->udp);
+	dma_pool_free(pool, ddp->udl, ddp->udp);
 	ixgbe_fcoe_clear_ddp(ddp);
 
 out_noddp_unmap:
-	pci_unmap_sg(adapter->pdev, sgl, sgc, DMA_FROM_DEVICE);
+	dma_unmap_sg(&adapter->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
 	put_cpu();
 	return 0;
 }
@@ -409,7 +409,7 @@ int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
 		break;
 	/* unmap the sg list when FCPRSP is received */
 	case __constant_cpu_to_le32(IXGBE_RXDADV_STAT_FCSTAT_FCPRSP):
-		pci_unmap_sg(adapter->pdev, ddp->sgl,
+		dma_unmap_sg(&adapter->pdev->dev, ddp->sgl,
 			     ddp->sgc, DMA_FROM_DEVICE);
 		ddp->err = ddp_err;
 		ddp->sgl = NULL;
@@ -566,12 +566,12 @@ int ixgbe_fso(struct ixgbe_ring *tx_ring,
 static void ixgbe_fcoe_ddp_pools_free(struct ixgbe_fcoe *fcoe)
 {
 	unsigned int cpu;
-	struct pci_pool **pool;
+	struct dma_pool **pool;
 
 	for_each_possible_cpu(cpu) {
 		pool = per_cpu_ptr(fcoe->pool, cpu);
 		if (*pool)
-			pci_pool_destroy(*pool);
+			dma_pool_destroy(*pool);
 	}
 	free_percpu(fcoe->pool);
 	fcoe->pool = NULL;
@@ -581,10 +581,10 @@ static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
 	unsigned int cpu;
-	struct pci_pool **pool;
+	struct dma_pool **pool;
 	char pool_name[32];
 
-	fcoe->pool = alloc_percpu(struct pci_pool *);
+	fcoe->pool = alloc_percpu(struct dma_pool *);
 	if (!fcoe->pool)
 		return;
 
@@ -592,9 +592,9 @@ static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
 	for_each_possible_cpu(cpu) {
 		snprintf(pool_name, 32, "ixgbe_fcoe_ddp_%d", cpu);
 		pool = per_cpu_ptr(fcoe->pool, cpu);
-		*pool = pci_pool_create(pool_name,
-					adapter->pdev, IXGBE_FCPTR_MAX,
-					IXGBE_FCPTR_ALIGN, PAGE_SIZE);
+		*pool = dma_pool_create(pool_name, &adapter->pdev->dev,
+					IXGBE_FCPTR_MAX, IXGBE_FCPTR_ALIGN,
+					PAGE_SIZE);
 		if (!*pool) {
 			e_err(drv, "failed to alloc DDP pool on cpu:%d\n", cpu);
 			ixgbe_fcoe_ddp_pools_free(fcoe);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
index 1dbed17..0ef231a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
@@ -62,11 +62,11 @@ struct ixgbe_fcoe_ddp {
 	struct scatterlist *sgl;
 	dma_addr_t udp;
 	u64 *udl;
-	struct pci_pool *pool;
+	struct dma_pool *pool;
 };
 
 struct ixgbe_fcoe {
-	struct pci_pool **pool;
+	struct dma_pool **pool;
 	atomic_t refcnt;
 	spinlock_t lock;
 	struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
-- 
1.7.10.4

^ permalink raw reply related

* [net-next 4/9] ixgbe: Cleanup configuration of FCoE registers
From: Jeff Kirsher @ 2012-07-20  1:24 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change makes it so we always use the FCoE redirection table.  We just
set all 8 entries to the same value in the case of only having one queue
for FCoE.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c |   58 +++++++++++++------------
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |    1 +
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index 9b0909f..a994570 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -618,6 +618,7 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
 	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_FCOE];
 	unsigned int cpu;
+	u32 etqf;
 
 	if (!fcoe->pool) {
 		spin_lock_init(&fcoe->lock);
@@ -665,40 +666,43 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 		}
 	}
 
-	/* Enable L2 eth type filter for FCoE */
-	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FCOE),
-			(ETH_P_FCOE | IXGBE_ETQF_FCOE | IXGBE_ETQF_FILTER_EN));
-	/* Enable L2 eth type filter for FIP */
-	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FIP),
-			(ETH_P_FIP | IXGBE_ETQF_FILTER_EN));
-	if (adapter->ring_feature[RING_F_FCOE].indices) {
-		/* Use multiple rx queues for FCoE by redirection table */
-		for (i = 0; i < IXGBE_FCRETA_SIZE; i++) {
-			fcoe_i = f->offset + i % f->indices;
-			fcoe_i &= IXGBE_FCRETA_ENTRY_MASK;
-			fcoe_q = adapter->rx_ring[fcoe_i]->reg_idx;
-			IXGBE_WRITE_REG(hw, IXGBE_FCRETA(i), fcoe_q);
-		}
-		IXGBE_WRITE_REG(hw, IXGBE_FCRECTL, IXGBE_FCRECTL_ENA);
-		IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FCOE), 0);
-	} else  {
-		/* Use single rx queue for FCoE */
-		fcoe_i = f->offset;
+	/* Enable L2 EtherType filter for FCoE, necessary for FCoE Rx CRC */
+	etqf = ETH_P_FCOE | IXGBE_ETQF_FCOE | IXGBE_ETQF_FILTER_EN;
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		etqf |= IXGBE_ETQF_POOL_ENABLE;
+		etqf |= VMDQ_P(0) << IXGBE_ETQF_POOL_SHIFT;
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FCOE), etqf);
+	IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FCOE), 0);
+
+	/* Use one or more Rx queues for FCoE by redirection table */
+	for (i = 0; i < IXGBE_FCRETA_SIZE; i++) {
+		fcoe_i = f->offset + (i % f->indices);
+		fcoe_i &= IXGBE_FCRETA_ENTRY_MASK;
 		fcoe_q = adapter->rx_ring[fcoe_i]->reg_idx;
-		IXGBE_WRITE_REG(hw, IXGBE_FCRECTL, 0);
-		IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FCOE),
-				IXGBE_ETQS_QUEUE_EN |
-				(fcoe_q << IXGBE_ETQS_RX_QUEUE_SHIFT));
+		IXGBE_WRITE_REG(hw, IXGBE_FCRETA(i), fcoe_q);
 	}
-	/* send FIP frames to the first FCoE queue */
-	fcoe_i = f->offset;
-	fcoe_q = adapter->rx_ring[fcoe_i]->reg_idx;
+	IXGBE_WRITE_REG(hw, IXGBE_FCRECTL, IXGBE_FCRECTL_ENA);
+
+	/* Enable L2 EtherType filter for FIP */
+	etqf = ETH_P_FIP | IXGBE_ETQF_FILTER_EN;
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		etqf |= IXGBE_ETQF_POOL_ENABLE;
+		etqf |= VMDQ_P(0) << IXGBE_ETQF_POOL_SHIFT;
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FIP), etqf);
+
+	/* Send FIP frames to the first FCoE queue */
+	fcoe_q = adapter->rx_ring[f->offset]->reg_idx;
 	IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FIP),
 			IXGBE_ETQS_QUEUE_EN |
 			(fcoe_q << IXGBE_ETQS_RX_QUEUE_SHIFT));
 
-	IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL, IXGBE_FCRXCTRL_FCCRCBO |
+	/* Configure FCoE Rx control */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL,
+			IXGBE_FCRXCTRL_FCCRCBO |
 			(FC_FCOE_VER << IXGBE_FCRXCTRL_FCOEVER_SHIFT));
+
 	return;
 out_pcpu_noddp_extra_buff_alloc_fail:
 	free_percpu(fcoe->pcpu_noddp);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 7416d22..482a33b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1452,6 +1452,7 @@ enum {
 #define IXGBE_ETQF_1588         0x40000000 /* bit 30 */
 #define IXGBE_ETQF_FILTER_EN    0x80000000 /* bit 31 */
 #define IXGBE_ETQF_POOL_ENABLE   (1 << 26) /* bit 26 */
+#define IXGBE_ETQF_POOL_SHIFT		20
 
 #define IXGBE_ETQS_RX_QUEUE     0x007F0000 /* bits 22:16 */
 #define IXGBE_ETQS_RX_QUEUE_SHIFT       16
-- 
1.7.10.4

^ permalink raw reply related

* [net-next 5/9] ixgbe: Merge all FCoE percpu values into a single structure
From: Jeff Kirsher @ 2012-07-20  1:24 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change merges the 2 statistics values for noddp and noddp_ext_buff
and the dma_pool into a single structure that can be allocated per CPU.

The advantages to this are several fold.  First we only need to do one
alloc_percpu call now instead of 3, so that means less overhead for
handling memory allocation failures.  Secondly in the case of
ixgbe_fcoe_ddp_setup we only need to call get_cpu once which makes things a
bit cleaner since we can drop a put_cpu() from the exception path.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c |  138 ++++++++++++-------------
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h |   11 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   23 ++---
 3 files changed, 86 insertions(+), 86 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index a994570..e7c463c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -134,6 +134,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	struct ixgbe_hw *hw;
 	struct ixgbe_fcoe *fcoe;
 	struct ixgbe_fcoe_ddp *ddp;
+	struct ixgbe_fcoe_ddp_pool *ddp_pool;
 	struct scatterlist *sg;
 	unsigned int i, j, dmacount;
 	unsigned int len;
@@ -144,8 +145,6 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	unsigned int thislen = 0;
 	u32 fcbuff, fcdmarw, fcfltrw, fcrxctl;
 	dma_addr_t addr = 0;
-	struct dma_pool *pool;
-	unsigned int cpu;
 
 	if (!netdev || !sgl)
 		return 0;
@@ -162,11 +161,6 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 		return 0;
 
 	fcoe = &adapter->fcoe;
-	if (!fcoe->pool) {
-		e_warn(drv, "xid=0x%x no ddp pool for fcoe\n", xid);
-		return 0;
-	}
-
 	ddp = &fcoe->ddp[xid];
 	if (ddp->sgl) {
 		e_err(drv, "xid 0x%x w/ non-null sgl=%p nents=%d\n",
@@ -175,22 +169,32 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	}
 	ixgbe_fcoe_clear_ddp(ddp);
 
+
+	if (!fcoe->ddp_pool) {
+		e_warn(drv, "No ddp_pool resources allocated\n");
+		return 0;
+	}
+
+	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, get_cpu());
+	if (!ddp_pool->pool) {
+		e_warn(drv, "xid=0x%x no ddp pool for fcoe\n", xid);
+		goto out_noddp;
+	}
+
 	/* setup dma from scsi command sgl */
 	dmacount = dma_map_sg(&adapter->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
 	if (dmacount == 0) {
 		e_err(drv, "xid 0x%x DMA map error\n", xid);
-		return 0;
+		goto out_noddp;
 	}
 
 	/* alloc the udl from per cpu ddp pool */
-	cpu = get_cpu();
-	pool = *per_cpu_ptr(fcoe->pool, cpu);
-	ddp->udl = dma_pool_alloc(pool, GFP_ATOMIC, &ddp->udp);
+	ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp);
 	if (!ddp->udl) {
 		e_err(drv, "failed allocated ddp context\n");
 		goto out_noddp_unmap;
 	}
-	ddp->pool = pool;
+	ddp->pool = ddp_pool->pool;
 	ddp->sgl = sgl;
 	ddp->sgc = sgc;
 
@@ -201,7 +205,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 		while (len) {
 			/* max number of buffers allowed in one DDP context */
 			if (j >= IXGBE_BUFFCNT_MAX) {
-				*per_cpu_ptr(fcoe->pcpu_noddp, cpu) += 1;
+				ddp_pool->noddp++;
 				goto out_noddp_free;
 			}
 
@@ -241,7 +245,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	 */
 	if (lastsize == bufflen) {
 		if (j >= IXGBE_BUFFCNT_MAX) {
-			*per_cpu_ptr(fcoe->pcpu_noddp_ext_buff, cpu) += 1;
+			ddp_pool->noddp_ext_buff++;
 			goto out_noddp_free;
 		}
 
@@ -293,11 +297,12 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
 	return 1;
 
 out_noddp_free:
-	dma_pool_free(pool, ddp->udl, ddp->udp);
+	dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
 	ixgbe_fcoe_clear_ddp(ddp);
 
 out_noddp_unmap:
 	dma_unmap_sg(&adapter->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
+out_noddp:
 	put_cpu();
 	return 0;
 }
@@ -563,44 +568,63 @@ int ixgbe_fso(struct ixgbe_ring *tx_ring,
 	return 0;
 }
 
+static void ixgbe_fcoe_dma_pool_free(struct ixgbe_fcoe *fcoe, unsigned int cpu)
+{
+	struct ixgbe_fcoe_ddp_pool *ddp_pool;
+
+	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
+	if (ddp_pool->pool)
+		dma_pool_destroy(ddp_pool->pool);
+	ddp_pool->pool = NULL;
+}
+
 static void ixgbe_fcoe_ddp_pools_free(struct ixgbe_fcoe *fcoe)
 {
 	unsigned int cpu;
-	struct dma_pool **pool;
 
-	for_each_possible_cpu(cpu) {
-		pool = per_cpu_ptr(fcoe->pool, cpu);
-		if (*pool)
-			dma_pool_destroy(*pool);
-	}
-	free_percpu(fcoe->pool);
-	fcoe->pool = NULL;
+	for_each_possible_cpu(cpu)
+		ixgbe_fcoe_dma_pool_free(fcoe, cpu);
+
+	free_percpu(fcoe->ddp_pool);
+	fcoe->ddp_pool = NULL;
+}
+
+static int ixgbe_fcoe_dma_pool_alloc(struct ixgbe_fcoe *fcoe,
+				     struct device *dev,
+				     unsigned int cpu)
+{
+	struct ixgbe_fcoe_ddp_pool *ddp_pool;
+	struct dma_pool *pool;
+	char pool_name[32];
+
+	snprintf(pool_name, 32, "ixgbe_fcoe_ddp_%d", cpu);
+
+	pool = dma_pool_create(pool_name, dev, IXGBE_FCPTR_MAX,
+			       IXGBE_FCPTR_ALIGN, PAGE_SIZE);
+	if (!pool)
+		return -ENOMEM;
+
+	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
+	ddp_pool->pool = pool;
+	ddp_pool->noddp = 0;
+	ddp_pool->noddp_ext_buff = 0;
+
+	return 0;
 }
 
 static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+	struct device *dev = &adapter->pdev->dev;
 	unsigned int cpu;
-	struct dma_pool **pool;
-	char pool_name[32];
 
-	fcoe->pool = alloc_percpu(struct dma_pool *);
-	if (!fcoe->pool)
+	fcoe->ddp_pool = alloc_percpu(struct ixgbe_fcoe_ddp_pool);
+	if (!fcoe->ddp_pool)
 		return;
 
 	/* allocate pci pool for each cpu */
-	for_each_possible_cpu(cpu) {
-		snprintf(pool_name, 32, "ixgbe_fcoe_ddp_%d", cpu);
-		pool = per_cpu_ptr(fcoe->pool, cpu);
-		*pool = dma_pool_create(pool_name, &adapter->pdev->dev,
-					IXGBE_FCPTR_MAX, IXGBE_FCPTR_ALIGN,
-					PAGE_SIZE);
-		if (!*pool) {
-			e_err(drv, "failed to alloc DDP pool on cpu:%d\n", cpu);
-			ixgbe_fcoe_ddp_pools_free(fcoe);
-			return;
-		}
-	}
+	for_each_possible_cpu(cpu)
+		ixgbe_fcoe_dma_pool_alloc(fcoe, dev, cpu);
 }
 
 /**
@@ -617,14 +641,13 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
 	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_FCOE];
-	unsigned int cpu;
 	u32 etqf;
 
-	if (!fcoe->pool) {
+	if (!fcoe->ddp_pool) {
 		spin_lock_init(&fcoe->lock);
 
 		ixgbe_fcoe_ddp_pools_alloc(adapter);
-		if (!fcoe->pool) {
+		if (!fcoe->ddp_pool) {
 			e_err(drv, "failed to alloc percpu fcoe DDP pools\n");
 			return;
 		}
@@ -646,24 +669,6 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 			e_err(drv, "failed to map extra DDP buffer\n");
 			goto out_extra_ddp_buffer;
 		}
-
-		/* Alloc per cpu mem to count the ddp alloc failure number */
-		fcoe->pcpu_noddp = alloc_percpu(u64);
-		if (!fcoe->pcpu_noddp) {
-			e_err(drv, "failed to alloc noddp counter\n");
-			goto out_pcpu_noddp_alloc_fail;
-		}
-
-		fcoe->pcpu_noddp_ext_buff = alloc_percpu(u64);
-		if (!fcoe->pcpu_noddp_ext_buff) {
-			e_err(drv, "failed to alloc noddp extra buff cnt\n");
-			goto out_pcpu_noddp_extra_buff_alloc_fail;
-		}
-
-		for_each_possible_cpu(cpu) {
-			*per_cpu_ptr(fcoe->pcpu_noddp, cpu) = 0;
-			*per_cpu_ptr(fcoe->pcpu_noddp_ext_buff, cpu) = 0;
-		}
 	}
 
 	/* Enable L2 EtherType filter for FCoE, necessary for FCoE Rx CRC */
@@ -704,13 +709,6 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 			(FC_FCOE_VER << IXGBE_FCRXCTRL_FCOEVER_SHIFT));
 
 	return;
-out_pcpu_noddp_extra_buff_alloc_fail:
-	free_percpu(fcoe->pcpu_noddp);
-out_pcpu_noddp_alloc_fail:
-	dma_unmap_single(&adapter->pdev->dev,
-			 fcoe->extra_ddp_buffer_dma,
-			 IXGBE_FCBUFF_MIN,
-			 DMA_FROM_DEVICE);
 out_extra_ddp_buffer:
 	kfree(fcoe->extra_ddp_buffer);
 out_ddp_pools:
@@ -730,18 +728,18 @@ void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter)
 	int i;
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
 
-	if (!fcoe->pool)
+	if (!fcoe->ddp_pool)
 		return;
 
 	for (i = 0; i < IXGBE_FCOE_DDP_MAX; i++)
 		ixgbe_fcoe_ddp_put(adapter->netdev, i);
+
 	dma_unmap_single(&adapter->pdev->dev,
 			 fcoe->extra_ddp_buffer_dma,
 			 IXGBE_FCBUFF_MIN,
 			 DMA_FROM_DEVICE);
-	free_percpu(fcoe->pcpu_noddp);
-	free_percpu(fcoe->pcpu_noddp_ext_buff);
 	kfree(fcoe->extra_ddp_buffer);
+
 	ixgbe_fcoe_ddp_pools_free(fcoe);
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
index 0ef231a..5d02873 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
@@ -65,16 +65,21 @@ struct ixgbe_fcoe_ddp {
 	struct dma_pool *pool;
 };
 
+/* per cpu variables */
+struct ixgbe_fcoe_ddp_pool {
+	struct dma_pool *pool;
+	u64 noddp;
+	u64 noddp_ext_buff;
+};
+
 struct ixgbe_fcoe {
-	struct dma_pool **pool;
+	struct ixgbe_fcoe_ddp_pool __percpu *ddp_pool;
 	atomic_t refcnt;
 	spinlock_t lock;
 	struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
 	unsigned char *extra_ddp_buffer;
 	dma_addr_t extra_ddp_buffer_dma;
 	unsigned long mode;
-	u64 __percpu *pcpu_noddp;
-	u64 __percpu *pcpu_noddp_ext_buff;
 #ifdef CONFIG_IXGBE_DCB
 	u8 up;
 #endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index f110e88..c666259 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5052,11 +5052,6 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 	u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
 	u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
 	u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
-#ifdef IXGBE_FCOE
-	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
-	unsigned int cpu;
-	u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0;
-#endif /* IXGBE_FCOE */
 
 	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
 	    test_bit(__IXGBE_RESETTING, &adapter->state))
@@ -5187,17 +5182,19 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 		hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
 		hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
 		/* Add up per cpu counters for total ddp aloc fail */
-		if (fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) {
+		if (adapter->fcoe.ddp_pool) {
+			struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+			struct ixgbe_fcoe_ddp_pool *ddp_pool;
+			unsigned int cpu;
+			u64 noddp = 0, noddp_ext_buff = 0;
 			for_each_possible_cpu(cpu) {
-				fcoe_noddp_counts_sum +=
-					*per_cpu_ptr(fcoe->pcpu_noddp, cpu);
-				fcoe_noddp_ext_buff_counts_sum +=
-					*per_cpu_ptr(fcoe->
-						pcpu_noddp_ext_buff, cpu);
+				ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
+				noddp += ddp_pool->noddp;
+				noddp_ext_buff += ddp_pool->noddp_ext_buff;
 			}
+			hwstats->fcoe_noddp = noddp;
+			hwstats->fcoe_noddp_ext_buff = noddp_ext_buff;
 		}
-		hwstats->fcoe_noddp = fcoe_noddp_counts_sum;
-		hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum;
 #endif /* IXGBE_FCOE */
 		break;
 	default:
-- 
1.7.10.4

^ permalink raw reply related

* [net-next 8/9] ixgbe: Only enable anti-spoof on VF pools
From: Jeff Kirsher @ 2012-07-20  1:24 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

The current logic is enabling anti-spoof on all pools and then clearing
anti-spoof on just the first PF pool.  The correct approach is to only set
anti-spoof on the VF pools and to leave all of the PF pools unchecked.

This allows for items such as FCoE to use adjacent pools within the PF for
transmit and receive queues without the traffic being blocked by this
security feature.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |   20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index bbe9d45..90e41db 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3225,20 +3225,22 @@ void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf)
 	 * PFVFSPOOF register array is size 8 with 8 bits assigned to
 	 * MAC anti-spoof enables in each register array element.
 	 */
-	for (j = 0; j < IXGBE_PFVFSPOOF_REG_COUNT; j++)
+	for (j = 0; j < pf_target_reg; j++)
 		IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof);
 
-	/* If not enabling anti-spoofing then done */
-	if (!enable)
-		return;
-
 	/*
 	 * The PF should be allowed to spoof so that it can support
-	 * emulation mode NICs.  Reset the bit assigned to the PF
+	 * emulation mode NICs.  Do not set the bits assigned to the PF
+	 */
+	pfvfspoof &= (1 << pf_target_shift) - 1;
+	IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof);
+
+	/*
+	 * Remaining pools belong to the PF so they do not need to have
+	 * anti-spoofing enabled.
 	 */
-	pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg));
-	pfvfspoof ^= (1 << pf_target_shift);
-	IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg), pfvfspoof);
+	for (j++; j < IXGBE_PFVFSPOOF_REG_COUNT; j++)
+		IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), 0);
 }
 
 /**
-- 
1.7.10.4

^ permalink raw reply related

* [net-next 7/9] ixgbe: Correctly set SAN MAC RAR pool to default pool of PF
From: Jeff Kirsher @ 2012-07-20  1:24 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change corrects an issue in which an FCoE enabled adapter was always
setting the FCoE SAN MAC MPSAR register to 0x1.  This results in the first
VF being assigned the SAN MAC address in the case of SR-IOV and as such is
incorrect.  To resolve this I am adding a new function that will update the
SAN MAC pool address after reset.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c  |    4 ++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |   25 +++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.h |    1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |   13 +++++++++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h   |    2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c   |    4 ++++
 6 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index e7dddfd..50fc137 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1025,6 +1025,9 @@ mac_reset_top:
 		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
 		                    hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
+		/* Save the SAN MAC RAR index */
+		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
 		/* Reserve the last RAR for the SAN MAC address */
 		hw->mac.num_rar_entries--;
 	}
@@ -2106,6 +2109,7 @@ static struct ixgbe_mac_operations mac_ops_82599 = {
 	.set_rar                = &ixgbe_set_rar_generic,
 	.clear_rar              = &ixgbe_clear_rar_generic,
 	.set_vmdq               = &ixgbe_set_vmdq_generic,
+	.set_vmdq_san_mac	= &ixgbe_set_vmdq_san_mac_generic,
 	.clear_vmdq             = &ixgbe_clear_vmdq_generic,
 	.init_rx_addrs          = &ixgbe_init_rx_addrs_generic,
 	.update_mc_addr_list    = &ixgbe_update_mc_addr_list_generic,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index bb7fde4..bbe9d45 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -2848,6 +2848,31 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
 }
 
 /**
+ *  This function should only be involved in the IOV mode.
+ *  In IOV mode, Default pool is next pool after the number of
+ *  VFs advertized and not 0.
+ *  MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index]
+ *
+ *  ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @vmdq: VMDq pool index
+ **/
+s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
+{
+	u32 rar = hw->mac.san_mac_rar_index;
+
+	if (vmdq < 32) {
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 1 << vmdq);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0);
+	} else {
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 1 << (vmdq - 32));
+	}
+
+	return 0;
+}
+
+/**
  *  ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array
  *  @hw: pointer to hardware structure
  **/
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 6222fdb..d813d11 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -85,6 +85,7 @@ s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask);
 void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask);
 s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
 s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq);
 s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
 s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
 s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index e006c05..aa01558 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4119,6 +4119,10 @@ void ixgbe_reset(struct ixgbe_adapter *adapter)
 
 	/* reprogram the RAR[0] in case user changed it. */
 	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, VMDQ_P(0), IXGBE_RAH_AV);
+
+	/* update SAN MAC vmdq pool selection */
+	if (hw->mac.san_mac_rar_index)
+		hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0));
 }
 
 /**
@@ -6509,12 +6513,15 @@ static int ixgbe_add_sanmac_netdev(struct net_device *dev)
 {
 	int err = 0;
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
-	struct ixgbe_mac_info *mac = &adapter->hw.mac;
+	struct ixgbe_hw *hw = &adapter->hw;
 
-	if (is_valid_ether_addr(mac->san_addr)) {
+	if (is_valid_ether_addr(hw->mac.san_addr)) {
 		rtnl_lock();
-		err = dev_addr_add(dev, mac->san_addr, NETDEV_HW_ADDR_T_SAN);
+		err = dev_addr_add(dev, hw->mac.san_addr, NETDEV_HW_ADDR_T_SAN);
 		rtnl_unlock();
+
+		/* update SAN MAC vmdq pool selection */
+		hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0));
 	}
 	return err;
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 482a33b..d26e07a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2847,6 +2847,7 @@ struct ixgbe_mac_operations {
 	s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
 	s32 (*clear_rar)(struct ixgbe_hw *, u32);
 	s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32);
+	s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32);
 	s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
 	s32 (*init_rx_addrs)(struct ixgbe_hw *);
 	s32 (*update_mc_addr_list)(struct ixgbe_hw *, struct net_device *);
@@ -2922,6 +2923,7 @@ struct ixgbe_mac_info {
 	bool                            orig_link_settings_stored;
 	bool                            autotry_restart;
 	u8                              flags;
+	u8				san_mac_rar_index;
 	struct ixgbe_thermal_sensor_data  thermal_sensor_data;
 };
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index f90ec07..de4da52 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -156,6 +156,9 @@ mac_reset_top:
 		hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1,
 		                    hw->mac.san_addr, 0, IXGBE_RAH_AV);
 
+		/* Save the SAN MAC RAR index */
+		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
 		/* Reserve the last RAR for the SAN MAC address */
 		hw->mac.num_rar_entries--;
 	}
@@ -832,6 +835,7 @@ static struct ixgbe_mac_operations mac_ops_X540 = {
 	.set_rar                = &ixgbe_set_rar_generic,
 	.clear_rar              = &ixgbe_clear_rar_generic,
 	.set_vmdq               = &ixgbe_set_vmdq_generic,
+	.set_vmdq_san_mac	= &ixgbe_set_vmdq_san_mac_generic,
 	.clear_vmdq             = &ixgbe_clear_vmdq_generic,
 	.init_rx_addrs          = &ixgbe_init_rx_addrs_generic,
 	.update_mc_addr_list    = &ixgbe_update_mc_addr_list_generic,
-- 
1.7.10.4

^ permalink raw reply related

* [net-next 6/9] ixgbe: Make FCoE allocation and configuration closer to how rings work
From: Jeff Kirsher @ 2012-07-20  1:24 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch changes the behavior of the FCoE configuration so that it is
much closer to how the main body of the ixgbe driver works for ring
allocation.

The first piece is the ixgbe_fcoe_ddp_enable/disable calls.  These allocate
the percpu values and if successful set the fcoe_ddp_xid value indicating
that we can support DDP.

The next piece is the ixgbe_setup/free_ddp_resources calls.  These are
called on open/close and will allocate and free the DMA pools.

Finally ixgbe_configure_fcoe is now just register configuration.  It can go
through and enable the registers for the FCoE redirection offload, and FIP
configuration without any interference from the DDP pool allocation.

The net result of all this is two fold.  First it adds a certain amount of
exception handling.  So for example if ixgbe_setup_fcoe_resources fails we
will actually generate an error in open and refuse to bring up the
interface.

Secondly it provides a much more graceful failure case than the previous
model which would skip setting up the registers for FCoE on failure to
allocate DDP resources leaving no Rx functionality enabled instead of just
disabling DDP.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      |    3 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c |  228 ++++++++++++++-----------
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h |    2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   43 +++--
 4 files changed, 154 insertions(+), 122 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index c236500..5a286ad 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -691,7 +691,6 @@ extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter);
 extern int ixgbe_fso(struct ixgbe_ring *tx_ring,
 		     struct ixgbe_tx_buffer *first,
 		     u8 *hdr_len);
-extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter);
 extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
 			  union ixgbe_adv_rx_desc *rx_desc,
 			  struct sk_buff *skb);
@@ -700,6 +699,8 @@ extern int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
 extern int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid,
 				 struct scatterlist *sgl, unsigned int sgc);
 extern int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid);
+extern int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter);
+extern void ixgbe_free_fcoe_ddp_resources(struct ixgbe_adapter *adapter);
 extern int ixgbe_fcoe_enable(struct net_device *netdev);
 extern int ixgbe_fcoe_disable(struct net_device *netdev);
 #ifdef CONFIG_IXGBE_DCB
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index e7c463c..e79ba39 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -578,17 +578,6 @@ static void ixgbe_fcoe_dma_pool_free(struct ixgbe_fcoe *fcoe, unsigned int cpu)
 	ddp_pool->pool = NULL;
 }
 
-static void ixgbe_fcoe_ddp_pools_free(struct ixgbe_fcoe *fcoe)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu)
-		ixgbe_fcoe_dma_pool_free(fcoe, cpu);
-
-	free_percpu(fcoe->ddp_pool);
-	fcoe->ddp_pool = NULL;
-}
-
 static int ixgbe_fcoe_dma_pool_alloc(struct ixgbe_fcoe *fcoe,
 				     struct device *dev,
 				     unsigned int cpu)
@@ -612,21 +601,6 @@ static int ixgbe_fcoe_dma_pool_alloc(struct ixgbe_fcoe *fcoe,
 	return 0;
 }
 
-static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
-{
-	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
-	struct device *dev = &adapter->pdev->dev;
-	unsigned int cpu;
-
-	fcoe->ddp_pool = alloc_percpu(struct ixgbe_fcoe_ddp_pool);
-	if (!fcoe->ddp_pool)
-		return;
-
-	/* allocate pci pool for each cpu */
-	for_each_possible_cpu(cpu)
-		ixgbe_fcoe_dma_pool_alloc(fcoe, dev, cpu);
-}
-
 /**
  * ixgbe_configure_fcoe - configures registers for fcoe at start
  * @adapter: ptr to ixgbe adapter
@@ -637,39 +611,14 @@ static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
  */
 void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 {
-	int i, fcoe_q, fcoe_i;
+	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
-	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_FCOE];
+	int i, fcoe_q, fcoe_i;
 	u32 etqf;
 
-	if (!fcoe->ddp_pool) {
-		spin_lock_init(&fcoe->lock);
-
-		ixgbe_fcoe_ddp_pools_alloc(adapter);
-		if (!fcoe->ddp_pool) {
-			e_err(drv, "failed to alloc percpu fcoe DDP pools\n");
-			return;
-		}
-
-		/* Extra buffer to be shared by all DDPs for HW work around */
-		fcoe->extra_ddp_buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC);
-		if (fcoe->extra_ddp_buffer == NULL) {
-			e_err(drv, "failed to allocated extra DDP buffer\n");
-			goto out_ddp_pools;
-		}
-
-		fcoe->extra_ddp_buffer_dma =
-			dma_map_single(&adapter->pdev->dev,
-				       fcoe->extra_ddp_buffer,
-				       IXGBE_FCBUFF_MIN,
-				       DMA_FROM_DEVICE);
-		if (dma_mapping_error(&adapter->pdev->dev,
-				      fcoe->extra_ddp_buffer_dma)) {
-			e_err(drv, "failed to map extra DDP buffer\n");
-			goto out_extra_ddp_buffer;
-		}
-	}
+	/* leave registers unconfigued if FCoE is disabled */
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+		return;
 
 	/* Enable L2 EtherType filter for FCoE, necessary for FCoE Rx CRC */
 	etqf = ETH_P_FCOE | IXGBE_ETQF_FCOE | IXGBE_ETQF_FILTER_EN;
@@ -682,7 +631,7 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 
 	/* Use one or more Rx queues for FCoE by redirection table */
 	for (i = 0; i < IXGBE_FCRETA_SIZE; i++) {
-		fcoe_i = f->offset + (i % f->indices);
+		fcoe_i = fcoe->offset + (i % fcoe->indices);
 		fcoe_i &= IXGBE_FCRETA_ENTRY_MASK;
 		fcoe_q = adapter->rx_ring[fcoe_i]->reg_idx;
 		IXGBE_WRITE_REG(hw, IXGBE_FCRETA(i), fcoe_q);
@@ -698,7 +647,7 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FIP), etqf);
 
 	/* Send FIP frames to the first FCoE queue */
-	fcoe_q = adapter->rx_ring[f->offset]->reg_idx;
+	fcoe_q = adapter->rx_ring[fcoe->offset]->reg_idx;
 	IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FIP),
 			IXGBE_ETQS_QUEUE_EN |
 			(fcoe_q << IXGBE_ETQS_RX_QUEUE_SHIFT));
@@ -707,40 +656,122 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL,
 			IXGBE_FCRXCTRL_FCCRCBO |
 			(FC_FCOE_VER << IXGBE_FCRXCTRL_FCOEVER_SHIFT));
-
-	return;
-out_extra_ddp_buffer:
-	kfree(fcoe->extra_ddp_buffer);
-out_ddp_pools:
-	ixgbe_fcoe_ddp_pools_free(fcoe);
 }
 
 /**
- * ixgbe_cleanup_fcoe - release all fcoe ddp context resources
+ * ixgbe_free_fcoe_ddp_resources - release all fcoe ddp context resources
  * @adapter : ixgbe adapter
  *
  * Cleans up outstanding ddp context resources
  *
  * Returns : none
  */
-void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter)
+void ixgbe_free_fcoe_ddp_resources(struct ixgbe_adapter *adapter)
 {
-	int i;
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+	int cpu, i;
 
+	/* do nothing if no DDP pools were allocated */
 	if (!fcoe->ddp_pool)
 		return;
 
 	for (i = 0; i < IXGBE_FCOE_DDP_MAX; i++)
 		ixgbe_fcoe_ddp_put(adapter->netdev, i);
 
+	for_each_possible_cpu(cpu)
+		ixgbe_fcoe_dma_pool_free(fcoe, cpu);
+
 	dma_unmap_single(&adapter->pdev->dev,
 			 fcoe->extra_ddp_buffer_dma,
 			 IXGBE_FCBUFF_MIN,
 			 DMA_FROM_DEVICE);
 	kfree(fcoe->extra_ddp_buffer);
 
-	ixgbe_fcoe_ddp_pools_free(fcoe);
+	fcoe->extra_ddp_buffer = NULL;
+	fcoe->extra_ddp_buffer_dma = 0;
+}
+
+/**
+ * ixgbe_setup_fcoe_ddp_resources - setup all fcoe ddp context resources
+ * @adapter: ixgbe adapter
+ *
+ * Sets up ddp context resouces
+ *
+ * Returns : 0 indicates success or -EINVAL on failure
+ */
+int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+	struct device *dev = &adapter->pdev->dev;
+	void *buffer;
+	dma_addr_t dma;
+	unsigned int cpu;
+
+	/* do nothing if no DDP pools were allocated */
+	if (!fcoe->ddp_pool)
+		return 0;
+
+	/* Extra buffer to be shared by all DDPs for HW work around */
+	buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC);
+	if (!buffer) {
+		e_err(drv, "failed to allocate extra DDP buffer\n");
+		return -ENOMEM;
+	}
+
+	dma = dma_map_single(dev, buffer, IXGBE_FCBUFF_MIN, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, dma)) {
+		e_err(drv, "failed to map extra DDP buffer\n");
+		kfree(buffer);
+		return -ENOMEM;
+	}
+
+	fcoe->extra_ddp_buffer = buffer;
+	fcoe->extra_ddp_buffer_dma = dma;
+
+	/* allocate pci pool for each cpu */
+	for_each_possible_cpu(cpu) {
+		int err = ixgbe_fcoe_dma_pool_alloc(fcoe, dev, cpu);
+		if (!err)
+			continue;
+
+		e_err(drv, "failed to alloc DDP pool on cpu:%d\n", cpu);
+		ixgbe_free_fcoe_ddp_resources(adapter);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int ixgbe_fcoe_ddp_enable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_CAPABLE))
+		return -EINVAL;
+
+	fcoe->ddp_pool = alloc_percpu(struct ixgbe_fcoe_ddp_pool);
+
+	if (!fcoe->ddp_pool) {
+		e_err(drv, "failed to allocate percpu DDP resources\n");
+		return -ENOMEM;
+	}
+
+	adapter->netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1;
+
+	return 0;
+}
+
+static void ixgbe_fcoe_ddp_disable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+
+	adapter->netdev->fcoe_ddp_xid = 0;
+
+	if (!fcoe->ddp_pool)
+		return;
+
+	free_percpu(fcoe->ddp_pool);
+	fcoe->ddp_pool = NULL;
 }
 
 /**
@@ -753,40 +784,37 @@ void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter)
  */
 int ixgbe_fcoe_enable(struct net_device *netdev)
 {
-	int rc = -EINVAL;
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
 
+	atomic_inc(&fcoe->refcnt);
 
 	if (!(adapter->flags & IXGBE_FLAG_FCOE_CAPABLE))
-		goto out_enable;
+		return -EINVAL;
 
-	atomic_inc(&fcoe->refcnt);
 	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
-		goto out_enable;
+		return -EINVAL;
 
 	e_info(drv, "Enabling FCoE offload features.\n");
 	if (netif_running(netdev))
 		netdev->netdev_ops->ndo_stop(netdev);
 
-	ixgbe_clear_interrupt_scheme(adapter);
+	/* Allocate per CPU memory to track DDP pools */
+	ixgbe_fcoe_ddp_enable(adapter);
 
+	/* enable FCoE and notify stack */
 	adapter->flags |= IXGBE_FLAG_FCOE_ENABLED;
-	adapter->ring_feature[RING_F_FCOE].limit = IXGBE_FCRETA_SIZE;
-	netdev->features |= NETIF_F_FCOE_CRC;
-	netdev->features |= NETIF_F_FSO;
-	netdev->features |= NETIF_F_FCOE_MTU;
-	netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1;
+	netdev->features |= NETIF_F_FSO | NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU;
+	netdev_features_change(netdev);
 
+	/* release existing queues and reallocate them */
+	ixgbe_clear_interrupt_scheme(adapter);
 	ixgbe_init_interrupt_scheme(adapter);
-	netdev_features_change(netdev);
 
 	if (netif_running(netdev))
 		netdev->netdev_ops->ndo_open(netdev);
-	rc = 0;
 
-out_enable:
-	return rc;
+	return 0;
 }
 
 /**
@@ -799,41 +827,37 @@ out_enable:
  */
 int ixgbe_fcoe_disable(struct net_device *netdev)
 {
-	int rc = -EINVAL;
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
 
-	if (!(adapter->flags & IXGBE_FLAG_FCOE_CAPABLE))
-		goto out_disable;
+	if (!atomic_dec_and_test(&adapter->fcoe.refcnt))
+		return -EINVAL;
 
 	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
-		goto out_disable;
-
-	if (!atomic_dec_and_test(&fcoe->refcnt))
-		goto out_disable;
+		return -EINVAL;
 
 	e_info(drv, "Disabling FCoE offload features.\n");
-	netdev->features &= ~NETIF_F_FCOE_CRC;
-	netdev->features &= ~NETIF_F_FSO;
-	netdev->features &= ~NETIF_F_FCOE_MTU;
-	netdev->fcoe_ddp_xid = 0;
-	netdev_features_change(netdev);
-
 	if (netif_running(netdev))
 		netdev->netdev_ops->ndo_stop(netdev);
 
-	ixgbe_clear_interrupt_scheme(adapter);
+	/* Free per CPU memory to track DDP pools */
+	ixgbe_fcoe_ddp_disable(adapter);
+
+	/* disable FCoE and notify stack */
 	adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
-	adapter->ring_feature[RING_F_FCOE].indices = 0;
-	ixgbe_cleanup_fcoe(adapter);
+	netdev->features &= ~(NETIF_F_FCOE_CRC |
+			      NETIF_F_FSO |
+			      NETIF_F_FCOE_MTU);
+
+	netdev_features_change(netdev);
+
+	/* release existing queues and reallocate them */
+	ixgbe_clear_interrupt_scheme(adapter);
 	ixgbe_init_interrupt_scheme(adapter);
 
 	if (netif_running(netdev))
 		netdev->netdev_ops->ndo_open(netdev);
-	rc = 0;
 
-out_disable:
-	return rc;
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
index 5d02873..bf724da 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
@@ -77,7 +77,7 @@ struct ixgbe_fcoe {
 	atomic_t refcnt;
 	spinlock_t lock;
 	struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
-	unsigned char *extra_ddp_buffer;
+	void *extra_ddp_buffer;
 	dma_addr_t extra_ddp_buffer_dma;
 	unsigned long mode;
 #ifdef CONFIG_IXGBE_DCB
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index c666259..e006c05 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3807,12 +3807,6 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 	ixgbe_set_rx_mode(adapter->netdev);
 	ixgbe_restore_vlan(adapter);
 
-#ifdef IXGBE_FCOE
-	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
-		ixgbe_configure_fcoe(adapter);
-
-#endif /* IXGBE_FCOE */
-
 	switch (hw->mac.type) {
 	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
@@ -3842,6 +3836,11 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 
 	ixgbe_configure_virtualization(adapter);
 
+#ifdef IXGBE_FCOE
+	/* configure FCoE L2 filters, redirection table, and Rx control */
+	ixgbe_configure_fcoe(adapter);
+
+#endif /* IXGBE_FCOE */
 	ixgbe_configure_tx(adapter);
 	ixgbe_configure_rx(adapter);
 }
@@ -4434,6 +4433,11 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 		break;
 	}
 
+#ifdef IXGBE_FCOE
+	/* FCoE support exists, always init the FCoE lock */
+	spin_lock_init(&adapter->fcoe.lock);
+
+#endif
 	/* n-tuple support exists, always init our spinlock */
 	spin_lock_init(&adapter->fdir_perfect_lock);
 
@@ -4662,7 +4666,11 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
 		goto err_setup_rx;
 	}
 
-	return 0;
+#ifdef IXGBE_FCOE
+	err = ixgbe_setup_fcoe_ddp_resources(adapter);
+	if (!err)
+#endif
+		return 0;
 err_setup_rx:
 	/* rewind the index freeing the rings as we go */
 	while (i--)
@@ -4741,6 +4749,10 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
 {
 	int i;
 
+#ifdef IXGBE_FCOE
+	ixgbe_free_fcoe_ddp_resources(adapter);
+
+#endif
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		if (adapter->rx_ring[i]->desc)
 			ixgbe_free_rx_resources(adapter->rx_ring[i]);
@@ -7235,11 +7247,12 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 			if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS)
 				adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
 		}
-	}
-	if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
-		netdev->vlan_features |= NETIF_F_FCOE_CRC;
-		netdev->vlan_features |= NETIF_F_FSO;
-		netdev->vlan_features |= NETIF_F_FCOE_MTU;
+
+		adapter->ring_feature[RING_F_FCOE].limit = IXGBE_FCRETA_SIZE;
+
+		netdev->vlan_features |= NETIF_F_FSO |
+					 NETIF_F_FCOE_CRC |
+					 NETIF_F_FCOE_MTU;
 	}
 #endif /* IXGBE_FCOE */
 	if (pci_using_dac) {
@@ -7436,12 +7449,6 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev)
 	ixgbe_sysfs_exit(adapter);
 #endif /* CONFIG_IXGBE_HWMON */
 
-#ifdef IXGBE_FCOE
-	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
-		ixgbe_cleanup_fcoe(adapter);
-
-#endif /* IXGBE_FCOE */
-
 	/* remove the added san mac */
 	ixgbe_del_sanmac_netdev(netdev);
 
-- 
1.7.10.4

^ permalink raw reply related

* [net-next 9/9] ixgbe: Enable FCoE FSO and CRC offloads based on CAPABLE instead of ENABLED flag
From: Jeff Kirsher @ 2012-07-20  1:24 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342747446-16132-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Instead of only setting the FCOE segmentation offload and CRC offload flags
if we enable FCoE, we could just set them always since there are no
modifications needed to the hardware or adapter FCoE structure in order to
use these features.

The advantage to this is that if FCoE enablement fails, for example because
SR-IOV was enabled on 82599, we will still have use of the FCoE
segmentation offload and Tx/Rx CRC offloads which should still help to
improve the FCoE performance.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c |   16 +++++++++-------
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    5 ++++-
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index e79ba39..ae73ef1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -616,11 +616,11 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	int i, fcoe_q, fcoe_i;
 	u32 etqf;
 
-	/* leave registers unconfigued if FCoE is disabled */
-	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+	/* Minimal functionality for FCoE requires at least CRC offloads */
+	if (!(adapter->netdev->features & NETIF_F_FCOE_CRC))
 		return;
 
-	/* Enable L2 EtherType filter for FCoE, necessary for FCoE Rx CRC */
+	/* Enable L2 EtherType filter for FCoE, needed for FCoE CRC and DDP */
 	etqf = ETH_P_FCOE | IXGBE_ETQF_FCOE | IXGBE_ETQF_FILTER_EN;
 	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
 		etqf |= IXGBE_ETQF_POOL_ENABLE;
@@ -629,6 +629,10 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FCOE), etqf);
 	IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FCOE), 0);
 
+	/* leave registers un-configured if FCoE is disabled */
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+		return;
+
 	/* Use one or more Rx queues for FCoE by redirection table */
 	for (i = 0; i < IXGBE_FCRETA_SIZE; i++) {
 		fcoe_i = fcoe->offset + (i % fcoe->indices);
@@ -804,7 +808,7 @@ int ixgbe_fcoe_enable(struct net_device *netdev)
 
 	/* enable FCoE and notify stack */
 	adapter->flags |= IXGBE_FLAG_FCOE_ENABLED;
-	netdev->features |= NETIF_F_FSO | NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU;
+	netdev->features |= NETIF_F_FCOE_MTU;
 	netdev_features_change(netdev);
 
 	/* release existing queues and reallocate them */
@@ -844,9 +848,7 @@ int ixgbe_fcoe_disable(struct net_device *netdev)
 
 	/* disable FCoE and notify stack */
 	adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
-	netdev->features &= ~(NETIF_F_FCOE_CRC |
-			      NETIF_F_FSO |
-			      NETIF_F_FCOE_MTU);
+	netdev->features &= ~NETIF_F_FCOE_MTU;
 
 	netdev_features_change(netdev);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index aa01558..c521007 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6382,7 +6382,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 #ifdef IXGBE_FCOE
 	/* setup tx offload for FCoE */
 	if ((protocol == __constant_htons(ETH_P_FCOE)) &&
-	    (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) {
+	    (tx_ring->netdev->features & (NETIF_F_FSO | NETIF_F_FCOE_CRC))) {
 		tso = ixgbe_fso(tx_ring, first, &hdr_len);
 		if (tso < 0)
 			goto out_drop;
@@ -7257,6 +7257,9 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 
 		adapter->ring_feature[RING_F_FCOE].limit = IXGBE_FCRETA_SIZE;
 
+		netdev->features |= NETIF_F_FSO |
+				    NETIF_F_FCOE_CRC;
+
 		netdev->vlan_features |= NETIF_F_FSO |
 					 NETIF_F_FCOE_CRC |
 					 NETIF_F_FCOE_MTU;
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH V2] ipv6: fix incorrect route 'expires' value passed to userspace
From: Li Wei @ 2012-07-20  1:32 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, shemminger
In-Reply-To: <20120719.104906.38765587582698093.davem@davemloft.net>

于 2012-7-20 1:49, David Miller 写道:
> From: Li Wei <lw@cn.fujitsu.com>
> Date: Thu, 19 Jul 2012 10:02:59 +0800
> 
>>
>> When userspace use RTM_GETROUTE to dump route table, with an already
>> expired route entry, we always got an 'expires' value(2147157)
>> calculated base on INT_MAX.
>>
>> The reason of this problem is in the following satement:
>> 	rt->dst.expires - jiffies < INT_MAX
>> gcc promoted the type of both sides of '<' to unsigned long, thus
>> a small negative value would be considered greater than INT_MAX.
>>
>> This patch fix this by use the same trick as time_after macro to
>> avoid the 'unsigned long' type promotion and deal with jiffies
>> wrapping.
>>
>> Also we should do some fix in rtnl_put_cacheinfo() which use
>> jiffies_to_clock_t(which take an unsigned long as parameter) to
>> convert jiffies to clock_t to handle the negative expires.
>>
>> Signed-off-by: Li Wei <lw@cn.fujitsu.com>
> 
> Your patch is corrupted by your email client and therefore will
> not apply cleanly.
> 
> I think this isn't the first time your patch submissions have
> had this problem, and if so then you should do the necessary
> work to prevent problem with more certainty in the future as
> such this makes a lot of extra work for other people.
> 

Really sorry for that, I'll resend this patch and before that sending
myself a copy to confirm the mail client works properly.

Thanks

^ permalink raw reply

* [PATCH V2 resend] ipv6: fix incorrect route 'expires' value passed to userspace
From: Li Wei @ 2012-07-20  1:42 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, shemminger
In-Reply-To: <20120719.104906.38765587582698093.davem@davemloft.net>


When userspace use RTM_GETROUTE to dump route table, with an already
expired route entry, we always got an 'expires' value(2147157)
calculated base on INT_MAX.

The reason of this problem is in the following satement:
	rt->dst.expires - jiffies < INT_MAX
gcc promoted the type of both sides of '<' to unsigned long, thus
a small negative value would be considered greater than INT_MAX.

This patch fix this by use the same trick as time_after macro to
avoid the 'unsigned long' type promotion and deal with jiffies
wrapping.

Also we should do some fix in rtnl_put_cacheinfo() which use
jiffies_to_clock_t(which take an unsigned long as parameter) to
convert jiffies to clock_t to handle the negative expires.

Signed-off-by: Li Wei <lw@cn.fujitsu.com>
---
 net/core/rtnetlink.c |    3 ++-
 net/ipv6/route.c     |    7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 21318d1..f92f3d8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -641,7 +641,8 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 	};
 
 	if (expires)
-		ci.rta_expires = jiffies_to_clock_t(expires);
+		ci.rta_expires = expires > 0 ? jiffies_to_clock_t(expires)
+			: -jiffies_to_clock_t(-expires);
 
 	return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index becb048..7875255 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2516,10 +2516,11 @@ static int rt6_fill_node(struct net *net,
 		goto nla_put_failure;
 	if (!(rt->rt6i_flags & RTF_EXPIRES))
 		expires = 0;
-	else if (rt->dst.expires - jiffies < INT_MAX)
-		expires = rt->dst.expires - jiffies;
+	else if ((long)rt->dst.expires - (long)jiffies > INT_MIN
+			&& (long)rt->dst.expires - (long)jiffies < INT_MAX)
+		expires = (long)rt->dst.expires - (long)jiffies;
 	else
-		expires = INT_MAX;
+		expires = time_is_after_jiffies(rt->dst.expires) ? INT_MAX : INT_MIN;
 
 	peer = rt->rt6i_peer;
 	ts = tsage = 0;
-- 
1.7.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox