Netdev List
 help / color / mirror / Atom feed
* [RFC PATCH net-next v2 1/1] ipv6: add support of ECMP
From: Nicolas Dichtel @ 2012-09-14  7:59 UTC (permalink / raw)
  To: yoshfuji; +Cc: bernat, netdev, davem, Nicolas Dichtel
In-Reply-To: <1347609548-14494-1-git-send-email-nicolas.dichtel@6wind.com>

This patch adds support for equal-cost multipath (ECMP) for IPv6.

The patch is based on previous work by
Luc Saillard <luc.saillard@6wind.com>.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 include/net/ip6_fib.h |  13 ++++
 net/ipv6/Kconfig      |  33 ++++++++
 net/ipv6/ip6_fib.c    |  73 ++++++++++++++++++
 net/ipv6/route.c      | 209 +++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 325 insertions(+), 3 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cd64cf3..37e502a 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -47,6 +47,10 @@ struct fib6_config {
 	unsigned long	fc_expires;
 	struct nlattr	*fc_mx;
 	int		fc_mx_len;
+#ifdef CONFIG_IPV6_MULTIPATH
+	struct nlattr	*fc_mp;
+	int		fc_mp_len;
+#endif
 
 	struct nl_info	fc_nlinfo;
 };
@@ -98,6 +102,15 @@ struct rt6_info {
 	struct fib6_node		*rt6i_node;
 
 	struct in6_addr			rt6i_gateway;
+#ifdef CONFIG_IPV6_MULTIPATH
+	/*
+	 * siblings is a list of rt6_info that have the same metric/weight,
+	 * destination, but not the same gateway. nsiblings is just a cache
+	 * to speed up lookup.
+	 */
+	unsigned int			rt6i_nsiblings;
+	struct list_head		rt6i_siblings;
+#endif
 
 	atomic_t			rt6i_ref;
 
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 4f7fe72..e0c92dc 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -266,4 +266,37 @@ config IPV6_PIMSM_V2
 	  Support for IPv6 PIM multicast routing protocol PIM-SMv2.
 	  If unsure, say N.
 
+config IPV6_MULTIPATH
+	bool "IPv6: equal cost multipath for IPv6 routing"
+	depends on IPV6
+	default y
+	---help---
+	  Enable this option to support ECMP for IPv6.
+
+choice
+	prompt "IPv6: choose Multipath algorithm"
+	depends on IPV6_MULTIPATH
+	default IPV6_MULTIPATH_HASH
+	---help---
+	  Define the method to select route between each possible path.
+	  The algorithm recommended by RFC 4311 is the HASH method.
+
+	config IPV6_MULTIPATH_HASH
+	bool "IPv6: MULTIPATH hash/flow algorithm"
+	---help---
+	  Multipath routes are chosen according to hash of packet header to
+	  ensure a flow keeps the same route.
+	  This algorithm is recommended by RFC 4311.
+
+	config IPV6_MULTIPATH_RR
+	bool "IPv6: MULTIPATH round robin algorithm"
+	---help---
+	  Multipath routes are chosen according to Round Robin.
+
+	config IPV6_MULTIPATH_RANDOM
+	bool "IPv6: MULTIPATH random algorithm"
+	---help---
+	  Multipath routes are chosen in a random fashion.
+endchoice
+
 endif # IPV6
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 13690d6..3541e44 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -672,6 +672,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			    iter->rt6i_idev == rt->rt6i_idev &&
 			    ipv6_addr_equal(&iter->rt6i_gateway,
 					    &rt->rt6i_gateway)) {
+#ifdef CONFIG_IPV6_MULTIPATH
+				if (rt->rt6i_nsiblings)
+					rt->rt6i_nsiblings = 0;
+#endif
 				if (!(iter->rt6i_flags & RTF_EXPIRES))
 					return -EEXIST;
 				if (!(rt->rt6i_flags & RTF_EXPIRES))
@@ -680,6 +684,23 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 					rt6_set_expires(iter, rt->dst.expires);
 				return -EEXIST;
 			}
+#ifdef CONFIG_IPV6_MULTIPATH
+			/* If we have the same destination and the same metric,
+			 * but not the same gateway, then the route we try to
+			 * add is sibling to this route, increment our counter
+			 * of siblings, and later we will add our route to the
+			 * list.
+			 * Only static routes (which don't have flag
+			 * RTF_EXPIRES) are used for ECMPv6.
+			 *
+			 * To avoid long lists, we only add siblings if the
+			 * route has a gateway.
+			 */
+			if (rt->rt6i_flags & RTF_GATEWAY &&
+			    !(rt->rt6i_flags & RTF_EXPIRES) &&
+			    !(iter->rt6i_flags & RTF_EXPIRES))
+				rt->rt6i_nsiblings++;
+#endif
 		}
 
 		if (iter->rt6i_metric > rt->rt6i_metric)
@@ -692,6 +713,43 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	if (ins == &fn->leaf)
 		fn->rr_ptr = NULL;
 
+#ifdef CONFIG_IPV6_MULTIPATH
+	/* Link this route to others same route. */
+	if (rt->rt6i_nsiblings) {
+		unsigned int rt6i_nsiblings;
+		struct rt6_info *sibling, *temp_sibling;
+
+		/* Find the first route that has the same metric */
+		sibling = fn->leaf;
+		while (sibling) {
+			if (sibling->rt6i_metric == rt->rt6i_metric) {
+				list_add_tail(&rt->rt6i_siblings,
+					      &sibling->rt6i_siblings);
+				break;
+			}
+			sibling = sibling->dst.rt6_next;
+		}
+		/* For each sibling in the list, increment the counter of
+		 * siblings. We can check if all the counter are equal.
+		 */
+		rt6i_nsiblings = 0;
+		list_for_each_entry_safe(sibling, temp_sibling,
+					 &rt->rt6i_siblings,
+					 rt6i_siblings) {
+			sibling->rt6i_nsiblings++;
+			if (unlikely(sibling->rt6i_nsiblings !=
+				     rt->rt6i_nsiblings)) {
+				pr_err("Wrong number of siblings for route %p (%d)\n",
+				       sibling, sibling->rt6i_nsiblings);
+			}
+			rt6i_nsiblings++;
+		}
+		if (unlikely(rt6i_nsiblings != rt->rt6i_nsiblings)) {
+			pr_err("Wrong number of siblings for route %p. I have %d routes, but count %d siblings\n",
+			       rt, rt6i_nsiblings, rt->rt6i_nsiblings);
+		}
+	}
+#endif
 	/*
 	 *	insert node
 	 */
@@ -1197,6 +1255,21 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	if (fn->rr_ptr == rt)
 		fn->rr_ptr = NULL;
 
+#ifdef CONFIG_IPV6_MULTIPATH
+	/* Remove this entry from other siblings */
+	if (rt->rt6i_nsiblings) {
+		struct rt6_info *sibling, *next_sibling;
+
+		/* For each siblings, decrement the counter of siblings */
+		list_for_each_entry_safe(sibling, next_sibling,
+					 &rt->rt6i_siblings, rt6i_siblings) {
+			sibling->rt6i_nsiblings--;
+		}
+		rt->rt6i_nsiblings = 0;
+		list_del_init(&rt->rt6i_siblings);
+	}
+#endif
+
 	/* Adjust walkers */
 	read_lock(&fib6_walker_lock);
 	FOR_WALKERS(w) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 399613b..431f7ad 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -57,6 +57,9 @@
 #include <net/xfrm.h>
 #include <net/netevent.h>
 #include <net/netlink.h>
+#ifdef CONFIG_IPV6_MULTIPATH
+#include <net/nexthop.h>
+#endif
 
 #include <asm/uaccess.h>
 
@@ -288,6 +291,10 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 
 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+#ifdef CONFIG_IPV6_MULTIPATH
+		INIT_LIST_HEAD(&rt->rt6i_siblings);
+		rt->rt6i_nsiblings = 0;
+#endif
 	}
 	return rt;
 }
@@ -388,6 +395,124 @@ static bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
+#ifdef CONFIG_IPV6_MULTIPATH
+/*
+ *	Multipath route selection.
+ */
+
+#ifdef CONFIG_IPV6_MULTIPATH_RANDOM
+/*
+ * Pseudo random candidate function
+ */
+static int rt6_info_hash_randomfn(unsigned int candidate_count)
+{
+	return random32() % candidate_count;
+}
+#endif
+
+#ifdef CONFIG_IPV6_MULTIPATH_RR
+/*
+ * Fake Round Robin candidate function
+ * If we want real RR, we need to add a counter in each route
+ */
+static int rt6_info_hash_falserr(unsigned int candidate_count)
+{
+	static unsigned int seed;
+	seed++;
+	return seed % candidate_count;
+}
+#endif
+
+#ifdef CONFIG_IPV6_MULTIPATH_HASH
+/*
+ * Pseudo random candidate using the src port, and other information
+ * Adapted from fib_info_hashfn()
+ */
+static int rt6_info_hash_nhsfn(unsigned int candidate_count,
+			       const struct flowi6 *fl6)
+{
+	unsigned int val = fl6->flowi6_proto;
+
+	val ^= fl6->daddr.s6_addr32[0];
+	val ^= fl6->daddr.s6_addr32[1];
+	val ^= fl6->daddr.s6_addr32[2];
+	val ^= fl6->daddr.s6_addr32[3];
+
+	val ^= fl6->saddr.s6_addr32[0];
+	val ^= fl6->saddr.s6_addr32[1];
+	val ^= fl6->saddr.s6_addr32[2];
+	val ^= fl6->saddr.s6_addr32[3];
+
+	/* Work only if this not encapsulated */
+	switch (fl6->flowi6_proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_SCTP:
+		val ^= fl6->fl6_sport;
+		val ^= fl6->fl6_dport;
+		break;
+
+	case IPPROTO_ICMPV6:
+		val ^= fl6->fl6_icmp_type;
+		val ^= fl6->fl6_icmp_code;
+		break;
+	}
+	/* RFC 6438 recommends using the flow label */
+	val ^= fl6->flowlabel;
+
+	/* Perhaps we need to tune this function? */
+	val = val ^ (val >> 7) ^ (val >> 12);
+	return val % candidate_count;
+}
+#endif
+
+/*
+ * This function returns an index used to select (at random, round robin,
+ * ...) a route among the siblings.
+ *
+ * Note: fl6 can be NULL
+ */
+static unsigned int rt6_info_hashfn(const struct rt6_info *rt,
+				    const struct flowi6 *fl6)
+{
+	int candidate_count = rt->rt6i_nsiblings + 1;
+
+#if defined(CONFIG_IPV6_MULTIPATH_RR)
+	return rt6_info_hash_falserr(candidate_count);
+#elif defined(CONFIG_IPV6_MULTIPATH_RANDOM)
+	return rt6_info_hash_randomfn(candidate_count);
+#elif defined(CONFIG_IPV6_MULTIPATH_HASH)
+	if (fl6 == NULL)
+		return 0;
+	return rt6_info_hash_nhsfn(candidate_count, fl6);
+#else
+	return 0;
+#endif
+}
+
+static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+					     struct flowi6 *fl6)
+{
+	struct rt6_info *sibling, *next_sibling;
+	int route_choosen;
+
+	route_choosen = rt6_info_hashfn(match, fl6);
+	/* Don't change the route, if route_choosen == 0
+	 * (siblings does not include ourself)
+	 */
+	if (route_choosen)
+		list_for_each_entry_safe(sibling, next_sibling,
+				&match->rt6i_siblings, rt6i_siblings) {
+			route_choosen--;
+			if (route_choosen == 0) {
+				match = sibling;
+				break;
+			}
+		}
+	return match;
+}
+#endif /* CONFIG_IPV6_MULTIPATH */
+
 /*
  *	Route lookup. Any table->tb6_lock is implied.
  */
@@ -705,6 +830,10 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 restart:
 	rt = fn->leaf;
 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+		rt = rt6_multipath_select(rt, fl6);
+#endif
 	BACKTRACK(net, &fl6->saddr);
 out:
 	dst_use(&rt->dst, jiffies);
@@ -866,7 +995,10 @@ restart_2:
 
 restart:
 	rt = rt6_select(fn, oif, strict | reachable);
-
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (rt->rt6i_nsiblings && oif == 0)
+		rt = rt6_multipath_select(rt, fl6);
+#endif
 	BACKTRACK(net, &fl6->saddr);
 	if (rt == net->ipv6.ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
@@ -2247,6 +2379,9 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_IIF]		= { .type = NLA_U32 },
 	[RTA_PRIORITY]          = { .type = NLA_U32 },
 	[RTA_METRICS]           = { .type = NLA_NESTED },
+#ifdef CONFIG_IPV6_MULTIPATH
+	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
+#endif
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2324,11 +2459,69 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (tb[RTA_TABLE])
 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
 
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (tb[RTA_MULTIPATH]) {
+		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
+		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+	}
+#endif
+
 	err = 0;
 errout:
 	return err;
 }
 
+#ifdef CONFIG_IPV6_MULTIPATH
+static int ip6_route_multipath(struct fib6_config *cfg, int add)
+{
+	struct fib6_config r_cfg;
+	struct rtnexthop *rtnh;
+	int remaining;
+	int attrlen;
+	int err = 0, last_err = 0;
+
+beginning:
+	rtnh = (struct rtnexthop *)cfg->fc_mp;
+	remaining = cfg->fc_mp_len;
+
+	/* Parse a Multipath Entry */
+	while (rtnh_ok(rtnh, remaining)) {
+		memcpy(&r_cfg, cfg, sizeof(*cfg));
+		if (rtnh->rtnh_ifindex)
+			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla) {
+				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+				r_cfg.fc_flags |= RTF_GATEWAY;
+			}
+		}
+		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+		if (err) {
+			last_err = err;
+			/* If we are trying to remove a route, do not stop the
+			 * loop when ip6_route_del() fails (because next hop is
+			 * already gone), we should try to remove all next hops.
+			 */
+			if (add) {
+				/* If add fails, we should try to delete all
+				 * next hops that have been already added.
+				 */
+				add = 0;
+				goto beginning;
+			}
+		}
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
+
+	return last_err;
+}
+#endif /* CONFIG_IPV6_MULTIPATH */
+
 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib6_config cfg;
@@ -2338,7 +2531,12 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
 	if (err < 0)
 		return err;
 
-	return ip6_route_del(&cfg);
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (cfg.fc_mp)
+		return ip6_route_multipath(&cfg, 0);
+	else
+#endif
+		return ip6_route_del(&cfg);
 }
 
 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -2350,7 +2548,12 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
 	if (err < 0)
 		return err;
 
-	return ip6_route_add(&cfg);
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (cfg.fc_mp)
+		return ip6_route_multipath(&cfg, 1);
+	else
+#endif
+		return ip6_route_add(&cfg);
 }
 
 static inline size_t rt6_nlmsg_size(void)
-- 
1.7.12

^ permalink raw reply related

* [RFC PATCH net-next v2 0/1] Add support of ECMPv6
From: Nicolas Dichtel @ 2012-09-14  7:59 UTC (permalink / raw)
  To: yoshfuji; +Cc: bernat, netdev, davem
In-Reply-To: <505058F5.9020707@linux-ipv6.org>

Here is a proposal to add support for ECMPv6. The previous iproute2 patch
from Vincent can be used, but one additional small patch is also needed;
see http://patchwork.ozlabs.org/patch/183277/

If the kernel patch is approved, I will formally submit the patch for
iproute2.

Here is an example of a command to add an ECMP route:
$ ip -6 route add 3ffe:304:124:2306::/64 \
	nexthop via fe80::230:1bff:feb4:e05c dev eth0 weight 1 \
	nexthop via fe80::230:1bff:feb4:dd4f dev eth0 weight 1

v2: rename CONFIG_IPV6_MULTIPATH_ROUTE to CONFIG_IPV6_MULTIPATH_HASH
    use flowlabel in the hash function
    add reference to RFC
    fix a small indentation issue
    remove "If unsure, say N." from the help of CONFIG_IPV6_MULTIPATH

Comments are welcome.

Regards,
Nicolas

^ permalink raw reply

* Re: [net-next.git 3/8 (V2)] stmmac: add the initial tx coalesce schema
From: Giuseppe CAVALLARO @ 2012-09-14  7:36 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, bhutchings
In-Reply-To: <20120913.162333.1518469374321928795.davem@davemloft.net>

On 9/13/2012 10:23 PM, David Miller wrote:
> From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
> Date: Tue, 11 Sep 2012 08:55:09 +0200
>
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&priv->tx_lock, flags);
>>
>> -	spin_lock(&priv->tx_lock);
>> +	priv->xstats.tx_clean++;
>
> You are changing the locking here for the sake of the new timer.
>
> But timers run in software interrupt context, so this change is
> completely unnecessary since NAPI runs in software interrupt context
> as well, and neither timers nor NAPI run in hardware interrupts
> context.

Indeed, it can also be called from the ISR in this new implementation.
I added spin_lock_irqsave/restore because otherwise, when testing with
CONFIG_PROVE_LOCKING, I get the following warning on ARM SMP.

[    8.030000]
[    8.030000] =================================
[    8.030000] [ INFO: inconsistent lock state ]
[    8.030000] 3.4.7_stm24_0302-b2000+ #103 Not tainted
[    8.030000] ---------------------------------
[    8.030000] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
[    8.030000] swapper/0/1 [HC1[1]:SC0[0]:HE0:SE1] takes:
[    8.030000]  (&(&priv->tx_lock)->rlock){?.-...}, at: [<802651d8>] 
stmmac_tx+0x1c/0x388
[    8.030000] {HARDIRQ-ON-W} state was registered at:
[    8.030000]   [<800562b4>] __lock_acquire+0x638/0x179c
[    8.030000]   [<80057884>] lock_acquire+0x60/0x74
[    8.030000]   [<80428a08>] _raw_spin_lock+0x40/0x50
[    8.030000]   [<802651d8>] stmmac_tx+0x1c/0x388
[    8.030000]   [<80026be0>] run_timer_softirq+0x180/0x23c
[    8.030000]   [<80020ccc>] __do_softirq+0xa0/0x114
[    8.030000]   [<80021204>] irq_exit+0x58/0x7c
[    8.030000]   [<8000dc80>] handle_IRQ+0x7c/0xb8
[    8.030000]   [<80008464>] gic_handle_irq+0x34/0x58
[    8.030000]   [<80429684>] __irq_svc+0x44/0x78
[    8.030000]   [<8001c3f4>] vprintk+0x41c/0x480
[    8.030000]   [<8042097c>] printk+0x18/0x24
[    8.030000]   [<805aef6c>] prepare_namespace+0x1c/0x1a4
[    8.030000]   [<805ae980>] kernel_init+0x1c8/0x20c
[    8.030000]   [<8000deb8>] kernel_thread_exit+0x0/0x8
[    8.030000] irq event stamp: 254745
[    8.030000] hardirqs last  enabled at (254744): [<80429240>] 
_raw_spin_unlock_irqrestore+0x3c/0x6c
[    8.030000] hardirqs last disabled at (254745): [<80429674>] 
__irq_svc+0x34/0x78
[    8.030000] softirqs last  enabled at (254741): [<8035d964>] 
dev_queue_xmit+0x6a4/0x724
[    8.030000] softirqs last disabled at (254737): [<8035d2d4>] 
dev_queue_xmit+0x14/0x724
[    8.030000]
[    8.030000] other info that might help us debug this:
[    8.030000]  Possible unsafe locking scenario:
[    8.030000]
[    8.030000]        CPU0
[    8.030000]        ----
[    8.030000]   lock(&(&priv->tx_lock)->rlock);
[    8.030000]   <Interrupt>
[    8.030000]     lock(&(&priv->tx_lock)->rlock);
[    8.030000]
[    8.030000]  *** DEADLOCK ***

> Therefore, disabling hardware interrupts for this lock is unnecessary
> and will decrease performance.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: [RFC PATCH 4/5] bnx2fc: Use new fcoe_sysfs control interface
From: Bhanu Prakash Gollapudi @ 2012-09-14  7:28 UTC (permalink / raw)
  To: Robert Love; +Cc: netdev, gregkh, linux-scsi, devel
In-Reply-To: <20120910225930.13140.97949.stgit@fritz>

On 09/10/2012 03:59 PM, Robert Love wrote:
> Convert	bnx2fc to use the new fcoe_sysfs create, delete,
> enable, disable, start and mode.
>
> bnx2fc doesn't support VN2VN. bnx2fc will not initialize
> the set_fcoe_ctlr_mode routine and therefore its instances
> will always be in FABRIC mode. There was previously an
> explicit check for the ctlr's mode, but this is no longer
> needed because not implementing set_fcoe_ctlr_mode implies
> that the ctlr cannot change from the FABRIC mode.
>
> Signed-off-by: Robert Love <robert.w.love@intel.com>
> ---
>   drivers/scsi/bnx2fc/bnx2fc_fcoe.c |   98 +++++++++++++++++++++++--------------
>   1 file changed, 60 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
> index f52f668f..560c8c8 100644
> --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
> +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
<snip>

>   /**
> + * bnx2fc_alloc - Alocate a bnx2fc FCoE interface
> + *
> + * @cdev: The FCoE Controller Device to start
> + *
> + * Called from sysfs.
> + *
> + * Returns: 0 for success
> + */
> +static int bnx2fc_start(struct fcoe_ctlr_device *cdev)
> +{
> +	struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(cdev);
> +	struct fc_lport *lport = ctlr->lp;
> +	struct fcoe_port *port = lport_priv(lport);
> +	struct bnx2fc_interface *interface = port->priv;
> +
> +	lport->boot_time = jiffies;
> +
> +	/* Make this master N_port */
> +	ctlr->lp = lport;

ctlr->lp should be set in bnx2fc_alloc() as we access it here in the 
beginning of this function.

> +
> +	if (!bnx2fc_link_ok(lport)) {
> +		fcoe_ctlr_link_up(ctlr);
> +		fc_host_port_type(lport->host) = FC_PORTTYPE_NPORT;
> +		set_bit(ADAPTER_STATE_READY, &interface->hba->adapter_state);
> +	}
> +
> +	BNX2FC_HBA_DBG(lport, "create: START DISC\n");
> +	bnx2fc_start_disc(interface);
I think more changes are required for bnx2fc, as fc_lport_init() is
called just before calling fc_fabric_login() - which is called during
'start'. Because of this, if we just call 'create' followed by 'destroy'
without calling 'start', lport is not initialized and I expect to see
some panics when destroy is called.

Let me try testing your patches and send you any fixes that are required.

> +	interface->enabled = true;
> +
> +	return 0;
> +}
> +
> +/**
>    * bnx2fc_find_hba_for_cnic - maps cnic instance to bnx2fc hba instance
>    *
>    * @cnic:	Pointer to cnic device instance
> @@ -2271,10 +2292,8 @@ static struct fcoe_transport bnx2fc_transport = {
>   	.attached = false,
>   	.list = LIST_HEAD_INIT(bnx2fc_transport.list),
>   	.match = bnx2fc_match,
> -	.create = bnx2fc_create,
> +	.alloc = bnx2fc_alloc,
>   	.destroy = bnx2fc_destroy,
> -	.enable = bnx2fc_enable,
> -	.disable = bnx2fc_disable,
>   };
>   
>   /**
> @@ -2514,6 +2533,9 @@ module_init(bnx2fc_mod_init);
>   module_exit(bnx2fc_mod_exit);
>   
>   static struct fcoe_sysfs_function_template bnx2fc_fcoe_sysfs_templ = {
> +	.set_fcoe_ctlr_start = bnx2fc_start,
> +	.set_fcoe_ctlr_enable = bnx2fc_enable,
> +	.set_fcoe_ctlr_disable = bnx2fc_disable,
>   	.get_fcoe_ctlr_mode = fcoe_ctlr_get_fip_mode,
>   	.get_fcoe_ctlr_link_fail = bnx2fc_ctlr_get_lesb,
>   	.get_fcoe_ctlr_vlink_fail = bnx2fc_ctlr_get_lesb,
>
>

^ permalink raw reply

* [PATCH net-next 2/2] gre: add GSO support
From: Eric Dumazet @ 2012-09-14  7:25 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Maciej Żenczykowski

From: Eric Dumazet <edumazet@google.com>

Add GSO support to GRE tunnels.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
---
 net/ipv4/ip_gre.c |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b062a98..f233c1d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -745,6 +745,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	__be32 dst;
 	int    mtu;
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    skb_checksum_help(skb))
+		goto tx_error;
+
 	if (dev->type == ARPHRD_ETHER)
 		IPCB(skb)->flags = 0;
 
@@ -1296,6 +1300,11 @@ static void ipgre_dev_free(struct net_device *dev)
 	free_netdev(dev);
 }
 
+#define GRE_FEATURES (NETIF_F_SG |		\
+		      NETIF_F_FRAGLIST |	\
+		      NETIF_F_HIGHDMA |		\
+		      NETIF_F_HW_CSUM)
+
 static void ipgre_tunnel_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &ipgre_netdev_ops;
@@ -1309,6 +1318,9 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 	dev->addr_len		= 4;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+
+	dev->features		|= GRE_FEATURES;
+	dev->hw_features	|= GRE_FEATURES;
 }
 
 static int ipgre_tunnel_init(struct net_device *dev)

^ permalink raw reply related

* [PATCH net-next 1/2] net: provide a default dev->ethtool_ops
From: Eric Dumazet @ 2012-09-14  7:24 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Maciej Żenczykowski

From: Eric Dumazet <edumazet@google.com>

Instead of forcing device drivers to provide empty ethtool_ops or tweak
net/core/ethtool.c again, we could provide a generic ethtool_ops.

This occurred to me when I wanted to add GSO support to GRE tunnels.
ethtool -k support should be generic for all drivers.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
---
 net/core/dev.c     |    5 +++
 net/core/ethtool.c |   54 +++++++++++++++++--------------------------
 2 files changed, 27 insertions(+), 32 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index b1e6d63..ff8dcfc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6045,6 +6045,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	strcpy(dev->name, name);
 	dev->group = INIT_NETDEV_GROUP;
+	if (!dev->ethtool_ops) {
+		static const struct ethtool_ops default_ethtool_ops;
+
+		dev->ethtool_ops = &default_ethtool_ops;
+	}
 	return dev;
 
 free_all:
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index cbf033d..aef0162 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -175,7 +175,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
 	if (sset == ETH_SS_FEATURES)
 		return ARRAY_SIZE(netdev_features_strings);
 
-	if (ops && ops->get_sset_count && ops->get_strings)
+	if (ops->get_sset_count && ops->get_strings)
 		return ops->get_sset_count(dev, sset);
 	else
 		return -EOPNOTSUPP;
@@ -311,7 +311,7 @@ int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	ASSERT_RTNL();
 
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
+	if (!dev->ethtool_ops->get_settings)
 		return -EOPNOTSUPP;
 
 	memset(cmd, 0, sizeof(struct ethtool_cmd));
@@ -355,7 +355,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 
 	memset(&info, 0, sizeof(info));
 	info.cmd = ETHTOOL_GDRVINFO;
-	if (ops && ops->get_drvinfo) {
+	if (ops->get_drvinfo) {
 		ops->get_drvinfo(dev, &info);
 	} else if (dev->dev.parent && dev->dev.parent->driver) {
 		strlcpy(info.bus_info, dev_name(dev->dev.parent),
@@ -370,7 +370,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 	 * this method of obtaining string set info is deprecated;
 	 * Use ETHTOOL_GSSET_INFO instead.
 	 */
-	if (ops && ops->get_sset_count) {
+	if (ops->get_sset_count) {
 		int rc;
 
 		rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -383,9 +383,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 		if (rc >= 0)
 			info.n_priv_flags = rc;
 	}
-	if (ops && ops->get_regs_len)
+	if (ops->get_regs_len)
 		info.regdump_len = ops->get_regs_len(dev);
-	if (ops && ops->get_eeprom_len)
+	if (ops->get_eeprom_len)
 		info.eedump_len = ops->get_eeprom_len(dev);
 
 	if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -1275,7 +1275,7 @@ static int ethtool_get_dump_flag(struct net_device *dev,
 	struct ethtool_dump dump;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 
-	if (!dev->ethtool_ops->get_dump_flag)
+	if (!ops->get_dump_flag)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1299,8 +1299,7 @@ static int ethtool_get_dump_data(struct net_device *dev,
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	void *data = NULL;
 
-	if (!dev->ethtool_ops->get_dump_data ||
-		!dev->ethtool_ops->get_dump_flag)
+	if (!ops->get_dump_data || !ops->get_dump_flag)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&dump, useraddr, sizeof(dump)))
@@ -1349,7 +1348,7 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
 
 		err = phydev->drv->ts_info(phydev, &info);
 
-	} else if (dev->ethtool_ops && dev->ethtool_ops->get_ts_info) {
+	} else if (ops->get_ts_info) {
 
 		err = ops->get_ts_info(dev, &info);
 
@@ -1410,8 +1409,9 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
 				      modinfo.eeprom_len);
 }
 
-/* The main entry point in this file.  Called from net/core/dev.c */
-
+/* The main entry point in this file.  Called from net/core/dev.c
+ * with RTNL held.
+ */
 int dev_ethtool(struct net *net, struct ifreq *ifr)
 {
 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -1419,25 +1419,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	u32 ethcmd;
 	int rc;
 	u32 old_features;
+	const struct ethtool_ops *ops;
 
 	if (!dev || !netif_device_present(dev))
 		return -ENODEV;
 
+	ops = dev->ethtool_ops;
 	if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
 		return -EFAULT;
 
-	if (!dev->ethtool_ops) {
-		/* A few commands do not require any driver support,
-		 * are unprivileged, and do not change anything, so we
-		 * can take a shortcut to them. */
-		if (ethcmd == ETHTOOL_GDRVINFO)
-			return ethtool_get_drvinfo(dev, useraddr);
-		else if (ethcmd == ETHTOOL_GET_TS_INFO)
-			return ethtool_get_ts_info(dev, useraddr);
-		else
-			return -EOPNOTSUPP;
-	}
-
 	/* Allow some commands to be done by anyone */
 	switch (ethcmd) {
 	case ETHTOOL_GSET:
@@ -1476,8 +1466,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 			return -EPERM;
 	}
 
-	if (dev->ethtool_ops->begin) {
-		rc = dev->ethtool_ops->begin(dev);
+	if (ops->begin) {
+		rc = ops->begin(dev);
 		if (rc  < 0)
 			return rc;
 	}
@@ -1504,11 +1494,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		break;
 	case ETHTOOL_GMSGLVL:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       dev->ethtool_ops->get_msglevel);
+				       ops->get_msglevel);
 		break;
 	case ETHTOOL_SMSGLVL:
 		rc = ethtool_set_value_void(dev, useraddr,
-				       dev->ethtool_ops->set_msglevel);
+					    ops->set_msglevel);
 		break;
 	case ETHTOOL_GEEE:
 		rc = ethtool_get_eee(dev, useraddr);
@@ -1570,11 +1560,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		break;
 	case ETHTOOL_GPFLAGS:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       dev->ethtool_ops->get_priv_flags);
+				       ops->get_priv_flags);
 		break;
 	case ETHTOOL_SPFLAGS:
 		rc = ethtool_set_value(dev, useraddr,
-				       dev->ethtool_ops->set_priv_flags);
+				       ops->set_priv_flags);
 		break;
 	case ETHTOOL_GRXFH:
 	case ETHTOOL_GRXRINGS:
@@ -1655,8 +1645,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = -EOPNOTSUPP;
 	}
 
-	if (dev->ethtool_ops->complete)
-		dev->ethtool_ops->complete(dev);
+	if (ops->complete)
+		ops->complete(dev);
 
 	if (old_features != dev->features)
 		netdev_features_change(dev);

^ permalink raw reply related

* Re: [PATCH] Xen backend support for paged out grant targets.
From: Ian Campbell @ 2012-09-14  7:19 UTC (permalink / raw)
  To: Andres Lagar-Cavilla
  Cc: Andres Lagar-Cavilla, xen-devel@xen.lists.org,
	Konrad Rzeszutek Wilk, David Vrabel, David Miller,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <B4C805E1-3390-4002-BECC-7D1AFF2BD56D@gmail.com>

On Thu, 2012-09-13 at 20:45 +0100, Andres Lagar-Cavilla wrote:
> On Sep 13, 2012, at 2:11 PM, Ian Campbell wrote:
> 
> > On Thu, 2012-09-13 at 18:28 +0100, Andres Lagar-Cavilla wrote:
> >> 
> >> * Add placeholder in array of grant table error descriptions for
> >> unrelated error code we jump over. 
> > 
> > Why not just define it, it's listed here:
> > http://xenbits.xen.org/docs/unstable/hypercall/include,public,grant_table.h.html#Enum_grant_status
> Well, a) we'd be defining something no one will be using (for the
> moment)

Even if no one in the kernel is using it, having "placeholder" as an
entry in GNTTABOP_error_msgs is just silly, even things which don't
understand GNTST_address_too_big directly could end up looking it up
here.

>  b) I would be signing-off on something unrelated.

Let's take this patch instead, then.

8<------------------------------------------------

>From cb9daaf3029accb6d5fef58b450a625b27190429 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 14 Sep 2012 08:10:06 +0100
Subject: [PATCH] xen: resynchronise grant table status codes with upstream

Adds GNTST_address_too_big and GNTST_eagain.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
 include/xen/interface/grant_table.h |    8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index a17d844..84a8fbf 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -519,7 +519,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
 #define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
 #define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
 #define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
-#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary */
+#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
+#define GNTST_address_too_big (-11) /* transfer page address too large.      */
+#define GNTST_eagain          (-12) /* Operation not done; try again.        */
 
 #define GNTTABOP_error_msgs {                   \
     "okay",                                     \
@@ -532,7 +534,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
     "no spare translation slot in the I/O MMU", \
     "permission denied",                        \
     "bad page",                                 \
-    "copy arguments cross page boundary"        \
+    "copy arguments cross page boundary",       \
+    "page address size too large",              \
+    "operation not done; try again"             \
 }
 
 #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH] iproute2: bridge: finish removing replace option in man pages
From: John Fastabend @ 2012-09-14  6:50 UTC (permalink / raw)
  To: shemminger; +Cc: netdev

This patch finishes removing the replace option from the bridge
man page which I missed in this commit

commit 57b9785de32404da3d2ac5483469b7fcc5a9c9e7
Author: John Fastabend <john.r.fastabend@intel.com>
Date:   Mon Aug 27 10:52:31 2012 -0700

    iproute2: bridge: remove replace and change options

Also add documentation for "{ self | embedded }" already shown on
the cmd line help msg.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 man/man8/bridge.8 |   19 +++++++++++++------
 1 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/man/man8/bridge.8 b/man/man8/bridge.8
index 63d166b..5ce8219 100644
--- a/man/man8/bridge.8
+++ b/man/man8/bridge.8
@@ -22,11 +22,12 @@ bridge \- show / manipulate bridge addresses and devices
 \fB\-s\fR[\fItatistics\fR]
 
 .ti -8
-.BR "bridge fdb" " { " add " | " del " | " change " | " replace " } "
+.BR "bridge fdb" " { " add " | " del " } "
 .I LLADDR
 .B  dev
 .IR DEV " { "
-.BR local " | " temp " }"
+.BR local " | " temp " } { "
+.BR self " } { " embedded " } "
 
 .ti -8
 .BR "bridge fdb" " [ " show " ] [ "
@@ -92,11 +93,9 @@ objects contain known Ethernet addresses on a  link.
 The corresponding commands display fdb entries, add new entries,
 and delete old ones.
 
-.SS bridge fdb add - add a new neighbor entry
-.SS bridge fdb change - change an existing entry
-.SS bridge fdb replace - add a new entry or change an existing one
+.SS bridge fdb add - add a new fdb entry
 
-These commands create new neighbor records or update existing ones.
+This command creates a new fdb entry.
 
 .TP
 .BI "ADDRESS"
@@ -117,6 +116,14 @@ and is never forwarded.
 - the address is a dynamic entry, and will be removed if not used.
 .sp
 
+.B self
+- the address is associated with a software fdb (default)
+.sp
+
+.B embedded
+- the address is associated with an offloaded fdb
+.sp
+
 .in -8
 
 .SS bridge fdb delete - delete a forwarding database entry

^ permalink raw reply related

* Re: [RFC PATCH 2/5] libfcoe: Create new libfcoe control interfaces
From: Bhanu Prakash Gollapudi @ 2012-09-14  7:06 UTC (permalink / raw)
  To: Robert Love; +Cc: netdev, gregkh, linux-scsi, devel
In-Reply-To: <20120910225919.13140.63240.stgit@fritz>

On 9/10/2012 3:59 PM, Robert Love wrote:
> This patch is the first in a series that will remove
> libfcoe's create, destroy, enable and disable module
> parameters and replace them with interface files in
> the new /sys/bus/fcoe subsystem.
>
> Old layout:
>
> /sys/module/libfcoe/parameters/{create,destroy,enable,disable,vn2vn_create}
>
> New layout:
>
> /sys/bus/fcoe/ctlr_{create,destroy}
> /sys/bus/fcoe/ctlr_X/{enable,disable,start}
>
> This patch moves fcoe drivers to the following
> initialization sequence-
>
> 1) create/alloc
> 2) configure
> 3) start
>
> A control sysfs interface at /sys/bus/fcoe/ctlr_create
> is added. Writing the interface name to this file
> will allocate memory and create a sysfs entry for a
> new fcoe_ctlr_device. The user may then tune the interface in
> any desired way. After configuration the user will
> echo any value into the /sys/bus/fcoe/devices/ctlr_X/start
> interface to proceed with logging in.
>
> VN2VN logins will still use the module parameters.
> A follow up patch to this one will make the 'mode'
> attribute of the fcoe_ctlr_device writable. Which will
> allow a user to change the ctlr's mode to 'VN2VN'.
>
> Signed-off-by: Robert Love <robert.w.love@intel.com>
> ---
>   Documentation/ABI/testing/sysfs-bus-fcoe |   43 ++++++++++++
>   drivers/scsi/fcoe/fcoe.h                 |    9 +++
>   drivers/scsi/fcoe/fcoe_ctlr.c            |    2 -
>   drivers/scsi/fcoe/fcoe_sysfs.c           |   78 ++++++++++++++++++++++
>   drivers/scsi/fcoe/fcoe_transport.c       |  105 +++++++++++++++++++++++++++++-
>   include/scsi/fcoe_sysfs.h                |    4 +
>   include/scsi/libfcoe.h                   |   14 ++++
>   7 files changed, 250 insertions(+), 5 deletions(-)
>

<snip>

> diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
> index bd899bf..ccb92323 100644
> --- a/drivers/scsi/fcoe/fcoe_ctlr.c
> +++ b/drivers/scsi/fcoe/fcoe_ctlr.c
> @@ -147,7 +147,7 @@ static void fcoe_ctlr_map_dest(struct fcoe_ctlr *fip)
>    */
>   void fcoe_ctlr_init(struct fcoe_ctlr *fip, enum fip_state mode)
>   {
> -	fcoe_ctlr_set_state(fip, FIP_ST_LINK_WAIT);
> +	fcoe_ctlr_set_state(fip, FIP_ST_DISABLED);

Robert, what is the reason for initializing it to DISABLED? Unless the 
FIP state is FIP_ST_LINK_WAIT, fcoe_ctlr_link_up() doesn't set 
lport->link_up and hence does not allow any FIP/FCoE frames to be sent out.

>   	fip->mode = mode;
>   	INIT_LIST_HEAD(&fip->fcfs);
>   	mutex_init(&fip->ctlr_mutex);

<snip>

> @@ -627,6 +626,108 @@ static int libfcoe_device_notification(struct notifier_block *notifier,
>   	return NOTIFY_OK;
>   }
>
> +ssize_t fcoe_ctlr_create_store(struct bus_type *bus,
> +			       const char *buf, size_t count)
> +{
> +	struct net_device *netdev = NULL;
> +	struct fcoe_transport *ft = NULL;
> +	struct fcoe_ctlr_device *ctlr_dev = NULL;
> +	int rc = -ENODEV;
> +	int err;
> +
> +	mutex_lock(&ft_mutex);
> +
> +	netdev = fcoe_if_to_netdev(buf);
> +	if (!netdev) {
> +		LIBFCOE_TRANSPORT_DBG("Invalid device %s.\n", buf);
> +		rc = -ENODEV;
> +		goto out_nodev;
> +	}
> +
> +	ft = fcoe_netdev_map_lookup(netdev);
> +	if (ft) {
> +		LIBFCOE_TRANSPORT_DBG("transport %s already has existing "
> +				      "FCoE instance on %s.\n",
> +				      ft->name, netdev->name);
> +		rc = -EEXIST;
> +		goto out_putdev;
> +	}
> +
> +	ft = fcoe_transport_lookup(netdev);
> +	if (!ft) {
> +		LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n",
> +				      netdev->name);
> +		rc = -ENODEV;
> +		goto out_putdev;
> +	}
> +
> +	/* pass to transport create */
> +	err = ft->alloc ? ft->alloc(netdev) : -ENODEV;
> +	if (err) {
> +		fcoe_del_netdev_mapping(netdev);
> +		rc = -ENOMEM;
> +		goto out_putdev;
> +	}
> +
> +	err = fcoe_add_netdev_mapping(netdev, ft);
> +	if (err) {
> +		LIBFCOE_TRANSPORT_DBG("failed to add new netdev mapping "
> +				      "for FCoE transport %s for %s.\n",
> +				      ft->name, netdev->name);
> +		rc = -ENODEV;
> +		goto out_putdev;
> +	}
> +
> +	LIBFCOE_TRANSPORT_DBG("transport %s %s to create fcoe on %s.\n",
> +			      ft->name, (ctlr_dev) ? "succeeded" : "failed",
> +			      netdev->name);

Where is ctlr_dev updated? I guess you're intending to check return 
status of ft->alloc() here.

> +
> +out_putdev:
> +	dev_put(netdev);
> +out_nodev:
> +	mutex_unlock(&ft_mutex);
> +	return rc;
> +}
> +
> +ssize_t fcoe_ctlr_destroy_store(struct bus_type *bus,
> +				const char *buf, size_t count)
> +{
> +	int rc = -ENODEV;
> +	struct net_device *netdev = NULL;
> +	struct fcoe_transport *ft = NULL;
> +
> +	mutex_lock(&ft_mutex);
> +
> +	netdev = fcoe_if_to_netdev(buf);
> +	if (!netdev) {
> +		LIBFCOE_TRANSPORT_DBG("invalid device %s.\n", buf);
> +		goto out_nodev;
> +	}
> +
> +	ft = fcoe_netdev_map_lookup(netdev);
> +	if (!ft) {
> +		LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n",
> +				      netdev->name);
> +		goto out_putdev;
> +	}
> +
> +	/* pass to transport destroy */
> +	rc = ft->destroy(netdev);
> +	if (rc)
> +		goto out_putdev;
> +
> +	fcoe_del_netdev_mapping(netdev);
> +	LIBFCOE_TRANSPORT_DBG("transport %s %s to destroy fcoe on %s.\n",
> +			      ft->name, (rc) ? "failed" : "succeeded",
> +			      netdev->name);
> +	rc = count; /* required for successful return */
> +out_putdev:
> +	dev_put(netdev);
> +out_nodev:
> +	mutex_unlock(&ft_mutex);
> +	return rc;
> +}
> +EXPORT_SYMBOL(fcoe_ctlr_destroy_store);
>
>   /**
>    * fcoe_transport_create() - Create a fcoe interface
> diff --git a/include/scsi/fcoe_sysfs.h b/include/scsi/fcoe_sysfs.h
> index 421ae67..8c5ea70 100644
> --- a/include/scsi/fcoe_sysfs.h
> +++ b/include/scsi/fcoe_sysfs.h
> @@ -36,6 +36,9 @@ struct fcoe_sysfs_function_template {
>   	void (*get_fcoe_ctlr_fcs_error)(struct fcoe_ctlr_device *);
>   	void (*get_fcoe_ctlr_mode)(struct fcoe_ctlr_device *);
>   	void (*set_fcoe_ctlr_mode)(struct fcoe_ctlr_device *);
> +	int  (*set_fcoe_ctlr_start)(struct fcoe_ctlr_device *);
> +	int  (*set_fcoe_ctlr_enable)(struct fcoe_ctlr_device *);
> +	int  (*set_fcoe_ctlr_disable)(struct fcoe_ctlr_device *);
>   	void (*get_fcoe_fcf_selected)(struct fcoe_fcf_device *);
>   	void (*get_fcoe_fcf_vlan_id)(struct fcoe_fcf_device *);
>   };
> @@ -64,6 +67,7 @@ struct fcoe_ctlr_device {
>
>   	int                             fcf_dev_loss_tmo;
>   	enum fip_conn_type              mode;
> +	u8                              started:1;
>
>   	/* expected in host order for displaying */
>   	struct fcoe_fc_els_lesb         lesb;
> diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
> index 20533cc..b19a489 100644
> --- a/include/scsi/libfcoe.h
> +++ b/include/scsi/libfcoe.h
> @@ -289,8 +289,11 @@ static inline bool is_fip_mode(struct fcoe_ctlr *fip)
>    * @attached:	whether this transport is already attached
>    * @list:	list linkage to all attached transports
>    * @match:	handler to allow the transport driver to match up a given netdev
> + * @alloc:      handler to allocate per-instance FCoE structures
> + *		(no discovery or login)
>    * @create:	handler to sysfs entry of create for FCoE instances
> - * @destroy:	handler to sysfs entry of destroy for FCoE instances
> + * @destroy:    handler to delete per-instance FCoE structures
> + *		(frees all memory)
>    * @enable:	handler to sysfs entry of enable for FCoE instances
>    * @disable:	handler to sysfs entry of disable for FCoE instances
>    */
> @@ -299,6 +302,7 @@ struct fcoe_transport {
>   	bool attached;
>   	struct list_head list;
>   	bool (*match) (struct net_device *device);
> +	int (*alloc) (struct net_device *device);
>   	int (*create) (struct net_device *device, enum fip_state fip_mode);
>   	int (*destroy) (struct net_device *device);
>   	int (*enable) (struct net_device *device);
> @@ -375,4 +379,12 @@ struct fcoe_netdev_mapping {
>   int fcoe_transport_attach(struct fcoe_transport *ft);
>   int fcoe_transport_detach(struct fcoe_transport *ft);
>
> +/* sysfs store handler for ctrl_control interface */
> +ssize_t fcoe_ctlr_create_store(struct bus_type *bus,
> +			       const char *buf, size_t count);
> +ssize_t fcoe_ctlr_destroy_store(struct bus_type *bus,
> +				const char *buf, size_t count);
> +
>   #endif /* _LIBFCOE_H */
> +
> +
>
>



^ permalink raw reply

* [PATCH net-next] net: dev: fix incorrect getting net device's name
From: Gao feng @ 2012-09-14  6:58 UTC (permalink / raw)
  To: davem; +Cc: ebiederm, eric.dumazet, netdev, Gao feng

When moving a NIC from net namespace A to net namespace B,
dev_change_net_namespace calls __dev_get_by_name to check
whether netns B already has a device with the same name.

If netns B already has a device with that name, we call
dev_get_valid_name to try to get a valid name for this NIC in
netns B, but net_device->nd_net still points to netns A at that point,
so the name is validated against the wrong namespace.

This patch fixes it by passing the target net namespace to
dev_get_valid_name explicitly.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
---
 net/core/dev.c |   28 ++++++++++++++++++++--------
 1 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index b1e6d63..381ea68 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -959,18 +959,30 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
-static int dev_get_valid_name(struct net_device *dev, const char *name)
+static int dev_alloc_name_ns(struct net *net,
+			     struct net_device *dev,
+			     const char *name)
 {
-	struct net *net;
+	char buf[IFNAMSIZ];
+	int ret;
 
-	BUG_ON(!dev_net(dev));
-	net = dev_net(dev);
+	ret = __dev_alloc_name(net, name, buf);
+	if (ret >= 0)
+		strlcpy(dev->name, buf, IFNAMSIZ);
+	return ret;
+}
+
+static int dev_get_valid_name(struct net *net,
+			      struct net_device *dev,
+			      const char *name)
+{
+	BUG_ON(!net);
 
 	if (!dev_valid_name(name))
 		return -EINVAL;
 
 	if (strchr(name, '%'))
-		return dev_alloc_name(dev, name);
+		return dev_alloc_name_ns(net, dev, name);
 	else if (__dev_get_by_name(net, name))
 		return -EEXIST;
 	else if (dev->name != name)
@@ -1006,7 +1018,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	memcpy(oldname, dev->name, IFNAMSIZ);
 
-	err = dev_get_valid_name(dev, newname);
+	err = dev_get_valid_name(net, dev, newname);
 	if (err < 0)
 		return err;
 
@@ -5585,7 +5597,7 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
-	ret = dev_get_valid_name(dev, dev->name);
+	ret = dev_get_valid_name(net, dev, dev->name);
 	if (ret < 0)
 		goto out;
 
@@ -6229,7 +6241,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		/* We get here if we can't use the current device name */
 		if (!pat)
 			goto out;
-		if (dev_get_valid_name(dev, pat) < 0)
+		if (dev_get_valid_name(net, dev, pat) < 0)
 			goto out;
 	}
 
-- 
1.7.7.6

^ permalink raw reply related

* Re: [RFC/RFT 14/15] rtlwifi: Modify files for addition of rtl8723ae
From: Kalle Valo @ 2012-09-14  6:10 UTC (permalink / raw)
  To: Larry Finger
  Cc: Julian Calaby, linville-2XuSBdqkA4R54TAoqtyWWQ,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, chaoming_li-kXabqFNEczNtrwSWzY7KCg
In-Reply-To: <505170C6.8090003-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>

Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org> writes:

>> Is this supposed to be bit #29 - I notice that COMP_USB is also bit 29.
>
> Yes, that is OK. One will only be used for PCI-based drivers, and the
> other is obviously for USB. As nearly all the bits of a 32-bit
> quantity are used, I wanted to save one if possible.
>
> In the final version, I'll code this as
>
> #define COMP_USB                       BIT(29)
> #define COMP_EASY_CONCURRENT           COMP_USB
>
> That way will be more obvious.

You could also add a comment explaining it, just to make sure that
everyone understands it.

-- 
Kalle Valo
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH net-next] ipv6: recursive check rt->dst.from when call rt6_check_expired
From: roy.qing.li @ 2012-09-14  5:54 UTC (permalink / raw)
  To: gaofeng, netdev

From: Li RongQing <roy.qing.li@gmail.com>

If dst cache entry dst_a is copied from dst_b, and dst_b is copied from
dst_c, then when checking whether dst_a is expired we should not stop at
dst_a->dst.from (dst_b); we should go on to check dst_c.

CC: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
---
 net/ipv6/route.c |    6 +-----
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 83dafa5..0607ee3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -369,15 +369,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static bool rt6_check_expired(const struct rt6_info *rt)
 {
-	struct rt6_info *ort = NULL;
-
 	if (rt->rt6i_flags & RTF_EXPIRES) {
 		if (time_after(jiffies, rt->dst.expires))
 			return true;
 	} else if (rt->dst.from) {
-		ort = (struct rt6_info *) rt->dst.from;
-		return (ort->rt6i_flags & RTF_EXPIRES) &&
-			time_after(jiffies, ort->dst.expires);
+		return rt6_check_expired((struct rt6_info *) rt->dst.from);
 	}
 	return false;
 }
-- 
1.7.4.1

^ permalink raw reply related

* Re: [PATCH] ipconfig: Inform user if carrier is not ready
From: Francois Romieu @ 2012-09-14  5:36 UTC (permalink / raw)
  To: Erwan Velu; +Cc: David Miller, netdev
In-Reply-To: <50525758.1090609@gmail.com>

Erwan Velu <erwanaliasr1@gmail.com> :
[...]
> This patch is just adding a simple message every second telling we are
> waiting the carrier to come up.
> ---
>  net/ipv4/ipconfig.c |    8 ++++++++
>  1 file changed, 8 insertions(+)

The Signed-off-by: line is missing.

> diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
> index 67e8a6b..d9f34b7 100644
> --- a/net/ipv4/ipconfig.c
> +++ b/net/ipv4/ipconfig.c
> @@ -205,6 +205,7 @@ static int __init ic_open_devs(void)
>      struct net_device *dev;
>      unsigned short oflags;
>      unsigned long start;
> +    unsigned int loops=0;

(nit)
	unsigned int loops = 0;

>      last = &ic_first_dev;
>      rtnl_lock();
> @@ -266,6 +267,13 @@ static int __init ic_open_devs(void)
>              if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
>                  goto have_carrier;
> 
> +        loops++;
> +        /* This loop is blocking the boot process until we get the
> carrier or reach the timeout.

Please split it into 80 cols max lines.

[...]
> +         * Every second, we display a short message indicating we
> wait the carrier */

(you can remove this part of the comment)

-- 
Ueimor

^ permalink raw reply

* Re: [PATCH net-next] r8169: use unlimited DMA burst for TX
From: Francois Romieu @ 2012-09-14  5:19 UTC (permalink / raw)
  To: Michal Schmidt; +Cc: netdev, Hayes Wang, Ivan Vecera
In-Reply-To: <1347234926-5263-1-git-send-email-mschmidt@redhat.com>

Michal Schmidt <mschmidt@redhat.com> :
[...]
> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>

Acked-by: Francois Romieu <romieu@fr.zoreil.com>

-- 
Ueimor

^ permalink raw reply

* linux-next: manual merge of the workqueues tree with the net tree
From: Stephen Rothwell @ 2012-09-14  5:34 UTC (permalink / raw)
  To: Tejun Heo; +Cc: linux-next, linux-kernel, David Miller, netdev, Karsten Keil

[-- Attachment #1: Type: text/plain, Size: 523 bytes --]

Hi Tejun,

Today's linux-next merge of the workqueues tree got a conflict in
drivers/isdn/mISDN/hwchannel.c between commit 4b921eda5336 ("mISDN: Fix
wrong usage of flush_work_sync while holding locks") from the net tree and
commit 43829731dd37 ("workqueue: deprecate flush[_delayed]_work_sync()")
from the workqueues tree.

The former supersedes the latter (I think) so I used that and can carry
the fix as necessary (no action is required).

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH] bnx2x: fix rx checksum validation for IPv6
From: Eilon Greenstein @ 2012-09-14  5:20 UTC (permalink / raw)
  To: Michal Schmidt
  Cc: Eric Dumazet, netdev, Eric Dumazet, Yaniv Rosner, Yuval Mintz,
	Merav Sicron, Robert Evans, Tom Herbert, Willem de Bruijn,
	David Miller, Havard Skinnemoen
In-Reply-To: <1347578079.8555.141.camel@edumazet-glaptop>

On Fri, 2012-09-14 at 01:14 +0200, Eric Dumazet wrote:
> On Fri, 2012-09-14 at 00:59 +0200, Michal Schmidt wrote:
> > Commit d6cb3e41 "bnx2x: fix checksum validation" caused a performance
> > regression for IPv6. Rx checksum offload does not work. IPv6 packets
> > are passed to the stack with CHECKSUM_NONE.
> > 
> > The hardware obviously cannot perform IP checksum validation for IPv6,
> > because there is no checksum in the IPv6 header. This should not prevent
> > us from setting CHECKSUM_UNNECESSARY.
> > 
> > Tested on BCM57711.
> > 
> > Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
> > ---
> >  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 12 +++++++-----
> >  1 file changed, 7 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > index af20c6e..e8e97a7 100644
> > --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> > @@ -662,14 +662,16 @@ void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
> >  				 struct bnx2x_fastpath *fp,
> >  				 struct bnx2x_eth_q_stats *qstats)
> >  {
> > -	/* Do nothing if no IP/L4 csum validation was done */
> > -
> > +	/* Do nothing if no L4 csum validation was done.
> > +	 * We do not check whether IP csum was validated. For IPv4 we assume
> > +	 * that if the card got as far as validating the L4 csum, it also
> > +	 * validated the IP csum. IPv6 has no IP csum.
> > +	 */
> >  	if (cqe->fast_path_cqe.status_flags &
> > -	    (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
> > -	     ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
> > +	    ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
> >  		return;
> >  
> > -	/* If both IP/L4 validation were done, check if an error was found. */
> > +	/* If L4 validation was done, check if an error was found. */
> >  
> >  	if (cqe->fast_path_cqe.type_error_flags &
> >  	    (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
> 
> Thanks for fixing this bug !
> 
> Acked-by: Eric Dumazet <edumazet@google.com>

Indeed - thanks Michal!

Acked-by: Eilon Greenstein <eilong@broadcom.com>

^ permalink raw reply

* Re: [PATCH] netfilter: Allow xt_nat.c and x_tables.c to compiled in
From: Cong Wang @ 2012-09-14  3:33 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev
In-Reply-To: <87627hfi69.fsf@xmission.com>

On Fri, 14 Sep 2012 at 02:32 GMT, Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> xt_init in x_tables.c must be called before xt_nat_init in xt_nat.c
> Reorder the makefile so that x_tables.o comes before xt_nat.o in
> netfilter.o.
>
> This allows me to build a kernel with both of these modules compiled in.
>

There is a patch to fix the same issue:
http://1984.lsi.us.es/git/nf-next/commit/?id=00545bec9412d130c77f72a08d6c8b6ad21d4a1


^ permalink raw reply

* [PATCH] netfilter: Allow xt_nat.c and x_tables.c to compiled in
From: Eric W. Biederman @ 2012-09-14  2:32 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, netfilter-devel, Patrick McHardy


xt_init in x_tables.c must be called before xt_nat_init in xt_nat.c
Reorder the makefile so that x_tables.o comes before xt_nat.o in
netfilter.o.

This allows me to build a kernel with both of these modules compiled in.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 net/netfilter/Makefile |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 98244d4..1f652b6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -43,6 +43,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
 obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 
+# generic X tables 
+obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
+
 nf_nat-y	:= nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
 		   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
 
@@ -64,9 +67,6 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
 # transparent proxy support
 obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
 
-# generic X tables 
-obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
-
 # combos
 obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
-- 
1.7.5.4


^ permalink raw reply related

* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2012-09-14  1:18 UTC (permalink / raw)
  To: David Miller, netdev
  Cc: linux-next, linux-kernel, Eric Dumazet, Eric W. Biederman

[-- Attachment #1: Type: text/plain, Size: 1713 bytes --]

Hi all,

Today's linux-next merge of the net-next tree got a conflict in
net/netfilter/nfnetlink_log.c between commit 0626af313957 ("netfilter:
take care of timewait sockets") from the net tree and commit 9eea9515cb5f
("userns: nfnetlink_log: Report socket uids in the log sockets user
namespace") from the net-next tree.

Just context changes. I fixed it up (see below) and can carry the fix as
necessary (no action is required).

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

diff --cc net/netfilter/nfnetlink_log.c
index 5cfb5be,8cb67c4..0000000
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@@ -500,14 -501,16 +502,17 @@@ __build_packet_message(struct nfulnl_in
  	}
  
  	/* UID */
 -	if (skb->sk) {
 -		read_lock_bh(&skb->sk->sk_callback_lock);
 -		if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
 -			struct file *file = skb->sk->sk_socket->file;
 +	sk = skb->sk;
 +	if (sk && sk->sk_state != TCP_TIME_WAIT) {
 +		read_lock_bh(&sk->sk_callback_lock);
 +		if (sk->sk_socket && sk->sk_socket->file) {
 +			struct file *file = sk->sk_socket->file;
- 			__be32 uid = htonl(file->f_cred->fsuid);
- 			__be32 gid = htonl(file->f_cred->fsgid);
+ 			__be32 uid = htonl(from_kuid_munged(inst->peer_user_ns,
+ 							    file->f_cred->fsuid));
+ 			__be32 gid = htonl(from_kgid_munged(inst->peer_user_ns,
+ 							    file->f_cred->fsgid));
+ 			/* need to unlock here since NLA_PUT may goto */
 -			read_unlock_bh(&skb->sk->sk_callback_lock);
 +			read_unlock_bh(&sk->sk_callback_lock);
  			if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
  			    nla_put_be32(inst->skb, NFULA_GID, gid))
  				goto nla_put_failure;

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2012-09-14  1:17 UTC (permalink / raw)
  To: David Miller, netdev
  Cc: linux-next, linux-kernel, Eric W. Biederman, Eric Dumazet

[-- Attachment #1: Type: text/plain, Size: 1409 bytes --]

Hi all,

Today's linux-next merge of the net-next tree got a conflict in
net/netfilter/xt_LOG.c between commit 0626af313957 ("netfilter: take care
of timewait sockets") from the net tree and commit 8c6e2a941ae7 ("userns:
Convert xt_LOG to print socket kuids and kgids as uids and gids") from
the net-next tree.

I fixed it up (I think - see below) and can carry the fix as necessary
(no action is required).

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

diff --cc net/netfilter/xt_LOG.c
index 91e9af4,02a2bf4..0000000
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@@ -145,19 -145,6 +145,21 @@@ static int dump_tcp_header(struct sbuf
  	return 0;
  }
  
 +static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk)
 +{
 +	if (!sk || sk->sk_state == TCP_TIME_WAIT)
 +		return;
 +
 +	read_lock_bh(&sk->sk_callback_lock);
 +	if (sk->sk_socket && sk->sk_socket->file) {
++		const struct cred *cred = sk->sk_socket->file->f_cred;
 +		sb_add(m, "UID=%u GID=%u ",
- 			sk->sk_socket->file->f_cred->fsuid,
- 			sk->sk_socket->file->f_cred->fsgid);
++			from_kuid_munged(&init_user_ns, cred->fsuid),
++			from_kgid_munged(&init_user_ns, cred->fsgid));
++	}
 +	read_unlock_bh(&sk->sk_callback_lock);
 +}
 +
  /* One level of recursion won't kill us */
  static void dump_ipv4_packet(struct sbuff *m,
  			const struct nf_loginfo *info,

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] r8169: use unlimited DMA burst for TX
From: Michal Schmidt @ 2012-09-13 23:27 UTC (permalink / raw)
  To: 'Francois Romieu'
  Cc: hayeswang, 'David Miller', netdev, ivecera
In-Reply-To: <CAF7E57619E34A17A56F6D13097342EA@realtek.com.tw>

On 09/11/2012 10:09 AM, hayeswang wrote:
> [Francois Romieu wrote:]
>> Hayes, should we:
>> - mimic Realtek's 8168, 8169 and 810x drivers ?
>> - always set TX_DMA_BURST at the max value ?
>> - do something different (per chipset) ?
>
> Our hw engineer suggests to set unlimited for both TX_DMA_BURST and RX_DMA_BURST
> for all chipsets.

Francois,
as this is exactly what the patch does, would you give an ACK?

Michal

^ permalink raw reply

* Re: [PATCH] bnx2x: fix rx checksum validation for IPv6
From: Eric Dumazet @ 2012-09-13 23:14 UTC (permalink / raw)
  To: Michal Schmidt
  Cc: netdev, Eilon Greenstein, Eric Dumazet, Yaniv Rosner, Yuval Mintz,
	Merav Sicron, Robert Evans, Tom Herbert, Willem de Bruijn,
	David Miller, Havard Skinnemoen
In-Reply-To: <1347577184-8417-1-git-send-email-mschmidt@redhat.com>

On Fri, 2012-09-14 at 00:59 +0200, Michal Schmidt wrote:
> Commit d6cb3e41 "bnx2x: fix checksum validation" caused a performance
> regression for IPv6. Rx checksum offload does not work. IPv6 packets
> are passed to the stack with CHECKSUM_NONE.
> 
> The hardware obviously cannot perform IP checksum validation for IPv6,
> because there is no checksum in the IPv6 header. This should not prevent
> us from setting CHECKSUM_UNNECESSARY.
> 
> Tested on BCM57711.
> 
> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
> ---
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> index af20c6e..e8e97a7 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> @@ -662,14 +662,16 @@ void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
>  				 struct bnx2x_fastpath *fp,
>  				 struct bnx2x_eth_q_stats *qstats)
>  {
> -	/* Do nothing if no IP/L4 csum validation was done */
> -
> +	/* Do nothing if no L4 csum validation was done.
> +	 * We do not check whether IP csum was validated. For IPv4 we assume
> +	 * that if the card got as far as validating the L4 csum, it also
> +	 * validated the IP csum. IPv6 has no IP csum.
> +	 */
>  	if (cqe->fast_path_cqe.status_flags &
> -	    (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
> -	     ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
> +	    ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
>  		return;
>  
> -	/* If both IP/L4 validation were done, check if an error was found. */
> +	/* If L4 validation was done, check if an error was found. */
>  
>  	if (cqe->fast_path_cqe.type_error_flags &
>  	    (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |

Thanks for fixing this bug !

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [net-next.git 3/8 (V2)] stmmac: add the initial tx coalesce schema
From: Ben Hutchings @ 2012-09-13 23:11 UTC (permalink / raw)
  To: David Miller; +Cc: peppe.cavallaro, netdev
In-Reply-To: <20120913.173727.314155374058895289.davem@davemloft.net>

On Thu, 2012-09-13 at 17:37 -0400, David Miller wrote:
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Thu, 13 Sep 2012 22:10:50 +0100
> 
> > On Thu, 2012-09-13 at 16:46 -0400, David Miller wrote:
> >> From: Ben Hutchings <bhutchings@solarflare.com>
> >> Date: Thu, 13 Sep 2012 21:42:51 +0100
> >> 
> >> Well written NAPI drivers never need to disable hardware interrupts
> >> in their ->poll() method and its callers, neither should you.
> > 
> > Perhaps you should get round to reviewing netpoll, because it does
> > exactly this.
> 
> Then I don't understand the point you're trying to make.
> 
> Hardware interrupt disabling has absolutely no place in the
> NAPI polling fast paths.
> 
> If NAPI drivers can't be implemented without hardware interrupt
> toggling in ->poll(), we've failed.

Right.

The problem being that NAPI poll functions *are* sometimes called in
hardware interrupt context.  Thus, any spinlock that may be taken by a
NAPI handler, may well need to be taken with spin_lock_irq or
spin_lock_irqsave elsewhere.  (This is horrible and I think it's well
past time that we ripped the NAPI polling out of netpoll.)

I think you're right that stmmac_tx() (completion handler?) doesn't need
to disable hardware interrupts, but sadly stmmac_xmit() does right now
unless Giuseppe can work out how to make their interaction lockless.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* [PATCH] bnx2x: fix rx checksum validation for IPv6
From: Michal Schmidt @ 2012-09-13 22:59 UTC (permalink / raw)
  To: netdev
  Cc: Eilon Greenstein, Eric Dumazet, Yaniv Rosner, Yuval Mintz,
	Merav Sicron, Robert Evans, Tom Herbert, Willem de Bruijn,
	David Miller

Commit d6cb3e41 "bnx2x: fix checksum validation" caused a performance
regression for IPv6. Rx checksum offload does not work. IPv6 packets
are passed to the stack with CHECKSUM_NONE.

The hardware obviously cannot perform IP checksum validation for IPv6,
because there is no checksum in the IPv6 header. This should not prevent
us from setting CHECKSUM_UNNECESSARY.

Tested on BCM57711.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index af20c6e..e8e97a7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -662,14 +662,16 @@ void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
 				 struct bnx2x_fastpath *fp,
 				 struct bnx2x_eth_q_stats *qstats)
 {
-	/* Do nothing if no IP/L4 csum validation was done */
-
+	/* Do nothing if no L4 csum validation was done.
+	 * We do not check whether IP csum was validated. For IPv4 we assume
+	 * that if the card got as far as validating the L4 csum, it also
+	 * validated the IP csum. IPv6 has no IP csum.
+	 */
 	if (cqe->fast_path_cqe.status_flags &
-	    (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
-	     ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
+	    ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)
 		return;
 
-	/* If both IP/L4 validation were done, check if an error was found. */
+	/* If L4 validation was done, check if an error was found. */
 
 	if (cqe->fast_path_cqe.type_error_flags &
 	    (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
-- 
1.7.11.4

^ permalink raw reply related

* [PATCH] ipconfig: Inform user if carrier is not ready
From: Erwan Velu @ 2012-09-13 21:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20120913.164525.1171098883605242394.davem@davemloft.net>

From: Erwan Velu <erwanaliasr1@gmail.com>

While using the ip= option at the cmdline, the kernel can hold the boot
process for 2 minutes (CONF_CARRIER_TIMEOUT) if the carrier is not
present.

While waiting for the carrier, the user is not informed about this situation
and so could think the kernel is frozen.

This patch just adds a simple message every second saying that we are
waiting for the carrier to come up.
---
  net/ipv4/ipconfig.c |    8 ++++++++
  1 file changed, 8 insertions(+)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 67e8a6b..d9f34b7 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -205,6 +205,7 @@ static int __init ic_open_devs(void)
      struct net_device *dev;
      unsigned short oflags;
      unsigned long start;
+    unsigned int loops=0;

      last = &ic_first_dev;
      rtnl_lock();
@@ -266,6 +267,13 @@ static int __init ic_open_devs(void)
              if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
                  goto have_carrier;

+        loops++;
+        /* This loop is blocking the boot process until we get the 
carrier or reach the timeout.
+         * We have to inform the user about the situation as it could 
look like a kernel freeze.
+         * Every second, we display a short message indicating we wait 
the carrier */
+        if ((loops % 1000) == 0) {
+            pr_info("IP-Config: Waiting Carrier (%d/%d):\n",loops / 
1000, CONF_CARRIER_TIMEOUT / 1000);
+        }
          msleep(1);
      }
  have_carrier:
-- 
1.7.10

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox