Netdev List
 help / color / mirror / Atom feed
* Re: pull request: wireless-next 2012-11-28
From: David Miller @ 2012-11-28 23:05 UTC (permalink / raw)
  To: linville; +Cc: linux-wireless, netdev
In-Reply-To: <20121128192352.GB9118@tuxdriver.com>

From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 28 Nov 2012 14:23:52 -0500

> This pull request is intended for the 3.8 stream.  It is a bit large
> -- I guess Thanksgiving got me off track!  At least the code got to
> spend some time in linux-next... :-)

Wow, that's a lot.

Pulled, thanks.

I'll push it out after I do a bunch of build tests.

Thanks.

^ permalink raw reply

* Re: [PATCH] net: ICMPv6 packets transmitted on wrong interface if nfmark is mangled
From: David Miller @ 2012-11-28 23:30 UTC (permalink / raw)
  To: dries.dewinter; +Cc: pablo, kaber, netdev, netfilter-devel
In-Reply-To: <14515182.2480.1354093791878.JavaMail.driesdw@sahwcmp0020>

From: Dries De Winter <dries.dewinter@gmail.com>
Date: Wed, 28 Nov 2012 10:09:55 +0100 (CET)

> I propose a patch which allows marking a dst_entry as "non-reroutable".
> icmp6_dst_alloc() (used by ndisc and MLD implementation) will always mark the
> allocated dst_entry as such. A check is added to netfilter (IPv6-only) so
> packets heading for a non-reroutable destination are never rerouted.

What about addrconf_dst_alloc()?  Shouldn't it have this new flag set
as well?

Regardless of the answer to that question, it should be explained
in the commit message.

Thanks.


^ permalink raw reply

* Re: [PATCH 1/1] Introduce notification events for routing changes
From: Jozsef Kadlecsik @ 2012-11-28 23:34 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, netfilter-devel
In-Reply-To: <20121128.175929.618012822860698468.davem@davemloft.net>

On Wed, 28 Nov 2012, David Miller wrote:

> > The netfilter MASQUERADE target does not handle the case when the routing
> > changes and the source address of existing connections becomes invalid.
> > The problem can be solved if routing modifications create events to which
> > the MASQUERADE target can subscribe and then delete the affected
> > connections.
> > 
> > The patch adds the required event support for IPv4/IPv6.
> > 
> > Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
> 
> What part of the information are you actually interested in?

Actually, just the pointer to struct net is used.

> Because just saying that a route is added or removed using fib_info X
> doesn't tell you a whole lot.

We have to scan the whole conntrack table to find out which entries are 
affected by the routing change, whatever it was. More precisely: for which
entries did the output interface change? The output interface is stored in
the nat part of conntrack for MASQUERADE, so it can directly be compared 
to the result of the route lookup.
 
> fib_info only encapsulates the information that can be shared heavily
> with many ipv4 routes.  It doesn't include the TOS or other aspects
> stored in the fib_alias part.  I can only guess that you did not
> use fib_alias in order to avoid having to export that structure to
> the callers, as it is currently private to net/ipv4/
> 
> The notifier doesn't seem to distinguish between adds or removes
> either, making it less useful in another way.
> 
> I would suggest passing a super-structure that gives the event type:
> 
> 	struct route_changed_info {
> 		enum {
> 			add,
> 			remove,
> 		} event_type;
> 		void *data;
> 	};
> 
> or something like that.
> 
> Can you also show us exactly how this will be used?  Otherwise we
> have to guess.

Yes, sure. Here follows the patch against conntrack/MASQUERADE:

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index bd8eea7..65b1b51 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -46,6 +46,12 @@ struct nf_conn_nat {
     defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) || \
     defined(CONFIG_IP6_NF_TARGET_MASQUERADE) || \
     defined(CONFIG_IP6_NF_TARGET_MASQUERADE_MODULE)
+    	union {
+#if IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
+		__be32 flowlabel;
+#endif
+		u8 tos;
+	} u;
 	int masq_index;
 #endif
 };
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 1644cdd..3b47b32 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -87,6 +87,10 @@ enum ip_conntrack_status {
 	/* Conntrack got a helper explicitly attached via CT target. */
 	IPS_HELPER_BIT = 13,
 	IPS_HELPER = (1 << IPS_HELPER_BIT),
+
+	/* Conntrack must be deleted when routing changed (MASQUERADE). */
+	IPS_ROUTING_DEPENDENT_BIT = 14,
+	IPS_ROUTING_DEPENDENT = (1 << IPS_ROUTING_DEPENDENT_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index bf0cc37..a0dfac7 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -8,6 +8,7 @@
 #define NF_NAT_RANGE_PROTO_SPECIFIED	2
 #define NF_NAT_RANGE_PROTO_RANDOM	4
 #define NF_NAT_RANGE_PERSISTENT		8
+#define NF_NAT_ROUTING_DEPENDENT	16
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5d5d4d1..1056d99 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -19,6 +19,7 @@
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <net/route.h>
+#include <net/ip_fib.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
 #include <net/netfilter/nf_nat.h>
@@ -88,6 +89,11 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	newrange.min_proto   = mr->range[0].min;
 	newrange.max_proto   = mr->range[0].max;
 
+	if (mr->range[0].flags & NF_NAT_ROUTING_DEPENDENT) {
+		nat->u.tos = RT_TOS(ip_hdr(skb)->tos);
+		set_bit(IPS_ROUTING_DEPENDENT, &ct->status);
+	}
+
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
 }
@@ -132,6 +138,74 @@ static int masq_inet_event(struct notifier_block *this,
 	return masq_device_event(this, event, dev);
 }
 
+struct nf_net_fl4 {
+	struct net *net;
+	struct flowi4 fl4;
+	struct fib_result res;
+};
+
+static int
+route_cmp(struct nf_conn *ct, void *ptr)
+{
+	const struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_net_fl4 *nf = ptr;
+	int ret, found = 0;
+
+	if (!nat)
+		return 0;
+	if (nf_ct_l3num(ct) != NFPROTO_IPV4)
+		return 0;
+	if (!test_bit(IPS_ROUTING_DEPENDENT, &ct->status))
+		return 0;
+
+	/* We don't have an skb and have to re-check the routing */
+	nf->fl4.daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip;
+	nf->fl4.saddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+	nf->fl4.flowi4_tos = nat->u.tos;
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+	nf->fl4.flowi4_mark = ct->mark;
+#endif
+	rcu_read_lock();
+	if (!fib_lookup(nf->net, &nf->fl4, &nf->res)) {
+		rcu_read_unlock();
+		/* Routing changed and no route. Purge the entry */
+		return 1;
+	}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	for (ret = 0; ret < nf->res.fi->fib_nhs; ret++) {
+		struct fib_nh *nh = &nf->res.fi->fib_nh[ret];
+
+		if (nat->masq_index != (int)(long)nh->nh_dev->ifindex) {
+			found = 1;
+			break;
+		}
+	}
+#else
+	found = nat->masq_index != (int)(long)FIB_RES_DEV(nf->res)->ifindex;
+#endif
+	rcu_read_unlock();
+	return found;
+}
+
+static int masq_route_event(struct notifier_block *this,
+			    unsigned long event,
+			    void *ptr)
+{
+	struct net *net = ((struct fib_info *)ptr)->fib_net;
+	struct nf_net_fl4 nf = {
+		.net = net,
+		.fl4 = {
+			.flowi4_scope = RT_SCOPE_UNIVERSE,
+		},
+	};
+
+	if (event == NETDEV_ROUTE_CHANGED)
+		/* Routing changed, delete marked entries */
+		nf_ct_iterate_cleanup(net, route_cmp, (void *)&nf);
+
+	return NOTIFY_DONE;
+}
+
 static struct notifier_block masq_dev_notifier = {
 	.notifier_call	= masq_device_event,
 };
@@ -140,6 +214,10 @@ static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
 
+static struct notifier_block masq_route_notifier = {
+	.notifier_call	= masq_route_event,
+};
+
 static struct xt_target masquerade_tg_reg __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= NFPROTO_IPV4,
@@ -162,6 +240,8 @@ static int __init masquerade_tg_init(void)
 		register_netdevice_notifier(&masq_dev_notifier);
 		/* Register IP address change reports */
 		register_inetaddr_notifier(&masq_inet_notifier);
+		/* Register route change reports */
+		register_iproute_notifier(&masq_route_notifier);
 	}
 
 	return ret;
@@ -172,6 +252,7 @@ static void __exit masquerade_tg_exit(void)
 	xt_unregister_target(&masquerade_tg_reg);
 	unregister_netdevice_notifier(&masq_dev_notifier);
 	unregister_inetaddr_notifier(&masq_inet_notifier);
+	unregister_iproute_notifier(&masq_route_notifier);
 }
 
 module_init(masquerade_tg_init);
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 60e9053..20dfa1d 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -19,6 +19,8 @@
 #include <net/netfilter/nf_nat.h>
 #include <net/addrconf.h>
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <uapi/linux/route.h>
 
 static unsigned int
 masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
@@ -45,6 +47,12 @@ masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	newrange.min_proto	= range->min_proto;
 	newrange.max_proto	= range->max_proto;
 
+	if (range->flags & NF_NAT_ROUTING_DEPENDENT) {
+		nfct_nat(ct)->u.flowlabel =
+			(* (__be32 *) ipv6_hdr(skb)) & IPV6_FLOWINFO_MASK;
+		set_bit(IPS_ROUTING_DEPENDENT, &ct->status);
+	}
+
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
 }
 
@@ -97,6 +105,65 @@ static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
 
+struct nf_net_fl6 {
+	struct net *net;
+	struct flowi6 fl6;
+};
+
+static int
+route_cmp(struct nf_conn *ct, void *ptr)
+{
+	const struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_net_fl6 *nf = ptr;
+	struct rt6_info *rt;
+	int ret;
+
+	if (!nat)
+		return 0;
+	if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+		return 0;
+	if (!test_bit(IPS_ROUTING_DEPENDENT, &ct->status))
+		return 0;
+
+	/* We don't have an skb and have to re-check the routing */
+	nf->fl6.daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
+	nf->fl6.saddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
+	nf->fl6.flowlabel = nat->u.flowlabel;
+	nf->fl6.flowi6_proto =
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+	nf->fl6.flowi6_mark = ct->mark;
+#endif
+	rt = (void *) ip6_route_lookup(nf->net, &nf->fl6,
+				       RT6_LOOKUP_F_HAS_SADDR);
+	ret = rt->dst.error ||
+	      (rt->rt6i_flags & RTF_REJECT) ||
+	      nat->masq_index != (int)(long)rt->rt6i_idev->dev->ifindex;
+
+	dst_release(&rt->dst);
+	return ret;
+}
+
+static int masq_route_event(struct notifier_block *this,
+			    unsigned long event,
+			    void *ptr)
+{
+	struct net *net = dev_net(((struct rt6_info *)ptr)->dst.dev);
+	struct nf_net_fl6 nf = {
+		.net = net,
+	};
+
+	if (event == NETDEV_ROUTE_CHANGED)
+		/* Routing changed, delete marked entries */
+		nf_ct_iterate_cleanup(net, route_cmp, (void *)&nf);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_route_notifier = {
+	.notifier_call	= masq_route_event,
+};
+
 static struct xt_target masquerade_tg6_reg __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= NFPROTO_IPV6,
@@ -116,12 +183,14 @@ static int __init masquerade_tg6_init(void)
 	if (err == 0) {
 		register_netdevice_notifier(&masq_dev_notifier);
 		register_inet6addr_notifier(&masq_inet_notifier);
+		register_ip6route_notifier(&masq_route_notifier);
 	}
 
 	return err;
 }
 static void __exit masquerade_tg6_exit(void)
 {
+	unregister_ip6route_notifier(&masq_route_notifier);
 	unregister_inet6addr_notifier(&masq_inet_notifier);
 	unregister_netdevice_notifier(&masq_dev_notifier);
 	xt_unregister_target(&masquerade_tg6_reg);


Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary

^ permalink raw reply related

* Re: Re: RTL 8169  linux driver question
From: Francois Romieu @ 2012-11-28 23:18 UTC (permalink / raw)
  To: David Laight; +Cc: Stéphane ANCELOT, netdev, sancelot, Hayes Wang
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B70CA@saturn3.aculab.com>

David Laight <David.Laight@ACULAB.COM> :
[David's life]


The version below fixes several bugs and refuses the frame or timing
values it can't set. Hayes's Tx parameters still need to be plugged
into rtl_coalesce_scale.

Rx delays seem lower than what I had expected when testing with a 8168b
(XID 18000000).

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 248f883..d2594b1 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -349,6 +349,12 @@ enum rtl_registers {
 	RxMaxSize	= 0xda,
 	CPlusCmd	= 0xe0,
 	IntrMitigate	= 0xe2,
+
+#define RTL_COALESCE_MASK	0x0f
+#define RTL_COALESCE_SHIFT	4
+#define RTL_COALESCE_T_MAX	(RTL_COALESCE_MASK)
+#define RTL_COALESCE_FRAME_MAX	(RTL_COALESCE_MASK << 2)
+
 	RxDescAddrLow	= 0xe4,
 	RxDescAddrHigh	= 0xe8,
 	EarlyTxThres	= 0xec,	/* 8169. Unit of 32 bytes. */
@@ -1997,10 +2003,121 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	}
 }
 
+static struct rtl_coalesce_scale {
+	u32 speed;
+	/* Rx / Tx */
+	u16 usecs[2];
+} rtl_coalesce_info[] = {
+	{ .speed = SPEED_10,	.usecs = { 8000, 10000 } },
+	{ .speed = SPEED_100,	.usecs = { 1000,  1000 } },
+	{ .speed = SPEED_1000,	.usecs = {  125,   125 } }
+};
+
+static struct rtl_coalesce_scale *rtl_coalesce_scale(struct net_device *dev)
+{
+	struct ethtool_cmd ecmd;
+	int rc, i;
+
+	rc = rtl8169_get_settings(dev, &ecmd);
+	if (rc < 0)
+		return ERR_PTR(rc);
+
+	for (i = 0; i < ARRAY_SIZE(rtl_coalesce_info); i++) {
+		if (ethtool_cmd_speed(&ecmd) == rtl_coalesce_info[i].speed)
+			return rtl_coalesce_info + i;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	void __iomem *ioaddr = tp->mmio_addr;
+	struct rtl_coalesce_scale *scale;
+	struct {
+		u32 *max_frames;
+		u32 *usecs;
+	} coal_settings [] = {
+		{ &ec->rx_max_coalesced_frames, &ec->rx_coalesce_usecs },
+		{ &ec->tx_max_coalesced_frames, &ec->tx_coalesce_usecs }
+	}, *p = coal_settings;
+	int i;
+	u16 w;
+
+	memset(ec, 0, sizeof(*ec));
+
+	for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
+		*p->max_frames = (w & RTL_COALESCE_MASK) << 2;
+		w >>= RTL_COALESCE_SHIFT;
+		*p->usecs = w & RTL_COALESCE_MASK;
+	}
+
+	/* Except for null parameters, the meaning of coalescing parameters
+	 * depends on the link speed.
+	 */
+	scale = rtl_coalesce_scale(dev);
+	if (IS_ERR(scale) && (p != coal_settings))
+		return PTR_ERR(scale);
+
+	for (i = 0; i < 2; i++) {
+		p = coal_settings + i;
+		*p->usecs *= scale->usecs[i];
+		if (!*p->usecs && !*p->max_frames)
+			*p->max_frames = 1;
+	}
+
+	return 0;
+}
+
+static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	void __iomem *ioaddr = tp->mmio_addr;
+	struct rtl_coalesce_scale *scale;
+	struct {
+		u32 frames;
+		u32 usecs;
+	} coal_settings [] = {
+		{ ec->rx_max_coalesced_frames, ec->rx_coalesce_usecs },
+		{ ec->tx_max_coalesced_frames, ec->tx_coalesce_usecs }
+	}, *p = coal_settings;
+	u16 w = 0;
+	int i;
+
+	scale = rtl_coalesce_scale(dev);
+
+	for (i = 0; i < 2; i++, p++) {
+		u32 units;
+
+		if (p->usecs || p->frames != 1) {
+			if (IS_ERR(scale))
+				return PTR_ERR(scale);
+		} else
+			p->frames = 0;
+
+		units = p->usecs / scale->usecs[i];
+		if (units > RTL_COALESCE_T_MAX || p->usecs % scale->usecs[i] ||
+		    p->frames > RTL_COALESCE_FRAME_MAX || p->frames % 4)
+			return -EINVAL;
+
+		w <<= RTL_COALESCE_SHIFT;
+		w |= units;
+		w <<= RTL_COALESCE_SHIFT;
+		w |= p->frames >> 2;
+	}
+
+	RTL_W16(IntrMitigate, swab16(w));
+
+	return 0;
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.get_drvinfo		= rtl8169_get_drvinfo,
 	.get_regs_len		= rtl8169_get_regs_len,
 	.get_link		= ethtool_op_get_link,
+	.get_coalesce		= rtl_get_coalesce,
+	.set_coalesce		= rtl_set_coalesce,
 	.get_settings		= rtl8169_get_settings,
 	.set_settings		= rtl8169_set_settings,
 	.get_msglevel		= rtl8169_get_msglevel,

^ permalink raw reply related

* [net-next PATCH v1] 8021q: fix vlan device to inherit the unicast filtering capability flag
From: Yi Zou @ 2012-11-28 23:45 UTC (permalink / raw)
  To: netdev; +Cc: devel

This bug is observed when running FCoE over a VLAN device associated with
a real device that has IFF_UNICAST_FLT set, since FCoE adds unicast
addresses such as the FLOGI MAC to the VLAN interface that FCoE is on.
Currently, the VLAN device does not inherit the IFF_UNICAST_FLT flag from the
parent real device even though the real device is capable of doing unicast
filtering. This forces the VLAN device and its real device to go into promiscuous
mode unnecessarily, even though the added address is actually being added to the
available unicast filter table in the real device.

Signed-off-by: Yi Zou <yi.zou@intel.com>
Cc: devel@open-fcoe.org
---

 net/8021q/vlan.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ee07072..ca03c17 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -242,6 +242,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	 * hope the underlying device can handle it.
 	 */
 	new_dev->mtu = real_dev->mtu;
+	new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
 
 	vlan_dev_priv(new_dev)->vlan_id = vlan_id;
 	vlan_dev_priv(new_dev)->real_dev = real_dev;

^ permalink raw reply related

* Re: BUG: scheduling while atomic: ifup-bonding/3711/0x00000002 -- V3.6.7
From: Linda Walsh @ 2012-11-29  1:04 UTC (permalink / raw)
  To: Cong Wang; +Cc: LKML, Linux Kernel Network Developers
In-Reply-To: <50B67F6B.6050008@tlinx.org>


Cong Wang wrote:
> On Wed, Nov 28, 2012 at 4:37 AM, Linda Walsh <lkml@tlinx.org> wrote:  
>> Is this a known problem / bug, or should I file a bug on it? 
> Does this quick fix help?
> ...
> Thanks!
>   

    Applied:
--- bond_main.c.orig  2012-09-30 16:47:46.000000000 -0700
+++ bond_main.c 2012-11-28 12:58:34.064931997 -0800
@@ -1778,7 +1778,9 @@
    new_slave->link == BOND_LINK_DOWN ? "DOWN" :
      (new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
 
+ read_unlock(&bond->lock);
  bond_update_speed_duplex(new_slave);
+ read_lock(&bond->lock);
 
  if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
    /* if there is a primary slave, remember it */
----
Recompile/run:
Linux Ishtar 3.6.8-Isht-Van #4 SMP PREEMPT Wed Nov 28 12:59:13 PST 2012 
x86_64 x86_64 x86_64 GNU/Linux

---

Similar.  The tracebacks are below.

Since I am running in round-robin, trying for RAID0 of the 2 links--
simple bandwidth aggregation, do I even need miimon?  I mean, what load
is there to balance?

Not that this is likely the root of the bug, but it might make it
not happen in my case, if I remove the load-bal stuff...??




[   52.457633] bonding: bond0: Adding slave p2p1.
[   52.941390] bonding: bond0: enslaving p2p1 as an active interface 
with a down link.
[   52.959329] bonding: bond0: Adding slave p2p2.
[   53.442769] bonding: bond0: enslaving p2p2 as an active interface 
with a down link.
[   58.588410] ixgbe 0000:06:00.0: p2p1: NIC Link is Up 10 Gbps, Flow 
Control: None
[   58.666760] BUG: scheduling while atomic: kworker/u:1/103/0x00000002
[   58.673144] 4 locks held by kworker/u:1/103:
[   58.673145]  #0:  ((bond_dev->name)){......}, at: 
[<ffffffff8105a956>] process_one_work+0x146/0x680
[   58.673161]  #1:  ((&(&bond->mii_work)->work)){......}, at: 
[<ffffffff8105a956>] process_one_work+0x146/0x680
[   58.673167]  #2:  (rtnl_mutex){......}, at: [<ffffffff815a4dd0>] 
rtnl_trylock+0x10/0x20
[   58.673175]  #3:  (&bond->lock){......}, at: [<ffffffff81480b5d>] 
bond_mii_monitor+0x2ed/0x640
[   58.673183] Modules linked in: fan kvm_intel mousedev kvm iTCO_wdt 
iTCO_vendor_support acpi_cpufreq tpm_tis tpm tpm_bios mperf processor
[   58.673196] Pid: 103, comm: kworker/u:1 Not tainted 3.6.8-Isht-Van #4
[   58.673198] Call Trace:
[   58.673203]  [<ffffffff8167bb36>] __schedule_bug+0x5e/0x6c
[   58.673208]  [<ffffffff816859bc>] __schedule+0x77c/0x810
[   58.673211]  [<ffffffff81685ad4>] schedule+0x24/0x70
[   58.673214]  [<ffffffff81684bec>] 
schedule_hrtimeout_range_clock+0xfc/0x140
[   58.673218]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
[   58.673222]  [<ffffffff81065a1f>] ? hrtimer_start_range_ns+0xf/0x20
[   58.673225]  [<ffffffff81684c3e>] schedule_hrtimeout_range+0xe/0x10
[   58.673229]  [<ffffffff8104bddb>] usleep_range+0x3b/0x40
[   58.673235]  [<ffffffff814d220c>] ixgbe_acquire_swfw_sync_X540+0xbc/0x110
[   58.673238]  [<ffffffff814ce4dd>] ixgbe_read_phy_reg_generic+0x3d/0x120
[   58.673241]  [<ffffffff814ce74c>] 
ixgbe_get_copper_link_capabilities_generic+0x2c/0x60
[   58.673244]  [<ffffffff81480b5d>] ? bond_mii_monitor+0x2ed/0x640
[   58.673248]  [<ffffffff814c6454>] ixgbe_get_settings+0x34/0x2b0
[   58.673253]  [<ffffffff8159af55>] __ethtool_get_settings+0x85/0x140
[   58.673256]  [<ffffffff8147c6e3>] bond_update_speed_duplex+0x23/0x60
[   58.673259]  [<ffffffff81480bc4>] bond_mii_monitor+0x354/0x640
[   58.673262]  [<ffffffff8105a9b7>] process_one_work+0x1a7/0x680
[   58.673264]  [<ffffffff8105a956>] ? process_one_work+0x146/0x680
[   58.673269]  [<ffffffff8108c7ce>] ? put_lock_stats.isra.21+0xe/0x40
[   58.673279]  [<ffffffff81480870>] ? bond_loadbalance_arp_mon+0x2c0/0x2c0
[   58.673286]  [<ffffffff8105b9ed>] worker_thread+0x18d/0x4f0
[   58.673296]  [<ffffffff81070991>] ? sub_preempt_count+0x51/0x60
[   58.673303]  [<ffffffff8105b860>] ? manage_workers+0x320/0x320
[   58.673312]  [<ffffffff81060f7d>] kthread+0x9d/0xb0
[   58.673317]  [<ffffffff816892e4>] kernel_thread_helper+0x4/0x10
[   58.673320]  [<ffffffff8106c197>] ? finish_task_switch+0x77/0x100
[   58.673323]  [<ffffffff81687526>] ? _raw_spin_unlock_irq+0x36/0x60
[   58.673326]  [<ffffffff81687a5d>] ? retint_restore_args+0xe/0xe
[   58.673329]  [<ffffffff81060ee0>] ? flush_kthread_worker+0x160/0x160
[   58.673332]  [<ffffffff816892e0>] ? gs_change+0xb/0xb
[   58.676704] BUG: scheduling while atomic: kworker/u:1/103/0x00000002
[   58.683107] 4 locks held by kworker/u:1/103:
[   58.683109]  #0:  ((bond_dev->name)){......}, at: 
[<ffffffff8105a956>] process_one_work+0x146/0x680
[   58.683120]  #1:  ((&(&bond->mii_work)->work)){......}, at: 
[<ffffffff8105a956>] process_one_work+0x146/0x680
[   58.683128]  #2:  (rtnl_mutex){......}, at: [<ffffffff815a4dd0>] 
rtnl_trylock+0x10/0x20
[   58.683136]  #3:  (&bond->lock){......}, at: [<ffffffff81480b5d>] 
bond_mii_monitor+0x2ed/0x640
[   58.683145] Modules linked in: fan kvm_intel mousedev kvm iTCO_wdt 
iTCO_vendor_support acpi_cpufreq tpm_tis tpm tpm_bios mperf processor
[   58.683162] Pid: 103, comm: kworker/u:1 Tainted: G        W    
3.6.8-Isht-Van #4
[   58.683164] Call Trace:
[   58.683170]  [<ffffffff8167bb36>] __schedule_bug+0x5e/0x6c
[   58.683175]  [<ffffffff816859bc>] __schedule+0x77c/0x810
[   58.683180]  [<ffffffff81685ad4>] schedule+0x24/0x70
[   58.683184]  [<ffffffff81684bec>] 
schedule_hrtimeout_range_clock+0xfc/0x140
[   58.683189]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
[   58.683194]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
[   58.683198]  [<ffffffff81065a1f>] ? hrtimer_start_range_ns+0xf/0x20
[   58.683203]  [<ffffffff81684c3e>] schedule_hrtimeout_range+0xe/0x10
[   58.683208]  [<ffffffff8104bddb>] usleep_range+0x3b/0x40
[   58.683213]  [<ffffffff814d213e>] ixgbe_release_swfw_sync_X540+0x4e/0x60
[   58.683217]  [<ffffffff814ce5a1>] ixgbe_read_phy_reg_generic+0x101/0x120
[   58.683222]  [<ffffffff814ce74c>] 
ixgbe_get_copper_link_capabilities_generic+0x2c/0x60
[   58.683227]  [<ffffffff81480b5d>] ? bond_mii_monitor+0x2ed/0x640
[   58.683231]  [<ffffffff814c6454>] ixgbe_get_settings+0x34/0x2b0
[   58.683237]  [<ffffffff8159af55>] __ethtool_get_settings+0x85/0x140
[   58.683241]  [<ffffffff8147c6e3>] bond_update_speed_duplex+0x23/0x60
[   58.683246]  [<ffffffff81480bc4>] bond_mii_monitor+0x354/0x640
[   58.683250]  [<ffffffff8105a9b7>] process_one_work+0x1a7/0x680
[   58.683254]  [<ffffffff8105a956>] ? process_one_work+0x146/0x680
[   58.683259]  [<ffffffff8108c7ce>] ? put_lock_stats.isra.21+0xe/0x40
[   58.683264]  [<ffffffff81480870>] ? bond_loadbalance_arp_mon+0x2c0/0x2c0
[   58.683268]  [<ffffffff8105b9ed>] worker_thread+0x18d/0x4f0
[   58.683273]  [<ffffffff81070991>] ? sub_preempt_count+0x51/0x60
[   58.683278]  [<ffffffff8105b860>] ? manage_workers+0x320/0x320
[   58.683283]  [<ffffffff81060f7d>] kthread+0x9d/0xb0
[   58.683288]  [<ffffffff816892e4>] kernel_thread_helper+0x4/0x10
[   58.683293]  [<ffffffff8106c197>] ? finish_task_switch+0x77/0x100
[   58.683297]  [<ffffffff81687526>] ? _raw_spin_unlock_irq+0x36/0x60
[   58.683301]  [<ffffffff81687a5d>] ? retint_restore_args+0xe/0xe
[   58.683306]  [<ffffffff81060ee0>] ? flush_kthread_worker+0x160/0x160
[   58.683311]  [<ffffffff816892e0>] ? gs_change+0xb/0xb
[   58.686755] bonding: bond0: link status definitely up for interface 
p2p1, 10000 Mbps full duplex.
[   58.943059] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow 
Control: Rx/Tx
[   59.717848] ixgbe 0000:06:00.1: p2p2: NIC Link is Up 10 Gbps, Flow 
Control: None
[   59.784848] BUG: scheduling while atomic: kworker/u:1/103/0x00000002
[   59.791219] 4 locks held by kworker/u:1/103:
[   59.791222]  #0:  ((bond_dev->name)){......}, at: 
[<ffffffff8105a956>] process_one_work+0x146/0x680
[   59.791237]  #1:  ((&(&bond->mii_work)->work)){......}, at: 
[<ffffffff8105a956>] process_one_work+0x146/0x680
[   59.791245]  #2:  (rtnl_mutex){......}, at: [<ffffffff815a4dd0>] 
rtnl_trylock+0x10/0x20
[   59.791256]  #3:  (&bond->lock){......}, at: [<ffffffff81480b5d>] 
bond_mii_monitor+0x2ed/0x640
[   59.791276] Modules linked in: fan kvm_intel mousedev kvm iTCO_wdt 
iTCO_vendor_support acpi_cpufreq tpm_tis tpm tpm_bios mperf processor
[   59.791296] Pid: 103, comm: kworker/u:1 Tainted: G        W    
3.6.8-Isht-Van #4
[   59.791299] Call Trace:
[   59.791306]  [<ffffffff8167bb36>] __schedule_bug+0x5e/0x6c
[   59.791312]  [<ffffffff816859bc>] __schedule+0x77c/0x810
[   59.791317]  [<ffffffff81685ad4>] schedule+0x24/0x70
[   59.791322]  [<ffffffff81684bec>] 
schedule_hrtimeout_range_clock+0xfc/0x140
[   59.791329]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
[   59.791334]  [<ffffffff81065a1f>] ? hrtimer_start_range_ns+0xf/0x20
[   59.791339]  [<ffffffff81684c3e>] schedule_hrtimeout_range+0xe/0x10
[   59.791345]  [<ffffffff8104bddb>] usleep_range+0x3b/0x40
[   59.791352]  [<ffffffff814d220c>] ixgbe_acquire_swfw_sync_X540+0xbc/0x110
[   59.791357]  [<ffffffff814ce4dd>] ixgbe_read_phy_reg_generic+0x3d/0x120
[   59.791361]  [<ffffffff814ce74c>] 
ixgbe_get_copper_link_capabilities_generic+0x2c/0x60
[   59.791366]  [<ffffffff81480b5d>] ? bond_mii_monitor+0x2ed/0x640
[   59.791372]  [<ffffffff814c6454>] ixgbe_get_settings+0x34/0x2b0
[   59.791381]  [<ffffffff8159af55>] __ethtool_get_settings+0x85/0x140
[   59.791386]  [<ffffffff8147c6e3>] bond_update_speed_duplex+0x23/0x60
[   59.791389]  [<ffffffff81480bc4>] bond_mii_monitor+0x354/0x640
[   59.791393]  [<ffffffff8105a9b7>] process_one_work+0x1a7/0x680
[   59.791396]  [<ffffffff8105a956>] ? process_one_work+0x146/0x680
[   59.791402]  [<ffffffff8108c7ce>] ? put_lock_stats.isra.21+0xe/0x40
[   59.791411]  [<ffffffff81480870>] ? bond_loadbalance_arp_mon+0x2c0/0x2c0
[   59.791421]  [<ffffffff8105b9ed>] worker_thread+0x18d/0x4f0
[   59.791434]  [<ffffffff81070991>] ? sub_preempt_count+0x51/0x60
[   59.791442]  [<ffffffff8105b860>] ? manage_workers+0x320/0x320
[   59.791453]  [<ffffffff81060f7d>] kthread+0x9d/0xb0
[   59.791460]  [<ffffffff816892e4>] kernel_thread_helper+0x4/0x10
[   59.791464]  [<ffffffff8106c197>] ? finish_task_switch+0x77/0x100
[   59.791468]  [<ffffffff81687526>] ? _raw_spin_unlock_irq+0x36/0x60
[   59.791472]  [<ffffffff81687a5d>] ? retint_restore_args+0xe/0xe
[   59.791476]  [<ffffffff81060ee0>] ? flush_kthread_worker+0x160/0x160
[   59.791480]  [<ffffffff816892e0>] ? gs_change+0xb/0xb
[   59.794932] BUG: scheduling while atomic: kworker/u:1/103/0x00000002
[   59.801333] 4 locks held by kworker/u:1/103:
[   59.801340]  #0:  ((bond_dev->name)){......}, at: 
[<ffffffff8105a956>] process_one_work+0x146/0x680
[   59.801345]  #1:  ((&(&bond->mii_work)->work)){......}, at: 
[<ffffffff8105a956>] process_one_work+0x146/0x680
[   59.801350]  #2:  (rtnl_mutex){......}, at: [<ffffffff815a4dd0>] 
rtnl_trylock+0x10/0x20
[   59.801356]  #3:  (&bond->lock){......}, at: [<ffffffff81480b5d>] 
bond_mii_monitor+0x2ed/0x640
[   59.801365] Modules linked in: fan kvm_intel mousedev kvm iTCO_wdt 
iTCO_vendor_support acpi_cpufreq tpm_tis tpm tpm_bios mperf processor
[   59.801368] Pid: 103, comm: kworker/u:1 Tainted: G        W    
3.6.8-Isht-Van #4
[   59.801369] Call Trace:
[   59.801373]  [<ffffffff8167bb36>] __schedule_bug+0x5e/0x6c
[   59.801380]  [<ffffffff816859bc>] __schedule+0x77c/0x810
[   59.801385]  [<ffffffff81685ad4>] schedule+0x24/0x70
[   59.801391]  [<ffffffff81684bec>] 
schedule_hrtimeout_range_clock+0xfc/0x140
[   59.801395]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
[   59.801399]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
[   59.801404]  [<ffffffff81065a1f>] ? hrtimer_start_range_ns+0xf/0x20
[   59.801409]  [<ffffffff81684c3e>] schedule_hrtimeout_range+0xe/0x10
[   59.801414]  [<ffffffff8104bddb>] usleep_range+0x3b/0x40
[   59.801419]  [<ffffffff814d213e>] ixgbe_release_swfw_sync_X540+0x4e/0x60
[   59.801424]  [<ffffffff814ce5a1>] ixgbe_read_phy_reg_generic+0x101/0x120
[   59.801429]  [<ffffffff814ce74c>] 
ixgbe_get_copper_link_capabilities_generic+0x2c/0x60
[   59.801433]  [<ffffffff81480b5d>] ? bond_mii_monitor+0x2ed/0x640
[   59.801441]  [<ffffffff814c6454>] ixgbe_get_settings+0x34/0x2b0
[   59.801446]  [<ffffffff8159af55>] __ethtool_get_settings+0x85/0x140
[   59.801450]  [<ffffffff8147c6e3>] bond_update_speed_duplex+0x23/0x60
[   59.801471]  [<ffffffff81480bc4>] bond_mii_monitor+0x354/0x640
[   59.801475]  [<ffffffff8105a9b7>] process_one_work+0x1a7/0x680
[   59.801477]  [<ffffffff8105a956>] ? process_one_work+0x146/0x680
[   59.801481]  [<ffffffff8108c7ce>] ? put_lock_stats.isra.21+0xe/0x40
[   59.801484]  [<ffffffff81480870>] ? bond_loadbalance_arp_mon+0x2c0/0x2c0
[   59.801489]  [<ffffffff8105b9ed>] worker_thread+0x18d/0x4f0
[   59.801495]  [<ffffffff81070991>] ? sub_preempt_count+0x51/0x60
[   59.801500]  [<ffffffff8105b860>] ? manage_workers+0x320/0x320
[   59.801505]  [<ffffffff81060f7d>] kthread+0x9d/0xb0
[   59.801510]  [<ffffffff816892e4>] kernel_thread_helper+0x4/0x10
[   59.801515]  [<ffffffff8106c197>] ? finish_task_switch+0x77/0x100
[   59.801519]  [<ffffffff81687526>] ? _raw_spin_unlock_irq+0x36/0x60
[   59.801524]  [<ffffffff81687a5d>] ? retint_restore_args+0xe/0xe
[   59.801530]  [<ffffffff81060ee0>] ? flush_kthread_worker+0x160/0x160
[   59.801536]  [<ffffffff816892e0>] ? gs_change+0xb/0xb
[   59.804986] bonding: bond0: link status definitely up for interface 
p2p2, 10000 Mbps full duplex.

^ permalink raw reply

* [Consult] excuse me: sorry for the negative effects which I made.
From: Chen Gang @ 2012-11-29  1:43 UTC (permalink / raw)
  To: Shan Wei, Eric Dumazet, David Miller; +Cc: netdev
In-Reply-To: <50B447F3.2090806@gmail.com>

Hi Shan Wei, Eric, David

  Maybe what I have done has had a negative effect on you.
  First of all, I am sorry for that.
    Excuse me, I am a newbie on *@vger.kernel.org, so I do not know many things.
    Also excuse me, my English is not very good (so, at least, not very polite).

  what I want to do is only to:
    provide contributes to outside, without negative effects, base on human resources limitations.
    but it seems that I am failed (at least, in net subsystem).

  Next, I will carefully continue in other subsystems, and leave the net subsystem (at least for a while).

  sorry again for the negative effects which I made.

  Bye.

gchen.


于 2012年11月27日 12:56, Shan Wei 写道:
> Chen Gang said, at 2012/11/27 12:18:
>>>
>>>>
>>>>  for the format of information which seq_printf output:
>>>>    it is not belong to OS API level for outside (at least, for current case, it is true). 
>>>>    so we need not keep 'compatible' of it, so %16s is not necessary.
>>>
>>> Can you explain If we don't change to %s, what will happen?
>>>
>>
>>   for outside, nothing will happen.
>>
>>   so it is not for correctness, it is only for "keep source code simple and clear".
> 
> So, it's a clean-up type patch which is just for developer,
> but with the change of /proc interface which is for user.
> user is first, so let us end this thread unless you have necessary reasons to do it. 
> 
> Thanks  
> Shan Wei
> 
> 


-- 
Chen Gang

Asianux Corporation

^ permalink raw reply

* Re: BUG: scheduling while atomic: ifup-bonding/3711/0x00000002 -- V3.6.7
From: Jay Vosburgh @ 2012-11-29  1:57 UTC (permalink / raw)
  To: Linda Walsh; +Cc: Cong Wang, LKML, Linux Kernel Network Developers
In-Reply-To: <50B6B4B6.3070304@tlinx.org>

Linda Walsh <lkml@tlinx.org> wrote:

>
>Cong Wang wrote:
>> On Wed, Nov 28, 2012 at 4:37 AM, Linda Walsh <lkml@tlinx.org> wrote:  
>>> Is this a known problem / bug, or should I file a bug on it? 
>> Does this quick fix help?
>> ...
>> Thanks!
>>   
>
>   Applied:
>--- bond_main.c.orig  2012-09-30 16:47:46.000000000 -0700
>+++ bond_main.c 2012-11-28 12:58:34.064931997 -0800
>@@ -1778,7 +1778,9 @@
>   new_slave->link == BOND_LINK_DOWN ? "DOWN" :
>     (new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
>
>+ read_unlock(&bond->lock);
> bond_update_speed_duplex(new_slave);
>+ read_lock(&bond->lock);
>
> if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) {
>   /* if there is a primary slave, remember it */
>----
>Recompile/run:
>Linux Ishtar 3.6.8-Isht-Van #4 SMP PREEMPT Wed Nov 28 12:59:13 PST 2012
>x86_64 x86_64 x86_64 GNU/Linux
>
>---
>
>Similar.  The tracebacks are below.
>
>Since I am running in round-robin, trying for RAID0 of the 2 links--
>simple bandwidth aggregation, do I even need miimon?  I mean, what load
>is there to balance?
>
>Not that this is likely the root of the bug, but it might make it
>not happen in my case, if I remove the load-bal stuff...??

	The miimon functionality is used to check link state and notice
when slaves lose carrier.  Running without it will not detect failure of
the bonding slaves, which is likely not what you want.  The mode,
balance-rr in your case, is what selects the load balance to use, and is
separate from the miimon.

	That said, the problem you're seeing appears to be caused by two
things: bonding holds a lock (in addition to RTNL) when calling
__ethtool_get_settings, and an ixgbe function in the call path to
retrieve the settings, ixgbe_acquire_swfw_sync_X540, can sleep.

	The test patch above handles one case in bond_enslave, but there
is another case in bond_miimon_commit when a slave changes link state
from down to up, which will occur shortly after the slave is added.

	A similar test patch for the case I describe would be the
following:

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 5f5b69f..b25ac47 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2467,7 +2467,9 @@ static void bond_miimon_commit(struct bonding *bond)
 				bond_set_backup_slave(slave);
 			}
 
+			read_unlock(&bond->lock);
 			bond_update_speed_duplex(slave);
+			read_lock(&bond->lock);
 
 			pr_info("%s: link status definitely up for interface %s, %u Mbps %s duplex.\n",
 				bond->dev->name, slave->dev->name,

	I haven't tested this at all (or even compiled it), but I
suspect it will make the warnings go away.

	-J

>[   52.457633] bonding: bond0: Adding slave p2p1.
>[   52.941390] bonding: bond0: enslaving p2p1 as an active interface with
>a down link.
>[   52.959329] bonding: bond0: Adding slave p2p2.
>[   53.442769] bonding: bond0: enslaving p2p2 as an active interface with
>a down link.
>[   58.588410] ixgbe 0000:06:00.0: p2p1: NIC Link is Up 10 Gbps, Flow
>Control: None
>[   58.666760] BUG: scheduling while atomic: kworker/u:1/103/0x00000002
>[   58.673144] 4 locks held by kworker/u:1/103:
>[   58.673145]  #0:  ((bond_dev->name)){......}, at: [<ffffffff8105a956>]
>process_one_work+0x146/0x680
>[   58.673161]  #1:  ((&(&bond->mii_work)->work)){......}, at:
>[<ffffffff8105a956>] process_one_work+0x146/0x680
>[   58.673167]  #2:  (rtnl_mutex){......}, at: [<ffffffff815a4dd0>]
>rtnl_trylock+0x10/0x20
>[   58.673175]  #3:  (&bond->lock){......}, at: [<ffffffff81480b5d>]
>bond_mii_monitor+0x2ed/0x640
>[   58.673183] Modules linked in: fan kvm_intel mousedev kvm iTCO_wdt
>iTCO_vendor_support acpi_cpufreq tpm_tis tpm tpm_bios mperf processor
>[   58.673196] Pid: 103, comm: kworker/u:1 Not tainted 3.6.8-Isht-Van #4
>[   58.673198] Call Trace:
>[   58.673203]  [<ffffffff8167bb36>] __schedule_bug+0x5e/0x6c
>[   58.673208]  [<ffffffff816859bc>] __schedule+0x77c/0x810
>[   58.673211]  [<ffffffff81685ad4>] schedule+0x24/0x70
>[   58.673214]  [<ffffffff81684bec>]
>schedule_hrtimeout_range_clock+0xfc/0x140
>[   58.673218]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
>[   58.673222]  [<ffffffff81065a1f>] ? hrtimer_start_range_ns+0xf/0x20
>[   58.673225]  [<ffffffff81684c3e>] schedule_hrtimeout_range+0xe/0x10
>[   58.673229]  [<ffffffff8104bddb>] usleep_range+0x3b/0x40
>[   58.673235]  [<ffffffff814d220c>] ixgbe_acquire_swfw_sync_X540+0xbc/0x110
>[   58.673238]  [<ffffffff814ce4dd>] ixgbe_read_phy_reg_generic+0x3d/0x120
>[   58.673241]  [<ffffffff814ce74c>]
>ixgbe_get_copper_link_capabilities_generic+0x2c/0x60
>[   58.673244]  [<ffffffff81480b5d>] ? bond_mii_monitor+0x2ed/0x640
>[   58.673248]  [<ffffffff814c6454>] ixgbe_get_settings+0x34/0x2b0
>[   58.673253]  [<ffffffff8159af55>] __ethtool_get_settings+0x85/0x140
>[   58.673256]  [<ffffffff8147c6e3>] bond_update_speed_duplex+0x23/0x60
>[   58.673259]  [<ffffffff81480bc4>] bond_mii_monitor+0x354/0x640
>[   58.673262]  [<ffffffff8105a9b7>] process_one_work+0x1a7/0x680
>[   58.673264]  [<ffffffff8105a956>] ? process_one_work+0x146/0x680
>[   58.673269]  [<ffffffff8108c7ce>] ? put_lock_stats.isra.21+0xe/0x40
>[   58.673279]  [<ffffffff81480870>] ? bond_loadbalance_arp_mon+0x2c0/0x2c0
>[   58.673286]  [<ffffffff8105b9ed>] worker_thread+0x18d/0x4f0
>[   58.673296]  [<ffffffff81070991>] ? sub_preempt_count+0x51/0x60
>[   58.673303]  [<ffffffff8105b860>] ? manage_workers+0x320/0x320
>[   58.673312]  [<ffffffff81060f7d>] kthread+0x9d/0xb0
>[   58.673317]  [<ffffffff816892e4>] kernel_thread_helper+0x4/0x10
>[   58.673320]  [<ffffffff8106c197>] ? finish_task_switch+0x77/0x100
>[   58.673323]  [<ffffffff81687526>] ? _raw_spin_unlock_irq+0x36/0x60
>[   58.673326]  [<ffffffff81687a5d>] ? retint_restore_args+0xe/0xe
>[   58.673329]  [<ffffffff81060ee0>] ? flush_kthread_worker+0x160/0x160
>[   58.673332]  [<ffffffff816892e0>] ? gs_change+0xb/0xb
>[   58.676704] BUG: scheduling while atomic: kworker/u:1/103/0x00000002
>[   58.683107] 4 locks held by kworker/u:1/103:
>[   58.683109]  #0:  ((bond_dev->name)){......}, at: [<ffffffff8105a956>]
>process_one_work+0x146/0x680
>[   58.683120]  #1:  ((&(&bond->mii_work)->work)){......}, at:
>[<ffffffff8105a956>] process_one_work+0x146/0x680
>[   58.683128]  #2:  (rtnl_mutex){......}, at: [<ffffffff815a4dd0>]
>rtnl_trylock+0x10/0x20
>[   58.683136]  #3:  (&bond->lock){......}, at: [<ffffffff81480b5d>]
>bond_mii_monitor+0x2ed/0x640
>[   58.683145] Modules linked in: fan kvm_intel mousedev kvm iTCO_wdt
>iTCO_vendor_support acpi_cpufreq tpm_tis tpm tpm_bios mperf processor
>[   58.683162] Pid: 103, comm: kworker/u:1 Tainted: G        W
>3.6.8-Isht-Van #4
>[   58.683164] Call Trace:
>[   58.683170]  [<ffffffff8167bb36>] __schedule_bug+0x5e/0x6c
>[   58.683175]  [<ffffffff816859bc>] __schedule+0x77c/0x810
>[   58.683180]  [<ffffffff81685ad4>] schedule+0x24/0x70
>[   58.683184]  [<ffffffff81684bec>]
>schedule_hrtimeout_range_clock+0xfc/0x140
>[   58.683189]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
>[   58.683194]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
>[   58.683198]  [<ffffffff81065a1f>] ? hrtimer_start_range_ns+0xf/0x20
>[   58.683203]  [<ffffffff81684c3e>] schedule_hrtimeout_range+0xe/0x10
>[   58.683208]  [<ffffffff8104bddb>] usleep_range+0x3b/0x40
>[   58.683213]  [<ffffffff814d213e>] ixgbe_release_swfw_sync_X540+0x4e/0x60
>[   58.683217]  [<ffffffff814ce5a1>] ixgbe_read_phy_reg_generic+0x101/0x120
>[   58.683222]  [<ffffffff814ce74c>]
>ixgbe_get_copper_link_capabilities_generic+0x2c/0x60
>[   58.683227]  [<ffffffff81480b5d>] ? bond_mii_monitor+0x2ed/0x640
>[   58.683231]  [<ffffffff814c6454>] ixgbe_get_settings+0x34/0x2b0
>[   58.683237]  [<ffffffff8159af55>] __ethtool_get_settings+0x85/0x140
>[   58.683241]  [<ffffffff8147c6e3>] bond_update_speed_duplex+0x23/0x60
>[   58.683246]  [<ffffffff81480bc4>] bond_mii_monitor+0x354/0x640
>[   58.683250]  [<ffffffff8105a9b7>] process_one_work+0x1a7/0x680
>[   58.683254]  [<ffffffff8105a956>] ? process_one_work+0x146/0x680
>[   58.683259]  [<ffffffff8108c7ce>] ? put_lock_stats.isra.21+0xe/0x40
>[   58.683264]  [<ffffffff81480870>] ? bond_loadbalance_arp_mon+0x2c0/0x2c0
>[   58.683268]  [<ffffffff8105b9ed>] worker_thread+0x18d/0x4f0
>[   58.683273]  [<ffffffff81070991>] ? sub_preempt_count+0x51/0x60
>[   58.683278]  [<ffffffff8105b860>] ? manage_workers+0x320/0x320
>[   58.683283]  [<ffffffff81060f7d>] kthread+0x9d/0xb0
>[   58.683288]  [<ffffffff816892e4>] kernel_thread_helper+0x4/0x10
>[   58.683293]  [<ffffffff8106c197>] ? finish_task_switch+0x77/0x100
>[   58.683297]  [<ffffffff81687526>] ? _raw_spin_unlock_irq+0x36/0x60
>[   58.683301]  [<ffffffff81687a5d>] ? retint_restore_args+0xe/0xe
>[   58.683306]  [<ffffffff81060ee0>] ? flush_kthread_worker+0x160/0x160
>[   58.683311]  [<ffffffff816892e0>] ? gs_change+0xb/0xb
>[   58.686755] bonding: bond0: link status definitely up for interface
>p2p1, 10000 Mbps full duplex.
>[   58.943059] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow
>Control: Rx/Tx
>[   59.717848] ixgbe 0000:06:00.1: p2p2: NIC Link is Up 10 Gbps, Flow
>Control: None
>[   59.784848] BUG: scheduling while atomic: kworker/u:1/103/0x00000002
>[   59.791219] 4 locks held by kworker/u:1/103:
>[   59.791222]  #0:  ((bond_dev->name)){......}, at: [<ffffffff8105a956>]
>process_one_work+0x146/0x680
>[   59.791237]  #1:  ((&(&bond->mii_work)->work)){......}, at:
>[<ffffffff8105a956>] process_one_work+0x146/0x680
>[   59.791245]  #2:  (rtnl_mutex){......}, at: [<ffffffff815a4dd0>]
>rtnl_trylock+0x10/0x20
>[   59.791256]  #3:  (&bond->lock){......}, at: [<ffffffff81480b5d>]
>bond_mii_monitor+0x2ed/0x640
>[   59.791276] Modules linked in: fan kvm_intel mousedev kvm iTCO_wdt
>iTCO_vendor_support acpi_cpufreq tpm_tis tpm tpm_bios mperf processor
>[   59.791296] Pid: 103, comm: kworker/u:1 Tainted: G        W
>3.6.8-Isht-Van #4
>[   59.791299] Call Trace:
>[   59.791306]  [<ffffffff8167bb36>] __schedule_bug+0x5e/0x6c
>[   59.791312]  [<ffffffff816859bc>] __schedule+0x77c/0x810
>[   59.791317]  [<ffffffff81685ad4>] schedule+0x24/0x70
>[   59.791322]  [<ffffffff81684bec>]
>schedule_hrtimeout_range_clock+0xfc/0x140
>[   59.791329]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
>[   59.791334]  [<ffffffff81065a1f>] ? hrtimer_start_range_ns+0xf/0x20
>[   59.791339]  [<ffffffff81684c3e>] schedule_hrtimeout_range+0xe/0x10
>[   59.791345]  [<ffffffff8104bddb>] usleep_range+0x3b/0x40
>[   59.791352]  [<ffffffff814d220c>] ixgbe_acquire_swfw_sync_X540+0xbc/0x110
>[   59.791357]  [<ffffffff814ce4dd>] ixgbe_read_phy_reg_generic+0x3d/0x120
>[   59.791361]  [<ffffffff814ce74c>]
>ixgbe_get_copper_link_capabilities_generic+0x2c/0x60
>[   59.791366]  [<ffffffff81480b5d>] ? bond_mii_monitor+0x2ed/0x640
>[   59.791372]  [<ffffffff814c6454>] ixgbe_get_settings+0x34/0x2b0
>[   59.791381]  [<ffffffff8159af55>] __ethtool_get_settings+0x85/0x140
>[   59.791386]  [<ffffffff8147c6e3>] bond_update_speed_duplex+0x23/0x60
>[   59.791389]  [<ffffffff81480bc4>] bond_mii_monitor+0x354/0x640
>[   59.791393]  [<ffffffff8105a9b7>] process_one_work+0x1a7/0x680
>[   59.791396]  [<ffffffff8105a956>] ? process_one_work+0x146/0x680
>[   59.791402]  [<ffffffff8108c7ce>] ? put_lock_stats.isra.21+0xe/0x40
>[   59.791411]  [<ffffffff81480870>] ? bond_loadbalance_arp_mon+0x2c0/0x2c0
>[   59.791421]  [<ffffffff8105b9ed>] worker_thread+0x18d/0x4f0
>[   59.791434]  [<ffffffff81070991>] ? sub_preempt_count+0x51/0x60
>[   59.791442]  [<ffffffff8105b860>] ? manage_workers+0x320/0x320
>[   59.791453]  [<ffffffff81060f7d>] kthread+0x9d/0xb0
>[   59.791460]  [<ffffffff816892e4>] kernel_thread_helper+0x4/0x10
>[   59.791464]  [<ffffffff8106c197>] ? finish_task_switch+0x77/0x100
>[   59.791468]  [<ffffffff81687526>] ? _raw_spin_unlock_irq+0x36/0x60
>[   59.791472]  [<ffffffff81687a5d>] ? retint_restore_args+0xe/0xe
>[   59.791476]  [<ffffffff81060ee0>] ? flush_kthread_worker+0x160/0x160
>[   59.791480]  [<ffffffff816892e0>] ? gs_change+0xb/0xb
>[   59.794932] BUG: scheduling while atomic: kworker/u:1/103/0x00000002
>[   59.801333] 4 locks held by kworker/u:1/103:
>[   59.801340]  #0:  ((bond_dev->name)){......}, at: [<ffffffff8105a956>]
>process_one_work+0x146/0x680
>[   59.801345]  #1:  ((&(&bond->mii_work)->work)){......}, at:
>[<ffffffff8105a956>] process_one_work+0x146/0x680
>[   59.801350]  #2:  (rtnl_mutex){......}, at: [<ffffffff815a4dd0>]
>rtnl_trylock+0x10/0x20
>[   59.801356]  #3:  (&bond->lock){......}, at: [<ffffffff81480b5d>]
>bond_mii_monitor+0x2ed/0x640
>[   59.801365] Modules linked in: fan kvm_intel mousedev kvm iTCO_wdt
>iTCO_vendor_support acpi_cpufreq tpm_tis tpm tpm_bios mperf processor
>[   59.801368] Pid: 103, comm: kworker/u:1 Tainted: G        W
>3.6.8-Isht-Van #4
>[   59.801369] Call Trace:
>[   59.801373]  [<ffffffff8167bb36>] __schedule_bug+0x5e/0x6c
>[   59.801380]  [<ffffffff816859bc>] __schedule+0x77c/0x810
>[   59.801385]  [<ffffffff81685ad4>] schedule+0x24/0x70
>[   59.801391]  [<ffffffff81684bec>]
>schedule_hrtimeout_range_clock+0xfc/0x140
>[   59.801395]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
>[   59.801399]  [<ffffffff81064c80>] ? update_rmtp+0x60/0x60
>[   59.801404]  [<ffffffff81065a1f>] ? hrtimer_start_range_ns+0xf/0x20
>[   59.801409]  [<ffffffff81684c3e>] schedule_hrtimeout_range+0xe/0x10
>[   59.801414]  [<ffffffff8104bddb>] usleep_range+0x3b/0x40
>[   59.801419]  [<ffffffff814d213e>] ixgbe_release_swfw_sync_X540+0x4e/0x60
>[   59.801424]  [<ffffffff814ce5a1>] ixgbe_read_phy_reg_generic+0x101/0x120
>[   59.801429]  [<ffffffff814ce74c>]
>ixgbe_get_copper_link_capabilities_generic+0x2c/0x60
>[   59.801433]  [<ffffffff81480b5d>] ? bond_mii_monitor+0x2ed/0x640
>[   59.801441]  [<ffffffff814c6454>] ixgbe_get_settings+0x34/0x2b0
>[   59.801446]  [<ffffffff8159af55>] __ethtool_get_settings+0x85/0x140
>[   59.801450]  [<ffffffff8147c6e3>] bond_update_speed_duplex+0x23/0x60
>[   59.801471]  [<ffffffff81480bc4>] bond_mii_monitor+0x354/0x640
>[   59.801475]  [<ffffffff8105a9b7>] process_one_work+0x1a7/0x680
>[   59.801477]  [<ffffffff8105a956>] ? process_one_work+0x146/0x680
>[   59.801481]  [<ffffffff8108c7ce>] ? put_lock_stats.isra.21+0xe/0x40
>[   59.801484]  [<ffffffff81480870>] ? bond_loadbalance_arp_mon+0x2c0/0x2c0
>[   59.801489]  [<ffffffff8105b9ed>] worker_thread+0x18d/0x4f0
>[   59.801495]  [<ffffffff81070991>] ? sub_preempt_count+0x51/0x60
>[   59.801500]  [<ffffffff8105b860>] ? manage_workers+0x320/0x320
>[   59.801505]  [<ffffffff81060f7d>] kthread+0x9d/0xb0
>[   59.801510]  [<ffffffff816892e4>] kernel_thread_helper+0x4/0x10
>[   59.801515]  [<ffffffff8106c197>] ? finish_task_switch+0x77/0x100
>[   59.801519]  [<ffffffff81687526>] ? _raw_spin_unlock_irq+0x36/0x60
>[   59.801524]  [<ffffffff81687a5d>] ? retint_restore_args+0xe/0xe
>[   59.801530]  [<ffffffff81060ee0>] ? flush_kthread_worker+0x160/0x160
>[   59.801536]  [<ffffffff816892e0>] ? gs_change+0xb/0xb
>[   59.804986] bonding: bond0: link status definitely up for interface
>p2p2, 10000 Mbps full duplex.

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply related

* Re: [PATCH] smsc75xx: don't call usbnet_resume if usbnet_suspend fails
From: David Miller @ 2012-11-29  2:03 UTC (permalink / raw)
  To: steve.glendinning; +Cc: netdev
In-Reply-To: <1354125469-29671-1-git-send-email-steve.glendinning@shawell.net>

From: Steve Glendinning <steve.glendinning@shawell.net>
Date: Wed, 28 Nov 2012 17:57:49 +0000

> If usbnet_suspend returns an error we don't want to call
> usbnet_resume to clean up, but instead just return the error.
> 
> If usbnet_suspend *does* succeed, and we have a problem further
> on, the desired behaviour is still to call usbnet_resume
> to clean up before returning.
> 
> Signed-off-by: Steve Glendinning <steve.glendinning@shawell.net>

Steve, can you do me a huge favor?

Get rid of these silly macros from this driver.

Any CPP macro that alters control flow is too evil to live.  And we've
worked tirelessly to remove every instance of such a macro from the
core networking interfaces, because they always lead to hard to
diagnose bugs.

Thanks.

^ permalink raw reply

* Re: [PATCH 2/3] smsc95xx: fix error handling in suspend failure case
From: David Miller @ 2012-11-29  2:04 UTC (permalink / raw)
  To: steve.glendinning; +Cc: netdev
In-Reply-To: <1354124819-29531-3-git-send-email-steve.glendinning@shawell.net>

From: Steve Glendinning <steve.glendinning@shawell.net>
Date: Wed, 28 Nov 2012 17:46:58 +0000

> -		check_warn_return(ret, "Error reading WUCSR\n");
> +		check_warn_goto_done(ret, "Error reading WUCSR\n");

This is just another example of how error prone these macros
are, kill them.

^ permalink raw reply

* Re: VPN traffic leaks in IPv6/IPv4 dual-stack networks/hosts
From: Fernando Gont @ 2012-11-29  2:29 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netdev
In-Reply-To: <alpine.LNX.2.01.1211282203310.11510@nerf07.vanv.qr>

On 11/28/2012 06:37 PM, Jan Engelhardt wrote:
>> On 11/28/2012 05:06 PM, Jan Engelhardt wrote:
>>>> If the VPN is supposed to secure all traffic, and the VPN just fails to
>>>> support v6, then for me, it's questionable to have your traffic leak out
>>>> the VPN just because of that lack of IPv6 support.
>>>
>>> Well, what I am saying is that a server may not
>>> be conveying "all", but only "0.0.0.0/0"[0/0].
>>
>> In such scenarios, doing nothing about IPv6 would be an oversight/error,
> 
> Without additional input from the user, e.g. by means of a config 
> setting, the software itself cannot distinguish between an 
> oversight/error and a deliberate configuration.

Exactly. So fail on the safe side, and disable IPv6. Most users
forwarding all IPv4 traffic are meaning to secure all their traffic with
the VPN.

If you do nothing about v6, then it just takes a local attacker to
trigger v6 connectivity (or the user to connect to a dual-stacked
network) for the supposedly-secure traffic to go out in the clear.

Many people don't realize that v6 and v4, while being two different
protocols, are tied together by means of the DNS. And the
aforementioned issue will come up as a surprise in most scenarios.

Not to mention that nowadays, you will miss virtually nothing on the
Internet by having v6 off.

Thanks,
-- 
Fernando Gont
e-mail: fernando@gont.com.ar || fgont@si6networks.com
PGP Fingerprint: 7809 84F5 322E 45C7 F1C9 3945 96EE A9EF D076 FFF1

^ permalink raw reply

* [GIT] Networking
From: David Miller @ 2012-11-29  2:47 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, netdev, linux-kernel


Some more fixes trickled in over the past few days:

1) PIM device names can overflow the IFNAMSIZ buffer unless we
   properly limit the allowed indexes, fix from Eric Dumazet.

2) Under heavy load we can OOPS in icmp reply processing due to
   an unchecked inet_putpeer() call.  Fix from Neal Cardwell.

3) SCTP round trip calculations need to use 64-bit math to avoid
   overflows, fix from Schoch Christian.

4) Fix a memory leak and an error return flub in SCTP and IRDA
   triggerable by userspace.  Fix from Tommi Rantala and found by
   the syscall fuzzer (trinity).

5) MLX4 driver gives bogus size to memcpy() call, fix from Amir
   Vadai.

6) Fix length calculation in VHOST descriptor translation, from
   Michael S. Tsirkin.

7) Ambassador ATM driver loops forever while loading firmware, fix
   from Dan Carpenter.

8) Over MTU packets in openvswitch warn about wrong device, fix
   from Jesse Gross.

9) Netfilter IPSET's netlink code can overrun a string buffer
   because it's not properly limited to IFNAMSIZ.  Fix from
   Florian Westphal.

10) PCAN USB driver sets wrong timestamp in SKB, from Oliver
    Hartkopp.

11) Make sure the RX ifindex always has a valid value in the
    CAN BCM driver, even if we haven't received a frame yet.
    Fix also from Oliver Hartkopp.

Please pull, thanks a lot!

The following changes since commit 194d9831f0419b5125dc94ec0ece4434d8ef74f0:

  Merge tag 'sound-3.7' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound (2012-11-24 08:32:11 -1000)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git master

for you to fetch changes up to a45085f6a7801f95cd5682290195224e268627fd:

  Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch (2012-11-28 18:00:47 -0500)

----------------------------------------------------------------

Amir Vadai (1):
      net/mlx4_en: Can set maxrate only for TC0

Bjørn Mork (1):
      net: qmi_wwan: add Huawei E173

Dan Carpenter (1):
      atm: forever loop loading ambassador firmware

David S. Miller (2):
      Merge branch 'master' of git://1984.lsi.us.es/nf
      Merge branch 'fixes' of git://git.kernel.org/.../jesse/openvswitch

Eric Dumazet (1):
      net: ipmr: limit MRT_TABLE identifiers

Florian Westphal (1):
      netfilter: ipset: fix netiface set name overflow

Jesse Gross (1):
      openvswitch: Print device when warning about over MTU packets.

Jiri Pirko (1):
      team: fix hw_features setup

Mehak Mahajan (1):
      openvswitch: Store flow key len if ARP opcode is not request or reply.

Michael S. Tsirkin (1):
      vhost: fix length for cross region descriptor

Neal Cardwell (1):
      ipv4: avoid passing NULL to inet_putpeer() in icmpv4_xrlim_allow()

Oliver Hartkopp (2):
      can: peak_usb: fix hwtstamp assignment
      can: bcm: initialize ifindex for timeouts without previous frame reception

Schoch Christian (1):
      sctp: Error in calculation of RTTvar

Tommi Rantala (3):
      sctp: fix memory leak in sctp_datamsg_from_user() when copy from user space fails
      sctp: fix -ENOMEM result with invalid user space pointer in sendto() syscall
      irda: irttp: fix memory leak in irttp_open_tsap() error path

 drivers/atm/ambassador.c                       |  1 +
 drivers/net/can/usb/peak_usb/pcan_usb.c        |  8 ++++++--
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c    |  8 ++++++--
 drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c |  2 +-
 drivers/net/team/team.c                        |  4 +++-
 drivers/net/usb/qmi_wwan.c                     |  1 +
 drivers/vhost/vhost.c                          |  2 +-
 net/can/bcm.c                                  |  3 +++
 net/ipv4/icmp.c                                |  3 ++-
 net/ipv4/ipmr.c                                |  4 ++++
 net/irda/irttp.c                               |  1 +
 net/netfilter/ipset/ip_set_hash_netiface.c     |  2 +-
 net/openvswitch/flow.c                         | 14 +++++---------
 net/openvswitch/vport-netdev.c                 |  2 +-
 net/sctp/chunk.c                               | 20 ++++++++++++++------
 net/sctp/socket.c                              |  4 ++--
 net/sctp/transport.c                           |  2 +-
 17 files changed, 53 insertions(+), 28 deletions(-)

^ permalink raw reply

* Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
From: Ethan Zhao @ 2012-11-29  3:10 UTC (permalink / raw)
  To: Fujinaka, Todd
  Cc: Joe Jin, Ben Hutchings, Mary Mcgrath, netdev@vger.kernel.org,
	e1000-devel@lists.sf.net, linux-kernel@vger.kernel.org, linux-pci
In-Reply-To: <9B4A1B1917080E46B64F07F2989DADD62F2D8AAC@ORSMSX102.amr.corp.intel.com>

Joe,
    Possibly your customer is running a kernel without source code on
a platform whose vendor wouldn't like to fix the BIOS issue (is that an
HP/Dell server?).
    Anyway, to see whether it is a payload issue or not, you could change
the payload size of those devices with the setpci tool and set the link
retrain bit to trigger link retraining, in order to debug the issue and
identify the root cause.  I think that is much easier than modifying the
BIOS or the EEPROM of the NIC.

    e.g.
   set device control register to 0f 00   (128 bytes payload size)
   #   setpci -v -s 00:02.0 98.w=000f
   set device link control register to 60h (retrain the link)
   #  setpci -v -s 00:02.0 a0.b=60

  Hope it works,  Just my 2 cents.

Ethan.zhao@oracle.com

On Wed, Nov 28, 2012 at 11:53 PM, Fujinaka, Todd
<todd.fujinaka@intel.com> wrote:
> The only EEPROM I know about or can speak to is the one attached to the 82571 and it doesn't set the MaxPayloadSize. That's done by the BIOS.
>
> Todd Fujinaka
> Technical Marketing Engineer
> LAN Access Division (LAD)
> Intel Corporation
> todd.fujinaka@intel.com
> (503) 712-4565
>
>
> -----Original Message-----
> From: Joe Jin [mailto:joe.jin@oracle.com]
> Sent: Wednesday, November 28, 2012 12:31 AM
> To: Ben Hutchings
> Cc: Fujinaka, Todd; Mary Mcgrath; netdev@vger.kernel.org; e1000-devel@lists.sf.net; linux-kernel@vger.kernel.org; linux-pci
> Subject: Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
>
> On 11/28/12 02:10, Ben Hutchings wrote:
>> On Tue, 2012-11-27 at 17:32 +0000, Fujinaka, Todd wrote:
>>> Forgive me if I'm being too repetitious as I think some of this has
>>> been mentioned in the past.
>>>
>>> We (and by we I mean the Ethernet part and driver) can only change
>>> the advertised availability of a larger MaxPayloadSize. The size is
>>> negotiated by both sides of the link when the link is established.
>>> The driver should not change the size of the link as it would be
>>> poking at registers outside of its scope and is controlled by the
>>> upstream bridge (not us).
>> [...]
>>
>> MaxPayloadSize (MPS) is not negotiated between devices but is
>> programmed by the system firmware (at least for devices present at
>> boot - the kernel may be responsible in case of hotplug).  You can use
>> the kernel parameter 'pci=pcie_bus_perf' (or one of several others) to
>> set a policy that overrides this, but no policy will allow setting MPS
>> above the device's MaxPayloadSizeSupported (MPSS).
>>
>
> Ben,
>
> Unfortunately I'm using a 3.0.x kernel and this is not included in that kernel.
> So I'm trying to use ethtool to modify it in the EEPROM to see whether that helps or not.
>
>
> Todd, I'll review all MaxPayload for all devices, but need to say if it mismatch, customer could not modify it from BIOS for there was not entry at there, to test it, we have to find how to verify if this is the root cause, so still need to find the offset in eeprom.
>
> Thanks in advance,
> Joe
>

^ permalink raw reply

* Re: VPN traffic leaks in IPv6/IPv4 dual-stack networks/hosts
From: Jan Engelhardt @ 2012-11-29  3:15 UTC (permalink / raw)
  To: Fernando Gont; +Cc: netdev
In-Reply-To: <50B6C8A5.2090404@gont.com.ar>


On Thursday 2012-11-29 03:29, Fernando Gont wrote:
>
>Not to mention that nowadays, you will miss virtually nothing on the
>Internet by having v6 off.

Don't judge others by your own standards..

^ permalink raw reply

* Re: VPN traffic leaks in IPv6/IPv4 dual-stack networks/hosts
From: Fernando Gont @ 2012-11-29  4:38 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: netdev
In-Reply-To: <alpine.LNX.2.01.1211290413330.31436@nerf07.vanv.qr>

On 11/29/2012 12:15 AM, Jan Engelhardt wrote:
> 
> On Thursday 2012-11-29 03:29, Fernando Gont wrote:
>>
>> Not to mention that nowadays, you will miss virtually nothing on the
>> Internet by having v6 off.
> 
> Don't judge others by your own standards..
> 

You don't know me enough to make claims about my standards.

Whenever you please, please come up with a list of services (by domain
name) that are not available on v4, then talk.

And, in any case, what I mentioned still holds true: preventing VPN
leaks follows the principle of "least surprise".

I said what I had to say...

Cheers,
-- 
Fernando Gont
e-mail: fernando@gont.com.ar || fgont@si6networks.com
PGP Fingerprint: 7809 84F5 322E 45C7 F1C9 3945 96EE A9EF D076 FFF1

^ permalink raw reply

* Re: [PATCH] smsc95xx: fix suspend buffer overflow
From: Joe Perches @ 2012-11-29  4:54 UTC (permalink / raw)
  To: Steve Glendinning; +Cc: Steve Glendinning, netdev, Dan Carpenter
In-Reply-To: <CAKh2mn4pJG-F4zxpBj3MmRSNSV1Gwnxjw=kvfD0xqqYxErdEHg@mail.gmail.com>

On Wed, 2012-11-28 at 18:06 +0000, Steve Glendinning wrote:
> that filter code isn't pretty!  If you have time
> to knock up a patch I'd be happy to test it.

Looking a bit at the code, I don't know how it's supposed to work.

This function seems broken:

static u16 smsc_crc(const u8 *buffer, size_t len, int filter)
{
        return bitrev16(crc16(0xFFFF, buffer, len)) << ((filter % 2) * 16);
}

It always returns 0 when filter is odd.

I imagine 2 things:
o It should return u32
o when multiple WAKE_<foo> flags are set,
  the code doesn't work properly.

^ permalink raw reply

* Re: "tuntap: multiqueue support" causes udev fork bombs
From: Jason Wang @ 2012-11-29  5:47 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: David S. Miller, ML netdev, LKML, rmilasan, Jiri Slaby, maxk,
	vtun
In-Reply-To: <50B5E6A5.6040009@suse.cz>

On Wednesday, November 28, 2012 11:25:41 AM Jiri Slaby wrote:
> Hi,
> 
> with this commit:
> commit c8d68e6be1c3b242f1c598595830890b65cea64a
> Author: Jason Wang <jasowang@redhat.com>
> Date:   Wed Oct 31 19:46:00 2012 +0000
> 
>     tuntap: multiqueue support
> 
> 
> I see fork bombs from udev. It is trying to create 2048 processes. 1024
> for tx, 1024 for rx. OOM killer indeed steps in and kills everything.

Hi, thanks for the report. Could you please try the following patch?

---

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b44d7b7..cc3f878 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -492,9 +492,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 
 	tun_set_real_num_queues(tun);
 
-	if (tun->numqueues == 1)
-		netif_carrier_on(tun->dev);
-
 	/* device is allowed to go away first, so no need to hold extra
 	 * refcnt.
 	 */
@@ -1611,6 +1608,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 			TUN_USER_FEATURES;
 		dev->features = dev->hw_features;
 
+		err = tun_attach(tun, file);
+		if (err < 0)
+			goto err_free_dev;
+
 		err = register_netdevice(tun->dev);
 		if (err < 0)
 			goto err_free_dev;
@@ -1620,9 +1621,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		    device_create_file(&tun->dev->dev, &dev_attr_group))
 			pr_err("Failed to create tun sysfs files\n");
 
-		err = tun_attach(tun, file);
-		if (err < 0)
-			goto err_free_dev;
+		netif_carrier_on(tun->dev);
 	}
 
 	tun_debug(KERN_INFO, tun, "tun_set_iff\n");

^ permalink raw reply related

* [PATCH 1/2] of_mdio: Honour "status=disabled" property of device
From: Alexander Sverdlin @ 2012-11-29  7:45 UTC (permalink / raw)
  To: Stephen Warren, devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ,
	Rob Herring, Grant Likely
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Barry.Song-kQvG35nSl+M,
	w.sang-bIcnvbaLZ9MEGnE8C9+IrQ, alexander sverdlin

From: Alexander Sverdlin <alexander.sverdlin-uSbOeAmDUekAvxtiuMwx3w@public.gmane.org>

of_mdio: Honour "status=disabled" property of device

Currently of_mdiobus_register() function registers all PHY devices,
independently from their status property in device tree. According to
"ePAPR 1.1" spec, device should only be registered if there is no
"status" property, or it has "ok" (or "okay") value (see
of_device_is_available()). In case of "platform devices",
of_platform_device_create_pdata() checks for "status" and ensures
that disabled devices are not populated. But such a check for MDIO buses
was missing until now. Fix it.

Signed-off-by: Alexander Sverdlin <alexander.sverdlin-uSbOeAmDUekAvxtiuMwx3w@public.gmane.org>
---
--- linux.orig/drivers/of/of_mdio.c
+++ linux/drivers/of/of_mdio.c
@@ -53,7 +53,7 @@ int of_mdiobus_register(struct mii_bus *
 		return rc;
 
 	/* Loop over the child nodes and register a phy_device for each one */
-	for_each_child_of_node(np, child) {
+	for_each_available_child_of_node(np, child) {
 		const __be32 *paddr;
 		u32 addr;
 		int len;

^ permalink raw reply

* [PATCH: net-next] core: make GRO methods static.
From: Rami Rosen @ 2012-11-29  7:55 UTC (permalink / raw)
  To: davem; +Cc: netdev, Rami Rosen

This patch changes three methods to be static and removes their
EXPORT_SYMBOLs in core/dev.c and their external declaration in
netdevice.h. The methods, dev_gro_receive(), napi_frags_finish() and
napi_skb_finish(), which are in the GRO rx path, are not used
outside core/dev.c.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
---
 include/linux/netdevice.h | 6 ------
 net/core/dev.c            | 9 +++------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e9929ab..18c5dc9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2153,16 +2153,10 @@ extern void dev_kfree_skb_any(struct sk_buff *skb);
 extern int		netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
 extern int		netif_receive_skb(struct sk_buff *skb);
-extern gro_result_t	dev_gro_receive(struct napi_struct *napi,
-					struct sk_buff *skb);
-extern gro_result_t	napi_skb_finish(gro_result_t ret, struct sk_buff *skb);
 extern gro_result_t	napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
 extern void		napi_gro_flush(struct napi_struct *napi, bool flush_old);
 extern struct sk_buff *	napi_get_frags(struct napi_struct *napi);
-extern gro_result_t	napi_frags_finish(struct napi_struct *napi,
-					  struct sk_buff *skb,
-					  gro_result_t ret);
 extern gro_result_t	napi_gro_frags(struct napi_struct *napi);
 
 static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a5f5586..2f94df2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3592,7 +3592,7 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
 	struct packet_offload *ptype;
@@ -3683,7 +3683,6 @@ normal:
 	ret = GRO_NORMAL;
 	goto pull;
 }
-EXPORT_SYMBOL(dev_gro_receive);
 
 static inline gro_result_t
 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
@@ -3710,7 +3709,7 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	return dev_gro_receive(napi, skb);
 }
 
-gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
 	switch (ret) {
 	case GRO_NORMAL:
@@ -3736,7 +3735,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
 	return ret;
 }
-EXPORT_SYMBOL(napi_skb_finish);
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
 {
@@ -3788,7 +3786,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 			       gro_result_t ret)
 {
 	switch (ret) {
@@ -3813,7 +3811,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
 
 	return ret;
 }
-EXPORT_SYMBOL(napi_frags_finish);
 
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
-- 
1.7.11.7

^ permalink raw reply related

* RDS: sendto() with very large buffer triggering WARNING: at mm/page_alloc.c:2403
From: Tommi Rantala @ 2012-11-29  8:39 UTC (permalink / raw)
  To: netdev, Venkat Venkatsubra, rds-devel; +Cc: Dave Jones

Hello,

Is RDS supposed to cap the sendto() buffer size? Saw the WARNING while
fuzzing with Trinity.

#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>

/*
 * Payload for the oversized send: a 1234000567-byte (roughly 1.2 GB)
 * static array. It has no initializer, so it contains only zero bytes.
 */
static const char buf[1234000567];

/*
 * Reproducer: issue a single sendto() on an RDS socket with the whole of
 * buf as the payload.
 *
 * Returns 1 if the socket cannot be created and 0 otherwise. The return
 * values of bind() and sendto() are not checked.
 */
int main(void)
{
        int fd;
        struct sockaddr_in sa;

        /* The address family is passed as the literal 21 (AF_RDS). */
        fd = socket(21 /* AF_RDS */, SOCK_SEQPACKET, 0);
        if (fd < 0)
                return 1;

        /*
         * One IPv4 address (127.0.0.1, port 11111) serves as both the
         * local bind address and the sendto() destination below.
         */
        memset(&sa, 0, sizeof(sa));
        sa.sin_family = AF_INET;
        sa.sin_addr.s_addr = inet_addr("127.0.0.1");
        sa.sin_port = htons(11111);

        bind(fd, (struct sockaddr *)&sa, sizeof(sa));

        /*
         * Hand the entire buffer to the kernel in one call; the length
         * argument is sizeof(buf), i.e. 1234000567 bytes.
         */
        sendto(fd, buf, sizeof(buf), 0, (struct sockaddr *)&sa, sizeof(sa));

        return 0;
}

$ strace -e sendto ./rds-sendto
sendto(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
1234000567, 0, {sa_family=AF_INET, sin_port=htons(11111),
sin_addr=inet_addr("127.0.0.1")}, 16) = -1 ENOMEM (Cannot allocate
memory)

[ 7421.592595] ------------[ cut here ]------------
[ 7421.592621] WARNING: at mm/page_alloc.c:2403
__alloc_pages_nodemask+0x2c0/0x9f0()
[ 7421.592628] Hardware name: EB1012
[ 7421.592633] Modules linked in:
[ 7421.592645] Pid: 3082, comm: rds-sendto Not tainted 3.7.0-rc7+ #58
[ 7421.592650] Call Trace:
[ 7421.592667]  [<ffffffff810a197b>] warn_slowpath_common+0x7b/0xc0
[ 7421.592678]  [<ffffffff810a19d5>] warn_slowpath_null+0x15/0x20
[ 7421.592689]  [<ffffffff81171900>] __alloc_pages_nodemask+0x2c0/0x9f0
[ 7421.592700]  [<ffffffff81107c90>] ? __lock_acquire+0x3a0/0x9f0
[ 7421.592711]  [<ffffffff81107c90>] ? __lock_acquire+0x3a0/0x9f0
[ 7421.592724]  [<ffffffff811ac6bf>] alloc_pages_current+0x7f/0xf0
[ 7421.592735]  [<ffffffff8116cc19>] __get_free_pages+0x9/0x40
[ 7421.592746]  [<ffffffff811b3d8a>] kmalloc_order_trace+0x3a/0x190
[ 7421.592755]  [<ffffffff81107c90>] ? __lock_acquire+0x3a0/0x9f0
[ 7421.592765]  [<ffffffff811b4f59>] __kmalloc+0x229/0x240
[ 7421.592778]  [<ffffffff81d1b06e>] rds_message_alloc+0x1e/0xa0
[ 7421.592789]  [<ffffffff81d1db66>] rds_sendmsg+0x196/0x720
[ 7421.592802]  [<ffffffff81a72a80>] ? sock_update_classid+0xf0/0x2b0
[ 7421.592813]  [<ffffffff81a6abec>] sock_sendmsg+0xdc/0xf0
[ 7421.592828]  [<ffffffff8118e9e5>] ? might_fault+0x85/0x90
[ 7421.592838]  [<ffffffff8118e99c>] ? might_fault+0x3c/0x90
[ 7421.592848]  [<ffffffff81a6e0fa>] sys_sendto+0xfa/0x130
[ 7421.592859]  [<ffffffff8110953d>] ? trace_hardirqs_on_caller+0x10d/0x1a0
[ 7421.592868]  [<ffffffff811095dd>] ? trace_hardirqs_on+0xd/0x10
[ 7421.592881]  [<ffffffff81e94c9d>] ? _raw_spin_unlock_irq+0x3d/0x70
[ 7421.592892]  [<ffffffff810bab44>] ? ptrace_notify+0x74/0x90
[ 7421.592904]  [<ffffffff81e96250>] tracesys+0xdd/0xe2
[ 7421.592911] ---[ end trace d9d681d0d60abf69 ]---

Tommi

^ permalink raw reply

* Re: [PATCH 1/1] Introduce notification events for routing changes
From: Jozsef Kadlecsik @ 2012-11-29  9:05 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, netfilter-devel
In-Reply-To: <20121128.175929.618012822860698468.davem@davemloft.net>

On Wed, 28 Nov 2012, David Miller wrote:

> > The netfilter MASQUERADE target does not handle the case when the routing
> > changes and the source address of existing connections become invalid.
> > The problem can be solved if routing modifications create events to which
> > the MASQUERADE target can subscribe and then delete the affected
> > connections.
> > 
> > The patch adds the required event support for IPv4/IPv6.
> > 
> > Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
> 
> What part of the information are you actually interested in?
> 
> Because just saying that a route is added or removed using fib_info X
> doesn't tell you a whole lot.
[...]

Please discard my patch. I found a simpler and more efficient way to solve 
the issue in netfilter itself.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary

^ permalink raw reply

* [PATCH] ipv6: unify logic evaluating inet6_dev's accept_ra property
From: Shmulik Ladkani @ 2012-11-29  9:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Hideaki YOSHIFUJI, Thomas Graf, Tore Anderson,
	shmulik.ladkani

As of 026359b [ipv6: Send ICMPv6 RSes only when RAs are accepted], the
logic determining whether to send Router Solicitations is identical
to the logic determining whether kernel accepts Router Advertisements.

However the condition itself is repeated in several code locations.

Unify it by introducing 'ipv6_accept_ra()' accessor.

Also, simplify the condition expression, making it more readable.
No semantic change.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
---

Not sure about the proper header to place the 'ipv6_accept_ra' accessor.
Currently placed in include/net/ipv6.h.
Other candidates were if_inet6.h and addrconf.h.

 include/net/ipv6.h  |   10 ++++++++++
 net/ipv6/addrconf.c |    3 +--
 net/ipv6/ndisc.c    |   16 ++--------------
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 979bf6c..cd15585 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -271,6 +271,16 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 
 extern bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb);
 
+static inline bool ipv6_accept_ra(struct inet6_dev *idev)
+{
+	/*
+	 * If forwarding is enabled, RA are not accepted unless the special
+	 * hybrid mode (accept_ra=2) is enabled.
+	 */
+	return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 :
+	    idev->cnf.accept_ra;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 static inline int ip6_frag_nqueues(struct net *net)
 {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0424e4e..ca1ed8a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3005,8 +3005,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	   router advertisements, start sending router solicitations.
 	 */
 
-	if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) ||
-	     ifp->idev->cnf.accept_ra == 2) &&
+	if (ipv6_accept_ra(ifp->idev) &&
 	    ifp->idev->cnf.rtr_solicits > 0 &&
 	    (dev->flags&IFF_LOOPBACK) == 0 &&
 	    (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2edce30..980cdc3 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1033,18 +1033,6 @@ errout:
 	rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
 }
 
-static inline int accept_ra(struct inet6_dev *in6_dev)
-{
-	/*
-	 * If forwarding is enabled, RA are not accepted unless the special
-	 * hybrid mode (accept_ra=2) is enabled.
-	 */
-	if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
-		return 0;
-
-	return in6_dev->cnf.accept_ra;
-}
-
 static void ndisc_router_discovery(struct sk_buff *skb)
 {
 	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1092,7 +1080,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		return;
 	}
 
-	if (!accept_ra(in6_dev))
+	if (!ipv6_accept_ra(in6_dev))
 		goto skip_linkparms;
 
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1248,7 +1236,7 @@ skip_linkparms:
 			     NEIGH_UPDATE_F_ISROUTER);
 	}
 
-	if (!accept_ra(in6_dev))
+	if (!ipv6_accept_ra(in6_dev))
 		goto out;
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-- 
1.7.9

^ permalink raw reply related

* Re: [net-next RFC] pktgen: don't wait for the device who doesn't free skb immediately after sent
From: Jason Wang @ 2012-11-29 10:13 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: mst, netdev, linux-kernel, virtualization, davem
In-Reply-To: <20121128085305.2b0503ce@nehalam.linuxnetplumber.net>

On Wednesday, November 28, 2012 08:53:05 AM Stephen Hemminger wrote:
> On Wed, 28 Nov 2012 14:48:52 +0800
> 
> Jason Wang <jasowang@redhat.com> wrote:
> > On 11/28/2012 12:49 AM, Stephen Hemminger wrote:
> > > On Tue, 27 Nov 2012 14:45:13 +0800
> > > 
> > > Jason Wang <jasowang@redhat.com> wrote:
> > >> On 11/27/2012 01:37 AM, Stephen Hemminger wrote:
> > >>> On Mon, 26 Nov 2012 15:56:52 +0800
> > >>> 
> > >>> Jason Wang <jasowang@redhat.com> wrote:
> > >>>> Some deivces do not free the old tx skbs immediately after it has
> > >>>> been sent
> > >>>> (usually in tx interrupt). One such example is virtio-net which
> > >>>> optimizes for virt and only free the possible old tx skbs during the
> > >>>> next packet sending. This would lead the pktgen to wait forever in
> > >>>> the refcount of the skb if no other pakcet will be sent afterwards.
> > >>>> 
> > >>>> Solving this issue by introducing a new flag IFF_TX_SKB_FREE_DELAY
> > >>>> which could notify the pktgen that the device does not free skb
> > >>>> immediately after it has been sent and let it not to wait for the
> > >>>> refcount to be one.
> > >>>> 
> > >>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> > >>> 
> > >>> Another alternative would be using skb_orphan() and skb->destructor.
> > >>> There are other cases where skb's are not freed right away.
> > >>> --
> > >>> To unsubscribe from this list: send the line "unsubscribe netdev" in
> > >>> the body of a message to majordomo@vger.kernel.org
> > >>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > >> 
> > >> Hi Stephen:
> > >> 
> > >> Do you mean registering a skb->destructor for pktgen then set and check
> > >> bits in skb->tx_flag?
> > > 
> > > Yes. Register a destructor that does something like update a counter
> > > (number of packets pending), then just spin while number of packets
> > > pending is over threshold.
> > > --
> > 
> > Not sure this is the best method, since pktgen was used to test the tx
> > process of the device driver and NIC. If we use skb_orhpan(), we would
> > miss the test of tx completion part.
> 
> There are other places that delay freeing and your solution would mean
> finding and fixing all those. Code that does that already has to use
> skb_orphan() to work, and I was looking for a way that could use that.
> Introducing another flag value seems like a long term burden.
> 

Got the point; I will draft another version.
> Alternatively, virtio could do cleanup more aggressively. Maybe in response
> to ring getting half full, or add a cleanup timer or something to avoid the
> problem.

It may be worth trying. Another option is that virtio has a feature to notify
the guest when the tx ring is empty, so we could free the old tx skbs there,
but that may bring extra overhead. If we could make virtio_net free the old tx
skbs in a timely manner, it would also be easier to bring BQL support to virtio_net.

Thanks
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 3/3] pppoatm: protect against freeing of vcc
From: Krzysztof Mazur @ 2012-11-29 10:57 UTC (permalink / raw)
  To: David Woodhouse
  Cc: David Laight, chas williams - CONTRACTOR, davem, netdev,
	linux-kernel, nathan
In-Reply-To: <1354141115.21562.101.camel@shinybook.infradead.org>

On Wed, Nov 28, 2012 at 10:18:35PM +0000, David Woodhouse wrote:
> On Wed, 2012-11-28 at 09:21 +0000, David Laight wrote:
> > Even when it might make sense to sleep in close until tx drains
> > there needs to be a finite timeout before it become abortive.
> 
> You are, of course, right. We should never wait for hardware for ever.
> And just to serve me right, I seem to have hit a bug in the latest Solos
> firmware (1.11) which makes it sometimes lock up when I reboot. So it
> never responds to the PKT_PCLOSE packet... and thus it deadlocks when I
> try to kill pppd and unload the module to reset it :)
> 
> New version...
> 
> From 53dd01c08fec5b26006a009b25e4210127fdb27a Mon Sep 17 00:00:00 2001
> From: David Woodhouse <David.Woodhouse@intel.com>
> Date: Tue, 27 Nov 2012 23:49:24 +0000
> Subject: [PATCH] solos-pci: Wait for pending TX to complete when releasing
>  vcc
> 
> We should no longer be calling the old pop routine for the vcc, after
> vcc_release() has completed. Make sure we wait for any pending TX skbs
> to complete, by waiting for our own PKT_PCLOSE control skb to be sent.
> 
> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
> ---
>  drivers/atm/solos-pci.c | 17 +++++++++++++++--
>  1 file changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
> index 9851093..3720670 100644
> --- a/drivers/atm/solos-pci.c
> +++ b/drivers/atm/solos-pci.c
> @@ -92,6 +92,7 @@ struct pkt_hdr {
>  };
>  
>  struct solos_skb_cb {
> +	struct completion c;
>  	struct atm_vcc *vcc;
>  	uint32_t dma_addr;
>  };
> @@ -881,11 +882,18 @@ static void pclose(struct atm_vcc *vcc)
>  	header->vci = cpu_to_le16(vcc->vci);
>  	header->type = cpu_to_le16(PKT_PCLOSE);
>  
> +	init_completion(&SKB_CB(skb)->c);
> +
>  	fpga_queue(card, SOLOS_CHAN(vcc->dev), skb, NULL);
>  
>  	clear_bit(ATM_VF_ADDR, &vcc->flags);
>  	clear_bit(ATM_VF_READY, &vcc->flags);
>  
> +	if (!wait_for_completion_timeout(&SKB_CB(skb)->c,
> +					 jiffies + msecs_to_jiffies(5000)))
> +		dev_warn(&card->dev->dev, "Timeout waiting for VCC close on port %d\n",
> +			 SOLOS_CHAN(vcc->dev));
> +

do we really need to wait here?

Why not just do something like this:

	tasklet_disable(&card->tlet);
	spin_lock(&card->tx_queue_lock);
	for each skb in queue
		SKB_CB(skb)->vcc = NULL;
	spin_unlock(&card->tx_queue_lock);
	tasklet_enable(&card->tlet);

or if we really want to call vcc->pop() for such skbs:

	tasklet_disable(&card->tlet);
	spin_lock(&card->tx_queue_lock);
	for each skb in queue {
		skb_get(skb);
		solos_pop(SKB_CB(skb)->vcc, skb);
		SKB_CB(skb)->vcc = NULL;
	}
	spin_unlock(&card->tx_queue_lock);
	tasklet_enable(&card->tlet);

Krzysiek

^ permalink raw reply

* [PATCH] mISDN: improve bitops usage
From: Akinobu Mita @ 2012-11-29 11:27 UTC (permalink / raw)
  To: netdev; +Cc: Akinobu Mita, Karsten Keil

This improves bitops usages in several points:

- Convert u64 to a proper bitmap declaration.  This makes it possible to
  remove superfluous typecasting from 'u64' to 'unsigned long *'.

- Convert superfluous atomic bitops to non atomic bitops.  The bitmap
  is allocated on the stack and it is not accessed by any other threads,
  so using atomic bitops is not necessary.

- Use find_next_zero_bit and find_first_zero_bit instead of calling
  test_bit() for each bit.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Karsten Keil <isdn@linux-pingi.de>
Cc: netdev@vger.kernel.org
---
 drivers/isdn/mISDN/tei.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c
index be88728..592f597 100644
--- a/drivers/isdn/mISDN/tei.c
+++ b/drivers/isdn/mISDN/tei.c
@@ -250,7 +250,7 @@ tei_debug(struct FsmInst *fi, char *fmt, ...)
 static int
 get_free_id(struct manager *mgr)
 {
-	u64		ids = 0;
+	DECLARE_BITMAP(ids, 64) = { [0 ... BITS_TO_LONGS(64) - 1] = 0 };
 	int		i;
 	struct layer2	*l2;
 
@@ -261,11 +261,11 @@ get_free_id(struct manager *mgr)
 			       __func__);
 			return -EBUSY;
 		}
-		test_and_set_bit(l2->ch.nr, (u_long *)&ids);
+		__set_bit(l2->ch.nr, ids);
 	}
-	for (i = 1; i < 64; i++)
-		if (!test_bit(i, (u_long *)&ids))
-			return i;
+	i = find_next_zero_bit(ids, 64, 1);
+	if (i < 64)
+		return i;
 	printk(KERN_WARNING "%s: more as 63 layer2 for one device\n",
 	       __func__);
 	return -EBUSY;
@@ -274,7 +274,7 @@ get_free_id(struct manager *mgr)
 static int
 get_free_tei(struct manager *mgr)
 {
-	u64		ids = 0;
+	DECLARE_BITMAP(ids, 64) = { [0 ... BITS_TO_LONGS(64) - 1] = 0 };
 	int		i;
 	struct layer2	*l2;
 
@@ -288,11 +288,11 @@ get_free_tei(struct manager *mgr)
 			continue;
 		i -= 64;
 
-		test_and_set_bit(i, (u_long *)&ids);
+		__set_bit(i, ids);
 	}
-	for (i = 0; i < 64; i++)
-		if (!test_bit(i, (u_long *)&ids))
-			return i + 64;
+	i = find_first_zero_bit(ids, 64);
+	if (i < 64)
+		return i + 64;
 	printk(KERN_WARNING "%s: more as 63 dynamic tei for one device\n",
 	       __func__);
 	return -1;
-- 
1.7.11.7

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox