Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v2 net-next] fix rtnl notification in atomic context
From: Alexei Starovoitov @ 2013-10-23 23:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: Nicolas Dichtel, Cong Wang, Veaceslav Falico, netdev

commit 991fb3f74c "dev: always advertise rx_flags changes via netlink"
introduced rtnl notification from __dev_set_promiscuity(),
which can be called in atomic context.

Steps to reproduce:
ip tuntap add dev tap1 mode tap
ifconfig tap1 up
tcpdump -nei tap1 &
ip tuntap del dev tap1 mode tap

[  271.627994] device tap1 left promiscuous mode
[  271.639897] BUG: sleeping function called from invalid context at mm/slub.c:940
[  271.664491] in_atomic(): 1, irqs_disabled(): 0, pid: 3394, name: ip
[  271.677525] INFO: lockdep is turned off.
[  271.690503] CPU: 0 PID: 3394 Comm: ip Tainted: G        W    3.12.0-rc3+ #73
[  271.703996] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[  271.731254]  ffffffff81a58506 ffff8807f0d57a58 ffffffff817544e5 ffff88082fa0f428
[  271.760261]  ffff8808071f5f40 ffff8807f0d57a88 ffffffff8108bad1 ffffffff81110ff8
[  271.790683]  0000000000000010 00000000000000d0 00000000000000d0 ffff8807f0d57af8
[  271.822332] Call Trace:
[  271.838234]  [<ffffffff817544e5>] dump_stack+0x55/0x76
[  271.854446]  [<ffffffff8108bad1>] __might_sleep+0x181/0x240
[  271.870836]  [<ffffffff81110ff8>] ? rcu_irq_exit+0x68/0xb0
[  271.887076]  [<ffffffff811a80be>] kmem_cache_alloc_node+0x4e/0x2a0
[  271.903368]  [<ffffffff810b4ddc>] ? vprintk_emit+0x1dc/0x5a0
[  271.919716]  [<ffffffff81614d67>] ? __alloc_skb+0x57/0x2a0
[  271.936088]  [<ffffffff810b4de0>] ? vprintk_emit+0x1e0/0x5a0
[  271.952504]  [<ffffffff81614d67>] __alloc_skb+0x57/0x2a0
[  271.968902]  [<ffffffff8163a0b2>] rtmsg_ifinfo+0x52/0x100
[  271.985302]  [<ffffffff8162ac6d>] __dev_notify_flags+0xad/0xc0
[  272.001642]  [<ffffffff8162ad0c>] __dev_set_promiscuity+0x8c/0x1c0
[  272.017917]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.033961]  [<ffffffff8162b109>] dev_set_promiscuity+0x29/0x50
[  272.049855]  [<ffffffff8172e937>] packet_dev_mc+0x87/0xc0
[  272.065494]  [<ffffffff81732052>] packet_notifier+0x1b2/0x380
[  272.080915]  [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
[  272.096009]  [<ffffffff81761c66>] notifier_call_chain+0x66/0x150
[  272.110803]  [<ffffffff8108503e>] __raw_notifier_call_chain+0xe/0x10
[  272.125468]  [<ffffffff81085056>] raw_notifier_call_chain+0x16/0x20
[  272.139984]  [<ffffffff81620190>] call_netdevice_notifiers_info+0x40/0x70
[  272.154523]  [<ffffffff816201d6>] call_netdevice_notifiers+0x16/0x20
[  272.168552]  [<ffffffff816224c5>] rollback_registered_many+0x145/0x240
[  272.182263]  [<ffffffff81622641>] rollback_registered+0x31/0x40
[  272.195369]  [<ffffffff816229c8>] unregister_netdevice_queue+0x58/0x90
[  272.208230]  [<ffffffff81547ca0>] __tun_detach+0x140/0x340
[  272.220686]  [<ffffffff81547ed6>] tun_chr_close+0x36/0x60

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
 drivers/net/bonding/bond_main.c |    4 ++--
 include/linux/rtnetlink.h       |    2 +-
 net/core/dev.c                  |   16 ++++++++--------
 net/core/rtnetlink.c            |    9 +++++----
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2daa066..a141f40 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1213,7 +1213,7 @@ static int bond_master_upper_dev_link(struct net_device *bond_dev,
 	if (err)
 		return err;
 	slave_dev->flags |= IFF_SLAVE;
-	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
+	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 	return 0;
 }
 
@@ -1222,7 +1222,7 @@ static void bond_upper_dev_unlink(struct net_device *bond_dev,
 {
 	netdev_upper_dev_unlink(slave_dev, bond_dev);
 	slave_dev->flags &= ~IFF_SLAVE;
-	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE);
+	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);
 }
 
 /* enslave device <slave> to bond device <master> */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index f28544b..939428a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -15,7 +15,7 @@ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
 			      u32 id, long expires, u32 error);
 
-extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
 
 /* RTNL is used as a global lock for all changes to network configuration  */
 extern void rtnl_lock(void);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0918aad..5d7e821 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1203,7 +1203,7 @@ void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 	}
 }
 EXPORT_SYMBOL(netdev_state_change);
@@ -1293,7 +1293,7 @@ int dev_open(struct net_device *dev)
 	if (ret < 0)
 		return ret;
 
-	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 	call_netdevice_notifiers(NETDEV_UP, dev);
 
 	return ret;
@@ -1371,7 +1371,7 @@ static int dev_close_many(struct list_head *head)
 	__dev_close_many(head);
 
 	list_for_each_entry_safe(dev, tmp, head, close_list) {
-		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
 		list_del_init(&dev->close_list);
 	}
@@ -5257,7 +5257,7 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
 	unsigned int changes = dev->flags ^ old_flags;
 
 	if (gchanges)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
 
 	if (changes & IFF_UP) {
 		if (dev->flags & IFF_UP)
@@ -5489,7 +5489,7 @@ static void rollback_registered_many(struct list_head *head)
 
 		if (!dev->rtnl_link_ops ||
 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
 
 		/*
 		 *	Flush the unicast and multicast chains
@@ -5888,7 +5888,7 @@ int register_netdevice(struct net_device *dev)
 	 */
 	if (!dev->rtnl_link_ops ||
 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
 
 out:
 	return ret;
@@ -6500,7 +6500,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
 	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
 
 	/*
 	 *	Flush the unicast and multicast chains
@@ -6539,7 +6539,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 *	Prevent userspace races by waiting until the network
 	 *	device is fully setup before sending notifications.
 	 */
-	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
 
 	synchronize_net();
 	err = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4aedf03..cf67144 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1984,14 +1984,15 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
+void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
+		  gfp_t flags)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 	size_t if_info_size;
 
-	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), GFP_KERNEL);
+	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
 	if (skb == NULL)
 		goto errout;
 
@@ -2002,7 +2003,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change)
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, flags);
 	return;
 errout:
 	if (err < 0)
@@ -2716,7 +2717,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_JOIN:
 		break;
 	default:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 		break;
 	}
 	return NOTIFY_DONE;
-- 
1.7.9.5

^ permalink raw reply related

* Re: [virtio-net] BUG: sleeping function called from invalid context at kernel/mutex.c:616
From: Fengguang Wu @ 2013-10-23 23:20 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, linux-kernel, virtualization
In-Reply-To: <526638D4.1030403@redhat.com>

Hi Jason,

On Tue, Oct 22, 2013 at 04:35:32PM +0800, Jason Wang wrote:
> On 10/20/2013 10:34 AM, Fengguang Wu wrote:
> > Greetings,
> >
> > I got the below dmesg and the first bad commit is
> >
> > commit 3ab098df35f8b98b6553edc2e40234af512ba877
> > Author: Jason Wang <jasowang@redhat.com>
> > Date:   Tue Oct 15 11:18:58 2013 +0800
> >
> >     virtio-net: don't respond to cpu hotplug notifier if we're not ready
> >     
> >     We're trying to re-configure the affinity unconditionally in cpu hotplug
> >     callback. This may lead the issue during resuming from s3/s4 since
> >     
> >     - virt queues haven't been allocated at that time.
> >     - it's unnecessary since thaw method will re-configure the affinity.
> >     
> >     Fix this issue by checking the config_enable and do nothing is we're not ready.
> >     
> >     The bug were introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17
> >     (virtio-net: reset virtqueue affinity when doing cpu hotplug).
> >     
> >     Cc: Rusty Russell <rusty@rustcorp.com.au>
> >     Cc: Michael S. Tsirkin <mst@redhat.com>
> >     Cc: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> >     Acked-by: Michael S. Tsirkin <mst@redhat.com>
> >     Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
> >     Signed-off-by: Jason Wang <jasowang@redhat.com>
> >     Signed-off-by: David S. Miller <davem@davemloft.net>
> >
> > [  622.944441] CPU0 attaching NULL sched-domain.
> > [  622.944446] CPU1 attaching NULL sched-domain.
> > [  622.944485] CPU0 attaching NULL sched-domain.
> > [  622.950795] BUG: sleeping function called from invalid context at kernel/mutex.c:616
> > [  622.950796] in_atomic(): 1, irqs_disabled(): 1, pid: 10, name: migration/1
> > [  622.950796] no locks held by migration/1/10.
> > [  622.950798] CPU: 1 PID: 10 Comm: migration/1 Not tainted 3.12.0-rc5-wl-01249-gb91e82d #317
> > [  622.950799] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> > [  622.950802]  0000000000000000 ffff88001d42dba0 ffffffff81a32f22 ffff88001bfb9c70
> > [  622.950803]  ffff88001d42dbb0 ffffffff810edb02 ffff88001d42dc38 ffffffff81a396ed
> > [  622.950805]  0000000000000046 ffff88001d42dbe8 ffffffff810e861d 0000000000000000
> > [  622.950805] Call Trace:
> > [  622.950810]  [<ffffffff81a32f22>] dump_stack+0x54/0x74
> > [  622.950815]  [<ffffffff810edb02>] __might_sleep+0x112/0x114
> > [  622.950817]  [<ffffffff81a396ed>] mutex_lock_nested+0x3c/0x3c6
> > [  622.950818]  [<ffffffff810e861d>] ? up+0x39/0x3e
> > [  622.950821]  [<ffffffff8153ea7c>] ? acpi_os_signal_semaphore+0x21/0x2d
> > [  622.950824]  [<ffffffff81565ed1>] ? acpi_ut_release_mutex+0x5e/0x62
> > [  622.950828]  [<ffffffff816d04ec>] virtnet_cpu_callback+0x33/0x87
> > [  622.950830]  [<ffffffff81a42576>] notifier_call_chain+0x3c/0x5e
> > [  622.950832]  [<ffffffff810e86a8>] __raw_notifier_call_chain+0xe/0x10
> > [  622.950835]  [<ffffffff810c5556>] __cpu_notify+0x20/0x37
> > [  622.950836]  [<ffffffff810c5580>] cpu_notify+0x13/0x15
> > [  622.950838]  [<ffffffff81a237cd>] take_cpu_down+0x27/0x3a
> > [  622.950841]  [<ffffffff81136289>] stop_machine_cpu_stop+0x93/0xf1
> > [  622.950842]  [<ffffffff81136167>] cpu_stopper_thread+0xa0/0x12f
> > [  622.950844]  [<ffffffff811361f6>] ? cpu_stopper_thread+0x12f/0x12f
> > [  622.950847]  [<ffffffff81119710>] ? lock_release_holdtime.part.7+0xa3/0xa8
> > [  622.950848]  [<ffffffff81135e4b>] ? cpu_stop_should_run+0x3f/0x47
> > [  622.950850]  [<ffffffff810ea9b0>] smpboot_thread_fn+0x1c5/0x1e3
> > [  622.950852]  [<ffffffff810ea7eb>] ? lg_global_unlock+0x67/0x67
> > [  622.950854]  [<ffffffff810e36b7>] kthread+0xd8/0xe0
> > [  622.950857]  [<ffffffff81a3bfad>] ? wait_for_common+0x12f/0x164
> > [  622.950859]  [<ffffffff810e35df>] ? kthread_create_on_node+0x124/0x124
> > [  622.950861]  [<ffffffff81a45ffc>] ret_from_fork+0x7c/0xb0
> > [  622.950862]  [<ffffffff810e35df>] ? kthread_create_on_node+0x124/0x124
> > [  622.950876] smpboot: CPU 1 is now offline
> > [  623.194556] SMP alternatives: lockdep: fixing up alternatives
> > [  623.194559] smpboot: Booting Node 0 Processor 1 APIC 0x1
>  
> Thanks for the testing Fengguang, could you please try the attached
> patch to see if it works?

Yes it reduces the sleeping function bug:

/kernel/x86_64-lkp-CONFIG_SCSI_DEBUG/7c4ed2767afb813493b0a8fb18d666cd44550963

+------------------------------------------------------------------------------------+-----------+--------------+--------------+
|                                                                                    | v3.12-rc3 | 3ab098df35f8 | 7c4ed2767afb |
+------------------------------------------------------------------------------------+-----------+--------------+--------------+
| good_boots                                                                         | 30        | 0            | 93           |
| has_kernel_error_warning                                                           | 0         | 20           | 7            |
| BUG:sleeping_function_called_from_invalid_context_at_kernel/mutex.c                | 0         | 20           |              |
| INFO:rcu_sched_self-detected_stall_on_CPU(t=jiffies_g=c=q=)                        | 0         | 0            | 1            |
| INFO:task_blocked_for_more_than_seconds                                            | 0         | 0            | 2            |
| INFO:NMI_handler(arch_trigger_all_cpu_backtrace_handler)took_too_long_to_run:msecs | 0         | 0            | 1            |
| Kernel_panic-not_syncing:hung_task:blocked_tasks                                   | 0         | 0            | 1            |
| BUG:kernel_test_crashed                                                            | 0         | 0            | 3            |
| BUG:kernel_test_hang                                                               | 0         | 0            | 1            |
+------------------------------------------------------------------------------------+-----------+--------------+--------------+

However I'll need to increase tests on v3.12-rc3 to make sure it's not
the patch that added the other error messages.

Thanks,
Fengguang

> >From 01e6c3f71c202aa02e4feda169e7cc9fb24193f5 Mon Sep 17 00:00:00 2001
> From: Jason Wang <jasowang@redhat.com>
> Date: Mon, 21 Oct 2013 20:39:09 +0800
> Subject: [PATCH] virtio-net: fix
> 
> ---
>  drivers/net/virtio_net.c | 13 ++++++-------
>  1 file changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9fbdfcd..bbc9cb8 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1118,11 +1118,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
>  {
>  	struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
>  
> -	mutex_lock(&vi->config_lock);
> -
> -	if (!vi->config_enable)
> -		goto done;
> -
>  	switch(action & ~CPU_TASKS_FROZEN) {
>  	case CPU_ONLINE:
>  	case CPU_DOWN_FAILED:
> @@ -1136,8 +1131,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb,
>  		break;
>  	}
>  
> -done:
> -	mutex_unlock(&vi->config_lock);
>  	return NOTIFY_OK;
>  }
>  
> @@ -1699,6 +1692,8 @@ static int virtnet_freeze(struct virtio_device *vdev)
>  	struct virtnet_info *vi = vdev->priv;
>  	int i;
>  
> +	unregister_hotcpu_notifier(&vi->nb);
> +
>  	/* Prevent config work handler from accessing the device */
>  	mutex_lock(&vi->config_lock);
>  	vi->config_enable = false;
> @@ -1747,6 +1742,10 @@ static int virtnet_restore(struct virtio_device *vdev)
>  	virtnet_set_queues(vi, vi->curr_queue_pairs);
>  	rtnl_unlock();
>  
> +	err = register_hotcpu_notifier(&vi->nb);
> +	if (err)
> +		return err;
> +
>  	return 0;
>  }
>  #endif
> -- 
> 1.8.1.2
> 

^ permalink raw reply

* Deadlock in BPF JIT functions when running upowerd?
From: Darrick J. Wong @ 2013-10-24  1:17 UTC (permalink / raw)
  To: Eric Dumazet, David S. Miller, darrick.wong; +Cc: netdev, linux-kernel

Hi,

I've been observing a softlockup with 3.11.6 and 3.12-rc6.  It looks like
there's a deadlock occurring on purge_lock in __purge_vmap_area_lazy().  In
short, the BPF JIT code has been changed[1] to call set_memory_r[ow]() when
compiling and freeing JIT bytecode memory.  It seems that it's possible for
upowerd to be compiling some BPF program and call __purge_vmap_area_lazy, then
the timer interrupt comes in (due to the IPI?) and a softirq calls
bpf_jit_free, which also calls __purge_vmap_area_lazy.

I'm not really sure who's at fault here--is this a BPF bug?

[1] 314beb9bcabfd6b4542ccbced2402af2c6f6142a
    "x86: bpf_jit_comp: secure bpf jit against spraying attacks"

--D

Here's what 3.11.6 spits out; the 3.12-rc6 message has the same traceback.

[   52.370437] BUG: soft lockup - CPU#3 stuck for 22s! [upowerd:8359]
[   52.370440] Modules linked in: ipt_MASQUERADE iptable_nat nf_nat_ipv4 xt_conntrack xt_CHECKSUM iptable_mangle fuse tun microcode nfsd nfs_acl exportfs auth_rpcgss nfs lockd sunrpc af_packet xt_physdev xt_hl ip6t_rt nf_conntrack_ipv6 nf_defrag_ipv6 ipt_REJECT xt_sctp xt_limit xt_tcpudp xt_addrtype nf_conntrack_ipv4 nf_defrag_ipv4 xt_state ip6table_filter ip6_tables nf_conntrack_netbios_ns nf_conntrack_broadcast nf_nat_ftp nf_nat nf_conntrack_ftp nf_conntrack iptable_filter ip_tables x_tables sch_fq_codel bridge stp llc lpc_ich mfd_core loop bcache dm_crypt zlib_deflate libcrc32c firewire_ohci firewire_core usb_storage mpt2sas scsi_transport_sas raid_class
[   52.370471] CPU: 3 PID: 8359 Comm: upowerd Not tainted 3.11.6-60-flax #1
[   52.370472] Hardware name: OEM OEM/131-GT-E767, BIOS 6.00 PG 08/25/2011
[   52.370474] task: ffff8806621f9700 ti: ffff88064b6a0000 task.ti: ffff88064b6a0000
[   52.370475] RIP: 0010:[<ffffffff816b5a22>]  [<ffffffff816b5a22>] _raw_spin_lock+0x32/0x40
[   52.370480] RSP: 0018:ffff88067fc63c10  EFLAGS: 00000297
[   52.370481] RAX: 0000000000000061 RBX: ffff88065a318600 RCX: 0000000000000000
[   52.370483] RDX: 0000000000000062 RSI: ffff88067fc63ce0 RDI: ffffffff81ea42bc
[   52.370484] RBP: ffff88067fc63c10 R08: ffffffff81cdd608 R09: 0000000000000000
[   52.370485] R10: ffff88067fc6d8e0 R11: 0000000000000000 R12: ffff88067fc63b88
[   52.370486] R13: ffffffff816b7a47 R14: ffff88067fc63c10 R15: ffff88067fc63cd8
[   52.370487] FS:  00007f55fff297c0(0000) GS:ffff88067fc60000(0000) knlGS:0000000000000000
[   52.370488] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   52.370489] CR2: 00007f55fff47000 CR3: 000000065dd10000 CR4: 00000000000007e0
[   52.370490] Stack:
[   52.370491]  ffff88067fc63cb0 ffffffff811955fd 0000000000000096 0000000000000347
[   52.370494]  00000000000003c1 0000000000000001 0000000000000000 0000000000000000
[   52.370496]  0000000000000033 ffff88067fc63c58 ffff88067fc63c58 0000000000000001
[   52.370499] Call Trace:
[   52.370500]  <IRQ> 
[   52.370501]  [<ffffffff811955fd>] __purge_vmap_area_lazy+0x12d/0x4c0
[   52.370507]  [<ffffffff8119612c>] vm_unmap_aliases+0x17c/0x190
[   52.370512]  [<ffffffff81079814>] change_page_attr_set_clr+0xb4/0x4a0
[   52.370516]  [<ffffffff810a927e>] ? irq_exit+0x7e/0xb0
[   52.370519]  [<ffffffff81048e44>] ? smp_irq_work_interrupt+0x34/0x40
[   52.370522]  [<ffffffff81079d8f>] set_memory_rw+0x2f/0x40
[   52.370525]  [<ffffffff810a0a7c>] bpf_jit_free+0x2c/0x40
[   52.370528]  [<ffffffff815f48aa>] sk_filter_release_rcu+0x1a/0x30
[   52.370532]  [<ffffffff811262d2>] rcu_process_callbacks+0x1e2/0x5b0
[   52.370535]  [<ffffffff810c9999>] ? enqueue_hrtimer+0x39/0xf0
[   52.370537]  [<ffffffff810a8f20>] __do_softirq+0xe0/0x2f0
[   52.370541]  [<ffffffff816b851c>] call_softirq+0x1c/0x30
[   52.370543]  [<ffffffff81046155>] do_softirq+0x55/0x90
[   52.370545]  [<ffffffff810a928e>] irq_exit+0x8e/0xb0
[   52.370547]  [<ffffffff816b8b0a>] smp_apic_timer_interrupt+0x4a/0x60
[   52.370549]  [<ffffffff816b7a47>] apic_timer_interrupt+0x67/0x70
[   52.370550]  <EOI> 
[   52.370552]  [<ffffffff8106eeb4>] ? default_send_IPI_mask_allbutself_phys+0xb4/0xe0
[   52.370559]  [<ffffffff81188af7>] ? handle_pte_fault+0x567/0x920
[   52.370561]  [<ffffffff8107cf30>] ? rbt_memtype_copy_nth_element+0xc0/0xc0
[   52.370563]  [<ffffffff81072057>] physflat_send_IPI_allbutself+0x17/0x20
[   52.370566]  [<ffffffff8106a992>] native_send_call_func_ipi+0x72/0x80
[   52.370568]  [<ffffffff8107cf30>] ? rbt_memtype_copy_nth_element+0xc0/0xc0
[   52.370570]  [<ffffffff81105834>] smp_call_function_many+0x1f4/0x290
[   52.370572]  [<ffffffff81105a8a>] smp_call_function+0x3a/0x60
[   52.370574]  [<ffffffff8107cf30>] ? rbt_memtype_copy_nth_element+0xc0/0xc0
[   52.370576]  [<ffffffff81105b18>] on_each_cpu+0x38/0x80
[   52.370578]  [<ffffffff8107d59d>] flush_tlb_kernel_range+0x6d/0x70
[   52.370581]  [<ffffffff81195916>] __purge_vmap_area_lazy+0x446/0x4c0
[   52.370584]  [<ffffffff81228e85>] ? ext4_file_open+0x75/0x1b0
[   52.370586]  [<ffffffff8119612c>] vm_unmap_aliases+0x17c/0x190
[   52.370590]  [<ffffffff81079814>] change_page_attr_set_clr+0xb4/0x4a0
[   52.370592]  [<ffffffff81196ac2>] ? map_vm_area+0x32/0x50
[   52.370595]  [<ffffffff81197761>] ? __vmalloc_node_range+0x121/0x1f0
[   52.370597]  [<ffffffff810a08ab>] ? bpf_jit_compile+0x105b/0x1200
[   52.370600]  [<ffffffff81079d4f>] set_memory_ro+0x2f/0x40
[   52.370602]  [<ffffffff810744ca>] ? module_alloc+0x5a/0x60
[   52.370604]  [<ffffffff810a081c>] bpf_jit_compile+0xfcc/0x1200
[   52.370607]  [<ffffffff811aa75b>] ? __kmalloc+0x18b/0x1f0
[   52.370610]  [<ffffffff811aa606>] ? __kmalloc+0x36/0x1f0
[   52.370612]  [<ffffffff815f4b43>] ? sk_chk_filter+0x283/0x390
[   52.370614]  [<ffffffff815f4d4b>] sk_attach_filter+0xfb/0x1b0
[   52.370617]  [<ffffffff815d071d>] sock_setsockopt+0x4fd/0x900
[   52.370620]  [<ffffffff811d2342>] ? fget_light+0x92/0x100
[   52.370623]  [<ffffffff815cbdd6>] SyS_setsockopt+0xc6/0xd0
[   52.370625]  [<ffffffff816b6dc6>] system_call_fastpath+0x1a/0x1f
[   52.370626] Code: 89 e5 65 48 8b 04 25 f0 b8 00 00 83 80 44 e0 ff ff 01 b8 00 01 00 00 f0 66 0f c1 07 0f b6 d4 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 <0f> b6 07 38 d0 75 f7 5d c3 0f 1f 44 00 00 66 66 66 66 90 55 48 

^ permalink raw reply

* 16% regression on 10G caused by TCP small queues
From: Stephen Hemminger @ 2013-10-24  2:29 UTC (permalink / raw)
  To: Eric Dumazet, David Miller, Dave Täht; +Cc: netdev

In the course of testing routing functionality, I discovered a that the single flow TCP
throughput was much worse than expected. At first, it looked like a router problem,
or maybe because one end was a FreeBSD system (which has noticeably slower TCP performance).
But reducing it down to two systems directly connected over 10G (ixgbe) found the problem.

With a single TCP flow, in 3.5 kernel the performance with iperf is 9.41 Gbit/sec
which is at the link limit for TCP with timestamps etc. But in 3.6 and later the
throughput dropped to 7.9 Gbit/sec which is a regression of 16%.

Doing bisect shows that the commit causing this is:

  commit 46d3ceabd8d98ed0ad10f20c595ca784e34786c5
  Author: Eric Dumazet <eric.dumazet@gmail.com>
  Date:   Wed Jul 11 05:50:31 2012 +0000

    tcp: TCP Small Queues

    This introduce TSQ (TCP Small Queues)

There are several options at this point:
  0. Ignore it. Sorry, this is not acceptable.
     People do transfer files over 10G and expect line rate!

  1. Rip it out. which adds to the buffer bloat.
     This is a throughput vs latency tradeoff.

  2. Neuter it by making TCP small queues configurable and default off.
     Allows people who are willing to sacrifice performance go ahead and
     enable it.

  3. Tweak it. Make the default queue value in kernel big enough that no loss is
     observable.

  4. Do something smarter like a dynamic TCP small queue that adapts.

^ permalink raw reply

* [PACTH net-next] SUNRPC: remove an unnecessary if statement
From: wangweidong @ 2013-10-24  2:35 UTC (permalink / raw)
  To: davem, Trond.Myklebust, bfields; +Cc: dingtianhong, netdev, linux-nfs

If req allocated failed just goto out_free, no need to check the
'i < num_prealloc'. There is just code simplification, no
functional changes.

Signed-off-by: Wang Weidong <wangweidong1@huawei.com>
---
 net/sunrpc/xprt.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 095363e..a8e20de 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1087,11 +1087,9 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
 	for (i = 0; i < num_prealloc; i++) {
 		req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
 		if (!req)
-			break;
+			goto out_free;
 		list_add(&req->rq_list, &xprt->free);
 	}
-	if (i < num_prealloc)
-		goto out_free;
 	if (max_alloc > num_prealloc)
 		xprt->max_reqs = max_alloc;
 	else
-- 1.7.12

^ permalink raw reply related

* Re: 16% regression on 10G caused by TCP small queues
From: Neal Cardwell @ 2013-10-24  2:37 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Eric Dumazet, David Miller, Dave Täht, Netdev
In-Reply-To: <20131023192954.3dd9c784@nehalam.linuxnetplumber.net>

On Wed, Oct 23, 2013 at 10:29 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> In the course of testing routing functionality, I discovered a that the single flow TCP
> throughput was much worse than expected. At first, it looked like a router problem,
> or maybe because one end was a FreeBSD system (which has noticeably slower TCP performance).
> But reducing it down to two systems directly connected over 10G (ixgbe) found the problem.
...
>   4. Do something smarter like a dynamic TCP small queue that adapts.

Yep, Eric made TSQ dynamic a few weeks ago, and mentioned that his
commit helps a single flow on 10Gbps link:

http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c9eeec26e32e087359160406f96e0949b3cc6f10

Can you please check the performance in your setup on 3.12-rc4 or newer? :-)

Thanks!

neal

---

commit c9eeec26e32e087359160406f96e0949b3cc6f10
Author: Eric Dumazet <edumazet@google.com>
Date:   Fri Sep 27 03:28:54 2013 -0700

    tcp: TSQ can use a dynamic limit

    When TCP Small Queues was added, we used a sysctl to limit amount of
    packets queues on Qdisc/device queues for a given TCP flow.

    Problem is this limit is either too big for low rates, or too small
    for high rates.

    Now TCP stack has rate estimation in sk->sk_pacing_rate, and TSO
    auto sizing, it can better control number of packets in Qdisc/device
    queues.

    New limit is two packets or at least 1 to 2 ms worth of packets.

    Low rates flows benefit from this patch by having even smaller
    number of packets in queues, allowing for faster recovery,
    better RTT estimations.

    High rates flows benefit from this patch by allowing more than 2 packets
    in flight as we had reports this was a limiting factor to reach line
    rate. [ In particular if TX completion is delayed because of coalescing
    parameters ]

    Example for a single flow on 10Gbp link controlled by FQ/pacing

    14 packets in flight instead of 2
    ...

^ permalink raw reply

* [PATCH net-next v2 1/5] bonding: remove bond read lock for bond_mii_monitor()
From: Ding Tianhong @ 2013-10-24  3:09 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The bond slave list may change when the monitor is running, the slave list is no longer
protected by bond->lock, only protected by rtnl lock(), so we has 3 way to modify it:
1.add bond_master_upper_dev_link() and bond_upper_dev_unlink() in bond->lock, but it is unsafe
to call call_netdevice_notifiers() in write lock.
2.remove unused bond->lock for monitor function, only use the exist rtnl lock().
3.use rcu_read_lock() to protect it, of course, it will transform bond_for_each_slave to
bond_for_each_slave_rcu() and performance is better, but in slow path, it is ignored.
so I remove the bond->lock and move the rtnl lock to protect the whole monitor function.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/bonding/bond_main.c | 44 +++++++++++------------------------------
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d90734f..ba90f45 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2155,49 +2155,29 @@ void bond_mii_monitor(struct work_struct *work)
 	struct bonding *bond = container_of(work, struct bonding,
 					    mii_work.work);
 	bool should_notify_peers = false;
-	unsigned long delay;
 
-	read_lock(&bond->lock);
-
-	delay = msecs_to_jiffies(bond->params.miimon);
+	if (!rtnl_trylock())
+		goto re_arm;
 
-	if (!bond_has_slaves(bond))
+	if (!bond_has_slaves(bond)) {
+		rtnl_unlock();
 		goto re_arm;
+	}
 
 	should_notify_peers = bond_should_notify_peers(bond);
 
-	if (bond_miimon_inspect(bond)) {
-		read_unlock(&bond->lock);
-
-		/* Race avoidance with bond_close cancel of workqueue */
-		if (!rtnl_trylock()) {
-			read_lock(&bond->lock);
-			delay = 1;
-			should_notify_peers = false;
-			goto re_arm;
-		}
-
-		read_lock(&bond->lock);
-
+	if (bond_miimon_inspect(bond))
 		bond_miimon_commit(bond);
 
-		read_unlock(&bond->lock);
-		rtnl_unlock();	/* might sleep, hold no other locks */
-		read_lock(&bond->lock);
-	}
+	if (should_notify_peers)
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
+
+	rtnl_unlock();
 
 re_arm:
 	if (bond->params.miimon)
-		queue_delayed_work(bond->wq, &bond->mii_work, delay);
-
-	read_unlock(&bond->lock);
-
-	if (should_notify_peers) {
-		if (!rtnl_trylock())
-			return;
-		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
-		rtnl_unlock();
-	}
+		queue_delayed_work(bond->wq, &bond->mii_work,
+				msecs_to_jiffies(bond->params.miimon));
 }
 
 static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
-- 
1.8.2.1

^ permalink raw reply related

* Re: 16% regression on 10G caused by TCP small queues
From: Stephen Hemminger @ 2013-10-24  3:09 UTC (permalink / raw)
  To: Neal Cardwell; +Cc: Eric Dumazet, David Miller, Dave Täht, Netdev
In-Reply-To: <CADVnQymDr2K7z3yfKpW-H3R3W3NP+iuPQF2eMfeyS6dn-szdgA@mail.gmail.com>

On Wed, Oct 23, 2013 at 7:37 PM, Neal Cardwell <ncardwell@google.com> wrote:
> On Wed, Oct 23, 2013 at 10:29 PM, Stephen Hemminger
> <stephen@networkplumber.org> wrote:
>> In the course of testing routing functionality, I discovered a that the single flow TCP
>> throughput was much worse than expected. At first, it looked like a router problem,
>> or maybe because one end was a FreeBSD system (which has noticeably slower TCP performance).
>> But reducing it down to two systems directly connected over 10G (ixgbe) found the problem.
> ...
>>   4. Do something smarter like a dynamic TCP small queue that adapts.
>
> Yep, Eric made TSQ dynamic a few weeks ago, and mentioned that his
> commit helps a single flow on 10Gbps link:
>
> http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c9eeec26e32e087359160406f96e0949b3cc6f10
>
> Can you please check the performance in your setup on 3.12-rc4 or newer? :-)
>
> Thanks!
>
> neal

I will check 3.12, but what about users on 3.10 which is the LTS
kernel used by most distros?

^ permalink raw reply

* [PATCH net-next v2 0/5] bonding: patchset for rcu use in bonding
From: Ding Tianhong @ 2013-10-24  3:08 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

Hi:

The slave list will add and del by bond_master_upper_dev_link() and bond_upper_dev_unlink(),
which will call call_netdevice_notifiers(), even it is safe to call it in write bond lock now,
but we can't sure that whether it is safe later, because other drivers may deal NETDEV_CHANGEUPPER
in sleep way, so I didn't admit move the bond_upper_dev_unlink() in write bond lock.

now the bond_for_each_slave only protect by rtnl_lock(), maybe use bond_for_each_slave_rcu is a good
way to protect slave list for bond, but as a system slow path, it is no need to transform bond_for_each_slave()
to bond_for_each_slave_rcu() in slow path, so in the patchset, I will remove the unused read bond lock
for monitor function, maybe it is a better way, I will wait to accept any relay for it.

Thanks for the Veaceslav Falico opinion.

v2: add and modify commit for patchset and patch, it will be the first step for the whole patchset.

Ding Tianhong (5):
  bonding: remove bond read lock for bond_mii_monitor()
  bonding: remove bond read lock for bond_alb_monitor()
  bonding: remove bond read lock for bond_loadbalance_arp_mon()
  bonding: remove bond read lock for bond_activebackup_arp_mon()
  bonding: remove bond read lock for bond_3ad_state_machine_handler()

 drivers/net/bonding/bond_3ad.c  |   9 ++--
 drivers/net/bonding/bond_alb.c  |  20 ++------
 drivers/net/bonding/bond_main.c | 100 +++++++++++++---------------------------
 3 files changed, 40 insertions(+), 89 deletions(-)

-- 
1.8.2.1

^ permalink raw reply

* [PATCH net-next v2 4/5] bonding: remove bond read lock for bond_activebackup_arp_mon()
From: Ding Tianhong @ 2013-10-24  3:09 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The bond slave list may change when the monitor is running, the slave list is no longer
protected by bond->lock, only protected by rtnl lock(), so we has 3 way to modify it:
1.add bond_master_upper_dev_link() and bond_upper_dev_unlink() in bond->lock, but it is unsafe
to call call_netdevice_notifiers() in write lock.
2.remove unused bond->lock for monitor function, only use the exist rtnl lock().
3.use rcu_read_lock() to protect it, of course, it will transform bond_for_each_slave to
bond_for_each_slave_rcu() and performance is better, but in slow path, it is ignored.
so I remove the bond->lock and move the rtnl lock to protect the whole monitor function.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/bonding/bond_main.c | 46 ++++++++++++-----------------------------
 1 file changed, 13 insertions(+), 33 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 149f4b9..f3df532 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2763,51 +2763,31 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 	struct bonding *bond = container_of(work, struct bonding,
 					    arp_work.work);
 	bool should_notify_peers = false;
-	int delta_in_ticks;
 
-	read_lock(&bond->lock);
-
-	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
+	if (!rtnl_trylock())
+		goto re_arm;
 
-	if (!bond_has_slaves(bond))
+	if (!bond_has_slaves(bond)) {
+		rtnl_unlock();
 		goto re_arm;
+	}
 
 	should_notify_peers = bond_should_notify_peers(bond);
 
-	if (bond_ab_arp_inspect(bond)) {
-		read_unlock(&bond->lock);
-
-		/* Race avoidance with bond_close flush of workqueue */
-		if (!rtnl_trylock()) {
-			read_lock(&bond->lock);
-			delta_in_ticks = 1;
-			should_notify_peers = false;
-			goto re_arm;
-		}
-
-		read_lock(&bond->lock);
-
+	if (bond_ab_arp_inspect(bond))
 		bond_ab_arp_commit(bond);
 
-		read_unlock(&bond->lock);
-		rtnl_unlock();
-		read_lock(&bond->lock);
-	}
-
 	bond_ab_arp_probe(bond);
 
-re_arm:
-	if (bond->params.arp_interval)
-		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
+	if (should_notify_peers)
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
 
-	read_unlock(&bond->lock);
+	rtnl_unlock();
 
-	if (should_notify_peers) {
-		if (!rtnl_trylock())
-			return;
-		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
-		rtnl_unlock();
-	}
+re_arm:
+	if (bond->params.arp_interval)
+		queue_delayed_work(bond->wq, &bond->arp_work,
+				msecs_to_jiffies(bond->params.arp_interval));
 }
 
 /*-------------------------- netdev event handling --------------------------*/
-- 
1.8.2.1

^ permalink raw reply related

* [PATCH net-next v2 3/5] bonding: remove bond read lock for bond_loadbalance_arp_mon()
From: Ding Tianhong @ 2013-10-24  3:09 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The bond slave list may change when the monitor is running, the slave list is no longer
protected by bond->lock, only protected by rtnl lock(), so we has 3 way to modify it:
1.add bond_master_upper_dev_link() and bond_upper_dev_unlink() in bond->lock, but it is unsafe
to call call_netdevice_notifiers() in write lock.
2.remove unused bond->lock for monitor function, only use the exist rtnl lock().
3.use rcu_read_lock() to protect it, of course, it will transform bond_for_each_slave to
bond_for_each_slave_rcu() and performance is better, but in slow path, it is ignored.
so I remove the bond->lock and add the rtnl lock to protect the whole monitor function.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/bonding/bond_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ba90f45..149f4b9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2433,10 +2433,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 	struct list_head *iter;
 	int do_failover = 0;

-	read_lock(&bond->lock);
+	if (!rtnl_trylock())
+		goto re_arm;

-	if (!bond_has_slaves(bond))
+	if (!bond_has_slaves(bond)) {
+		rtnl_unlock();
 		goto re_arm;
+	}

 	oldcurrent = bond->curr_active_slave;
 	/* see if any of the previous devices are up now (i.e. they have
@@ -2518,13 +2521,12 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 		write_unlock_bh(&bond->curr_slave_lock);
 		unblock_netpoll_tx();
 	}
+	rtnl_unlock();

 re_arm:
 	if (bond->params.arp_interval)
 		queue_delayed_work(bond->wq, &bond->arp_work,
 				   msecs_to_jiffies(bond->params.arp_interval));
-
-	read_unlock(&bond->lock);
 }

 /*
-- 
1.8.2.1

^ permalink raw reply related

* [PATCH net-next v2 5/5] bonding: remove bond read lock for bond_3ad_state_machine_handler()
From: Ding Tianhong @ 2013-10-24  3:09 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The bond slave list may change when the monitor is running, the slave list is no longer
protected by bond->lock, only protected by rtnl lock(), so we has 3 way to modify it:
1.add bond_master_upper_dev_link() and bond_upper_dev_unlink() in bond->lock, but it is unsafe
to call call_netdevice_notifiers() in write lock.
2.remove unused bond->lock for monitor function, only use the exist rtnl lock().
3.use rcu_read_lock() to protect it, of course, it will transform bond_for_each_slave to
bond_for_each_slave_rcu() and performance is better, but in slow path, it is ignored.
so I remove the bond->lock and move the rtnl lock to protect the whole monitor function.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/bonding/bond_3ad.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 187b1b7..d6fe00b 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2068,8 +2068,10 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 	struct slave *slave;
 	struct port *port;

-	read_lock(&bond->lock);
-
+	if (!rtnl_trylock()) {
+		queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
+		return;
+	}
 	//check if there are any slaves
 	if (!bond_has_slaves(bond))
 		goto re_arm;
@@ -2122,9 +2124,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
 	}

 re_arm:
+	rtnl_unlock();
 	queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
-
-	read_unlock(&bond->lock);
 }

 /**
-- 
1.8.2.1

^ permalink raw reply related

* [PATCH net-next v2 2/5] bonding: remove bond read lock for bond_alb_monitor()
From: Ding Tianhong @ 2013-10-24  3:09 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The bond slave list may change when the monitor is running, the slave list is no longer
protected by bond->lock, only protected by rtnl lock(), so we has 3 way to modify it:
1.add bond_master_upper_dev_link() and bond_upper_dev_unlink() in bond->lock, but it is unsafe
to call call_netdevice_notifiers() in write lock.
2.remove unused bond->lock for monitor function, only use the exist rtnl lock().
3.use rcu_read_lock() to protect it, of course, it will transform bond_for_each_slave to
bond_for_each_slave_rcu() and performance is better, but in slow path, it is ignored.
so I remove the bond->lock and move the rtnl lock to protect the whole monitor function.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/bonding/bond_alb.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 0287240..5d79f5e 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1495,11 +1495,13 @@ void bond_alb_monitor(struct work_struct *work)
 	struct list_head *iter;
 	struct slave *slave;
 
-	read_lock(&bond->lock);
+	if (!rtnl_trylock())
+		goto re_arm;
 
 	if (!bond_has_slaves(bond)) {
 		bond_info->tx_rebalance_counter = 0;
 		bond_info->lp_counter = 0;
+		rtnl_unlock();
 		goto re_arm;
 	}
 
@@ -1548,16 +1550,6 @@ void bond_alb_monitor(struct work_struct *work)
 		if (bond_info->primary_is_promisc &&
 		    (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) {
 
-			/*
-			 * dev_set_promiscuity requires rtnl and
-			 * nothing else.  Avoid race with bond_close.
-			 */
-			read_unlock(&bond->lock);
-			if (!rtnl_trylock()) {
-				read_lock(&bond->lock);
-				goto re_arm;
-			}
-
 			bond_info->rlb_promisc_timeout_counter = 0;
 
 			/* If the primary was set to promiscuous mode
@@ -1566,9 +1558,6 @@ void bond_alb_monitor(struct work_struct *work)
 			 */
 			dev_set_promiscuity(bond->curr_active_slave->dev, -1);
 			bond_info->primary_is_promisc = 0;
-
-			rtnl_unlock();
-			read_lock(&bond->lock);
 		}
 
 		if (bond_info->rlb_rebalance) {
@@ -1591,10 +1580,9 @@ void bond_alb_monitor(struct work_struct *work)
 		}
 	}
 
+	rtnl_unlock();
 re_arm:
 	queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks);
-
-	read_unlock(&bond->lock);
 }
 
 /* assumption: called before the slave is attached to the bond
-- 
1.8.2.1

^ permalink raw reply related

* [PATCH 0/5 NEXT V2] staging: r8188eu: Fix sparse warnings.
From: Larry Finger @ 2013-10-24  3:31 UTC (permalink / raw)
  To: gregkh; +Cc: devel, netdev, Larry Finger

This set of patches resubmits the set of fixes for sparse warnings. The two that
were criticized have been removed, and this set should not have white-space
damage.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>


Larry Finger (5):
  staging: r8188eu: Fix sparse warnings in ioctl_linux.c
  staging: r8188eu: Fix sparse warnings in rtw_ieee80211.c
  staging: r8188eu: Fix sparse warnings in rtl8188e.cmd.c
  staging: r8188eu: Fix sparse warnings in rtw_mlme_ext.c
  staging: r8188eu: Fix sparse warnings in rtl_p2p.c

 drivers/staging/rtl8188eu/core/rtw_mlme_ext.c  | 41 +++++++++++++-------------
 drivers/staging/rtl8188eu/core/rtw_p2p.c       |  8 ++---
 drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c   |  8 ++---
 drivers/staging/rtl8188eu/include/ieee80211.h  | 16 +++++-----
 drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 20 ++++++-------
 6 files changed, 47 insertions(+), 46 deletions(-)

-- 
1.8.4

^ permalink raw reply

* [PATCH NEXT 4/5] staging: r8188eu: Fix sparse warnings in rtw_mlme_ext.c
From: Larry Finger @ 2013-10-24  3:31 UTC (permalink / raw)
  To: gregkh; +Cc: devel, netdev, Larry Finger
In-Reply-To: <1382585497-6787-1-git-send-email-Larry.Finger@lwfinger.net>

Sparse displays the following:

  CHECK   drivers/staging/rtl8188eu/core/rtw_mlme_ext.c
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:1874:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:1874:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:1874:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2221:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2221:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2221:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2583:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2583:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2583:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2750:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2750:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:2750:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3002:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3002:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3002:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3197:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3197:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3197:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3311:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3311:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3311:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3563:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3563:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:3563:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4522:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4522:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4522:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4750:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4750:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4750:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4906:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4906:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:4906:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5040:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5040:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5040:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5184:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5184:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5184:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5322:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5322:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5322:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5654:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5654:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5654:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5769:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5769:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5769:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5894:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5894:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5894:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5996:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5996:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:5996:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:6066:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:6066:15:    expected unsigned short [usertype] *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:6066:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:6200:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:6200:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_mlme_ext.c:6200:15:    got restricted __le16 *<noident>

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
---
 drivers/staging/rtl8188eu/core/rtw_mlme_ext.c | 41 ++++++++++++++-------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/staging/rtl8188eu/core/rtw_mlme_ext.c b/drivers/staging/rtl8188eu/core/rtw_mlme_ext.c
index 9982dd0..7ab5ff0 100644
--- a/drivers/staging/rtl8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/rtl8188eu/core/rtw_mlme_ext.c
@@ -1852,7 +1852,7 @@ void issue_p2p_GO_request(struct adapter *padapter, u8 *raddr)
 	struct pkt_attrib *pattrib;
 	unsigned char *pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short *fctrl;
+	__le16 *fctrl;
 	struct xmit_priv *pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 	struct wifidirect_info	*pwdinfo = &(padapter->wdinfo);
@@ -2199,7 +2199,7 @@ static void issue_p2p_GO_response(struct adapter *padapter, u8 *raddr, u8 *frame
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 	struct wifidirect_info	*pwdinfo = &(padapter->wdinfo);
@@ -2561,7 +2561,7 @@ static void issue_p2p_GO_confirm(struct adapter *padapter, u8 *raddr, u8 result)
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 	struct wifidirect_info	*pwdinfo = &(padapter->wdinfo);
@@ -2729,7 +2729,7 @@ void issue_p2p_invitation_request(struct adapter *padapter, u8 *raddr)
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 	struct wifidirect_info	*pwdinfo = &(padapter->wdinfo);
@@ -2981,7 +2981,7 @@ void issue_p2p_invitation_response(struct adapter *padapter, u8 *raddr, u8 dialo
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 	struct wifidirect_info	*pwdinfo = &(padapter->wdinfo);
@@ -3175,7 +3175,7 @@ void issue_p2p_provision_request(struct adapter *padapter, u8 *pssid, u8 ussidle
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 	struct wifidirect_info	*pwdinfo = &(padapter->wdinfo);
@@ -3283,7 +3283,7 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da)
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	unsigned char					*mac;
 	struct xmit_priv	*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
@@ -3534,7 +3534,7 @@ static int _issue_probereq_p2p(struct adapter *padapter, u8 *da, int wait_ack)
 	struct pkt_attrib		*pattrib;
 	unsigned char			*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short		*fctrl;
+	__le16 *fctrl;
 	unsigned char			*mac;
 	struct xmit_priv		*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
@@ -4484,7 +4484,7 @@ void issue_beacon(struct adapter *padapter, int timeout_ms)
 	struct pkt_attrib	*pattrib;
 	unsigned char	*pframe;
 	struct rtw_ieee80211_hdr *pwlanhdr;
-	unsigned short *fctrl;
+	__le16 *fctrl;
 	unsigned int	rate_len;
 	struct xmit_priv	*pxmitpriv = &(padapter->xmitpriv);
 #if defined(CONFIG_88EU_AP_MODE)
@@ -4713,7 +4713,7 @@ void issue_probersp(struct adapter *padapter, unsigned char *da, u8 is_valid_p2p
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	unsigned char					*mac, *bssid;
 	struct xmit_priv	*pxmitpriv = &(padapter->xmitpriv);
 #if defined (CONFIG_88EU_AP_MODE)
@@ -4876,7 +4876,7 @@ static int _issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *ps
 	struct pkt_attrib		*pattrib;
 	unsigned char			*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short		*fctrl;
+	__le16 *fctrl;
 	unsigned char			*mac;
 	unsigned char			bssrate[NumRates];
 	struct xmit_priv		*pxmitpriv = &(padapter->xmitpriv);
@@ -5013,7 +5013,7 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta, unsigned short
 	struct pkt_attrib *pattrib;
 	unsigned char *pframe;
 	struct rtw_ieee80211_hdr *pwlanhdr;
-	unsigned short *fctrl;
+	__le16 *fctrl;
 	unsigned int val32;
 	u16 val16;
 #ifdef CONFIG_88EU_AP_MODE
@@ -5153,7 +5153,7 @@ void issue_asocrsp(struct adapter *padapter, unsigned short status, struct sta_i
 	struct pkt_attrib *pattrib;
 	unsigned char	*pbuf, *pframe;
 	unsigned short val;
-	unsigned short *fctrl;
+	__le16 *fctrl;
 	struct xmit_priv *pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_priv *pmlmepriv = &(padapter->mlmepriv);
 	struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv;
@@ -5290,7 +5290,7 @@ void issue_assocreq(struct adapter *padapter)
 	struct pkt_attrib	*pattrib;
 	unsigned char		*pframe, *p;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short	*fctrl;
+	__le16 *fctrl;
 	__le16		le_tmp;
 	unsigned int	i, j, ie_len, index = 0;
 	unsigned char	rf_type, bssrate[NumRates], sta_bssrate[NumRates];
@@ -5625,7 +5625,7 @@ static int _issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv	*pxmitpriv;
 	struct mlme_ext_priv	*pmlmeext;
 	struct mlme_ext_info	*pmlmeinfo;
@@ -5740,7 +5740,8 @@ static int _issue_qos_nulldata(struct adapter *padapter, unsigned char *da, u16
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl, *qc;
+	__le16 *fctrl;
+	unsigned short *qc;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 	struct mlme_ext_info	*pmlmeinfo = &(pmlmeext->mlmext_info);
@@ -5860,7 +5861,7 @@ static int _issue_deauth(struct adapter *padapter, unsigned char *da, unsigned s
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 	struct mlme_ext_info	*pmlmeinfo = &(pmlmeext->mlmext_info);
@@ -5972,7 +5973,7 @@ void issue_action_spct_ch_switch (struct adapter *padapter, u8 *ra, u8 new_ch, u
 	struct pkt_attrib			*pattrib;
 	unsigned char				*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short			*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 
@@ -6040,7 +6041,7 @@ void issue_action_BA(struct adapter *padapter, unsigned char *raddr, unsigned ch
 	struct pkt_attrib *pattrib;
 	u8 *pframe;
 	struct rtw_ieee80211_hdr *pwlanhdr;
-	u16 *fctrl;
+	__le16 *fctrl;
 	struct xmit_priv *pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv *pmlmeext = &(padapter->mlmeextpriv);
 	struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info);
@@ -6162,7 +6163,7 @@ static void issue_action_BSSCoexistPacket(struct adapter *padapter)
 	struct pkt_attrib			*pattrib;
 	unsigned char				*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short			*fctrl;
+	__le16 *fctrl;
 	struct	wlan_network	*pnetwork = NULL;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
-- 
1.8.4

^ permalink raw reply related

* [PATCH NEXT 5/5] staging: r8188eu: Fix sparse warnings in rtl_p2p.c
From: Larry Finger @ 2013-10-24  3:31 UTC (permalink / raw)
  To: gregkh; +Cc: devel, netdev, Larry Finger
In-Reply-To: <1382585497-6787-1-git-send-email-Larry.Finger@lwfinger.net>

Sparse displays the following:

  CHECK   drivers/staging/rtl8188eu/core/rtw_p2p.c
drivers/staging/rtl8188eu/core/rtw_p2p.c:162:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_p2p.c:162:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_p2p.c:162:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_p2p.c:221:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_p2p.c:221:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_p2p.c:221:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_p2p.c:292:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_p2p.c:292:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_p2p.c:292:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/core/rtw_p2p.c:371:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/core/rtw_p2p.c:371:15:    expected unsigned short *fctrl
drivers/staging/rtl8188eu/core/rtw_p2p.c:371:15:    got restricted __le16 *<noident>

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
---
 drivers/staging/rtl8188eu/core/rtw_p2p.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/rtl8188eu/core/rtw_p2p.c b/drivers/staging/rtl8188eu/core/rtw_p2p.c
index 8cf915f..f46cab1 100644
--- a/drivers/staging/rtl8188eu/core/rtw_p2p.c
+++ b/drivers/staging/rtl8188eu/core/rtw_p2p.c
@@ -135,7 +135,7 @@ static void issue_group_disc_req(struct wifidirect_info *pwdinfo, u8 *da)
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct adapter *padapter = pwdinfo->padapter;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
@@ -192,7 +192,7 @@ static void issue_p2p_devdisc_resp(struct wifidirect_info *pwdinfo, u8 *da, u8 s
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct adapter *padapter = pwdinfo->padapter;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
@@ -272,7 +272,7 @@ static void issue_p2p_provision_resp(struct wifidirect_info *pwdinfo, u8 *raddr,
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
 
@@ -342,7 +342,7 @@ static void issue_p2p_presence_resp(struct wifidirect_info *pwdinfo, u8 *da, u8
 	struct pkt_attrib			*pattrib;
 	unsigned char					*pframe;
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	unsigned short				*fctrl;
+	__le16 *fctrl;
 	struct adapter *padapter = pwdinfo->padapter;
 	struct xmit_priv			*pxmitpriv = &(padapter->xmitpriv);
 	struct mlme_ext_priv	*pmlmeext = &(padapter->mlmeextpriv);
-- 
1.8.4

^ permalink raw reply related

* [PATCH NEXT 1/5] staging: r8188eu: Fix sparse warnings in ioctl_linux.c
From: Larry Finger @ 2013-10-24  3:31 UTC (permalink / raw)
  To: gregkh; +Cc: netdev, devel, Larry Finger
In-Reply-To: <1382585497-6787-1-git-send-email-Larry.Finger@lwfinger.net>

Sparse checking results in the following warnings:

  CHECK   drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3932:17: warning: cast removes address space of expression
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3933:31: warning: incorrect type in argument 1 (different address spaces)
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3933:31:    expected void const *<noident>
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3933:31:    got void [noderef] <asn:1>*pointer
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3935:38: warning: incorrect type in argument 1 (different address spaces)
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3935:38:    expected void const *<noident>
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3935:38:    got void [noderef] <asn:1>*pointer
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3937:38: warning: incorrect type in argument 1 (different address spaces)
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3937:38:    expected void const *<noident>
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3937:38:    got void [noderef] <asn:1>*pointer
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3939:38: warning: incorrect type in argument 1 (different address spaces)
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3939:38:    expected void const *<noident>
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3939:38:    got void [noderef] <asn:1>*pointer
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3941:38: warning: incorrect type in argument 1 (different address spaces)
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3941:38:    expected void const *<noident>
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3941:38:    got void [noderef] <asn:1>*pointer
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3944:38: warning: incorrect type in argument 1 (different address spaces)
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3944:38:    expected void const *<noident>
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3944:38:    got void [noderef] <asn:1>*pointer
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3946:38: warning: incorrect type in argument 1 (different address spaces)
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3946:38:    expected void const *<noident>
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3946:38:    got void [noderef] <asn:1>*pointer
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3949:38: warning: incorrect type in argument 1 (different address spaces)
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3949:38:    expected void const *<noident>
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3949:38:    got void [noderef] <asn:1>*pointer
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c:3963:9: warning: cast removes address space of expression

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
---
 drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
index ae54587..5dc3fed 100644
--- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
@@ -3929,24 +3929,24 @@ static int rtw_p2p_get(struct net_device *dev,
 	struct adapter *padapter = (struct adapter *)rtw_netdev_priv(dev);
 
 	if (padapter->bShowGetP2PState)
-		DBG_88E("[%s] extra = %s\n", __func__, (char *)wrqu->data.pointer);
-	if (!memcmp(wrqu->data.pointer, "status", 6)) {
+		DBG_88E("[%s] extra = %s\n", __func__, (__force char *)wrqu->data.pointer);
+	if (!memcmp((__force const char *)wrqu->data.pointer, "status", 6)) {
 		rtw_p2p_get_status(dev, info, wrqu, extra);
-	} else if (!memcmp(wrqu->data.pointer, "role", 4)) {
+	} else if (!memcmp((__force const char *)wrqu->data.pointer, "role", 4)) {
 		rtw_p2p_get_role(dev, info, wrqu, extra);
-	} else if (!memcmp(wrqu->data.pointer, "peer_ifa", 8)) {
+	} else if (!memcmp((__force const char *)wrqu->data.pointer, "peer_ifa", 8)) {
 		rtw_p2p_get_peer_ifaddr(dev, info, wrqu, extra);
-	} else if (!memcmp(wrqu->data.pointer, "req_cm", 6)) {
+	} else if (!memcmp((__force const char *)wrqu->data.pointer, "req_cm", 6)) {
 		rtw_p2p_get_req_cm(dev, info, wrqu, extra);
-	} else if (!memcmp(wrqu->data.pointer, "peer_deva", 9)) {
+	} else if (!memcmp((__force const char *)wrqu->data.pointer, "peer_deva", 9)) {
 		/*	Get the P2P device address when receiving the provision discovery request frame. */
 		rtw_p2p_get_peer_devaddr(dev, info, wrqu, extra);
-	} else if (!memcmp(wrqu->data.pointer, "group_id", 8)) {
+	} else if (!memcmp((__force const char *)wrqu->data.pointer, "group_id", 8)) {
 		rtw_p2p_get_groupid(dev, info, wrqu, extra);
-	} else if (!memcmp(wrqu->data.pointer, "peer_deva_inv", 9)) {
+	} else if (!memcmp((__force const char *)wrqu->data.pointer, "peer_deva_inv", 9)) {
 		/*	Get the P2P device address when receiving the P2P Invitation request frame. */
 		rtw_p2p_get_peer_devaddr_by_invitation(dev, info, wrqu, extra);
-	} else if (!memcmp(wrqu->data.pointer, "op_ch", 5)) {
+	} else if (!memcmp((__force const char *)wrqu->data.pointer, "op_ch", 5)) {
 		rtw_p2p_get_op_ch(dev, info, wrqu, extra);
 	}
 #endif /* CONFIG_88EU_P2P */
@@ -3960,7 +3960,7 @@ static int rtw_p2p_get2(struct net_device *dev,
 	int ret = 0;
 
 #ifdef CONFIG_88EU_P2P
-	DBG_88E("[%s] extra = %s\n", __func__, (char *)wrqu->data.pointer);
+	DBG_88E("[%s] extra = %s\n", __func__, (__force char *)wrqu->data.pointer);
 	if (!memcmp(extra, "wpsCM =", 6)) {
 		wrqu->data.length -= 6;
 		rtw_p2p_get_wps_configmethod(dev, info, wrqu,  &extra[6]);
-- 
1.8.4

^ permalink raw reply related

* [PATCH NEXT 2/5] staging: r8188eu: Fix sparse warnings in rtw_ieee80211.c
From: Larry Finger @ 2013-10-24  3:31 UTC (permalink / raw)
  To: gregkh; +Cc: netdev, devel, Larry Finger
In-Reply-To: <1382585497-6787-1-git-send-email-Larry.Finger@lwfinger.net>

Sparse reports the following:

  CHECK   drivers/staging/rtl8188eu/core/rtw_ieee80211.c
drivers/staging/rtl8188eu/core/rtw_ieee80211.c:1593:14: warning: cast to restricted __le16

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
---
 drivers/staging/rtl8188eu/include/ieee80211.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/rtl8188eu/include/ieee80211.h b/drivers/staging/rtl8188eu/include/ieee80211.h
index cd37ea4..c4d38d1 100644
--- a/drivers/staging/rtl8188eu/include/ieee80211.h
+++ b/drivers/staging/rtl8188eu/include/ieee80211.h
@@ -306,8 +306,8 @@ struct ieee_ibss_seq {
 };
 
 struct rtw_ieee80211_hdr {
-	u16 frame_ctl;
-	u16 duration_id;
+	__le16 frame_ctl;
+	__le16 duration_id;
 	u8 addr1[ETH_ALEN];
 	u8 addr2[ETH_ALEN];
 	u8 addr3[ETH_ALEN];
@@ -316,8 +316,8 @@ struct rtw_ieee80211_hdr {
 } __packed;
 
 struct rtw_ieee80211_hdr_3addr {
-	u16 frame_ctl;
-	u16 duration_id;
+	__le16 frame_ctl;
+	__le16 duration_id;
 	u8 addr1[ETH_ALEN];
 	u8 addr2[ETH_ALEN];
 	u8 addr3[ETH_ALEN];
@@ -325,8 +325,8 @@ struct rtw_ieee80211_hdr_3addr {
 } __packed;
 
 struct rtw_ieee80211_hdr_qos {
-	u16 frame_ctl;
-	u16 duration_id;
+	__le16 frame_ctl;
+	__le16 duration_id;
 	u8 addr1[ETH_ALEN];
 	u8 addr2[ETH_ALEN];
 	u8 addr3[ETH_ALEN];
@@ -336,8 +336,8 @@ struct rtw_ieee80211_hdr_qos {
 }  __packed;
 
 struct rtw_ieee80211_hdr_3addr_qos {
-	u16 frame_ctl;
-	u16 duration_id;
+	__le16 frame_ctl;
+	__le16 duration_id;
 	u8 addr1[ETH_ALEN];
 	u8 addr2[ETH_ALEN];
 	u8 addr3[ETH_ALEN];
-- 
1.8.4

^ permalink raw reply related

* [PATCH NEXT 3/5] staging: r8188eu: Fix sparse warnings in rtl8188e.cmd.c
From: Larry Finger @ 2013-10-24  3:31 UTC (permalink / raw)
  To: gregkh; +Cc: netdev, devel, Larry Finger
In-Reply-To: <1382585497-6787-1-git-send-email-Larry.Finger@lwfinger.net>

Sparse displays the following:

  CHECK   drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:285:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:285:15:    expected unsigned short [usertype] *fctrl
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:285:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:368:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:368:15:    expected unsigned short [usertype] *fctrl
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:368:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:403:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:403:15:    expected unsigned short [usertype] *fctrl
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:403:15:    got restricted __le16 *<noident>
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:465:15: warning: incorrect type in assignment (different base types)
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:465:15:    expected unsigned short [usertype] *fctrl
drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c:465:15:    got restricted __le16 *<noident>

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
---
 drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c b/drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c
index 0b0c68c..8be2ad7 100644
--- a/drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c
+++ b/drivers/staging/rtl8188eu/hal/rtl8188e_cmd.c
@@ -273,7 +273,7 @@ void rtl8188e_set_FwMediaStatus_cmd(struct adapter *adapt, __le16 mstatus_rpt)
 static void ConstructBeacon(struct adapter *adapt, u8 *pframe, u32 *pLength)
 {
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	u16 *fctrl;
+	__le16 *fctrl;
 	u32 rate_len, pktlen;
 	struct mlme_ext_priv *pmlmeext = &(adapt->mlmeextpriv);
 	struct mlme_ext_info	*pmlmeinfo = &(pmlmeext->mlmext_info);
@@ -360,7 +360,7 @@ static void ConstructPSPoll(struct adapter *adapt, u8 *pframe, u32 *pLength)
 	struct rtw_ieee80211_hdr	*pwlanhdr;
 	struct mlme_ext_priv *pmlmeext = &(adapt->mlmeextpriv);
 	struct mlme_ext_info	*pmlmeinfo = &(pmlmeext->mlmext_info);
-	u16 *fctrl;
+	__le16 *fctrl;
 
 	pwlanhdr = (struct rtw_ieee80211_hdr *)pframe;
 
@@ -391,7 +391,7 @@ static void ConstructNullFunctionData(struct adapter *adapt, u8 *pframe,
 	u8 bForcePowerSave)
 {
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	u16 *fctrl;
+	__le16 *fctrl;
 	u32 pktlen;
 	struct mlme_priv *pmlmepriv = &adapt->mlmepriv;
 	struct wlan_network		*cur_network = &pmlmepriv->cur_network;
@@ -450,7 +450,7 @@ static void ConstructNullFunctionData(struct adapter *adapt, u8 *pframe,
 static void ConstructProbeRsp(struct adapter *adapt, u8 *pframe, u32 *pLength, u8 *StaAddr, bool bHideSSID)
 {
 	struct rtw_ieee80211_hdr	*pwlanhdr;
-	u16 *fctrl;
+	__le16 *fctrl;
 	u8 *mac, *bssid;
 	u32 pktlen;
 	struct mlme_ext_priv *pmlmeext = &(adapt->mlmeextpriv);
-- 
1.8.4

^ permalink raw reply related

* Re: 16% regression on 10G caused by TCP small queues
From: David Miller @ 2013-10-24  3:38 UTC (permalink / raw)
  To: stephen; +Cc: ncardwell, eric.dumazet, dave.taht, netdev
In-Reply-To: <CAOaVG15indAxBz7E-T+9MK03indiXBa0UhQZ5j6K5DxYP44Mxg@mail.gmail.com>

From: Stephen Hemminger <stephen@networkplumber.org>
Date: Wed, 23 Oct 2013 20:09:49 -0700

> I will check 3.12, but what about users on 3.10 which is the LTS
> kernel used by most distros?

The fix will be backported to -stable, relax Stephen.

^ permalink raw reply

* [PATCH] ibm emac: Don't call napi_complete if napi_reschedule failed
From: Alistair Popple @ 2013-10-24  3:36 UTC (permalink / raw)
  To: netdev, davem; +Cc: benh, Alistair Popple

This patch fixes a bug which would trigger the BUG_ON() at
net/core/dev.c:4156. It was found that this was due to napi_complete
being called even when the corresponding call to napi_reschedule had
failed.

This patch ensures that we only contine processing rotting packets in
the current mal_poll call if we are not already on the polling list.
It also adds locking calls to protect the read-write-modify of the
MAL_CFG DCR.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---
 drivers/net/ethernet/ibm/emac/mal.c |   17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index 856ea66..b10ad5d 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -263,7 +263,9 @@ static inline void mal_schedule_poll(struct mal_instance *mal)
 {
 	if (likely(napi_schedule_prep(&mal->napi))) {
 		MAL_DBG2(mal, "schedule_poll" NL);
+		spin_lock(&mal->lock);
 		mal_disable_eob_irq(mal);
+		spin_unlock(&mal->lock);
 		__napi_schedule(&mal->napi);
 	} else
 		MAL_DBG2(mal, "already in poll" NL);
@@ -442,17 +444,14 @@ static int mal_poll(struct napi_struct *napi, int budget)
 		if (unlikely(mc->ops->peek_rx(mc->dev) ||
 			     test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) {
 			MAL_DBG2(mal, "rotting packet" NL);
-			if (napi_reschedule(napi))
-				mal_disable_eob_irq(mal);
-			else
-				MAL_DBG2(mal, "already in poll list" NL);
-
-			if (budget > 0)
-				goto again;
-			else
+			if (!napi_reschedule(napi))
 				goto more_work;
+
+			spin_lock_irqsave(&mal->lock, flags);
+			mal_disable_eob_irq(mal);
+			spin_unlock_irqrestore(&mal->lock, flags);
+			goto again;
 		}
-		mc->ops->poll_tx(mc->dev);
 	}
 
  more_work:
-- 
1.7.10.4

^ permalink raw reply related

* Re: 16% regression on 10G caused by TCP small queues
From: Stephen Hemminger @ 2013-10-24  4:45 UTC (permalink / raw)
  To: David Miller; +Cc: ncardwell, eric.dumazet, dave.taht, netdev
In-Reply-To: <20131023.233816.1396918839771657154.davem@davemloft.net>

On Wed, 23 Oct 2013 23:38:16 -0400 (EDT)
David Miller <davem@davemloft.net> wrote:

> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Wed, 23 Oct 2013 20:09:49 -0700
> 
> > I will check 3.12, but what about users on 3.10 which is the LTS
> > kernel used by most distros?

3.12-rc6 gets line rate again (9.41 Gbit/sec)

> The fix will be backported to -stable, relax Stephen.

Sorry, thought sk_pacing_rate depended on FQ qdisc but it is other way around.
In which case doing merge of these two was sufficient to fix the problem.
With a minor manual fix up to tcp.h.


commit 95bd09eb27507691520d39ee1044d6ad831c1168
Author: Eric Dumazet <edumazet@google.com>
Date:   Tue Aug 27 05:46:32 2013 -0700

    tcp: TSO packets automatic sizing

commit c9eeec26e32e087359160406f96e0949b3cc6f10
Author: Eric Dumazet <edumazet@google.com>
Date:   Fri Sep 27 03:28:54 2013 -0700

    tcp: TSQ can use a dynamic limit

^ permalink raw reply

* [PATCH next] be2net: add support for ndo_busy_poll
From: Sathya Perla @ 2013-10-24  5:47 UTC (permalink / raw)
  To: netdev

Includes:
- ndo_busy_poll implementation
- Locking between napi and busy_poll
- Fix rx_post_starvation (replenish rx-queues in out-of-mememory scenario)
  logic to accomodate busy_poll.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be.h      |  122 +++++++++++++++++++++++++++
 drivers/net/ethernet/emulex/benet/be_main.c |   86 +++++++++++++++----
 2 files changed, 189 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 1bce77f..b2765eb 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -199,6 +199,19 @@ struct be_eq_obj {
 	u16 spurious_intr;
 	struct napi_struct napi;
 	struct be_adapter *adapter;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+#define BE_EQ_IDLE		0
+#define BE_EQ_NAPI		1	/* napi owns this EQ */
+#define BE_EQ_POLL		2	/* poll owns this EQ */
+#define BE_EQ_LOCKED		(BE_EQ_NAPI | BE_EQ_POLL)
+#define BE_EQ_NAPI_YIELD	4	/* napi yielded this EQ */
+#define BE_EQ_POLL_YIELD	8	/* poll yielded this EQ */
+#define BE_EQ_YIELD		(BE_EQ_NAPI_YIELD | BE_EQ_POLL_YIELD)
+#define BE_EQ_USER_PEND		(BE_EQ_POLL | BE_EQ_POLL_YIELD)
+	unsigned int state;
+	spinlock_t lock;	/* lock to serialize napi and busy-poll */
+#endif  /* CONFIG_NET_RX_BUSY_POLL */
 } ____cacheline_aligned_in_smp;
 
 struct be_aic_obj {		/* Adaptive interrupt coalescing (AIC) info */
@@ -212,6 +225,11 @@ struct be_aic_obj {		/* Adaptive interrupt coalescing (AIC) info */
 	u64 tx_reqs_prev;	/* Used to calculate TX pps */
 };
 
+enum {
+	NAPI_POLLING,
+	BUSY_POLLING
+};
+
 struct be_mcc_obj {
 	struct be_queue_info q;
 	struct be_queue_info cq;
@@ -561,6 +579,10 @@ extern const struct ethtool_ops be_ethtool_ops;
 	for (i = 0, eqo = &adapter->eq_obj[i]; i < adapter->num_evt_qs; \
 		i++, eqo++)
 
+#define for_all_rx_queues_on_eq(adapter, eqo, rxo, i)			\
+	for (i = eqo->idx, rxo = &adapter->rx_obj[i]; i < adapter->num_rx_qs;\
+		 i += adapter->num_evt_qs, rxo += adapter->num_evt_qs)
+
 #define is_mcc_eqo(eqo)			(eqo->idx == 0)
 #define mcc_eqo(adapter)		(&adapter->eq_obj[0])
 
@@ -711,6 +733,106 @@ static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
 }
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static inline bool be_lock_napi(struct be_eq_obj *eqo)
+{
+	bool status = true;
+
+	spin_lock(&eqo->lock); /* BH is already disabled */
+	if (eqo->state & BE_EQ_LOCKED) {
+		WARN_ON(eqo->state & BE_EQ_NAPI);
+		eqo->state |= BE_EQ_NAPI_YIELD;
+		status = false;
+	} else {
+		eqo->state = BE_EQ_NAPI;
+	}
+	spin_unlock(&eqo->lock);
+	return status;
+}
+
+static inline void be_unlock_napi(struct be_eq_obj *eqo)
+{
+	spin_lock(&eqo->lock); /* BH is already disabled */
+
+	WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
+	eqo->state = BE_EQ_IDLE;
+
+	spin_unlock(&eqo->lock);
+}
+
+static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
+{
+	bool status = true;
+
+	spin_lock_bh(&eqo->lock);
+	if (eqo->state & BE_EQ_LOCKED) {
+		eqo->state |= BE_EQ_POLL_YIELD;
+		status = false;
+	} else {
+		eqo->state |= BE_EQ_POLL;
+	}
+	spin_unlock_bh(&eqo->lock);
+	return status;
+}
+
+static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
+{
+	spin_lock_bh(&eqo->lock);
+
+	WARN_ON(eqo->state & (BE_EQ_NAPI));
+	eqo->state = BE_EQ_IDLE;
+
+	spin_unlock_bh(&eqo->lock);
+}
+
+static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
+{
+	spin_lock_init(&eqo->lock);
+	eqo->state = BE_EQ_IDLE;
+}
+
+static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
+{
+	local_bh_disable();
+
+	/* It's enough to just acquire napi lock on the eqo to stop
+	 * be_busy_poll() from processing any queueus.
+	 */
+	while (!be_lock_napi(eqo))
+		mdelay(1);
+
+	local_bh_enable();
+}
+
+#else /* CONFIG_NET_RX_BUSY_POLL */
+
+static inline bool be_lock_napi(struct be_eq_obj *eqo)
+{
+	return true;
+}
+
+static inline void be_unlock_napi(struct be_eq_obj *eqo)
+{
+}
+
+static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
+{
+	return false;
+}
+
+static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
+{
+}
+
+static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
+{
+}
+
+static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
+{
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm,
 		  u16 num_popped);
 void be_link_status_update(struct be_adapter *adapter, u8 link_status);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 393e3dc..bb1cd55 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -22,6 +22,7 @@
 #include <asm/div64.h>
 #include <linux/aer.h>
 #include <linux/if_bridge.h>
+#include <net/busy_poll.h>
 
 MODULE_VERSION(DRV_VER);
 MODULE_DEVICE_TABLE(pci, be_dev_ids);
@@ -1556,7 +1557,7 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
 }
 
 /* Process the RX completion indicated by rxcp when GRO is disabled */
-static void be_rx_compl_process(struct be_rx_obj *rxo,
+static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
 				struct be_rx_compl_info *rxcp)
 {
 	struct be_adapter *adapter = rxo->adapter;
@@ -1581,7 +1582,7 @@ static void be_rx_compl_process(struct be_rx_obj *rxo,
 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
 	if (netdev->features & NETIF_F_RXHASH)
 		skb->rxhash = rxcp->rss_hash;
-
+	skb_mark_napi_id(skb, napi);
 
 	if (rxcp->vlanf)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
@@ -1639,6 +1640,7 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
 	if (adapter->netdev->features & NETIF_F_RXHASH)
 		skb->rxhash = rxcp->rss_hash;
+	skb_mark_napi_id(skb, napi);
 
 	if (rxcp->vlanf)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
@@ -1819,6 +1821,8 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp)
 
 	if (posted) {
 		atomic_add(posted, &rxq->used);
+		if (rxo->rx_post_starved)
+			rxo->rx_post_starved = false;
 		be_rxq_notify(adapter, rxq->id, posted);
 	} else if (atomic_read(&rxq->used) == 0) {
 		/* Let be_worker replenish when memory is available */
@@ -2021,6 +2025,7 @@ static void be_evt_queues_destroy(struct be_adapter *adapter)
 		if (eqo->q.created) {
 			be_eq_clean(eqo);
 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
+			napi_hash_del(&eqo->napi);
 			netif_napi_del(&eqo->napi);
 		}
 		be_queue_free(adapter, &eqo->q);
@@ -2262,7 +2267,7 @@ static inline bool do_gro(struct be_rx_compl_info *rxcp)
 }
 
 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
-			int budget)
+			int budget, int polling)
 {
 	struct be_adapter *adapter = rxo->adapter;
 	struct be_queue_info *rx_cq = &rxo->cq;
@@ -2293,19 +2298,27 @@ static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
 			goto loop_continue;
 		}
 
-		if (do_gro(rxcp))
+		/* Don't do gro when we're busy_polling */
+		if (do_gro(rxcp) && polling != BUSY_POLLING)
 			be_rx_compl_process_gro(rxo, napi, rxcp);
 		else
-			be_rx_compl_process(rxo, rxcp);
+			be_rx_compl_process(rxo, napi, rxcp);
+
 loop_continue:
 		be_rx_stats_update(rxo, rxcp);
 	}
 
 	if (work_done) {
+		gfp_t gfp = (polling == BUSY_POLLING) ? GFP_KERNEL : GFP_ATOMIC;
+
 		be_cq_notify(adapter, rx_cq->id, true, work_done);
 
-		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM)
-			be_post_rx_frags(rxo, GFP_ATOMIC);
+		/* When an rx-obj gets into post_starved state, just
+		 * let be_worker do the posting.
+		 */
+		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
+		    !rxo->rx_post_starved)
+			be_post_rx_frags(rxo, gfp);
 	}
 
 	return work_done;
@@ -2349,6 +2362,7 @@ int be_poll(struct napi_struct *napi, int budget)
 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
 	struct be_adapter *adapter = eqo->adapter;
 	int max_work = 0, work, i, num_evts;
+	struct be_rx_obj *rxo;
 	bool tx_done;
 
 	num_evts = events_get(eqo);
@@ -2361,13 +2375,18 @@ int be_poll(struct napi_struct *napi, int budget)
 			max_work = budget;
 	}
 
-	/* This loop will iterate twice for EQ0 in which
-	 * completions of the last RXQ (default one) are also processed
-	 * For other EQs the loop iterates only once
-	 */
-	for (i = eqo->idx; i < adapter->num_rx_qs; i += adapter->num_evt_qs) {
-		work = be_process_rx(&adapter->rx_obj[i], napi, budget);
-		max_work = max(work, max_work);
+	if (be_lock_napi(eqo)) {
+		/* This loop will iterate twice for EQ0 in which
+		 * completions of the last RXQ (default one) are also processed
+		 * For other EQs the loop iterates only once
+		 */
+		for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
+			work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
+			max_work = max(work, max_work);
+		}
+		be_unlock_napi(eqo);
+	} else {
+		max_work = budget;
 	}
 
 	if (is_mcc_eqo(eqo))
@@ -2383,6 +2402,28 @@ int be_poll(struct napi_struct *napi, int budget)
 	return max_work;
 }
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static int be_busy_poll(struct napi_struct *napi)
+{
+	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
+	struct be_adapter *adapter = eqo->adapter;
+	struct be_rx_obj *rxo;
+	int i, work = 0;
+
+	if (!be_lock_busy_poll(eqo))
+		return LL_FLUSH_BUSY;
+
+	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
+		work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
+		if (work)
+			break;
+	}
+
+	be_unlock_busy_poll(eqo);
+	return work;
+}
+#endif
+
 void be_detect_error(struct be_adapter *adapter)
 {
 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
@@ -2614,9 +2655,11 @@ static int be_close(struct net_device *netdev)
 
 	be_roce_dev_close(adapter);
 
-	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
-		for_all_evt_queues(adapter, eqo, i)
+	for_all_evt_queues(adapter, eqo, i) {
+		if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
 			napi_disable(&eqo->napi);
+			be_disable_busy_poll(eqo);
+		}
 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
 	}
 
@@ -2727,6 +2770,7 @@ static int be_open(struct net_device *netdev)
 
 	for_all_evt_queues(adapter, eqo, i) {
 		napi_enable(&eqo->napi);
+		be_enable_busy_poll(eqo);
 		be_eq_notify(adapter, eqo->q.id, true, false, 0);
 	}
 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
@@ -3989,6 +4033,9 @@ static const struct net_device_ops be_netdev_ops = {
 #endif
 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	.ndo_busy_poll		= be_busy_poll
+#endif
 };
 
 static void be_netdev_init(struct net_device *netdev)
@@ -4376,10 +4423,11 @@ static void be_worker(struct work_struct *work)
 		be_cmd_get_die_temperature(adapter);
 
 	for_all_rx_queues(adapter, rxo, i) {
-		if (rxo->rx_post_starved) {
-			rxo->rx_post_starved = false;
+		/* Replenish RX-queues starved due to memory
+		 * allocation failures.
+		 */
+		if (rxo->rx_post_starved)
 			be_post_rx_frags(rxo, GFP_KERNEL);
-		}
 	}
 
 	be_eqd_update(adapter);
-- 
1.7.1

^ permalink raw reply related

* [PATCH net 1/2] ipv6: reset dst.expires value when clearing expire flag
From: Hannes Frederic Sowa @ 2013-10-24  5:48 UTC (permalink / raw)
  To: netdev; +Cc: sgunderson, valentyn, yoshfuji

On receiving a packet too big icmp error we update the expire value by
calling rt6_update_expires. This function uses dst_set_expires which is
implemented that it can only reduce the expiration value of the dst entry.

If we insert new routing non-expiry information into the ipv6 fib where
we already have a matching rt6_info we only clear the RTF_EXPIRES flag
in rt6i_flags and leave the dst.expires value as is.

When new mtu information arrives for that cached dst_entry we again
call dst_set_expires. This time it won't update the dst.expire value
because we left the dst.expire value intact from the last update. So
dst_set_expires won't touch dst.expires.

Fix this by resetting dst.expires when clearing the RTF_EXPIRE flag.
dst_set_expires checks for a zero expiration and updates the
dst.expires.

In the past this (not updating dst.expires) was necessary because
dst.expire was placed in a union with the dst_entry *from reference. So
an update on the value would have caused page faults. This split happend
in ecd9883724b78cc72ed92c98bcb1a46c764fff21 ("ipv6: fix race condition
regarding dst->expires and dst->from").

Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Reported-by: Valentijn Sessink <valentyn@blub.net>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
I would propose this patch for -stable.

 include/net/ip6_fib.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 48ec25a..5e661a9 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -165,6 +165,7 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 static inline void rt6_clean_expires(struct rt6_info *rt)
 {
 	rt->rt6i_flags &= ~RTF_EXPIRES;
+	rt->dst.expires = 0;
 }

 static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net 2/2] ipv6: ip6_dst_check needs to check for expired dst_entries
From: Hannes Frederic Sowa @ 2013-10-24  5:48 UTC (permalink / raw)
  To: netdev; +Cc: sgunderson, valentyn, yoshfuji

On receiving a packet too big icmp error we check if our current cached
dst_entry in the socket is still valid. This validation check did not
care about the expiration of the (cached) route.

The error path I traced down:
The socket receives a packet too big mtu notification. It still has a
valid dst_entry and thus issues the ip6_rt_pmtu_update on this dst_entry,
setting RTF_EXPIRE and updates the dst.expiration value (which could
fail because of not up-to-date expiration values, see previous patch).

In some seldom cases we race with a) the ip6_fib gc or b) another routing
lookup which would result in a recreation of the cached rt6_info from its
parent non-cached rt6_info. While copying the rt6_info we reinitialize the
metrics store by copying it over from the parent thus invalidating the
just installed pmtu update (both dsts use the same key to the inetpeer
storage). The dst_entry with the just invalidated metrics data would
just get its RTF_EXPIRES flag cleared and would continue to stay valid
for the socket.

We should have not issued the pmtu update on the already expired dst_entry
in the first placed. By checking the expiration on the dst entry and
doing a relookup in case it is out of date we close the race because
we would install a new rt6_info into the fib before we issue the pmtu
update, thus closing this race.

Not reliably updating the dst.expire value was fixed by the patch "ipv6:
reset dst.expires value when clearing expire flag".

Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Reported-by: Valentijn Sessink <valentyn@blub.net>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
I would propose this patch for -stable.

 net/ipv6/route.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f54e3a1..04e17b3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1087,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
 		return NULL;

-	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
-		return dst;
+	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+		return NULL;

-	return NULL;
+	if (rt6_check_expired(rt))
+		return NULL;
+
+	return dst;
 }

 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
-- 
1.8.3.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox