Netdev List
 help / color / mirror / Atom feed
* [PATCH] ipv4: remove ip_rt_secret timer
From: Neil Horman @ 2010-05-06 17:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, jmorris, yoshfuji, kaber, nhorman

A while back there was a discussion regarding the rt_secret_interval timer.
Given that we've had the ability to do emergency route cache rebuilds for a while
now, based on a statistical analysis of the various hash chain lengths in the
cache, the use of the flush timer is somewhat redundant.  This patch removes the
rt_secret_interval sysctl, allowing us to rely solely on the statistical
analysis mechanism to determine the need for route cache flushes.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/net/netns/ipv4.h |    1 
 net/ipv4/route.c         |  108 -----------------------------------------------
 2 files changed, 2 insertions(+), 107 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ae07fee..d68c3f1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -55,7 +55,6 @@ struct netns_ipv4 {
 	int sysctl_rt_cache_rebuild_count;
 	int current_rt_cache_rebuild_count;
 
-	struct timer_list rt_secret_timer;
 	atomic_t rt_genid;
 
 #ifdef CONFIG_IP_MROUTE
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a947428..ffd3da1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly	= 8;
 static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
-static int ip_rt_secret_interval __read_mostly	= 10 * 60 * HZ;
 static int rt_chain_length_max __read_mostly	= 20;
 
 static struct delayed_work expires_work;
@@ -918,32 +917,11 @@ void rt_cache_flush_batch(void)
 	rt_do_flush(!in_softirq());
 }
 
-/*
- * We change rt_genid and let gc do the cleanup
- */
-static void rt_secret_rebuild(unsigned long __net)
-{
-	struct net *net = (struct net *)__net;
-	rt_cache_invalidate(net);
-	mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
-static void rt_secret_rebuild_oneshot(struct net *net)
-{
-	del_timer_sync(&net->ipv4.rt_secret_timer);
-	rt_cache_invalidate(net);
-	if (ip_rt_secret_interval)
-		mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
 static void rt_emergency_hash_rebuild(struct net *net)
 {
-	if (net_ratelimit()) {
+	if (net_ratelimit())
 		printk(KERN_WARNING "Route hash chain too long!\n");
-		printk(KERN_WARNING "Adjust your secret_interval!\n");
-	}
-
-	rt_secret_rebuild_oneshot(net);
+	rt_cache_invalidate(net);
 }
 
 /*
@@ -3101,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 	return -EINVAL;
 }
 
-static void rt_secret_reschedule(int old)
-{
-	struct net *net;
-	int new = ip_rt_secret_interval;
-	int diff = new - old;
-
-	if (!diff)
-		return;
-
-	rtnl_lock();
-	for_each_net(net) {
-		int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
-		long time;
-
-		if (!new)
-			continue;
-
-		if (deleted) {
-			time = net->ipv4.rt_secret_timer.expires - jiffies;
-
-			if (time <= 0 || (time += diff) <= 0)
-				time = 0;
-		} else
-			time = new;
-
-		mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
-	}
-	rtnl_unlock();
-}
-
-static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
-					  void __user *buffer, size_t *lenp,
-					  loff_t *ppos)
-{
-	int old = ip_rt_secret_interval;
-	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
-
-	rt_secret_reschedule(old);
-
-	return ret;
-}
-
 static ctl_table ipv4_route_table[] = {
 	{
 		.procname	= "gc_thresh",
@@ -3251,13 +3187,6 @@ static ctl_table ipv4_route_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "secret_interval",
-		.data		= &ip_rt_secret_interval,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= ipv4_sysctl_rt_secret_interval,
-	},
 	{ }
 };
 
@@ -3337,36 +3266,6 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 #endif
 
 
-static __net_init int rt_secret_timer_init(struct net *net)
-{
-	atomic_set(&net->ipv4.rt_genid,
-			(int) ((num_physpages ^ (num_physpages>>8)) ^
-			(jiffies ^ (jiffies >> 7))));
-
-	net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
-	net->ipv4.rt_secret_timer.data = (unsigned long)net;
-	init_timer_deferrable(&net->ipv4.rt_secret_timer);
-
-	if (ip_rt_secret_interval) {
-		net->ipv4.rt_secret_timer.expires =
-			jiffies + net_random() % ip_rt_secret_interval +
-			ip_rt_secret_interval;
-		add_timer(&net->ipv4.rt_secret_timer);
-	}
-	return 0;
-}
-
-static __net_exit void rt_secret_timer_exit(struct net *net)
-{
-	del_timer_sync(&net->ipv4.rt_secret_timer);
-}
-
-static __net_initdata struct pernet_operations rt_secret_timer_ops = {
-	.init = rt_secret_timer_init,
-	.exit = rt_secret_timer_exit,
-};
-
-
 #ifdef CONFIG_NET_CLS_ROUTE
 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
 #endif /* CONFIG_NET_CLS_ROUTE */
@@ -3424,9 +3323,6 @@ int __init ip_rt_init(void)
 	schedule_delayed_work(&expires_work,
 		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 
-	if (register_pernet_subsys(&rt_secret_timer_ops))
-		printk(KERN_ERR "Unable to setup rt_secret_timer\n");
-
 	if (ip_rt_proc_init())
 		printk(KERN_ERR "Unable to create route proc files\n");
 #ifdef CONFIG_XFRM

^ permalink raw reply related

* Re: [net-next-2.6 V5 PATCH 2/3] Add ndo_{set|get}_vf_port_profile op support for enic dynamic vnics
From: Arnd Bergmann @ 2010-05-06 16:45 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw
In-Reply-To: <C8083B9D.2EC18%scofeldm@cisco.com>

On Thursday 06 May 2010, Scott Feldman wrote:
> Dynamic enics have all-zero mac address on init, so we assign a random mac
> addr to the interface.  This would seem less funny:
> 
>     if (enic_is_dynamic(enic) && is_zero_ether_addr(addr))
>         random_ether_addr(netdev->dev_addr);
>     else
>         ...
> 
> I'll make that change and resubmit with your VDP additions if you like.

The change is ok, but what I think would be more helpful is a code comment
with your above sentence.

	Arnd

^ permalink raw reply

* Re: [net-next-2.6 V5 PATCH 0/3] Add port-profile netlink support
From: Arnd Bergmann @ 2010-05-06 16:42 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw
In-Reply-To: <C8083A0F.2EC10%scofeldm@cisco.com>

On Thursday 06 May 2010, Scott Feldman wrote:
> On 5/6/10 6:51 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:
> 
> > On Thursday 06 May 2010, Scott Feldman wrote:
> >> The intent of this patch set is to cover both definitions of port-profile
> >> as defined by Cisco's enic use and as defined by VSI discover protocol (VDP),
> >> used in VEPA implementations.  While both definitions are based on pre-
> >> standards, the concept of a port-profile to be applied to an external switch
> >> port on behalf of a virtual machine interface is common, as well as many
> >> of the fields defining the protocols.
> > 
> > The description either no longer matches the patches, or you did not make
> > the
> > changes that were needed based on our last discussion.
> > 
> > What happened to the base-device argument that you were planning to pass?
> 
> Using the IFLA_VF_* model works better for us where the recipient of the
> netlink msg is the PF but the msg is to be applied to the VF.  The third
> patch illustrates how this fits nicely with SR-IOV devices.  The PF is the
> base device.

Ah, got it. I did not notice that you had put a vf field in there.
It now makes a lot more sense to me, and is more in line with what
we need for VDP.

It does however make me wonder how this could be implemented for
a software-only implementation of your protocol that does not refer
to vf numbers. One way would be to define the 'vf' field as implementation
specific and just use the ifindex in this case, which would also work
in case of network namespaces. Alternatively, it could use whatever
tag you use in your wire protocol (e.g. an S-VID)

Both are a bit of a stretch, but I see no technical problems with them.

> > The fields that I mentioned are needed for VDP
> > (associate/pre-associate/disassociate-flag,
> > VLAN ID, etc) are not there. I assume that means we should use a different
> > data structure for VDP, but then your description above should be updated
> > to state that this is no longer common for the two.
> > 
> > I'll follow up with a draft for VDP based on your definitions.
> 
> I tried to accommodate space for VDP, but was hoping you could add the
> definitions on top of what I had since you're more familiar with VDP and can
> do the testing.
> 
> Also, I wasn't sure if you could use the existing IFLA_VF_VLAN msg to apply
> the VLAN ID or if you wanted VLAN ID also added to IFLA_VF_PORT_PROFILE.

The IFLA_VF_VLAN would not work well here because of the issue we discussed
before that I think we need to keep device setup separate from the protocol
exchange. IFLA_VF_VLAN configures the VLAN, while we need to tell the switch
about the configuration.

One (new) point that came up today is that your protocol is actually much
more closely related to the 'CDCP' protocol in 802.1Qbg than to 'VDP'.
I'll also try to make sure that we cover this case as well. It should
also be possible to do VDP over a dynamic enic VF and have multiple guests
using macvtap on that function, and there will probably be adapters that
need to use IFLA_VF_PORT_PROFILE (or another set) as the interface between
libvirt and the adapter firmware for doing CDCP.

To give some background, CDCP is an LLDP extension that is used to create
virtual channels between a physical NIC and the phys bridge on the other side,
using S-VLAN tagging. You can either assign one of these channels to a
guest directly (similar to what enic does), or use VDP on the channel
to connect multiple guests using a bridge device or macvtap in the same
way that we also do VDP on the physical device in the absence of CDCP.

	Arnd

^ permalink raw reply

* Re: [net-next-2.6 V5 PATCH 2/3] Add ndo_{set|get}_vf_port_profile op support for enic dynamic vnics
From: Scott Feldman @ 2010-05-06 16:25 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw
In-Reply-To: <201005061547.22400.arnd@arndb.de>

On 5/6/10 6:47 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> On Thursday 06 May 2010, Scott Feldman wrote:
>> @@ -810,14 +819,24 @@ static void enic_reset_mcaddrs(struct enic *enic)
>>  
>>  static int enic_set_mac_addr(struct net_device *netdev, char *addr)
>>  {
>> -       if (!is_valid_ether_addr(addr))
>> -               return -EADDRNOTAVAIL;
>> +       struct enic *enic = netdev_priv(netdev);
>>  
>> -       memcpy(netdev->dev_addr, addr, netdev->addr_len);
>> +       if (enic_is_dynamic(enic)) {
>> +               random_ether_addr(netdev->dev_addr);
>> +       } else {
>> +               if (!is_valid_ether_addr(addr))
>> +                       return -EADDRNOTAVAIL;
>> +               memcpy(netdev->dev_addr, addr, netdev->addr_len);
>> +       }
>>  
>>         return 0;
>>  }
>>  
>> +static int enic_set_mac_address(struct net_device *netdev, void *p)
>> +{
>> +       return -EOPNOTSUPP;
>> +}
>> +
>>  /* netif_tx_lock held, BHs disabled */
>>  static void enic_set_multicast_list(struct net_device *netdev)
>>  {
> 
> This looks funny. So you just ignore the address that gets passed to
> enic_set_mac_addr for dynamic interfaces and instead set a random
> address?

Dynamic enics have all-zero mac address on init, so we assign a random mac
addr to the interface.  This would seem less funny:

    if (enic_is_dynamic(enic) && is_zero_ether_addr(addr))
        random_ether_addr(netdev->dev_addr);
    else
        ...

I'll make that change and resubmit with your VDP additions if you like.

-scott


^ permalink raw reply

* Re: ixgbe and mac-vlans problem
From: Ben Greear @ 2010-05-06 16:23 UTC (permalink / raw)
  To: Tantilov, Emil S; +Cc: Arnd Bergmann, NetDev, Patrick McHardy
In-Reply-To: <EA929A9653AAE14F841771FB1DE5A1365FE5560B00@rrsmsx501.amr.corp.intel.com>

On 04/30/2010 03:26 PM, Tantilov, Emil S wrote:
> Ben Greear wrote:
>> On 04/30/2010 02:13 PM, Tantilov, Emil S wrote:

>>> I ran a quick test in my setup with 82599 and was able to pass
>>> traffic on all 50 mac-vlans without issues. This is on net-next.
>>
>> For an 82599 system, I can get 127 mac-vlans working out of 500
>> created.
>>
>> That NIC also does not go PROMISC with lots (500) of mac-vlans.
>>
>> Once I put it in promisc mode manually, it works fine.
>>
>> So, I think whatever logic is supposed to put the NIC into promisc
>> mode when it overflows its lookup tables isn't working for ixgbe
>> in 2.6.31.12.
>
> Yeah, you're right. I was able to repro it.
>
> We'll look into it.

I'd be happy to test out a patch if you have one available.

If you don't expect to have one soon, please let me know and
I'll add work-arounds to my code to throw ixgbe NICs into PROMISC
mode manually.

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply

* Re: [net-next-2.6 V5 PATCH 0/3] Add port-profile netlink support
From: Scott Feldman @ 2010-05-06 16:19 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw
In-Reply-To: <201005061551.35254.arnd@arndb.de>

On 5/6/10 6:51 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> On Thursday 06 May 2010, Scott Feldman wrote:
>> The intent of this patch set is to cover both definitions of port-profile
>> as defined by Cisco's enic use and as defined by VSI discover protocol (VDP),
>> used in VEPA implementations.  While both definitions are based on pre-
>> standards, the concept of a port-profile to be applied to an external switch
>> port on behalf of a virtual machine interface is common, as well as many
>> of the fields defining the protocols.
> 
> The description either no longer matches the patches, or you did not make
> the
> changes that were needed based on our last discussion.
> 
> What happened to the base-device argument that you were planning to pass?

Using the IFLA_VF_* model works better for us where the recipient of the
netlink msg is the PF but the msg is to be applied to the VF.  The third
patch illustrates how this fits nicely with SR-IOV devices.  The PF is the
base device.
 
> The fields that I mentioned are needed for VDP
> (associate/pre-associate/disassociate-flag,
> VLAN ID, etc) are not there. I assume that means we should use a different
> data structure for VDP, but then your description above should be updated
> to state that this is no longer common for the two.
> 
> I'll follow up with a draft for VDP based on your definitions.

I tried to accommodate space for VDP, but was hoping you could add the
definitions on top of what I had since you're more familiar with VDP and can
do the testing.

Also, I wasn't sure if you could use the existing IFLA_VF_VLAN msg to apply
the VLAN ID or if you wanted VLAN ID also added to IFLA_VF_PORT_PROFILE.

-scott


^ permalink raw reply

* Re: [PATCH net-next-2.6] net: Consistent skb timestamping
From: Eric Dumazet @ 2010-05-06 16:14 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, netdev
In-Reply-To: <1273160276.2853.27.camel@edumazet-laptop>

Le jeudi 06 mai 2010 à 17:37 +0200, Eric Dumazet a écrit :

> Right now, timestamping is not meant for userland pleasure, but for
> sniffers and network diagnostics. (I mean with current API, not with a
> new one we could add later)
> 
> Once we settle a per socket timestamping, not global, we can reconsider
> the thing (or not reconsider it, since socket timestamping will be done
> after RPS dispatch)
> 
> It's true our global variable to enable/disable timestamp sucks, but it's
> a separate issue ;)
> 
> We probably could have a sysctl to let the admin choose the moment timestamp
> takes place (before or after RPS)

Here is v2 of patch,
introducing /proc/sys/net/core/netdev_tstamp_prequeue

Thanks

[PATCH v2 net-next-2.6] net: Consistent skb timestamping

With RPS inclusion, skb timestamping is not consistent in RX path.

If netif_receive_skb() is used, it's deferred after RPS dispatch.

If netif_rx() is used, it's done before RPS dispatch.

This can give strange tcpdump timestamps results.

I think timestamping should be done as soon as possible in the receive
path, to get meaningful values (ie timestamps taken at the time packet
was delivered by NIC driver to our stack), even if NAPI already can
defer timestamping a bit (RPS can help to reduce the gap)

Tom Herbert prefers to sample timestamps after RPS dispatch. In case
sampling is expensive (HPET/acpi_pm on x86), this makes sense.

Let admins switch from one mode to another, using a new
sysctl, /proc/sys/net/core/netdev_tstamp_prequeue

Its default value (1) means timestamps are taken as soon as possible,
before backlog queueing, giving accurate timestamps.

Setting it to 0 allows timestamps to be sampled when processing the backlog,
after RPS dispatch, to lower the load of the pre-RPS cpu.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 Documentation/sysctl/net.txt |   10 ++++++
 include/linux/netdevice.h    |    1 
 net/core/dev.c               |   50 ++++++++++++++++++++-------------
 net/core/sysctl_net_core.c   |    7 ++++
 4 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index df38ef0..cbd05ff 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -84,6 +84,16 @@ netdev_max_backlog
 Maximum number  of  packets,  queued  on  the  INPUT  side, when the interface
 receives packets faster than kernel can process them.
 
+netdev_tstamp_prequeue
+----------------------
+
+If set to 0, RX packet timestamps can be sampled after RPS processing, when
+the target CPU processes packets. It might give some delay on timestamps, but
+permit to distribute the load on several cpus.
+
+If set to 1 (default), timestamps are sampled as soon as possible, before
+queueing.
+
 optmem_max
 ----------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 69022d4..c1b2341 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2100,6 +2100,7 @@ extern const struct net_device_stats *dev_get_stats(struct net_device *dev);
 extern void		dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats);
 
 extern int		netdev_max_backlog;
+extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
 extern int skb_checksum_help(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 36d53be..1ca4de8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1454,7 +1454,7 @@ void net_disable_timestamp(void)
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
-static inline void net_timestamp(struct sk_buff *skb)
+static inline void net_timestamp_set(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
 		__net_timestamp(skb);
@@ -1462,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb)
 		skb->tstamp.tv64 = 0;
 }
 
+static inline void net_timestamp_check(struct sk_buff *skb)
+{
+	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
+		__net_timestamp(skb);
+}
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1509,9 +1515,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp(skb);
+		net_timestamp_set(skb);
 #else
-	net_timestamp(skb);
+	net_timestamp_set(skb);
 #endif
 
 	rcu_read_lock();
@@ -2202,6 +2208,7 @@ EXPORT_SYMBOL(dev_queue_xmit);
   =======================================================================*/
 
 int netdev_max_backlog __read_mostly = 1000;
+int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
@@ -2458,8 +2465,8 @@ int netif_rx(struct sk_buff *skb)
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
 #ifdef CONFIG_RPS
 	{
@@ -2780,8 +2787,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (!netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
 		return NET_RX_SUCCESS;
@@ -2899,23 +2906,28 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
+
 #ifdef CONFIG_RPS
-	struct rps_dev_flow voidflow, *rflow = &voidflow;
-	int cpu, ret;
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu, ret;
 
-	rcu_read_lock();
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-	cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		if (cpu >= 0) {
+			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+			rcu_read_unlock();
+		} else {
+			rcu_read_unlock();
+			ret = __netif_receive_skb(skb);
+		}
 
-	if (cpu >= 0) {
-		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
-		rcu_read_unlock();
-	} else {
-		rcu_read_unlock();
-		ret = __netif_receive_skb(skb);
+		return ret;
 	}
-
-	return ret;
 #else
 	return __netif_receive_skb(skb);
 #endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index dcc7d25..01eee5d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -122,6 +122,13 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
+		.procname	= "netdev_tstamp_prequeue",
+		.data		= &netdev_tstamp_prequeue,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
 		.procname	= "message_cost",
 		.data		= &net_ratelimit_state.interval,
 		.maxlen		= sizeof(int),



^ permalink raw reply related

* Re: [PATCH/RFC] cxgb4: Add MAINTAINERS info
From: Roland Dreier @ 2010-05-06 16:07 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW, dm-ut6Up61K2wZBDgjK7y7TUQ
In-Reply-To: <4BE25A3D.20800-smomgflXvOZWk0Htik3J/w@public.gmane.org>

 > not sure who's the butterfly that caused this, but this was somehow
 > committed as  "CXGB4 ETHERNET DRIVER (CXGB3)" and same goes for the
 > IW_ piece

Thanks, I think I committed, saw the problem, fixed it up, sent the RFC,
and then pushed my tree.  I fixed it up now.  Pretty impressive eagle
eyes to notice that...
-- 
Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> || For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/index.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH 8/8] tehuti: Remove unnecessary memset of netdev private data
From: Tobias Klauser @ 2010-05-06 15:43 UTC (permalink / raw)
  To: baum, davem, netdev; +Cc: kernel-janitors, andy, Tobias Klauser

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set it to 0 again.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/tehuti.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index e29f495..20ab161 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -2033,7 +2033,6 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/************** priv ****************/
 		priv = nic->priv[port] = netdev_priv(ndev);
 
-		memset(priv, 0, sizeof(struct bdx_priv));
 		priv->pBdxRegs = nic->regs + port * 0x8000;
 		priv->port = port;
 		priv->pdev = pdev;
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 7/8] sunhme: Remove unnecessary memset of netdev private data
From: Tobias Klauser @ 2010-05-06 15:41 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-janitors, Tobias Klauser

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set it to 0 again.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/sunhme.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 20deb14..982ff12 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -3004,7 +3004,6 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev,
 	dev->base_addr = (long) pdev;
 
 	hp = netdev_priv(dev);
-	memset(hp, 0, sizeof(*hp));
 
 	hp->happy_dev = pdev;
 	hp->dma_dev = &pdev->dev;
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 6/8] smc9194: Remove unnecessary memset of netdev private data
From: Tobias Klauser @ 2010-05-06 15:41 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-janitors, Tobias Klauser

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set it to 0 again.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/smc9194.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c
index e94521c..d76c815 100644
--- a/drivers/net/smc9194.c
+++ b/drivers/net/smc9194.c
@@ -1042,9 +1042,6 @@ static int __init smc_probe(struct net_device *dev, int ioaddr)
 	*/
 	printk("ADDR: %pM\n", dev->dev_addr);
 
-	/* set the private data to zero by default */
-	memset(netdev_priv(dev), 0, sizeof(struct smc_local));
-
 	/* Grab the IRQ */
       	retval = request_irq(dev->irq, smc_interrupt, 0, DRV_NAME, dev);
       	if (retval) {
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 5/8] ethoc: Remove unnecessary memset of napi member in netdev private data
From: Tobias Klauser @ 2010-05-06 15:41 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-janitors, Tobias Klauser

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set the napi member to 0 explicitly.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/ethoc.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index 6bd03c8..ad1bc73 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -1040,7 +1040,6 @@ static int ethoc_probe(struct platform_device *pdev)
 	netdev->features |= 0;
 
 	/* setup NAPI */
-	memset(&priv->napi, 0, sizeof(priv->napi));
 	netif_napi_add(netdev, &priv->napi, ethoc_poll, 64);
 
 	spin_lock_init(&priv->rx_lock);
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 4/8] bcm63xx_enet: Remove unnecessary memset of netdev private data
From: Tobias Klauser @ 2010-05-06 15:40 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-janitors, Tobias Klauser

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set it to 0 again.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/bcm63xx_enet.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/bcm63xx_enet.c b/drivers/net/bcm63xx_enet.c
index 9a8bdea..f48ba80 100644
--- a/drivers/net/bcm63xx_enet.c
+++ b/drivers/net/bcm63xx_enet.c
@@ -1647,7 +1647,6 @@ static int __devinit bcm_enet_probe(struct platform_device *pdev)
 	if (!dev)
 		return -ENOMEM;
 	priv = netdev_priv(dev);
-	memset(priv, 0, sizeof(*priv));
 
 	ret = compute_hw_mtu(priv, dev->mtu);
 	if (ret)
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 3/8] KS8695: Remove unnecessary memset of netdev private data
From: Tobias Klauser @ 2010-05-06 15:40 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-janitors, Tobias Klauser

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set it to 0 again.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/arm/ks8695net.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/arm/ks8695net.c b/drivers/net/arm/ks8695net.c
index 7413a87..6404704 100644
--- a/drivers/net/arm/ks8695net.c
+++ b/drivers/net/arm/ks8695net.c
@@ -1472,7 +1472,6 @@ ks8695_probe(struct platform_device *pdev)
 
 	/* Configure our private structure a little */
 	ksp = netdev_priv(ndev);
-	memset(ksp, 0, sizeof(struct ks8695_priv));
 
 	ksp->dev = &pdev->dev;
 	ksp->ndev = ndev;
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 2/8] 3c523: Remove unnecessary memset of netdev private data
From: Tobias Klauser @ 2010-05-06 15:39 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-janitors, Tobias Klauser

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set it to 0 again.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/3c523.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index 8c70686..55d219e 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -503,7 +503,6 @@ static int __init do_elmc_probe(struct net_device *dev)
 		break;
 	}
 
-	memset(pr, 0, sizeof(struct priv));
 	pr->slot = slot;
 
 	pr_info("%s: 3Com 3c523 Rev 0x%x at %#lx\n", dev->name, (int) revision,
-- 
1.6.3.3


^ permalink raw reply related

* [PATCH 1/8] 3c507: Remove unnecessary memset of netdev private data
From: Tobias Klauser @ 2010-05-06 15:39 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-janitors, Tobias Klauser

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set it to 0 again.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/3c507.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/3c507.c b/drivers/net/3c507.c
index b32b7a1..9e95afa 100644
--- a/drivers/net/3c507.c
+++ b/drivers/net/3c507.c
@@ -449,7 +449,6 @@ static int __init el16_probe1(struct net_device *dev, int ioaddr)
 		pr_debug("%s", version);
 
 	lp = netdev_priv(dev);
- 	memset(lp, 0, sizeof(*lp));
 	spin_lock_init(&lp->lock);
 	lp->base = ioremap(dev->mem_start, RX_BUF_END);
 	if (!lp->base) {
-- 
1.6.3.3


^ permalink raw reply related

* Re: [PATCH net-next-2.6] net: Consistent skb timestamping
From: Eric Dumazet @ 2010-05-06 15:37 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, netdev
In-Reply-To: <AANLkTikLgHvtpCtBTKmJZBwixmZDHjRjGb1c59oAemli@mail.gmail.com>

Le jeudi 06 mai 2010 à 08:12 -0700, Tom Herbert a écrit :
> On Thu, May 6, 2010 at 5:01 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > With RPS inclusion, skb timestamping is not consistent in RX path.
> >
> > If netif_receive_skb() is used, it's deferred after RPS dispatch.
> >
> > If netif_rx() is used, it's done before RPS dispatch.
> >
> > This can give strange tcpdump timestamps results.
> >
> > I think timestamping should be done as soon as possible in the receive
> > path, to get meaningful values (ie timestamps taken at the time packet
> > was delivered by NIC driver to our stack), even if NAPI already can
> > defer timestamping a bit (RPS can help to reduce the gap)
> >
> The counter argument to this is that it moves another thing into the
> serialized path for networking which slows everyone down.  I'm not
> concerned about when tcpdump is running since performance will suck
> anyway, but what is bad is if any single socket in the system turns on
> SO_TIMESTAMP, overhead is incurred on *every* packet.  This happens
> regardless of whether the application ever actually gets a timestamp,
> or even whether timestamps are supported by the protocol (try setting
> SO_TIMESTAMP on a TCP socket ;-) ).  I'm contemplating changing
> SO_TIMESTAMP to not enable global timestamps, but only take the
> timestamp for a packet once the socket is identified and the timestamp
> flag is set (this is the technique done in FreeBSD and Solaris, so I
> believe the external semantics would still be valid).

I agree with you, thanks for this excellent argument.

Right now, timestamping is not meant for userland pleasure, but for
sniffers and network diagnostics. (I mean with current API, not with a
new one we could add later)

Once we settle on per-socket timestamping, not global, we can reconsider
the thing (or not reconsider it, since socket timestamping will be done
after RPS dispatch)

It's true our global variable to enable/disable timestamp sucks, but it's
a separate issue ;)

We probably could have a sysctl to let admin choose the moment timestamp
takes place (before or after RPS)

If TSC is available, here is the "perf top" of the cpu handling
1.200.000 packets per second, while timestamping is requested :
You can hardly see anything about time services :

--------------------------------------------------------------------------------------------------------------------------
   PerfTop:     983 irqs/sec  kernel:99.5% [1000Hz cycles],  (all, cpu: 10)
--------------------------------------------------------------------------------------------------------------------------

             samples  pcnt function                            DSO
             _______ _____ ___________________________________ _______

             1568.00 14.9% bnx2x_rx_int                        vmlinux
             1133.00 10.7% eth_type_trans                      vmlinux
              798.00  7.6% kmem_cache_alloc_node               vmlinux
              720.00  6.8% _raw_spin_lock                      vmlinux
              709.00  6.7% __kmalloc_node_track_caller         vmlinux
              547.00  5.2% __memset                            vmlinux
              540.00  5.1% __slab_alloc                        vmlinux
              453.00  4.3% get_rps_cpu                         vmlinux
              402.00  3.8% _raw_spin_lock_irqsave              vmlinux
              295.00  2.8% enqueue_to_backlog                  vmlinux
              271.00  2.6% default_send_IPI_mask_sequence_phys vmlinux
              259.00  2.5% get_partial_node                    vmlinux
              235.00  2.2% __alloc_skb                         vmlinux
              227.00  2.2% vlan_gro_common                     vmlinux
              206.00  2.0% swiotlb_dma_mapping_error           vmlinux
              201.00  1.9% skb_put                             vmlinux
              118.00  1.1% getnstimeofday                      vmlinux
               97.00  0.9% csd_lock                            vmlinux
               96.00  0.9% swiotlb_map_page                    vmlinux
               85.00  0.8% read_tsc                            vmlinux
               76.00  0.7% dev_gro_receive                     vmlinux
               75.00  0.7% __napi_complete                     vmlinux
               74.00  0.7% bnx2x_poll                          vmlinux
               73.00  0.7% unmap_single                        vmlinux
               72.00  0.7% netif_receive_skb                   vmlinux
               66.00  0.6% irq_entries_start                   vmlinux
               65.00  0.6% net_rps_action_and_irq_enable       vmlinux
               62.00  0.6% __phys_addr                         vmlinux


If HPET or acpi_pm is used, then you can cry :)
 (820.000 pps, or 570.000 pps max)
--------------------------------------------------------------------------------------------------------------------------
   PerfTop:    1001 irqs/sec  kernel:100.0% [1000Hz cycles],  (all, cpu: 10)
--------------------------------------------------------------------------------------------------------------------------

             samples  pcnt function                            DSO
             _______ _____ ___________________________________ _______

             6488.00 48.4% read_hpet                           vmlinux
             1214.00  9.1% bnx2x_rx_int                        vmlinux
              820.00  6.1% eth_type_trans                      vmlinux
              679.00  5.1% _raw_spin_lock                      vmlinux
              678.00  5.1% kmem_cache_alloc_node               vmlinux
              607.00  4.5% __slab_alloc                        vmlinux
              478.00  3.6% __kmalloc_node_track_caller         vmlinux
              404.00  3.0% __memset                            vmlinux
              246.00  1.8% get_partial_node                    vmlinux
              213.00  1.6% get_rps_cpu                         vmlinux
              195.00  1.5% enqueue_to_backlog                  vmlinux
              171.00  1.3% __alloc_skb                         vmlinux
              163.00  1.2% vlan_gro_common                     vmlinux
              135.00  1.0% swiotlb_dma_mapping_error           vmlinux
              118.00  0.9% skb_put                             vmlinux
               88.00  0.7% getnstimeofday                      vmlinux
               60.00  0.4% swiotlb_map_page                    vmlinux
               59.00  0.4% dev_gro_receive                     vmlinux

--------------------------------------------------------------------------------------------------------------------------
   PerfTop:    1001 irqs/sec  kernel:100.0% [1000Hz cycles],  (all, cpu: 10)
--------------------------------------------------------------------------------------------------------------------------

             samples  pcnt function                            DSO
             _______ _____ ___________________________________ _______

             2573.00 68.3% acpi_pm_read                        vmlinux
              237.00  6.3% bnx2x_rx_int                        vmlinux
              153.00  4.1% eth_type_trans                      vmlinux
              101.00  2.7% kmem_cache_alloc_node               vmlinux
               99.00  2.6% __kmalloc_node_track_caller         vmlinux
               79.00  2.1% get_rps_cpu                         vmlinux
               75.00  2.0% __memset                            vmlinux
               72.00  1.9% _raw_spin_lock                      vmlinux
               68.00  1.8% __slab_alloc                        vmlinux
               40.00  1.1% enqueue_to_backlog                  vmlinux
               39.00  1.0% __alloc_skb                         vmlinux
               27.00  0.7% get_partial_node                    vmlinux
               23.00  0.6% swiotlb_dma_mapping_error           vmlinux
               22.00  0.6% vlan_gro_common                     vmlinux



^ permalink raw reply

* Re: [PATCH net-next-2.6] net: Consistent skb timestamping
From: Tom Herbert @ 2010-05-06 15:12 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1273147309.2357.59.camel@edumazet-laptop>

On Thu, May 6, 2010 at 5:01 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> With RPS inclusion, skb timestamping is not consistent in RX path.
>
> If netif_receive_skb() is used, it's deferred after RPS dispatch.
>
> If netif_rx() is used, it's done before RPS dispatch.
>
> This can give strange tcpdump timestamps results.
>
> I think timestamping should be done as soon as possible in the receive
> path, to get meaningful values (ie timestamps taken at the time packet
> was delivered by NIC driver to our stack), even if NAPI already can
> defer timestamping a bit (RPS can help to reduce the gap)
>
The counter argument to this is that it moves another thing into the
serialized path for networking which slows everyone down.  I'm not
concerned about when tcpdump is running since performance will suck
anyway, but what is bad is if any single socket in the system turns on
SO_TIMESTAMP, overhead is incurred on *every* packet.  This happens
regardless of whether the application ever actually gets a timestamp,
or even whether timestamps are supported by the protocol (try setting
SO_TIMESTAMP on a TCP socket ;-) ).  I'm contemplating changing
SO_TIMESTAMP to not enable global timestamps, but only take the
timestamp for a packet once the socket is identified and the timestamp
flag is set (this is the technique done in FreeBSD and Solaris, so I
believe the external semantics would still be valid).

> Remove timestamping from __netif_receive_skb, and add it to
> netif_receive_skb(), before RPS.
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  net/core/dev.c |   46 ++++++++++++++++++++++++++--------------------
>  1 file changed, 26 insertions(+), 20 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 36d53be..3278003 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1454,7 +1454,7 @@ void net_disable_timestamp(void)
>  }
>  EXPORT_SYMBOL(net_disable_timestamp);
>
> -static inline void net_timestamp(struct sk_buff *skb)
> +static inline void net_timestamp_set(struct sk_buff *skb)
>  {
>        if (atomic_read(&netstamp_needed))
>                __net_timestamp(skb);
> @@ -1462,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb)
>                skb->tstamp.tv64 = 0;
>  }
>
> +static inline void net_timestamp_check(struct sk_buff *skb)
> +{
> +       if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
> +               __net_timestamp(skb);
> +}
> +
>  /**
>  * dev_forward_skb - loopback an skb to another netif
>  *
> @@ -1509,9 +1515,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
>
>  #ifdef CONFIG_NET_CLS_ACT
>        if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
> -               net_timestamp(skb);
> +               net_timestamp_set(skb);
>  #else
> -       net_timestamp(skb);
> +       net_timestamp_set(skb);
>  #endif
>
>        rcu_read_lock();
> @@ -2458,8 +2464,7 @@ int netif_rx(struct sk_buff *skb)
>        if (netpoll_rx(skb))
>                return NET_RX_DROP;
>
> -       if (!skb->tstamp.tv64)
> -               net_timestamp(skb);
> +       net_timestamp_check(skb);
>
>  #ifdef CONFIG_RPS
>        {
> @@ -2780,9 +2785,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
>        int ret = NET_RX_DROP;
>        __be16 type;
>
> -       if (!skb->tstamp.tv64)
> -               net_timestamp(skb);
> -
>        if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
>                return NET_RX_SUCCESS;
>
> @@ -2899,23 +2901,27 @@ out:
>  */
>  int netif_receive_skb(struct sk_buff *skb)
>  {
> +       net_timestamp_check(skb);
> +
>  #ifdef CONFIG_RPS
> -       struct rps_dev_flow voidflow, *rflow = &voidflow;
> -       int cpu, ret;
> +       {
> +               struct rps_dev_flow voidflow, *rflow = &voidflow;
> +               int cpu, ret;
>
> -       rcu_read_lock();
> +               rcu_read_lock();
>
> -       cpu = get_rps_cpu(skb->dev, skb, &rflow);
> +               cpu = get_rps_cpu(skb->dev, skb, &rflow);
>
> -       if (cpu >= 0) {
> -               ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
> -               rcu_read_unlock();
> -       } else {
> -               rcu_read_unlock();
> -               ret = __netif_receive_skb(skb);
> -       }
> +               if (cpu >= 0) {
> +                       ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
> +                       rcu_read_unlock();
> +               } else {
> +                       rcu_read_unlock();
> +                       ret = __netif_receive_skb(skb);
> +               }
>
> -       return ret;
> +               return ret;
> +       }
>  #else
>        return __netif_receive_skb(skb);
>  #endif
>
>
>

^ permalink raw reply

* Re: 2.6.33.2: Turn tx power off/on for Atheros card
From: Yegor Yefremov @ 2010-05-06 14:52 UTC (permalink / raw)
  To: linux-wireless; +Cc: netdev
In-Reply-To: <r2xf69abfc31005050326oe123cb60s131ab4969341ef57@mail.gmail.com>

On Wed, May 5, 2010 at 12:26 PM, Yegor Yefremov
<yegorslists@googlemail.com> wrote:
> I'm using kernel 2.6.33.2 with AR2413 WLAN card. Issuing
>
> iwconfig wlan0 txpower off
>
> turns txpower off. I can see this status by iwconfig wlan0 and the
> communication with AP terminates. But when I turn the txpower on
>
> iwconfig wlan0 txpower on
>
> nothing happens. Though iwconfig shows the previous tx power value.
> Only ifconfig wlan0 down and then up recovers the transmission.
>
> Is it a known bug or am I doing something wrong?

I did some debugging and found out that after iwconfig wlan0 txpower
off dev_close() will be invoked, so that local->open_count will be 0.
The next time txpower on will be called, it will be checked if
local->open_count > 0 and this condition fails, so no hardware
configuration will be made.

I've made a quick and dirty hack that opens the wireless device by
enabling the txpower, if it was closed before. Is there any proper
solution? Is it really necessary to close the device to turn txpower off?

Best regards,
Yegor

Index: b/net/wireless/wext-compat.c
===================================================================
--- a/net/wireless/wext-compat.c	2010-04-30 05:02:05.000000000 +0200
+++ b/net/wireless/wext-compat.c	2010-05-06 16:31:20.000000000 +0200
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <net/iw_handler.h>
 #include <net/cfg80211.h>
+#include "../mac80211/ieee80211_i.h"
 #include "wext-compat.h"
 #include "core.h"

@@ -824,6 +825,7 @@
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct ieee80211_local *local = wiphy_priv(wdev->wiphy);
 	enum tx_power_setting type;
 	int dbm = 0;

@@ -861,6 +863,8 @@
 				type = TX_POWER_LIMITED;
 			}
 		}
+		if(!local->open_count)
+			dev_open(wdev->netdev);
 	} else {
 		rfkill_set_sw_state(rdev->rfkill, true);
 		schedule_work(&rdev->rfkill_sync);

^ permalink raw reply

* Re: [PATCH] ipv6: udp: make short packet logging consistent with ipv4
From: Eric Dumazet @ 2010-05-06 14:49 UTC (permalink / raw)
  To: Bjørn Mork; +Cc: netdev
In-Reply-To: <1273153475-32363-2-git-send-email-bjorn@mork.no>

Le jeudi 06 mai 2010 à 15:44 +0200, Bjørn Mork a écrit :
> Adding addresses and ports to the short packet log message,
> like ipv4/udp.c does it, makes these messages a lot more useful:
> 
> [  822.182450] UDPv6: short packet: From [2001:db8:ffb4:3::1]:47839 23715/178 to [2001:db8:ffb4:3:5054:ff:feff:200]:1234
> 
> This requires us to drop logging in case pskb_may_pull() fails,
> which also is consistent with ipv4/udp.c
> 
> Signed-off-by: Bjørn Mork <bjorn@mork.no>
> ---
>  net/ipv6/udp.c |   11 ++++++++---
>  1 files changed, 8 insertions(+), 3 deletions(-)

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>



^ permalink raw reply

* Re: [PATCH] ipv4: udp: fix short packet and bad checksum logging
From: Eric Dumazet @ 2010-05-06 14:48 UTC (permalink / raw)
  To: Bjørn Mork; +Cc: netdev, stable
In-Reply-To: <1273153475-32363-1-git-send-email-bjorn@mork.no>

Le jeudi 06 mai 2010 à 15:44 +0200, Bjørn Mork a écrit :
> commit 2783ef23 moved the initialisation of saddr and daddr after
> pskb_may_pull() to avoid a potential data corruption.  Unfortunately
> also placing it after the short packet and bad checksum error paths,
> where these variables are used for logging.  The result is bogus
> output like
> 
> [92238.389505] UDP: short packet: From 2.0.0.0:65535 23715/178 to 0.0.0.0:65535
> 
> Moving the saddr and daddr initialisation above the error paths, while still
> keeping it after the pskb_may_pull() to keep the fix from commit 2783ef23.
> 
> Signed-off-by: Bjørn Mork <bjorn@mork.no>
> Cc: stable@kernel.org
> ---
>  net/ipv4/udp.c |    6 +++---
>  1 files changed, 3 insertions(+), 3 deletions(-)

Well done :)

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

To be backported to 2.6.29 and up kernels ;)




^ permalink raw reply

* Re: [PATCH net-next-2.6] rps: consistent rxhash
From: Tom Herbert @ 2010-05-06 14:45 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, franco, xiaosuo, netdev
In-Reply-To: <20100506.010651.173849727.davem@davemloft.net>

On Thu, May 6, 2010 at 1:06 AM, David Miller <davem@davemloft.net> wrote:
> From: Tom Herbert <therbert@google.com>
> Date: Wed, 21 Apr 2010 12:12:41 -0700
>
>> On Tue, Apr 20, 2010 at 2:41 PM, David Miller <davem@davemloft.net> wrote:
>>> From: Eric Dumazet <eric.dumazet@gmail.com>
>>> Date: Tue, 20 Apr 2010 16:57:01 +0200
>>>
>>>> I know many applications using TCP on loopback, they are real :)
>>>
>>> This is all true and I support your hashing patch and all of that.
>>>
>>> But if we really want TCP over loopback to go fast, there are much
>>> better ways to do this.
>>>
>>> Eric, do you remember that "TCP friends" rough patch I sent you last
>>> year that essentially made TCP sockets over loopback behave like
>>> AF_UNIX ones and just queue the SKBs directly to the destination
>>> socket without doing any protocol work?
>>>
>> This sounds very interesting!  Could you post a patch? :-)
>
> I was finally able to unearth a copy, it's completely raw, it's at least
> a year old, and it's not fully implemented at all.
>
> But you asked for it :-)
>
Thanks!  We'll take a look... I've always thought sockets should have
friends :-)

> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 299ec4b..7f855d3 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -206,6 +206,7 @@ typedef unsigned char *sk_buff_data_t;
>  *     @mac_header: Link layer header
>  *     @dst: destination entry
>  *     @sp: the security path, used for xfrm
> + *     @friend: loopback friend socket
>  *     @cb: Control buffer. Free for use by every layer. Put private vars here
>  *     @len: Length of actual data
>  *     @data_len: Data length
> @@ -262,6 +263,7 @@ struct sk_buff {
>                struct  rtable          *rtable;
>        };
>        struct  sec_path        *sp;
> +       struct sock             *friend;
>
>        /*
>         * This is the control buffer. It is free to use for every
> diff --git a/include/net/request_sock.h b/include/net/request_sock.h
> index b220b5f..52b2f7a 100644
> --- a/include/net/request_sock.h
> +++ b/include/net/request_sock.h
> @@ -53,6 +53,7 @@ struct request_sock {
>        unsigned long                   expires;
>        const struct request_sock_ops   *rsk_ops;
>        struct sock                     *sk;
> +       struct sock                     *friend;
>        u32                             secid;
>        u32                             peer_secid;
>  };
> diff --git a/include/net/sock.h b/include/net/sock.h
> index dc42b44..3e86190 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -137,6 +137,7 @@ struct sock_common {
>   *    @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
>   *    @sk_lock:       synchronizer
>   *    @sk_rcvbuf: size of receive buffer in bytes
> +  *    @sk_friend: loopback friend socket
>   *    @sk_sleep: sock wait queue
>   *    @sk_dst_cache: destination cache
>   *    @sk_dst_lock: destination cache lock
> @@ -227,6 +228,7 @@ struct sock {
>                struct sk_buff *head;
>                struct sk_buff *tail;
>        } sk_backlog;
> +       struct sock             *sk_friend;
>        wait_queue_head_t       *sk_sleep;
>        struct dst_entry        *sk_dst_cache;
>        struct xfrm_policy      *sk_policy[2];
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 4fe605f..0eef90a 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -435,6 +435,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
>  #ifdef CONFIG_INET
>        new->sp                 = secpath_get(old->sp);
>  #endif
> +       new->friend             = old->friend;
>        memcpy(new->cb, old->cb, sizeof(old->cb));
>        new->csum_start         = old->csum_start;
>        new->csum_offset        = old->csum_offset;
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 828ea21..375dc2e 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -503,6 +503,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
>        if (newsk != NULL) {
>                struct inet_connection_sock *newicsk = inet_csk(newsk);
>
> +               newsk->sk_friend = req->friend;
> +
>                newsk->sk_state = TCP_SYN_RECV;
>                newicsk->icsk_bind_hash = NULL;
>
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 58ac838..042ee1d 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -474,7 +474,8 @@ static inline int forced_push(struct tcp_sock *tp)
>        return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
>  }
>
> -static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
> +static inline void skb_entail(struct sock *sk, struct sk_buff *skb,
> +                             struct sk_buff_head *friend_queue)
>  {
>        struct tcp_sock *tp = tcp_sk(sk);
>        struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
> @@ -484,7 +485,10 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
>        tcb->flags   = TCPCB_FLAG_ACK;
>        tcb->sacked  = 0;
>        skb_header_release(skb);
> -       tcp_add_write_queue_tail(sk, skb);
> +       if (sk->sk_friend)
> +               __skb_queue_tail(friend_queue, skb);
> +       else
> +               tcp_add_write_queue_tail(sk, skb);
>        sk->sk_wmem_queued += skb->truesize;
>        sk_mem_charge(sk, skb->truesize);
>        if (tp->nonagle & TCP_NAGLE_PUSH)
> @@ -501,7 +505,7 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
>  }
>
>  static inline void tcp_push(struct sock *sk, int flags, int mss_now,
> -                           int nonagle)
> +                           int nonagle, struct sk_buff_head *friend_queue)
>  {
>        struct tcp_sock *tp = tcp_sk(sk);
>
> @@ -512,6 +516,19 @@ static inline void tcp_push(struct sock *sk, int flags, int mss_now,
>                tcp_mark_urg(tp, flags, skb);
>                __tcp_push_pending_frames(sk, mss_now,
>                                          (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
> +       } else if (sk->sk_friend) {
> +               struct sock *friend = sk->sk_friend;
> +               struct sk_buff *skb;
> +               unsigned int len;
> +
> +               spin_lock_bh(&friend->sk_lock.slock);
> +               len = 0;
> +               while ((skb = __skb_dequeue(friend_queue)) != NULL) {
> +                       len += skb->len;
> +                       __skb_queue_tail(&sk->sk_receive_queue, skb);
> +               }
> +               sk->sk_data_ready(friend, len);
> +               spin_unlock_bh(&friend->sk_lock.slock);
>        }
>  }
>
> @@ -658,6 +675,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
>                         size_t psize, int flags)
>  {
>        struct tcp_sock *tp = tcp_sk(sk);
> +       struct sk_buff_head friend_queue;
>        int mss_now, size_goal;
>        int err;
>        ssize_t copied;
> @@ -674,6 +692,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
>        size_goal = tp->xmit_size_goal;
>        copied = 0;
>
> +       skb_queue_head_init(&friend_queue);
> +
>        err = -EPIPE;
>        if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
>                goto do_error;
> @@ -694,7 +714,7 @@ new_segment:
>                        if (!skb)
>                                goto wait_for_memory;
>
> -                       skb_entail(sk, skb);
> +                       skb_entail(sk, skb, &friend_queue);
>                        copy = size_goal;
>                }
>
> @@ -749,7 +769,8 @@ wait_for_sndbuf:
>                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
>  wait_for_memory:
>                if (copied)
> -                       tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
> +                       tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH,
> +                                &friend_queue);
>
>                if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
>                        goto do_error;
> @@ -760,7 +781,7 @@ wait_for_memory:
>
>  out:
>        if (copied)
> -               tcp_push(sk, flags, mss_now, tp->nonagle);
> +               tcp_push(sk, flags, mss_now, tp->nonagle, &friend_queue);
>        return copied;
>
>  do_error:
> @@ -817,6 +838,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
>        struct sock *sk = sock->sk;
>        struct iovec *iov;
>        struct tcp_sock *tp = tcp_sk(sk);
> +       struct sk_buff_head friend_queue;
>        struct sk_buff *skb;
>        int iovlen, flags;
>        int mss_now, size_goal;
> @@ -849,6 +871,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
>        if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
>                goto do_error;
>
> +       skb_queue_head_init(&friend_queue);
>        while (--iovlen >= 0) {
>                int seglen = iov->iov_len;
>                unsigned char __user *from = iov->iov_base;
> @@ -881,7 +904,7 @@ new_segment:
>                                if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
>                                        skb->ip_summed = CHECKSUM_PARTIAL;
>
> -                               skb_entail(sk, skb);
> +                               skb_entail(sk, skb, &friend_queue);
>                                copy = size_goal;
>                        }
>
> @@ -995,7 +1018,8 @@ wait_for_sndbuf:
>                        set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
>  wait_for_memory:
>                        if (copied)
> -                               tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
> +                               tcp_push(sk, flags & ~MSG_MORE, mss_now,
> +                                        TCP_NAGLE_PUSH, &friend_queue);
>
>                        if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
>                                goto do_error;
> @@ -1007,7 +1031,7 @@ wait_for_memory:
>
>  out:
>        if (copied)
> -               tcp_push(sk, flags, mss_now, tp->nonagle);
> +               tcp_push(sk, flags, mss_now, tp->nonagle, &friend_queue);
>        TCP_CHECK_TIMER(sk);
>        release_sock(sk);
>        return copied;
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index cdc051b..eb6f914 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4998,6 +4998,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
>                 *    state to ESTABLISHED..."
>                 */
>
> +               sk->sk_friend = skb->friend;
>                TCP_ECN_rcv_synack(tp, th);
>
>                tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 7766151..4d91ff4 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1289,6 +1289,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
>        if (!req)
>                goto drop;
>
> +       req->friend = skb->friend;
>  #ifdef CONFIG_TCP_MD5SIG
>        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
>  #endif
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index debf235..a4d4c14 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -577,6 +577,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
>        }
>
>        if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
> +               skb->friend = sk;
>                tcp_syn_build_options((__be32 *)(th + 1),
>                                      tcp_advertise_mss(sk),
>                                      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
> @@ -1006,6 +1007,8 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
>                xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
>                xmit_size_goal -= (xmit_size_goal % mss_now);
>        }
> +       if (sk->sk_friend)
> +               xmit_size_goal = ~(u16)0;
>        tp->xmit_size_goal = xmit_size_goal;
>
>        return mss_now;
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 715965f..c79d3ea 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1280,6 +1280,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
>        if (req == NULL)
>                goto drop;
>
> +       req->friend = skb->friend;
>  #ifdef CONFIG_TCP_MD5SIG
>        tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
>  #endif
>

^ permalink raw reply

* r8169 transmit queue time outs
From: Kyle McMartin @ 2010-05-06 14:17 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev

Hi Francois,

Some of our users have been seeing their r8169 cards just up and stop
transmitting packets pretty quickly after boot with recent kernels.

After trying a few things:
 1- Updating the driver to the latest upstream (and -next).
 2- Downgrading the driver to the last known working release version.

Nothing seems to have helped.

I take it from the fact that the last known working driver no longer
works on a newer kernel to mean that something has changed in the core
net code that makes the r8169 driver particularly unhappy?

The WARN_ON getting triggered is:
------------[ cut here ]------------
WARNING: at net/sched/sch_generic.c:246 dev_watchdog+0xc6/0x12d() (Not
tainted)
Hardware name: VX800
NETDEV WATCHDOG: eth0 (r8169): transmit queue 0 timed out
Modules linked in: sunrpc ip6_tables cpufreq_ondemand acpi_cpufreq
dm_multipath
uinput snd_hda_codec_via snd_hda_intel snd_hda_codec snd_hwdep snd_seq
snd_seq_device snd_pcm snd_timer i2c_viapro snd soundcore snd_page_alloc
r8169
serio_raw i2c_core mii pata_acpi ata_generic pata_via [last unloaded:
scsi_wait_scan]
Pid: 0, comm: swapper Not tainted 2.6.31.5-127.fc12.i686.PAE #1
Call Trace:
 [<c043db03>] warn_slowpath_common+0x70/0x87
 [<c06ff9d0>] ? dev_watchdog+0xc6/0x12d
 [<c043db58>] warn_slowpath_fmt+0x29/0x2c
 [<c06ff9d0>] dev_watchdog+0xc6/0x12d
 [<c04471bd>] ? mod_timer+0x20/0x27
 [<c0680021>] ? usb_hcd_poll_rh_status+0x126/0x12e
 [<c06ff90a>] ? dev_watchdog+0x0/0x12d
 [<c0446e91>] run_timer_softirq+0x14e/0x1af
 [<c0442daa>] __do_softirq+0xb1/0x157
 [<c0442e86>] do_softirq+0x36/0x41
 [<c0442f78>] irq_exit+0x2e/0x61
 [<c041cf17>] smp_apic_timer_interrupt+0x6d/0x7b
 [<c04099b5>] apic_timer_interrupt+0x31/0x38
 [<c040f34b>] ? mwait_idle+0x67/0x85
 [<c040811f>] cpu_idle+0x96/0xaf
 [<c0765784>] rest_init+0x58/0x5a
 [<c09a78c3>] start_kernel+0x32b/0x330
 [<c09a7081>] i386_start_kernel+0x70/0x77
---[ end trace 8eb83276b856939b ]---  

And the RH bugzilla ref is:
https://bugzilla.redhat.com/show_bug.cgi?id=538920

I'd be happy to do whatever is necessary to help figure this out.

regards, Kyle

^ permalink raw reply

* [PATCH] ipv4: udp: fix short packet and bad checksum logging
From: Bjørn Mork @ 2010-05-06 13:44 UTC (permalink / raw)
  To: netdev; +Cc: Bjørn Mork, stable

commit 2783ef23 moved the initialisation of saddr and daddr after
pskb_may_pull() to avoid a potential data corruption.  Unfortunately
also placing it after the short packet and bad checksum error paths,
where these variables are used for logging.  The result is bogus
output like

[92238.389505] UDP: short packet: From 2.0.0.0:65535 23715/178 to 0.0.0.0:65535

Moving the saddr and daddr initialisation above the error paths, while still
keeping it after the pskb_may_pull() to keep the fix from commit 2783ef23.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Cc: stable@kernel.org
---
 net/ipv4/udp.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8fef859..c36522a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1527,6 +1527,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 
 	uh   = udp_hdr(skb);
 	ulen = ntohs(uh->len);
+	saddr = ip_hdr(skb)->saddr;
+	daddr = ip_hdr(skb)->daddr;
+
 	if (ulen > skb->len)
 		goto short_packet;
 
@@ -1540,9 +1543,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (udp4_csum_init(skb, uh, proto))
 		goto csum_error;
 
-	saddr = ip_hdr(skb)->saddr;
-	daddr = ip_hdr(skb)->daddr;
-
 	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 		return __udp4_lib_mcast_deliver(net, skb, uh,
 				saddr, daddr, udptable);
-- 
1.5.6.5


^ permalink raw reply related

* [PATCH] ipv6: udp: make short packet logging consistent with ipv4
From: Bjørn Mork @ 2010-05-06 13:44 UTC (permalink / raw)
  To: netdev; +Cc: Bjørn Mork
In-Reply-To: <1273153475-32363-1-git-send-email-bjorn@mork.no>

Adding addresses and ports to the short packet log message,
as ipv4/udp.c does, makes these messages a lot more useful:

[  822.182450] UDPv6: short packet: From [2001:db8:ffb4:3::1]:47839 23715/178 to [2001:db8:ffb4:3:5054:ff:feff:200]:1234

This requires us to drop logging in case pskb_may_pull() fails,
which is also consistent with ipv4/udp.c.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
---
 net/ipv6/udp.c |   11 ++++++++---
 1 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9082485..d799244 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -692,7 +692,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	u32 ulen = 0;
 
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-		goto short_packet;
+		goto discard;
 
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = &ipv6_hdr(skb)->daddr;
@@ -770,9 +770,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	return 0;
 
 short_packet:
-	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
+	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
 		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
-		       ulen, skb->len);
+		       saddr,
+		       ntohs(uh->source),
+		       ulen,
+		       skb->len,
+		       daddr,
+		       ntohs(uh->dest));
 
 discard:
 	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
-- 
1.5.6.5


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox