Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next] pktgen: Use pr_debug
From: Joe Perches @ 2012-05-17  3:50 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel

Convert printk(KERN_DEBUG ...) calls to pr_debug, which
allows them to be controlled by dynamic debugging.

Remove embedded prefixes from the conversions as
pr_fmt adds them.

Align arguments.

Signed-off-by: Joe Perches <joe@perches.com>
---
 net/core/pktgen.c |   41 ++++++++++++++++++-----------------------
 1 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3391257..d22509b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -891,8 +891,8 @@ static ssize_t pktgen_if_write(struct file *file,
 		if (copy_from_user(tb, user_buffer, copy))
 			return -EFAULT;
 		tb[copy] = 0;
-		printk(KERN_DEBUG "pktgen: %s,%lu  buffer -:%s:-\n", name,
-		       (unsigned long)count, tb);
+		pr_debug("%s,%lu  buffer -:%s:-\n",
+			 name, (unsigned long)count, tb);
 	}
 
 	if (!strcmp(name, "min_pkt_size")) {
@@ -1261,8 +1261,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->cur_daddr = pkt_dev->daddr_min;
 		}
 		if (debug)
-			printk(KERN_DEBUG "pktgen: dst_min set to: %s\n",
-			       pkt_dev->dst_min);
+			pr_debug("dst_min set to: %s\n", pkt_dev->dst_min);
 		i += len;
 		sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min);
 		return count;
@@ -1284,8 +1283,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->cur_daddr = pkt_dev->daddr_max;
 		}
 		if (debug)
-			printk(KERN_DEBUG "pktgen: dst_max set to: %s\n",
-			       pkt_dev->dst_max);
+			pr_debug("dst_max set to: %s\n", pkt_dev->dst_max);
 		i += len;
 		sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max);
 		return count;
@@ -1307,7 +1305,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr;
 
 		if (debug)
-			printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
+			pr_debug("dst6 set to: %s\n", buf);
 
 		i += len;
 		sprintf(pg_result, "OK: dst6=%s", buf);
@@ -1329,7 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file,
 
 		pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr;
 		if (debug)
-			printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
+			pr_debug("dst6_min set to: %s\n", buf);
 
 		i += len;
 		sprintf(pg_result, "OK: dst6_min=%s", buf);
@@ -1350,7 +1348,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr);
 
 		if (debug)
-			printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf);
+			pr_debug("dst6_max set to: %s\n", buf);
 
 		i += len;
 		sprintf(pg_result, "OK: dst6_max=%s", buf);
@@ -1373,7 +1371,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr;
 
 		if (debug)
-			printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
+			pr_debug("src6 set to: %s\n", buf);
 
 		i += len;
 		sprintf(pg_result, "OK: src6=%s", buf);
@@ -1394,8 +1392,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->cur_saddr = pkt_dev->saddr_min;
 		}
 		if (debug)
-			printk(KERN_DEBUG "pktgen: src_min set to: %s\n",
-			       pkt_dev->src_min);
+			pr_debug("src_min set to: %s\n", pkt_dev->src_min);
 		i += len;
 		sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min);
 		return count;
@@ -1415,8 +1412,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->cur_saddr = pkt_dev->saddr_max;
 		}
 		if (debug)
-			printk(KERN_DEBUG "pktgen: src_max set to: %s\n",
-			       pkt_dev->src_max);
+			pr_debug("src_max set to: %s\n", pkt_dev->src_max);
 		i += len;
 		sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max);
 		return count;
@@ -1527,7 +1523,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->svlan_id = 0xffff;
 
 			if (debug)
-				printk(KERN_DEBUG "pktgen: VLAN/SVLAN auto turned off\n");
+				pr_debug("VLAN/SVLAN auto turned off\n");
 		}
 		return count;
 	}
@@ -1542,10 +1538,10 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->vlan_id = value;  /* turn on VLAN */
 
 			if (debug)
-				printk(KERN_DEBUG "pktgen: VLAN turned on\n");
+				pr_debug("VLAN turned on\n");
 
 			if (debug && pkt_dev->nr_labels)
-				printk(KERN_DEBUG "pktgen: MPLS auto turned off\n");
+				pr_debug("MPLS auto turned off\n");
 
 			pkt_dev->nr_labels = 0;    /* turn off MPLS */
 			sprintf(pg_result, "OK: vlan_id=%u", pkt_dev->vlan_id);
@@ -1554,7 +1550,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->svlan_id = 0xffff;
 
 			if (debug)
-				printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n");
+				pr_debug("VLAN/SVLAN turned off\n");
 		}
 		return count;
 	}
@@ -1599,10 +1595,10 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->svlan_id = value;  /* turn on SVLAN */
 
 			if (debug)
-				printk(KERN_DEBUG "pktgen: SVLAN turned on\n");
+				pr_debug("SVLAN turned on\n");
 
 			if (debug && pkt_dev->nr_labels)
-				printk(KERN_DEBUG "pktgen: MPLS auto turned off\n");
+				pr_debug("MPLS auto turned off\n");
 
 			pkt_dev->nr_labels = 0;    /* turn off MPLS */
 			sprintf(pg_result, "OK: svlan_id=%u", pkt_dev->svlan_id);
@@ -1611,7 +1607,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->svlan_id = 0xffff;
 
 			if (debug)
-				printk(KERN_DEBUG "pktgen: VLAN/SVLAN turned off\n");
+				pr_debug("VLAN/SVLAN turned off\n");
 		}
 		return count;
 	}
@@ -1779,8 +1775,7 @@ static ssize_t pktgen_thread_write(struct file *file,
 	i += len;
 
 	if (debug)
-		printk(KERN_DEBUG "pktgen: t=%s, count=%lu\n",
-		       name, (unsigned long)count);
+		pr_debug("t=%s, count=%lu\n", name, (unsigned long)count);
 
 	if (!t) {
 		pr_err("ERROR: No thread\n");

^ permalink raw reply related

* Re: [PATCH] virtio_net: invoke softirqs after __napi_schedule
From: David Miller @ 2012-05-17  3:40 UTC (permalink / raw)
  To: rusty; +Cc: netdev, virtualization, linux-kernel, mst
In-Reply-To: <87vcjvzdlm.fsf@rustcorp.com.au>

From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 17 May 2012 13:02:53 +0930

> On Wed, 16 May 2012 10:57:13 +0300, "Michael S. Tsirkin" <mst@redhat.com> wrote:
>> __napi_schedule might raise softirq but nothing
>> causes do_softirq to trigger, so it does not in fact
>> run. As a result,
>> the error message "NOHZ: local_softirq_pending 08"
>> sometimes occurs during boot of a KVM guest when the network service is
>> started and we are oom:
>> 
>>   ...
>>   Bringing up loopback interface:  [  OK  ]
>>   Bringing up interface eth0:
>>   Determining IP information for eth0...NOHZ: local_softirq_pending 08
>>    done.
>>   [  OK  ]
>>   ...
>> 
>> Further, receive queue processing might get delayed
>> indefinitely until some interrupt triggers:
>> virtio_net expected napi to be run immediately.
>> 
>> One way to cause do_softirq to be executed is by
>> invoking local_bh_enable(). As __napi_schedule is
>> normally called from bh or irq context, this
>> seems to make sense: disable bh before __napi_schedule
>> and enable afterwards.
>> 
>> Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
>> Tested-by: Ulrich Obergfell <uobergfe@redhat.com>
>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 ...
> Acked-by: Rusty Russell <rusty@rustcorp.com.au>

Michael, you're best to submit this directly to Linus as I just
made what I hope is my last push to him for 3.4 today.

^ permalink raw reply

* Re: [PATCH] virtio_net: invoke softirqs after __napi_schedule
From: Rusty Russell @ 2012-05-17  3:32 UTC (permalink / raw)
  To: Michael S. Tsirkin, David Miller
  Cc: netdev, virtualization, linux-kernel, Michael S. Tsirkin
In-Reply-To: <20120516075712.GA2921@redhat.com>

On Wed, 16 May 2012 10:57:13 +0300, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> __napi_schedule might raise softirq but nothing
> causes do_softirq to trigger, so it does not in fact
> run. As a result,
> the error message "NOHZ: local_softirq_pending 08"
> sometimes occurs during boot of a KVM guest when the network service is
> started and we are oom:
> 
>   ...
>   Bringing up loopback interface:  [  OK  ]
>   Bringing up interface eth0:
>   Determining IP information for eth0...NOHZ: local_softirq_pending 08
>    done.
>   [  OK  ]
>   ...
> 
> Further, receive queue processing might get delayed
> indefinitely until some interrupt triggers:
> virtio_net expected napi to be run immediately.
> 
> One way to cause do_softirq to be executed is by
> invoking local_bh_enable(). As __napi_schedule is
> normally called from bh or irq context, this
> seems to make sense: disable bh before __napi_schedule
> and enable afterwards.
> 
> Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
> Tested-by: Ulrich Obergfell <uobergfe@redhat.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> 
> To test, one can hack try_fill_recv to always report oom.
> I'm not sure it's not too late for 3.4, but we can try.
> Rusty, could you review ASAP pls?

It's missing a big comment: it's a very complicated way of calling
do_softirq().

Indeed, this function is only used when we are not in interrupt
context.  It's not hot at all, in any ideal scenario.

Acked-by: Rusty Russell <rusty@rustcorp.com.au>

^ permalink raw reply

* Re: [PATCH v5 2/2] decrement static keys on real destroy time
From: Glauber Costa @ 2012-05-17  3:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	devel-GEFAQzZX7r8dnm+yROfE0A,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A,
	netdev-u79uwXL29TY76Z2rM5mHXA, Tejun Heo, Li Zefan,
	Johannes Weiner, Michal Hocko
In-Reply-To: <20120516141342.911931e7.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>

On 05/17/2012 01:13 AM, Andrew Morton wrote:
> On Fri, 11 May 2012 17:11:17 -0300
> Glauber Costa<glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>  wrote:
>
>> We call the destroy function when a cgroup starts to be removed,
>> such as by a rmdir event.
>>
>> However, because of our reference counters, some objects are still
>> inflight. Right now, we are decrementing the static_keys at destroy()
>> time, meaning that if we get rid of the last static_key reference,
>> some objects will still have charges, but the code to properly
>> uncharge them won't be run.
>>
>> This becomes a problem specially if it is ever enabled again, because
>> now new charges will be added to the staled charges making keeping
>> it pretty much impossible.
>>
>> We just need to be careful with the static branch activation:
>> since there is no particular preferred order of their activation,
>> we need to make sure that we only start using it after all
>> call sites are active. This is achieved by having a per-memcg
>> flag that is only updated after static_key_slow_inc() returns.
>> At this time, we are sure all sites are active.
>>
>> This is made per-memcg, not global, for a reason:
>> it also has the effect of making socket accounting more
>> consistent. The first memcg to be limited will trigger static_key()
>> activation, therefore, accounting. But all the others will then be
>> accounted no matter what. After this patch, only limited memcgs
>> will have its sockets accounted.
>
> So I'm scratching my head over what the actual bug is, and how
> important it is.  AFAICT it will cause charging stats to exhibit some
> inaccuracy when memcg's are being torn down?
>
> I don't know how serious this in in the real world and so can't decide
> which kernel version(s) we should fix.
>
> When fixing bugs, please always fully describe the bug's end-user
> impact, so that I and others can make these sorts of decisions.

Hi Andrew.

I believe that was described in patch 0/2 ?
In any case, this is something we need fixed, but it is not -stable 
material or anything.

The bug leads to misaccounting when we quickly enable and disable the
limit in a loop. We have a synthetic script to demonstrate that.

^ permalink raw reply

* Re: [PATCH v5 2/2] decrement static keys on real destroy time
From: Glauber Costa @ 2012-05-17  3:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: cgroups, linux-mm, devel, kamezawa.hiroyu, netdev, Tejun Heo,
	Li Zefan, Johannes Weiner, Michal Hocko
In-Reply-To: <20120516140637.17741df6.akpm@linux-foundation.org>

On 05/17/2012 01:06 AM, Andrew Morton wrote:
> On Fri, 11 May 2012 17:11:17 -0300
> Glauber Costa<glommer@parallels.com>  wrote:
>
>> We call the destroy function when a cgroup starts to be removed,
>> such as by a rmdir event.
>>
>> However, because of our reference counters, some objects are still
>> inflight. Right now, we are decrementing the static_keys at destroy()
>> time, meaning that if we get rid of the last static_key reference,
>> some objects will still have charges, but the code to properly
>> uncharge them won't be run.
>>
>> This becomes a problem specially if it is ever enabled again, because
>> now new charges will be added to the staled charges making keeping
>> it pretty much impossible.
>>
>> We just need to be careful with the static branch activation:
>> since there is no particular preferred order of their activation,
>> we need to make sure that we only start using it after all
>> call sites are active. This is achieved by having a per-memcg
>> flag that is only updated after static_key_slow_inc() returns.
>> At this time, we are sure all sites are active.
>>
>> This is made per-memcg, not global, for a reason:
>> it also has the effect of making socket accounting more
>> consistent. The first memcg to be limited will trigger static_key()
>> activation, therefore, accounting. But all the others will then be
>> accounted no matter what. After this patch, only limited memcgs
>> will have its sockets accounted.
>>
>> ...
>>
>> @@ -107,10 +104,31 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
>>   		tcp->tcp_prot_mem[i] = min_t(long, val>>  PAGE_SHIFT,
>>   					     net->ipv4.sysctl_tcp_mem[i]);
>>
>> -	if (val == RESOURCE_MAX&&  old_lim != RESOURCE_MAX)
>> -		static_key_slow_dec(&memcg_socket_limit_enabled);
>> -	else if (old_lim == RESOURCE_MAX&&  val != RESOURCE_MAX)
>> -		static_key_slow_inc(&memcg_socket_limit_enabled);
>> +	if (val == RESOURCE_MAX)
>> +		cg_proto->active = false;
>> +	else if (val != RESOURCE_MAX) {
>> +		/*
>> +		 * ->activated needs to be written after the static_key update.
>> +		 *  This is what guarantees that the socket activation function
>> +		 *  is the last one to run. See sock_update_memcg() for details,
>> +		 *  and note that we don't mark any socket as belonging to this
>> +		 *  memcg until that flag is up.
>> +		 *
>> +		 *  We need to do this, because static_keys will span multiple
>> +		 *  sites, but we can't control their order. If we mark a socket
>> +		 *  as accounted, but the accounting functions are not patched in
>> +		 *  yet, we'll lose accounting.
>> +		 *
>> +		 *  We never race with the readers in sock_update_memcg(), because
>> +		 *  when this value change, the code to process it is not patched in
>> +		 *  yet.
>> +		 */
>> +		if (!cg_proto->activated) {
>> +			static_key_slow_inc(&memcg_socket_limit_enabled);
>> +			cg_proto->activated = true;
>> +		}
>
> If two threads run this code concurrently, they can both see
> cg_proto->activated==false and they will both run
> static_key_slow_inc().
>
> Hopefully there's some locking somewhere which prevents this, but it is
> unobvious.  We should comment this, probably at the cg_proto.activated
> definition site.  Or we should fix the bug ;)
>
If that happens, the locking in static_key_slow_inc will prevent any damage.
My previous version had explicit code to prevent that, but it was
pointed out to us that this is already part of the static_key expectations,
so that was dropped.

^ permalink raw reply

* Re: linux-next: manual merge of the net-next tree with the sparc-next tree
From: Stephen Rothwell @ 2012-05-17  3:04 UTC (permalink / raw)
  To: Sam Ravnborg; +Cc: David Miller, netdev, linux-next, linux-kernel
In-Reply-To: <20120516050245.GA407@merkur.ravnborg.org>

[-- Attachment #1: Type: text/plain, Size: 1859 bytes --]

Hi Sam,

On Wed, 16 May 2012 07:02:45 +0200 Sam Ravnborg <sam@ravnborg.org> wrote:
>
> On Wed, May 16, 2012 at 02:39:44PM +1000, Stephen Rothwell wrote:
> > Hi all,
> > 
> > Today's linux-next merge of the net-next tree got a conflict in
> > arch/sparc/Makefile between commit e1d7de8377e6 ("sparc: introduce
> > arch/sparc/Kbuild") from the sparc-next tree and commit 2809a2087cc4
> > ("net: filter: Just In Time compiler for sparc") from the net-next tree.
> > 
> > I suspect that the core-y net bit below should be changed to be a obj-y
> > bit of arch/sparc/Kbuild ...
> 
> Correct - like this:
> 
> arch/sparc/Kbuild:
> 
> obj-y += net/

So I applied this merge fixup to the merge of the net-next tree today
(and can carry it as necessary):

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 17 May 2012 13:00:07 +1000
Subject: [PATCH] net: arch/sparc/Makefile merge fixup

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/sparc/Kbuild   |    1 +
 arch/sparc/Makefile |    1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild
index 27b540d..5cd0116 100644
--- a/arch/sparc/Kbuild
+++ b/arch/sparc/Kbuild
@@ -5,3 +5,4 @@
 obj-y += kernel/
 obj-y += mm/
 obj-y += math-emu/
+obj-y += net/
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 554e38f..b9a72e2 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -54,7 +54,6 @@ head-y                 += arch/sparc/kernel/init_task.o
 
 # See arch/sparc/Kbuild for the core part of the kernel
 core-y                 += arch/sparc/
-core-y                 += arch/sparc/net/
 
 libs-y                 += arch/sparc/prom/
 libs-y                 += arch/sparc/lib/
-- 
1.7.10.280.gaa39

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply related

* Re: [V2 PATCH 2/9] macvtap: zerocopy: fix truesize underestimation
From: Jason Wang @ 2012-05-17  2:59 UTC (permalink / raw)
  To: Shirley Ma; +Cc: eric.dumazet, mst, netdev, linux-kernel, ebiederm, davem
In-Reply-To: <1337180585.10741.6.camel@oc3660625478.ibm.com>

On 05/16/2012 11:03 PM, Shirley Ma wrote:
> On Wed, 2012-05-16 at 11:04 +0800, Jason Wang wrote:
>>>> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
>>>> index bd4a70d..7cb2684 100644
>>>> --- a/drivers/net/macvtap.c
>>>> +++ b/drivers/net/macvtap.c
>>>> @@ -519,6 +519,7 @@ static int zerocopy_sg_from_iovec(struct
>> sk_buff
>>>> *skb, const struct iovec *from,
>>>>                   struct page *page[MAX_SKB_FRAGS];
>>>>                   int num_pages;
>>>>                   unsigned long base;
>>>> +               unsigned long truesize;
>>>>
>>>>                   len = from->iov_len - offset;
>>>>                   if (!len) {
>>>> @@ -533,10 +534,11 @@ static int zerocopy_sg_from_iovec(struct
>> sk_buff
>>>> *skb, const struct iovec *from,
>>>>                       (num_pages>   MAX_SKB_FRAGS -
>>>> skb_shinfo(skb)->nr_frags))
>>>>                           /* put_page is in skb free */
>>>>                           return -EFAULT;
>>>> +               truesize = size * PAGE_SIZE;
>>> Here should be truesize = size * PAGE_SIZE - offset, right?
>>>
>> We get the whole user page, so need to account them all. Also this is
>> aligned with skb_copy_ubufs().
> Then this would double count the size of "first" offset left from
> previous copy, both skb->len and truesize.
>
> Thanks
> Shirley
>

I don't see how this affects skb->len. And for truesize, I think they are
different: when the offset is not zero, the data in this vector is
divided into two parts. The first part is copied into the skb directly, and
the second is pinned from a whole userspace page by get_user_pages_fast(),
so we need to count the whole page against the socket limit to guard
against a malicious application.

Thanks

^ permalink raw reply

* Re: [V2 PATCH 9/9] vhost: zerocopy: poll vq in zerocopy callback
From: Jason Wang @ 2012-05-17  2:50 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Michael S. Tsirkin, eric.dumazet, netdev, linux-kernel, ebiederm,
	davem
In-Reply-To: <1337189525.10741.24.camel@oc3660625478.ibm.com>

On 05/17/2012 01:32 AM, Shirley Ma wrote:
> On Wed, 2012-05-16 at 18:14 +0300, Michael S. Tsirkin wrote:
>> On Wed, May 16, 2012 at 08:10:27AM -0700, Shirley Ma wrote:
>>> On Wed, 2012-05-16 at 10:58 +0800, Jason Wang wrote:
>>>>>>    drivers/vhost/vhost.c |    1 +
>>>>>>    1 files changed, 1 insertions(+), 0 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>>>>> index 947f00d..7b75fdf 100644
>>>>>> --- a/drivers/vhost/vhost.c
>>>>>> +++ b/drivers/vhost/vhost.c
>>>>>> @@ -1604,6 +1604,7 @@ void vhost_zerocopy_callback(void *arg)
>>>>>>           struct vhost_ubuf_ref *ubufs = ubuf->arg;
>>>>>>           struct vhost_virtqueue *vq = ubufs->vq;
>>>>>>
>>>>>> +       vhost_poll_queue(&vq->poll);
>>>>>>           /* set len = 1 to mark this desc buffers done DMA */
>>>>>>           vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
>>>>>>           kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
>>>>> Doing so, we might have redundant vhost_poll_queue(). Do you
>> know in
>>>>> which scenario there might be missing of adding and signaling
>> during
>>>>> zerocopy?
>>>> Yes, as we only do signaling and adding during tx work, if there's
>> no
>>>> tx
>>>> work when the skb were sent, we may lose the opportunity to let
>> guest
>>>> know about the completion. It's easy to be reproduced with netperf
>>>> test.
>>> The reason which host signals guest is to free guest tx buffers, if
>>> there is no tx work, then it's not necessary to signal the guest
>> unless
>>> guest runs out of memory. The pending buffers will be released
>>> virtio_net device gone.

Looks like we only free the skbs in .ndo_start_xmit().
>>>
>>> What's the behavior of netperf test when you hit this situation?
>>>
>>> Thanks
>>> Shirley
>> IIRC guest networking seems to be lost.
> It seems vhost_enable_notify is missing in somewhere else?
>
> Thanks
> Shirley
>

The problem is that we may stop the tx queue when there is not enough
capacity to place packets; at that point we depend on the tx interrupt to
re-enable the tx queue. So if we don't poll the vhost during the callback,
the guest may miss the tx interrupt that re-enables the tx queue, which
could stall the whole tx queue.

Thanks

^ permalink raw reply

* Re: [PATCH 1/1] smsc95xx: add FLAG_POINTTOPOINT flag for driver_info
From: Xiao Jiang @ 2012-05-17  2:23 UTC (permalink / raw)
  To: Ming Lei; +Cc: steve.glendinning, gregkh, netdev, linux-usb, linux-kernel
In-Reply-To: <CACVXFVNzmq74BKYZN1SpXYULneV2ASmniMhs4LhevPm-XgSJpg@mail.gmail.com>

Ming Lei wrote:
> On Wed, May 16, 2012 at 4:01 PM,  <jgq516@gmail.com> wrote:
>   
>> From: Xiao Jiang <jgq516@gmail.com>
>>
>> commit c26134 introduced FLAG_POINTTOPOINT flag for USB ethernet devices
>> which possibly use "usb%d" names, add this flag to make sure pandaboard
>> can mount nfs with smsc95xx NIC.
>>     
>
> Without the flag, I also can mount nfs successfully on my Pandaboard...
>
>   
I have pulled the latest tree
(git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
commit 0e93b4b304ae052ba1bc73f6d34a68556fe93429) and enabled the related
options (USB_NET_SMSC95XX,
USB_EHCI_HCD and USB_EHCI_HCD_OMAP) with omap2plus_config. However, the
kernel still can't mount
NFS; please see the log below.

[    3.114105] smsc95xx v1.0.4
[    4.533752] smsc95xx 1-1.1:1.0: *eth0*: register 'smsc95xx' at 
usb-ehci-omap.0-1.1, smsc95xx USB 2.0 Ethernet, fe:b9:1b:07:8e:d1
[  108.854217] VFS: Unable to mount root fs via NFS, trying floppy.
[  108.861114] VFS: Cannot open root device "nfs" or unknown-block(2,0): 
error -6
[  108.868713] Please append a correct "root=" boot option; here are the 
available partitions:
[  108.877655] b300         7761920 mmcblk0  driver: mmcblk
[  108.883239]   b301           40131 mmcblk0p1 
00000000-0000-0000-0000-000000000mmcblk0p1
[  108.891662]   b302         7719232 mmcblk0p2 
00000000-0000-0000-0000-000000000mmcblk0p2
[  108.900146] Kernel panic - not syncing: VFS: Unable to mount root fs 
on unknown-block(2,0)

BTW: I tested it on an OMAP4430 ES2.2 pandaboard; the issue is solved
by applying the patch.

Is there something which I missed? thanks.

Regards,
Xiao
>> Signed-off-by: Xiao Jiang <jgq516@gmail.com>
>> ---
>>  drivers/net/usb/smsc95xx.c |    3 ++-
>>  1 files changed, 2 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
>> index 94ae669..e158288 100644
>> --- a/drivers/net/usb/smsc95xx.c
>> +++ b/drivers/net/usb/smsc95xx.c
>> @@ -1192,7 +1192,8 @@ static const struct driver_info smsc95xx_info = {
>>        .rx_fixup       = smsc95xx_rx_fixup,
>>        .tx_fixup       = smsc95xx_tx_fixup,
>>        .status         = smsc95xx_status,
>> -       .flags          = FLAG_ETHER | FLAG_SEND_ZLP | FLAG_LINK_INTR,
>> +       .flags          = FLAG_ETHER | FLAG_POINTTOPOINT | FLAG_SEND_ZLP |
>> +                         FLAG_LINK_INTR,
>>  };
>>
>>  static const struct usb_device_id products[] = {
>> --
>> 1.7.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>>     
>
>
> Thanks,
>   

^ permalink raw reply

* Re: [PATCH v5 2/2] decrement static keys on real destroy time
From: KAMEZAWA Hiroyuki @ 2012-05-17  0:07 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Glauber Costa, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg, devel-GEFAQzZX7r8dnm+yROfE0A,
	netdev-u79uwXL29TY76Z2rM5mHXA, Tejun Heo, Li Zefan,
	Johannes Weiner, Michal Hocko
In-Reply-To: <20120516141342.911931e7.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>

(2012/05/17 6:13), Andrew Morton wrote:

> On Fri, 11 May 2012 17:11:17 -0300
> Glauber Costa <glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org> wrote:
> 
>> We call the destroy function when a cgroup starts to be removed,
>> such as by a rmdir event.
>>
>> However, because of our reference counters, some objects are still
>> inflight. Right now, we are decrementing the static_keys at destroy()
>> time, meaning that if we get rid of the last static_key reference,
>> some objects will still have charges, but the code to properly
>> uncharge them won't be run.
>>
>> This becomes a problem specially if it is ever enabled again, because
>> now new charges will be added to the staled charges making keeping
>> it pretty much impossible.
>>
>> We just need to be careful with the static branch activation:
>> since there is no particular preferred order of their activation,
>> we need to make sure that we only start using it after all
>> call sites are active. This is achieved by having a per-memcg
>> flag that is only updated after static_key_slow_inc() returns.
>> At this time, we are sure all sites are active.
>>
>> This is made per-memcg, not global, for a reason:
>> it also has the effect of making socket accounting more
>> consistent. The first memcg to be limited will trigger static_key()
>> activation, therefore, accounting. But all the others will then be
>> accounted no matter what. After this patch, only limited memcgs
>> will have its sockets accounted.
> 
> So I'm scratching my head over what the actual bug is, and how
> important it is.  AFAICT it will cause charging stats to exhibit some
> inaccuracy when memcg's are being torn down?
> 
> I don't know how serious this in in the real world and so can't decide
> which kernel version(s) we should fix.
> 
> When fixing bugs, please always fully describe the bug's end-user
> impact, so that I and others can make these sorts of decisions.
> 


Ah, this was a bug report from me. tcp accounting can be easily broken.
Costa, could you include this ?
==

tcp memcontrol uses static_branch to optimize the limit=RESOURCE_MAX case.
If every cgroup's limit=RESOURCE_MAX, resource usage is not accounted.
But it's buggy now.

For example, do the following:
# while sleep 1;do
   echo 9223372036854775807 > /cgroup/memory/A/memory.kmem.tcp.limit_in_bytes;
   echo 300M > /cgroup/memory/A/memory.kmem.tcp.limit_in_bytes;
   done
and run a network application under A. tcp's usage is sometimes accounted
and sometimes not accounted because of the frequent changes of static_branch.
Then you can see a broken tcp.usage_in_bytes.
WARN_ON() is printed because res_counter->usage goes below 0.
==
kernel: ------------[ cut here ]----------
kernel: WARNING: at kernel/res_counter.c:96 res_counter_uncharge_locked+0x37/0x40()
 <snip>
kernel: Pid: 17753, comm: bash Tainted: G  W    3.3.0+ #99
kernel: Call Trace:
kernel: <IRQ>  [<ffffffff8104cc9f>] warn_slowpath_common+0x7f/0xc0
kernel: [<ffffffff810d7e88>] ? rb_reserve__next_event+0x68/0x470
kernel: [<ffffffff8104ccfa>] warn_slowpath_null+0x1a/0x20
kernel: [<ffffffff810b4e37>] res_counter_uncharge_locked+0x37/0x40
 ...
==

^ permalink raw reply

* Re: [PATCH 0/7] netfilter updates for net-next (batch 3)
From: David Miller @ 2012-05-17  0:00 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <1337209604-3412-1-git-send-email-pablo@netfilter.org>

From: pablo@netfilter.org
Date: Thu, 17 May 2012 01:06:37 +0200

> The following patchset contains small updates for net-next, more relevantly:
> 
> * One fix for potential NULL dereference in xt_HMARK by Dan Carpenter.
> 
> * Conversion to use _ALL macro in xt_hashlimit as you suggested by
>   Florian Westphal.
> 
> * One fix for timeout overflow from Jozsef Kadlecsik.
> 
> * Replace usage of modulus for hash calculation in xt_HMARK as you suggested
>   from myself.
> 
> You can pull these changes from:
> 
> git://1984.lsi.us.es/net-next master

Pulled, thanks a lot!

^ permalink raw reply

* [IPROUTE2 2/2] tc-netem: Add support for ECN packet marking
From: Vijay Subramanian @ 2012-05-16 23:51 UTC (permalink / raw)
  To: netdev; +Cc: Eric Dumazet, Stephen Hemminger, Vijay Subramanian
In-Reply-To: <1337212318-2100-1-git-send-email-subramanian.vijay@gmail.com>

This patch provides support for marking packets with ECN instead of
dropping them with netem. This makes it possible to make use of the
netem ECN marking feature that was added recently to the kernel.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
---
 include/linux/pkt_sched.h |    1 +
 tc/q_netem.c              |   26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 410b33d..ffe975c 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -509,6 +509,7 @@ enum {
 	TCA_NETEM_CORRUPT,
 	TCA_NETEM_LOSS,
 	TCA_NETEM_RATE,
+	TCA_NETEM_ECN,
 	__TCA_NETEM_MAX,
 };
 
diff --git a/tc/q_netem.c b/tc/q_netem.c
index 360080c..f8489e9 100644
--- a/tc/q_netem.c
+++ b/tc/q_netem.c
@@ -38,6 +38,7 @@ static void explain(void)
 "                 [ loss random PERCENT [CORRELATION]]\n" \
 "                 [ loss state P13 [P31 [P32 [P23 P14]]]\n" \
 "                 [ loss gemodel PERCENT [R [1-H [1-K]]]\n" \
+"                 [ ecn ]\n" \
 "                 [ reorder PRECENT [CORRELATION] [ gap DISTANCE ]]\n" \
 "                 [ rate RATE [PACKETOVERHEAD] [CELLSIZE] [CELLOVERHEAD]]\n");
 }
@@ -326,6 +327,8 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
 					*argv);
 				return -1;
 			}
+		} else if (matches(*argv, "ecn") == 0) {
+				present[TCA_NETEM_ECN] = 1;
 		} else if (matches(*argv, "reorder") == 0) {
 			NEXT_ARG();
 			present[TCA_NETEM_REORDER] = 1;
@@ -437,6 +440,14 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
 		return -1;
 	}
 
+	if (present[TCA_NETEM_ECN]) {
+		if (opt.loss <= 0 && loss_type == NETEM_LOSS_UNSPEC) {
+			fprintf(stderr, "ecn requested without loss model\n");
+			explain();
+			return -1;
+		}
+	}
+
 	if (dist_data && (opt.latency == 0 || opt.jitter == 0)) {
 		fprintf(stderr, "distribution specified but no latency and jitter values\n");
 		explain();
@@ -454,6 +465,11 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
 	    addattr_l(n, 1024, TCA_NETEM_REORDER, &reorder, sizeof(reorder)) < 0)
 		return -1;
 
+	if (present[TCA_NETEM_ECN] &&
+	    addattr_l(n, 1024, TCA_NETEM_ECN, &present[TCA_NETEM_ECN],
+		      sizeof(present[TCA_NETEM_ECN])) < 0)
+			return -1;
+
 	if (present[TCA_NETEM_CORRUPT] &&
 	    addattr_l(n, 1024, TCA_NETEM_CORRUPT, &corrupt, sizeof(corrupt)) < 0)
 		return -1;
@@ -500,6 +516,7 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
 	const struct tc_netem_corrupt *corrupt = NULL;
 	const struct tc_netem_gimodel *gimodel = NULL;
 	const struct tc_netem_gemodel *gemodel = NULL;
+	int *ecn = NULL;
 	struct tc_netem_qopt qopt;
 	const struct tc_netem_rate *rate = NULL;
 	int len = RTA_PAYLOAD(opt) - sizeof(qopt);
@@ -548,6 +565,11 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
 				return -1;
 			rate = RTA_DATA(tb[TCA_NETEM_RATE]);
 		}
+		if (tb[TCA_NETEM_ECN]) {
+			if (RTA_PAYLOAD(tb[TCA_NETEM_ECN]) < sizeof(*ecn))
+				return -1;
+			ecn = RTA_DATA(tb[TCA_NETEM_ECN]);
+		}
 	}
 
 	fprintf(f, "limit %d", qopt.limit);
@@ -617,9 +639,13 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
 			fprintf(f, " celloverhead %d", rate->cell_overhead);
 	}
 
+	if (ecn)
+		fprintf(f, " ecn ");
+
 	if (qopt.gap)
 		fprintf(f, " gap %lu", (unsigned long)qopt.gap);
 
+
 	return 0;
 }
 
-- 
1.7.0.4

^ permalink raw reply related

* [IPROUTE2 1/2] Update tc-netem manpage to add ecn capability
From: Vijay Subramanian @ 2012-05-16 23:51 UTC (permalink / raw)
  To: netdev; +Cc: Eric Dumazet, Stephen Hemminger, Vijay Subramanian
In-Reply-To: <1337212318-2100-1-git-send-email-subramanian.vijay@gmail.com>

This patch updates the netem manpage to describe how to use
netem to mark packets with ecn instead of dropping them.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
---
 man/man8/tc-netem.8 |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/man/man8/tc-netem.8 b/man/man8/tc-netem.8
index 39f8454..b0b7864 100644
--- a/man/man8/tc-netem.8
+++ b/man/man8/tc-netem.8
@@ -30,8 +30,8 @@ NetEm \- Network Emulator
 .IR p13 " [ " p31 " [ " p32 " [ " p23 " [ " p14 "]]]] |"
 .br
 .RB "               " gemodel
-.IR p " [ " r " [ " 1-h " [ " 1-k " ]]]"
-.BR " }"
+.IR p " [ " r " [ " 1-h " [ " 1-k " ]]] } "
+.RB  " [ " ecn " ] "
 
 .IR CORRUPT " := "
 .B corrupt
@@ -102,6 +102,10 @@ model. As known, p and r are the transition probabilities between the bad and
 the good states, 1-h is the loss probability in the bad state and 1-k is the
 loss probability in the good state.
 
+.SS ecn
+can be used optionally to mark packets instead of dropping them. A loss model
+has to be used for this to be enabled.
+
 .SS corrupt
 allows the emulation of random noise introducing an error in a random position
 for a chosen percent of packets. It is also possible to add a correlation
-- 
1.7.0.4

^ permalink raw reply related

* [IPROUTE2 0/2] Add ECN support to tc-netem
From: Vijay Subramanian @ 2012-05-16 23:51 UTC (permalink / raw)
  To: netdev; +Cc: Eric Dumazet, Stephen Hemminger, Vijay Subramanian

A recent patch to the net-next kernel from Eric Dumazet (e4ae004b84b netem: add
ECN capability) made it possible for netem to mark packets with ECN instead of
dropping them. These two patches add support to iproute2/tc and update the
manpage.

Vijay Subramanian (2):
  Update tc-netem manpage to add ecn capability
  tc-netem: Add support for ECN packet marking

 include/linux/pkt_sched.h |    1 +
 man/man8/tc-netem.8       |    8 ++++++--
 tc/q_netem.c              |   25 +++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

^ permalink raw reply

* Re: [PATCH] netfilter: xt_HMARK: fix endian bugs and warnings
From: Pablo Neira Ayuso @ 2012-05-16 23:26 UTC (permalink / raw)
  To: Hans Schillstrom
  Cc: kaber, jengelh, netfilter-devel, netdev, dan.carpenter, hans
In-Reply-To: <1337180442-32581-1-git-send-email-hans.schillstrom@ericsson.com>

On Wed, May 16, 2012 at 05:00:42PM +0200, Hans Schillstrom wrote:
> A mix of u32 and __be32 causes endian warning.
> The hash value produced is now the same for big and little endian machines.
> i.e. a mix of Big and Little endian in a cluster is now possible.
> 
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
> ---
>  include/linux/netfilter/xt_HMARK.h |    5 ++-
>  net/netfilter/xt_HMARK.c           |   68 ++++++++++++++++++++---------------
>  2 files changed, 42 insertions(+), 31 deletions(-)
> 
> diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h
> index abb1650..8b6307a 100644
> --- a/include/linux/netfilter/xt_HMARK.h
> +++ b/include/linux/netfilter/xt_HMARK.h
> @@ -24,10 +24,11 @@ enum {
>  
>  union hmark_ports {
>  	struct {
> -		__u16	src;
> -		__u16	dst;
> +		__be16	src;
> +		__be16	dst;
>  	} p16;
>  	__u32	v32;
> +	__be32	b32;
>  };
>  
>  struct xt_hmark_info {
> diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
> index 32fbd73..7bb7b5e 100644
> --- a/net/netfilter/xt_HMARK.c
> +++ b/net/netfilter/xt_HMARK.c
> @@ -32,13 +32,13 @@ MODULE_ALIAS("ipt_HMARK");
>  MODULE_ALIAS("ip6t_HMARK");
>  
>  struct hmark_tuple {
> -	u32			src;
> -	u32			dst;
> +	__be32			src;
> +	__be32			dst;
>  	union hmark_ports	uports;
>  	uint8_t			proto;
>  };
>  
> -static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
> +static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
>  {
>  	return (addr32[0] & mask[0]) ^
>  	       (addr32[1] & mask[1]) ^
> @@ -46,8 +46,8 @@ static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
>  	       (addr32[3] & mask[3]);
>  }
>  
> -static inline u32
> -hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
> +static inline __be32
> +hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
>  {
>  	switch (l3num) {
>  	case AF_INET:
> @@ -58,6 +58,25 @@ hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
>  	return 0;
>  }
>  
> +static inline void hmark_port_order(union hmark_ports *uports,

Cosmetic change: better to call this hmark_swap_ports.

> +				    const struct xt_hmark_info *info)
> +{
> +	union hmark_ports hp;
> +
> +	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
> +	hp.v32 = ntohl(hp.b32);
> +	/* Make it endian safe into jhash() */
> +#if defined(__LITTLE_ENDIAN)
> +	if ((__force u16) uports->p16.dst >
> +	    (__force u16) uports->p16.src)
> +#else
> +	if ((__force u16) uports->p16.src >
> +	    (__force u16) uports->p16.dst)
> +#endif

This ifdef is ugly. I'd prefer that you convert the ports with ntohs and store
the values in local variables, then compare and swap if required.

Just like you do with the IPv4 address.

> +		swap(hp.p16.src, hp.p16.dst);
> +	uports->v32 = hp.v32;
> +}
> +
>  static int
>  hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
>  		    const struct xt_hmark_info *info)
> @@ -74,10 +93,10 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
>  	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
>  	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
>  
> -	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all,
> -				 info->src_mask.all);
> -	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all,
> -				 info->dst_mask.all);
> +	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
> +				 info->src_mask.ip6);
> +	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
> +				 info->dst_mask.ip6);
>  
>  	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
>  		return 0;
> @@ -86,10 +105,7 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
>  	if (t->proto != IPPROTO_ICMP) {
>  		t->uports.p16.src = otuple->src.u.all;
>  		t->uports.p16.dst = rtuple->src.u.all;
> -		t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
> -				info->port_set.v32;
> -		if (t->uports.p16.dst < t->uports.p16.src)
> -			swap(t->uports.p16.dst, t->uports.p16.src);
> +		hmark_port_order(&t->uports, info);
>  	}
>  
>  	return 0;
> @@ -102,11 +118,13 @@ static inline u32
>  hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
>  {
>  	u32 hash;
> +	u32 src = ntohl(t->src);
> +	u32 dst = ntohl(t->dst);
>  
> -	if (t->dst < t->src)
> -		swap(t->src, t->dst);
> +	if (dst < src)
> +		swap(src, dst);
>  
> -	hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd);
> +	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
>  	hash = hash ^ (t->proto & info->proto_mask);
>  
>  	return (((u64)hash * info->hmodulus) >> 32) + info->hoffset;
> @@ -125,12 +143,7 @@ hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
>  	nhoff += protoff;
>  	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
>  		return;
> -
> -	t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
> -			info->port_set.v32;
> -
> -	if (t->uports.p16.dst < t->uports.p16.src)
> -		swap(t->uports.p16.dst, t->uports.p16.src);
> +	hmark_port_order(&t->uports, info);
>  }
>  
>  #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
> @@ -178,8 +191,8 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
>  			return -1;
>  	}
>  noicmp:
> -	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all);
> -	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all);
> +	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
> +	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
>  
>  	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
>  		return 0;
> @@ -255,11 +268,8 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
>  		}
>  	}
>  
> -	t->src = (__force u32) ip->saddr;
> -	t->dst = (__force u32) ip->daddr;
> -
> -	t->src &= info->src_mask.ip;
> -	t->dst &= info->dst_mask.ip;
> +	t->src = ip->saddr & info->src_mask.ip;
> +	t->dst = ip->daddr & info->dst_mask.ip;
>  
>  	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
>  		return 0;
> -- 
> 1.7.2.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC 13/13] USB: Disable hub-initiated LPM for comms devices.
From: Greg Kroah-Hartman @ 2012-05-16 23:20 UTC (permalink / raw)
  To: Sarah Sharp
  Cc: linux-usb-u79uwXL29TY76Z2rM5mHXA, Alan Stern,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA,
	gigaset307x-common-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	ath9k-devel-xDcbHBWguxHbcTqmT+pZeQ,
	libertas-dev-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	users-poMEt7QlJxcwIE2E9O76wjtx2kNaKg5H
In-Reply-To: <20120516224528.GA25936@xanatos>

On Wed, May 16, 2012 at 03:45:28PM -0700, Sarah Sharp wrote:
> [Resending with a smaller Cc list]
> 
> Hub-initiated LPM is not good for USB communications devices.  Comms
> devices should be able to tell when their link can go into a lower power
> state, because they know when an incoming transmission is finished.
> Ideally, these devices would slam their links into a lower power state,
> using the device-initiated LPM, after finishing the last packet of their
> data transfer.
> 
> If we enable the idle timeouts for the parent hubs to enable
> hub-initiated LPM, we will get a lot of useless LPM packets on the bus
> as the devices reject LPM transitions when they're in the middle of
> receiving data.  Worse, some devices might blindly accept the
> hub-initiated LPM and power down their radios while they're in the
> middle of receiving a transmission.
> 
> The Intel Windows folks are disabling hub-initiated LPM for all USB
> communications devices under a xHCI USB 3.0 host.  In order to keep
> the Linux behavior as close as possible to Windows, we need to do the
> same in Linux.

How is the USB core on Windows determining that LPM should be turned off
for these devices?  Surely they aren't modifying each individual driver
like this is, right?  Is there any way we can also do this in the core?

Or, turn it around the other way, and only enable it if we know it's
safe to do so, in each driver, but I guess that would be even messier.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH 6/7] netfilter: xt_CT: remove redundant header include
From: pablo @ 2012-05-16 23:06 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1337209604-3412-1-git-send-email-pablo@netfilter.org>

From: Eldad Zack <eldad@fogrefinery.com>

nf_conntrack_l4proto.h is included twice.

Signed-off-by: Eldad Zack <eldad@fogrefinery.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_CT.c |    1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 3746d8b..a51de9b 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -17,7 +17,6 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 
-- 
1.7.10

^ permalink raw reply related

* [PATCH 0/7] netfilter updates for net-next (batch 3)
From: pablo @ 2012-05-16 23:06 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

From: Pablo Neira Ayuso <pablo@netfilter.org>

Hi David,

The following patchset contains small updates for net-next, more relevantly:

* One fix for potential NULL dereference in xt_HMARK by Dan Carpenter.

* Conversion to use the _ALL macro in xt_hashlimit, as you suggested, by
  Florian Westphal.

* One fix for timeout overflow from Jozsef Kadlecsik.

* Replace usage of modulus for hash calculation in xt_HMARK, as you suggested,
  from myself.

You can pull these changes from:

git://1984.lsi.us.es/net-next master

Thanks!

Dan Carpenter (1):
  netfilter: xt_HMARK: potential NULL dereference in get_inner_hdr()

Eldad Zack (1):
  netfilter: xt_CT: remove redundant header include

Florian Westphal (1):
  netfilter: xt_hashlimit: use _ALL macro to reject unknown flag bits

Jozsef Kadlecsik (1):
  netfilter: ipset: fix timeout value overflow bug

Pablo Neira Ayuso (3):
  netfilter: xt_HMARK: modulus is expensive for hash calculation
  netfilter: nf_ct_tcp: extend log message for invalid ignored packets
  netfilter: nf_ct_h323: fix usage of MODULE_ALIAS_NFCT_HELPER

 include/linux/netfilter/ipset/ip_set_timeout.h |    4 ++++
 include/linux/netfilter/xt_hashlimit.h         |    6 ++++--
 net/netfilter/nf_conntrack_h323_main.c         |    4 +++-
 net/netfilter/nf_conntrack_proto_tcp.c         |    3 ++-
 net/netfilter/xt_CT.c                          |    1 -
 net/netfilter/xt_HMARK.c                       |    4 ++--
 net/netfilter/xt_hashlimit.c                   |    2 +-
 net/netfilter/xt_set.c                         |   15 +++++++++++++--
 8 files changed, 29 insertions(+), 10 deletions(-)

-- 
1.7.10

^ permalink raw reply

* [PATCH 4/7] netfilter: nf_ct_tcp: extend log message for invalid ignored packets
From: pablo @ 2012-05-16 23:06 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1337209604-3412-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>

Extend log message if packets are ignored to include the TCP state, i.e.
replace:

[ 3968.070196] nf_ct_tcp: invalid packet ignored IN= OUT= SRC=...

by:

[ 3968.070196] nf_ct_tcp: invalid packet ignored in state ESTABLISHED IN= OUT= SRC=...

This information is useful to know in what state we were while ignoring the
packet.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 net/netfilter/nf_conntrack_proto_tcp.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4dfbfa8..21ff1a9 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -952,7 +952,8 @@ static int tcp_packet(struct nf_conn *ct,
 		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
-				  "nf_ct_tcp: invalid packet ignored ");
+				  "nf_ct_tcp: invalid packet ignored in "
+				  "state %s ", tcp_conntrack_names[old_state]);
 		return NF_ACCEPT;
 	case TCP_CONNTRACK_MAX:
 		/* Invalid packet */
-- 
1.7.10

^ permalink raw reply related

* [PATCH 7/7] netfilter: nf_ct_h323: fix usage of MODULE_ALIAS_NFCT_HELPER
From: pablo @ 2012-05-16 23:06 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1337209604-3412-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>

ctnetlink uses the aliases that are created by MODULE_ALIAS_NFCT_HELPER
to auto-load the module based on the helper name. Thus, we have to use
RAS, Q.931 and H.245, not H.323.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_h323_main.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 93c13eb..46d69d7 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1830,4 +1830,6 @@ MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
 MODULE_DESCRIPTION("H.323 connection tracking helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_conntrack_h323");
-MODULE_ALIAS_NFCT_HELPER("h323");
+MODULE_ALIAS_NFCT_HELPER("RAS");
+MODULE_ALIAS_NFCT_HELPER("Q.931");
+MODULE_ALIAS_NFCT_HELPER("H.245");
-- 
1.7.10


^ permalink raw reply related

* [PATCH 5/7] netfilter: ipset: fix timeout value overflow bug
From: pablo @ 2012-05-16 23:06 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1337209604-3412-1-git-send-email-pablo@netfilter.org>

From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

Large timeout parameters could result in wrong timeout values due to
an overflow in the msec to jiffies conversion (reported by Andreas Herz).

[ This patch was mangled by Pablo Neira Ayuso since David Laight and
  Eric Dumazet noticed that we were using hardcoded 1000 instead of
  MSEC_PER_SEC to calculate the timeout ]

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set_timeout.h |    4 ++++
 net/netfilter/xt_set.c                         |   15 +++++++++++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 4792320..41d9cfa 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -30,6 +30,10 @@ ip_set_timeout_uget(struct nlattr *tb)
 {
 	unsigned int timeout = ip_set_get_h32(tb);
 
+	/* Normalize to fit into jiffies */
+	if (timeout > UINT_MAX/MSEC_PER_SEC)
+		timeout = UINT_MAX/MSEC_PER_SEC;
+
 	/* Userspace supplied TIMEOUT parameter: adjust crazy size */
 	return timeout == IPSET_NO_TIMEOUT ? IPSET_NO_TIMEOUT - 1 : timeout;
 }
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 0ec8138..035960e 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -44,6 +44,14 @@ const struct ip_set_adt_opt n = {	\
 	.cmdflags = cfs,		\
 	.timeout = t,			\
 }
+#define ADT_MOPT(n, f, d, fs, cfs, t)	\
+struct ip_set_adt_opt n = {		\
+	.family	= f,			\
+	.dim = d,			\
+	.flags = fs,			\
+	.cmdflags = cfs,		\
+	.timeout = t,			\
+}
 
 /* Revision 0 interface: backward compatible with netfilter/iptables */
 
@@ -296,11 +304,14 @@ static unsigned int
 set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_set_info_target_v2 *info = par->targinfo;
-	ADT_OPT(add_opt, par->family, info->add_set.dim,
-		info->add_set.flags, info->flags, info->timeout);
+	ADT_MOPT(add_opt, par->family, info->add_set.dim,
+		 info->add_set.flags, info->flags, info->timeout);
 	ADT_OPT(del_opt, par->family, info->del_set.dim,
 		info->del_set.flags, 0, UINT_MAX);
 
+	/* Normalize to fit into jiffies */
+	if (add_opt.timeout > UINT_MAX/MSEC_PER_SEC)
+		add_opt.timeout = UINT_MAX/MSEC_PER_SEC;
 	if (info->add_set.index != IPSET_INVALID_ID)
 		ip_set_add(info->add_set.index, skb, par, &add_opt);
 	if (info->del_set.index != IPSET_INVALID_ID)
-- 
1.7.10


^ permalink raw reply related

* [PATCH 3/7] netfilter: xt_HMARK: modulus is expensive for hash calculation
From: pablo @ 2012-05-16 23:06 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1337209604-3412-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>

Use:

((u64)(HASH_VAL * HASH_SIZE)) >> 32

as suggested by David S. Miller.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_HMARK.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 5817d03..0a96a43 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -109,7 +109,7 @@ hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
 	hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd);
 	hash = hash ^ (t->proto & info->proto_mask);
 
-	return (hash % info->hmodulus) + info->hoffset;
+	return (((u64)hash * info->hmodulus) >> 32) + info->hoffset;
 }
 
 static void
-- 
1.7.10


^ permalink raw reply related

* [PATCH 2/7] netfilter: xt_HMARK: potential NULL dereference in get_inner_hdr()
From: pablo @ 2012-05-16 23:06 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1337209604-3412-1-git-send-email-pablo@netfilter.org>

From: Dan Carpenter <dan.carpenter@oracle.com>

There is a typo in the error checking and "&&" was used instead of "||".
If skb_header_pointer() returns NULL then it leads to a NULL
dereference.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_HMARK.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 32fbd73..5817d03 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -223,7 +223,7 @@ static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
 
 	/* Not enough header? */
 	icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
-	if (icmph == NULL && icmph->type > NR_ICMP_TYPES)
+	if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
 		return 0;
 
 	/* Error message? */
-- 
1.7.10


^ permalink raw reply related

* [PATCH 1/7] netfilter: xt_hashlimit: use _ALL macro to reject unknown flag bits
From: pablo @ 2012-05-16 23:06 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1337209604-3412-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

David Miller says:
     The canonical way to validate if the set bits are in a valid
     range is to have a "_ALL" macro, and test:
     if (val & ~XT_HASHLIMIT_ALL)
         goto err;

make it so.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_hashlimit.h |    6 ++++--
 net/netfilter/xt_hashlimit.c           |    2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h
index 05fe799..c42e52f 100644
--- a/include/linux/netfilter/xt_hashlimit.h
+++ b/include/linux/netfilter/xt_hashlimit.h
@@ -22,10 +22,12 @@ enum {
 	XT_HASHLIMIT_HASH_SPT = 1 << 3,
 	XT_HASHLIMIT_INVERT   = 1 << 4,
 	XT_HASHLIMIT_BYTES    = 1 << 5,
+};
 #ifdef __KERNEL__
-	XT_HASHLIMIT_MAX      = 1 << 6,
+#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
+			  XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \
+			  XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES)
 #endif
-};
 
 struct hashlimit_cfg {
 	__u32 mode;	  /* bitmask of XT_HASHLIMIT_HASH_* */
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 5d5af1d..26a668a 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -647,7 +647,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 			return -EINVAL;
 	}
 
-	if (info->cfg.mode >= XT_HASHLIMIT_MAX) {
+	if (info->cfg.mode & ~XT_HASHLIMIT_ALL) {
 		pr_info("Unknown mode mask %X, kernel too old?\n",
 						info->cfg.mode);
 		return -EINVAL;
-- 
1.7.10


^ permalink raw reply related

* [RFC 00/13] USB 3.0 Link Power Management (LPM) support
From: Sarah Sharp @ 2012-05-16 22:45 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-usb, Alan Stern, linux-bluetooth, gigaset307x-common,
	netdev, linux-wireless, ath9k-devel, libertas-dev, users

[Resending with a smaller Cc list]

The USB 3.0 bus specification added a new (optional) low power mechanism
called Link Power Management (LPM).  This mechanism takes advantage of
the fact that USB 3.0 packets are routed, not broadcast across the bus.
It does not help *at all* with USB 2.0 devices.

The idea is that when links between a parent hub and a child hub/device
become idle, they should be put into a lower power state.  When a USB
device goes into this lower power state, it may shut down PLLs or other
circuitry, since it knows it won't receive any transfers until its link
is driven into the active link state (U0).  Isochronous time stamp
packets are also not routed to links in a lower power state.  This
should (theoretically) save power.

There are two lower power link states, U1 and U2.  Each USB device
advertises the amount of time that it will take to power up and come
back to U0, in the U1/U2 Device Exit Latency values in the SuperSpeed
BOS descriptor.  Hubs may also incur an exit latency to bring a
downstream link into U0.  U2 is supposed to provide the deepest power
savings, but it will also have the highest exit latency.

Either a USB device or a parent hub can request that the link go into U1
or U2.  Hubs will only initiate U1 or U2 after the link has been idle
for a software-programmed timeout.

It may not make sense to enable hub-initiated LPM for communications
devices.  These types of devices often exist to send data to the host,
and the device usually knows when a transfer is likely to occur.  So
these devices should know when to enable device-initiated LPM.  On the
other hand, we don't want to introduce any latency for these devices, so
enabling hub-initiated LPM may not make sense.

Introduce a new usb_driver flag to disable hub-initiated USB 3.0 LPM,
and set it for all communications USB drivers.  Let me know if I've
missed any.

The policy for how to set the hub-initiated U1/U2 idle timeouts is very
host specific.  It depends on many factors, including endpoint types,
how many hub tiers are between the device and host, and some obscure
knowledge of the hardware's internal bus schedule policy.  Introduce
some new xHCI infrastructure, so that xHCI host vendors can add
functions to implement their own timeout policies.

The only U1/U2 timeout policy I know of is for the Intel xHCI host
controller.  I've tried to make the patchset as host-agnostic as
possible, and there's only about 100 lines of Intel-specific code in the
second to last patch.

This patchset still needs a bit of polish, but the code is stable.  I
need to retest it now that I've refactored the code a bit, but I think
it's ready for review.

I have verified with a USB 3.0 bus analyzer that the TI USB 3.0 hub
prototype I have does go into U1 and U2 at the request of the xHCI host
after the U1/U2 timeouts expire.  I see the host (or device, it's hard
to tell which on the LeCroy analyzer I have) send an LGO_U1 and the
partner respond with an LAU (link accept).

Unfortunately, there aren't very many other devices that do go into U1
or U2.  They don't break in any obvious way, but when they are sent an
LGO_U1 or LGO_U2, they always refuse by sending an LXU (link reject).


The following changes since commit 87f0194060c191f4149193833636f4fec52cdfd1:

  xhci: Reset reserved command ring TRBs on cleanup. (2012-05-10 10:13:37 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/sarah/xhci.git lpm-usb3-v7

for you to fetch changes up to 3c634afbbe15598cdcbf77bb9393d22ad4bfa373:

  USB: Disable hub-initiated LPM for comms devices. (2012-05-16 14:23:40 -0700)

----------------------------------------------------------------
Sarah Sharp (13):
      xhci: Add roothub code to set U1/U2 timeouts.
      USB: Make sure to fetch the BOS desc for roothubs.
      USB: Refactor code to set LPM support flag.
      USB: Calculate USB 3.0 exit latencies for LPM.
      USB: Allow drivers to disable hub-initiated LPM.
      USB: Add support to enable/disable USB3 link states.
      USB: Disable USB 3.0 LPM in critical sections.
      xhci: Some Evaluate Context commands must succeed.
      xhci: Reserve one command for USB3 LPM disable.
      USB: Add macros for interrupt endpoint types.
      xhci: Add infrastructure for host-specific LPM policies.
      xhci: Add Intel U1/U2 timeout policy.
      USB: Disable hub-initiated LPM for comms devices.

 drivers/bluetooth/ath3k.c                     |    1 +
 drivers/bluetooth/bcm203x.c                   |    1 +
 drivers/bluetooth/bfusb.c                     |    1 +
 drivers/bluetooth/bpa10x.c                    |    1 +
 drivers/bluetooth/btusb.c                     |    1 +
 drivers/isdn/gigaset/bas-gigaset.c            |    1 +
 drivers/isdn/gigaset/usb-gigaset.c            |    1 +
 drivers/isdn/hardware/mISDN/hfcsusb.c         |    1 +
 drivers/isdn/hisax/hfc_usb.c                  |    1 +
 drivers/isdn/hisax/st5481_init.c              |    1 +
 drivers/net/usb/asix.c                        |    1 +
 drivers/net/usb/catc.c                        |    1 +
 drivers/net/usb/cdc-phonet.c                  |    1 +
 drivers/net/usb/cdc_eem.c                     |    1 +
 drivers/net/usb/cdc_ether.c                   |    1 +
 drivers/net/usb/cdc_ncm.c                     |    1 +
 drivers/net/usb/cdc_subset.c                  |    1 +
 drivers/net/usb/cx82310_eth.c                 |    1 +
 drivers/net/usb/dm9601.c                      |    1 +
 drivers/net/usb/gl620a.c                      |    1 +
 drivers/net/usb/hso.c                         |    1 +
 drivers/net/usb/int51x1.c                     |    1 +
 drivers/net/usb/ipheth.c                      |    1 +
 drivers/net/usb/kalmia.c                      |    3 +-
 drivers/net/usb/kaweth.c                      |    1 +
 drivers/net/usb/lg-vl600.c                    |    1 +
 drivers/net/usb/mcs7830.c                     |    1 +
 drivers/net/usb/net1080.c                     |    1 +
 drivers/net/usb/pegasus.c                     |    1 +
 drivers/net/usb/plusb.c                       |    1 +
 drivers/net/usb/qmi_wwan.c                    |    1 +
 drivers/net/usb/rndis_host.c                  |    1 +
 drivers/net/usb/rtl8150.c                     |    3 +-
 drivers/net/usb/sierra_net.c                  |    1 +
 drivers/net/usb/smsc75xx.c                    |    1 +
 drivers/net/usb/smsc95xx.c                    |    1 +
 drivers/net/usb/zaurus.c                      |    1 +
 drivers/net/wireless/at76c50x-usb.c           |    1 +
 drivers/net/wireless/ath/ath6kl/usb.c         |    1 +
 drivers/net/wireless/ath/ath9k/hif_usb.c      |    1 +
 drivers/net/wireless/ath/carl9170/usb.c       |    1 +
 drivers/net/wireless/brcm80211/brcmfmac/usb.c |    1 +
 drivers/net/wireless/libertas/if_usb.c        |    1 +
 drivers/net/wireless/libertas_tf/if_usb.c     |    1 +
 drivers/net/wireless/orinoco/orinoco_usb.c    |    1 +
 drivers/net/wireless/p54/p54usb.c             |    1 +
 drivers/net/wireless/rndis_wlan.c             |    1 +
 drivers/net/wireless/rt2x00/rt2500usb.c       |    1 +
 drivers/net/wireless/rt2x00/rt2800usb.c       |    1 +
 drivers/net/wireless/rt2x00/rt73usb.c         |    1 +
 drivers/net/wireless/rtl818x/rtl8187/dev.c    |    1 +
 drivers/net/wireless/rtlwifi/rtl8192cu/sw.c   |    1 +
 drivers/net/wireless/zd1201.c                 |    1 +
 drivers/net/wireless/zd1211rw/zd_usb.c        |    1 +
 drivers/usb/class/cdc-acm.c                   |    1 +
 drivers/usb/class/cdc-wdm.c                   |    1 +
 drivers/usb/core/driver.c                     |   71 +++
 drivers/usb/core/hcd.c                        |    9 +
 drivers/usb/core/hub.c                        |  629 ++++++++++++++++++++++++-
 drivers/usb/core/message.c                    |   47 ++
 drivers/usb/host/xhci-hub.c                   |   19 +
 drivers/usb/host/xhci-mem.c                   |   12 +
 drivers/usb/host/xhci-pci.c                   |   13 +
 drivers/usb/host/xhci-ring.c                  |    4 +-
 drivers/usb/host/xhci.c                       |  454 ++++++++++++++++++-
 drivers/usb/host/xhci.h                       |   14 +-
 include/linux/usb.h                           |   50 ++
 include/linux/usb/ch11.h                      |    2 +
 include/linux/usb/ch9.h                       |   56 +++
 include/linux/usb/hcd.h                       |    9 +
 70 files changed, 1440 insertions(+), 9 deletions(-)

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox