Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH] rps: add flow director support
From: Tom Herbert @ 2010-04-12 13:34 UTC (permalink / raw)
  To: Changli Gao; +Cc: David S. Miller, netdev
In-Reply-To: <1271022140-3917-1-git-send-email-xiaosuo@gmail.com>

On Sun, Apr 11, 2010 at 2:42 PM, Changli Gao <xiaosuo@gmail.com> wrote:
> add rps flow director support
>
> with rps flow director, users can do weighted packet dispatching among CPUs.
> For example, CPU0:CPU1 is 1:3 for eth0's rx-0:
>
"Flow director" is a misnomer here in that it has no per flow
awareness, that is what RFS provides.  Please use a different name.

>  localhost linux # echo 4 > /sys/class/net/eth0/queues/rx-0/rps_flows
>  localhost linux # echo 0 > /sys/class/net/eth0/queues/rx-0/rps_flow_0
>  localhost linux # echo 1 > /sys/class/net/eth0/queues/rx-0/rps_flow_1
>  localhost linux # echo 1 > /sys/class/net/eth0/queues/rx-0/rps_flow_2
>  localhost linux # echo 1 > /sys/class/net/eth0/queues/rx-0/rps_flow_3
>
It might be better to put this in its own directory and also do it per
CPU instead of hash entry.  This should result in a lot fewer entries
and I'm not sure how you would deal with holes in the hash table for
unspecified entries.  Also, it would be nice not to have to specify a
number of entries.  Maybe something like:

localhost linux # echo 1 > /sys/class/net/eth0/queues/rx-0/rps_cpu_map/0
localhost linux # echo 3 > /sys/class/net/eth0/queues/rx-0/rps_cpu_map/1

To specify CPU 0 with weight 1, CPU 1 with weight 3.

> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ----
>  net/core/net-sysfs.c |  176 +++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 172 insertions(+), 4 deletions(-)
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index 1e7fdd6..d904610 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -511,6 +511,109 @@ static struct sysfs_ops rx_queue_sysfs_ops = {
>        .store = rx_queue_attr_store,
>  };
>
> +static DEFINE_MUTEX(rps_map_lock);
> +
> +static ssize_t show_rps_flow(struct netdev_rx_queue *queue,
> +                            struct rx_queue_attribute *attribute, char *buf)
> +{
> +       unsigned long flowid;
> +       struct rps_map *map;
> +       u16 cpu;
> +
> +       strict_strtoul(attribute->attr.name + strlen("rps_flow_"), 10, &flowid);
> +       rcu_read_lock();
> +       map = rcu_dereference(queue->rps_map);
> +       if (map && flowid < map->len)
> +               cpu = map->cpus[flowid];
> +       else
> +               cpu = 0;
> +       rcu_read_unlock();
> +       return sprintf(buf, "%hu\n", cpu);
> +}
> +
> +static ssize_t store_rps_flow(struct netdev_rx_queue *queue,
> +                             struct rx_queue_attribute *attribute,
> +                             const char *buf, size_t len)
> +{
> +       unsigned long flowid, cpu;
> +       struct rps_map *map;
> +
> +       if (!capable(CAP_NET_ADMIN))
> +               return -EPERM;
> +
> +       if (strict_strtoul(buf, 0, &cpu))
> +               return -EINVAL;
> +       strict_strtoul(attribute->attr.name + strlen("rps_flow_"), 10, &flowid);
> +
> +       mutex_lock(&rps_map_lock);
> +       map = queue->rps_map;
> +       if (map && flowid < map->len)
> +               map->cpus[flowid] = cpu;
> +       mutex_unlock(&rps_map_lock);
> +
> +       return len;
> +}
> +
> +static struct rx_queue_attribute **rps_flow_attribute;
> +static int rps_flow_attribute_size;
> +
> +/* must be called with rps_map_lock locked */
> +static int update_rps_flow_files(struct kobject *kobj,
> +                                struct rps_map *old_map, struct rps_map *map)
> +{
> +       int i;
> +       int old_map_len = old_map ? old_map->len : 0;
> +       int map_len = map ? map->len : 0;
> +
> +       if (old_map_len >= map_len) {
> +               for (i = map_len; i < old_map_len; i++)
> +                       sysfs_remove_file(kobj, &rps_flow_attribute[i]->attr);
> +               return 0;
> +       }
> +
> +       if (map_len > rps_flow_attribute_size) {
> +               struct rx_queue_attribute **attrs;
> +               char name[sizeof("rps_flow_4294967295")];
> +               char *pname;
> +
> +               attrs = krealloc(rps_flow_attribute, map_len * sizeof(void *),
> +                                GFP_KERNEL);
> +               if (attrs == NULL)
> +                       return -ENOMEM;
> +               rps_flow_attribute = attrs;
> +               for (i = rps_flow_attribute_size; i < map_len; i++) {
> +                       rps_flow_attribute[i] = kmalloc(sizeof(**attrs),
> +                                                       GFP_KERNEL);
> +                       if (rps_flow_attribute[i] == NULL)
> +                               break;
> +                       sprintf(name, "rps_flow_%d", i);
> +                       pname = kstrdup(name, GFP_KERNEL);
> +                       if (pname == NULL) {
> +                               kfree(rps_flow_attribute[i]);
> +                               break;
> +                       }
> +                       rps_flow_attribute[i]->attr.name = pname;
> +                       rps_flow_attribute[i]->attr.mode = S_IRUGO | S_IWUSR;
> +                       rps_flow_attribute[i]->show = show_rps_flow;
> +                       rps_flow_attribute[i]->store = store_rps_flow;
> +               }
> +               rps_flow_attribute_size = i;
> +               if (i != map_len)
> +                       return -ENOMEM;
> +       }
> +
> +       for (i = old_map_len; i < map_len; i++) {
> +               if (sysfs_create_file(kobj, &rps_flow_attribute[i]->attr)) {
> +                       while (--i >= old_map_len)
> +                               sysfs_remove_file(kobj,
> +                                                 &rps_flow_attribute[i]->attr);
> +                       return -ENOMEM;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
>  static ssize_t show_rps_map(struct netdev_rx_queue *queue,
>                            struct rx_queue_attribute *attribute, char *buf)
>  {
> @@ -555,7 +658,6 @@ ssize_t store_rps_map(struct netdev_rx_queue *queue,
>        struct rps_map *old_map, *map;
>        cpumask_var_t mask;
>        int err, cpu, i;
> -       static DEFINE_SPINLOCK(rps_map_lock);
>
>        if (!capable(CAP_NET_ADMIN))
>                return -EPERM;
> @@ -588,10 +690,15 @@ ssize_t store_rps_map(struct netdev_rx_queue *queue,
>                map = NULL;
>        }
>
> -       spin_lock(&rps_map_lock);
> +       mutex_lock(&rps_map_lock);
>        old_map = queue->rps_map;
> -       rcu_assign_pointer(queue->rps_map, map);
> -       spin_unlock(&rps_map_lock);
> +       err = update_rps_flow_files(&queue->kobj, old_map, map);
> +       if (!err)
> +               rcu_assign_pointer(queue->rps_map, map);
> +       mutex_unlock(&rps_map_lock);
> +
> +       if (err)
> +               return err;
>
>        if (old_map)
>                call_rcu(&old_map->rcu, rps_map_release);
> @@ -603,8 +710,69 @@ ssize_t store_rps_map(struct netdev_rx_queue *queue,
>  static struct rx_queue_attribute rps_cpus_attribute =
>        __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
>
> +static ssize_t show_rps_flows(struct netdev_rx_queue *queue,
> +               struct rx_queue_attribute *attribute, char *buf)
> +{
> +       struct rps_map *map;
> +       unsigned int len;
> +
> +       rcu_read_lock();
> +       map = rcu_dereference(queue->rps_map);
> +       len = map ? map->len : 0;
> +       rcu_read_unlock();
> +       return sprintf(buf, "%u\n", len);
> +}
> +
> +static ssize_t store_rps_flows(struct netdev_rx_queue *queue,
> +                              struct rx_queue_attribute *attribute,
> +                              const char *buf, size_t len)
> +{
> +       struct rps_map *old_map, *map;
> +       unsigned long flows;
> +       int err;
> +
> +       if (!capable(CAP_NET_ADMIN))
> +               return -EPERM;
> +
> +       if (strict_strtoul(buf, 0, &flows))
> +               return -EINVAL;
> +       if (flows != 0) {
> +               map = kzalloc(max_t(unsigned, RPS_MAP_SIZE(flows),
> +                                   L1_CACHE_BYTES), GFP_KERNEL);
> +               if (map == NULL)
> +                       return -ENOMEM;
> +               map->len = flows;
> +       } else {
> +               map = NULL;
> +       }
> +
> +       mutex_lock(&rps_map_lock);
> +       old_map = queue->rps_map;
> +       err = update_rps_flow_files(&queue->kobj, old_map, map);
> +       if (!err) {
> +               if (old_map && map)
> +                       memcpy(map->cpus, old_map->cpus,
> +                              sizeof(map->cpus[0]) *
> +                              min_t(unsigned int, flows, old_map->len));
> +               rcu_assign_pointer(queue->rps_map, map);
> +       }
> +       mutex_unlock(&rps_map_lock);
> +
> +       if (err)
> +               return err;
> +
> +       if (old_map)
> +               call_rcu(&old_map->rcu, rps_map_release);
> +
> +       return len;
> +}
> +
> +static struct rx_queue_attribute rps_flows_attribute =
> +       __ATTR(rps_flows, S_IRUGO | S_IWUSR, show_rps_flows, store_rps_flows);
> +
>  static struct attribute *rx_queue_default_attrs[] = {
>        &rps_cpus_attribute.attr,
> +       &rps_flows_attribute.attr,
>        NULL
>  };
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* hdlc_ppp: why no detach()?
From: Michael Barkowski @ 2010-04-12 14:15 UTC (permalink / raw)
  To: Krzysztof Halasa
  Cc: David S. Miller, Julia Lawall, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org

Hello Krzysztof,

I am looking at your hdlc_ppp code and I don't understand: why is there
not the equivalent of fr_detach() in there?

pc8300_drv:cpc_remove_one() frees netdevs quite confidently but I wonder
how it can be so sure that there are not skbs in hdlc_ppp's tx_queue
associated with those devices before freeing them....

Even if you wanted to switch a device from PPP to Frame Relay, I don't
see the method right now.  If I may ask, please, what am I missing?

If you agree there is a need for detach(), I would be happy to work on
it and make a submission.

thanks for your time,

-- 
Michael Barkowski
RuggedCom, Inc.


^ permalink raw reply

* Re: [PATCH] rps: add flow director support
From: Changli Gao @ 2010-04-12 14:27 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David S. Miller, netdev
In-Reply-To: <z2o65634d661004120634h8336409er33af1fb75c2a9d1b@mail.gmail.com>

On Mon, Apr 12, 2010 at 9:34 PM, Tom Herbert <therbert@google.com> wrote:
> On Sun, Apr 11, 2010 at 2:42 PM, Changli Gao <xiaosuo@gmail.com> wrote:
>> add rps flow director support
>>
>> with rps flow director, users can do weighted packet dispatching among CPUs.
>> For example, CPU0:CPU1 is 1:3 for eth0's rx-0:
>>
> "Flow director" is a misnomer here in that it has no per flow
> awareness, that is what RFS provides.  Please use a different name.

Flow here means a bundle of flows, not the original meaning. How about
"rps_buckets" and "rps_bucket_x"?

>
>>  localhost linux # echo 4 > /sys/class/net/eth0/queues/rx-0/rps_flows
>>  localhost linux # echo 0 > /sys/class/net/eth0/queues/rx-0/rps_flow_0
>>  localhost linux # echo 1 > /sys/class/net/eth0/queues/rx-0/rps_flow_1
>>  localhost linux # echo 1 > /sys/class/net/eth0/queues/rx-0/rps_flow_2
>>  localhost linux # echo 1 > /sys/class/net/eth0/queues/rx-0/rps_flow_3
>>
> It might be better to put this in its own directory

I have thought that before, but since they control the same data in
kernel as rps_cpus does, I put them in the same directory.

> and also do it per
> CPU instead of hash entry.  This should result in a lot fewer entries
> and I'm not sure how you would deal with holes in the hash table for
> unspecified entries.  Also, it would be nice not to have to specify a
> number of entries.  Maybe something like:
>
> localhost linux # echo 1 > /sys/class/net/eth0/queues/rx-0/rps_cpu_map/0
> localhost linux # echo 3 > /sys/class/net/eth0/queues/rx-0/rps_cpu_map/1
>
> To specify CPU 0 with weight 1, CPU 1 with weight 3.
>

Your way is simpler and more straightforward. My idea has its own advantages:
1. control the rate precision through rps_flows.
2. do dynamic weighted packet dispatching by migrating some flows from
some CPUs to other CPUs. During these operations, only the migrated
flows are affected, and out-of-order (OOO) delivery only occurs in those flows.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply

* Re: hdlc_ppp: why no detach()?
From: Michael Barkowski @ 2010-04-12 14:34 UTC (permalink / raw)
  To: Krzysztof Halasa
  Cc: David S. Miller, Julia Lawall, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <4BC32B00.1030600@ruggedcom.com>

Michael Barkowski wrote:
> Hello Krzysztof,
> 
> I am looking at your hdlc_ppp code and I don't understand: why is there
> not the equivalent of fr_detach() in there?
> 
> pc8300_drv:cpc_remove_one() frees netdevs quite confidently but I wonder
> how it can be so sure that there are not skbs in hdlc_ppp's tx_queue
> associated with those devices before freeing them....
> 

the above is the real danger I see - free the netdev, then ppp's timer
comes along and dequeues from tx_queue an skb with invalid device.

> Even if you wanted to switch a device from PPP to Frame Relay, I don't
> see the method right now.  If I may ask, please, what am I missing?
> 

Ok - this part was a momentary lapse on my part - please strike from
the record :)

> If you agree there is a need for detach(), I would be happy to work on
> it and make a submission.
> 
> thanks for your time,
> 


-- 
Michael Barkowski
905-482-4577

^ permalink raw reply

* Re: Strange packet drops with heavy firewalling
From: Benny Lyne Amorsen @ 2010-04-12 14:44 UTC (permalink / raw)
  To: zhigang gong; +Cc: netdev
In-Reply-To: <q2v40c9f5b21004120116p766df82dj88c6af4e4cad55f@mail.gmail.com>

man, 12 04 2010 kl. 16:16 +0800, skrev zhigang gong:

> How do you know the per CPU usage data, by oprofile? I'm just a little
> surprised with the result, as it shows your new core is running 10x
> faster than your old core :). 

Well the old server had only two CPU's plus hyperthreading, and the
CPU's were Pentium-4-based. Add a slow memory bus to that and you have a
fairly slow system. It's almost 5 years old, so Moore's law says 2**3
increase in number of transistors...

In about the same time frame Linux has gone from being able to fill
1Gbps ethernet to being able to fill 10Gbps ethernet.

> What's the average packet size?

I asked the switch (I can't find a handy equivalent to ifstat which
counts packets instead of bytes). The 5 minute average packet sizes seem
to vary in the range 450 to 550 bytes.

> If your packet size is 64 bytes, then the pps(packet per second) rate
> should be about 585Kpps. As far as I know, this value is almost the best
> result when the standard linux kernel is processing the networking
> traffic with a normal 1Gb ethernet card (without multi-queue support)
> on an Intel box. If that is the case, buying a better ethernet card with
> multi-queue support should be a good choice. Otherwise, it may not
> help. 

I am far from that, perhaps 1/10th of that. I do a lot more processing
on at least some of the packets though (the ones starting new flows).


/Benny



^ permalink raw reply

* Very Important
From: Jiang Jianmin @ 2010-04-12 14:56 UTC (permalink / raw)


Good Day,
 
I have a secured business proposal of $28,272,000.00.Contact me via my private email(cncn1_jiang_jianmin2011@yahoo.com.cn)if interested.
 
Mr Jiang Jianmin.

^ permalink raw reply

* Re: forcedeth driver hangs under heavy load
From: Eric Dumazet @ 2010-04-12 15:24 UTC (permalink / raw)
  To: stephen mulcahy; +Cc: netdev, Ben Hutchings, Ayaz Abdulla, 572201
In-Reply-To: <4BC31DDE.7010005@gmail.com>

Le lundi 12 avril 2010 à 14:19 +0100, stephen mulcahy a écrit :

> Does that help?

Well, yes, because it seems a TCP problem.

root@node20:~# tcpdump host node20 and node05
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 96 bytes
14:12:59.612626 IP node05.webstar.cnet.36295 > node20.ssh: Flags [S], seq 3677858646, win 5840, options [mss 1460,sackOK,TS val 1599534 ecr 0,nop,wscale 7], length 0
14:12:59.612656 IP node20.ssh > node05.webstar.cnet.36295: Flags [S.], seq 3610575850, ack 3677858647, win 5792, options [mss 1460,sackOK,TS val 1598775 ecr 1599534,nop,wscale 7], length 0
14:12:59.612718 IP node05.webstar.cnet.36295 > node20.ssh: Flags [.], ack 1, win 46, options [nop,nop,TS val 1599534 ecr 1598775], length 0
14:12:59.617434 IP node20.ssh > node05.webstar.cnet.36295: Flags [P.], seq 1:33, ack 1, win 46, options [nop,nop,TS val 1598776 ecr 1599534], length 32
14:12:59.617522 IP node05.webstar.cnet.36295 > node20.ssh: Flags [.], ack 33, win 46, options [nop,nop,TS val 1599535 ecr 1598776], length 0
14:12:59.617609 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 1:33, ack 33, win 46, options [nop,nop,TS val 1599535 ecr 1598776], length 32

All following xmitted frames are completely out of sync, this makes no sense.

Sequence number went backward.

14:12:59.820434 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 4294936586:4294936618, ack 2620194849, win 46, options [nop,nop,TS val 1599586 ecr 1598776], length 32
14:13:00.229069 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 4294961734:4294961766, ack 3928358945, win 46, options [nop,nop,TS val 1599688 ecr 1598776], length 32
14:13:01.044396 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 4294964167:4294964199, ack 410320929, win 46, options [nop,nop,TS val 1599892 ecr 1598776], length 32


14:13:02.676308 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 1:33, ack 33, win 46, options [nop,nop,TS val 1600300 ecr 1598776], length 32
14:13:05.940804 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 17294:17326, ack 3045851169, win 46, options [nop,nop,TS val 1601116 ecr 1598776], length 32
14:13:12.468484 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 17294:17326, ack 3045851169, win 46, options [nop,nop,TS val 1602748 ecr 1598776], length 32
14:13:25.523850 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 1:33, ack 33, win 46, options [nop,nop,TS val 1606012 ecr 1598776], length 32
14:13:51.633934 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 4294963190:4294963222, ack 9830433, win 46, options [nop,nop,TS val 1612540 ecr 1598776], length 32
14:14:43.855380 IP node05.webstar.cnet.36295 > node20.ssh: Flags [P.], seq 1:33, ack 33, win 46, options [nop,nop,TS val 1625596 ecr 1598776], length 32
14:14:59.617675 IP node20.ssh > node05.webstar.cnet.36295: Flags [F.], seq 33, ack 1, win 46, options [nop,nop,TS val 1628777 ecr 1599535], length 0
14:14:59.618202 IP node05.webstar.cnet.36295 > node20.ssh: Flags [FP.], seq 4294959654:4294960446, ack 3930456098, win 46, options [nop,nop,TS val 1629536 ecr 1628777], length 792
14:14:59.821527 IP node20.ssh > node05.webstar.cnet.36295: Flags [F.], seq 33, ack 1, win 46, options [nop,nop,TS val 1628828 ecr 1599535], length 0
14:14:59.821598 IP node05.webstar.cnet.36295 > node20.ssh: Flags [.], ack 34, win 46, options [nop,nop,TS val 1629587 ecr 1628828,nop,nop,sack 1 {33:34}], length 0

Do you have some netfilters rules ?



^ permalink raw reply

* Re: [Bonding-devel] [v3 Patch 2/3] bridge: make bridge support netpoll
From: Stephen Hemminger @ 2010-04-12 15:38 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Cong Wang, Jay Vosburgh, Neil Horman, netdev, Matt Mackall,
	bridge, linux-kernel, David Miller, Jeff Moyer, Andy Gospodarek,
	bonding-devel
In-Reply-To: <1271068737.16881.18.camel@edumazet-laptop>

On Mon, 12 Apr 2010 12:38:57 +0200
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Le lundi 12 avril 2010 à 18:37 +0800, Cong Wang a écrit :
> > Stephen Hemminger wrote:
> > > There is no protection on dev->priv_flags for SMP access.
> > > It would be better to use a bit value in dev->state if you are using it as a control flag.
> > > 
> > > Then you could use 
> > > 			if (unlikely(test_and_clear_bit(__IN_NETPOLL, &skb->dev->state)))
> > > 				netpoll_send_skb(...)
> > > 
> > > 
> > 
> > Hmm, I think we can't use ->state here, it is not for this kind of purpose,
> > according to its comments.
> > 
> > Also, I find other usages of IFF_XXX flags of ->priv_flags are also using
> > &, | to set or clear the flags. So there must be some other things preventing
> > the race...
> 
> Yes, it's RTNL that protects priv_flags changes, hopefully...
> 

The patch was not protecting priv_flags with RTNL.
For example..


@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
+					dev->priv_flags |= IFF_IN_NETPOLL;
 					status = ops->ndo_start_xmit(skb, dev);
+					dev->priv_flags &= ~IFF_IN_NETPOLL;
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);

^ permalink raw reply

* [PATCH 0/4] IPv6 addrconf related fixes
From: Stephen Hemminger @ 2010-04-12 15:41 UTC (permalink / raw)
  To: davem; +Cc: netdev

These apply to net-next, the problems do not exist in earlier kernels.
The problems started when I added changes to retain IPv6 addresses
when link goes down.

-- 


^ permalink raw reply

* [PATCH 1/4] IPv6: keep route for tentative address
From: Stephen Hemminger @ 2010-04-12 15:41 UTC (permalink / raw)
  To: David S. Miller, Tantilov, Emil S; +Cc: netdev
In-Reply-To: <20100412154130.397252857@vyatta.com>

[-- Attachment #1: ipv6-addrconf1.patch --]
[-- Type: text/plain, Size: 689 bytes --]

Recent changes preserve IPv6 address when link goes down (good).
But would cause address to point to dead dst entry (bad).
The simplest fix is to just not delete route if address is
being held for later use.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/net/ipv6/addrconf.c	2010-04-11 12:19:37.938082190 -0700
+++ b/net/ipv6/addrconf.c	2010-04-11 12:25:05.349309074 -0700
@@ -4046,7 +4046,8 @@ static void __ipv6_ifa_notify(int event,
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		dst_hold(&ifp->rt->u.dst);
-		if (ip6_del_rt(ifp->rt))
+
+		if (ifp->dead && ip6_del_rt(ifp->rt))
 			dst_free(&ifp->rt->u.dst);
 		break;
 	}

-- 


^ permalink raw reply

* [PATCH 2/4] IPv6: keep tentative addresses in hash table
From: Stephen Hemminger @ 2010-04-12 15:41 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <20100412154130.397252857@vyatta.com>

[-- Attachment #1: ipv6-addrconf2.patch --]
[-- Type: text/plain, Size: 1105 bytes --]

When link goes down, want address to be preserved but in a tentative
state, therefore it has to stay in hash list.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/net/ipv6/addrconf.c	2010-04-11 12:25:05.349309074 -0700
+++ b/net/ipv6/addrconf.c	2010-04-11 12:25:10.408996382 -0700
@@ -2703,17 +2703,18 @@ static int addrconf_ifdown(struct net_de
 			/* Flag it for later restoration when link comes up */
 			ifa->flags |= IFA_F_TENTATIVE;
 			in6_ifa_hold(ifa);
+			write_unlock_bh(&idev->lock);
 		} else {
 			list_del(&ifa->if_list);
 			ifa->dead = 1;
-		}
-		write_unlock_bh(&idev->lock);
+			write_unlock_bh(&idev->lock);
 
-		/* clear hash table */
-		spin_lock_bh(&addrconf_hash_lock);
-		hlist_del_init_rcu(&ifa->addr_lst);
-		__in6_ifa_put(ifa);
-		spin_unlock_bh(&addrconf_hash_lock);
+			/* clear hash table */
+			spin_lock_bh(&addrconf_hash_lock);
+			hlist_del_init_rcu(&ifa->addr_lst);
+			__in6_ifa_put(ifa);
+			spin_unlock_bh(&addrconf_hash_lock);
+		}
 
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
 		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);

-- 


^ permalink raw reply

* [PATCH 3/4] ipv6: additional ref count for hash list unnecessary
From: Stephen Hemminger @ 2010-04-12 15:41 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <20100412154130.397252857@vyatta.com>

[-- Attachment #1: ipv6-addrconf3.patch --]
[-- Type: text/plain, Size: 1002 bytes --]

Since an address in hash list has to already have a ref count,
no additional ref count is needed. 

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>


--- a/net/ipv6/addrconf.c	2010-04-11 12:25:32.609002374 -0700
+++ b/net/ipv6/addrconf.c	2010-04-11 12:26:52.715246164 -0700
@@ -675,7 +675,6 @@ ipv6_add_addr(struct inet6_dev *idev, co
 	hash = ipv6_addr_hash(addr);
 
 	hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
-	in6_ifa_hold(ifa);
 	spin_unlock(&addrconf_hash_lock);
 
 	write_lock(&idev->lock);
@@ -723,7 +722,6 @@ static void ipv6_del_addr(struct inet6_i
 
 	spin_lock_bh(&addrconf_hash_lock);
 	hlist_del_init_rcu(&ifp->addr_lst);
-	__in6_ifa_put(ifp);
 	spin_unlock_bh(&addrconf_hash_lock);
 
 	write_lock_bh(&idev->lock);
@@ -2712,7 +2710,6 @@ static int addrconf_ifdown(struct net_de
 			/* clear hash table */
 			spin_lock_bh(&addrconf_hash_lock);
 			hlist_del_init_rcu(&ifa->addr_lst);
-			__in6_ifa_put(ifa);
 			spin_unlock_bh(&addrconf_hash_lock);
 		}
 

-- 


^ permalink raw reply

* [PATCH 4/4] IPv6: only notify protocols if address is compeletely gone
From: Stephen Hemminger @ 2010-04-12 15:41 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <20100412154130.397252857@vyatta.com>

[-- Attachment #1: ipv6-addrconf4.patch --]
[-- Type: text/plain, Size: 793 bytes --]

The notifier for address down should only be called if address is completely
gone, not just being marked as tentative on link transition. The code
in net-next would cause bonding/sctp/s390 to see address disappear on link
down, but they would never see it reappear on link up.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/net/ipv6/addrconf.c	2010-04-11 14:34:36.919767724 -0700
+++ b/net/ipv6/addrconf.c	2010-04-11 14:35:00.533967946 -0700
@@ -2714,7 +2714,9 @@ static int addrconf_ifdown(struct net_de
 		}
 
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
-		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+		if (ifa->dead)
+			atomic_notifier_call_chain(&inet6addr_chain,
+						   NETDEV_DOWN, ifa);
 		in6_ifa_put(ifa);
 
 		write_lock_bh(&idev->lock);

-- 


^ permalink raw reply

* Re: forcedeth driver hangs under heavy load
From: stephen mulcahy @ 2010-04-12 16:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, Ben Hutchings, Ayaz Abdulla, 572201
In-Reply-To: <1271085862.16881.38.camel@edumazet-laptop>

Eric Dumazet wrote:
> Le lundi 12 avril 2010 à 14:19 +0100, stephen mulcahy a écrit :
> 
> Do you have some netfilters rules ?
> 

Hi Eric,

I don't have any netfilters rules:

root@node34:~# for table in filter nat mangle raw; do iptables -t $table 
-L; done
Chain INPUT (policy ACCEPT)
target     prot opt source               destination

Chain FORWARD (policy ACCEPT)
target     prot opt source               destination

Chain OUTPUT (policy ACCEPT)
target     prot opt source               destination
Chain PREROUTING (policy ACCEPT)
target     prot opt source               destination

Chain POSTROUTING (policy ACCEPT)
target     prot opt source               destination

Chain OUTPUT (policy ACCEPT)
target     prot opt source               destination
Chain PREROUTING (policy ACCEPT)
target     prot opt source               destination

Chain INPUT (policy ACCEPT)
target     prot opt source               destination

Chain FORWARD (policy ACCEPT)
target     prot opt source               destination

Chain OUTPUT (policy ACCEPT)
target     prot opt source               destination

Chain POSTROUTING (policy ACCEPT)
target     prot opt source               destination
Chain PREROUTING (policy ACCEPT)
target     prot opt source               destination

Chain OUTPUT (policy ACCEPT)
target     prot opt source               destination


I re-ran this on the 2.6.32 kernel (with the 2.6.32 forcedeth module) 
just in case that was screwing something up.

node33 is in the unresponsive state this time. I'm running tcpdump on 
node34. On node33 I try to ssh to node34 (using the IP address of node34). I 
note that I can ping between node33 and node34.

root@node34:~# tcpdump -v host node34 and node33
tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 96 
bytes
17:05:19.622384 IP (tos 0x0, ttl 64, id 21435, offset 0, flags [DF], 
proto TCP (6), length 60)
     node33.webstar.cnet.43653 > node34.ssh: Flags [S], cksum 0xb994 
(correct), seq 1675314077, win 5840, options [mss 1460,sackOK,TS val 
331814 ecr 0,nop,wscale 7], length 0
17:05:19.622754 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto 
TCP (6), length 60)
     node34.ssh > node33.webstar.cnet.43653: Flags [S.], cksum 0x9d81 
(correct), seq 1669769379, ack 1675314078, win 5792, options [mss 
1460,sackOK,TS val 331779 ecr 331814,nop,wscale 7], length 0
17:05:19.622813 IP (tos 0x0, ttl 64, id 21436, offset 0, flags [DF], 
proto TCP (6), length 52)
     node33.webstar.cnet.43653 > node34.ssh: Flags [.], cksum 0xe2bf 
(correct), ack 1, win 46, options [nop,nop,TS val 331814 ecr 331779], 
length 0
17:05:19.627666 IP (tos 0x0, ttl 64, id 47271, offset 0, flags [DF], 
proto TCP (6), length 84)
     node34.ssh > node33.webstar.cnet.43653: Flags [P.], seq 1:33, ack 
1, win 46, options [nop,nop,TS val 331780 ecr 331814], length 32
17:05:19.627748 IP (tos 0x0, ttl 64, id 21437, offset 0, flags [DF], 
proto TCP (6), length 52)
     node33.webstar.cnet.43653 > node34.ssh: Flags [.], cksum 0xe29c 
(correct), ack 33, win 46, options [nop,nop,TS val 331816 ecr 331780], 
length 0
17:05:19.627833 IP (tos 0x0, ttl 64, id 21438, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum 1f8a (->d189)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 
23413:23445, ack 2749038625, win 46, options [nop,nop,TS val 331816 ecr 
331780], length 32
17:05:19.831634 IP (tos 0x0, ttl 64, id 21439, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum d189 (->d188)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 1:33, ack 
33, win 46, options [nop,nop,TS val 331867 ecr 331780], length 32
17:05:20.239603 IP (tos 0x0, ttl 64, id 21440, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum 15c6 (->d187)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 
30492:30524, ack 809893921, win 46, options [nop,nop,TS val 331969 ecr 
331780], length 32
17:05:21.055534 IP (tos 0x0, ttl 64, id 21441, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum d187 (->d186)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 1:33, ack 
33, win 46, options [nop,nop,TS val 332173 ecr 331780], length 32
17:05:22.687386 IP (tos 0x0, ttl 64, id 21442, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum d186 (->d185)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 1:33, ack 
33, win 46, options [nop,nop,TS val 332581 ecr 331780], length 32
17:05:25.950935 IP (tos 0x0, ttl 64, id 21443, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum 15c4 (->d184)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 
30492:30524, ack 809893921, win 46, options [nop,nop,TS val 333397 ecr 
331780], length 32
17:05:32.478527 IP (tos 0x0, ttl 64, id 21444, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum c01 (->d183)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 
43997:44029, ack 1311047713, win 46, options [nop,nop,TS val 335029 ecr 
331780], length 32
17:05:45.533370 IP (tos 0x0, ttl 64, id 21445, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum 23d (->d182)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 3348:3380, 
ack 4054450209, win 46, options [nop,nop,TS val 338293 ecr 331780], 
length 32
17:06:08.719187 IP (tos 0x0, ttl 64, id 27660, offset 0, flags [DF], 
proto TCP (6), length 1500, bad cksum 5360 (->b3b3)!)
     node33.webstar.cnet.50060 > node34.35725: Flags [.], seq 
1203473738:1203475186, ack 1191452767, win 54, options [nop,nop,TS val 
344089 ecr 256770], length 1448
17:06:11.643080 IP (tos 0x0, ttl 64, id 21446, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum e4f2 (->d181)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 
47331:47363, ack 4110811169, win 46, options [nop,nop,TS val 344821 ecr 
331780], length 32
17:06:13.715233 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 
node34 tell node33.webstar.cnet, length 46
17:06:13.715257 ARP, Ethernet (len 6), IPv4 (len 4), Reply node34 is-at 
00:30:48:f0:06:72 (oui Unknown), length 28
17:07:03.866492 IP (tos 0x0, ttl 64, id 21447, offset 0, flags [DF], 
proto TCP (6), length 84, bad cksum b413 (->d180)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [P.], seq 
28939:28971, ack 1913782305, win 46, options [nop,nop,TS val 357877 ecr 
331780], length 32
17:07:08.862055 ARP, Ethernet (len 6), IPv4 (len 4), Request who-has 
node34 tell node33.webstar.cnet, length 46
17:07:08.862370 ARP, Ethernet (len 6), IPv4 (len 4), Reply node34 is-at 
00:30:48:f0:06:72 (oui Unknown), length 28
17:07:19.627910 IP (tos 0x0, ttl 64, id 47272, offset 0, flags [DF], 
proto TCP (6), length 52)
     node34.ssh > node33.webstar.cnet.43653: Flags [F.], cksum 0x6d6b 
(correct), seq 33, ack 1, win 46, options [nop,nop,TS val 361780 ecr 
331816], length 0
17:07:19.628403 IP (tos 0x0, ttl 64, id 21448, offset 0, flags [DF], 
proto TCP (6), length 844, bad cksum aa4d (->ce87)!)
     node33.webstar.cnet.43653 > node34.ssh: Flags [FP.], seq 
20399:21191, ack 2356871202, win 46, options [nop,nop,TS val 361818 ecr 
361780], length 792
17:07:19.833456 IP (tos 0x0, ttl 64, id 47273, offset 0, flags [DF], 
proto TCP (6), length 52)
     node34.ssh > node33.webstar.cnet.43653: Flags [F.], cksum 0x6d37 
(correct), seq 33, ack 1, win 46, options [nop,nop,TS val 361832 ecr 
331816], length 0
17:07:19.833517 IP (tos 0x0, ttl 64, id 21449, offset 0, flags [DF], 
proto TCP (6), length 64)
     node33.webstar.cnet.43653 > node34.ssh: Flags [.], cksum 0xa5e9 
(correct), ack 34, win 46, options [nop,nop,TS val 361870 ecr 
361832,nop,nop,sack 1 {33:34}], length 0

At this point, I see a "Connection closed by 10.141.0.34" message on 
node33 (from where I am attempting to ssh).

Again, if I ifdown on node33 and ifup again - I can then see from node33 
to node34 without problems.

-stephen

^ permalink raw reply

* Re: NULL pointer dereference panic in stable (2.6.33.2), amd64
From: Denys Fedorysychenko @ 2010-04-12 16:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Krishna Kumar2, David Miller, netdev
In-Reply-To: <1271064703.16881.16.camel@edumazet-laptop>

[-- Attachment #1: Type: Text/Plain, Size: 163 bytes --]

On Monday 12 April 2010 12:31:43 Eric Dumazet wrote:
.
The problem seems to remain: I patched the kernel, but it still panicked.
Btw, I don't have any multiqueue card, I think.

[-- Attachment #2: x.txt --]
[-- Type: text/plain, Size: 12174 bytes --]

Apr 12 18:46:58 80.83.17.1 dropbear[4843]: exit before auth: Disconnect received
Apr 12 18:46:59 80.83.17.1 dropbear[4845]: Child connection from 82.113.44.186:48692
Apr 12 18:46:59 80.83.17.1 dropbear[4844]: exit before auth: Disconnect received
Apr 12 18:46:59 80.83.17.1 kernel: [12598.956375] BUG: unable to handle kernel NULL pointer dereference at (null)
Apr 12 18:46:59 80.83.17.1 kernel: [12598.956571] IP: [<ffffffff811e587f>] dev_queue_xmit+0x28c/0x46d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.956762] PGD 21debc067 PUD 21c881067 PMD 0 
Apr 12 18:46:59 80.83.17.1 kernel: [12598.956947] Oops: 0000 [#1] SMP 
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957126] last sysfs file: /sys/devices/virtual/vc/vcs3/dev
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957311] CPU 0 
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] Pid: 0, comm: swapper Not tainted 2.6.33.2-build-0052test-64 #2         /        
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] RIP: 0010:[<ffffffff811e587f>]  [<ffffffff811e587f>] dev_queue_xmit+0x28c/0x46d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] RSP: 0000:ffff880028203a30  EFLAGS: 00010202
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] RAX: 0000000000002000 RBX: 0000000000000000 RCX: ffff880209d8a900
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] RDX: ffff88021d870000 RSI: 0000000000000000 RDI: ffff88020a7b48e8
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] RBP: ffff880028203a60 R08: ffff88021c8be89c R09: ffff88021c8bec00
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] R10: dead000000200200 R11: dead000000100100 R12: ffff88021f98a880
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] R13: ffff88021d5c0900 R14: ffff88020a7b48e8 R15: ffff88021cbad000
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] FS:  0000000000000000(0000) GS:ffff880028200000(0000) knlGS:0000000000000000
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] CR2: 0000000000000000 CR3: 000000021c9d8000 CR4: 00000000000006f0
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] Process swapper (pid: 0, threadinfo ffffffff81392000, task ffffffff813a1020)
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] Stack:
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  ffff88021d870000 ffff88021d5c0900 0000000000000042 ffff88021d5c0900
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] <0> ffff88021cbad000 ffff88021cbad000 ffff880028203a80 ffffffffa01c12a9
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] <0> 0000000000000000 ffff88020a7b48e8 ffff880028203ad0 ffffffff811e540e
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] Call Trace:
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  <IRQ> 
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffffa01c12a9>] vlan_dev_hwaccel_hard_start_xmit+0x68/0x86 [8021q]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff811e540e>] dev_hard_start_xmit+0x232/0x304
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff811f648a>] sch_direct_xmit+0x5d/0x16b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff811f6654>] __qdisc_run+0xbc/0xdc
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff811e5939>] dev_queue_xmit+0x346/0x46d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff8120a384>] ip_finish_output2+0x1c2/0x206
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff8120a430>] ip_finish_output+0x68/0x6a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff8120a4d2>] ip_output+0xa0/0xa5
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81206d2e>] ip_forward_finish+0x2e/0x32
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81206ff4>] ip_forward+0x2c2/0x322
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81205ae0>] ip_rcv_finish+0x2f0/0x30a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81205d77>] ip_rcv+0x27d/0x2a4
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff8124ad48>] ? vlan_hwaccel_do_receive+0x2b/0xda
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff811e47b6>] netif_receive_skb+0x450/0x475
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff811e4909>] napi_skb_finish+0x24/0x3b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff8124b01b>] vlan_gro_receive+0x7c/0x81
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffffa015d6c5>] e1000_receive_skb+0x4a/0x65 [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffffa015d8cb>] e1000_clean_rx_irq+0x1eb/0x29c [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffffa015ebfb>] e1000_clean+0x75/0x22e [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffffa0234d6c>] ? hfsc_dequeue+0x171/0x2a6 [sch_hfsc]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff811e4e56>] net_rx_action+0xa7/0x17a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81039670>] __do_softirq+0x96/0x11a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff810037cc>] call_softirq+0x1c/0x28
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81005543>] do_softirq+0x33/0x68
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81039407>] irq_exit+0x36/0x75
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81004c3e>] do_IRQ+0xaa/0xc1
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff8125ba93>] ret_from_intr+0x0/0xa
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  <EOI> 
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff8100a0c7>] ? mwait_idle+0x66/0x6b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81001d24>] ? enter_idle+0x20/0x22
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff81001d7b>] cpu_idle+0x55/0x8d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff8124bba5>] rest_init+0x79/0x7b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff813fca70>] start_kernel+0x362/0x36d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff813fc0a8>] x86_64_start_reservations+0xa5/0xa9
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  [<ffffffff813fc189>] x86_64_start_kernel+0xdd/0xe4
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] Code: e2 48 8b 55 d0 49 c1 e4 07 66 41 8b 86 a6 00 00 00 4c 03 a2 00 03 00 00 80 e4 cf 80 cc 20 49 8b 5c 24 08 66 41 89 86 a6 00 00 00 <48> 83 3b 00 0f 84 bb 00 00 00 4c 8d ab 9c 00 00 00 4c 89 ef e8 
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] RIP  [<ffffffff811e587f>] dev_queue_xmit+0x28c/0x46d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342]  RSP <ffff880028203a30>
Apr 12 18:46:59 80.83.17.1 kernel: [12598.957342] CR2: 0000000000000000
Apr 12 18:46:59 80.83.17.1 kernel: [12598.974856] ---[ end trace 739e5480c8ab784f ]---
Apr 12 18:46:59 80.83.17.1 kernel: [12598.975082] Kernel panic - not syncing: Fatal exception in interrupt
Apr 12 18:46:59 80.83.17.1 kernel: [12598.975311] Pid: 0, comm: swapper Tainted: G      D    2.6.33.2-build-0052test-64 #2
Apr 12 18:46:59 80.83.17.1 kernel: [12598.975706] Call Trace:
Apr 12 18:46:59 80.83.17.1 kernel: [12598.975920]  <IRQ>  [<ffffffff81259753>] panic+0xa0/0x161
Apr 12 18:46:59 80.83.17.1 kernel: [12598.976200]  [<ffffffff81003293>] ? apic_timer_interrupt+0x13/0x20
Apr 12 18:46:59 80.83.17.1 kernel: [12598.976431]  [<ffffffff81035673>] ? kmsg_dump+0x112/0x12c
Apr 12 18:46:59 80.83.17.1 kernel: [12598.976657]  [<ffffffff81006651>] oops_end+0xaa/0xba
Apr 12 18:46:59 80.83.17.1 kernel: [12598.976882]  [<ffffffff8101e653>] no_context+0x1f3/0x202
Apr 12 18:46:59 80.83.17.1 kernel: [12598.977113]  [<ffffffff8101e81c>] __bad_area_nosemaphore+0x1ba/0x1e0
Apr 12 18:46:59 80.83.17.1 kernel: [12598.977347]  [<ffffffff8113f8b3>] ? swiotlb_map_page+0x0/0xd5
Apr 12 18:46:59 80.83.17.1 kernel: [12598.977577]  [<ffffffffa015c55a>] ? pci_map_single+0x8a/0x99 [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.977806]  [<ffffffff8113f0c0>] ? swiotlb_dma_mapping_error+0x18/0x25
Apr 12 18:46:59 80.83.17.1 kernel: [12598.978045]  [<ffffffffa015a2e0>] ? pci_dma_mapping_error+0x31/0x3d [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.978282]  [<ffffffffa015cc37>] ? e1000_xmit_frame+0x6ce/0xa43 [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.978513]  [<ffffffff8101e850>] bad_area_nosemaphore+0xe/0x10
Apr 12 18:46:59 80.83.17.1 kernel: [12598.978741]  [<ffffffff8101eb32>] do_page_fault+0x114/0x24a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.978967]  [<ffffffff8125bc9f>] page_fault+0x1f/0x30
Apr 12 18:46:59 80.83.17.1 kernel: [12598.979196]  [<ffffffff811e587f>] ? dev_queue_xmit+0x28c/0x46d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.979426]  [<ffffffffa01c12a9>] vlan_dev_hwaccel_hard_start_xmit+0x68/0x86 [8021q]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.979821]  [<ffffffff811e540e>] dev_hard_start_xmit+0x232/0x304
Apr 12 18:46:59 80.83.17.1 kernel: [12598.980055]  [<ffffffff811f648a>] sch_direct_xmit+0x5d/0x16b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.980284]  [<ffffffff811f6654>] __qdisc_run+0xbc/0xdc
Apr 12 18:46:59 80.83.17.1 kernel: [12598.980514]  [<ffffffff811e5939>] dev_queue_xmit+0x346/0x46d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.980740]  [<ffffffff8120a384>] ip_finish_output2+0x1c2/0x206
Apr 12 18:46:59 80.83.17.1 kernel: [12598.980966]  [<ffffffff8120a430>] ip_finish_output+0x68/0x6a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.981197]  [<ffffffff8120a4d2>] ip_output+0xa0/0xa5
Apr 12 18:46:59 80.83.17.1 kernel: [12598.981427]  [<ffffffff81206d2e>] ip_forward_finish+0x2e/0x32
Apr 12 18:46:59 80.83.17.1 kernel: [12598.981654]  [<ffffffff81206ff4>] ip_forward+0x2c2/0x322
Apr 12 18:46:59 80.83.17.1 kernel: [12598.981880]  [<ffffffff81205ae0>] ip_rcv_finish+0x2f0/0x30a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.982111]  [<ffffffff81205d77>] ip_rcv+0x27d/0x2a4
Apr 12 18:46:59 80.83.17.1 kernel: [12598.982337]  [<ffffffff8124ad48>] ? vlan_hwaccel_do_receive+0x2b/0xda
Apr 12 18:46:59 80.83.17.1 kernel: [12598.982566]  [<ffffffff811e47b6>] netif_receive_skb+0x450/0x475
Apr 12 18:46:59 80.83.17.1 kernel: [12598.982793]  [<ffffffff811e4909>] napi_skb_finish+0x24/0x3b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.983025]  [<ffffffff8124b01b>] vlan_gro_receive+0x7c/0x81
Apr 12 18:46:59 80.83.17.1 kernel: [12598.983260]  [<ffffffffa015d6c5>] e1000_receive_skb+0x4a/0x65 [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.983492]  [<ffffffffa015d8cb>] e1000_clean_rx_irq+0x1eb/0x29c [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.983727]  [<ffffffffa015ebfb>] e1000_clean+0x75/0x22e [e1000e]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.983955]  [<ffffffffa0234d6c>] ? hfsc_dequeue+0x171/0x2a6 [sch_hfsc]
Apr 12 18:46:59 80.83.17.1 kernel: [12598.984190]  [<ffffffff811e4e56>] net_rx_action+0xa7/0x17a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.984416]  [<ffffffff81039670>] __do_softirq+0x96/0x11a
Apr 12 18:46:59 80.83.17.1 kernel: [12598.984642]  [<ffffffff810037cc>] call_softirq+0x1c/0x28
Apr 12 18:46:59 80.83.17.1 kernel: [12598.984866]  [<ffffffff81005543>] do_softirq+0x33/0x68
Apr 12 18:46:59 80.83.17.1 kernel: [12598.985097]  [<ffffffff81039407>] irq_exit+0x36/0x75
Apr 12 18:46:59 80.83.17.1 kernel: [12598.985323]  [<ffffffff81004c3e>] do_IRQ+0xaa/0xc1
Apr 12 18:46:59 80.83.17.1 kernel: [12598.985546]  [<ffffffff8125ba93>] ret_from_intr+0x0/0xa
Apr 12 18:46:59 80.83.17.1 kernel: [12598.985770]  <EOI>  [<ffffffff8100a0c7>] ? mwait_idle+0x66/0x6b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.986048]  [<ffffffff81001d24>] ? enter_idle+0x20/0x22
Apr 12 18:46:59 80.83.17.1 kernel: [12598.986284]  [<ffffffff81001d7b>] cpu_idle+0x55/0x8d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.986507]  [<ffffffff8124bba5>] rest_init+0x79/0x7b
Apr 12 18:46:59 80.83.17.1 kernel: [12598.986730]  [<ffffffff813fca70>] start_kernel+0x362/0x36d
Apr 12 18:46:59 80.83.17.1 kernel: [12598.986955]  [<ffffffff813fc0a8>] x86_64_start_reservations+0xa5/0xa9
Apr 12 18:46:59 80.83.17.1 kernel: [12598.987189]  [<ffffffff813fc189>] x86_64_start_kernel+0xdd/0xe4

^ permalink raw reply

* Re: [PATCH] iproute2: add option to build m_xt as a tc module.
From: Stephen Hemminger @ 2010-04-12 15:33 UTC (permalink / raw)
  To: Andreas Henriksson; +Cc: netdev
In-Reply-To: <20100412115538.GA28338@amd64.fatal.se>

On Mon, 12 Apr 2010 13:55:38 +0200
Andreas Henriksson <andreas@fatal.se> wrote:

> Add TC_CONFIG_XT_MODULE option that can be added
> either to Config (after ./configure) or as an argument to "make".

I like the idea and will incorporate it, but I do not like having more
build options. Adding more configuration options like this is just
lazy design: "we can't figure this out, let's make the user do it".

So I will put the patch in, but the option will always be enabled.

^ permalink raw reply

* [PATCH net-next 1/8] tg3: Disable CLKREQ in L2
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch disables CLKREQ in L2 to work around a chipset bug.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   14 ++++++++++++++
 drivers/net/tg3.h |    2 ++
 2 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 460a0c2..4ae01b3 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -7642,6 +7642,20 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 		tw32(GRC_MODE, grc_mode);
 	}
 
+	if (tp->pci_chip_rev_id == CHIPREV_ID_57765_A0) {
+		u32 grc_mode = tr32(GRC_MODE);
+
+		/* Access the lower 1K of PL PCIE block registers. */
+		val = grc_mode & ~GRC_MODE_PCIE_PORT_MASK;
+		tw32(GRC_MODE, val | GRC_MODE_PCIE_PL_SEL);
+
+		val = tr32(TG3_PCIE_TLDLPL_PORT + TG3_PCIE_PL_LO_PHYCTL5);
+		tw32(TG3_PCIE_TLDLPL_PORT + TG3_PCIE_PL_LO_PHYCTL5,
+		     val | TG3_PCIE_PL_LO_PHYCTL5_DIS_L2CLKREQ);
+
+		tw32(GRC_MODE, grc_mode);
+	}
+
 	/* This works around an issue with Athlon chipsets on
 	 * B3 tigon3 silicon.  This bit has no effect on any
 	 * other revision.  But do not set this on PCI Express
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 5d7f72a..8a6012a 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1854,6 +1854,8 @@
 #define TG3_PCIE_TLDLPL_PORT		0x00007c00
 #define TG3_PCIE_PL_LO_PHYCTL1		 0x00000004
 #define TG3_PCIE_PL_LO_PHYCTL1_L1PLLPD_EN	  0x00001000
+#define TG3_PCIE_PL_LO_PHYCTL5		 0x00000014
+#define TG3_PCIE_PL_LO_PHYCTL5_DIS_L2CLKREQ	  0x80000000
 
 /* OTP bit definitions */
 #define TG3_OTP_AGCTGT_MASK		0x000000e0
-- 
1.6.4.4



^ permalink raw reply related

* [PATCH net-next 2/8] tg3: Set 57765 card reader MRRS to 1024B
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch sets the Maximum Read Request Size for the card reader
function to 1024 bytes to prevent an SD controller lockup.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |    2 ++
 drivers/net/tg3.h |    1 +
 2 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 4ae01b3..a0ab89e 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -7704,6 +7704,8 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765) {
 		val = tr32(TG3PCI_DMA_RW_CTRL) &
 		      ~DMA_RWCTRL_DIS_CACHE_ALIGNMENT;
+		if (tp->pci_chip_rev_id == CHIPREV_ID_57765_A0)
+			val &= ~DMA_RWCTRL_CRDRDR_RDMA_MRRS_MSK;
 		tw32(TG3PCI_DMA_RW_CTRL, val | tp->dma_rwctrl);
 	} else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5784 &&
 		   GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5761) {
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 8a6012a..9e7fe0e 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -183,6 +183,7 @@
 #define   METAL_REV_B2			 0x02
 #define TG3PCI_DMA_RW_CTRL		0x0000006c
 #define  DMA_RWCTRL_DIS_CACHE_ALIGNMENT  0x00000001
+#define  DMA_RWCTRL_CRDRDR_RDMA_MRRS_MSK 0x00000380
 #define  DMA_RWCTRL_READ_BNDRY_MASK	 0x00000700
 #define  DMA_RWCTRL_READ_BNDRY_DISAB	 0x00000000
 #define  DMA_RWCTRL_READ_BNDRY_16	 0x00000100
-- 
1.6.4.4



^ permalink raw reply related

* [PATCH net-next 6/8] tg3: Unify max pkt size preprocessor constants
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

The maximum packet size that gets programmed into the standard producer
ring control block is directly related to the packet size used to
allocate packet buffers.  This patch removes the redundant preprocessor
constant.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |    4 ++--
 drivers/net/tg3.h |    1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 3263f50..a331ec5 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -7905,9 +7905,9 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717 ||
 		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765)
 			val = (RX_STD_MAX_SIZE_5705 << BDINFO_FLAGS_MAXLEN_SHIFT) |
-			      (RX_STD_MAX_SIZE << 2);
+			      (TG3_RX_STD_DMA_SZ << 2);
 		else
-			val = RX_STD_MAX_SIZE << BDINFO_FLAGS_MAXLEN_SHIFT;
+			val = TG3_RX_STD_DMA_SZ << BDINFO_FLAGS_MAXLEN_SHIFT;
 	} else
 		val = RX_STD_MAX_SIZE_5705 << BDINFO_FLAGS_MAXLEN_SHIFT;
 
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 43dd1d2..b71083d 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -25,7 +25,6 @@
 
 #define TG3_RX_INTERNAL_RING_SZ_5906	32
 
-#define RX_STD_MAX_SIZE			1536
 #define RX_STD_MAX_SIZE_5705		512
 #define RX_JUMBO_MAX_SIZE		0xdeadbeef /* XXX */
 
-- 
1.6.4.4



^ permalink raw reply related

* [PATCH net-next 3/8] tg3: Reduce 57765 core clock when link at 10Mbps
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch reduces the core clock to 6.25MHz when operating at 10Mbps
link speed.  This is needed to prevent a bug that will ultimately cause
transmits to cease.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a0ab89e..3e89323 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -7654,6 +7654,11 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 		     val | TG3_PCIE_PL_LO_PHYCTL5_DIS_L2CLKREQ);
 
 		tw32(GRC_MODE, grc_mode);
+
+		val = tr32(TG3_CPMU_LSPD_10MB_CLK);
+		val &= ~CPMU_LSPD_10MB_MACCLK_MASK;
+		val |= CPMU_LSPD_10MB_MACCLK_6_25;
+		tw32(TG3_CPMU_LSPD_10MB_CLK, val);
 	}
 
 	/* This works around an issue with Athlon chipsets on
-- 
1.6.4.4



^ permalink raw reply related

* [PATCH net-next 8/8] tg3: Update version to 3.110
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch updates the tg3 version to 3.110.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 3f8feb1..61089fd 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -67,8 +67,8 @@
 #include "tg3.h"
 
 #define DRV_MODULE_NAME		"tg3"
-#define DRV_MODULE_VERSION	"3.109"
-#define DRV_MODULE_RELDATE	"April 2, 2010"
+#define DRV_MODULE_VERSION	"3.110"
+#define DRV_MODULE_RELDATE	"April 9, 2010"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
-- 
1.6.4.4



^ permalink raw reply related

* [PATCH net-next 7/8] tg3: Remove function errors flagged by checkpatch
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch removes the following checkpatch errors:

* return is not a function, parentheses are not required
* space prohibited between function name and open parenthesis '('

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a331ec5..3f8feb1 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -383,7 +383,7 @@ static void tg3_write32(struct tg3 *tp, u32 off, u32 val)
 
 static u32 tg3_read32(struct tg3 *tp, u32 off)
 {
-	return (readl(tp->regs + off));
+	return readl(tp->regs + off);
 }
 
 static void tg3_ape_write32(struct tg3 *tp, u32 off, u32 val)
@@ -393,7 +393,7 @@ static void tg3_ape_write32(struct tg3 *tp, u32 off, u32 val)
 
 static u32 tg3_ape_read32(struct tg3 *tp, u32 off)
 {
-	return (readl(tp->aperegs + off));
+	return readl(tp->aperegs + off);
 }
 
 static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val)
@@ -511,7 +511,7 @@ static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val)
 
 static u32 tg3_read32_mbox_5906(struct tg3 *tp, u32 off)
 {
-	return (readl(tp->regs + off + GRCMBOX_BASE));
+	return readl(tp->regs + off + GRCMBOX_BASE);
 }
 
 static void tg3_write32_mbox_5906(struct tg3 *tp, u32 off, u32 val)
@@ -5775,7 +5775,7 @@ static netdev_tx_t tg3_start_xmit_dma_bug(struct sk_buff *skb,
 		hdr_len = ip_tcp_len + tcp_opt_len;
 		if (unlikely((ETH_HLEN + hdr_len) > 80) &&
 			     (tp->tg3_flags2 & TG3_FLG2_TSO_BUG))
-			return (tg3_tso_bug(tp, skb));
+			return tg3_tso_bug(tp, skb);
 
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
@@ -9285,10 +9285,10 @@ static void __tg3_set_rx_mode(struct net_device *dev)
 		rx_mode |= RX_MODE_PROMISC;
 	} else if (dev->flags & IFF_ALLMULTI) {
 		/* Accept all multicast. */
-		tg3_set_multi (tp, 1);
+		tg3_set_multi(tp, 1);
 	} else if (netdev_mc_empty(dev)) {
 		/* Reject all multicast. */
-		tg3_set_multi (tp, 0);
+		tg3_set_multi(tp, 0);
 	} else {
 		/* Accept one or more multicast(s). */
 		struct netdev_hw_addr *ha;
@@ -10030,7 +10030,7 @@ static int tg3_set_tx_csum(struct net_device *dev, u32 data)
 	return 0;
 }
 
-static int tg3_get_sset_count (struct net_device *dev, int sset)
+static int tg3_get_sset_count(struct net_device *dev, int sset)
 {
 	switch (sset) {
 	case ETH_SS_TEST:
@@ -10042,7 +10042,7 @@ static int tg3_get_sset_count (struct net_device *dev, int sset)
 	}
 }
 
-static void tg3_get_strings (struct net_device *dev, u32 stringset, u8 *buf)
+static void tg3_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
 	switch (stringset) {
 	case ETH_SS_STATS:
@@ -10089,7 +10089,7 @@ static int tg3_phys_id(struct net_device *dev, u32 data)
 	return 0;
 }
 
-static void tg3_get_ethtool_stats (struct net_device *dev,
+static void tg3_get_ethtool_stats(struct net_device *dev,
 				   struct ethtool_stats *estats, u64 *tmp_stats)
 {
 	struct tg3 *tp = netdev_priv(dev);
-- 
1.6.4.4



^ permalink raw reply related

* [PATCH net-next 0/8] tg3 updates
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patchset adds code to support the 57765 OTP ROM bootcode, fixes a
bug related to VLANs and adds some minor improvements.



^ permalink raw reply

* [PATCH net-next 5/8] tg3: Re-inline VLAN tags when appropriate
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

The tg3 driver is written so that VLAN tagged packets can be accepted,
even if neither CONFIG_VLAN_8021Q nor CONFIG_VLAN_8021Q_MODULE is
defined.  (Think raw interfaces.)  If the device has ASF support
enabled, the firmware requires the driver to enable VLAN tag stripping.
If VLAN tagging is not explicitly supported by the kernel and ASF is
enabled, the driver will have to reinsert the VLAN tag into the packet
stream.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   44 +++++++++++++++++++++++++++++++++-----------
 1 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 5791405..3263f50 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -126,6 +126,9 @@
 				 TG3_TX_RING_SIZE)
 #define NEXT_TX(N)		(((N) + 1) & (TG3_TX_RING_SIZE - 1))
 
+#define TG3_RX_DMA_ALIGN		16
+#define TG3_RX_HEADROOM			ALIGN(VLAN_HLEN, TG3_RX_DMA_ALIGN)
+
 #define TG3_DMA_BYTE_ENAB		64
 
 #define TG3_RX_STD_DMA_SZ		1536
@@ -4624,6 +4627,8 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 		struct sk_buff *skb;
 		dma_addr_t dma_addr;
 		u32 opaque_key, desc_idx, *post_ptr;
+		bool hw_vlan __maybe_unused = false;
+		u16 vtag __maybe_unused = 0;
 
 		desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
 		opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
@@ -4682,12 +4687,12 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 			tg3_recycle_rx(tnapi, tpr, opaque_key,
 				       desc_idx, *post_ptr);
 
-			copy_skb = netdev_alloc_skb(tp->dev,
-						    len + TG3_RAW_IP_ALIGN);
+			copy_skb = netdev_alloc_skb(tp->dev, len + VLAN_HLEN +
+						    TG3_RAW_IP_ALIGN);
 			if (copy_skb == NULL)
 				goto drop_it_no_recycle;
 
-			skb_reserve(copy_skb, TG3_RAW_IP_ALIGN);
+			skb_reserve(copy_skb, TG3_RAW_IP_ALIGN + VLAN_HLEN);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 			skb_copy_from_linear_data(skb, copy_skb->data, len);
@@ -4713,12 +4718,29 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 			goto next_pkt;
 		}
 
+		if (desc->type_flags & RXD_FLAG_VLAN &&
+		    !(tp->rx_mode & RX_MODE_KEEP_VLAN_TAG)) {
+			vtag = desc->err_vlan & RXD_VLAN_MASK;
 #if TG3_VLAN_TAG_USED
-		if (tp->vlgrp != NULL &&
-		    desc->type_flags & RXD_FLAG_VLAN) {
-			vlan_gro_receive(&tnapi->napi, tp->vlgrp,
-					 desc->err_vlan & RXD_VLAN_MASK, skb);
-		} else
+			if (tp->vlgrp)
+				hw_vlan = true;
+			else
+#endif
+			{
+				struct vlan_ethhdr *ve = (struct vlan_ethhdr *)
+						    __skb_push(skb, VLAN_HLEN);
+
+				memmove(ve, skb->data + VLAN_HLEN,
+					ETH_ALEN * 2);
+				ve->h_vlan_proto = htons(ETH_P_8021Q);
+				ve->h_vlan_TCI = htons(vtag);
+			}
+		}
+
+#if TG3_VLAN_TAG_USED
+		if (hw_vlan)
+			vlan_gro_receive(&tnapi->napi, tp->vlgrp, vtag, skb);
+		else
 #endif
 			napi_gro_receive(&tnapi->napi, skb);
 
@@ -13481,13 +13503,13 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	else
 		tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES;
 
-	tp->rx_offset = NET_IP_ALIGN;
+	tp->rx_offset = NET_IP_ALIGN + TG3_RX_HEADROOM;
 	tp->rx_copy_thresh = TG3_RX_COPY_THRESHOLD;
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 &&
 	    (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0) {
-		tp->rx_offset = 0;
+		tp->rx_offset -= NET_IP_ALIGN;
 #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-		tp->rx_copy_thresh = ~0;
+		tp->rx_copy_thresh = ~(u16)0;
 #endif
 	}
 
-- 
1.6.4.4



^ permalink raw reply related

* [PATCH net-next 4/8] tg3: Optimize rx double copy test
From: Matt Carlson @ 2010-04-12 16:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

On a PCIX bus, the 5701 has a bug which requires the driver to double
copy all rx packets.  The rx code uses the rx_offset device member as a
flag to determine if this workaround should take effect.  The next
patch in this series will modify the rx_offset member in a way that
makes this test less clear.

The patch starts by integrating the workaround check into the packet
length check.  It rounds out the implementation by relaxing the
workaround restrictions if the platform has efficient unaligned
accesses.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   32 +++++++++++++++++++++++++-------
 drivers/net/tg3.h |    5 ++---
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 3e89323..5791405 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -144,6 +144,24 @@
 
 #define TG3_RSS_MIN_NUM_MSIX_VECS	2
 
+/* Due to a hardware bug, the 5701 can only DMA to memory addresses
+ * that are at least dword aligned when used in PCIX mode.  The driver
+ * works around this bug by double copying the packet.  This workaround
+ * is built into the normal double copy length check for efficiency.
+ *
+ * However, the double copy is only necessary on those architectures
+ * where unaligned memory accesses are inefficient.  For those architectures
+ * where unaligned memory accesses incur little penalty, we can reintegrate
+ * the 5701 in the normal rx path.  Doing so saves a device structure
+ * dereference by hardcoding the double copy threshold in place.
+ */
+#define TG3_RX_COPY_THRESHOLD		256
+#if NET_IP_ALIGN == 0 || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	#define TG3_RX_COPY_THRESH(tp)	TG3_RX_COPY_THRESHOLD
+#else
+	#define TG3_RX_COPY_THRESH(tp)	((tp)->rx_copy_thresh)
+#endif
+
 /* minimum number of free TX descriptors required to wake up TX process */
 #define TG3_TX_WAKEUP_THRESH(tnapi)		((tnapi)->tx_pending / 4)
 
@@ -4639,12 +4657,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 		len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) -
 		      ETH_FCS_LEN;
 
-		if (len > RX_COPY_THRESHOLD &&
-		    tp->rx_offset == NET_IP_ALIGN) {
-		    /* rx_offset will likely not equal NET_IP_ALIGN
-		     * if this is a 5701 card running in PCI-X mode
-		     * [see tg3_get_invariants()]
-		     */
+		if (len > TG3_RX_COPY_THRESH(tp)) {
 			int skb_size;
 
 			skb_size = tg3_alloc_rx_skb(tp, tpr, opaque_key,
@@ -13469,9 +13482,14 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 		tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES;
 
 	tp->rx_offset = NET_IP_ALIGN;
+	tp->rx_copy_thresh = TG3_RX_COPY_THRESHOLD;
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 &&
-	    (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0)
+	    (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0) {
 		tp->rx_offset = 0;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+		tp->rx_copy_thresh = ~0;
+#endif
+	}
 
 	tp->rx_std_max_post = TG3_RX_RING_SIZE;
 
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 9e7fe0e..43dd1d2 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -23,8 +23,6 @@
 #define TG3_BDINFO_NIC_ADDR		0xcUL /* 32-bit */
 #define TG3_BDINFO_SIZE			0x10UL
 
-#define RX_COPY_THRESHOLD		256
-
 #define TG3_RX_INTERNAL_RING_SZ_5906	32
 
 #define RX_STD_MAX_SIZE			1536
@@ -2754,9 +2752,11 @@ struct tg3 {
 	struct tg3_napi			napi[TG3_IRQ_MAX_VECS];
 	void				(*write32_rx_mbox) (struct tg3 *, u32,
 							    u32);
+	u32				rx_copy_thresh;
 	u32				rx_pending;
 	u32				rx_jumbo_pending;
 	u32				rx_std_max_post;
+	u32				rx_offset;
 	u32				rx_pkt_map_sz;
 #if TG3_VLAN_TAG_USED
 	struct vlan_group		*vlgrp;
@@ -2776,7 +2776,6 @@ struct tg3 {
 	unsigned long			last_event_jiffies;
 	};
 
-	u32				rx_offset;
 	u32				tg3_flags;
 #define TG3_FLAG_TAGGED_STATUS		0x00000001
 #define TG3_FLAG_TXD_MBOX_HWBUG		0x00000002
-- 
1.6.4.4



^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox