Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH] cls_rsvp.h was outdated
From: Eric Dumazet @ 2011-08-30 11:38 UTC (permalink / raw)
  To: igorm; +Cc: netdev, linux-kernel
In-Reply-To: <0f79c88be72b75d0526d3f3c2ebf826b.squirrel@kondor.etf.bg.ac.rs>

Le mardi 30 août 2011 à 13:11 +0200, "Igor Maravić" a écrit :
> File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
> file.
> Patch was done against Linux 2.6.38-8
> 
> Signed-off-by: Igor Maravić <igorm@etf.rs>
> 
> --- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918
> +0200

Two last points :

1) Your mail client added line wraps, please take a look at
Documentation/email-clients.txt


> 
>  static struct tcf_proto_ops RSVP_OPS = {
> -	.next		=	NULL,
> +	//.next		=	NULL,
>  	.kind		=	RSVP_ID,
>  	.classify	=	rsvp_classify,
>  	.init		=	rsvp_init,
> 

2) Don't add // comments; just remove the line.

You also could add __read_mostly here :

static struct tcf_proto_ops RSVP_OPS __read_mostly = {


Thanks

^ permalink raw reply

* Re: [PATCH] MAINTAINERS: Update ATLX driver maintainers
From: Franco Fichtner @ 2011-08-30 11:25 UTC (permalink / raw)
  To: Ian Campbell
  Cc: netdev, Jay Cliburn, Chris Snook, Jie Yang, Andrew Morton,
	Joe Perches
In-Reply-To: <1314696887-22518-1-git-send-email-ian.campbell@citrix.com>

Hi Ian,

On 08/30/2011 11:34 AM, Ian Campbell wrote:
> jie.yang@atheros.com bounces and I get a 550 "Unknown address error". Perhaps
> they have moved on?

Atheros is now part of Qualcomm. There is a patch by Luis floating
around fixing the MAINTAINERS file properly, but it hasn't been applied
yet. No need to apply this one here.


Franco

^ permalink raw reply

* RE: [PATCH] MAINTAINERS: Update Cisco VIC driver maintainers
From: David Wang (dwang2) @ 2011-08-30 11:42 UTC (permalink / raw)
  To: Ian Campbell, netdev
  Cc: Christian Benvenuti (benve), Roopa Prabhu (roprabhu),
	Andrew Morton, Joe Perches
In-Reply-To: <1314697269-22594-1-git-send-email-ian.campbell@citrix.com>

Ian,

Vasanthy is no longer with Cisco; we will be removing her from any
future submissions.

Regards,

- Dave 

> -----Original Message-----
> From: Ian Campbell [mailto:ian.campbell@citrix.com] 
> Sent: Tuesday, August 30, 2011 2:41 AM
> To: netdev@vger.kernel.org
> Cc: Ian Campbell; Christian Benvenuti (benve); Roopa Prabhu 
> (roprabhu); David Wang (dwang2); Andrew Morton; Joe Perches
> Subject: [PATCH] MAINTAINERS: Update Cisco VIC driver maintainers
> 
> vkolluri@cisco.com bounces and I get "Unknown address error 550".
> 
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Cc: Christian Benvenuti <benve@cisco.com>
> Cc: Roopa Prabhu <roprabhu@cisco.com>
> Cc: David Wang <dwang2@cisco.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Joe Perches <joe@perches.com>
> Cc: netdev@vger.kernel.org
> ---
>  MAINTAINERS |    1 -
>  1 files changed, 0 insertions(+), 1 deletions(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index cb6ad5f..a5e0b11 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1757,7 +1757,6 @@ F:	Documentation/zh_CN/
>  
>  CISCO VIC ETHERNET NIC DRIVER
>  M:	Christian Benvenuti <benve@cisco.com>
> -M:	Vasanthy Kolluri <vkolluri@cisco.com>
>  M:	Roopa Prabhu <roprabhu@cisco.com>
>  M:	David Wang <dwang2@cisco.com>
>  S:	Supported
> --
> 1.7.2.5
> 
> 

^ permalink raw reply

* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 12:12 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, eric.dumazet

File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.
Patch was done against Linux 2.6.38-8

Signed-off-by: Igor Maravić <igorm@etf.rs>

---

--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918
+0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h	2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
 	struct rsvp_filter *f, **fp;
 	struct rsvp_session *s, **sp;
 	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS-1];
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
 	unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
 	if (err < 0)
 		return err;

-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 	if (err < 0)
 		return err;

@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto

 		if (f->handle != handle && handle)
 			goto errout2;
-		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+		if (tb[TCA_RSVP_CLASSID]) {
+			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 			tcf_bind_filter(tp, &f->res, base);
 		}

@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
 	err = -EINVAL;
 	if (handle)
 		goto errout2;
-	if (tb[TCA_RSVP_DST-1] == NULL)
+	if (tb[TCA_RSVP_DST] == NULL)
 		goto errout2;

 	err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
 		goto errout2;

 	h2 = 16;
-	if (tb[TCA_RSVP_SRC-1]) {
-		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+	if (tb[TCA_RSVP_SRC]) {
+		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
-	if (tb[TCA_RSVP_PINFO-1]) {
-		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+	if (tb[TCA_RSVP_PINFO]) {
+		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
-	if (tb[TCA_RSVP_CLASSID-1])
-		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+	if (tb[TCA_RSVP_CLASSID])
+		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);

-	dst = nla_data(tb[TCA_RSVP_DST-1]);
+	dst = nla_data(tb[TCA_RSVP_DST]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid
: 0);

 	err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
 	return -1;
 }

-static struct tcf_proto_ops RSVP_OPS = {
-	.next		=	NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
 	.init		=	rsvp_init,

^ permalink raw reply

* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Patrick McHardy @ 2011-08-30 12:43 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel, netdev
In-Reply-To: <1314701827-21702-1-git-send-email-fw@strlen.de>

On 30.08.2011 12:57, Florian Westphal wrote:
> When invoking iptables hooks from bridge netfilter, the assumption
> that non-confirmed skb->nfct is never shared no longer holds,
> as bridge code clones skbs when e.g. forwarding packets to multiple
> bridge ports.
> 
> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> invoked simultaneously for the same conntrack:

I'm wondering how this can happen, when flooding packets to multiple
ports, they are still processed by the same CPU one after another,
so for the second and further packets, nf_nat should notice that
the mappings are already set up.

> [ 3196.798768] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:300!
> [..]
> [ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768]  [<ffffffffa03207e4>] ? br_handle_frame_finish+0x0/0x13b [bridge]
> [ 3196.798768]  [<ffffffffa02a61a5>] ? alloc_null_binding+0x47/0x4c [iptable_nat]
> [ 3196.798768]  [<ffffffffa02a64eb>] ? nf_nat_fn+0x193/0x1fb [iptable_nat]
> [ 3196.798768]  [<ffffffff8120d4c5>] ? nf_iterate+0x40/0x9f
> [ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768]  [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
> [ 3196.798768]  [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
> [ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768]  [<ffffffff8121369c>] ? ip_rcv_finish+0x0/0x340
> [ 3196.798768]  [<ffffffff81213ed7>] ? ip_local_deliver+0x52/0x6c
> [ 3196.798768]  [<ffffffff812139c2>] ? ip_rcv_finish+0x326/0x340
> [ 3196.798768]  [<ffffffff81213c4f>] ? ip_rcv+0x273/0x2b8
> [ 3196.798768]  [<ffffffff811f1384>] ? process_backlog+0x8d/0xc6
> [ 3196.798768]  [<ffffffff811f2f85>] ? net_rx_action+0xa2/0x1cf
> [ 3196.798768]  [<ffffffff8103d3c2>] ? __do_softirq+0x8b/0x10b
> [ 3196.798768]  [<ffffffff8100c9dc>] ? call_softirq+0x1c/0x28
> [ 3196.798768]  [<ffffffff8100dd15>] ? do_softirq+0x31/0x66
> [ 3196.798768]  [<ffffffff8103d267>] ? irq_exit+0x36/0x78
> [ 3196.798768]  [<ffffffff8100d41a>] ? do_IRQ+0xa0/0xb6
> [ 3196.798768]  [<ffffffff8100c253>] ? ret_from_intr+0x0/0xa
> [..]
> [ 3196.798768] Code: be 2b 01 00 00 48 c7 c7 e8 cd 29 a0 e8 e8 d7 d9 e0 45 85 ff 49 8b 45 78 75 06 48 c1 e8 07 eb 04 48 c1 e8 08 83 e0 01 85 c0 74 04 <0f> 0b eb fe 49 8d 75 50 48 8d bc 24 80 00 00 00 e8 83 38 f7 ff
> [ 3196.798768] RIP  [<ffffffffa029b68f>] nf_nat_setup_info+0x8a/0x564 [nf_nat]
> [ 3196.798768]  RSP <ffff880001603bf0>
> 
> Fix this by changing ->nfct of all clones to untracked.
> 
> This should be OK, because if we do a full copy of ->nfct we'd
> end up trying to confirm the same tuples multiple times, which results in
> NF_DROP for the cloned skbs.
> 
> Also, we only need to do this if the conntrack is unconfirmed.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/bridge/br_netfilter.c |   34 ++++++++++++++++++++++++++++++++++
>  1 files changed, 34 insertions(+), 0 deletions(-)
> 
>  I have one alternate patch that changes nf_nat_setup_info
>  to detect conflicts by forcing serialization via ct->lock spinlock.
> 
>  But it is silly to do this for the sake of bridge netfilter only...
> 
>  Any other ideas?
> diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
> index 3fa1231..7d47f34 100644
> --- a/net/bridge/br_netfilter.c
> +++ b/net/bridge/br_netfilter.c
> @@ -42,6 +42,10 @@
>  #include <linux/sysctl.h>
>  #endif
>  
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> +#include <net/netfilter/nf_conntrack.h>
> +#endif
> +
>  #define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
>  				 (skb->nf_bridge->data))->daddr.ipv4)
>  #define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
> @@ -158,10 +162,40 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
>  	return skb->nf_bridge;
>  }
>  
> +
> +/* conntrack assumes exclusive ownership of skb->nfct
> + * if conntrack has not yet been confirmed.
> + *
> + * Without this, we may BUG because we might try to set up
> + * NAT bindings for the same conntrack struct simultaneously.
> + *
> + * Work around this by forcing untracked state.
> + */
> +static inline void nf_bridge_unshare_nfct(struct sk_buff *skb)
> +{
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> +	struct nf_conn *ct, *ct_orig = (void *) skb->nfct;
> +
> +	if (!ct_orig || nf_ct_is_untracked(ct_orig))
> +		return;
> +
> +	if (likely(nf_ct_is_confirmed(ct_orig)) ||
> +	    atomic_read(&ct_orig->ct_general.use) == 1)
> +		return;
> +
> +	ct = nf_ct_untracked_get();

This will introduce a module dependency on nf_conntrack, which we really
shouldn't be doing.

> +	atomic_inc(&ct->ct_general.use);
> +	nf_conntrack_put(skb->nfct);
> +	skb->nfct = &ct->ct_general;
> +#endif

^ permalink raw reply

* Re: [PATCH 06/24] netfilter: Remove unnecessary OOM logging messages
From: Patrick McHardy @ 2011-08-30 12:46 UTC (permalink / raw)
  To: Joe Perches
  Cc: Bart De Schuymer, Wensong Zhang, Simon Horman, Julian Anastasov,
	Stephen Hemminger, David S. Miller, Alexey Kuznetsov,
	James Morris, Hideaki YOSHIFUJI, netfilter-devel, netfilter,
	coreteam, bridge, netdev, linux-kernel, lvs-devel
In-Reply-To: <13c1c12486cae409dfa5254b1435e660f2b17e05.1314650069.git.joe@perches.com>

On 29.08.2011 23:17, Joe Perches wrote:
> Removing unnecessary messages saves code and text.
> 
> Site specific OOM messages are duplications of a generic MM
> out of memory message and aren't really useful, so just
> delete them.

Looks good to me. Do you want me to apply this patch or are you
intending to have the entire series go through Dave?

^ permalink raw reply

* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Florian Westphal @ 2011-08-30 12:54 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Florian Westphal, netfilter-devel, netdev
In-Reply-To: <4E5CDADC.7000902@trash.net>

Patrick McHardy <kaber@trash.net> wrote:
> On 30.08.2011 12:57, Florian Westphal wrote:
> > When invoking iptables hooks from bridge netfilter, the assumption
> > that non-confirmed skb->nfct is never shared no longer holds,
> > as bridge code clones skbs when e.g. forwarding packets to multiple
> > bridge ports.
> > 
> > When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> > invoked simultaneously for the same conntrack:
> 
> I'm wondering how this can happen, when flooding packets to multiple
> ports, they are still processed by the same CPU one after another,
> so for the second and further packets, nf_nat should notice that
> the mappings are already set up.

Main problem is that we end up with same ->nfct in both
INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).

It's extremely unlikely but reproducible with something like
hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255

(assuming bridge interface has an address within that network).

Also, with recent change nf_reinject can be run in parallel.
(the original problem was observed on 2.6.32.24, but i can
 reproduce it with nf-next, too).

^ permalink raw reply

* 802.1Q VLAN random tag injected when vlan configured on forcedeth interface
From: Ruslan N. Marchenko @ 2011-08-30 12:51 UTC (permalink / raw)
  To: netdev

Hi guys,
I've run into strange behaviour in the 8021q driver: when enabling a vlan subinterface on an eth interface I get ~50% packet loss because packets are marked with incorrect tags (and eventually dropped by the kernel, since no vlans are configured for those IDs).
Scenario:
[    0.476950] cpufreq-nforce2: No nForce2 chipset.
[    1.519133] forcedeth: Reverse Engineered nForce ethernet driver. Version 0.64.
[    1.519991] forcedeth 0000:00:0a.0: PCI INT A -> Link[LMAC] -> GSI 22 (level, low) -> IRQ 22
[    1.520037] forcedeth 0000:00:0a.0: setting latency timer to 64
[    1.586526] forcedeth 0000:00:0a.0: ifname eth0, PHY OUI 0x732 @ 3, addr 00:26:18:40:21:61
[    1.586542] forcedeth 0000:00:0a.0: highdma csum pwrctl gbit lnktim msi desc-v3

modprobe 8021q

- network still works properly; packets arrive without any tag at all.

ip li add link eth0 name vl6 type vlan id 6

- from this moment on, massive packet drop starts to happen; almost half of the *incoming* packets are shown in tcpdump as 
14:15:52.859296 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 64, p 3, ethertype IPv4, [|ip]
14:15:56.869572 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 7, ethertype IPv4, [|ip]

Mostly only these two tags appear (64 & 2112). Moreover, this happens both at the native vlan level (pure ethernet) and on the tagged subinterface (as if it were QinQ double tagging) for incoming packets that are properly tagged with ID 6.

I've tried disabling all offloads:

Offload parameters for eth0:
rx-checksumming: off
tx-checksumming: off
scatter-gather: off
tcp-segmentation-offload: off
udp-fragmentation-offload: off
generic-segmentation-offload: off
generic-receive-offload: off
large-receive-offload: off
rx-vlan-offload: off
tx-vlan-offload: off
ntuple-filters: off
receive-hashing: off

- doesn't have any effect.
Once executing 
ip li del vl6 type vlan
the mysterious tags disappear and everything works smoothly. I don't know what injects that garbage into the frames - the 8021q or the forcedeth driver :(
Any ideas or suggestions to narrow the problem down?

Additional data.
Link level data dump example for broken frame:
12:35:32.175523 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 2, ethertype IPv4, [|ip]
        0x0000:  0026 1840 2161 0013 f71e fee4 8100 4840
        0x0010:  0800 4500 0054 7a12 0000 4001 eb0f
0x0C-0D - TPID: ethertype 802.1Q (0x8100) 
0x0E-0F - TCI (0100100001000000) PCP 010, CFI 0, VID 100001000000/0x840/2112
0x10-11 - ethertype IPv4
A normal ping reply follows, which appears untagged in 50% of cases with the vlan configured and in 100% of cases without.

The interface is plugged into the non-switched (wan) gigabit port of an OpenWrt box, which has a vid 6 subinterface configured.

Regards,
Ruslan

^ permalink raw reply

* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Patrick McHardy @ 2011-08-30 13:08 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel, netdev
In-Reply-To: <20110830125453.GC7548@Chamillionaire.breakpoint.cc>

On 30.08.2011 14:54, Florian Westphal wrote:
> Patrick McHardy <kaber@trash.net> wrote:
>> On 30.08.2011 12:57, Florian Westphal wrote:
>>> When invoking iptables hooks from bridge netfilter, the assumption
>>> that non-confirmed skb->nfct is never shared no longer holds,
>>> as bridge code clones skbs when e.g. forwarding packets to multiple
>>> bridge ports.
>>>
>>> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
>>> invoked simultaneously for the same conntrack:
>>
>> I'm wondering how this can happen, when flooding packets to multiple
>> ports, they are still processed by the same CPU one after another,
>> so for the second and further packets, nf_nat should notice that
>> the mappings are already set up.
> 
> Main problem is that we end up with same ->nfct in both
> INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).
> 
> It's extremely unlikely but reproducible with something like
> hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255
> 
> (assuming bridge interface has an address within that network).
> 
> Also, with recent change nf_reinject can be run in parallel.
> (the original problem was observed on 2.6.32.24, but i can
>  reproduce it with nf-next, too).

I see. We still need to avoid the module dependency on nf_conntrack
though, so I think this will have to be fixed in nf_nat_fn().

^ permalink raw reply

* [PATCH] net: sh_eth: remove duplicated #include
From: Huang Weiyi @ 2011-08-30 13:09 UTC (permalink / raw)
  To: davem; +Cc: netdev, Huang Weiyi

Remove duplicated #include('s) in
  drivers/net/sh_eth.c

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
---
 drivers/net/sh_eth.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index 1c1666e..190f619 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -31,7 +31,6 @@
 #include <linux/phy.h>
 #include <linux/cache.h>
 #include <linux/io.h>
-#include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/ethtool.h>
-- 
1.6.1.3

^ permalink raw reply related

* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 13:10 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel

File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.

Sorry for the word-wrap in previous messages

Signed-off-by: Igor Maravić <igorm@etf.rs>

---

--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918
+0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h	2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
 	struct rsvp_filter *f, **fp;
 	struct rsvp_session *s, **sp;
 	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS-1];
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
 	unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
 	if (err < 0)
 		return err;

-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 	if (err < 0)
 		return err;

@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto

 		if (f->handle != handle && handle)
 			goto errout2;
-		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+		if (tb[TCA_RSVP_CLASSID]) {
+			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 			tcf_bind_filter(tp, &f->res, base);
 		}

@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
 	err = -EINVAL;
 	if (handle)
 		goto errout2;
-	if (tb[TCA_RSVP_DST-1] == NULL)
+	if (tb[TCA_RSVP_DST] == NULL)
 		goto errout2;

 	err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
 		goto errout2;

 	h2 = 16;
-	if (tb[TCA_RSVP_SRC-1]) {
-		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+	if (tb[TCA_RSVP_SRC]) {
+		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
-	if (tb[TCA_RSVP_PINFO-1]) {
-		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+	if (tb[TCA_RSVP_PINFO]) {
+		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
-	if (tb[TCA_RSVP_CLASSID-1])
-		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+	if (tb[TCA_RSVP_CLASSID])
+		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);

-	dst = nla_data(tb[TCA_RSVP_DST-1]);
+	dst = nla_data(tb[TCA_RSVP_DST]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid
: 0);

 	err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
 	return -1;
 }

-static struct tcf_proto_ops RSVP_OPS = {
-	.next		=	NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
 	.init		=	rsvp_init,

^ permalink raw reply

* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 13:12 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel

File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.

Sorry for the word-wrap in previous messages

Signed-off-by: Igor Maravić <igorm@etf.rs>

---

--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918 +0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h	2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
 	struct rsvp_filter *f, **fp;
 	struct rsvp_session *s, **sp;
 	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS-1];
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
 	unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
 	if (err < 0)
 		return err;

-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 	if (err < 0)
 		return err;

@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto

 		if (f->handle != handle && handle)
 			goto errout2;
-		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+		if (tb[TCA_RSVP_CLASSID]) {
+			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 			tcf_bind_filter(tp, &f->res, base);
 		}

@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
 	err = -EINVAL;
 	if (handle)
 		goto errout2;
-	if (tb[TCA_RSVP_DST-1] == NULL)
+	if (tb[TCA_RSVP_DST] == NULL)
 		goto errout2;

 	err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
 		goto errout2;

 	h2 = 16;
-	if (tb[TCA_RSVP_SRC-1]) {
-		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+	if (tb[TCA_RSVP_SRC]) {
+		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
-	if (tb[TCA_RSVP_PINFO-1]) {
-		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+	if (tb[TCA_RSVP_PINFO]) {
+		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
-	if (tb[TCA_RSVP_CLASSID-1])
-		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+	if (tb[TCA_RSVP_CLASSID])
+		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);

-	dst = nla_data(tb[TCA_RSVP_DST-1]);
+	dst = nla_data(tb[TCA_RSVP_DST]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);

 	err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
 	return -1;
 }

-static struct tcf_proto_ops RSVP_OPS = {
-	.next		=	NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
 	.init		=	rsvp_init,

^ permalink raw reply

* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Florian Westphal @ 2011-08-30 13:19 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Florian Westphal, netfilter-devel, netdev
In-Reply-To: <4E5CE0BD.7040103@trash.net>

Patrick McHardy <kaber@trash.net> wrote:
> On 30.08.2011 14:54, Florian Westphal wrote:
> > Patrick McHardy <kaber@trash.net> wrote:
> >> On 30.08.2011 12:57, Florian Westphal wrote:
> >>> When invoking iptables hooks from bridge netfilter, the assumption
> >>> that non-confirmed skb->nfct is never shared no longer holds,
> >>> as bridge code clones skbs when e.g. forwarding packets to multiple
> >>> bridge ports.
> >>>
> >>> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> >>> invoked simultaneously for the same conntrack:
> >>
> >> I'm wondering how this can happen, when flooding packets to multiple
> >> ports, they are still processed by the same CPU one after another,
> >> so for the second and further packets, nf_nat should notice that
> >> the mappings are already set up.
> > 
> > Main problem is that we end up with same ->nfct in both
> > INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).
> > 
> > It's extremely unlikely but reproducible with something like
> > hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255
> > 
> > (assuming bridge interface has an address within that network).
> > 
> > Also, with recent change nf_reinject can be run in parallel.
> > (the original problem was observed on 2.6.32.24, but i can
> >  reproduce it with nf-next, too).
> 
> I see. We still need to avoid the module dependency on nf_conntrack
> though, so I think this will have to be fixed in nf_nat_fn().

Right, I failed to spot the call to the destroy hook 8-/

I'll submit an alternate patch shortly.

^ permalink raw reply

* [PATCH v2] tcp: Change possible SYN flooding messages
From: Eric Dumazet @ 2011-08-30 13:21 UTC (permalink / raw)
  To: Tom Herbert, David Miller; +Cc: netdev
In-Reply-To: <alpine.DEB.2.00.1108102229130.5341@pokey.mtv.corp.google.com>

"Possible SYN flooding on port xxxx " messages can fill logs on servers.

Change logic to log the message only once per listener, and add two new
SNMP counters to track :

TCPReqQFullDoCookies : number of times a SYNCOOKIE was replied to client

TCPReqQFullDrop : number of times a SYN request was dropped because
syncookies were not enabled.

Based on a prior patch from Tom Herbert, and suggestions from David.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
---
 include/linux/snmp.h       |    2 +
 include/net/request_sock.h |    3 +-
 include/net/tcp.h          |    3 ++
 net/ipv4/proc.c            |    2 +
 net/ipv4/tcp_ipv4.c        |   49 ++++++++++++++++++++---------------
 net/ipv6/tcp_ipv6.c        |   31 ++--------------------
 6 files changed, 40 insertions(+), 50 deletions(-)

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 12b2b18..e16557a 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -231,6 +231,8 @@ enum
 	LINUX_MIB_TCPDEFERACCEPTDROP,
 	LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
 	LINUX_MIB_TCPTIMEWAITOVERFLOW,		/* TCPTimeWaitOverflow */
+	LINUX_MIB_TCPREQQFULLDOCOOKIES,		/* TCPReqQFullDoCookies */
+	LINUX_MIB_TCPREQQFULLDROP,		/* TCPReqQFullDrop */
 	__LINUX_MIB_MAX
 };
 
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 99e6e19..4c0766e 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -96,7 +96,8 @@ extern int sysctl_max_syn_backlog;
  */
 struct listen_sock {
 	u8			max_qlen_log;
-	/* 3 bytes hole, try to use */
+	u8			synflood_warned;
+	/* 2 bytes hole, try to use */
 	int			qlen;
 	int			qlen_young;
 	int			clock_hand;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 149a415..e9b48b0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -460,6 +460,9 @@ extern int tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
 extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 extern int tcp_send_synack(struct sock *);
+extern int tcp_syn_flood_action(struct sock *sk,
+				const struct sk_buff *skb,
+				const char *proto);
 extern void tcp_push_one(struct sock *, unsigned int mss_now);
 extern void tcp_send_ack(struct sock *sk);
 extern void tcp_send_delayed_ack(struct sock *sk);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b14ec7d..4bfad5d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
 	SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
 	SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
+	SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
+	SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b3f2611..c29912c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -808,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
 	kfree(inet_rsk(req)->opt);
 }
 
-static void syn_flood_warning(const struct sk_buff *skb)
+/*
+ * Return 1 if a syncookie should be sent
+ */
+int tcp_syn_flood_action(struct sock *sk,
+			 const struct sk_buff *skb,
+			 const char *proto)
 {
-	const char *msg;
+	const char *msg = "Dropping request";
+	int want_cookie = 0;
+	struct listen_sock *lopt;
+
+
 
 #ifdef CONFIG_SYN_COOKIES
-	if (sysctl_tcp_syncookies)
+	if (sysctl_tcp_syncookies) {
 		msg = "Sending cookies";
-	else
+		want_cookie = 1;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+	} else
 #endif
-		msg = "Dropping request";
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 
-	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
-				ntohs(tcp_hdr(skb)->dest), msg);
+	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	if (!lopt->synflood_warned) {
+		lopt->synflood_warned = 1;
+		pr_info("%s: Possible SYN flooding on port %d. %s. "
+			" Check SNMP counters.\n",
+			proto, ntohs(tcp_hdr(skb)->dest), msg);
+	}
+	return want_cookie;
 }
+EXPORT_SYMBOL(tcp_syn_flood_action);
 
 /*
  * Save and compile IPv4 options into the request_sock if needed.
@@ -1235,11 +1253,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-#ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
-#else
-#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
-#endif
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1250,14 +1264,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * evidently real one.
 	 */
 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
-		if (net_ratelimit())
-			syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
-		if (sysctl_tcp_syncookies) {
-			want_cookie = 1;
-		} else
-#endif
-		goto drop;
+		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
+		if (!want_cookie)
+			goto drop;
 	}
 
 	/* Accept backlog is full. If we have already queued enough
@@ -1303,9 +1312,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		while (l-- > 0)
 			*c++ ^= *hash_location++;
 
-#ifdef CONFIG_SYN_COOKIES
 		want_cookie = 0;	/* not our kind of cookie */
-#endif
 		tmp_ext.cookie_out_never = 0; /* false */
 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
 	} else if (!tp->rx_opt.cookie_in_always) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 44a5859..12bdb9a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -531,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
 	return tcp_v6_send_synack(sk, req, rvp);
 }
 
-static inline void syn_flood_warning(struct sk_buff *skb)
-{
-#ifdef CONFIG_SYN_COOKIES
-	if (sysctl_tcp_syncookies)
-		printk(KERN_INFO
-		       "TCPv6: Possible SYN flooding on port %d. "
-		       "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
-	else
-#endif
-		printk(KERN_INFO
-		       "TCPv6: Possible SYN flooding on port %d. "
-		       "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
-}
-
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
 	kfree_skb(inet6_rsk(req)->pktopts);
@@ -1179,11 +1165,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
-#ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
-#else
-#define want_cookie 0
-#endif
 
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_conn_request(sk, skb);
@@ -1192,14 +1174,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop;
 
 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
-		if (net_ratelimit())
-			syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
-		if (sysctl_tcp_syncookies)
-			want_cookie = 1;
-		else
-#endif
-		goto drop;
+		want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
+		if (!want_cookie)
+			goto drop;
 	}
 
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
@@ -1249,9 +1226,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		while (l-- > 0)
 			*c++ ^= *hash_location++;
 
-#ifdef CONFIG_SYN_COOKIES
 		want_cookie = 0;	/* not our kind of cookie */
-#endif
 		tmp_ext.cookie_out_never = 0; /* false */
 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
 	} else if (!tp->rx_opt.cookie_in_always) {

^ permalink raw reply related

* Re: 802.1Q VLAN random tag injected when vlan configured on forcedeth interface
From: Eric Dumazet @ 2011-08-30 13:23 UTC (permalink / raw)
  To: Ruslan N. Marchenko; +Cc: netdev
In-Reply-To: <20110830125111.GA28341@ruff.mobi>

Le mardi 30 août 2011 à 14:51 +0200, Ruslan N. Marchenko a écrit :
> Hi guys,
> I've run into strange behaviour of the 8021q driver: when enabling a vlan subinterface on an eth interface I'm getting ~50% packet loss because packets are marked with incorrect tags (and eventually dropped by the kernel since no vlans are configured for such IDs).
> Scenario:
> [    0.476950] cpufreq-nforce2: No nForce2 chipset.
> [    1.519133] forcedeth: Reverse Engineered nForce ethernet driver. Version 0.64.
> [    1.519991] forcedeth 0000:00:0a.0: PCI INT A -> Link[LMAC] -> GSI 22 (level, low) -> IRQ 22
> [    1.520037] forcedeth 0000:00:0a.0: setting latency timer to 64
> [    1.586526] forcedeth 0000:00:0a.0: ifname eth0, PHY OUI 0x732 @ 3, addr 00:26:18:40:21:61
> [    1.586542] forcedeth 0000:00:0a.0: highdma csum pwrctl gbit lnktim msi desc-v3
> 
> modprobe 8021q
> 
> - network still works properly, packets are coming not marked at all.
> 
> ip li add link eth0 name vl6 type vlan id 6
> 
> - from this moment massive packetdrop starting to happen, almost half of the *incoming* packets are shown in tcpdump as 
> 14:15:52.859296 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 64, p 3, ethertype IPv4, [|ip]
> 14:15:56.869572 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 7, ethertype IPv4, [|ip]
> 
> mostly only these two tags appear (64 & 2112). Moreover this happens both at the native vlan level (pure ethernet) and on the tagged subinterface (as if qinq double tagging) for incoming packets properly tagged with ID 6.
> 
> I've tried disabling all offloads:
> 
> Offload parameters for eth0:
> rx-checksumming: off
> tx-checksumming: off
> scatter-gather: off
> tcp-segmentation-offload: off
> udp-fragmentation-offload: off
> generic-segmentation-offload: off
> generic-receive-offload: off
> large-receive-offload: off
> rx-vlan-offload: off
> tx-vlan-offload: off
> ntuple-filters: off
> receive-hashing: off
> 
> - doesn't have any effect.
> Once executing 
> ip li del vl6 type vlan
> mysterious tags disappear and everything works smoothly. Don't know who injects that garbage into frames - 8021q or forcedeth driver :(
> Any ideas or suggestions to narrow the problem down?
> 
> Additional data.
> Link level data dump example for broken frame:
> 12:35:32.175523 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 2, ethertype IPv4, [|ip]
>         0x0000:  0026 1840 2161 0013 f71e fee4 8100 4840
>         0x0010:  0800 4500 0054 7a12 0000 4001 eb0f
> 0x0C-0D - TPID: ethertype 802.1Q (0x8100) 
> 0x0E-0F - TCI (0100100001000000) PCP 010, CFI 0, VID 100001000000/0x840/2112
> 0x10-11 - ethertype IPv4
> normal ping reply follows, which appears untagged in 50% cases with vlan configured and 100% cases without.
> 
> Interface is plugged into openwrt box into non-switched (wan) gigabit port with vid 6 subinterface configured.
> 

What kernel version are you using ?

^ permalink raw reply

* Re: 802.1Q VLAN random tag injected when vlan configured on forcedeth interface
From: Ruslan N. Marchenko @ 2011-08-30 13:46 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1314710628.2935.22.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On Tue, Aug 30, 2011 at 03:23:48PM +0200, Eric Dumazet wrote:
> 
> What kernel version are you using ?
> 
Oh, sorry for missing it, it runs on 
Linux ruff.mobi 2.6.38-11-generic #48-Ubuntu SMP Fri Jul 29 19:05:14 UTC 2011 i686 i686 i386 GNU/Linux

Just fyi - the openwrt box to which it is connected is 
Linux OpenWrt 2.6.39.2 #2 Fri Aug 12 09:36:23 EEST 2011 mips GNU/Linux
although packet drop happens even if there're no vlans configured on remote side.

Regards,
Ruslan

^ permalink raw reply

* [patch 1/3 -next] 6LoWPAN: use kfree_skb() instead of kfree()
From: Dan Carpenter @ 2011-08-30 13:45 UTC (permalink / raw)
  To: Alexander Smirnov
  Cc: Dmitry Eremin-Solenikov, Sergey Lapin, David S. Miller,
	open list:IEEE 802.15.4 SUB..., open list:NETWORKING [GENERAL],
	kernel-janitors

Use kfree_skb() to free sk_buffs.

Signed-off-by: Dan Carpenter <error27@gmail.com>

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index cf304cc..8a9dbaa 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -674,7 +674,7 @@ lowpan_process_data(struct sk_buff *skb)
 							sizeof(hdr));
 	return lowpan_skb_deliver(skb, &hdr);
 drop:
-	kfree(skb);
+	kfree_skb(skb);
 	return -EINVAL;
 }
 

^ permalink raw reply related

* [patch 2/3 -next] 6LoWPAN: use the _safe version of list_for_each
From: Dan Carpenter @ 2011-08-30 13:46 UTC (permalink / raw)
  To: Alexander Smirnov
  Cc: Dmitry Eremin-Solenikov, Sergey Lapin, David S. Miller,
	open list:IEEE 802.15.4 SUB..., open list:NETWORKING [GENERAL],
	kernel-janitors

When we kfree(entry) that causes a use-after-free bug, so we have to
use list_for_each_entry_safe() here.

Signed-off-by: Dan Carpenter <error27@gmail.com>
---
Curly parens are not needed here, but kernel style is to use them for
multi-line indent blocks.

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index cf304cc..5dc0489 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -813,15 +813,17 @@ static void lowpan_dellink(struct net_device *dev, struct list_head *head)
 	struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev);
 	struct net_device *real_dev = lowpan_dev->real_dev;
 	struct lowpan_dev_record *entry;
+	struct lowpan_dev_record *tmp;
 
 	ASSERT_RTNL();
 
 	mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
-	list_for_each_entry(entry, &lowpan_devices, list)
+	list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) {
 		if (entry->ldev == dev) {
 			list_del(&entry->list);
 			kfree(entry);
 		}
+	}
 	mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx);
 
 	mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx);

^ permalink raw reply related

* [patch 3/3 -next] 6LoWPAN: call dev_put() on error in lowpan_newlink()
From: Dan Carpenter @ 2011-08-30 13:51 UTC (permalink / raw)
  To: Alexander Smirnov
  Cc: Dmitry Eremin-Solenikov, Sergey Lapin, David S. Miller,
	open list:IEEE 802.15.4 SUB..., open list:NETWORKING [GENERAL],
	kernel-janitors

We should release the dev_hold() on error before returning here.

Signed-off-by: Dan Carpenter <error27@gmail.com>
---
All three of these patches were compile tested only, but this is the
one I'm not very certain of.  I've obviously tried to get it right,
but please review it carefully.

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 5dc0489..f0d1536 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -793,8 +793,11 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
 	mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
 
 	entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
-	if (!entry)
+	if (!entry) {
+		dev_put(real_dev);
+		lowpan_dev_info(dev)->real_dev = NULL;
 		return -ENOMEM;
+	}
 
 	entry->ldev = dev;
 

^ permalink raw reply related

* PROTECTED PROJECT!!
From: KimJr @ 2011-08-30  6:25 UTC (permalink / raw)


I want to discuss an important issue with you .
I write to know if this is your valid email.
Please, let me know if your email is still valid.
My valid Email:  ikimyu@9.cn

KimJr

^ permalink raw reply

* Re: BQL crap and wireless
From: Jim Gettys @ 2011-08-30 13:58 UTC (permalink / raw)
  To: Adrian Chadd
  Cc: Tom Herbert, Luis R. Rodriguez, Dave Taht, linux-wireless,
	Andrew McGregor, Matt Smith, Kevin Hayes, Derek Smithies,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <CAJ-Vmonwur-SXddNwjPEidCMqes+PwbRWFBddfdwTp2jOMu64g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

On 08/29/2011 11:42 PM, Adrian Chadd wrote:
> On 30 August 2011 11:34, Tom Herbert <therbert-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>
>> The generalization of BQL would be to set the queue limit in terms of
>> a cost function implemented by the driver.  The cost function would
>> most likely be an estimate of time to transmit a packet.  So C(P)
>> could represent cost of a packet, sum(C(P) for P queued) is aggregate
>> cost of queue packets, and queue limit is the maximum cost sum.  For
>> wired Ethernet, number of bytes in packet might be a reasonable
>> function (although framing cost could be included, but I'm not sure
>> that would make a material difference).  For wireless, maybe the
>> function could be more complex possibly taking multicast, previous
>> history of transmission times, or other arbitrary characteristics of
>> the packet into account...
>>
>> I can post a new patch with this generalization if this is interesting.
> As I said before, I think this is the kind of thing the rate control
> code needs to get its dirty hands into.
>
> With 802.11 you have to care about the PHY side of things too, so your
> cost suddenly would include the PER for combinations of {remote node,
> antenna setup, TX rate, sub-frame length, aggregate length}, etc. Do
> you choose that up front and then match a cost to it, or do you
> involve the rate control code in deciding a "good enough" way of
> handling what's on the queue by making rate decisions, then implement
> random/weighted/etc drop of what's left? Do you do some weighted/etc
> drop beforehand in the face of congestion, then pass what's left to
> the rate control code, then discard the rest?
>
> C(P) is going to be quite variable - a full frame retransmit of a 4ms
> long aggregate frame is SUM(exponential backoff, grab the air,
> preamble, header, 4ms, etc. for each pass.)
>
It's not clear to me that doing heroic measures to compute the cost is
going to be worthwhile due to the rate at which the costs can change on
wireless; just getting into the rough ballpark may be enough. But
buffering algorithms and AQM algorithms are going to need an estimate of
the *time* it will take to transmit data, more than # of bytes or packets.

Ultimately, if the queue starts to build, we'll need an AQM algorithm to
control the buffer growth.

Hopefully we can start testing SFB and other possibilities in CeroWrt
soon; Kathleen Nichols and Van Jacobson have been making some progress
on an algorithm called "RED light" which is based on the observed
transfer rate as well.  The eBDP algorithm in debloat testing also
helps, which Van pointed us at late last year when this came up (though
John Linville says eBDP needs rework before it can go upstream). We
didn't want to start testing SFB and other options while we were aware
of other problems in the wireless driver itself; Andrew and Felix's work
with Dave has apparently brought that problem to a decent point.
                - Jim

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH 0/9] update the stmmac to the version Aug_2011 (v2)
From: Giuseppe CAVALLARO @ 2011-08-30 14:20 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro

These patches update the driver, adding support for:
MMC (HW management counters), the HW feature register and
debugfs.

MMC counters are exposed via ethtool.

They also update the documentation, adding a new section
on debugging.

Deepak Sikri (1):
  stmmac: support wake up irq from external sources

Giuseppe Cavallaro (8):
  stmmac: remove the STBus bridge setting from the GMAC code
  stmmac: remove the mmc code
  stmmac: add MMC support exported via ethtool
  stmmac: export DMA TX/RX rings via debugfs.
  stmmac: rework the code to get the Synopsys ID
  stmmac: add HW DMA feature register
  stmmac: update the doc with new info about the driver's debug.
  stmmac: update the driver version (Aug_2011)

 Documentation/networking/stmmac.txt |   33 ++++-
 drivers/net/stmmac/Kconfig          |    8 +
 drivers/net/stmmac/Makefile         |    3 +-
 drivers/net/stmmac/common.h         |   46 +++++--
 drivers/net/stmmac/dwmac1000_core.c |   11 +-
 drivers/net/stmmac/dwmac1000_dma.c  |   14 +-
 drivers/net/stmmac/dwmac100_core.c  |   12 +--
 drivers/net/stmmac/dwmac_dma.h      |    1 +
 drivers/net/stmmac/mmc.h            |  131 ++++++++++++++++
 drivers/net/stmmac/mmc_core.c       |  265 ++++++++++++++++++++++++++++++++
 drivers/net/stmmac/stmmac.h         |    5 +-
 drivers/net/stmmac/stmmac_ethtool.c |  149 ++++++++++++++++--
 drivers/net/stmmac/stmmac_main.c    |  287 ++++++++++++++++++++++++++++++++++-
 13 files changed, 901 insertions(+), 64 deletions(-)
 create mode 100644 drivers/net/stmmac/mmc.h
 create mode 100644 drivers/net/stmmac/mmc_core.c

-- 
1.7.4.4

^ permalink raw reply

* [PATCH 1/9] stmmac: remove the STBus bridge setting from the GMAC code (v2)
From: Giuseppe CAVALLARO @ 2011-08-30 14:20 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1314714064-29101-1-git-send-email-peppe.cavallaro@st.com>

This patch removes a piece of code (actually commented out)
that was only useful for some ST platforms in the past.

This kind of setting now can be done by using the platform
callbacks provided in linux/stmmac.h (see the stmmac.txt for
further details).

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/stmmac/dwmac1000_core.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c
index 0f63b3c..eea184a 100644
--- a/drivers/net/stmmac/dwmac1000_core.c
+++ b/drivers/net/stmmac/dwmac1000_core.c
@@ -37,9 +37,6 @@ static void dwmac1000_core_init(void __iomem *ioaddr)
 	value |= GMAC_CORE_INIT;
 	writel(value, ioaddr + GMAC_CONTROL);
 
-	/* STBus Bridge Configuration */
-	/*writel(0xc5608, ioaddr + 0x00007000);*/
-
 	/* Freeze MMC counters */
 	writel(0x8, ioaddr + GMAC_MMC_CTRL);
 	/* Mask GMAC interrupts */
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH 2/9] stmmac: remove the mmc code (v2)
From: Giuseppe CAVALLARO @ 2011-08-30 14:20 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1314714064-29101-1-git-send-email-peppe.cavallaro@st.com>

DWMAC Management Counters (MMC) are not fully supported.
The minimal support added in the past only allowed
disabling the counters (if present) and masking their
interrupts.
This patch prepares the driver to support the MMC
by removing obsolete code.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/stmmac/common.h         |   11 -----------
 drivers/net/stmmac/dwmac1000_core.c |    2 --
 drivers/net/stmmac/dwmac100_core.c  |   11 -----------
 drivers/net/stmmac/stmmac_main.c    |    4 ----
 4 files changed, 0 insertions(+), 28 deletions(-)

diff --git a/drivers/net/stmmac/common.h b/drivers/net/stmmac/common.h
index 375ea19..290b97a 100644
--- a/drivers/net/stmmac/common.h
+++ b/drivers/net/stmmac/common.h
@@ -130,17 +130,6 @@ enum tx_dma_irq_status {
 #define MAC_ENABLE_TX		0x00000008	/* Transmitter Enable */
 #define MAC_RNABLE_RX		0x00000004	/* Receiver Enable */
 
-/* MAC Management Counters register */
-#define MMC_CONTROL		0x00000100	/* MMC Control */
-#define MMC_HIGH_INTR		0x00000104	/* MMC High Interrupt */
-#define MMC_LOW_INTR		0x00000108	/* MMC Low Interrupt */
-#define MMC_HIGH_INTR_MASK	0x0000010c	/* MMC High Interrupt Mask */
-#define MMC_LOW_INTR_MASK	0x00000110	/* MMC Low Interrupt Mask */
-
-#define MMC_CONTROL_MAX_FRM_MASK	0x0003ff8	/* Maximum Frame Size */
-#define MMC_CONTROL_MAX_FRM_SHIFT	3
-#define MMC_CONTROL_MAX_FRAME		0x7FF
-
 struct stmmac_desc_ops {
 	/* DMA RX descriptor ring initialization */
 	void (*init_rx_desc) (struct dma_desc *p, unsigned int ring_size,
diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c
index eea184a..9ba9cae 100644
--- a/drivers/net/stmmac/dwmac1000_core.c
+++ b/drivers/net/stmmac/dwmac1000_core.c
@@ -37,8 +37,6 @@ static void dwmac1000_core_init(void __iomem *ioaddr)
 	value |= GMAC_CORE_INIT;
 	writel(value, ioaddr + GMAC_CONTROL);
 
-	/* Freeze MMC counters */
-	writel(0x8, ioaddr + GMAC_MMC_CTRL);
 	/* Mask GMAC interrupts */
 	writel(0x207, ioaddr + GMAC_INT_MASK);
 
diff --git a/drivers/net/stmmac/dwmac100_core.c b/drivers/net/stmmac/dwmac100_core.c
index 743a580..aacfc6e 100644
--- a/drivers/net/stmmac/dwmac100_core.c
+++ b/drivers/net/stmmac/dwmac100_core.c
@@ -70,17 +70,6 @@ static void dwmac100_dump_mac_regs(void __iomem *ioaddr)
 		readl(ioaddr + MAC_VLAN1));
 	pr_info("\tVLAN2 tag (offset 0x%x): 0x%08x\n", MAC_VLAN2,
 		readl(ioaddr + MAC_VLAN2));
-	pr_info("\n\tMAC management counter registers\n");
-	pr_info("\t MMC crtl (offset 0x%x): 0x%08x\n",
-		MMC_CONTROL, readl(ioaddr + MMC_CONTROL));
-	pr_info("\t MMC High Interrupt (offset 0x%x): 0x%08x\n",
-		MMC_HIGH_INTR, readl(ioaddr + MMC_HIGH_INTR));
-	pr_info("\t MMC Low Interrupt (offset 0x%x): 0x%08x\n",
-		MMC_LOW_INTR, readl(ioaddr + MMC_LOW_INTR));
-	pr_info("\t MMC High Interrupt Mask (offset 0x%x): 0x%08x\n",
-		MMC_HIGH_INTR_MASK, readl(ioaddr + MMC_HIGH_INTR_MASK));
-	pr_info("\t MMC Low Interrupt Mask (offset 0x%x): 0x%08x\n",
-		MMC_LOW_INTR_MASK, readl(ioaddr + MMC_LOW_INTR_MASK));
 }
 
 static void dwmac100_irq_status(void __iomem *ioaddr)
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index c6e567e..da11405 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -826,10 +826,6 @@ static int stmmac_open(struct net_device *dev)
 		pr_info("\tTX Checksum insertion supported\n");
 	netdev_update_features(dev);
 
-	/* Initialise the MMC (if present) to disable all interrupts. */
-	writel(0xffffffff, priv->ioaddr + MMC_HIGH_INTR_MASK);
-	writel(0xffffffff, priv->ioaddr + MMC_LOW_INTR_MASK);
-
 	/* Request the IRQ lines */
 	ret = request_irq(dev->irq, stmmac_interrupt,
 			 IRQF_SHARED, dev->name, dev);
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH 3/9] stmmac: support wake up irq from external sources (v2)
From: Giuseppe CAVALLARO @ 2011-08-30 14:20 UTC (permalink / raw)
  To: netdev; +Cc: Deepak Sikri, Giuseppe Cavallaro
In-Reply-To: <1314714064-29101-1-git-send-email-peppe.cavallaro@st.com>

From: Deepak Sikri <deepak.sikri@st.com>

On some platforms e.g. SPEAr the wake up irq differs from the
GMAC interrupt source.
With this patch an external wake up irq can be passed through the
platform code and named as "eth_wake_irq".

If the wake up interrupt is not passed from the platform,
the driver will continue to use the mac irq (ndev->irq).

Signed-off-by: Deepak Sikri <deepak.sikri@st.com>
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/stmmac/stmmac.h         |    1 +
 drivers/net/stmmac/stmmac_ethtool.c |    4 ++--
 drivers/net/stmmac/stmmac_main.c    |   14 +++++++++++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h
index de1929b..619e3af 100644
--- a/drivers/net/stmmac/stmmac.h
+++ b/drivers/net/stmmac/stmmac.h
@@ -72,6 +72,7 @@ struct stmmac_priv {
 	spinlock_t lock;
 	int wolopts;
 	int wolenabled;
+	int wol_irq;
 #ifdef CONFIG_STMMAC_TIMER
 	struct stmmac_timer *tm;
 #endif
diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c
index 7ed8fb6..79df79d 100644
--- a/drivers/net/stmmac/stmmac_ethtool.c
+++ b/drivers/net/stmmac/stmmac_ethtool.c
@@ -321,10 +321,10 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	if (wol->wolopts) {
 		pr_info("stmmac: wakeup enable\n");
 		device_set_wakeup_enable(priv->device, 1);
-		enable_irq_wake(dev->irq);
+		enable_irq_wake(priv->wol_irq);
 	} else {
 		device_set_wakeup_enable(priv->device, 0);
-		disable_irq_wake(dev->irq);
+		disable_irq_wake(priv->wol_irq);
 	}
 
 	spin_lock_irq(&priv->lock);
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index da11405..931cbf6 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1512,7 +1512,7 @@ static int stmmac_mac_device_setup(struct net_device *dev)
 
 	if (device_can_wakeup(priv->device)) {
 		priv->wolopts = WAKE_MAGIC; /* Magic Frame as default */
-		enable_irq_wake(dev->irq);
+		enable_irq_wake(priv->wol_irq);
 	}
 
 	return 0;
@@ -1585,6 +1585,18 @@ static int stmmac_dvr_probe(struct platform_device *pdev)
 		pr_info("\tPMT module supported\n");
 		device_set_wakeup_capable(&pdev->dev, 1);
 	}
+	/*
+	 * On some platforms e.g. SPEAr the wake up irq differs from the mac irq
+	 * The external wake up irq can be passed through the platform code
+	 * named as "eth_wake_irq"
+	 *
+	 * In case the wake up interrupt is not passed from the platform
+	 * so the driver will continue to use the mac irq (ndev->irq)
+	 */
+	priv->wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq");
+	if (priv->wol_irq == -ENXIO)
+		priv->wol_irq = ndev->irq;
+
 
 	platform_set_drvdata(pdev, ndev);
 
-- 
1.7.4.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox