Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 4/5] netns netfilter: per-netns arp_tables FILTER
From: Alexey Dobriyan @ 2008-01-24 12:29 UTC (permalink / raw)
  To: kaber; +Cc: netdev, netfilter-devel, devel

Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
---

 include/net/netns/ipv4.h             |    1 
 net/ipv4/netfilter/arptable_filter.c |   38 +++++++++++++++++++++++++----------
 2 files changed, 29 insertions(+), 10 deletions(-)

--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -32,6 +32,7 @@ struct netns_ipv4 {
 	struct xt_table		*iptable_filter;
 	struct xt_table		*iptable_mangle;
 	struct xt_table		*iptable_raw;
+	struct xt_table		*arptable_filter;
 #endif
 };
 #endif
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -20,7 +20,7 @@ static struct
 	struct arpt_replace repl;
 	struct arpt_standard entries[3];
 	struct arpt_error term;
-} initial_table __initdata = {
+} initial_table __net_initdata = {
 	.repl = {
 		.name = "filter",
 		.valid_hooks = FILTER_VALID_HOOKS,
@@ -45,7 +45,7 @@ static struct
 	.term = ARPT_ERROR_INIT,
 };
 
-static struct arpt_table __packet_filter = {
+static struct arpt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.lock		= RW_LOCK_UNLOCKED,
@@ -53,7 +53,6 @@ static struct arpt_table __packet_filter = {
 	.me		= THIS_MODULE,
 	.af		= NF_ARP,
 };
-static struct arpt_table *packet_filter;
 
 /* The work comes in here from netfilter.c */
 static unsigned int arpt_hook(unsigned int hook,
@@ -62,7 +61,7 @@ static unsigned int arpt_hook(unsigned int hook,
 			      const struct net_device *out,
 			      int (*okfn)(struct sk_buff *))
 {
-	return arpt_do_table(skb, hook, in, out, packet_filter);
+	return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter);
 }
 
 static struct nf_hook_ops arpt_ops[] __read_mostly = {
@@ -86,14 +85,33 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = {
 	},
 };
 
+static int __net_init arptable_filter_net_init(struct net *net)
+{
+	/* Register table */
+	net->ipv4.arptable_filter =
+		arpt_register_table(net, &packet_filter, &initial_table.repl);
+	if (IS_ERR(net->ipv4.arptable_filter))
+		return PTR_ERR(net->ipv4.arptable_filter);
+	return 0;
+}
+
+static void __net_exit arptable_filter_net_exit(struct net *net)
+{
+	arpt_unregister_table(net->ipv4.arptable_filter);
+}
+
+static struct pernet_operations arptable_filter_net_ops = {
+	.init = arptable_filter_net_init,
+	.exit = arptable_filter_net_exit,
+};
+
 static int __init arptable_filter_init(void)
 {
 	int ret;
 
-	/* Register table */
-	packet_filter = arpt_register_table(&init_net, &__packet_filter, &initial_table.repl);
-	if (IS_ERR(packet_filter))
-		return PTR_ERR(packet_filter);
+	ret = register_pernet_subsys(&arptable_filter_net_ops);
+	if (ret < 0)
+		return ret;
 
 	ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
 	if (ret < 0)
@@ -101,14 +119,14 @@ static int __init arptable_filter_init(void)
 	return ret;
 
 cleanup_table:
-	arpt_unregister_table(packet_filter);
+	unregister_pernet_subsys(&arptable_filter_net_ops);
 	return ret;
 }
 
 static void __exit arptable_filter_fini(void)
 {
 	nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
-	arpt_unregister_table(packet_filter);
+	unregister_pernet_subsys(&arptable_filter_net_ops);
 }
 
 module_init(arptable_filter_init);


^ permalink raw reply

* [PATCH 5/5] netns netfilter: put table module on netns stop
From: Alexey Dobriyan @ 2008-01-24 12:30 UTC (permalink / raw)
  To: kaber; +Cc: netdev, netfilter-devel, devel

When the number of entries exceeds the number of initial entries, the
foo-tables code pins the table module. But during table unregistration on
netns stop, that additional pin was never dropped.

Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
---

 net/ipv4/netfilter/arp_tables.c |    3 +++
 net/ipv4/netfilter/ip_tables.c  |    3 +++
 net/ipv6/netfilter/ip6_tables.c |    3 +++
 3 files changed, 9 insertions(+)

--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1773,6 +1773,7 @@ void arpt_unregister_table(struct arpt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
+	struct module *table_owner = table->me;
 
 	private = xt_unregister_table(table);
 
@@ -1780,6 +1781,8 @@ void arpt_unregister_table(struct arpt_table *table)
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
 			   cleanup_entry, NULL);
+	if (private->number > private->initial_entries)
+		module_put(table_owner);
 	xt_free_table_info(private);
 }
 
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2095,12 +2095,15 @@ void ipt_unregister_table(struct xt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
+	struct module *table_owner = table->me;
 
 	private = xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	if (private->number > private->initial_entries)
+		module_put(table_owner);
 	xt_free_table_info(private);
 }
 
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2120,12 +2120,15 @@ void ip6t_unregister_table(struct xt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
+	struct module *table_owner = table->me;
 
 	private = xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	if (private->number > private->initial_entries)
+		module_put(table_owner);
 	xt_free_table_info(private);
 }
 


^ permalink raw reply

* Re: 2.6.24-rc8-mm1 : net tcp_input.c warnings
From: Kamalesh Babulal @ 2008-01-24 13:11 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: Dave Young, Kamalesh Babulal, Krishna Kumar2,
	Denys Fedoryshchenko, David Miller, LKML, Netdev, Andrew Morton
In-Reply-To: <Pine.LNX.4.64.0801241108400.31652@kivilampi-30.cs.helsinki.fi>

On Thu, Jan 24, 2008 at 11:54:18AM +0200, Ilpo Järvinen wrote:
> On Thu, 24 Jan 2008, Dave Young wrote:
> 
> Hi Dave (& others),
> 
> > Thanks.
> 
> Thanks a lot, I was first to ignore all these because they occurred 
> with newreno, but looked again... :-/
> 
> > New warning trigged with your debug patch:
> 
> This was probably with the earlier one I sent to you because there's still 
> this case remaining which itself is valid:
> 
> > P: 5 L: 0 vs 0 S: 0 vs 1 w: 2044790889-2044796616 (0)
> 
> ...snip... this is still ok state (S+L <= P):
> 
> > P: 5 L: 0 vs 0 S: 0 vs 3 w: 2044790889-2044796616 (0)
> > TCP wq(s)      <
> > TCP wq(h) +++h+<
> > l0 s3 f0 p5 seq: su2044790889 hs2044795029 sn2044796616
> > ------------[ cut here ]------------
> > WARNING: at net/ipv4/tcp_input.c:2169 tcp_mark_head_lost+0x122/0x150()
> > Modules linked in: snd_seq_dummy snd_seq_oss snd_seq_midi_event
> > snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss eeprom e100 psmouse
> > snd_hda_intel snd_pcm snd_timer btusb bluetooth serio_raw snd 3c59x sg
> > evdev thermal soundcore rtc_cmos snd_page_alloc rtc_core rtc_lib
> > i2c_i801 processor button intel_agp dcdbas pcspkr agpgart
> > Pid: 0, comm: swapper Not tainted 2.6.24-rc8-mm1 #8
> >  [<c0132100>] ? have_callable_console+0x20/0x30
> >  [<c0131844>] warn_on_slowpath+0x54/0x80
> >  [<c03ffe54>] ? tcp_print_queue+0x1a4/0x230
> >  [<c0132438>] ? vprintk+0x308/0x320
.
.
<snip>
.
.

> > ---[ end trace 14b601818e6903ac ]---
> 
> ...But this one no longer is, and what's more, L: 5 is not a valid state
> at this point at all (it should only happen if we went to RTO, but that
> would reset S to zero with newreno):
> 
> > P: 5 L: 5 vs 5 S: 0 vs 3 w: 2044790889-2044796616 (0)
> > TCP wq(s) LLLLl<
> > TCP wq(h) +++h+<
> > l5 s3 f0 p5 seq: su2044790889 hs2044795029 sn2044796616
> 
> Surprisingly, it was the first time the WARN_ON for left_out returned
> the correct location. This also explains why the patch I sent to Krishna
> didn't print anything (it never got as far as printing because I forgot
> to add the L+S>P check to the state-checking if).
> 
> ...so please, could you (others than Denys) try this patch, it should 
> solve the issue. And Denys, could you confirm (and if necessary double 
> check) that the kernel you saw this similar problem with is the pure 
> Linus' mainline, i.e., without any net-2.6.25 or mm bits please, if so, 
> that problem persists. And anyway, there were some fackets_out related 
> problems reported as well and this doesn't help for that but I think I've 
> lost track of who was seeing it due to large number of reports :-), could 
> somebody refresh my memory because I currently don't have time to dig it 
> up from archives (at least on this week).
> 
> 
> -- 
>  i.
> 
> --
> [PATCH] [TCP]: NewReno must count every skb while marking losses
> 
> NewReno should add cnt per skb (as with FACK) instead of depending
> on SACKED_ACKED bits which won't be set with it at all.
> Effectively, NewReno should always exit after the first
> iteration anyway (or immediately if there's already a head in
> lost_out).
> 
> This was fixed earlier in net-2.6.25 but got reverted among other
> stuff and I didn't notice that this is still necessary (actually
> wasn't even considering this case while trying to figure out the
> reports because I lived with different kind of code than it in
> reality was).
> 
> This should solve the WARN_ONs in TCP code that as a result of
> this triggered multiple times in every place we check for this
> invariant.
> 
> Special thanks to Dave Young <hidave.darkstar@gmail.com> and
> Krishna Kumar2 <krkumar2@in.ibm.com> for trying with my debug
> patches.

Hi,

Thanks, after applying the patch the warning is not seen.


  Tested-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com> 
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
> ---
>  net/ipv4/tcp_input.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 295490e..aa409a5 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -2156,7 +2156,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
>  		tp->lost_skb_hint = skb;
>  		tp->lost_cnt_hint = cnt;
> 
> -		if (tcp_is_fack(tp) ||
> +		if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
>  		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
>  			cnt += tcp_skb_pcount(skb);
> 
> -- 
> 1.5.2.2


-- 
Thanks & Regards,
Kamalesh Babulal,
Linux Technology Center,
IBM, ISTL.

^ permalink raw reply

* [PATCH net-2.6.25][NETNS]: Fix race between put_net() and netlink_kernel_create().
From: Pavel Emelyanov @ 2008-01-24 13:15 UTC (permalink / raw)
  To: David Miller; +Cc: Denis Lunev, Linux Netdev List, devel, Alexey Dobriyan

The comment about "race free view of the set of network 
namespaces" was a bit hasty. Look (there even can be only 
one CPU, as discovered by Alexey Dobriyan and Denis Lunev):

put_net()
  if (atomic_dec_and_test(&net->refcnt))
    /* true */
      __put_net(net);
        queue_work(...);

/*
 * note: the net now has refcnt 0, but still in
 * the global list of net namespaces
 */

== re-schedule ==

register_pernet_subsys(&some_ops);
  register_pernet_operations(&some_ops);
    (*some_ops)->init(net);
      /*
       * we call netlink_kernel_create() here
       * in some places
       */
      netlink_kernel_create();
         sk_alloc();
            get_net(net); /* refcnt = 1 */
         /*
          * now we drop the net refcount not to
          * block the net namespace exit in the
          * future (or this can be done on the 
          * error path)
          */
         put_net(sk->sk_net);
             if (atomic_dec_and_test(&...))
                   /*
                    * true. BOOOM! The net is
                    * scheduled for release twice
                    */

When thinking about this problem, I decided that getting and
putting the net in an init callback is wrong. If some init
callback needs to have a refcount-less reference on the struct
net, _it_ has to be careful itself, rather than relying on
the infrastructure to handle this correctly.

In case of netlink_kernel_create(), the problem is that the
sk_alloc() gets the given namespace, but passing the info
that we don't want to get it inside this call is too heavy.

Instead, I propose to create the socket inside the init_net
namespace and then re-attach it to the desired one right
after the socket is created.

After doing this, we also have to be careful on error paths
not to drop a reference on the namespace that we never
took.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Denis Lunev <den@openvz.org>

---

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6b178e1..ff9fb6b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1344,6 +1344,22 @@ static void netlink_data_ready(struct sock *sk, int len)
  *	queueing.
  */
 
+static void __netlink_release(struct sock *sk)
+{
+	/*
+	 * Last sock_put should drop referrence to sk->sk_net. It has already
+	 * been dropped in netlink_kernel_create. Taking referrence to stopping
+	 * namespace is not an option.
+	 * Take referrence to a socket to remove it from netlink lookup table
+	 * _alive_ and after that destroy it in the context of init_net.
+	 */
+
+	sock_hold(sk);
+	sock_release(sk->sk_socket);
+	sk->sk_net = get_net(&init_net);
+	sock_put(sk);
+}
+
 struct sock *
 netlink_kernel_create(struct net *net, int unit, unsigned int groups,
 		      void (*input)(struct sk_buff *skb),
@@ -1362,8 +1378,18 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
 	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
 		return NULL;
 
-	if (__netlink_create(net, sock, cb_mutex, unit) < 0)
-		goto out_sock_release;
+	/*
+	 * We have to just have a reference on the net from sk, but don't
+	 * get_net it. Besides, we cannot get and then put the net here.
+	 * So we create one inside init_net and the move it to net.
+	 */
+
+	if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
+		goto out_sock_release_nosk;
+
+	sk = sock->sk;
+	put_net(sk->sk_net);
+	sk->sk_net = net;
 
 	if (groups < 32)
 		groups = 32;
@@ -1372,7 +1398,6 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
 	if (!listeners)
 		goto out_sock_release;
 
-	sk = sock->sk;
 	sk->sk_data_ready = netlink_data_ready;
 	if (input)
 		nlk_sk(sk)->netlink_rcv = input;
@@ -1395,14 +1420,14 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
 		nl_table[unit].registered++;
 	}
 	netlink_table_ungrab();
-
-	/* Do not hold an extra referrence to a namespace as this socket is
-	 * internal to a namespace and does not prevent it to stop. */
-	put_net(net);
 	return sk;
 
 out_sock_release:
 	kfree(listeners);
+	__netlink_release(sk);
+	return NULL;
+
+out_sock_release_nosk:
 	sock_release(sock);
 	return NULL;
 }
@@ -1415,18 +1440,7 @@ netlink_kernel_release(struct sock *sk)
 	if (sk == NULL || sk->sk_socket == NULL)
 		return;
 
-	/*
-	 * Last sock_put should drop referrence to sk->sk_net. It has already
-	 * been dropped in netlink_kernel_create. Taking referrence to stopping
-	 * namespace is not an option.
-	 * Take referrence to a socket to remove it from netlink lookup table
-	 * _alive_ and after that destroy it in the context of init_net.
-	 */
-	sock_hold(sk);
-	sock_release(sk->sk_socket);
-
-	sk->sk_net = get_net(&init_net);
-	sock_put(sk);
+	__netlink_release(sk);
 }
 EXPORT_SYMBOL(netlink_kernel_release);
 

^ permalink raw reply related

* Re: [Bugme-new] [Bug 9808] New: system hung with htb QoS
From: Andrew Morton @ 2008-01-24 14:11 UTC (permalink / raw)
  To: netdev; +Cc: bilias, bugme-daemon, Auke Kok, Jesse Brandeburg
In-Reply-To: <bug-9808-10286@http.bugzilla.kernel.org/>

> On Thu, 24 Jan 2008 03:03:11 -0800 (PST) bugme-daemon@bugzilla.kernel.org wrote:
> http://bugzilla.kernel.org/show_bug.cgi?id=9808
> 
>            Summary: system hung with htb QoS
>            Product: Networking
>            Version: 2.5
>      KernelVersion: 2.6.23.9
>           Platform: All
>         OS/Version: Linux
>               Tree: Fedora
>             Status: NEW
>           Severity: normal
>           Priority: P1
>          Component: Netfilter/Iptables
>         AssignedTo: networking_netfilter-iptables@kernel-bugs.osdl.org
>         ReportedBy: bilias@edu.physics.uoc.gr
> 
> 
> Hi,
> 
> I've setup QoS on my ftp server to limit outgoing traffic. Apparently the
> server
> stops responding (no output no keyboard) in an unpredictable manner. Sometimes
> it 
> takes an hour, sometimes up to 4 days for the system to hang.
> 
> I have attached my QoS startup script, dmesg output,
> lspci -vvv, iptables that interact with QoS.
> 
> I'm also receiving this quite often:
> Jan 15 12:23:17 ftp kernel: e1000: eth0: e1000_clean_tx_irq: Detected Tx Unit
> Hang
> Jan 15 12:23:17 ftp kernel:   Tx Queue             <0>
> Jan 15 12:23:17 ftp kernel:   TDH                  <2a>
> Jan 15 12:23:17 ftp kernel:   TDT                  <17>
> Jan 15 12:23:17 ftp kernel:   next_to_use          <17>
> Jan 15 12:23:17 ftp kernel:   next_to_clean        <2a>
> Jan 15 12:23:17 ftp kernel: buffer_info[next_to_clean]
> Jan 15 12:23:17 ftp kernel:   time_stamp           <5798144>
> Jan 15 12:23:17 ftp kernel:   next_to_watch        <2d>
> Jan 15 12:23:17 ftp kernel:   jiffies              <57988ef>
> Jan 15 12:23:17 ftp kernel:   next_to_watch.status <0>
> Jan 15 12:23:19 ftp kernel: e1000: eth0: e1000_clean_tx_irq: Detected Tx Unit
> Hang
> 
> Today for the first time (after applying options to e1000 driver in
> modprobe.conf) I got a kernel panic:
> 
> BUG: unable to handle kernel paging request at virtual address a0379120
> EIP: 0060: [<c05db2dc>] Not Tainted VLI
> EIP is at ip_rcv+0x286/0x4ba
> Kernel panic - not syncing: Fatal exception in interrupt
> 
> This is what I wrote down on paper because it wasn't logged anywhere.
> Usually it hangs without a kernel panic.
> 
> System is Fedora Core 8, up to date
> 2.6.23.9-85.fc8PAE
> 2x Intel(R) Xeon(TM) CPU 3.20GHz
> 4G RAM
> 
> Without the QoS loaded the system never hangs. It must be related to this. However
> the e1000 error I'm receiving must have to do with the e1000 driver. I've seen
> this bug in the past that's why I tried to apply the options in modprobe.conf
> 
> any help will be appreciated
> thanx in advance 
> 
> Giannis
> 
> QoS startup script:
> # default WAN limit
> LIMIT="80mbit"
> LOW_LIMIT="50mbit"
> 
> start() {
>         echo -n "Starting QoS: (WAN limit set to ${LIMIT})"
> tc qdisc del dev eth0 root    2> /dev/null > /dev/null
> tc qdisc del dev eth0 ingress 2> /dev/null > /dev/null
> ADD_CLASS="tc class add dev eth0 "
> ###### uplink
> # install root HTB, point default traffic to 1:25
> tc qdisc add dev eth0 root handle 1: htb  default 25
> 
> tc class add dev eth0 parent 1: classid 1:1 htb rate 1000mbit
> # class for outgoing SYN packets + Minimize-Delay TOS
> ${ADD_CLASS} parent 1:1 classid 1:11 htb rate 2mbit ceil 5mbit prio 1
> # class for internal LAN traffic
> ${ADD_CLASS} parent 1:1 classid 1:12 htb rate 500mbit ceil 800mbit prio 2
> # class for WAN traffic
> ${ADD_CLASS} parent 1:1 classid 1:2 htb rate ${LIMIT} ceil ${LIMIT} prio 3
> # class for WAN http traffic
> ${ADD_CLASS} parent 1:2 classid 1:24 htb rate 30mbit ceil ${LIMIT} prio 4
> # default class, rest WAN traffic
> ${ADD_CLASS} parent 1:2 classid 1:25 htb rate 20mbit ceil ${LIMIT} prio 5
> 
> tc filter add dev eth0 protocol ip parent 1:0 prio 1 handle 1 fw flowid 1:11
> tc filter add dev eth0 protocol ip parent 1:0 prio 2 handle 2 fw flowid 1:12
> tc filter add dev eth0 protocol ip parent 1:0 prio 4 u32 \
>         match ip sport 80 0xffff flowid 1:24
> 
> tc qdisc add dev eth0 parent 1:11 handle 11: sfq perturb 10
> tc qdisc add dev eth0 parent 1:12 handle 12: sfq perturb 10
> tc qdisc add dev eth0 parent 1:24 handle 24: sfq perturb 10
> tc qdisc add dev eth0 parent 1:25 handle 25: sfq perturb 10
> 
>         echo
> }
> 
> stop() {
>         echo -n "Stopping QoS: "
>         tc qdisc del dev eth0 root    2> /dev/null > /dev/null
>         tc qdisc del dev eth0 ingress 2> /dev/null > /dev/null
>         echo
> }
> 
> -------------------
> QoS startup script: http://www.edu.physics.uoc.gr/~bilias/ftp/QoS
> dmesg: http://www.edu.physics.uoc.gr/~bilias/ftp/dmesg
> lspci -vvv: http://www.edu.physics.uoc.gr/~bilias/ftp/lspci
> iptables for QoS: http://www.edu.physics.uoc.gr/~bilias/ftp/iptables
> 
> modprobe.conf options for e1000: 
> options e1000 XsumRX=0 Speed=1000 Duplex=2 InterruptThrottleRate=0
> FlowControl=3 RxDescriptors=4096 TxDescriptors=4096 RxIntDelay=0 TxIntDelay=0
> 
> 
> -- 
> Configure bugmail: http://bugzilla.kernel.org/userprefs.cgi?tab=email
> ------- You are receiving this mail because: -------
> You are on the CC list for the bug, or are watching someone who is.

^ permalink raw reply

* Re: [PATCH net-2.6.25] Add packet filtering based on process's security context.
From: Paul Moore @ 2008-01-24 15:03 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: netdev, davem, linux-security-module, netfilter-devel
In-Reply-To: <200801242047.JEI35479.OJLFHMtOOFQFVS@I-love.SAKURA.ne.jp>

On Thursday 24 January 2008 6:47:55 am Tetsuo Handa wrote:
> Are there any remaining questions/problems about this patch?
> If none, I want this patch applied to net-2.6.25 tree.

Hello,

Taking into consideration that there are no current in-tree users of 
this patch and the only known user of this functionality is TOMOYO, 
which is still dealing with some unresolved VFS issues, I suggest not 
merging this patch at the current time.  My recommendation is to 
continue to work on resolving the VFS issues (which it appears you are 
working on) and then submitting all of the required TOMOYO changes at 
once.

As a general rule, removing functionality from the kernel tends to be 
much more difficult than adding it.

-- 
paul moore
linux security @ hp

^ permalink raw reply

* [NET]: should explicitely initialize atomic_t field in struct dst_ops
From: Eric Dumazet @ 2008-01-24 15:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org

All but one struct dst_ops static initializations miss explicit
initialization of entries field.

As this field is atomic_t, we should use ATOMIC_INIT(0), and not
rely on atomic_t implementation.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

 net/ipv4/route.c        |    2 ++
 net/ipv4/xfrm4_policy.c |    1 +
 net/ipv6/route.c        |    2 ++
 net/ipv6/xfrm6_policy.c |    1 +
 4 files changed, 6 insertions(+)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 896c768..163086b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -169,6 +169,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.update_pmtu =		ip_rt_update_pmtu,
 	.local_out =		ip_local_out,
 	.entry_size =		sizeof(struct rtable),
+	.entries =		ATOMIC_INIT(0),
 };
 
 #define ECN_OR_COST(class)	TC_PRIO_##class
@@ -2498,6 +2499,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.check			=	ipv4_dst_check,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
 	.entry_size		=	sizeof(struct rtable),
+	.entries		=	ATOMIC_INIT(0),
 };
 
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 3783e3e..10ed704 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -247,6 +247,7 @@ static struct dst_ops xfrm4_dst_ops = {
 	.local_out =		__ip_local_out,
 	.gc_thresh =		1024,
 	.entry_size =		sizeof(struct xfrm_dst),
+	.entries =		ATOMIC_INIT(0),
 };
 
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4004c5f..8d669bb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -107,6 +107,7 @@ static struct dst_ops ip6_dst_ops = {
 	.update_pmtu		=	ip6_rt_update_pmtu,
 	.local_out		=	ip6_local_out,
 	.entry_size		=	sizeof(struct rt6_info),
+	.entries		=	ATOMIC_INIT(0),
 };
 
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -120,6 +121,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.check			=	ip6_dst_check,
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 	.entry_size		=	sizeof(struct rt6_info),
+	.entries		=	ATOMIC_INIT(0),
 };
 
 struct rt6_info ip6_null_entry = {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index c25a6b5..7d20199 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -272,6 +272,7 @@ static struct dst_ops xfrm6_dst_ops = {
 	.local_out =		__ip6_local_out,
 	.gc_thresh =		1024,
 	.entry_size =		sizeof(struct xfrm_dst),
+	.entries =		ATOMIC_INIT(0),
 };
 
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {

^ permalink raw reply related

* Re: [PATCH] UCC TDM driver for QE based MPC83xx platforms.
From: Timur Tabi @ 2008-01-24 15:39 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: Poonam_Aggrwal-b10812, kumar.gala, akpm, linux-kernel, netdev,
	rubini, linuxppc-dev, michael.barkowski, rich.cutler,
	ashish.kalra
In-Reply-To: <20080124171920.49d805cb.sfr@canb.auug.org.au>

Stephen Rothwell wrote:

>> +	tdm_ctrl[device_num]->ut_info->uf_info.tdm_tx_clk =
>> +			(char *) of_get_property(np, "fsl,tdm-tx-clk", NULL);
>                                 ^
> We don't normally put spaces here.

Since when?

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [PATCH UCC TDM 1/3 Updated] Platform changes for UCC TDM driver for MPC8323eRDB. Also includes related QE changes and dts entries.
From: Anton Vorontsov @ 2008-01-24 15:48 UTC (permalink / raw)
  To: Poonam_Aggrwal-b10812
  Cc: kumar.gala, akpm, linux-kernel, netdev, rubini, linuxppc-dev,
	michael.barkowski, rich.cutler, ashish.kalra, timur
In-Reply-To: <Pine.LNX.4.64.0801241553270.4984@linux121>

Hello Poonam,

On Thu, Jan 24, 2008 at 04:00:06PM +0530, Poonam_Aggrwal-b10812 wrote:
> Thanks Stephen for your comments, incorporated them.
> From: Poonam Aggrwal <b10812@freescale.com>
> 
> This patch makes necessary changes in the QE and UCC framework to support 
> TDM. It also adds support to configure the BRG properly through device 
> tree entries. Includes the device tree changes for UCC TDM driver as well.
> It also includes device tree entries for UCC TDM driver.
> 
> Tested on MPC8323ERDB platform.
> 
> Signed-off-by: Poonam Aggrwal <b10812@freescale.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@freescale.com>
> Signed-off-by: Kim Phillips <Kim.Phillips@freescale.com>
> Signed-off-by: Michael Barkowski <michael.barkowski@freescale.com>
> ---
>  arch/powerpc/boot/dts/mpc832x_rdb.dts |   58 +++++++
>  arch/powerpc/sysdev/qe_lib/qe.c       |  184 +++++++++++++++++++++--
>  arch/powerpc/sysdev/qe_lib/ucc.c      |  265 +++++++++++++++++++++++++++++++++
>  arch/powerpc/sysdev/qe_lib/ucc_fast.c |   37 +++++
>  include/asm-powerpc/qe.h              |    8 +
>  include/asm-powerpc/ucc.h             |    4 +
>  include/asm-powerpc/ucc_fast.h        |    4 +
>  7 files changed, 548 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts
> index 388c8a7..c0e6283 100644
> --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts
> +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts
> @@ -105,6 +105,17 @@
>  			device_type = "par_io";
>  			num-ports = <7>;
>  
> +			ucc1pio:ucc_pin@01 {
> +				pio-map = <
> +			/* port  pin  dir  open_drain  assignment  has_irq */
> +					0  e  2  0  1  0	/* CLK11 */
> +					3 16  1  0  2  0	/* BRG9 */
> +					3 1b  1  0  2  0	/* BRG3 */
> +					0  0  3  0  2  0	/* TDMATxD0 */
> +					0  4  3  0  2  0	/* TDMARxD0 */
> +					3 1b  2  0  1  0>;	/* CLK1 */
> +			};
> +

Can we not introduce new pio-maps in the device trees? There
were debates regarding this, and if I understood everything
correctly, pio-maps are considered bad taste. Better to
do a bunch of par_io_config_pin() calls in the board file. Better
yet, fix up the firmware (u-boot) to set up dedicated pins
correctly.


Thanks,

-- 
Anton Vorontsov
email: cbou@mail.ru
backup email: ya-cbou@yandex.ru
irc://irc.freenode.net/bd2

^ permalink raw reply

* [PATCH net-2.6.25][IPV6]: Introduce the INET6_TW_MATCH() macro.
From: Pavel Emelyanov @ 2008-01-24 15:54 UTC (permalink / raw)
  To: David Miller; +Cc: Linux Netdev List, devel

We have INET_MATCH, INET_TW_MATCH and INET6_MATCH to test
sockets and twbuckets for matching, but ipv6 twbuckets are
tested manually.

Here's the INET6_TW_MATCH to help with it.

Since the commit b3652b2dc5ec6ccd946ae9136b30c6babb81305a
    [IPV6]: Mischecked tw match in __inet6_check_established.
is accepted this patch will not break the __inet6_check_est.
logic.

This will also help with per-namespace socket lookup patches
in the nearest future.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>

---

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5d35a4c..c347860 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -465,6 +465,14 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&& \
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 
+#define INET6_TW_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif) \
+	(((__sk)->sk_hash == (__hash))					&& \
+	 (*((__portpair *)&(inet_twsk(__sk)->tw_dport)) == (__ports))	&& \
+	 ((__sk)->sk_family	       == PF_INET6)			&& \
+	 (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr)))	&& \
+	 (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr))) && \
+	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+
 #endif /* __KERNEL__ */
 
 #endif /* _IPV6_H */
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index d0b3447..06b01be 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -80,17 +80,8 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
 	}
 	/* Must check for a TIME_WAIT'er before going to listener hash. */
 	sk_for_each(sk, node, &head->twchain) {
-		const struct inet_timewait_sock *tw = inet_twsk(sk);
-
-		if(*((__portpair *)&(tw->tw_dport))	== ports	&&
-		   sk->sk_family		== PF_INET6) {
-			const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
-
-			if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
-			    ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
-			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
-				goto hit;
-		}
+		if (INET6_TW_MATCH(sk, hash, saddr, daddr, ports, dif))
+			goto hit;
 	}
 	read_unlock(lock);
 	return NULL;
@@ -185,15 +176,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 
 	/* Check TIME-WAIT sockets first. */
 	sk_for_each(sk2, node, &head->twchain) {
-		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
-
 		tw = inet_twsk(sk2);
 
-		if(*((__portpair *)&(tw->tw_dport)) == ports		 &&
-		   sk2->sk_family	       == PF_INET6	 &&
-		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
-		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+		if (INET6_TW_MATCH(sk2, hash, saddr, daddr, ports, dif)) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else

^ permalink raw reply related

* Re: [PATCH UCC TDM 1/3 Updated] Platform changes for UCC TDM driver for MPC8323eRDB. Also includes related QE changes and dts entries.
From: Timur Tabi @ 2008-01-24 15:55 UTC (permalink / raw)
  To: avorontsov
  Cc: Poonam_Aggrwal-b10812, kumar.gala, akpm, linux-kernel, netdev,
	rubini, linuxppc-dev, michael.barkowski, rich.cutler,
	ashish.kalra
In-Reply-To: <20080124154804.GA22178@localhost.localdomain>

Anton Vorontsov wrote:

> Can we not introduce new pio-maps in the device trees? There
> were debates regarding this, and if I understood everything
> correctly, pio-maps considered as a bad taste. Better
> do bunch of par_io_config_pin() in the board file. Better
> yet fixup the firmware (u-boot) to set up dedicated pins
> correctly.

I'm on the fence with respect to pio-maps vs. par_io_config_pin() calls.  The 
problem is that the configuration of these pins is board-specific, but pins are 
used by devices.  A device driver can't call par_io_config_pin(), because the 
calls are different depending on which SoC and which UCC you're using.  The 
platform code can't call par_io_config_pin(), because that configuration depends 
on which drivers are loaded.

In other words, the pin configurations are dependent on the UCC configurations, 
and the UCC configurations are stored in the device tree.  So it makes sense to 
put the pin configurations in the device tree, too.

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* [VLAN] set_rx_mode support for unicast address list
From: Chris Leech @ 2008-01-24 16:07 UTC (permalink / raw)
  To: netdev, Patrick McHardy; +Cc: Chris Leech

Reuse the existing logic for multicast list synchronization for the unicast
address list. The core of dev_mc_sync/unsync are split out as
__dev_addr_sync/unsync and moved from dev_mcast.c to dev.c.  These are then
used to implement dev_unicast_sync/unsync as well.

I'm working on cleaning up Intel's FCoE stack, which generates new MAC
addresses from the fibre channel device id assigned by the fabric as per the
current draft specification in T11.  When using such a protocol in a VLAN
environment it would be nice to not always be forced into promiscuous mode,
assuming the underlying Ethernet driver supports multiple unicast addresses as
well.

Signed-off-by: Chris Leech <christopher.leech@intel.com>
---

 include/linux/netdevice.h |    4 ++
 net/8021q/vlan_dev.c      |    7 ++-
 net/core/dev.c            |   96 +++++++++++++++++++++++++++++++++++++++++++++
 net/core/dev_mcast.c      |   39 ++----------------
 4 files changed, 110 insertions(+), 36 deletions(-)


diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b0813c3..047d432 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1414,12 +1414,16 @@ extern void		dev_set_rx_mode(struct net_device *dev);
 extern void		__dev_set_rx_mode(struct net_device *dev);
 extern int		dev_unicast_delete(struct net_device *dev, void *addr, int alen);
 extern int		dev_unicast_add(struct net_device *dev, void *addr, int alen);
+extern int		dev_unicast_sync(struct net_device *to, struct net_device *from);
+extern void		dev_unicast_unsync(struct net_device *to, struct net_device *from);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
 extern void		dev_mc_unsync(struct net_device *to, struct net_device *from);
 extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
 extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
+extern int		__dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
+extern void		__dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8059fa4..77f04e4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -563,6 +563,7 @@ static int vlan_dev_stop(struct net_device *dev)
 	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 
 	dev_mc_unsync(real_dev, dev);
+	dev_unicast_unsync(real_dev, dev);
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(real_dev, -1);
 	if (dev->flags & IFF_PROMISC)
@@ -634,9 +635,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 		dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
 }
 
-static void vlan_dev_set_multicast_list(struct net_device *vlan_dev)
+static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
 	dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+	dev_unicast_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
@@ -702,7 +704,8 @@ void vlan_setup(struct net_device *dev)
 	dev->open		= vlan_dev_open;
 	dev->stop		= vlan_dev_stop;
 	dev->set_mac_address	= vlan_dev_set_mac_address;
-	dev->set_multicast_list	= vlan_dev_set_multicast_list;
+	dev->set_rx_mode	= vlan_dev_set_rx_mode;
+	dev->set_multicast_list	= vlan_dev_set_rx_mode;
 	dev->change_rx_flags	= vlan_dev_change_rx_flags;
 	dev->do_ioctl		= vlan_dev_ioctl;
 	dev->destructor		= free_netdev;
diff --git a/net/core/dev.c b/net/core/dev.c
index c9c593e..edaff27 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2962,6 +2962,102 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
 }
 EXPORT_SYMBOL(dev_unicast_add);
 
+int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
+		    struct dev_addr_list **from, int *from_count)
+{
+	struct dev_addr_list *da, *next;
+	int err = 0;
+
+	da = *from;
+	while (da != NULL) {
+		next = da->next;
+		if (!da->da_synced) {
+			err = __dev_addr_add(to, to_count,
+					     da->da_addr, da->da_addrlen, 0);
+			if (err < 0)
+				break;
+			da->da_synced = 1;
+			da->da_users++;
+		} else if (da->da_users == 1) {
+			__dev_addr_delete(to, to_count,
+					  da->da_addr, da->da_addrlen, 0);
+			__dev_addr_delete(from, from_count,
+					  da->da_addr, da->da_addrlen, 0);
+		}
+		da = next;
+	}
+	return err;
+}
+
+void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
+		       struct dev_addr_list **from, int *from_count)
+{
+	struct dev_addr_list *da, *next;
+
+	da = *from;
+	while (da != NULL) {
+		next = da->next;
+		if (da->da_synced) {
+			__dev_addr_delete(to, to_count,
+					  da->da_addr, da->da_addrlen, 0);
+			da->da_synced = 0;
+			__dev_addr_delete(from, from_count,
+					  da->da_addr, da->da_addrlen, 0);
+		}
+		da = next;
+	}
+}
+
+/**
+ *	dev_unicast_sync - Synchronize device's unicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_tx_lock_bh.
+ *
+ *	This function is intended to be called from the dev->set_rx_mode
+ *	function of layered software devices.
+ */
+int dev_unicast_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	netif_tx_lock_bh(to);
+	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
+			      &from->uc_list, &from->uc_count);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_tx_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_unicast_sync);
+
+/**
+ *	dev_unicast_unsync - Remove synchronized addresses from the destination
+ *			     device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_unicast_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_unicast_unsync(struct net_device *to, struct net_device *from)
+{
+	netif_tx_lock_bh(from);
+	netif_tx_lock_bh(to);
+
+	__dev_addr_unsync(&to->uc_list, &to->uc_count,
+			  &from->uc_list, &from->uc_count);
+	__dev_set_rx_mode(to);
+
+	netif_tx_unlock_bh(to);
+	netif_tx_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_unicast_unsync);
+
 static void __dev_addr_discard(struct dev_addr_list **list)
 {
 	struct dev_addr_list *tmp;
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index cadbfbf..cec5825 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -113,32 +113,15 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
  * 	locked by netif_tx_lock_bh.
  *
  *	This function is intended to be called from the dev->set_multicast_list
- *	function of layered software devices.
+ *	or dev->set_rx_mode function of layered software devices.
  */
 int dev_mc_sync(struct net_device *to, struct net_device *from)
 {
-	struct dev_addr_list *da, *next;
 	int err = 0;
 
 	netif_tx_lock_bh(to);
-	da = from->mc_list;
-	while (da != NULL) {
-		next = da->next;
-		if (!da->da_synced) {
-			err = __dev_addr_add(&to->mc_list, &to->mc_count,
-					     da->da_addr, da->da_addrlen, 0);
-			if (err < 0)
-				break;
-			da->da_synced = 1;
-			da->da_users++;
-		} else if (da->da_users == 1) {
-			__dev_addr_delete(&to->mc_list, &to->mc_count,
-					  da->da_addr, da->da_addrlen, 0);
-			__dev_addr_delete(&from->mc_list, &from->mc_count,
-					  da->da_addr, da->da_addrlen, 0);
-		}
-		da = next;
-	}
+	err = __dev_addr_sync(&to->mc_list, &to->mc_count,
+			      &from->mc_list, &from->mc_count);
 	if (!err)
 		__dev_set_rx_mode(to);
 	netif_tx_unlock_bh(to);
@@ -160,23 +143,11 @@ EXPORT_SYMBOL(dev_mc_sync);
  */
 void dev_mc_unsync(struct net_device *to, struct net_device *from)
 {
-	struct dev_addr_list *da, *next;
-
 	netif_tx_lock_bh(from);
 	netif_tx_lock_bh(to);
 
-	da = from->mc_list;
-	while (da != NULL) {
-		next = da->next;
-		if (da->da_synced) {
-			__dev_addr_delete(&to->mc_list, &to->mc_count,
-					  da->da_addr, da->da_addrlen, 0);
-			da->da_synced = 0;
-			__dev_addr_delete(&from->mc_list, &from->mc_count,
-					  da->da_addr, da->da_addrlen, 0);
-		}
-		da = next;
-	}
+	__dev_addr_unsync(&to->mc_list, &to->mc_count,
+			  &from->mc_list, &from->mc_count);
 	__dev_set_rx_mode(to);
 
 	netif_tx_unlock_bh(to);


^ permalink raw reply related

* Re: [IPV4 0/9] TRIE performance patches
From: Stephen Hemminger @ 2008-01-24 16:18 UTC (permalink / raw)
  To: Robert Olsson; +Cc: Robert Olsson, David Miller, netdev
In-Reply-To: <18328.23597.205001.332244@robur.slu.se>

On Thu, 24 Jan 2008 10:36:45 +0100
Robert Olsson <Robert.Olsson@data.slu.se> wrote:

> 
> Stephen Hemminger writes:
> 
>  > Dumping by prefix is possible, but unless 32x slower. Dumping in
>  > address order is just as logical. Like I said, I'm investigating what
>  > quagga handles.
> 
>  How about taking a snapshot to in address order (as you did) to some 
>  allocated memory, returning from that memory in prefix order? This would 
>  solve the  -EBUSY too and give a consistent view of the routing table at
>  the time for the dump/snapshot.
> 
>  Cheers
> 					--ro

Snapshotting isn't going to work, because of scale and because the kernel can't
tell when the application is going to come back.

-- 
Stephen Hemminger <stephen.hemminger@vyatta.com>

^ permalink raw reply

* Re: [PATCH UCC TDM 1/3 Updated] Platform changes for UCC TDM driver for MPC8323eRDB. Also includes related QE changes and dts entries.
From: Anton Vorontsov @ 2008-01-24 16:23 UTC (permalink / raw)
  To: Timur Tabi
  Cc: Poonam_Aggrwal-b10812, kumar.gala, akpm, linux-kernel, netdev,
	rubini, linuxppc-dev, michael.barkowski, rich.cutler,
	ashish.kalra
In-Reply-To: <4798B4F3.2010101@freescale.com>

On Thu, Jan 24, 2008 at 09:55:31AM -0600, Timur Tabi wrote:
> Anton Vorontsov wrote:
> 
> >Can we not introduce new pio-maps in the device trees? There
> >were debates regarding this, and if I understood everything
> >correctly, pio-maps considered as a bad taste. Better
> >do bunch of par_io_config_pin() in the board file. Better
> >yet fixup the firmware (u-boot) to set up dedicated pins
> >correctly.
> 
> I'm on the fence with respect to pio-maps vs. par_io_config_pin() calls.  
> The problem is that the configuration of these pins is board-specific, but 
> pins are used by devices.  A device driver can't call par_io_config_pin(), 
> because the calls are different depending on which SoC and which UCC you're 
> using.  The platform code can't call par_io_config_pin(), because that 
> configuration depends on which drivers are loaded.

Are you saying that TDM is sharing same pins with the other QE device,
and we can choose to use/not use some device depending on which driver
is loaded? I think this particular board and patch isn't that case.

Even if someday there will be the case when drivers are mutually
exclusive, i.e. presence of some driver should trigger pins
reconfiguration, then anyway this should be handled differently.

That is, we should not _register_ two mutually exclusive devices
in the first place, so drivers will not probe them. That's board
setup code authority, and pins configuration still should happen
there.

[ Irrelevant to UCCs and this particular case: lately I've
  encountered one interesting case of Par IO usage. FHCI USB needs
  switching between pin's dedicated functions and GPIO functions.
  So, firstly it is using pins as dedicated, and later (at the bus
  reset) driver turns them to act as GPIOs. This is still handled
  without pio-map though, via gpios = <> property for that driver. ]

> In other words, the pin configurations are dependent on the UCC 
> configurations, and the UCC configurations are stored in the device tree.  
> So it makes sense to put the pin configurations in the device tree, too.

In that particular case UCC configuration is static, for every UCC.
So, we can set up all pins in the firmware/board file.

Please correct me if I'm wrong.

-- 
Anton Vorontsov
email: cbou@mail.ru
backup email: ya-cbou@yandex.ru
irc://irc.freenode.net/bd2

^ permalink raw reply

* Re: [PATCH UCC TDM 1/3 Updated] Platform changes for UCC TDM driver for MPC8323eRDB. Also includes related QE changes and dts entries.
From: Timur Tabi @ 2008-01-24 16:33 UTC (permalink / raw)
  To: avorontsov
  Cc: Poonam_Aggrwal-b10812, kumar.gala, akpm, linux-kernel, netdev,
	rubini, linuxppc-dev, michael.barkowski, rich.cutler,
	ashish.kalra
In-Reply-To: <20080124162345.GA27359@localhost.localdomain>

Anton Vorontsov wrote:

> Are you saying that TDM is sharing same pins with the other QE device,
> and we can choose to use/not use some device depending on which driver
> is loaded?

No.  I'd have to closely examine the DTS, but I don't think that UCC devices 
share pins at all.  But that isn't my point.

> In that particular case UCC configuration is static, for every UCC.
> So, we can set up all pins in the firmware/board file.

Yes, but deciding what the UCC does might not be static.  At what point do we 
declare, "UCC5 is for eth0 and eth0 only"?

The advantage of putting the pin configurations in the device tree is that they 
now become configurable.  I can envision a scenario where UCC5 could be either 
an Ethernet or a UART, depending on the setting of some jumpers on the board. 
That's what the QE was designed for: any UCC can do any task, and you can even 
have a UCC change its purpose while the system is running.  So I don't want the 
pin configurations hard-coded into the kernel.  Having them in the device tree 
gives me some flexibility.

For instance, I have a plan (that I keep postponing) to introduce a new feature 
in U-Boot where U-Boot can determine the settings of some board jumpers and 
modify the device tree accordingly.  The instructions on how to modify the 
device tree would be embedded in the tree itself.  I can't support this feature 
if the kernel calls par_io_config_pin() regardless of what's in the device tree.

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [Bugme-new] [Bug 9806] New: (tun dev) Impossible to deassert IFF_ONE_QUEUE or IFF_NO_PI
From: Nathaniel Filardo @ 2008-01-24 17:12 UTC (permalink / raw)
  To: maxk, vtun, netdev
In-Reply-To: <20080124003356.ec51432a.akpm@linux-foundation.org>

[-- Attachment #1: Type: text/plain, Size: 330 bytes --]

On Jan 24, 2008 3:33 AM, Andrew Morton <akpm@linux-foundation.org> wrote:
> > On Wed, 23 Jan 2008 13:13:13 -0800 (PST) bugme-daemon@bugzilla.kernel.org wrote:
> > http://bugzilla.kernel.org/show_bug.cgi?id=9806
> Thanks.   Could you please submit the patch via email?  Send it to
> all recipients of this email.

Attached.
--nwf;

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: tun.patch --]
[-- Type: text/x-diff; name=tun.patch, Size: 381 bytes --]

--- tun.c.orig	2008-01-23 16:09:25.000000000 -0500
+++ tun.c	2008-01-23 16:12:26.000000000 -0500
@@ -540,9 +540,13 @@
 
 	if (ifr->ifr_flags & IFF_NO_PI)
 		tun->flags |= TUN_NO_PI;
+	else
+		tun->flags &= ~TUN_NO_PI;
 
 	if (ifr->ifr_flags & IFF_ONE_QUEUE)
 		tun->flags |= TUN_ONE_QUEUE;
+	else
+		tun->flags &= ~TUN_ONE_QUEUE;
 
 	file->private_data = tun;
 	tun->attached = 1;

^ permalink raw reply

* Re: [XFRM]: constify 'struct xfrm_type'
From: Joe Perches @ 2008-01-24 17:17 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev@vger.kernel.org
In-Reply-To: <20080124122621.3248c651.dada1@cosmosbay.com>

On Thu, 2008-01-24 at 12:26 +0100, Eric Dumazet wrote:
> -	struct xfrm_type *type;
> +	const struct xfrm_type *type;

Perhaps const foo * const bar; for most or all of these
conversions?



^ permalink raw reply

* Re: [PATCH UCC TDM 1/3 Updated] Platform changes for UCC TDM driver for MPC8323eRDB. Also includes related QE changes and dts entries.
From: Anton Vorontsov @ 2008-01-24 17:23 UTC (permalink / raw)
  To: Timur Tabi
  Cc: Poonam_Aggrwal-b10812, kumar.gala, akpm, linux-kernel, netdev,
	rubini, linuxppc-dev, michael.barkowski, rich.cutler,
	ashish.kalra
In-Reply-To: <4798BDEB.2010501@freescale.com>

On Thu, Jan 24, 2008 at 10:33:47AM -0600, Timur Tabi wrote:
> Anton Vorontsov wrote:
> 
> >Are you saying that TDM is sharing same pins with the other QE device,
> >and we can choose to use/not use some device depending on which driver
> >is loaded?
> 
> No.  I'd have to closely examine the DTS, but I don't think that UCC 
> devices share pins at all.  But that isn't my point.
> 
> >In that particular case UCC configuration is static, for every UCC.
> >So, we can set up all pins in the firmware/board file.
> 
> Yes, but deciding what the UCC does might not be static.  At what point do 
> we declare, "UCC5 is for eth0 and eth0 only"?
> 
> The advantage of putting the pin configurations in the device tree is that 
> they now become configurable.  I can envision a scenario where UCC5 could 
> be either an Ethernet or a UART, depending on the setting of some jumpers 
> on the board. That's what the QE was designed for: any UCC can do any task, 
> and you can even have a UCC change its purpose while the system is running. 
> So I don't want the pin configurations hard-coded into the kernel.  Having 
> them in the device tree gives me some flexibility.

If hardware configuration is selected at the bootup time, by jumpers
or switches, it's even easier to do it right. Without pio-map.

> For instance, I have a plan (that I keep postponing) to introduce a new 
> feature in U-Boot where U-Boot can determine the settings of some board 
> jumpers and modify the device tree accordingly. The instructions on how to 
> modify the device tree would be embedded in the tree itself.

Why do you need to modify the device tree for that? Let U-Boot simply
set up the pins for the kernel. Regarding the kernel overwriting the pin
configuration...

> I can't 
> support this feature if the kernel calls par_io_config_pin() regardless of 
> what's in the device tree.

What I've understood from the previous debates, is that ideally kernel
should not touch pins' configuration. Today we're using pio-map solely
to fix up some old firmware misconfiguration. And we can do this in the
board file still. To determine if we need to fixup the firmware or not,
we can use some device tree property instead (firmware version?).

p.s.
I'm neither for pio-map nor against it. I just want some consensus
regarding this. The last thread ended with the consensus that pio-map is a
bad thing to use...

-- 
Anton Vorontsov
email: cbou@mail.ru
backup email: ya-cbou@yandex.ru
irc://irc.freenode.net/bd2

^ permalink raw reply

* Re: [VLAN] set_rx_mode support for unicast address list
From: Patrick McHardy @ 2008-01-24 17:27 UTC (permalink / raw)
  To: Chris Leech; +Cc: netdev
In-Reply-To: <20080124160738.11781.23745.stgit@localhost.localdomain>

Chris Leech wrote:
> Reuse the existing logic for multicast list synchronization for the unicast
> address list. The core of dev_mc_sync/unsync are split out as
> __dev_addr_sync/unsync and moved from dev_mcast.c to dev.c.  These are then
> used to implement dev_unicast_sync/unsync as well.
> 
> I'm working on cleaning up Intel's FCoE stack, which generates new MAC
> addresses from the fibre channel device id assigned by the fabric as per the
> current draft specification in T11.  When using such a protocol in a VLAN
> environment it would be nice to not always be forced into promiscuous mode,
> assuming the underlying Ethernet driver supports multiple unicast addresses as
> well.


Looks good, applied. Thanks Chris.

^ permalink raw reply

* Re: [PATCH 1/5] netns netfilter: per-netns ip6tables
From: Patrick McHardy @ 2008-01-24 17:33 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, devel
In-Reply-To: <20080124122328.GA27337@localhost.sw.ru>

Alexey Dobriyan wrote:
> * Propagate netns from userspace down to xt_find_table_lock()
> * Register ip6 tables in netns (modules still use init_net)
> 
> Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
> ---
> 
>  include/linux/netfilter_ipv6/ip6_tables.h |    3 +
>  net/ipv6/netfilter/ip6_tables.c           |   50 +++++++++++++++---------------
>  net/ipv6/netfilter/ip6table_filter.c      |    2 -
>  net/ipv6/netfilter/ip6table_mangle.c      |    2 -
>  net/ipv6/netfilter/ip6table_raw.c         |    2 -
>  5 files changed, 31 insertions(+), 28 deletions(-)

This adds checkpatch warnings:

WARNING: line over 80 characters
#96: FILE: net/ipv6/netfilter/ip6_tables.c:1361:
+do_add_counters(struct net *net, void __user *user, unsigned int len, 
int compat)

WARNING: line over 80 characters
#229: FILE: net/ipv6/netfilter/ip6table_filter.c:135:
+       packet_filter = ip6t_register_table(&init_net, &__packet_filter, 
&initial_table.repl);

WARNING: line over 80 characters
#242: FILE: net/ipv6/netfilter/ip6table_mangle.c:167:
+       packet_mangler = ip6t_register_table(&init_net, 
&__packet_mangler, &initial_table.repl);

WARNING: line over 80 characters
#255: FILE: net/ipv6/netfilter/ip6table_raw.c:80:
+       packet_raw = ip6t_register_table(&init_net, &__packet_raw, 
&initial_table.repl);

ERROR: Missing Signed-off-by: line(s)

total: 1 errors, 4 warnings, 214 lines checked

I'll fix them up; let's hope that it doesn't cause clashes
with the following patches.

^ permalink raw reply

* Re: [PATCH 2/5] netns netfilter: per-netns IPv6 FILTER, MANGLE, RAW
From: Patrick McHardy @ 2008-01-24 17:40 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, devel
In-Reply-To: <20080124122748.GB27337@localhost.sw.ru>

Alexey Dobriyan wrote:
> Now it's possible to list and manipulate per-netns ip6tables rules.
> Filtering decisions are based on init_net's table so far.
> 
> P.S.: remove init_net check in inet6_create() to see the effect


OK, this patch fixes all but one checkpatch warning again.
Please try to make each patch checkpatch-clean next time
since that causes unnecessary work. Thanks (and applied).

^ permalink raw reply

* Re: [PATCH 3/5] netns netfilter: per-netns arp_tables
From: Patrick McHardy @ 2008-01-24 17:41 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, devel
In-Reply-To: <20080124122852.GC27337@localhost.sw.ru>

Alexey Dobriyan wrote:
> * Propagate netns from userspace.
> * arpt_register_table() registers table in supplied netns.

Applied.

^ permalink raw reply

* Re: [PATCH 4/5] netns netfilter: per-netns arp_tables FILTER
From: Patrick McHardy @ 2008-01-24 17:46 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, devel
In-Reply-To: <20080124122930.GD27337@localhost.sw.ru>

Alexey Dobriyan wrote:
> Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>

Applied.

^ permalink raw reply

* Re: [PATCH 5/5] netns netfilter: put table module on netns stop
From: Patrick McHardy @ 2008-01-24 17:48 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: netdev, netfilter-devel, devel
In-Reply-To: <20080124123002.GE27337@localhost.sw.ru>

Alexey Dobriyan wrote:
> When number of entries exceeds number of initial entries, foo-tables code
> will pin table module. But during table unregister on netns stop,
> that additional pin was forgotten.

Applied, thanks.



^ permalink raw reply

* Slow OOM  in netif_RX function
From: Ivan Dichev @ 2008-01-24 17:28 UTC (permalink / raw)
  To: netdev

Hello,
I have a problem with my Linux router. It has had slow, persistent OOM
problems for the past few months.
Every working day (I mean days when more traffic is generated) my
router leaks 15-20 MB of memory, and
after 2 weeks a restart is a MUST.
From /proc/slabinfo I saw that size-2048 and size-512 are growing
rapidly every day when traffic occurs.

--------- /proc/slabinfo --------------------
size-2048          20322  20349   2072    3    2 : tunables   24   12    0 : slabdata   6780   6783      0
size-512           50984  51016    536    7    1 : tunables   32   16    0 : slabdata   7288   7288      0


I was wondering who is allocating these memory pools, so I changed
the kernel to 2.6.23-rc12 with the following options enabled:
CONFIG_DEBUG_SLAB=y
CONFIG_DEBUG_SLAB_LEAK=y


Unfortunately changing the kernel didn't solve the mem leak....
Now /proc/slab_allocators is showing that 3c59x driver is allocating
2048 and 512 bytes memory pools
caused by RX function.
--------- from /proc/slab_allocators ------------------------------
7612 size-2048: boomerang_rx+0x33b/0x437 [3c59x]
16018 size-512: boomerang_rx+0x165/0x437 [3c59x]

I was thinking that the 3com driver is bogus, .. but not!
After few days I changed the cards with rtl8139 and now ....
--------- from /proc/slab_allocators ------------------------------
size-2048: 20159 rtl8139_rx+0x155/0x2dc [8139too]
size-1024: 2693 rtl8139_rx+0x155/0x2dc [8139too]
size-512: 50515 rtl8139_rx+0x155/0x2dc [8139too]

the memory leak appears again in the same function (RX).

I did search over the mailing list and found as similar only this
http://www.spinics.net/lists/kernel/old/2003-q4/msg03071.html


It clearly does not depend on the kernel version or the network
driver (unless both drivers are bogus :) ).
Any ideas ?

Ivan


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox