Netdev List
 help / color / mirror / Atom feed
* Re: [REGRESSION] 2.6.24-rc7: e1000: Detected Tx Unit Hang
From: Frans Pop @ 2008-01-15  6:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20080114.215317.38045859.davem@davemloft.net>

Wow. That's fast! :-)

On Tuesday 15 January 2008, David Miller wrote:
> From: Frans Pop <elendil@planet.nl>
>
> > kernel: e1000: eth0: e1000_clean_tx_irq: Detected Tx Unit Hang
>
> Does this make the problem go away?

I'm compiling a kernel with the patch now. Will let you know the result.
May take a while as I don't know how to trigger the bug, so I'll just have 
to let it run for some time.

^ permalink raw reply

* [PATCH 3/4] bonding: Fix work rearming
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev; +Cc: Makito SHIOKAWA
In-Reply-To: <20080115063604.577118000@miraclelinux.com>

[-- Attachment #1: bonding-fix-monitor-rearming.patch --]
[-- Type: text/plain, Size: 706 bytes --]

Change code not to rearm bond_mii_monitor() when value 0 is set for miimon.

Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
---
 drivers/net/bonding/bond_main.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2388,7 +2388,8 @@ void bond_mii_monitor(struct work_struct
 
 	delay = ((bond->params.miimon * HZ) / 1000) ? : 1;
 	read_unlock(&bond->lock);
-	queue_delayed_work(bond->wq, &bond->mii_work, delay);
+	if (bond->params.miimon)
+		queue_delayed_work(bond->wq, &bond->mii_work, delay);
 }
 
 static __be32 bond_glean_dev_ip(struct net_device *dev)

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* [PATCH 0/4] bonding: Fix workqueue manipulation and lock taking
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev

These patches fix some workqueue manipulation and lock taking in bonding driver.

PATCH 1/4 through PATCH 3/4 are on workqueue manipulation, PATCH 4/4 is on lock taking, and each patch are logically independent. Patches are for latest netdev-2.6 git.

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* [PATCH 2/4] bonding: Add destroy_workqueue() to bond device destroying process
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev; +Cc: Makito SHIOKAWA
In-Reply-To: <20080115063604.577118000@miraclelinux.com>

[-- Attachment #1: bonding-guarantee-destroy-work.patch --]
[-- Type: text/plain, Size: 6186 bytes --]

There is no destroy_workqueue() in bond device destroying process, therefore
worker thread will remain even if bond device is destroyed. So add
destroy_workqueue(), and also, ensure all works are canceled before
destroy_workqueue() is called.

Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
---

 drivers/net/bonding/bond_main.c  |   72 +++++++++++++++++++-------------------
 drivers/net/bonding/bond_sysfs.c |    9 ++++
 drivers/net/bonding/bonding.h    |    5 ++
 3 files changed, 48 insertions(+), 38 deletions(-)

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -190,7 +190,6 @@ struct bond_parm_tbl arp_validate_tbl[] 
 /*-------------------------- Forward declarations ---------------------------*/
 
 static void bond_send_gratuitous_arp(struct bonding *bond);
-static void bond_deinit(struct net_device *bond_dev);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -862,7 +861,7 @@ static void bond_resend_igmp_join_reques
 /*
  * Totally destroys the mc_list in bond
  */
-static void bond_mc_list_destroy(struct bonding *bond)
+void bond_mc_list_destroy(struct bonding *bond)
 {
 	struct dev_mc_list *dmi;
 
@@ -1875,7 +1874,7 @@ int  bond_release_and_destroy(struct net
 /*
  * This function releases all slaves.
  */
-static int bond_release_all(struct net_device *bond_dev)
+int bond_release_all(struct net_device *bond_dev)
 {
 	struct bonding *bond = bond_dev->priv;
 	struct slave *slave;
@@ -4392,6 +4391,25 @@ static const struct ethtool_ops bond_eth
 	.get_drvinfo		= bond_ethtool_get_drvinfo,
 };
 
+/* Caller must not hold rtnl_lock */
+void bond_work_cancel_all(struct bonding *bond)
+{
+	/* ensure all works are stopped */
+	cancel_delayed_work_sync(&bond->alb_work);
+	cancel_delayed_work_sync(&bond->mii_work);
+	cancel_delayed_work_sync(&bond->ab_arp_work);
+	cancel_delayed_work_sync(&bond->lb_arp_work);
+	cancel_delayed_work_sync(&bond->ad_work);
+}
+
+static void bond_free_netdev(struct net_device *dev)
+{
+	struct bonding *bond = netdev_priv(dev);
+
+	destroy_workqueue(bond->wq);
+	free_netdev(dev);
+}
+
 /*
  * Does not allocate but creates a /proc entry.
  * Allowed to fail.
@@ -4441,7 +4459,7 @@ static int bond_init(struct net_device *
 
 	bond_set_mode_ops(bond, bond->params.mode);
 
-	bond_dev->destructor = free_netdev;
+	bond_dev->destructor = bond_free_netdev;
 
 	/* Initialize the device options */
 	bond_dev->tx_queue_len = 0;
@@ -4483,7 +4501,7 @@ static int bond_init(struct net_device *
 /* De-initialize device specific data.
  * Caller must hold rtnl_lock.
  */
-static void bond_deinit(struct net_device *bond_dev)
+void bond_deinit(struct net_device *bond_dev)
 {
 	struct bonding *bond = bond_dev->priv;
 
@@ -4494,29 +4512,6 @@ static void bond_deinit(struct net_devic
 #endif
 }
 
-static void bond_work_cancel_all(struct bonding *bond)
-{
-	write_lock_bh(&bond->lock);
-	bond->kill_timers = 1;
-	write_unlock_bh(&bond->lock);
-
-	if (bond->params.miimon)
-		cancel_delayed_work(&bond->mii_work);
-
-	if (bond->params.arp_interval) {
-		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
-			cancel_delayed_work(&bond->ab_arp_work);
-		else
-			cancel_delayed_work(&bond->lb_arp_work);
-	}
-
-	if (bond->params.mode == BOND_MODE_ALB)
-		cancel_delayed_work(&bond->alb_work);
-
-	if (bond->params.mode == BOND_MODE_8023AD)
-		cancel_delayed_work(&bond->ad_work);
-}
-
 /* Unregister and free all bond devices.
  * Caller must hold rtnl_lock.
  */
@@ -4527,11 +4522,14 @@ static void bond_free_all(void)
 	list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
 		struct net_device *bond_dev = bond->dev;
 
+		bond_deinit(bond_dev);
+		rtnl_unlock();
 		bond_work_cancel_all(bond);
+		rtnl_lock();
 		bond_mc_list_destroy(bond);
 		/* Release the bonded slaves */
 		bond_release_all(bond_dev);
-		bond_deinit(bond_dev);
+		bond_destroy_sysfs_entry(bond);
 		unregister_netdevice(bond_dev);
 	}
 
@@ -4921,8 +4919,16 @@ int bond_create(char *name, struct bond_
 
 out_bond:
 	bond_deinit(bond_dev);
+	if (*newbond)
+		unregister_netdevice(bond_dev);
+	else {
+		rtnl_unlock();
+		bond_work_cancel_all(netdev_priv(bond_dev));
+		rtnl_lock();
+		destroy_workqueue(((struct bonding *)netdev_priv(bond_dev))->wq);
 out_netdev:
-	free_netdev(bond_dev);
+		free_netdev(bond_dev);
+	}
 out_rtnl:
 	rtnl_unlock();
 	return res;
@@ -4932,7 +4938,6 @@ static int __init bonding_init(void)
 {
 	int i;
 	int res;
-	struct bonding *bond, *nxt;
 
 	printk(KERN_INFO "%s", version);
 
@@ -4959,11 +4964,6 @@ static int __init bonding_init(void)
 
 	goto out;
 err:
-	list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
-		bond_work_cancel_all(bond);
-		destroy_workqueue(bond->wq);
-	}
-
 	rtnl_lock();
 	bond_free_all();
 	bond_destroy_sysfs();
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -163,7 +163,14 @@ static ssize_t bonding_store_bonds(struc
 				printk(KERN_INFO DRV_NAME
 					": %s is being deleted...\n",
 					bond->dev->name);
-				bond_destroy(bond);
+ 				bond_deinit(bond->dev);
+				rtnl_unlock();
+				bond_work_cancel_all(bond);
+				rtnl_lock();
+				bond_mc_list_destroy(bond);
+				bond_release_all(bond->dev);
+ 		        	bond_destroy_sysfs_entry(bond);
+ 				unregister_netdevice(bond->dev);
 				rtnl_unlock();
 				goto out;
 			}
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -301,7 +301,10 @@ static inline void bond_unset_master_alb
 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
 int bond_create(char *name, struct bond_params *params, struct bonding **newbond);
-void bond_destroy(struct bonding *bond);
+void bond_deinit(struct net_device *bond_dev);
+void bond_work_cancel_all(struct bonding *bond);
+void bond_mc_list_destroy(struct bonding *bond);
+int bond_release_all(struct net_device *bond_dev);
 int  bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev);
 int bond_create_sysfs(void);
 void bond_destroy_sysfs(void);

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* [PATCH 1/4] bonding: Fix work initing and cancelling
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev; +Cc: Makito SHIOKAWA
In-Reply-To: <20080115063604.577118000@miraclelinux.com>

[-- Attachment #1: bonding-fix-work-manipulation-in-sysfs.patch --]
[-- Type: text/plain, Size: 7400 bytes --]

delayed_work_pending() is used to check whether work is being queued or not,
when queueing or cancelling corresponding work. However,
delayed_work_pending() returns 0 when work is just running, and in that case,
work_struct will be destroyed or work won't be cancelled. So remove all
delayed_work_pending(), and move all INIT_DELAYED_WORK to bonding init
process, just queue or cancel work. For this purpose, arp_work is divided into
two works.

Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
---

 drivers/net/bonding/bond_main.c  |   46 ++++++++++++++++++++------------------
 drivers/net/bonding/bond_sysfs.c |   36 +++++++++--------------------
 drivers/net/bonding/bonding.h    |    3 +-
 3 files changed, 39 insertions(+), 46 deletions(-)

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2692,7 +2692,7 @@ out:
 void bond_loadbalance_arp_mon(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
-					    arp_work.work);
+					    lb_arp_work.work);
 	struct slave *slave, *oldcurrent;
 	int do_failover = 0;
 	int delta_in_ticks;
@@ -2803,7 +2803,7 @@ void bond_loadbalance_arp_mon(struct wor
 
 re_arm:
 	if (bond->params.arp_interval)
-		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
+		queue_delayed_work(bond->wq, &bond->lb_arp_work, delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
 }
@@ -2826,7 +2826,7 @@ out:
 void bond_activebackup_arp_mon(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
-					    arp_work.work);
+					    ab_arp_work.work);
 	struct slave *slave;
 	int delta_in_ticks;
 	int i;
@@ -3060,7 +3060,7 @@ void bond_activebackup_arp_mon(struct wo
 
 re_arm:
 	if (bond->params.arp_interval) {
-		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
+		queue_delayed_work(bond->wq, &bond->ab_arp_work, delta_in_ticks);
 	}
 out:
 	read_unlock(&bond->lock);
@@ -3679,30 +3679,23 @@ static int bond_open(struct net_device *
 			return -1;
 		}
 
-		INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
 		queue_delayed_work(bond->wq, &bond->alb_work, 0);
 	}
 
 	if (bond->params.miimon) {  /* link check interval, in milliseconds. */
-		INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
 		queue_delayed_work(bond->wq, &bond->mii_work, 0);
 	}
 
 	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
 		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
-			INIT_DELAYED_WORK(&bond->arp_work,
-					  bond_activebackup_arp_mon);
+			queue_delayed_work(bond->wq, &bond->ab_arp_work, 0);
 		else
-			INIT_DELAYED_WORK(&bond->arp_work,
-					  bond_loadbalance_arp_mon);
-
-		queue_delayed_work(bond->wq, &bond->arp_work, 0);
+			queue_delayed_work(bond->wq, &bond->lb_arp_work, 0);
 		if (bond->params.arp_validate)
 			bond_register_arp(bond);
 	}
 
 	if (bond->params.mode == BOND_MODE_8023AD) {
-		INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
 		queue_delayed_work(bond->wq, &bond->ad_work, 0);
 		/* register to receive LACPDUs */
 		bond_register_lacpdu(bond);
@@ -3736,7 +3729,10 @@ static int bond_close(struct net_device 
 	}
 
 	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
-		cancel_delayed_work(&bond->arp_work);
+		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+			cancel_delayed_work(&bond->ab_arp_work);
+		else
+			cancel_delayed_work(&bond->lb_arp_work);
 	}
 
 	switch (bond->params.mode) {
@@ -4416,6 +4412,12 @@ static int bond_init(struct net_device *
 	if (!bond->wq)
 		return -ENOMEM;
 
+	INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
+	INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
+	INIT_DELAYED_WORK(&bond->ab_arp_work, bond_activebackup_arp_mon);
+	INIT_DELAYED_WORK(&bond->lb_arp_work, bond_loadbalance_arp_mon);
+	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
+
 	/* Initialize pointers */
 	bond->first_slave = NULL;
 	bond->curr_active_slave = NULL;
@@ -4498,18 +4500,20 @@ static void bond_work_cancel_all(struct 
 	bond->kill_timers = 1;
 	write_unlock_bh(&bond->lock);
 
-	if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
+	if (bond->params.miimon)
 		cancel_delayed_work(&bond->mii_work);
 
-	if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
-		cancel_delayed_work(&bond->arp_work);
+	if (bond->params.arp_interval) {
+		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+			cancel_delayed_work(&bond->ab_arp_work);
+		else
+			cancel_delayed_work(&bond->lb_arp_work);
+	}
 
-	if (bond->params.mode == BOND_MODE_ALB &&
-	    delayed_work_pending(&bond->alb_work))
+	if (bond->params.mode == BOND_MODE_ALB)
 		cancel_delayed_work(&bond->alb_work);
 
-	if (bond->params.mode == BOND_MODE_8023AD &&
-	    delayed_work_pending(&bond->ad_work))
+	if (bond->params.mode == BOND_MODE_8023AD)
 		cancel_delayed_work(&bond->ad_work);
 }
 
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -643,10 +643,8 @@ static ssize_t bonding_store_arp_interva
 		       "%s Disabling MII monitoring.\n",
 		       bond->dev->name, bond->dev->name);
 		bond->params.miimon = 0;
-		if (delayed_work_pending(&bond->mii_work)) {
-			cancel_delayed_work(&bond->mii_work);
-			flush_workqueue(bond->wq);
-		}
+		cancel_delayed_work(&bond->mii_work);
+		flush_workqueue(bond->wq);
 	}
 	if (!bond->params.arp_targets[0]) {
 		printk(KERN_INFO DRV_NAME
@@ -660,16 +658,10 @@ static ssize_t bonding_store_arp_interva
 		 * timer will get fired off when the open function
 		 * is called.
 		 */
-		if (!delayed_work_pending(&bond->arp_work)) {
-			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
-				INIT_DELAYED_WORK(&bond->arp_work,
-						  bond_activebackup_arp_mon);
-			else
-				INIT_DELAYED_WORK(&bond->arp_work,
-						  bond_loadbalance_arp_mon);
-
-			queue_delayed_work(bond->wq, &bond->arp_work, 0);
-		}
+		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+			queue_delayed_work(bond->wq, &bond->ab_arp_work, 0);
+		else
+			queue_delayed_work(bond->wq, &bond->lb_arp_work, 0);
 	}
 
 out:
@@ -1022,10 +1014,11 @@ static ssize_t bonding_store_miimon(stru
 				bond->params.arp_validate =
 					BOND_ARP_VALIDATE_NONE;
 			}
-			if (delayed_work_pending(&bond->arp_work)) {
-				cancel_delayed_work(&bond->arp_work);
-				flush_workqueue(bond->wq);
-			}
+			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+				cancel_delayed_work(&bond->ab_arp_work);
+			else
+				cancel_delayed_work(&bond->lb_arp_work);
+			flush_workqueue(bond->wq);
 		}
 
 		if (bond->dev->flags & IFF_UP) {
@@ -1034,12 +1027,7 @@ static ssize_t bonding_store_miimon(stru
 			 * timer will get fired off when the open function
 			 * is called.
 			 */
-			if (!delayed_work_pending(&bond->mii_work)) {
-				INIT_DELAYED_WORK(&bond->mii_work,
-						  bond_mii_monitor);
-				queue_delayed_work(bond->wq,
-						   &bond->mii_work, 0);
-			}
+			queue_delayed_work(bond->wq, &bond->mii_work, 0);
 		}
 	}
 out:
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -206,7 +206,8 @@ struct bonding {
 	struct   packet_type arp_mon_pt;
 	struct   workqueue_struct *wq;
 	struct   delayed_work mii_work;
-	struct   delayed_work arp_work;
+	struct   delayed_work ab_arp_work;
+	struct   delayed_work lb_arp_work;
 	struct   delayed_work alb_work;
 	struct   delayed_work ad_work;
 };

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* [PATCH 4/4] bonding: Fix some RTNL taking
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev; +Cc: Makito SHIOKAWA
In-Reply-To: <20080115063604.577118000@miraclelinux.com>

[-- Attachment #1: bonding-fix-some-rtnl-taking.patch --]
[-- Type: text/plain, Size: 3810 bytes --]

Fix some RTNL lock taking:

* RTNL (mutex; may sleep) must not be taken under read_lock (spinlock; must be
atomic). However, RTNL is taken under read_lock in bond_loadbalance_arp_mon()
and bond_activebackup_arp_mon(). So change code to take RTNL outside of read_lock.

* rtnl_unlock() calls netdev_run_todo() which takes net_todo_run_mutex, and
rtnl_unlock() is called under read_lock in bond_mii_monitor(). So for the same
reason as above, change code to call rtnl_unlock() outside of read_lock.

Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
---
 drivers/net/bonding/bond_main.c |   24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2372,6 +2372,7 @@ void bond_mii_monitor(struct work_struct
 	struct bonding *bond = container_of(work, struct bonding,
 					    mii_work.work);
 	unsigned long delay;
+	int need_unlock = 0;
 
 	read_lock(&bond->lock);
 	if (bond->kill_timers) {
@@ -2383,13 +2384,16 @@ void bond_mii_monitor(struct work_struct
 		rtnl_lock();
 		read_lock(&bond->lock);
 		__bond_mii_monitor(bond, 1);
-		rtnl_unlock();
+		need_unlock = 1;
 	}
 
 	delay = ((bond->params.miimon * HZ) / 1000) ? : 1;
-	read_unlock(&bond->lock);
 	if (bond->params.miimon)
 		queue_delayed_work(bond->wq, &bond->mii_work, delay);
+	read_unlock(&bond->lock);
+	/* rtnl_unlock() may sleep, so call it after read_unlock() */
+	if (need_unlock)
+		rtnl_unlock();
 }
 
 static __be32 bond_glean_dev_ip(struct net_device *dev)
@@ -2698,6 +2702,7 @@ void bond_loadbalance_arp_mon(struct wor
 	int delta_in_ticks;
 	int i;
 
+	rtnl_lock();
 	read_lock(&bond->lock);
 
 	delta_in_ticks = (bond->params.arp_interval * HZ) / 1000;
@@ -2791,14 +2796,11 @@ void bond_loadbalance_arp_mon(struct wor
 	}
 
 	if (do_failover) {
-		rtnl_lock();
 		write_lock_bh(&bond->curr_slave_lock);
 
 		bond_select_active_slave(bond);
 
 		write_unlock_bh(&bond->curr_slave_lock);
-		rtnl_unlock();
-
 	}
 
 re_arm:
@@ -2806,6 +2808,7 @@ re_arm:
 		queue_delayed_work(bond->wq, &bond->lb_arp_work, delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
+	rtnl_unlock();
 }
 
 /*
@@ -2831,6 +2834,7 @@ void bond_activebackup_arp_mon(struct wo
 	int delta_in_ticks;
 	int i;
 
+	rtnl_lock();
 	read_lock(&bond->lock);
 
 	delta_in_ticks = (bond->params.arp_interval * HZ) / 1000;
@@ -2855,8 +2859,6 @@ void bond_activebackup_arp_mon(struct wo
 
 				slave->link = BOND_LINK_UP;
 
-				rtnl_lock();
-
 				write_lock_bh(&bond->curr_slave_lock);
 
 				if ((!bond->curr_active_slave) &&
@@ -2892,7 +2894,6 @@ void bond_activebackup_arp_mon(struct wo
 				}
 
 				write_unlock_bh(&bond->curr_slave_lock);
-				rtnl_unlock();
 			}
 		} else {
 			read_lock(&bond->curr_slave_lock);
@@ -2962,7 +2963,6 @@ void bond_activebackup_arp_mon(struct wo
 			       bond->dev->name,
 			       slave->dev->name);
 
-			rtnl_lock();
 			write_lock_bh(&bond->curr_slave_lock);
 
 			bond_select_active_slave(bond);
@@ -2970,8 +2970,6 @@ void bond_activebackup_arp_mon(struct wo
 
 			write_unlock_bh(&bond->curr_slave_lock);
 
-			rtnl_unlock();
-
 			bond->current_arp_slave = slave;
 
 			if (slave) {
@@ -2989,13 +2987,10 @@ void bond_activebackup_arp_mon(struct wo
 			       bond->primary_slave->dev->name);
 
 			/* primary is up so switch to it */
-			rtnl_lock();
 			write_lock_bh(&bond->curr_slave_lock);
 			bond_change_active_slave(bond, bond->primary_slave);
 			write_unlock_bh(&bond->curr_slave_lock);
 
-			rtnl_unlock();
-
 			slave = bond->primary_slave;
 			slave->jiffies = jiffies;
 		} else {
@@ -3064,6 +3059,7 @@ re_arm:
 	}
 out:
 	read_unlock(&bond->lock);
+	rtnl_unlock();
 }
 
 /*------------------------------ proc/seq_file-------------------------------*/

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* Re: [PATCH 3/6] [IPV4] trie: put leaf nodes in a slab cache
From: Eric Dumazet @ 2008-01-15  6:49 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, robert.olsson, netdev
In-Reply-To: <20080114164621.2bc5011f@deepthought>

Stephen Hemminger a écrit :
> This improves locality for operations that touch all the leaves.
> Later patch will grow the size of the leaf so it becomes more
> important.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
> 
> 
> --- a/net/ipv4/fib_trie.c	2008-01-14 12:26:51.000000000 -0800
> +++ b/net/ipv4/fib_trie.c	2008-01-14 13:41:00.000000000 -0800
> @@ -162,6 +162,7 @@ static struct tnode *halve(struct trie *
>  static void tnode_free(struct tnode *tn);
>  
>  static struct kmem_cache *fn_alias_kmem __read_mostly;
> +static struct kmem_cache *trie_leaf_kmem __read_mostly;
>  
>  static inline struct tnode *node_parent(struct node *node)
>  {
> @@ -316,7 +317,8 @@ static inline void alias_free_mem_rcu(st
>  
>  static void __leaf_free_rcu(struct rcu_head *head)
>  {
> -	kfree(container_of(head, struct leaf, rcu));
> +	struct leaf *leaf = container_of(head, struct leaf, rcu);
> +	kmem_cache_free(trie_leaf_kmem, leaf);
>  }
>  
>  static void __leaf_info_free_rcu(struct rcu_head *head)
> @@ -366,7 +368,7 @@ static inline void tnode_free(struct tno
>  
>  static struct leaf *leaf_new(void)
>  {
> -	struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
> +	struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
>  	if (l) {
>  		l->parent = T_LEAF;
>  		INIT_HLIST_HEAD(&l->list);
> @@ -1927,6 +1929,9 @@ void __init fib_hash_init(void)
>  {
>  	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
>  					  0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
> +
> +	trie_leaf_kmem = kmem_cache_create("ip_fib_trie", sizeof(struct leaf),
> +					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
>  }
>  

Do we really need HWCACHE_ALIGN ? so many wasted space ...



^ permalink raw reply

* Re: [PATCH] [IPV4] fib_trie: size and statistics
From: Eric Dumazet @ 2008-01-15  6:55 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, robert.olsson, netdev, stephen.hemminger
In-Reply-To: <20080114125755.6157a3bf@deepthought>

Stephen Hemminger a écrit :
> Show number of entries in trie, the size field was being set but never used,
> but it only counted leaves, not all entries. Refactor the two cases in
> fib_triestat_seq_show into a single routine.
> 
> Note: the stat structure was being malloc'd but the stack usage isn't so
> high (288 bytes) that it is worth the additional complexity.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
> ---
> Patch against current net-2.6.25
> 
> --- a/net/ipv4/fib_trie.c	2008-01-14 10:16:06.000000000 -0800
> +++ b/net/ipv4/fib_trie.c	2008-01-14 10:30:11.000000000 -0800
> @@ -148,10 +148,10 @@ struct trie_stat {
>  
>  struct trie {
>  	struct node *trie;
> +	unsigned int size;
>  #ifdef CONFIG_IP_FIB_TRIE_STATS
>  	struct trie_use_stats stats;
>  #endif
> -	int size;
>  };
>  
>  static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
> @@ -1045,7 +1045,6 @@ static struct list_head *fib_insert_node
>  		insert_leaf_info(&l->list, li);
>  		goto done;
>  	}
> -	t->size++;
>  	l = leaf_new();
>  
>  	if (!l)
> @@ -1258,6 +1257,8 @@ static int fn_trie_insert(struct fib_tab
>  	list_add_tail_rcu(&new_fa->fa_list,
>  			  (fa ? &fa->fa_list : fa_head));
>  
> +	t->size++;
> +
>  	rt_cache_flush(-1);
>  	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
>  		  &cfg->fc_nlinfo, 0);
> @@ -2128,50 +2129,34 @@ static void trie_show_usage(struct seq_f
>  }
>  #endif /*  CONFIG_IP_FIB_TRIE_STATS */
>  
> +static void fib_trie_show(struct seq_file *seq, const char *name, struct trie *trie)
> +{
> +	struct trie_stat stat;
> +
> +	seq_printf(seq, "%s: %d\n", name, trie->size);
> +	trie_collect_stats(trie, &stat);
> +	trie_show_stats(seq, &stat);
> +#ifdef CONFIG_IP_FIB_TRIE_STATS
> +	trie_show_usage(seq, &trie->stats);
> +#endif
> +}
>  
>  static int fib_triestat_seq_show(struct seq_file *seq, void *v)
>  {
>  	struct net *net = (struct net *)seq->private;
> -	struct trie *trie_local, *trie_main;
> -	struct trie_stat *stat;
>  	struct fib_table *tb;
>  
> -	trie_local = NULL;
> +	seq_printf(seq,
> +		   "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
> +		   sizeof(struct leaf), sizeof(struct tnode));
> +
>  	tb = fib_get_table(net, RT_TABLE_LOCAL);
>  	if (tb)
> -		trie_local = (struct trie *) tb->tb_data;
> -
> -	trie_main = NULL;
> +		fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
> +
>  	tb = fib_get_table(net, RT_TABLE_MAIN);
>  	if (tb)
> -		trie_main = (struct trie *) tb->tb_data;
> -
> -
> -	stat = kmalloc(sizeof(*stat), GFP_KERNEL);
> -	if (!stat)
> -		return -ENOMEM;
> -
> -	seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
> -		   sizeof(struct leaf), sizeof(struct tnode));
> -
> -	if (trie_local) {
> -		seq_printf(seq, "Local:\n");
> -		trie_collect_stats(trie_local, stat);
> -		trie_show_stats(seq, stat);
> -#ifdef CONFIG_IP_FIB_TRIE_STATS
> -		trie_show_usage(seq, &trie_local->stats);
> -#endif
> -	}
> -
> -	if (trie_main) {
> -		seq_printf(seq, "Main:\n");
> -		trie_collect_stats(trie_main, stat);
> -		trie_show_stats(seq, stat);
> -#ifdef CONFIG_IP_FIB_TRIE_STATS
> -		trie_show_usage(seq, &trie_main->stats);
> -#endif
> -	}
> -	kfree(stat);
> +		fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
>  
>  	return 0;
>  }

Keeping a 'size' counter is not necessary, since trie_collect_stats() must go
through all the tree and get this information for free.

This 'size' field only slows down inserts/deletes and dirty a cacheline that 
readers will hit.



^ permalink raw reply

* Re: [PATCH 6/8 net-2.6.25] [ARP] neigh_parms_put(destroy) are essentially local to core/neighbour.c.
From: David Miller @ 2008-01-15  7:04 UTC (permalink / raw)
  To: den; +Cc: netdev, devel
In-Reply-To: <1200322801-19054-6-git-send-email-den@openvz.org>

From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 17:59:59 +0300

> Make them static.
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>

It's completely awkward to put the inline after all
the call sites.  A non-global-optimizing compiler
won't be able to even inline it.

I've reworked this patch as follows when applying
it.

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index a0d42a5..ebbfb50 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -213,7 +213,6 @@ extern struct neighbour 	*neigh_event_ns(struct neigh_table *tbl,
 
 extern struct neigh_parms	*neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl);
 extern void			neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms);
-extern void			neigh_parms_destroy(struct neigh_parms *parms);
 extern unsigned long		neigh_rand_reach_time(unsigned long base);
 
 extern void			pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
@@ -254,12 +253,6 @@ static inline void __neigh_parms_put(struct neigh_parms *parms)
 	atomic_dec(&parms->refcnt);
 }
 
-static inline void neigh_parms_put(struct neigh_parms *parms)
-{
-	if (atomic_dec_and_test(&parms->refcnt))
-		neigh_parms_destroy(parms);
-}
-
 static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
 {
 	atomic_inc(&parms->refcnt);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 9b0b773..7cc772a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -577,6 +577,13 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 	return -ENOENT;
 }
 
+static void neigh_parms_destroy(struct neigh_parms *parms);
+
+static inline void neigh_parms_put(struct neigh_parms *parms)
+{
+	if (atomic_dec_and_test(&parms->refcnt))
+		neigh_parms_destroy(parms);
+}
 
 /*
  *	neighbour must already be out of the table;
@@ -1348,7 +1355,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 	NEIGH_PRINTK1("neigh_parms_release: not found\n");
 }
 
-void neigh_parms_destroy(struct neigh_parms *parms)
+static void neigh_parms_destroy(struct neigh_parms *parms)
 {
 	release_net(parms->net);
 	if (parms->dev)

^ permalink raw reply related

* Re: [PATCH 0/8 2.6.25] a set of small code cleanups
From: David Miller @ 2008-01-15  7:06 UTC (permalink / raw)
  To: den; +Cc: devel, netdev
In-Reply-To: <478B78CB.4030404@openvz.org>

From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 17:59:23 +0300

> This set contains a set of small improvements found in IPv4 code during
> preparations to support ARP in the network namespace. These fixes are
> mostly independent except 2 last ones.
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>

All applied, thanks Denis.

^ permalink raw reply

* Re: [PATCH 2/9] get rid of unused revision element
From: David Miller @ 2008-01-15  7:07 UTC (permalink / raw)
  To: shemminger; +Cc: Robert.Olsson, robert.olsson, netdev, stephen.hemminger
In-Reply-To: <20080114083555.48792f6b@deepthought>

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 14 Jan 2008 08:35:55 -0800

> I will be glad to get this working.  Is there any point in doing the a
> small systems version as well?

Not at this time.

^ permalink raw reply

* Re: [FIB]: Avoid using static variables without proper locking
From: David Miller @ 2008-01-15  7:10 UTC (permalink / raw)
  To: dada1; +Cc: Robert.Olsson, netdev
In-Reply-To: <478BB78F.8060609@cosmosbay.com>

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 14 Jan 2008 20:27:11 +0100

> fib_trie_seq_show() uses two helper functions, rtn_scope() and 
> rtn_type() that can
> write to static storage without locking.
> 
> Just pass to them a temporary buffer to avoid potential  corruption
> (probably not triggerable but still...)
> 
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

Applied to net-2.6.25, but I had to tweak it to apply
cleanly since the %d in rtn_type() is now a %u

^ permalink raw reply

* Re: [PATCH] [IPV4] fib_trie: size and statistics
From: David Miller @ 2008-01-15  7:12 UTC (permalink / raw)
  To: shemminger; +Cc: robert.olsson, netdev, stephen.hemminger
In-Reply-To: <20080114125755.6157a3bf@deepthought>

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 14 Jan 2008 12:57:55 -0800

> Show number of entries in trie, the size field was being set but never used,
> but it only counted leaves, not all entries. Refactor the two cases in
> fib_triestat_seq_show into a single routine.
> 
> Note: the stat structure was being malloc'd but the stack usage isn't so
> high (288 bytes) that it is worth the additional complexity.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>

Applied, thanks.

^ permalink raw reply

* Re: questions on NAPI processing latency and dropped network packets
From: Jarek Poplawski @ 2008-01-15  7:19 UTC (permalink / raw)
  To: Chris Friesen; +Cc: David Miller, netdev, linux-kernel
In-Reply-To: <478B86BA.1050706@nortel.com>

On 14-01-2008 16:58, Chris Friesen wrote:
...
> How close to bleeding edge do we need to be for it to be considered 
> acceptable to ask questions on netdev?
> 
> Given that the embedded space tends to be perpetually stuck on older 
> kernels (our "current" release is based on 2.6.14) do you have any 
> suggestion on how we can obtain information that would be available on 
> netdev if we were using newer kernels?

IMHO, checking this with a current stable, which probably you are going
to do some day, anyway, should be 100% acceptable: giving some input to
netdev, while still working for yourself.

Regards,
Jarek P.

^ permalink raw reply

* Re: [PATCH 2/6] [IPV4] fib hash|trie initialization
From: David Miller @ 2008-01-15  7:14 UTC (permalink / raw)
  To: stephen.hemminger; +Cc: robert.olsson, netdev
In-Reply-To: <20080114210008.784db159@deepthought>

From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Mon, 14 Jan 2008 21:00:08 -0800

> Initialization of the slab cache's should be done when IP is
> initialized to make sure of available memory, and that code
> can be marked __init.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>

Applied.

^ permalink raw reply

* Re: [PATCH] [IPV4] fib_trie: size and statistics
From: David Miller @ 2008-01-15  7:28 UTC (permalink / raw)
  To: dada1; +Cc: shemminger, robert.olsson, netdev, stephen.hemminger
In-Reply-To: <478C58FD.9030407@cosmosbay.com>

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 15 Jan 2008 07:55:57 +0100

> Keeping a 'size' counter is not necessary, since trie_collect_stats() must go
> through all the tree and get this information for free.
> 
> This 'size' field only slows down inserts/deletes and dirty a cacheline that 
> readers will hit.

Good point, we can do this better in a follow-on patch.

^ permalink raw reply

* Re: [PATCH 3/6] [IPV4] trie: put leaf nodes in a slab cache
From: David Miller @ 2008-01-15  7:29 UTC (permalink / raw)
  To: stephen.hemminger; +Cc: robert.olsson, netdev
In-Reply-To: <20080114164621.2bc5011f@deepthought>

From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Mon, 14 Jan 2008 16:46:21 -0800

> This improves locality for operations that touch all the leaves.
> Later patch will grow the size of the leaf so it becomes more
> important.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>

I'll let 3, 4, 5, and 6 sit while you work out the issues
brought up by Eric Dumazet.

^ permalink raw reply

* [PATCH net-2.6.25] [XFRM] Remove unused definition XFRM_POLICY_LOCALOK in linux/xfrm.h + typos in net/xfrm.h
From: Rami Rosen @ 2008-01-15  8:17 UTC (permalink / raw)
  To: David Miller, netdev

[-- Attachment #1: Type: text/plain, Size: 166 bytes --]

Hi,
- XFRM_POLICY_LOCALOK in linux/xfrm.h is unused definition.
- correct 4 typos in net/xfrm.h

Regards,
Rami Rosen


Signed-off-by: Rami Rosen <ramirose@gmail.com>

[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 1742 bytes --]

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 9b5b00c..f5cfb75 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -363,7 +363,6 @@ struct xfrm_userpolicy_info {
 #define XFRM_POLICY_ALLOW	0
 #define XFRM_POLICY_BLOCK	1
 	__u8				flags;
-#define XFRM_POLICY_LOCALOK	1	/* Allow user to override global policy */
 	/* Automatically expand selector to include matching ICMP payloads. */
 #define XFRM_POLICY_ICMP	2
 	__u8				share;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5ebb9ba..09b9bda 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -73,7 +73,7 @@ extern struct mutex xfrm_cfg_mutex;
    Lookup is plain linear search until the first match with selector.
 
    If "action" is "block", then we prohibit the flow, otherwise:
-   if "xfrms_nr" is zero, the flow passes untransformed. Otherwise,
+   if "xfrm_nr" is zero, the flow passes untransformed. Otherwise,
    policy entry has list of up to XFRM_MAX_DEPTH transformations,
    described by templates xfrm_tmpl. Each template is resolved
    to a complete xfrm_state (see below) and we pack bundle of transformations
@@ -84,10 +84,10 @@ extern struct mutex xfrm_cfg_mutex;
                      |---. child .-> dst -. xfrm .-> xfrm_state #3
                                       |---. child .-> NULL
 
-   Bundles are cached at xrfm_policy struct (field ->bundles).
+   Bundles are cached at xfrm_policy struct (field ->bundles).
 
 
-   Resolution of xrfm_tmpl
+   Resolution of xfrm_tmpl
    -----------------------
    Template contains:
    1. ->mode		Mode: transport or tunnel
@@ -133,7 +133,7 @@ struct xfrm_state
 
 	u32			genid;
 
-	/* Key manger bits */
+	/* Key manager bits */
 	struct {
 		u8		state;
 		u8		dying;

^ permalink raw reply related

* [FIB]: Fix rcu_dereference() abuses in fib_trie.c
From: Eric Dumazet @ 2008-01-15  8:29 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org, Stephen Hemminger, Herbert Xu

node_parent() and tnode_get_child() currently use rcu_dereference().

These functions are called from both 
- readers only paths (where rcu_dereference() is needed), and 
- writer path (where rcu_dereference() is not needed)

To make explicit where rcu_dereference() is really needed, I introduced
new node_parent_rcu() and tnode_get_child_rcu() functions which use
rcu_dereference(), while node_parent() and tnode_get_child() dont use it.

Then I changed calling sites where rcu_dereference() was really needed to
call the _rcu() variants.

This should have no impact but for alpha architecture, and may help future
sparse checks.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6dab753..e053775 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -165,9 +165,13 @@ static struct kmem_cache *fn_alias_kmem __read_mostly;
 
 static inline struct tnode *node_parent(struct node *node)
 {
-	struct tnode *ret;
+	return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
+}
+
+static inline struct tnode *node_parent_rcu(struct node *node)
+{
+	struct tnode *ret = node_parent(node);
 
-	ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
 	return rcu_dereference(ret);
 }
 
@@ -177,13 +181,18 @@ static inline void node_set_parent(struct node *node, struct tnode *ptr)
 			   (unsigned long)ptr | NODE_TYPE(node));
 }
 
-/* rcu_read_lock needs to be hold by caller from readside */
+static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
+{
+	BUG_ON(i >= 1U << tn->bits);
 
-static inline struct node *tnode_get_child(struct tnode *tn, int i)
+	return tn->child[i];
+}
+
+static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
 {
-	BUG_ON(i >= 1 << tn->bits);
+	struct node *ret = tnode_get_child(tn, i);
 
-	return rcu_dereference(tn->child[i]);
+	return rcu_dereference(ret);
 }
 
 static inline int tnode_child_length(const struct tnode *tn)
@@ -938,7 +947,7 @@ fib_find_node(struct trie *t, u32 key)
 
 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
 			pos = tn->pos + tn->bits;
-			n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
+			n = tnode_get_child_rcu(tn, tkey_extract_bits(key, tn->pos, tn->bits));
 		} else
 			break;
 	}
@@ -1685,7 +1694,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
 
 		p = (struct tnode*) trie;  /* Start */
 	} else
-		p = node_parent(c);
+		p = node_parent_rcu(c);
 
 	while (p) {
 		int pos, last;
@@ -1722,7 +1731,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
 up:
 		/* No more children go up one step  */
 		c = (struct node *) p;
-		p = node_parent(c);
+		p = node_parent_rcu(c);
 	}
 	return NULL; /* Ready. Root of trie */
 }
@@ -1984,7 +1993,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
 		 iter->tnode, iter->index, iter->depth);
 rescan:
 	while (cindex < (1<<tn->bits)) {
-		struct node *n = tnode_get_child(tn, cindex);
+		struct node *n = tnode_get_child_rcu(tn, cindex);
 
 		if (n) {
 			if (IS_LEAF(n)) {
@@ -2003,7 +2012,7 @@ rescan:
 	}
 
 	/* Current node exhausted, pop back up */
-	p = node_parent((struct node *)tn);
+	p = node_parent_rcu((struct node *)tn);
 	if (p) {
 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
 		tn = p;
@@ -2312,7 +2321,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN)
 		return 0;
 
-	if (!node_parent(n)) {
+	if (!node_parent_rcu(n)) {
 		if (iter->trie == iter->trie_local)
 			seq_puts(seq, "<local>:\n");
 		else

^ permalink raw reply related

* Re: [PATCH net-2.6.25] [XFRM] Remove unused definition XFRM_POLICY_LOCALOK in linux/xfrm.h + typos in net/xfrm.h
From: David Miller @ 2008-01-15  8:47 UTC (permalink / raw)
  To: ramirose; +Cc: netdev
In-Reply-To: <eb3ff54b0801150017s425c2d10xe570cc70d2ca596b@mail.gmail.com>

From: "Rami Rosen" <ramirose@gmail.com>
Date: Tue, 15 Jan 2008 10:17:37 +0200

> Hi,
> - XFRM_POLICY_LOCALOK in linux/xfrm.h is unused definition.
> - correct 4 typos in net/xfrm.h

Values in linux/xfrm.h are user visible, so I am worried that this
might break the build of some userland application.

Herbert explained that this value is unused inside of the
kernel, and that is true, but I'm not so sure that there
are no applications using the value.

We might have to keep it there.

^ permalink raw reply

* Re: [PATCH 3/4] bonding: Fix work rearming
From: Jarek Poplawski @ 2008-01-15  9:05 UTC (permalink / raw)
  To: Makito SHIOKAWA; +Cc: netdev
In-Reply-To: <20080115063650.149555000@miraclelinux.com>

On 15-01-2008 07:36, Makito SHIOKAWA wrote:
> Change code not to rearm bond_mii_monitor() when value 0 is set for miimon.
> 
> Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
> ---
>  drivers/net/bonding/bond_main.c |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -2388,7 +2388,8 @@ void bond_mii_monitor(struct work_struct
>  
>  	delay = ((bond->params.miimon * HZ) / 1000) ? : 1;
>  	read_unlock(&bond->lock);
> -	queue_delayed_work(bond->wq, &bond->mii_work, delay);
> +	if (bond->params.miimon)
> +		queue_delayed_work(bond->wq, &bond->mii_work, delay);
>  }

Maybe I miss something, but is this bond_mii_monitor() function
supposed to be ever started if (!bond->params.miimon)? (IOW: isn't
it enough to control this where the parameter is changed only?)

Regards,
Jarek P.

^ permalink raw reply

* [PATCH][IPV6]: Mischecked tw match in __inet6_check_established.
From: Pavel Emelyanov @ 2008-01-15  9:22 UTC (permalink / raw)
  To: David Miller; +Cc: Linux Netdev List, devel

When looking for a conflicting connection the !sk->sk_bound_dev_if
check is performed only for live sockets, but not for timewait-ed.

This is not the case for ipv4, for __inet6_lookup_established in
both ipv4 and ipv6 and for other places that check for tw-s.

Was this missed accidentally? If so, then this patch fixes it and
besides makes use if the dif variable declared in the function.

Fits both, net-2.6 and net-2.6.25.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>

---

diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index d0b3447..a66a7d8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -193,7 +193,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 		   sk2->sk_family	       == PF_INET6	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+		   (!sk2->sk_bound_dev_if || sk2->sk_bound_dev_if == dif)) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else

^ permalink raw reply related

* Not understand some in htb_do_events function
From: Badalian Vyacheslav @ 2008-01-15 10:11 UTC (permalink / raw)
  To: netdev

Hello all.
I have many messages like "htb: too many events !" in dmesg.

Try to see code and find that function try do 500 events at call.
Hm... may anyone ask why 500? Why its not dynamic value based on 
performance of PC?

Thanks for answers.


^ permalink raw reply

* Re: Not understand some in htb_do_events function
From: Patrick McHardy @ 2008-01-15 10:15 UTC (permalink / raw)
  To: Badalian Vyacheslav; +Cc: netdev, Martin Devera
In-Reply-To: <478C86E6.3050508@bigtelecom.ru>

Badalian Vyacheslav wrote:
> Hello all.
> I have many messages like "htb: too many events !" in dmesg.
> 
> Try to see code and find that function try do 500 events at call.
> Hm... may anyone ask why 500? Why its not dynamic value based on 
> performance of PC?


Thats a good question, I wonder why it is limited at all.
Martin, any hints?



^ permalink raw reply

* [PATCH 1/3] skb_partial_csum_set
From: Rusty Russell @ 2008-01-15 10:41 UTC (permalink / raw)
  To: netdev; +Cc: virtualization

Implement skb_partial_csum_set, for setting partial csums on untrusted packets.

Use it in virtio_net (replacing buggy version there), it's also going
to be used by TAP for partial csum support.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c |   11 +----------
 include/linux/skbuff.h   |    1 +
 net/core/skbuff.c        |   29 +++++++++++++++++++++++++++++
 3 files changed, 31 insertions(+), 10 deletions(-)

diff -r 72be3d596d31 include/linux/skbuff.h
--- a/include/linux/skbuff.h	Wed Jan 09 15:57:40 2008 +1100
+++ b/include/linux/skbuff.h	Wed Jan 09 16:56:41 2008 +1100
@@ -1804,5 +1804,6 @@ static inline void skb_forward_csum(stru
 		skb->ip_summed = CHECKSUM_NONE;
 }
 
+bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff -r 72be3d596d31 net/core/skbuff.c
--- a/net/core/skbuff.c	Wed Jan 09 15:57:40 2008 +1100
+++ b/net/core/skbuff.c	Wed Jan 09 16:56:41 2008 +1100
@@ -2214,6 +2214,34 @@ int skb_cow_data(struct sk_buff *skb, in
 	return elt;
 }
 
+/**
+ * skb_partial_csum_set - set up and verify partial csum values for packet
+ * @skb: the skb to set
+ * @start: the number of bytes after skb->data to start checksumming.
+ * @off: the offset from start to place the checksum.
+ *
+ * For untrusted partially-checksummed packets, we need to make sure the values
+ * for skb->csum_start and skb->csum_offset are valid so we don't oops.
+ *
+ * This function checks and sets those values and skb->ip_summed: if this
+ * returns false you should drop the packet.
+ */
+bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
+{
+	if (unlikely(start > skb->len - 2) || 
+	    unlikely((int)start + off > skb->len - 2)) {
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "bad partial csum: csum=%u/%u len=%u\n",
+			       start, off, skb->len);
+		return false;
+	}
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	skb->csum_start = skb_headroom(skb) + start;
+	skb->csum_offset = off;
+	return true;
+}
+
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(kfree_skb);
@@ -2250,3 +2278,4 @@ EXPORT_SYMBOL(skb_append_datato_frags);
 
 EXPORT_SYMBOL_GPL(skb_to_sgvec);
 EXPORT_SYMBOL_GPL(skb_cow_data);
+EXPORT_SYMBOL_GPL(skb_partial_csum_set);
diff -r 72be3d596d31 drivers/net/virtio_net.c
--- a/drivers/net/virtio_net.c	Wed Jan 09 15:57:40 2008 +1100
+++ b/drivers/net/virtio_net.c	Wed Jan 09 16:56:41 2008 +1100
@@ -89,17 +89,8 @@ static void receive_skb(struct net_devic
 
 	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 		pr_debug("Needs csum!\n");
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = hdr->csum_start;
-		skb->csum_offset = hdr->csum_offset;
-		if (skb->csum_start > skb->len - 2
-		    || skb->csum_offset > skb->len - 2) {
-			if (net_ratelimit())
-				printk(KERN_WARNING "%s: csum=%u/%u len=%u\n",
-				       dev->name, skb->csum_start,
-				       skb->csum_offset, skb->len);
+		if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
 			goto frame_err;
-		}
 	}
 
 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox