Netdev List
 help / color / mirror / Atom feed
* Re: [REGRESSION] 2.6.24-rc7: e1000: Detected Tx Unit Hang
From: David Miller @ 2008-01-15  5:53 UTC (permalink / raw)
  To: elendil; +Cc: netdev, linux-kernel
In-Reply-To: <200801150625.10823.elendil@planet.nl>

From: Frans Pop <elendil@planet.nl>
Date: Tue, 15 Jan 2008 06:25:10 +0100

> kernel: e1000: eth0: e1000_clean_tx_irq: Detected Tx Unit Hang

Does this make the problem go away?

(Note this isn't the final correct patch we should apply.  There
 is no reason why this revert back to the older ->poll() logic
 here should have any effect on the TX hang triggering...)

diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 13d57b0..cada32c 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -3919,7 +3919,7 @@ e1000_clean(struct napi_struct *napi, int budget)
 {
 	struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter, napi);
 	struct net_device *poll_dev = adapter->netdev;
-	int work_done = 0;
+	int tx_work = 0, work_done = 0;
 
 	/* Must NOT use netdev_priv macro here. */
 	adapter = poll_dev->priv;
@@ -3929,8 +3929,8 @@ e1000_clean(struct napi_struct *napi, int budget)
 	 * simultaneously.  A failure obtaining the lock means
 	 * tx_ring[0] is currently being cleaned anyway. */
 	if (spin_trylock(&adapter->tx_queue_lock)) {
-		e1000_clean_tx_irq(adapter,
-				   &adapter->tx_ring[0]);
+		tx_work = e1000_clean_tx_irq(adapter,
+					     &adapter->tx_ring[0]);
 		spin_unlock(&adapter->tx_queue_lock);
 	}
 
@@ -3938,7 +3938,7 @@ e1000_clean(struct napi_struct *napi, int budget)
 	                  &work_done, budget);
 
 	/* If budget not fully consumed, exit the polling mode */
-	if (work_done < budget) {
+	if (!tx_work && (work_done < budget)) {
 		if (likely(adapter->itr_setting & 3))
 			e1000_set_itr(adapter);
 		netif_rx_complete(poll_dev, napi);

^ permalink raw reply related

* Re: [RFC 6/6] fib_trie: combine leaf and info
From: Eric Dumazet @ 2008-01-15  6:12 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, robert.olsson, netdev
In-Reply-To: <20080114210716.4b09c84d@deepthought>

[-- Attachment #1: Type: text/plain, Size: 9629 bytes --]

Stephen Hemminger a écrit :
> Combine the prefix information and the leaf together into one
> allocation. This is furthur simplified by converting the hlist
> into a simple bitfield.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
> 
> This patch is experimental (ie it boots and loads routes), but
> is slower for the 163,000 route dump test.
> 

Its also very memory expensive. That was not Robert suggestion I believe.

Its suggestion is to embedd one info into each leaves.

Please find attached to this mail a preliminary and incomplete patch I wrote 
this morning before coffee :), to get the idea.

Before patching this file, maybe we should first do a cleanup (checkpatch.pl 
and obvious modern style formating :) )
> ---
>  net/ipv4/fib_trie.c |  165 ++++++++++++++++++++--------------------------------
>  1 file changed, 65 insertions(+), 100 deletions(-)
> 
> --- a/net/ipv4/fib_trie.c	2008-01-14 18:37:51.000000000 -0800
> +++ b/net/ipv4/fib_trie.c	2008-01-14 20:57:12.000000000 -0800
> @@ -50,10 +50,9 @@
>   *		Patrick McHardy <kaber@trash.net>
>   */
>  
> -#define VERSION "0.408"
> +#define VERSION "0.409"
> +
>  
> -#include <asm/uaccess.h>
> -#include <asm/system.h>
>  #include <linux/bitops.h>
>  #include <linux/types.h>
>  #include <linux/kernel.h>
> @@ -80,6 +79,8 @@
>  #include <net/tcp.h>
>  #include <net/sock.h>
>  #include <net/ip_fib.h>
> +#include <asm/bitops.h>
> +
>  #include "fib_lookup.h"
>  
>  #define MAX_STAT_DEPTH 32
> @@ -101,20 +102,24 @@ struct node {
>  	t_key key;
>  };
>  
> +struct leaf_info {
> +	struct list_head falh;
> +};
> +
> +#define INFO_SIZE	33
> +/* NB: bitmap is in reverse order, so that find_first returns largest prefix */
> +#define PLEN2BIT(x)	(INFO_SIZE - (x))
> +
>  struct leaf {
>  	unsigned long parent;
>  	t_key key;
> -	struct hlist_head list;
>  	struct rcu_head rcu;
> -};
>  
> -struct leaf_info {
> -	struct hlist_node hlist;
> -	struct rcu_head rcu;
> -	int plen;
> -	struct list_head falh;
> +	unsigned long bitmap[INFO_SIZE / BITS_PER_LONG + 1];
> +	struct leaf_info info[INFO_SIZE];
>  };
>  
> +
>  struct tnode {
>  	unsigned long parent;
>  	t_key key;
> @@ -321,16 +326,6 @@ static void __leaf_free_rcu(struct rcu_h
>  	kmem_cache_free(trie_leaf_kmem, leaf);
>  }
>  
> -static void __leaf_info_free_rcu(struct rcu_head *head)
> -{
> -	kfree(container_of(head, struct leaf_info, rcu));
> -}
> -
> -static inline void free_leaf_info(struct leaf_info *leaf)
> -{
> -	call_rcu(&leaf->rcu, __leaf_info_free_rcu);
> -}
> -
>  static struct tnode *tnode_alloc(size_t size)
>  {
>  	struct page *pages;
> @@ -371,21 +366,37 @@ static struct leaf *leaf_new(void)
>  	struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
>  	if (l) {
>  		l->parent = T_LEAF;
> -		INIT_HLIST_HEAD(&l->list);
> +		bitmap_zero(l->bitmap, INFO_SIZE);
>  	}
>  	return l;
>  }
>  
> -static struct leaf_info *leaf_info_new(int plen)
> +static inline struct leaf_info *find_leaf_info(struct leaf *l, int plen)
>  {
> -	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
> -	if (li) {
> -		li->plen = plen;
> -		INIT_LIST_HEAD(&li->falh);
> -	}
> +	return test_bit(PLEN2BIT(plen), l->bitmap) ? &l->info[plen] : NULL;
> +}
> +
> +static struct leaf_info *set_leaf_info(struct leaf *l, int plen)
> +{
> +	struct leaf_info *li = &l->info[plen];
> +
> +	INIT_LIST_HEAD(&li->falh);
> +	set_bit(PLEN2BIT(plen), l->bitmap);
> +
>  	return li;
>  }
>  
> +static inline void clear_leaf_info(struct leaf *l, int plen)
> +{
> +	smp_mb__before_clear_bit();
> +	clear_bit(PLEN2BIT(plen), &l->bitmap);
> +}
> +
> +static inline int leaf_info_empty(const struct leaf *l)
> +{
> +	return find_first_bit(l->bitmap, INFO_SIZE) >= INFO_SIZE;
> +}
> +
>  static struct tnode* tnode_new(t_key key, int pos, int bits)
>  {
>  	size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
> @@ -876,20 +887,6 @@ nomem:
>  
>  /* readside must use rcu_read_lock currently dump routines
>   via get_fa_head and dump */
> -
> -static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
> -{
> -	struct hlist_head *head = &l->list;
> -	struct hlist_node *node;
> -	struct leaf_info *li;
> -
> -	hlist_for_each_entry_rcu(li, node, head, hlist)
> -		if (li->plen == plen)
> -			return li;
> -
> -	return NULL;
> -}
> -
>  static inline struct list_head * get_fa_head(struct leaf *l, int plen)
>  {
>  	struct leaf_info *li = find_leaf_info(l, plen);
> @@ -900,26 +897,6 @@ static inline struct list_head * get_fa_
>  	return &li->falh;
>  }
>  
> -static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
> -{
> -	struct leaf_info *li = NULL, *last = NULL;
> -	struct hlist_node *node;
> -
> -	if (hlist_empty(head)) {
> -		hlist_add_head_rcu(&new->hlist, head);
> -	} else {
> -		hlist_for_each_entry(li, node, head, hlist) {
> -			if (new->plen > li->plen)
> -				break;
> -
> -			last = li;
> -		}
> -		if (last)
> -			hlist_add_after_rcu(&last->hlist, &new->hlist);
> -		else
> -			hlist_add_before_rcu(&new->hlist, &li->hlist);
> -	}
> -}
>  
>  /* rcu_read_lock needs to be hold by caller from readside */
>  
> @@ -993,6 +970,8 @@ static struct list_head *fib_insert_node
>  	pos = 0;
>  	n = t->trie;
>  
> +	pr_debug("fib_insert_node: %x/%d\n", key, plen);
> +
>  	/* If we point to NULL, stop. Either the tree is empty and we should
>  	 * just put a new leaf in if, or we have reached an empty child slot,
>  	 * and we should just put our new leaf in that.
> @@ -1038,13 +1017,9 @@ static struct list_head *fib_insert_node
>  
>  	if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
>  		l = (struct leaf *) n;
> -		li = leaf_info_new(plen);
> -
> -		if (!li)
> -			return NULL;
> -
> +		li = set_leaf_info(l, plen);
>  		fa_head = &li->falh;
> -		insert_leaf_info(&l->list, li);
> +
>  		goto done;
>  	}
>  	l = leaf_new();
> @@ -1053,15 +1028,8 @@ static struct list_head *fib_insert_node
>  		return NULL;
>  
>  	l->key = key;
> -	li = leaf_info_new(plen);
> -
> -	if (!li) {
> -		tnode_free((struct tnode *) l);
> -		return NULL;
> -	}
> -
> +	li = set_leaf_info(l, plen);
>  	fa_head = &li->falh;
> -	insert_leaf_info(&l->list, li);
>  
>  	if (t->trie && n == NULL) {
>  		/* Case 2: n is NULL, and will just insert a new leaf */
> @@ -1091,7 +1059,6 @@ static struct list_head *fib_insert_node
>  		}
>  
>  		if (!tn) {
> -			free_leaf_info(li);
>  			tnode_free((struct tnode *) l);
>  			return NULL;
>  		}
> @@ -1119,7 +1086,7 @@ static struct list_head *fib_insert_node
>  
>  	rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
>  done:
> -	return fa_head;
> +	return &li->falh;
>  }
>  
>  /*
> @@ -1281,14 +1248,14 @@ static int check_leaf(struct trie *t, st
>  		      t_key key,  const struct flowi *flp,
>  		      struct fib_result *res)
>  {
> -	struct leaf_info *li;
> -	struct hlist_head *hhead = &l->list;
> -	struct hlist_node *node;
> +	int bit;
>  
> -	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
> -		int err;
> -		int plen = li->plen;
> +	/* need find highest prefix */
> +	for_each_bit(bit, l->bitmap, INFO_SIZE) {
> +		int plen = PLEN2BIT(bit);
> +		struct leaf_info *li = &l->info[plen];
>  		__be32 mask = inet_make_mask(plen);
> +		int err;
>  
>  		if (l->key != (key & ntohl(mask)))
>  			continue;
> @@ -1622,12 +1589,10 @@ static int fn_trie_delete(struct fib_tab
>  
>  	list_del_rcu(&fa->fa_list);
>  
> -	if (list_empty(fa_head)) {
> -		hlist_del_rcu(&li->hlist);
> -		free_leaf_info(li);
> -	}
> +	if (list_empty(fa_head))
> +		clear_leaf_info(l, plen);
>  
> -	if (hlist_empty(&l->list))
> +	if (leaf_info_empty(l))
>  		trie_leaf_remove(t, key);
>  
>  	if (fa->fa_state & FA_S_ACCESSED)
> @@ -1659,18 +1624,17 @@ static int trie_flush_list(struct trie *
>  static int trie_flush_leaf(struct trie *t, struct leaf *l)
>  {
>  	int found = 0;
> -	struct hlist_head *lih = &l->list;
> -	struct hlist_node *node, *tmp;
> -	struct leaf_info *li = NULL;
> +	int bit;
>  
> -	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
> +	for_each_bit(bit, l->bitmap, INFO_SIZE) {
> +		int plen = PLEN2BIT(bit);
> +		struct leaf_info *li = &l->info[plen];
>  		found += trie_flush_list(t, &li->falh);
>  
> -		if (list_empty(&li->falh)) {
> -			hlist_del_rcu(&li->hlist);
> -			free_leaf_info(li);
> -		}
> +		if (list_empty(&li->falh))
> +			clear_leaf_info(l, plen);
>  	}
> +
>  	return found;
>  }
>  
> @@ -1746,12 +1710,12 @@ static int fn_trie_flush(struct fib_tabl
>  	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
>  		found += trie_flush_leaf(t, l);
>  
> -		if (ll && hlist_empty(&ll->list))
> +		if (ll && leaf_info_empty(ll))
>  			trie_leaf_remove(t, ll->key);
>  		ll = l;
>  	}
>  
> -	if (ll && hlist_empty(&ll->list))
> +	if (ll && leaf_info_empty(ll))
>  		trie_leaf_remove(t, ll->key);
>  
>  	pr_debug("trie_flush found=%d\n", found);
> @@ -2428,10 +2392,11 @@ static int fib_route_seq_show(struct seq
>  
>  	if (iter->trie == iter->trie_local)
>  		return 0;
> +
>  	if (IS_TNODE(l))
>  		return 0;
>  
> -	for (i=32; i>=0; i--) {
> +	for (i = 32; i >= 0; i--) {
>  		struct leaf_info *li = find_leaf_info(l, i);
>  		struct fib_alias *fa;
>  		__be32 mask, prefix;
> @@ -2439,7 +2404,7 @@ static int fib_route_seq_show(struct seq
>  		if (!li)
>  			continue;
>  
> -		mask = inet_make_mask(li->plen);
> +		mask = inet_make_mask(i);
>  		prefix = htonl(l->key);
>  
>  		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 


[-- Attachment #2: info_embedded.patch --]
[-- Type: text/plain, Size: 3670 bytes --]

--- net/ipv4/fib_trie.c
+++ net/ipv4/fib_trie.c.rcu
@@ -97,30 +97,22 @@
 #define IS_LEAF(n) (n->parent & T_LEAF)
 
 struct node {
-	unsigned long		parent;
-	t_key			key;
+	unsigned long parent;
+	t_key key;
 };
 
 struct leaf {
-	unsigned long		parent;
-	t_key			key;
-	/*
-	 * Because we have at least one info per leaf, we embedd one here
-	 * to save some space and speedup lookups (sharing cache line)
-	 * Note : not inside a structure so that we dont have holes on 64bit 
-	 */
-	int 			plen_iinfo;
-	struct list_head	falh_iinfo;
-
-	struct hlist_head	list;
-	struct rcu_head		rcu;
+	unsigned long parent;
+	t_key key;
+	struct hlist_head list;
+	struct rcu_head rcu;
 };
 
 struct leaf_info {
-	struct hlist_node	hlist;
-	int			plen;
-	struct list_head	falh;
-	struct rcu_head		rcu;
+	struct hlist_node hlist;
+	struct rcu_head rcu;
+	int plen;
+	struct list_head falh;
 };
 
 struct tnode {
@@ -381,13 +373,11 @@
 		call_rcu(&tn->rcu, __tnode_free_rcu);
 }
 
-static struct leaf *leaf_new(int plen)
+static struct leaf *leaf_new(void)
 {
 	struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
 	if (l) {
 		l->parent = T_LEAF;
-		l->plen_iinfo = plen;
-		INIT_LIST_HEAD(&l->falh_iinfo);
 		INIT_HLIST_HEAD(&l->list);
 	}
 	return l;
@@ -909,12 +899,7 @@
 
 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
 {
-	struct leaf_info *li;
-
-	if (plen == l->plen_iinfo)
-		return &l->falh_iinfo;
-
-	li = find_leaf_info(l, plen);
+	struct leaf_info *li = find_leaf_info(l, plen);
 
 	if (!li)
 		return NULL;
@@ -1071,22 +1056,21 @@
 		goto done;
 	}
 	t->size++;
-	l = leaf_new(plen);
+	l = leaf_new();
 
 	if (!l)
 		return NULL;
 
 	l->key = key;
-//	li = leaf_info_new(plen);
+	li = leaf_info_new(plen);
+
+	if (!li) {
+		tnode_free((struct tnode *) l);
+		return NULL;
+	}
 
-//	if (!li) {
-//		tnode_free((struct tnode *) l);
-//		return NULL;
-//	}
-
-//	fa_head = &li->falh;
-//	insert_leaf_info(&l->list, li);
-	fa_head = &l->falh_iinfo;
+	fa_head = &li->falh;
+	insert_leaf_info(&l->list, li);
 
 	if (t->trie && n == NULL) {
 		/* Case 2: n is NULL, and will just insert a new leaf */
@@ -1116,7 +1100,7 @@
 		}
 
 		if (!tn) {
-//			free_leaf_info(li);
+			free_leaf_info(li);
 			tnode_free((struct tnode *) l);
 			return NULL;
 		}
@@ -1640,7 +1624,7 @@
 	li = find_leaf_info(l, plen);
 
 	list_del_rcu(&fa->fa_list);
-// FIXME
+
 	if (list_empty(fa_head)) {
 		hlist_del_rcu(&li->hlist);
 		free_leaf_info(li);
@@ -2382,11 +2366,10 @@
 		seq_indent(seq, iter->depth);
 		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
 		for (i = 32; i >= 0; i--) {
-//			struct leaf_info *li = get_fa_head(l, i); //find_leaf_info(l, i);
-			struct list_head *fa_head = get_fa_head(l, i);
-			if (fa_head) {
+			struct leaf_info *li = find_leaf_info(l, i);
+			if (li) {
 				struct fib_alias *fa;
-				list_for_each_entry_rcu(fa, fa_head, fa_list) {
+				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
 					seq_indent(seq, iter->depth+1);
 					seq_printf(seq, "  /%d %s %s", i,
 						   rtn_scope(fa->fa_scope),
@@ -2465,18 +2448,17 @@
 		return 0;
 
 	for (i=32; i>=0; i--) {
-		//struct leaf_info *li = find_leaf_info(l, i);
-		struct list_head *fa_head = get_fa_head(l, i);
+		struct leaf_info *li = find_leaf_info(l, i);
 		struct fib_alias *fa;
 		__be32 mask, prefix;
 
-		if (!fa_head)
+		if (!li)
 			continue;
 
-		mask = inet_make_mask(i);
+		mask = inet_make_mask(li->plen);
 		prefix = htonl(l->key);
 
-		list_for_each_entry_rcu(fa, fa_head, fa_list) {
+		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
 			const struct fib_info *fi = fa->fa_info;
 			unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
 

^ permalink raw reply

* Re: [RFC 6/6] fib_trie: combine leaf and info
From: Eric Dumazet @ 2008-01-15  6:16 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, robert.olsson, netdev
In-Reply-To: <478C4EB7.6060807@cosmosbay.com>

[-- Attachment #1: Type: text/plain, Size: 770 bytes --]

Eric Dumazet a écrit :
> Stephen Hemminger a écrit :
>> Combine the prefix information and the leaf together into one
>> allocation. This is furthur simplified by converting the hlist
>> into a simple bitfield.
>>
>> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
>>
>> This patch is experimental (ie it boots and loads routes), but
>> is slower for the 163,000 route dump test.
>>
> 
> Its also very memory expensive. That was not Robert suggestion I believe.
> 
> Its suggestion is to embedd one info into each leaves.
> 
> Please find attached to this mail a preliminary and incomplete patch I 
> wrote this morning before coffee :), to get the idea.

Oops, patch reversed, sorry, and against another work pending in my tree.

Now time for cofee :)


[-- Attachment #2: info_embedded.patch --]
[-- Type: text/plain, Size: 3670 bytes --]

--- net/ipv4/fib_trie.c.old
+++ net/ipv4/fib_trie.c
@@ -97,22 +97,30 @@
 #define IS_LEAF(n) (n->parent & T_LEAF)
 
 struct node {
-	unsigned long parent;
-	t_key key;
+	unsigned long		parent;
+	t_key			key;
 };
 
 struct leaf {
-	unsigned long parent;
-	t_key key;
-	struct hlist_head list;
-	struct rcu_head rcu;
+	unsigned long		parent;
+	t_key			key;
+	/*
+	 * Because we have at least one info per leaf, we embedd one here
+	 * to save some space and speedup lookups (sharing cache line)
+	 * Note : not inside a structure so that we dont have holes on 64bit 
+	 */
+	int 			plen_iinfo;
+	struct list_head	falh_iinfo;
+
+	struct hlist_head	list;
+	struct rcu_head		rcu;
 };
 
 struct leaf_info {
-	struct hlist_node hlist;
-	struct rcu_head rcu;
-	int plen;
-	struct list_head falh;
+	struct hlist_node	hlist;
+	int			plen;
+	struct list_head	falh;
+	struct rcu_head		rcu;
 };
 
 struct tnode {
@@ -373,11 +381,13 @@
 		call_rcu(&tn->rcu, __tnode_free_rcu);
 }
 
-static struct leaf *leaf_new(void)
+static struct leaf *leaf_new(int plen)
 {
 	struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
 	if (l) {
 		l->parent = T_LEAF;
+		l->plen_iinfo = plen;
+		INIT_LIST_HEAD(&l->falh_iinfo);
 		INIT_HLIST_HEAD(&l->list);
 	}
 	return l;
@@ -899,7 +909,12 @@
 
 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
 {
-	struct leaf_info *li = find_leaf_info(l, plen);
+	struct leaf_info *li;
+
+	if (plen == l->plen_iinfo)
+		return &l->falh_iinfo;
+
+	li = find_leaf_info(l, plen);
 
 	if (!li)
 		return NULL;
@@ -1056,21 +1071,22 @@
 		goto done;
 	}
 	t->size++;
-	l = leaf_new();
+	l = leaf_new(plen);
 
 	if (!l)
 		return NULL;
 
 	l->key = key;
-	li = leaf_info_new(plen);
-
-	if (!li) {
-		tnode_free((struct tnode *) l);
-		return NULL;
-	}
+//	li = leaf_info_new(plen);
 
-	fa_head = &li->falh;
-	insert_leaf_info(&l->list, li);
+//	if (!li) {
+//		tnode_free((struct tnode *) l);
+//		return NULL;
+//	}
+
+//	fa_head = &li->falh;
+//	insert_leaf_info(&l->list, li);
+	fa_head = &l->falh_iinfo;
 
 	if (t->trie && n == NULL) {
 		/* Case 2: n is NULL, and will just insert a new leaf */
@@ -1100,7 +1116,7 @@
 		}
 
 		if (!tn) {
-			free_leaf_info(li);
+//			free_leaf_info(li);
 			tnode_free((struct tnode *) l);
 			return NULL;
 		}
@@ -1624,7 +1640,7 @@
 	li = find_leaf_info(l, plen);
 
 	list_del_rcu(&fa->fa_list);
-
+// FIXME
 	if (list_empty(fa_head)) {
 		hlist_del_rcu(&li->hlist);
 		free_leaf_info(li);
@@ -2366,10 +2382,11 @@
 		seq_indent(seq, iter->depth);
 		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
 		for (i = 32; i >= 0; i--) {
-			struct leaf_info *li = find_leaf_info(l, i);
-			if (li) {
+//			struct leaf_info *li = get_fa_head(l, i); //find_leaf_info(l, i);
+			struct list_head *fa_head = get_fa_head(l, i);
+			if (fa_head) {
 				struct fib_alias *fa;
-				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+				list_for_each_entry_rcu(fa, fa_head, fa_list) {
 					seq_indent(seq, iter->depth+1);
 					seq_printf(seq, "  /%d %s %s", i,
 						   rtn_scope(fa->fa_scope),
@@ -2448,17 +2465,18 @@
 		return 0;
 
 	for (i=32; i>=0; i--) {
-		struct leaf_info *li = find_leaf_info(l, i);
+		//struct leaf_info *li = find_leaf_info(l, i);
+		struct list_head *fa_head = get_fa_head(l, i);
 		struct fib_alias *fa;
 		__be32 mask, prefix;
 
-		if (!li)
+		if (!fa_head)
 			continue;
 
-		mask = inet_make_mask(li->plen);
+		mask = inet_make_mask(i);
 		prefix = htonl(l->key);
 
-		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+		list_for_each_entry_rcu(fa, fa_head, fa_list) {
 			const struct fib_info *fi = fa->fa_info;
 			unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
 

^ permalink raw reply

* Re: [REGRESSION] 2.6.24-rc7: e1000: Detected Tx Unit Hang
From: Frans Pop @ 2008-01-15  6:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20080114.215317.38045859.davem@davemloft.net>

Wow. That's fast! :-)

On Tuesday 15 January 2008, David Miller wrote:
> From: Frans Pop <elendil@planet.nl>
>
> > kernel: e1000: eth0: e1000_clean_tx_irq: Detected Tx Unit Hang
>
> Does this make the problem go away?

I'm compiling a kernel with the patch now. Will let you know the result.
May take a while as I don't know how to trigger the bug, so I'll just have 
to let it run for some time.

^ permalink raw reply

* [PATCH 3/4] bonding: Fix work rearming
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev; +Cc: Makito SHIOKAWA
In-Reply-To: <20080115063604.577118000@miraclelinux.com>

[-- Attachment #1: bonding-fix-monitor-rearming.patch --]
[-- Type: text/plain, Size: 706 bytes --]

Change code not to rearm bond_mii_monitor() when value 0 is set for miimon.

Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
---
 drivers/net/bonding/bond_main.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2388,7 +2388,8 @@ void bond_mii_monitor(struct work_struct
 
 	delay = ((bond->params.miimon * HZ) / 1000) ? : 1;
 	read_unlock(&bond->lock);
-	queue_delayed_work(bond->wq, &bond->mii_work, delay);
+	if (bond->params.miimon)
+		queue_delayed_work(bond->wq, &bond->mii_work, delay);
 }
 
 static __be32 bond_glean_dev_ip(struct net_device *dev)

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* [PATCH 0/4] bonding: Fix workqueue manipulation and lock taking
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev

These patches fix some workqueue manipulation and lock taking in bonding driver.

PATCH 1/4 through PATCH 3/4 are on workqueue manipulation, PATCH 4/4 is on lock taking, and each patch are logically independent. Patches are for latest netdev-2.6 git.

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* [PATCH 2/4] bonding: Add destroy_workqueue() to bond device destroying process
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev; +Cc: Makito SHIOKAWA
In-Reply-To: <20080115063604.577118000@miraclelinux.com>

[-- Attachment #1: bonding-guarantee-destroy-work.patch --]
[-- Type: text/plain, Size: 6186 bytes --]

There is no destroy_workqueue() in bond device destroying process, therefore
worker thread will remain even if bond device is destroyed. So add
destroy_workqueue(), and also, ensure all works are canceled before
destroy_workqueue() is called.

Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
---

 drivers/net/bonding/bond_main.c  |   72 +++++++++++++++++++-------------------
 drivers/net/bonding/bond_sysfs.c |    9 ++++
 drivers/net/bonding/bonding.h    |    5 ++
 3 files changed, 48 insertions(+), 38 deletions(-)

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -190,7 +190,6 @@ struct bond_parm_tbl arp_validate_tbl[] 
 /*-------------------------- Forward declarations ---------------------------*/
 
 static void bond_send_gratuitous_arp(struct bonding *bond);
-static void bond_deinit(struct net_device *bond_dev);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -862,7 +861,7 @@ static void bond_resend_igmp_join_reques
 /*
  * Totally destroys the mc_list in bond
  */
-static void bond_mc_list_destroy(struct bonding *bond)
+void bond_mc_list_destroy(struct bonding *bond)
 {
 	struct dev_mc_list *dmi;
 
@@ -1875,7 +1874,7 @@ int  bond_release_and_destroy(struct net
 /*
  * This function releases all slaves.
  */
-static int bond_release_all(struct net_device *bond_dev)
+int bond_release_all(struct net_device *bond_dev)
 {
 	struct bonding *bond = bond_dev->priv;
 	struct slave *slave;
@@ -4392,6 +4391,25 @@ static const struct ethtool_ops bond_eth
 	.get_drvinfo		= bond_ethtool_get_drvinfo,
 };
 
+/* Caller must not hold rtnl_lock */
+void bond_work_cancel_all(struct bonding *bond)
+{
+	/* ensure all works are stopped */
+	cancel_delayed_work_sync(&bond->alb_work);
+	cancel_delayed_work_sync(&bond->mii_work);
+	cancel_delayed_work_sync(&bond->ab_arp_work);
+	cancel_delayed_work_sync(&bond->lb_arp_work);
+	cancel_delayed_work_sync(&bond->ad_work);
+}
+
+static void bond_free_netdev(struct net_device *dev)
+{
+	struct bonding *bond = netdev_priv(dev);
+
+	destroy_workqueue(bond->wq);
+	free_netdev(dev);
+}
+
 /*
  * Does not allocate but creates a /proc entry.
  * Allowed to fail.
@@ -4441,7 +4459,7 @@ static int bond_init(struct net_device *
 
 	bond_set_mode_ops(bond, bond->params.mode);
 
-	bond_dev->destructor = free_netdev;
+	bond_dev->destructor = bond_free_netdev;
 
 	/* Initialize the device options */
 	bond_dev->tx_queue_len = 0;
@@ -4483,7 +4501,7 @@ static int bond_init(struct net_device *
 /* De-initialize device specific data.
  * Caller must hold rtnl_lock.
  */
-static void bond_deinit(struct net_device *bond_dev)
+void bond_deinit(struct net_device *bond_dev)
 {
 	struct bonding *bond = bond_dev->priv;
 
@@ -4494,29 +4512,6 @@ static void bond_deinit(struct net_devic
 #endif
 }
 
-static void bond_work_cancel_all(struct bonding *bond)
-{
-	write_lock_bh(&bond->lock);
-	bond->kill_timers = 1;
-	write_unlock_bh(&bond->lock);
-
-	if (bond->params.miimon)
-		cancel_delayed_work(&bond->mii_work);
-
-	if (bond->params.arp_interval) {
-		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
-			cancel_delayed_work(&bond->ab_arp_work);
-		else
-			cancel_delayed_work(&bond->lb_arp_work);
-	}
-
-	if (bond->params.mode == BOND_MODE_ALB)
-		cancel_delayed_work(&bond->alb_work);
-
-	if (bond->params.mode == BOND_MODE_8023AD)
-		cancel_delayed_work(&bond->ad_work);
-}
-
 /* Unregister and free all bond devices.
  * Caller must hold rtnl_lock.
  */
@@ -4527,11 +4522,14 @@ static void bond_free_all(void)
 	list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
 		struct net_device *bond_dev = bond->dev;
 
+		bond_deinit(bond_dev);
+		rtnl_unlock();
 		bond_work_cancel_all(bond);
+		rtnl_lock();
 		bond_mc_list_destroy(bond);
 		/* Release the bonded slaves */
 		bond_release_all(bond_dev);
-		bond_deinit(bond_dev);
+		bond_destroy_sysfs_entry(bond);
 		unregister_netdevice(bond_dev);
 	}
 
@@ -4921,8 +4919,16 @@ int bond_create(char *name, struct bond_
 
 out_bond:
 	bond_deinit(bond_dev);
+	if (*newbond)
+		unregister_netdevice(bond_dev);
+	else {
+		rtnl_unlock();
+		bond_work_cancel_all(netdev_priv(bond_dev));
+		rtnl_lock();
+		destroy_workqueue(((struct bonding *)netdev_priv(bond_dev))->wq);
 out_netdev:
-	free_netdev(bond_dev);
+		free_netdev(bond_dev);
+	}
 out_rtnl:
 	rtnl_unlock();
 	return res;
@@ -4932,7 +4938,6 @@ static int __init bonding_init(void)
 {
 	int i;
 	int res;
-	struct bonding *bond, *nxt;
 
 	printk(KERN_INFO "%s", version);
 
@@ -4959,11 +4964,6 @@ static int __init bonding_init(void)
 
 	goto out;
 err:
-	list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
-		bond_work_cancel_all(bond);
-		destroy_workqueue(bond->wq);
-	}
-
 	rtnl_lock();
 	bond_free_all();
 	bond_destroy_sysfs();
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -163,7 +163,14 @@ static ssize_t bonding_store_bonds(struc
 				printk(KERN_INFO DRV_NAME
 					": %s is being deleted...\n",
 					bond->dev->name);
-				bond_destroy(bond);
+ 				bond_deinit(bond->dev);
+				rtnl_unlock();
+				bond_work_cancel_all(bond);
+				rtnl_lock();
+				bond_mc_list_destroy(bond);
+				bond_release_all(bond->dev);
+ 		        	bond_destroy_sysfs_entry(bond);
+ 				unregister_netdevice(bond->dev);
 				rtnl_unlock();
 				goto out;
 			}
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -301,7 +301,10 @@ static inline void bond_unset_master_alb
 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
 int bond_create(char *name, struct bond_params *params, struct bonding **newbond);
-void bond_destroy(struct bonding *bond);
+void bond_deinit(struct net_device *bond_dev);
+void bond_work_cancel_all(struct bonding *bond);
+void bond_mc_list_destroy(struct bonding *bond);
+int bond_release_all(struct net_device *bond_dev);
 int  bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev);
 int bond_create_sysfs(void);
 void bond_destroy_sysfs(void);

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* [PATCH 1/4] bonding: Fix work initing and cancelling
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev; +Cc: Makito SHIOKAWA
In-Reply-To: <20080115063604.577118000@miraclelinux.com>

[-- Attachment #1: bonding-fix-work-manipulation-in-sysfs.patch --]
[-- Type: text/plain, Size: 7400 bytes --]

delayed_work_pending() is used to check whether work is being queued or not,
when queueing or cancelling corresponding work. However,
delayed_work_pending() returns 0 when work is just running, and in that case,
work_struct will be destroyed or work won't be cancelled. So remove all
delayed_work_pending(), and move all INIT_DELAYED_WORK to bonding init
process, just queue or cancel work. For this purpose, arp_work is divided into
two works.

Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
---

 drivers/net/bonding/bond_main.c  |   46 ++++++++++++++++++++------------------
 drivers/net/bonding/bond_sysfs.c |   36 +++++++++--------------------
 drivers/net/bonding/bonding.h    |    3 +-
 3 files changed, 39 insertions(+), 46 deletions(-)

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2692,7 +2692,7 @@ out:
 void bond_loadbalance_arp_mon(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
-					    arp_work.work);
+					    lb_arp_work.work);
 	struct slave *slave, *oldcurrent;
 	int do_failover = 0;
 	int delta_in_ticks;
@@ -2803,7 +2803,7 @@ void bond_loadbalance_arp_mon(struct wor
 
 re_arm:
 	if (bond->params.arp_interval)
-		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
+		queue_delayed_work(bond->wq, &bond->lb_arp_work, delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
 }
@@ -2826,7 +2826,7 @@ out:
 void bond_activebackup_arp_mon(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
-					    arp_work.work);
+					    ab_arp_work.work);
 	struct slave *slave;
 	int delta_in_ticks;
 	int i;
@@ -3060,7 +3060,7 @@ void bond_activebackup_arp_mon(struct wo
 
 re_arm:
 	if (bond->params.arp_interval) {
-		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
+		queue_delayed_work(bond->wq, &bond->ab_arp_work, delta_in_ticks);
 	}
 out:
 	read_unlock(&bond->lock);
@@ -3679,30 +3679,23 @@ static int bond_open(struct net_device *
 			return -1;
 		}
 
-		INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
 		queue_delayed_work(bond->wq, &bond->alb_work, 0);
 	}
 
 	if (bond->params.miimon) {  /* link check interval, in milliseconds. */
-		INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
 		queue_delayed_work(bond->wq, &bond->mii_work, 0);
 	}
 
 	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
 		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
-			INIT_DELAYED_WORK(&bond->arp_work,
-					  bond_activebackup_arp_mon);
+			queue_delayed_work(bond->wq, &bond->ab_arp_work, 0);
 		else
-			INIT_DELAYED_WORK(&bond->arp_work,
-					  bond_loadbalance_arp_mon);
-
-		queue_delayed_work(bond->wq, &bond->arp_work, 0);
+			queue_delayed_work(bond->wq, &bond->lb_arp_work, 0);
 		if (bond->params.arp_validate)
 			bond_register_arp(bond);
 	}
 
 	if (bond->params.mode == BOND_MODE_8023AD) {
-		INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
 		queue_delayed_work(bond->wq, &bond->ad_work, 0);
 		/* register to receive LACPDUs */
 		bond_register_lacpdu(bond);
@@ -3736,7 +3729,10 @@ static int bond_close(struct net_device 
 	}
 
 	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
-		cancel_delayed_work(&bond->arp_work);
+		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+			cancel_delayed_work(&bond->ab_arp_work);
+		else
+			cancel_delayed_work(&bond->lb_arp_work);
 	}
 
 	switch (bond->params.mode) {
@@ -4416,6 +4412,12 @@ static int bond_init(struct net_device *
 	if (!bond->wq)
 		return -ENOMEM;
 
+	INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
+	INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
+	INIT_DELAYED_WORK(&bond->ab_arp_work, bond_activebackup_arp_mon);
+	INIT_DELAYED_WORK(&bond->lb_arp_work, bond_loadbalance_arp_mon);
+	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
+
 	/* Initialize pointers */
 	bond->first_slave = NULL;
 	bond->curr_active_slave = NULL;
@@ -4498,18 +4500,20 @@ static void bond_work_cancel_all(struct 
 	bond->kill_timers = 1;
 	write_unlock_bh(&bond->lock);
 
-	if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
+	if (bond->params.miimon)
 		cancel_delayed_work(&bond->mii_work);
 
-	if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
-		cancel_delayed_work(&bond->arp_work);
+	if (bond->params.arp_interval) {
+		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+			cancel_delayed_work(&bond->ab_arp_work);
+		else
+			cancel_delayed_work(&bond->lb_arp_work);
+	}
 
-	if (bond->params.mode == BOND_MODE_ALB &&
-	    delayed_work_pending(&bond->alb_work))
+	if (bond->params.mode == BOND_MODE_ALB)
 		cancel_delayed_work(&bond->alb_work);
 
-	if (bond->params.mode == BOND_MODE_8023AD &&
-	    delayed_work_pending(&bond->ad_work))
+	if (bond->params.mode == BOND_MODE_8023AD)
 		cancel_delayed_work(&bond->ad_work);
 }
 
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -643,10 +643,8 @@ static ssize_t bonding_store_arp_interva
 		       "%s Disabling MII monitoring.\n",
 		       bond->dev->name, bond->dev->name);
 		bond->params.miimon = 0;
-		if (delayed_work_pending(&bond->mii_work)) {
-			cancel_delayed_work(&bond->mii_work);
-			flush_workqueue(bond->wq);
-		}
+		cancel_delayed_work(&bond->mii_work);
+		flush_workqueue(bond->wq);
 	}
 	if (!bond->params.arp_targets[0]) {
 		printk(KERN_INFO DRV_NAME
@@ -660,16 +658,10 @@ static ssize_t bonding_store_arp_interva
 		 * timer will get fired off when the open function
 		 * is called.
 		 */
-		if (!delayed_work_pending(&bond->arp_work)) {
-			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
-				INIT_DELAYED_WORK(&bond->arp_work,
-						  bond_activebackup_arp_mon);
-			else
-				INIT_DELAYED_WORK(&bond->arp_work,
-						  bond_loadbalance_arp_mon);
-
-			queue_delayed_work(bond->wq, &bond->arp_work, 0);
-		}
+		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+			queue_delayed_work(bond->wq, &bond->ab_arp_work, 0);
+		else
+			queue_delayed_work(bond->wq, &bond->lb_arp_work, 0);
 	}
 
 out:
@@ -1022,10 +1014,11 @@ static ssize_t bonding_store_miimon(stru
 				bond->params.arp_validate =
 					BOND_ARP_VALIDATE_NONE;
 			}
-			if (delayed_work_pending(&bond->arp_work)) {
-				cancel_delayed_work(&bond->arp_work);
-				flush_workqueue(bond->wq);
-			}
+			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+				cancel_delayed_work(&bond->ab_arp_work);
+			else
+				cancel_delayed_work(&bond->lb_arp_work);
+			flush_workqueue(bond->wq);
 		}
 
 		if (bond->dev->flags & IFF_UP) {
@@ -1034,12 +1027,7 @@ static ssize_t bonding_store_miimon(stru
 			 * timer will get fired off when the open function
 			 * is called.
 			 */
-			if (!delayed_work_pending(&bond->mii_work)) {
-				INIT_DELAYED_WORK(&bond->mii_work,
-						  bond_mii_monitor);
-				queue_delayed_work(bond->wq,
-						   &bond->mii_work, 0);
-			}
+			queue_delayed_work(bond->wq, &bond->mii_work, 0);
 		}
 	}
 out:
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -206,7 +206,8 @@ struct bonding {
 	struct   packet_type arp_mon_pt;
 	struct   workqueue_struct *wq;
 	struct   delayed_work mii_work;
-	struct   delayed_work arp_work;
+	struct   delayed_work ab_arp_work;
+	struct   delayed_work lb_arp_work;
 	struct   delayed_work alb_work;
 	struct   delayed_work ad_work;
 };

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* [PATCH 4/4] bonding: Fix some RTNL taking
From: Makito SHIOKAWA @ 2008-01-15  6:36 UTC (permalink / raw)
  To: netdev; +Cc: Makito SHIOKAWA
In-Reply-To: <20080115063604.577118000@miraclelinux.com>

[-- Attachment #1: bonding-fix-some-rtnl-taking.patch --]
[-- Type: text/plain, Size: 3810 bytes --]

Fix some RTNL lock taking:

* RTNL (mutex; may sleep) must not be taken under read_lock (spinlock; must be
atomic). However, RTNL is taken under read_lock in bond_loadbalance_arp_mon()
and bond_activebackup_arp_mon(). So change code to take RTNL outside of read_lock.

* rtnl_unlock() calls netdev_run_todo() which takes net_todo_run_mutex, and
rtnl_unlock() is called under read_lock in bond_mii_monitor(). So for the same
reason as above, change code to call rtnl_unlock() outside of read_lock.

Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
---
 drivers/net/bonding/bond_main.c |   24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2372,6 +2372,7 @@ void bond_mii_monitor(struct work_struct
 	struct bonding *bond = container_of(work, struct bonding,
 					    mii_work.work);
 	unsigned long delay;
+	int need_unlock = 0;
 
 	read_lock(&bond->lock);
 	if (bond->kill_timers) {
@@ -2383,13 +2384,16 @@ void bond_mii_monitor(struct work_struct
 		rtnl_lock();
 		read_lock(&bond->lock);
 		__bond_mii_monitor(bond, 1);
-		rtnl_unlock();
+		need_unlock = 1;
 	}
 
 	delay = ((bond->params.miimon * HZ) / 1000) ? : 1;
-	read_unlock(&bond->lock);
 	if (bond->params.miimon)
 		queue_delayed_work(bond->wq, &bond->mii_work, delay);
+	read_unlock(&bond->lock);
+	/* rtnl_unlock() may sleep, so call it after read_unlock() */
+	if (need_unlock)
+		rtnl_unlock();
 }
 
 static __be32 bond_glean_dev_ip(struct net_device *dev)
@@ -2698,6 +2702,7 @@ void bond_loadbalance_arp_mon(struct wor
 	int delta_in_ticks;
 	int i;
 
+	rtnl_lock();
 	read_lock(&bond->lock);
 
 	delta_in_ticks = (bond->params.arp_interval * HZ) / 1000;
@@ -2791,14 +2796,11 @@ void bond_loadbalance_arp_mon(struct wor
 	}
 
 	if (do_failover) {
-		rtnl_lock();
 		write_lock_bh(&bond->curr_slave_lock);
 
 		bond_select_active_slave(bond);
 
 		write_unlock_bh(&bond->curr_slave_lock);
-		rtnl_unlock();
-
 	}
 
 re_arm:
@@ -2806,6 +2808,7 @@ re_arm:
 		queue_delayed_work(bond->wq, &bond->lb_arp_work, delta_in_ticks);
 out:
 	read_unlock(&bond->lock);
+	rtnl_unlock();
 }
 
 /*
@@ -2831,6 +2834,7 @@ void bond_activebackup_arp_mon(struct wo
 	int delta_in_ticks;
 	int i;
 
+	rtnl_lock();
 	read_lock(&bond->lock);
 
 	delta_in_ticks = (bond->params.arp_interval * HZ) / 1000;
@@ -2855,8 +2859,6 @@ void bond_activebackup_arp_mon(struct wo
 
 				slave->link = BOND_LINK_UP;
 
-				rtnl_lock();
-
 				write_lock_bh(&bond->curr_slave_lock);
 
 				if ((!bond->curr_active_slave) &&
@@ -2892,7 +2894,6 @@ void bond_activebackup_arp_mon(struct wo
 				}
 
 				write_unlock_bh(&bond->curr_slave_lock);
-				rtnl_unlock();
 			}
 		} else {
 			read_lock(&bond->curr_slave_lock);
@@ -2962,7 +2963,6 @@ void bond_activebackup_arp_mon(struct wo
 			       bond->dev->name,
 			       slave->dev->name);
 
-			rtnl_lock();
 			write_lock_bh(&bond->curr_slave_lock);
 
 			bond_select_active_slave(bond);
@@ -2970,8 +2970,6 @@ void bond_activebackup_arp_mon(struct wo
 
 			write_unlock_bh(&bond->curr_slave_lock);
 
-			rtnl_unlock();
-
 			bond->current_arp_slave = slave;
 
 			if (slave) {
@@ -2989,13 +2987,10 @@ void bond_activebackup_arp_mon(struct wo
 			       bond->primary_slave->dev->name);
 
 			/* primary is up so switch to it */
-			rtnl_lock();
 			write_lock_bh(&bond->curr_slave_lock);
 			bond_change_active_slave(bond, bond->primary_slave);
 			write_unlock_bh(&bond->curr_slave_lock);
 
-			rtnl_unlock();
-
 			slave = bond->primary_slave;
 			slave->jiffies = jiffies;
 		} else {
@@ -3064,6 +3059,7 @@ re_arm:
 	}
 out:
 	read_unlock(&bond->lock);
+	rtnl_unlock();
 }
 
 /*------------------------------ proc/seq_file-------------------------------*/

-- 
Makito SHIOKAWA
MIRACLE LINUX CORPORATION 

^ permalink raw reply

* Re: [PATCH 3/6] [IPV4] trie: put leaf nodes in a slab cache
From: Eric Dumazet @ 2008-01-15  6:49 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, robert.olsson, netdev
In-Reply-To: <20080114164621.2bc5011f@deepthought>

Stephen Hemminger a écrit :
> This improves locality for operations that touch all the leaves.
> Later patch will grow the size of the leaf so it becomes more
> important.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
> 
> 
> --- a/net/ipv4/fib_trie.c	2008-01-14 12:26:51.000000000 -0800
> +++ b/net/ipv4/fib_trie.c	2008-01-14 13:41:00.000000000 -0800
> @@ -162,6 +162,7 @@ static struct tnode *halve(struct trie *
>  static void tnode_free(struct tnode *tn);
>  
>  static struct kmem_cache *fn_alias_kmem __read_mostly;
> +static struct kmem_cache *trie_leaf_kmem __read_mostly;
>  
>  static inline struct tnode *node_parent(struct node *node)
>  {
> @@ -316,7 +317,8 @@ static inline void alias_free_mem_rcu(st
>  
>  static void __leaf_free_rcu(struct rcu_head *head)
>  {
> -	kfree(container_of(head, struct leaf, rcu));
> +	struct leaf *leaf = container_of(head, struct leaf, rcu);
> +	kmem_cache_free(trie_leaf_kmem, leaf);
>  }
>  
>  static void __leaf_info_free_rcu(struct rcu_head *head)
> @@ -366,7 +368,7 @@ static inline void tnode_free(struct tno
>  
>  static struct leaf *leaf_new(void)
>  {
> -	struct leaf *l = kmalloc(sizeof(struct leaf),  GFP_KERNEL);
> +	struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
>  	if (l) {
>  		l->parent = T_LEAF;
>  		INIT_HLIST_HEAD(&l->list);
> @@ -1927,6 +1929,9 @@ void __init fib_hash_init(void)
>  {
>  	fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
>  					  0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
> +
> +	trie_leaf_kmem = kmem_cache_create("ip_fib_trie", sizeof(struct leaf),
> +					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
>  }
>  

Do we really need HWCACHE_ALIGN ? so many wasted space ...



^ permalink raw reply

* Re: [PATCH] [IPV4] fib_trie: size and statistics
From: Eric Dumazet @ 2008-01-15  6:55 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, robert.olsson, netdev, stephen.hemminger
In-Reply-To: <20080114125755.6157a3bf@deepthought>

Stephen Hemminger a écrit :
> Show number of entries in trie, the size field was being set but never used,
> but it only counted leaves, not all entries. Refactor the two cases in
> fib_triestat_seq_show into a single routine.
> 
> Note: the stat structure was being malloc'd but the stack usage isn't so
> high (288 bytes) that it is worth the additional complexity.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>
> ---
> Patch against current net-2.6.25
> 
> --- a/net/ipv4/fib_trie.c	2008-01-14 10:16:06.000000000 -0800
> +++ b/net/ipv4/fib_trie.c	2008-01-14 10:30:11.000000000 -0800
> @@ -148,10 +148,10 @@ struct trie_stat {
>  
>  struct trie {
>  	struct node *trie;
> +	unsigned int size;
>  #ifdef CONFIG_IP_FIB_TRIE_STATS
>  	struct trie_use_stats stats;
>  #endif
> -	int size;
>  };
>  
>  static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
> @@ -1045,7 +1045,6 @@ static struct list_head *fib_insert_node
>  		insert_leaf_info(&l->list, li);
>  		goto done;
>  	}
> -	t->size++;
>  	l = leaf_new();
>  
>  	if (!l)
> @@ -1258,6 +1257,8 @@ static int fn_trie_insert(struct fib_tab
>  	list_add_tail_rcu(&new_fa->fa_list,
>  			  (fa ? &fa->fa_list : fa_head));
>  
> +	t->size++;
> +
>  	rt_cache_flush(-1);
>  	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
>  		  &cfg->fc_nlinfo, 0);
> @@ -2128,50 +2129,34 @@ static void trie_show_usage(struct seq_f
>  }
>  #endif /*  CONFIG_IP_FIB_TRIE_STATS */
>  
> +static void fib_trie_show(struct seq_file *seq, const char *name, struct trie *trie)
> +{
> +	struct trie_stat stat;
> +
> +	seq_printf(seq, "%s: %d\n", name, trie->size);
> +	trie_collect_stats(trie, &stat);
> +	trie_show_stats(seq, &stat);
> +#ifdef CONFIG_IP_FIB_TRIE_STATS
> +	trie_show_usage(seq, &trie->stats);
> +#endif
> +}
>  
>  static int fib_triestat_seq_show(struct seq_file *seq, void *v)
>  {
>  	struct net *net = (struct net *)seq->private;
> -	struct trie *trie_local, *trie_main;
> -	struct trie_stat *stat;
>  	struct fib_table *tb;
>  
> -	trie_local = NULL;
> +	seq_printf(seq,
> +		   "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
> +		   sizeof(struct leaf), sizeof(struct tnode));
> +
>  	tb = fib_get_table(net, RT_TABLE_LOCAL);
>  	if (tb)
> -		trie_local = (struct trie *) tb->tb_data;
> -
> -	trie_main = NULL;
> +		fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
> +
>  	tb = fib_get_table(net, RT_TABLE_MAIN);
>  	if (tb)
> -		trie_main = (struct trie *) tb->tb_data;
> -
> -
> -	stat = kmalloc(sizeof(*stat), GFP_KERNEL);
> -	if (!stat)
> -		return -ENOMEM;
> -
> -	seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
> -		   sizeof(struct leaf), sizeof(struct tnode));
> -
> -	if (trie_local) {
> -		seq_printf(seq, "Local:\n");
> -		trie_collect_stats(trie_local, stat);
> -		trie_show_stats(seq, stat);
> -#ifdef CONFIG_IP_FIB_TRIE_STATS
> -		trie_show_usage(seq, &trie_local->stats);
> -#endif
> -	}
> -
> -	if (trie_main) {
> -		seq_printf(seq, "Main:\n");
> -		trie_collect_stats(trie_main, stat);
> -		trie_show_stats(seq, stat);
> -#ifdef CONFIG_IP_FIB_TRIE_STATS
> -		trie_show_usage(seq, &trie_main->stats);
> -#endif
> -	}
> -	kfree(stat);
> +		fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
>  
>  	return 0;
>  }

Keeping a 'size' counter is not necessary, since trie_collect_stats() must go
through all the tree and get this information for free.

This 'size' field only slows down inserts/deletes and dirty a cacheline that 
readers will hit.



^ permalink raw reply

* Re: [PATCH 6/8 net-2.6.25] [ARP] neigh_parms_put(destroy) are essentially local to core/neighbour.c.
From: David Miller @ 2008-01-15  7:04 UTC (permalink / raw)
  To: den; +Cc: netdev, devel
In-Reply-To: <1200322801-19054-6-git-send-email-den@openvz.org>

From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 17:59:59 +0300

> Make them static.
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>

It's completely awkward to put the inline after all
the call sites.  A non-global-optimizing compiler
won't be able to even inline it.

I've reworked this patch as follows when applying
it.

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index a0d42a5..ebbfb50 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -213,7 +213,6 @@ extern struct neighbour 	*neigh_event_ns(struct neigh_table *tbl,
 
 extern struct neigh_parms	*neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl);
 extern void			neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms);
-extern void			neigh_parms_destroy(struct neigh_parms *parms);
 extern unsigned long		neigh_rand_reach_time(unsigned long base);
 
 extern void			pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
@@ -254,12 +253,6 @@ static inline void __neigh_parms_put(struct neigh_parms *parms)
 	atomic_dec(&parms->refcnt);
 }
 
-static inline void neigh_parms_put(struct neigh_parms *parms)
-{
-	if (atomic_dec_and_test(&parms->refcnt))
-		neigh_parms_destroy(parms);
-}
-
 static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
 {
 	atomic_inc(&parms->refcnt);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 9b0b773..7cc772a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -577,6 +577,13 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 	return -ENOENT;
 }
 
+static void neigh_parms_destroy(struct neigh_parms *parms);
+
+static inline void neigh_parms_put(struct neigh_parms *parms)
+{
+	if (atomic_dec_and_test(&parms->refcnt))
+		neigh_parms_destroy(parms);
+}
 
 /*
  *	neighbour must already be out of the table;
@@ -1348,7 +1355,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 	NEIGH_PRINTK1("neigh_parms_release: not found\n");
 }
 
-void neigh_parms_destroy(struct neigh_parms *parms)
+static void neigh_parms_destroy(struct neigh_parms *parms)
 {
 	release_net(parms->net);
 	if (parms->dev)

^ permalink raw reply related

* Re: [PATCH 0/8 2.6.25] a set of small code cleanups
From: David Miller @ 2008-01-15  7:06 UTC (permalink / raw)
  To: den; +Cc: devel, netdev
In-Reply-To: <478B78CB.4030404@openvz.org>

From: "Denis V. Lunev" <den@openvz.org>
Date: Mon, 14 Jan 2008 17:59:23 +0300

> This set contains a set of small improvements found in IPv4 code during
> preparations to support ARP in the network namespace. These fixes are
> mostly independent except 2 last ones.
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>

All applied, thanks Denis.

^ permalink raw reply

* Re: [PATCH 2/9] get rid of unused revision element
From: David Miller @ 2008-01-15  7:07 UTC (permalink / raw)
  To: shemminger; +Cc: Robert.Olsson, robert.olsson, netdev, stephen.hemminger
In-Reply-To: <20080114083555.48792f6b@deepthought>

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 14 Jan 2008 08:35:55 -0800

> I will be glad to get this working.  Is there any point in doing the a
> small systems version as well?

Not at this time.

^ permalink raw reply

* Re: [FIB]: Avoid using static variables without proper locking
From: David Miller @ 2008-01-15  7:10 UTC (permalink / raw)
  To: dada1; +Cc: Robert.Olsson, netdev
In-Reply-To: <478BB78F.8060609@cosmosbay.com>

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 14 Jan 2008 20:27:11 +0100

> fib_trie_seq_show() uses two helper functions, rtn_scope() and 
> rtn_type() that can
> write to static storage without locking.
> 
> Just pass to them a temporary buffer to avoid potential  corruption
> (probably not triggerable but still...)
> 
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

Applied to net-2.6.25, but I had to tweak it to apply
cleanly since the %d in rtn_type() is now a %u

^ permalink raw reply

* Re: [PATCH] [IPV4] fib_trie: size and statistics
From: David Miller @ 2008-01-15  7:12 UTC (permalink / raw)
  To: shemminger; +Cc: robert.olsson, netdev, stephen.hemminger
In-Reply-To: <20080114125755.6157a3bf@deepthought>

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 14 Jan 2008 12:57:55 -0800

> Show number of entries in trie, the size field was being set but never used,
> but it only counted leaves, not all entries. Refactor the two cases in
> fib_triestat_seq_show into a single routine.
> 
> Note: the stat structure was being malloc'd but the stack usage isn't so
> high (288 bytes) that it is worth the additional complexity.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>

Applied, thanks.

^ permalink raw reply

* Re: questions on NAPI processing latency and dropped network packets
From: Jarek Poplawski @ 2008-01-15  7:19 UTC (permalink / raw)
  To: Chris Friesen; +Cc: David Miller, netdev, linux-kernel
In-Reply-To: <478B86BA.1050706@nortel.com>

On 14-01-2008 16:58, Chris Friesen wrote:
...
> How close to bleeding edge do we need to be for it to be considered 
> acceptable to ask questions on netdev?
> 
> Given that the embedded space tends to be perpetually stuck on older 
> kernels (our "current" release is based on 2.6.14) do you have any 
> suggestion on how we can obtain information that would be available on 
> netdev if we were using newer kernels?

IMHO, checking this with a current stable, which probably you are going
to do some day, anyway, should be 100% acceptable: giving some input to
netdev, while still working for yourself.

Regards,
Jarek P.

^ permalink raw reply

* Re: [PATCH 2/6] [IPV4] fib hash|trie initialization
From: David Miller @ 2008-01-15  7:14 UTC (permalink / raw)
  To: stephen.hemminger; +Cc: robert.olsson, netdev
In-Reply-To: <20080114210008.784db159@deepthought>

From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Mon, 14 Jan 2008 21:00:08 -0800

> Initialization of the slab cache's should be done when IP is
> initialized to make sure of available memory, and that code
> can be marked __init.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>

Applied.

^ permalink raw reply

* Re: [PATCH] [IPV4] fib_trie: size and statistics
From: David Miller @ 2008-01-15  7:28 UTC (permalink / raw)
  To: dada1; +Cc: shemminger, robert.olsson, netdev, stephen.hemminger
In-Reply-To: <478C58FD.9030407@cosmosbay.com>

From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 15 Jan 2008 07:55:57 +0100

> Keeping a 'size' counter is not necessary, since trie_collect_stats() must go
> through all the tree and get this information for free.
> 
> This 'size' field only slows down inserts/deletes and dirty a cacheline that 
> readers will hit.

Good point, we can do this better in a follow-on patch.

^ permalink raw reply

* Re: [PATCH 3/6] [IPV4] trie: put leaf nodes in a slab cache
From: David Miller @ 2008-01-15  7:29 UTC (permalink / raw)
  To: stephen.hemminger; +Cc: robert.olsson, netdev
In-Reply-To: <20080114164621.2bc5011f@deepthought>

From: Stephen Hemminger <stephen.hemminger@vyatta.com>
Date: Mon, 14 Jan 2008 16:46:21 -0800

> This improves locality for operations that touch all the leaves.
> Later patch will grow the size of the leaf so it becomes more
> important.
> 
> Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>

I'll let 3, 4, 5, and 6 sit while you work out the issues
brought up by Eric Dumazet.

^ permalink raw reply

* [PATCH net-2.6.25] [XFRM] Remove unused definition XFRM_POLICY_LOCALOK in linux/xfrm.h + typos in net/xfrm.h
From: Rami Rosen @ 2008-01-15  8:17 UTC (permalink / raw)
  To: David Miller, netdev

[-- Attachment #1: Type: text/plain, Size: 166 bytes --]

Hi,
- XFRM_POLICY_LOCALOK in linux/xfrm.h is unused definition.
- correct 4 typos in net/xfrm.h

Regards,
Rami Rosen


Signed-off-by: Rami Rosen <ramirose@gmail.com>

[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 1742 bytes --]

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 9b5b00c..f5cfb75 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -363,7 +363,6 @@ struct xfrm_userpolicy_info {
 #define XFRM_POLICY_ALLOW	0
 #define XFRM_POLICY_BLOCK	1
 	__u8				flags;
-#define XFRM_POLICY_LOCALOK	1	/* Allow user to override global policy */
 	/* Automatically expand selector to include matching ICMP payloads. */
 #define XFRM_POLICY_ICMP	2
 	__u8				share;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5ebb9ba..09b9bda 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -73,7 +73,7 @@ extern struct mutex xfrm_cfg_mutex;
    Lookup is plain linear search until the first match with selector.
 
    If "action" is "block", then we prohibit the flow, otherwise:
-   if "xfrms_nr" is zero, the flow passes untransformed. Otherwise,
+   if "xfrm_nr" is zero, the flow passes untransformed. Otherwise,
    policy entry has list of up to XFRM_MAX_DEPTH transformations,
    described by templates xfrm_tmpl. Each template is resolved
    to a complete xfrm_state (see below) and we pack bundle of transformations
@@ -84,10 +84,10 @@ extern struct mutex xfrm_cfg_mutex;
                      |---. child .-> dst -. xfrm .-> xfrm_state #3
                                       |---. child .-> NULL
 
-   Bundles are cached at xrfm_policy struct (field ->bundles).
+   Bundles are cached at xfrm_policy struct (field ->bundles).
 
 
-   Resolution of xrfm_tmpl
+   Resolution of xfrm_tmpl
    -----------------------
    Template contains:
    1. ->mode		Mode: transport or tunnel
@@ -133,7 +133,7 @@ struct xfrm_state
 
 	u32			genid;
 
-	/* Key manger bits */
+	/* Key manager bits */
 	struct {
 		u8		state;
 		u8		dying;

^ permalink raw reply related

* [FIB]: Fix rcu_dereference() abuses in fib_trie.c
From: Eric Dumazet @ 2008-01-15  8:29 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org, Stephen Hemminger, Herbert Xu

node_parent() and tnode_get_child() currently use rcu_dereference().

These functions are called from both 
- readers only paths (where rcu_dereference() is needed), and 
- writer path (where rcu_dereference() is not needed)

To make explicit where rcu_dereference() is really needed, I introduced
new node_parent_rcu() and tnode_get_child_rcu() functions which use
rcu_dereference(), while node_parent() and tnode_get_child() dont use it.

Then I changed calling sites where rcu_dereference() was really needed to
call the _rcu() variants.

This should have no impact but for alpha architecture, and may help future
sparse checks.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6dab753..e053775 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -165,9 +165,13 @@ static struct kmem_cache *fn_alias_kmem __read_mostly;
 
 static inline struct tnode *node_parent(struct node *node)
 {
-	struct tnode *ret;
+	return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
+}
+
+static inline struct tnode *node_parent_rcu(struct node *node)
+{
+	struct tnode *ret = node_parent(node);
 
-	ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
 	return rcu_dereference(ret);
 }
 
@@ -177,13 +181,18 @@ static inline void node_set_parent(struct node *node, struct tnode *ptr)
 			   (unsigned long)ptr | NODE_TYPE(node));
 }
 
-/* rcu_read_lock needs to be hold by caller from readside */
+static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
+{
+	BUG_ON(i >= 1U << tn->bits);
 
-static inline struct node *tnode_get_child(struct tnode *tn, int i)
+	return tn->child[i];
+}
+
+static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
 {
-	BUG_ON(i >= 1 << tn->bits);
+	struct node *ret = tnode_get_child(tn, i);
 
-	return rcu_dereference(tn->child[i]);
+	return rcu_dereference(ret);
 }
 
 static inline int tnode_child_length(const struct tnode *tn)
@@ -938,7 +947,7 @@ fib_find_node(struct trie *t, u32 key)
 
 		if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
 			pos = tn->pos + tn->bits;
-			n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
+			n = tnode_get_child_rcu(tn, tkey_extract_bits(key, tn->pos, tn->bits));
 		} else
 			break;
 	}
@@ -1685,7 +1694,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
 
 		p = (struct tnode*) trie;  /* Start */
 	} else
-		p = node_parent(c);
+		p = node_parent_rcu(c);
 
 	while (p) {
 		int pos, last;
@@ -1722,7 +1731,7 @@ static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
 up:
 		/* No more children go up one step  */
 		c = (struct node *) p;
-		p = node_parent(c);
+		p = node_parent_rcu(c);
 	}
 	return NULL; /* Ready. Root of trie */
 }
@@ -1984,7 +1993,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
 		 iter->tnode, iter->index, iter->depth);
 rescan:
 	while (cindex < (1<<tn->bits)) {
-		struct node *n = tnode_get_child(tn, cindex);
+		struct node *n = tnode_get_child_rcu(tn, cindex);
 
 		if (n) {
 			if (IS_LEAF(n)) {
@@ -2003,7 +2012,7 @@ rescan:
 	}
 
 	/* Current node exhausted, pop back up */
-	p = node_parent((struct node *)tn);
+	p = node_parent_rcu((struct node *)tn);
 	if (p) {
 		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
 		tn = p;
@@ -2312,7 +2321,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN)
 		return 0;
 
-	if (!node_parent(n)) {
+	if (!node_parent_rcu(n)) {
 		if (iter->trie == iter->trie_local)
 			seq_puts(seq, "<local>:\n");
 		else

^ permalink raw reply related

* Re: [PATCH net-2.6.25] [XFRM] Remove unused definition XFRM_POLICY_LOCALOK in linux/xfrm.h + typos in net/xfrm.h
From: David Miller @ 2008-01-15  8:47 UTC (permalink / raw)
  To: ramirose; +Cc: netdev
In-Reply-To: <eb3ff54b0801150017s425c2d10xe570cc70d2ca596b@mail.gmail.com>

From: "Rami Rosen" <ramirose@gmail.com>
Date: Tue, 15 Jan 2008 10:17:37 +0200

> Hi,
> - XFRM_POLICY_LOCALOK in linux/xfrm.h is unused definition.
> - correct 4 typos in net/xfrm.h

Values in linux/xfrm.h are user visible, so I am worried that this
might break the build of some userland application.

Herbert explained that this value is unused inside of the
kernel, and that is true, but I'm not so sure that there
are no applications using the value.

We might have to keep it there.

^ permalink raw reply

* Re: [PATCH 3/4] bonding: Fix work rearming
From: Jarek Poplawski @ 2008-01-15  9:05 UTC (permalink / raw)
  To: Makito SHIOKAWA; +Cc: netdev
In-Reply-To: <20080115063650.149555000@miraclelinux.com>

On 15-01-2008 07:36, Makito SHIOKAWA wrote:
> Change code not to rearm bond_mii_monitor() when value 0 is set for miimon.
> 
> Signed-off-by: Makito SHIOKAWA <mshiokawa@miraclelinux.com>
> ---
>  drivers/net/bonding/bond_main.c |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -2388,7 +2388,8 @@ void bond_mii_monitor(struct work_struct
>  
>  	delay = ((bond->params.miimon * HZ) / 1000) ? : 1;
>  	read_unlock(&bond->lock);
> -	queue_delayed_work(bond->wq, &bond->mii_work, delay);
> +	if (bond->params.miimon)
> +		queue_delayed_work(bond->wq, &bond->mii_work, delay);
>  }

Maybe I miss something, but is this bond_mii_monitor() function
supposed to be ever started if (!bond->params.miimon)? (IOW: isn't
it enough to control this where the parameter is changed only?)

Regards,
Jarek P.

^ permalink raw reply

* [PATCH][IPV6]: Mischecked tw match in __inet6_check_established.
From: Pavel Emelyanov @ 2008-01-15  9:22 UTC (permalink / raw)
  To: David Miller; +Cc: Linux Netdev List, devel

When looking for a conflicting connection the !sk->sk_bound_dev_if
check is performed only for live sockets, but not for timewait-ed.

This is not the case for ipv4, for __inet6_lookup_established in
both ipv4 and ipv6 and for other places that check for tw-s.

Was this missed accidentally? If so, then this patch fixes it and
besides makes use if the dif variable declared in the function.

Fits both, net-2.6 and net-2.6.25.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>

---

diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index d0b3447..a66a7d8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -193,7 +193,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 		   sk2->sk_family	       == PF_INET6	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+		   (!sk2->sk_bound_dev_if || sk2->sk_bound_dev_if == dif)) {
 			if (twsk_unique(sk, sk2, twp))
 				goto unique;
 			else

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox