From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jarek Poplawski Subject: Re: rib_trie / Fix inflate_threshold_root. Now=15 size=11 bits Date: Sun, 28 Jun 2009 16:35:11 +0200 Message-ID: <20090628143511.GA20541@ami.dom.local> References: <19012.37515.146191.198843@robur.slu.se> <20090627192057.GA5041@ami.dom.local> <19015.20051.432572.460207@robur.slu.se> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: Robert Olsson , =?us-ascii?B?PT9JU08tODg1OS0yP1E/UGF3ZT1CM19TdGFzemV3c2tpPz0=?= , "Jorge Boncompte [DTI2]" , Eric Dumazet , Robert Olsson , Linux Network Development list To: Robert Olsson Return-path: Received: from mail-fx0-f218.google.com ([209.85.220.218]:41572 "EHLO mail-fx0-f218.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751524AbZF1OfY (ORCPT ); Sun, 28 Jun 2009 10:35:24 -0400 Received: by fxm18 with SMTP id 18so520919fxm.37 for ; Sun, 28 Jun 2009 07:35:26 -0700 (PDT) Content-Disposition: inline In-Reply-To: <19015.20051.432572.460207@robur.slu.se> Sender: netdev-owner@vger.kernel.org List-ID: On Sun, Jun 28, 2009 at 01:04:51PM +0200, Robert Olsson wrote: ... > The memory patches and "manual RCU" are also interesting to address > the case with PREEMTP's. Since 2.6.29 looks like prefered here, and there were a lot of takes in this thread, I attach below a combined all-in-one patch including: - 2.6.29 -> 2.6.30 preemption disable patch - 2 RCU vs. preemption fixes from 2.6.31-rc - "manual RCU" patch to force vfree/kfree before root's resize (take 3) - "automatic" inflate_threshold_root fix (take 2) Thanks, Jarek P. --- (for 2.6.29.x or even .28 or .27; any testing appreciated) diff -Nurp a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c --- a/net/ipv4/fib_trie.c 2009-06-27 20:25:06.000000000 +0200 +++ b/net/ipv4/fib_trie.c 2009-06-28 15:45:02.000000000 +0200 @@ -123,6 +123,7 @@ struct tnode { union { struct rcu_head rcu; struct work_struct work; + struct tnode *tnode_free; }; struct node *child[0]; }; @@ -161,6 +162,8 @@ static void tnode_put_child_reorg(struct static struct node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); +/* tnodes to free after resize(); protected by RTNL */ +static struct tnode *tnode_free_head; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; @@ -315,6 +318,7 @@ static const int halve_threshold = 25; static const int inflate_threshold = 50; static const int halve_threshold_root = 8; static const int inflate_threshold_root = 15; +static int inflate_threshold_root_fix; static void __alias_free_mem(struct rcu_head *head) @@ -363,6 +367,17 @@ static void __tnode_vfree(struct work_st vfree(tn); } +static void __tnode_free(struct tnode *tn) +{ + size_t size = sizeof(struct tnode) + + (sizeof(struct node *) << tn->bits); + + if (size <= PAGE_SIZE) + kfree(tn); + else + vfree(tn); +} + static void __tnode_free_rcu(struct rcu_head *head) { struct tnode *tn = container_of(head, struct tnode, rcu); @@ -385,6 +400,24 @@ static inline void tnode_free(struct tno call_rcu(&tn->rcu, __tnode_free_rcu); } +static void tnode_free_safe(struct tnode *tn) +{ + BUG_ON(IS_LEAF(tn)); + tn->tnode_free = tnode_free_head; + tnode_free_head = tn; +} + +static void tnode_free_flush(void) +{ + struct tnode *tn; + + while ((tn = tnode_free_head)) { + tnode_free_head = tn->tnode_free; + tn->tnode_free = NULL; + __tnode_free(tn); + } +} + static struct leaf *leaf_new(void) { struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); @@ -495,7 +528,7 @@ static struct node *resize(struct trie * /* No children */ if (tn->empty_children == tnode_child_length(tn)) { - tnode_free(tn); + tnode_free_safe(tn); return NULL; } /* One child */ @@ -509,7 +542,7 @@ static struct node *resize(struct trie * /* compress one level */ node_set_parent(n, NULL); - tnode_free(tn); + tnode_free_safe(tn); return n; } /* @@ -581,7 +614,8 @@ static struct node *resize(struct trie * /* Keep root node larger */ if (!tn->parent) - inflate_threshold_use = inflate_threshold_root; + inflate_threshold_use = inflate_threshold_root + + inflate_threshold_root_fix; else inflate_threshold_use = inflate_threshold; @@ -605,15 +639,22 @@ static struct node *resize(struct trie * } if (max_resize < 0) { - if (!tn->parent) - pr_warning("Fix inflate_threshold_root." - " Now=%d size=%d bits\n", - inflate_threshold_root, tn->bits); - else + if (!tn->parent) { + if (inflate_threshold_root_fix * 2 < + inflate_threshold_root) + inflate_threshold_root_fix++; + else + pr_warning("Fix inflate_threshold_root." + " Now=%d size=%d bits fix=%d\n", + inflate_threshold_root, tn->bits, + inflate_threshold_root_fix); + } else { pr_warning("Fix inflate_threshold." " Now=%d size=%d bits\n", inflate_threshold, tn->bits); - } + } + } else if (max_resize > 4 && !tn->parent && inflate_threshold_root_fix) + inflate_threshold_root_fix--; check_tnode(tn); @@ -670,7 +711,7 @@ static struct node *resize(struct trie * /* compress one level */ node_set_parent(n, NULL); - tnode_free(tn); + tnode_free_safe(tn); return n; } @@ -756,7 +797,7 @@ static struct tnode *inflate(struct trie put_child(t, tn, 2*i, inode->child[0]); put_child(t, tn, 2*i+1, inode->child[1]); - tnode_free(inode); + tnode_free_safe(inode); continue; } @@ -801,9 +842,9 @@ static struct tnode *inflate(struct trie put_child(t, tn, 2*i, resize(t, left)); put_child(t, tn, 2*i+1, resize(t, right)); - tnode_free(inode); + tnode_free_safe(inode); } - tnode_free(oldtnode); + tnode_free_safe(oldtnode); return tn; nomem: { @@ -885,7 +926,7 @@ static struct tnode *halve(struct trie * put_child(t, newBinNode, 1, right); put_child(t, tn, i/2, resize(t, newBinNode)); } - tnode_free(oldtnode); + tnode_free_safe(oldtnode); return tn; nomem: { @@ -983,12 +1024,14 @@ fib_find_node(struct trie *t, u32 key) return NULL; } -static struct node *trie_rebalance(struct trie *t, struct tnode *tn) +static void trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -1003,11 +1046,22 @@ static struct node *trie_rebalance(struc tn = tp; } + if (tnode_free_head) { + synchronize_rcu(); + tnode_free_flush(); + } + /* Handle last (top) tnode */ - if (IS_TNODE(tn)) + if (IS_TNODE(tn)) { tn = (struct tnode *)resize(t, (struct tnode *)tn); + rcu_assign_pointer(t->trie, (struct node *)tn); + synchronize_rcu(); + tnode_free_flush(); + } else { + rcu_assign_pointer(t->trie, (struct node *)tn); + } - return (struct node *)tn; + return; } /* only used from updater-side */ @@ -1155,7 +1209,7 @@ static struct list_head *fib_insert_node /* Rebalance the trie */ - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); done: return fa_head; } @@ -1575,7 +1629,7 @@ static void trie_leaf_remove(struct trie if (tp) { t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, NULL); - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); } else rcu_assign_pointer(t->trie, NULL);