netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Robert Olsson <Robert.Olsson@data.slu.se>
To: Stephen Hemminger <shemminger@vyatta.com>
Cc: Robert Olsson <Robert.Olsson@data.slu.se>,
	David Miller <davem@davemloft.net>,
	netdev@vger.kernel.org
Subject: [RFC] fib_trie: flush improvement
Date: Wed, 2 Apr 2008 11:31:19 +0200	[thread overview]
Message-ID: <18419.21095.85697.414845@robur.slu.se> (raw)
In-Reply-To: <20080401172702.094c0700@extreme>


Stephen Hemminger writes:
 > This is an attempt to fix the problem described in:
 >      http://bugzilla.kernel.org/show_bug.cgi?id=6648
 > I can reproduce this by loading lots and lots of routes and the taking
 > the interface down. This causes all entries in trie to be flushed, but
 > each leaf removal causes a rebalance of the trie. And since the removal
 > is depth first, it creates lots of needless work.

 OK! Havn't seen it on our boxes yet. Yes it useless work to rebalance
 if we are flushing.

 > Instead on flush, just walk the trie and prune as we go.

 Yes something like that should work. 

 An alternative could be to add an argument to trie_leaf_remove (and 
 fib_insert_node) to control if rebalance should be run. Of course
 from flush we tell trie_leaf_remove not to rebalance.

 In general I'll think the trie should look pretty OK even if we rebalance 
 every 2:nd or 3:rd etc insert/delete. Which could decrease the load and 
 speed up BGP route convergence even more.

 It could be idea to some tuning possiblites from userland. For example the 
 node threshold values and ev. rebalance threshold

 Cheers
					--ro

 > 
 > --- a/net/ipv4/fib_trie.c	2008-04-01 13:54:53.000000000 -0700
 > +++ b/net/ipv4/fib_trie.c	2008-04-01 17:19:00.000000000 -0700
 > @@ -1665,7 +1665,7 @@ static int fn_trie_delete(struct fib_tab
 >  	return 0;
 >  }
 >  
 > -static int trie_flush_list(struct trie *t, struct list_head *head)
 > +static int trie_flush_list(struct list_head *head)
 >  {
 >  	struct fib_alias *fa, *fa_node;
 >  	int found = 0;
 > @@ -1683,24 +1683,74 @@ static int trie_flush_list(struct trie *
 >  	return found;
 >  }
 >  
 > -static int trie_flush_leaf(struct trie *t, struct leaf *l)
 > +static int trie_flush_leaf(struct leaf *l)
 >  {
 > -	int found = 0;
 >  	struct hlist_head *lih = &l->list;
 >  	struct hlist_node *node, *tmp;
 >  	struct leaf_info *li = NULL;
 > +	int found = 0;
 >  
 >  	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
 > -		found += trie_flush_list(t, &li->falh);
 > +		found += trie_flush_list(&li->falh);
 >  
 >  		if (list_empty(&li->falh)) {
 >  			hlist_del_rcu(&li->hlist);
 >  			free_leaf_info(li);
 >  		}
 >  	}
 > +	tnode_free((struct tnode *) l);
 > +
 >  	return found;
 >  }
 >  
 > +static int trie_flush(struct tnode *p)
 > +{
 > +	struct node *c = NULL;
 > +	t_key idx = 0;
 > +	int found = 0;
 > +
 > +	for(;;) {
 > +		for ( ;idx < (1u << p->bits); ++idx) {
 > +			struct node *c = p->child[idx];
 > +			if (!c)
 > +				continue;
 > +
 > +			rcu_assign_pointer(p->child[idx], NULL);
 > +			if (IS_LEAF(c)) {
 > +				found += trie_flush_leaf((struct leaf *) c);
 > +				continue;
 > +			}
 > +
 > +			/* Descend one level */
 > +			p = (struct tnode *) c;
 > +			idx = 0;
 > +		}
 > +
 > +		/* Node empty, free and go back up */
 > +		tnode_free((struct tnode *) c);
 > +		c = (struct node *) p;
 > +		p = node_parent(c);
 > +		if (p == NULL)
 > +			return found; /* at root */
 > +
 > +		idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
 > +	}
 > +}
 > +
 > +static int fn_trie_flush(struct fib_table *tb)
 > +{
 > +	struct trie *t = (struct trie *) tb->tb_data;
 > +	struct tnode *n = (struct tnode *) t->trie;
 > +
 > +	ASSERT_RTNL();
 > +	rcu_assign_pointer(t->trie, NULL);
 > +
 > +	if (IS_LEAF(n))
 > +		return trie_flush_leaf((struct leaf *) n);
 > +	else
 > +		return trie_flush(n);
 > +}
 > +
 >  /*
 >   * Scan for the next right leaf starting at node p->child[idx]
 >   * Since we have back pointer, no recursion necessary.
 > @@ -1772,30 +1822,6 @@ static struct leaf *trie_leafindex(struc
 >  }
 >  
 >  
 > -/*
 > - * Caller must hold RTNL.
 > - */
 > -static int fn_trie_flush(struct fib_table *tb)
 > -{
 > -	struct trie *t = (struct trie *) tb->tb_data;
 > -	struct leaf *l, *ll = NULL;
 > -	int found = 0;
 > -
 > -	for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
 > -		found += trie_flush_leaf(t, l);
 > -
 > -		if (ll && hlist_empty(&ll->list))
 > -			trie_leaf_remove(t, ll);
 > -		ll = l;
 > -	}
 > -
 > -	if (ll && hlist_empty(&ll->list))
 > -		trie_leaf_remove(t, ll);
 > -
 > -	pr_debug("trie_flush found=%d\n", found);
 > -	return found;
 > -}
 > -
 >  static void fn_trie_select_default(struct fib_table *tb,
 >  				   const struct flowi *flp,
 >  				   struct fib_result *res)

      parent reply	other threads:[~2008-04-02 10:02 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-02  0:27 [RFC] fib_trie: flush improvement Stephen Hemminger
2008-04-02  8:01 ` Eric Dumazet
2008-04-02 14:35   ` Eric Dumazet
2008-04-02 18:03     ` Stephen Hemminger
2008-04-02 19:36       ` Eric Dumazet
2008-04-04 16:02         ` [RFC] fib_trie: memory waste solutions Stephen Hemminger
2008-04-07  6:55           ` Robert Olsson
2008-04-07  7:58             ` Andi Kleen
2008-04-07 14:42               ` Robert Olsson
2008-04-07 15:15                 ` Andi Kleen
2008-04-07 15:36                   ` Eric Dumazet
2008-04-07 16:46           ` Eric Dumazet
2008-04-07 22:48             ` Stephen Hemminger
2008-04-10  9:57               ` David Miller
2008-04-02  9:31 ` Robert Olsson [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=18419.21095.85697.414845@robur.slu.se \
    --to=robert.olsson@data.slu.se \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=shemminger@vyatta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).