public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Alexander Duyck <alexander.h.duyck@redhat.com>
To: netdev@vger.kernel.org
Subject: [RFC PATCH 06/29] fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf
Date: Tue, 24 Feb 2015 12:48:35 -0800	[thread overview]
Message-ID: <20150224204835.26106.84691.stgit@ahduyck-vm-fedora20> (raw)
In-Reply-To: <20150224202837.26106.87623.stgit@ahduyck-vm-fedora20>

This change makes it so that leaf_walk_rcu takes a tnode and a key instead
of the trie and a leaf.

The main idea behind this is to avoid using the leaf parent pointer as that
can have additional overhead in the future as I am trying to reduce the
size of a leaf down to 16 bytes on 64b systems and 12b on 32b systems.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
 net/ipv4/fib_trie.c |  216 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 120 insertions(+), 96 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 02e5126..4c82e60 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1487,71 +1487,71 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	return 0;
 }
 
-/* Scan for the next right leaf starting at node p->child[idx]
- * Since we have back pointer, no recursion necessary.
- */
-static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
+/* Scan for the next leaf starting at the provided key value */
+static struct tnode *leaf_walk_rcu(struct tnode **pn, t_key key)
 {
-	do {
-		unsigned long idx = c ? idx = get_index(c->key, p) + 1 : 0;
-
-		while (idx < tnode_child_length(p)) {
-			c = tnode_get_child_rcu(p, idx++);
-			if (!c)
-				continue;
-
-			if (IS_LEAF(c))
-				return c;
-
-			/* Rescan start scanning in new node */
-			p = c;
-			idx = 0;
-		}
+	struct tnode *tn = NULL, *n = *pn;
+	unsigned long cindex;
 
-		/* Node empty, walk back up to parent */
-		c = p;
-	} while ((p = node_parent_rcu(c)) != NULL);
+	/* record parent node for backtracing */
+	tn = n;
+	cindex = n ? get_index(key, n) : 0;
 
-	return NULL; /* Root of trie */
-}
+	/* this loop is meant to try and find the key in the trie */
+	while (n) {
+		unsigned long idx = get_index(key, n);
 
-static struct tnode *trie_firstleaf(struct trie *t)
-{
-	struct tnode *n = rcu_dereference_rtnl(t->trie);
+		/* guarantee forward progress on the keys */
+		if (IS_LEAF(n) && (n->key >= key))
+			goto found;
+		if (idx >> n->bits)
+			break;
 
-	if (!n)
-		return NULL;
+		/* record parent and next child index */
+		tn = n;
+		cindex = idx;
 
-	if (IS_LEAF(n))          /* trie is just a leaf */
-		return n;
+		/* descend into the next child */
+		n = tnode_get_child_rcu(tn, cindex++);
+	}
 
-	return leaf_walk_rcu(n, NULL);
-}
+	/* this loop will search for the next leaf with a greater key */
+	while (tn) {
+		/* if we exhausted the parent node we will need to climb */
+		if (cindex >> tn->bits) {
+			t_key pkey = tn->key;
 
-static struct tnode *trie_nextleaf(struct tnode *l)
-{
-	struct tnode *p = node_parent_rcu(l);
+			tn = node_parent_rcu(tn);
+			if (!tn)
+				break;
 
-	if (!p)
-		return NULL;	/* trie with just one leaf */
+			cindex = get_index(pkey, tn) + 1;
+			continue;
+		}
 
-	return leaf_walk_rcu(p, l);
-}
+		/* grab the next available node */
+		n = tnode_get_child_rcu(tn, cindex++);
+		if (!n)
+			continue;
 
-static struct tnode *trie_leafindex(struct trie *t, int index)
-{
-	struct tnode *l = trie_firstleaf(t);
+		/* no need to compare keys since we bumped the index */
+		if (IS_LEAF(n))
+			goto found;
 
-	while (l && index-- > 0)
-		l = trie_nextleaf(l);
+		/* Rescan start scanning in new node */
+		tn = n;
+		cindex = 0;
+	}
 
-	return l;
+	*pn = tn;
+	return NULL; /* Root of trie */
+found:
+	/* if we are at the limit for keys just return NULL for the tnode */
+	*pn = (n->key == KEY_MAX) ? NULL : tn;
+	return n;
 }
 
-
-/*
- * Caller must hold RTNL.
- */
+/* Caller must hold RTNL. */
 int fib_table_flush(struct fib_table *tb)
 {
 	struct trie *t = (struct trie *)tb->tb_data;
@@ -1682,42 +1682,42 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
 int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
 		   struct netlink_callback *cb)
 {
-	struct tnode *l;
-	struct trie *t = (struct trie *) tb->tb_data;
-	t_key key = cb->args[2];
-	int count = cb->args[3];
-
-	rcu_read_lock();
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct tnode *l, *tp;
 	/* Dump starting at last key.
 	 * Note: 0.0.0.0/0 (ie default) is first key.
 	 */
-	if (count == 0)
-		l = trie_firstleaf(t);
-	else {
-		/* Normally, continue from last key, but if that is missing
-		 * fallback to using slow rescan
-		 */
-		l = fib_find_node(t, key);
-		if (!l)
-			l = trie_leafindex(t, count);
-	}
+	int count = cb->args[2];
+	t_key key = cb->args[3];
+
+	rcu_read_lock();
 
-	while (l) {
-		cb->args[2] = l->key;
+	tp = rcu_dereference_rtnl(t->trie);
+
+	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
 		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
-			cb->args[3] = count;
+			cb->args[3] = key;
+			cb->args[2] = count;
 			rcu_read_unlock();
 			return -1;
 		}
 
 		++count;
-		l = trie_nextleaf(l);
+		key = l->key + 1;
+
 		memset(&cb->args[4], 0,
 		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
+
+		/* stop loop if key wrapped back to 0 */
+		if (key < l->key)
+			break;
 	}
-	cb->args[3] = count;
+
 	rcu_read_unlock();
 
+	cb->args[3] = key;
+	cb->args[2] = count;
+
 	return skb->len;
 }
 
@@ -2188,31 +2188,46 @@ static const struct file_operations fib_trie_fops = {
 
 struct fib_route_iter {
 	struct seq_net_private p;
-	struct trie *main_trie;
+	struct fib_table *main_tb;
+	struct tnode *tnode;
 	loff_t	pos;
 	t_key	key;
 };
 
 static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
 {
-	struct tnode *l = NULL;
-	struct trie *t = iter->main_trie;
+	struct fib_table *tb = iter->main_tb;
+	struct tnode *l, **tp = &iter->tnode;
+	struct trie *t;
+	t_key key;
 
-	/* use cache location of last found key */
-	if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
+	/* use cache location of next-to-find key */
+	if (iter->pos > 0 && pos >= iter->pos) {
 		pos -= iter->pos;
-	else {
+		key = iter->key;
+	} else {
+		t = (struct trie *)tb->tb_data;
+		iter->tnode = rcu_dereference_rtnl(t->trie);
 		iter->pos = 0;
-		l = trie_firstleaf(t);
+		key = 0;
 	}
 
-	while (l && pos-- > 0) {
+	while ((l = leaf_walk_rcu(tp, key)) != NULL) {
+		key = l->key + 1;
 		iter->pos++;
-		l = trie_nextleaf(l);
+
+		if (pos-- <= 0)
+			break;
+
+		l = NULL;
+
+		/* handle unlikely case of a key wrap */
+		if (!key)
+			break;
 	}
 
 	if (l)
-		iter->key = pos;	/* remember it */
+		iter->key = key;	/* remember it */
 	else
 		iter->pos = 0;		/* forget it */
 
@@ -2224,37 +2239,46 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct fib_route_iter *iter = seq->private;
 	struct fib_table *tb;
+	struct trie *t;
 
 	rcu_read_lock();
+
 	tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
 	if (!tb)
 		return NULL;
 
-	iter->main_trie = (struct trie *) tb->tb_data;
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-	else
-		return fib_route_get_idx(iter, *pos - 1);
+	iter->main_tb = tb;
+
+	if (*pos != 0)
+		return fib_route_get_idx(iter, *pos);
+
+	t = (struct trie *)tb->tb_data;
+	iter->tnode = rcu_dereference_rtnl(t->trie);
+	iter->pos = 0;
+	iter->key = 0;
+
+	return SEQ_START_TOKEN;
 }
 
 static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct fib_route_iter *iter = seq->private;
-	struct tnode *l = v;
+	struct tnode *l = NULL;
+	t_key key = iter->key;
 
 	++*pos;
-	if (v == SEQ_START_TOKEN) {
-		iter->pos = 0;
-		l = trie_firstleaf(iter->main_trie);
-	} else {
+
+	/* only allow key of 0 for start of sequence */
+	if ((v == SEQ_START_TOKEN) || key)
+		l = leaf_walk_rcu(&iter->tnode, key);
+
+	if (l) {
+		iter->key = l->key + 1;
 		iter->pos++;
-		l = trie_nextleaf(l);
+	} else {
+		iter->pos = 0;
 	}
 
-	if (l)
-		iter->key = l->key;
-	else
-		iter->pos = 0;
 	return l;
 }
 

  parent reply	other threads:[~2015-02-24 20:48 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-24 20:47 [RFC PATCH 00/29] Phase 2 of fib_trie updates Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 01/29] fib_trie: Convert fib_alias to hlist from list Alexander Duyck
2015-02-24 21:51   ` Or Gerlitz
2015-02-24 21:52     ` Or Gerlitz
2015-02-24 22:08     ` David Miller
2015-02-24 22:14       ` Alexander Duyck
2015-02-24 22:47   ` Julian Anastasov
2015-02-24 23:09   ` Julian Anastasov
2015-02-24 20:48 ` [RFC PATCH 02/29] fib_trie: Replace plen with slen in leaf_info Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 03/29] fib_trie: Add slen to fib alias Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 04/29] fib_trie: Remove leaf_info Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 05/29] fib_trie: Only resize N/2 times instead N * log(N) times in fib_table_flush Alexander Duyck
2015-02-24 20:48 ` Alexander Duyck [this message]
2015-02-24 20:48 ` [RFC PATCH 07/29] fib_trie: Fib find node should return parent Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 08/29] fib_trie: Update insert and delete to make use of tp from find_node Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 09/29] fib_trie: Make fib_table rcu safe Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 10/29] fib_trie: Return pointer to tnode pointer in resize/inflate/halve Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 11/29] fib_trie: Rename tnode to key_vector Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 12/29] fib_trie: move leaf and tnode to occupy the same spot in the key vector Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 13/29] fib_trie: replace tnode_get_child functions with get_child macros Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 14/29] fib_trie: Rename tnode_child_length to child_length Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 15/29] fib_trie: Add tnode struct as a container for fields not needed in key_vector Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 16/29] fib_trie: Move rcu from key_vector to tnode, add accessors Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 17/29] fib_trie: Pull empty_children and full_children into tnode Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 18/29] fib_trie: Move parent from key_vector to tnode Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 19/29] fib_trie: Add key vector to root, return parent key_vector in resize Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 20/29] fib_trie: Push net pointer down into fib_trie insert/delete/flush calls Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 21/29] fib_trie: Rewrite handling of RCU to include parent in replacement Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 22/29] fib_trie: Allocate tnode as array of key_vectors instead of key_vector as array of tnode pointers Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 23/29] fib_trie: Add leaf_init Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 24/29] fib_trie: Update tnode_new to drop use of put_child_root Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 25/29] fib_trie: Add function for dropping children from trie Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 26/29] fib_trie: Use put_child to only copy key_vectors instead of pointers Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 27/29] fib_trie: Move key and pos into key_vector from tnode Alexander Duyck
2015-02-24 20:51 ` [RFC PATCH 28/29] fib_trie: Move slen from tnode to key vector Alexander Duyck
2015-02-24 20:51 ` [RFC PATCH 29/29] fib_trie: Push bits up one level, and move leaves up into parent key_vector array Alexander Duyck
2015-02-25  3:53 ` [RFC PATCH 00/29] Phase 2 of fib_trie updates David Miller
2015-02-25  5:12   ` Alexander Duyck
2015-02-27 21:01     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150224204835.26106.84691.stgit@ahduyck-vm-fedora20 \
    --to=alexander.h.duyck@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox