From: Alexander Duyck <alexander.h.duyck@redhat.com>
To: netdev@vger.kernel.org
Subject: [net-next PATCH 15/17] fib_trie: inflate/halve nodes in a more RCU friendly way
Date: Wed, 31 Dec 2014 10:56:55 -0800 [thread overview]
Message-ID: <20141231185655.3006.59505.stgit@ahduyck-vm-fedora20> (raw)
In-Reply-To: <20141231184649.3006.29958.stgit@ahduyck-vm-fedora20>
This change pulls the node_set_parent functionality out of put_child_reorg
and instead leaves that to the function to take care of as well. By doing
this we can fully construct the new cluster of tnodes and all of the
pointers out of it before we start routing pointers into it.
I am suspecting this will likely fix some concurency issues though I don't
have a good test to show as such.
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
net/ipv4/fib_trie.c | 236 +++++++++++++++++++++++++--------------------------
1 file changed, 115 insertions(+), 121 deletions(-)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 485983e..0c88df0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -391,8 +391,6 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n)
else if (!wasfull && isfull)
tn->full_children++;
- node_set_parent(n, tn);
-
rcu_assign_pointer(tn->child[i], n);
}
@@ -436,10 +434,8 @@ static void tnode_free(struct tnode *tn)
static int inflate(struct trie *t, struct tnode *oldtnode)
{
- unsigned long olen = tnode_child_length(oldtnode);
- struct tnode *tp = node_parent(oldtnode);
- struct tnode *tn;
- unsigned long i;
+ struct tnode *inode, *node0, *node1, *tn, *tp;
+ unsigned long i, j, k;
t_key m;
pr_debug("In inflate\n");
@@ -448,43 +444,13 @@ static int inflate(struct trie *t, struct tnode *oldtnode)
if (!tn)
return -ENOMEM;
- /*
- * Preallocate and store tnodes before the actual work so we
- * don't get into an inconsistent state if memory allocation
- * fails. In case of failure we return the oldnode and inflate
- * of tnode is ignored.
+ /* Assemble all of the pointers in our cluster, in this case that
+ * represents all of the pointers out of our allocated nodes that
+ * point to existing tnodes and the links between our allocated
+ * nodes.
*/
- for (i = 0, m = 1u << tn->pos; i < olen; i++) {
- struct tnode *inode = tnode_get_child(oldtnode, i);
-
- if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {
- struct tnode *left, *right;
-
- left = tnode_new(inode->key & ~m, inode->pos,
- inode->bits - 1);
- if (!left)
- goto nomem;
- tnode_free_append(tn, left);
-
- right = tnode_new(inode->key | m, inode->pos,
- inode->bits - 1);
-
- if (!right)
- goto nomem;
- tnode_free_append(tn, right);
-
- put_child(tn, 2*i, left);
- put_child(tn, 2*i+1, right);
- }
- }
-
- /* prepare oldtnode to be freed */
- tnode_free_init(oldtnode);
-
- for (i = 0; i < olen; i++) {
- struct tnode *inode = tnode_get_child(oldtnode, i);
- struct tnode *left, *right;
- unsigned long size, j;
+ for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) {
+ inode = tnode_get_child(oldtnode, --i);
/* An empty child */
if (inode == NULL)
@@ -496,65 +462,99 @@ static int inflate(struct trie *t, struct tnode *oldtnode)
continue;
}
- /* drop the node in the old tnode free list */
- tnode_free_append(oldtnode, inode);
-
/* An internal node with two children */
if (inode->bits == 1) {
- put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
- put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
+ put_child(tn, 2 * i + 1, tnode_get_child(inode, 1));
+ put_child(tn, 2 * i, tnode_get_child(inode, 0));
continue;
}
- /* An internal node with more than two children */
-
/* We will replace this node 'inode' with two new
- * ones, 'left' and 'right', each with half of the
+ * ones, 'node0' and 'node1', each with half of the
* original children. The two new nodes will have
* a position one bit further down the key and this
* means that the "significant" part of their keys
* (see the discussion near the top of this file)
* will differ by one bit, which will be "0" in
- * left's key and "1" in right's key. Since we are
+ * node0's key and "1" in node1's key. Since we are
* moving the key position by one step, the bit that
* we are moving away from - the bit at position
- * (inode->pos) - is the one that will differ between
- * left and right. So... we synthesize that bit in the
- * two new keys.
- * The mask 'm' below will be a single "one" bit at
- * the position (inode->pos)
+ * (tn->pos) - is the one that will differ between
+ * node0 and node1. So... we synthesize that bit in the
+ * two new keys.
*/
+ node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
+ if (!node1)
+ goto nomem;
+ tnode_free_append(tn, node1);
+
+ node0 = tnode_new(inode->key & ~m, inode->pos, inode->bits - 1);
+ if (!node0)
+ goto nomem;
+ tnode_free_append(tn, node0);
+
+ /* populate child pointers in new nodes */
+ for (k = tnode_child_length(inode), j = k / 2; j;) {
+ put_child(node1, --j, tnode_get_child(inode, --k));
+ put_child(node0, j, tnode_get_child(inode, j));
+ put_child(node1, --j, tnode_get_child(inode, --k));
+ put_child(node0, j, tnode_get_child(inode, j));
+ }
- /* Use the old key, but set the new significant
- * bit to zero.
- */
+ /* link new nodes to parent */
+ NODE_INIT_PARENT(node1, tn);
+ NODE_INIT_PARENT(node0, tn);
+
+ /* link parent to nodes */
+ put_child(tn, 2 * i + 1, node1);
+ put_child(tn, 2 * i, node0);
+ }
- left = tnode_get_child(tn, 2*i);
- put_child(tn, 2*i, NULL);
+ /* setup the parent pointer into and out of this node */
+ tp = node_parent(oldtnode);
+ NODE_INIT_PARENT(tn, tp);
+ put_child_root(tp, t, tn->key, tn);
+
+ /* prepare oldtnode to be freed */
+ tnode_free_init(oldtnode);
- BUG_ON(!left);
+ /* update all child nodes parent pointers to route to us */
+ for (i = tnode_child_length(oldtnode); i;) {
+ inode = tnode_get_child(oldtnode, --i);
+
+ /* A leaf or an internal node with skipped bits */
+ if (!tnode_full(oldtnode, inode)) {
+ node_set_parent(inode, tn);
+ continue;
+ }
- right = tnode_get_child(tn, 2*i+1);
- put_child(tn, 2*i+1, NULL);
+ /* drop the node in the old tnode free list */
+ tnode_free_append(oldtnode, inode);
- BUG_ON(!right);
+ /* fetch new nodes */
+ node1 = tnode_get_child(tn, 2 * i + 1);
+ node0 = tnode_get_child(tn, 2 * i);
- size = tnode_child_length(left);
- for (j = 0; j < size; j++) {
- put_child(left, j, rtnl_dereference(inode->child[j]));
- put_child(right, j, rtnl_dereference(inode->child[j + size]));
+ /* bits == 1 then node0 and node1 represent inode's children */
+ if (inode->bits == 1) {
+ node_set_parent(node1, tn);
+ node_set_parent(node0, tn);
+ continue;
}
- put_child(tn, 2 * i, left);
- put_child(tn, 2 * i + 1, right);
+ /* update parent pointers in child node's children */
+ for (k = tnode_child_length(inode), j = k / 2; j;) {
+ node_set_parent(tnode_get_child(inode, --k), node1);
+ node_set_parent(tnode_get_child(inode, --j), node0);
+ node_set_parent(tnode_get_child(inode, --k), node1);
+ node_set_parent(tnode_get_child(inode, --j), node0);
+ }
/* resize child nodes */
- resize(t, left);
- resize(t, right);
+ resize(t, node1);
+ resize(t, node0);
}
- put_child_root(tp, t, tn->key, tn);
-
/* we completed without error, prepare to free old node */
tnode_free(oldtnode);
return 0;
@@ -566,10 +566,8 @@ nomem:
static int halve(struct trie *t, struct tnode *oldtnode)
{
- unsigned long olen = tnode_child_length(oldtnode);
- struct tnode *tp = node_parent(oldtnode);
- struct tnode *tn, *left, *right;
- int i;
+ struct tnode *tn, *tp, *inode, *node0, *node1;
+ unsigned long i;
pr_debug("In halve\n");
@@ -577,68 +575,64 @@ static int halve(struct trie *t, struct tnode *oldtnode)
if (!tn)
return -ENOMEM;
- /*
- * Preallocate and store tnodes before the actual work so we
- * don't get into an inconsistent state if memory allocation
- * fails. In case of failure we return the oldnode and halve
- * of tnode is ignored.
+ /* Assemble all of the pointers in our cluster, in this case that
+ * represents all of the pointers out of our allocated nodes that
+ * point to existing tnodes and the links between our allocated
+ * nodes.
*/
+ for (i = tnode_child_length(oldtnode); i;) {
+ node1 = tnode_get_child(oldtnode, --i);
+ node0 = tnode_get_child(oldtnode, --i);
- for (i = 0; i < olen; i += 2) {
- left = tnode_get_child(oldtnode, i);
- right = tnode_get_child(oldtnode, i+1);
+ /* At least one of the children is empty */
+ if (!node1 || !node0) {
+ put_child(tn, i / 2, node1 ? : node0);
+ continue;
+ }
/* Two nonempty children */
- if (left && right) {
- struct tnode *newn;
-
- newn = tnode_new(left->key, oldtnode->pos, 1);
- if (!newn) {
- tnode_free(tn);
- return -ENOMEM;
- }
- tnode_free_append(tn, newn);
-
- put_child(tn, i/2, newn);
+ inode = tnode_new(node0->key, oldtnode->pos, 1);
+ if (!inode) {
+ tnode_free(tn);
+ return -ENOMEM;
}
+ tnode_free_append(tn, inode);
+ /* initialize pointers out of node */
+ put_child(inode, 1, node1);
+ put_child(inode, 0, node0);
+ NODE_INIT_PARENT(inode, tn);
+
+ /* link parent to node */
+ put_child(tn, i / 2, inode);
}
+ /* setup the parent pointer out of and back into this node */
+ tp = node_parent(oldtnode);
+ NODE_INIT_PARENT(tn, tp);
+ put_child_root(tp, t, tn->key, tn);
+
/* prepare oldtnode to be freed */
tnode_free_init(oldtnode);
- for (i = 0; i < olen; i += 2) {
- struct tnode *newBinNode;
-
- left = tnode_get_child(oldtnode, i);
- right = tnode_get_child(oldtnode, i+1);
-
- /* At least one of the children is empty */
- if (left == NULL) {
- if (right == NULL) /* Both are empty */
- continue;
- put_child(tn, i/2, right);
- continue;
- }
+ /* update all of the child parent pointers */
+ for (i = tnode_child_length(tn); i;) {
+ inode = tnode_get_child(tn, --i);
- if (right == NULL) {
- put_child(tn, i/2, left);
+ /* only new tnodes will be considered "full" nodes */
+ if (!tnode_full(tn, inode)) {
+ node_set_parent(inode, tn);
continue;
}
/* Two nonempty children */
- newBinNode = tnode_get_child(tn, i/2);
- put_child(newBinNode, 0, left);
- put_child(newBinNode, 1, right);
-
- put_child(tn, i / 2, newBinNode);
+ node_set_parent(tnode_get_child(inode, 1), inode);
+ node_set_parent(tnode_get_child(inode, 0), inode);
/* resize child node */
- resize(t, newBinNode);
+ resize(t, inode);
}
- put_child_root(tp, t, tn->key, tn);
-
/* all pointers should be clean so we are done */
tnode_free(oldtnode);
next prev parent reply other threads:[~2014-12-31 18:56 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-12-31 18:55 [net-next PATCH 00/17] fib_trie: Reduce time spent in fib_table_lookup by 35 to 75% Alexander Duyck
2014-12-31 18:55 ` [net-next PATCH 01/17] fib_trie: Update usage stats to be percpu instead of global variables Alexander Duyck
2014-12-31 18:55 ` [net-next PATCH 02/17] fib_trie: Make leaf and tnode more uniform Alexander Duyck
2014-12-31 18:55 ` [net-next PATCH 03/17] fib_trie: Merge tnode_free and leaf_free into node_free Alexander Duyck
2014-12-31 18:55 ` [net-next PATCH 04/17] fib_trie: Merge leaf into tnode Alexander Duyck
2014-12-31 18:55 ` [net-next PATCH 05/17] fib_trie: Optimize fib_table_lookup to avoid wasting time on loops/variables Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 06/17] fib_trie: Optimize fib_find_node Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 07/17] fib_trie: Optimize fib_table_insert Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 08/17] fib_trie: Update meaning of pos to represent unchecked bits Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 09/17] fib_trie: Use unsigned long for anything dealing with a shift by bits Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 10/17] fib_trie: Push rcu_read_lock/unlock to callers Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 11/17] fib_trie: Move resize to after inflate/halve Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 12/17] fib_trie: Add functions should_inflate and should_halve Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 13/17] fib_trie: Push assignment of child to parent down into inflate/halve Alexander Duyck
2014-12-31 18:56 ` [net-next PATCH 14/17] fib_trie: Push tnode flushing down to inflate/halve Alexander Duyck
2014-12-31 18:56 ` Alexander Duyck [this message]
2014-12-31 18:57 ` [net-next PATCH 16/17] fib_trie: Remove checks for index >= tnode_child_length from tnode_get_child Alexander Duyck
2014-12-31 18:57 ` [net-next PATCH 17/17] fib_trie: Add tracking value for suffix length Alexander Duyck
2014-12-31 23:46 ` [net-next PATCH 00/17] fib_trie: Reduce time spent in fib_table_lookup by 35 to 75% David Miller
2015-01-01 2:32 ` Alexander Duyck
2015-01-02 2:08 ` David Miller
2015-01-02 16:28 ` Alexander Duyck
2015-01-02 20:34 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141231185655.3006.59505.stgit@ahduyck-vm-fedora20 \
--to=alexander.h.duyck@redhat.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).