netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [net-next PATCH 0/4] fib_trie: Remove leaf_info structure
@ 2015-02-25 19:05 Alexander Duyck
  2015-02-25 19:06 ` [net-next PATCH 1/4] fib_trie: Convert fib_alias to hlist from list Alexander Duyck
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Alexander Duyck @ 2015-02-25 19:05 UTC (permalink / raw)
  To: netdev; +Cc: ja, davem

This patch set removes the leaf_info structure from the IPv4 fib_trie.  The
general idea is that the leaf_info structure itself only held about 6
actual bits of data, beyond that it was mostly just waste.  As such we can
drop the structure, move the 1 byte representing the prefix/suffix length
into the fib_alias and just link it all into one list.

My testing shows that this saves somewhere between 4 to 10ns depending on
the type of test performed.  I'm suspecting that this represents 1 to 2 L1
cache misses saved per look-up.

One side effect of this change is that semantic_match_miss will now only
increment once per leaf instead of once per leaf_info miss.  However the
stat is already skewed now that we perform a preliminary check on the leaf
as a part of the look-up.

I also have gone through and addressed a number of ordering issues in the
first patch since I had misread the behavior of list_add_tail.

I have since run some additional testing and verified the resulting lists
are in the same order when combining multiple prefix length and tos values
in a single leaf.

---

Alexander Duyck (4):
      fib_trie: Convert fib_alias to hlist from list
      fib_trie: Replace plen with slen in leaf_info
      fib_trie: Add slen to fib alias
      fib_trie: Remove leaf_info


 include/net/ip_fib.h     |    2 
 net/ipv4/fib_lookup.h    |    3 
 net/ipv4/fib_semantics.c |    4 
 net/ipv4/fib_trie.c      |  489 ++++++++++++++++------------------------------
 4 files changed, 176 insertions(+), 322 deletions(-)

--

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [net-next PATCH 1/4] fib_trie: Convert fib_alias to hlist from list
  2015-02-25 19:05 [net-next PATCH 0/4] fib_trie: Remove leaf_info structure Alexander Duyck
@ 2015-02-25 19:06 ` Alexander Duyck
  2015-02-25 19:06 ` [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info Alexander Duyck
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Alexander Duyck @ 2015-02-25 19:06 UTC (permalink / raw)
  To: netdev; +Cc: ja, davem

There isn't any advantage to having it as a list and by making it an hlist
we make the fib_alias more compatible with the list_info in terms of the
type of list used.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
 include/net/ip_fib.h     |    2 +
 net/ipv4/fib_lookup.h    |    2 +
 net/ipv4/fib_semantics.c |    4 +-
 net/ipv4/fib_trie.c      |   80 +++++++++++++++++++++++++---------------------
 4 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 5bd120e..cba4b7c 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -136,7 +136,7 @@ struct fib_result {
 	u32		tclassid;
 	struct fib_info *fi;
 	struct fib_table *table;
-	struct list_head *fa_head;
+	struct hlist_head *fa_head;
 };
 
 struct fib_result_nl {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 825981b..3cd444f 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -6,7 +6,7 @@
 #include <net/ip_fib.h>
 
 struct fib_alias {
-	struct list_head	fa_list;
+	struct hlist_node	fa_list;
 	struct fib_info		*fa_info;
 	u8			fa_tos;
 	u8			fa_type;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1e2090e..c6d2674 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1163,12 +1163,12 @@ int fib_sync_down_dev(struct net_device *dev, int force)
 void fib_select_default(struct fib_result *res)
 {
 	struct fib_info *fi = NULL, *last_resort = NULL;
-	struct list_head *fa_head = res->fa_head;
+	struct hlist_head *fa_head = res->fa_head;
 	struct fib_table *tb = res->table;
 	int order = -1, last_idx = -1;
 	struct fib_alias *fa;
 
-	list_for_each_entry_rcu(fa, fa_head, fa_list) {
+	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
 
 		if (next_fi->fib_scope != res->scope ||
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3daf022..f17e2239 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -116,7 +116,7 @@ struct leaf_info {
 	struct hlist_node hlist;
 	int plen;
 	u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
-	struct list_head falh;
+	struct hlist_head falh;
 	struct rcu_head rcu;
 };
 
@@ -339,7 +339,7 @@ static struct leaf_info *leaf_info_new(int plen)
 	if (li) {
 		li->plen = plen;
 		li->mask_plen = ntohl(inet_make_mask(plen));
-		INIT_LIST_HEAD(&li->falh);
+		INIT_HLIST_HEAD(&li->falh);
 	}
 	return li;
 }
@@ -881,7 +881,7 @@ static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
 	return NULL;
 }
 
-static inline struct list_head *get_fa_head(struct tnode *l, int plen)
+static inline struct hlist_head *get_fa_head(struct tnode *l, int plen)
 {
 	struct leaf_info *li = find_leaf_info(l, plen);
 
@@ -994,14 +994,15 @@ static struct tnode *fib_find_node(struct trie *t, u32 key)
 /* Return the first fib alias matching TOS with
  * priority less than or equal to PRIO.
  */
-static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
+static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 tos,
+					u32 prio)
 {
 	struct fib_alias *fa;
 
 	if (!fah)
 		return NULL;
 
-	list_for_each_entry(fa, fah, fa_list) {
+	hlist_for_each_entry(fa, fah, fa_list) {
 		if (fa->fa_tos > tos)
 			continue;
 		if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
@@ -1027,9 +1028,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
 
 /* only used from updater-side */
 
-static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
+static struct hlist_head *fib_insert_node(struct trie *t, u32 key, int plen)
 {
-	struct list_head *fa_head = NULL;
+	struct hlist_head *fa_head = NULL;
 	struct tnode *l, *n, *tp = NULL;
 	struct leaf_info *li;
 
@@ -1130,7 +1131,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *new_fa;
-	struct list_head *fa_head = NULL;
+	struct hlist_head *fa_head = NULL;
 	struct fib_info *fi;
 	int plen = cfg->fc_dst_len;
 	u8 tos = cfg->fc_tos;
@@ -1171,10 +1172,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	 * exists or to the node before which we will insert new one.
 	 *
 	 * If fa is NULL, we will need to allocate a new one and
-	 * insert to the head of f.
-	 *
-	 * If f is NULL, no fib node matched the destination key
-	 * and we need to allocate a new one of those as well.
+	 * insert to the tail of the section matching the suffix length
+	 * of the new alias.
 	 */
 
 	if (fa && fa->fa_tos == tos &&
@@ -1192,8 +1191,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 		 */
 		fa_match = NULL;
 		fa_first = fa;
-		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-		list_for_each_entry_continue(fa, fa_head, fa_list) {
+		hlist_for_each_entry_from(fa, fa_list) {
 			if (fa->fa_tos != tos)
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
@@ -1227,7 +1225,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			state = fa->fa_state;
 			new_fa->fa_state = state & ~FA_S_ACCESSED;
 
-			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
 			alias_free_mem_rcu(fa);
 
 			fib_release_info(fi_drop);
@@ -1276,8 +1274,19 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	if (!plen)
 		tb->tb_num_default++;
 
-	list_add_tail_rcu(&new_fa->fa_list,
-			  (fa ? &fa->fa_list : fa_head));
+	if (fa) {
+		hlist_add_before_rcu(&new_fa->fa_list, &fa->fa_list);
+	} else {
+		struct fib_alias *last;
+
+		hlist_for_each_entry(last, fa_head, fa_list)
+			fa = last;
+
+		if (fa)
+			hlist_add_behind_rcu(&new_fa->fa_list, &fa->fa_list);
+		else
+			hlist_add_head_rcu(&new_fa->fa_list, fa_head);
+	}
 
 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
@@ -1419,7 +1428,7 @@ found:
 		if ((key ^ n->key) & li->mask_plen)
 			continue;
 
-		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 			int nhsel, err;
 
@@ -1501,7 +1510,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	int plen = cfg->fc_dst_len;
 	u8 tos = cfg->fc_tos;
 	struct fib_alias *fa, *fa_to_delete;
-	struct list_head *fa_head;
+	struct hlist_head *fa_head;
 	struct tnode *l;
 	struct leaf_info *li;
 
@@ -1534,8 +1543,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
 
 	fa_to_delete = NULL;
-	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-	list_for_each_entry_continue(fa, fa_head, fa_list) {
+	hlist_for_each_entry_from(fa, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
 		if (fa->fa_tos != tos)
@@ -1561,12 +1569,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 
-	list_del_rcu(&fa->fa_list);
+	hlist_del_rcu(&fa->fa_list);
 
 	if (!plen)
 		tb->tb_num_default--;
 
-	if (list_empty(fa_head)) {
+	if (hlist_empty(fa_head)) {
 		remove_leaf_info(l, li);
 		free_leaf_info(li);
 	}
@@ -1582,16 +1590,17 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	return 0;
 }
 
-static int trie_flush_list(struct list_head *head)
+static int trie_flush_list(struct hlist_head *head)
 {
-	struct fib_alias *fa, *fa_node;
+	struct hlist_node *tmp;
+	struct fib_alias *fa;
 	int found = 0;
 
-	list_for_each_entry_safe(fa, fa_node, head, fa_list) {
+	hlist_for_each_entry_safe(fa, tmp, head, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
-			list_del_rcu(&fa->fa_list);
+			hlist_del_rcu(&fa->fa_list);
 			fib_release_info(fa->fa_info);
 			alias_free_mem_rcu(fa);
 			found++;
@@ -1603,15 +1612,14 @@ static int trie_flush_list(struct list_head *head)
 static int trie_flush_leaf(struct tnode *l)
 {
 	int found = 0;
-	struct hlist_head *lih = &l->list;
 	struct hlist_node *tmp;
-	struct leaf_info *li = NULL;
+	struct leaf_info *li;
 	unsigned char plen = KEYLENGTH;
 
-	hlist_for_each_entry_safe(li, tmp, lih, hlist) {
+	hlist_for_each_entry_safe(li, tmp, &l->list, hlist) {
 		found += trie_flush_list(&li->falh);
 
-		if (list_empty(&li->falh)) {
+		if (hlist_empty(&li->falh)) {
 			hlist_del_rcu(&li->hlist);
 			free_leaf_info(li);
 			continue;
@@ -1731,7 +1739,7 @@ void fib_free_table(struct fib_table *tb)
 	kfree(tb);
 }
 
-static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
+static int fn_trie_dump_fa(t_key key, int plen, struct hlist_head *fah,
 			   struct fib_table *tb,
 			   struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -1744,7 +1752,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 
 	/* rcu_read_lock is hold by caller */
 
-	list_for_each_entry_rcu(fa, fah, fa_list) {
+	hlist_for_each_entry_rcu(fa, fah, fa_list) {
 		if (i < s_i) {
 			i++;
 			continue;
@@ -1787,7 +1795,7 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
 		if (i > s_i)
 			cb->args[5] = 0;
 
-		if (list_empty(&li->falh))
+		if (hlist_empty(&li->falh))
 			continue;
 
 		if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
@@ -2272,7 +2280,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 		hlist_for_each_entry_rcu(li, &n->list, hlist) {
 			struct fib_alias *fa;
 
-			list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+			hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
 				char buf1[32], buf2[32];
 
 				seq_indent(seq, iter->depth+1);
@@ -2429,7 +2437,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		mask = inet_make_mask(li->plen);
 		prefix = htonl(l->key);
 
-		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
 			const struct fib_info *fi = fa->fa_info;
 			unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info
  2015-02-25 19:05 [net-next PATCH 0/4] fib_trie: Remove leaf_info structure Alexander Duyck
  2015-02-25 19:06 ` [net-next PATCH 1/4] fib_trie: Convert fib_alias to hlist from list Alexander Duyck
@ 2015-02-25 19:06 ` Alexander Duyck
  2015-02-25 21:15   ` Julian Anastasov
  2015-02-25 19:06 ` [net-next PATCH 3/4] fib_trie: Add slen to fib alias Alexander Duyck
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Alexander Duyck @ 2015-02-25 19:06 UTC (permalink / raw)
  To: netdev; +Cc: ja, davem

This replaces the prefix length variable in the leaf_info structure with a
suffix length value, or host identifier length in bits.  By doing this it
makes it easier to sort out since the tnodes and leaf are carrying this
value as well since it is compatible with the ->pos field in tnodes.

I also cleaned up one spot that had some list manipulation that could be
simplified.  I basically updated it so that we just use hlist_add_head_rcu
instead of calling hlist_add_before_rcu on the first node in the list.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
 net/ipv4/fib_trie.c |   63 ++++++++++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 33 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f17e2239..ef5d91b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -114,8 +114,7 @@ struct tnode {
 
 struct leaf_info {
 	struct hlist_node hlist;
-	int plen;
-	u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
+	unsigned char slen;
 	struct hlist_head falh;
 	struct rcu_head rcu;
 };
@@ -337,8 +336,7 @@ static struct leaf_info *leaf_info_new(int plen)
 {
 	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
 	if (li) {
-		li->plen = plen;
-		li->mask_plen = ntohl(inet_make_mask(plen));
+		li->slen = KEYLENGTH - plen;
 		INIT_HLIST_HEAD(&li->falh);
 	}
 	return li;
@@ -873,9 +871,10 @@ static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
 {
 	struct hlist_head *head = &l->list;
 	struct leaf_info *li;
+	int slen = KEYLENGTH - plen;
 
 	hlist_for_each_entry_rcu(li, head, hlist)
-		if (li->plen == plen)
+		if (li->slen == slen)
 			return li;
 
 	return NULL;
@@ -929,33 +928,29 @@ static void remove_leaf_info(struct tnode *l, struct leaf_info *old)
 		return;
 
 	/* update the trie with the latest suffix length */
-	l->slen = KEYLENGTH - li->plen;
+	l->slen = li->slen;
 	leaf_pull_suffix(l);
 }
 
 static void insert_leaf_info(struct tnode *l, struct leaf_info *new)
 {
 	struct hlist_head *head = &l->list;
-	struct leaf_info *li = NULL, *last = NULL;
-
-	if (hlist_empty(head)) {
-		hlist_add_head_rcu(&new->hlist, head);
-	} else {
-		hlist_for_each_entry(li, head, hlist) {
-			if (new->plen > li->plen)
-				break;
+	struct leaf_info *li, *last = NULL;
 
-			last = li;
-		}
-		if (last)
-			hlist_add_behind_rcu(&new->hlist, &last->hlist);
-		else
-			hlist_add_before_rcu(&new->hlist, &li->hlist);
+	hlist_for_each_entry(li, head, hlist) {
+		if (new->slen < li->slen)
+			break;
+		last = li;
 	}
 
+	if (last)
+		hlist_add_behind_rcu(&new->hlist, &last->hlist);
+	else
+		hlist_add_head_rcu(&new->hlist, head);
+
 	/* if we added to the tail node then we need to update slen */
-	if (l->slen < (KEYLENGTH - new->plen)) {
-		l->slen = KEYLENGTH - new->plen;
+	if (l->slen < new->slen) {
+		l->slen = new->slen;
 		leaf_push_suffix(l);
 	}
 }
@@ -1139,7 +1134,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	int err;
 	struct tnode *l;
 
-	if (plen > 32)
+	if (plen > KEYLENGTH)
 		return -EINVAL;
 
 	key = ntohl(cfg->fc_dst);
@@ -1425,7 +1420,8 @@ found:
 	hlist_for_each_entry_rcu(li, &n->list, hlist) {
 		struct fib_alias *fa;
 
-		if ((key ^ n->key) & li->mask_plen)
+		if (((key ^ n->key) >> li->slen) &&
+		    ((BITS_PER_LONG > KEYLENGTH) || (li->slen != KEYLENGTH)))
 			continue;
 
 		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -1459,7 +1455,7 @@ found:
 				if (!(fib_flags & FIB_LOOKUP_NOREF))
 					atomic_inc(&fi->fib_clntref);
 
-				res->prefixlen = li->plen;
+				res->prefixlen = KEYLENGTH - li->slen;
 				res->nh_sel = nhsel;
 				res->type = fa->fa_type;
 				res->scope = fi->fib_scope;
@@ -1614,7 +1610,7 @@ static int trie_flush_leaf(struct tnode *l)
 	int found = 0;
 	struct hlist_node *tmp;
 	struct leaf_info *li;
-	unsigned char plen = KEYLENGTH;
+	unsigned char slen = 0;
 
 	hlist_for_each_entry_safe(li, tmp, &l->list, hlist) {
 		found += trie_flush_list(&li->falh);
@@ -1625,10 +1621,10 @@ static int trie_flush_leaf(struct tnode *l)
 			continue;
 		}
 
-		plen = li->plen;
+		slen = li->slen;
 	}
 
-	l->slen = KEYLENGTH - plen;
+	l->slen = slen;
 
 	return found;
 }
@@ -1739,7 +1735,7 @@ void fib_free_table(struct fib_table *tb)
 	kfree(tb);
 }
 
-static int fn_trie_dump_fa(t_key key, int plen, struct hlist_head *fah,
+static int fn_trie_dump_fa(t_key key, int slen, struct hlist_head *fah,
 			   struct fib_table *tb,
 			   struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -1764,7 +1760,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct hlist_head *fah,
 				  tb->tb_id,
 				  fa->fa_type,
 				  xkey,
-				  plen,
+				  KEYLENGTH - slen,
 				  fa->fa_tos,
 				  fa->fa_info, NLM_F_MULTI) < 0) {
 			cb->args[5] = i;
@@ -1798,7 +1794,7 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
 		if (hlist_empty(&li->falh))
 			continue;
 
-		if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
+		if (fn_trie_dump_fa(l->key, li->slen, &li->falh, tb, skb, cb) < 0) {
 			cb->args[4] = i;
 			return -1;
 		}
@@ -2284,7 +2280,8 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 				char buf1[32], buf2[32];
 
 				seq_indent(seq, iter->depth+1);
-				seq_printf(seq, "  /%d %s %s", li->plen,
+				seq_printf(seq, "  /%zu %s %s",
+					   KEYLENGTH - li->slen,
 					   rtn_scope(buf1, sizeof(buf1),
 						     fa->fa_info->fib_scope),
 					   rtn_type(buf2, sizeof(buf2),
@@ -2434,7 +2431,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		struct fib_alias *fa;
 		__be32 mask, prefix;
 
-		mask = inet_make_mask(li->plen);
+		mask = inet_make_mask(KEYLENGTH - li->slen);
 		prefix = htonl(l->key);
 
 		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [net-next PATCH 3/4] fib_trie: Add slen to fib alias
  2015-02-25 19:05 [net-next PATCH 0/4] fib_trie: Remove leaf_info structure Alexander Duyck
  2015-02-25 19:06 ` [net-next PATCH 1/4] fib_trie: Convert fib_alias to hlist from list Alexander Duyck
  2015-02-25 19:06 ` [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info Alexander Duyck
@ 2015-02-25 19:06 ` Alexander Duyck
  2015-02-25 19:06 ` [net-next PATCH 4/4] fib_trie: Remove leaf_info Alexander Duyck
  2015-02-27 22:06 ` [net-next PATCH 0/4] fib_trie: Remove leaf_info structure David Miller
  4 siblings, 0 replies; 14+ messages in thread
From: Alexander Duyck @ 2015-02-25 19:06 UTC (permalink / raw)
  To: netdev; +Cc: ja, davem

Make use of an empty spot in the alias to store the suffix length so that
we don't need to pull that information from the leaf_info structure.

This patch also makes a slight change to the user statistics.  Instead of
incrementing semantic_match_miss once per leaf_info miss we now just
increment it once per leaf if a match was not found.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
 net/ipv4/fib_lookup.h |    1 +
 net/ipv4/fib_trie.c   |   37 ++++++++++++++++++-------------------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 3cd444f..ae2e6ee 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -11,6 +11,7 @@ struct fib_alias {
 	u8			fa_tos;
 	u8			fa_type;
 	u8			fa_state;
+	u8			fa_slen;
 	struct rcu_head		rcu;
 };
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ef5d91b..8c9f789 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1219,6 +1219,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			new_fa->fa_type = cfg->fc_type;
 			state = fa->fa_state;
 			new_fa->fa_state = state & ~FA_S_ACCESSED;
+			new_fa->fa_slen = fa->fa_slen;
 
 			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
 			alias_free_mem_rcu(fa);
@@ -1254,10 +1255,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	new_fa->fa_tos = tos;
 	new_fa->fa_type = cfg->fc_type;
 	new_fa->fa_state = 0;
-	/*
-	 * Insert new entry to the list.
-	 */
+	new_fa->fa_slen = KEYLENGTH - plen;
 
+	/* Insert new entry to the list. */
 	if (!fa_head) {
 		fa_head = fib_insert_node(t, key, plen);
 		if (unlikely(!fa_head)) {
@@ -1420,14 +1420,14 @@ found:
 	hlist_for_each_entry_rcu(li, &n->list, hlist) {
 		struct fib_alias *fa;
 
-		if (((key ^ n->key) >> li->slen) &&
-		    ((BITS_PER_LONG > KEYLENGTH) || (li->slen != KEYLENGTH)))
-			continue;
-
 		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
 			struct fib_info *fi = fa->fa_info;
 			int nhsel, err;
 
+			if (((key ^ n->key) >> fa->fa_slen) &&
+			    ((BITS_PER_LONG > KEYLENGTH) ||
+			     (fa->fa_slen != KEYLENGTH)))
+				continue;
 			if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
 				continue;
 			if (fi->fib_dead)
@@ -1455,7 +1455,7 @@ found:
 				if (!(fib_flags & FIB_LOOKUP_NOREF))
 					atomic_inc(&fi->fib_clntref);
 
-				res->prefixlen = KEYLENGTH - li->slen;
+				res->prefixlen = KEYLENGTH - fa->fa_slen;
 				res->nh_sel = nhsel;
 				res->type = fa->fa_type;
 				res->scope = fi->fib_scope;
@@ -1468,11 +1468,10 @@ found:
 				return err;
 			}
 		}
-
+	}
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-		this_cpu_inc(stats->semantic_match_miss);
+	this_cpu_inc(stats->semantic_match_miss);
 #endif
-	}
 	goto backtrace;
 }
 EXPORT_SYMBOL_GPL(fib_table_lookup);
@@ -1735,7 +1734,7 @@ void fib_free_table(struct fib_table *tb)
 	kfree(tb);
 }
 
-static int fn_trie_dump_fa(t_key key, int slen, struct hlist_head *fah,
+static int fn_trie_dump_fa(t_key key, struct hlist_head *fah,
 			   struct fib_table *tb,
 			   struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -1760,7 +1759,7 @@ static int fn_trie_dump_fa(t_key key, int slen, struct hlist_head *fah,
 				  tb->tb_id,
 				  fa->fa_type,
 				  xkey,
-				  KEYLENGTH - slen,
+				  KEYLENGTH - fa->fa_slen,
 				  fa->fa_tos,
 				  fa->fa_info, NLM_F_MULTI) < 0) {
 			cb->args[5] = i;
@@ -1794,7 +1793,7 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
 		if (hlist_empty(&li->falh))
 			continue;
 
-		if (fn_trie_dump_fa(l->key, li->slen, &li->falh, tb, skb, cb) < 0) {
+		if (fn_trie_dump_fa(l->key, &li->falh, tb, skb, cb) < 0) {
 			cb->args[4] = i;
 			return -1;
 		}
@@ -2281,7 +2280,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 
 				seq_indent(seq, iter->depth+1);
 				seq_printf(seq, "  /%zu %s %s",
-					   KEYLENGTH - li->slen,
+					   KEYLENGTH - fa->fa_slen,
 					   rtn_scope(buf1, sizeof(buf1),
 						     fa->fa_info->fib_scope),
 					   rtn_type(buf2, sizeof(buf2),
@@ -2419,6 +2418,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 {
 	struct tnode *l = v;
 	struct leaf_info *li;
+	__be32 prefix;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
@@ -2427,15 +2427,14 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		return 0;
 	}
 
+	prefix = htonl(l->key);
+
 	hlist_for_each_entry_rcu(li, &l->list, hlist) {
 		struct fib_alias *fa;
-		__be32 mask, prefix;
-
-		mask = inet_make_mask(KEYLENGTH - li->slen);
-		prefix = htonl(l->key);
 
 		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
 			const struct fib_info *fi = fa->fa_info;
+			__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
 			unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
 
 			if (fa->fa_type == RTN_BROADCAST

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [net-next PATCH 4/4] fib_trie: Remove leaf_info
  2015-02-25 19:05 [net-next PATCH 0/4] fib_trie: Remove leaf_info structure Alexander Duyck
                   ` (2 preceding siblings ...)
  2015-02-25 19:06 ` [net-next PATCH 3/4] fib_trie: Add slen to fib alias Alexander Duyck
@ 2015-02-25 19:06 ` Alexander Duyck
  2015-02-25 22:29   ` Julian Anastasov
  2015-02-27 22:06 ` [net-next PATCH 0/4] fib_trie: Remove leaf_info structure David Miller
  4 siblings, 1 reply; 14+ messages in thread
From: Alexander Duyck @ 2015-02-25 19:06 UTC (permalink / raw)
  To: netdev; +Cc: ja, davem

At this point the leaf_info hash is redundant.  By adding the suffix length
to the fib_alias hash list we no longer have need of leaf_info as we can
determine the prefix length from fa_slen.  So we can compress things by
dropping the leaf_info structure from fib_trie and instead directly connect
the leaves to the fib_alias hash list.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
 net/ipv4/fib_trie.c |  463 +++++++++++++++++----------------------------------
 1 file changed, 156 insertions(+), 307 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8c9f789..f485345 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -108,17 +108,10 @@ struct tnode {
 			struct tnode __rcu *child[0];
 		};
 		/* This list pointer if valid if bits == 0 (LEAF) */
-		struct hlist_head list;
+		struct hlist_head leaf;
 	};
 };
 
-struct leaf_info {
-	struct hlist_node hlist;
-	unsigned char slen;
-	struct hlist_head falh;
-	struct rcu_head rcu;
-};
-
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 struct trie_use_stats {
 	unsigned int gets;
@@ -289,11 +282,6 @@ static void __node_free_rcu(struct rcu_head *head)
 
 #define node_free(n) call_rcu(&n->rcu, __node_free_rcu)
 
-static inline void free_leaf_info(struct leaf_info *leaf)
-{
-	kfree_rcu(leaf, rcu);
-}
-
 static struct tnode *tnode_alloc(size_t size)
 {
 	if (size <= PAGE_SIZE)
@@ -327,21 +315,11 @@ static struct tnode *leaf_new(t_key key)
 		/* set bits to 0 indicating we are not a tnode */
 		l->bits = 0;
 
-		INIT_HLIST_HEAD(&l->list);
+		INIT_HLIST_HEAD(&l->leaf);
 	}
 	return l;
 }
 
-static struct leaf_info *leaf_info_new(int plen)
-{
-	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
-	if (li) {
-		li->slen = KEYLENGTH - plen;
-		INIT_HLIST_HEAD(&li->falh);
-	}
-	return li;
-}
-
 static struct tnode *tnode_new(t_key key, int pos, int bits)
 {
 	size_t sz = offsetof(struct tnode, child[1ul << bits]);
@@ -864,32 +842,6 @@ static void resize(struct trie *t, struct tnode *tn)
 	}
 }
 
-/* readside must use rcu_read_lock currently dump routines
- via get_fa_head and dump */
-
-static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
-{
-	struct hlist_head *head = &l->list;
-	struct leaf_info *li;
-	int slen = KEYLENGTH - plen;
-
-	hlist_for_each_entry_rcu(li, head, hlist)
-		if (li->slen == slen)
-			return li;
-
-	return NULL;
-}
-
-static inline struct hlist_head *get_fa_head(struct tnode *l, int plen)
-{
-	struct leaf_info *li = find_leaf_info(l, plen);
-
-	if (!li)
-		return NULL;
-
-	return &li->falh;
-}
-
 static void leaf_pull_suffix(struct tnode *l)
 {
 	struct tnode *tp = node_parent(l);
@@ -914,43 +866,47 @@ static void leaf_push_suffix(struct tnode *l)
 	}
 }
 
-static void remove_leaf_info(struct tnode *l, struct leaf_info *old)
+static void fib_remove_alias(struct tnode *l, struct fib_alias *old)
 {
 	/* record the location of the previous list_info entry */
-	struct hlist_node **pprev = old->hlist.pprev;
-	struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next);
+	struct hlist_node **pprev = old->fa_list.pprev;
+	struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
 
-	/* remove the leaf info from the list */
-	hlist_del_rcu(&old->hlist);
+	/* remove the fib_alias from the list */
+	hlist_del_rcu(&old->fa_list);
 
-	/* only access li if it is pointing at the last valid hlist_node */
-	if (hlist_empty(&l->list) || (*pprev))
+	/* only access fa if it is pointing at the last valid hlist_node */
+	if (hlist_empty(&l->leaf) || (*pprev))
 		return;
 
 	/* update the trie with the latest suffix length */
-	l->slen = li->slen;
+	l->slen = fa->fa_slen;
 	leaf_pull_suffix(l);
 }
 
-static void insert_leaf_info(struct tnode *l, struct leaf_info *new)
+static void fib_insert_alias(struct tnode *l, struct fib_alias *fa,
+			     struct fib_alias *new)
 {
-	struct hlist_head *head = &l->list;
-	struct leaf_info *li, *last = NULL;
+	if (fa) {
+		hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
+	} else {
+		struct fib_alias *last;
 
-	hlist_for_each_entry(li, head, hlist) {
-		if (new->slen < li->slen)
-			break;
-		last = li;
-	}
+		hlist_for_each_entry(last, &l->leaf, fa_list) {
+			if (new->fa_slen < last->fa_slen)
+				break;
+			fa = last;
+		}
 
-	if (last)
-		hlist_add_behind_rcu(&new->hlist, &last->hlist);
-	else
-		hlist_add_head_rcu(&new->hlist, head);
+		if (fa)
+			hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
+		else
+			hlist_add_head_rcu(&new->fa_list, &l->leaf);
+	}
 
 	/* if we added to the tail node then we need to update slen */
-	if (l->slen < new->slen) {
-		l->slen = new->slen;
+	if (l->slen < new->fa_slen) {
+		l->slen = new->fa_slen;
 		leaf_push_suffix(l);
 	}
 }
@@ -989,8 +945,8 @@ static struct tnode *fib_find_node(struct trie *t, u32 key)
 /* Return the first fib alias matching TOS with
  * priority less than or equal to PRIO.
  */
-static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 tos,
-					u32 prio)
+static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
+					u8 tos, u32 prio)
 {
 	struct fib_alias *fa;
 
@@ -998,6 +954,10 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 tos,
 		return NULL;
 
 	hlist_for_each_entry(fa, fah, fa_list) {
+		if (fa->fa_slen < slen)
+			continue;
+		if (fa->fa_slen != slen)
+			break;
 		if (fa->fa_tos > tos)
 			continue;
 		if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
@@ -1023,16 +983,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
 
 /* only used from updater-side */
 
-static struct hlist_head *fib_insert_node(struct trie *t, u32 key, int plen)
+static struct tnode *fib_insert_node(struct trie *t, u32 key, int plen)
 {
-	struct hlist_head *fa_head = NULL;
 	struct tnode *l, *n, *tp = NULL;
-	struct leaf_info *li;
-
-	li = leaf_info_new(plen);
-	if (!li)
-		return NULL;
-	fa_head = &li->falh;
 
 	n = rtnl_dereference(t->trie);
 
@@ -1063,8 +1016,7 @@ static struct hlist_head *fib_insert_node(struct trie *t, u32 key, int plen)
 		/* we have found a leaf. Prefixes have already been compared */
 		if (IS_LEAF(n)) {
 			/* Case 1: n is a leaf, and prefixes match*/
-			insert_leaf_info(n, li);
-			return fa_head;
+			return n;
 		}
 
 		tp = n;
@@ -1072,12 +1024,8 @@ static struct hlist_head *fib_insert_node(struct trie *t, u32 key, int plen)
 	}
 
 	l = leaf_new(key);
-	if (!l) {
-		free_leaf_info(li);
+	if (!l)
 		return NULL;
-	}
-
-	insert_leaf_info(l, li);
 
 	/* Case 2: n is a LEAF or a TNODE and the key doesn't match.
 	 *
@@ -1090,7 +1038,6 @@ static struct hlist_head *fib_insert_node(struct trie *t, u32 key, int plen)
 
 		tn = tnode_new(key, __fls(key ^ n->key), 1);
 		if (!tn) {
-			free_leaf_info(li);
 			node_free(l);
 			return NULL;
 		}
@@ -1116,7 +1063,7 @@ static struct hlist_head *fib_insert_node(struct trie *t, u32 key, int plen)
 		rcu_assign_pointer(t->trie, l);
 	}
 
-	return fa_head;
+	return l;
 }
 
 /*
@@ -1126,9 +1073,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *new_fa;
-	struct hlist_head *fa_head = NULL;
 	struct fib_info *fi;
-	int plen = cfg->fc_dst_len;
+	u8 plen = cfg->fc_dst_len;
+	u8 slen = KEYLENGTH - plen;
 	u8 tos = cfg->fc_tos;
 	u32 key, mask;
 	int err;
@@ -1146,8 +1093,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	if (key & ~mask)
 		return -EINVAL;
 
-	key = key & mask;
-
 	fi = fib_create_info(cfg);
 	if (IS_ERR(fi)) {
 		err = PTR_ERR(fi);
@@ -1155,12 +1100,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	}
 
 	l = fib_find_node(t, key);
-	fa = NULL;
-
-	if (l) {
-		fa_head = get_fa_head(l, plen);
-		fa = fib_find_alias(fa_head, tos, fi->fib_priority);
-	}
+	fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority) : NULL;
 
 	/* Now fa, if non-NULL, points to the first fib alias
 	 * with the same keys [prefix,tos,priority], if such key already
@@ -1187,7 +1127,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 		fa_match = NULL;
 		fa_first = fa;
 		hlist_for_each_entry_from(fa, fa_list) {
-			if (fa->fa_tos != tos)
+			if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
@@ -1255,12 +1195,12 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	new_fa->fa_tos = tos;
 	new_fa->fa_type = cfg->fc_type;
 	new_fa->fa_state = 0;
-	new_fa->fa_slen = KEYLENGTH - plen;
+	new_fa->fa_slen = slen;
 
 	/* Insert new entry to the list. */
-	if (!fa_head) {
-		fa_head = fib_insert_node(t, key, plen);
-		if (unlikely(!fa_head)) {
+	if (!l) {
+		l = fib_insert_node(t, key, plen);
+		if (unlikely(!l)) {
 			err = -ENOMEM;
 			goto out_free_new_fa;
 		}
@@ -1269,19 +1209,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	if (!plen)
 		tb->tb_num_default++;
 
-	if (fa) {
-		hlist_add_before_rcu(&new_fa->fa_list, &fa->fa_list);
-	} else {
-		struct fib_alias *last;
-
-		hlist_for_each_entry(last, fa_head, fa_list)
-			fa = last;
-
-		if (fa)
-			hlist_add_behind_rcu(&new_fa->fa_list, &fa->fa_list);
-		else
-			hlist_add_head_rcu(&new_fa->fa_list, fa_head);
-	}
+	fib_insert_alias(l, fa, new_fa);
 
 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
@@ -1314,7 +1242,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 #endif
 	const t_key key = ntohl(flp->daddr);
 	struct tnode *n, *pn;
-	struct leaf_info *li;
+	struct fib_alias *fa;
 	t_key cindex;
 
 	n = rcu_dereference(t->trie);
@@ -1417,56 +1345,51 @@ backtrace:
 
 found:
 	/* Step 3: Process the leaf, if that fails fall back to backtracing */
-	hlist_for_each_entry_rcu(li, &n->list, hlist) {
-		struct fib_alias *fa;
-
-		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
-			struct fib_info *fi = fa->fa_info;
-			int nhsel, err;
+	hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
+		struct fib_info *fi = fa->fa_info;
+		int nhsel, err;
 
-			if (((key ^ n->key) >> fa->fa_slen) &&
-			    ((BITS_PER_LONG > KEYLENGTH) ||
-			     (fa->fa_slen != KEYLENGTH)))
-				continue;
-			if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
-				continue;
-			if (fi->fib_dead)
+		if (((key ^ n->key) >= (1ul << fa->fa_slen)) &&
+		    ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
 				continue;
-			if (fa->fa_info->fib_scope < flp->flowi4_scope)
-				continue;
-			fib_alias_accessed(fa);
-			err = fib_props[fa->fa_type].error;
-			if (unlikely(err < 0)) {
+		if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
+			continue;
+		if (fi->fib_dead)
+			continue;
+		if (fa->fa_info->fib_scope < flp->flowi4_scope)
+			continue;
+		fib_alias_accessed(fa);
+		err = fib_props[fa->fa_type].error;
+		if (unlikely(err < 0)) {
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-				this_cpu_inc(stats->semantic_match_passed);
+			this_cpu_inc(stats->semantic_match_passed);
 #endif
-				return err;
-			}
-			if (fi->fib_flags & RTNH_F_DEAD)
+			return err;
+		}
+		if (fi->fib_flags & RTNH_F_DEAD)
+			continue;
+		for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+			const struct fib_nh *nh = &fi->fib_nh[nhsel];
+
+			if (nh->nh_flags & RTNH_F_DEAD)
+				continue;
+			if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
 				continue;
-			for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
-				const struct fib_nh *nh = &fi->fib_nh[nhsel];
-
-				if (nh->nh_flags & RTNH_F_DEAD)
-					continue;
-				if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
-					continue;
-
-				if (!(fib_flags & FIB_LOOKUP_NOREF))
-					atomic_inc(&fi->fib_clntref);
-
-				res->prefixlen = KEYLENGTH - fa->fa_slen;
-				res->nh_sel = nhsel;
-				res->type = fa->fa_type;
-				res->scope = fi->fib_scope;
-				res->fi = fi;
-				res->table = tb;
-				res->fa_head = &li->falh;
+
+			if (!(fib_flags & FIB_LOOKUP_NOREF))
+				atomic_inc(&fi->fib_clntref);
+
+			res->prefixlen = KEYLENGTH - fa->fa_slen;
+			res->nh_sel = nhsel;
+			res->type = fa->fa_type;
+			res->scope = fi->fib_scope;
+			res->fi = fi;
+			res->table = tb;
+			res->fa_head = &n->leaf;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-				this_cpu_inc(stats->semantic_match_passed);
+			this_cpu_inc(stats->semantic_match_passed);
 #endif
-				return err;
-			}
+			return err;
 		}
 	}
 #ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -1501,15 +1424,14 @@ static void trie_leaf_remove(struct trie *t, struct tnode *l)
 int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
-	u32 key, mask;
-	int plen = cfg->fc_dst_len;
-	u8 tos = cfg->fc_tos;
 	struct fib_alias *fa, *fa_to_delete;
-	struct hlist_head *fa_head;
+	u8 plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
+	u8 slen = KEYLENGTH - plen;
 	struct tnode *l;
-	struct leaf_info *li;
+	u32 key, mask;
 
-	if (plen > 32)
+	if (plen > KEYLENGTH)
 		return -EINVAL;
 
 	key = ntohl(cfg->fc_dst);
@@ -1518,19 +1440,11 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	if (key & ~mask)
 		return -EINVAL;
 
-	key = key & mask;
 	l = fib_find_node(t, key);
-
 	if (!l)
 		return -ESRCH;
 
-	li = find_leaf_info(l, plen);
-
-	if (!li)
-		return -ESRCH;
-
-	fa_head = &li->falh;
-	fa = fib_find_alias(fa_head, tos, 0);
+	fa = fib_find_alias(&l->leaf, slen, tos, 0);
 
 	if (!fa)
 		return -ESRCH;
@@ -1541,7 +1455,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	hlist_for_each_entry_from(fa, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
-		if (fa->fa_tos != tos)
+		if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
 			break;
 
 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
@@ -1564,17 +1478,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 
-	hlist_del_rcu(&fa->fa_list);
+	fib_remove_alias(l, fa);
 
 	if (!plen)
 		tb->tb_num_default--;
 
-	if (hlist_empty(fa_head)) {
-		remove_leaf_info(l, li);
-		free_leaf_info(li);
-	}
-
-	if (hlist_empty(&l->list))
+	if (hlist_empty(&l->leaf))
 		trie_leaf_remove(t, l);
 
 	if (fa->fa_state & FA_S_ACCESSED)
@@ -1585,13 +1494,14 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 	return 0;
 }
 
-static int trie_flush_list(struct hlist_head *head)
+static int trie_flush_leaf(struct tnode *l)
 {
 	struct hlist_node *tmp;
+	unsigned char slen = 0;
 	struct fib_alias *fa;
 	int found = 0;
 
-	hlist_for_each_entry_safe(fa, tmp, head, fa_list) {
+	hlist_for_each_entry_safe(fa, tmp, &l->leaf, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
 		if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
@@ -1599,28 +1509,11 @@ static int trie_flush_list(struct hlist_head *head)
 			fib_release_info(fa->fa_info);
 			alias_free_mem_rcu(fa);
 			found++;
-		}
-	}
-	return found;
-}
-
-static int trie_flush_leaf(struct tnode *l)
-{
-	int found = 0;
-	struct hlist_node *tmp;
-	struct leaf_info *li;
-	unsigned char slen = 0;
-
-	hlist_for_each_entry_safe(li, tmp, &l->list, hlist) {
-		found += trie_flush_list(&li->falh);
 
-		if (hlist_empty(&li->falh)) {
-			hlist_del_rcu(&li->hlist);
-			free_leaf_info(li);
 			continue;
 		}
 
-		slen = li->slen;
+		slen = fa->fa_slen;
 	}
 
 	l->slen = slen;
@@ -1628,8 +1521,7 @@ static int trie_flush_leaf(struct tnode *l)
 	return found;
 }
 
-/*
- * Scan for the next right leaf starting at node p->child[idx]
+/* Scan for the next right leaf starting at node p->child[idx]
  * Since we have back pointer, no recursion necessary.
  */
 static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
@@ -1704,7 +1596,7 @@ int fib_table_flush(struct fib_table *tb)
 		found += trie_flush_leaf(l);
 
 		if (ll) {
-			if (hlist_empty(&ll->list))
+			if (hlist_empty(&ll->leaf))
 				trie_leaf_remove(t, ll);
 			else
 				leaf_pull_suffix(ll);
@@ -1714,7 +1606,7 @@ int fib_table_flush(struct fib_table *tb)
 	}
 
 	if (ll) {
-		if (hlist_empty(&ll->list))
+		if (hlist_empty(&ll->leaf))
 			trie_leaf_remove(t, ll);
 		else
 			leaf_pull_suffix(ll);
@@ -1734,20 +1626,18 @@ void fib_free_table(struct fib_table *tb)
 	kfree(tb);
 }
 
-static int fn_trie_dump_fa(t_key key, struct hlist_head *fah,
-			   struct fib_table *tb,
-			   struct sk_buff *skb, struct netlink_callback *cb)
+static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
+			     struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int i, s_i;
+	__be32 xkey = htonl(l->key);
 	struct fib_alias *fa;
-	__be32 xkey = htonl(key);
+	int i, s_i;
 
-	s_i = cb->args[5];
+	s_i = cb->args[4];
 	i = 0;
 
 	/* rcu_read_lock is hold by caller */
-
-	hlist_for_each_entry_rcu(fa, fah, fa_list) {
+	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
 		if (i < s_i) {
 			i++;
 			continue;
@@ -1762,38 +1652,6 @@ static int fn_trie_dump_fa(t_key key, struct hlist_head *fah,
 				  KEYLENGTH - fa->fa_slen,
 				  fa->fa_tos,
 				  fa->fa_info, NLM_F_MULTI) < 0) {
-			cb->args[5] = i;
-			return -1;
-		}
-		i++;
-	}
-	cb->args[5] = i;
-	return skb->len;
-}
-
-static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
-			struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct leaf_info *li;
-	int i, s_i;
-
-	s_i = cb->args[4];
-	i = 0;
-
-	/* rcu_read_lock is hold by caller */
-	hlist_for_each_entry_rcu(li, &l->list, hlist) {
-		if (i < s_i) {
-			i++;
-			continue;
-		}
-
-		if (i > s_i)
-			cb->args[5] = 0;
-
-		if (hlist_empty(&li->falh))
-			continue;
-
-		if (fn_trie_dump_fa(l->key, &li->falh, tb, skb, cb) < 0) {
 			cb->args[4] = i;
 			return -1;
 		}
@@ -1853,8 +1711,7 @@ void __init fib_trie_init(void)
 					  0, SLAB_PANIC, NULL);
 
 	trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
-					   max(sizeof(struct tnode),
-					       sizeof(struct leaf_info)),
+					   sizeof(struct tnode),
 					   0, SLAB_PANIC, NULL);
 }
 
@@ -1976,14 +1833,14 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
 	rcu_read_lock();
 	for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
 		if (IS_LEAF(n)) {
-			struct leaf_info *li;
+			struct fib_alias *fa;
 
 			s->leaves++;
 			s->totdepth += iter.depth;
 			if (iter.depth > s->maxdepth)
 				s->maxdepth = iter.depth;
 
-			hlist_for_each_entry_rcu(li, &n->list, hlist)
+			hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
 				++s->prefixes;
 		} else {
 			s->tnodes++;
@@ -2015,7 +1872,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 	bytes = sizeof(struct tnode) * stat->leaves;
 
 	seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
-	bytes += sizeof(struct leaf_info) * stat->prefixes;
+	bytes += sizeof(struct fib_alias) * stat->prefixes;
 
 	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
 	bytes += sizeof(struct tnode) * stat->tnodes;
@@ -2266,29 +2123,25 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 			   &prf, KEYLENGTH - n->pos - n->bits, n->bits,
 			   n->full_children, n->empty_children);
 	} else {
-		struct leaf_info *li;
 		__be32 val = htonl(n->key);
+		struct fib_alias *fa;
 
 		seq_indent(seq, iter->depth);
 		seq_printf(seq, "  |-- %pI4\n", &val);
 
-		hlist_for_each_entry_rcu(li, &n->list, hlist) {
-			struct fib_alias *fa;
-
-			hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
-				char buf1[32], buf2[32];
-
-				seq_indent(seq, iter->depth+1);
-				seq_printf(seq, "  /%zu %s %s",
-					   KEYLENGTH - fa->fa_slen,
-					   rtn_scope(buf1, sizeof(buf1),
-						     fa->fa_info->fib_scope),
-					   rtn_type(buf2, sizeof(buf2),
-						    fa->fa_type));
-				if (fa->fa_tos)
-					seq_printf(seq, " tos=%d", fa->fa_tos);
-				seq_putc(seq, '\n');
-			}
+		hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
+			char buf1[32], buf2[32];
+
+			seq_indent(seq, iter->depth + 1);
+			seq_printf(seq, "  /%zu %s %s",
+				   KEYLENGTH - fa->fa_slen,
+				   rtn_scope(buf1, sizeof(buf1),
+					     fa->fa_info->fib_scope),
+				   rtn_type(buf2, sizeof(buf2),
+					    fa->fa_type));
+			if (fa->fa_tos)
+				seq_printf(seq, " tos=%d", fa->fa_tos);
+			seq_putc(seq, '\n');
 		}
 	}
 
@@ -2416,8 +2269,8 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info
  */
 static int fib_route_seq_show(struct seq_file *seq, void *v)
 {
+	struct fib_alias *fa;
 	struct tnode *l = v;
-	struct leaf_info *li;
 	__be32 prefix;
 
 	if (v == SEQ_START_TOKEN) {
@@ -2429,42 +2282,38 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 
 	prefix = htonl(l->key);
 
-	hlist_for_each_entry_rcu(li, &l->list, hlist) {
-		struct fib_alias *fa;
+	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+		const struct fib_info *fi = fa->fa_info;
+		__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
+		unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
 
-		hlist_for_each_entry_rcu(fa, &li->falh, fa_list) {
-			const struct fib_info *fi = fa->fa_info;
-			__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
-			unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
-
-			if (fa->fa_type == RTN_BROADCAST
-			    || fa->fa_type == RTN_MULTICAST)
-				continue;
+		if ((fa->fa_type == RTN_BROADCAST) ||
+		    (fa->fa_type == RTN_MULTICAST))
+			continue;
 
-			seq_setwidth(seq, 127);
-
-			if (fi)
-				seq_printf(seq,
-					 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
-					 "%d\t%08X\t%d\t%u\t%u",
-					 fi->fib_dev ? fi->fib_dev->name : "*",
-					 prefix,
-					 fi->fib_nh->nh_gw, flags, 0, 0,
-					 fi->fib_priority,
-					 mask,
-					 (fi->fib_advmss ?
-					  fi->fib_advmss + 40 : 0),
-					 fi->fib_window,
-					 fi->fib_rtt >> 3);
-			else
-				seq_printf(seq,
-					 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
-					 "%d\t%08X\t%d\t%u\t%u",
-					 prefix, 0, flags, 0, 0, 0,
-					 mask, 0, 0, 0);
+		seq_setwidth(seq, 127);
+
+		if (fi)
+			seq_printf(seq,
+				   "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
+				   "%d\t%08X\t%d\t%u\t%u",
+				   fi->fib_dev ? fi->fib_dev->name : "*",
+				   prefix,
+				   fi->fib_nh->nh_gw, flags, 0, 0,
+				   fi->fib_priority,
+				   mask,
+				   (fi->fib_advmss ?
+				    fi->fib_advmss + 40 : 0),
+				   fi->fib_window,
+				   fi->fib_rtt >> 3);
+		else
+			seq_printf(seq,
+				   "*\t%08X\t%08X\t%04X\t%d\t%u\t"
+				   "%d\t%08X\t%d\t%u\t%u",
+				   prefix, 0, flags, 0, 0, 0,
+				   mask, 0, 0, 0);
 
-			seq_pad(seq, '\n');
-		}
+		seq_pad(seq, '\n');
 	}
 
 	return 0;

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info
  2015-02-25 19:06 ` [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info Alexander Duyck
@ 2015-02-25 21:15   ` Julian Anastasov
  2015-02-25 21:30     ` Alexander Duyck
  0 siblings, 1 reply; 14+ messages in thread
From: Julian Anastasov @ 2015-02-25 21:15 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: netdev, davem


	Hello,

On Wed, 25 Feb 2015, Alexander Duyck wrote:

>  	hlist_for_each_entry_rcu(li, &n->list, hlist) {
>  		struct fib_alias *fa;
>  
> -		if ((key ^ n->key) & li->mask_plen)
> +		if (((key ^ n->key) >> li->slen) &&
> +		    ((BITS_PER_LONG > KEYLENGTH) || (li->slen != KEYLENGTH)))
>  			continue;

	The '(BITS_PER_LONG > KEYLENGTH) ||' part is not
needed because on 64-bit processor we can still use
32-bit register (due to 32-bit t_key type) and to get
undefined (!0) result from rshift with 32. We do not want
to continue in this case. Is it really working on 64-bit for
0.0.0.0/0 ?

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info
  2015-02-25 21:15   ` Julian Anastasov
@ 2015-02-25 21:30     ` Alexander Duyck
  2015-02-25 22:43       ` Julian Anastasov
  0 siblings, 1 reply; 14+ messages in thread
From: Alexander Duyck @ 2015-02-25 21:30 UTC (permalink / raw)
  To: Julian Anastasov; +Cc: netdev, davem


On 02/25/2015 01:15 PM, Julian Anastasov wrote:
> 	Hello,
>
> On Wed, 25 Feb 2015, Alexander Duyck wrote:
>
>>   	hlist_for_each_entry_rcu(li, &n->list, hlist) {
>>   		struct fib_alias *fa;
>>   
>> -		if ((key ^ n->key) & li->mask_plen)
>> +		if (((key ^ n->key) >> li->slen) &&
>> +		    ((BITS_PER_LONG > KEYLENGTH) || (li->slen != KEYLENGTH)))
>>   			continue;
> 	The '(BITS_PER_LONG > KEYLENGTH) ||' part is not
> needed because on 64-bit processor we can still use
> 32-bit register (due to 32-bit t_key type) and to get
> undefined (!0) result from rshift with 32. We do not want
> to continue in this case. Is it really working on 64-bit for
> 0.0.0.0/0 ?
>
> Regards
>
> --
> Julian Anastasov <ja@ssi.bg>

It is working but that may be due to the fact that gcc decided to place 
the key in a 64b register.

The last patch in the series probably does a better job of addressing 
your concern.  It replaces the shift with a comparison to (1ul << 
fa->fa_slen).  In that case I believe the BITS_PER_LONG check would then 
be appropriate would it not?

- Alex

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 4/4] fib_trie: Remove leaf_info
  2015-02-25 19:06 ` [net-next PATCH 4/4] fib_trie: Remove leaf_info Alexander Duyck
@ 2015-02-25 22:29   ` Julian Anastasov
  2015-02-25 23:09     ` Alexander Duyck
  0 siblings, 1 reply; 14+ messages in thread
From: Julian Anastasov @ 2015-02-25 22:29 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: netdev, David S. Miller


	Hello,

On Wed, 25 Feb 2015, Alexander Duyck wrote:

> -static void insert_leaf_info(struct tnode *l, struct leaf_info *new)
> +static void fib_insert_alias(struct tnode *l, struct fib_alias *fa,
> +			     struct fib_alias *new)
>  {
> -	struct hlist_head *head = &l->list;
> -	struct leaf_info *li, *last = NULL;
> +	if (fa) {
> +		hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
> +	} else {
> +		struct fib_alias *last;
>  
> -	hlist_for_each_entry(li, head, hlist) {
> -		if (new->slen < li->slen)
> -			break;
> -		last = li;
> -	}
> +		hlist_for_each_entry(last, &l->leaf, fa_list) {
> +			if (new->fa_slen < last->fa_slen)
> +				break;

	If there is some fa in list with higher fa_slen
fib_find_alias will always stop the loop and come with
fa != NULL, so above 'if...break' is not needed, we are
always going to add at tail when fa is NULL.

> +			fa = last;
> +		}
>  
> -	if (last)
> -		hlist_add_behind_rcu(&new->hlist, &last->hlist);
> -	else
> -		hlist_add_head_rcu(&new->hlist, head);
> +		if (fa)
> +			hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
> +		else
> +			hlist_add_head_rcu(&new->fa_list, &l->leaf);
> +	}
>  

> @@ -1187,7 +1127,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
>  		fa_match = NULL;
>  		fa_first = fa;
>  		hlist_for_each_entry_from(fa, fa_list) {
> -			if (fa->fa_tos != tos)
> +			if ((fa->fa_slen != slen) || (fa->fa_tos != tos))

	'fa->fa_slen == slen' check is also needed in the
above 'if' that is not present in the patch:

if (fa && fa->fa_slen == slen && fa->fa_tos == tos &&
    fa->fa_info->fib_priority == fi->fib_priority) {

	Without such check we may wrongly enter the 'if'
when fa->fa_slen > slen and to get some error, to
replace the wrong fa or to append after it.

> +	hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
> +		struct fib_info *fi = fa->fa_info;
> +		int nhsel, err;
>  
> -			if (((key ^ n->key) >> fa->fa_slen) &&
> -			    ((BITS_PER_LONG > KEYLENGTH) ||
> -			     (fa->fa_slen != KEYLENGTH)))
> -				continue;
> -			if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
> -				continue;
> -			if (fi->fib_dead)
> +		if (((key ^ n->key) >= (1ul << fa->fa_slen)) &&
> +		    ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
>  				continue;

	lshift 32 for 32-bit int has the same side effects, so
BITS_PER_LONG is not needed.

	Both left and right shift get same result on 64-bit:

# ./a 32
7
7

#include <stdio.h>
#include <stdlib.h>

int main (int argc, char *argv[])
{
        unsigned int a = 7;

        printf("%u\n", a >> atoi(argv[1]));
        printf("%u\n", a << atoi(argv[1]));
        return 0;
}

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info
  2015-02-25 21:30     ` Alexander Duyck
@ 2015-02-25 22:43       ` Julian Anastasov
  2015-02-25 22:57         ` Alexander Duyck
  0 siblings, 1 reply; 14+ messages in thread
From: Julian Anastasov @ 2015-02-25 22:43 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: netdev, davem


	Hello,

On Wed, 25 Feb 2015, Alexander Duyck wrote:

> > > +		if (((key ^ n->key) >> li->slen) &&
> > > +		    ((BITS_PER_LONG > KEYLENGTH) || (li->slen != KEYLENGTH)))
> > >      continue;
> > 	The '(BITS_PER_LONG > KEYLENGTH) ||' part is not
> > needed because on 64-bit processor we can still use
> > 32-bit register (due to 32-bit t_key type) and to get
> > undefined (!0) result from rshift with 32. We do not want
> > to continue in this case. Is it really working on 64-bit for
> > 0.0.0.0/0 ?
> 
> It is working but that may be due to the fact that gcc decided to place the
> key in a 64b register.
> 
> The last patch in the series probably does a better job of addressing your
> concern.  It replaces the shift with a comparison to (1ul << fa->fa_slen).  In
> that case I believe the BITS_PER_LONG check would then be appropriate would it
> not?

	I guess, it expands value to 64 bits due to the
"l" letter, try with "1U << fa->fa_slen" and BITS_PER_LONG
will not be needed. Or more correctly ((t_key)1 << fa->fa_slen).

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info
  2015-02-25 22:43       ` Julian Anastasov
@ 2015-02-25 22:57         ` Alexander Duyck
  0 siblings, 0 replies; 14+ messages in thread
From: Alexander Duyck @ 2015-02-25 22:57 UTC (permalink / raw)
  To: Julian Anastasov; +Cc: netdev, davem


On 02/25/2015 02:43 PM, Julian Anastasov wrote:
> 	Hello,
>
> On Wed, 25 Feb 2015, Alexander Duyck wrote:
>
>>>> +		if (((key ^ n->key) >> li->slen) &&
>>>> +		    ((BITS_PER_LONG > KEYLENGTH) || (li->slen != KEYLENGTH)))
>>>>       continue;
>>> 	The '(BITS_PER_LONG > KEYLENGTH) ||' part is not
>>> needed because on 64-bit processor we can still use
>>> 32-bit register (due to 32-bit t_key type) and to get
>>> undefined (!0) result from rshift with 32. We do not want
>>> to continue in this case. Is it really working on 64-bit for
>>> 0.0.0.0/0 ?
>> It is working but that may be due to the fact that gcc decided to place the
>> key in a 64b register.
>>
>> The last patch in the series probably does a better job of addressing your
>> concern.  It replaces the shift with a comparison to (1ul << fa->fa_slen).  In
>> that case I believe the BITS_PER_LONG check would then be appropriate would it
>> not?
> 	I guess, it expands value to 64 bits due to the
> "l" letter, try with "1U << fa->fa_slen" and BITS_PER_LONG
> will not be needed. Or more correctly ((t_key)1 << fa->fa_slen).
>
> Regards
>
> --
> Julian Anastasov <ja@ssi.bg>

I think you are kind of missing the point.  By using casting the 1 as a 
long on 64b systems we can avoid the need to check to see if fa_slen is 
equal to KEYLENGTH.  What the BITS_PER_LONG check does is allow us to 
strip the check for fa_slen == KEYLENGTH on systems that support 64b 
longs since (1ul << fa->fa_slen) will always be a defined value in that 
case so we don't need to check and see if fa->fa_slen is equal to 
KEYLENGTH or not.

- Alex

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 4/4] fib_trie: Remove leaf_info
  2015-02-25 22:29   ` Julian Anastasov
@ 2015-02-25 23:09     ` Alexander Duyck
  2015-02-26  0:06       ` Julian Anastasov
  0 siblings, 1 reply; 14+ messages in thread
From: Alexander Duyck @ 2015-02-25 23:09 UTC (permalink / raw)
  To: Julian Anastasov; +Cc: netdev, David S. Miller


On 02/25/2015 02:29 PM, Julian Anastasov wrote:
> 	Hello,
>
> On Wed, 25 Feb 2015, Alexander Duyck wrote:
>
>> -static void insert_leaf_info(struct tnode *l, struct leaf_info *new)
>> +static void fib_insert_alias(struct tnode *l, struct fib_alias *fa,
>> +			     struct fib_alias *new)
>>   {
>> -	struct hlist_head *head = &l->list;
>> -	struct leaf_info *li, *last = NULL;
>> +	if (fa) {
>> +		hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
>> +	} else {
>> +		struct fib_alias *last;
>>   
>> -	hlist_for_each_entry(li, head, hlist) {
>> -		if (new->slen < li->slen)
>> -			break;
>> -		last = li;
>> -	}
>> +		hlist_for_each_entry(last, &l->leaf, fa_list) {
>> +			if (new->fa_slen < last->fa_slen)
>> +				break;
> 	If there is some fa in list with higher fa_slen
> fib_find_alias will always stop the loop and come with
> fa != NULL, so above 'if...break' is not needed, we are
> always going to add at tail when fa is NULL.

Actually fib_find_alias will return NULL in the case that there was a 
hole in which the suffix length does not exist.

So for example if we have a suffix length of 8 and one of 10 and we are 
adding a suffix length of 9 then fib_find_alias will return NULL and we 
need to walk though the list and find the hole we are supposed to drop 
the suffix in.

>
>> +			fa = last;
>> +		}
>>   
>> -	if (last)
>> -		hlist_add_behind_rcu(&new->hlist, &last->hlist);
>> -	else
>> -		hlist_add_head_rcu(&new->hlist, head);
>> +		if (fa)
>> +			hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
>> +		else
>> +			hlist_add_head_rcu(&new->fa_list, &l->leaf);
>> +	}
>>   
>> @@ -1187,7 +1127,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
>>   		fa_match = NULL;
>>   		fa_first = fa;
>>   		hlist_for_each_entry_from(fa, fa_list) {
>> -			if (fa->fa_tos != tos)
>> +			if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
> 	'fa->fa_slen == slen' check is also needed in the
> above 'if' that is not present in the patch:
>
> if (fa && fa->fa_slen == slen && fa->fa_tos == tos &&
>      fa->fa_info->fib_priority == fi->fib_priority) {
>
> 	Without such check we may wrongly enter the 'if'
> when fa->fa_slen > slen and to get some error, to
> replace the wrong fa or to append after it.

Actually we don't need it because fib_find_alias will return NULL if the 
slen value doesn't match.

>> +	hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
>> +		struct fib_info *fi = fa->fa_info;
>> +		int nhsel, err;
>>   
>> -			if (((key ^ n->key) >> fa->fa_slen) &&
>> -			    ((BITS_PER_LONG > KEYLENGTH) ||
>> -			     (fa->fa_slen != KEYLENGTH)))
>> -				continue;
>> -			if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
>> -				continue;
>> -			if (fi->fib_dead)
>> +		if (((key ^ n->key) >= (1ul << fa->fa_slen)) &&
>> +		    ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH)))
>>   				continue;
> 	lshift 32 for 32-bit int has the same side effects, so
> BITS_PER_LONG is not needed.

I'll submit a v2 just to clean up the first few patches so that they 
correctly use the 1ul w/ left shift instead of performing a right shift 
on the key value.

>
> 	Both left and right shift get same result on 64-bit:
>
> # ./a 32
> 7
> 7
>
> #include <stdio.h>
> #include <stdlib.h>
>
> int main (int argc, char *argv[])
> {
>          unsigned int a = 7;
>
>          printf("%u\n", a >> atoi(argv[1]));
>          printf("%u\n", a << atoi(argv[1]));
>          return 0;
> }
>
> Regards
>
> --
> Julian Anastasov <ja@ssi.bg>

Why are you showing me an example with a 32b int when I am using a 
long?  For x86 a 32b shift on a 32b value is undefined so we need to 
compare the suffix length to the KEYLENGTH.  For 64b a long value can be 
shifted up to 63 bits and still be a defined value.  That is why I use 
"1ul" as the value being shifted and then also perform the check for 
KEYLENGTH vs BITS_PER_LONG in order to determine if I still need the 
check for fa_slen != KEYLENGTH.

- Alex

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 4/4] fib_trie: Remove leaf_info
  2015-02-25 23:09     ` Alexander Duyck
@ 2015-02-26  0:06       ` Julian Anastasov
  2015-02-26  0:16         ` Alexander Duyck
  0 siblings, 1 reply; 14+ messages in thread
From: Julian Anastasov @ 2015-02-26  0:06 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: netdev, David S. Miller


	Hello,

On Wed, 25 Feb 2015, Alexander Duyck wrote:

> > 	If there is some fa in list with higher fa_slen
> > fib_find_alias will always stop the loop and come with
> > fa != NULL, so above 'if...break' is not needed, we are
> > always going to add at tail when fa is NULL.
> 
> Actually fib_find_alias will return NULL in the case that there was a hole in
> which the suffix length does not exist.
> 
> So for example if we have a suffix length of 8 and one of 10 and we are adding
> a suffix length of 9 then fib_find_alias will return NULL and we need to walk
> though the list and find the hole we are supposed to drop the suffix in.

	I missed the fact that we return NULL instead of fa.
I thought, it would be more consistent with the old logic
to return a stop position. And we avoid walking the list
again. But in practice we should not see many entries here,
right?

> Why are you showing me an example with a 32b int when I am using a long?  For
> x86 a 32b shift on a 32b value is undefined so we need to compare the suffix
> length to the KEYLENGTH.  For 64b a long value can be shifted up to 63 bits
> and still be a defined value.  That is why I use "1ul" as the value being
> shifted and then also perform the check for KEYLENGTH vs BITS_PER_LONG in
> order to determine if I still need the check for fa_slen != KEYLENGTH.

	I see, so, on 64-bit platform we avoid the
KEYLENGTH check... OK, that is better.

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 4/4] fib_trie: Remove leaf_info
  2015-02-26  0:06       ` Julian Anastasov
@ 2015-02-26  0:16         ` Alexander Duyck
  0 siblings, 0 replies; 14+ messages in thread
From: Alexander Duyck @ 2015-02-26  0:16 UTC (permalink / raw)
  To: Julian Anastasov, Alexander Duyck; +Cc: netdev, David S. Miller

On 02/25/2015 04:06 PM, Julian Anastasov wrote:
> 	Hello,
>
> On Wed, 25 Feb 2015, Alexander Duyck wrote:
>
>>> 	If there is some fa in list with higher fa_slen
>>> fib_find_alias will always stop the loop and come with
>>> fa != NULL, so above 'if...break' is not needed, we are
>>> always going to add at tail when fa is NULL.
>> Actually fib_find_alias will return NULL in the case that there was a hole in
>> which the suffix length does not exist.
>>
>> So for example if we have a suffix length of 8 and one of 10 and we are adding
>> a suffix length of 9 then fib_find_alias will return NULL and we need to walk
>> though the list and find the hole we are supposed to drop the suffix in.
> 	I missed the fact that we return NULL instead of fa.
> I thought, it would be more consistent with the old logic
> to return a stop position. And we avoid walking the list
> again. But in practice we should not see many entries here,
> right?

Most users should have a pretty shallow list here.  In the case of BGP
routes there might be more entries per slen but the odds of encountering
a NULL in that case should be pretty low.

>> Why are you showing me an example with a 32b int when I am using a long?  For
>> x86 a 32b shift on a 32b value is undefined so we need to compare the suffix
>> length to the KEYLENGTH.  For 64b a long value can be shifted up to 63 bits
>> and still be a defined value.  That is why I use "1ul" as the value being
>> shifted and then also perform the check for KEYLENGTH vs BITS_PER_LONG in
>> order to determine if I still need the check for fa_slen != KEYLENGTH.
> 	I see, so, on 64-bit platform we avoid the
> KEYLENGTH check... OK, that is better.
>
> Regards
>
> --
> Julian Anastasov <ja@ssi.bg>

Yes, the BIT_PER_LONG check will be broken down to either 0 or 1 by the
complier so it will be stripped out in the resulting assembler.

- Alex

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [net-next PATCH 0/4] fib_trie: Remove leaf_info structure
  2015-02-25 19:05 [net-next PATCH 0/4] fib_trie: Remove leaf_info structure Alexander Duyck
                   ` (3 preceding siblings ...)
  2015-02-25 19:06 ` [net-next PATCH 4/4] fib_trie: Remove leaf_info Alexander Duyck
@ 2015-02-27 22:06 ` David Miller
  4 siblings, 0 replies; 14+ messages in thread
From: David Miller @ 2015-02-27 22:06 UTC (permalink / raw)
  To: alexander.h.duyck; +Cc: netdev, ja

From: Alexander Duyck <alexander.h.duyck@redhat.com>
Date: Wed, 25 Feb 2015 11:05:55 -0800

> This patch set removes the leaf_info structure from the IPv4 fib_trie.  The
> general idea is that the leaf_info structure itself only held about 6
> actual bits of data, beyond that it was mostly just waste.  As such we can
> drop the structure, move the 1 byte representing the prefix/suffix length
> into the fib_alias and just link it all into one list.
> 
> My testing shows that this saves somewhere between 4 to 10ns depending on
> the type of test performed.  I'm suspecting that this represents 1 to 2 L1
> cache misses saved per look-up.
> 
> One side effect of this change is that semantic_match_miss will now only
> increment once per leaf instead of once per leaf_info miss.  However the
> stat is already skewed now that we perform a preliminary check on the leaf
> as a part of the look-up.
> 
> I also have gone through and addressed a number of ordering issues in the
> first patch since I had misread the behavior of list_add_tail.
> 
> I have since run some additional testing and verified the resulting lists
> are in the same order when combining multiple prefix length and tos values
> in a single leaf.

Series applied, thanks Alexander.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-02-27 22:06 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-02-25 19:05 [net-next PATCH 0/4] fib_trie: Remove leaf_info structure Alexander Duyck
2015-02-25 19:06 ` [net-next PATCH 1/4] fib_trie: Convert fib_alias to hlist from list Alexander Duyck
2015-02-25 19:06 ` [net-next PATCH 2/4] fib_trie: Replace plen with slen in leaf_info Alexander Duyck
2015-02-25 21:15   ` Julian Anastasov
2015-02-25 21:30     ` Alexander Duyck
2015-02-25 22:43       ` Julian Anastasov
2015-02-25 22:57         ` Alexander Duyck
2015-02-25 19:06 ` [net-next PATCH 3/4] fib_trie: Add slen to fib alias Alexander Duyck
2015-02-25 19:06 ` [net-next PATCH 4/4] fib_trie: Remove leaf_info Alexander Duyck
2015-02-25 22:29   ` Julian Anastasov
2015-02-25 23:09     ` Alexander Duyck
2015-02-26  0:06       ` Julian Anastasov
2015-02-26  0:16         ` Alexander Duyck
2015-02-27 22:06 ` [net-next PATCH 0/4] fib_trie: Remove leaf_info structure David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).