From: Alexander Duyck <alexander.h.duyck@redhat.com>
To: netdev@vger.kernel.org
Subject: [RFC PATCH 09/29] fib_trie: Make fib_table rcu safe
Date: Tue, 24 Feb 2015 12:48:56 -0800 [thread overview]
Message-ID: <20150224204856.26106.60450.stgit@ahduyck-vm-fedora20> (raw)
In-Reply-To: <20150224202837.26106.87623.stgit@ahduyck-vm-fedora20>
The fib_table was wrapped in several places with an
rcu_read_lock/rcu_read_unlock however after looking over the code I found
several spots where the tables were being accessed as just standard
pointers without any protections. This change fixes that so that all of
the proper protections are in place when accessing the table to take RCU
replacement of the table into account.
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
include/net/ip_fib.h | 69 +++++++++++++++++++++++++++-------------------
include/net/netns/ipv4.h | 7 +++--
net/ipv4/fib_frontend.c | 52 +++++++++++++++++++++++++----------
net/ipv4/fib_trie.c | 28 +++++++++++++------
4 files changed, 102 insertions(+), 54 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index cba4b7c..8aa6f82 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -206,12 +206,16 @@ void fib_free_table(struct fib_table *tb);
static inline struct fib_table *fib_get_table(struct net *net, u32 id)
{
+ struct hlist_node *tb_hlist;
struct hlist_head *ptr;
ptr = id == RT_TABLE_LOCAL ?
&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] :
&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX];
- return hlist_entry(ptr->first, struct fib_table, tb_hlist);
+
+ tb_hlist = rcu_dereference_rtnl(hlist_first_rcu(ptr));
+
+ return hlist_entry(tb_hlist, struct fib_table, tb_hlist);
}
static inline struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -222,15 +226,19 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id)
static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
struct fib_result *res)
{
- int err = -ENETUNREACH;
+ struct fib_table *tb;
+ int err;
rcu_read_lock();
- if (!fib_table_lookup(fib_get_table(net, RT_TABLE_LOCAL), flp, res,
- FIB_LOOKUP_NOREF) ||
- !fib_table_lookup(fib_get_table(net, RT_TABLE_MAIN), flp, res,
- FIB_LOOKUP_NOREF))
- err = 0;
+ for (err = 0; !err; err = -ENETUNREACH) {
+ tb = fib_get_table(net, RT_TABLE_LOCAL);
+ if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+ break;
+ tb = fib_get_table(net, RT_TABLE_MAIN);
+ if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+ break;
+ }
rcu_read_unlock();
@@ -249,28 +257,33 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res);
static inline int fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res)
{
- if (!net->ipv4.fib_has_custom_rules) {
- int err = -ENETUNREACH;
-
- rcu_read_lock();
-
- res->tclassid = 0;
- if ((net->ipv4.fib_local &&
- !fib_table_lookup(net->ipv4.fib_local, flp, res,
- FIB_LOOKUP_NOREF)) ||
- (net->ipv4.fib_main &&
- !fib_table_lookup(net->ipv4.fib_main, flp, res,
- FIB_LOOKUP_NOREF)) ||
- (net->ipv4.fib_default &&
- !fib_table_lookup(net->ipv4.fib_default, flp, res,
- FIB_LOOKUP_NOREF)))
- err = 0;
-
- rcu_read_unlock();
-
- return err;
+ struct fib_table *tb;
+ int err;
+
+ if (net->ipv4.fib_has_custom_rules)
+ return __fib_lookup(net, flp, res);
+
+ rcu_read_lock();
+
+ res->tclassid = 0;
+
+ for (err = 0; !err; err = -ENETUNREACH) {
+ tb = rcu_dereference_rtnl(net->ipv4.fib_local);
+ if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+ break;
+
+ tb = rcu_dereference_rtnl(net->ipv4.fib_main);
+ if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+ break;
+
+ tb = rcu_dereference_rtnl(net->ipv4.fib_default);
+ if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+ break;
}
- return __fib_lookup(net, flp, res);
+
+ rcu_read_unlock();
+
+ return err;
}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index dbe2254..d4f5b6f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -7,6 +7,7 @@
#include <linux/uidgid.h>
#include <net/inet_frag.h>
+#include <linux/rcupdate.h>
struct tcpm_hash_bucket;
struct ctl_table_header;
@@ -38,9 +39,9 @@ struct netns_ipv4 {
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
bool fib_has_custom_rules;
- struct fib_table *fib_local;
- struct fib_table *fib_main;
- struct fib_table *fib_default;
+ struct fib_table __rcu *fib_local;
+ struct fib_table __rcu *fib_main;
+ struct fib_table __rcu *fib_default;
#endif
#ifdef CONFIG_IP_ROUTE_CLASSID
int fib_num_tclassid_users;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 57be71d..220c4b4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -89,17 +89,14 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
switch (id) {
case RT_TABLE_LOCAL:
- net->ipv4.fib_local = tb;
+ rcu_assign_pointer(net->ipv4.fib_local, tb);
break;
-
case RT_TABLE_MAIN:
- net->ipv4.fib_main = tb;
+ rcu_assign_pointer(net->ipv4.fib_main, tb);
break;
-
case RT_TABLE_DEFAULT:
- net->ipv4.fib_default = tb;
+ rcu_assign_pointer(net->ipv4.fib_default, tb);
break;
-
default:
break;
}
@@ -132,13 +129,14 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
static void fib_flush(struct net *net)
{
int flushed = 0;
- struct fib_table *tb;
- struct hlist_head *head;
unsigned int h;
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
- head = &net->ipv4.fib_table_hash[h];
- hlist_for_each_entry(tb, head, tb_hlist)
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+ struct hlist_node *tmp;
+ struct fib_table *tb;
+
+ hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
flushed += fib_table_flush(tb);
}
@@ -665,10 +663,12 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
s_h = cb->args[0];
s_e = cb->args[1];
+ rcu_read_lock();
+
for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv4.fib_table_hash[h];
- hlist_for_each_entry(tb, head, tb_hlist) {
+ hlist_for_each_entry_rcu(tb, head, tb_hlist) {
if (e < s_e)
goto next;
if (dumped)
@@ -682,6 +682,8 @@ next:
}
}
out:
+ rcu_read_unlock();
+
cb->args[1] = e;
cb->args[0] = h;
@@ -1117,14 +1119,34 @@ static void ip_fib_net_exit(struct net *net)
rtnl_lock();
for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
- struct fib_table *tb;
- struct hlist_head *head;
+ struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
+ struct fib_table *tb;
+
+ /* this is done in two passes as flushing the table could
+ * cause it to be reallocated in order to accommodate new
+ * tnodes at the root as the table shrinks.
+ */
+ hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
+ fib_table_flush(tb);
- head = &net->ipv4.fib_table_hash[i];
hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ switch (tb->tb_id) {
+ case RT_TABLE_LOCAL:
+ RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
+ break;
+ case RT_TABLE_MAIN:
+ RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
+ break;
+ case RT_TABLE_DEFAULT:
+ RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
+ break;
+ default:
+ break;
+ }
+#endif
hlist_del(&tb->tb_hlist);
- fib_table_flush(tb);
fib_free_table(tb);
}
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index dcdf636..b895ee7 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -138,6 +138,7 @@ struct trie {
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats __percpu *stats;
#endif
+ struct rcu_head rcu;
};
static void resize(struct trie *t, struct tnode *tn);
@@ -190,6 +191,13 @@ static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn,
return rcu_dereference_rtnl(tn->child[i]);
}
+static inline struct fib_table *trie_get_table(struct trie *t)
+{
+ unsigned long *tb_data = (unsigned long *)t;
+
+ return container_of(tb_data, struct fib_table, tb_data[0]);
+}
+
/* To understand this stuff, an understanding of keys and all their bits is
* necessary. Every node in the trie has a key associated with it, but not
* all of the bits in that key are significant.
@@ -1584,16 +1592,24 @@ flush_complete:
return found;
}
-void fib_free_table(struct fib_table *tb)
+static void __trie_free_rcu(struct rcu_head *head)
{
-#ifdef CONFIG_IP_FIB_TRIE_STATS
- struct trie *t = (struct trie *)tb->tb_data;
+ struct trie *t = container_of(head, struct trie, rcu);
+ struct fib_table *tb = trie_get_table(t);
+#ifdef CONFIG_IP_FIB_TRIE_STATS
free_percpu(t->stats);
#endif /* CONFIG_IP_FIB_TRIE_STATS */
kfree(tb);
}
+void fib_free_table(struct fib_table *tb)
+{
+ struct trie *t = (struct trie *)tb->tb_data;
+
+ call_rcu(&t->rcu, __trie_free_rcu);
+}
+
static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -1630,6 +1646,7 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
return skb->len;
}
+/* rcu_read_lock needs to be hold by caller from readside */
int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -1641,15 +1658,12 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
int count = cb->args[2];
t_key key = cb->args[3];
- rcu_read_lock();
-
tp = rcu_dereference_rtnl(t->trie);
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
cb->args[3] = key;
cb->args[2] = count;
- rcu_read_unlock();
return -1;
}
@@ -1664,8 +1678,6 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
break;
}
- rcu_read_unlock();
-
cb->args[3] = key;
cb->args[2] = count;
next prev parent reply other threads:[~2015-02-24 20:48 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-02-24 20:47 [RFC PATCH 00/29] Phase 2 of fib_trie updates Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 01/29] fib_trie: Convert fib_alias to hlist from list Alexander Duyck
2015-02-24 21:51 ` Or Gerlitz
2015-02-24 21:52 ` Or Gerlitz
2015-02-24 22:08 ` David Miller
2015-02-24 22:14 ` Alexander Duyck
2015-02-24 22:47 ` Julian Anastasov
2015-02-24 23:09 ` Julian Anastasov
2015-02-24 20:48 ` [RFC PATCH 02/29] fib_trie: Replace plen with slen in leaf_info Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 03/29] fib_trie: Add slen to fib alias Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 04/29] fib_trie: Remove leaf_info Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 05/29] fib_trie: Only resize N/2 times instead N * log(N) times in fib_table_flush Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 06/29] fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 07/29] fib_trie: Fib find node should return parent Alexander Duyck
2015-02-24 20:48 ` [RFC PATCH 08/29] fib_trie: Update insert and delete to make use of tp from find_node Alexander Duyck
2015-02-24 20:48 ` Alexander Duyck [this message]
2015-02-24 20:49 ` [RFC PATCH 10/29] fib_trie: Return pointer to tnode pointer in resize/inflate/halve Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 11/29] fib_trie: Rename tnode to key_vector Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 12/29] fib_trie: move leaf and tnode to occupy the same spot in the key vector Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 13/29] fib_trie: replace tnode_get_child functions with get_child macros Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 14/29] fib_trie: Rename tnode_child_length to child_length Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 15/29] fib_trie: Add tnode struct as a container for fields not needed in key_vector Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 16/29] fib_trie: Move rcu from key_vector to tnode, add accessors Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 17/29] fib_trie: Pull empty_children and full_children into tnode Alexander Duyck
2015-02-24 20:49 ` [RFC PATCH 18/29] fib_trie: Move parent from key_vector to tnode Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 19/29] fib_trie: Add key vector to root, return parent key_vector in resize Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 20/29] fib_trie: Push net pointer down into fib_trie insert/delete/flush calls Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 21/29] fib_trie: Rewrite handling of RCU to include parent in replacement Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 22/29] fib_trie: Allocate tnode as array of key_vectors instead of key_vector as array of tnode pointers Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 23/29] fib_trie: Add leaf_init Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 24/29] fib_trie: Update tnode_new to drop use of put_child_root Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 25/29] fib_trie: Add function for dropping children from trie Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 26/29] fib_trie: Use put_child to only copy key_vectors instead of pointers Alexander Duyck
2015-02-24 20:50 ` [RFC PATCH 27/29] fib_trie: Move key and pos into key_vector from tnode Alexander Duyck
2015-02-24 20:51 ` [RFC PATCH 28/29] fib_trie: Move slen from tnode to key vector Alexander Duyck
2015-02-24 20:51 ` [RFC PATCH 29/29] fib_trie: Push bits up one level, and move leaves up into parent key_vector array Alexander Duyck
2015-02-25 3:53 ` [RFC PATCH 00/29] Phase 2 of fib_trie updates David Miller
2015-02-25 5:12 ` Alexander Duyck
2015-02-27 21:01 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150224204856.26106.60450.stgit@ahduyck-vm-fedora20 \
--to=alexander.h.duyck@redhat.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox