From: David Ahern <dsahern@kernel.org>
To: davem@davemloft.net, netdev@vger.kernel.org
Cc: idosch@mellanox.com, David Ahern <dsahern@gmail.com>
Subject: [PATCH net-next 4/8] ipv6: Move exceptions to fib_nh_common
Date: Fri, 29 Mar 2019 17:47:27 -0700 [thread overview]
Message-ID: <20190330004731.23959-5-dsahern@kernel.org> (raw)
In-Reply-To: <20190330004731.23959-1-dsahern@kernel.org>
From: David Ahern <dsahern@gmail.com>
Exceptions are really per device, so move rt6i_exception_bucket to
fib_nh_common as the generic nhc_exceptions. Move the flushed flag to
fib_nh_common as well, as nhc_exceptions_flushed. Putting both in
fib_nh_common is a strategic choice to reduce memory consumption: moving
them to fib6_nh pushes the struct over 256 bytes, which increases the
actual allocation of a fib entry to 512 bytes.
Exception flushing when a fib entry is deleted is limited to the exceptions
per nexthop that reference the to-be-deleted fib entry (i.e., 'from' points
to it). When a fib6_nh is released, all exceptions are flushed.
Move the core logic of rt6_flush_exceptions, rt6_remove_exception_rt and
rt6_update_exception_stamp_rt to helpers that can be invoked per fib6_nh.
For fib6_nh_flush_exceptions, only remove the exception if from is NULL
(i.e., flushing all exceptions) or the rt6_info->from matches (fib delete).
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/net/ip6_fib.h | 6 +--
include/net/ip_fib.h | 4 +-
net/ipv6/ip6_fib.c | 7 ---
net/ipv6/route.c | 140 ++++++++++++++++++++++++++++++++------------------
4 files changed, 96 insertions(+), 61 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 58dbb4e82908..c1d1e32e1a19 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -153,7 +153,6 @@ struct fib6_info {
struct rt6key fib6_prefsrc;
struct rt6_info * __percpu *rt6i_pcpu;
- struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
#ifdef CONFIG_IPV6_ROUTER_PREF
unsigned long last_probe;
@@ -162,12 +161,11 @@ struct fib6_info {
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
- u8 exception_bucket_flushed:1,
- should_flush:1,
+ u8 should_flush:1,
dst_nocount:1,
dst_nopolicy:1,
dst_host:1,
- unused:3;
+ unused:4;
struct fib6_nh fib6_nh;
struct rcu_head rcu;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index cce437a1b2ff..063430ca0c6e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -84,7 +84,8 @@ struct fib_nh_common {
unsigned char nhc_scope;
u8 nhc_family;
u8 nhc_has_gw:1,
- unused:7;
+ nhc_exceptions_flushed:1,
+ unused:6;
union {
__be32 ipv4;
struct in6_addr ipv6;
@@ -96,6 +97,7 @@ struct fib_nh_common {
/* v4 specific, but allows v6 gw with v4 routes */
struct rtable __rcu * __percpu *nhc_pcpu_rth_output;
struct rtable __rcu *nhc_rth_input;
+ void __rcu *nhc_exceptions;
};
struct fib_nh {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 8c00609a1513..cce976a59a8c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -170,16 +170,9 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
- struct rt6_exception_bucket *bucket;
WARN_ON(f6i->fib6_node);
- bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
- if (bucket) {
- f6i->rt6i_exception_bucket = NULL;
- kfree(bucket);
- }
-
if (f6i->rt6i_pcpu) {
int cpu;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e0ee30cbd079..c66b9ac37036 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1413,6 +1413,7 @@ static unsigned int fib6_mtu(const struct fib6_info *rt)
static int rt6_insert_exception(struct rt6_info *nrt,
struct fib6_info *ort)
{
+ struct fib_nh_common *nhc = &ort->fib6_nh.nh_common;
struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
@@ -1421,12 +1422,12 @@ static int rt6_insert_exception(struct rt6_info *nrt,
spin_lock_bh(&rt6_exception_lock);
- if (ort->exception_bucket_flushed) {
+ if (nhc->nhc_exceptions_flushed) {
err = -EINVAL;
goto out;
}
- bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (!bucket) {
bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
@@ -1435,7 +1436,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
err = -ENOMEM;
goto out;
}
- rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+ rcu_assign_pointer(nhc->nhc_exceptions, bucket);
}
#ifdef CONFIG_IPV6_SUBTREES
@@ -1490,8 +1491,9 @@ static int rt6_insert_exception(struct rt6_info *nrt,
return err;
}
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
{
+ struct fib_nh_common *nhc = &nh->nh_common;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
@@ -1499,17 +1501,21 @@ void rt6_flush_exceptions(struct fib6_info *rt)
spin_lock_bh(&rt6_exception_lock);
/* Prevent rt6_insert_exception() to recreate the bucket list */
- rt->exception_bucket_flushed = 1;
+ if (!from)
+ nhc->nhc_exceptions_flushed = 1;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (!bucket)
goto out;
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
- rt6_remove_exception(bucket, rt6_ex);
- WARN_ON_ONCE(bucket->depth);
+ hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+ if (!from ||
+ rcu_access_pointer(rt6_ex->rt6i->from) == from)
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ WARN_ON_ONCE(!from && bucket->depth);
bucket++;
}
@@ -1517,6 +1523,11 @@ void rt6_flush_exceptions(struct fib6_info *rt)
spin_unlock_bh(&rt6_exception_lock);
}
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+ fib6_nh_flush_exceptions(&f6i->fib6_nh, f6i);
+}
+
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
@@ -1524,12 +1535,13 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
struct in6_addr *daddr,
struct in6_addr *saddr)
{
+ struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
struct rt6_info *res = NULL;
- bucket = rcu_dereference(rt->rt6i_exception_bucket);
+ bucket = rcu_dereference(nhc->nhc_exceptions);
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates rt is in subtree
@@ -1549,25 +1561,20 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
return res;
}
-/* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
{
+ struct fib_nh_common *nhc = &nh->nh_common;
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *from;
- int err;
-
- from = rcu_dereference(rt->from);
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return -EINVAL;
+ int err = 0;
- if (!rcu_access_pointer(from->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nhc->nhc_exceptions))
return -ENOENT;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
@@ -1576,39 +1583,43 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_spinlock(&bucket,
&rt->rt6i_dst.addr,
src_key);
- if (rt6_ex) {
+ if (rt6_ex)
rt6_remove_exception(bucket, rt6_ex);
- err = 0;
- } else {
+ else
err = -ENOENT;
- }
spin_unlock_bh(&rt6_exception_lock);
return err;
}
-/* Find rt6_ex which contains the passed in rt cache and
- * refresh its stamp
- */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+/* Remove the passed in cached rt from the hash table that contains it */
+static int rt6_remove_exception_rt(struct rt6_info *rt)
{
- struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
- struct rt6_exception *rt6_ex;
struct fib6_info *from;
- rcu_read_lock();
from = rcu_dereference(rt->from);
if (!from || !(rt->rt6i_flags & RTF_CACHE))
- goto unlock;
+ return -EINVAL;
- bucket = rcu_dereference(from->rt6i_exception_bucket);
+ return fib6_nh_remove_exception(&from->fib6_nh,
+ from->fib6_src.plen, rt);
+}
+
+static void fib6_nh_update_exception(struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
+{
+ struct fib_nh_common *nhc = &nh->nh_common;
+ const struct in6_addr *src_key = NULL;
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+
+ bucket = rcu_dereference(nhc->nhc_exceptions);
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
@@ -1617,15 +1628,28 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
- rt6_ex = __rt6_find_exception_rcu(&bucket,
- &rt->rt6i_dst.addr,
- src_key);
+ rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
if (rt6_ex)
rt6_ex->stamp = jiffies;
+}
+
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct fib6_info *from;
+
+ rcu_read_lock();
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
+ fib6_nh_update_exception(&from->fib6_nh, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
@@ -1655,11 +1679,12 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
struct fib6_info *rt, int mtu)
{
+ struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
int i;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (!bucket)
@@ -1686,16 +1711,17 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
struct in6_addr *gateway)
{
+ struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nhc->nhc_exceptions))
return;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (bucket) {
@@ -1768,15 +1794,18 @@ void rt6_age_exceptions(struct fib6_info *rt,
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
+ struct fib_nh_common *nhc;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
- return;
-
rcu_read_lock_bh();
+
+ nhc = &rt->fib6_nh.nh_common;
+ if (!rcu_access_pointer(nhc->nhc_exceptions))
+ goto out;
+
spin_lock(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (bucket) {
@@ -1790,6 +1819,7 @@ void rt6_age_exceptions(struct fib6_info *rt,
}
}
spin_unlock(&rt6_exception_lock);
+out:
rcu_read_unlock_bh();
}
@@ -2596,6 +2626,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
+ struct fib_nh_common *nhc = &f6i->fib6_nh.nh_common;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct in6_addr *src_key;
@@ -2614,7 +2645,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
src_key = saddr;
#endif
- bucket = rcu_dereference(f6i->rt6i_exception_bucket);
+ bucket = rcu_dereference(nhc->nhc_exceptions);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
@@ -3011,6 +3042,17 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
void fib6_nh_release(struct fib6_nh *fib6_nh)
{
+ struct fib_nh_common *nhc = &fib6_nh->nh_common;
+ struct rt6_exception_bucket *bucket;
+
+ fib6_nh_flush_exceptions(fib6_nh, NULL);
+
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
+ if (bucket) {
+ rcu_assign_pointer(nhc->nhc_exceptions, NULL);
+ kfree(bucket);
+ }
+
fib_nh_common_release(&fib6_nh->nh_common);
}
--
2.11.0
next prev parent reply other threads:[~2019-03-30 0:47 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-30 0:47 [PATCH net-next 0/8] net: More movement to fib_nh_common David Ahern
2019-03-30 0:47 ` [PATCH net-next 1/8] ipv4: Update fib_table_lookup tracepoint to take common nexthop David Ahern
2019-03-30 0:47 ` [PATCH net-next 2/8] ipv4: Add fib_nh_common to fib_result David Ahern
2019-03-30 0:47 ` [PATCH net-next 3/8] ipv4: Move cached routes to fib_nh_common David Ahern
2019-03-31 0:11 ` David Ahern
2019-03-31 1:41 ` David Miller
2019-03-30 0:47 ` David Ahern [this message]
2019-03-30 0:47 ` [PATCH net-next 5/8] ipv4: Switch to nhc_exceptions for exception bucket David Ahern
2019-03-30 0:47 ` [PATCH net-next 6/8] ipv4: Refactor nexthop attributes in fib_dump_info David Ahern
2019-03-30 0:47 ` [PATCH net-next 7/8] ipv4: Change fib_nexthop_info and fib_add_nexthop to take fib_nh_common David Ahern
2019-03-30 0:47 ` [PATCH net-next 8/8] ipv6: Flip to fib_nexthop_info David Ahern
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190330004731.23959-5-dsahern@kernel.org \
--to=dsahern@kernel.org \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=idosch@mellanox.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.