* [PATCH net-next 03/16] ipv6: prepare fib6_remove_prefsrc() for exception table
From: Wei Wang @ 2017-10-06 19:05 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-3-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
After we move cached dst entries into the exception table under its
parent route, current fib6_remove_prefsrc() no longer can access them.
This commit makes fib6_remove_prefsrc() also go through all routes
in the exception table to remove the pref src.
This is a preparation patch in order to move all cached dst into the
exception table.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/route.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc5e70975966..f52ac57dcc99 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1264,6 +1264,12 @@ static int rt6_insert_exception(struct rt6_info *nrt,
if (ort->rt6i_src.plen)
src_key = &nrt->rt6i_src.addr;
#endif
+
+ /* Update rt6i_prefsrc as it could be changed
+ * in rt6_remove_prefsrc()
+ */
+ nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
+
rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
src_key);
if (rt6_ex)
@@ -1432,6 +1438,25 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
rcu_read_unlock();
}
+static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
+ }
+ bucket++;
+ }
+ }
+}
+
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
@@ -3159,8 +3184,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
if (((void *)rt->dst.dev == dev || !dev) &&
rt != net->ipv6.ip6_null_entry &&
ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+ spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->rt6i_prefsrc.plen = 0;
+ /* need to update cache as well */
+ rt6_exceptions_remove_prefsrc(rt);
+ spin_unlock_bh(&rt6_exception_lock);
}
return 0;
}
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 04/16] ipv6: prepare rt6_mtu_change() for exception table
From: Wei Wang @ 2017-10-06 19:05 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-4-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
If we move all cached dst into the exception table under the main route,
current rt6_mtu_change() will no longer be able to access them.
This commit makes rt6_mtu_change_route() function to also go through all
cached routes in the exception table under the main route and do proper
updates on the mtu.
This is a preparation in order to move all cached routes into the
exception table.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/route.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f52ac57dcc99..d9805a857809 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1269,6 +1269,14 @@ static int rt6_insert_exception(struct rt6_info *nrt,
* in rt6_remove_prefsrc()
*/
nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
+ /* rt6_mtu_change() might lower mtu on ort.
+ * Only insert this exception route if its mtu
+ * is less than ort's mtu value.
+ */
+ if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
+ err = -EINVAL;
+ goto out;
+ }
rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
src_key);
@@ -1457,6 +1465,32 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
}
}
+static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+ /* For RTF_CACHE with rt6i_pmtu == 0
+ * (i.e. a redirected route),
+ * the metrics of its rt->dst.from has already
+ * been updated.
+ */
+ if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
+ entry->rt6i_pmtu = mtu;
+ }
+ bucket++;
+ }
+ }
+}
+
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
@@ -3296,6 +3330,10 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
if (rt->dst.dev == arg->dev &&
dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
+ spin_lock_bh(&rt6_exception_lock);
+ /* This case will be removed once the exception table
+ * is hooked up.
+ */
if (rt->rt6i_flags & RTF_CACHE) {
/* For RTF_CACHE with rt6i_pmtu == 0
* (i.e. a redirected route),
@@ -3309,6 +3347,8 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
}
+ rt6_exceptions_update_pmtu(rt, arg->mtu);
+ spin_unlock_bh(&rt6_exception_lock);
}
return 0;
}
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 05/16] ipv6: prepare rt6_clean_tohost() for exception table
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-5-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
If we move all cached dst into the exception table under the main route,
current rt6_clean_tohost() will no longer be able to access them.
This commit makes fib6_clean_tohost() to also go through all cached
routes in exception table and removes cached gateway routes to the
passed in gateway.
This is a preparation in order to move all cached routes into the
exception table.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/route.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 1 deletion(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d9805a857809..e8e901589564 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1491,6 +1491,43 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
}
}
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+ struct in6_addr *gateway)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ return;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp,
+ &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+
+ if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
+ RTF_CACHE_GATEWAY &&
+ ipv6_addr_equal(gateway,
+ &entry->rt6i_gateway)) {
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ }
+ bucket++;
+ }
+ }
+
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
@@ -3240,18 +3277,27 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
}
#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
-#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
+ /* RTF_CACHE_GATEWAY case will be removed once the exception
+ * table is hooked up to store all cached routes.
+ */
if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
return -1;
}
+
+ /* Further clean up cached routes in exception table.
+ * This is needed because cached route may have a different
+ * gateway than its 'parent' in the case of an ip redirect.
+ */
+ rt6_exceptions_clean_tohost(rt, gateway);
+
return 0;
}
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 06/16] ipv6: prepare fib6_age() for exception table
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-6-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
If all dst cache entries are stored in the exception table under the
main route, we have to go through them during fib6_age() when doing
garbage collecting.
Introduce a new function rt6_age_exception() which goes through all dst
entries in the exception table and remove those entries that are expired.
This function is called in fib6_age() so that all dst caches are also
garbage collected.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/ip6_fib.h | 13 +++++++++++
include/net/ip6_route.h | 2 ++
net/ipv6/ip6_fib.c | 26 ++++++++-------------
net/ipv6/route.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 84 insertions(+), 17 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index c4864c1e8f13..11a79ef87a28 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -29,6 +29,14 @@
#define FIB6_TABLE_HASHSZ 1
#endif
+#define RT6_DEBUG 2
+
+#if RT6_DEBUG >= 3
+#define RT6_TRACE(x...) pr_debug(x)
+#else
+#define RT6_TRACE(x...) do { ; } while (0)
+#endif
+
struct rt6_info;
struct fib6_config {
@@ -75,6 +83,11 @@ struct fib6_node {
struct rcu_head rcu;
};
+struct fib6_gc_args {
+ int timeout;
+ int more;
+};
+
#ifndef CONFIG_IPV6_SUBTREES
#define FIB6_SUBTREE(fn) NULL
#else
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 3315605f34c9..a0087fb9864b 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -97,6 +97,8 @@ int ip6_del_rt(struct rt6_info *);
void rt6_flush_exceptions(struct rt6_info *rt);
int rt6_remove_exception_rt(struct rt6_info *rt);
+void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
+ unsigned long now);
static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
const struct in6_addr *daddr,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0ba4fbb2f855..3afbe50f2779 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -38,14 +38,6 @@
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
-#define RT6_DEBUG 2
-
-#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) pr_debug(x)
-#else
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
static struct kmem_cache *fib6_node_kmem __read_mostly;
struct fib6_cleaner {
@@ -1890,12 +1882,6 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-struct fib6_gc_args
-{
- int timeout;
- int more;
-};
-
static int fib6_age(struct rt6_info *rt, void *arg)
{
struct fib6_gc_args *gc_args = arg;
@@ -1904,9 +1890,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
/*
* check addrconf expiration here.
* Routes are expired even if they are in use.
- *
- * Also age clones. Note, that clones are aged out
- * only if they are not in use now.
*/
if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
@@ -1915,6 +1898,9 @@ static int fib6_age(struct rt6_info *rt, void *arg)
return -1;
}
gc_args->more++;
+ /* The following part will soon be removed when the exception
+ * table is hooked up to store all cached routes.
+ */
} else if (rt->rt6i_flags & RTF_CACHE) {
if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1940,6 +1926,12 @@ static int fib6_age(struct rt6_info *rt, void *arg)
gc_args->more++;
}
+ /* Also age clones in the exception table.
+ * Note, that clones are aged out
+ * only if they are not in use now.
+ */
+ rt6_age_exceptions(rt, gc_args, now);
+
return 0;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e8e901589564..d2dd55f58b5d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1528,6 +1528,66 @@ static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
spin_unlock_bh(&rt6_exception_lock);
}
+static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
+ struct rt6_exception *rt6_ex,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ struct rt6_info *rt = rt6_ex->rt6i;
+
+ if (atomic_read(&rt->dst.__refcnt) == 1 &&
+ time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+ RT6_TRACE("aging clone %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ } else if (rt->rt6i_flags & RTF_GATEWAY) {
+ struct neighbour *neigh;
+ __u8 neigh_flags = 0;
+
+ neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
+ if (neigh) {
+ neigh_flags = neigh->flags;
+ neigh_release(neigh);
+ }
+ if (!(neigh_flags & NTF_ROUTER)) {
+ RT6_TRACE("purging route %p via non-router but gateway\n",
+ rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ }
+ gc_args->more++;
+}
+
+void rt6_age_exceptions(struct rt6_info *rt,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ return;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp,
+ &bucket->chain, hlist) {
+ rt6_age_examine_exception(bucket, rt6_ex,
+ gc_args, now);
+ }
+ bucket++;
+ }
+ }
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 07/16] ipv6: prepare fib6_locate() for exception table
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-7-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
fib6_locate() is used to find the fib6_node according to the passed in
prefix address key. It currently tries to find the fib6_node with the
exact match of the passed in key. However, when we move cached routes
into the exception table, fib6_locate() will fail to find the fib6_node
for it as the cached routes will be stored in the exception table under
the fib6_node with the longest prefix match of the cache's dst addr key.
This commit adds a new parameter to let the caller specify if it needs
exact match or longest prefix match.
Right now, all callers still does exact match when calling
fib6_locate(). It will be changed in later commit where exception table
is hooked up to store cached routes.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/ip6_fib.h | 3 ++-
net/ipv6/addrconf.c | 2 +-
net/ipv6/ip6_fib.c | 30 +++++++++++++++++++++++-------
net/ipv6/route.c | 5 +++--
4 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 11a79ef87a28..4497a1eb4d41 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -357,7 +357,8 @@ struct fib6_node *fib6_lookup(struct fib6_node *root,
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len);
+ const struct in6_addr *saddr, int src_len,
+ bool exact_match);
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
void *arg);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 837418ff2d4b..3ccaf52824c9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2322,7 +2322,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
return NULL;
read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
+ fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
if (!fn)
goto out;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 3afbe50f2779..b3e4cf0962f8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1343,14 +1343,21 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
/*
* Get node with specified destination prefix (and source prefix,
* if subtrees are used)
+ * exact_match == true means we try to find fn with exact match of
+ * the passed in prefix addr
+ * exact_match == false means we try to find fn with longest prefix
+ * match of the passed in prefix addr. This is useful for finding fn
+ * for cached route as it will be stored in the exception table under
+ * the node with longest prefix length.
*/
static struct fib6_node *fib6_locate_1(struct fib6_node *root,
const struct in6_addr *addr,
- int plen, int offset)
+ int plen, int offset,
+ bool exact_match)
{
- struct fib6_node *fn;
+ struct fib6_node *fn, *prev = NULL;
for (fn = root; fn ; ) {
struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
@@ -1360,11 +1367,13 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
*/
if (plen < fn->fn_bit ||
!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
- return NULL;
+ goto out;
if (plen == fn->fn_bit)
return fn;
+ prev = fn;
+
/*
* We have more bits to go
*/
@@ -1373,24 +1382,31 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
else
fn = fn->left;
}
- return NULL;
+out:
+ if (exact_match)
+ return NULL;
+ else
+ return prev;
}
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len)
+ const struct in6_addr *saddr, int src_len,
+ bool exact_match)
{
struct fib6_node *fn;
fn = fib6_locate_1(root, daddr, dst_len,
- offsetof(struct rt6_info, rt6i_dst));
+ offsetof(struct rt6_info, rt6i_dst),
+ exact_match);
#ifdef CONFIG_IPV6_SUBTREES
if (src_len) {
WARN_ON(saddr == NULL);
if (fn && fn->subtree)
fn = fib6_locate_1(fn->subtree, saddr, src_len,
- offsetof(struct rt6_info, rt6i_src));
+ offsetof(struct rt6_info, rt6i_src),
+ exact_match);
}
#endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d2dd55f58b5d..855b4ceec349 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2800,7 +2800,8 @@ static int ip6_route_del(struct fib6_config *cfg,
fn = fib6_locate(&table->tb6_root,
&cfg->fc_dst, cfg->fc_dst_len,
- &cfg->fc_src, cfg->fc_src_len);
+ &cfg->fc_src, cfg->fc_src_len,
+ true);
if (fn) {
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
@@ -3009,7 +3010,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
return NULL;
read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
+ fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
if (!fn)
goto out;
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 08/16] ipv6: hook up exception table to store dst cache
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-8-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
This commit makes use of the exception hash table implementation to
store dst caches created by pmtu discovery and ip redirect into the hash
table under the rt_info and no longer inserts these routes into fib6
tree.
This makes the fib6 tree only contain static configured routes and could
now be protected by rcu instead of a rw lock.
With this change, in the route lookup related functions, after finding
the rt6_info with the longest prefix, we also need to search for the
exception table before doing backtracking.
In the route delete function, if the route being deleted is not a dst
cache, deletion of this route also need to flush the whole hash table
under it. If it is a dst cache, then only delete the cached dst in the
hash table.
Note: for fib6_walk_continue() function, w->root now is always pointing
to a root node considering that fib6_prune_clones() is removed from the
code. So we add a WARN_ON() msg to make sure w->root always points to a
root node and also removed the update of w->root in fib6_repair_tree().
This is a prerequisite for later patch because we don't need to make
w->root as rcu protected when replacing rwlock with RCU.
Also, we remove all prune related variables as it is no longer used.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/ip6_fib.h | 1 -
net/ipv6/addrconf.c | 1 -
net/ipv6/ip6_fib.c | 95 ++++++++------------------------------------
net/ipv6/route.c | 108 +++++++++++++++++++++++++-------------------------
4 files changed, 72 insertions(+), 133 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 4497a1eb4d41..d0b7283073e3 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -280,7 +280,6 @@ struct fib6_walker {
struct fib6_node *root, *node;
struct rt6_info *leaf;
enum fib6_walk_state state;
- bool prune;
unsigned int skip;
unsigned int count;
int (*func)(struct fib6_walker *);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3ccaf52824c9..873afafddfc4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2326,7 +2326,6 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
if (!fn)
goto out;
- noflags |= RTF_CACHE;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b3e4cf0962f8..9c8e704e6af7 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -54,7 +54,6 @@ struct fib6_cleaner {
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
static int fib6_walk(struct net *net, struct fib6_walker *w);
@@ -1101,6 +1100,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
return -EINVAL;
+ if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
+ return -EINVAL;
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1192,11 +1193,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
#endif
err = fib6_add_rt2node(fn, rt, info, mxc);
- if (!err) {
+ if (!err)
fib6_start_gc(info->nl_net, rt);
- if (!(rt->rt6i_flags & RTF_CACHE))
- fib6_prune_clones(info->nl_net, pn);
- }
out:
if (err) {
@@ -1511,19 +1509,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (!child) {
- if (w->root == fn) {
- w->root = w->node = NULL;
- RT6_TRACE("W %p adjusted by delroot 1\n", w);
- } else if (w->node == fn) {
+ if (w->node == fn) {
RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
} else {
- if (w->root == fn) {
- w->root = child;
- RT6_TRACE("W %p adjusted by delroot 2\n", w);
- }
if (w->node == fn) {
w->node = child;
if (children&2) {
@@ -1557,12 +1548,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
RT6_TRACE("fib6_del_route\n");
+ WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
+
/* Unlink it */
*rtp = rt->dst.rt6_next;
rt->rt6i_node = NULL;
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
+ /* Flush all cached dst in exception table */
+ rt6_flush_exceptions(rt);
+
/* Reset round-robin state, if necessary */
if (fn->rr_ptr == rt)
fn->rr_ptr = NULL;
@@ -1625,18 +1621,9 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
WARN_ON(!(fn->fn_flags & RTN_RTINFO));
- if (!(rt->rt6i_flags & RTF_CACHE)) {
- struct fib6_node *pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
- /* clones of this route might be in another subtree */
- if (rt->rt6i_src.plen) {
- while (!(pn->fn_flags & RTN_ROOT))
- pn = pn->parent;
- pn = pn->parent;
- }
-#endif
- fib6_prune_clones(info->nl_net, pn);
- }
+ /* remove cached dst from exception table */
+ if (rt->rt6i_flags & RTF_CACHE)
+ return rt6_remove_exception_rt(rt);
/*
* Walk the leaf entries looking for ourself
@@ -1679,16 +1666,14 @@ static int fib6_walk_continue(struct fib6_walker *w)
{
struct fib6_node *fn, *pn;
+ /* w->root should always be table->tb6_root */
+ WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
+
for (;;) {
fn = w->node;
if (!fn)
return 0;
- if (w->prune && fn != w->root &&
- fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
- w->state = FWS_C;
- w->leaf = fn->leaf;
- }
switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
case FWS_S:
@@ -1820,20 +1805,16 @@ static int fib6_clean_node(struct fib6_walker *w)
* func is called on each route.
* It may return -1 -> delete this route.
* 0 -> continue walking
- *
- * prune==1 -> only immediate children of node (certainly,
- * ignoring pure split nodes) will be scanned.
*/
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct rt6_info *, void *arg),
- bool prune, int sernum, void *arg)
+ int sernum, void *arg)
{
struct fib6_cleaner c;
c.w.root = root;
c.w.func = fib6_clean_node;
- c.w.prune = prune;
c.w.count = 0;
c.w.skip = 0;
c.func = func;
@@ -1858,7 +1839,7 @@ static void __fib6_clean_all(struct net *net,
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
write_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, false, sernum, arg);
+ func, sernum, arg);
write_unlock_bh(&table->tb6_lock);
}
}
@@ -1871,22 +1852,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
}
-static int fib6_prune_clone(struct rt6_info *rt, void *arg)
-{
- if (rt->rt6i_flags & RTF_CACHE) {
- RT6_TRACE("pruning clone %p\n", rt);
- return -1;
- }
-
- return 0;
-}
-
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
-{
- fib6_clean_tree(net, fn, fib6_prune_clone, true,
- FIB6_NO_SERNUM_CHANGE, NULL);
-}
-
static void fib6_flush_trees(struct net *net)
{
int new_sernum = fib6_new_sernum(net);
@@ -1914,32 +1879,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
return -1;
}
gc_args->more++;
- /* The following part will soon be removed when the exception
- * table is hooked up to store all cached routes.
- */
- } else if (rt->rt6i_flags & RTF_CACHE) {
- if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
- rt->dst.obsolete = DST_OBSOLETE_KILL;
- if (atomic_read(&rt->dst.__refcnt) == 1 &&
- rt->dst.obsolete == DST_OBSOLETE_KILL) {
- RT6_TRACE("aging clone %p\n", rt);
- return -1;
- } else if (rt->rt6i_flags & RTF_GATEWAY) {
- struct neighbour *neigh;
- __u8 neigh_flags = 0;
-
- neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
- if (neigh) {
- neigh_flags = neigh->flags;
- neigh_release(neigh);
- }
- if (!(neigh_flags & NTF_ROUTER)) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
- return -1;
- }
- }
- gc_args->more++;
}
/* Also age clones in the exception table.
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 855b4ceec349..65130dde276a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -878,8 +878,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6, int flags)
{
+ struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
- struct rt6_info *rt;
read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
@@ -893,6 +893,11 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
if (fn)
goto restart;
}
+ /* Search through exception table */
+ rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+ if (rt_cache)
+ rt = rt_cache;
+
dst_use(&rt->dst, jiffies);
read_unlock_bh(&table->tb6_lock);
@@ -1592,7 +1597,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
- struct rt6_info *rt;
+ struct rt6_info *rt, *rt_cache;
int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1624,6 +1629,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
}
}
+ /*Search through exception table */
+ rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+ if (rt_cache)
+ rt = rt_cache;
if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
dst_use(&rt->dst, jiffies);
@@ -1988,23 +1997,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (!rt6_cache_allowed_for_pmtu(rt6)) {
rt6_do_update_pmtu(rt6, mtu);
+ /* update rt6_ex->stamp for cache */
+ if (rt6->rt6i_flags & RTF_CACHE)
+ rt6_update_exception_stamp_rt(rt6);
} else if (daddr) {
struct rt6_info *nrt6;
nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
-
- /* ip6_ins_rt(nrt6) will bump the
- * rt6->rt6i_node->fn_sernum
- * which will fail the next rt6_check() and
- * invalidate the sk->sk_dst_cache.
- */
- ip6_ins_rt(nrt6);
- /* Release the reference taken in
- * ip6_rt_cache_alloc()
- */
- dst_release(&nrt6->dst);
+ if (rt6_insert_exception(nrt6, rt6))
+ dst_release_immediate(&nrt6->dst);
}
}
}
@@ -2068,7 +2071,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *rt;
+ struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
/* Get the "current" route for this destination and
@@ -2093,8 +2096,23 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
continue;
if (fl6->flowi6_oif != rt->dst.dev->ifindex)
continue;
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+ /* rt_cache's gateway might be different from its 'parent'
+ * in the case of an ip redirect.
+ * So we keep searching in the exception table if the gateway
+ * is different.
+ */
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+ rt_cache = rt6_find_cached_rt(rt,
+ &fl6->daddr,
+ &fl6->saddr);
+ if (rt_cache &&
+ ipv6_addr_equal(&rdfl->gateway,
+ &rt_cache->rt6i_gateway)) {
+ rt = rt_cache;
+ break;
+ }
continue;
+ }
break;
}
@@ -2785,9 +2803,9 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
+ struct rt6_info *rt, *rt_cache;
struct fib6_table *table;
struct fib6_node *fn;
- struct rt6_info *rt;
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
@@ -2801,13 +2819,17 @@ static int ip6_route_del(struct fib6_config *cfg,
fn = fib6_locate(&table->tb6_root,
&cfg->fc_dst, cfg->fc_dst_len,
&cfg->fc_src, cfg->fc_src_len,
- true);
+ !(cfg->fc_flags & RTF_CACHE));
if (fn) {
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- if ((rt->rt6i_flags & RTF_CACHE) &&
- !(cfg->fc_flags & RTF_CACHE))
- continue;
+ if (cfg->fc_flags & RTF_CACHE) {
+ rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+ &cfg->fc_src);
+ if (!rt_cache)
+ continue;
+ rt = rt_cache;
+ }
if (cfg->fc_ifindex &&
(!rt->dst.dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -2933,8 +2955,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_protocol = RTPROT_REDIRECT;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- if (ip6_ins_rt(nrt))
- goto out_release;
+ /* No need to remove rt from the exception table if rt is
+ * a cached route because rt6_insert_exception() will
+ * takes care of it
+ */
+ if (rt6_insert_exception(nrt, rt)) {
+ dst_release_immediate(&nrt->dst);
+ goto out;
+ }
netevent.old = &rt->dst;
netevent.new = &nrt->dst;
@@ -2942,17 +2970,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
netevent.neigh = neigh;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
- if (rt->rt6i_flags & RTF_CACHE) {
- rt = (struct rt6_info *) dst_clone(&rt->dst);
- ip6_del_rt(rt);
- }
-
-out_release:
- /* Release the reference taken in
- * ip6_rt_cache_alloc()
- */
- dst_release(&nrt->dst);
-
out:
neigh_release(neigh);
}
@@ -3344,12 +3361,8 @@ static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
- /* RTF_CACHE_GATEWAY case will be removed once the exception
- * table is hooked up to store all cached routes.
- */
- if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
- ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
- ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
return -1;
}
@@ -3438,20 +3451,9 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
spin_lock_bh(&rt6_exception_lock);
- /* This case will be removed once the exception table
- * is hooked up.
- */
- if (rt->rt6i_flags & RTF_CACHE) {
- /* For RTF_CACHE with rt6i_pmtu == 0
- * (i.e. a redirected route),
- * the metrics of its rt->dst.from has already
- * been updated.
- */
- if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
- rt->rt6i_pmtu = arg->mtu;
- } else if (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ if (dst_mtu(&rt->dst) >= arg->mtu ||
+ (dst_mtu(&rt->dst) < arg->mtu &&
+ dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
}
rt6_exceptions_update_pmtu(rt, arg->mtu);
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 09/16] ipv6: grab rt->rt6i_ref before allocating pcpu rt
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-9-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
After rwlock is replaced with rcu and spinlock, ip6_pol_route() will be
called with only rcu held. That means rt6 route deletion could happen
simultaneously with rt6_make_pcpu_rt(). This could potentially cause
memory leak if rt6_release() is called right before rt6_make_pcpu_rt()
on the same route.
This patch grabs rt->rt6i_ref safely before calling rt6_make_pcpu_rt()
to make sure rt6_release() will not get triggered while
rt6_make_pcpu_rt() is in progress. And rt6_release() is called after
rt6_make_pcpu_rt() is finished.
Note: As we are incrementing rt->rt6i_ref in ip6_pol_route(), there is a
very slim chance that fib6_purge_rt() will be triggered unnecessarily
when deleting a route if ip6_pol_route() running on another thread picks
this route as well and tries to make pcpu cache for it.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/route.c | 58 ++++++++++++++++++++++++++++----------------------------
1 file changed, 29 insertions(+), 29 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 65130dde276a..941c062389d2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1070,7 +1070,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
- struct fib6_table *table = rt->rt6i_table;
struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(rt);
@@ -1081,28 +1080,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
return net->ipv6.ip6_null_entry;
}
- read_lock_bh(&table->tb6_lock);
- if (rt->rt6i_pcpu) {
- p = this_cpu_ptr(rt->rt6i_pcpu);
- prev = cmpxchg(p, NULL, pcpu_rt);
- if (prev) {
- /* If someone did it before us, return prev instead */
- dst_release_immediate(&pcpu_rt->dst);
- pcpu_rt = prev;
- }
- } else {
- /* rt has been removed from the fib6 tree
- * before we have a chance to acquire the read_lock.
- * In this case, don't brother to create a pcpu rt
- * since rt is going away anyway. The next
- * dst_check() will trigger a re-lookup.
- */
+ dst_hold(&pcpu_rt->dst);
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ prev = cmpxchg(p, NULL, pcpu_rt);
+ if (prev) {
+ /* If someone did it before us, return prev instead */
+ /* release refcnt taken by ip6_rt_pcpu_alloc() */
+ dst_release_immediate(&pcpu_rt->dst);
+ /* release refcnt taken by above dst_hold() */
dst_release_immediate(&pcpu_rt->dst);
- pcpu_rt = rt;
+ dst_hold(&prev->dst);
+ pcpu_rt = prev;
}
- dst_hold(&pcpu_rt->dst);
+
rt6_dst_from_metrics_check(pcpu_rt);
- read_unlock_bh(&table->tb6_lock);
return pcpu_rt;
}
@@ -1683,19 +1674,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
if (pcpu_rt) {
read_unlock_bh(&table->tb6_lock);
} else {
- /* We have to do the read_unlock first
- * because rt6_make_pcpu_route() may trigger
- * ip6_dst_gc() which will take the write_lock.
- */
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- pcpu_rt = rt6_make_pcpu_route(rt);
- dst_release(&rt->dst);
+ /* atomic_inc_not_zero() is needed when using rcu */
+ if (atomic_inc_not_zero(&rt->rt6i_ref)) {
+ /* We have to do the read_unlock first
+ * because rt6_make_pcpu_route() may trigger
+ * ip6_dst_gc() which will take the write_lock.
+ *
+ * No dst_hold() on rt is needed because grabbing
+ * rt->rt6i_ref makes sure rt can't be released.
+ */
+ read_unlock_bh(&table->tb6_lock);
+ pcpu_rt = rt6_make_pcpu_route(rt);
+ rt6_release(rt);
+ } else {
+ /* rt is already removed from tree */
+ read_unlock_bh(&table->tb6_lock);
+ pcpu_rt = net->ipv6.ip6_null_entry;
+ dst_hold(&pcpu_rt->dst);
+ }
}
trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
return pcpu_rt;
-
}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 10/16] ipv6: don't release rt->rt6i_pcpu memory during rt6_release()
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-10-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
After rwlock is replaced with rcu and spinlock, route lookup can happen
simultanously with route deletion.
This patch removes the call to free_percpu(rt->rt6i_pcpu) from
rt6_release() to avoid the race condition between rt6_release() and
rt6_get_pcpu_route(). And as free_percpu(rt->rt6i_pcpu) is already
called in ip6_dst_destroy() after the rcu grace period, it is safe to do
this change.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/ip6_fib.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 9c8e704e6af7..eee392f7b1f6 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -190,9 +190,6 @@ void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
*ppcpu_rt = NULL;
}
}
-
- free_percpu(non_pcpu_rt->rt6i_pcpu);
- non_pcpu_rt->rt6i_pcpu = NULL;
}
EXPORT_SYMBOL_GPL(rt6_free_pcpu);
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 11/16] ipv6: replace dst_hold() with dst_hold_safe() in routing code
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-11-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
With rwlock, it is safe to call dst_hold() in the read thread because
read thread is guaranteed to be separated from write thread.
However, after we replace rwlock with rcu, it is no longer safe to use
dst_hold(). A dst might already have been deleted but is waiting for the
rcu grace period to pass before freeing the memory when a read thread is
trying to do dst_hold(). This could potentially cause double free issue.
So this commit replaces all dst_hold() with dst_hold_safe() in all read
thread to avoid this double free issue.
And in order to make the code more compact, a new function ip6_hold_safe()
is introduced. It calls dst_hold_safe() first, and if that fails, it will
either fall back to hold and return net->ipv6.ip6_null_entry or set rt to
NULL according to the caller's need.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/addrconf.c | 3 ++-
net/ipv6/route.c | 71 +++++++++++++++++++++++++++++++++++++++--------------
2 files changed, 54 insertions(+), 20 deletions(-)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 873afafddfc4..f86e931d555e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2333,7 +2333,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
continue;
if ((rt->rt6i_flags & noflags) != 0)
continue;
- dst_hold(&rt->dst);
+ if (!dst_hold_safe(&rt->dst))
+ rt = NULL;
break;
}
out:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 941c062389d2..aeb349aea429 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -874,6 +874,23 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
}
}
+static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
+ bool null_fallback)
+{
+ struct rt6_info *rt = *prt;
+
+ if (dst_hold_safe(&rt->dst))
+ return true;
+ if (null_fallback) {
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ } else {
+ rt = NULL;
+ }
+ *prt = rt;
+ return false;
+}
+
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6, int flags)
@@ -898,7 +915,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
if (rt_cache)
rt = rt_cache;
- dst_use(&rt->dst, jiffies);
+ if (ip6_hold_safe(net, &rt, true))
+ dst_use_noref(&rt->dst, jiffies);
+
read_unlock_bh(&table->tb6_lock);
trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
@@ -1061,10 +1080,9 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
p = this_cpu_ptr(rt->rt6i_pcpu);
pcpu_rt = *p;
- if (pcpu_rt) {
- dst_hold(&pcpu_rt->dst);
+ if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
rt6_dst_from_metrics_check(pcpu_rt);
- }
+
return pcpu_rt;
}
@@ -1625,12 +1643,17 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
if (rt_cache)
rt = rt_cache;
- if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
- dst_use(&rt->dst, jiffies);
+ if (rt == net->ipv6.ip6_null_entry) {
+ read_unlock_bh(&table->tb6_lock);
+ dst_hold(&rt->dst);
+ trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ return rt;
+ } else if (rt->rt6i_flags & RTF_CACHE) {
+ if (ip6_hold_safe(net, &rt, true)) {
+ dst_use_noref(&rt->dst, jiffies);
+ rt6_dst_from_metrics_check(rt);
+ }
read_unlock_bh(&table->tb6_lock);
-
- rt6_dst_from_metrics_check(rt);
-
trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
@@ -1643,7 +1666,13 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
struct rt6_info *uncached_rt;
- dst_use(&rt->dst, jiffies);
+ if (ip6_hold_safe(net, &rt, true)) {
+ dst_use_noref(&rt->dst, jiffies);
+ } else {
+ read_unlock_bh(&table->tb6_lock);
+ uncached_rt = rt;
+ goto uncached_rt_out;
+ }
read_unlock_bh(&table->tb6_lock);
uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
@@ -1659,6 +1688,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
dst_hold(&uncached_rt->dst);
}
+uncached_rt_out:
trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
return uncached_rt;
@@ -1667,8 +1697,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
struct rt6_info *pcpu_rt;
- rt->dst.lastuse = jiffies;
- rt->dst.__use++;
+ dst_use_noref(&rt->dst, jiffies);
pcpu_rt = rt6_get_pcpu_route(rt);
if (pcpu_rt) {
@@ -2130,7 +2159,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
}
out:
- dst_hold(&rt->dst);
+ ip6_hold_safe(net, &rt, true);
read_unlock_bh(&table->tb6_lock);
@@ -2841,7 +2870,8 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
continue;
- dst_hold(&rt->dst);
+ if (!dst_hold_safe(&rt->dst))
+ break;
read_unlock_bh(&table->tb6_lock);
/* if gateway was specified only delete the one hop */
@@ -3038,7 +3068,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
continue;
if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
continue;
- dst_hold(&rt->dst);
+ ip6_hold_safe(NULL, &rt, false);
break;
}
out:
@@ -3096,7 +3126,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
break;
}
if (rt)
- dst_hold(&rt->dst);
+ ip6_hold_safe(NULL, &rt, false);
read_unlock_bh(&table->tb6_lock);
return rt;
}
@@ -3139,9 +3169,12 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
(!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- ip6_del_rt(rt);
+ if (dst_hold_safe(&rt->dst)) {
+ read_unlock_bh(&table->tb6_lock);
+ ip6_del_rt(rt);
+ } else {
+ read_unlock_bh(&table->tb6_lock);
+ }
goto restart;
}
}
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 12/16] ipv6: update fn_sernum after route is inserted to tree
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-12-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
fib6_add() logic currently calls fib6_add_1() to figure out what node
should be used for the newly added route and then call
fib6_add_rt2node() to insert the route to the node.
And during the call of fib6_add_1(), fn_sernum is updated for all nodes
that share the same prefix as the new route.
This does not have issue in the current code because reader thread will
not be able to access the tree while writer thread is inserting new
route to it. However, it is not the case once we transition to use RCU.
Reader thread could potentially see the new fn_sernum before the new
route is inserted. As a result, reader thread's route lookup will return
a stale route with the new fn_sernum.
In order to solve this issue, we remove all the update of fn_sernum in
fib6_add_1(), and instead, introduce a new function that updates fn_sernum
for all related nodes and call this functions once the route is
successfully inserted to the tree.
Also, smp_wmb() is used after a route is successfully inserted into the
fib tree and right before the updated of fn->sernum. And smp_rmb() is
used right after fn->sernum is accessed in rt6_get_cookie_safe(). This
is to guarantee that when the reader thread sees the new fn->sernum, the
new route is already inserted in the tree in memory.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/ip6_fib.h | 2 ++
net/ipv6/ip6_fib.c | 39 +++++++++++++++++++++------------------
2 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index d0b7283073e3..6bf929b50951 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -220,6 +220,8 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
if (fn) {
*cookie = fn->fn_sernum;
+ /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */
+ smp_rmb();
status = true;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index eee392f7b1f6..f604b311cc3e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -585,7 +585,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
static struct fib6_node *fib6_add_1(struct fib6_node *root,
struct in6_addr *addr, int plen,
int offset, int allow_create,
- int replace_required, int sernum,
+ int replace_required,
struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *in, *ln;
@@ -631,8 +631,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn->leaf = NULL;
}
- fn->fn_sernum = sernum;
-
return fn;
}
@@ -641,7 +639,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
*/
/* Try to walk down on tree. */
- fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
fn = dir ? fn->right : fn->left;
@@ -677,7 +674,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
ln->fn_bit = plen;
ln->parent = pn;
- ln->fn_sernum = sernum;
if (dir)
pn->right = ln;
@@ -737,8 +733,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
in->leaf = fn->leaf;
atomic_inc(&in->leaf->rt6i_ref);
- in->fn_sernum = sernum;
-
/* update parent pointer */
if (dir)
pn->right = in;
@@ -750,8 +744,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
ln->parent = in;
fn->parent = in;
- ln->fn_sernum = sernum;
-
if (addr_bit_set(addr, bit)) {
in->right = ln;
in->left = fn;
@@ -776,8 +768,6 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
ln->parent = pn;
- ln->fn_sernum = sernum;
-
if (dir)
pn->right = ln;
else
@@ -1079,6 +1069,20 @@ void fib6_force_start_gc(struct net *net)
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
+static void fib6_update_sernum_upto_root(struct rt6_info *rt,
+ int sernum)
+{
+ struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+
+ /* paired with smp_rmb() in rt6_get_cookie_safe() */
+ smp_wmb();
+ while (fn) {
+ fn->fn_sernum = sernum;
+ fn = fn->parent;
+ }
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
@@ -1111,7 +1115,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
- replace_required, sernum, extack);
+ replace_required, extack);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
fn = NULL;
@@ -1145,15 +1149,13 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
sfn->fn_flags = RTN_ROOT;
- sfn->fn_sernum = sernum;
/* Now add the first leaf node to new subtree */
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum,
- extack);
+ allow_create, replace_required, extack);
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
@@ -1172,8 +1174,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum,
- extack);
+ allow_create, replace_required, extack);
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
@@ -1190,8 +1191,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
#endif
err = fib6_add_rt2node(fn, rt, info, mxc);
- if (!err)
+ if (!err) {
+ fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
+ }
out:
if (err) {
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 14/16] ipv6: add key length check into rt6_select()
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-14-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
After rwlock is replaced with rcu and spinlock, fib6_lookup() could
potentially return an intermediate node if other thread is doing
fib6_del() on a route which is the only route on the node so that
fib6_repair_tree() will be called on this node and potentially assigns
fn->leaf to the its child's fn->leaf.
In order to detect this situation in rt6_select(), we have to check if
fn->fn_bit is consistent with the key length stored in the route. And
depending on if the fn is in the subtree or not, the key is either
rt->rt6i_dst or rt->rt6i_src.
If any inconsistency is found, that means the node no longer holds valid
routes in it. So net->ipv6.ip6_null_entry is returned.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/route.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 05dc450af441..24b80f43bbfb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -755,6 +755,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
struct rt6_info *leaf = fn->leaf;
struct rt6_info *match, *rt0;
bool do_rr = false;
+ int key_plen;
if (!leaf)
return net->ipv6.ip6_null_entry;
@@ -763,6 +764,19 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
if (!rt0)
fn->rr_ptr = rt0 = leaf;
+ /* Double check to make sure fn is not an intermediate node
+ * and fn->leaf does not points to its child's leaf
+ * (This might happen if all routes under fn are deleted from
+ * the tree and fib6_repair_tree() is called on the node.)
+ */
+ key_plen = rt0->rt6i_dst.plen;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rt0->rt6i_src.plen)
+ key_plen = rt0->rt6i_src.plen;
+#endif
+ if (fn->fn_bit != key_plen)
+ return net->ipv6.ip6_null_entry;
+
match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
&do_rr);
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 13/16] ipv6: check fn->leaf before it is used
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-13-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
If rwlock is replaced with rcu and spinlock, it is possible that the
reader thread will see fn->leaf as NULL in the following scenarios:
1. fib6_add() is in progress and we have already inserted a new node but
not yet inserted the route.
2. fib6_del_route() is in progress and we have already set fn->leaf to
NULL but not yet freed the node because of rcu grace period.
This patch makes sure all the reader threads check fn->leaf first before
using it. And together with later patch to grab rcu_read_lock() and
rcu_dereference() fn->leaf, it makes sure reader threads are safe when
accessing fn->leaf.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv6/ip6_fib.c | 23 ++++++++++++++++++-----
net/ipv6/route.c | 20 ++++++++++++--------
2 files changed, 30 insertions(+), 13 deletions(-)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f604b311cc3e..cf6137e81408 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1279,10 +1279,13 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
while (fn) {
if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
+ struct rt6_info *leaf = fn->leaf;
struct rt6key *key;
- key = (struct rt6key *) ((u8 *) fn->leaf +
- args->offset);
+ if (!leaf)
+ goto backtrack;
+
+ key = (struct rt6key *) ((u8 *)leaf + args->offset);
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
@@ -1299,9 +1302,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
return fn;
}
}
-#ifdef CONFIG_IPV6_SUBTREES
backtrack:
-#endif
if (fn->fn_flags & RTN_ROOT)
break;
@@ -1358,7 +1359,18 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
struct fib6_node *fn, *prev = NULL;
for (fn = root; fn ; ) {
- struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+ struct rt6_info *leaf = fn->leaf;
+ struct rt6key *key;
+
+ /* This node is being deleted */
+ if (!leaf) {
+ if (plen <= fn->fn_bit)
+ goto out;
+ else
+ goto next;
+ }
+
+ key = (struct rt6key *)((u8 *)leaf + offset);
/*
* Prefix match
@@ -1372,6 +1384,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
prev = fn;
+next:
/*
* We have more bits to go
*/
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aeb349aea429..05dc450af441 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -712,6 +712,7 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+ struct rt6_info *leaf,
struct rt6_info *rr_head,
u32 metric, int oif, int strict,
bool *do_rr)
@@ -730,7 +731,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
- for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+ for (rt = leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -748,17 +749,21 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
return match;
}
-static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+ int oif, int strict)
{
+ struct rt6_info *leaf = fn->leaf;
struct rt6_info *match, *rt0;
- struct net *net;
bool do_rr = false;
+ if (!leaf)
+ return net->ipv6.ip6_null_entry;
+
rt0 = fn->rr_ptr;
if (!rt0)
- fn->rr_ptr = rt0 = fn->leaf;
+ fn->rr_ptr = rt0 = leaf;
- match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+ match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
&do_rr);
if (do_rr) {
@@ -766,13 +771,12 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
- next = fn->leaf;
+ next = leaf;
if (next != rt0)
fn->rr_ptr = next;
}
- net = dev_net(rt0->dst.dev);
return match ? match : net->ipv6.ip6_null_entry;
}
@@ -1623,7 +1627,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
oif = 0;
redo_rt6_select:
- rt = rt6_select(fn, oif, strict);
+ rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
rt = rt6_multipath_select(rt, fl6, oif, strict);
if (rt == net->ipv6.ip6_null_entry) {
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 15/16] ipv6: replace rwlock with rcu and spinlock in fib6_table
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-15-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
With all the preparation work before, we are now ready to replace rwlock
with rcu and spinlock in fib6_table.
That means now all fib6_node in fib6_table are protected by rcu. And
when freeing fib6_node, call_rcu() is used to wait for the rcu grace
period before releasing the memory.
When accessing fib6_node, corresponding rcu APIs need to be used.
And all previous sessions protected by the write lock will now be
protected by the spin lock per table.
All previous sessions protected by read lock will now be protected by
rcu_read_lock().
A couple of things to note here:
1. As part of the work of replacing rwlock with rcu, the linked list of
fn->leaf now has to be rcu protected as well. So both fn->leaf and
rt->dst.rt6_next are now __rcu tagged and corresponding rcu APIs are
used when manipulating them.
2. For fn->rr_ptr, first of all, it also needs to be rcu protected now
and is tagged with __rcu and rcu APIs are used in corresponding places.
Secondly, fn->rr_ptr is changed in rt6_select() which is a reader
thread. This makes the issue a bit complicated. We think a valid
solution for it is to let rt6_select() grab the tb6_lock if it decides
to change it. As it is not in the normal operation and only happens when
there is no valid neighbor cache for the route, we think the performance
impact should be low.
3. fib6_walk_continue() has to be called with tb6_lock held even in the
route dumping related functions, e.g. inet6_dump_fib(),
fib6_tables_dump() and ipv6_route_seq_ops. It is because
fib6_walk_continue() makes modifications to the walker structure, and so
are fib6_repair_tree() and fib6_del_route(). In order to do proper
syncing between them, we need to let fib6_walk_continue() hold the lock.
We may be able to do further improvement on the way we do the tree walk
to get rid of the need for holding the spin lock. But not for now.
4. When fib6_del_route() removes a route from the tree, we no longer
mark rt->dst.rt6_next to NULL to make simultaneous reader be able to
further traverse the list with rcu. However, rt->dst.rt6_next is only
valid within this same rcu period. No one should access it later.
5. All the operation of atomic_inc(rt->rt6i_ref) is changed to be
performed before we publish this route (either by linking it to fn->leaf
or insert it in the list pointed by fn->leaf) just to be safe because as
soon as we publish the route, some read thread will be able to access it.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/dst.h | 2 +-
include/net/ip6_fib.h | 24 ++-
net/ipv6/addrconf.c | 11 +-
net/ipv6/ip6_fib.c | 405 ++++++++++++++++++++++++++++++--------------------
net/ipv6/route.c | 121 ++++++++-------
5 files changed, 333 insertions(+), 230 deletions(-)
diff --git a/include/net/dst.h b/include/net/dst.h
index 06a6765da074..204c19e25456 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -101,7 +101,7 @@ struct dst_entry {
union {
struct dst_entry *next;
struct rtable __rcu *rt_next;
- struct rt6_info *rt6_next;
+ struct rt6_info __rcu *rt6_next;
struct dn_route __rcu *dn_next;
};
};
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 6bf929b50951..0b438b9bcb10 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -68,18 +68,18 @@ struct fib6_config {
};
struct fib6_node {
- struct fib6_node *parent;
- struct fib6_node *left;
- struct fib6_node *right;
+ struct fib6_node __rcu *parent;
+ struct fib6_node __rcu *left;
+ struct fib6_node __rcu *right;
#ifdef CONFIG_IPV6_SUBTREES
- struct fib6_node *subtree;
+ struct fib6_node __rcu *subtree;
#endif
- struct rt6_info *leaf;
+ struct rt6_info __rcu *leaf;
__u16 fn_bit; /* bit key */
__u16 fn_flags;
int fn_sernum;
- struct rt6_info *rr_ptr;
+ struct rt6_info __rcu *rr_ptr;
struct rcu_head rcu;
};
@@ -91,7 +91,7 @@ struct fib6_gc_args {
#ifndef CONFIG_IPV6_SUBTREES
#define FIB6_SUBTREE(fn) NULL
#else
-#define FIB6_SUBTREE(fn) ((fn)->subtree)
+#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1))
#endif
struct mx6_config {
@@ -174,6 +174,14 @@ struct rt6_info {
unused:7;
};
+#define for_each_fib6_node_rt_rcu(fn) \
+ for (rt = rcu_dereference((fn)->leaf); rt; \
+ rt = rcu_dereference(rt->dst.rt6_next))
+
+#define for_each_fib6_walker_rt(w) \
+ for (rt = (w)->leaf; rt; \
+ rt = rcu_dereference_protected(rt->dst.rt6_next, 1))
+
static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
return ((struct rt6_info *)dst)->rt6i_idev;
@@ -310,7 +318,7 @@ struct rt6_statistics {
struct fib6_table {
struct hlist_node tb6_hlist;
u32 tb6_id;
- rwlock_t tb6_lock;
+ spinlock_t tb6_lock;
struct fib6_node tb6_root;
struct inet_peer_base tb6_peers;
unsigned int flags;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f86e931d555e..9854d93e45bb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2321,12 +2321,12 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
if (!fn)
goto out;
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;
if ((rt->rt6i_flags & flags) != flags)
@@ -2338,7 +2338,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
break;
}
out:
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -5898,10 +5898,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
spin_lock(&ifa->lock);
if (ifa->rt) {
struct rt6_info *rt = ifa->rt;
- struct fib6_table *table = rt->rt6i_table;
int cpu;
- read_lock(&table->tb6_lock);
+ rcu_read_lock();
addrconf_set_nopolicy(ifa->rt, val);
if (rt->rt6i_pcpu) {
for_each_possible_cpu(cpu) {
@@ -5911,7 +5910,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
addrconf_set_nopolicy(*rtp, val);
}
}
- read_unlock(&table->tb6_lock);
+ rcu_read_unlock();
}
spin_unlock(&ifa->lock);
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cf6137e81408..3f95908b39c3 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -54,8 +54,12 @@ struct fib6_cleaner {
#define FWS_INIT FWS_L
#endif
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
-static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
+static struct rt6_info *fib6_find_prefix(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn);
+static struct fib6_node *fib6_repair_tree(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn);
static int fib6_walk(struct net *net, struct fib6_walker *w);
static int fib6_walk_continue(struct fib6_walker *w);
@@ -107,12 +111,12 @@ void fib6_update_sernum(struct rt6_info *rt)
struct net *net = dev_net(rt->dst.dev);
struct fib6_node *fn;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
fn = rcu_dereference_protected(rt->rt6i_node,
lockdep_is_held(&table->tb6_lock));
if (fn)
fn->fn_sernum = fib6_new_sernum(net);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
}
/*
@@ -207,8 +211,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb)
* Initialize table lock at a single place to give lockdep a key,
* tables aren't visible prior to being linked to the list.
*/
- rwlock_init(&tb->tb6_lock);
-
+ spin_lock_init(&tb->tb6_lock);
h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
/*
@@ -227,7 +230,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
table = kzalloc(sizeof(*table), GFP_ATOMIC);
if (table) {
table->tb6_id = id;
- table->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(table->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
}
@@ -324,11 +328,8 @@ unsigned int fib6_tables_seq_read(struct net *net)
struct hlist_head *head = &net->ipv6.fib_table_hash[h];
struct fib6_table *tb;
- hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
- read_lock_bh(&tb->tb6_lock);
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist)
fib_seq += tb->fib_seq;
- read_unlock_bh(&tb->tb6_lock);
- }
}
rcu_read_unlock();
@@ -374,7 +375,7 @@ static int fib6_node_dump(struct fib6_walker *w)
{
struct rt6_info *rt;
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+ for_each_fib6_walker_rt(w)
fib6_rt_dump(rt, w->args);
w->leaf = NULL;
return 0;
@@ -384,9 +385,9 @@ static void fib6_table_dump(struct net *net, struct fib6_table *tb,
struct fib6_walker *w)
{
w->root = &tb->tb6_root;
- read_lock_bh(&tb->tb6_lock);
+ spin_lock_bh(&tb->tb6_lock);
fib6_walk(net, w);
- read_unlock_bh(&tb->tb6_lock);
+ spin_unlock_bh(&tb->tb6_lock);
}
/* Called with rcu_read_lock() */
@@ -423,7 +424,7 @@ static int fib6_dump_node(struct fib6_walker *w)
int res;
struct rt6_info *rt;
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_walker_rt(w) {
res = rt6_dump_route(rt, w->args);
if (res < 0) {
/* Frame is full, suspend walking */
@@ -482,9 +483,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
w->count = 0;
w->skip = 0;
- read_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
res = fib6_walk(net, w);
- read_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
cb->args[5] = w->root->fn_sernum;
@@ -499,9 +500,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
} else
w->skip = 0;
- read_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
res = fib6_walk_continue(w);
- read_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
if (res <= 0) {
fib6_walker_unlink(net, w);
cb->args[4] = 0;
@@ -582,11 +583,12 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
* node.
*/
-static struct fib6_node *fib6_add_1(struct fib6_node *root,
- struct in6_addr *addr, int plen,
- int offset, int allow_create,
- int replace_required,
- struct netlink_ext_ack *extack)
+static struct fib6_node *fib6_add_1(struct fib6_table *table,
+ struct fib6_node *root,
+ struct in6_addr *addr, int plen,
+ int offset, int allow_create,
+ int replace_required,
+ struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
@@ -601,7 +603,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn = root;
do {
- key = (struct rt6key *)((u8 *)fn->leaf + offset);
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ key = (struct rt6key *)((u8 *)leaf + offset);
/*
* Prefix match
@@ -627,8 +631,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
if (plen == fn->fn_bit) {
/* clean up an intermediate node */
if (!(fn->fn_flags & RTN_RTINFO)) {
- rt6_release(fn->leaf);
- fn->leaf = NULL;
+ RCU_INIT_POINTER(fn->leaf, NULL);
+ rt6_release(leaf);
}
return fn;
@@ -641,7 +645,11 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
/* Try to walk down on tree. */
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right : fn->left;
+ fn = dir ?
+ rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock)) :
+ rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
} while (fn);
if (!allow_create) {
@@ -672,13 +680,12 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
if (!ln)
return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
-
- ln->parent = pn;
+ RCU_INIT_POINTER(ln->parent, pn);
if (dir)
- pn->right = ln;
+ rcu_assign_pointer(pn->right, ln);
else
- pn->left = ln;
+ rcu_assign_pointer(pn->left, ln);
return ln;
@@ -692,7 +699,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
* and the current
*/
- pn = fn->parent;
+ pn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
/* find 1st bit in difference between the 2 addrs.
@@ -729,27 +737,28 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
in->fn_bit = bit;
- in->parent = pn;
+ RCU_INIT_POINTER(in->parent, pn);
in->leaf = fn->leaf;
- atomic_inc(&in->leaf->rt6i_ref);
+ atomic_inc(&rcu_dereference_protected(in->leaf,
+ lockdep_is_held(&table->tb6_lock))->rt6i_ref);
/* update parent pointer */
if (dir)
- pn->right = in;
+ rcu_assign_pointer(pn->right, in);
else
- pn->left = in;
+ rcu_assign_pointer(pn->left, in);
ln->fn_bit = plen;
- ln->parent = in;
- fn->parent = in;
+ RCU_INIT_POINTER(ln->parent, in);
+ rcu_assign_pointer(fn->parent, in);
if (addr_bit_set(addr, bit)) {
- in->right = ln;
- in->left = fn;
+ rcu_assign_pointer(in->right, ln);
+ rcu_assign_pointer(in->left, fn);
} else {
- in->left = ln;
- in->right = fn;
+ rcu_assign_pointer(in->left, ln);
+ rcu_assign_pointer(in->right, fn);
}
} else { /* plen <= bit */
@@ -766,19 +775,19 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
ln->fn_bit = plen;
- ln->parent = pn;
-
- if (dir)
- pn->right = ln;
- else
- pn->left = ln;
+ RCU_INIT_POINTER(ln->parent, pn);
if (addr_bit_set(&key->addr, plen))
- ln->right = fn;
+ RCU_INIT_POINTER(ln->right, fn);
else
- ln->left = fn;
+ RCU_INIT_POINTER(ln->left, fn);
+
+ rcu_assign_pointer(fn->parent, ln);
- fn->parent = ln;
+ if (dir)
+ rcu_assign_pointer(pn->right, ln);
+ else
+ rcu_assign_pointer(pn->left, ln);
}
return ln;
}
@@ -824,6 +833,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
struct net *net)
{
+ struct fib6_table *table = rt->rt6i_table;
+
if (atomic_read(&rt->rt6i_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
@@ -832,12 +843,17 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
* to still alive ones.
*/
while (fn) {
- if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
- fn->leaf = fib6_find_prefix(net, fn);
- atomic_inc(&fn->leaf->rt6i_ref);
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *new_leaf;
+ if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
+ new_leaf = fib6_find_prefix(net, table, fn);
+ atomic_inc(&new_leaf->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, new_leaf);
rt6_release(rt);
}
- fn = fn->parent;
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
}
}
}
@@ -849,9 +865,11 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
struct nl_info *info, struct mx6_config *mxc)
{
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
struct rt6_info *iter = NULL;
- struct rt6_info **ins;
- struct rt6_info **fallback_ins = NULL;
+ struct rt6_info __rcu **ins;
+ struct rt6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
int add = (!info->nlh ||
@@ -866,7 +884,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
ins = &fn->leaf;
- for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
+ for (iter = leaf; iter;
+ iter = rcu_dereference_protected(iter->dst.rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
/*
* Search for duplicates
*/
@@ -928,7 +948,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (fallback_ins && !found) {
/* No ECMP-able route found, replace first non-ECMP one */
ins = fallback_ins;
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
found++;
}
@@ -942,7 +963,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
struct rt6_info *sibling, *temp_sibling;
/* Find the first route that have the same metric */
- sibling = fn->leaf;
+ sibling = leaf;
while (sibling) {
if (sibling->rt6i_metric == rt->rt6i_metric &&
rt6_qualify_for_ecmp(sibling)) {
@@ -950,7 +971,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
&sibling->rt6i_siblings);
break;
}
- sibling = sibling->dst.rt6_next;
+ sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
@@ -979,10 +1001,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (err)
return err;
- rt->dst.rt6_next = iter;
- *ins = rt;
- rcu_assign_pointer(rt->rt6i_node, fn);
+ rcu_assign_pointer(rt->dst.rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(rt->rt6i_node, fn);
+ rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
rt);
if (!info->skip_notify)
@@ -1008,10 +1030,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (err)
return err;
- *ins = rt;
+ atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rt->dst.rt6_next = iter->dst.rt6_next;
- atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
rt);
if (!info->skip_notify)
@@ -1023,14 +1045,15 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
nsiblings = iter->rt6i_nsiblings;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
- if (fn->rr_ptr == iter)
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
rt6_release(iter);
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
ins = &rt->dst.rt6_next;
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
while (iter) {
if (iter->rt6i_metric > rt->rt6i_metric)
break;
@@ -1038,14 +1061,15 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
*ins = iter->dst.rt6_next;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
- if (fn->rr_ptr == iter)
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
rt6_release(iter);
nsiblings--;
} else {
ins = &iter->dst.rt6_next;
}
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
WARN_ON(nsiblings != 0);
}
@@ -1079,7 +1103,8 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt,
smp_wmb();
while (fn) {
fn->fn_sernum = sernum;
- fn = fn->parent;
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
}
@@ -1087,12 +1112,14 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt,
* Add routing information to the routing tree.
* <destination addr>/<source addr>
* with source addr info in sub-trees
+ * Need to own table->tb6_lock
*/
int fib6_add(struct fib6_node *root, struct rt6_info *rt,
struct nl_info *info, struct mx6_config *mxc,
struct netlink_ext_ack *extack)
{
+ struct fib6_table *table = rt->rt6i_table;
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
int allow_create = 1;
@@ -1113,7 +1140,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (!allow_create && !replace_required)
pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
- fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+ fn = fib6_add_1(table, root,
+ &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
replace_required, extack);
if (IS_ERR(fn)) {
@@ -1128,7 +1156,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (rt->rt6i_src.plen) {
struct fib6_node *sn;
- if (!fn->subtree) {
+ if (!rcu_access_pointer(fn->subtree)) {
struct fib6_node *sfn;
/*
@@ -1146,13 +1174,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (!sfn)
goto failure;
- sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+ rcu_assign_pointer(sfn->leaf,
+ info->nl_net->ipv6.ip6_null_entry);
sfn->fn_flags = RTN_ROOT;
/* Now add the first leaf node to new subtree */
- sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
+ sn = fib6_add_1(table, sfn, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required, extack);
@@ -1168,10 +1197,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
}
/* Now link new subtree to main tree */
- sfn->parent = fn;
- fn->subtree = sfn;
+ rcu_assign_pointer(sfn->parent, fn);
+ rcu_assign_pointer(fn->subtree, sfn);
} else {
- sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
+ sn = fib6_add_1(table, FIB6_SUBTREE(fn), &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required, extack);
@@ -1182,9 +1211,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
}
}
- if (!fn->leaf) {
- fn->leaf = rt;
+ if (!rcu_access_pointer(fn->leaf)) {
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, rt);
}
fn = sn;
}
@@ -1203,19 +1232,23 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- if (pn != fn && pn->leaf == rt) {
- pn->leaf = NULL;
+ struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn != fn && pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
atomic_dec(&rt->rt6i_ref);
}
- if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn->leaf = fib6_find_prefix(info->nl_net, pn);
+ if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
#if RT6_DEBUG >= 2
- if (!pn->leaf) {
- WARN_ON(pn->leaf == NULL);
- pn->leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf = info->nl_net->ipv6.ip6_null_entry;
}
#endif
- atomic_inc(&pn->leaf->rt6i_ref);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
}
#endif
goto failure;
@@ -1230,7 +1263,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
* fn->leaf.
*/
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
- fib6_repair_tree(info->nl_net, fn);
+ fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
*/
@@ -1268,7 +1301,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
dir = addr_bit_set(args->addr, fn->fn_bit);
- next = dir ? fn->right : fn->left;
+ next = dir ? rcu_dereference(fn->right) :
+ rcu_dereference(fn->left);
if (next) {
fn = next;
@@ -1278,8 +1312,10 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
}
while (fn) {
- if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
- struct rt6_info *leaf = fn->leaf;
+ struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+ if (subtree || fn->fn_flags & RTN_RTINFO) {
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6key *key;
if (!leaf)
@@ -1289,10 +1325,9 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
- if (fn->subtree) {
+ if (subtree) {
struct fib6_node *sfn;
- sfn = fib6_lookup_1(fn->subtree,
- args + 1);
+ sfn = fib6_lookup_1(subtree, args + 1);
if (!sfn)
goto backtrack;
fn = sfn;
@@ -1306,12 +1341,14 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
if (fn->fn_flags & RTN_ROOT)
break;
- fn = fn->parent;
+ fn = rcu_dereference(fn->parent);
}
return NULL;
}
+/* called with rcu_read_lock() held
+ */
struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
@@ -1359,7 +1396,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
struct fib6_node *fn, *prev = NULL;
for (fn = root; fn ; ) {
- struct rt6_info *leaf = fn->leaf;
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6key *key;
/* This node is being deleted */
@@ -1389,9 +1426,9 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
* We have more bits to go
*/
if (addr_bit_set(addr, fn->fn_bit))
- fn = fn->right;
+ fn = rcu_dereference(fn->right);
else
- fn = fn->left;
+ fn = rcu_dereference(fn->left);
}
out:
if (exact_match)
@@ -1413,9 +1450,11 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
#ifdef CONFIG_IPV6_SUBTREES
if (src_len) {
+ struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
WARN_ON(saddr == NULL);
- if (fn && fn->subtree)
- fn = fib6_locate_1(fn->subtree, saddr, src_len,
+ if (fn && subtree)
+ fn = fib6_locate_1(subtree, saddr, src_len,
offsetof(struct rt6_info, rt6i_src),
exact_match);
}
@@ -1433,16 +1472,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
*
*/
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
+static struct rt6_info *fib6_find_prefix(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn)
{
+ struct fib6_node *child_left, *child_right;
+
if (fn->fn_flags & RTN_ROOT)
return net->ipv6.ip6_null_entry;
while (fn) {
- if (fn->left)
- return fn->left->leaf;
- if (fn->right)
- return fn->right->leaf;
+ child_left = rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
+ child_right = rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock));
+ if (child_left)
+ return rcu_dereference_protected(child_left->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (child_right)
+ return rcu_dereference_protected(child_right->leaf,
+ lockdep_is_held(&table->tb6_lock));
fn = FIB6_SUBTREE(fn);
}
@@ -1452,31 +1501,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
/*
* Called to trim the tree of intermediate nodes when possible. "fn"
* is the node we want to try and remove.
+ * Need to own table->tb6_lock
*/
static struct fib6_node *fib6_repair_tree(struct net *net,
- struct fib6_node *fn)
+ struct fib6_table *table,
+ struct fib6_node *fn)
{
int children;
int nstate;
- struct fib6_node *child, *pn;
+ struct fib6_node *child;
struct fib6_walker *w;
int iter = 0;
for (;;) {
+ struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *new_fn_leaf;
+
RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
WARN_ON(fn->fn_flags & RTN_TL_ROOT);
- WARN_ON(fn->leaf);
+ WARN_ON(fn_leaf);
children = 0;
child = NULL;
- if (fn->right)
- child = fn->right, children |= 1;
- if (fn->left)
- child = fn->left, children |= 2;
+ if (fn_r)
+ child = fn_r, children |= 1;
+ if (fn_l)
+ child = fn_l, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
@@ -1484,36 +1551,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
|| (children && fn->fn_flags & RTN_ROOT)
#endif
) {
- fn->leaf = fib6_find_prefix(net, fn);
+ new_fn_leaf = fib6_find_prefix(net, table, fn);
#if RT6_DEBUG >= 2
- if (!fn->leaf) {
- WARN_ON(!fn->leaf);
- fn->leaf = net->ipv6.ip6_null_entry;
+ if (!new_fn_leaf) {
+ WARN_ON(!new_fn_leaf);
+ new_fn_leaf = net->ipv6.ip6_null_entry;
}
#endif
- atomic_inc(&fn->leaf->rt6i_ref);
- return fn->parent;
+ atomic_inc(&new_fn_leaf->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, new_fn_leaf);
+ return pn;
}
- pn = fn->parent;
#ifdef CONFIG_IPV6_SUBTREES
if (FIB6_SUBTREE(pn) == fn) {
WARN_ON(!(fn->fn_flags & RTN_ROOT));
- FIB6_SUBTREE(pn) = NULL;
+ RCU_INIT_POINTER(pn->subtree, NULL);
nstate = FWS_L;
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn)
- pn->right = child;
- else if (pn->left == fn)
- pn->left = child;
+ if (pn_r == fn)
+ rcu_assign_pointer(pn->right, child);
+ else if (pn_l == fn)
+ rcu_assign_pointer(pn->left, child);
#if RT6_DEBUG >= 2
else
WARN_ON(1);
#endif
if (child)
- child->parent = pn;
+ rcu_assign_pointer(child->parent, pn);
nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
}
@@ -1546,17 +1613,18 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
return pn;
- rt6_release(pn->leaf);
- pn->leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ rt6_release(pn_leaf);
fn = pn;
}
}
-static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
- struct nl_info *info)
+static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
+ struct rt6_info __rcu **rtp, struct nl_info *info)
{
struct fib6_walker *w;
- struct rt6_info *rt = *rtp;
+ struct rt6_info *rt = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
RT6_TRACE("fib6_del_route\n");
@@ -1573,7 +1641,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
rt6_flush_exceptions(rt);
/* Reset round-robin state, if necessary */
- if (fn->rr_ptr == rt)
+ if (rcu_access_pointer(fn->rr_ptr) == rt)
fn->rr_ptr = NULL;
/* Remove this entry from other siblings */
@@ -1592,20 +1660,19 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
- w->leaf = rt->dst.rt6_next;
+ w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+ lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
w->state = FWS_U;
}
}
read_unlock(&net->ipv6.fib6_walker_lock);
- rt->dst.rt6_next = NULL;
-
/* If it was last route, expunge its radix tree node */
- if (!fn->leaf) {
+ if (!rcu_access_pointer(fn->leaf)) {
fn->fn_flags &= ~RTN_RTINFO;
net->ipv6.rt6_stats->fib_route_nodes--;
- fn = fib6_repair_tree(net, fn);
+ fn = fib6_repair_tree(net, table, fn);
}
fib6_purge_rt(rt, fn, net);
@@ -1616,12 +1683,15 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
rt6_release(rt);
}
+/* Need to own table->tb6_lock */
int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ struct fib6_table *table = rt->rt6i_table;
struct net *net = info->nl_net;
- struct rt6_info **rtp;
+ struct rt6_info __rcu **rtp;
+ struct rt6_info __rcu **rtp_next;
#if RT6_DEBUG >= 2
if (rt->dst.obsolete > 0) {
@@ -1642,11 +1712,14 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
* Walk the leaf entries looking for ourself
*/
- for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
- if (*rtp == rt) {
- fib6_del_route(fn, rtp, info);
+ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
+ struct rt6_info *cur = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&table->tb6_lock));
+ if (rt == cur) {
+ fib6_del_route(table, fn, rtp, info);
return 0;
}
+ rtp_next = &cur->dst.rt6_next;
}
return -ENOENT;
}
@@ -1673,11 +1746,13 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
* 0 -> walk is complete.
* >0 -> walk is incomplete (i.e. suspended)
* <0 -> walk is terminated by an error.
+ *
+ * This function is called with tb6_lock held.
*/
static int fib6_walk_continue(struct fib6_walker *w)
{
- struct fib6_node *fn, *pn;
+ struct fib6_node *fn, *pn, *left, *right;
/* w->root should always be table->tb6_root */
WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
@@ -1697,20 +1772,22 @@ static int fib6_walk_continue(struct fib6_walker *w)
w->state = FWS_L;
#endif
case FWS_L:
- if (fn->left) {
- w->node = fn->left;
+ left = rcu_dereference_protected(fn->left, 1);
+ if (left) {
+ w->node = left;
w->state = FWS_INIT;
continue;
}
w->state = FWS_R;
case FWS_R:
- if (fn->right) {
- w->node = fn->right;
+ right = rcu_dereference_protected(fn->right, 1);
+ if (right) {
+ w->node = right;
w->state = FWS_INIT;
continue;
}
w->state = FWS_C;
- w->leaf = fn->leaf;
+ w->leaf = rcu_dereference_protected(fn->leaf, 1);
case FWS_C:
if (w->leaf && fn->fn_flags & RTN_RTINFO) {
int err;
@@ -1732,7 +1809,9 @@ static int fib6_walk_continue(struct fib6_walker *w)
case FWS_U:
if (fn == w->root)
return 0;
- pn = fn->parent;
+ pn = rcu_dereference_protected(fn->parent, 1);
+ left = rcu_dereference_protected(pn->left, 1);
+ right = rcu_dereference_protected(pn->right, 1);
w->node = pn;
#ifdef CONFIG_IPV6_SUBTREES
if (FIB6_SUBTREE(pn) == fn) {
@@ -1741,13 +1820,13 @@ static int fib6_walk_continue(struct fib6_walker *w)
continue;
}
#endif
- if (pn->left == fn) {
+ if (left == fn) {
w->state = FWS_R;
continue;
}
- if (pn->right == fn) {
+ if (right == fn) {
w->state = FWS_C;
- w->leaf = w->node->leaf;
+ w->leaf = rcu_dereference_protected(w->node->leaf, 1);
continue;
}
#if RT6_DEBUG >= 2
@@ -1790,7 +1869,7 @@ static int fib6_clean_node(struct fib6_walker *w)
return 0;
}
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_walker_rt(w) {
res = c->func(rt, c->arg);
if (res < 0) {
w->leaf = rt;
@@ -1850,10 +1929,10 @@ static void __fib6_clean_all(struct net *net,
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
func, sernum, arg);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
}
}
rcu_read_unlock();
@@ -1967,7 +2046,8 @@ static int __net_init fib6_net_init(struct net *net)
goto out_fib_table_hash;
net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
- net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -1978,7 +2058,8 @@ static int __net_init fib6_net_init(struct net *net)
if (!net->ipv6.fib6_local_tbl)
goto out_fib6_main_tbl;
net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
- net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -2108,7 +2189,9 @@ static int ipv6_route_yield(struct fib6_walker *w)
return 1;
do {
- iter->w.leaf = iter->w.leaf->dst.rt6_next;
+ iter->w.leaf = rcu_dereference_protected(
+ iter->w.leaf->dst.rt6_next,
+ lockdep_is_held(&iter->tbl->tb6_lock));
iter->skip--;
if (!iter->skip && iter->w.leaf)
return 1;
@@ -2173,7 +2256,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = ((struct rt6_info *)v)->dst.rt6_next;
+ n = rcu_dereference(((struct rt6_info *)v)->dst.rt6_next);
if (n) {
++*pos;
return n;
@@ -2181,9 +2264,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
iter_table:
ipv6_route_check_sernum(iter);
- read_lock(&iter->tbl->tb6_lock);
+ spin_lock_bh(&iter->tbl->tb6_lock);
r = fib6_walk_continue(&iter->w);
- read_unlock(&iter->tbl->tb6_lock);
+ spin_unlock_bh(&iter->tbl->tb6_lock);
if (r > 0) {
if (v)
++*pos;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 24b80f43bbfb..cf44d0994b1e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -488,7 +488,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
}
/*
- * Route lookup. Any table->tb6_lock is implied.
+ * Route lookup. rcu_read_lock() should be held.
*/
static inline struct rt6_info *rt6_device_match(struct net *net,
@@ -503,7 +503,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (!oif && ipv6_addr_any(saddr))
goto out;
- for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
+ for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
struct net_device *dev = sprt->dst.dev;
if (oif) {
@@ -722,7 +722,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
cont = NULL;
- for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
+ for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -731,7 +731,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
- for (rt = leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+ for (rt = leaf; rt && rt != rr_head;
+ rt = rcu_dereference(rt->dst.rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -743,7 +744,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
if (match || !cont)
return match;
- for (rt = cont; rt; rt = rt->dst.rt6_next)
+ for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
@@ -752,7 +753,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
int oif, int strict)
{
- struct rt6_info *leaf = fn->leaf;
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6_info *match, *rt0;
bool do_rr = false;
int key_plen;
@@ -760,9 +761,9 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
if (!leaf)
return net->ipv6.ip6_null_entry;
- rt0 = fn->rr_ptr;
+ rt0 = rcu_dereference(fn->rr_ptr);
if (!rt0)
- fn->rr_ptr = rt0 = leaf;
+ rt0 = leaf;
/* Double check to make sure fn is not an intermediate node
* and fn->leaf does not points to its child's leaf
@@ -781,14 +782,19 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
&do_rr);
if (do_rr) {
- struct rt6_info *next = rt0->dst.rt6_next;
+ struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
next = leaf;
- if (next != rt0)
- fn->rr_ptr = next;
+ if (next != rt0) {
+ spin_lock_bh(&leaf->rt6i_table->tb6_lock);
+ /* make sure next is not being deleted from the tree */
+ if (next->rt6i_node)
+ rcu_assign_pointer(fn->rr_ptr, next);
+ spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
+ }
}
return match ? match : net->ipv6.ip6_null_entry;
@@ -878,13 +884,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
struct in6_addr *saddr)
{
- struct fib6_node *pn;
+ struct fib6_node *pn, *sn;
while (1) {
if (fn->fn_flags & RTN_TL_ROOT)
return NULL;
- pn = fn->parent;
- if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
- fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
+ pn = rcu_dereference(fn->parent);
+ sn = FIB6_SUBTREE(pn);
+ if (sn && sn != fn)
+ fn = fib6_lookup(sn, NULL, saddr);
else
fn = pn;
if (fn->fn_flags & RTN_RTINFO)
@@ -916,13 +923,19 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- rt = fn->leaf;
- rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
- if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
+ rt = rcu_dereference(fn->leaf);
+ if (!rt) {
+ rt = net->ipv6.ip6_null_entry;
+ } else {
+ rt = rt6_device_match(net, rt, &fl6->saddr,
+ fl6->flowi6_oif, flags);
+ if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+ rt = rt6_multipath_select(rt, fl6,
+ fl6->flowi6_oif, flags);
+ }
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
@@ -936,7 +949,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
if (ip6_hold_safe(net, &rt, true))
dst_use_noref(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
@@ -990,9 +1003,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
struct fib6_table *table;
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
return err;
}
@@ -1090,7 +1103,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
return pcpu_rt;
}
-/* It should be called with read_lock_bh(&tb6_lock) acquired */
+/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
struct rt6_info *pcpu_rt, **p;
@@ -1632,7 +1645,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
@@ -1662,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rt = rt_cache;
if (rt == net->ipv6.ip6_null_entry) {
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
dst_hold(&rt->dst);
trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
return rt;
@@ -1671,7 +1684,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
dst_use_noref(&rt->dst, jiffies);
rt6_dst_from_metrics_check(rt);
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
@@ -1687,11 +1700,11 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
if (ip6_hold_safe(net, &rt, true)) {
dst_use_noref(&rt->dst, jiffies);
} else {
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
uncached_rt = rt;
goto uncached_rt_out;
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
dst_release(&rt->dst);
@@ -1719,7 +1732,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
pcpu_rt = rt6_get_pcpu_route(rt);
if (pcpu_rt) {
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
} else {
/* atomic_inc_not_zero() is needed when using rcu */
if (atomic_inc_not_zero(&rt->rt6i_ref)) {
@@ -1730,12 +1743,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
* No dst_hold() on rt is needed because grabbing
* rt->rt6i_ref makes sure rt can't be released.
*/
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
pcpu_rt = rt6_make_pcpu_route(rt);
rt6_release(rt);
} else {
/* rt is already removed from tree */
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
pcpu_rt = net->ipv6.ip6_null_entry;
dst_hold(&pcpu_rt->dst);
}
@@ -2131,10 +2144,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
* routes.
*/
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt6_check_expired(rt))
continue;
if (rt->dst.error)
@@ -2179,7 +2192,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
out:
ip6_hold_safe(net, &rt, true);
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
return rt;
@@ -2778,9 +2791,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
}
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
err = fib6_del(rt, info);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
out:
ip6_rt_put(rt);
@@ -2806,7 +2819,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
if (rt == net->ipv6.ip6_null_entry)
goto out_put;
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
struct rt6_info *sibling, *next_sibling;
@@ -2836,7 +2849,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
err = fib6_del(rt, info);
out_unlock:
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
out_put:
ip6_rt_put(rt);
@@ -2861,7 +2874,7 @@ static int ip6_route_del(struct fib6_config *cfg,
return err;
}
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_locate(&table->tb6_root,
&cfg->fc_dst, cfg->fc_dst_len,
@@ -2869,7 +2882,7 @@ static int ip6_route_del(struct fib6_config *cfg,
!(cfg->fc_flags & RTF_CACHE));
if (fn) {
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (cfg->fc_flags & RTF_CACHE) {
rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
&cfg->fc_src);
@@ -2890,7 +2903,7 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (!dst_hold_safe(&rt->dst))
break;
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
/* if gateway was specified only delete the one hop */
if (cfg->fc_flags & RTF_GATEWAY)
@@ -2899,7 +2912,7 @@ static int ip6_route_del(struct fib6_config *cfg,
return __ip6_del_rt_siblings(rt, cfg);
}
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return err;
}
@@ -3074,12 +3087,12 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
if (!fn)
goto out;
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != ifindex)
continue;
if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
@@ -3090,7 +3103,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
break;
}
out:
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -3136,8 +3149,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ rcu_read_lock();
+ for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (dev == rt->dst.dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
@@ -3145,7 +3158,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
}
if (rt)
ip6_hold_safe(NULL, &rt, false);
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -3183,20 +3196,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
struct rt6_info *rt;
restart:
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ rcu_read_lock();
+ for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
(!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
if (dst_hold_safe(&rt->dst)) {
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
ip6_del_rt(rt);
} else {
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
}
goto restart;
}
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* [PATCH net-next 16/16] ipv6: take care of rt6_stats
From: Wei Wang @ 2017-10-06 19:06 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Eric Dumazet, Martin KaFai Lau, Wei Wang
In-Reply-To: <20171006190611.110633-16-tracywwnj@gmail.com>
From: Wei Wang <weiwan@google.com>
Currently, most of the rt6_stats are not hooked up correctly. As the
last part of this patch series, hook up all existing rt6_stats and add
one new stat fib_rt_uncache to indicate the number of routes in the
uncached list.
For details of the stats, please refer to the comments added in
include/net/ip6_fib.h.
Note: fib_rt_alloc and fib_rt_uncache are not guaranteed to be modified
under a lock. So atomic_t is used for them.
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/ip6_fib.h | 15 +++++++++------
net/ipv6/ip6_fib.c | 42 ++++++++++++++++++++++++------------------
net/ipv6/route.c | 16 ++++++++++++++--
3 files changed, 47 insertions(+), 26 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0b438b9bcb10..10c913816032 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -297,12 +297,15 @@ struct fib6_walker {
};
struct rt6_statistics {
- __u32 fib_nodes;
- __u32 fib_route_nodes;
- __u32 fib_rt_alloc; /* permanent routes */
- __u32 fib_rt_entries; /* rt entries in table */
- __u32 fib_rt_cache; /* cache routes */
- __u32 fib_discarded_routes;
+ __u32 fib_nodes; /* all fib6 nodes */
+ __u32 fib_route_nodes; /* intermediate nodes */
+ __u32 fib_rt_entries; /* rt entries in fib table */
+ __u32 fib_rt_cache; /* cached rt entries in exception table */
+ __u32 fib_discarded_routes; /* total number of routes delete */
+
+ /* The following stats are not protected by any lock */
+ atomic_t fib_rt_alloc; /* total number of routes alloced */
+ atomic_t fib_rt_uncache; /* rt entries in uncached list */
};
#define RTN_TL_ROOT 0x0001
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 3f95908b39c3..52a29ba32928 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -149,18 +149,21 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static struct fib6_node *node_alloc(void)
+static struct fib6_node *node_alloc(struct net *net)
{
struct fib6_node *fn;
fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
+ if (fn)
+ net->ipv6.rt6_stats->fib_nodes++;
return fn;
}
-static void node_free_immediate(struct fib6_node *fn)
+static void node_free_immediate(struct net *net, struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
+ net->ipv6.rt6_stats->fib_nodes--;
}
static void node_free_rcu(struct rcu_head *head)
@@ -170,9 +173,10 @@ static void node_free_rcu(struct rcu_head *head)
kmem_cache_free(fib6_node_kmem, fn);
}
-static void node_free(struct fib6_node *fn)
+static void node_free(struct net *net, struct fib6_node *fn)
{
call_rcu(&fn->rcu, node_free_rcu);
+ net->ipv6.rt6_stats->fib_nodes--;
}
void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
@@ -583,7 +587,8 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
* node.
*/
-static struct fib6_node *fib6_add_1(struct fib6_table *table,
+static struct fib6_node *fib6_add_1(struct net *net,
+ struct fib6_table *table,
struct fib6_node *root,
struct in6_addr *addr, int plen,
int offset, int allow_create,
@@ -675,7 +680,7 @@ static struct fib6_node *fib6_add_1(struct fib6_table *table,
* Create new leaf node without children.
*/
- ln = node_alloc();
+ ln = node_alloc(net);
if (!ln)
return ERR_PTR(-ENOMEM);
@@ -716,14 +721,14 @@ static struct fib6_node *fib6_add_1(struct fib6_table *table,
* (new leaf node)[ln] (old node)[fn]
*/
if (plen > bit) {
- in = node_alloc();
- ln = node_alloc();
+ in = node_alloc(net);
+ ln = node_alloc(net);
if (!in || !ln) {
if (in)
- node_free_immediate(in);
+ node_free_immediate(net, in);
if (ln)
- node_free_immediate(ln);
+ node_free_immediate(net, ln);
return ERR_PTR(-ENOMEM);
}
@@ -768,7 +773,7 @@ static struct fib6_node *fib6_add_1(struct fib6_table *table,
* (old node)[fn] NULL
*/
- ln = node_alloc();
+ ln = node_alloc(net);
if (!ln)
return ERR_PTR(-ENOMEM);
@@ -1065,6 +1070,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
fn->rr_ptr = NULL;
rt6_release(iter);
nsiblings--;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
ins = &iter->dst.rt6_next;
}
@@ -1140,7 +1146,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (!allow_create && !replace_required)
pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
- fn = fib6_add_1(table, root,
+ fn = fib6_add_1(info->nl_net, table, root,
&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
replace_required, extack);
@@ -1170,7 +1176,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
*/
/* Create subtree root node */
- sfn = node_alloc();
+ sfn = node_alloc(info->nl_net);
if (!sfn)
goto failure;
@@ -1181,8 +1187,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
/* Now add the first leaf node to new subtree */
- sn = fib6_add_1(table, sfn, &rt->rt6i_src.addr,
- rt->rt6i_src.plen,
+ sn = fib6_add_1(info->nl_net, table, sfn,
+ &rt->rt6i_src.addr, rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required, extack);
@@ -1191,7 +1197,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
root, and then (in failure) stale node
in main tree.
*/
- node_free_immediate(sfn);
+ node_free_immediate(info->nl_net, sfn);
err = PTR_ERR(sn);
goto failure;
}
@@ -1200,8 +1206,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
rcu_assign_pointer(sfn->parent, fn);
rcu_assign_pointer(fn->subtree, sfn);
} else {
- sn = fib6_add_1(table, FIB6_SUBTREE(fn), &rt->rt6i_src.addr,
- rt->rt6i_src.plen,
+ sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
+ &rt->rt6i_src.addr, rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required, extack);
@@ -1609,7 +1615,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
}
read_unlock(&net->ipv6.fib6_walker_lock);
- node_free(fn);
+ node_free(net, fn);
if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
return pn;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cf44d0994b1e..399d1bceec4a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -143,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt)
{
if (!list_empty(&rt->rt6i_uncached)) {
struct uncached_list *ul = rt->rt6i_uncached_list;
+ struct net *net = dev_net(rt->dst.dev);
spin_lock_bh(&ul->lock);
list_del(&rt->rt6i_uncached);
+ atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
spin_unlock_bh(&ul->lock);
}
}
@@ -359,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
1, DST_OBSOLETE_FORCE_CHK, flags);
- if (rt)
+ if (rt) {
rt6_info_init(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
+ }
return rt;
}
@@ -1156,6 +1160,8 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
+ struct net *net = dev_net(rt6_ex->rt6i->dst.dev);
+
if (!bucket || !rt6_ex)
return;
rt6_ex->rt6i->rt6i_node = NULL;
@@ -1164,6 +1170,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
kfree_rcu(rt6_ex, rcu);
WARN_ON_ONCE(!bucket->depth);
bucket->depth--;
+ net->ipv6.rt6_stats->fib_rt_cache--;
}
/* Remove oldest rt6_ex in bucket and free the memory
@@ -1270,6 +1277,7 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
static int rt6_insert_exception(struct rt6_info *nrt,
struct rt6_info *ort)
{
+ struct net *net = dev_net(ort->dst.dev);
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
@@ -1339,6 +1347,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
nrt->rt6i_node = ort->rt6i_node;
hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
bucket->depth++;
+ net->ipv6.rt6_stats->fib_rt_cache++;
if (bucket->depth > FIB6_MAX_DEPTH)
rt6_exception_remove_oldest(bucket);
@@ -1714,6 +1723,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
* No need for another dst_hold()
*/
rt6_uncached_list_add(uncached_rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
} else {
uncached_rt = net->ipv6.ip6_null_entry;
dst_hold(&uncached_rt->dst);
@@ -1894,6 +1904,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
DST_OBSOLETE_NONE, 0);
if (rt) {
rt6_info_init(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
new = &rt->dst;
new->__use = 1;
@@ -2341,6 +2352,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
* do proper release of the net_device
*/
rt6_uncached_list_add(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
@@ -4422,7 +4434,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
net->ipv6.rt6_stats->fib_nodes,
net->ipv6.rt6_stats->fib_route_nodes,
- net->ipv6.rt6_stats->fib_rt_alloc,
+ atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
net->ipv6.rt6_stats->fib_rt_entries,
net->ipv6.rt6_stats->fib_rt_cache,
dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
--
2.14.2.920.gcf0c67979c-goog
^ permalink raw reply related
* Re: [PATCH v2] isdn/gigaset: Convert timers to use timer_setup()
From: Kees Cook @ 2017-10-06 19:39 UTC (permalink / raw)
To: Paul Bolle
Cc: Karsten Keil, David S. Miller, Johan Hovold, LKML,
gigaset307x-common, Network Development
In-Reply-To: <1507316433.6960.19.camel@tiscali.nl>
On Fri, Oct 6, 2017 at 12:00 PM, Paul Bolle <pebolle@tiscali.nl> wrote:
> On Thu, 2017-10-05 at 12:31 -0700, Kees Cook wrote:
>> --- a/drivers/isdn/gigaset/bas-gigaset.c
>> +++ b/drivers/isdn/gigaset/bas-gigaset.c
>
>> -static void cmd_in_timeout(unsigned long data)
>> +static void cmd_in_timeout(struct timer_list *t)
>> {
>> - struct cardstate *cs = (struct cardstate *) data;
>> - struct bas_cardstate *ucs = cs->hw.bas;
>> + struct bas_cardstate *ucs = from_timer(ucs, t, timer_cmd_in);
>> + struct urb *urb = ucs->urb_int_in;
>> + struct cardstate *cs = urb->context;
>
> This makes me nervous. Are you sure urb->context points to a struct cardstate
> here and in the other two places this patch changes?
>
> Anyhow, I'd like to have some time to do my review. So what's your timeframe
> here? I do hope I have at least a few weeks. (In other words: I hope gigaset
> isn't the only driver where the ability to use random pointers in these timer
> callbacks is removed.)
Hi!
I'm in no rush for any specific change. There are about 900 call sites
I'm making my way through, about 2/3rd are pretty trivial, and the
less obvious is what I've started sending out now, since I expect some
will need some more careful review.
Thanks for taking a look at it!
-Kees
--
Kees Cook
Pixel Security
^ permalink raw reply
* [PATCH net-next 1/2] bnxt_en: don't consider building bnxt_tc.o if option not enabled
From: Jonathan Toppins @ 2017-10-06 19:48 UTC (permalink / raw)
To: michael.chan; +Cc: netdev, open list
Instead of zeroing out bnxt_tc.c with a #ifdef foo, instead don't compile
the file when the option is not enabled. Now make and the preprocessor do
not have to waste time compiling a no-op.
Signed-off-by: Jonathan Toppins <jtoppins@redhat.com>
---
drivers/net/ethernet/broadcom/bnxt/Makefile | 3 ++-
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 5 -----
2 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 4f0cb8e1ffc0..457201f409a7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_BNXT) += bnxt_en.o
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_tc.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o
+bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 7dd3d131043a..4730c048ed9b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -23,8 +23,6 @@
#include "bnxt_tc.h"
#include "bnxt_vfr.h"
-#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
-
#define BNXT_FID_INVALID 0xffff
#define VLAN_TCI(vid, prio) ((vid) | ((prio) << VLAN_PRIO_SHIFT))
@@ -833,6 +831,3 @@ void bnxt_shutdown_tc(struct bnxt *bp)
rhashtable_destroy(&tc_info->flow_table);
rhashtable_destroy(&tc_info->l2_table);
}
-
-#else
-#endif
--
2.13.6
^ permalink raw reply related
* [PATCH net-next 2/2] bnxt_en: tc: only the function prototypes need to be wrapped in #ifdef
From: Jonathan Toppins @ 2017-10-06 19:48 UTC (permalink / raw)
To: michael.chan; +Cc: netdev, open list
In-Reply-To: <1a4d96f368f199bba0425012f50d55d634484cce.1507319311.git.jtoppins@redhat.com>
There is no reason to wrap the data structures inside the ifdef.
Signed-off-by: Jonathan Toppins <jtoppins@redhat.com>
---
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
index 6c4c1ed279ef..6e771e9eed51 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
@@ -10,8 +10,6 @@
#ifndef BNXT_TC_H
#define BNXT_TC_H
-#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
-
/* Structs used for storing the filter/actions of the TC cmd.
*/
struct bnxt_tc_l2_key {
@@ -133,6 +131,8 @@ struct bnxt_tc_flow_node {
struct rcu_head rcu;
};
+#if IS_ENABLED(CONFIG_BNXT_FLOWER_OFFLOAD)
+
int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
struct tc_cls_flower_offload *cls_flower);
int bnxt_init_tc(struct bnxt *bp);
--
2.13.6
^ permalink raw reply related
* Re: [PATCH net] net: enable interface alias removal via rtnl
From: Oliver Hartkopp @ 2017-10-06 20:10 UTC (permalink / raw)
To: David Ahern, Nicolas Dichtel, davem
Cc: netdev, Oliver Hartkopp, Stephen Hemminger
In-Reply-To: <b6c121f1-c585-6fa4-8dc9-8847ea5ee20c@gmail.com>
On 10/06/2017 08:18 PM, David Ahern wrote:
> On 10/5/17 4:19 AM, Nicolas Dichtel wrote:
>> IFLA_IFALIAS is defined as NLA_STRING. It means that the minimal length of
>> the attribute is 1 ("\0"). However, to remove an alias, the attribute
>> length must be 0 (see dev_set_alias()).
>
> why not add a check in dev_set_alias that if len is 1 and the 1
> character is '\0' it means remove the alias?
Yes. That looks indeed better than changing NLA_STRING to NLA_BINARY
which does not really hit the point.
Nicolas, can you send an updated patch picking up David's suggestion?
Tnx & best regards,
Oliver
>
>>
>> Let's define the type to NLA_BINARY, so that the alias can be removed.
>
> that changes the uapi
>
^ permalink raw reply
* [PATCH v1 RFC 0/7] Modify KSZ9477 DSA driver in preparation to add other KSZ switch drivers
From: Tristram.Ha @ 2017-10-06 20:32 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, Pavel Machek, Ruediger Schmitt
Cc: Tristram Ha, muvarov, nathan.leigh.conrad, vivien.didelot,
UNGLinuxDriver, netdev, linux-kernel
From: Tristram Ha <Tristram.Ha@microchip.com>
This series of patches is to modify the original KSZ9477 DSA driver so
that other KSZ switch drivers can be added and use the common code.
There are several steps to accomplish this achievement. First is to
rename some function names with a prefix to indicate chip specific
function. Second is to move common code into header that can be shared.
Last is to modify tag_ksz.c so that it can handle many tail tag formats
used by different KSZ switch drivers.
ksz_common.c will contain the common code used by all KSZ switch drivers.
ksz9477.c will contain KSZ9477 code from the original ksz_common.c.
ksz9477_spi.c is renamed from ksz_spi.c.
ksz9477_reg.h is renamed from ksz_9477_reg.h.
ksz_common.h is added to provide common code access to KSZ switch
drivers.
ksz_spi.h is added to provide common SPI access functions to KSZ SPI
drivers.
v1
- Each patch in the set is self-contained
- Use ksz9477 prefix to indicate KSZ9477 specific code
- Simplify MIB counter reading code
- Switch driver code is not accessed from tag_ksz.c
Tristram Ha (7):
Replace license with GPL.
Clean up code according to patch check suggestions.
Rename some functions with ksz9477 prefix to separate chip specific
code from common code.
Rename ksz_spi.c to ksz9477_spi.c and update Kconfig in preparation to
add more KSZ switch drivers.
Break KSZ9477 DSA driver into two files in preparation to add more KSZ
switch drivers. Add common functions in ksz_common.h so that other
KSZ switch drivers can access code in ksz_common.c. Add ksz_spi.h
for common functions used by KSZ switch SPI drivers.
Add MIB counter reading support. Rename ksz_9477_reg.h to
ksz9477_reg.h for consistency as the product name is always KSZ####.
Header file ksz_priv.h no longer contains any chip specific data.
Modify tag_ksz.c so that tail tag code can be used by other KSZ switch
drivers.
drivers/net/dsa/microchip/Kconfig | 14 +-
drivers/net/dsa/microchip/Makefile | 4 +-
drivers/net/dsa/microchip/ksz9477.c | 1376 ++++++++++++++++++++
.../microchip/{ksz_9477_reg.h => ksz9477_reg.h} | 23 +-
drivers/net/dsa/microchip/ksz9477_spi.c | 188 +++
drivers/net/dsa/microchip/ksz_common.c | 1210 ++++-------------
drivers/net/dsa/microchip/ksz_common.h | 234 ++++
drivers/net/dsa/microchip/ksz_priv.h | 258 ++--
drivers/net/dsa/microchip/ksz_spi.c | 216 ---
drivers/net/dsa/microchip/ksz_spi.h | 82 ++
include/net/dsa.h | 2 +-
net/dsa/Kconfig | 4 +
net/dsa/dsa.c | 4 +-
net/dsa/dsa_priv.h | 2 +-
net/dsa/tag_ksz.c | 107 +-
15 files changed, 2331 insertions(+), 1393 deletions(-)
create mode 100644 drivers/net/dsa/microchip/ksz9477.c
rename drivers/net/dsa/microchip/{ksz_9477_reg.h => ksz9477_reg.h} (98%)
create mode 100644 drivers/net/dsa/microchip/ksz9477_spi.c
create mode 100644 drivers/net/dsa/microchip/ksz_common.h
delete mode 100644 drivers/net/dsa/microchip/ksz_spi.c
create mode 100644 drivers/net/dsa/microchip/ksz_spi.h
--
1.9.1
^ permalink raw reply
* [PATCH v1 RFC 1/7] Replace license with GPL
From: Tristram.Ha @ 2017-10-06 20:32 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, Pavel Machek, Ruediger Schmitt
Cc: Tristram Ha, muvarov, nathan.leigh.conrad, vivien.didelot,
UNGLinuxDriver, netdev, linux-kernel
In-Reply-To: <1507321985-15097-1-git-send-email-Tristram.Ha@microchip.com>
From: Tristram Ha <Tristram.Ha@microchip.com>
Replace license with GPL.
Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
---
drivers/net/dsa/microchip/ksz_9477_reg.h | 23 ++++++++++++-----------
drivers/net/dsa/microchip/ksz_common.c | 23 ++++++++++++-----------
drivers/net/dsa/microchip/ksz_priv.h | 23 ++++++++++++-----------
drivers/net/dsa/microchip/ksz_spi.c | 23 ++++++++++++-----------
4 files changed, 48 insertions(+), 44 deletions(-)
diff --git a/drivers/net/dsa/microchip/ksz_9477_reg.h b/drivers/net/dsa/microchip/ksz_9477_reg.h
index 6aa6752..26a0e4b 100644
--- a/drivers/net/dsa/microchip/ksz_9477_reg.h
+++ b/drivers/net/dsa/microchip/ksz_9477_reg.h
@@ -1,19 +1,20 @@
/*
* Microchip KSZ9477 register definitions
*
- * Copyright (C) 2017
+ * Copyright (C) 2017 Microchip Technology Inc.
*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
*
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __KSZ9477_REGS_H
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 56cd6d3..685dafd 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1,19 +1,20 @@
/*
* Microchip switch driver main logic
*
- * Copyright (C) 2017
+ * Copyright (C) 2017 Microchip Technology Inc.
*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
*
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/delay.h>
diff --git a/drivers/net/dsa/microchip/ksz_priv.h b/drivers/net/dsa/microchip/ksz_priv.h
index 2a98dbd..d461468 100644
--- a/drivers/net/dsa/microchip/ksz_priv.h
+++ b/drivers/net/dsa/microchip/ksz_priv.h
@@ -1,19 +1,20 @@
/*
* Microchip KSZ series switch common definitions
*
- * Copyright (C) 2017
+ * Copyright (C) 2017 Microchip Technology Inc.
*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
*
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __KSZ_PRIV_H
diff --git a/drivers/net/dsa/microchip/ksz_spi.c b/drivers/net/dsa/microchip/ksz_spi.c
index c519469..d0d6920 100644
--- a/drivers/net/dsa/microchip/ksz_spi.c
+++ b/drivers/net/dsa/microchip/ksz_spi.c
@@ -1,19 +1,20 @@
/*
* Microchip KSZ series register access through SPI
*
- * Copyright (C) 2017
+ * Copyright (C) 2017 Microchip Technology Inc.
*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
*
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <asm/unaligned.h>
--
1.9.1
^ permalink raw reply related
* [PATCH v1 RFC 2/7] Clean up code according to patch check suggestions
From: Tristram.Ha @ 2017-10-06 20:33 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, Pavel Machek, Ruediger Schmitt
Cc: Tristram Ha, muvarov, nathan.leigh.conrad, vivien.didelot,
UNGLinuxDriver, netdev, linux-kernel
In-Reply-To: <1507321985-15097-1-git-send-email-Tristram.Ha@microchip.com>
From: Tristram Ha <Tristram.Ha@microchip.com>
Clean up code according to patch check suggestions.
Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
---
drivers/net/dsa/microchip/ksz_common.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 685dafd..d72aad7 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -898,9 +898,9 @@ static void ksz_port_mdb_add(struct dsa_switch *ds, int port,
if (static_table[0] & ALU_V_STATIC_VALID) {
/* check this has same vid & mac address */
- if (((static_table[2] >> ALU_V_FID_S) == (mdb->vid)) &&
+ if (((static_table[2] >> ALU_V_FID_S) == mdb->vid) &&
((static_table[2] & ALU_V_MAC_ADDR_HI) == mac_hi) &&
- (static_table[3] == mac_lo)) {
+ static_table[3] == mac_lo) {
/* found matching one */
break;
}
@@ -971,9 +971,9 @@ static int ksz_port_mdb_del(struct dsa_switch *ds, int port,
if (static_table[0] & ALU_V_STATIC_VALID) {
/* check this has same vid & mac address */
- if (((static_table[2] >> ALU_V_FID_S) == (mdb->vid)) &&
+ if (((static_table[2] >> ALU_V_FID_S) == mdb->vid) &&
((static_table[2] & ALU_V_MAC_ADDR_HI) == mac_hi) &&
- (static_table[3] == mac_lo)) {
+ static_table[3] == mac_lo) {
/* found matching one */
break;
}
--
1.9.1
^ permalink raw reply related
* [PATCH v1 RFC 3/7] Rename some functions with ksz9477 prefix
From: Tristram.Ha @ 2017-10-06 20:33 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, Pavel Machek, Ruediger Schmitt
Cc: Tristram Ha, muvarov, nathan.leigh.conrad, vivien.didelot,
UNGLinuxDriver, netdev, linux-kernel
In-Reply-To: <1507321985-15097-1-git-send-email-Tristram.Ha@microchip.com>
From: Tristram Ha <Tristram.Ha@microchip.com>
Rename some functions with ksz9477 prefix to separate chip specific code
from common code.
Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
---
drivers/net/dsa/microchip/ksz_common.c | 116 +++++++++++++++++----------------
1 file changed, 59 insertions(+), 57 deletions(-)
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index d72aad7..7a0d10f 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -265,9 +265,8 @@ static int wait_alu_sta_ready(struct ksz_device *dev, u32 waiton, int timeout)
return 0;
}
-static int ksz_reset_switch(struct dsa_switch *ds)
+static int ksz9477_reset_switch(struct ksz_device *dev)
{
- struct ksz_device *dev = ds->priv;
u8 data8;
u16 data16;
u32 data32;
@@ -300,7 +299,7 @@ static int ksz_reset_switch(struct dsa_switch *ds)
return 0;
}
-static void port_setup(struct ksz_device *dev, int port, bool cpu_port)
+static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
{
u8 data8;
u16 data16;
@@ -346,7 +345,7 @@ static void port_setup(struct ksz_device *dev, int port, bool cpu_port)
ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16);
}
-static void ksz_config_cpu_port(struct dsa_switch *ds)
+static void ksz9477_config_cpu_port(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
int i;
@@ -358,12 +357,12 @@ static void ksz_config_cpu_port(struct dsa_switch *ds)
dev->cpu_port = i;
/* enable cpu port */
- port_setup(dev, i, true);
+ ksz9477_port_setup(dev, i, true);
}
}
}
-static int ksz_setup(struct dsa_switch *ds)
+static int ksz9477_setup(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
int ret = 0;
@@ -373,7 +372,7 @@ static int ksz_setup(struct dsa_switch *ds)
if (!dev->vlan_cache)
return -ENOMEM;
- ret = ksz_reset_switch(ds);
+ ret = ksz9477_reset_switch(dev);
if (ret) {
dev_err(ds->dev, "failed to reset switch\n");
return ret;
@@ -382,7 +381,7 @@ static int ksz_setup(struct dsa_switch *ds)
/* accept packet up to 2000bytes */
ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_LEGAL_PACKET_DISABLE, true);
- ksz_config_cpu_port(ds);
+ ksz9477_config_cpu_port(ds);
ksz_cfg(dev, REG_SW_MAC_CTRL_1, MULTICAST_STORM_DISABLE, true);
@@ -395,12 +394,12 @@ static int ksz_setup(struct dsa_switch *ds)
return 0;
}
-static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds)
+static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds)
{
return DSA_TAG_PROTO_KSZ;
}
-static int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
+static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg)
{
struct ksz_device *dev = ds->priv;
u16 val = 0;
@@ -410,7 +409,8 @@ static int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
return val;
}
-static int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
+static int ksz9477_phy_write16(struct dsa_switch *ds, int addr, int reg,
+ u16 val)
{
struct ksz_device *dev = ds->priv;
@@ -425,7 +425,7 @@ static int ksz_enable_port(struct dsa_switch *ds, int port,
struct ksz_device *dev = ds->priv;
/* setup slave port */
- port_setup(dev, port, false);
+ ksz9477_port_setup(dev, port, false);
return 0;
}
@@ -444,7 +444,7 @@ static int ksz_sset_count(struct dsa_switch *ds)
return TOTAL_SWITCH_COUNTER_NUM;
}
-static void ksz_get_strings(struct dsa_switch *ds, int port, uint8_t *buf)
+static void ksz9477_get_strings(struct dsa_switch *ds, int port, uint8_t *buf)
{
int i;
@@ -494,7 +494,8 @@ static void ksz_get_ethtool_stats(struct dsa_switch *ds, int port,
mutex_unlock(&dev->stats_mutex);
}
-static void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
+static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
+ u8 state)
{
struct ksz_device *dev = ds->priv;
u8 data;
@@ -539,7 +540,8 @@ static void ksz_port_fast_age(struct dsa_switch *ds, int port)
ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
}
-static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag)
+static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port,
+ bool flag)
{
struct ksz_device *dev = ds->priv;
@@ -567,9 +569,9 @@ static int ksz_port_vlan_prepare(struct dsa_switch *ds, int port,
return 0;
}
-static void ksz_port_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+static void ksz9477_port_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans)
{
struct ksz_device *dev = ds->priv;
u32 vlan_table[3];
@@ -602,8 +604,8 @@ static void ksz_port_vlan_add(struct dsa_switch *ds, int port,
}
}
-static int ksz_port_vlan_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan)
+static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
{
struct ksz_device *dev = ds->priv;
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
@@ -658,8 +660,8 @@ struct alu_struct {
u8 mac[ETH_ALEN];
};
-static int ksz_port_fdb_add(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid)
+static int ksz9477_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
{
struct ksz_device *dev = ds->priv;
u32 alu_table[4];
@@ -715,8 +717,8 @@ static int ksz_port_fdb_add(struct dsa_switch *ds, int port,
return ret;
}
-static int ksz_port_fdb_del(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid)
+static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
{
struct ksz_device *dev = ds->priv;
u32 alu_table[4];
@@ -805,8 +807,8 @@ static void convert_alu(struct alu_struct *alu, u32 *alu_table)
alu->mac[5] = alu_table[3] & 0xFF;
}
-static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
- dsa_fdb_dump_cb_t *cb, void *data)
+static int ksz9477_port_fdb_dump(struct dsa_switch *ds, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
{
struct ksz_device *dev = ds->priv;
int ret = 0;
@@ -865,9 +867,9 @@ static int ksz_port_mdb_prepare(struct dsa_switch *ds, int port,
return 0;
}
-static void ksz_port_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
{
struct ksz_device *dev = ds->priv;
u32 static_table[4];
@@ -936,8 +938,8 @@ static void ksz_port_mdb_add(struct dsa_switch *ds, int port,
mutex_unlock(&dev->alu_mutex);
}
-static int ksz_port_mdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb)
+static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb)
{
struct ksz_device *dev = ds->priv;
u32 static_table[4];
@@ -1013,9 +1015,9 @@ static int ksz_port_mdb_del(struct dsa_switch *ds, int port,
return ret;
}
-static int ksz_port_mirror_add(struct dsa_switch *ds, int port,
- struct dsa_mall_mirror_tc_entry *mirror,
- bool ingress)
+static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress)
{
struct ksz_device *dev = ds->priv;
@@ -1035,8 +1037,8 @@ static int ksz_port_mirror_add(struct dsa_switch *ds, int port,
return 0;
}
-static void ksz_port_mirror_del(struct dsa_switch *ds, int port,
- struct dsa_mall_mirror_tc_entry *mirror)
+static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror)
{
struct ksz_device *dev = ds->priv;
u8 data;
@@ -1054,29 +1056,29 @@ static void ksz_port_mirror_del(struct dsa_switch *ds, int port,
}
static const struct dsa_switch_ops ksz_switch_ops = {
- .get_tag_protocol = ksz_get_tag_protocol,
- .setup = ksz_setup,
- .phy_read = ksz_phy_read16,
- .phy_write = ksz_phy_write16,
+ .get_tag_protocol = ksz9477_get_tag_protocol,
+ .setup = ksz9477_setup,
+ .phy_read = ksz9477_phy_read16,
+ .phy_write = ksz9477_phy_write16,
.port_enable = ksz_enable_port,
.port_disable = ksz_disable_port,
- .get_strings = ksz_get_strings,
+ .get_strings = ksz9477_get_strings,
.get_ethtool_stats = ksz_get_ethtool_stats,
.get_sset_count = ksz_sset_count,
- .port_stp_state_set = ksz_port_stp_state_set,
+ .port_stp_state_set = ksz9477_port_stp_state_set,
.port_fast_age = ksz_port_fast_age,
- .port_vlan_filtering = ksz_port_vlan_filtering,
+ .port_vlan_filtering = ksz9477_port_vlan_filtering,
.port_vlan_prepare = ksz_port_vlan_prepare,
- .port_vlan_add = ksz_port_vlan_add,
- .port_vlan_del = ksz_port_vlan_del,
- .port_fdb_dump = ksz_port_fdb_dump,
- .port_fdb_add = ksz_port_fdb_add,
- .port_fdb_del = ksz_port_fdb_del,
+ .port_vlan_add = ksz9477_port_vlan_add,
+ .port_vlan_del = ksz9477_port_vlan_del,
+ .port_fdb_dump = ksz9477_port_fdb_dump,
+ .port_fdb_add = ksz9477_port_fdb_add,
+ .port_fdb_del = ksz9477_port_fdb_del,
.port_mdb_prepare = ksz_port_mdb_prepare,
- .port_mdb_add = ksz_port_mdb_add,
- .port_mdb_del = ksz_port_mdb_del,
- .port_mirror_add = ksz_port_mirror_add,
- .port_mirror_del = ksz_port_mirror_del,
+ .port_mdb_add = ksz9477_port_mdb_add,
+ .port_mdb_del = ksz9477_port_mdb_del,
+ .port_mirror_add = ksz9477_port_mirror_add,
+ .port_mirror_del = ksz9477_port_mirror_del,
};
struct ksz_chip_data {
@@ -1089,7 +1091,7 @@ struct ksz_chip_data {
int port_cnt;
};
-static const struct ksz_chip_data ksz_switch_chips[] = {
+static const struct ksz_chip_data ksz9477_switch_chips[] = {
{
.chip_id = 0x00947700,
.dev_name = "KSZ9477",
@@ -1101,7 +1103,7 @@ struct ksz_chip_data {
},
};
-static int ksz_switch_init(struct ksz_device *dev)
+static int ksz9477_switch_init(struct ksz_device *dev)
{
int i;
@@ -1112,8 +1114,8 @@ static int ksz_switch_init(struct ksz_device *dev)
dev->ds->ops = &ksz_switch_ops;
- for (i = 0; i < ARRAY_SIZE(ksz_switch_chips); i++) {
- const struct ksz_chip_data *chip = &ksz_switch_chips[i];
+ for (i = 0; i < ARRAY_SIZE(ksz9477_switch_chips); i++) {
+ const struct ksz_chip_data *chip = &ksz9477_switch_chips[i];
if (dev->chip_id == chip->chip_id) {
dev->name = chip->dev_name;
@@ -1197,7 +1199,7 @@ int ksz_switch_register(struct ksz_device *dev)
if (ksz_switch_detect(dev))
return -EINVAL;
- ret = ksz_switch_init(dev);
+ ret = ksz9477_switch_init(dev);
if (ret)
return ret;
--
1.9.1
^ permalink raw reply related
* [PATCH v1 RFC 4/7] Rename ksz_spi.c to ksz9477_spi.c
From: Tristram.Ha @ 2017-10-06 20:33 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, Pavel Machek, Ruediger Schmitt
Cc: Tristram Ha, muvarov, nathan.leigh.conrad, vivien.didelot,
UNGLinuxDriver, netdev, linux-kernel
In-Reply-To: <1507321985-15097-1-git-send-email-Tristram.Ha@microchip.com>
From: Tristram Ha <Tristram.Ha@microchip.com>
Rename ksz_spi.c to ksz9477_spi.c and update Kconfig in preparation to add
more KSZ switch drivers.
Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
---
drivers/net/dsa/microchip/Kconfig | 12 ++++++------
drivers/net/dsa/microchip/Makefile | 4 ++--
drivers/net/dsa/microchip/{ksz_spi.c => ksz9477_spi.c} | 0
3 files changed, 8 insertions(+), 8 deletions(-)
rename drivers/net/dsa/microchip/{ksz_spi.c => ksz9477_spi.c} (100%)
diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig
index a8b8f59..5a8660d 100644
--- a/drivers/net/dsa/microchip/Kconfig
+++ b/drivers/net/dsa/microchip/Kconfig
@@ -1,12 +1,12 @@
-menuconfig MICROCHIP_KSZ
- tristate "Microchip KSZ series switch support"
+menuconfig MICROCHIP_KSZ9477
+ tristate "Microchip KSZ9477 series switch support"
depends on NET_DSA
select NET_DSA_TAG_KSZ
help
- This driver adds support for Microchip KSZ switch chips.
+ This driver adds support for Microchip KSZ9477 switch chips.
-config MICROCHIP_KSZ_SPI_DRIVER
- tristate "KSZ series SPI connected switch driver"
- depends on MICROCHIP_KSZ && SPI
+config MICROCHIP_KSZ9477_SPI_DRIVER
+ tristate "KSZ9477 series SPI connected switch driver"
+ depends on MICROCHIP_KSZ9477 && SPI
help
Select to enable support for registering switches configured through SPI.
diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile
index ed335e2..5b6325b 100644
--- a/drivers/net/dsa/microchip/Makefile
+++ b/drivers/net/dsa/microchip/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_MICROCHIP_KSZ) += ksz_common.o
-obj-$(CONFIG_MICROCHIP_KSZ_SPI_DRIVER) += ksz_spi.o
+obj-$(CONFIG_MICROCHIP_KSZ9477) += ksz_common.o
+obj-$(CONFIG_MICROCHIP_KSZ9477_SPI_DRIVER) += ksz9477_spi.o
diff --git a/drivers/net/dsa/microchip/ksz_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
similarity index 100%
rename from drivers/net/dsa/microchip/ksz_spi.c
rename to drivers/net/dsa/microchip/ksz9477_spi.c
--
1.9.1
^ permalink raw reply related
* [PATCH v1 RFC 5/7] Break KSZ9477 DSA driver into two files
From: Tristram.Ha @ 2017-10-06 20:33 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, Pavel Machek, Ruediger Schmitt
Cc: Tristram Ha, muvarov, nathan.leigh.conrad, vivien.didelot,
UNGLinuxDriver, netdev, linux-kernel
In-Reply-To: <1507321985-15097-1-git-send-email-Tristram.Ha@microchip.com>
From: Tristram Ha <Tristram.Ha@microchip.com>
Break KSZ9477 DSA driver into two files in preparation to add more KSZ
switch drivers.
Add common functions in ksz_common.h so that other KSZ switch drivers
can access code in ksz_common.c.
Add ksz_spi.h for common functions used by KSZ switch SPI drivers.
Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
---
drivers/net/dsa/microchip/Makefile | 2 +-
drivers/net/dsa/microchip/ksz9477.c | 1328 +++++++++++++++++++++++++++++++
drivers/net/dsa/microchip/ksz9477_spi.c | 143 ++--
drivers/net/dsa/microchip/ksz_common.c | 1133 +++-----------------------
drivers/net/dsa/microchip/ksz_common.h | 230 ++++++
drivers/net/dsa/microchip/ksz_priv.h | 229 +++---
drivers/net/dsa/microchip/ksz_spi.h | 82 ++
7 files changed, 1926 insertions(+), 1221 deletions(-)
create mode 100644 drivers/net/dsa/microchip/ksz9477.c
create mode 100644 drivers/net/dsa/microchip/ksz_common.h
create mode 100644 drivers/net/dsa/microchip/ksz_spi.h
diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile
index 5b6325b..13dd8f0 100644
--- a/drivers/net/dsa/microchip/Makefile
+++ b/drivers/net/dsa/microchip/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_MICROCHIP_KSZ9477) += ksz_common.o
+obj-$(CONFIG_MICROCHIP_KSZ9477) += ksz9477.o ksz_common.o
obj-$(CONFIG_MICROCHIP_KSZ9477_SPI_DRIVER) += ksz9477_spi.o
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
new file mode 100644
index 0000000..214d380
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -0,0 +1,1328 @@
+/*
+ * Microchip KSZ9477 switch driver main logic
+ *
+ * Copyright (C) 2017 Microchip Technology Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/gpio.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_data/microchip-ksz.h>
+#include <linux/phy.h>
+#include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
+#include <net/dsa.h>
+#include <net/switchdev.h>
+
+#include "ksz_priv.h"
+#include "ksz_common.h"
+#include "ksz_9477_reg.h"
+
+static const struct {
+ int index;
+ char string[ETH_GSTRING_LEN];
+} mib_names[TOTAL_SWITCH_COUNTER_NUM] = {
+ { 0x00, "rx_hi" },
+ { 0x01, "rx_undersize" },
+ { 0x02, "rx_fragments" },
+ { 0x03, "rx_oversize" },
+ { 0x04, "rx_jabbers" },
+ { 0x05, "rx_symbol_err" },
+ { 0x06, "rx_crc_err" },
+ { 0x07, "rx_align_err" },
+ { 0x08, "rx_mac_ctrl" },
+ { 0x09, "rx_pause" },
+ { 0x0A, "rx_bcast" },
+ { 0x0B, "rx_mcast" },
+ { 0x0C, "rx_ucast" },
+ { 0x0D, "rx_64_or_less" },
+ { 0x0E, "rx_65_127" },
+ { 0x0F, "rx_128_255" },
+ { 0x10, "rx_256_511" },
+ { 0x11, "rx_512_1023" },
+ { 0x12, "rx_1024_1522" },
+ { 0x13, "rx_1523_2000" },
+ { 0x14, "rx_2001" },
+ { 0x15, "tx_hi" },
+ { 0x16, "tx_late_col" },
+ { 0x17, "tx_pause" },
+ { 0x18, "tx_bcast" },
+ { 0x19, "tx_mcast" },
+ { 0x1A, "tx_ucast" },
+ { 0x1B, "tx_deferred" },
+ { 0x1C, "tx_total_col" },
+ { 0x1D, "tx_exc_col" },
+ { 0x1E, "tx_single_col" },
+ { 0x1F, "tx_mult_col" },
+ { 0x80, "rx_total" },
+ { 0x81, "tx_total" },
+ { 0x82, "rx_discards" },
+ { 0x83, "tx_discards" },
+};
+
+static void ksz_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set)
+{
+ u32 data;
+
+ ksz_read32(dev, addr, &data);
+ if (set)
+ data |= bits;
+ else
+ data &= ~bits;
+ ksz_write32(dev, addr, data);
+}
+
+static void ksz_port_cfg32(struct ksz_device *dev, int port, int offset,
+ u32 bits, bool set)
+{
+ u32 addr;
+ u32 data;
+
+ addr = PORT_CTRL_ADDR(port, offset);
+ ksz_read32(dev, addr, &data);
+
+ if (set)
+ data |= bits;
+ else
+ data &= ~bits;
+
+ ksz_write32(dev, addr, data);
+}
+
+static int wait_vlan_ctrl_ready(struct ksz_device *dev, u32 waiton, int timeout)
+{
+ u8 data;
+
+ do {
+ ksz_read8(dev, REG_SW_VLAN_CTRL, &data);
+ if (!(data & waiton))
+ break;
+ usleep_range(1, 10);
+ } while (timeout-- > 0);
+
+ if (timeout <= 0)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int get_vlan_table(struct dsa_switch *ds, u16 vid, u32 *vlan_table)
+{
+ struct ksz_device *dev = ds->priv;
+ int ret;
+
+ mutex_lock(&dev->vlan_mutex);
+
+ ksz_write16(dev, REG_SW_VLAN_ENTRY_INDEX__2, vid & VLAN_INDEX_M);
+ ksz_write8(dev, REG_SW_VLAN_CTRL, VLAN_READ | VLAN_START);
+
+ /* wait to be cleared */
+ ret = wait_vlan_ctrl_ready(dev, VLAN_START, 1000);
+ if (ret < 0) {
+ dev_dbg(dev->dev, "Failed to read vlan table\n");
+ goto exit;
+ }
+
+ ksz_read32(dev, REG_SW_VLAN_ENTRY__4, &vlan_table[0]);
+ ksz_read32(dev, REG_SW_VLAN_ENTRY_UNTAG__4, &vlan_table[1]);
+ ksz_read32(dev, REG_SW_VLAN_ENTRY_PORTS__4, &vlan_table[2]);
+
+ ksz_write8(dev, REG_SW_VLAN_CTRL, 0);
+
+exit:
+ mutex_unlock(&dev->vlan_mutex);
+
+ return ret;
+}
+
+static int set_vlan_table(struct dsa_switch *ds, u16 vid, u32 *vlan_table)
+{
+ struct ksz_device *dev = ds->priv;
+ int ret;
+
+ mutex_lock(&dev->vlan_mutex);
+
+ ksz_write32(dev, REG_SW_VLAN_ENTRY__4, vlan_table[0]);
+ ksz_write32(dev, REG_SW_VLAN_ENTRY_UNTAG__4, vlan_table[1]);
+ ksz_write32(dev, REG_SW_VLAN_ENTRY_PORTS__4, vlan_table[2]);
+
+ ksz_write16(dev, REG_SW_VLAN_ENTRY_INDEX__2, vid & VLAN_INDEX_M);
+ ksz_write8(dev, REG_SW_VLAN_CTRL, VLAN_START | VLAN_WRITE);
+
+ /* wait to be cleared */
+ ret = wait_vlan_ctrl_ready(dev, VLAN_START, 1000);
+ if (ret < 0) {
+ dev_dbg(dev->dev, "Failed to write vlan table\n");
+ goto exit;
+ }
+
+ ksz_write8(dev, REG_SW_VLAN_CTRL, 0);
+
+ /* update vlan cache table */
+ dev->vlan_cache[vid].table[0] = vlan_table[0];
+ dev->vlan_cache[vid].table[1] = vlan_table[1];
+ dev->vlan_cache[vid].table[2] = vlan_table[2];
+
+exit:
+ mutex_unlock(&dev->vlan_mutex);
+
+ return ret;
+}
+
+static void read_table(struct dsa_switch *ds, u32 *table)
+{
+ struct ksz_device *dev = ds->priv;
+
+ ksz_read32(dev, REG_SW_ALU_VAL_A, &table[0]);
+ ksz_read32(dev, REG_SW_ALU_VAL_B, &table[1]);
+ ksz_read32(dev, REG_SW_ALU_VAL_C, &table[2]);
+ ksz_read32(dev, REG_SW_ALU_VAL_D, &table[3]);
+}
+
+static void write_table(struct dsa_switch *ds, u32 *table)
+{
+ struct ksz_device *dev = ds->priv;
+
+ ksz_write32(dev, REG_SW_ALU_VAL_A, table[0]);
+ ksz_write32(dev, REG_SW_ALU_VAL_B, table[1]);
+ ksz_write32(dev, REG_SW_ALU_VAL_C, table[2]);
+ ksz_write32(dev, REG_SW_ALU_VAL_D, table[3]);
+}
+
+static int wait_alu_ready(struct ksz_device *dev, u32 waiton, int timeout)
+{
+ u32 data;
+
+ do {
+ ksz_read32(dev, REG_SW_ALU_CTRL__4, &data);
+ if (!(data & waiton))
+ break;
+ usleep_range(1, 10);
+ } while (timeout-- > 0);
+
+ if (timeout <= 0)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int wait_alu_sta_ready(struct ksz_device *dev, u32 waiton, int timeout)
+{
+ u32 data;
+
+ do {
+ ksz_read32(dev, REG_SW_ALU_STAT_CTRL__4, &data);
+ if (!(data & waiton))
+ break;
+ usleep_range(1, 10);
+ } while (timeout-- > 0);
+
+ if (timeout <= 0)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int ksz9477_reset_switch(struct ksz_device *dev)
+{
+ u8 data8;
+ u16 data16;
+ u32 data32;
+
+ /* reset switch */
+ ksz_cfg(dev, REG_SW_OPERATION, SW_RESET, true);
+
+ /* turn off SPI DO Edge select */
+ ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
+ data8 &= ~SPI_AUTO_EDGE_DETECTION;
+ ksz_write8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, data8);
+
+ /* default configuration */
+ ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8);
+ data8 = SW_AGING_ENABLE | SW_LINK_AUTO_AGING |
+ SW_SRC_ADDR_FILTER | SW_FLUSH_STP_TABLE | SW_FLUSH_MSTP_TABLE;
+ ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
+
+ /* disable interrupts */
+ ksz_write32(dev, REG_SW_INT_MASK__4, SWITCH_INT_MASK);
+ ksz_write32(dev, REG_SW_PORT_INT_MASK__4, 0x7F);
+ ksz_read32(dev, REG_SW_PORT_INT_STATUS__4, &data32);
+
+ /* set broadcast storm protection 10% rate */
+ ksz_read16(dev, REG_SW_MAC_CTRL_2, &data16);
+ data16 &= ~BROADCAST_STORM_RATE;
+ data16 |= (BROADCAST_STORM_VALUE * BROADCAST_STORM_PROT_RATE) / 100;
+ ksz_write16(dev, REG_SW_MAC_CTRL_2, data16);
+
+ return 0;
+}
+
+static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds)
+{
+ return DSA_TAG_PROTO_KSZ;
+}
+
+static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg)
+{
+ struct ksz_device *dev = ds->priv;
+ u16 val = 0xffff;
+
+ /* No real PHY after this. Simulate the PHY.
+ * A fixed PHY can be setup in the device tree, but this function is
+ * still called for that port during initialization.
+ * For RGMII PHY there is no way to access it so the fixed PHY should
+ * be used. For SGMII PHY the supporting code will be added later.
+ */
+ if (addr >= dev->phy_port_cnt) {
+ struct ksz_port *p = &dev->ports[addr];
+
+ switch (reg) {
+ case MII_BMCR:
+ val = 0x1140;
+ break;
+ case MII_BMSR:
+ val = 0x796d;
+ break;
+ case MII_PHYSID1:
+ val = 0x0022;
+ break;
+ case MII_PHYSID2:
+ val = 0x1631;
+ break;
+ case MII_ADVERTISE:
+ val = 0x05e1;
+ break;
+ case MII_LPA:
+ val = 0xc5e1;
+ break;
+ case MII_CTRL1000:
+ val = 0x0700;
+ break;
+ case MII_STAT1000:
+ if (p->speed == SPEED_1000)
+ val = 0x3800;
+ else
+ val = 0;
+ break;
+ }
+ } else {
+ ksz_pread16(dev, addr, 0x100 + (reg << 1), &val);
+ }
+
+ return val;
+}
+
+static int ksz9477_phy_write16(struct dsa_switch *ds, int addr, int reg,
+ u16 val)
+{
+ struct ksz_device *dev = ds->priv;
+
+ /* No real PHY after this. */
+ if (addr >= dev->phy_port_cnt)
+ return 0;
+ ksz_pwrite16(dev, addr, 0x100 + (reg << 1), val);
+
+ return 0;
+}
+
+static void ksz9477_get_strings(struct dsa_switch *ds, int port, uint8_t *buf)
+{
+ int i;
+
+ for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
+ memcpy(buf + i * ETH_GSTRING_LEN, mib_names[i].string,
+ ETH_GSTRING_LEN);
+ }
+}
+
+static void ksz_get_ethtool_stats(struct dsa_switch *ds, int port,
+ uint64_t *buf)
+{
+ struct ksz_device *dev = ds->priv;
+ int i;
+ u32 data;
+ int timeout;
+
+ mutex_lock(&dev->stats_mutex);
+
+ for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
+ data = MIB_COUNTER_READ;
+ data |= ((mib_names[i].index & 0xFF) << MIB_COUNTER_INDEX_S);
+ ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, data);
+
+ timeout = 1000;
+ do {
+ ksz_pread32(dev, port, REG_PORT_MIB_CTRL_STAT__4,
+ &data);
+ usleep_range(1, 10);
+ if (!(data & MIB_COUNTER_READ))
+ break;
+ } while (timeout-- > 0);
+
+ /* failed to read MIB. get out of loop */
+ if (!timeout) {
+ dev_dbg(dev->dev, "Failed to get MIB\n");
+ break;
+ }
+
+ /* count resets upon read */
+ ksz_pread32(dev, port, REG_PORT_MIB_DATA, &data);
+
+ dev->mib_value[i] += (uint64_t)data;
+ buf[i] = dev->mib_value[i];
+ }
+
+ mutex_unlock(&dev->stats_mutex);
+}
+
+static void ksz9477_cfg_port_member(struct ksz_device *dev, int port,
+ u8 member)
+{
+ ksz_pwrite32(dev, port, REG_PORT_VLAN_MEMBERSHIP__4, member);
+ dev->ports[port].member = member;
+}
+
+static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
+ u8 state)
+{
+ struct ksz_device *dev = ds->priv;
+ struct ksz_port *p = &dev->ports[port];
+ u8 data;
+ int member = -1;
+
+ ksz_pread8(dev, port, P_STP_CTRL, &data);
+ data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
+
+ switch (state) {
+ case BR_STATE_DISABLED:
+ data |= PORT_LEARN_DISABLE;
+ if (port != dev->cpu_port)
+ member = 0;
+ break;
+ case BR_STATE_LISTENING:
+ data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE);
+ if (port != dev->cpu_port &&
+ p->stp_state == BR_STATE_DISABLED)
+ member = dev->host_mask | p->vid_member;
+ break;
+ case BR_STATE_LEARNING:
+ data |= PORT_RX_ENABLE;
+ break;
+ case BR_STATE_FORWARDING:
+ data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
+
+ /* This function is also used internally. */
+ if (port == dev->cpu_port)
+ break;
+
+ /* Port is a member of a bridge. */
+ if (dev->br_member & (1 << port)) {
+ dev->member |= (1 << port);
+ member = dev->member;
+ } else {
+ member = dev->host_mask | p->vid_member;
+ }
+ break;
+ case BR_STATE_BLOCKING:
+ data |= PORT_LEARN_DISABLE;
+ if (port != dev->cpu_port &&
+ p->stp_state == BR_STATE_DISABLED)
+ member = dev->host_mask | p->vid_member;
+ break;
+ default:
+ dev_err(ds->dev, "invalid STP state: %d\n", state);
+ return;
+ }
+
+ ksz_pwrite8(dev, port, P_STP_CTRL, data);
+ if (data & PORT_RX_ENABLE)
+ dev->rx_ports |= (1 << port);
+ else
+ dev->rx_ports &= ~(1 << port);
+ if (data & PORT_TX_ENABLE)
+ dev->tx_ports |= (1 << port);
+ else
+ dev->tx_ports &= ~(1 << port);
+
+ /* Port membership may share register with STP state. */
+ if (member >= 0 && member != p->member)
+ ksz9477_cfg_port_member(dev, port, (u8)member);
+
+ /* Check if forwarding needs to be updated. */
+ if (state != BR_STATE_FORWARDING) {
+ if (dev->br_member & (1 << port))
+ dev->member &= ~(1 << port);
+ }
+
+ /* When topology has changed the function ksz_update_port_member
+ * should be called to modify port forwarding behavior. However
+ * as the offload_fwd_mark indication cannot be reported here
+ * the switch forwarding function is not enabled.
+ */
+}
+
+static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
+{
+ u8 data;
+
+ ksz_read8(dev, REG_SW_LUE_CTRL_2, &data);
+ data &= ~(SW_FLUSH_OPTION_M << SW_FLUSH_OPTION_S);
+ data |= (SW_FLUSH_OPTION_DYN_MAC << SW_FLUSH_OPTION_S);
+ ksz_write8(dev, REG_SW_LUE_CTRL_2, data);
+ if (port < dev->mib_port_cnt) {
+ /* flush individual port */
+ ksz_pread8(dev, port, P_STP_CTRL, &data);
+ if (!(data & PORT_LEARN_DISABLE))
+ ksz_pwrite8(dev, port, P_STP_CTRL,
+ data | PORT_LEARN_DISABLE);
+ ksz_cfg(dev, S_FLUSH_TABLE_CTRL, SW_FLUSH_DYN_MAC_TABLE, true);
+ ksz_pwrite8(dev, port, P_STP_CTRL, data);
+ } else {
+ /* flush all */
+ ksz_cfg(dev, S_FLUSH_TABLE_CTRL, SW_FLUSH_STP_TABLE, true);
+ }
+}
+
+static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port,
+ bool flag)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (flag) {
+ ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL,
+ PORT_VLAN_LOOKUP_VID_0, true);
+ ksz_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY, true);
+ ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_VLAN_ENABLE, true);
+ } else {
+ ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_VLAN_ENABLE, false);
+ ksz_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY, false);
+ ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL,
+ PORT_VLAN_LOOKUP_VID_0, false);
+ }
+
+ return 0;
+}
+
+static void ksz9477_port_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans)
+{
+ struct ksz_device *dev = ds->priv;
+ u32 vlan_table[3];
+ u16 vid;
+ bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+ if (get_vlan_table(ds, vid, vlan_table)) {
+ dev_dbg(dev->dev, "Failed to get vlan table\n");
+ return;
+ }
+
+ vlan_table[0] = VLAN_VALID | (vid & VLAN_FID_M);
+ if (untagged)
+ vlan_table[1] |= BIT(port);
+ else
+ vlan_table[1] &= ~BIT(port);
+ vlan_table[1] &= ~(BIT(dev->cpu_port));
+
+ vlan_table[2] |= BIT(port) | BIT(dev->cpu_port);
+
+ if (set_vlan_table(ds, vid, vlan_table)) {
+ dev_dbg(dev->dev, "Failed to set vlan table\n");
+ return;
+ }
+
+ /* change PVID */
+ if (vlan->flags & BRIDGE_VLAN_INFO_PVID)
+ ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, vid);
+ }
+}
+
+static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct ksz_device *dev = ds->priv;
+ bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+ u32 vlan_table[3];
+ u16 vid;
+ u16 pvid;
+
+ ksz_pread16(dev, port, REG_PORT_DEFAULT_VID, &pvid);
+ pvid = pvid & 0xFFF;
+
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+ if (get_vlan_table(ds, vid, vlan_table)) {
+ dev_dbg(dev->dev, "Failed to get vlan table\n");
+ return -ETIMEDOUT;
+ }
+
+ vlan_table[2] &= ~BIT(port);
+
+ if (pvid == vid)
+ pvid = 1;
+
+ if (untagged)
+ vlan_table[1] &= ~BIT(port);
+
+ if (set_vlan_table(ds, vid, vlan_table)) {
+ dev_dbg(dev->dev, "Failed to set vlan table\n");
+ return -ETIMEDOUT;
+ }
+ }
+
+ ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, pvid);
+
+ return 0;
+}
+
+static int ksz9477_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
+{
+ struct ksz_device *dev = ds->priv;
+ u32 alu_table[4];
+ u32 data;
+ int ret = 0;
+
+ mutex_lock(&dev->alu_mutex);
+
+ /* find any entry with mac & vid */
+ data = vid << ALU_FID_INDEX_S;
+ data |= ((addr[0] << 8) | addr[1]);
+ ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
+
+ data = ((addr[2] << 24) | (addr[3] << 16));
+ data |= ((addr[4] << 8) | addr[5]);
+ ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
+
+ /* start read operation */
+ ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
+
+ /* wait to be finished */
+ ret = wait_alu_ready(dev, ALU_START, 1000);
+ if (ret < 0) {
+ dev_dbg(dev->dev, "Failed to read ALU\n");
+ goto exit;
+ }
+
+ /* read ALU entry */
+ read_table(ds, alu_table);
+
+ /* update ALU entry */
+ alu_table[0] = ALU_V_STATIC_VALID;
+ alu_table[1] |= BIT(port);
+ if (vid)
+ alu_table[1] |= ALU_V_USE_FID;
+ alu_table[2] = (vid << ALU_V_FID_S);
+ alu_table[2] |= ((addr[0] << 8) | addr[1]);
+ alu_table[3] = ((addr[2] << 24) | (addr[3] << 16));
+ alu_table[3] |= ((addr[4] << 8) | addr[5]);
+
+ write_table(ds, alu_table);
+
+ ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
+
+ /* wait to be finished */
+ ret = wait_alu_ready(dev, ALU_START, 1000);
+ if (ret < 0)
+ dev_dbg(dev->dev, "Failed to write ALU\n");
+
+exit:
+ mutex_unlock(&dev->alu_mutex);
+
+ return ret;
+}
+
+static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
+{
+ struct ksz_device *dev = ds->priv;
+ u32 alu_table[4];
+ u32 data;
+ int ret = 0;
+
+ mutex_lock(&dev->alu_mutex);
+
+ /* read any entry with mac & vid */
+ data = vid << ALU_FID_INDEX_S;
+ data |= ((addr[0] << 8) | addr[1]);
+ ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
+
+ data = ((addr[2] << 24) | (addr[3] << 16));
+ data |= ((addr[4] << 8) | addr[5]);
+ ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
+
+ /* start read operation */
+ ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
+
+ /* wait to be finished */
+ ret = wait_alu_ready(dev, ALU_START, 1000);
+ if (ret < 0) {
+ dev_dbg(dev->dev, "Failed to read ALU\n");
+ goto exit;
+ }
+
+ ksz_read32(dev, REG_SW_ALU_VAL_A, &alu_table[0]);
+ if (alu_table[0] & ALU_V_STATIC_VALID) {
+ ksz_read32(dev, REG_SW_ALU_VAL_B, &alu_table[1]);
+ ksz_read32(dev, REG_SW_ALU_VAL_C, &alu_table[2]);
+ ksz_read32(dev, REG_SW_ALU_VAL_D, &alu_table[3]);
+
+ /* clear forwarding port */
+ alu_table[2] &= ~BIT(port);
+
+ /* if there is no port to forward, clear table */
+ if ((alu_table[2] & ALU_V_PORT_MAP) == 0) {
+ alu_table[0] = 0;
+ alu_table[1] = 0;
+ alu_table[2] = 0;
+ alu_table[3] = 0;
+ }
+ } else {
+ alu_table[0] = 0;
+ alu_table[1] = 0;
+ alu_table[2] = 0;
+ alu_table[3] = 0;
+ }
+
+ write_table(ds, alu_table);
+
+ ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
+
+ /* wait to be finished */
+ ret = wait_alu_ready(dev, ALU_START, 1000);
+ if (ret < 0)
+ dev_dbg(dev->dev, "Failed to write ALU\n");
+
+exit:
+ mutex_unlock(&dev->alu_mutex);
+
+ return ret;
+}
+
+static void convert_alu(struct alu_struct *alu, u32 *alu_table)
+{
+ alu->is_static = !!(alu_table[0] & ALU_V_STATIC_VALID);
+ alu->is_src_filter = !!(alu_table[0] & ALU_V_SRC_FILTER);
+ alu->is_dst_filter = !!(alu_table[0] & ALU_V_DST_FILTER);
+ alu->prio_age = (alu_table[0] >> ALU_V_PRIO_AGE_CNT_S) &
+ ALU_V_PRIO_AGE_CNT_M;
+ alu->mstp = alu_table[0] & ALU_V_MSTP_M;
+
+ alu->is_override = !!(alu_table[1] & ALU_V_OVERRIDE);
+ alu->is_use_fid = !!(alu_table[1] & ALU_V_USE_FID);
+ alu->port_forward = alu_table[1] & ALU_V_PORT_MAP;
+
+ alu->fid = (alu_table[2] >> ALU_V_FID_S) & ALU_V_FID_M;
+
+ alu->mac[0] = (alu_table[2] >> 8) & 0xFF;
+ alu->mac[1] = alu_table[2] & 0xFF;
+ alu->mac[2] = (alu_table[3] >> 24) & 0xFF;
+ alu->mac[3] = (alu_table[3] >> 16) & 0xFF;
+ alu->mac[4] = (alu_table[3] >> 8) & 0xFF;
+ alu->mac[5] = alu_table[3] & 0xFF;
+}
+
+static int ksz9477_port_fdb_dump(struct dsa_switch *ds, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
+{
+ struct ksz_device *dev = ds->priv;
+ int ret = 0;
+ u32 ksz_data;
+ u32 alu_table[4];
+ struct alu_struct alu;
+ int timeout;
+
+ mutex_lock(&dev->alu_mutex);
+
+ /* start ALU search */
+ ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_START | ALU_SEARCH);
+
+ do {
+ timeout = 1000;
+ do {
+ ksz_read32(dev, REG_SW_ALU_CTRL__4, &ksz_data);
+ if ((ksz_data & ALU_VALID) || !(ksz_data & ALU_START))
+ break;
+ usleep_range(1, 10);
+ } while (timeout-- > 0);
+
+ if (!timeout) {
+ dev_dbg(dev->dev, "Failed to search ALU\n");
+ ret = -ETIMEDOUT;
+ goto exit;
+ }
+
+ /* read ALU table */
+ read_table(ds, alu_table);
+
+ convert_alu(&alu, alu_table);
+
+ if (alu.port_forward & BIT(port)) {
+ ret = cb(alu.mac, alu.fid, alu.is_static, data);
+ if (ret)
+ goto exit;
+ }
+ } while (ksz_data & ALU_START);
+
+exit:
+
+ /* stop ALU search */
+ ksz_write32(dev, REG_SW_ALU_CTRL__4, 0);
+
+ mutex_unlock(&dev->alu_mutex);
+
+ return ret;
+}
+
+static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
+{
+ struct ksz_device *dev = ds->priv;
+ u32 static_table[4];
+ u32 data;
+ int index;
+ u32 mac_hi, mac_lo;
+
+ mac_hi = ((mdb->addr[0] << 8) | mdb->addr[1]);
+ mac_lo = ((mdb->addr[2] << 24) | (mdb->addr[3] << 16));
+ mac_lo |= ((mdb->addr[4] << 8) | mdb->addr[5]);
+
+ mutex_lock(&dev->alu_mutex);
+
+ for (index = 0; index < dev->num_statics; index++) {
+ /* find empty slot first */
+ data = (index << ALU_STAT_INDEX_S) |
+ ALU_STAT_READ | ALU_STAT_START;
+ ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+
+ /* wait to be finished */
+ if (wait_alu_sta_ready(dev, ALU_STAT_START, 1000) < 0) {
+ dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+ goto exit;
+ }
+
+ /* read ALU static table */
+ read_table(ds, static_table);
+
+ if (static_table[0] & ALU_V_STATIC_VALID) {
+ /* check this has same vid & mac address */
+ if (((static_table[2] >> ALU_V_FID_S) == mdb->vid) &&
+ ((static_table[2] & ALU_V_MAC_ADDR_HI) == mac_hi) &&
+ static_table[3] == mac_lo) {
+ /* found matching one */
+ break;
+ }
+ } else {
+ /* found empty one */
+ break;
+ }
+ }
+
+ /* no available entry */
+ if (index == dev->num_statics)
+ goto exit;
+
+ /* add entry */
+ static_table[0] = ALU_V_STATIC_VALID;
+ static_table[1] |= BIT(port);
+ if (mdb->vid)
+ static_table[1] |= ALU_V_USE_FID;
+ static_table[2] = (mdb->vid << ALU_V_FID_S);
+ static_table[2] |= mac_hi;
+ static_table[3] = mac_lo;
+
+ write_table(ds, static_table);
+
+ data = (index << ALU_STAT_INDEX_S) | ALU_STAT_START;
+ ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+
+ /* wait to be finished */
+ if (wait_alu_sta_ready(dev, ALU_STAT_START, 1000) < 0)
+ dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+
+exit:
+ mutex_unlock(&dev->alu_mutex);
+}
+
+static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct ksz_device *dev = ds->priv;
+ u32 static_table[4];
+ u32 data;
+ int index;
+ int ret = 0;
+ u32 mac_hi, mac_lo;
+
+ mac_hi = ((mdb->addr[0] << 8) | mdb->addr[1]);
+ mac_lo = ((mdb->addr[2] << 24) | (mdb->addr[3] << 16));
+ mac_lo |= ((mdb->addr[4] << 8) | mdb->addr[5]);
+
+ mutex_lock(&dev->alu_mutex);
+
+ for (index = 0; index < dev->num_statics; index++) {
+ /* find empty slot first */
+ data = (index << ALU_STAT_INDEX_S) |
+ ALU_STAT_READ | ALU_STAT_START;
+ ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+
+ /* wait to be finished */
+ ret = wait_alu_sta_ready(dev, ALU_STAT_START, 1000);
+ if (ret < 0) {
+ dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+ goto exit;
+ }
+
+ /* read ALU static table */
+ read_table(ds, static_table);
+
+ if (static_table[0] & ALU_V_STATIC_VALID) {
+ /* check this has same vid & mac address */
+
+ if (((static_table[2] >> ALU_V_FID_S) == mdb->vid) &&
+ ((static_table[2] & ALU_V_MAC_ADDR_HI) == mac_hi) &&
+ static_table[3] == mac_lo) {
+ /* found matching one */
+ break;
+ }
+ }
+ }
+
+ /* no available entry */
+ if (index == dev->num_statics) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /* clear port */
+ static_table[1] &= ~BIT(port);
+
+ if ((static_table[1] & ALU_V_PORT_MAP) == 0) {
+ /* delete entry */
+ static_table[0] = 0;
+ static_table[1] = 0;
+ static_table[2] = 0;
+ static_table[3] = 0;
+ }
+
+ write_table(ds, static_table);
+
+ data = (index << ALU_STAT_INDEX_S) | ALU_STAT_START;
+ ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+
+ /* wait to be finished */
+ ret = wait_alu_sta_ready(dev, ALU_STAT_START, 1000);
+ if (ret < 0)
+ dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+
+exit:
+ mutex_unlock(&dev->alu_mutex);
+
+ return ret;
+}
+
+static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (ingress)
+ ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true);
+ else
+ ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, true);
+
+ ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false);
+
+ /* configure mirror port */
+ ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
+ PORT_MIRROR_SNIFFER, true);
+
+ ksz_cfg(dev, S_MIRROR_CTRL, SW_MIRROR_RX_TX, false);
+
+ return 0;
+}
+
+static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror)
+{
+ struct ksz_device *dev = ds->priv;
+ u8 data;
+
+ if (mirror->ingress)
+ ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, false);
+ else
+ ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, false);
+
+ ksz_pread8(dev, port, P_MIRROR_CTRL, &data);
+
+ if (!(data & (PORT_MIRROR_RX | PORT_MIRROR_TX)))
+ ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
+ PORT_MIRROR_SNIFFER, false);
+}
+
+static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
+{
+ u8 data8;
+ u8 member;
+ u16 data16;
+ struct ksz_port *p = &dev->ports[port];
+
+ /* enable tag tail for host port */
+ if (cpu_port)
+ ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_TAIL_TAG_ENABLE,
+ true);
+
+ ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, false);
+
+ /* set back pressure */
+ ksz_port_cfg(dev, port, REG_PORT_MAC_CTRL_1, PORT_BACK_PRESSURE, true);
+
+ /* enable broadcast storm limit */
+ ksz_port_cfg(dev, port, P_BCAST_STORM_CTRL, PORT_BROADCAST_STORM, true);
+
+ /* disable DiffServ priority */
+ ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_DIFFSERV_PRIO_ENABLE, false);
+
+ /* replace priority */
+ ksz_port_cfg(dev, port, REG_PORT_MRI_MAC_CTRL, PORT_USER_PRIO_CEILING,
+ false);
+ ksz_port_cfg32(dev, port, REG_PORT_MTI_QUEUE_CTRL_0__4,
+ MTI_PVID_REPLACE, false);
+
+ /* enable 802.1p priority */
+ ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_PRIO_ENABLE, true);
+
+ if (port < dev->phy_port_cnt) {
+ /* do not force flow control */
+ ksz_port_cfg(dev, port, REG_PORT_CTRL_0,
+ PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL,
+ false);
+
+ } else {
+ /* force flow control */
+ ksz_port_cfg(dev, port, REG_PORT_CTRL_0,
+ PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL,
+ true);
+
+ /* configure MAC to 1G & RGMII mode */
+ ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &data8);
+ data8 &= ~PORT_MII_NOT_1GBIT;
+ data8 &= ~PORT_MII_SEL_M;
+ switch (dev->interface) {
+ case PHY_INTERFACE_MODE_MII:
+ data8 |= PORT_MII_NOT_1GBIT;
+ data8 |= PORT_MII_SEL;
+ p->speed = SPEED_100;
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ data8 |= PORT_MII_NOT_1GBIT;
+ data8 |= PORT_RMII_SEL;
+ p->speed = SPEED_100;
+ break;
+ case PHY_INTERFACE_MODE_GMII:
+ data8 |= PORT_GMII_SEL;
+ p->speed = SPEED_1000;
+ break;
+ default:
+ data8 &= ~PORT_RGMII_ID_IG_ENABLE;
+ data8 &= ~PORT_RGMII_ID_EG_ENABLE;
+ if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ dev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
+ data8 |= PORT_RGMII_ID_IG_ENABLE;
+ if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ dev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
+ data8 |= PORT_RGMII_ID_EG_ENABLE;
+ data8 |= PORT_RGMII_SEL;
+ p->speed = SPEED_1000;
+ break;
+ }
+ ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, data8);
+ p->duplex = 1;
+ }
+ if (cpu_port) {
+ member = dev->port_mask;
+ dev->on_ports = dev->host_mask;
+ dev->live_ports = dev->host_mask;
+ } else {
+ member = dev->host_mask | p->vid_member;
+ dev->on_ports |= (1 << port);
+
+ /* Link was detected before port is enabled. */
+ if (p->link_up)
+ dev->live_ports |= (1 << port);
+ }
+ ksz9477_cfg_port_member(dev, port, member);
+
+ /* clear pending interrupts */
+ ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16);
+}
+
+static void ksz9477_config_cpu_port(struct dsa_switch *ds)
+{
+ struct ksz_device *dev = ds->priv;
+ struct ksz_port *p;
+ int i;
+
+ ds->num_ports = dev->port_cnt;
+
+ for (i = 0; i < ds->num_ports; i++) {
+ if (dsa_is_cpu_port(ds, i) && (dev->cpu_ports & (1 << i))) {
+ dev->cpu_port = i;
+ dev->host_mask = (1 << dev->cpu_port);
+ dev->port_mask |= dev->host_mask;
+
+ /* enable cpu port */
+ ksz9477_port_setup(dev, i, true);
+ p = &dev->ports[dev->cpu_port];
+ p->vid_member = dev->port_mask;
+ p->on = 1;
+ }
+ }
+
+ dev->member = dev->host_mask;
+
+ for (i = 0; i < dev->mib_port_cnt; i++) {
+ if (i == dev->cpu_port)
+ continue;
+ p = &dev->ports[i];
+
+ /* Initialize to non-zero so that ksz_cfg_port_member() will
+ * be called.
+ */
+ p->vid_member = (1 << i);
+ p->member = dev->port_mask;
+ ksz9477_port_stp_state_set(ds, i, BR_STATE_DISABLED);
+ p->on = 1;
+ if (i < dev->phy_port_cnt)
+ p->phy = 1;
+ if (dev->chip_id == 0x00947700 && i == 6) {
+ p->sgmii = 1;
+
+ /* SGMII PHY detection code is not implemented yet. */
+ p->phy = 0;
+ }
+ }
+}
+
+static int ksz9477_set_addr(struct dsa_switch *ds, u8 *addr)
+{
+ struct ksz_device *dev = ds->priv;
+
+ ksz_set(dev, REG_SW_MAC_ADDR_0, addr, ETH_ALEN);
+
+ return 0;
+}
+
+static int ksz9477_setup(struct dsa_switch *ds)
+{
+ struct ksz_device *dev = ds->priv;
+ int ret = 0;
+
+ dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table),
+ dev->num_vlans, GFP_KERNEL);
+ if (!dev->vlan_cache)
+ return -ENOMEM;
+
+ ret = ksz9477_reset_switch(dev);
+ if (ret) {
+ dev_err(ds->dev, "failed to reset switch\n");
+ return ret;
+ }
+
+ /* accept packet up to 2000bytes */
+ ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_LEGAL_PACKET_DISABLE, true);
+
+ ksz9477_config_cpu_port(ds);
+
+ ksz_cfg(dev, REG_SW_MAC_CTRL_1, MULTICAST_STORM_DISABLE, true);
+
+ /* queue based egress rate limit */
+ ksz_cfg(dev, REG_SW_MAC_CTRL_5, SW_OUT_RATE_LIMIT_QUEUE_BASED, true);
+
+ /* start switch */
+ ksz_cfg(dev, REG_SW_OPERATION, SW_START, true);
+
+ return 0;
+}
+
+static const struct dsa_switch_ops ksz9477_switch_ops = {
+ .get_tag_protocol = ksz9477_get_tag_protocol,
+ .setup = ksz9477_setup,
+ .set_addr = ksz9477_set_addr,
+ .phy_read = ksz9477_phy_read16,
+ .phy_write = ksz9477_phy_write16,
+ .port_enable = ksz_enable_port,
+ .port_disable = ksz_disable_port,
+ .get_strings = ksz9477_get_strings,
+ .get_ethtool_stats = ksz_get_ethtool_stats,
+ .get_sset_count = ksz_sset_count,
+ .port_bridge_join = ksz_port_bridge_join,
+ .port_bridge_leave = ksz_port_bridge_leave,
+ .port_stp_state_set = ksz9477_port_stp_state_set,
+ .port_fast_age = ksz_port_fast_age,
+ .port_vlan_filtering = ksz9477_port_vlan_filtering,
+ .port_vlan_prepare = ksz_port_vlan_prepare,
+ .port_vlan_add = ksz9477_port_vlan_add,
+ .port_vlan_del = ksz9477_port_vlan_del,
+ .port_fdb_dump = ksz9477_port_fdb_dump,
+ .port_fdb_add = ksz9477_port_fdb_add,
+ .port_fdb_del = ksz9477_port_fdb_del,
+ .port_mdb_prepare = ksz_port_mdb_prepare,
+ .port_mdb_add = ksz9477_port_mdb_add,
+ .port_mdb_del = ksz9477_port_mdb_del,
+ .port_mirror_add = ksz9477_port_mirror_add,
+ .port_mirror_del = ksz9477_port_mirror_del,
+};
+
+static u32 ksz9477_get_port_addr(int port, int offset)
+{
+ return PORT_CTRL_ADDR(port, offset);
+}
+
+static int ksz9477_switch_detect(struct ksz_device *dev)
+{
+ u8 data8;
+ u32 id32;
+ int ret;
+
+ /* turn off SPI DO Edge select */
+ ret = ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
+ if (ret)
+ return ret;
+
+ data8 &= ~SPI_AUTO_EDGE_DETECTION;
+ ret = ksz_write8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, data8);
+ if (ret)
+ return ret;
+
+ /* read chip id */
+ ret = ksz_read32(dev, REG_CHIP_ID0__1, &id32);
+ if (ret)
+ return ret;
+
+ /* Number of ports can be reduced depending on chip. */
+ dev->mib_port_cnt = TOTAL_PORT_NUM;
+ dev->phy_port_cnt = 5;
+
+ dev->chip_id = id32;
+
+ return 0;
+}
+
+struct ksz_chip_data {
+ u32 chip_id;
+ const char *dev_name;
+ int num_vlans;
+ int num_alus;
+ int num_statics;
+ int cpu_ports;
+ int port_cnt;
+};
+
+static const struct ksz_chip_data ksz9477_switch_chips[] = {
+ {
+ .chip_id = 0x00947700,
+ .dev_name = "KSZ9477",
+ .num_vlans = 4096,
+ .num_alus = 4096,
+ .num_statics = 16,
+ .cpu_ports = 0x7F, /* can be configured as cpu port */
+ .port_cnt = 7, /* total physical port count */
+ },
+};
+
+static int ksz9477_switch_init(struct ksz_device *dev)
+{
+ int i;
+
+ mutex_init(&dev->reg_mutex);
+ mutex_init(&dev->stats_mutex);
+ mutex_init(&dev->alu_mutex);
+ mutex_init(&dev->vlan_mutex);
+
+ dev->ds->ops = &ksz9477_switch_ops;
+
+ for (i = 0; i < ARRAY_SIZE(ksz9477_switch_chips); i++) {
+ const struct ksz_chip_data *chip = &ksz9477_switch_chips[i];
+
+ if (dev->chip_id == chip->chip_id) {
+ dev->name = chip->dev_name;
+ dev->num_vlans = chip->num_vlans;
+ dev->num_alus = chip->num_alus;
+ dev->num_statics = chip->num_statics;
+ dev->port_cnt = chip->port_cnt;
+ dev->cpu_ports = chip->cpu_ports;
+
+ break;
+ }
+ }
+
+ /* no switch found */
+ if (!dev->port_cnt)
+ return -ENODEV;
+
+ dev->port_mask = (1 << dev->port_cnt) - 1;
+
+ dev->reg_mib_cnt = SWITCH_COUNTER_NUM;
+ dev->mib_cnt = TOTAL_SWITCH_COUNTER_NUM;
+
+ i = dev->mib_port_cnt;
+ dev->ports = devm_kzalloc(dev->dev, sizeof(struct ksz_port) * i,
+ GFP_KERNEL);
+ if (!dev->ports)
+ return -ENOMEM;
+ for (i = 0; i < dev->mib_port_cnt; i++) {
+ dev->ports[i].mib.counters =
+ devm_kzalloc(dev->dev,
+ sizeof(u64) *
+ (TOTAL_SWITCH_COUNTER_NUM + 1),
+ GFP_KERNEL);
+ if (!dev->ports[i].mib.counters)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void ksz9477_switch_exit(struct ksz_device *dev)
+{
+ ksz9477_reset_switch(dev);
+}
+
+static const struct ksz_dev_ops ksz9477_dev_ops = {
+ .get_port_addr = ksz9477_get_port_addr,
+ .cfg_port_member = ksz9477_cfg_port_member,
+ .flush_dyn_mac_table = ksz9477_flush_dyn_mac_table,
+ .port_setup = ksz9477_port_setup,
+ .shutdown = ksz9477_reset_switch,
+ .detect = ksz9477_switch_detect,
+ .init = ksz9477_switch_init,
+ .exit = ksz9477_switch_exit,
+};
+
+int ksz9477_switch_register(struct ksz_device *dev)
+{
+ return ksz_switch_register(dev, &ksz9477_dev_ops);
+}
+EXPORT_SYMBOL(ksz9477_switch_register);
+
+MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>");
+MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch DSA Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index d0d6920..966981a 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -1,5 +1,5 @@
/*
- * Microchip KSZ series register access through SPI
+ * Microchip KSZ9477 series register access through SPI
*
* Copyright (C) 2017 Microchip Technology Inc.
*
@@ -25,6 +25,7 @@
#include <linux/spi/spi.h>
#include "ksz_priv.h"
+#include "ksz_spi.h"
/* SPI frame opcodes */
#define KS_SPIOP_RD 3
@@ -34,8 +35,11 @@
#define SPI_ADDR_MASK (BIT(SPI_ADDR_SHIFT) - 1)
#define SPI_TURNAROUND_SHIFT 5
-static int ksz_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val,
- unsigned int len)
+/* Enough to read all switch port registers. */
+#define SPI_TX_BUF_LEN 0x100
+
+static int ksz9477_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val,
+ unsigned int len)
{
u32 txbuf;
int ret;
@@ -49,27 +53,36 @@ static int ksz_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val,
return ret;
}
-static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data,
- unsigned int len)
+static int ksz9477_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val,
+ unsigned int len)
{
- struct spi_device *spi = dev->priv;
+ u32 *txbuf = (u32 *)val;
- return ksz_spi_read_reg(spi, reg, data, len);
+ *txbuf = reg & SPI_ADDR_MASK;
+ *txbuf |= (KS_SPIOP_WR << SPI_ADDR_SHIFT);
+ *txbuf <<= SPI_TURNAROUND_SHIFT;
+ *txbuf = cpu_to_be32(*txbuf);
+
+ return spi_write(spi, txbuf, 4 + len);
}
-static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)
+static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data,
+ unsigned int len)
{
- return ksz_spi_read(dev, reg, val, 1);
+ struct spi_device *spi = dev->priv;
+
+ return ksz9477_spi_read_reg(spi, reg, data, len);
}
-static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val)
+static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data,
+ unsigned int len)
{
- int ret = ksz_spi_read(dev, reg, (u8 *)val, 2);
-
- if (!ret)
- *val = be16_to_cpu(*val);
+ struct spi_device *spi = dev->priv;
- return ret;
+ if (len > SPI_TX_BUF_LEN)
+ len = SPI_TX_BUF_LEN;
+ memcpy(&dev->txbuf[4], data, len);
+ return ksz9477_spi_write_reg(spi, reg, dev->txbuf, len);
}
static int ksz_spi_read24(struct ksz_device *dev, u32 reg, u32 *val)
@@ -87,72 +100,15 @@ static int ksz_spi_read24(struct ksz_device *dev, u32 reg, u32 *val)
return ret;
}
-static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val)
-{
- int ret = ksz_spi_read(dev, reg, (u8 *)val, 4);
-
- if (!ret)
- *val = be32_to_cpu(*val);
-
- return ret;
-}
-
-static int ksz_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val,
- unsigned int len)
-{
- u32 txbuf;
- u8 data[12];
- int i;
-
- txbuf = reg & SPI_ADDR_MASK;
- txbuf |= (KS_SPIOP_WR << SPI_ADDR_SHIFT);
- txbuf <<= SPI_TURNAROUND_SHIFT;
- txbuf = cpu_to_be32(txbuf);
-
- data[0] = txbuf & 0xFF;
- data[1] = (txbuf & 0xFF00) >> 8;
- data[2] = (txbuf & 0xFF0000) >> 16;
- data[3] = (txbuf & 0xFF000000) >> 24;
- for (i = 0; i < len; i++)
- data[i + 4] = val[i];
-
- return spi_write(spi, &data, 4 + len);
-}
-
-static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value)
-{
- struct spi_device *spi = dev->priv;
-
- return ksz_spi_write_reg(spi, reg, &value, 1);
-}
-
-static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value)
-{
- struct spi_device *spi = dev->priv;
-
- value = cpu_to_be16(value);
- return ksz_spi_write_reg(spi, reg, (u8 *)&value, 2);
-}
-
static int ksz_spi_write24(struct ksz_device *dev, u32 reg, u32 value)
{
- struct spi_device *spi = dev->priv;
-
/* make it to big endian 24bit from MSB */
value <<= 8;
value = cpu_to_be32(value);
- return ksz_spi_write_reg(spi, reg, (u8 *)&value, 3);
+ return ksz_spi_write(dev, reg, &value, 3);
}
-static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value)
-{
- struct spi_device *spi = dev->priv;
-
- value = cpu_to_be32(value);
- return ksz_spi_write_reg(spi, reg, (u8 *)&value, 4);
-}
-
-static const struct ksz_io_ops ksz_spi_ops = {
+static const struct ksz_io_ops ksz9477_spi_ops = {
.read8 = ksz_spi_read8,
.read16 = ksz_spi_read16,
.read24 = ksz_spi_read24,
@@ -161,21 +117,27 @@ static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value)
.write16 = ksz_spi_write16,
.write24 = ksz_spi_write24,
.write32 = ksz_spi_write32,
+ .get = ksz_spi_get,
+ .set = ksz_spi_set,
};
-static int ksz_spi_probe(struct spi_device *spi)
+static int ksz9477_spi_probe(struct spi_device *spi)
{
struct ksz_device *dev;
int ret;
- dev = ksz_switch_alloc(&spi->dev, &ksz_spi_ops, spi);
+ dev = ksz_switch_alloc(&spi->dev, &ksz9477_spi_ops, spi);
if (!dev)
return -ENOMEM;
if (spi->dev.platform_data)
dev->pdata = spi->dev.platform_data;
- ret = ksz_switch_register(dev);
+ dev->txbuf = devm_kzalloc(dev->dev, 4 + SPI_TX_BUF_LEN, GFP_KERNEL);
+
+ ret = ksz9477_switch_register(dev);
+
+ /* Main DSA driver may not be started yet. */
if (ret)
return ret;
@@ -184,7 +146,7 @@ static int ksz_spi_probe(struct spi_device *spi)
return 0;
}
-static int ksz_spi_remove(struct spi_device *spi)
+static int ksz9477_spi_remove(struct spi_device *spi)
{
struct ksz_device *dev = spi_get_drvdata(spi);
@@ -194,24 +156,33 @@ static int ksz_spi_remove(struct spi_device *spi)
return 0;
}
-static const struct of_device_id ksz_dt_ids[] = {
+static void ksz9477_spi_shutdown(struct spi_device *spi)
+{
+ struct ksz_device *dev = spi_get_drvdata(spi);
+
+ if (dev && dev->dev_ops->shutdown)
+ dev->dev_ops->shutdown(dev);
+}
+
+static const struct of_device_id ksz9477_dt_ids[] = {
{ .compatible = "microchip,ksz9477" },
{},
};
-MODULE_DEVICE_TABLE(of, ksz_dt_ids);
+MODULE_DEVICE_TABLE(of, ksz9477_dt_ids);
-static struct spi_driver ksz_spi_driver = {
+static struct spi_driver ksz9477_spi_driver = {
.driver = {
.name = "ksz9477-switch",
.owner = THIS_MODULE,
- .of_match_table = of_match_ptr(ksz_dt_ids),
+ .of_match_table = of_match_ptr(ksz9477_dt_ids),
},
- .probe = ksz_spi_probe,
- .remove = ksz_spi_remove,
+ .probe = ksz9477_spi_probe,
+ .remove = ksz9477_spi_remove,
+ .shutdown = ksz9477_spi_shutdown,
};
-module_spi_driver(ksz_spi_driver);
+module_spi_driver(ksz9477_spi_driver);
MODULE_AUTHOR("Woojung Huh <Woojung.Huh@microchip.com>");
-MODULE_DESCRIPTION("Microchip KSZ Series Switch SPI access Driver");
+MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch SPI access Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 7a0d10f..1c9c4c5 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -26,959 +26,202 @@
#include <linux/phy.h>
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
+#include <linux/of_net.h>
#include <net/dsa.h>
#include <net/switchdev.h>
#include "ksz_priv.h"
-static const struct {
- int index;
- char string[ETH_GSTRING_LEN];
-} mib_names[TOTAL_SWITCH_COUNTER_NUM] = {
- { 0x00, "rx_hi" },
- { 0x01, "rx_undersize" },
- { 0x02, "rx_fragments" },
- { 0x03, "rx_oversize" },
- { 0x04, "rx_jabbers" },
- { 0x05, "rx_symbol_err" },
- { 0x06, "rx_crc_err" },
- { 0x07, "rx_align_err" },
- { 0x08, "rx_mac_ctrl" },
- { 0x09, "rx_pause" },
- { 0x0A, "rx_bcast" },
- { 0x0B, "rx_mcast" },
- { 0x0C, "rx_ucast" },
- { 0x0D, "rx_64_or_less" },
- { 0x0E, "rx_65_127" },
- { 0x0F, "rx_128_255" },
- { 0x10, "rx_256_511" },
- { 0x11, "rx_512_1023" },
- { 0x12, "rx_1024_1522" },
- { 0x13, "rx_1523_2000" },
- { 0x14, "rx_2001" },
- { 0x15, "tx_hi" },
- { 0x16, "tx_late_col" },
- { 0x17, "tx_pause" },
- { 0x18, "tx_bcast" },
- { 0x19, "tx_mcast" },
- { 0x1A, "tx_ucast" },
- { 0x1B, "tx_deferred" },
- { 0x1C, "tx_total_col" },
- { 0x1D, "tx_exc_col" },
- { 0x1E, "tx_single_col" },
- { 0x1F, "tx_mult_col" },
- { 0x80, "rx_total" },
- { 0x81, "tx_total" },
- { 0x82, "rx_discards" },
- { 0x83, "tx_discards" },
-};
-
-static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
-{
- u8 data;
-
- ksz_read8(dev, addr, &data);
- if (set)
- data |= bits;
- else
- data &= ~bits;
- ksz_write8(dev, addr, data);
-}
-
-static void ksz_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set)
-{
- u32 data;
-
- ksz_read32(dev, addr, &data);
- if (set)
- data |= bits;
- else
- data &= ~bits;
- ksz_write32(dev, addr, data);
-}
-
-static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
- bool set)
-{
- u32 addr;
- u8 data;
-
- addr = PORT_CTRL_ADDR(port, offset);
- ksz_read8(dev, addr, &data);
-
- if (set)
- data |= bits;
- else
- data &= ~bits;
-
- ksz_write8(dev, addr, data);
-}
-
-static void ksz_port_cfg32(struct ksz_device *dev, int port, int offset,
- u32 bits, bool set)
-{
- u32 addr;
- u32 data;
-
- addr = PORT_CTRL_ADDR(port, offset);
- ksz_read32(dev, addr, &data);
-
- if (set)
- data |= bits;
- else
- data &= ~bits;
-
- ksz_write32(dev, addr, data);
-}
-
-static int wait_vlan_ctrl_ready(struct ksz_device *dev, u32 waiton, int timeout)
-{
- u8 data;
-
- do {
- ksz_read8(dev, REG_SW_VLAN_CTRL, &data);
- if (!(data & waiton))
- break;
- usleep_range(1, 10);
- } while (timeout-- > 0);
-
- if (timeout <= 0)
- return -ETIMEDOUT;
-
- return 0;
-}
-
-static int get_vlan_table(struct dsa_switch *ds, u16 vid, u32 *vlan_table)
-{
- struct ksz_device *dev = ds->priv;
- int ret;
-
- mutex_lock(&dev->vlan_mutex);
-
- ksz_write16(dev, REG_SW_VLAN_ENTRY_INDEX__2, vid & VLAN_INDEX_M);
- ksz_write8(dev, REG_SW_VLAN_CTRL, VLAN_READ | VLAN_START);
-
- /* wait to be cleared */
- ret = wait_vlan_ctrl_ready(dev, VLAN_START, 1000);
- if (ret < 0) {
- dev_dbg(dev->dev, "Failed to read vlan table\n");
- goto exit;
- }
-
- ksz_read32(dev, REG_SW_VLAN_ENTRY__4, &vlan_table[0]);
- ksz_read32(dev, REG_SW_VLAN_ENTRY_UNTAG__4, &vlan_table[1]);
- ksz_read32(dev, REG_SW_VLAN_ENTRY_PORTS__4, &vlan_table[2]);
-
- ksz_write8(dev, REG_SW_VLAN_CTRL, 0);
-
-exit:
- mutex_unlock(&dev->vlan_mutex);
-
- return ret;
-}
-
-static int set_vlan_table(struct dsa_switch *ds, u16 vid, u32 *vlan_table)
-{
- struct ksz_device *dev = ds->priv;
- int ret;
-
- mutex_lock(&dev->vlan_mutex);
-
- ksz_write32(dev, REG_SW_VLAN_ENTRY__4, vlan_table[0]);
- ksz_write32(dev, REG_SW_VLAN_ENTRY_UNTAG__4, vlan_table[1]);
- ksz_write32(dev, REG_SW_VLAN_ENTRY_PORTS__4, vlan_table[2]);
-
- ksz_write16(dev, REG_SW_VLAN_ENTRY_INDEX__2, vid & VLAN_INDEX_M);
- ksz_write8(dev, REG_SW_VLAN_CTRL, VLAN_START | VLAN_WRITE);
-
- /* wait to be cleared */
- ret = wait_vlan_ctrl_ready(dev, VLAN_START, 1000);
- if (ret < 0) {
- dev_dbg(dev->dev, "Failed to write vlan table\n");
- goto exit;
- }
-
- ksz_write8(dev, REG_SW_VLAN_CTRL, 0);
-
- /* update vlan cache table */
- dev->vlan_cache[vid].table[0] = vlan_table[0];
- dev->vlan_cache[vid].table[1] = vlan_table[1];
- dev->vlan_cache[vid].table[2] = vlan_table[2];
-
-exit:
- mutex_unlock(&dev->vlan_mutex);
-
- return ret;
-}
-
-static void read_table(struct dsa_switch *ds, u32 *table)
-{
- struct ksz_device *dev = ds->priv;
-
- ksz_read32(dev, REG_SW_ALU_VAL_A, &table[0]);
- ksz_read32(dev, REG_SW_ALU_VAL_B, &table[1]);
- ksz_read32(dev, REG_SW_ALU_VAL_C, &table[2]);
- ksz_read32(dev, REG_SW_ALU_VAL_D, &table[3]);
-}
-
-static void write_table(struct dsa_switch *ds, u32 *table)
-{
- struct ksz_device *dev = ds->priv;
-
- ksz_write32(dev, REG_SW_ALU_VAL_A, table[0]);
- ksz_write32(dev, REG_SW_ALU_VAL_B, table[1]);
- ksz_write32(dev, REG_SW_ALU_VAL_C, table[2]);
- ksz_write32(dev, REG_SW_ALU_VAL_D, table[3]);
-}
-
-static int wait_alu_ready(struct ksz_device *dev, u32 waiton, int timeout)
-{
- u32 data;
-
- do {
- ksz_read32(dev, REG_SW_ALU_CTRL__4, &data);
- if (!(data & waiton))
- break;
- usleep_range(1, 10);
- } while (timeout-- > 0);
-
- if (timeout <= 0)
- return -ETIMEDOUT;
-
- return 0;
-}
-
-static int wait_alu_sta_ready(struct ksz_device *dev, u32 waiton, int timeout)
+void ksz_update_port_member(struct ksz_device *dev, int port)
{
- u32 data;
-
- do {
- ksz_read32(dev, REG_SW_ALU_STAT_CTRL__4, &data);
- if (!(data & waiton))
- break;
- usleep_range(1, 10);
- } while (timeout-- > 0);
-
- if (timeout <= 0)
- return -ETIMEDOUT;
-
- return 0;
-}
-
-static int ksz9477_reset_switch(struct ksz_device *dev)
-{
- u8 data8;
- u16 data16;
- u32 data32;
-
- /* reset switch */
- ksz_cfg(dev, REG_SW_OPERATION, SW_RESET, true);
-
- /* turn off SPI DO Edge select */
- ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
- data8 &= ~SPI_AUTO_EDGE_DETECTION;
- ksz_write8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, data8);
-
- /* default configuration */
- ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8);
- data8 = SW_AGING_ENABLE | SW_LINK_AUTO_AGING |
- SW_SRC_ADDR_FILTER | SW_FLUSH_STP_TABLE | SW_FLUSH_MSTP_TABLE;
- ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
-
- /* disable interrupts */
- ksz_write32(dev, REG_SW_INT_MASK__4, SWITCH_INT_MASK);
- ksz_write32(dev, REG_SW_PORT_INT_MASK__4, 0x7F);
- ksz_read32(dev, REG_SW_PORT_INT_STATUS__4, &data32);
-
- /* set broadcast storm protection 10% rate */
- ksz_read16(dev, REG_SW_MAC_CTRL_2, &data16);
- data16 &= ~BROADCAST_STORM_RATE;
- data16 |= (BROADCAST_STORM_VALUE * BROADCAST_STORM_PROT_RATE) / 100;
- ksz_write16(dev, REG_SW_MAC_CTRL_2, data16);
-
- return 0;
-}
-
-static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
-{
- u8 data8;
- u16 data16;
-
- /* enable tag tail for host port */
- if (cpu_port)
- ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_TAIL_TAG_ENABLE,
- true);
-
- ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, false);
-
- /* set back pressure */
- ksz_port_cfg(dev, port, REG_PORT_MAC_CTRL_1, PORT_BACK_PRESSURE, true);
-
- /* set flow control */
- ksz_port_cfg(dev, port, REG_PORT_CTRL_0,
- PORT_FORCE_TX_FLOW_CTRL | PORT_FORCE_RX_FLOW_CTRL, true);
-
- /* enable broadcast storm limit */
- ksz_port_cfg(dev, port, P_BCAST_STORM_CTRL, PORT_BROADCAST_STORM, true);
-
- /* disable DiffServ priority */
- ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_DIFFSERV_PRIO_ENABLE, false);
-
- /* replace priority */
- ksz_port_cfg(dev, port, REG_PORT_MRI_MAC_CTRL, PORT_USER_PRIO_CEILING,
- false);
- ksz_port_cfg32(dev, port, REG_PORT_MTI_QUEUE_CTRL_0__4,
- MTI_PVID_REPLACE, false);
-
- /* enable 802.1p priority */
- ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_PRIO_ENABLE, true);
-
- /* configure MAC to 1G & RGMII mode */
- ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &data8);
- data8 |= PORT_RGMII_ID_EG_ENABLE;
- data8 &= ~PORT_MII_NOT_1GBIT;
- data8 &= ~PORT_MII_SEL_M;
- data8 |= PORT_RGMII_SEL;
- ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, data8);
-
- /* clear pending interrupts */
- ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16);
-}
-
-static void ksz9477_config_cpu_port(struct dsa_switch *ds)
-{
- struct ksz_device *dev = ds->priv;
+ struct ksz_port *p;
int i;
- ds->num_ports = dev->port_cnt;
-
- for (i = 0; i < ds->num_ports; i++) {
- if (dsa_is_cpu_port(ds, i) && (dev->cpu_ports & (1 << i))) {
- dev->cpu_port = i;
+ for (i = 0; i < dev->port_cnt; i++) {
+ if (i == port || i == dev->cpu_port)
+ continue;
+ p = &dev->ports[i];
+ if (!(dev->member & (1 << i)))
+ continue;
- /* enable cpu port */
- ksz9477_port_setup(dev, i, true);
- }
+ /* Port is a member of the bridge and is forwarding. */
+ if (p->stp_state == BR_STATE_FORWARDING)
+ dev->dev_ops->cfg_port_member(dev, i, dev->member);
}
}
-static int ksz9477_setup(struct dsa_switch *ds)
+int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
{
struct ksz_device *dev = ds->priv;
- int ret = 0;
-
- dev->vlan_cache = devm_kcalloc(dev->dev, sizeof(struct vlan_table),
- dev->num_vlans, GFP_KERNEL);
- if (!dev->vlan_cache)
- return -ENOMEM;
-
- ret = ksz9477_reset_switch(dev);
- if (ret) {
- dev_err(ds->dev, "failed to reset switch\n");
- return ret;
- }
-
- /* accept packet up to 2000bytes */
- ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_LEGAL_PACKET_DISABLE, true);
-
- ksz9477_config_cpu_port(ds);
-
- ksz_cfg(dev, REG_SW_MAC_CTRL_1, MULTICAST_STORM_DISABLE, true);
-
- /* queue based egress rate limit */
- ksz_cfg(dev, REG_SW_MAC_CTRL_5, SW_OUT_RATE_LIMIT_QUEUE_BASED, true);
+ u16 val = 0xffff;
- /* start switch */
- ksz_cfg(dev, REG_SW_OPERATION, SW_START, true);
-
- return 0;
-}
-
-static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds)
-{
- return DSA_TAG_PROTO_KSZ;
-}
-
-static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg)
-{
- struct ksz_device *dev = ds->priv;
- u16 val = 0;
-
- ksz_pread16(dev, addr, 0x100 + (reg << 1), &val);
+ dev->dev_ops->r_phy(dev, addr, reg, &val);
return val;
}
-static int ksz9477_phy_write16(struct dsa_switch *ds, int addr, int reg,
- u16 val)
+int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
{
struct ksz_device *dev = ds->priv;
- ksz_pwrite16(dev, addr, 0x100 + (reg << 1), val);
+ dev->dev_ops->w_phy(dev, addr, reg, val);
return 0;
}
-static int ksz_enable_port(struct dsa_switch *ds, int port,
- struct phy_device *phy)
+int ksz_sset_count(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
- /* setup slave port */
- ksz9477_port_setup(dev, port, false);
-
- return 0;
-}
-
-static void ksz_disable_port(struct dsa_switch *ds, int port,
- struct phy_device *phy)
-{
- struct ksz_device *dev = ds->priv;
-
- /* there is no port disable */
- ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
-}
-
-static int ksz_sset_count(struct dsa_switch *ds)
-{
- return TOTAL_SWITCH_COUNTER_NUM;
-}
-
-static void ksz9477_get_strings(struct dsa_switch *ds, int port, uint8_t *buf)
-{
- int i;
-
- for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
- memcpy(buf + i * ETH_GSTRING_LEN, mib_names[i].string,
- ETH_GSTRING_LEN);
- }
+ return dev->mib_cnt;
}
-static void ksz_get_ethtool_stats(struct dsa_switch *ds, int port,
- uint64_t *buf)
+int ksz_port_bridge_join(struct dsa_switch *ds, int port,
+ struct net_device *br)
{
struct ksz_device *dev = ds->priv;
- int i;
- u32 data;
- int timeout;
-
- mutex_lock(&dev->stats_mutex);
-
- for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
- data = MIB_COUNTER_READ;
- data |= ((mib_names[i].index & 0xFF) << MIB_COUNTER_INDEX_S);
- ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, data);
-
- timeout = 1000;
- do {
- ksz_pread32(dev, port, REG_PORT_MIB_CTRL_STAT__4,
- &data);
- usleep_range(1, 10);
- if (!(data & MIB_COUNTER_READ))
- break;
- } while (timeout-- > 0);
- /* failed to read MIB. get out of loop */
- if (!timeout) {
- dev_dbg(dev->dev, "Failed to get MIB\n");
- break;
- }
-
- /* count resets upon read */
- ksz_pread32(dev, port, REG_PORT_MIB_DATA, &data);
+ dev->br_member |= (1 << port);
- dev->mib_value[i] += (uint64_t)data;
- buf[i] = dev->mib_value[i];
- }
-
- mutex_unlock(&dev->stats_mutex);
-}
-
-static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
- u8 state)
-{
- struct ksz_device *dev = ds->priv;
- u8 data;
-
- ksz_pread8(dev, port, P_STP_CTRL, &data);
- data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
-
- switch (state) {
- case BR_STATE_DISABLED:
- data |= PORT_LEARN_DISABLE;
- break;
- case BR_STATE_LISTENING:
- data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE);
- break;
- case BR_STATE_LEARNING:
- data |= PORT_RX_ENABLE;
- break;
- case BR_STATE_FORWARDING:
- data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
- break;
- case BR_STATE_BLOCKING:
- data |= PORT_LEARN_DISABLE;
- break;
- default:
- dev_err(ds->dev, "invalid STP state: %d\n", state);
- return;
- }
+ /* port_stp_state_set() will be called after to put the port in
+ * appropriate state so there is no need to do anything.
+ */
- ksz_pwrite8(dev, port, P_STP_CTRL, data);
+ return 0;
}
-static void ksz_port_fast_age(struct dsa_switch *ds, int port)
+void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
+ struct net_device *br)
{
struct ksz_device *dev = ds->priv;
- u8 data8;
- ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8);
- data8 |= SW_FAST_AGING;
- ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
+ dev->br_member &= ~(1 << port);
+ dev->member &= ~(1 << port);
- data8 &= ~SW_FAST_AGING;
- ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
+ /* port_stp_state_set() will be called after to put the port in
+ * forwarding state so there is no need to do anything.
+ */
}
-static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port,
- bool flag)
+void ksz_port_fast_age(struct dsa_switch *ds, int port)
{
struct ksz_device *dev = ds->priv;
- if (flag) {
- ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL,
- PORT_VLAN_LOOKUP_VID_0, true);
- ksz_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY, true);
- ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_VLAN_ENABLE, true);
- } else {
- ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_VLAN_ENABLE, false);
- ksz_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY, false);
- ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL,
- PORT_VLAN_LOOKUP_VID_0, false);
- }
-
- return 0;
+ dev->dev_ops->flush_dyn_mac_table(dev, port);
}
-static int ksz_port_vlan_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+int ksz_port_vlan_prepare(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans)
{
/* nothing needed */
return 0;
}
-static void ksz9477_port_vlan_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
-{
- struct ksz_device *dev = ds->priv;
- u32 vlan_table[3];
- u16 vid;
- bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
-
- for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
- if (get_vlan_table(ds, vid, vlan_table)) {
- dev_dbg(dev->dev, "Failed to get vlan table\n");
- return;
- }
-
- vlan_table[0] = VLAN_VALID | (vid & VLAN_FID_M);
- if (untagged)
- vlan_table[1] |= BIT(port);
- else
- vlan_table[1] &= ~BIT(port);
- vlan_table[1] &= ~(BIT(dev->cpu_port));
-
- vlan_table[2] |= BIT(port) | BIT(dev->cpu_port);
-
- if (set_vlan_table(ds, vid, vlan_table)) {
- dev_dbg(dev->dev, "Failed to set vlan table\n");
- return;
- }
-
- /* change PVID */
- if (vlan->flags & BRIDGE_VLAN_INFO_PVID)
- ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, vid);
- }
-}
-
-static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_vlan *vlan)
-{
- struct ksz_device *dev = ds->priv;
- bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
- u32 vlan_table[3];
- u16 vid;
- u16 pvid;
-
- ksz_pread16(dev, port, REG_PORT_DEFAULT_VID, &pvid);
- pvid = pvid & 0xFFF;
-
- for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
- if (get_vlan_table(ds, vid, vlan_table)) {
- dev_dbg(dev->dev, "Failed to get vlan table\n");
- return -ETIMEDOUT;
- }
-
- vlan_table[2] &= ~BIT(port);
-
- if (pvid == vid)
- pvid = 1;
-
- if (untagged)
- vlan_table[1] &= ~BIT(port);
-
- if (set_vlan_table(ds, vid, vlan_table)) {
- dev_dbg(dev->dev, "Failed to set vlan table\n");
- return -ETIMEDOUT;
- }
- }
-
- ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, pvid);
-
- return 0;
-}
-
-struct alu_struct {
- /* entry 1 */
- u8 is_static:1;
- u8 is_src_filter:1;
- u8 is_dst_filter:1;
- u8 prio_age:3;
- u32 _reserv_0_1:23;
- u8 mstp:3;
- /* entry 2 */
- u8 is_override:1;
- u8 is_use_fid:1;
- u32 _reserv_1_1:23;
- u8 port_forward:7;
- /* entry 3 & 4*/
- u32 _reserv_2_1:9;
- u8 fid:7;
- u8 mac[ETH_ALEN];
-};
-
-static int ksz9477_port_fdb_add(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid)
-{
- struct ksz_device *dev = ds->priv;
- u32 alu_table[4];
- u32 data;
- int ret = 0;
-
- mutex_lock(&dev->alu_mutex);
-
- /* find any entry with mac & vid */
- data = vid << ALU_FID_INDEX_S;
- data |= ((addr[0] << 8) | addr[1]);
- ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
-
- data = ((addr[2] << 24) | (addr[3] << 16));
- data |= ((addr[4] << 8) | addr[5]);
- ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
-
- /* start read operation */
- ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
-
- /* wait to be finished */
- ret = wait_alu_ready(dev, ALU_START, 1000);
- if (ret < 0) {
- dev_dbg(dev->dev, "Failed to read ALU\n");
- goto exit;
- }
-
- /* read ALU entry */
- read_table(ds, alu_table);
-
- /* update ALU entry */
- alu_table[0] = ALU_V_STATIC_VALID;
- alu_table[1] |= BIT(port);
- if (vid)
- alu_table[1] |= ALU_V_USE_FID;
- alu_table[2] = (vid << ALU_V_FID_S);
- alu_table[2] |= ((addr[0] << 8) | addr[1]);
- alu_table[3] = ((addr[2] << 24) | (addr[3] << 16));
- alu_table[3] |= ((addr[4] << 8) | addr[5]);
-
- write_table(ds, alu_table);
-
- ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
-
- /* wait to be finished */
- ret = wait_alu_ready(dev, ALU_START, 1000);
- if (ret < 0)
- dev_dbg(dev->dev, "Failed to write ALU\n");
-
-exit:
- mutex_unlock(&dev->alu_mutex);
-
- return ret;
-}
-
-static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port,
- const unsigned char *addr, u16 vid)
-{
- struct ksz_device *dev = ds->priv;
- u32 alu_table[4];
- u32 data;
- int ret = 0;
-
- mutex_lock(&dev->alu_mutex);
-
- /* read any entry with mac & vid */
- data = vid << ALU_FID_INDEX_S;
- data |= ((addr[0] << 8) | addr[1]);
- ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
-
- data = ((addr[2] << 24) | (addr[3] << 16));
- data |= ((addr[4] << 8) | addr[5]);
- ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
-
- /* start read operation */
- ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
-
- /* wait to be finished */
- ret = wait_alu_ready(dev, ALU_START, 1000);
- if (ret < 0) {
- dev_dbg(dev->dev, "Failed to read ALU\n");
- goto exit;
- }
-
- ksz_read32(dev, REG_SW_ALU_VAL_A, &alu_table[0]);
- if (alu_table[0] & ALU_V_STATIC_VALID) {
- ksz_read32(dev, REG_SW_ALU_VAL_B, &alu_table[1]);
- ksz_read32(dev, REG_SW_ALU_VAL_C, &alu_table[2]);
- ksz_read32(dev, REG_SW_ALU_VAL_D, &alu_table[3]);
-
- /* clear forwarding port */
- alu_table[2] &= ~BIT(port);
-
- /* if there is no port to forward, clear table */
- if ((alu_table[2] & ALU_V_PORT_MAP) == 0) {
- alu_table[0] = 0;
- alu_table[1] = 0;
- alu_table[2] = 0;
- alu_table[3] = 0;
- }
- } else {
- alu_table[0] = 0;
- alu_table[1] = 0;
- alu_table[2] = 0;
- alu_table[3] = 0;
- }
-
- write_table(ds, alu_table);
-
- ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
-
- /* wait to be finished */
- ret = wait_alu_ready(dev, ALU_START, 1000);
- if (ret < 0)
- dev_dbg(dev->dev, "Failed to write ALU\n");
-
-exit:
- mutex_unlock(&dev->alu_mutex);
-
- return ret;
-}
-
-static void convert_alu(struct alu_struct *alu, u32 *alu_table)
-{
- alu->is_static = !!(alu_table[0] & ALU_V_STATIC_VALID);
- alu->is_src_filter = !!(alu_table[0] & ALU_V_SRC_FILTER);
- alu->is_dst_filter = !!(alu_table[0] & ALU_V_DST_FILTER);
- alu->prio_age = (alu_table[0] >> ALU_V_PRIO_AGE_CNT_S) &
- ALU_V_PRIO_AGE_CNT_M;
- alu->mstp = alu_table[0] & ALU_V_MSTP_M;
-
- alu->is_override = !!(alu_table[1] & ALU_V_OVERRIDE);
- alu->is_use_fid = !!(alu_table[1] & ALU_V_USE_FID);
- alu->port_forward = alu_table[1] & ALU_V_PORT_MAP;
-
- alu->fid = (alu_table[2] >> ALU_V_FID_S) & ALU_V_FID_M;
-
- alu->mac[0] = (alu_table[2] >> 8) & 0xFF;
- alu->mac[1] = alu_table[2] & 0xFF;
- alu->mac[2] = (alu_table[3] >> 24) & 0xFF;
- alu->mac[3] = (alu_table[3] >> 16) & 0xFF;
- alu->mac[4] = (alu_table[3] >> 8) & 0xFF;
- alu->mac[5] = alu_table[3] & 0xFF;
-}
-
-static int ksz9477_port_fdb_dump(struct dsa_switch *ds, int port,
- dsa_fdb_dump_cb_t *cb, void *data)
+int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb,
+ void *data)
{
struct ksz_device *dev = ds->priv;
int ret = 0;
- u32 ksz_data;
- u32 alu_table[4];
+ u16 i = 0;
+ u16 entries = 0;
+ u8 timestamp = 0;
+ u8 fid;
+ u8 member;
struct alu_struct alu;
- int timeout;
mutex_lock(&dev->alu_mutex);
-
- /* start ALU search */
- ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_START | ALU_SEARCH);
-
do {
- timeout = 1000;
- do {
- ksz_read32(dev, REG_SW_ALU_CTRL__4, &ksz_data);
- if ((ksz_data & ALU_VALID) || !(ksz_data & ALU_START))
- break;
- usleep_range(1, 10);
- } while (timeout-- > 0);
-
- if (!timeout) {
- dev_dbg(dev->dev, "Failed to search ALU\n");
- ret = -ETIMEDOUT;
- goto exit;
- }
-
- /* read ALU table */
- read_table(ds, alu_table);
-
- convert_alu(&alu, alu_table);
-
- if (alu.port_forward & BIT(port)) {
+ alu.is_static = false;
+ ret = dev->dev_ops->r_dyn_mac_table(dev, i, alu.mac, &fid,
+ &member, ×tamp,
+ &entries);
+ if (!ret && (member & BIT(port))) {
ret = cb(alu.mac, alu.fid, alu.is_static, data);
if (ret)
- goto exit;
+ break;
}
- } while (ksz_data & ALU_START);
-
-exit:
-
- /* stop ALU search */
- ksz_write32(dev, REG_SW_ALU_CTRL__4, 0);
-
+ i++;
+ } while (i < entries);
mutex_unlock(&dev->alu_mutex);
+ if (i >= entries)
+ ret = 0;
return ret;
}
-static int ksz_port_mdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+int ksz_port_mdb_prepare(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
{
/* nothing to do */
return 0;
}
-static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+void ksz_port_mdb_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
{
struct ksz_device *dev = ds->priv;
- u32 static_table[4];
- u32 data;
+ struct alu_struct alu;
int index;
- u32 mac_hi, mac_lo;
-
- mac_hi = ((mdb->addr[0] << 8) | mdb->addr[1]);
- mac_lo = ((mdb->addr[2] << 24) | (mdb->addr[3] << 16));
- mac_lo |= ((mdb->addr[4] << 8) | mdb->addr[5]);
+ int empty = 0;
mutex_lock(&dev->alu_mutex);
for (index = 0; index < dev->num_statics; index++) {
- /* find empty slot first */
- data = (index << ALU_STAT_INDEX_S) |
- ALU_STAT_READ | ALU_STAT_START;
- ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
-
- /* wait to be finished */
- if (wait_alu_sta_ready(dev, ALU_STAT_START, 1000) < 0) {
- dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
- goto exit;
- }
-
- /* read ALU static table */
- read_table(ds, static_table);
-
- if (static_table[0] & ALU_V_STATIC_VALID) {
- /* check this has same vid & mac address */
- if (((static_table[2] >> ALU_V_FID_S) == mdb->vid) &&
- ((static_table[2] & ALU_V_MAC_ADDR_HI) == mac_hi) &&
- static_table[3] == mac_lo) {
- /* found matching one */
+ if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) {
+ /* Found one already in static MAC table. */
+ if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
+ alu.fid == mdb->vid)
break;
- }
- } else {
- /* found empty one */
- break;
+ /* Remember the first empty entry. */
+ } else if (!empty) {
+ empty = index + 1;
}
}
/* no available entry */
- if (index == dev->num_statics)
+ if (index == dev->num_statics && !empty)
goto exit;
/* add entry */
- static_table[0] = ALU_V_STATIC_VALID;
- static_table[1] |= BIT(port);
- if (mdb->vid)
- static_table[1] |= ALU_V_USE_FID;
- static_table[2] = (mdb->vid << ALU_V_FID_S);
- static_table[2] |= mac_hi;
- static_table[3] = mac_lo;
-
- write_table(ds, static_table);
-
- data = (index << ALU_STAT_INDEX_S) | ALU_STAT_START;
- ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
+ if (index == dev->num_statics) {
+ index = empty - 1;
+ memset(&alu, 0, sizeof(alu));
+ memcpy(alu.mac, mdb->addr, ETH_ALEN);
+ alu.is_static = true;
+ }
+ alu.port_forward |= BIT(port);
+ if (mdb->vid) {
+ alu.is_use_fid = true;
- /* wait to be finished */
- if (wait_alu_sta_ready(dev, ALU_STAT_START, 1000) < 0)
- dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+ /* Need a way to map VID to FID. */
+ alu.fid = mdb->vid;
+ }
+ dev->dev_ops->w_sta_mac_table(dev, index, &alu);
exit:
mutex_unlock(&dev->alu_mutex);
}
-static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_mdb *mdb)
+int ksz_port_mdb_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb)
{
struct ksz_device *dev = ds->priv;
- u32 static_table[4];
- u32 data;
+ struct alu_struct alu;
int index;
int ret = 0;
- u32 mac_hi, mac_lo;
-
- mac_hi = ((mdb->addr[0] << 8) | mdb->addr[1]);
- mac_lo = ((mdb->addr[2] << 24) | (mdb->addr[3] << 16));
- mac_lo |= ((mdb->addr[4] << 8) | mdb->addr[5]);
mutex_lock(&dev->alu_mutex);
for (index = 0; index < dev->num_statics; index++) {
- /* find empty slot first */
- data = (index << ALU_STAT_INDEX_S) |
- ALU_STAT_READ | ALU_STAT_START;
- ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
-
- /* wait to be finished */
- ret = wait_alu_sta_ready(dev, ALU_STAT_START, 1000);
- if (ret < 0) {
- dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
- goto exit;
- }
-
- /* read ALU static table */
- read_table(ds, static_table);
-
- if (static_table[0] & ALU_V_STATIC_VALID) {
- /* check this has same vid & mac address */
-
- if (((static_table[2] >> ALU_V_FID_S) == mdb->vid) &&
- ((static_table[2] & ALU_V_MAC_ADDR_HI) == mac_hi) &&
- static_table[3] == mac_lo) {
- /* found matching one */
+ if (!dev->dev_ops->r_sta_mac_table(dev, index, &alu)) {
+ /* Found one already in static MAC table. */
+ if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
+ alu.fid == mdb->vid)
break;
- }
}
}
@@ -989,25 +232,10 @@ static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
}
/* clear port */
- static_table[1] &= ~BIT(port);
-
- if ((static_table[1] & ALU_V_PORT_MAP) == 0) {
- /* delete entry */
- static_table[0] = 0;
- static_table[1] = 0;
- static_table[2] = 0;
- static_table[3] = 0;
- }
-
- write_table(ds, static_table);
-
- data = (index << ALU_STAT_INDEX_S) | ALU_STAT_START;
- ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data);
-
- /* wait to be finished */
- ret = wait_alu_sta_ready(dev, ALU_STAT_START, 1000);
- if (ret < 0)
- dev_dbg(dev->dev, "Failed to read ALU STATIC\n");
+ alu.port_forward &= ~BIT(port);
+ if (!alu.port_forward)
+ alu.is_static = false;
+ dev->dev_ops->w_sta_mac_table(dev, index, &alu);
exit:
mutex_unlock(&dev->alu_mutex);
@@ -1015,125 +243,30 @@ static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
return ret;
}
-static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
- struct dsa_mall_mirror_tc_entry *mirror,
- bool ingress)
+int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
{
struct ksz_device *dev = ds->priv;
- if (ingress)
- ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true);
- else
- ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, true);
-
- ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_SNIFFER, false);
-
- /* configure mirror port */
- ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
- PORT_MIRROR_SNIFFER, true);
+ /* setup slave port */
+ dev->dev_ops->port_setup(dev, port, false);
- ksz_cfg(dev, S_MIRROR_CTRL, SW_MIRROR_RX_TX, false);
+ /* port_stp_state_set() will be called after to enable the port so
+ * there is no need to do anything.
+ */
return 0;
}
-static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
- struct dsa_mall_mirror_tc_entry *mirror)
+void ksz_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
{
struct ksz_device *dev = ds->priv;
- u8 data;
-
- if (mirror->ingress)
- ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, false);
- else
- ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_TX, false);
- ksz_pread8(dev, port, P_MIRROR_CTRL, &data);
+ dev->on_ports &= ~(1 << port);
+ dev->live_ports &= ~(1 << port);
- if (!(data & (PORT_MIRROR_RX | PORT_MIRROR_TX)))
- ksz_port_cfg(dev, mirror->to_local_port, P_MIRROR_CTRL,
- PORT_MIRROR_SNIFFER, false);
-}
-
-static const struct dsa_switch_ops ksz_switch_ops = {
- .get_tag_protocol = ksz9477_get_tag_protocol,
- .setup = ksz9477_setup,
- .phy_read = ksz9477_phy_read16,
- .phy_write = ksz9477_phy_write16,
- .port_enable = ksz_enable_port,
- .port_disable = ksz_disable_port,
- .get_strings = ksz9477_get_strings,
- .get_ethtool_stats = ksz_get_ethtool_stats,
- .get_sset_count = ksz_sset_count,
- .port_stp_state_set = ksz9477_port_stp_state_set,
- .port_fast_age = ksz_port_fast_age,
- .port_vlan_filtering = ksz9477_port_vlan_filtering,
- .port_vlan_prepare = ksz_port_vlan_prepare,
- .port_vlan_add = ksz9477_port_vlan_add,
- .port_vlan_del = ksz9477_port_vlan_del,
- .port_fdb_dump = ksz9477_port_fdb_dump,
- .port_fdb_add = ksz9477_port_fdb_add,
- .port_fdb_del = ksz9477_port_fdb_del,
- .port_mdb_prepare = ksz_port_mdb_prepare,
- .port_mdb_add = ksz9477_port_mdb_add,
- .port_mdb_del = ksz9477_port_mdb_del,
- .port_mirror_add = ksz9477_port_mirror_add,
- .port_mirror_del = ksz9477_port_mirror_del,
-};
-
-struct ksz_chip_data {
- u32 chip_id;
- const char *dev_name;
- int num_vlans;
- int num_alus;
- int num_statics;
- int cpu_ports;
- int port_cnt;
-};
-
-static const struct ksz_chip_data ksz9477_switch_chips[] = {
- {
- .chip_id = 0x00947700,
- .dev_name = "KSZ9477",
- .num_vlans = 4096,
- .num_alus = 4096,
- .num_statics = 16,
- .cpu_ports = 0x7F, /* can be configured as cpu port */
- .port_cnt = 7, /* total physical port count */
- },
-};
-
-static int ksz9477_switch_init(struct ksz_device *dev)
-{
- int i;
-
- mutex_init(&dev->reg_mutex);
- mutex_init(&dev->stats_mutex);
- mutex_init(&dev->alu_mutex);
- mutex_init(&dev->vlan_mutex);
-
- dev->ds->ops = &ksz_switch_ops;
-
- for (i = 0; i < ARRAY_SIZE(ksz9477_switch_chips); i++) {
- const struct ksz_chip_data *chip = &ksz9477_switch_chips[i];
-
- if (dev->chip_id == chip->chip_id) {
- dev->name = chip->dev_name;
- dev->num_vlans = chip->num_vlans;
- dev->num_alus = chip->num_alus;
- dev->num_statics = chip->num_statics;
- dev->port_cnt = chip->port_cnt;
- dev->cpu_ports = chip->cpu_ports;
-
- break;
- }
- }
-
- /* no switch found */
- if (!dev->port_cnt)
- return -ENODEV;
-
- return 0;
+ /* port_stp_state_set() will be called after to disable the port so
+ * there is no need to do anything.
+ */
}
struct ksz_device *ksz_switch_alloc(struct device *base,
@@ -1162,53 +295,43 @@ struct ksz_device *ksz_switch_alloc(struct device *base,
}
EXPORT_SYMBOL(ksz_switch_alloc);
-int ksz_switch_detect(struct ksz_device *dev)
-{
- u8 data8;
- u32 id32;
- int ret;
-
- /* turn off SPI DO Edge select */
- ret = ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
- if (ret)
- return ret;
-
- data8 &= ~SPI_AUTO_EDGE_DETECTION;
- ret = ksz_write8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, data8);
- if (ret)
- return ret;
-
- /* read chip id */
- ret = ksz_read32(dev, REG_CHIP_ID0__1, &id32);
- if (ret)
- return ret;
-
- dev->chip_id = id32;
-
- return 0;
-}
-EXPORT_SYMBOL(ksz_switch_detect);
-
-int ksz_switch_register(struct ksz_device *dev)
+int ksz_switch_register(struct ksz_device *dev,
+ const struct ksz_dev_ops *ops)
{
int ret;
if (dev->pdata)
dev->chip_id = dev->pdata->chip_id;
- if (ksz_switch_detect(dev))
+ dev->dev_ops = ops;
+
+ if (dev->dev_ops->detect(dev))
return -EINVAL;
- ret = ksz9477_switch_init(dev);
+ ret = dev->dev_ops->init(dev);
if (ret)
return ret;
- return dsa_register_switch(dev->ds);
+ dev->interface = PHY_INTERFACE_MODE_MII;
+ if (dev->dev->of_node) {
+ ret = of_get_phy_mode(dev->dev->of_node);
+ if (ret >= 0)
+ dev->interface = ret;
+ }
+
+ ret = dsa_register_switch(dev->ds);
+ if (ret) {
+ dev->dev_ops->exit(dev);
+ return ret;
+ }
+
+ return 0;
}
EXPORT_SYMBOL(ksz_switch_register);
void ksz_switch_remove(struct ksz_device *dev)
{
+ dev->dev_ops->exit(dev);
dsa_unregister_switch(dev->ds);
}
EXPORT_SYMBOL(ksz_switch_remove);
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
new file mode 100644
index 0000000..1c1cbad
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -0,0 +1,230 @@
+/*
+ * Microchip switch driver common header
+ *
+ * Copyright (C) 2017 Microchip Technology Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __KSZ_COMMON_H
+#define __KSZ_COMMON_H
+
+void ksz_update_port_member(struct ksz_device *dev, int port);
+
+/* Common DSA access functions */
+
+int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg);
+int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val);
+int ksz_sset_count(struct dsa_switch *ds);
+int ksz_port_bridge_join(struct dsa_switch *ds, int port,
+ struct net_device *br);
+void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
+ struct net_device *br);
+void ksz_port_fast_age(struct dsa_switch *ds, int port);
+int ksz_port_vlan_prepare(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans);
+int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb,
+ void *data);
+int ksz_port_mdb_prepare(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans);
+void ksz_port_mdb_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans);
+int ksz_port_mdb_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb);
+int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
+void ksz_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
+
+/* Common register access functions */
+
+static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->read8(dev, reg, val);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->read16(dev, reg, val);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_read24(struct ksz_device *dev, u32 reg, u32 *val)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->read24(dev, reg, val);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_read32(struct ksz_device *dev, u32 reg, u32 *val)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->read32(dev, reg, val);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_write8(struct ksz_device *dev, u32 reg, u8 value)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->write8(dev, reg, value);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_write16(struct ksz_device *dev, u32 reg, u16 value)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->write16(dev, reg, value);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_write24(struct ksz_device *dev, u32 reg, u32 value)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->write24(dev, reg, value);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_write32(struct ksz_device *dev, u32 reg, u32 value)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->write32(dev, reg, value);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_get(struct ksz_device *dev, u32 reg, void *data,
+ size_t len)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->get(dev, reg, data, len);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline int ksz_set(struct ksz_device *dev, u32 reg, void *data,
+ size_t len)
+{
+ int ret;
+
+ mutex_lock(&dev->reg_mutex);
+ ret = dev->ops->set(dev, reg, data, len);
+ mutex_unlock(&dev->reg_mutex);
+
+ return ret;
+}
+
+static inline void ksz_pread8(struct ksz_device *dev, int port, int offset,
+ u8 *data)
+{
+ ksz_read8(dev, dev->dev_ops->get_port_addr(port, offset), data);
+}
+
+static inline void ksz_pread16(struct ksz_device *dev, int port, int offset,
+ u16 *data)
+{
+ ksz_read16(dev, dev->dev_ops->get_port_addr(port, offset), data);
+}
+
+static inline void ksz_pread32(struct ksz_device *dev, int port, int offset,
+ u32 *data)
+{
+ ksz_read32(dev, dev->dev_ops->get_port_addr(port, offset), data);
+}
+
+static inline void ksz_pwrite8(struct ksz_device *dev, int port, int offset,
+ u8 data)
+{
+ ksz_write8(dev, dev->dev_ops->get_port_addr(port, offset), data);
+}
+
+static inline void ksz_pwrite16(struct ksz_device *dev, int port, int offset,
+ u16 data)
+{
+ ksz_write16(dev, dev->dev_ops->get_port_addr(port, offset), data);
+}
+
+static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
+ u32 data)
+{
+ ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data);
+}
+
+static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
+{
+ u8 data;
+
+ ksz_read8(dev, addr, &data);
+ if (set)
+ data |= bits;
+ else
+ data &= ~bits;
+ ksz_write8(dev, addr, data);
+}
+
+static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
+ bool set)
+{
+ u32 addr;
+ u8 data;
+
+ addr = dev->dev_ops->get_port_addr(port, offset);
+ ksz_read8(dev, addr, &data);
+
+ if (set)
+ data |= bits;
+ else
+ data &= ~bits;
+
+ ksz_write8(dev, addr, data);
+}
+
+#endif
diff --git a/drivers/net/dsa/microchip/ksz_priv.h b/drivers/net/dsa/microchip/ksz_priv.h
index d461468..4126749 100644
--- a/drivers/net/dsa/microchip/ksz_priv.h
+++ b/drivers/net/dsa/microchip/ksz_priv.h
@@ -34,6 +34,30 @@ struct vlan_table {
u32 table[3];
};
+struct ksz_port_mib {
+ u8 cnt_ptr;
+ u64 *counters;
+};
+
+struct ksz_port {
+ u16 member;
+ u16 vid_member;
+ int stp_state;
+ u16 speed;
+ u8 duplex;
+ u8 flow_ctrl;
+
+ u32 on:1; /* port is not disabled by hardware */
+ u32 phy:1; /* port has a PHY */
+ u32 fiber:1; /* port is fiber */
+ u32 sgmii:1; /* port is SGMII */
+ u32 force:1;
+ u32 link_down:1; /* link just goes down */
+ u32 link_up:1; /* link is up */
+
+ struct ksz_port_mib mib;
+};
+
struct ksz_device {
struct dsa_switch *ds;
struct ksz_platform_data *pdata;
@@ -44,6 +68,7 @@ struct ksz_device {
struct mutex alu_mutex; /* ALU access */
struct mutex vlan_mutex; /* vlan access */
const struct ksz_io_ops *ops;
+ const struct ksz_dev_ops *dev_ops;
struct device *dev;
@@ -56,11 +81,37 @@ struct ksz_device {
int num_statics;
int cpu_port; /* port connected to CPU */
int cpu_ports; /* port bitmap can be cpu port */
+ int phy_port_cnt;
int port_cnt;
+ int reg_mib_cnt;
+ int mib_cnt;
+ int mib_port_cnt;
+ int last_port; /* ports after that not used */
+ int interface;
+ u32 regs_size;
struct vlan_table *vlan_cache;
u64 mib_value[TOTAL_SWITCH_COUNTER_NUM];
+
+ u8 *txbuf;
+
+ struct ksz_port *ports;
+ struct timer_list mib_read_timer;
+ struct work_struct mib_read;
+ unsigned long mib_read_interval;
+ u16 br_member;
+ u16 member;
+ u16 live_ports;
+ u16 on_ports; /* ports enabled by DSA */
+ u16 rx_ports;
+ u16 tx_ports;
+ u16 mirror_rx;
+ u16 mirror_tx;
+ u32 features; /* chip specific features */
+ u32 overrides; /* chip functions set by user */
+ u16 host_mask;
+ u16 port_mask;
};
struct ksz_io_ops {
@@ -72,140 +123,60 @@ struct ksz_io_ops {
int (*write16)(struct ksz_device *dev, u32 reg, u16 value);
int (*write24)(struct ksz_device *dev, u32 reg, u32 value);
int (*write32)(struct ksz_device *dev, u32 reg, u32 value);
- int (*phy_read16)(struct ksz_device *dev, int addr, int reg,
- u16 *value);
- int (*phy_write16)(struct ksz_device *dev, int addr, int reg,
- u16 value);
+ int (*get)(struct ksz_device *dev, u32 reg, void *data, size_t len);
+ int (*set)(struct ksz_device *dev, u32 reg, void *data, size_t len);
+};
+
+struct alu_struct {
+ /* entry 1 */
+ u8 is_static:1;
+ u8 is_src_filter:1;
+ u8 is_dst_filter:1;
+ u8 prio_age:3;
+ u32 _reserv_0_1:23;
+ u8 mstp:3;
+ /* entry 2 */
+ u8 is_override:1;
+ u8 is_use_fid:1;
+ u32 _reserv_1_1:23;
+ u8 port_forward:7;
+ /* entry 3 & 4*/
+ u32 _reserv_2_1:9;
+ u8 fid:7;
+ u8 mac[ETH_ALEN];
+};
+
+struct ksz_dev_ops {
+ u32 (*get_port_addr)(int port, int offset);
+ void (*cfg_port_member)(struct ksz_device *dev, int port, u8 member);
+ void (*flush_dyn_mac_table)(struct ksz_device *dev, int port);
+ void (*port_setup)(struct ksz_device *dev, int port, bool cpu_port);
+ void (*r_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 *val);
+ void (*w_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 val);
+ int (*r_dyn_mac_table)(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ u8 *fid, u8 *src_port, u8 *timestamp,
+ u16 *entries);
+ int (*r_sta_mac_table)(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu);
+ void (*w_sta_mac_table)(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu);
+ void (*r_mib_cnt)(struct ksz_device *dev, int port, u16 addr,
+ u64 *cnt);
+ void (*r_mib_pkt)(struct ksz_device *dev, int port, u16 addr,
+ u64 *dropped, u64 *cnt);
+ void (*port_init_cnt)(struct ksz_device *dev, int port);
+ int (*shutdown)(struct ksz_device *dev);
+ int (*detect)(struct ksz_device *dev);
+ int (*init)(struct ksz_device *dev);
+ void (*exit)(struct ksz_device *dev);
};
struct ksz_device *ksz_switch_alloc(struct device *base,
const struct ksz_io_ops *ops, void *priv);
-int ksz_switch_detect(struct ksz_device *dev);
-int ksz_switch_register(struct ksz_device *dev);
+int ksz_switch_register(struct ksz_device *dev,
+ const struct ksz_dev_ops *ops);
void ksz_switch_remove(struct ksz_device *dev);
-static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read8(dev, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read16(dev, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_read24(struct ksz_device *dev, u32 reg, u32 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read24(dev, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_read32(struct ksz_device *dev, u32 reg, u32 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read32(dev, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_write8(struct ksz_device *dev, u32 reg, u8 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write8(dev, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_write16(struct ksz_device *dev, u32 reg, u16 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write16(dev, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_write24(struct ksz_device *dev, u32 reg, u32 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write24(dev, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int ksz_write32(struct ksz_device *dev, u32 reg, u32 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write32(dev, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline void ksz_pread8(struct ksz_device *dev, int port, int offset,
- u8 *data)
-{
- ksz_read8(dev, PORT_CTRL_ADDR(port, offset), data);
-}
-
-static inline void ksz_pread16(struct ksz_device *dev, int port, int offset,
- u16 *data)
-{
- ksz_read16(dev, PORT_CTRL_ADDR(port, offset), data);
-}
-
-static inline void ksz_pread32(struct ksz_device *dev, int port, int offset,
- u32 *data)
-{
- ksz_read32(dev, PORT_CTRL_ADDR(port, offset), data);
-}
-
-static inline void ksz_pwrite8(struct ksz_device *dev, int port, int offset,
- u8 data)
-{
- ksz_write8(dev, PORT_CTRL_ADDR(port, offset), data);
-}
-
-static inline void ksz_pwrite16(struct ksz_device *dev, int port, int offset,
- u16 data)
-{
- ksz_write16(dev, PORT_CTRL_ADDR(port, offset), data);
-}
-
-static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
- u32 data)
-{
- ksz_write32(dev, PORT_CTRL_ADDR(port, offset), data);
-}
+int ksz9477_switch_register(struct ksz_device *dev);
#endif
diff --git a/drivers/net/dsa/microchip/ksz_spi.h b/drivers/net/dsa/microchip/ksz_spi.h
new file mode 100644
index 0000000..a05d404
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz_spi.h
@@ -0,0 +1,82 @@
+/*
+ * Microchip KSZ series SPI access common header
+ *
+ * Copyright (C) 2017 Microchip Technology Inc.
+ * Tristram Ha <Tristram.Ha@microchip.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __KSZ_SPI_H
+#define __KSZ_SPI_H
+
+/* Chip dependent SPI access */
+static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data,
+ unsigned int len);
+static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data,
+ unsigned int len);
+
+static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)
+{
+ return ksz_spi_read(dev, reg, val, 1);
+}
+
+static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val)
+{
+ int ret = ksz_spi_read(dev, reg, (u8 *)val, 2);
+
+ if (!ret)
+ *val = be16_to_cpu(*val);
+
+ return ret;
+}
+
+static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val)
+{
+ int ret = ksz_spi_read(dev, reg, (u8 *)val, 4);
+
+ if (!ret)
+ *val = be32_to_cpu(*val);
+
+ return ret;
+}
+
+static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value)
+{
+ return ksz_spi_write(dev, reg, &value, 1);
+}
+
+static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value)
+{
+ value = cpu_to_be16(value);
+ return ksz_spi_write(dev, reg, &value, 2);
+}
+
+static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value)
+{
+ value = cpu_to_be32(value);
+ return ksz_spi_write(dev, reg, &value, 4);
+}
+
+static int ksz_spi_get(struct ksz_device *dev, u32 reg, void *data, size_t len)
+{
+ return ksz_spi_read(dev, reg, data, len);
+}
+
+static int ksz_spi_set(struct ksz_device *dev, u32 reg, void *data, size_t len)
+{
+ return ksz_spi_write(dev, reg, data, len);
+}
+
+#endif
--
1.9.1
^ permalink raw reply related
* [PATCH v1 RFC 6/7] Add MIB counter reading support
From: Tristram.Ha @ 2017-10-06 20:33 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, Pavel Machek, Ruediger Schmitt
Cc: Tristram Ha, muvarov, nathan.leigh.conrad, vivien.didelot,
UNGLinuxDriver, netdev, linux-kernel
In-Reply-To: <1507321985-15097-1-git-send-email-Tristram.Ha@microchip.com>
From: Tristram Ha <Tristram.Ha@microchip.com>
Add MIB counter reading support.
Rename ksz_9477_reg.h to ksz9477_reg.h for consistency as the product
name is always KSZ####.
Header file ksz_priv.h no longer contains any chip specific data.
Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
---
drivers/net/dsa/microchip/ksz9477.c | 130 ++++++++++++++-------
.../microchip/{ksz_9477_reg.h => ksz9477_reg.h} | 0
drivers/net/dsa/microchip/ksz_common.c | 122 +++++++++++++++++++
drivers/net/dsa/microchip/ksz_common.h | 4 +
drivers/net/dsa/microchip/ksz_priv.h | 8 +-
5 files changed, 219 insertions(+), 45 deletions(-)
rename drivers/net/dsa/microchip/{ksz_9477_reg.h => ksz9477_reg.h} (100%)
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 214d380..9579d03 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -31,7 +31,7 @@
#include "ksz_priv.h"
#include "ksz_common.h"
-#include "ksz_9477_reg.h"
+#include "ksz9477_reg.h"
static const struct {
int index;
@@ -272,6 +272,74 @@ static int ksz9477_reset_switch(struct ksz_device *dev)
return 0;
}
+static void ksz9477_r_mib_cnt(struct ksz_device *dev, int port, u16 addr,
+ u64 *cnt)
+{
+ u32 data;
+ int timeout;
+
+ /* retain the flush/freeze bit */
+ ksz_pread32(dev, port, REG_PORT_MIB_CTRL_STAT__4, &data);
+ data &= MIB_COUNTER_FLUSH_FREEZE;
+
+ data |= MIB_COUNTER_READ;
+ data |= (addr << MIB_COUNTER_INDEX_S);
+ ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, data);
+
+ timeout = 1000;
+ do {
+ ksz_pread32(dev, port, REG_PORT_MIB_CTRL_STAT__4,
+ &data);
+ usleep_range(1, 10);
+ if (!(data & MIB_COUNTER_READ))
+ break;
+ } while (timeout-- > 0);
+
+ /* failed to read MIB. get out of loop */
+ if (!timeout) {
+ dev_dbg(dev->dev, "Failed to get MIB\n");
+ return;
+ }
+
+ /* count resets upon read */
+ ksz_pread32(dev, port, REG_PORT_MIB_DATA, &data);
+ *cnt += data;
+}
+
+static void ksz9477_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+ u64 *dropped, u64 *cnt)
+{
+ addr = mib_names[addr].index;
+ ksz9477_r_mib_cnt(dev, port, addr, cnt);
+}
+
+static void ksz9477_freeze_mib(struct ksz_device *dev, int port, bool freeze)
+{
+ /* enable the port for flush/freeze function */
+ if (freeze)
+ ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4,
+ MIB_COUNTER_FLUSH_FREEZE);
+ ksz_cfg(dev, REG_SW_MAC_CTRL_6, SW_MIB_COUNTER_FREEZE, freeze);
+
+ /* disable the port after freeze is done */
+ if (!freeze)
+ ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, 0);
+}
+
+static void ksz9477_port_init_cnt(struct ksz_device *dev, int port)
+{
+ struct ksz_port_mib *mib = &dev->ports[port].mib;
+
+ /* flush all enabled port MIB counters */
+ ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4,
+ MIB_COUNTER_FLUSH_FREEZE);
+ ksz_write8(dev, REG_SW_MAC_CTRL_6, SW_MIB_COUNTER_FLUSH);
+ ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, 0);
+
+ mib->cnt_ptr = 0;
+ memset(mib->counters, 0, dev->mib_cnt * sizeof(u64));
+}
+
static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds)
{
return DSA_TAG_PROTO_KSZ;
@@ -350,46 +418,6 @@ static void ksz9477_get_strings(struct dsa_switch *ds, int port, uint8_t *buf)
}
}
-static void ksz_get_ethtool_stats(struct dsa_switch *ds, int port,
- uint64_t *buf)
-{
- struct ksz_device *dev = ds->priv;
- int i;
- u32 data;
- int timeout;
-
- mutex_lock(&dev->stats_mutex);
-
- for (i = 0; i < TOTAL_SWITCH_COUNTER_NUM; i++) {
- data = MIB_COUNTER_READ;
- data |= ((mib_names[i].index & 0xFF) << MIB_COUNTER_INDEX_S);
- ksz_pwrite32(dev, port, REG_PORT_MIB_CTRL_STAT__4, data);
-
- timeout = 1000;
- do {
- ksz_pread32(dev, port, REG_PORT_MIB_CTRL_STAT__4,
- &data);
- usleep_range(1, 10);
- if (!(data & MIB_COUNTER_READ))
- break;
- } while (timeout-- > 0);
-
- /* failed to read MIB. get out of loop */
- if (!timeout) {
- dev_dbg(dev->dev, "Failed to get MIB\n");
- break;
- }
-
- /* count resets upon read */
- ksz_pread32(dev, port, REG_PORT_MIB_DATA, &data);
-
- dev->mib_value[i] += (uint64_t)data;
- buf[i] = dev->mib_value[i];
- }
-
- mutex_unlock(&dev->stats_mutex);
-}
-
static void ksz9477_cfg_port_member(struct ksz_device *dev, int port,
u8 member)
{
@@ -977,6 +1005,17 @@ static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
PORT_MIRROR_SNIFFER, false);
}
+static void ksz9477_phy_setup(struct ksz_device *dev, int port,
+ struct phy_device *phy)
+{
+ if (port < dev->phy_port_cnt) {
+ /* SUPPORTED_Asym_Pause and SUPPORTED_Pause can be removed to
+ * disable flow control when rate limiting is used.
+ */
+ phy->advertising = phy->supported;
+ }
+}
+
static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
{
u8 data8;
@@ -1159,6 +1198,8 @@ static int ksz9477_setup(struct dsa_switch *ds)
/* start switch */
ksz_cfg(dev, REG_SW_OPERATION, SW_START, true);
+ ksz_init_mib_timer(dev);
+
return 0;
}
@@ -1168,6 +1209,7 @@ static int ksz9477_setup(struct dsa_switch *ds)
.set_addr = ksz9477_set_addr,
.phy_read = ksz9477_phy_read16,
.phy_write = ksz9477_phy_write16,
+ .adjust_link = ksz_adjust_link,
.port_enable = ksz_enable_port,
.port_disable = ksz_disable_port,
.get_strings = ksz9477_get_strings,
@@ -1289,6 +1331,7 @@ static int ksz9477_switch_init(struct ksz_device *dev)
if (!dev->ports)
return -ENOMEM;
for (i = 0; i < dev->mib_port_cnt; i++) {
+ mutex_init(&dev->ports[i].mib.cnt_mutex);
dev->ports[i].mib.counters =
devm_kzalloc(dev->dev,
sizeof(u64) *
@@ -1310,7 +1353,12 @@ static void ksz9477_switch_exit(struct ksz_device *dev)
.get_port_addr = ksz9477_get_port_addr,
.cfg_port_member = ksz9477_cfg_port_member,
.flush_dyn_mac_table = ksz9477_flush_dyn_mac_table,
+ .phy_setup = ksz9477_phy_setup,
.port_setup = ksz9477_port_setup,
+ .r_mib_cnt = ksz9477_r_mib_cnt,
+ .r_mib_pkt = ksz9477_r_mib_pkt,
+ .freeze_mib = ksz9477_freeze_mib,
+ .port_init_cnt = ksz9477_port_init_cnt,
.shutdown = ksz9477_reset_switch,
.detect = ksz9477_switch_detect,
.init = ksz9477_switch_init,
diff --git a/drivers/net/dsa/microchip/ksz_9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
similarity index 100%
rename from drivers/net/dsa/microchip/ksz_9477_reg.h
rename to drivers/net/dsa/microchip/ksz9477_reg.h
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 1c9c4c5..885b3e3 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -50,6 +50,81 @@ void ksz_update_port_member(struct ksz_device *dev, int port)
}
}
+static void port_r_cnt(struct ksz_device *dev, int port)
+{
+ struct ksz_port_mib *mib = &dev->ports[port].mib;
+ u64 *dropped;
+
+ /* Some ports may not have MIB counters before SWITCH_COUNTER_NUM. */
+ while (mib->cnt_ptr < dev->reg_mib_cnt) {
+ dev->dev_ops->r_mib_cnt(dev, port, mib->cnt_ptr,
+ &mib->counters[mib->cnt_ptr]);
+ ++mib->cnt_ptr;
+ }
+
+ /* last one in storage */
+ dropped = &mib->counters[dev->mib_cnt];
+
+ /* Some ports may not have MIB counters after SWITCH_COUNTER_NUM. */
+ while (mib->cnt_ptr < dev->mib_cnt) {
+ dev->dev_ops->r_mib_pkt(dev, port, mib->cnt_ptr,
+ dropped, &mib->counters[mib->cnt_ptr]);
+ ++mib->cnt_ptr;
+ }
+ mib->cnt_ptr = 0;
+}
+
+static void ksz_mib_read_work(struct work_struct *work)
+{
+ struct ksz_device *dev =
+ container_of(work, struct ksz_device, mib_read);
+ struct ksz_port *p;
+ struct ksz_port_mib *mib;
+ int i;
+
+ for (i = 0; i < dev->mib_port_cnt; i++) {
+ p = &dev->ports[i];
+ if (!p->on)
+ continue;
+ mib = &p->mib;
+ mutex_lock(&mib->cnt_mutex);
+
+ /* read only dropped counters when link is not up */
+ if (p->link_down)
+ p->link_down = 0;
+ else if (!p->link_up)
+ mib->cnt_ptr = dev->reg_mib_cnt;
+ port_r_cnt(dev, i);
+ mutex_unlock(&mib->cnt_mutex);
+ }
+}
+
+static void mib_monitor(unsigned long ptr)
+{
+ struct ksz_device *dev = (struct ksz_device *)ptr;
+
+ mod_timer(&dev->mib_read_timer, jiffies + dev->mib_read_interval);
+ schedule_work(&dev->mib_read);
+}
+
+void ksz_init_mib_timer(struct ksz_device *dev)
+{
+ int i;
+
+ /* Read MIB counters every 30 seconds to avoid overflow. */
+ dev->mib_read_interval = msecs_to_jiffies(30000);
+
+ INIT_WORK(&dev->mib_read, ksz_mib_read_work);
+ setup_timer(&dev->mib_read_timer, mib_monitor, (unsigned long)dev);
+
+ for (i = 0; i < dev->mib_port_cnt; i++)
+ dev->dev_ops->port_init_cnt(dev, i);
+
+ /* Start the timer 2 seconds later. */
+ dev->mib_read_timer.expires = jiffies + msecs_to_jiffies(2000);
+ add_timer(&dev->mib_read_timer);
+}
+
int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg)
{
struct ksz_device *dev = ds->priv;
@@ -69,6 +144,25 @@ int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
return 0;
}
+void ksz_adjust_link(struct dsa_switch *ds, int port,
+ struct phy_device *phydev)
+{
+ struct ksz_device *dev = ds->priv;
+ struct ksz_port *p = &dev->ports[port];
+
+ if (phydev->link) {
+ p->speed = phydev->speed;
+ p->duplex = phydev->duplex;
+ p->flow_ctrl = phydev->pause;
+ p->link_up = 1;
+ dev->live_ports |= (1 << port) & dev->on_ports;
+ } else if (p->link_up) {
+ p->link_up = 0;
+ p->link_down = 1;
+ dev->live_ports &= ~(1 << port);
+ }
+}
+
int ksz_sset_count(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
@@ -76,6 +170,27 @@ int ksz_sset_count(struct dsa_switch *ds)
return dev->mib_cnt;
}
+void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf)
+{
+ struct ksz_device *dev = ds->priv;
+ struct ksz_port_mib *mib;
+
+ mib = &dev->ports[port].mib;
+
+ mutex_lock(&dev->stats_mutex);
+
+ /* freeze MIB counters if supported */
+ if (dev->dev_ops->freeze_mib)
+ dev->dev_ops->freeze_mib(dev, port, true);
+ mutex_lock(&mib->cnt_mutex);
+ port_r_cnt(dev, port);
+ mutex_unlock(&mib->cnt_mutex);
+ if (dev->dev_ops->freeze_mib)
+ dev->dev_ops->freeze_mib(dev, port, false);
+ memcpy(buf, mib->counters, dev->mib_cnt * sizeof(u64));
+ mutex_unlock(&dev->stats_mutex);
+}
+
int ksz_port_bridge_join(struct dsa_switch *ds, int port,
struct net_device *br)
{
@@ -249,6 +364,7 @@ int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
/* setup slave port */
dev->dev_ops->port_setup(dev, port, false);
+ dev->dev_ops->phy_setup(dev, port, phy);
/* port_stp_state_set() will be called after to enable the port so
* there is no need to do anything.
@@ -331,6 +447,12 @@ int ksz_switch_register(struct ksz_device *dev,
void ksz_switch_remove(struct ksz_device *dev)
{
+ /* timer started */
+ if (dev->mib_read_timer.expires) {
+ del_timer_sync(&dev->mib_read_timer);
+ flush_work(&dev->mib_read);
+ }
+
dev->dev_ops->exit(dev);
dsa_unregister_switch(dev->ds);
}
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 1c1cbad..20afea3f 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -21,12 +21,16 @@
#define __KSZ_COMMON_H
void ksz_update_port_member(struct ksz_device *dev, int port);
+void ksz_init_mib_timer(struct ksz_device *dev);
/* Common DSA access functions */
int ksz_phy_read16(struct dsa_switch *ds, int addr, int reg);
int ksz_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val);
+void ksz_adjust_link(struct dsa_switch *ds, int port,
+ struct phy_device *phydev);
int ksz_sset_count(struct dsa_switch *ds);
+void ksz_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *buf);
int ksz_port_bridge_join(struct dsa_switch *ds, int port,
struct net_device *br);
void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/microchip/ksz_priv.h b/drivers/net/dsa/microchip/ksz_priv.h
index 4126749..0e1ccd3 100644
--- a/drivers/net/dsa/microchip/ksz_priv.h
+++ b/drivers/net/dsa/microchip/ksz_priv.h
@@ -26,8 +26,6 @@
#include <linux/etherdevice.h>
#include <net/dsa.h>
-#include "ksz_9477_reg.h"
-
struct ksz_io_ops;
struct vlan_table {
@@ -35,6 +33,7 @@ struct vlan_table {
};
struct ksz_port_mib {
+ struct mutex cnt_mutex; /* structure access */
u8 cnt_ptr;
u64 *counters;
};
@@ -92,8 +91,6 @@ struct ksz_device {
struct vlan_table *vlan_cache;
- u64 mib_value[TOTAL_SWITCH_COUNTER_NUM];
-
u8 *txbuf;
struct ksz_port *ports;
@@ -150,6 +147,8 @@ struct ksz_dev_ops {
u32 (*get_port_addr)(int port, int offset);
void (*cfg_port_member)(struct ksz_device *dev, int port, u8 member);
void (*flush_dyn_mac_table)(struct ksz_device *dev, int port);
+ void (*phy_setup)(struct ksz_device *dev, int port,
+ struct phy_device *phy);
void (*port_setup)(struct ksz_device *dev, int port, bool cpu_port);
void (*r_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 *val);
void (*w_phy)(struct ksz_device *dev, u16 phy, u16 reg, u16 val);
@@ -164,6 +163,7 @@ struct ksz_dev_ops {
u64 *cnt);
void (*r_mib_pkt)(struct ksz_device *dev, int port, u16 addr,
u64 *dropped, u64 *cnt);
+ void (*freeze_mib)(struct ksz_device *dev, int port, bool freeze);
void (*port_init_cnt)(struct ksz_device *dev, int port);
int (*shutdown)(struct ksz_device *dev);
int (*detect)(struct ksz_device *dev);
--
1.9.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox