* [PATCH v3 net-next 11/12] gtp: Experimental support encpasulating over IPv6
From: Tom Herbert @ 2017-09-25 3:29 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170925032941.14586-1-tom@quantonium.net>
Allows using GTP datapath over IPv6. Remote peers are indicated by IPv6.
Note this is experimental, more work is needed to make this
compliant with 3GPP standard.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/gtp.c | 248 ++++++++++++++++++++++++++++++++++---------
include/uapi/linux/gtp.h | 1 +
include/uapi/linux/if_link.h | 3 +
3 files changed, 200 insertions(+), 52 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 919ec6e14973..1c580df4cfc5 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -28,6 +28,7 @@
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
+#include <net/ip6_tunnel.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/icmp.h>
@@ -59,16 +60,22 @@ struct pdp_ctx {
__be16 gtp_port;
u16 ms_af;
+ u16 peer_af;
#if GTP_IPV6
union {
struct in_addr ms_addr_ip4;
struct in6_addr ms_addr_ip6;
};
+
+ union {
+ struct in_addr peer_addr_ip4;
+ struct in6_addr peer_addr_ip6;
+ };
#else
struct in_addr ms_addr_ip4;
+ struct in_addr peer_addr_ip4;
#endif
- struct in_addr peer_addr_ip4;
struct sock *sk;
struct net_device *dev;
@@ -93,8 +100,11 @@ struct gtp_dev {
struct hlist_head *tid_hash;
struct hlist_head *addr4_hash;
+
#if GTP_IPV6
struct hlist_head *addr6_hash;
+
+ unsigned int is_ipv6:1;
#endif
struct gro_cells gro_cells;
@@ -534,8 +544,6 @@ static int gtp_xmit(struct sk_buff *skb, struct net_device *dev,
{
struct iphdr *inner_iph = NULL;
struct sock *sk = pctx->sk;
- __be32 saddr = inet_sk(sk)->inet_saddr;
- struct rtable *rt;
int err = 0;
if (skb->protocol == ETH_P_IP)
@@ -548,38 +556,84 @@ static int gtp_xmit(struct sk_buff *skb, struct net_device *dev,
skb_reset_inner_headers(skb);
- /* Source address returned by route lookup is ignored since
- * we get the address from a socket.
- */
- rt = ip_tunnel_get_route(dev, skb, sk->sk_protocol,
- sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
- pctx->peer_addr_ip4.s_addr, &saddr,
- pctx->gtp_port, pctx->gtp_port,
- &pctx->dst_cache, NULL);
-
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto out_err;
- }
+ if (pctx->peer_af == AF_INET) {
+ __be32 saddr = inet_sk(sk)->inet_saddr;
+ struct rtable *rt;
+
+ /* Source address returned by route lookup is ignored since
+ * we get the address from a socket.
+ */
+ rt = ip_tunnel_get_route(dev, skb, sk->sk_protocol,
+ sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
+ pctx->peer_addr_ip4.s_addr, &saddr,
+ pctx->gtp_port, pctx->gtp_port,
+ &pctx->dst_cache, NULL);
+
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto out_err;
+ }
+
+ skb_dst_drop(skb);
- skb_dst_drop(skb);
+ gtp_push_header(skb, pctx);
- gtp_push_header(skb, pctx);
+ if (inner_iph)
+ __iptunnel_update_pmtu(dev, skb, &rt->dst,
+ !!inner_iph->frag_off,
+ inner_iph, pctx->hlen,
+ pctx->peer_addr_ip4.s_addr);
- if (inner_iph)
- __iptunnel_update_pmtu(dev, skb, &rt->dst,
- !!inner_iph->frag_off,
- inner_iph, pctx->hlen,
- pctx->peer_addr_ip4.s_addr);
+ udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+ pctx->peer_addr_ip4.s_addr,
+ 0, ip4_dst_hoplimit(&rt->dst), 0,
+ pctx->gtp_port, pctx->gtp_port,
+ false, false);
- udp_tunnel_xmit_skb(rt, sk, skb, saddr,
- pctx->peer_addr_ip4.s_addr,
- 0, ip4_dst_hoplimit(&rt->dst), 0,
- pctx->gtp_port, pctx->gtp_port,
- false, false);
+ netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
+ &saddr, &pctx->peer_addr_ip4.s_addr);
- netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
- &saddr, &pctx->peer_addr_ip4.s_addr);
+#if GTP_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (pctx->peer_af == AF_INET6) {
+ struct in6_addr saddr = inet6_sk(sk)->saddr;
+ struct dst_entry *dst;
+
+ /* Source address returned by route lookup is ignored since
+ * we get the address from a socket.
+ */
+ dst = ip6_tnl_get_route(dev, skb, sk, sk->sk_protocol,
+ sk->sk_bound_dev_if, 0,
+ 0, &pctx->peer_addr_ip6, &saddr,
+ pctx->gtp_port, pctx->gtp_port,
+ &pctx->dst_cache, NULL);
+
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto out_err;
+ }
+
+ skb_dst_drop(skb);
+
+ gtp_push_header(skb, pctx);
+
+ if (inner_iph)
+ __iptunnel_update_pmtu(dev, skb, dst,
+ !!inner_iph->frag_off,
+ inner_iph, pctx->hlen, 0);
+
+ udp_tunnel6_xmit_skb(dst, sk, skb, dev,
+ &saddr, &pctx->peer_addr_ip6,
+ 0, ip6_dst_hoplimit(dst), 0,
+ pctx->gtp_port, pctx->gtp_port,
+ false);
+
+ netdev_dbg(dev, "gtp -> IP src: %pI6 dst: %pI6\n",
+ &saddr, &pctx->peer_addr_ip6);
+
+#endif
+#endif
+ }
return 0;
@@ -688,7 +742,12 @@ static void gtp_link_setup(struct net_device *dev)
/* Assume largest header, ie. GTPv0. */
dev->needed_headroom = LL_MAX_HEADER +
+#if GTP_IPV6
+ max_t(int, sizeof(struct iphdr),
+ sizeof(struct ipv6hdr)) +
+#else
sizeof(struct iphdr) +
+#endif
sizeof(struct udphdr) +
sizeof(struct gtp0_header);
@@ -697,12 +756,15 @@ static void gtp_link_setup(struct net_device *dev)
static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
static void gtp_hashtable_free(struct gtp_dev *gtp);
-static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[],
+ bool is_ipv6);
static int gtp_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
+ unsigned int role = GTP_ROLE_GGSN;
+ bool is_ipv6 = false;
struct gtp_dev *gtp;
struct gtp_net *gn;
int hashsize, err;
@@ -710,9 +772,32 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
return -EINVAL;
+ if (data[IFLA_GTP_ROLE]) {
+ role = nla_get_u32(data[IFLA_GTP_ROLE]);
+ if (role > GTP_ROLE_SGSN)
+ return -EINVAL;
+ }
+
+ if (data[IFLA_GTP_AF]) {
+ u16 af = nla_get_u16(data[IFLA_GTP_AF]);
+
+ switch (af) {
+ case AF_INET:
+ is_ipv6 = false;
+ break;
+#if GTP_IPV6
+ case AF_INET6:
+ is_ipv6 = true;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ }
+
gtp = netdev_priv(dev);
- err = gtp_encap_enable(gtp, data);
+ err = gtp_encap_enable(gtp, data, is_ipv6);
if (err < 0)
return err;
@@ -731,6 +816,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
goto out_hashtable;
}
+ gtp->role = role;
+#if GTP_IPV6
+ gtp->is_ipv6 = is_ipv6;
+#endif
+
gn = net_generic(dev_net(dev), gtp_net_id);
list_add_rcu(>p->list, &gn->gtp_dev_list);
@@ -860,7 +950,8 @@ static void gtp_hashtable_free(struct gtp_dev *gtp)
}
static struct sock *gtp_encap_enable_socket(int fd, int type,
- struct gtp_dev *gtp)
+ struct gtp_dev *gtp,
+ bool is_ipv6)
{
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct socket *sock;
@@ -881,6 +972,12 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
goto out_sock;
}
+ if (sock->sk->sk_family != (is_ipv6 ? AF_INET6 : AF_INET)) {
+ pr_debug("socket fd=%d not right family\n", fd);
+ sk = ERR_PTR(-EINVAL);
+ goto out_sock;
+ }
+
if (rcu_dereference_sk_user_data(sock->sk)) {
sk = ERR_PTR(-EBUSY);
goto out_sock;
@@ -913,16 +1010,16 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
return sk;
}
-static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
+static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[],
+ bool is_ipv6)
{
- struct sock *sk1u = NULL;
- struct sock *sk0 = NULL;
- unsigned int role = GTP_ROLE_GGSN;
+ struct sock *sk0 = NULL, *sk1u = NULL;
if (data[IFLA_GTP_FD0]) {
u32 fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
- sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp);
+ sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp,
+ is_ipv6);
if (IS_ERR(sk0))
return PTR_ERR(sk0);
}
@@ -930,7 +1027,8 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
if (data[IFLA_GTP_FD1]) {
u32 fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
- sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp);
+ sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp,
+ is_ipv6);
if (IS_ERR(sk1u)) {
if (sk0)
gtp_encap_disable_sock(sk0);
@@ -938,15 +1036,8 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
}
}
- if (data[IFLA_GTP_ROLE]) {
- role = nla_get_u32(data[IFLA_GTP_ROLE]);
- if (role > GTP_ROLE_SGSN)
- return -EINVAL;
- }
-
gtp->sk0 = sk0;
gtp->sk1u = sk1u;
- gtp->role = role;
return 0;
}
@@ -982,8 +1073,18 @@ static void pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
__be16 default_port = 0;
pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]);
- pctx->peer_addr_ip4.s_addr =
- nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]);
+
+ if (info->attrs[GTPA_PEER_ADDRESS]) {
+ pctx->peer_af = AF_INET;
+ pctx->peer_addr_ip4.s_addr =
+ nla_get_in_addr(info->attrs[GTPA_PEER_ADDRESS]);
+#if GTP_IPV6
+ } else if (info->attrs[GTPA_PEER6_ADDRESS]) {
+ pctx->peer_af = AF_INET6;
+ pctx->peer_addr_ip6 = nla_get_in6_addr(
+ info->attrs[GTPA_PEER6_ADDRESS]);
+#endif
+ }
switch (pctx->gtp_version) {
case GTP_V0:
@@ -1162,11 +1263,17 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
int err;
if (!info->attrs[GTPA_VERSION] ||
- !info->attrs[GTPA_LINK] ||
- !info->attrs[GTPA_PEER_ADDRESS])
+ !info->attrs[GTPA_LINK])
return -EINVAL;
#if GTP_IPV6
+ if (!(!!info->attrs[GTPA_PEER_ADDRESS] ^
+ !!info->attrs[GTPA_PEER6_ADDRESS])) {
+ /* Either v4 or v6 peer address must be set */
+
+ return -EINVAL;
+ }
+
if (!(!!info->attrs[GTPA_MS_ADDRESS] ^
!!info->attrs[GTPA_MS6_ADDRESS])) {
/* Either v4 or v6 mobile subscriber address must be set */
@@ -1174,6 +1281,12 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
#else
+ if (!info->attrs[GTPA_PEER_ADDRESS]) {
+ /* v4 peer address must be set */
+
+ return -EINVAL;
+ }
+
if (!info->attrs[GTPA_MS_ADDRESS]) {
/* v4 mobile subscriber address must be set */
@@ -1207,6 +1320,14 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
goto out_unlock;
}
+#if GTP_IPV6
+ if ((info->attrs[GTPA_PEER_ADDRESS] && gtp->is_ipv6) ||
+ (info->attrs[GTPA_PEER6_ADDRESS] && !gtp->is_ipv6)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+#endif
+
if (version == GTP_V0)
sk = gtp->sk0;
else if (version == GTP_V1)
@@ -1315,10 +1436,31 @@ static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
if (genlh == NULL)
goto nlmsg_failure;
- if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) ||
- nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer_addr_ip4.s_addr))
+ if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version))
goto nla_put_failure;
+ if (nla_put_u32(skb, GTPA_LINK, pctx->dev->ifindex))
+ goto nla_put_failure;
+
+ switch (pctx->peer_af) {
+ case AF_INET:
+ if (nla_put_be32(skb, GTPA_PEER_ADDRESS,
+ pctx->peer_addr_ip4.s_addr))
+ goto nla_put_failure;
+
+ break;
+#if GTP_IPV6
+ case AF_INET6:
+ if (nla_put_in6_addr(skb, GTPA_PEER6_ADDRESS,
+ &pctx->peer_addr_ip6))
+ goto nla_put_failure;
+
+ break;
+#endif
+ default:
+ goto nla_put_failure;
+ }
+
switch (pctx->ms_af) {
case AF_INET:
if (nla_put_be32(skb, GTPA_MS_ADDRESS,
@@ -1448,6 +1590,8 @@ static struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = {
[GTPA_PEER_ADDRESS] = { .type = NLA_U32, },
[GTPA_MS_ADDRESS] = { .type = NLA_U32, },
#if GTP_IPV6
+ [GTPA_PEER6_ADDRESS] = { .len = FIELD_SIZEOF(struct ipv6hdr,
+ daddr) },
[GTPA_MS6_ADDRESS] = { .len = FIELD_SIZEOF(struct ipv6hdr,
daddr) },
#endif
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index ae4e632c0360..8eec519fa754 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -29,6 +29,7 @@ enum gtp_attrs {
GTPA_PAD,
GTPA_PORT,
GTPA_MS6_ADDRESS,
+ GTPA_PEER6_ADDRESS,
__GTPA_MAX,
};
#define GTPA_MAX (__GTPA_MAX + 1)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8d062c58d5cb..81c26864abeb 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -552,6 +552,9 @@ enum {
IFLA_GTP_FD1,
IFLA_GTP_PDP_HASHSIZE,
IFLA_GTP_ROLE,
+ IFLA_GTP_AF,
+ IFLA_GTP_PORT0,
+ IFLA_GTP_PORT1,
__IFLA_GTP_MAX,
};
#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
--
2.11.0
^ permalink raw reply related
* [PATCH v3 net-next 12/12] gtp: Allow configuring GTP interface as standalone
From: Tom Herbert @ 2017-09-25 3:29 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170925032941.14586-1-tom@quantonium.net>
Add new configuration of GTP interfaces that allow specifying a port to
listen on (as opposed to having to get sockets from a userspace control
plane). This allows GTP interfaces to be configured and the data path
tested without requiring a GTP-C daemon.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/gtp.c | 215 ++++++++++++++++++++++++++++++++++++-----------
include/uapi/linux/gtp.h | 5 ++
2 files changed, 169 insertions(+), 51 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 1c580df4cfc5..dc1fcd3034af 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -93,6 +93,9 @@ struct gtp_dev {
struct sock *sk0;
struct sock *sk1u;
+ struct socket *sock0;
+ struct socket *sock1u;
+
struct net_device *dev;
unsigned int role;
@@ -451,26 +454,33 @@ static void gtp_encap_destroy(struct sock *sk)
}
}
-static void gtp_encap_disable_sock(struct sock *sk)
+static void gtp_encap_release(struct gtp_dev *gtp)
{
- if (!sk)
- return;
+ if (gtp->sk0) {
+ if (gtp->sock0) {
+ udp_tunnel_sock_release(gtp->sock0);
+ gtp->sock0 = NULL;
+ } else {
+ gtp_encap_destroy(gtp->sk0);
+ }
- gtp_encap_destroy(sk);
-}
+ gtp->sk0 = NULL;
+ }
-static void gtp_encap_disable(struct gtp_dev *gtp)
-{
- gtp_encap_disable_sock(gtp->sk0);
- gtp_encap_disable_sock(gtp->sk1u);
+ if (gtp->sk1u) {
+ if (gtp->sock1u) {
+ udp_tunnel_sock_release(gtp->sock1u);
+ gtp->sock1u = NULL;
+ } else {
+ gtp_encap_destroy(gtp->sk1u);
+ }
+
+ gtp->sk1u = NULL;
+ }
}
static int gtp_dev_init(struct net_device *dev)
{
- struct gtp_dev *gtp = netdev_priv(dev);
-
- gtp->dev = dev;
-
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
@@ -482,7 +492,8 @@ static void gtp_dev_uninit(struct net_device *dev)
{
struct gtp_dev *gtp = netdev_priv(dev);
- gtp_encap_disable(gtp);
+ gtp_encap_release(gtp);
+
free_percpu(dev->tstats);
}
@@ -751,6 +762,8 @@ static void gtp_link_setup(struct net_device *dev)
sizeof(struct udphdr) +
sizeof(struct gtp0_header);
+ gtp->dev = dev;
+
gro_cells_init(>p->gro_cells, dev);
}
@@ -764,13 +777,19 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
unsigned int role = GTP_ROLE_GGSN;
+ bool have_fd, have_ports;
bool is_ipv6 = false;
struct gtp_dev *gtp;
struct gtp_net *gn;
int hashsize, err;
- if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
+ have_fd = !!data[IFLA_GTP_FD0] || !!data[IFLA_GTP_FD1];
+ have_ports = !!data[IFLA_GTP_PORT0] || !!data[IFLA_GTP_PORT1];
+
+ if (!(have_fd ^ have_ports)) {
+ /* Either got fd(s) or port(s) */
return -EINVAL;
+ }
if (data[IFLA_GTP_ROLE]) {
role = nla_get_u32(data[IFLA_GTP_ROLE]);
@@ -831,7 +850,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
out_hashtable:
gtp_hashtable_free(gtp);
out_encap:
- gtp_encap_disable(gtp);
+ gtp_encap_release(gtp);
return err;
}
@@ -840,7 +859,7 @@ static void gtp_dellink(struct net_device *dev, struct list_head *head)
struct gtp_dev *gtp = netdev_priv(dev);
gro_cells_destroy(>p->gro_cells);
- gtp_encap_disable(gtp);
+ gtp_encap_release(gtp);
gtp_hashtable_free(gtp);
list_del_rcu(>p->list);
unregister_netdevice_queue(dev, head);
@@ -851,6 +870,8 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = {
[IFLA_GTP_FD1] = { .type = NLA_U32 },
[IFLA_GTP_PDP_HASHSIZE] = { .type = NLA_U32 },
[IFLA_GTP_ROLE] = { .type = NLA_U32 },
+ [IFLA_GTP_PORT0] = { .type = NLA_U16 },
+ [IFLA_GTP_PORT1] = { .type = NLA_U16 },
};
static int gtp_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -949,11 +970,35 @@ static void gtp_hashtable_free(struct gtp_dev *gtp)
kfree(gtp->tid_hash);
}
-static struct sock *gtp_encap_enable_socket(int fd, int type,
- struct gtp_dev *gtp,
- bool is_ipv6)
+static int gtp_encap_enable_sock(struct socket *sock, int type,
+ struct gtp_dev *gtp)
{
struct udp_tunnel_sock_cfg tuncfg = {NULL};
+
+ switch (type) {
+ case UDP_ENCAP_GTP0:
+ tuncfg.encap_rcv = gtp0_udp_encap_recv;
+ break;
+ case UDP_ENCAP_GTP1U:
+ tuncfg.encap_rcv = gtp1u_udp_encap_recv;
+ break;
+ default:
+ pr_debug("Unknown encap type %u\n", type);
+ return -EINVAL;
+ }
+
+ tuncfg.sk_user_data = gtp;
+ tuncfg.encap_type = type;
+ tuncfg.encap_destroy = gtp_encap_destroy;
+
+ setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
+
+ return 0;
+}
+
+static struct sock *gtp_encap_enable_fd(int fd, int type, struct gtp_dev *gtp,
+ bool is_ipv6)
+{
struct socket *sock;
struct sock *sk;
int err;
@@ -986,60 +1031,128 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
sk = sock->sk;
sock_hold(sk);
- switch (type) {
- case UDP_ENCAP_GTP0:
- tuncfg.encap_rcv = gtp0_udp_encap_recv;
- break;
- case UDP_ENCAP_GTP1U:
- tuncfg.encap_rcv = gtp1u_udp_encap_recv;
- break;
- default:
- pr_debug("Unknown encap type %u\n", type);
- sk = ERR_PTR(-EINVAL);
- goto out_sock;
- }
-
- tuncfg.sk_user_data = gtp;
- tuncfg.encap_type = type;
- tuncfg.encap_destroy = gtp_encap_destroy;
-
- setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
+ err = gtp_encap_enable_sock(sock, type, gtp);
+ if (err < 0)
+ sk = ERR_PTR(err);
out_sock:
sockfd_put(sock);
return sk;
}
+static struct socket *gtp_create_sock(struct net *net, bool ipv6,
+ __be16 port, u32 flags)
+{
+ struct socket *sock;
+ struct udp_port_cfg udp_conf;
+ int err;
+
+ memset(&udp_conf, 0, sizeof(udp_conf));
+
+#if GTP_IPV6
+ if (ipv6) {
+ udp_conf.family = AF_INET6;
+ udp_conf.ipv6_v6only = 1;
+ } else {
+ udp_conf.family = AF_INET;
+ }
+#else
+ udp_conf.family = AF_INET;
+#endif
+
+ udp_conf.local_udp_port = port;
+
+ /* Open UDP socket */
+ err = udp_sock_create(net, &udp_conf, &sock);
+ if (err)
+ return ERR_PTR(err);
+
+ return sock;
+}
+
static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[],
bool is_ipv6)
{
+ int err;
+
+ struct socket *sock0 = NULL, *sock1u = NULL;
struct sock *sk0 = NULL, *sk1u = NULL;
if (data[IFLA_GTP_FD0]) {
u32 fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
- sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp,
- is_ipv6);
- if (IS_ERR(sk0))
- return PTR_ERR(sk0);
+ sk0 = gtp_encap_enable_fd(fd0, UDP_ENCAP_GTP0, gtp, is_ipv6);
+ if (IS_ERR(sk0)) {
+ err = PTR_ERR(sk0);
+ sk0 = NULL;
+ goto out_err;
+ }
+ } else if (data[IFLA_GTP_PORT0]) {
+ __be16 port = nla_get_u16(data[IFLA_GTP_PORT0]);
+
+ sock0 = gtp_create_sock(dev_net(gtp->dev), is_ipv6, port, 0);
+ if (IS_ERR(sock0)) {
+ err = PTR_ERR(sock0);
+ sock0 = NULL;
+ goto out_err;
+ }
+
+ err = gtp_encap_enable_sock(sock0, UDP_ENCAP_GTP0, gtp);
+ if (err)
+ goto out_err;
}
if (data[IFLA_GTP_FD1]) {
u32 fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
- sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp,
- is_ipv6);
+ sk1u = gtp_encap_enable_fd(fd1, UDP_ENCAP_GTP1U, gtp, is_ipv6);
if (IS_ERR(sk1u)) {
- if (sk0)
- gtp_encap_disable_sock(sk0);
- return PTR_ERR(sk1u);
+ err = PTR_ERR(sk1u);
+ sk1u = NULL;
+ goto out_err;
+ }
+ } else if (data[IFLA_GTP_PORT1]) {
+ __be16 port = nla_get_u16(data[IFLA_GTP_PORT1]);
+
+ sock1u = gtp_create_sock(dev_net(gtp->dev), is_ipv6, port, 0);
+ if (IS_ERR(sock1u)) {
+ err = PTR_ERR(sock1u);
+ sock1u = NULL;
+ goto out_err;
}
+
+ err = gtp_encap_enable_sock(sock1u, UDP_ENCAP_GTP1U, gtp);
+ if (err)
+ goto out_err;
+ }
+
+ if (sock0) {
+ gtp->sock0 = sock0;
+ gtp->sk0 = sock0->sk;
+ } else {
+ gtp->sk0 = sk0;
}
- gtp->sk0 = sk0;
- gtp->sk1u = sk1u;
+ if (sock1u) {
+ gtp->sock1u = sock1u;
+ gtp->sk1u = sock1u->sk;
+ } else {
+ gtp->sk1u = sk1u;
+ }
return 0;
+
+out_err:
+ if (sk0)
+ gtp_encap_destroy(sk0);
+ if (sk1u)
+ gtp_encap_destroy(sk1u);
+ if (sock0)
+ udp_tunnel_sock_release(sock0);
+ if (sock1u)
+ udp_tunnel_sock_release(sock1u);
+
+ return err;
}
static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
@@ -1624,8 +1737,8 @@ static const struct genl_ops gtp_genl_ops[] = {
};
static struct genl_family gtp_genl_family __ro_after_init = {
- .name = "gtp",
- .version = 0,
+ .name = GTP_GENL_NAME,
+ .version = GTP_GENL_VERSION,
.hdrsize = 0,
.maxattr = GTPA_MAX,
.netnsok = true,
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index 8eec519fa754..0da18aa88be8 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -9,6 +9,11 @@ enum gtp_genl_cmds {
GTP_CMD_MAX,
};
+/* NETLINK_GENERIC related info
+ */
+#define GTP_GENL_NAME "gtp"
+#define GTP_GENL_VERSION 0
+
enum gtp_version {
GTP_V0 = 0,
GTP_V1,
--
2.11.0
^ permalink raw reply related
* Re: [PATCH] brcm80211: make const array ucode_ofdm_rates static, reduces object code size
From: Kalle Valo @ 2017-09-25 4:20 UTC (permalink / raw)
To: Arend van Spriel
Cc: Colin King, Franky Lin, Hante Meuleman, Chi-Hsien Lin,
Wright Feng, linux-wireless, brcm80211-dev-list.pdl,
brcm80211-dev-list, netdev, kernel-janitors, linux-kernel
In-Reply-To: <96caf646-a43c-9b78-a4a9-2becf7e74125@broadcom.com>
Arend van Spriel <arend.vanspriel@broadcom.com> writes:
> Please use 'brcmsmac:' as prefix instead of 'brcm80211:'.
I can fix that.
--
Kalle Valo
^ permalink raw reply
* Re: usb/wireless/rsi_91x: use-after-free write in __run_timers
From: Kalle Valo @ 2017-09-25 4:26 UTC (permalink / raw)
To: Andrey Konovalov
Cc: Amitkumar Karwar, Prameela Rani Garnepudi, Karun Eagalapati,
linux-wireless, netdev, LKML, Dmitry Vyukov, Kostya Serebryany,
syzkaller
In-Reply-To: <CAAeHK+wXCaLZ8=enfsmjg-+FJsW1UhSCz8GUmSWhFxB4orLQ6w@mail.gmail.com>
Andrey Konovalov <andreyknvl@google.com> writes:
> I've got the following report while fuzzing the kernel with syzkaller.
>
> On commit 6e80ecdddf4ea6f3cd84e83720f3d852e6624a68 (Sep 21).
>
> ==================================================================
> BUG: KASAN: use-after-free in __run_timers+0xc0e/0xd40
> Write of size 8 at addr ffff880069f701b8 by task swapper/0/0
>
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.14.0-rc1-42311-g6e80ecdddf4e #234
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
[...]
> Allocated by task 1845:
> save_stack_trace+0x1b/0x20 arch/x86/kernel/stacktrace.c:59
> save_stack+0x43/0xd0 mm/kasan/kasan.c:447
> set_track mm/kasan/kasan.c:459
> kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
> kmem_cache_alloc_trace+0x11e/0x2d0 mm/slub.c:2772
> kmalloc ./include/linux/slab.h:493
> kzalloc ./include/linux/slab.h:666
> rsi_91x_init+0x98/0x510 drivers/net/wireless/rsi/rsi_91x_main.c:203
> rsi_probe+0xb6/0x13b0 drivers/net/wireless/rsi/rsi_91x_usb.c:665
> usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361
I'm curious about your setup. Apparently you are running syzkaller on
QEMU but what I don't understand is how the rsi device comes into the
picture. Did you have a rsi usb device connected to the virtual machine
or what? Or does syzkaller do some kind of magic here?
--
Kalle Valo
^ permalink raw reply
* Re: [PATCH v4 1/9] brcmsmac: make some local variables 'static const' to reduce stack size
From: Kalle Valo @ 2017-09-25 4:33 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Arend van Spriel, Franky Lin, Hante Meuleman, Chi-Hsien Lin,
Wright Feng, Mauro Carvalho Chehab, Jiri Pirko, David S. Miller,
Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
Masahiro Yamada, Michal Marek, Andrew Morton, Kees Cook,
Geert Uytterhoeven, Greg Kroah-Hartman, linux-media, linux-kernel
In-Reply-To: <20170922212930.620249-2-arnd@arndb.de>
Arnd Bergmann <arnd@arndb.de> writes:
> With KASAN and a couple of other patches applied, this driver is one
> of the few remaining ones that actually use more than 2048 bytes of
> kernel stack:
>
> broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy_gainctrl':
> broadcom/brcm80211/brcmsmac/phy/phy_n.c:16065:1: warning: the frame size of 3264 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy':
> broadcom/brcm80211/brcmsmac/phy/phy_n.c:17138:1: warning: the frame size of 2864 bytes is larger than 2048 bytes [-Wframe-larger-than=]
>
> Here, I'm reducing the stack size by marking as many local variables as
> 'static const' as I can without changing the actual code.
>
> This is the first of three patches to improve the stack usage in this
> driver. It would be good to have this backported to stabl kernels
> to get all drivers in 'allmodconfig' below the 2048 byte limit so
> we can turn on the frame warning again globally, but I realize that
> the patch is larger than the normal limit for stable backports.
>
> The other two patches do not need to be backported.
>
> Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
I'll queue this and the two following brcmsmac patches for 4.14.
Also I'll add (only for this patch):
Cc: <stable@vger.kernel.org>
--
Kalle Valo
^ permalink raw reply
* Re: [PATCH] mac80211: aead api to reduce redundancy
From: Herbert Xu @ 2017-09-25 4:56 UTC (permalink / raw)
To: Johannes Berg
Cc: Xiang Gao, David S. Miller, linux-crypto, linux-kernel,
linux-wireless, netdev
In-Reply-To: <1506274966.2909.7.camel@sipsolutions.net>
On Sun, Sep 24, 2017 at 07:42:46PM +0200, Johannes Berg wrote:
>
> Unrelated to this, I'm not sure whose tree this should go through -
> probably Herbert's (or DaveM's with his ACK? not sure if there's a
> crypto tree?) or so?
Since you're just rearranging code invoking the crypto API, rather
than touching actual crypto API code, I think you should handle it
as you do with any other wireless patch.
Thanks,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH] mac80211: aead api to reduce redundancy
From: Johannes Berg @ 2017-09-25 5:22 UTC (permalink / raw)
To: Herbert Xu
Cc: Xiang Gao, David S. Miller, linux-crypto, linux-kernel,
linux-wireless, netdev
In-Reply-To: <20170925045641.GB22947@gondor.apana.org.au>
On Mon, 2017-09-25 at 12:56 +0800, Herbert Xu wrote:
> On Sun, Sep 24, 2017 at 07:42:46PM +0200, Johannes Berg wrote:
> >
> > Unrelated to this, I'm not sure whose tree this should go through -
> > probably Herbert's (or DaveM's with his ACK? not sure if there's a
> > crypto tree?) or so?
>
> Since you're just rearranging code invoking the crypto API, rather
> than touching actual crypto API code, I think you should handle it
> as you do with any other wireless patch.
The code moves to crypto/ though, and I'm not even sure I can vouch for
the Makefile choice there.
johannes
^ permalink raw reply
* Re: [PATCH net-next] sch_netem: faster rb tree removal
From: Eric Dumazet @ 2017-09-25 5:27 UTC (permalink / raw)
To: David Ahern; +Cc: David Miller, netdev, Stephen Hemminger
In-Reply-To: <aa146e80-4cfd-888f-f6d2-bf4ec0eb7373@gmail.com>
On Sun, 2017-09-24 at 20:05 -0600, David Ahern wrote:
> On 9/24/17 7:57 PM, David Ahern wrote:
> > Hi Eric:
> >
> > I'm guessing the cost is in the rb_first and rb_next computations. Did
> > you consider something like this:
> >
> > struct rb_root *root
> > struct rb_node **p = &root->rb_node;
> >
> > while (*p != NULL) {
> > struct foobar *fb;
> >
> > fb = container_of(*p, struct foobar, rb_node);
> > // fb processing
> rb_erase(&nh->rb_node, root);
>
> > p = &root->rb_node;
> > }
> >
>
> Oops, dropped the rb_erase in my consolidating the code to this snippet.
Hi David
This gives about same numbers than method_1
I tried with 10^7 skbs in the tree :
Your suggestion takes 66ns per skb, while the one I chose takes 37ns per
skb.
Thanks.
^ permalink raw reply
* Re: [patch net-next v2 03/12] ipmr: Add FIB notification access functions
From: Yotam Gigi @ 2017-09-25 5:38 UTC (permalink / raw)
To: Yunsheng Lin, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw, nikolay, andrew
In-Reply-To: <fbf5db3b-a324-eed2-e6b8-b4b89e11f3c3@huawei.com>
On 09/25/2017 04:19 AM, Yunsheng Lin wrote:
> Hi, Jiri
>
> On 2017/9/25 1:22, Jiri Pirko wrote:
>> From: Yotam Gigi <yotamg@mellanox.com>
>>
>> Make the ipmr module register as a FIB notifier. To do that, implement both
>> the ipmr_seq_read and ipmr_dump ops.
>>
>> The ipmr_seq_read op returns a sequence counter that is incremented on
>> every notification related operation done by the ipmr. To implement that,
>> add a sequence counter in the netns_ipv4 struct and increment it whenever a
>> new MFC route or VIF are added or deleted. The sequence operations are
>> protected by the RTNL lock.
>>
>> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
>> and sends notifications about them. The entries dump is done under RCU
>> where the VIF dump uses the mrt_lock too, as the vif->dev field can change
>> under RCU.
>>
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>> v1->v2:
>> - Take the mrt_lock when dumping VIF entries.
>> ---
>> include/linux/mroute.h | 15 ++++++
>> include/net/netns/ipv4.h | 3 ++
>> net/ipv4/ipmr.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++-
>> 3 files changed, 153 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
>> index 10028f2..54c5cb8 100644
>> --- a/include/linux/mroute.h
>> +++ b/include/linux/mroute.h
>> @@ -5,6 +5,7 @@
>> #include <linux/pim.h>
>> #include <linux/rhashtable.h>
>> #include <net/sock.h>
>> +#include <net/fib_notifier.h>
>> #include <uapi/linux/mroute.h>
>>
>> #ifdef CONFIG_IP_MROUTE
>> @@ -58,6 +59,14 @@ struct vif_device {
>> int link; /* Physical interface index */
>> };
>>
>> +struct vif_entry_notifier_info {
>> + struct fib_notifier_info info;
>> + struct net_device *dev;
>> + vifi_t vif_index;
>> + unsigned short vif_flags;
>> + u32 tb_id;
>> +};
>> +
>> #define VIFF_STATIC 0x8000
>>
>> #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
>> @@ -146,6 +155,12 @@ struct mfc_cache {
>> struct rcu_head rcu;
>> };
>>
>> +struct mfc_entry_notifier_info {
>> + struct fib_notifier_info info;
>> + struct mfc_cache *mfc;
>> + u32 tb_id;
>> +};
>> +
>> struct rtmsg;
>> int ipmr_get_route(struct net *net, struct sk_buff *skb,
>> __be32 saddr, __be32 daddr,
>> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
>> index 8387f09..abc84d9 100644
>> --- a/include/net/netns/ipv4.h
>> +++ b/include/net/netns/ipv4.h
>> @@ -163,6 +163,9 @@ struct netns_ipv4 {
>> struct fib_notifier_ops *notifier_ops;
>> unsigned int fib_seq; /* protected by rtnl_mutex */
>>
>> + struct fib_notifier_ops *ipmr_notifier_ops;
> Can we add a const here?
It cannot be const as it get initialized it in ipmr_notifier_init.
>
>> + unsigned int ipmr_seq; /* protected by rtnl_mutex */
>> +
>> atomic_t rt_genid;
>> };
>> #endif
>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>> index 86dc5f9..49879c3 100644
>> --- a/net/ipv4/ipmr.c
>> +++ b/net/ipv4/ipmr.c
>> @@ -264,6 +264,16 @@ static void __net_exit ipmr_rules_exit(struct net *net)
>> fib_rules_unregister(net->ipv4.mr_rules_ops);
>> rtnl_unlock();
>> }
>> +
>> +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
>> +{
>> + return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
>> +}
>> +
>> +static unsigned int ipmr_rules_seq_read(struct net *net)
>> +{
>> + return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
>> +}
>> #else
>> #define ipmr_for_each_table(mrt, net) \
>> for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
>> @@ -298,6 +308,16 @@ static void __net_exit ipmr_rules_exit(struct net *net)
>> net->ipv4.mrt = NULL;
>> rtnl_unlock();
>> }
>> +
>> +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
>> +{
>> + return 0;
>> +}
>> +
>> +static unsigned int ipmr_rules_seq_read(struct net *net)
>> +{
>> + return 0;
>> +}
>> #endif
>>
>> static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
>> @@ -587,6 +607,43 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
>> }
>> #endif
>>
>> +static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
>> + struct net *net,
>> + enum fib_event_type event_type,
>> + struct vif_device *vif,
>> + vifi_t vif_index, u32 tb_id)
>> +{
>> + struct vif_entry_notifier_info info = {
>> + .info = {
>> + .family = RTNL_FAMILY_IPMR,
>> + .net = net,
>> + },
>> + .dev = vif->dev,
>> + .vif_index = vif_index,
>> + .vif_flags = vif->flags,
>> + .tb_id = tb_id,
>> + };
> We only use info.info which is fib_notifier_info, the
> vif_entry_notifier_info seems to be not needed, why not just
> use fib_notifier_info?
No, that's not true.
The driver gets the notification with a pointer to a fib_notifier_info struct,
and according to the type field uses container_of to get to the parent struct,
which in this case is vif_entry_notifier_info. All the fields here are needed.
You can see this code in patch 10.
By the way, this function is completely symmetric to fib4 (which is in
fib_trie.c +88) and fib6 (which is in ip6_fib +336) notify functions, who uses
the exact same process.
>
>> +
>> + return call_fib_notifier(nb, net, event_type, &info.info);
>> +}
>> +
>> +static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
>> + struct net *net,
>> + enum fib_event_type event_type,
>> + struct mfc_cache *mfc, u32 tb_id)
>> +{
>> + struct mfc_entry_notifier_info info = {
>> + .info = {
>> + .family = RTNL_FAMILY_IPMR,
>> + .net = net,
>> + },
>> + .mfc = mfc,
>> + .tb_id = tb_id
>> + };
>> +
> As above.
As above.
>
>> + return call_fib_notifier(nb, net, event_type, &info.info);
>> +}
>> +
>> /**
>> * vif_delete - Delete a VIF entry
>> * @notify: Set to 1, if the caller is a notifier_call
>> @@ -3050,14 +3107,87 @@ static const struct net_protocol pim_protocol = {
>> };
>> #endif
>>
>> +static unsigned int ipmr_seq_read(struct net *net)
>> +{
>> + ASSERT_RTNL();
>> +
>> + return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
>> +}
>> +
>> +static int ipmr_dump(struct net *net, struct notifier_block *nb)
>> +{
>> + struct mr_table *mrt;
>> + int err;
>> +
>> + err = ipmr_rules_dump(net, nb);
>> + if (err)
>> + return err;
>> +
>> + ipmr_for_each_table(mrt, net) {
>> + struct vif_device *v = &mrt->vif_table[0];
>> + struct mfc_cache *mfc;
>> + int vifi;
>> +
>> + /* Notifiy on table VIF entries */
>> + read_lock(&mrt_lock);
>> + for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
>> + if (!v->dev)
>> + continue;
>> +
>> + call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
>> + v, vifi, mrt->id);
>> + }
>> + read_unlock(&mrt_lock);
>> +
>> + /* Notify on table MFC entries */
>> + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
>> + call_ipmr_mfc_entry_notifier(nb, net,
>> + FIB_EVENT_ENTRY_ADD, mfc,
>> + mrt->id);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static const struct fib_notifier_ops ipmr_notifier_ops_template = {
>> + .family = RTNL_FAMILY_IPMR,
>> + .fib_seq_read = ipmr_seq_read,
>> + .fib_dump = ipmr_dump,
>> + .owner = THIS_MODULE,
>> +};
>> +
>> +int __net_init ipmr_notifier_init(struct net *net)
>> +{
>> + struct fib_notifier_ops *ops;
>> +
>> + net->ipv4.ipmr_seq = 0;
>> +
>> + ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
>> + if (IS_ERR(ops))
>> + return PTR_ERR(ops);
>> + net->ipv4.ipmr_notifier_ops = ops;
>> +
>> + return 0;
>> +}
>> +
>> +static void __net_exit ipmr_notifier_exit(struct net *net)
>> +{
>> + fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
>> + net->ipv4.ipmr_notifier_ops = NULL;
>> +}
>> +
>> /* Setup for IP multicast routing */
>> static int __net_init ipmr_net_init(struct net *net)
>> {
>> int err;
>>
>> + err = ipmr_notifier_init(net);
>> + if (err)
>> + goto ipmr_notifier_fail;
>> +
>> err = ipmr_rules_init(net);
>> if (err < 0)
>> - goto fail;
>> + goto ipmr_rules_fail;
>>
>> #ifdef CONFIG_PROC_FS
>> err = -ENOMEM;
>> @@ -3074,7 +3204,9 @@ static int __net_init ipmr_net_init(struct net *net)
>> proc_vif_fail:
>> ipmr_rules_exit(net);
>> #endif
>> -fail:
>> +ipmr_rules_fail:
>> + ipmr_notifier_exit(net);
>> +ipmr_notifier_fail:
>> return err;
>> }
>>
>> @@ -3084,6 +3216,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
>> remove_proc_entry("ip_mr_cache", net->proc_net);
>> remove_proc_entry("ip_mr_vif", net->proc_net);
>> #endif
>> + ipmr_notifier_exit(net);
>> ipmr_rules_exit(net);
>> }
>>
>>
^ permalink raw reply
* Re: [patch net-next v2 06/12] net: mroute: Check if rule is a default rule
From: Yotam Gigi @ 2017-09-25 5:39 UTC (permalink / raw)
To: Yunsheng Lin, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw, nikolay, andrew
In-Reply-To: <93dcea9d-94eb-39cf-4102-e5c59fb2dfa5@huawei.com>
On 09/25/2017 04:28 AM, Yunsheng Lin wrote:
> Hi, Jiri
>
> On 2017/9/25 1:22, Jiri Pirko wrote:
>> From: Yotam Gigi <yotamg@mellanox.com>
>>
>> When the ipmr starts, it adds one default FIB rule that matches all packets
>> and sends them to the DEFAULT (multicast) FIB table. A more complex rule
>> can be added by user to specify that for a specific interface, a packet
>> should be look up at either an arbitrary table or according to the l3mdev
>> of the interface.
>>
>> For drivers willing to offload the ipmr logic into a hardware but don't
>> want to offload all the FIB rules functionality, provide a function that
>> can indicate whether the FIB rule is the default multicast rule, thus only
>> one routing table is needed.
>>
>> This way, a driver can register to the FIB notification chain, get
>> notifications about FIB rules added and trigger some kind of an internal
>> abort mechanism when a non default rule is added by the user.
>>
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>> include/linux/mroute.h | 7 +++++++
>> net/ipv4/ipmr.c | 10 ++++++++++
>> 2 files changed, 17 insertions(+)
>>
>> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
>> index 5566580..b072a84 100644
>> --- a/include/linux/mroute.h
>> +++ b/include/linux/mroute.h
>> @@ -5,6 +5,7 @@
>> #include <linux/pim.h>
>> #include <linux/rhashtable.h>
>> #include <net/sock.h>
>> +#include <net/fib_rules.h>
>> #include <net/fib_notifier.h>
>> #include <uapi/linux/mroute.h>
>>
>> @@ -19,6 +20,7 @@ int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
>> int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
>> int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
>> int ip_mr_init(void);
>> +bool ipmr_rule_default(const struct fib_rule *rule);
>> #else
>> static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
>> char __user *optval, unsigned int optlen)
>> @@ -46,6 +48,11 @@ static inline int ip_mroute_opt(int opt)
>> {
>> return 0;
>> }
>> +
>> +static inline bool ipmr_rule_default(const struct fib_rule *rule)
>> +{
>> + return true;
>> +}
>> #endif
>>
>> struct vif_device {
>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>> index 2a795d2..a714f55 100644
>> --- a/net/ipv4/ipmr.c
>> +++ b/net/ipv4/ipmr.c
>> @@ -320,6 +320,16 @@ static unsigned int ipmr_rules_seq_read(struct net *net)
>> }
>> #endif
>>
>> +bool ipmr_rule_default(const struct fib_rule *rule)
>> +{
>> +#if IS_ENABLED(CONFIG_FIB_RULES)
>> + return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
>> +#else
>> + return true;
>> +#endif
> In patch 02, You have the following, can you do the same for the above?
> +#ifdef CONFIG_IP_MROUTE
> +void ipmr_cache_free(struct mfc_cache *mfc_cache);
> +#else
> +static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
> +{
> +}
> +#endif
OK.
>> +}
>> +EXPORT_SYMBOL(ipmr_rule_default);
>> +
>> static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
>> const void *ptr)
>> {
>>
^ permalink raw reply
* Re: [patch net-next v2 07/12] mlxsw: spectrum: Add the multicast routing offloading logic
From: Yotam Gigi @ 2017-09-25 5:55 UTC (permalink / raw)
To: Yunsheng Lin, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw, nikolay, andrew
In-Reply-To: <fb5d1fcc-ad70-d656-f860-7b05cdd2834a@huawei.com>
On 09/25/2017 04:48 AM, Yunsheng Lin wrote:
> Hi, Jiri
>
> On 2017/9/25 1:22, Jiri Pirko wrote:
>> From: Yotam Gigi <yotamg@mellanox.com>
>>
>> Add the multicast router offloading logic, which is in charge of handling
>> the VIF and MFC notifications and translating it to the hardware logic API.
>>
>> The offloading logic has to overcome several obstacles in order to safely
>> comply with the kernel multicast router user API:
>> - It must keep track of the mapping between VIFs to netdevices. The user
>> can add an MFC cache entry pointing to a VIF, delete the VIF and add
>> re-add it with a different netdevice. The offloading logic has to handle
>> this in order to be compatible with the kernel logic.
>> - It must keep track of the mapping between netdevices to spectrum RIFs,
>> as the current hardware implementation assume having a RIF for every
>> port in a multicast router.
>> - It must handle routes pointing to pimreg device to be trapped to the
>> kernel, as the packet should be delivered to userspace.
>> - It must handle routes pointing tunnel VIFs. The current implementation
>> does not support multicast forwarding to tunnels, thus routes that point
>> to a tunnel should be trapped to the kernel.
>> - It must be aware of proxy multicast routes, which include both (*,*)
>> routes and duplicate routes. Currently proxy routes are not offloaded
>> and trigger the abort mechanism: removal of all routes from hardware and
>> triggering the traffic to go through the kernel.
>>
>> The multicast routing offloading logic also updates the counters of the
>> offloaded MFC routes in a periodic work.
>>
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>> v1->v2:
>> - Update the lastuse MFC entry field too, in addition to packets an bytes.
>> ---
>> drivers/net/ethernet/mellanox/mlxsw/Makefile | 3 +-
>> drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 +
>> drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 1014 +++++++++++++++++++++
>> drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h | 133 +++
>> 4 files changed, 1150 insertions(+), 1 deletion(-)
>> create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
>> create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
>> index 4b88158..9b29764 100644
>> --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
>> @@ -17,7 +17,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
>> spectrum_kvdl.o spectrum_acl_tcam.o \
>> spectrum_acl.o spectrum_flower.o \
>> spectrum_cnt.o spectrum_fid.o \
>> - spectrum_ipip.o spectrum_acl_flex_actions.o
>> + spectrum_ipip.o spectrum_acl_flex_actions.o \
>> + spectrum_mr.o
>> mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
>> mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
>> obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> index e907ec4..51d8b9f 100644
>> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> @@ -153,6 +153,7 @@ struct mlxsw_sp {
>> struct mlxsw_sp_sb *sb;
>> struct mlxsw_sp_bridge *bridge;
>> struct mlxsw_sp_router *router;
>> + struct mlxsw_sp_mr *mr;
>> struct mlxsw_afa *afa;
>> struct mlxsw_sp_acl *acl;
>> struct mlxsw_sp_fid_core *fid_core;
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
>> new file mode 100644
>> index 0000000..89b2e60
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
>> @@ -0,0 +1,1014 @@
>> +/*
>> + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
>> + * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
>> + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions are met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in the
>> + * documentation and/or other materials provided with the distribution.
>> + * 3. Neither the names of the copyright holders nor the names of its
>> + * contributors may be used to endorse or promote products derived from
>> + * this software without specific prior written permission.
>> + *
>> + * Alternatively, this software may be distributed under the terms of the
>> + * GNU General Public License ("GPL") version 2 as published by the Free
>> + * Software Foundation.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
>> + * POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <linux/rhashtable.h>
>> +
>> +#include "spectrum_mr.h"
>> +#include "spectrum_router.h"
>> +
>> +struct mlxsw_sp_mr {
>> + const struct mlxsw_sp_mr_ops *mr_ops;
>> + void *catchall_route_priv;
>> + struct delayed_work stats_update_dw;
>> + struct list_head table_list;
>> +#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */
>> + unsigned long priv[0];
>> + /* priv has to be always the last item */
>> +};
>> +
>> +struct mlxsw_sp_mr_vif {
>> + struct net_device *dev;
>> + const struct mlxsw_sp_rif *rif;
>> + unsigned long vif_flags;
>> +
>> + /* A list of route_vif_entry structs that point to routes that the VIF
>> + * instance is used as one of the egress VIFs
>> + */
>> + struct list_head route_evif_list;
>> +
>> + /* A list of route_vif_entry structs that point to routes that the VIF
>> + * instance is used as an ingress VIF
>> + */
>> + struct list_head route_ivif_list;
>> +};
>> +
>> +struct mlxsw_sp_mr_route_vif_entry {
>> + struct list_head vif_node;
>> + struct list_head route_node;
>> + struct mlxsw_sp_mr_vif *mr_vif;
>> + struct mlxsw_sp_mr_route *mr_route;
>> +};
>> +
>> +struct mlxsw_sp_mr_table {
>> + struct list_head node;
>> + enum mlxsw_sp_l3proto proto;
>> + struct mlxsw_sp *mlxsw_sp;
>> + u32 vr_id;
>> + struct mlxsw_sp_mr_vif vifs[MAXVIFS];
>> + struct list_head route_list;
>> + struct rhashtable route_ht;
>> + char catchall_route_priv[0];
>> + /* catchall_route_priv has to be always the last item */
>> +};
>> +
>> +struct mlxsw_sp_mr_route {
>> + struct list_head node;
>> + struct rhash_head ht_node;
>> + struct mlxsw_sp_mr_route_key key;
>> + enum mlxsw_sp_mr_route_action route_action;
>> + u16 min_mtu;
>> + struct mfc_cache *mfc4;
>> + void *route_priv;
>> + const struct mlxsw_sp_mr_table *mr_table;
>> + /* A list of route_vif_entry structs that point to the egress VIFs */
>> + struct list_head evif_list;
>> + /* A route_vif_entry struct that point to the ingress VIF */
>> + struct mlxsw_sp_mr_route_vif_entry ivif;
>> +};
>> +
>> +static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = {
>> + .key_len = sizeof(struct mlxsw_sp_mr_route_key),
>> + .key_offset = offsetof(struct mlxsw_sp_mr_route, key),
>> + .head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node),
>> + .automatic_shrinking = true,
>> +};
>> +
>> +static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif)
>> +{
>> + return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
>> +}
>> +
>> +static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif)
>> +{
>> + return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif;
>> +}
>> +
>> +static bool mlxsw_sp_mr_vif_rif_invalid(const struct mlxsw_sp_mr_vif *vif)
>> +{
>> + return mlxsw_sp_mr_vif_regular(vif) && vif->dev && !vif->rif;
>> +}
>> +
>> +static bool
>> +mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + vifi_t ivif;
>> +
>> + switch (mr_route->mr_table->proto) {
>> + case MLXSW_SP_L3_PROTO_IPV4:
>> + ivif = mr_route->mfc4->mfc_parent;
>> + return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
>> + case MLXSW_SP_L3_PROTO_IPV6:
>> + /* fall through */
>> + default:
>> + WARN_ON_ONCE(1);
>> + }
>> + return false;
>> +}
>> +
>> +static int
>> +mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + struct mlxsw_sp_mr_route_vif_entry *rve;
>> + int valid_evifs = 0;
>> +
>> + valid_evifs = 0;
> you are doing valid_evifs = 0 twice.
Didn't notice. I will fix and send v3.
Thanks.
>
>> + list_for_each_entry(rve, &mr_route->evif_list, route_node)
>> + if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
>> + valid_evifs++;
>> + return valid_evifs;
>> +}
>> +
>> +static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + switch (mr_route->mr_table->proto) {
>> + case MLXSW_SP_L3_PROTO_IPV4:
>> + return mr_route->key.source_mask.addr4 == INADDR_ANY;
>> + case MLXSW_SP_L3_PROTO_IPV6:
>> + /* fall through */
>> + default:
>> + WARN_ON_ONCE(1);
>> + }
>> + return false;
>> +}
>> +
>> +static enum mlxsw_sp_mr_route_action
>> +mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + struct mlxsw_sp_mr_route_vif_entry *rve;
>> +
>> + /* If the ingress port is not regular and resolved, trap the route */
>> + if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
>> + return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +
>> + /* The kernel does not match a (*,G) route that the ingress interface is
>> + * not one of the egress interfaces, so trap these kind of routes.
>> + */
>> + if (mlxsw_sp_mr_route_starg(mr_route) &&
>> + !mlxsw_sp_mr_route_ivif_in_evifs(mr_route))
>> + return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +
>> + /* If the route has no valid eVIFs, trap it. */
>> + if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route))
>> + return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +
>> + /* If either one of the eVIFs is not regular (VIF of type pimreg or
>> + * tunnel) or one of the VIFs has no matching RIF, trap the packet.
>> + */
>> + list_for_each_entry(rve, &mr_route->evif_list, route_node) {
>> + if (!mlxsw_sp_mr_vif_regular(rve->mr_vif) ||
>> + mlxsw_sp_mr_vif_rif_invalid(rve->mr_vif))
>> + return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> + }
>> + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD;
>> +}
>> +
>> +static enum mlxsw_sp_mr_route_prio
>> +mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + return mlxsw_sp_mr_route_starg(mr_route) ?
>> + MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG;
>> +}
>> +
>> +static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route_key *key,
>> + const struct mfc_cache *mfc)
>> +{
>> + bool starg = (mfc->mfc_origin == INADDR_ANY);
>> +
>> + memset(key, 0, sizeof(*key));
>> + key->vrid = mr_table->vr_id;
>> + key->proto = mr_table->proto;
>> + key->group.addr4 = mfc->mfc_mcastgrp;
>> + key->group_mask.addr4 = 0xffffffff;
>> + key->source.addr4 = mfc->mfc_origin;
>> + key->source_mask.addr4 = starg ? 0 : 0xffffffff;
>> +}
>> +
>> +static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route,
>> + struct mlxsw_sp_mr_vif *mr_vif)
>> +{
>> + struct mlxsw_sp_mr_route_vif_entry *rve;
>> +
>> + rve = kzalloc(sizeof(*rve), GFP_KERNEL);
>> + if (!rve)
>> + return -ENOMEM;
>> + rve->mr_route = mr_route;
>> + rve->mr_vif = mr_vif;
>> + list_add_tail(&rve->route_node, &mr_route->evif_list);
>> + list_add_tail(&rve->vif_node, &mr_vif->route_evif_list);
>> + return 0;
>> +}
>> +
>> +static void
>> +mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> + list_del(&rve->route_node);
>> + list_del(&rve->vif_node);
>> + kfree(rve);
>> +}
>> +
>> +static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route,
>> + struct mlxsw_sp_mr_vif *mr_vif)
>> +{
>> + mr_route->ivif.mr_route = mr_route;
>> + mr_route->ivif.mr_vif = mr_vif;
>> + list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list);
>> +}
>> +
>> +static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + list_del(&mr_route->ivif.vif_node);
>> +}
>> +
>> +static int
>> +mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route *mr_route,
>> + struct mlxsw_sp_mr_route_info *route_info)
>> +{
>> + struct mlxsw_sp_mr_route_vif_entry *rve;
>> + u16 *erif_indices;
>> + u16 irif_index;
>> + u16 erif = 0;
>> +
>> + erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices),
>> + GFP_KERNEL);
>> + if (!erif_indices)
>> + return -ENOMEM;
>> +
>> + list_for_each_entry(rve, &mr_route->evif_list, route_node) {
>> + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
>> + u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
>> +
>> + erif_indices[erif++] = rifi;
>> + }
>> + }
>> +
>> + if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
>> + irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif);
>> + else
>> + irif_index = 0;
>> +
>> + route_info->irif_index = irif_index;
>> + route_info->erif_indices = erif_indices;
>> + route_info->min_mtu = mr_route->min_mtu;
>> + route_info->route_action = mr_route->route_action;
>> + route_info->erif_num = erif;
>> + return 0;
>> +}
>> +
>> +static void
>> +mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info)
>> +{
>> + kfree(route_info->erif_indices);
>> +}
>> +
>> +static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route *mr_route,
>> + bool replace)
>> +{
>> + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> + struct mlxsw_sp_mr_route_info route_info;
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> + int err;
>> +
>> + err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info);
>> + if (err)
>> + return err;
>> +
>> + if (!replace) {
>> + struct mlxsw_sp_mr_route_params route_params;
>> +
>> + mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size,
>> + GFP_KERNEL);
>> + if (!mr_route->route_priv) {
>> + err = -ENOMEM;
>> + goto out;
>> + }
>> +
>> + route_params.key = mr_route->key;
>> + route_params.value = route_info;
>> + route_params.prio = mlxsw_sp_mr_route_prio(mr_route);
>> + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
>> + mr_route->route_priv,
>> + &route_params);
>> + if (err)
>> + kfree(mr_route->route_priv);
>> + } else {
>> + err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv,
>> + &route_info);
>> + }
>> +out:
>> + mlxsw_sp_mr_route_info_destroy(&route_info);
>> + return err;
>> +}
>> +
>> +static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +
>> + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv);
>> + kfree(mr_route->route_priv);
>> +}
>> +
>> +static struct mlxsw_sp_mr_route *
>> +mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
>> + struct mfc_cache *mfc)
>> +{
>> + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
>> + struct mlxsw_sp_mr_route *mr_route;
>> + int err;
>> + int i;
>> +
>> + /* Allocate and init a new route and fill it with parameters */
>> + mr_route = kzalloc(sizeof(*mr_table), GFP_KERNEL);
>> + if (!mr_route)
>> + return ERR_PTR(-ENOMEM);
>> + INIT_LIST_HEAD(&mr_route->evif_list);
>> + mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc);
>> +
>> + /* Find min_mtu and link iVIF and eVIFs */
>> + mr_route->min_mtu = ETH_MAX_MTU;
>> + ipmr_cache_hold(mfc);
>> + mr_route->mfc4 = mfc;
>> + mr_route->mr_table = mr_table;
>> + for (i = 0; i < MAXVIFS; i++) {
>> + if (mfc->mfc_un.res.ttls[i] != 255) {
>> + err = mlxsw_sp_mr_route_evif_link(mr_route,
>> + &mr_table->vifs[i]);
>> + if (err)
>> + goto err;
>> + if (mr_table->vifs[i].dev &&
>> + mr_table->vifs[i].dev->mtu < mr_route->min_mtu)
>> + mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
>> + }
>> + }
>> + mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
>> + if (err)
>> + goto err;
>> +
>> + mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
>> + return mr_route;
>> +err:
>> + ipmr_cache_put(mfc);
>> + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
>> + mlxsw_sp_mr_route_evif_unlink(rve);
>> + kfree(mr_route);
>> + return ERR_PTR(err);
>> +}
>> +
>> +static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
>> +
>> + mlxsw_sp_mr_route_ivif_unlink(mr_route);
>> + ipmr_cache_put(mr_route->mfc4);
>> + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
>> + mlxsw_sp_mr_route_evif_unlink(rve);
>> + kfree(mr_route);
>> +}
>> +
>> +static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + switch (mr_table->proto) {
>> + case MLXSW_SP_L3_PROTO_IPV4:
>> + mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
>> + break;
>> + case MLXSW_SP_L3_PROTO_IPV6:
>> + /* fall through */
>> + default:
>> + WARN_ON_ONCE(1);
>> + }
>> +}
>> +
>> +static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
>> + bool offload)
>> +{
>> + switch (mr_route->mr_table->proto) {
>> + case MLXSW_SP_L3_PROTO_IPV4:
>> + if (offload)
>> + mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
>> + else
>> + mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
>> + break;
>> + case MLXSW_SP_L3_PROTO_IPV6:
>> + /* fall through */
>> + default:
>> + WARN_ON_ONCE(1);
>> + }
>> +}
>> +
>> +static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + bool offload;
>> +
>> + offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> + mlxsw_sp_mr_mfc_offload_set(mr_route, offload);
>> +}
>> +
>> +static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + mlxsw_sp_mr_mfc_offload_set(mr_route, false);
>> + mlxsw_sp_mr_route_erase(mr_table, mr_route);
>> + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
>> + mlxsw_sp_mr_route_ht_params);
>> + list_del(&mr_route->node);
>> + mlxsw_sp_mr_route_destroy(mr_table, mr_route);
>> +}
>> +
>> +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
>> + struct mfc_cache *mfc, bool replace)
>> +{
>> + struct mlxsw_sp_mr_route *mr_orig_route = NULL;
>> + struct mlxsw_sp_mr_route *mr_route;
>> + int err;
>> +
>> + /* If the route is a (*,*) route, abort, as these kind of routes are
>> + * used for proxy routes.
>> + */
>> + if (mfc->mfc_origin == INADDR_ANY && mfc->mfc_mcastgrp == INADDR_ANY) {
>> + dev_warn(mr_table->mlxsw_sp->bus_info->dev,
>> + "Offloading proxy routes is not supported.\n");
> You are return err, why not use dev_err?
This err return value is properly handled by the caller (spectrum_router.c),
which will trigger the driver abort mechanism. The kernel will still be
functional, but the driver will stop offloading and eject all current offloaded
routes.
It is totally valid for a user to add a proxy route on machine with Spectrum,
but he should be warned that from now on, the routes go through slowpath. This
is why it is only a warning print.
Again, this is symmetric to the case of a failure in ipv4 and ipv6 route
offloading, which does not use dev_err either and only print with dev_warn.
>
>> + return -EINVAL;
>> + }
>> +
>> + /* Create a new route */
>> + mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc);
>> + if (IS_ERR(mr_route))
>> + return PTR_ERR(mr_route);
>> +
>> + /* Find any route with a matching key */
>> + mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht,
>> + &mr_route->key,
>> + mlxsw_sp_mr_route_ht_params);
>> + if (replace) {
>> + /* On replace case, make the route point to the new route_priv.
>> + */
>> + if (WARN_ON(!mr_orig_route)) {
>> + err = -ENOENT;
>> + goto err_no_orig_route;
>> + }
>> + mr_route->route_priv = mr_orig_route->route_priv;
>> + } else if (mr_orig_route) {
>> + /* On non replace case, if another route with the same key was
>> + * found, abort, as duplicate routes are used for proxy routes.
>> + */
>> + dev_warn(mr_table->mlxsw_sp->bus_info->dev,
>> + "Offloading proxy routes is not supported.\n");
> Same as here.
Same as above.
>
>> + err = -EINVAL;
>> + goto err_duplicate_route;
>> + }
>> +
>> + /* Put it in the table data-structures */
>> + list_add_tail(&mr_route->node, &mr_table->route_list);
>> + err = rhashtable_insert_fast(&mr_table->route_ht,
>> + &mr_route->ht_node,
>> + mlxsw_sp_mr_route_ht_params);
>> + if (err)
>> + goto err_rhashtable_insert;
>> +
>> + /* Write the route to the hardware */
>> + err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
>> + if (err)
>> + goto err_mr_route_write;
>> +
>> + /* Destroy the original route */
>> + if (replace) {
>> + rhashtable_remove_fast(&mr_table->route_ht,
>> + &mr_orig_route->ht_node,
>> + mlxsw_sp_mr_route_ht_params);
>> + list_del(&mr_orig_route->node);
>> + mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route);
>> + }
>> +
>> + mlxsw_sp_mr_mfc_offload_update(mr_route);
>> + return 0;
>> +
>> +err_mr_route_write:
>> + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
>> + mlxsw_sp_mr_route_ht_params);
>> +err_rhashtable_insert:
>> + list_del(&mr_route->node);
>> +err_no_orig_route:
>> +err_duplicate_route:
>> + mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
>> + return err;
>> +}
>> +
>> +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
>> + struct mfc_cache *mfc)
>> +{
>> + struct mlxsw_sp_mr_route *mr_route;
>> + struct mlxsw_sp_mr_route_key key;
>> +
>> + mlxsw_sp_mr_route4_key(mr_table, &key, mfc);
>> + mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
>> + mlxsw_sp_mr_route_ht_params);
>> + if (mr_route)
>> + __mlxsw_sp_mr_route_del(mr_table, mr_route);
>> +}
>> +
>> +/* Should be called after the VIF struct is updated */
>> +static int
>> +mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> + enum mlxsw_sp_mr_route_action route_action;
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> + u16 irif_index;
>> + int err;
>> +
>> + route_action = mlxsw_sp_mr_route_action(rve->mr_route);
>> + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
>> + return 0;
>> +
>> + /* rve->mr_vif->rif is guaranteed to be valid at this stage */
>> + irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
>> + err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv,
>> + irif_index);
>> + if (err)
>> + return err;
>> +
>> + err = mr->mr_ops->route_action_update(mlxsw_sp,
>> + rve->mr_route->route_priv,
>> + route_action);
>> + if (err)
>> + /* No need to rollback here because the iRIF change only takes
>> + * place after the action has been updated.
>> + */
>> + return err;
>> +
>> + rve->mr_route->route_action = route_action;
>> + mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
>> + return 0;
>> +}
>> +
>> +static void
>> +mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +
>> + mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv,
>> + MLXSW_SP_MR_ROUTE_ACTION_TRAP);
>> + rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> + mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
>> +}
>> +
>> +/* Should be called after the RIF struct is updated */
>> +static int
>> +mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> + enum mlxsw_sp_mr_route_action route_action;
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> + u16 erif_index = 0;
>> + int err;
>> +
>> + /* Update the route action, as the new eVIF can be a tunnel or a pimreg
>> + * device which will require updating the action.
>> + */
>> + route_action = mlxsw_sp_mr_route_action(rve->mr_route);
>> + if (route_action != rve->mr_route->route_action) {
>> + err = mr->mr_ops->route_action_update(mlxsw_sp,
>> + rve->mr_route->route_priv,
>> + route_action);
>> + if (err)
>> + return err;
>> + }
>> +
>> + /* Add the eRIF */
>> + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
>> + erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
>> + err = mr->mr_ops->route_erif_add(mlxsw_sp,
>> + rve->mr_route->route_priv,
>> + erif_index);
>> + if (err)
>> + goto err_route_erif_add;
>> + }
>> +
>> + /* Update the minimum MTU */
>> + if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) {
>> + rve->mr_route->min_mtu = rve->mr_vif->dev->mtu;
>> + err = mr->mr_ops->route_min_mtu_update(mlxsw_sp,
>> + rve->mr_route->route_priv,
>> + rve->mr_route->min_mtu);
>> + if (err)
>> + goto err_route_min_mtu_update;
>> + }
>> +
>> + rve->mr_route->route_action = route_action;
>> + mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
>> + return 0;
>> +
>> +err_route_min_mtu_update:
>> + if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
>> + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
>> + erif_index);
>> +err_route_erif_add:
>> + if (route_action != rve->mr_route->route_action)
>> + mr->mr_ops->route_action_update(mlxsw_sp,
>> + rve->mr_route->route_priv,
>> + rve->mr_route->route_action);
>> + return err;
>> +}
>> +
>> +/* Should be called before the RIF struct is updated */
>> +static void
>> +mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table,
>> + struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> + enum mlxsw_sp_mr_route_action route_action;
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> + u16 rifi;
>> +
>> + /* If the unresolved RIF was not valid, no need to delete it */
>> + if (!mlxsw_sp_mr_vif_valid(rve->mr_vif))
>> + return;
>> +
>> + /* Update the route action: if there is only one valid eVIF in the
>> + * route, set the action to trap as the VIF deletion will lead to zero
>> + * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to
>> + * determine the route action.
>> + */
>> + if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1)
>> + route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> + else
>> + route_action = mlxsw_sp_mr_route_action(rve->mr_route);
>> + if (route_action != rve->mr_route->route_action)
>> + mr->mr_ops->route_action_update(mlxsw_sp,
>> + rve->mr_route->route_priv,
>> + route_action);
>> +
>> + /* Delete the erif from the route */
>> + rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
>> + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi);
>> + rve->mr_route->route_action = route_action;
>> + mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
>> +}
>> +
>> +static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table,
>> + struct net_device *dev,
>> + struct mlxsw_sp_mr_vif *mr_vif,
>> + unsigned long vif_flags,
>> + const struct mlxsw_sp_rif *rif)
>> +{
>> + struct mlxsw_sp_mr_route_vif_entry *irve, *erve;
>> + int err;
>> +
>> + /* Update the VIF */
>> + mr_vif->dev = dev;
>> + mr_vif->rif = rif;
>> + mr_vif->vif_flags = vif_flags;
>> +
>> + /* Update all routes where this VIF is used as an unresolved iRIF */
>> + list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) {
>> + err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve);
>> + if (err)
>> + goto err_irif_unresolve;
>> + }
>> +
>> + /* Update all routes where this VIF is used as an unresolved eRIF */
>> + list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) {
>> + err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve);
>> + if (err)
>> + goto err_erif_unresolve;
>> + }
>> + return 0;
>> +
>> +err_erif_unresolve:
>> + list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list,
>> + vif_node)
>> + mlxsw_sp_mr_route_evif_unresolve(mr_table, erve);
>> +err_irif_unresolve:
>> + list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list,
>> + vif_node)
>> + mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve);
>> + mr_vif->rif = NULL;
>> + return err;
>> +}
>> +
>> +static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table,
>> + struct net_device *dev,
>> + struct mlxsw_sp_mr_vif *mr_vif)
>> +{
>> + struct mlxsw_sp_mr_route_vif_entry *rve;
>> +
>> + /* Update all routes where this VIF is used as an unresolved eRIF */
>> + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node)
>> + mlxsw_sp_mr_route_evif_unresolve(mr_table, rve);
>> +
>> + /* Update all routes where this VIF is used as an unresolved iRIF */
>> + list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node)
>> + mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve);
>> +
>> + /* Update the VIF */
>> + mr_vif->dev = dev;
>> + mr_vif->rif = NULL;
>> +}
>> +
>> +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
>> + struct net_device *dev, vifi_t vif_index,
>> + unsigned long vif_flags, const struct mlxsw_sp_rif *rif)
>> +{
>> + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
>> +
>> + if (WARN_ON(vif_index >= MAXVIFS))
>> + return -EINVAL;
>> + if (mr_vif->dev)
>> + return -EEXIST;
> -ENODEV?
No, Look carefully. The error is returned if mr_vif->dev is *not* NULL, which
means that the VIF was already added before, hence -EEXIST.
This error will happen if the mlxsw_sp_mr_vif_add function is called twice with
the same VIF index.
>
>> + return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif);
>> +}
>> +
>> +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index)
>> +{
>> + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
>> +
>> + if (WARN_ON(vif_index >= MAXVIFS))
>> + return;
>> + if (WARN_ON(!mr_vif->dev))
>> + return;
>> + mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif);
>> +}
>> +
>> +struct mlxsw_sp_mr_vif *
>> +mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table,
>> + const struct net_device *dev)
>> +{
>> + vifi_t vif_index;
>> +
>> + for (vif_index = 0; vif_index < MAXVIFS; vif_index++)
>> + if (mr_table->vifs[vif_index].dev == dev)
>> + return &mr_table->vifs[vif_index];
>> + return NULL;
>> +}
>> +
>> +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
>> + const struct mlxsw_sp_rif *rif)
>> +{
>> + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
>> + struct mlxsw_sp_mr_vif *mr_vif;
>> +
>> + if (!rif_dev)
>> + return 0;
>> +
>> + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
>> + if (!mr_vif)
>> + return 0;
>> + return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif,
>> + mr_vif->vif_flags, rif);
>> +}
>> +
>> +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
>> + const struct mlxsw_sp_rif *rif)
>> +{
>> + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
>> + struct mlxsw_sp_mr_vif *mr_vif;
>> +
>> + if (!rif_dev)
>> + return;
>> +
>> + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
>> + if (!mr_vif)
>> + return;
>> + mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif);
>> +}
>> +
>> +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
>> + const struct mlxsw_sp_rif *rif, int mtu)
>> +{
>> + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
>> + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> + struct mlxsw_sp_mr_route_vif_entry *rve;
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> + struct mlxsw_sp_mr_vif *mr_vif;
>> +
>> + if (!rif_dev)
>> + return;
>> +
>> + /* Search for a VIF that use that RIF */
>> + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
>> + if (!mr_vif)
>> + return;
>> +
>> + /* Update all the routes that uses that VIF as eVIF */
>> + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) {
>> + if (mtu < rve->mr_route->min_mtu) {
>> + rve->mr_route->min_mtu = mtu;
>> + mr->mr_ops->route_min_mtu_update(mlxsw_sp,
>> + rve->mr_route->route_priv,
>> + mtu);
>> + }
>> + }
>> +}
>> +
>> +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
>> + u32 vr_id,
>> + enum mlxsw_sp_l3proto proto)
>> +{
>> + struct mlxsw_sp_mr_route_params catchall_route_params = {
>> + .prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
>> + .key = {
>> + .vrid = vr_id,
>> + },
>> + .value = {
>> + .route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP,
>> + }
>> + };
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> + struct mlxsw_sp_mr_table *mr_table;
>> + int err;
>> + int i;
>> +
>> + mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size,
>> + GFP_KERNEL);
>> + if (!mr_table)
>> + return ERR_PTR(-ENOMEM);
>> +
>> + mr_table->vr_id = vr_id;
>> + mr_table->mlxsw_sp = mlxsw_sp;
>> + mr_table->proto = proto;
>> + INIT_LIST_HEAD(&mr_table->route_list);
>> +
>> + err = rhashtable_init(&mr_table->route_ht,
>> + &mlxsw_sp_mr_route_ht_params);
>> + if (err)
>> + goto err_route_rhashtable_init;
>> +
>> + for (i = 0; i < MAXVIFS; i++) {
>> + INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list);
>> + INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list);
>> + }
>> +
>> + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
>> + mr_table->catchall_route_priv,
>> + &catchall_route_params);
>> + if (err)
>> + goto err_ops_route_create;
>> + list_add_tail(&mr_table->node, &mr->table_list);
>> + return mr_table;
>> +
>> +err_ops_route_create:
>> + rhashtable_destroy(&mr_table->route_ht);
>> +err_route_rhashtable_init:
>> + kfree(mr_table);
>> + return ERR_PTR(err);
>> +}
>> +
>> +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table)
>> +{
>> + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +
>> + WARN_ON(!mlxsw_sp_mr_table_empty(mr_table));
>> + list_del(&mr_table->node);
>> + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv,
>> + &mr_table->catchall_route_priv);
>> + rhashtable_destroy(&mr_table->route_ht);
>> + kfree(mr_table);
>> +}
>> +
>> +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table)
>> +{
>> + struct mlxsw_sp_mr_route *mr_route, *tmp;
>> + int i;
>> +
>> + list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node)
>> + __mlxsw_sp_mr_route_del(mr_table, mr_route);
>> +
>> + for (i = 0; i < MAXVIFS; i++) {
>> + mr_table->vifs[i].dev = NULL;
>> + mr_table->vifs[i].rif = NULL;
>> + }
>> +}
>> +
>> +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < MAXVIFS; i++)
>> + if (mr_table->vifs[i].dev)
>> + return false;
>> + return list_empty(&mr_table->route_list);
>> +}
>> +
>> +static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
>> + struct mlxsw_sp_mr_route *mr_route)
>> +{
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> + u64 packets, bytes;
>> +
>> + if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
>> + return;
>> +
>> + mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets,
>> + &bytes);
>> +
>> + switch (mr_route->mr_table->proto) {
>> + case MLXSW_SP_L3_PROTO_IPV4:
>> + if (mr_route->mfc4->mfc_un.res.pkt != packets)
>> + mr_route->mfc4->mfc_un.res.lastuse = jiffies;
>> + mr_route->mfc4->mfc_un.res.pkt = packets;
>> + mr_route->mfc4->mfc_un.res.bytes = bytes;
>> + break;
>> + case MLXSW_SP_L3_PROTO_IPV6:
>> + /* fall through */
>> + default:
>> + WARN_ON_ONCE(1);
>> + }
>> +}
>> +
>> +static void mlxsw_sp_mr_stats_update(struct work_struct *work)
>> +{
>> + struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr,
>> + stats_update_dw.work);
>> + struct mlxsw_sp_mr_table *mr_table;
>> + struct mlxsw_sp_mr_route *mr_route;
>> + unsigned long interval;
>> +
>> + rtnl_lock();
>> + list_for_each_entry(mr_table, &mr->table_list, node)
>> + list_for_each_entry(mr_route, &mr_table->route_list, node)
>> + mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp,
>> + mr_route);
>> + rtnl_unlock();
>> +
>> + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
>> + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
>> +}
>> +
>> +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
>> + const struct mlxsw_sp_mr_ops *mr_ops)
>> +{
>> + struct mlxsw_sp_mr *mr;
>> + unsigned long interval;
>> + int err;
>> +
>> + mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL);
>> + if (!mr)
>> + return -ENOMEM;
>> + mr->mr_ops = mr_ops;
>> + mlxsw_sp->mr = mr;
>> + INIT_LIST_HEAD(&mr->table_list);
>> +
>> + err = mr_ops->init(mlxsw_sp, mr->priv);
>> + if (err)
>> + goto err;
>> +
>> + /* Create the delayed work for counter updates */
>> + INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update);
>> + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
>> + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
>> + return 0;
>> +err:
>> + kfree(mr);
>> + return err;
>> +}
>> +
>> +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
>> +{
>> + struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +
>> + cancel_delayed_work_sync(&mr->stats_update_dw);
>> + mr->mr_ops->fini(mr->priv);
>> + kfree(mr);
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
>> new file mode 100644
>> index 0000000..c851b23
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
>> @@ -0,0 +1,133 @@
>> +/*
>> + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
>> + * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
>> + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions are met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in the
>> + * documentation and/or other materials provided with the distribution.
>> + * 3. Neither the names of the copyright holders nor the names of its
>> + * contributors may be used to endorse or promote products derived from
>> + * this software without specific prior written permission.
>> + *
>> + * Alternatively, this software may be distributed under the terms of the
>> + * GNU General Public License ("GPL") version 2 as published by the Free
>> + * Software Foundation.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
>> + * POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifndef _MLXSW_SPECTRUM_MCROUTER_H
>> +#define _MLXSW_SPECTRUM_MCROUTER_H
>> +
>> +#include <linux/mroute.h>
>> +#include "spectrum_router.h"
>> +#include "spectrum.h"
>> +
>> +enum mlxsw_sp_mr_route_action {
>> + MLXSW_SP_MR_ROUTE_ACTION_FORWARD,
>> + MLXSW_SP_MR_ROUTE_ACTION_TRAP,
>> +};
>> +
>> +enum mlxsw_sp_mr_route_prio {
>> + MLXSW_SP_MR_ROUTE_PRIO_SG,
>> + MLXSW_SP_MR_ROUTE_PRIO_STARG,
>> + MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
>> + __MLXSW_SP_MR_ROUTE_PRIO_MAX
>> +};
>> +
>> +#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
>> +
>> +struct mlxsw_sp_mr_route_key {
>> + int vrid;
>> + enum mlxsw_sp_l3proto proto;
>> + union mlxsw_sp_l3addr group;
>> + union mlxsw_sp_l3addr group_mask;
>> + union mlxsw_sp_l3addr source;
>> + union mlxsw_sp_l3addr source_mask;
>> +};
>> +
>> +struct mlxsw_sp_mr_route_info {
>> + enum mlxsw_sp_mr_route_action route_action;
>> + u16 irif_index;
>> + u16 *erif_indices;
>> + size_t erif_num;
>> + u16 min_mtu;
>> +};
>> +
>> +struct mlxsw_sp_mr_route_params {
>> + struct mlxsw_sp_mr_route_key key;
>> + struct mlxsw_sp_mr_route_info value;
>> + enum mlxsw_sp_mr_route_prio prio;
>> +};
>> +
>> +struct mlxsw_sp_mr_ops {
>> + int priv_size;
>> + int route_priv_size;
>> + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
>> + int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
>> + void *route_priv,
>> + struct mlxsw_sp_mr_route_params *route_params);
>> + int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> + struct mlxsw_sp_mr_route_info *route_info);
>> + int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> + u64 *packets, u64 *bytes);
>> + int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> + enum mlxsw_sp_mr_route_action route_action);
>> + int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> + u16 min_mtu);
>> + int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> + u16 irif_index);
>> + int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> + u16 erif_index);
>> + int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> + u16 erif_index);
>> + void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
>> + void *route_priv);
>> + void (*fini)(void *priv);
>> +};
>> +
>> +struct mlxsw_sp_mr;
>> +struct mlxsw_sp_mr_table;
>> +
>> +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
>> + const struct mlxsw_sp_mr_ops *mr_ops);
>> +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp);
>> +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
>> + struct mfc_cache *mfc, bool replace);
>> +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
>> + struct mfc_cache *mfc);
>> +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
>> + struct net_device *dev, vifi_t vif_index,
>> + unsigned long vif_flags,
>> + const struct mlxsw_sp_rif *rif);
>> +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index);
>> +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
>> + const struct mlxsw_sp_rif *rif);
>> +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
>> + const struct mlxsw_sp_rif *rif);
>> +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
>> + const struct mlxsw_sp_rif *rif, int mtu);
>> +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
>> + u32 tb_id,
>> + enum mlxsw_sp_l3proto proto);
>> +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table);
>> +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table);
>> +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table);
>> +
>> +#endif
>>
^ permalink raw reply
* Re: [PATCH] mac80211: aead api to reduce redundancy
From: Herbert Xu @ 2017-09-25 6:14 UTC (permalink / raw)
To: Johannes Berg
Cc: Xiang Gao, David S. Miller, linux-crypto, linux-kernel,
linux-wireless, netdev
In-Reply-To: <1506316946.2909.8.camel@sipsolutions.net>
On Mon, Sep 25, 2017 at 07:22:26AM +0200, Johannes Berg wrote:
>
> The code moves to crypto/ though, and I'm not even sure I can vouch for
> the Makefile choice there.
Thanks, I missed that. I don't think this belongs in crypto.
This proposed helper is only useful for wireless so it should
stay there.
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [patch net-next v2 03/12] ipmr: Add FIB notification access functions
From: Yunsheng Lin @ 2017-09-25 6:32 UTC (permalink / raw)
To: Yotam Gigi, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw, nikolay, andrew
In-Reply-To: <54bef87c-8b7a-2512-3763-bac3469db421@mellanox.com>
Hi, Yotam
On 2017/9/25 13:38, Yotam Gigi wrote:
> On 09/25/2017 04:19 AM, Yunsheng Lin wrote:
>> Hi, Jiri
>>
>> On 2017/9/25 1:22, Jiri Pirko wrote:
>>> From: Yotam Gigi <yotamg@mellanox.com>
>>>
>>> Make the ipmr module register as a FIB notifier. To do that, implement both
>>> the ipmr_seq_read and ipmr_dump ops.
>>>
>>> The ipmr_seq_read op returns a sequence counter that is incremented on
>>> every notification related operation done by the ipmr. To implement that,
>>> add a sequence counter in the netns_ipv4 struct and increment it whenever a
>>> new MFC route or VIF are added or deleted. The sequence operations are
>>> protected by the RTNL lock.
>>>
>>> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
>>> and sends notifications about them. The entries dump is done under RCU
>>> where the VIF dump uses the mrt_lock too, as the vif->dev field can change
>>> under RCU.
>>>
>>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>>> ---
>>> v1->v2:
>>> - Take the mrt_lock when dumping VIF entries.
>>> ---
>>> include/linux/mroute.h | 15 ++++++
>>> include/net/netns/ipv4.h | 3 ++
>>> net/ipv4/ipmr.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++-
>>> 3 files changed, 153 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
>>> index 10028f2..54c5cb8 100644
>>> --- a/include/linux/mroute.h
>>> +++ b/include/linux/mroute.h
>>> @@ -5,6 +5,7 @@
>>> #include <linux/pim.h>
>>> #include <linux/rhashtable.h>
>>> #include <net/sock.h>
>>> +#include <net/fib_notifier.h>
>>> #include <uapi/linux/mroute.h>
>>>
>>> #ifdef CONFIG_IP_MROUTE
>>> @@ -58,6 +59,14 @@ struct vif_device {
>>> int link; /* Physical interface index */
>>> };
>>>
>>> +struct vif_entry_notifier_info {
>>> + struct fib_notifier_info info;
>>> + struct net_device *dev;
>>> + vifi_t vif_index;
>>> + unsigned short vif_flags;
>>> + u32 tb_id;
>>> +};
>>> +
>>> #define VIFF_STATIC 0x8000
>>>
>>> #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
>>> @@ -146,6 +155,12 @@ struct mfc_cache {
>>> struct rcu_head rcu;
>>> };
>>>
>>> +struct mfc_entry_notifier_info {
>>> + struct fib_notifier_info info;
>>> + struct mfc_cache *mfc;
>>> + u32 tb_id;
>>> +};
>>> +
>>> struct rtmsg;
>>> int ipmr_get_route(struct net *net, struct sk_buff *skb,
>>> __be32 saddr, __be32 daddr,
>>> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
>>> index 8387f09..abc84d9 100644
>>> --- a/include/net/netns/ipv4.h
>>> +++ b/include/net/netns/ipv4.h
>>> @@ -163,6 +163,9 @@ struct netns_ipv4 {
>>> struct fib_notifier_ops *notifier_ops;
>>> unsigned int fib_seq; /* protected by rtnl_mutex */
>>>
>>> + struct fib_notifier_ops *ipmr_notifier_ops;
>> Can we add a const here?
>
> It cannot be const as it get initialized it in ipmr_notifier_init.
>
>>
>>> + unsigned int ipmr_seq; /* protected by rtnl_mutex */
>>> +
>>> atomic_t rt_genid;
>>> };
>>> #endif
>>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>>> index 86dc5f9..49879c3 100644
>>> --- a/net/ipv4/ipmr.c
>>> +++ b/net/ipv4/ipmr.c
>>> @@ -264,6 +264,16 @@ static void __net_exit ipmr_rules_exit(struct net *net)
>>> fib_rules_unregister(net->ipv4.mr_rules_ops);
>>> rtnl_unlock();
>>> }
>>> +
>>> +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
>>> +{
>>> + return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
>>> +}
>>> +
>>> +static unsigned int ipmr_rules_seq_read(struct net *net)
>>> +{
>>> + return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
>>> +}
>>> #else
>>> #define ipmr_for_each_table(mrt, net) \
>>> for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
>>> @@ -298,6 +308,16 @@ static void __net_exit ipmr_rules_exit(struct net *net)
>>> net->ipv4.mrt = NULL;
>>> rtnl_unlock();
>>> }
>>> +
>>> +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
>>> +{
>>> + return 0;
>>> +}
>>> +
>>> +static unsigned int ipmr_rules_seq_read(struct net *net)
>>> +{
>>> + return 0;
>>> +}
>>> #endif
>>>
>>> static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
>>> @@ -587,6 +607,43 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
>>> }
>>> #endif
>>>
>>> +static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
>>> + struct net *net,
>>> + enum fib_event_type event_type,
>>> + struct vif_device *vif,
>>> + vifi_t vif_index, u32 tb_id)
>>> +{
>>> + struct vif_entry_notifier_info info = {
>>> + .info = {
>>> + .family = RTNL_FAMILY_IPMR,
>>> + .net = net,
>>> + },
>>> + .dev = vif->dev,
>>> + .vif_index = vif_index,
>>> + .vif_flags = vif->flags,
>>> + .tb_id = tb_id,
>>> + };
>> We only use info.info which is fib_notifier_info, the
>> vif_entry_notifier_info seems to be not needed, why not just
>> use fib_notifier_info?
>
> No, that's not true.
>
> The driver gets the notification with a pointer to a fib_notifier_info struct,
> and according to the type field uses container_of to get to the parent struct,
> which in this case is vif_entry_notifier_info. All the fields here are needed.
> You can see this code in patch 10.
>
> By the way, this function is completely symmetric to fib4 (which is in
> fib_trie.c +88) and fib6 (which is in ip6_fib +336) notify functions, who uses
> the exact same process.
Thanks for clarifying, I am not familar with ipmr, so only checking
coding style.
>
>>
>>> +
>>> + return call_fib_notifier(nb, net, event_type, &info.info);
>>> +}
>>> +
>>> +static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
>>> + struct net *net,
>>> + enum fib_event_type event_type,
>>> + struct mfc_cache *mfc, u32 tb_id)
>>> +{
>>> + struct mfc_entry_notifier_info info = {
>>> + .info = {
>>> + .family = RTNL_FAMILY_IPMR,
>>> + .net = net,
>>> + },
>>> + .mfc = mfc,
>>> + .tb_id = tb_id
>>> + };
>>> +
>> As above.
>
>
> As above.
>
>
>>
>>> + return call_fib_notifier(nb, net, event_type, &info.info);
>>> +}
>>> +
>>> /**
>>> * vif_delete - Delete a VIF entry
>>> * @notify: Set to 1, if the caller is a notifier_call
>>> @@ -3050,14 +3107,87 @@ static const struct net_protocol pim_protocol = {
>>> };
>>> #endif
>>>
>>> +static unsigned int ipmr_seq_read(struct net *net)
>>> +{
>>> + ASSERT_RTNL();
>>> +
>>> + return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
>>> +}
>>> +
>>> +static int ipmr_dump(struct net *net, struct notifier_block *nb)
>>> +{
>>> + struct mr_table *mrt;
>>> + int err;
>>> +
>>> + err = ipmr_rules_dump(net, nb);
>>> + if (err)
>>> + return err;
>>> +
>>> + ipmr_for_each_table(mrt, net) {
>>> + struct vif_device *v = &mrt->vif_table[0];
>>> + struct mfc_cache *mfc;
>>> + int vifi;
>>> +
>>> + /* Notifiy on table VIF entries */
>>> + read_lock(&mrt_lock);
>>> + for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
>>> + if (!v->dev)
>>> + continue;
>>> +
>>> + call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
>>> + v, vifi, mrt->id);
>>> + }
>>> + read_unlock(&mrt_lock);
>>> +
>>> + /* Notify on table MFC entries */
>>> + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
>>> + call_ipmr_mfc_entry_notifier(nb, net,
>>> + FIB_EVENT_ENTRY_ADD, mfc,
>>> + mrt->id);
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static const struct fib_notifier_ops ipmr_notifier_ops_template = {
>>> + .family = RTNL_FAMILY_IPMR,
>>> + .fib_seq_read = ipmr_seq_read,
>>> + .fib_dump = ipmr_dump,
>>> + .owner = THIS_MODULE,
>>> +};
>>> +
>>> +int __net_init ipmr_notifier_init(struct net *net)
>>> +{
>>> + struct fib_notifier_ops *ops;
>>> +
>>> + net->ipv4.ipmr_seq = 0;
>>> +
>>> + ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
>>> + if (IS_ERR(ops))
>>> + return PTR_ERR(ops);
>>> + net->ipv4.ipmr_notifier_ops = ops;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static void __net_exit ipmr_notifier_exit(struct net *net)
>>> +{
>>> + fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
>>> + net->ipv4.ipmr_notifier_ops = NULL;
>>> +}
>>> +
>>> /* Setup for IP multicast routing */
>>> static int __net_init ipmr_net_init(struct net *net)
>>> {
>>> int err;
>>>
>>> + err = ipmr_notifier_init(net);
>>> + if (err)
>>> + goto ipmr_notifier_fail;
>>> +
>>> err = ipmr_rules_init(net);
>>> if (err < 0)
>>> - goto fail;
>>> + goto ipmr_rules_fail;
>>>
>>> #ifdef CONFIG_PROC_FS
>>> err = -ENOMEM;
>>> @@ -3074,7 +3204,9 @@ static int __net_init ipmr_net_init(struct net *net)
>>> proc_vif_fail:
>>> ipmr_rules_exit(net);
>>> #endif
>>> -fail:
>>> +ipmr_rules_fail:
>>> + ipmr_notifier_exit(net);
>>> +ipmr_notifier_fail:
>>> return err;
>>> }
>>>
>>> @@ -3084,6 +3216,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
>>> remove_proc_entry("ip_mr_cache", net->proc_net);
>>> remove_proc_entry("ip_mr_vif", net->proc_net);
>>> #endif
>>> + ipmr_notifier_exit(net);
>>> ipmr_rules_exit(net);
>>> }
>>>
>>>
>
>
> .
>
^ permalink raw reply
* Re: [PATCH net-next 10/10] net: hns3: Add mqprio support when interacting with network stack
From: Jiri Pirko @ 2017-09-25 6:57 UTC (permalink / raw)
To: Yunsheng Lin
Cc: davem@davemloft.net, huangdaode, xuwei (O), Liguozhu (Kenneth),
Zhuangyuzeng (Yisen), Gabriele Paoloni, John Garry, Linuxarm,
Salil Mehta, lipeng (Y), netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <290b0679-bfc2-c23c-00ee-43768c1c2327@huawei.com>
Mon, Sep 25, 2017 at 02:45:08AM CEST, linyunsheng@huawei.com wrote:
>Hi, Jiri
>
>On 2017/9/24 19:37, Jiri Pirko wrote:
>> Sat, Sep 23, 2017 at 02:47:20AM CEST, linyunsheng@huawei.com wrote:
>>> Hi, Jiri
>>>
>>> On 2017/9/23 0:03, Jiri Pirko wrote:
>>>> Fri, Sep 22, 2017 at 04:11:51PM CEST, linyunsheng@huawei.com wrote:
>>>>> Hi, Jiri
>>>>>
>>>>>>> - if (!tc) {
>>>>>>> + if (if_running) {
>>>>>>> + (void)hns3_nic_net_stop(netdev);
>>>>>>> + msleep(100);
>>>>>>> + }
>>>>>>> +
>>>>>>> + ret = (kinfo->dcb_ops && kinfo->dcb_ops->>setup_tc) ?
>>>>>>> + kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : ->EOPNOTSUPP;
>>>>>
>>>>>> This is most odd. Why do you call dcb_ops from >ndo_setup_tc callback?
>>>>>> Why are you mixing this together? prio->tc mapping >can be done
>>>>>> directly in dcbnl
>>>>>
>>>>> Here is what we do in dcb_ops->setup_tc:
>>>>> Firstly, if current tc num is different from the tc num
>>>>> that user provide, then we setup the queues for each
>>>>> tc.
>>>>>
>>>>> Secondly, we tell hardware the pri to tc mapping that
>>>>> the stack is using. In rx direction, our hardware need
>>>>> that mapping to put different packet into different tc'
>>>>> queues according to the priority of the packet, then
>>>>> rss decides which specific queue in the tc should the
>>>>> packet goto.
>>>>>
>>>>> By mixing, I suppose you meant why we need the
>>>>> pri to tc infomation?
>>>>
>>>> by mixing, I mean what I wrote. You are calling dcb_ops callback from
>>>> ndo_setup_tc callback. So you are mixing DCBNL subsystem and TC
>>>> subsystem. Why? Why do you need sch_mqprio? Why DCBNL is not enough for
>>>> all?
>>>
>>> When using lldptool, dcbnl is involved.
>>>
>>> But when using tc qdisc, dcbbl is not involved, below is the a few key
>>> call graph in the kernel when tc qdisc cmd is executed.
>>>
>>> cmd:
>>> tc qdisc add dev eth0 root handle 1:0 mqprio num_tc 4 map 1 2 3 3 1 3 1 1 hw 1
>>>
>>> call graph:
>>> rtnetlink_rcv_msg -> tc_modify_qdisc -> qdisc_create -> mqprio_init ->
>>> hns3_nic_setup_tc
>>>
>>> When hns3_nic_setup_tc is called, we need to know how many tc num and
>>> prio_tc mapping from the tc_mqprio_qopt which is provided in the paramter
>>> in the ndo_setup_tc function, and dcb_ops is the our hardware specific
>>> method to setup the tc related parameter to the hardware, so this is why
>>> we call dcb_ops callback in ndo_setup_tc callback.
>>>
>>> I hope this will answer your question, thanks for your time.
>>
>> Okay. I understand that you have a usecase for mqprio mapping offload
>> without lldptool being involved. Ok. I believe it is wrong to call dcb_ops
>> from tc callback. You should have a generic layer inside the driver and
>> call it from both dcb_ops and tc callbacks.
>
>Actually, dcb_ops is our generic layer inside the driver.
>Below is high level architecture:
>
> [ tc qdisc ] [ lldpad ]
> | |
> | |
> | |
> [ hns3_enet ] [ hns3_dcbnl ]
> \ /
> \ /
> \ /
> [ hclge_dcb ]
> / \
> / \
> / \
> [ hclgc_main ] [ hclge_tm ]
>
>hns3_enet.c implements the ndo_setup_tc callback.
>hns3_dcbnl.c implements the dcbnl_rtnl_ops for stack's DCBNL system.
>hclge_dcb implements the dcb_ops.
>So we already have a generic layer that tc and dcbnl all call from.
>
>>
>> Also, what happens If I run lldptool concurrently with mqprio? Who wins
>> and is going to configure the mapping?
>
>Both lldptool and tc qdisc cmd use rtnl interface provided by stack, so
>they are both protected by rtnl_lock, so we do not have to do the locking
>in the driver.
I was not asking about locking, which is obvious, I was asking about the
behaviour. Like for example:
If I use tc to configure some mapping, later on I run lldptool and change
the mapping. Does the tc dump show the updated values or the original
ones?
>
>The locking is in rtnetlink_rcv_msg:
>
> rtnl_lock();
> handlers = rtnl_dereference(rtnl_msg_handlers[family]);
> if (handlers) {
> doit = READ_ONCE(handlers[type].doit);
> if (doit)
> err = doit(skb, nlh, extack);
> }
> rtnl_unlock();
>
>Thanks.
>
>>
>>
>>>
>>>>
>>>>
>>>>
>>>>> I hope I did not misunderstand your question, thanks
>>>>> for your time reviewing.
>>>>
>>>> .
>>>>
>>>
>>
>> .
>>
>
^ permalink raw reply
* Re: [8/10] ath9k: Use ARRAY_SIZE macro
From: Kalle Valo @ 2017-09-25 7:15 UTC (permalink / raw)
To: Thomas Meyer; +Cc: kvalo, linux-wireless, netdev, linux-kernel
In-Reply-To: <1504439110050-1887263060-8-diffsplit-thomas@m3y3r.de>
Thomas Meyer <thomas@m3y3r.de> wrote:
> Use ARRAY_SIZE macro, rather than explicitly coding some variant of it
> yourself.
> Found with: find -type f -name "*.c" -o -name "*.h" | xargs perl -p -i -e
> 's/\bsizeof\s*\(\s*(\w+)\s*\)\s*\ /\s*sizeof\s*\(\s*\1\s*\[\s*0\s*\]\s*\)
> /ARRAY_SIZE(\1)/g' and manual check/verification.
>
> Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Patch applied to ath-next branch of ath.git, thanks.
896cbefadf62 ath9k: Use ARRAY_SIZE macro
--
https://patchwork.kernel.org/patch/9936271/
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
^ permalink raw reply
* Re: ath10k: make ath10k_hw_ce_regs const
From: Kalle Valo @ 2017-09-25 7:16 UTC (permalink / raw)
To: Bhumika Goyal
Cc: julia.lawall, ath10k, linux-wireless, netdev, linux-kernel,
Bhumika Goyal
In-Reply-To: <1505329312-26432-1-git-send-email-bhumirks@gmail.com>
Bhumika Goyal <bhumirks@gmail.com> wrote:
> Make them const as they are not modified in the file referencing
> them. They are only stored in the const field 'hw_ce_reg' of an ath10k
> structure. Also, make the declarations in the header const.
>
> Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>
> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Patch applied to ath-next branch of ath.git, thanks.
496cbf3ebb6b ath10k: make ath10k_hw_ce_regs const
--
https://patchwork.kernel.org/patch/9951901/
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
^ permalink raw reply
* Re: [2/2] ath9k: Avoid a potential deadlock
From: Kalle Valo @ 2017-09-25 7:18 UTC (permalink / raw)
To: Ville Syrjälä
Cc: linux-wireless, QCA ath9k Development, Kalle Valo, netdev,
Ville Syrjälä
In-Reply-To: <20170918195919.15860-2-ville.syrjala@linux.intel.com>
Ville Syrjälä wrote:
> Lockdep warns us that sc_pm_lock and cc_lock can cause a deadlock when
> cc_lock is acquired by itself with interrupts enabled. Disable irqs
> whenever taking cc_lock to avoid this.
>
> [ 19.094524] kworker/u2:0/5 just changed the state of lock:
> [ 19.094578] (&(&sc->sc_pm_lock)->rlock){-.-...}, at: [<f836c00e>] ath_isr+0x15e/0x200 [ath9k]
> [ 19.094674] but this lock took another, HARDIRQ-unsafe lock in the past:
> [ 19.094731] (&(&common->cc_lock)->rlock){+.-...}
> [ 19.094741]
>
> and interrupts could create inverse lock ordering between them.
>
> [ 19.094866]
> other info that might help us debug this:
> [ 19.094926] Possible interrupt unsafe locking scenario:
>
> [ 19.094985] CPU0 CPU1
> [ 19.095036] ---- ----
> [ 19.095086] lock(&(&common->cc_lock)->rlock);
> [ 19.095197] local_irq_disable();
> [ 19.095305] lock(&(&sc->sc_pm_lock)->rlock);
> [ 19.095423] lock(&(&common->cc_lock)->rlock);
> [ 19.095539] <Interrupt>
> [ 19.095636] lock(&(&sc->sc_pm_lock)->rlock);
> [ 19.095745]
> *** DEADLOCK ***
>
> [ 19.095965] 3 locks held by kworker/u2:0/5:
> [ 19.096067] #0: ("%s"wiphy_name(local->hw.wiphy)){.+.+.+}, at: [<c1067f37>] process_one_work+0x127/0x580
> [ 19.096260] #1: ((&local->dynamic_ps_enable_work)){+.+...}, at: [<c1067f37>] process_one_work+0x127/0x580
> [ 19.096447] #2: (&sc->mutex){+.+...}, at: [<f836b8b0>] ath9k_config+0x30/0x1d0 [ath9k]
> [ 19.096639]
> the shortest dependencies between 2nd lock and 1st lock:
> [ 19.096813] -> (&(&common->cc_lock)->rlock){+.-...} ops: 38 {
> [ 19.096816] HARDIRQ-ON-W at:
> [ 19.096816] __lock_acquire+0x57e/0x1260
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] _raw_spin_lock_bh+0x3f/0x50
> [ 19.096816] ath_chanctx_set_channel+0xb6/0x2c0 [ath9k]
> [ 19.096816] ath9k_config+0xa8/0x1d0 [ath9k]
> [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [ 19.096816] ieee80211_do_open+0x67a/0x920 [mac80211]
> [ 19.096816] ieee80211_open+0x41/0x50 [mac80211]
> [ 19.096816] __dev_open+0xab/0x140
> [ 19.096816] __dev_change_flags+0x89/0x150
> [ 19.096816] dev_change_flags+0x28/0x60
> [ 19.096816] do_setlink+0x290/0x890
> [ 19.096816] rtnl_newlink+0x7cf/0x8e0
> [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0
> [ 19.096816] netlink_rcv_skb+0xb9/0xe0
> [ 19.096816] rtnetlink_rcv+0x1e/0x30
> [ 19.096816] netlink_unicast+0x13a/0x2c0
> [ 19.096816] netlink_sendmsg+0x290/0x380
> [ 19.096816] ___sys_sendmsg+0x1e2/0x280
> [ 19.096816] __sys_sendmsg+0x3f/0x80
> [ 19.096816] SyS_socketcall+0x58c/0x6b0
> [ 19.096816] do_fast_syscall_32+0x96/0x1d0
> [ 19.096816] entry_SYSENTER_32+0x4c/0x7b
> [ 19.096816] IN-SOFTIRQ-W at:
> [ 19.096816] __lock_acquire+0x55a/0x1260
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] _raw_spin_lock+0x3c/0x50
> [ 19.096816] ath_ps_full_sleep+0x24/0x70 [ath9k]
> [ 19.096816] call_timer_fn+0xa4/0x300
> [ 19.096816] run_timer_softirq+0x1b1/0x560
> [ 19.096816] __do_softirq+0xb0/0x430
> [ 19.096816] do_softirq_own_stack+0x33/0x40
> [ 19.096816] irq_exit+0xad/0xc0
> [ 19.096816] smp_apic_timer_interrupt+0x31/0x40
> [ 19.096816] apic_timer_interrupt+0x37/0x3c
> [ 19.096816] wp_page_copy+0xb8/0x580
> [ 19.096816] do_wp_page+0x64/0x420
> [ 19.096816] handle_mm_fault+0x430/0x990
> [ 19.096816] __do_page_fault+0x18b/0x430
> [ 19.096816] do_page_fault+0xb/0x10
> [ 19.096816] common_exception+0x62/0x6a
> [ 19.096816] INITIAL USE at:
> [ 19.096816] __lock_acquire+0x204/0x1260
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] _raw_spin_lock_bh+0x3f/0x50
> [ 19.096816] ath_chanctx_set_channel+0xb6/0x2c0 [ath9k]
> [ 19.096816] ath9k_config+0xa8/0x1d0 [ath9k]
> [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [ 19.096816] ieee80211_do_open+0x67a/0x920 [mac80211]
> [ 19.096816] ieee80211_open+0x41/0x50 [mac80211]
> [ 19.096816] __dev_open+0xab/0x140
> [ 19.096816] __dev_change_flags+0x89/0x150
> [ 19.096816] dev_change_flags+0x28/0x60
> [ 19.096816] do_setlink+0x290/0x890
> [ 19.096816] rtnl_newlink+0x7cf/0x8e0
> [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0
> [ 19.096816] netlink_rcv_skb+0xb9/0xe0
> [ 19.096816] rtnetlink_rcv+0x1e/0x30
> [ 19.096816] netlink_unicast+0x13a/0x2c0
> [ 19.096816] netlink_sendmsg+0x290/0x380
> [ 19.096816] ___sys_sendmsg+0x1e2/0x280
> [ 19.096816] __sys_sendmsg+0x3f/0x80
> [ 19.096816] SyS_socketcall+0x58c/0x6b0
> [ 19.096816] do_fast_syscall_32+0x96/0x1d0
> [ 19.096816] entry_SYSENTER_32+0x4c/0x7b
> [ 19.096816] }
> [ 19.096816] ... key at: [<f837b694>] __key.61991+0x0/0xffffc96c [ath9k]
> [ 19.096816] ... acquired at:
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] _raw_spin_lock+0x3c/0x50
> [ 19.096816] ath9k_ps_wakeup+0x85/0xe0 [ath9k]
> [ 19.096816] ath9k_bss_info_changed+0x2a/0x1b0 [ath9k]
> [ 19.096816] ieee80211_bss_info_change_notify+0xf3/0x360 [mac80211]
> [ 19.096816] ieee80211_recalc_txpower+0x33/0x40 [mac80211]
> [ 19.096816] ieee80211_set_tx_power+0x45/0x1d0 [mac80211]
> [ 19.096816] cfg80211_wext_siwtxpower+0xd3/0x350 [cfg80211]
> [ 19.096816] ioctl_standard_call+0x4e/0x400
> [ 19.096816] wext_handle_ioctl+0xf4/0x190
> [ 19.096816] dev_ioctl+0xb7/0x630
> [ 19.096816] sock_ioctl+0x13e/0x2d0
> [ 19.096816] do_vfs_ioctl+0x84/0x750
> [ 19.096816] SyS_ioctl+0x34/0x60
> [ 19.096816] do_fast_syscall_32+0x96/0x1d0
> [ 19.096816] entry_SYSENTER_32+0x4c/0x7b
>
> [ 19.096816] -> (&(&sc->sc_pm_lock)->rlock){-.-...} ops: 597 {
> [ 19.096816] IN-HARDIRQ-W at:
> [ 19.096816] __lock_acquire+0x6ae/0x1260
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60
> [ 19.096816] ath_isr+0x15e/0x200 [ath9k]
> [ 19.096816] __handle_irq_event_percpu+0x44/0x340
> [ 19.096816] handle_irq_event_percpu+0x1d/0x50
> [ 19.096816] handle_irq_event+0x32/0x60
> [ 19.096816] handle_level_irq+0x81/0x100
> [ 19.096816] handle_irq+0x9c/0xd0
> [ 19.096816] do_IRQ+0x5c/0x120
> [ 19.096816] common_interrupt+0x36/0x3c
> [ 19.096816] _raw_spin_unlock_irqrestore+0x57/0x70
> [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k]
> [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211]
> [ 19.096816] process_one_work+0x1d1/0x580
> [ 19.096816] worker_thread+0x31/0x380
> [ 19.096816] kthread+0xd9/0x110
> [ 19.096816] ret_from_fork+0x19/0x24
> [ 19.096816] IN-SOFTIRQ-W at:
> [ 19.096816] __lock_acquire+0x55a/0x1260
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60
> [ 19.096816] ath9k_ps_wakeup+0x24/0xe0 [ath9k]
> [ 19.096816] ath9k_tasklet+0x42/0x260 [ath9k]
> [ 19.096816] tasklet_action+0x196/0x1e0
> [ 19.096816] __do_softirq+0xb0/0x430
> [ 19.096816] do_softirq_own_stack+0x33/0x40
> [ 19.096816] irq_exit+0xad/0xc0
> [ 19.096816] do_IRQ+0x65/0x120
> [ 19.096816] common_interrupt+0x36/0x3c
> [ 19.096816] get_page_from_freelist+0x20a/0x970
> [ 19.096816] __alloc_pages_nodemask+0xca/0xed0
> [ 19.096816] __get_free_pages+0x14/0x30
> [ 19.096816] pgd_alloc+0x1d/0x160
> [ 19.096816] mm_init.isra.47+0x13a/0x1b0
> [ 19.096816] copy_process.part.54+0xb55/0x1700
> [ 19.096816] _do_fork+0xd4/0x6a0
> [ 19.096816] SyS_clone+0x27/0x30
> [ 19.096816] do_fast_syscall_32+0x96/0x1d0
> [ 19.096816] entry_SYSENTER_32+0x4c/0x7b
> [ 19.096816] INITIAL USE at:
> [ 19.096816] __lock_acquire+0x204/0x1260
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60
> [ 19.096816] ath9k_ps_wakeup+0x24/0xe0 [ath9k]
> [ 19.096816] ath9k_start+0x29/0x1f0 [ath9k]
> [ 19.096816] drv_start+0x71/0x270 [mac80211]
> [ 19.096816] ieee80211_do_open+0x31f/0x920 [mac80211]
> [ 19.096816] ieee80211_open+0x41/0x50 [mac80211]
> [ 19.096816] __dev_open+0xab/0x140
> [ 19.096816] __dev_change_flags+0x89/0x150
> [ 19.096816] dev_change_flags+0x28/0x60
> [ 19.096816] do_setlink+0x290/0x890
> [ 19.096816] rtnl_newlink+0x7cf/0x8e0
> [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0
> [ 19.096816] netlink_rcv_skb+0xb9/0xe0
> [ 19.096816] rtnetlink_rcv+0x1e/0x30
> [ 19.096816] netlink_unicast+0x13a/0x2c0
> [ 19.096816] netlink_sendmsg+0x290/0x380
> [ 19.096816] ___sys_sendmsg+0x1e2/0x280
> [ 19.096816] __sys_sendmsg+0x3f/0x80
> [ 19.096816] SyS_socketcall+0x58c/0x6b0
> [ 19.096816] do_fast_syscall_32+0x96/0x1d0
> [ 19.096816] entry_SYSENTER_32+0x4c/0x7b
> [ 19.096816] }
> [ 19.096816] ... key at: [<f837b67c>] __key.61994+0x0/0xffffc984 [ath9k]
> [ 19.096816] ... acquired at:
> [ 19.096816] check_usage_forwards+0x118/0x120
> [ 19.096816] mark_lock+0x2e4/0x590
> [ 19.096816] __lock_acquire+0x6ae/0x1260
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60
> [ 19.096816] ath_isr+0x15e/0x200 [ath9k]
> [ 19.096816] __handle_irq_event_percpu+0x44/0x340
> [ 19.096816] handle_irq_event_percpu+0x1d/0x50
> [ 19.096816] handle_irq_event+0x32/0x60
> [ 19.096816] handle_level_irq+0x81/0x100
> [ 19.096816] handle_irq+0x9c/0xd0
> [ 19.096816] do_IRQ+0x5c/0x120
> [ 19.096816] common_interrupt+0x36/0x3c
> [ 19.096816] _raw_spin_unlock_irqrestore+0x57/0x70
> [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k]
> [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211]
> [ 19.096816] process_one_work+0x1d1/0x580
> [ 19.096816] worker_thread+0x31/0x380
> [ 19.096816] kthread+0xd9/0x110
> [ 19.096816] ret_from_fork+0x19/0x24
>
> [ 19.096816]
> stack backtrace:
> [ 19.096816] CPU: 0 PID: 5 Comm: kworker/u2:0 Not tainted 4.13.0-mgm-ovl+ #51
> [ 19.096816] Hardware name: FUJITSU SIEMENS LIFEBOOK S6120/FJNB16C, BIOS Version 1.26 05/10/2004
> [ 19.096816] Workqueue: phy0 ieee80211_dynamic_ps_enable_work [mac80211]
> [ 19.096816] Call Trace:
> [ 19.096816] <IRQ>
> [ 19.096816] dump_stack+0x16/0x19
> [ 19.096816] print_irq_inversion_bug.part.37+0x16c/0x179
> [ 19.096816] check_usage_forwards+0x118/0x120
> [ 19.096816] ? ret_from_fork+0x19/0x24
> [ 19.096816] ? print_shortest_lock_dependencies+0x1a0/0x1a0
> [ 19.096816] mark_lock+0x2e4/0x590
> [ 19.096816] ? print_shortest_lock_dependencies+0x1a0/0x1a0
> [ 19.096816] __lock_acquire+0x6ae/0x1260
> [ 19.096816] lock_acquire+0xb1/0x1c0
> [ 19.096816] ? ath_isr+0x15e/0x200 [ath9k]
> [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60
> [ 19.096816] ? ath_isr+0x15e/0x200 [ath9k]
> [ 19.096816] ath_isr+0x15e/0x200 [ath9k]
> [ 19.096816] __handle_irq_event_percpu+0x44/0x340
> [ 19.096816] handle_irq_event_percpu+0x1d/0x50
> [ 19.096816] handle_irq_event+0x32/0x60
> [ 19.096816] ? handle_nested_irq+0x100/0x100
> [ 19.096816] handle_level_irq+0x81/0x100
> [ 19.096816] handle_irq+0x9c/0xd0
> [ 19.096816] </IRQ>
> [ 19.096816] do_IRQ+0x5c/0x120
> [ 19.096816] common_interrupt+0x36/0x3c
> [ 19.096816] EIP: _raw_spin_unlock_irqrestore+0x57/0x70
> [ 19.096816] EFLAGS: 00000286 CPU: 0
> [ 19.096816] EAX: f60a3600 EBX: 00000286 ECX: 00000006 EDX: 00000001
> [ 19.096816] ESI: f46c9e68 EDI: f46c8620 EBP: f60b5e8c ESP: f60b5e84
> [ 19.096816] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
> [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k]
> [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [ 19.096816] ? ieee80211_hw_config+0x1db/0x5f0 [mac80211]
> [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211]
> [ 19.096816] ? process_one_work+0x127/0x580
> [ 19.096816] ? process_one_work+0x127/0x580
> [ 19.096816] process_one_work+0x1d1/0x580
> [ 19.096816] ? process_one_work+0x127/0x580
> [ 19.096816] worker_thread+0x31/0x380
> [ 19.096816] kthread+0xd9/0x110
> [ 19.096816] ? process_one_work+0x580/0x580
> [ 19.096816] ? kthread_create_on_node+0x30/0x30
> [ 19.096816] ret_from_fork+0x19/0x24
>
> Cc: QCA ath9k Development <ath9k-devel@qca.qualcomm.com>
> Cc: Kalle Valo <kvalo@codeaurora.org>
> Cc: netdev@vger.kernel.org
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Patch applied to ath-next branch of ath.git, thanks.
ba24d63dd374 ath9k: Avoid a potential deadlock
--
https://patchwork.kernel.org/patch/9957575/
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
^ permalink raw reply
* Re: [PATCH net-next 10/10] net: hns3: Add mqprio support when interacting with network stack
From: Yunsheng Lin @ 2017-09-25 7:22 UTC (permalink / raw)
To: Jiri Pirko
Cc: davem@davemloft.net, huangdaode, xuwei (O), Liguozhu (Kenneth),
Zhuangyuzeng (Yisen), Gabriele Paoloni, John Garry, Linuxarm,
Salil Mehta, lipeng (Y), netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20170925065740.GB1899@nanopsycho>
Hi, Jiri
On 2017/9/25 14:57, Jiri Pirko wrote:
> Mon, Sep 25, 2017 at 02:45:08AM CEST, linyunsheng@huawei.com wrote:
>> Hi, Jiri
>>
>> On 2017/9/24 19:37, Jiri Pirko wrote:
>>> Sat, Sep 23, 2017 at 02:47:20AM CEST, linyunsheng@huawei.com wrote:
>>>> Hi, Jiri
>>>>
>>>> On 2017/9/23 0:03, Jiri Pirko wrote:
>>>>> Fri, Sep 22, 2017 at 04:11:51PM CEST, linyunsheng@huawei.com wrote:
>>>>>> Hi, Jiri
>>>>>>
>>>>>>>> - if (!tc) {
>>>>>>>> + if (if_running) {
>>>>>>>> + (void)hns3_nic_net_stop(netdev);
>>>>>>>> + msleep(100);
>>>>>>>> + }
>>>>>>>> +
>>>>>>>> + ret = (kinfo->dcb_ops && kinfo->dcb_ops->>setup_tc) ?
>>>>>>>> + kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : ->EOPNOTSUPP;
>>>>>>
>>>>>>> This is most odd. Why do you call dcb_ops from >ndo_setup_tc callback?
>>>>>>> Why are you mixing this together? prio->tc mapping >can be done
>>>>>>> directly in dcbnl
>>>>>>
>>>>>> Here is what we do in dcb_ops->setup_tc:
>>>>>> Firstly, if current tc num is different from the tc num
>>>>>> that user provide, then we setup the queues for each
>>>>>> tc.
>>>>>>
>>>>>> Secondly, we tell hardware the pri to tc mapping that
>>>>>> the stack is using. In rx direction, our hardware need
>>>>>> that mapping to put different packet into different tc'
>>>>>> queues according to the priority of the packet, then
>>>>>> rss decides which specific queue in the tc should the
>>>>>> packet goto.
>>>>>>
>>>>>> By mixing, I suppose you meant why we need the
>>>>>> pri to tc infomation?
>>>>>
>>>>> by mixing, I mean what I wrote. You are calling dcb_ops callback from
>>>>> ndo_setup_tc callback. So you are mixing DCBNL subsystem and TC
>>>>> subsystem. Why? Why do you need sch_mqprio? Why DCBNL is not enough for
>>>>> all?
>>>>
>>>> When using lldptool, dcbnl is involved.
>>>>
>>>> But when using tc qdisc, dcbbl is not involved, below is the a few key
>>>> call graph in the kernel when tc qdisc cmd is executed.
>>>>
>>>> cmd:
>>>> tc qdisc add dev eth0 root handle 1:0 mqprio num_tc 4 map 1 2 3 3 1 3 1 1 hw 1
>>>>
>>>> call graph:
>>>> rtnetlink_rcv_msg -> tc_modify_qdisc -> qdisc_create -> mqprio_init ->
>>>> hns3_nic_setup_tc
>>>>
>>>> When hns3_nic_setup_tc is called, we need to know how many tc num and
>>>> prio_tc mapping from the tc_mqprio_qopt which is provided in the paramter
>>>> in the ndo_setup_tc function, and dcb_ops is the our hardware specific
>>>> method to setup the tc related parameter to the hardware, so this is why
>>>> we call dcb_ops callback in ndo_setup_tc callback.
>>>>
>>>> I hope this will answer your question, thanks for your time.
>>>
>>> Okay. I understand that you have a usecase for mqprio mapping offload
>>> without lldptool being involved. Ok. I believe it is wrong to call dcb_ops
>>> from tc callback. You should have a generic layer inside the driver and
>>> call it from both dcb_ops and tc callbacks.
>>
>> Actually, dcb_ops is our generic layer inside the driver.
>> Below is high level architecture:
>>
>> [ tc qdisc ] [ lldpad ]
>> | |
>> | |
>> | |
>> [ hns3_enet ] [ hns3_dcbnl ]
>> \ /
>> \ /
>> \ /
>> [ hclge_dcb ]
>> / \
>> / \
>> / \
>> [ hclgc_main ] [ hclge_tm ]
>>
>> hns3_enet.c implements the ndo_setup_tc callback.
>> hns3_dcbnl.c implements the dcbnl_rtnl_ops for stack's DCBNL system.
>> hclge_dcb implements the dcb_ops.
>> So we already have a generic layer that tc and dcbnl all call from.
>>
>>>
>>> Also, what happens If I run lldptool concurrently with mqprio? Who wins
>>> and is going to configure the mapping?
>>
>> Both lldptool and tc qdisc cmd use rtnl interface provided by stack, so
>> they are both protected by rtnl_lock, so we do not have to do the locking
>> in the driver.
>
> I was not asking about locking, which is obvious, I was asking about the
> behaviour. Like for example:
> If I use tc to configure some mapping, later on I run lldptool and change
> the mapping. Does the tc dump show the updated values or the original
> ones?
If it is that case, tc dump show the updated values.
Normally, we use tc qdisc to configure the netdev to use mqprio, and
then use the lldptool the tc_prio mapping, tc schedule mode, tc bandwidth
and pfc option.
if lldptool change the tc num and tc_prio mapping, it will tell the tc
system by the following function which is called in client_ops->setup_tc:
netdev_set_tc_queue
netdev_set_prio_tc_map
So lldptool and tc qdisc can work together.
>
>>
>> The locking is in rtnetlink_rcv_msg:
>>
>> rtnl_lock();
>> handlers = rtnl_dereference(rtnl_msg_handlers[family]);
>> if (handlers) {
>> doit = READ_ONCE(handlers[type].doit);
>> if (doit)
>> err = doit(skb, nlh, extack);
>> }
>> rtnl_unlock();
>>
>> Thanks.
>>
>>>
>>>
>>>>
>>>>>
>>>>>
>>>>>
>>>>>> I hope I did not misunderstand your question, thanks
>>>>>> for your time reviewing.
>>>>>
>>>>> .
>>>>>
>>>>
>>>
>>> .
>>>
>>
>
> .
>
^ permalink raw reply
* [PATCH v2 1/4] net: af_packet: use setup_timer() helper.
From: Allen Pais @ 2017-09-25 7:30 UTC (permalink / raw)
To: netdev; +Cc: davem, sameo, Allen Pais
Use setup_timer function instead of initializing timer with the
function and data fields.
Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
v2: rebased to latest net-next.
net/packet/af_packet.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d288f52..b0f2218 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -544,9 +544,7 @@ static void prb_init_blk_timer(struct packet_sock *po,
struct tpacket_kbdq_core *pkc,
void (*func) (unsigned long))
{
- init_timer(&pkc->retire_blk_timer);
- pkc->retire_blk_timer.data = (long)po;
- pkc->retire_blk_timer.function = func;
+ setup_timer(&pkc->retire_blk_timer, func, (long)po);
pkc->retire_blk_timer.expires = jiffies;
}
--
2.7.4
^ permalink raw reply related
* Re: [PATCH 0/5] use setup_timer() helper function.
From: Allen @ 2017-09-25 7:30 UTC (permalink / raw)
To: David Miller; +Cc: netdev, sameo
In-Reply-To: <20170922.182231.828352137786459100.davem@davemloft.net>
>
> There was a recent change to the nfc code in net-next which causes
> your patches to not apply.
>
> Please repsin against net-next, thanks.
Sent out V2.
Thanks.
^ permalink raw reply
* [PATCH v2 2/4] net: nfc: hci: use setup_timer() helper.
From: Allen Pais @ 2017-09-25 7:30 UTC (permalink / raw)
To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506324605-10160-1-git-send-email-allen.lkml@gmail.com>
Use setup_timer function instead of initializing timer with the
function and data fields.
Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
v2: rebased to latest net-next.
net/nfc/hci/core.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef..a8a6e78 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -1004,9 +1004,8 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
- init_timer(&hdev->cmd_timer);
- hdev->cmd_timer.data = (unsigned long)hdev;
- hdev->cmd_timer.function = nfc_hci_cmd_timeout;
+ setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout,
+ (unsigned long)hdev);
skb_queue_head_init(&hdev->rx_hcp_frags);
--
2.7.4
^ permalink raw reply related
* [PATCH v2 3/4] net: nfc: hci: llc_shdlc: use setup_timer() helper.
From: Allen Pais @ 2017-09-25 7:30 UTC (permalink / raw)
To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506324605-10160-1-git-send-email-allen.lkml@gmail.com>
Use setup_timer function instead of initializing timer with the
function and data fields.
Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
v2: rebased to latest net-next.
net/nfc/hci/llc_shdlc.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a0..58df37e 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -763,17 +763,14 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
mutex_init(&shdlc->state_mutex);
shdlc->state = SHDLC_DISCONNECTED;
- init_timer(&shdlc->connect_timer);
- shdlc->connect_timer.data = (unsigned long)shdlc;
- shdlc->connect_timer.function = llc_shdlc_connect_timeout;
+ setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout,
+ (unsigned long)shdlc);
- init_timer(&shdlc->t1_timer);
- shdlc->t1_timer.data = (unsigned long)shdlc;
- shdlc->t1_timer.function = llc_shdlc_t1_timeout;
+ setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout,
+ (unsigned long)shdlc);
- init_timer(&shdlc->t2_timer);
- shdlc->t2_timer.data = (unsigned long)shdlc;
- shdlc->t2_timer.function = llc_shdlc_t2_timeout;
+ setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout,
+ (unsigned long)shdlc);
shdlc->w = SHDLC_MAX_WINDOW;
shdlc->srej_support = SHDLC_SREJ_SUPPORT;
--
2.7.4
^ permalink raw reply related
* [PATCH v2 4/4] net: nfc: llcp_core: use setup_timer() helper.
From: Allen Pais @ 2017-09-25 7:30 UTC (permalink / raw)
To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506324605-10160-1-git-send-email-allen.lkml@gmail.com>
Use setup_timer function instead of initializing timer with the
function and data fields.
Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
v2: rebased to latest net-next.
net/nfc/llcp_core.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5c..7988185 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1573,9 +1573,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
INIT_LIST_HEAD(&local->list);
kref_init(&local->ref);
mutex_init(&local->sdp_lock);
- init_timer(&local->link_timer);
- local->link_timer.data = (unsigned long) local;
- local->link_timer.function = nfc_llcp_symm_timer;
+ setup_timer(&local->link_timer, nfc_llcp_symm_timer,
+ (unsigned long)local);
skb_queue_head_init(&local->tx_queue);
INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1600,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
mutex_init(&local->sdreq_lock);
INIT_HLIST_HEAD(&local->pending_sdreqs);
- init_timer(&local->sdreq_timer);
- local->sdreq_timer.data = (unsigned long) local;
- local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+ setup_timer(&local->sdreq_timer, nfc_llcp_sdreq_timer,
+ (unsigned long)local);
INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
list_add(&local->list, &llcp_devices);
--
2.7.4
^ permalink raw reply related
* [PATCH net v2 0/4] net:ethernet:aquantia: Atlantic driver bugfixes und improvements
From: Igor Russkikh @ 2017-09-25 7:48 UTC (permalink / raw)
To: David S . Miller
Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
Simon Edelhaus, Igor Russkikh
This series contains bugfixes for aQuantia Atlantic driver.
Changes in v2:
Review comments applied:
- min_mtu set removed
- extra mtu range check is removed
- err codes handling improved
Igor Russkikh (3):
net:ethernet:aquantia: Setup max_mtu in ndev to enable jumbo frames
net:ethernet:aquantia: Fix Tx queue hangups
net:ethernet:aquantia: Fix transient invalid link down/up indications
Pavel Belous (1):
net:ethernet:atlantic: fix iommu errors
drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 4 +
drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 145 ++++++++++-----------
drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 2 -
drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 53 ++++++--
drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 10 +-
drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 8 +-
.../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h | 2 +-
.../aquantia/atlantic/hw_atl/hw_atl_utils.c | 3 +-
8 files changed, 129 insertions(+), 98 deletions(-)
--
2.7.4
^ permalink raw reply
* [PATCH net v2 1/4] net:ethernet:aquantia: Setup max_mtu in ndev to enable jumbo frames
From: Igor Russkikh @ 2017-09-25 7:48 UTC (permalink / raw)
To: David S . Miller
Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
Simon Edelhaus, Igor Russkikh, Pavel Belous
In-Reply-To: <cover.1506324091.git.igor.russkikh@aquantia.com>
Although hardware is capable for almost 16K MTU, without max_mtu field
correctly set it only allows standard MTU to be used.
This patch enables max MTU, calculating it from hardware maximum frame size
of 16352 octets (including FCS).
Fixes: 5513e16421cb ("net: ethernet: aquantia: Fixes for aq_ndev_change_mtu")
Signed-off-by: Pavel Belous <Pavel.Belous@aquantia.com>
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
---
drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 11 ++---------
.../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h | 2 +-
2 files changed, 3 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 6ac9e26..bf26a59 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -214,7 +214,6 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
SET_NETDEV_DEV(ndev, dev);
ndev->if_port = port;
- ndev->min_mtu = ETH_MIN_MTU;
self->ndev = ndev;
self->aq_pci_func = aq_pci_func;
@@ -283,6 +282,7 @@ int aq_nic_ndev_init(struct aq_nic_s *self)
self->ndev->features = aq_hw_caps->hw_features;
self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN;
+ self->ndev->max_mtu = self->aq_hw_caps.mtu - ETH_FCS_LEN - ETH_HLEN;
return 0;
}
@@ -693,16 +693,9 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu)
{
- int err = 0;
-
- if (new_mtu > self->aq_hw_caps.mtu) {
- err = -EINVAL;
- goto err_exit;
- }
self->aq_nic_cfg.mtu = new_mtu;
-err_exit:
- return err;
+ return 0;
}
int aq_nic_set_mac(struct aq_nic_s *self, struct net_device *ndev)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index f3957e93..fcf89e2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -16,7 +16,7 @@
#include "../aq_common.h"
-#define HW_ATL_B0_MTU_JUMBO (16000U)
+#define HW_ATL_B0_MTU_JUMBO 16352U
#define HW_ATL_B0_MTU 1514U
#define HW_ATL_B0_TX_RINGS 4U
--
2.7.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox