* [PATCH v4 net-next 11/12] gtp: Experimental support for encapsulating over IPv6
From: Tom Herbert @ 2017-09-27 4:58 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Allows using the GTP datapath over IPv6. Remote peers are identified by their IPv6 addresses.
Note that this is experimental; more work is needed to make this
compliant with the 3GPP standard.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/gtp.c | 248 ++++++++++++++++++++++++++++++++++---------
include/uapi/linux/gtp.h | 1 +
include/uapi/linux/if_link.h | 3 +
3 files changed, 200 insertions(+), 52 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 919ec6e14973..1c580df4cfc5 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -28,6 +28,7 @@
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
+#include <net/ip6_tunnel.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/icmp.h>
@@ -59,16 +60,22 @@ struct pdp_ctx {
__be16 gtp_port;
u16 ms_af;
+ u16 peer_af;
#if GTP_IPV6
union {
struct in_addr ms_addr_ip4;
struct in6_addr ms_addr_ip6;
};
+
+ union {
+ struct in_addr peer_addr_ip4;
+ struct in6_addr peer_addr_ip6;
+ };
#else
struct in_addr ms_addr_ip4;
+ struct in_addr peer_addr_ip4;
#endif
- struct in_addr peer_addr_ip4;
struct sock *sk;
struct net_device *dev;
@@ -93,8 +100,11 @@ struct gtp_dev {
struct hlist_head *tid_hash;
struct hlist_head *addr4_hash;
+
#if GTP_IPV6
struct hlist_head *addr6_hash;
+
+ unsigned int is_ipv6:1;
#endif
struct gro_cells gro_cells;
@@ -534,8 +544,6 @@ static int gtp_xmit(struct sk_buff *skb, struct net_device *dev,
{
struct iphdr *inner_iph = NULL;
struct sock *sk = pctx->sk;
- __be32 saddr = inet_sk(sk)->inet_saddr;
- struct rtable *rt;
int err = 0;
if (skb->protocol == ETH_P_IP)
@@ -548,38 +556,84 @@ static int gtp_xmit(struct sk_buff *skb, struct net_device *dev,
skb_reset_inner_headers(skb);
- /* Source address returned by route lookup is ignored since
- * we get the address from a socket.
- */
- rt = ip_tunnel_get_route(dev, skb, sk->sk_protocol,
- sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
- pctx->peer_addr_ip4.s_addr, &saddr,
- pctx->gtp_port, pctx->gtp_port,
- &pctx->dst_cache, NULL);
-
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto out_err;
- }
+ if (pctx->peer_af == AF_INET) {
+ __be32 saddr = inet_sk(sk)->inet_saddr;
+ struct rtable *rt;
+
+ /* Source address returned by route lookup is ignored since
+ * we get the address from a socket.
+ */
+ rt = ip_tunnel_get_route(dev, skb, sk->sk_protocol,
+ sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
+ pctx->peer_addr_ip4.s_addr, &saddr,
+ pctx->gtp_port, pctx->gtp_port,
+ &pctx->dst_cache, NULL);
+
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto out_err;
+ }
+
+ skb_dst_drop(skb);
- skb_dst_drop(skb);
+ gtp_push_header(skb, pctx);
- gtp_push_header(skb, pctx);
+ if (inner_iph)
+ __iptunnel_update_pmtu(dev, skb, &rt->dst,
+ !!inner_iph->frag_off,
+ inner_iph, pctx->hlen,
+ pctx->peer_addr_ip4.s_addr);
- if (inner_iph)
- __iptunnel_update_pmtu(dev, skb, &rt->dst,
- !!inner_iph->frag_off,
- inner_iph, pctx->hlen,
- pctx->peer_addr_ip4.s_addr);
+ udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+ pctx->peer_addr_ip4.s_addr,
+ 0, ip4_dst_hoplimit(&rt->dst), 0,
+ pctx->gtp_port, pctx->gtp_port,
+ false, false);
- udp_tunnel_xmit_skb(rt, sk, skb, saddr,
- pctx->peer_addr_ip4.s_addr,
- 0, ip4_dst_hoplimit(&rt->dst), 0,
- pctx->gtp_port, pctx->gtp_port,
- false, false);
+ netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
+ &saddr, &pctx->peer_addr_ip4.s_addr);
- netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
- &saddr, &pctx->peer_addr_ip4.s_addr);
+#if GTP_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (pctx->peer_af == AF_INET6) {
+ struct in6_addr saddr = inet6_sk(sk)->saddr;
+ struct dst_entry *dst;
+
+ /* Source address returned by route lookup is ignored since
+ * we get the address from a socket.
+ */
+ dst = ip6_tnl_get_route(dev, skb, sk, sk->sk_protocol,
+ sk->sk_bound_dev_if, 0,
+ 0, &pctx->peer_addr_ip6, &saddr,
+ pctx->gtp_port, pctx->gtp_port,
+ &pctx->dst_cache, NULL);
+
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto out_err;
+ }
+
+ skb_dst_drop(skb);
+
+ gtp_push_header(skb, pctx);
+
+ if (inner_iph)
+ __iptunnel_update_pmtu(dev, skb, dst,
+ !!inner_iph->frag_off,
+ inner_iph, pctx->hlen, 0);
+
+ udp_tunnel6_xmit_skb(dst, sk, skb, dev,
+ &saddr, &pctx->peer_addr_ip6,
+ 0, ip6_dst_hoplimit(dst), 0,
+ pctx->gtp_port, pctx->gtp_port,
+ false);
+
+ netdev_dbg(dev, "gtp -> IP src: %pI6 dst: %pI6\n",
+ &saddr, &pctx->peer_addr_ip6);
+
+#endif
+#endif
+ }
return 0;
@@ -688,7 +742,12 @@ static void gtp_link_setup(struct net_device *dev)
/* Assume largest header, ie. GTPv0. */
dev->needed_headroom = LL_MAX_HEADER +
+#if GTP_IPV6
+ max_t(int, sizeof(struct iphdr),
+ sizeof(struct ipv6hdr)) +
+#else
sizeof(struct iphdr) +
+#endif
sizeof(struct udphdr) +
sizeof(struct gtp0_header);
@@ -697,12 +756,15 @@ static void gtp_link_setup(struct net_device *dev)
static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
static void gtp_hashtable_free(struct gtp_dev *gtp);
-static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[],
+ bool is_ipv6);
static int gtp_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
+ unsigned int role = GTP_ROLE_GGSN;
+ bool is_ipv6 = false;
struct gtp_dev *gtp;
struct gtp_net *gn;
int hashsize, err;
@@ -710,9 +772,32 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
return -EINVAL;
+ if (data[IFLA_GTP_ROLE]) {
+ role = nla_get_u32(data[IFLA_GTP_ROLE]);
+ if (role > GTP_ROLE_SGSN)
+ return -EINVAL;
+ }
+
+ if (data[IFLA_GTP_AF]) {
+ u16 af = nla_get_u16(data[IFLA_GTP_AF]);
+
+ switch (af) {
+ case AF_INET:
+ is_ipv6 = false;
+ break;
+#if GTP_IPV6
+ case AF_INET6:
+ is_ipv6 = true;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ }
+
gtp = netdev_priv(dev);
- err = gtp_encap_enable(gtp, data);
+ err = gtp_encap_enable(gtp, data, is_ipv6);
if (err < 0)
return err;
@@ -731,6 +816,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
goto out_hashtable;
}
+ gtp->role = role;
+#if GTP_IPV6
+ gtp->is_ipv6 = is_ipv6;
+#endif
+
gn = net_generic(dev_net(dev), gtp_net_id);
list_add_rcu(&gtp->list, &gn->gtp_dev_list);
@@ -860,7 +950,8 @@ static void gtp_hashtable_free(struct gtp_dev *gtp)
}
static struct sock *gtp_encap_enable_socket(int fd, int type,
- struct gtp_dev *gtp)
+ struct gtp_dev *gtp,
+ bool is_ipv6)
{
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct socket *sock;
@@ -881,6 +972,12 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
goto out_sock;
}
+ if (sock->sk->sk_family != (is_ipv6 ? AF_INET6 : AF_INET)) {
+ pr_debug("socket fd=%d not right family\n", fd);
+ sk = ERR_PTR(-EINVAL);
+ goto out_sock;
+ }
+
if (rcu_dereference_sk_user_data(sock->sk)) {
sk = ERR_PTR(-EBUSY);
goto out_sock;
@@ -913,16 +1010,16 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
return sk;
}
-static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
+static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[],
+ bool is_ipv6)
{
- struct sock *sk1u = NULL;
- struct sock *sk0 = NULL;
- unsigned int role = GTP_ROLE_GGSN;
+ struct sock *sk0 = NULL, *sk1u = NULL;
if (data[IFLA_GTP_FD0]) {
u32 fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
- sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp);
+ sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp,
+ is_ipv6);
if (IS_ERR(sk0))
return PTR_ERR(sk0);
}
@@ -930,7 +1027,8 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
if (data[IFLA_GTP_FD1]) {
u32 fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
- sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp);
+ sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp,
+ is_ipv6);
if (IS_ERR(sk1u)) {
if (sk0)
gtp_encap_disable_sock(sk0);
@@ -938,15 +1036,8 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
}
}
- if (data[IFLA_GTP_ROLE]) {
- role = nla_get_u32(data[IFLA_GTP_ROLE]);
- if (role > GTP_ROLE_SGSN)
- return -EINVAL;
- }
-
gtp->sk0 = sk0;
gtp->sk1u = sk1u;
- gtp->role = role;
return 0;
}
@@ -982,8 +1073,18 @@ static void pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
__be16 default_port = 0;
pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]);
- pctx->peer_addr_ip4.s_addr =
- nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]);
+
+ if (info->attrs[GTPA_PEER_ADDRESS]) {
+ pctx->peer_af = AF_INET;
+ pctx->peer_addr_ip4.s_addr =
+ nla_get_in_addr(info->attrs[GTPA_PEER_ADDRESS]);
+#if GTP_IPV6
+ } else if (info->attrs[GTPA_PEER6_ADDRESS]) {
+ pctx->peer_af = AF_INET6;
+ pctx->peer_addr_ip6 = nla_get_in6_addr(
+ info->attrs[GTPA_PEER6_ADDRESS]);
+#endif
+ }
switch (pctx->gtp_version) {
case GTP_V0:
@@ -1162,11 +1263,17 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
int err;
if (!info->attrs[GTPA_VERSION] ||
- !info->attrs[GTPA_LINK] ||
- !info->attrs[GTPA_PEER_ADDRESS])
+ !info->attrs[GTPA_LINK])
return -EINVAL;
#if GTP_IPV6
+ if (!(!!info->attrs[GTPA_PEER_ADDRESS] ^
+ !!info->attrs[GTPA_PEER6_ADDRESS])) {
+ /* Either v4 or v6 peer address must be set */
+
+ return -EINVAL;
+ }
+
if (!(!!info->attrs[GTPA_MS_ADDRESS] ^
!!info->attrs[GTPA_MS6_ADDRESS])) {
/* Either v4 or v6 mobile subscriber address must be set */
@@ -1174,6 +1281,12 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
#else
+ if (!info->attrs[GTPA_PEER_ADDRESS]) {
+ /* v4 peer address must be set */
+
+ return -EINVAL;
+ }
+
if (!info->attrs[GTPA_MS_ADDRESS]) {
/* v4 mobile subscriber address must be set */
@@ -1207,6 +1320,14 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
goto out_unlock;
}
+#if GTP_IPV6
+ if ((info->attrs[GTPA_PEER_ADDRESS] && gtp->is_ipv6) ||
+ (info->attrs[GTPA_PEER6_ADDRESS] && !gtp->is_ipv6)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+#endif
+
if (version == GTP_V0)
sk = gtp->sk0;
else if (version == GTP_V1)
@@ -1315,10 +1436,31 @@ static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
if (genlh == NULL)
goto nlmsg_failure;
- if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) ||
- nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer_addr_ip4.s_addr))
+ if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version))
goto nla_put_failure;
+ if (nla_put_u32(skb, GTPA_LINK, pctx->dev->ifindex))
+ goto nla_put_failure;
+
+ switch (pctx->peer_af) {
+ case AF_INET:
+ if (nla_put_be32(skb, GTPA_PEER_ADDRESS,
+ pctx->peer_addr_ip4.s_addr))
+ goto nla_put_failure;
+
+ break;
+#if GTP_IPV6
+ case AF_INET6:
+ if (nla_put_in6_addr(skb, GTPA_PEER6_ADDRESS,
+ &pctx->peer_addr_ip6))
+ goto nla_put_failure;
+
+ break;
+#endif
+ default:
+ goto nla_put_failure;
+ }
+
switch (pctx->ms_af) {
case AF_INET:
if (nla_put_be32(skb, GTPA_MS_ADDRESS,
@@ -1448,6 +1590,8 @@ static struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = {
[GTPA_PEER_ADDRESS] = { .type = NLA_U32, },
[GTPA_MS_ADDRESS] = { .type = NLA_U32, },
#if GTP_IPV6
+ [GTPA_PEER6_ADDRESS] = { .len = FIELD_SIZEOF(struct ipv6hdr,
+ daddr) },
[GTPA_MS6_ADDRESS] = { .len = FIELD_SIZEOF(struct ipv6hdr,
daddr) },
#endif
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index ae4e632c0360..8eec519fa754 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -29,6 +29,7 @@ enum gtp_attrs {
GTPA_PAD,
GTPA_PORT,
GTPA_MS6_ADDRESS,
+ GTPA_PEER6_ADDRESS,
__GTPA_MAX,
};
#define GTPA_MAX (__GTPA_MAX + 1)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8d062c58d5cb..81c26864abeb 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -552,6 +552,9 @@ enum {
IFLA_GTP_FD1,
IFLA_GTP_PDP_HASHSIZE,
IFLA_GTP_ROLE,
+ IFLA_GTP_AF,
+ IFLA_GTP_PORT0,
+ IFLA_GTP_PORT1,
__IFLA_GTP_MAX,
};
#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 10/12] gtp: Experimental encapsulation of IPv6 packets
From: Tom Herbert @ 2017-09-27 4:58 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Allow IPv6 mobile subscriber packets. This entails adding an IPv6 mobile
subscriber address to pdp context and IPv6 specific variants to find pdp
contexts by address.
Note that this is experimental support of IPv6, more work is
necessary to make this compliant with 3GPP standard.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/Kconfig | 12 +-
drivers/net/gtp.c | 324 +++++++++++++++++++++++++++++++++++++++--------
include/uapi/linux/gtp.h | 1 +
3 files changed, 280 insertions(+), 57 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d4292d56bb02..21836f657e5a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -226,7 +226,17 @@ config GTP
3GPP TS 29.060 standards.
To compile this drivers as a module, choose M here: the module
- wil be called gtp.
+ will be called gtp.
+
+config GTP_IPV6_EXPERIMENTAL
+ bool "GTP IPv6 datapath (EXPERIMENTAL)"
+ default n
+ depends on GTP
+ ---help---
+ This is an experimental implementation that allows encapsulating
+ IPv6 over GTP and using GTP over IPv6 for testing and development
+ purpose. This is not a standards conformant implementation for
+ IPv6 and GTP. More work is needed to reach that level.
config MACSEC
tristate "IEEE 802.1AE MAC-level encryption (MACsec)"
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 44844eba8df2..919ec6e14973 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -36,6 +36,8 @@
#include <net/netns/generic.h>
#include <net/gtp.h>
+#define GTP_IPV6 IS_ENABLED(CONFIG_GTP_IPV6_EXPERIMENTAL)
+
/* An active session for the subscriber. */
struct pdp_ctx {
struct hlist_node hlist_tid;
@@ -55,9 +57,17 @@ struct pdp_ctx {
u8 gtp_version;
u8 hlen;
__be16 gtp_port;
- u16 af;
- struct in_addr ms_addr_ip4;
+ u16 ms_af;
+#if GTP_IPV6
+ union {
+ struct in_addr ms_addr_ip4;
+ struct in6_addr ms_addr_ip6;
+ };
+#else
+ struct in_addr ms_addr_ip4;
+#endif
+
struct in_addr peer_addr_ip4;
struct sock *sk;
@@ -81,7 +91,11 @@ struct gtp_dev {
unsigned int role;
unsigned int hash_size;
struct hlist_head *tid_hash;
- struct hlist_head *addr_hash;
+
+ struct hlist_head *addr4_hash;
+#if GTP_IPV6
+ struct hlist_head *addr6_hash;
+#endif
struct gro_cells gro_cells;
};
@@ -99,6 +113,7 @@ static void pdp_context_delete(struct pdp_ctx *pctx);
static inline u32 gtp0_hashfn(u64 tid)
{
u32 *tid32 = (u32 *) &tid;
+
return jhash_2words(tid32[0], tid32[1], gtp_h_initval);
}
@@ -107,11 +122,6 @@ static inline u32 gtp1u_hashfn(u32 tid)
return jhash_1word(tid, gtp_h_initval);
}
-static inline u32 ipv4_hashfn(__be32 ip)
-{
- return jhash_1word((__force u32)ip, gtp_h_initval);
-}
-
/* Resolve a PDP context structure based on the 64bit TID. */
static struct pdp_ctx *gtp0_pdp_find(struct gtp_dev *gtp, u64 tid)
{
@@ -144,16 +154,21 @@ static struct pdp_ctx *gtp1_pdp_find(struct gtp_dev *gtp, u32 tid)
return NULL;
}
+static inline u32 gtp_ipv4_hashfn(__be32 ip)
+{
+ return jhash_1word((__force u32)ip, gtp_h_initval);
+}
+
/* Resolve a PDP context based on IPv4 address of MS. */
static struct pdp_ctx *ipv4_pdp_find(struct gtp_dev *gtp, __be32 ms_addr)
{
struct hlist_head *head;
struct pdp_ctx *pdp;
- head = &gtp->addr_hash[ipv4_hashfn(ms_addr) % gtp->hash_size];
+ head = &gtp->addr4_hash[gtp_ipv4_hashfn(ms_addr) % gtp->hash_size];
hlist_for_each_entry_rcu(pdp, head, hlist_addr) {
- if (pdp->af == AF_INET &&
+ if (pdp->ms_af == AF_INET &&
pdp->ms_addr_ip4.s_addr == ms_addr)
return pdp;
}
@@ -177,33 +192,109 @@ static bool gtp_check_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx,
return iph->saddr == pctx->ms_addr_ip4.s_addr;
}
+#if GTP_IPV6
+
+static inline u32 gtp_ipv6_hashfn(const struct in6_addr *a)
+{
+ return __ipv6_addr_jhash(a, gtp_h_initval);
+}
+
+/* Resolve a PDP context based on IPv6 address of MS. */
+static struct pdp_ctx *ipv6_pdp_find(struct gtp_dev *gtp,
+ const struct in6_addr *ms_addr)
+{
+ struct hlist_head *head;
+ struct pdp_ctx *pdp;
+
+ head = &gtp->addr6_hash[gtp_ipv6_hashfn(ms_addr) % gtp->hash_size];
+
+ hlist_for_each_entry_rcu(pdp, head, hlist_addr) {
+ if (pdp->ms_af == AF_INET6 &&
+ ipv6_addr_equal(&pdp->ms_addr_ip6, ms_addr))
+ return pdp;
+ }
+
+ return NULL;
+}
+
+static bool gtp_check_ms_ipv6(struct sk_buff *skb, struct pdp_ctx *pctx,
+ unsigned int hdrlen, unsigned int role)
+{
+ struct ipv6hdr *ipv6h;
+
+ if (!pskb_may_pull(skb, hdrlen + sizeof(struct ipv6hdr)))
+ return false;
+
+ ipv6h = (struct ipv6hdr *)(skb->data + hdrlen);
+
+ if (role == GTP_ROLE_SGSN)
+ return ipv6_addr_equal(&ipv6h->daddr, &pctx->ms_addr_ip6);
+ else
+ return ipv6_addr_equal(&ipv6h->saddr, &pctx->ms_addr_ip6);
+}
+
+#endif
+
/* Check if the inner IP address in this packet is assigned to any
* existing mobile subscriber.
*/
static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
unsigned int hdrlen, unsigned int role)
{
- switch (ntohs(skb->protocol)) {
- case ETH_P_IP:
+ struct iphdr *iph;
+
+ /* Minimally there needs to be an IPv4 header */
+ if (!pskb_may_pull(skb, hdrlen + sizeof(struct iphdr)))
+ return false;
+
+ iph = (struct iphdr *)(skb->data + hdrlen);
+
+ switch (iph->version) {
+ case 4:
return gtp_check_ms_ipv4(skb, pctx, hdrlen, role);
+#if GTP_IPV6
+ case 6:
+ return gtp_check_ms_ipv6(skb, pctx, hdrlen, role);
+#endif
}
+
return false;
}
+static u16 ipver_to_eth(struct iphdr *iph)
+{
+ switch (iph->version) {
+ case 4:
+ return htons(ETH_P_IP);
+#if GTP_IPV6
+ case 6:
+ return htons(ETH_P_IPV6);
+#endif
+ default:
+ return 0;
+ }
+}
+
static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
- unsigned int hdrlen, unsigned int role)
+ unsigned int hdrlen, unsigned int role)
{
struct gtp_dev *gtp = netdev_priv(pctx->dev);
struct pcpu_sw_netstats *stats;
+ u16 inner_protocol;
if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
return 1;
}
+ inner_protocol = ipver_to_eth((struct iphdr *)(skb->data + hdrlen));
+ if (!inner_protocol)
+ return -1;
+
/* Get rid of the GTP + UDP headers. */
- if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
- !net_eq(sock_net(pctx->sk), dev_net(pctx->dev))))
+ if (iptunnel_pull_header(skb, hdrlen, inner_protocol,
+ !net_eq(sock_net(pctx->sk),
+ dev_net(pctx->dev))))
return -1;
netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n");
@@ -241,7 +332,8 @@ static int gtp0_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!gtp)
goto pass;
- if (!pskb_may_pull(skb, hdrlen))
+ /* Pull through IP header since gtp_rx looks at IP version */
+ if (!pskb_may_pull(skb, hdrlen + sizeof(struct iphdr)))
goto drop;
gtp0 = (struct gtp0_header *)(skb->data + sizeof(struct udphdr));
@@ -287,7 +379,8 @@ static int gtp1u_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!gtp)
goto pass;
- if (!pskb_may_pull(skb, hdrlen))
+ /* Pull through IP header since gtp_rx looks at IP version */
+ if (!pskb_may_pull(skb, hdrlen + sizeof(struct iphdr)))
goto drop;
gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
@@ -309,8 +402,10 @@ static int gtp1u_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (gtp1->flags & GTP1_F_MASK)
hdrlen += 4;
- /* Make sure the header is larger enough, including extensions. */
- if (!pskb_may_pull(skb, hdrlen))
+ /* Make sure the header is large enough, including extensions and
+ * also an IP header since gtp_rx looks at IP version
+ */
+ if (!pskb_may_pull(skb, hdrlen + sizeof(struct iphdr)))
goto drop;
gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
@@ -391,7 +486,8 @@ static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
gtp0->flags = 0x1e; /* v0, GTP-non-prime. */
gtp0->type = GTP_TPDU;
gtp0->length = htons(payload_len);
- gtp0->seq = htons((atomic_inc_return(&pctx->tx_seq) - 1) % 0xffff);
+ gtp0->seq = htons((atomic_inc_return(&pctx->tx_seq) - 1) %
+ 0xffff);
gtp0->flow = htons(pctx->u.v0.flow);
gtp0->number = 0xff;
gtp0->spare[0] = gtp0->spare[1] = gtp0->spare[2] = 0xff;
@@ -523,6 +619,25 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
break;
}
+#if GTP_IPV6
+ case ETH_P_IPV6: {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ if (gtp->role == GTP_ROLE_SGSN)
+ pctx = ipv6_pdp_find(gtp, &ipv6h->saddr);
+ else
+ pctx = ipv6_pdp_find(gtp, &ipv6h->daddr);
+
+ if (!pctx) {
+ netdev_dbg(dev, "no PDP ctx found for %pI6, skip\n",
+ &ipv6h->daddr);
+ err = -ENOENT;
+ goto tx_err;
+ }
+
+ break;
+ }
+#endif
default:
err = -EOPNOTSUPP;
goto tx_err;
@@ -692,23 +807,38 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize)
{
int i;
- gtp->addr_hash = kmalloc(sizeof(struct hlist_head) * hsize, GFP_KERNEL);
- if (gtp->addr_hash == NULL)
- return -ENOMEM;
+ gtp->addr4_hash = kmalloc_array(hsize, sizeof(*gtp->addr4_hash),
+ GFP_KERNEL);
+ if (!gtp->addr4_hash)
+ goto err;
+
+#if GTP_IPV6
+ gtp->addr6_hash = kmalloc_array(hsize, sizeof(*gtp->addr6_hash),
+ GFP_KERNEL);
+ if (!gtp->addr6_hash)
+ goto err;
+#endif
- gtp->tid_hash = kmalloc(sizeof(struct hlist_head) * hsize, GFP_KERNEL);
- if (gtp->tid_hash == NULL)
- goto err1;
+ gtp->tid_hash = kmalloc_array(hsize, sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!gtp->tid_hash)
+ goto err;
gtp->hash_size = hsize;
for (i = 0; i < hsize; i++) {
- INIT_HLIST_HEAD(&gtp->addr_hash[i]);
+ INIT_HLIST_HEAD(&gtp->addr4_hash[i]);
+#if GTP_IPV6
+ INIT_HLIST_HEAD(&gtp->addr6_hash[i]);
+#endif
INIT_HLIST_HEAD(&gtp->tid_hash[i]);
}
return 0;
-err1:
- kfree(gtp->addr_hash);
+err:
+ kfree(gtp->addr4_hash);
+#if GTP_IPV6
+ kfree(gtp->addr6_hash);
+#endif
return -ENOMEM;
}
@@ -722,7 +852,10 @@ static void gtp_hashtable_free(struct gtp_dev *gtp)
pdp_context_delete(pctx);
synchronize_rcu();
- kfree(gtp->addr_hash);
+ kfree(gtp->addr4_hash);
+#if GTP_IPV6
+ kfree(gtp->addr6_hash);
+#endif
kfree(gtp->tid_hash);
}
@@ -844,16 +977,13 @@ static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
return gtp;
}
-static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
+static void pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
{
__be16 default_port = 0;
pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]);
- pctx->af = AF_INET;
pctx->peer_addr_ip4.s_addr =
nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]);
- pctx->ms_addr_ip4.s_addr =
- nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
switch (pctx->gtp_version) {
case GTP_V0:
@@ -882,33 +1012,59 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
pctx->gtp_port = default_port;
}
-static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
- struct genl_info *info)
+static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+ struct genl_info *info)
{
struct net_device *dev = gtp->dev;
+ struct hlist_head *addr_list;
+ struct pdp_ctx *pctx = NULL;
u32 hash_ms, hash_tid = 0;
- struct pdp_ctx *pctx;
- bool found = false;
- __be32 ms_addr;
+#if GTP_IPV6
+ struct in6_addr ms6_addr;
+#endif
+ __be32 ms_addr = 0;
+ int ms_af;
int err;
- ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
- hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size;
+#if GTP_IPV6
+ /* Caller ensures we have either v4 or v6 mobile subscriber address */
+ if (info->attrs[GTPA_MS_ADDRESS]) {
+ /* IPv4 mobile subscriber */
- hlist_for_each_entry_rcu(pctx, &gtp->addr_hash[hash_ms], hlist_addr) {
- if (pctx->ms_addr_ip4.s_addr == ms_addr) {
- found = true;
- break;
- }
+ ms_addr = nla_get_in_addr(info->attrs[GTPA_MS_ADDRESS]);
+ hash_ms = gtp_ipv4_hashfn(ms_addr) % gtp->hash_size;
+ addr_list = &gtp->addr4_hash[hash_ms];
+ ms_af = AF_INET;
+
+ pctx = ipv4_pdp_find(gtp, ms_addr);
+ } else {
+ /* IPv6 mobile subscriber */
+
+ ms6_addr = nla_get_in6_addr(info->attrs[GTPA_MS6_ADDRESS]);
+ hash_ms = gtp_ipv6_hashfn(&ms6_addr) % gtp->hash_size;
+ addr_list = &gtp->addr6_hash[hash_ms];
+ ms_af = AF_INET6;
+
+ pctx = ipv6_pdp_find(gtp, &ms6_addr);
}
+#else
+ /* IPv4 mobile subscriber */
- if (found) {
+ ms_addr = nla_get_in_addr(info->attrs[GTPA_MS_ADDRESS]);
+ hash_ms = gtp_ipv4_hashfn(ms_addr) % gtp->hash_size;
+ addr_list = &gtp->addr4_hash[hash_ms];
+ ms_af = AF_INET;
+
+ pctx = ipv4_pdp_find(gtp, ms_addr);
+#endif
+
+ if (pctx) {
if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- ipv4_pdp_fill(pctx, info);
+ pdp_fill(pctx, info);
if (pctx->gtp_version == GTP_V0)
netdev_dbg(dev, "GTPv0-U: update tunnel id = %llx (pdp %p)\n",
@@ -934,7 +1090,20 @@ static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
sock_hold(sk);
pctx->sk = sk;
pctx->dev = gtp->dev;
- ipv4_pdp_fill(pctx, info);
+ pctx->ms_af = ms_af;
+
+ switch (ms_af) {
+ case AF_INET:
+ pctx->ms_addr_ip4.s_addr = ms_addr;
+ break;
+#if GTP_IPV6
+ case AF_INET6:
+ pctx->ms_addr_ip6 = ms6_addr;
+ break;
+#endif
+ }
+
+ pdp_fill(pctx, info);
atomic_set(&pctx->tx_seq, 0);
switch (pctx->gtp_version) {
@@ -951,7 +1120,7 @@ static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
break;
}
- hlist_add_head_rcu(&pctx->hlist_addr, &gtp->addr_hash[hash_ms]);
+ hlist_add_head_rcu(&pctx->hlist_addr, addr_list);
hlist_add_head_rcu(&pctx->hlist_addr, &gtp->tid_hash[hash_tid]);
switch (pctx->gtp_version) {
@@ -993,11 +1162,25 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
int err;
if (!info->attrs[GTPA_VERSION] ||
- !info->attrs[GTPA_LINK] ||
- !info->attrs[GTPA_PEER_ADDRESS] ||
- !info->attrs[GTPA_MS_ADDRESS])
+ !info->attrs[GTPA_LINK] ||
+ !info->attrs[GTPA_PEER_ADDRESS])
return -EINVAL;
+#if GTP_IPV6
+ if (!(!!info->attrs[GTPA_MS_ADDRESS] ^
+ !!info->attrs[GTPA_MS6_ADDRESS])) {
+ /* Either v4 or v6 mobile subscriber address must be set */
+
+ return -EINVAL;
+ }
+#else
+ if (!info->attrs[GTPA_MS_ADDRESS]) {
+ /* v4 mobile subscriber address must be set */
+
+ return -EINVAL;
+ }
+#endif
+
version = nla_get_u32(info->attrs[GTPA_VERSION]);
switch (version) {
@@ -1036,7 +1219,7 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
goto out_unlock;
}
- err = ipv4_pdp_add(gtp, sk, info);
+ err = gtp_pdp_add(gtp, sk, info);
out_unlock:
rcu_read_unlock();
@@ -1056,6 +1239,13 @@ static struct pdp_ctx *gtp_find_pdp_by_link(struct net *net,
__be32 ip = nla_get_be32(nla[GTPA_MS_ADDRESS]);
return ipv4_pdp_find(gtp, ip);
+#if GTP_IPV6
+ } else if (nla[GTPA_MS6_ADDRESS]) {
+ struct in6_addr ip6 =
+ nla_get_in6_addr(nla[GTPA_MS6_ADDRESS]);
+
+ return ipv6_pdp_find(gtp, &ip6);
+#endif
} else if (nla[GTPA_VERSION]) {
u32 gtp_version = nla_get_u32(nla[GTPA_VERSION]);
@@ -1126,9 +1316,27 @@ static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
goto nlmsg_failure;
if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) ||
- nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer_addr_ip4.s_addr) ||
- nla_put_be32(skb, GTPA_MS_ADDRESS, pctx->ms_addr_ip4.s_addr))
+ nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer_addr_ip4.s_addr))
+ goto nla_put_failure;
+
+ switch (pctx->ms_af) {
+ case AF_INET:
+ if (nla_put_be32(skb, GTPA_MS_ADDRESS,
+ pctx->ms_addr_ip4.s_addr))
+ goto nla_put_failure;
+
+ break;
+#if GTP_IPV6
+ case AF_INET6:
+ if (nla_put_in6_addr(skb, GTPA_MS6_ADDRESS,
+ &pctx->ms_addr_ip6))
+ goto nla_put_failure;
+
+ break;
+#endif
+ default:
goto nla_put_failure;
+ }
switch (pctx->gtp_version) {
case GTP_V0:
@@ -1239,6 +1447,10 @@ static struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = {
[GTPA_TID] = { .type = NLA_U64, },
[GTPA_PEER_ADDRESS] = { .type = NLA_U32, },
[GTPA_MS_ADDRESS] = { .type = NLA_U32, },
+#if GTP_IPV6
+ [GTPA_MS6_ADDRESS] = { .len = FIELD_SIZEOF(struct ipv6hdr,
+ daddr) },
+#endif
[GTPA_FLOW] = { .type = NLA_U16, },
[GTPA_NET_NS_FD] = { .type = NLA_U32, },
[GTPA_I_TEI] = { .type = NLA_U32, },
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index b2283a5c6d7f..ae4e632c0360 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -28,6 +28,7 @@ enum gtp_attrs {
GTPA_O_TEI, /* for GTPv1 only */
GTPA_PAD,
GTPA_PORT,
+ GTPA_MS6_ADDRESS,
__GTPA_MAX,
};
#define GTPA_MAX (__GTPA_MAX + 1)
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 09/12] gtp: Eliminate pktinfo and add port configuration
From: Tom Herbert @ 2017-09-27 4:58 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
The gtp pktinfo structure is unnecessary and needs a lot of code to
manage it. Remove it. Also, add per pdp port configuration for transmit.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/gtp.c | 177 +++++++++++++++++++++--------------------------
include/uapi/linux/gtp.h | 1 +
2 files changed, 80 insertions(+), 98 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index bbb08f8849d3..44844eba8df2 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -54,6 +54,7 @@ struct pdp_ctx {
} u;
u8 gtp_version;
u8 hlen;
+ __be16 gtp_port;
u16 af;
struct in_addr ms_addr_ip4;
@@ -420,73 +421,36 @@ static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
*/
}
-struct gtp_pktinfo {
- struct sock *sk;
- struct iphdr *iph;
- struct flowi4 fl4;
- struct rtable *rt;
- struct pdp_ctx *pctx;
- struct net_device *dev;
- __be16 gtph_port;
-};
-
-static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo)
+static void gtp_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
{
- switch (pktinfo->pctx->gtp_version) {
+ switch (pctx->gtp_version) {
case GTP_V0:
- pktinfo->gtph_port = htons(GTP0_PORT);
- gtp0_push_header(skb, pktinfo->pctx);
+ gtp0_push_header(skb, pctx);
break;
case GTP_V1:
- pktinfo->gtph_port = htons(GTP1U_PORT);
- gtp1_push_header(skb, pktinfo->pctx);
+ gtp1_push_header(skb, pctx);
break;
}
}
-static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo,
- struct sock *sk, struct iphdr *iph,
- struct pdp_ctx *pctx, struct rtable *rt,
- struct flowi4 *fl4,
- struct net_device *dev)
-{
- pktinfo->sk = sk;
- pktinfo->iph = iph;
- pktinfo->pctx = pctx;
- pktinfo->rt = rt;
- pktinfo->fl4 = *fl4;
- pktinfo->dev = dev;
-}
-
-static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
- struct gtp_pktinfo *pktinfo)
+static int gtp_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct pdp_ctx *pctx)
{
- struct gtp_dev *gtp = netdev_priv(dev);
- struct pdp_ctx *pctx;
+ struct iphdr *inner_iph = NULL;
+ struct sock *sk = pctx->sk;
+ __be32 saddr = inet_sk(sk)->inet_saddr;
struct rtable *rt;
- struct flowi4 fl4;
- struct iphdr *iph;
- struct sock *sk;
- __be32 saddr;
+ int err = 0;
- /* Read the IP destination address and resolve the PDP context.
- * Prepend PDP header with TEI/TID from PDP ctx.
- */
- iph = ip_hdr(skb);
- if (gtp->role == GTP_ROLE_SGSN)
- pctx = ipv4_pdp_find(gtp, iph->saddr);
- else
- pctx = ipv4_pdp_find(gtp, iph->daddr);
+ if (skb->protocol == ETH_P_IP)
+ inner_iph = ip_hdr(skb);
- if (!pctx) {
- netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
- &iph->daddr);
- return -ENOENT;
- }
- netdev_dbg(dev, "found PDP context %p\n", pctx);
+ /* Ensure there is sufficient headroom. */
+ err = skb_cow_head(skb, dev->needed_headroom);
+ if (unlikely(err))
+ goto out_err;
- sk = pctx->sk;
- saddr = inet_sk(sk)->inet_saddr;
+ skb_reset_inner_headers(skb);
/* Source address returned by route lookup is ignored since
* we get the address from a socket.
@@ -494,81 +458,89 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
rt = ip_tunnel_get_route(dev, skb, sk->sk_protocol,
sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
pctx->peer_addr_ip4.s_addr, &saddr,
- pktinfo->gtph_port, pktinfo->gtph_port,
+ pctx->gtp_port, pctx->gtp_port,
&pctx->dst_cache, NULL);
if (IS_ERR(rt)) {
- if (rt == ERR_PTR(-ELOOP)) {
- netdev_dbg(dev, "circular route to SSGN %pI4\n",
- &pctx->peer_addr_ip4.s_addr);
- dev->stats.collisions++;
- goto err_rt;
- } else {
- netdev_dbg(dev, "no route to SSGN %pI4\n",
- &pctx->peer_addr_ip4.s_addr);
- dev->stats.tx_carrier_errors++;
- goto err;
- }
+ err = PTR_ERR(rt);
+ goto out_err;
}
skb_dst_drop(skb);
- gtp_set_pktinfo_ipv4(pktinfo, sk, iph, pctx, rt, &fl4, dev);
- gtp_push_header(skb, pktinfo);
+ gtp_push_header(skb, pctx);
+
+ if (inner_iph)
+ __iptunnel_update_pmtu(dev, skb, &rt->dst,
+ !!inner_iph->frag_off,
+ inner_iph, pctx->hlen,
+ pctx->peer_addr_ip4.s_addr);
- __iptunnel_update_pmtu(dev, skb, &rt->dst, !!iph->frag_off, iph,
- pctx->hlen, pctx->peer_addr_ip4.s_addr);
+ udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+ pctx->peer_addr_ip4.s_addr,
+ 0, ip4_dst_hoplimit(&rt->dst), 0,
+ pctx->gtp_port, pctx->gtp_port,
+ false, false);
+
+ netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
+ &saddr, &pctx->peer_addr_ip4.s_addr);
return 0;
-err_rt:
- ip_rt_put(rt);
-err:
- return -EBADMSG;
+
+out_err:
+ if (err == -ELOOP)
+ dev->stats.collisions++;
+ else
+ dev->stats.tx_carrier_errors++;
+
+ return err;
}
static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
unsigned int proto = ntohs(skb->protocol);
- struct gtp_pktinfo pktinfo;
+ struct gtp_dev *gtp = netdev_priv(dev);
+ struct pdp_ctx *pctx;
int err;
- /* Ensure there is sufficient headroom. */
- if (skb_cow_head(skb, dev->needed_headroom))
- goto tx_err;
-
- skb_reset_inner_headers(skb);
-
/* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */
rcu_read_lock();
switch (proto) {
- case ETH_P_IP:
- err = gtp_build_skb_ip4(skb, dev, &pktinfo);
+ case ETH_P_IP: {
+ struct iphdr *iph = ip_hdr(skb);
+
+ if (gtp->role == GTP_ROLE_SGSN)
+ pctx = ipv4_pdp_find(gtp, iph->saddr);
+ else
+ pctx = ipv4_pdp_find(gtp, iph->daddr);
+
+ if (!pctx) {
+ netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
+ &iph->daddr);
+ err = -ENOENT;
+ goto tx_err;
+ }
+
break;
+ }
default:
err = -EOPNOTSUPP;
- break;
+ goto tx_err;
}
- rcu_read_unlock();
+
+ netdev_dbg(dev, "found PDP context %p\n", pctx);
+
+ err = gtp_xmit(skb, dev, pctx);
if (err < 0)
goto tx_err;
- switch (proto) {
- case ETH_P_IP:
- netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI4 dst: %pI4\n",
- &pktinfo.iph->saddr, &pktinfo.iph->daddr);
- udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb,
- pktinfo.fl4.saddr, pktinfo.fl4.daddr,
- pktinfo.iph->tos,
- ip4_dst_hoplimit(&pktinfo.rt->dst),
- 0,
- pktinfo.gtph_port, pktinfo.gtph_port,
- true, false);
- break;
- }
+ rcu_read_unlock();
return NETDEV_TX_OK;
+
tx_err:
+ rcu_read_unlock();
dev->stats.tx_errors++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
@@ -874,6 +846,8 @@ static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
{
+ __be16 default_port = 0;
+
pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]);
pctx->af = AF_INET;
pctx->peer_addr_ip4.s_addr =
@@ -890,15 +864,22 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
pctx->u.v0.tid = nla_get_u64(info->attrs[GTPA_TID]);
pctx->u.v0.flow = nla_get_u16(info->attrs[GTPA_FLOW]);
pctx->hlen = sizeof(struct udphdr) + sizeof(struct gtp0_header);
+ default_port = htons(GTP0_PORT);
break;
case GTP_V1:
pctx->u.v1.i_tei = nla_get_u32(info->attrs[GTPA_I_TEI]);
pctx->u.v1.o_tei = nla_get_u32(info->attrs[GTPA_O_TEI]);
pctx->hlen = sizeof(struct udphdr) + sizeof(struct gtp1_header);
+ default_port = htons(GTP1U_PORT);
break;
default:
break;
}
+
+ if (info->attrs[GTPA_PORT])
+ pctx->gtp_port = nla_get_u16(info->attrs[GTPA_PORT]);
+ else
+ pctx->gtp_port = default_port;
}
static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index 57d1edb8efd9..b2283a5c6d7f 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -27,6 +27,7 @@ enum gtp_attrs {
GTPA_I_TEI, /* for GTPv1 only */
GTPA_O_TEI, /* for GTPv1 only */
GTPA_PAD,
+ GTPA_PORT,
__GTPA_MAX,
};
#define GTPA_MAX (__GTPA_MAX + 1)
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 08/12] gtp: Call function to update path mtu
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Replace mtu handling with call to __iptunnel_update_pmtu.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/gtp.c | 36 ++++++------------------------------
1 file changed, 6 insertions(+), 30 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index a6e2e0a1f424..bbb08f8849d3 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -53,6 +53,7 @@ struct pdp_ctx {
} v1;
} u;
u8 gtp_version;
+ u8 hlen;
u16 af;
struct in_addr ms_addr_ip4;
@@ -467,8 +468,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
struct iphdr *iph;
struct sock *sk;
__be32 saddr;
- __be16 df;
- int mtu;
/* Read the IP destination address and resolve the PDP context.
* Prepend PDP header with TEI/TID from PDP ctx.
@@ -514,37 +513,12 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
skb_dst_drop(skb);
- /* This is similar to tnl_update_pmtu(). */
- df = iph->frag_off;
- if (df) {
- mtu = dst_mtu(&rt->dst) - dev->hard_header_len -
- sizeof(struct iphdr) - sizeof(struct udphdr);
- switch (pctx->gtp_version) {
- case GTP_V0:
- mtu -= sizeof(struct gtp0_header);
- break;
- case GTP_V1:
- mtu -= sizeof(struct gtp1_header);
- break;
- }
- } else {
- mtu = dst_mtu(&rt->dst);
- }
-
- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu);
-
- if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
- mtu < ntohs(iph->tot_len)) {
- netdev_dbg(dev, "packet too big, fragmentation needed\n");
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
- goto err_rt;
- }
-
gtp_set_pktinfo_ipv4(pktinfo, sk, iph, pctx, rt, &fl4, dev);
gtp_push_header(skb, pktinfo);
+ __iptunnel_update_pmtu(dev, skb, &rt->dst, !!iph->frag_off, iph,
+ pctx->hlen, pctx->peer_addr_ip4.s_addr);
+
return 0;
err_rt:
ip_rt_put(rt);
@@ -915,10 +889,12 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
*/
pctx->u.v0.tid = nla_get_u64(info->attrs[GTPA_TID]);
pctx->u.v0.flow = nla_get_u16(info->attrs[GTPA_FLOW]);
+ pctx->hlen = sizeof(struct udphdr) + sizeof(struct gtp0_header);
break;
case GTP_V1:
pctx->u.v1.i_tei = nla_get_u32(info->attrs[GTPA_I_TEI]);
pctx->u.v1.o_tei = nla_get_u32(info->attrs[GTPA_O_TEI]);
+ pctx->hlen = sizeof(struct udphdr) + sizeof(struct gtp1_header);
break;
default:
break;
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 07/12] gtp: udp recv clean up
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Create separate UDP receive functions for GTP version 0 and version 1.
Set encap_rcv appropriately when configuring a socket.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/gtp.c | 100 ++++++++++++++++++++++++++----------------------------
1 file changed, 49 insertions(+), 51 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 00e5ea5cb935..a6e2e0a1f424 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -225,14 +225,20 @@ static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
return 0;
}
-/* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */
-static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
+/* UDP encapsulation receive handler for GTPv0-U . See net/ipv4/udp.c.
+ * Return codes: 0: success, <0: error, >0: pass up to userspace UDP socket.
+ */
+static int gtp0_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
+ struct gtp_dev *gtp = rcu_dereference_sk_user_data(sk);
unsigned int hdrlen = sizeof(struct udphdr) +
sizeof(struct gtp0_header);
struct gtp0_header *gtp0;
struct pdp_ctx *pctx;
+ if (!gtp)
+ goto pass;
+
if (!pskb_may_pull(skb, hdrlen))
goto drop;
@@ -244,26 +250,41 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
if (gtp0->type != GTP_TPDU)
goto pass;
+ netdev_dbg(gtp->dev, "received GTP0 packet\n");
+
pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid));
if (!pctx) {
netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
goto pass;
}
- return gtp_rx(pctx, skb, hdrlen, gtp->role);
+ if (!gtp_rx(pctx, skb, hdrlen, gtp->role)) {
+ /* Successfully received */
+ return 0;
+ }
+
drop:
- return -1;
+ kfree_skb(skb);
+ return 0;
+
pass:
return 1;
}
-static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
+/* UDP encapsulation receive handler for GTPv1-U . See net/ipv4/udp.c.
+ * Return codes: 0: success, <0: error, >0: pass up to userspace UDP socket.
+ */
+static int gtp1u_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
+ struct gtp_dev *gtp = rcu_dereference_sk_user_data(sk);
unsigned int hdrlen = sizeof(struct udphdr) +
sizeof(struct gtp1_header);
struct gtp1_header *gtp1;
struct pdp_ctx *pctx;
+ if (!gtp)
+ goto pass;
+
if (!pskb_may_pull(skb, hdrlen))
goto drop;
@@ -275,6 +296,8 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
if (gtp1->type != GTP_TPDU)
goto pass;
+ netdev_dbg(gtp->dev, "received GTP1 packet\n");
+
/* From 29.060: "This field shall be present if and only if any one or
* more of the S, PN and E flags are set.".
*
@@ -296,9 +319,15 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
goto drop;
}
- return gtp_rx(pctx, skb, hdrlen, gtp->role);
+ if (!gtp_rx(pctx, skb, hdrlen, gtp->role)) {
+ /* Successfully received */
+ return 0;
+ }
+
drop:
- return -1;
+ kfree_skb(skb);
+ return 0;
+
pass:
return 1;
}
@@ -329,49 +358,6 @@ static void gtp_encap_disable(struct gtp_dev *gtp)
gtp_encap_disable_sock(gtp->sk1u);
}
-/* UDP encapsulation receive handler. See net/ipv4/udp.c.
- * Return codes: 0: success, <0: error, >0: pass up to userspace UDP socket.
- */
-static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
-{
- struct gtp_dev *gtp;
- int ret = 0;
-
- gtp = rcu_dereference_sk_user_data(sk);
- if (!gtp)
- return 1;
-
- netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
-
- switch (udp_sk(sk)->encap_type) {
- case UDP_ENCAP_GTP0:
- netdev_dbg(gtp->dev, "received GTP0 packet\n");
- ret = gtp0_udp_encap_recv(gtp, skb);
- break;
- case UDP_ENCAP_GTP1U:
- netdev_dbg(gtp->dev, "received GTP1U packet\n");
- ret = gtp1u_udp_encap_recv(gtp, skb);
- break;
- default:
- ret = -1; /* Shouldn't happen. */
- }
-
- switch (ret) {
- case 1:
- netdev_dbg(gtp->dev, "pass up to the process\n");
- break;
- case 0:
- break;
- case -1:
- netdev_dbg(gtp->dev, "GTP packet has been dropped\n");
- kfree_skb(skb);
- ret = 0;
- break;
- }
-
- return ret;
-}
-
static int gtp_dev_init(struct net_device *dev)
{
struct gtp_dev *gtp = netdev_priv(dev);
@@ -824,9 +810,21 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
sk = sock->sk;
sock_hold(sk);
+ switch (type) {
+ case UDP_ENCAP_GTP0:
+ tuncfg.encap_rcv = gtp0_udp_encap_recv;
+ break;
+ case UDP_ENCAP_GTP1U:
+ tuncfg.encap_rcv = gtp1u_udp_encap_recv;
+ break;
+ default:
+ pr_debug("Unknown encap type %u\n", type);
+ sk = ERR_PTR(-EINVAL);
+ goto out_sock;
+ }
+
tuncfg.sk_user_data = gtp;
tuncfg.encap_type = type;
- tuncfg.encap_rcv = gtp_encap_recv;
tuncfg.encap_destroy = gtp_encap_destroy;
setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 06/12] gtp: Use goto for exceptions in gtp_udp_encap_recv funcs
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Consolidate return logic to make it easier to extend.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/gtp.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index f2aac5d01143..00e5ea5cb935 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -234,23 +234,27 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
struct pdp_ctx *pctx;
if (!pskb_may_pull(skb, hdrlen))
- return -1;
+ goto drop;
gtp0 = (struct gtp0_header *)(skb->data + sizeof(struct udphdr));
if ((gtp0->flags >> 5) != GTP_V0)
- return 1;
+ goto pass;
if (gtp0->type != GTP_TPDU)
- return 1;
+ goto pass;
pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid));
if (!pctx) {
netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
- return 1;
+ goto pass;
}
return gtp_rx(pctx, skb, hdrlen, gtp->role);
+drop:
+ return -1;
+pass:
+ return 1;
}
static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
@@ -261,15 +265,15 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
struct pdp_ctx *pctx;
if (!pskb_may_pull(skb, hdrlen))
- return -1;
+ goto drop;
gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
if ((gtp1->flags >> 5) != GTP_V1)
- return 1;
+ goto pass;
if (gtp1->type != GTP_TPDU)
- return 1;
+ goto pass;
/* From 29.060: "This field shall be present if and only if any one or
* more of the S, PN and E flags are set.".
@@ -282,17 +286,21 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
/* Make sure the header is larger enough, including extensions. */
if (!pskb_may_pull(skb, hdrlen))
- return -1;
+ goto drop;
gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid));
if (!pctx) {
netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
- return 1;
+ goto drop;
}
return gtp_rx(pctx, skb, hdrlen, gtp->role);
+drop:
+ return -1;
+pass:
+ return 1;
}
static void gtp_encap_destroy(struct sock *sk)
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 05/12] gtp: Change to use gro_cells
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Call gro_cells_receive instead of netif_rx.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/Kconfig | 1 +
drivers/net/gtp.c | 11 ++++++++++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index aba0d652095b..d4292d56bb02 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -214,6 +214,7 @@ config GTP
tristate "GPRS Tunneling Protocol datapath (GTP-U)"
depends on INET && NET_UDP_TUNNEL
select NET_IP_TUNNEL
+ select GRO_CELLS
---help---
This allows one to create gtp virtual interfaces that provide
the GPRS Tunneling Protocol datapath (GTP-U). This tunneling protocol
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 6dabd605607c..f2aac5d01143 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -80,6 +80,8 @@ struct gtp_dev {
unsigned int hash_size;
struct hlist_head *tid_hash;
struct hlist_head *addr_hash;
+
+ struct gro_cells gro_cells;
};
static unsigned int gtp_net_id __read_mostly;
@@ -189,6 +191,7 @@ static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
unsigned int hdrlen, unsigned int role)
{
+ struct gtp_dev *gtp = netdev_priv(pctx->dev);
struct pcpu_sw_netstats *stats;
if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
@@ -217,7 +220,8 @@ static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
stats->rx_bytes += skb->len;
u64_stats_update_end(&stats->syncp);
- netif_rx(skb);
+ gro_cells_receive(&gtp->gro_cells, skb);
+
return 0;
}
@@ -611,6 +615,8 @@ static const struct net_device_ops gtp_netdev_ops = {
static void gtp_link_setup(struct net_device *dev)
{
+ struct gtp_dev *gtp = netdev_priv(dev);
+
dev->netdev_ops = &gtp_netdev_ops;
dev->needs_free_netdev = true;
@@ -630,6 +636,8 @@ static void gtp_link_setup(struct net_device *dev)
sizeof(struct iphdr) +
sizeof(struct udphdr) +
sizeof(struct gtp0_header);
+
+ gro_cells_init(&gtp->gro_cells, dev);
}
static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
@@ -686,6 +694,7 @@ static void gtp_dellink(struct net_device *dev, struct list_head *head)
{
struct gtp_dev *gtp = netdev_priv(dev);
+ gro_cells_destroy(&gtp->gro_cells);
gtp_encap_disable(gtp);
gtp_hashtable_free(gtp);
list_del_rcu(&gtp->list);
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 04/12] iptunnel: Generalize tunnel update pmtu
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Add __iptunnel_update_pmtu exported function which does not take
an iptunnel argument but instead includes the fields from the
iptunnel structure as arguments which are needed in the function.
iptunnel_update_pmtu was modified to call __iptunnel_update_pmtu.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
include/net/ip_tunnels.h | 4 ++++
net/ipv4/ip_tunnel.c | 30 ++++++++++++++++++++----------
2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 2b05ae24f4f6..be675836d35a 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -317,6 +317,10 @@ static inline struct rtable *ip_tunnel_get_route(struct net_device *dev,
dst_cache, use_cache);
}
+int __iptunnel_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+ struct dst_entry *dst, __be16 df,
+ const struct iphdr *inner_iph, int hlen, u32 daddr);
+
struct ip_tunnel_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index c97525d8dff9..0ee84ca76e0f 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -506,17 +506,16 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t,
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
-static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
- struct rtable *rt, __be16 df,
- const struct iphdr *inner_iph)
+int __iptunnel_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+ struct dst_entry *dst, __be16 df,
+ const struct iphdr *inner_iph, int hlen, u32 daddr)
{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+ int pkt_size = skb->len - hlen - dev->hard_header_len;
int mtu;
if (df)
- mtu = dst_mtu(&rt->dst) - dev->hard_header_len
- - sizeof(struct iphdr) - tunnel->hlen;
+ mtu = dst_mtu(dst) - dev->hard_header_len
+ - sizeof(struct iphdr) - hlen;
else
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
@@ -538,8 +537,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
mtu >= IPV6_MIN_MTU) {
- if ((tunnel->parms.iph.daddr &&
- !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
+ if ((daddr && !ipv4_is_multicast(daddr)) ||
rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
@@ -555,6 +553,17 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
#endif
return 0;
}
+EXPORT_SYMBOL(__iptunnel_update_pmtu);
+
+static int iptunnel_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+ struct rtable *rt, __be16 df,
+ const struct iphdr *inner_iph)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ return __iptunnel_update_pmtu(dev, skb, &rt->dst, df, inner_iph,
+ tunnel->hlen, tunnel->parms.iph.daddr);
+}
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
{
@@ -739,7 +748,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
+ if (iptunnel_update_pmtu(dev, skb, rt, tnl_params->frag_off,
+ inner_iph)) {
ip_rt_put(rt);
goto tx_error;
}
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 03/12] gtp: Call common functions to get tunnel routes and add dst_cache
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Call ip_tunnel_get_route and add a dst_cache to the PDP context, which
should improve performance by obviating the need to perform a route
lookup on every packet.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/gtp.c | 62 +++++++++++++++++++++++++++++++------------------------
1 file changed, 35 insertions(+), 27 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index f38e32a7ec9c..6dabd605607c 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -63,6 +63,8 @@ struct pdp_ctx {
atomic_t tx_seq;
struct rcu_head rcu_head;
+
+ struct dst_cache dst_cache;
};
/* One instance of the GTP device. */
@@ -379,20 +381,6 @@ static void gtp_dev_uninit(struct net_device *dev)
free_percpu(dev->tstats);
}
-static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4,
- const struct sock *sk,
- __be32 daddr)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = sk->sk_bound_dev_if;
- fl4->daddr = daddr;
- fl4->saddr = inet_sk(sk)->inet_saddr;
- fl4->flowi4_tos = RT_CONN_FLAGS(sk);
- fl4->flowi4_proto = sk->sk_protocol;
-
- return ip_route_output_key(sock_net(sk), fl4);
-}
-
static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
{
int payload_len = skb->len;
@@ -479,6 +467,8 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
struct rtable *rt;
struct flowi4 fl4;
struct iphdr *iph;
+ struct sock *sk;
+ __be32 saddr;
__be16 df;
int mtu;
@@ -498,19 +488,30 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
}
netdev_dbg(dev, "found PDP context %p\n", pctx);
- rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr);
- if (IS_ERR(rt)) {
- netdev_dbg(dev, "no route to SSGN %pI4\n",
- &pctx->peer_addr_ip4.s_addr);
- dev->stats.tx_carrier_errors++;
- goto err;
- }
+ sk = pctx->sk;
+ saddr = inet_sk(sk)->inet_saddr;
- if (rt->dst.dev == dev) {
- netdev_dbg(dev, "circular route to SSGN %pI4\n",
- &pctx->peer_addr_ip4.s_addr);
- dev->stats.collisions++;
- goto err_rt;
+ /* Source address returned by route lookup is ignored since
+ * we get the address from a socket.
+ */
+ rt = ip_tunnel_get_route(dev, skb, sk->sk_protocol,
+ sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
+ pctx->peer_addr_ip4.s_addr, &saddr,
+ pktinfo->gtph_port, pktinfo->gtph_port,
+ &pctx->dst_cache, NULL);
+
+ if (IS_ERR(rt)) {
+ if (rt == ERR_PTR(-ELOOP)) {
+ netdev_dbg(dev, "circular route to SSGN %pI4\n",
+ &pctx->peer_addr_ip4.s_addr);
+ dev->stats.collisions++;
+ goto err_rt;
+ } else {
+ netdev_dbg(dev, "no route to SSGN %pI4\n",
+ &pctx->peer_addr_ip4.s_addr);
+ dev->stats.tx_carrier_errors++;
+ goto err;
+ }
}
skb_dst_drop(skb);
@@ -543,7 +544,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
goto err_rt;
}
- gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, iph, pctx, rt, &fl4, dev);
+ gtp_set_pktinfo_ipv4(pktinfo, sk, iph, pctx, rt, &fl4, dev);
gtp_push_header(skb, pktinfo);
return 0;
@@ -917,6 +918,7 @@ static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
struct pdp_ctx *pctx;
bool found = false;
__be32 ms_addr;
+ int err;
ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size;
@@ -951,6 +953,12 @@ static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
if (pctx == NULL)
return -ENOMEM;
+ err = dst_cache_init(&pctx->dst_cache, GFP_KERNEL);
+ if (err) {
+ kfree(pctx);
+ return err;
+ }
+
sock_hold(sk);
pctx->sk = sk;
pctx->dev = gtp->dev;
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 02/12] vxlan: Call common functions to get tunnel routes
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
Call ip_tunnel_get_route and ip6_tnl_get_route to handle getting a route
and dealing with the dst_cache.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
drivers/net/vxlan.c | 84 ++++-------------------------------------------------
1 file changed, 5 insertions(+), 79 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index d7c49cf1d5e9..810caa9adf37 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1867,47 +1867,11 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device
struct dst_cache *dst_cache,
const struct ip_tunnel_info *info)
{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
- struct rtable *rt = NULL;
- struct flowi4 fl4;
-
if (!sock4)
return ERR_PTR(-EIO);
- if (tos && !info)
- use_cache = false;
- if (use_cache) {
- rt = dst_cache_get_ip4(dst_cache, saddr);
- if (rt)
- return rt;
- }
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_oif = oif;
- fl4.flowi4_tos = RT_TOS(tos);
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.daddr = daddr;
- fl4.saddr = *saddr;
- fl4.fl4_dport = dport;
- fl4.fl4_sport = sport;
-
- rt = ip_route_output_key(vxlan->net, &fl4);
- if (likely(!IS_ERR(rt))) {
- if (rt->dst.dev == dev) {
- netdev_dbg(dev, "circular route to %pI4\n", &daddr);
- ip_rt_put(rt);
- return ERR_PTR(-ELOOP);
- }
-
- *saddr = fl4.saddr;
- if (use_cache)
- dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
- } else {
- netdev_dbg(dev, "no route to %pI4\n", &daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- return rt;
+ return ip_tunnel_get_route(dev, skb, IPPROTO_UDP, oif, tos, daddr,
+ saddr, dport, sport, dst_cache, info);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -1922,50 +1886,12 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct dst_cache *dst_cache,
const struct ip_tunnel_info *info)
{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
- struct dst_entry *ndst;
- struct flowi6 fl6;
- int err;
-
if (!sock6)
return ERR_PTR(-EIO);
- if (tos && !info)
- use_cache = false;
- if (use_cache) {
- ndst = dst_cache_get_ip6(dst_cache, saddr);
- if (ndst)
- return ndst;
- }
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = oif;
- fl6.daddr = *daddr;
- fl6.saddr = *saddr;
- fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = IPPROTO_UDP;
- fl6.fl6_dport = dport;
- fl6.fl6_sport = sport;
-
- err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
- sock6->sock->sk,
- &ndst, &fl6);
- if (unlikely(err < 0)) {
- netdev_dbg(dev, "no route to %pI6\n", daddr);
- return ERR_PTR(-ENETUNREACH);
- }
-
- if (unlikely(ndst->dev == dev)) {
- netdev_dbg(dev, "circular route to %pI6\n", daddr);
- dst_release(ndst);
- return ERR_PTR(-ELOOP);
- }
-
- *saddr = fl6.saddr;
- if (use_cache)
- dst_cache_set_ip6(dst_cache, ndst, saddr);
- return ndst;
+ return ip6_tnl_get_route(dev, skb, sock6->sock->sk, IPPROTO_UDP, oif,
+ tos, label, daddr, saddr, dport, sport,
+ dst_cache, info);
}
#endif
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 01/12] iptunnel: Add common functions to get a tunnel route
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170927045803.2477-1-tom@quantonium.net>
ip_tunnel_get_route and ip6_tnl_get_route are created to return
routes for a tunnel. These functions are derived from the VXLAN
functions.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
include/net/ip6_tunnel.h | 33 +++++++++++++++++++++++++++++++++
include/net/ip_tunnels.h | 32 ++++++++++++++++++++++++++++++++
net/ipv4/ip_tunnel.c | 40 ++++++++++++++++++++++++++++++++++++++++
net/ipv6/route.c | 42 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 147 insertions(+)
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 08fbc7f7d8d7..f84325aacdaf 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -142,6 +142,39 @@ __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr,
struct net *ip6_tnl_get_link_net(const struct net_device *dev);
int ip6_tnl_get_iflink(const struct net_device *dev);
int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu);
+struct dst_entry *__ip6_tnl_get_route(struct net_device *dev,
+ struct sk_buff *skb, struct sock *sk,
+ u8 proto, int oif, u8 tos, __be32 label,
+ const struct in6_addr *daddr,
+ struct in6_addr *saddr,
+ __be16 dport, __be16 sport,
+ struct dst_cache *dst_cache,
+ bool use_cache);
+
+static inline struct dst_entry *ip6_tnl_get_route(struct net_device *dev,
+ struct sk_buff *skb, struct sock *sk, u8 proto,
+ int oif, u8 tos, __be32 label,
+ const struct in6_addr *daddr,
+ struct in6_addr *saddr,
+ __be16 dport, __be16 sport,
+ struct dst_cache *dst_cache,
+ const struct ip_tunnel_info *info)
+{
+ bool use_cache = (ip_tunnel_dst_cache_usable(skb, info) &&
+ (!tos || info));
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (use_cache) {
+ struct dst_entry *ndst = dst_cache_get_ip6(dst_cache, saddr);
+
+ if (ndst)
+ return ndst;
+ }
+#endif
+
+ return __ip6_tnl_get_route(dev, skb, sk, proto, oif, tos, label, daddr,
+ saddr, dport, sport, dst_cache, use_cache);
+}
static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
struct net_device *dev)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index b41a1e057fce..2b05ae24f4f6 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -285,6 +285,38 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
+struct rtable *__ip_tunnel_get_route(struct net_device *dev,
+ struct sk_buff *skb, u8 proto,
+ int oif, u8 tos,
+ __be32 daddr, __be32 *saddr,
+ __be16 dport, __be16 sport,
+ struct dst_cache *dst_cache,
+ bool use_cache);
+
+static inline struct rtable *ip_tunnel_get_route(struct net_device *dev,
+ struct sk_buff *skb, u8 proto,
+ int oif, u8 tos,
+ __be32 daddr, __be32 *saddr,
+ __be16 dport, __be16 sport,
+ struct dst_cache *dst_cache,
+ const struct ip_tunnel_info *info)
+{
+ bool use_cache = (ip_tunnel_dst_cache_usable(skb, info) &&
+ (!tos || info));
+
+ if (use_cache) {
+ struct rtable *rt;
+
+ rt = dst_cache_get_ip4(dst_cache, saddr);
+ if (rt)
+ return rt;
+ }
+
+ return __ip_tunnel_get_route(dev, skb, proto, oif, tos,
+ daddr, saddr, dport, sport,
+ dst_cache, use_cache);
+}
+
struct ip_tunnel_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fe6fee728ce4..c97525d8dff9 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -935,6 +935,46 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
+struct rtable *__ip_tunnel_get_route(struct net_device *dev,
+ struct sk_buff *skb, u8 proto,
+ int oif, u8 tos,
+ __be32 daddr, __be32 *saddr,
+ __be16 dport, __be16 sport,
+ struct dst_cache *dst_cache,
+ bool use_cache)
+{
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
+
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.flowi4_oif = oif;
+ fl4.flowi4_tos = RT_TOS(tos);
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_proto = proto;
+ fl4.daddr = daddr;
+ fl4.saddr = *saddr;
+ fl4.fl4_dport = dport;
+ fl4.fl4_sport = sport;
+
+ rt = ip_route_output_key(dev_net(dev), &fl4);
+ if (likely(!IS_ERR(rt))) {
+ if (rt->dst.dev == dev) {
+ netdev_dbg(dev, "circular route to %pI4\n", &daddr);
+ ip_rt_put(rt);
+ return ERR_PTR(-ELOOP);
+ }
+
+ *saddr = fl4.saddr;
+ if (use_cache)
+ dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
+ } else {
+ netdev_dbg(dev, "no route to %pI4\n", &daddr);
+ return ERR_PTR(-ENETUNREACH);
+ }
+ return rt;
+}
+EXPORT_SYMBOL_GPL(__ip_tunnel_get_route);
+
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 26cc9f483b6d..57cb8649b4eb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1884,6 +1884,48 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
+struct dst_entry *__ip6_tnl_get_route(struct net_device *dev,
+ struct sk_buff *skb, struct sock *sk,
+ u8 proto, int oif, u8 tos, __be32 label,
+ const struct in6_addr *daddr,
+ struct in6_addr *saddr,
+ __be16 dport, __be16 sport,
+ struct dst_cache *dst_cache,
+ bool use_cache)
+{
+ struct dst_entry *ndst;
+ struct flowi6 fl6;
+ int err;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.daddr = *daddr;
+ fl6.saddr = *saddr;
+ fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = proto;
+ fl6.fl6_dport = dport;
+ fl6.fl6_sport = sport;
+
+ err = ipv6_stub->ipv6_dst_lookup(dev_net(dev), sk, &ndst, &fl6);
+ if (unlikely(err < 0)) {
+ netdev_dbg(dev, "no route to %pI6\n", daddr);
+ return ERR_PTR(-ENETUNREACH);
+ }
+
+ if (unlikely(ndst->dev == dev)) {
+ netdev_dbg(dev, "circular route to %pI6\n", daddr);
+ dst_release(ndst);
+ return ERR_PTR(-ELOOP);
+ }
+
+ *saddr = fl6.saddr;
+ if (use_cache)
+ dst_cache_set_ip6(dst_cache, ndst, saddr);
+ return ndst;
+}
+EXPORT_SYMBOL_GPL(__ip6_tnl_get_route);
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
--
2.11.0
^ permalink raw reply related
* [PATCH v4 net-next 00/12] gtp: Additional feature support - Part I
From: Tom Herbert @ 2017-09-27 4:57 UTC (permalink / raw)
To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
This patch set builds upon the initial GTP implementation to make
support closer to that enjoyed by other encapsulation protocols.
The major items are:
- Experimental IPv6 support
- Configurable networking interfaces so that GTP kernel can be
used and tested without needing GSN network emulation (i.e. no user
space daemon needed).
- Addition of a dst_cache in the GTP structure and other cleanup
Additionally, this patch set also includes:
- Common functions to get a route for an IP tunnel
For IPv6 support, the mobile subscriber needs to allow IPv6 addresses,
and the remote endpoint can be IPv6.
For configurable interfaces, configuration is added to allow an
alternate means to configure a GTP and device. This follows the
typical UDP encapsulation model of specifying a listener port for
receive, and a remote address and port for transmit.
Configuration is performed by iproute2/ip. I will post that
in a subsequent patch set.
Tested:
Configured the matrix of IPv4/IPv6 mobile subscriber, IPv4/IPv6 remote
peer, and GTP version 0 and 1 (eight combinations). Observed
connectivity and functional netperf. Also, tested VXLAN for
regression.
Test using openggsn with ggsn and kernel module on one side and
emulated sgsn on the other. Observed connectivity and
functional netperf.
v2:
- Split the original patch to post in parts in order to make
review more manageable
- Make IPv6 support experimental with a configuration option for it
- Prepend hash functions with gtp
- Generalize iptunnel update path MTU function and call it from gtp
instead using custom code
- Split original patch cleaning up udp_recv into several for easier
review
v3: Properly include netdev on cc
v4:
- Move __ip6_tnl_get_route to ipv6/route.c to avoid creating a dependency on ip6_tunnel
- Add "select GRO_CELLS" to Kconfig for GTP
Tom Herbert (12):
iptunnel: Add common functions to get a tunnel route
vxlan: Call common functions to get tunnel routes
gtp: Call common functions to get tunnel routes and add dst_cache
iptunnel: Generalize tunnel update pmtu
gtp: Change to use gro_cells
gtp: Use goto for exceptions in gtp_udp_encap_recv funcs
gtp: udp recv clean up
gtp: Call function to update path mtu
gtp: Eliminate pktinfo and add port configuration
gtp: Experimental encapsulation of IPv6 packets
gtp: Experimental support encpasulating over IPv6
gtp: Allow configuring GTP interface as standalone
drivers/net/Kconfig | 13 +-
drivers/net/gtp.c | 1043 ++++++++++++++++++++++++++++++------------
drivers/net/vxlan.c | 84 +---
include/net/ip6_tunnel.h | 33 ++
include/net/ip_tunnels.h | 36 ++
include/uapi/linux/gtp.h | 8 +
include/uapi/linux/if_link.h | 3 +
net/ipv4/ip_tunnel.c | 70 ++-
net/ipv6/route.c | 42 ++
9 files changed, 945 insertions(+), 387 deletions(-)
--
2.11.0
^ permalink raw reply
* Re: [PATCH] net: stmmac: Meet alignment requirements for DMA
From: David Miller @ 2017-09-27 4:53 UTC (permalink / raw)
To: paul.burton
Cc: matt.redfearn, netdev, alexandre.torgue, peppe.cavallaro,
linux-kernel, linux-mips, james.hogan
In-Reply-To: <2520219.WSsBr6LeCR@np-p-burton>
From: Paul Burton <paul.burton@imgtec.com>
Date: Tue, 26 Sep 2017 21:30:56 -0700
> Nobody said that you are required to do anything, I suggested that
> it would be beneficial if you were to suggest a change to the
> documented DMA API such that it allows your usage where it currently
> does not.
Documentation is often wrong and it is here. What 200+ drivers
actually do and depend upon trumps a simple text document.
The requirement is that the memory remains quiescent on the cpu side
while the device messes with it. And that this quiescence requirement
may or may not be on a cache line basis.
There is absolutely no requirement that the buffers themselves are
cache line aligned.
In fact, receive buffers for networking are intentionally 2-byte
aligned in order for the ipv4 headers to be naturally 32-bit aligned.
Cache line aligning receive buffers will actually make some
architectures trap because of the bad alignment.
So see, this cache line alignment requirement is pure madness from
just about any perspective whatsoever.
^ permalink raw reply
* Re: [PATCH v2 16/16] net: Add support for networking over Thunderbolt cable
From: David Miller @ 2017-09-27 4:47 UTC (permalink / raw)
To: mika.westerberg
Cc: gregkh, andreas.noever, michael.jamet, yehezkel.bernat,
amir.jer.levy, Mario.Limonciello, lukas, andriy.shevchenko,
andrew, linux-kernel, netdev
In-Reply-To: <20170925110738.68382-17-mika.westerberg@linux.intel.com>
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 25 Sep 2017 14:07:38 +0300
> +struct thunderbolt_ip_header {
> + u32 route_hi;
> + u32 route_lo;
> + u32 length_sn;
> + uuid_t uuid;
> + uuid_t initiator_uuid;
> + uuid_t target_uuid;
> + u32 type;
> + u32 command_id;
> +} __packed;
Again, the __packed attribute should not be necessary and needs to be
removed.
> +static void tbnet_pull_tail(struct sk_buff *skb)
> +{
> + skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
> + unsigned int pull_len;
> + void *hdr;
> +
> + hdr = skb_frag_address(frag);
> + pull_len = eth_get_headlen(hdr, TBNET_RX_HDR_SIZE);
> +
> + /* Align pull length to size of long to optimize memcpy performance */
> + skb_copy_to_linear_data(skb, hdr, ALIGN(pull_len, sizeof(long)));
You do not need to copy here, instead you can build SKB's where the
skb->data points directly at the head of your first frag page memory.
See build_skb().
> + skb = net->skb;
> + if (!skb) {
> + skb = netdev_alloc_skb_ip_align(net->dev,
> + TBNET_RX_HDR_SIZE);
> + net->skb = skb;
> + }
> + if (!skb)
> + break;
> +
> + /* Single small buffer we can copy directly to the
> + * header part of the skb.
> + */
> + if (hdr->frame_count == 1 && frame_size <= TBNET_RX_HDR_SIZE) {
Here you would use build_skb() instead of netdev_alloc_skb*() for the first
frag, and keep the existing code tacking on subsequent frags using
skb_add_rx_frag().
> + ret = register_netdev(dev);
> + if (ret) {
> + free_netdev(dev);
> + return ret;
> + }
> +
> + net->handler.uuid = &tbnet_svc_uuid;
> + net->handler.callback = tbnet_handle_packet,
> + net->handler.data = net;
> + tb_register_protocol_handler(&net->handler);
> +
> + tb_service_set_drvdata(svc, net);
There could be races here.
At the exact moment you call register_netdev(), your device can be
brought UP, packets transmitted, etc. Your entire set of driver code
paths can be executed.
The rest of those initializations after register_netdev() probably
are needed by the rest of the driver to function properly, so may
need to happen before register_netdev() publishes the device to the
entire world.
^ permalink raw reply
* Re: [PATCH v2 06/16] thunderbolt: Add support for XDomain discovery protocol
From: David Miller @ 2017-09-27 4:35 UTC (permalink / raw)
To: mika.westerberg
Cc: gregkh, andreas.noever, michael.jamet, yehezkel.bernat,
amir.jer.levy, Mario.Limonciello, lukas, andriy.shevchenko,
andrew, linux-kernel, netdev
In-Reply-To: <20170925110738.68382-7-mika.westerberg@linux.intel.com>
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 25 Sep 2017 14:07:28 +0300
> +struct icm_fr_event_xdomain_connected {
> + struct icm_pkg_header hdr;
> + u16 reserved;
> + u16 link_info;
> + uuid_t remote_uuid;
> + uuid_t local_uuid;
> + u32 local_route_hi;
> + u32 local_route_lo;
> + u32 remote_route_hi;
> + u32 remote_route_lo;
> +} __packed;
> +
> +struct icm_fr_event_xdomain_disconnected {
> + struct icm_pkg_header hdr;
> + u16 reserved;
> + u16 link_info;
> + uuid_t remote_uuid;
> +} __packed;
> +
> struct icm_fr_pkg_add_device_key {
> struct icm_pkg_header hdr;
> uuid_t ep_uuid;
Again, __packed should be avoided unless absolutely necessary.
Thank you.
^ permalink raw reply
* Re: [PATCH v2 02/16] thunderbolt: Add support for XDomain properties
From: David Miller @ 2017-09-27 4:33 UTC (permalink / raw)
To: mika.westerberg
Cc: gregkh, andreas.noever, michael.jamet, yehezkel.bernat,
amir.jer.levy, Mario.Limonciello, lukas, andriy.shevchenko,
andrew, linux-kernel, netdev
In-Reply-To: <20170925110738.68382-3-mika.westerberg@linux.intel.com>
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 25 Sep 2017 14:07:24 +0300
> +struct tb_property_entry {
> + u32 key_hi;
> + u32 key_lo;
> + u16 length;
> + u8 reserved;
> + u8 type;
> + u32 value;
> +} __packed;
> +
> +struct tb_property_rootdir_entry {
> + u32 magic;
> + u32 length;
> + struct tb_property_entry entries[];
> +} __packed;
> +
> +struct tb_property_dir_entry {
> + u32 uuid[4];
> + struct tb_property_entry entries[];
> +} __packed;
There is no apparent need for __packed here, and __packed should be
avoided unless absolutely necessary as it pessimizes the code
significantly on some architectures.
Please remove __packed from these datastructures unless you can
prove it is absolutely needed and, in such case, please document
in a comment why that requirement exists. Because from the layout
of these types, everything will be packed in just fine without
__packed.
Thank you.
^ permalink raw reply
* [PATCH] net-ipv6: add support for sockopt(SOL_IPV6, IPV6_FREEBIND)
From: Maciej Żenczykowski @ 2017-09-27 4:32 UTC (permalink / raw)
To: Maciej Żenczykowski, David S . Miller; +Cc: netdev
From: Maciej Żenczykowski <maze@google.com>
So far we've been relying on sockopt(SOL_IP, IP_FREEBIND) being usable
even on IPv6 sockets.
However, it turns out it is perfectly reasonable to want to set freebind
on an AF_INET6 SOCK_RAW socket - but there is no way to set any SOL_IP
socket option on such a socket (they're all blindly errored out).
One use case for this is to allow spoofing src ip on a raw socket
via sendmsg cmsg.
Tested:
built, and booted
# python
>>> import socket
>>> SOL_IP = socket.SOL_IP
>>> SOL_IPV6 = socket.IPPROTO_IPV6
>>> IP_FREEBIND = 15
>>> IPV6_FREEBIND = 78
>>> s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM, 0)
>>> s.getsockopt(SOL_IP, IP_FREEBIND)
0
>>> s.getsockopt(SOL_IPV6, IPV6_FREEBIND)
0
>>> s.setsockopt(SOL_IPV6, IPV6_FREEBIND, 1)
>>> s.getsockopt(SOL_IP, IP_FREEBIND)
1
>>> s.getsockopt(SOL_IPV6, IPV6_FREEBIND)
1
Signed-off-by: Maciej Żenczykowski <maze@google.com>
---
include/uapi/linux/in6.h | 1 +
net/ipv6/ipv6_sockglue.c | 12 ++++++++++++
2 files changed, 13 insertions(+)
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 46444f8fbee4..4f8f3eb0699f 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -284,6 +284,7 @@ struct in6_flowlabel_req {
#define IPV6_TRANSPARENT 75
#define IPV6_UNICAST_IF 76
#define IPV6_RECVFRAGSIZE 77
+#define IPV6_FREEBIND 78
/*
* Multicast Routing:
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a5e466d4e093..b9404feabd78 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -377,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
break;
+ case IPV6_FREEBIND:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ /* we also don't have a separate freebind bit for IPV6 */
+ inet_sk(sk)->freebind = valbool;
+ retv = 0;
+ break;
+
case IPV6_RECVORIGDSTADDR:
if (optlen < sizeof(int))
goto e_inval;
@@ -1214,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = inet_sk(sk)->transparent;
break;
+ case IPV6_FREEBIND:
+ val = inet_sk(sk)->freebind;
+ break;
+
case IPV6_RECVORIGDSTADDR:
val = np->rxopt.bits.rxorigdstaddr;
break;
--
2.14.1.992.g2c7b836f3a-goog
^ permalink raw reply related
* Re: [PATCH] net: stmmac: Meet alignment requirements for DMA
From: Paul Burton @ 2017-09-27 4:30 UTC (permalink / raw)
To: David Miller
Cc: matt.redfearn, netdev, alexandre.torgue, peppe.cavallaro,
linux-kernel, linux-mips, james.hogan
In-Reply-To: <20170926.195244.506518182147628099.davem@davemloft.net>
[-- Attachment #1: Type: text/plain, Size: 1433 bytes --]
Hi David,
On Tuesday, 26 September 2017 19:52:44 PDT David Miller wrote:
> From: Paul Burton <paul.burton@imgtec.com>
> Date: Tue, 26 Sep 2017 14:30:33 -0700
>
> > I'd suggest that at a minimum if you're unwilling to obey the API as
> > described in Documentation/DMA-API.txt then it would be beneficial
> > if you could propose a change to it such that it works for you, and
> > perhaps we can extend the API & its documentation to allow your
> > usage whilst also allowing us to catch broken uses.
>
> The networking driver code works fine as is.
>
> I also didn't write that ill-advised documentation in the DMA docs,
> nor the non-merged new MIPS assertion.
>
> So I'm trying to figure out on what basis I am required to do
> anything.
>
> Thank you.
Nobody said you wrote the documentation, but you do maintain code which
disobeys the documented DMA API & now you're being an ass about it
unnecessarily.
Nobody said that you are required to do anything, I suggested that it would be
beneficial if you were to suggest a change to the documented DMA API such that
it allows your usage where it currently does not. If you don't want to have
any input into that, and you actually think that your current approach of
ignoring the documented API is the best path forwards, then we're probably
done here & I'll be making a note to avoid yourself & anything under net/ to
whatever extent is possible...
Thanks,
Paul
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH net-next 0/7] nfp: flower vxlan tunnel offload
From: David Miller @ 2017-09-27 4:29 UTC (permalink / raw)
To: simon.horman; +Cc: jakub.kicinski, netdev, oss-drivers
In-Reply-To: <1506335021-32024-1-git-send-email-simon.horman@netronome.com>
From: Simon Horman <simon.horman@netronome.com>
Date: Mon, 25 Sep 2017 12:23:34 +0200
> From: Simon Horman <simon.horman@netronome.com>
>
> John says:
>
> This patch set allows offloading of TC flower match and set tunnel fields
> to the NFP. The initial focus is on VXLAN traffic. Due to the current
> state of the NFP firmware, only VXLAN traffic on well known port 4789 is
> handled. The match and action fields must explicity set this value to be
> supported. Tunnel end point information is also offloaded to the NFP for
> both encapsulation and decapsulation. The NFP expects 3 separate data sets
> to be supplied.
...
Series applied, thanks.
I see there is some discussion about ipv6 flow dissector key handling
and ND keepalives, but those should be addressable in follow-on changes.
Thanks.
^ permalink raw reply
* Re: [PATCH net v2] sctp: Fix a big endian bug in sctp_diag_dump()
From: David Miller @ 2017-09-27 4:17 UTC (permalink / raw)
To: dan.carpenter
Cc: vyasevich, lucien.xin, nhorman, linux-sctp, netdev,
kernel-janitors
In-Reply-To: <20170925101926.db4f6x4hblh7tcvo@mwanda>
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 25 Sep 2017 13:19:26 +0300
> The sctp_for_each_transport() function takes an pointer to int. The
> cb->args[] array holds longs so it's only using the high 32 bits. It
> works on little endian system but will break on big endian 64 bit
> machines.
>
> Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump")
> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
> ---
> v2: The v1 patch changed the function to take a long pointer, but v2
> just changes the caller.
Applied, thanks.
^ permalink raw reply
* Re: [PATCH net] net/ncsi: Don't assume last available channel exists
From: Samuel Mendoza-Jonas @ 2017-09-27 4:12 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-kernel, Benjamin Herrenschmidt
In-Reply-To: <20170921.181111.2086501653519863373.davem@davemloft.net>
On Thu, 2017-09-21 at 18:11 -0700, David Miller wrote:
> From: Samuel Mendoza-Jonas <sam@mendozajonas.com>
> Date: Fri, 22 Sep 2017 11:00:00 +1000
>
> > If we haven't configured a channel yet (or are in the process of doing
> > so) we won't have a hot_channel - does it make more sense to
> > - check against the hot_channel as currently done,
> > - only check the filter size at configure time for /each/ channel,
> > - only conditionally enable the .ndo_vlan_rx_add_vid net_device callback
> > once we've configured a channel (eg. for ftgmac100 in the
> > ftgmac100_ncsi_handler() callback?)
>
> The last isn't so feasible.
>
> The device shouldn't be marked attached until a channel is available,
> because it seems like communication cannot occur until one is. Right?
Yes that's right.
>
> You could experiment with netif_device_detach()/netif_device_attach().
>
> When the device is in the detached state, callbacks such as
> ->ndo_vlan_rx_add_vid() will not be invoked.
This looked like the way at first, but _detach() ceases any tx/rx on the
interface right?
NCSI still needs the interface to be active since the 'channels' are on a
separate network controller that the interface is connected to, eg on the
machines I'm using:
BMC 'Host' network controller
---------------------- ----------------------------
|ftgmac100 interface | <---- NCSI Link ----> | BCM5719 interface | --> external interface
---------------------- ----------------------------
Looking at the NCSI init path I believe we're guaranteed to have an ndp
struct by the time ndo_vlan_rx_add_vid() is called, making some of those
checks overly cautious. It might be easiest to just track new vids as we
see them (up to the NCSI spec limit), and then deal with configured
channels on a case by case basis since their limits can be different.
I'll work on a V2 but hopefully I haven't misinterpreted
_detach()/_attach() :)
Sam
^ permalink raw reply
* [PATCH] net-ipv6: remove unused IP6_ECN_clear() function
From: Maciej Żenczykowski @ 2017-09-27 3:37 UTC (permalink / raw)
To: Maciej Żenczykowski, David S . Miller; +Cc: netdev
From: Maciej Żenczykowski <maze@google.com>
This function is unused, and furthermore it is buggy since it suffers
from the same issue that requires IP6_ECN_set_ce() to take a pointer
to the skb so that it may (in case of CHECKSUM_COMPLETE) update skb->csum.
Instead of fixing it, let's just outright remove it.
Tested: builds, and 'git grep IP6_ECN_clear' comes up empty
Signed-off-by: Maciej Żenczykowski <maze@google.com>
---
include/net/inet_ecn.h | 5 -----
1 file changed, 5 deletions(-)
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index dce2d586d9ce..f5ff16d72fe6 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -133,11 +133,6 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
return 1;
}
-static inline void IP6_ECN_clear(struct ipv6hdr *iph)
-{
- *(__be32*)iph &= ~htonl(INET_ECN_MASK << 20);
-}
-
static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
{
dscp &= ~INET_ECN_MASK;
--
2.14.1.992.g2c7b836f3a-goog
^ permalink raw reply related
* [PATCH v5 3/4] ipv4: Namespaceify tcp_fastopen_key knob
From: Haishuang Yan @ 2017-09-27 3:35 UTC (permalink / raw)
To: David S. Miller, Alexey Kuznetsov, Eric Dumazet, Wei Wang,
Luca BRUNO
Cc: netdev, linux-kernel, Haishuang Yan
In-Reply-To: <1506483343-11544-1-git-send-email-yanhaishuang@cmss.chinamobile.com>
Applications in different namespaces might require a tcp_fastopen_key
that is different from, and independent of, the host's.
David Miller pointed out there is a leak without releasing the context
of tcp_fastopen_key during netns teardown. So add the release action in
exit_batch path.
Tested:
1. Container namespace:
# cat /proc/sys/net/ipv4/tcp_fastopen_key:
2817fff2-f803cf97-eadfd1f3-78c0992b
cookie key in tcp syn packets:
Fast Open Cookie
Kind: TCP Fast Open Cookie (34)
Length: 10
Fast Open Cookie: 1e5dd82a8c492ca9
2. Host:
# cat /proc/sys/net/ipv4/tcp_fastopen_key:
107d7c5f-68eb2ac7-02fb06e6-ed341702
cookie key in tcp syn packets:
Fast Open Cookie
Kind: TCP Fast Open Cookie (34)
Length: 10
Fast Open Cookie: e213c02bf0afbc8a
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
---
include/net/netns/ipv4.h | 4 +++
include/net/tcp.h | 6 ++---
net/ipv4/af_inet.c | 2 +-
net/ipv4/sysctl_net_ipv4.c | 21 ++++++++-------
net/ipv4/tcp.c | 2 +-
net/ipv4/tcp_fastopen.c | 64 +++++++++++++++++++++++++++++++---------------
net/ipv4/tcp_ipv4.c | 6 +++++
7 files changed, 70 insertions(+), 35 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ce6dde0..66b8335 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -36,6 +36,8 @@ struct inet_timewait_death_row {
int sysctl_max_tw_buckets;
};
+struct tcp_fastopen_context;
+
struct netns_ipv4 {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -128,6 +130,8 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
+ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+ spinlock_t tcp_fastopen_ctx_lock;
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 061c128..e27bd18 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1556,13 +1556,13 @@ struct tcp_fastopen_request {
};
void tcp_free_fastopen_req(struct tcp_sock *tp);
-extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_ctx_destroy(struct net *net);
+int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(void);
+void tcp_fastopen_init_key_once(struct net *net);
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e73ce79..43a1bbe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog)
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
- tcp_fastopen_init_key_once();
+ tcp_fastopen_init_key_once(sock_net(sk));
}
err = inet_csk_listen_start(sk, backlog);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index f6324ea..20e19fe 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -251,10 +251,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret;
}
-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
int ret;
@@ -265,7 +267,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
return -ENOMEM;
rcu_read_lock();
- ctxt = rcu_dereference(tcp_fastopen_ctx);
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
@@ -282,7 +284,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
ret = -EINVAL;
goto bad_key;
}
- tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+ tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
@@ -396,12 +398,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_fastopen_key",
- .mode = 0600,
- .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
- .proc_handler = proc_tcp_fastopen_key,
- },
- {
.procname = "tcp_fastopen_blackhole_timeout_sec",
.data = &sysctl_tcp_fastopen_blackhole_timeout,
.maxlen = sizeof(int),
@@ -1080,6 +1076,13 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "tcp_fastopen_key",
+ .mode = 0600,
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+ .proc_handler = proc_tcp_fastopen_key,
+ },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4e39545..23225c9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
- tcp_fastopen_init_key_once();
+ tcp_fastopen_init_key_once(net);
fastopen_queue_tune(sk, val);
} else {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8c8f0f0..4eae44a 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -9,13 +9,18 @@
#include <net/inetpeer.h>
#include <net/tcp.h>
-struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-
-static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-
-void tcp_fastopen_init_key_once(void)
+void tcp_fastopen_init_key_once(struct net *net)
{
- static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct tcp_fastopen_context *ctxt;
+
+ rcu_read_lock();
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+ if (ctxt) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
/* tcp_fastopen_reset_cipher publishes the new context
* atomically, so we allow this race happening here.
@@ -23,8 +28,8 @@ void tcp_fastopen_init_key_once(void)
* All call sites of tcp_fastopen_cookie_gen also check
* for a valid cookie, so this is an acceptable risk.
*/
- if (net_get_random_once(key, sizeof(key)))
- tcp_fastopen_reset_cipher(key, sizeof(key));
+ get_random_bytes(key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, key, sizeof(key));
}
static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -35,7 +40,22 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
kfree(ctx);
}
-int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+void tcp_fastopen_ctx_destroy(struct net *net)
+{
+ struct tcp_fastopen_context *ctxt;
+
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ if (ctxt)
+ call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
+}
+
+int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len)
{
int err;
struct tcp_fastopen_context *ctx, *octx;
@@ -59,26 +79,27 @@ int tcp_fastopen_reset_cipher(void *key, unsigned int len)
}
memcpy(ctx->key, key, len);
- spin_lock(&tcp_fastopen_ctx_lock);
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
- octx = rcu_dereference_protected(tcp_fastopen_ctx,
- lockdep_is_held(&tcp_fastopen_ctx_lock));
- rcu_assign_pointer(tcp_fastopen_ctx, ctx);
- spin_unlock(&tcp_fastopen_ctx_lock);
+ octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
return err;
}
-static bool __tcp_fastopen_cookie_gen(const void *path,
+static bool __tcp_fastopen_cookie_gen(struct net *net,
+ const void *path,
struct tcp_fastopen_cookie *foc)
{
struct tcp_fastopen_context *ctx;
bool ok = false;
rcu_read_lock();
- ctx = rcu_dereference(tcp_fastopen_ctx);
+ ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctx) {
crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -94,7 +115,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
*
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/
-static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+static bool tcp_fastopen_cookie_gen(struct net *net,
+ struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *foc)
{
@@ -102,7 +124,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct iphdr *iph = ip_hdr(syn);
__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(path, foc);
+ return __tcp_fastopen_cookie_gen(net, path, foc);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -110,13 +132,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
struct tcp_fastopen_cookie tmp;
- if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+ if (__tcp_fastopen_cookie_gen(net, &ip6h->saddr, &tmp)) {
struct in6_addr *buf = &tmp.addr;
int i;
for (i = 0; i < 4; i++)
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(buf, foc);
+ return __tcp_fastopen_cookie_gen(net, buf, foc);
}
}
#endif
@@ -296,7 +318,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
goto fastopen;
if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+ tcp_fastopen_cookie_gen(sock_net(sk), req, skb, &valid_foc) &&
foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
foc->len == valid_foc.len &&
!memcmp(foc->val, valid_foc.val, foc->len)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 88409b1..49c74c0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2473,6 +2473,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_timestamps = 1;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
+ spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
return 0;
fail:
@@ -2483,7 +2484,12 @@ static int __net_init tcp_sk_init(struct net *net)
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
+ struct net *net;
+
inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
+ list_for_each_entry(net, net_exit_list, exit_list)
+ tcp_fastopen_ctx_destroy(net);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
--
1.8.3.1
^ permalink raw reply related
* [PATCH v5 4/4] ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob
From: Haishuang Yan @ 2017-09-27 3:35 UTC (permalink / raw)
To: David S. Miller, Alexey Kuznetsov, Eric Dumazet, Wei Wang,
Luca BRUNO
Cc: netdev, linux-kernel, Haishuang Yan
In-Reply-To: <1506483343-11544-1-git-send-email-yanhaishuang@cmss.chinamobile.com>
Applications in different namespaces might require a different time period,
in seconds, for which Fastopen is disabled on active TCP sockets.
Tested:
Simulate the following situation, in which the server's data gets dropped
after the 3WHS.
C ---- syn-data ---> S
C <--- syn/ack ----- S
C ---- ack --------> S
S (accept & write)
C? X <- data ------ S
[retry and timeout]
Then print the TCPFastOpenBlackhole counter from netstat: the counter
increases as expected when the firewall blackhole issue is detected and
active TFO is disabled.
# cat /proc/net/netstat | awk '{print $91}'
TCPFastOpenBlackhole
1
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
---
include/net/netns/ipv4.h | 3 +++
net/ipv4/sysctl_net_ipv4.c | 20 +++++++++++---------
net/ipv4/tcp_fastopen.c | 30 +++++++++++-------------------
net/ipv4/tcp_ipv4.c | 2 ++
4 files changed, 27 insertions(+), 28 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 66b8335..d76edde 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,9 @@ struct netns_ipv4 {
int sysctl_tcp_fastopen;
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
spinlock_t tcp_fastopen_ctx_lock;
+ unsigned int sysctl_tcp_fastopen_blackhole_timeout;
+ atomic_t tfo_active_disable_times;
+ unsigned long tfo_active_disable_stamp;
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 20e19fe..cac8dd3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -355,11 +355,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen_blackhole_timeout);
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);
return ret;
}
@@ -398,14 +400,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_fastopen_blackhole_timeout_sec",
- .data = &sysctl_tcp_fastopen_blackhole_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_tfo_blackhole_detect_timeout,
- .extra1 = &zero,
- },
- {
.procname = "tcp_abort_on_overflow",
.data = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -1083,6 +1077,14 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
.proc_handler = proc_tcp_fastopen_key,
},
+ {
+ .procname = "tcp_fastopen_blackhole_timeout_sec",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tfo_blackhole_detect_timeout,
+ .extra1 = &zero,
+ },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4eae44a..de470e7 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -422,25 +422,16 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
* TFO connection with data exchanges.
*/
-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
/* Disable active TFO and record current jiffies and
* tfo_active_disable_times
*/
void tcp_fastopen_active_disable(struct sock *sk)
{
- atomic_inc(&tfo_active_disable_times);
- tfo_active_disable_stamp = jiffies;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+ struct net *net = sock_net(sk);
-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
- atomic_set(&tfo_active_disable_times, 0);
+ atomic_inc(&net->ipv4.tfo_active_disable_times);
+ net->ipv4.tfo_active_disable_stamp = jiffies;
+ NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}
/* Calculate timeout for tfo active disable
@@ -449,17 +440,18 @@ void tcp_fastopen_active_timeout_reset(void)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- int tfo_da_times = atomic_read(&tfo_active_disable_times);
- int multiplier;
+ unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
unsigned long timeout;
+ int multiplier;
if (!tfo_da_times)
return false;
/* Limit timout to max: 2^6 * initial timeout */
multiplier = 1 << min(tfo_da_times - 1, 6);
- timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
- if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+ timeout = multiplier * tfo_bh_timeout * HZ;
+ if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
return true;
/* Mark check bit so we can check for successful active TFO
@@ -495,10 +487,10 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
}
}
} else if (tp->syn_fastopen_ch &&
- atomic_read(&tfo_active_disable_times)) {
+ atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
dst = sk_dst_get(sk);
if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
dst_release(dst);
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 49c74c0..ad3b5bb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2474,6 +2474,8 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
+ net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);
return 0;
fail:
--
1.8.3.1
^ permalink raw reply related
* [PATCH v5 2/4] ipv4: Remove the 'publish' logic in tcp_fastopen_init_key_once
From: Haishuang Yan @ 2017-09-27 3:35 UTC (permalink / raw)
To: David S. Miller, Alexey Kuznetsov, Eric Dumazet, Wei Wang,
Luca BRUNO
Cc: netdev, linux-kernel, Haishuang Yan
In-Reply-To: <1506483343-11544-1-git-send-email-yanhaishuang@cmss.chinamobile.com>
The 'publish' logic is no longer necessary after commit dfea2aa65424 ("tcp:
Do not call tcp_fastopen_reset_cipher from interrupt context"), because
tcp_fastopen_cookie_gen no longer calls tcp_fastopen_init_key_once.
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
---
include/net/tcp.h | 2 +-
net/ipv4/af_inet.c | 2 +-
net/ipv4/sysctl_net_ipv4.c | 5 -----
net/ipv4/tcp.c | 2 +-
net/ipv4/tcp_fastopen.c | 4 ++--
5 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f628967..061c128 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1562,7 +1562,7 @@ struct tcp_fastopen_request {
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(bool publish);
+void tcp_fastopen_init_key_once(void);
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ddd126d..e73ce79 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog)
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once();
}
err = inet_csk_listen_start(sk, backlog);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e31e853c..f6324ea 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -282,11 +282,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
ret = -EINVAL;
goto bad_key;
}
- /* Generate a dummy secret but don't publish it. This
- * is needed so we don't regenerate a new key on the
- * first invocation of tcp_fastopen_cookie_gen
- */
- tcp_fastopen_init_key_once(false);
tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dac56c4..4e39545 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once();
fastopen_queue_tune(sk, val);
} else {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 31b08ec..8c8f0f0 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -13,7 +13,7 @@
static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(void)
{
static u8 key[TCP_FASTOPEN_KEY_LENGTH];
@@ -23,7 +23,7 @@ void tcp_fastopen_init_key_once(bool publish)
* All call sites of tcp_fastopen_cookie_gen also check
* for a valid cookie, so this is an acceptable risk.
*/
- if (net_get_random_once(key, sizeof(key)) && publish)
+ if (net_get_random_once(key, sizeof(key)))
tcp_fastopen_reset_cipher(key, sizeof(key));
}
--
1.8.3.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox