* [PATCH v4 1/2] netfilter: add new sysctl toggle for lightweight tunnel netfilter hooks
2021-08-02 11:34 [PATCH v4 0/2] netfilter: add netfilter hooks to track SRv6-encapsulated flows proelbtn
@ 2021-08-02 11:34 ` proelbtn
2021-08-03 18:07 ` kernel test robot
2021-08-05 11:52 ` Pablo Neira Ayuso
2021-08-02 11:34 ` [PATCH v4 2/2] netfilter: add netfilter hooks to SRv6 data plane proelbtn
1 sibling, 2 replies; 6+ messages in thread
From: proelbtn @ 2021-08-02 11:34 UTC (permalink / raw)
To: netfilter-devel
Cc: pablo, stefano.salsano, andrea.mayer, davem, kuba, yoshfuji,
dsahern, proelbtn
This patch introduces new sysctl toggle for enabling lightweight tunnel
netfilter hooks.
Signed-off-by: proelbtn <contact@proelbtn.com>
---
.../networking/nf_conntrack-sysctl.rst | 7 +++
include/net/lwtunnel.h | 3 ++
include/net/netfilter/nf_conntrack_lwtunnel.h | 15 ++++++
net/core/lwtunnel.c | 3 ++
net/netfilter/Makefile | 3 ++
net/netfilter/nf_conntrack_lwtunnel.c | 52 +++++++++++++++++++
net/netfilter/nf_conntrack_standalone.c | 13 +++++
7 files changed, 96 insertions(+)
create mode 100644 include/net/netfilter/nf_conntrack_lwtunnel.h
create mode 100644 net/netfilter/nf_conntrack_lwtunnel.c
diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index d31ed6c1cb0d..5afa4603aa4b 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -30,6 +30,13 @@ nf_conntrack_checksum - BOOLEAN
in INVALID state. If this is enabled, such packets will not be
considered for connection tracking.
+nf_conntrack_lwtunnel - BOOLEAN
+ - 0 - disabled (default)
+ - not 0 - enabled
+
+ If this option is enabled, the lightweight tunnel netfilter hooks are
+ enabled. This option cannot be disabled once it is enabled.
+
nf_conntrack_count - INTEGER (read-only)
Number of currently allocated flow entries.
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 05cfd6ff6528..11a2e3ce50b3 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
};
#ifdef CONFIG_LWTUNNEL
+
+DECLARE_STATIC_KEY_FALSE(nf_ct_lwtunnel_enabled);
+
void lwtstate_free(struct lwtunnel_state *lws);
static inline struct lwtunnel_state *
diff --git a/include/net/netfilter/nf_conntrack_lwtunnel.h b/include/net/netfilter/nf_conntrack_lwtunnel.h
new file mode 100644
index 000000000000..230206d035b7
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_lwtunnel.h
@@ -0,0 +1,15 @@
+#include <linux/sysctl.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_LWTUNNEL
+int nf_conntrack_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos);
+#else // CONFIG_LWTUNNEL
+int nf_conntrack_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ return 0;
+}
+#endif
\ No newline at end of file
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 8ec7d13d2860..8be3274e30ec 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -23,6 +23,9 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>
+DEFINE_STATIC_KEY_FALSE(nf_ct_lwtunnel_enabled);
+EXPORT_SYMBOL_GPL(nf_ct_lwtunnel_enabled);
+
#ifdef CONFIG_MODULES
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 049890e00a3d..07209930b5e4 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/
# IPVS
obj-$(CONFIG_IP_VS) += ipvs/
+
+# lwtunnel
+obj-$(CONFIG_LWTUNNEL) += nf_conntrack_lwtunnel.o
diff --git a/net/netfilter/nf_conntrack_lwtunnel.c b/net/netfilter/nf_conntrack_lwtunnel.c
new file mode 100644
index 000000000000..cddbf8c5883a
--- /dev/null
+++ b/net/netfilter/nf_conntrack_lwtunnel.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/sysctl.h>
+#include <net/lwtunnel.h>
+#include <net/netfilter/nf_conntrack.h>
+
+static inline int nf_conntrack_lwtunnel_get(void)
+{
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled))
+ return 1;
+ else
+ return 0;
+}
+
+static inline int nf_conntrack_lwtunnel_set(int enable)
+{
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled)) {
+ if (!enable)
+ return -EPERM;
+ } else if (enable) {
+ static_branch_enable(&nf_ct_lwtunnel_enabled);
+ }
+
+ return 0;
+}
+
+int nf_conntrack_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int proc_nf_ct_lwtunnel_enabled = 0;
+ struct ctl_table tmp = {
+ .procname = table->procname,
+ .data = &proc_nf_ct_lwtunnel_enabled,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+ int ret;
+
+ if (!write)
+ proc_nf_ct_lwtunnel_enabled = nf_conntrack_lwtunnel_get();
+
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0)
+ ret = nf_conntrack_lwtunnel_set(proc_nf_ct_lwtunnel_enabled);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lwtunnel_sysctl_handler);
\ No newline at end of file
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 214d9f9e499b..bb00c8f131e8 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -22,6 +22,9 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
+#ifdef CONFIG_LWTUNNEL
+#include <net/netfilter/nf_conntrack_lwtunnel.h>
+#endif
#include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly;
@@ -552,6 +555,7 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_COUNT,
NF_SYSCTL_CT_BUCKETS,
NF_SYSCTL_CT_CHECKSUM,
+ NF_SYSCTL_CT_LWTUNNEL,
NF_SYSCTL_CT_LOG_INVALID,
NF_SYSCTL_CT_EXPECT_MAX,
NF_SYSCTL_CT_ACCT,
@@ -650,6 +654,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+#ifdef CONFIG_LWTUNNEL
+ [NF_SYSCTL_CT_LWTUNNEL] = {
+ .procname = "nf_conntrack_lwtunnel",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = nf_conntrack_lwtunnel_sysctl_handler,
+ },
+#endif
[NF_SYSCTL_CT_LOG_INVALID] = {
.procname = "nf_conntrack_log_invalid",
.data = &init_net.ct.sysctl_log_invalid,
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH v4 2/2] netfilter: add netfilter hooks to SRv6 data plane
2021-08-02 11:34 [PATCH v4 0/2] netfilter: add netfilter hooks to track SRv6-encapsulated flows proelbtn
2021-08-02 11:34 ` [PATCH v4 1/2] netfilter: add new sysctl toggle for lightweight tunnel netfilter hooks proelbtn
@ 2021-08-02 11:34 ` proelbtn
1 sibling, 0 replies; 6+ messages in thread
From: proelbtn @ 2021-08-02 11:34 UTC (permalink / raw)
To: netfilter-devel
Cc: pablo, stefano.salsano, andrea.mayer, davem, kuba, yoshfuji,
dsahern, proelbtn
This patch introduces netfilter hooks for solving the problem that
conntrack couldn't record both inner flows and outer flows.
Signed-off-by: Ryoga Saito <contact@proelbtn.com>
---
net/ipv6/seg6_iptunnel.c | 68 +++++++++++++++++++++--
net/ipv6/seg6_local.c | 115 +++++++++++++++++++++++++++------------
2 files changed, 141 insertions(+), 42 deletions(-)
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59c47de..77a2aafcb52f 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -26,6 +26,7 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
+#include <net/netfilter/nf_conntrack.h>
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
@@ -295,25 +296,33 @@ static int seg6_do_srh(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
return 0;
}
-static int seg6_input(struct sk_buff *skb)
+static int seg6_input_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return dst_input(skb);
+}
+
+static int seg6_input_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
- struct seg6_lwt *slwt;
+ struct seg6_lwt *slwt = NULL;
int err;
+ slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
err = seg6_do_srh(skb);
if (unlikely(err)) {
kfree_skb(skb);
return err;
}
- slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
-
preempt_disable();
dst = dst_cache_get(&slwt->cache);
preempt_enable();
@@ -337,10 +346,35 @@ static int seg6_input(struct sk_buff *skb)
if (unlikely(err))
return err;
- return dst_input(skb);
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, seg6_input_finish);
+
+ return seg6_input_finish(dev_net(skb->dev), NULL, skb);
}
-static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int seg6_input(struct sk_buff *skb)
+{
+ int proto;
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ proto = NFPROTO_IPV6;
+ else if (skb->protocol == htons(ETH_P_IP))
+ proto = NFPROTO_IPV4;
+ else
+ return -EINVAL;
+
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled))
+ return NF_HOOK(proto, NF_INET_POST_ROUTING, dev_net(skb->dev),
+ NULL, skb, NULL, skb_dst(skb)->dev,
+ seg6_input_core);
+
+ return seg6_input_core(dev_net(skb->dev), NULL, skb);
+}
+
+static int seg6_output_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@@ -387,12 +421,34 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
+
return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
}
+static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ int proto;
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ proto = NFPROTO_IPV6;
+ else if (skb->protocol == htons(ETH_P_IP))
+ proto = NFPROTO_IPV4;
+ else
+ return -EINVAL;
+
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled))
+ return NF_HOOK(proto, NF_INET_POST_ROUTING, net, sk, skb, NULL,
+ skb_dst(skb)->dev, seg6_output_core);
+
+ return seg6_output_core(net, sk, skb);
+}
+
static int seg6_build_state(struct net *net, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 60bf3b877957..d883475be043 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -30,6 +30,7 @@
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
+#include <net/netfilter/nf_conntrack.h>
#define SEG6_F_ATTR(i) BIT(i)
@@ -413,12 +414,33 @@ static int input_action_end_dx2(struct sk_buff *skb,
return -EINVAL;
}
+static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct seg6_local_lwt *slwt = NULL;
+ struct in6_addr *nhaddr = NULL;
+
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+
+ /* The inner packet is not associated to any local interface,
+ * so we do not call netif_rx().
+ *
+ * If slwt->nh6 is set to ::, then lookup the nexthop for the
+ * inner packet's DA. Otherwise, use the specified nexthop.
+ */
+ if (!ipv6_addr_any(&slwt->nh6))
+ nhaddr = &slwt->nh6;
+
+ seg6_lookup_nexthop(skb, nhaddr, 0);
+
+ return dst_input(skb);
+}
+
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
- struct in6_addr *nhaddr = NULL;
-
/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
*/
@@ -429,55 +451,64 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
- /* The inner packet is not associated to any local interface,
- * so we do not call netif_rx().
- *
- * If slwt->nh6 is set to ::, then lookup the nexthop for the
- * inner packet's DA. Otherwise, use the specified nexthop.
- */
-
- if (!ipv6_addr_any(&slwt->nh6))
- nhaddr = &slwt->nh6;
-
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
- seg6_lookup_nexthop(skb, nhaddr, 0);
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, input_action_end_dx6_finish);
- return dst_input(skb);
+ return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
-static int input_action_end_dx4(struct sk_buff *skb,
- struct seg6_local_lwt *slwt)
+static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct seg6_local_lwt *slwt = NULL;
struct iphdr *iph;
__be32 nhaddr;
int err;
- if (!decap_and_validate(skb, IPPROTO_IPIP))
- goto drop;
-
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto drop;
-
- skb->protocol = htons(ETH_P_IP);
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
iph = ip_hdr(skb);
-
nhaddr = slwt->nh4.s_addr ?: iph->daddr;
skb_dst_drop(skb);
- skb_set_transport_header(skb, sizeof(struct iphdr));
-
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
- if (err)
- goto drop;
+ if (err) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
return dst_input(skb);
+}
+static int input_action_end_dx4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb->protocol = htons(ETH_P_IP);
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+ nf_reset_ct(skb);
+
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, input_action_end_dx4_finish);
+
+ return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -645,6 +676,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
skb_dst_drop(skb);
skb_set_transport_header(skb, hdrlen);
+ nf_reset_ct(skb);
return end_dt_vrf_rcv(skb, family, vrf);
@@ -1078,22 +1110,18 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
u64_stats_update_end(&pcounters->syncp);
}
-static int seg6_local_input(struct sk_buff *skb)
+static int seg6_local_input_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
+ struct seg6_local_lwt *slwt = NULL;
struct seg6_action_desc *desc;
- struct seg6_local_lwt *slwt;
unsigned int len = skb->len;
int rc;
- if (skb->protocol != htons(ETH_P_IPV6)) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
- desc = slwt->desc;
+ desc = slwt->desc;
rc = desc->input(skb, slwt);
if (!seg6_lwtunnel_counters_enabled(slwt))
@@ -1104,6 +1132,21 @@ static int seg6_local_input(struct sk_buff *skb)
return rc;
}
+static int seg6_local_input(struct sk_buff *skb)
+{
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (static_branch_unlikely(&nf_ct_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ seg6_local_input_core);
+
+ return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
+}
+
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread