Netdev List
 help / color / mirror / Atom feed
* [PATCH 34/51] netfilter: ebtables: add ebt_get_target and ebt_get_target_c
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Taehee Yoo <ap420073@gmail.com>

ebt_get_target similar to {ip/ip6/arp}t_get_target.
and ebt_get_target_c similar to {ip/ip6/arp}t_get_target_c.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge/ebtables.h |  6 ++++++
 net/bridge/netfilter/ebtables.c                | 22 +++++++++++++---------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index 0c7dc8315013..3b86c14ea49d 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -191,6 +191,12 @@ struct ebt_entry {
 	unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
 };
 
+static __inline__ struct ebt_entry_target *
+ebt_get_target(struct ebt_entry *e)
+{
+	return (void *)e + e->target_offset;
+}
+
 /* {g,s}etsockopt numbers */
 #define EBT_BASE_CTL            128
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 7c07221369c0..9be240129448 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -177,6 +177,12 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
 	return (void *)entry + entry->next_offset;
 }
 
+static inline const struct ebt_entry_target *
+ebt_get_target_c(const struct ebt_entry *e)
+{
+	return ebt_get_target((struct ebt_entry *)e);
+}
+
 /* Do some firewalling */
 unsigned int ebt_do_table(struct sk_buff *skb,
 			  const struct nf_hook_state *state,
@@ -230,8 +236,7 @@ unsigned int ebt_do_table(struct sk_buff *skb,
 		 */
 		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
 
-		t = (struct ebt_entry_target *)
-		   (((char *)point) + point->target_offset);
+		t = ebt_get_target_c(point);
 		/* standard target */
 		if (!t->u.target->target)
 			verdict = ((struct ebt_standard_target *)t)->verdict;
@@ -637,7 +642,7 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt)
 		return 1;
 	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL);
 	EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL);
-	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+	t = ebt_get_target(e);
 
 	par.net      = net;
 	par.target   = t->u.target;
@@ -716,7 +721,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
 	ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j);
 	if (ret != 0)
 		goto cleanup_watchers;
-	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+	t = ebt_get_target(e);
 	gap = e->next_offset - e->target_offset;
 
 	target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0);
@@ -789,8 +794,7 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack
 			if (pos == nentries)
 				continue;
 		}
-		t = (struct ebt_entry_target *)
-		   (((char *)e) + e->target_offset);
+		t = ebt_get_target_c(e);
 		if (strcmp(t->u.name, EBT_STANDARD_TARGET))
 			goto letscontinue;
 		if (e->target_offset + sizeof(struct ebt_standard_target) >
@@ -1396,7 +1400,7 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
 		return -EFAULT;
 
 	hlp = ubase + (((char *)e + e->target_offset) - base);
-	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+	t = ebt_get_target_c(e);
 
 	ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase);
 	if (ret != 0)
@@ -1737,7 +1741,7 @@ static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr,
 		return ret;
 	target_offset = e->target_offset - (origsize - *size);
 
-	t = (struct ebt_entry_target *) ((char *) e + e->target_offset);
+	t = ebt_get_target(e);
 
 	ret = compat_target_to_user(t, dstptr, size);
 	if (ret)
@@ -1785,7 +1789,7 @@ static int compat_calc_entry(const struct ebt_entry *e,
 	EBT_MATCH_ITERATE(e, compat_calc_match, &off);
 	EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off);
 
-	t = (const struct ebt_entry_target *) ((char *) e + e->target_offset);
+	t = ebt_get_target_c(e);
 
 	off += xt_compat_target_offset(t->u.target);
 	off += ebt_compat_entry_padsize();
-- 
2.11.0

^ permalink raw reply related

* [PATCH 33/51] netfilter: x_tables: remove duplicate ip6t_get_target function call
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Taehee Yoo <ap420073@gmail.com>

In the check_target, ip6t_get_target is called twice.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/ip6_tables.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 65c9e1a58305..7097bbf95843 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -528,7 +528,6 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
 		.family    = NFPROTO_IPV6,
 	};
 
-	t = ip6t_get_target(e);
 	return xt_check_target(&par, t->u.target_size - sizeof(*t),
 			       e->ipv6.proto,
 			       e->ipv6.invflags & IP6T_INV_PROTO);
-- 
2.11.0

^ permalink raw reply related

* [PATCH 32/51] netfilter: ebtables: remove EBT_MATCH and EBT_NOMATCH
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Taehee Yoo <ap420073@gmail.com>

EBT_MATCH and EBT_NOMATCH are used to change return value.
match functions(ebt_xxx.c) return false when received frame is not matched
and returns true when received frame is matched.
but, EBT_MATCH_ITERATE understands oppositely.
so, to change return value, EBT_MATCH and EBT_NOMATCH are used.
but, we can use operation '!' simply.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h | 4 ----
 net/bridge/netfilter/ebtables.c           | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 0773b5a032f1..c6935be7c6ca 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -17,10 +17,6 @@
 #include <linux/if_ether.h>
 #include <uapi/linux/netfilter_bridge/ebtables.h>
 
-/* return values for match() functions */
-#define EBT_MATCH 0
-#define EBT_NOMATCH 1
-
 struct ebt_match {
 	struct list_head list;
 	const char name[EBT_FUNCTION_MAXNAMELEN];
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 355410b13316..7c07221369c0 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -101,7 +101,7 @@ ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb,
 {
 	par->match     = m->u.match;
 	par->matchinfo = m->data;
-	return m->u.match->match(skb, par) ? EBT_MATCH : EBT_NOMATCH;
+	return !m->u.match->match(skb, par);
 }
 
 static inline int
-- 
2.11.0

^ permalink raw reply related

* [PATCH 29/51] netfilter: add NAT support for shifted portmap ranges
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Thierry Du Tre <thierry@dtsystems.be>

This is a patch proposal to support shifted ranges in portmaps.  (i.e. tcp/udp
incoming port 5000-5100 on WAN redirected to LAN 192.168.1.5:2000-2100)

Currently DNAT only works for single port or identical port ranges.  (i.e.
ports 5000-5100 on WAN interface redirected to a LAN host while original
destination port is not altered) When different port ranges are configured,
either 'random' mode should be used, or else all incoming connections are
mapped onto the first port in the redirect range. (in described example
WAN:5000-5100 will all be mapped to 192.168.1.5:2000)

This patch introduces a new mode indicated by flag NF_NAT_RANGE_PROTO_OFFSET
which uses a base port value to calculate an offset with the destination port
present in the incoming stream. That offset is then applied as index within the
redirect port range (index modulo rangewidth to handle range overflow).

In described example the base port would be 5000. An incoming stream with
destination port 5004 would result in an offset value 4 which means that the
NAT'ed stream will be using destination port 2004.

Other possibilities include deterministic mapping of larger or multiple ranges
to a smaller range : WAN:5000-5999 -> LAN:5000-5099 (maps WAN port 5*xx to port
51xx)

This patch does not change any current behavior. It just adds new NAT proto
range functionality which must be selected via the specific flag when intended
to use.

A patch for iptables (libipt_DNAT.c + libip6t_DNAT.c) will also be proposed
which makes this functionality immediately available.

Signed-off-by: Thierry Du Tre <thierry@dtsystems.be>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_nat_masquerade.h |  2 +-
 include/net/netfilter/ipv6/nf_nat_masquerade.h |  2 +-
 include/net/netfilter/nf_nat.h                 |  2 +-
 include/net/netfilter/nf_nat_l3proto.h         |  4 +-
 include/net/netfilter/nf_nat_l4proto.h         |  8 +--
 include/net/netfilter/nf_nat_redirect.h        |  2 +-
 include/uapi/linux/netfilter/nf_nat.h          | 12 ++++-
 net/ipv4/netfilter/ipt_MASQUERADE.c            |  2 +-
 net/ipv4/netfilter/nf_nat_h323.c               |  4 +-
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c       |  4 +-
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c    |  4 +-
 net/ipv4/netfilter/nf_nat_pptp.c               |  2 +-
 net/ipv4/netfilter/nf_nat_proto_gre.c          |  2 +-
 net/ipv4/netfilter/nf_nat_proto_icmp.c         |  2 +-
 net/ipv4/netfilter/nft_masq_ipv4.c             |  2 +-
 net/ipv6/netfilter/ip6t_MASQUERADE.c           |  2 +-
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c       |  4 +-
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c    |  4 +-
 net/ipv6/netfilter/nf_nat_proto_icmpv6.c       |  2 +-
 net/ipv6/netfilter/nft_masq_ipv6.c             |  2 +-
 net/ipv6/netfilter/nft_redir_ipv6.c            |  2 +-
 net/netfilter/nf_nat_core.c                    | 27 +++++-----
 net/netfilter/nf_nat_helper.c                  |  2 +-
 net/netfilter/nf_nat_proto_common.c            |  9 ++--
 net/netfilter/nf_nat_proto_dccp.c              |  2 +-
 net/netfilter/nf_nat_proto_sctp.c              |  2 +-
 net/netfilter/nf_nat_proto_tcp.c               |  2 +-
 net/netfilter/nf_nat_proto_udp.c               |  4 +-
 net/netfilter/nf_nat_proto_unknown.c           |  2 +-
 net/netfilter/nf_nat_redirect.c                |  6 +--
 net/netfilter/nf_nat_sip.c                     |  2 +-
 net/netfilter/nft_nat.c                        |  2 +-
 net/netfilter/xt_NETMAP.c                      |  8 +--
 net/netfilter/xt_REDIRECT.c                    |  2 +-
 net/netfilter/xt_nat.c                         | 72 +++++++++++++++++++++++---
 net/openvswitch/conntrack.c                    |  4 +-
 36 files changed, 145 insertions(+), 71 deletions(-)

diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h
index ebd869473603..cd24be4c4a99 100644
--- a/include/net/netfilter/ipv4/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h
@@ -6,7 +6,7 @@
 
 unsigned int
 nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
-		       const struct nf_nat_range *range,
+		       const struct nf_nat_range2 *range,
 		       const struct net_device *out);
 
 void nf_nat_masquerade_ipv4_register_notifier(void);
diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h
index 1ed4f2631ed6..0c3b5ebf0bb8 100644
--- a/include/net/netfilter/ipv6/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h
@@ -3,7 +3,7 @@
 #define _NF_NAT_MASQUERADE_IPV6_H_
 
 unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 		       const struct net_device *out);
 void nf_nat_masquerade_ipv6_register_notifier(void);
 void nf_nat_masquerade_ipv6_unregister_notifier(void);
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 207a467e7ca6..da3d601cadee 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -39,7 +39,7 @@ struct nf_conn_nat {
 
 /* Set up the info structure to map into this range. */
 unsigned int nf_nat_setup_info(struct nf_conn *ct,
-			       const struct nf_nat_range *range,
+			       const struct nf_nat_range2 *range,
 			       enum nf_nat_manip_type maniptype);
 
 extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index ce7c2b4e64bb..ac47098a61dc 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -7,7 +7,7 @@ struct nf_nat_l3proto {
 	u8	l3proto;
 
 	bool	(*in_range)(const struct nf_conntrack_tuple *t,
-			    const struct nf_nat_range *range);
+			    const struct nf_nat_range2 *range);
 
 	u32 	(*secure_port)(const struct nf_conntrack_tuple *t, __be16);
 
@@ -33,7 +33,7 @@ struct nf_nat_l3proto {
 				  struct flowi *fl);
 
 	int	(*nlattr_to_range)(struct nlattr *tb[],
-				   struct nf_nat_range *range);
+				   struct nf_nat_range2 *range);
 };
 
 int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 67835ff8a2d9..b4d6b29bca62 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -34,12 +34,12 @@ struct nf_nat_l4proto {
 	 */
 	void (*unique_tuple)(const struct nf_nat_l3proto *l3proto,
 			     struct nf_conntrack_tuple *tuple,
-			     const struct nf_nat_range *range,
+			     const struct nf_nat_range2 *range,
 			     enum nf_nat_manip_type maniptype,
 			     const struct nf_conn *ct);
 
 	int (*nlattr_to_range)(struct nlattr *tb[],
-			       struct nf_nat_range *range);
+			       struct nf_nat_range2 *range);
 };
 
 /* Protocol registration. */
@@ -72,11 +72,11 @@ bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 
 void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 				 struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_range *range,
+				 const struct nf_nat_range2 *range,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct, u16 *rover);
 
 int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
-				   struct nf_nat_range *range);
+				   struct nf_nat_range2 *range);
 
 #endif /*_NF_NAT_L4PROTO_H*/
diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h
index 5ddabb08c472..c129aacc8ae8 100644
--- a/include/net/netfilter/nf_nat_redirect.h
+++ b/include/net/netfilter/nf_nat_redirect.h
@@ -7,7 +7,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
 		     const struct nf_nat_ipv4_multi_range_compat *mr,
 		     unsigned int hooknum);
 unsigned int
-nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 		     unsigned int hooknum);
 
 #endif /* _NF_NAT_REDIRECT_H_ */
diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index a33000da7229..4a95c0db14d4 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -10,6 +10,7 @@
 #define NF_NAT_RANGE_PROTO_RANDOM		(1 << 2)
 #define NF_NAT_RANGE_PERSISTENT			(1 << 3)
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
+#define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -17,7 +18,7 @@
 #define NF_NAT_RANGE_MASK					\
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
-	 NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
@@ -40,4 +41,13 @@ struct nf_nat_range {
 	union nf_conntrack_man_proto	max_proto;
 };
 
+struct nf_nat_range2 {
+	unsigned int			flags;
+	union nf_inet_addr		min_addr;
+	union nf_inet_addr		max_addr;
+	union nf_conntrack_man_proto	min_proto;
+	union nf_conntrack_man_proto	max_proto;
+	union nf_conntrack_man_proto	base_proto;
+};
+
 #endif /* _NETFILTER_NF_NAT_H */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index a03e4e7ef5f9..ce1512b02cb2 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -47,7 +47,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 static unsigned int
 masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
 
 	mr = par->targinfo;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index ac8342dcb55e..4e6b53ab6c33 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -395,7 +395,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
 			       struct nf_conntrack_expect *this)
 {
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 
 	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
 		nf_nat_follow_master(new, this);
@@ -497,7 +497,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_callforwarding_expect(struct nf_conn *new,
 					 struct nf_conntrack_expect *this)
 {
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 
 	/* This must be a fresh one. */
 	BUG_ON(new->status & IPS_NAT_DONE_MASK);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index f7ff6a364d7b..4346336cee4c 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -63,7 +63,7 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
 #endif /* CONFIG_XFRM */
 
 static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
-				 const struct nf_nat_range *range)
+				 const struct nf_nat_range2 *range)
 {
 	return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
 	       ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
@@ -143,7 +143,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
-				       struct nf_nat_range *range)
+				       struct nf_nat_range2 *range)
 {
 	if (tb[CTA_NAT_V4_MINIP]) {
 		range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index 0c366aad89cb..f538c5001547 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -24,13 +24,13 @@
 
 unsigned int
 nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
-		       const struct nf_nat_range *range,
+		       const struct nf_nat_range2 *range,
 		       const struct net_device *out)
 {
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
 	enum ip_conntrack_info ctinfo;
-	struct nf_nat_range newrange;
+	struct nf_nat_range2 newrange;
 	const struct rtable *rt;
 	__be32 newsrc, nh;
 
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 8a69363b4884..5d259a12e25f 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -48,7 +48,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct nf_conntrack_tuple t = {};
 	const struct nf_ct_pptp_master *ct_pptp_info;
 	const struct nf_nat_pptp *nat_pptp_info;
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 	struct nf_conn_nat *nat;
 
 	nat = nf_ct_nat_ext_add(ct);
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index edf05002d674..00fda6331ce5 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -41,7 +41,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
 static void
 gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		 struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_range2 *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 7b98baa13ede..6d7cf1d79baf 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
 static void
 icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		  struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_range2 *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f18677277119..f1193e1e928a 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -21,7 +21,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
 			       const struct nft_pktinfo *pkt)
 {
 	struct nft_masq *priv = nft_expr_priv(expr);
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 
 	memset(&range, 0, sizeof(range));
 	range.flags = priv->flags;
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 92c0047e7e33..491f808e356a 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -29,7 +29,7 @@ masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_range *range = par->targinfo;
+	const struct nf_nat_range2 *range = par->targinfo;
 
 	if (range->flags & NF_NAT_RANGE_MAP_IPS)
 		return -EINVAL;
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 6b7f075f811f..56d75eb5448f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -62,7 +62,7 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
 #endif
 
 static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
-				 const struct nf_nat_range *range)
+				 const struct nf_nat_range2 *range)
 {
 	return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
 	       ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
@@ -151,7 +151,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
-				       struct nf_nat_range *range)
+				       struct nf_nat_range2 *range)
 {
 	if (tb[CTA_NAT_V6_MINIP]) {
 		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 98f61fcb9108..9dfc2b90c362 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -26,14 +26,14 @@
 static atomic_t v6_worker_count;
 
 unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 		       const struct net_device *out)
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn_nat *nat;
 	struct in6_addr src;
 	struct nf_conn *ct;
-	struct nf_nat_range newrange;
+	struct nf_nat_range2 newrange;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 57593b00c5b4..d9bf42ba44fa 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -32,7 +32,7 @@ icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
 static void
 icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		    struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_range *range,
+		    const struct nf_nat_range2 *range,
 		    enum nf_nat_manip_type maniptype,
 		    const struct nf_conn *ct)
 {
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 4146536e9c15..dd0122f3cffe 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -22,7 +22,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
 			       const struct nft_pktinfo *pkt)
 {
 	struct nft_masq *priv = nft_expr_priv(expr);
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 
 	memset(&range, 0, sizeof(range));
 	range.flags = priv->flags;
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index a27e424f690d..74269865acc8 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -22,7 +22,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
 				const struct nft_pktinfo *pkt)
 {
 	struct nft_redir *priv = nft_expr_priv(expr);
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 
 	memset(&range, 0, sizeof(range));
 	if (priv->sreg_proto_min) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 617693ff9f4c..37b3c9913b08 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
 static int in_range(const struct nf_nat_l3proto *l3proto,
 		    const struct nf_nat_l4proto *l4proto,
 		    const struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_range *range)
+		    const struct nf_nat_range2 *range)
 {
 	/* If we are supposed to map IPs, then we must be in the
 	 * range specified, otherwise let this drag us onto a new src IP.
@@ -194,7 +194,7 @@ find_appropriate_src(struct net *net,
 		     const struct nf_nat_l4proto *l4proto,
 		     const struct nf_conntrack_tuple *tuple,
 		     struct nf_conntrack_tuple *result,
-		     const struct nf_nat_range *range)
+		     const struct nf_nat_range2 *range)
 {
 	unsigned int h = hash_by_src(net, tuple);
 	const struct nf_conn *ct;
@@ -224,7 +224,7 @@ find_appropriate_src(struct net *net,
 static void
 find_best_ips_proto(const struct nf_conntrack_zone *zone,
 		    struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_range *range,
+		    const struct nf_nat_range2 *range,
 		    const struct nf_conn *ct,
 		    enum nf_nat_manip_type maniptype)
 {
@@ -298,7 +298,7 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_tuple *orig_tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_range2 *range,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
@@ -349,9 +349,10 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	/* Only bother mapping if it's not already in range and unique */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
 		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
-			if (l4proto->in_range(tuple, maniptype,
-					      &range->min_proto,
-					      &range->max_proto) &&
+			if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
+			    l4proto->in_range(tuple, maniptype,
+			          &range->min_proto,
+			          &range->max_proto) &&
 			    (range->min_proto.all == range->max_proto.all ||
 			     !nf_nat_used_tuple(tuple, ct)))
 				goto out;
@@ -360,7 +361,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		}
 	}
 
-	/* Last change: get protocol to try to obtain unique tuple. */
+	/* Last chance: get protocol to try to obtain unique tuple. */
 	l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
 out:
 	rcu_read_unlock();
@@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
 
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_range2 *range,
 		  enum nf_nat_manip_type maniptype)
 {
 	struct net *net = nf_ct_net(ct);
@@ -459,7 +460,7 @@ __nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
 		(manip == NF_NAT_MANIP_SRC ?
 		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
 		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
-	struct nf_nat_range range = {
+	struct nf_nat_range2 range = {
 		.flags		= NF_NAT_RANGE_MAP_IPS,
 		.min_addr	= ip,
 		.max_addr	= ip,
@@ -702,7 +703,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
 
 static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 				     const struct nf_conn *ct,
-				     struct nf_nat_range *range)
+				     struct nf_nat_range2 *range)
 {
 	struct nlattr *tb[CTA_PROTONAT_MAX+1];
 	const struct nf_nat_l4proto *l4proto;
@@ -730,7 +731,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
 
 static int
 nfnetlink_parse_nat(const struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_range *range,
+		    const struct nf_conn *ct, struct nf_nat_range2 *range,
 		    const struct nf_nat_l3proto *l3proto)
 {
 	struct nlattr *tb[CTA_NAT_MAX+1];
@@ -758,7 +759,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
 			  const struct nlattr *attr)
 {
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 	const struct nf_nat_l3proto *l3proto;
 	int err;
 
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 607a373379b4..99606baedda4 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -191,7 +191,7 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
 void nf_nat_follow_master(struct nf_conn *ct,
 			  struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index 7d7466dbf663..5d849d835561 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
 
 void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 				 struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_range *range,
+				 const struct nf_nat_range2 *range,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct,
 				 u16 *rover)
@@ -83,6 +83,8 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 						  : tuple->src.u.all);
 	} else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
 		off = prandom_u32();
+	} else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) {
+		off = (ntohs(*portptr) - ntohs(range->base_proto.all));
 	} else {
 		off = *rover;
 	}
@@ -91,7 +93,8 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		*portptr = htons(min + off % range_size);
 		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
 			continue;
-		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL))
+		if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
+					NF_NAT_RANGE_PROTO_OFFSET)))
 			*rover = off;
 		return;
 	}
@@ -100,7 +103,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
-				   struct nf_nat_range *range)
+				   struct nf_nat_range2 *range)
 {
 	if (tb[CTA_PROTONAT_PORT_MIN]) {
 		range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 269fcd5dc34c..67ea0d83aa5a 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -23,7 +23,7 @@ static u_int16_t dccp_port_rover;
 static void
 dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		  struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_range2 *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index c57ee3240b1d..1c5d9b65fbba 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -17,7 +17,7 @@ static u_int16_t nf_sctp_port_rover;
 static void
 sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		  struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_range2 *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index 4f8820fc5148..f15fcd475f98 100644
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -23,7 +23,7 @@ static u16 tcp_port_rover;
 static void
 tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		 struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_range2 *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index edd4a77dc09a..5790f70a83b2 100644
--- a/net/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -22,7 +22,7 @@ static u16 udp_port_rover;
 static void
 udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		 struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_range2 *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -100,7 +100,7 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
 static void
 udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		     struct nf_conntrack_tuple *tuple,
-		     const struct nf_nat_range *range,
+		     const struct nf_nat_range2 *range,
 		     enum nf_nat_manip_type maniptype,
 		     const struct nf_conn *ct)
 {
diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c
index 6e494d584412..c5db3e251232 100644
--- a/net/netfilter/nf_nat_proto_unknown.c
+++ b/net/netfilter/nf_nat_proto_unknown.c
@@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
 
 static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
 				 struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_range *range,
+				 const struct nf_nat_range2 *range,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct)
 {
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 25b06b959118..7c4bb0a773ca 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -36,7 +36,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 newdst;
-	struct nf_nat_range newrange;
+	struct nf_nat_range2 newrange;
 
 	WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
 		hooknum != NF_INET_LOCAL_OUT);
@@ -82,10 +82,10 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4);
 static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
 
 unsigned int
-nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
+nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 		     unsigned int hooknum)
 {
-	struct nf_nat_range newrange;
+	struct nf_nat_range2 newrange;
 	struct in6_addr newdst;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 791fac4fd745..1f3086074981 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -316,7 +316,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
 static void nf_nat_sip_expected(struct nf_conn *ct,
 				struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 1f36954c2ba9..c15807d10b91 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -43,7 +43,7 @@ static void nft_nat_eval(const struct nft_expr *expr,
 	const struct nft_nat *priv = nft_expr_priv(expr);
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 
 	memset(&range, 0, sizeof(range));
 	if (priv->sreg_addr_min) {
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index 58aa9dd3c5b7..1d437875e15a 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -21,8 +21,8 @@
 static unsigned int
 netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	const struct nf_nat_range *range = par->targinfo;
-	struct nf_nat_range newrange;
+	const struct nf_nat_range2 *range = par->targinfo;
+	struct nf_nat_range2 newrange;
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	union nf_inet_addr new_addr, netmask;
@@ -56,7 +56,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_range *range = par->targinfo;
+	const struct nf_nat_range2 *range = par->targinfo;
 
 	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
 		return -EINVAL;
@@ -75,7 +75,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	enum ip_conntrack_info ctinfo;
 	__be32 new_ip, netmask;
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range newrange;
+	struct nf_nat_range2 newrange;
 
 	WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
 		xt_hooknum(par) != NF_INET_POST_ROUTING &&
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 98a4c6d4f1cb..5ce9461e979c 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -36,7 +36,7 @@ redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_range *range = par->targinfo;
+	const struct nf_nat_range2 *range = par->targinfo;
 
 	if (range->flags & NF_NAT_RANGE_MAP_IPS)
 		return -EINVAL;
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index bdb689cdc829..8af9707f8789 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -37,11 +37,12 @@ static void xt_nat_destroy(const struct xt_tgdtor_param *par)
 	nf_ct_netns_put(par->net, par->family);
 }
 
-static void xt_nat_convert_range(struct nf_nat_range *dst,
+static void xt_nat_convert_range(struct nf_nat_range2 *dst,
 				 const struct nf_nat_ipv4_range *src)
 {
 	memset(&dst->min_addr, 0, sizeof(dst->min_addr));
 	memset(&dst->max_addr, 0, sizeof(dst->max_addr));
+	memset(&dst->base_proto, 0, sizeof(dst->base_proto));
 
 	dst->flags	 = src->flags;
 	dst->min_addr.ip = src->min_ip;
@@ -54,7 +55,7 @@ static unsigned int
 xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 
@@ -71,7 +72,7 @@ static unsigned int
 xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range range;
+	struct nf_nat_range2 range;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 
@@ -86,7 +87,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 static unsigned int
 xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	const struct nf_nat_range *range = par->targinfo;
+	const struct nf_nat_range *range_v1 = par->targinfo;
+	struct nf_nat_range2 range;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 
@@ -95,13 +97,49 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
 		  ctinfo == IP_CT_RELATED_REPLY)));
 
-	return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
+	memcpy(&range, range_v1, sizeof(*range_v1));
+	memset(&range.base_proto, 0, sizeof(range.base_proto));
+
+	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 }
 
 static unsigned int
 xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	const struct nf_nat_range *range = par->targinfo;
+	const struct nf_nat_range *range_v1 = par->targinfo;
+	struct nf_nat_range2 range;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
+
+	memcpy(&range, range_v1, sizeof(*range_v1));
+	memset(&range.base_proto, 0, sizeof(range.base_proto));
+
+	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+
+static unsigned int
+xt_snat_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+		  ctinfo == IP_CT_RELATED_REPLY)));
+
+	return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range2 *range = par->targinfo;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 
@@ -163,6 +201,28 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
 				  (1 << NF_INET_LOCAL_OUT),
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "SNAT",
+		.revision	= 2,
+		.checkentry	= xt_nat_checkentry,
+		.destroy	= xt_nat_destroy,
+		.target		= xt_snat_target_v2,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= (1 << NF_INET_POST_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "DNAT",
+		.revision	= 2,
+		.target		= xt_dnat_target_v2,
+		.targetsize	= sizeof(struct nf_nat_range2),
+		.table		= "nat",
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_OUT),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_nat_init(void)
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c5904f629091..02fc343feb66 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -72,7 +72,7 @@ struct ovs_conntrack_info {
 	struct md_mark mark;
 	struct md_labels labels;
 #ifdef CONFIG_NF_NAT_NEEDED
-	struct nf_nat_range range;  /* Only present for SRC NAT and DST NAT. */
+	struct nf_nat_range2 range;  /* Only present for SRC NAT and DST NAT. */
 #endif
 };
 
@@ -710,7 +710,7 @@ static bool skb_nfct_cached(struct net *net,
  */
 static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
 			      enum ip_conntrack_info ctinfo,
-			      const struct nf_nat_range *range,
+			      const struct nf_nat_range2 *range,
 			      enum nf_nat_manip_type maniptype)
 {
 	int hooknum, nh_off, err = NF_ACCEPT;
-- 
2.11.0

^ permalink raw reply related

* [PATCH 30/51] netfilter: add __exit mark to helper modules
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Taehee Yoo <ap420073@gmail.com>

There are no __exit mark in the helper modules.
because these exit functions used to be called by init function
but now that is not. so we can add __exit mark.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_ftp.c  | 3 +--
 net/netfilter/nf_conntrack_irc.c  | 6 +-----
 net/netfilter/nf_conntrack_sane.c | 3 +--
 net/netfilter/nf_conntrack_sip.c  | 2 +-
 net/netfilter/nf_conntrack_tftp.c | 2 +-
 5 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index f0e9a7511e1a..a11c304fb771 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -566,8 +566,7 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
 	.timeout	= 5 * 60,
 };
 
-/* don't make this __exit, since it's called from __init ! */
-static void nf_conntrack_ftp_fini(void)
+static void __exit nf_conntrack_ftp_fini(void)
 {
 	nf_conntrack_helpers_unregister(ftp, ports_c * 2);
 	kfree(ftp_buffer);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 5523acce9d69..4099f4d79bae 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -232,8 +232,6 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly;
 static struct nf_conntrack_expect_policy irc_exp_policy;
 
-static void nf_conntrack_irc_fini(void);
-
 static int __init nf_conntrack_irc_init(void)
 {
 	int i, ret;
@@ -276,9 +274,7 @@ static int __init nf_conntrack_irc_init(void)
 	return 0;
 }
 
-/* This function is intentionally _NOT_ defined as __exit, because
- * it is needed by the init function */
-static void nf_conntrack_irc_fini(void)
+static void __exit nf_conntrack_irc_fini(void)
 {
 	nf_conntrack_helpers_unregister(irc, ports_c);
 	kfree(irc_buffer);
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index ae457f39d5ce..5072ff96ab33 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -173,8 +173,7 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
 	.timeout	= 5 * 60,
 };
 
-/* don't make this __exit, since it's called from __init ! */
-static void nf_conntrack_sane_fini(void)
+static void __exit nf_conntrack_sane_fini(void)
 {
 	nf_conntrack_helpers_unregister(sane, ports_c * 2);
 	kfree(sane_buffer);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 4dbb5bad4363..148ce1a52cc7 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1609,7 +1609,7 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1
 	},
 };
 
-static void nf_conntrack_sip_fini(void)
+static void __exit nf_conntrack_sip_fini(void)
 {
 	nf_conntrack_helpers_unregister(sip, ports_c * 4);
 }
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 0ec6779fd5d9..548b673b3625 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -104,7 +104,7 @@ static const struct nf_conntrack_expect_policy tftp_exp_policy = {
 	.timeout	= 5 * 60,
 };
 
-static void nf_conntrack_tftp_fini(void)
+static void __exit nf_conntrack_tftp_fini(void)
 {
 	nf_conntrack_helpers_unregister(tftp, ports_c * 2);
 }
-- 
2.11.0

^ permalink raw reply related

* [PATCH 31/51] netfilter: ebtables: add ebt_free_table_info function
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Taehee Yoo <ap420073@gmail.com>

A ebt_free_table_info frees all of chainstacks.
It similar to xt_free_table_info. this inline function
reduces code line.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 032e0fe45940..355410b13316 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -343,6 +343,16 @@ find_table_lock(struct net *net, const char *name, int *error,
 				"ebtable_", error, mutex);
 }
 
+static inline void ebt_free_table_info(struct ebt_table_info *info)
+{
+	int i;
+
+	if (info->chainstack) {
+		for_each_possible_cpu(i)
+			vfree(info->chainstack[i]);
+		vfree(info->chainstack);
+	}
+}
 static inline int
 ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
 		unsigned int *cnt)
@@ -975,7 +985,7 @@ static void get_counters(const struct ebt_counter *oldcounters,
 static int do_replace_finish(struct net *net, struct ebt_replace *repl,
 			      struct ebt_table_info *newinfo)
 {
-	int ret, i;
+	int ret;
 	struct ebt_counter *counterstmp = NULL;
 	/* used to be able to unlock earlier */
 	struct ebt_table_info *table;
@@ -1051,13 +1061,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
 			  ebt_cleanup_entry, net, NULL);
 
 	vfree(table->entries);
-	if (table->chainstack) {
-		for_each_possible_cpu(i)
-			vfree(table->chainstack[i]);
-		vfree(table->chainstack);
-	}
+	ebt_free_table_info(table);
 	vfree(table);
-
 	vfree(counterstmp);
 
 #ifdef CONFIG_AUDIT
@@ -1078,11 +1083,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
 free_counterstmp:
 	vfree(counterstmp);
 	/* can be initialized in translate_table() */
-	if (newinfo->chainstack) {
-		for_each_possible_cpu(i)
-			vfree(newinfo->chainstack[i]);
-		vfree(newinfo->chainstack);
-	}
+	ebt_free_table_info(newinfo);
 	return ret;
 }
 
@@ -1147,8 +1148,6 @@ static int do_replace(struct net *net, const void __user *user,
 
 static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
 {
-	int i;
-
 	mutex_lock(&ebt_mutex);
 	list_del(&table->list);
 	mutex_unlock(&ebt_mutex);
@@ -1157,11 +1156,7 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
 	if (table->private->nentries)
 		module_put(table->me);
 	vfree(table->private->entries);
-	if (table->private->chainstack) {
-		for_each_possible_cpu(i)
-			vfree(table->private->chainstack[i]);
-		vfree(table->private->chainstack);
-	}
+	ebt_free_table_info(table->private);
 	vfree(table->private);
 	kfree(table);
 }
@@ -1263,11 +1258,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
 free_unlock:
 	mutex_unlock(&ebt_mutex);
 free_chainstack:
-	if (newinfo->chainstack) {
-		for_each_possible_cpu(i)
-			vfree(newinfo->chainstack[i]);
-		vfree(newinfo->chainstack);
-	}
+	ebt_free_table_info(newinfo);
 	vfree(newinfo->entries);
 free_newinfo:
 	vfree(newinfo);
-- 
2.11.0

^ permalink raw reply related

* [PATCH 28/51] netfilter: nf_tables: Simplify set backend selection
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Phil Sutter <phil@nwl.cc>

Drop nft_set_type's ability to act as a container of multiple backend
implementations it chooses from. Instead consolidate the whole selection
logic in nft_select_set_ops() and the actual backend provided estimate()
callback.

This turns nf_tables_set_types into a list containing all available
backends which is traversed when selecting one matching userspace
requested criteria.

Also, this change allows to embed nft_set_ops structure into
nft_set_type and pull flags field into the latter as it's only used
during selection phase.

A crucial part of this change is to make sure the new layout respects
hash backend constraints formerly enforced by nft_hash_select_ops()
function: This is achieved by introduction of a specific estimate()
callback for nft_hash_fast_ops which returns false for key lengths != 4.
In turn, nft_hash_estimate() is changed to return false for key lengths
== 4 so it won't be chosen by accident. Also, both callbacks must return
false for unbounded sets as their size estimate depends on a known
maximum element count.

Note that this patch partially reverts commit 4f2921ca21b71 ("netfilter:
nf_tables: meter: pick a set backend that supports updates") by making
nft_set_ops_candidate() not explicitly look for an update callback but
make NFT_SET_EVAL a regular backend feature flag which is checked along
with the others. This way all feature requirements are checked in one
go.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  34 ++++-----
 net/netfilter/nf_tables_api.c     |  25 +++----
 net/netfilter/nft_set_bitmap.c    |  34 ++++-----
 net/netfilter/nft_set_hash.c      | 153 +++++++++++++++++++++-----------------
 net/netfilter/nft_set_rbtree.c    |  36 ++++-----
 5 files changed, 139 insertions(+), 143 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 123e82a2f8bb..de77d36e36b3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -275,23 +275,6 @@ struct nft_set_estimate {
 	enum nft_set_class	space;
 };
 
-/**
- *      struct nft_set_type - nf_tables set type
- *
- *      @select_ops: function to select nft_set_ops
- *      @ops: default ops, used when no select_ops functions is present
- *      @list: used internally
- *      @owner: module reference
- */
-struct nft_set_type {
-	const struct nft_set_ops	*(*select_ops)(const struct nft_ctx *,
-						       const struct nft_set_desc *desc,
-						       u32 flags);
-	const struct nft_set_ops	*ops;
-	struct list_head		list;
-	struct module			*owner;
-};
-
 struct nft_set_ext;
 struct nft_expr;
 
@@ -310,7 +293,6 @@ struct nft_expr;
  *	@init: initialize private data of new set instance
  *	@destroy: destroy private data of set instance
  *	@elemsize: element private size
- *	@features: features supported by the implementation
  */
 struct nft_set_ops {
 	bool				(*lookup)(const struct net *net,
@@ -361,9 +343,23 @@ struct nft_set_ops {
 	void				(*destroy)(const struct nft_set *set);
 
 	unsigned int			elemsize;
+};
+
+/**
+ *      struct nft_set_type - nf_tables set type
+ *
+ *      @ops: set ops for this type
+ *      @list: used internally
+ *      @owner: module reference
+ *      @features: features supported by the implementation
+ */
+struct nft_set_type {
+	const struct nft_set_ops	ops;
+	struct list_head		list;
+	struct module			*owner;
 	u32				features;
-	const struct nft_set_type	*type;
 };
+#define to_set_type(o) container_of(o, struct nft_set_type, ops)
 
 int nft_register_set(struct nft_set_type *type);
 void nft_unregister_set(struct nft_set_type *type);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2f14cadd9922..9ce35acf491d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2523,14 +2523,12 @@ void nft_unregister_set(struct nft_set_type *type)
 EXPORT_SYMBOL_GPL(nft_unregister_set);
 
 #define NFT_SET_FEATURES	(NFT_SET_INTERVAL | NFT_SET_MAP | \
-				 NFT_SET_TIMEOUT | NFT_SET_OBJECT)
+				 NFT_SET_TIMEOUT | NFT_SET_OBJECT | \
+				 NFT_SET_EVAL)
 
-static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags)
+static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
 {
-	if ((flags & NFT_SET_EVAL) && !ops->update)
-		return false;
-
-	return (flags & ops->features) == (flags & NFT_SET_FEATURES);
+	return (flags & type->features) == (flags & NFT_SET_FEATURES);
 }
 
 /*
@@ -2567,14 +2565,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 	best.space  = ~0;
 
 	list_for_each_entry(type, &nf_tables_set_types, list) {
-		if (!type->select_ops)
-			ops = type->ops;
-		else
-			ops = type->select_ops(ctx, desc, flags);
-		if (!ops)
-			continue;
+		ops = &type->ops;
 
-		if (!nft_set_ops_candidate(ops, flags))
+		if (!nft_set_ops_candidate(type, flags))
 			continue;
 		if (!ops->estimate(desc, flags, &est))
 			continue;
@@ -2605,7 +2598,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 		if (!try_module_get(type->owner))
 			continue;
 		if (bops != NULL)
-			module_put(bops->type->owner);
+			module_put(to_set_type(bops)->owner);
 
 		bops = ops;
 		best = est;
@@ -3247,14 +3240,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 err2:
 	kvfree(set);
 err1:
-	module_put(ops->type->owner);
+	module_put(to_set_type(ops)->owner);
 	return err;
 }
 
 static void nft_set_destroy(struct nft_set *set)
 {
 	set->ops->destroy(set);
-	module_put(set->ops->type->owner);
+	module_put(to_set_type(set->ops)->owner);
 	kfree(set->name);
 	kvfree(set);
 }
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 45fb2752fb63..d6626e01c7ee 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -296,27 +296,23 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
 	return true;
 }
 
-static struct nft_set_type nft_bitmap_type;
-static struct nft_set_ops nft_bitmap_ops __read_mostly = {
-	.type		= &nft_bitmap_type,
-	.privsize	= nft_bitmap_privsize,
-	.elemsize	= offsetof(struct nft_bitmap_elem, ext),
-	.estimate	= nft_bitmap_estimate,
-	.init		= nft_bitmap_init,
-	.destroy	= nft_bitmap_destroy,
-	.insert		= nft_bitmap_insert,
-	.remove		= nft_bitmap_remove,
-	.deactivate	= nft_bitmap_deactivate,
-	.flush		= nft_bitmap_flush,
-	.activate	= nft_bitmap_activate,
-	.lookup		= nft_bitmap_lookup,
-	.walk		= nft_bitmap_walk,
-	.get		= nft_bitmap_get,
-};
-
 static struct nft_set_type nft_bitmap_type __read_mostly = {
-	.ops		= &nft_bitmap_ops,
 	.owner		= THIS_MODULE,
+	.ops		= {
+		.privsize	= nft_bitmap_privsize,
+		.elemsize	= offsetof(struct nft_bitmap_elem, ext),
+		.estimate	= nft_bitmap_estimate,
+		.init		= nft_bitmap_init,
+		.destroy	= nft_bitmap_destroy,
+		.insert		= nft_bitmap_insert,
+		.remove		= nft_bitmap_remove,
+		.deactivate	= nft_bitmap_deactivate,
+		.flush		= nft_bitmap_flush,
+		.activate	= nft_bitmap_activate,
+		.lookup		= nft_bitmap_lookup,
+		.walk		= nft_bitmap_walk,
+		.get		= nft_bitmap_get,
+	},
 };
 
 static int __init nft_bitmap_module_init(void)
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index fc9c6d5d64cd..dbf1f4ad077c 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -605,6 +605,12 @@ static void nft_hash_destroy(const struct nft_set *set)
 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
 			      struct nft_set_estimate *est)
 {
+	if (!desc->size)
+		return false;
+
+	if (desc->klen == 4)
+		return false;
+
 	est->size   = sizeof(struct nft_hash) +
 		      nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
 		      desc->size * sizeof(struct nft_hash_elem);
@@ -614,91 +620,100 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
 	return true;
 }
 
-static struct nft_set_type nft_hash_type;
-static struct nft_set_ops nft_rhash_ops __read_mostly = {
-	.type		= &nft_hash_type,
-	.privsize       = nft_rhash_privsize,
-	.elemsize	= offsetof(struct nft_rhash_elem, ext),
-	.estimate	= nft_rhash_estimate,
-	.init		= nft_rhash_init,
-	.destroy	= nft_rhash_destroy,
-	.insert		= nft_rhash_insert,
-	.activate	= nft_rhash_activate,
-	.deactivate	= nft_rhash_deactivate,
-	.flush		= nft_rhash_flush,
-	.remove		= nft_rhash_remove,
-	.lookup		= nft_rhash_lookup,
-	.update		= nft_rhash_update,
-	.walk		= nft_rhash_walk,
-	.get		= nft_rhash_get,
-	.features	= NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
-};
+static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features,
+			      struct nft_set_estimate *est)
+{
+	if (!desc->size)
+		return false;
 
-static struct nft_set_ops nft_hash_ops __read_mostly = {
-	.type		= &nft_hash_type,
-	.privsize       = nft_hash_privsize,
-	.elemsize	= offsetof(struct nft_hash_elem, ext),
-	.estimate	= nft_hash_estimate,
-	.init		= nft_hash_init,
-	.destroy	= nft_hash_destroy,
-	.insert		= nft_hash_insert,
-	.activate	= nft_hash_activate,
-	.deactivate	= nft_hash_deactivate,
-	.flush		= nft_hash_flush,
-	.remove		= nft_hash_remove,
-	.lookup		= nft_hash_lookup,
-	.walk		= nft_hash_walk,
-	.get		= nft_hash_get,
-	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
-};
+	if (desc->klen != 4)
+		return false;
 
-static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
-	.type		= &nft_hash_type,
-	.privsize       = nft_hash_privsize,
-	.elemsize	= offsetof(struct nft_hash_elem, ext),
-	.estimate	= nft_hash_estimate,
-	.init		= nft_hash_init,
-	.destroy	= nft_hash_destroy,
-	.insert		= nft_hash_insert,
-	.activate	= nft_hash_activate,
-	.deactivate	= nft_hash_deactivate,
-	.flush		= nft_hash_flush,
-	.remove		= nft_hash_remove,
-	.lookup		= nft_hash_lookup_fast,
-	.walk		= nft_hash_walk,
-	.get		= nft_hash_get,
-	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
-};
-
-static const struct nft_set_ops *
-nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
-		    u32 flags)
-{
-	if (desc->size && !(flags & (NFT_SET_EVAL | NFT_SET_TIMEOUT))) {
-		switch (desc->klen) {
-		case 4:
-			return &nft_hash_fast_ops;
-		default:
-			return &nft_hash_ops;
-		}
-	}
+	est->size   = sizeof(struct nft_hash) +
+		      nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+		      desc->size * sizeof(struct nft_hash_elem);
+	est->lookup = NFT_SET_CLASS_O_1;
+	est->space  = NFT_SET_CLASS_O_N;
 
-	return &nft_rhash_ops;
+	return true;
 }
 
+static struct nft_set_type nft_rhash_type __read_mostly = {
+	.owner		= THIS_MODULE,
+	.features	= NFT_SET_MAP | NFT_SET_OBJECT |
+			  NFT_SET_TIMEOUT | NFT_SET_EVAL,
+	.ops		= {
+		.privsize       = nft_rhash_privsize,
+		.elemsize	= offsetof(struct nft_rhash_elem, ext),
+		.estimate	= nft_rhash_estimate,
+		.init		= nft_rhash_init,
+		.destroy	= nft_rhash_destroy,
+		.insert		= nft_rhash_insert,
+		.activate	= nft_rhash_activate,
+		.deactivate	= nft_rhash_deactivate,
+		.flush		= nft_rhash_flush,
+		.remove		= nft_rhash_remove,
+		.lookup		= nft_rhash_lookup,
+		.update		= nft_rhash_update,
+		.walk		= nft_rhash_walk,
+		.get		= nft_rhash_get,
+	},
+};
+
 static struct nft_set_type nft_hash_type __read_mostly = {
-	.select_ops	= nft_hash_select_ops,
 	.owner		= THIS_MODULE,
+	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
+	.ops		= {
+		.privsize       = nft_hash_privsize,
+		.elemsize	= offsetof(struct nft_hash_elem, ext),
+		.estimate	= nft_hash_estimate,
+		.init		= nft_hash_init,
+		.destroy	= nft_hash_destroy,
+		.insert		= nft_hash_insert,
+		.activate	= nft_hash_activate,
+		.deactivate	= nft_hash_deactivate,
+		.flush		= nft_hash_flush,
+		.remove		= nft_hash_remove,
+		.lookup		= nft_hash_lookup,
+		.walk		= nft_hash_walk,
+		.get		= nft_hash_get,
+	},
+};
+
+static struct nft_set_type nft_hash_fast_type __read_mostly = {
+	.owner		= THIS_MODULE,
+	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
+	.ops		= {
+		.privsize       = nft_hash_privsize,
+		.elemsize	= offsetof(struct nft_hash_elem, ext),
+		.estimate	= nft_hash_fast_estimate,
+		.init		= nft_hash_init,
+		.destroy	= nft_hash_destroy,
+		.insert		= nft_hash_insert,
+		.activate	= nft_hash_activate,
+		.deactivate	= nft_hash_deactivate,
+		.flush		= nft_hash_flush,
+		.remove		= nft_hash_remove,
+		.lookup		= nft_hash_lookup_fast,
+		.walk		= nft_hash_walk,
+		.get		= nft_hash_get,
+	},
 };
 
 static int __init nft_hash_module_init(void)
 {
-	return nft_register_set(&nft_hash_type);
+	if (nft_register_set(&nft_hash_fast_type) ||
+	    nft_register_set(&nft_hash_type) ||
+	    nft_register_set(&nft_rhash_type))
+		return 1;
+	return 0;
 }
 
 static void __exit nft_hash_module_exit(void)
 {
+	nft_unregister_set(&nft_rhash_type);
 	nft_unregister_set(&nft_hash_type);
+	nft_unregister_set(&nft_hash_fast_type);
 }
 
 module_init(nft_hash_module_init);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index e6f08bc5f359..22c57d7612c4 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -393,28 +393,24 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 	return true;
 }
 
-static struct nft_set_type nft_rbtree_type;
-static struct nft_set_ops nft_rbtree_ops __read_mostly = {
-	.type		= &nft_rbtree_type,
-	.privsize	= nft_rbtree_privsize,
-	.elemsize	= offsetof(struct nft_rbtree_elem, ext),
-	.estimate	= nft_rbtree_estimate,
-	.init		= nft_rbtree_init,
-	.destroy	= nft_rbtree_destroy,
-	.insert		= nft_rbtree_insert,
-	.remove		= nft_rbtree_remove,
-	.deactivate	= nft_rbtree_deactivate,
-	.flush		= nft_rbtree_flush,
-	.activate	= nft_rbtree_activate,
-	.lookup		= nft_rbtree_lookup,
-	.walk		= nft_rbtree_walk,
-	.get		= nft_rbtree_get,
-	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
-};
-
 static struct nft_set_type nft_rbtree_type __read_mostly = {
-	.ops		= &nft_rbtree_ops,
 	.owner		= THIS_MODULE,
+	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
+	.ops		= {
+		.privsize	= nft_rbtree_privsize,
+		.elemsize	= offsetof(struct nft_rbtree_elem, ext),
+		.estimate	= nft_rbtree_estimate,
+		.init		= nft_rbtree_init,
+		.destroy	= nft_rbtree_destroy,
+		.insert		= nft_rbtree_insert,
+		.remove		= nft_rbtree_remove,
+		.deactivate	= nft_rbtree_deactivate,
+		.flush		= nft_rbtree_flush,
+		.activate	= nft_rbtree_activate,
+		.lookup		= nft_rbtree_lookup,
+		.walk		= nft_rbtree_walk,
+		.get		= nft_rbtree_get,
+	},
 };
 
 static int __init nft_rbtree_module_init(void)
-- 
2.11.0

^ permalink raw reply related

* [PATCH 26/51] netfilter: nf_tables: simplify lookup functions
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

Replace the nf_tables_ prefix by nft_ and merge code into single lookup
function whenever possible. In many cases we go over the 80-chars
boundary function names, this save us ~50 LoC.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  12 +-
 net/netfilter/nf_tables_api.c     | 249 +++++++++++++++-----------------------
 net/netfilter/nft_flow_offload.c  |   5 +-
 net/netfilter/nft_objref.c        |   4 +-
 4 files changed, 110 insertions(+), 160 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2f2062ae1c45..123e82a2f8bb 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1015,9 +1015,9 @@ static inline void *nft_obj_data(const struct nft_object *obj)
 
 #define nft_expr_obj(expr)	*((struct nft_object **)nft_expr_priv(expr))
 
-struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
-					const struct nlattr *nla, u32 objtype,
-					u8 genmask);
+struct nft_object *nft_obj_lookup(const struct nft_table *table,
+				  const struct nlattr *nla, u32 objtype,
+				  u8 genmask);
 
 void nft_obj_notify(struct net *net, struct nft_table *table,
 		    struct nft_object *obj, u32 portid, u32 seq,
@@ -1106,9 +1106,9 @@ struct nft_flowtable {
 	struct nf_flowtable		data;
 };
 
-struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
-						 const struct nlattr *nla,
-						 u8 genmask);
+struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+					   const struct nlattr *nla,
+					   u8 genmask);
 
 void nft_register_flowtable_type(struct nf_flowtable_type *type);
 void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 16b67f54b3d2..f65e650b61aa 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -386,13 +386,17 @@ static struct nft_table *nft_table_lookup(const struct net *net,
 {
 	struct nft_table *table;
 
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
 	list_for_each_entry(table, &net->nft.tables, list) {
 		if (!nla_strcmp(nla, table->name) &&
 		    table->family == family &&
 		    nft_active_genmask(table, genmask))
 			return table;
 	}
-	return NULL;
+
+	return ERR_PTR(-ENOENT);
 }
 
 static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
@@ -406,37 +410,6 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
 		    nft_active_genmask(table, genmask))
 			return table;
 	}
-	return NULL;
-}
-
-static struct nft_table *nf_tables_table_lookup(const struct net *net,
-						const struct nlattr *nla,
-						u8 family, u8 genmask)
-{
-	struct nft_table *table;
-
-	if (nla == NULL)
-		return ERR_PTR(-EINVAL);
-
-	table = nft_table_lookup(net, nla, family, genmask);
-	if (table != NULL)
-		return table;
-
-	return ERR_PTR(-ENOENT);
-}
-
-static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
-							 const struct nlattr *nla,
-							 u8 genmask)
-{
-	struct nft_table *table;
-
-	if (nla == NULL)
-		return ERR_PTR(-EINVAL);
-
-	table = nft_table_lookup_byhandle(net, nla, genmask);
-	if (table != NULL)
-		return table;
 
 	return ERR_PTR(-ENOENT);
 }
@@ -608,8 +581,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 
-	table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -735,7 +707,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	int err;
 
 	name = nla[NFTA_TABLE_NAME];
-	table = nf_tables_table_lookup(net, name, family, genmask);
+	table = nft_table_lookup(net, name, family, genmask);
 	if (IS_ERR(table)) {
 		if (PTR_ERR(table) != -ENOENT)
 			return PTR_ERR(table);
@@ -893,12 +865,11 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 		return nft_flush(&ctx, family);
 
 	if (nla[NFTA_TABLE_HANDLE])
-		table = nf_tables_table_lookup_byhandle(net,
-							nla[NFTA_TABLE_HANDLE],
-							genmask);
+		table = nft_table_lookup_byhandle(net, nla[NFTA_TABLE_HANDLE],
+						  genmask);
 	else
-		table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME],
-					       family, genmask);
+		table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family,
+					 genmask);
 
 	if (IS_ERR(table))
 		return PTR_ERR(table);
@@ -949,8 +920,7 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
  */
 
 static struct nft_chain *
-nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle,
-				u8 genmask)
+nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
 {
 	struct nft_chain *chain;
 
@@ -963,9 +933,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle,
 	return ERR_PTR(-ENOENT);
 }
 
-static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
-						const struct nlattr *nla,
-						u8 genmask)
+static struct nft_chain *nft_chain_lookup(const struct nft_table *table,
+					  const struct nlattr *nla, u8 genmask)
 {
 	struct nft_chain *chain;
 
@@ -1194,12 +1163,11 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 
-	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+	chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 
@@ -1513,8 +1481,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 	    nla[NFTA_CHAIN_NAME]) {
 		struct nft_chain *chain2;
 
-		chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
-						genmask);
+		chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
 		if (!IS_ERR(chain2))
 			return -EEXIST;
 	}
@@ -1576,8 +1543,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -1586,11 +1552,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
 	if (nla[NFTA_CHAIN_HANDLE]) {
 		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
-		chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+		chain = nft_chain_lookup_byhandle(table, handle, genmask);
 		if (IS_ERR(chain))
 			return PTR_ERR(chain);
 	} else {
-		chain = nf_tables_chain_lookup(table, name, genmask);
+		chain = nft_chain_lookup(table, name, genmask);
 		if (IS_ERR(chain)) {
 			if (PTR_ERR(chain) != -ENOENT)
 				return PTR_ERR(chain);
@@ -1647,16 +1613,15 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 	u32 use;
 	int err;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	if (nla[NFTA_CHAIN_HANDLE]) {
 		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
-		chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+		chain = nft_chain_lookup_byhandle(table, handle, genmask);
 	} else {
-		chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+		chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
 	}
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
@@ -1939,8 +1904,8 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
  * Rules
  */
 
-static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
-						u64 handle)
+static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
+					  u64 handle)
 {
 	struct nft_rule *rule;
 
@@ -1953,13 +1918,13 @@ static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
 	return ERR_PTR(-ENOENT);
 }
 
-static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
-					      const struct nlattr *nla)
+static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
+					const struct nlattr *nla)
 {
 	if (nla == NULL)
 		return ERR_PTR(-EINVAL);
 
-	return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+	return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
 }
 
 static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
@@ -2191,16 +2156,15 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 		return netlink_dump_start(nlsk, skb, nlh, &c);
 	}
 
-	table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+	chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 
-	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+	rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 	if (IS_ERR(rule))
 		return PTR_ERR(rule);
 
@@ -2265,18 +2229,17 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+	chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
 
 	if (nla[NFTA_RULE_HANDLE]) {
 		handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
-		rule = __nf_tables_rule_lookup(chain, handle);
+		rule = __nft_rule_lookup(chain, handle);
 		if (IS_ERR(rule))
 			return PTR_ERR(rule);
 
@@ -2300,7 +2263,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			return -EOPNOTSUPP;
 
 		pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
-		old_rule = __nf_tables_rule_lookup(chain, pos_handle);
+		old_rule = __nft_rule_lookup(chain, pos_handle);
 		if (IS_ERR(old_rule))
 			return PTR_ERR(old_rule);
 	}
@@ -2435,14 +2398,12 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 	int family = nfmsg->nfgen_family, err = 0;
 	struct nft_ctx ctx;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	if (nla[NFTA_RULE_CHAIN]) {
-		chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN],
-					       genmask);
+		chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
 		if (IS_ERR(chain))
 			return PTR_ERR(chain);
 	}
@@ -2451,8 +2412,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 
 	if (chain) {
 		if (nla[NFTA_RULE_HANDLE]) {
-			rule = nf_tables_rule_lookup(chain,
-						     nla[NFTA_RULE_HANDLE]);
+			rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 			if (IS_ERR(rule))
 				return PTR_ERR(rule);
 
@@ -2635,8 +2595,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
 	struct nft_table *table = NULL;
 
 	if (nla[NFTA_SET_TABLE] != NULL) {
-		table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE],
-					       family, genmask);
+		table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
+					 genmask);
 		if (IS_ERR(table))
 			return PTR_ERR(table);
 	}
@@ -2645,8 +2605,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
 	return 0;
 }
 
-static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
-					    const struct nlattr *nla, u8 genmask)
+static struct nft_set *nft_set_lookup(const struct nft_table *table,
+				      const struct nlattr *nla, u8 genmask)
 {
 	struct nft_set *set;
 
@@ -2661,14 +2621,12 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 	return ERR_PTR(-ENOENT);
 }
 
-static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table,
-						     const struct nlattr *nla, u8 genmask)
+static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
+					       const struct nlattr *nla,
+					       u8 genmask)
 {
 	struct nft_set *set;
 
-	if (nla == NULL)
-		return ERR_PTR(-EINVAL);
-
 	list_for_each_entry(set, &table->sets, list) {
 		if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
 		    nft_active_genmask(set, genmask))
@@ -2677,9 +2635,8 @@ static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *tab
 	return ERR_PTR(-ENOENT);
 }
 
-static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
-						 const struct nlattr *nla,
-						 u8 genmask)
+static struct nft_set *nft_set_lookup_byid(const struct net *net,
+					   const struct nlattr *nla, u8 genmask)
 {
 	struct nft_trans *trans;
 	u32 id = ntohl(nla_get_be32(nla));
@@ -2703,12 +2660,12 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
 {
 	struct nft_set *set;
 
-	set = nf_tables_set_lookup(table, nla_set_name, genmask);
+	set = nft_set_lookup(table, nla_set_name, genmask);
 	if (IS_ERR(set)) {
 		if (!nla_set_id)
 			return set;
 
-		set = nf_tables_set_lookup_byid(net, nla_set_id, genmask);
+		set = nft_set_lookup_byid(net, nla_set_id, genmask);
 	}
 	return set;
 }
@@ -2980,7 +2937,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
 	if (!nla[NFTA_SET_TABLE])
 		return -EINVAL;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+	set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
@@ -3132,14 +3089,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
-	set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+	set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set)) {
 		if (PTR_ERR(set) != -ENOENT)
 			return PTR_ERR(set);
@@ -3262,9 +3218,10 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 		return err;
 
 	if (nla[NFTA_SET_HANDLE])
-		set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask);
+		set = nft_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE],
+					      genmask);
 	else
-		set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+		set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
@@ -3404,8 +3361,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
 	int family = nfmsg->nfgen_family;
 	struct nft_table *table;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE],
-				       family, genmask);
+	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+				 genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
@@ -3741,8 +3698,7 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 	if (err < 0)
 		return err;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-				   genmask);
+	set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
@@ -3954,8 +3910,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			err = -EINVAL;
 			goto err2;
 		}
-		obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
-					   set->objtype, genmask);
+		obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
+				     set->objtype, genmask);
 		if (IS_ERR(obj)) {
 			err = PTR_ERR(obj);
 			goto err2;
@@ -4284,8 +4240,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
 	if (err < 0)
 		return err;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-				   genmask);
+	set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
@@ -4373,9 +4328,9 @@ void nft_unregister_obj(struct nft_object_type *obj_type)
 }
 EXPORT_SYMBOL_GPL(nft_unregister_obj);
 
-struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
-					const struct nlattr *nla,
-					u32 objtype, u8 genmask)
+struct nft_object *nft_obj_lookup(const struct nft_table *table,
+				  const struct nlattr *nla, u32 objtype,
+				  u8 genmask)
 {
 	struct nft_object *obj;
 
@@ -4387,11 +4342,11 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
 	}
 	return ERR_PTR(-ENOENT);
 }
-EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
+EXPORT_SYMBOL_GPL(nft_obj_lookup);
 
-static struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
-							const struct nlattr *nla,
-							u32 objtype, u8 genmask)
+static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table,
+						  const struct nlattr *nla,
+						  u32 objtype, u8 genmask)
 {
 	struct nft_object *obj;
 
@@ -4535,13 +4490,12 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	    !nla[NFTA_OBJ_DATA])
 		return -EINVAL;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+	obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		if (err != -ENOENT)
@@ -4761,13 +4715,12 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 	    !nla[NFTA_OBJ_TYPE])
 		return -EINVAL;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+	obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -4817,18 +4770,17 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
 	    (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
 		return -EINVAL;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
-				       genmask);
+	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
 	if (nla[NFTA_OBJ_HANDLE])
-		obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
-						    objtype, genmask);
+		obj = nft_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
+					      objtype, genmask);
 	else
-		obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME],
-					   objtype, genmask);
+		obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype,
+				     genmask);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 	if (obj->use > 0)
@@ -4903,9 +4855,8 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
 	[NFTA_FLOWTABLE_HANDLE]		= { .type = NLA_U64 },
 };
 
-struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
-						 const struct nlattr *nla,
-						 u8 genmask)
+struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+					   const struct nlattr *nla, u8 genmask)
 {
 	struct nft_flowtable *flowtable;
 
@@ -4916,11 +4867,11 @@ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
 	}
 	return ERR_PTR(-ENOENT);
 }
-EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
+EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
 
 static struct nft_flowtable *
-nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
-				    const struct nlattr *nla, u8 genmask)
+nft_flowtable_lookup_byhandle(const struct nft_table *table,
+			      const struct nlattr *nla, u8 genmask)
 {
        struct nft_flowtable *flowtable;
 
@@ -5093,13 +5044,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 	    !nla[NFTA_FLOWTABLE_HOOK])
 		return -EINVAL;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
-				       family, genmask);
+	table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+				 genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
-					       genmask);
+	flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+					 genmask);
 	if (IS_ERR(flowtable)) {
 		err = PTR_ERR(flowtable);
 		if (err != -ENOENT)
@@ -5210,19 +5161,19 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 	     !nla[NFTA_FLOWTABLE_HANDLE]))
 		return -EINVAL;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
-				       family, genmask);
+	table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+				 genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
 	if (nla[NFTA_FLOWTABLE_HANDLE])
-		flowtable = nf_tables_flowtable_lookup_byhandle(table,
-								nla[NFTA_FLOWTABLE_HANDLE],
-								genmask);
+		flowtable = nft_flowtable_lookup_byhandle(table,
+							  nla[NFTA_FLOWTABLE_HANDLE],
+							  genmask);
 	else
-		flowtable = nf_tables_flowtable_lookup(table,
-						       nla[NFTA_FLOWTABLE_NAME],
-						       genmask);
+		flowtable = nft_flowtable_lookup(table,
+						 nla[NFTA_FLOWTABLE_NAME],
+						 genmask);
 	if (IS_ERR(flowtable))
                 return PTR_ERR(flowtable);
 	if (flowtable->use > 0)
@@ -5407,13 +5358,13 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
 	if (!nla[NFTA_FLOWTABLE_NAME])
 		return -EINVAL;
 
-	table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
-				       family, genmask);
+	table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
+				 genmask);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
-	flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
-					       genmask);
+	flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+					 genmask);
 	if (IS_ERR(flowtable))
 		return PTR_ERR(flowtable);
 
@@ -6382,8 +6333,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 	case NFT_GOTO:
 		if (!tb[NFTA_VERDICT_CHAIN])
 			return -EINVAL;
-		chain = nf_tables_chain_lookup(ctx->table,
-					       tb[NFTA_VERDICT_CHAIN], genmask);
+		chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN],
+					 genmask);
 		if (IS_ERR(chain))
 			return PTR_ERR(chain);
 		if (nft_is_base_chain(chain))
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index b65829b2be22..d6bab8c3cbb0 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -142,9 +142,8 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
 	if (!tb[NFTA_FLOW_TABLE_NAME])
 		return -EINVAL;
 
-	flowtable = nf_tables_flowtable_lookup(ctx->table,
-					       tb[NFTA_FLOW_TABLE_NAME],
-					       genmask);
+	flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
+					 genmask);
 	if (IS_ERR(flowtable))
 		return PTR_ERR(flowtable);
 
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 0b02407773ad..cdf348f751ec 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -38,8 +38,8 @@ static int nft_objref_init(const struct nft_ctx *ctx,
 		return -EINVAL;
 
 	objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
-	obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
-				   genmask);
+	obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
+			     genmask);
 	if (IS_ERR(obj))
 		return -ENOENT;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 27/51] netfilter: nf_tables: initial support for extended ACK reporting
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

Keep it simple to start with, just report attribute offsets that can be
useful to userspace when representating errors to users.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 299 +++++++++++++++++++++++++++++-------------
 1 file changed, 206 insertions(+), 93 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f65e650b61aa..2f14cadd9922 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -582,8 +582,10 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 	}
 
 	table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]);
 		return PTR_ERR(table);
+	}
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -699,21 +701,23 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
-	const struct nlattr *name;
-	struct nft_table *table;
 	int family = nfmsg->nfgen_family;
+	const struct nlattr *attr;
+	struct nft_table *table;
 	u32 flags = 0;
 	struct nft_ctx ctx;
 	int err;
 
-	name = nla[NFTA_TABLE_NAME];
-	table = nft_table_lookup(net, name, family, genmask);
+	attr = nla[NFTA_TABLE_NAME];
+	table = nft_table_lookup(net, attr, family, genmask);
 	if (IS_ERR(table)) {
 		if (PTR_ERR(table) != -ENOENT)
 			return PTR_ERR(table);
 	} else {
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+			NL_SET_BAD_ATTR(extack, attr);
 			return -EEXIST;
+		}
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
@@ -732,7 +736,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	if (table == NULL)
 		goto err_kzalloc;
 
-	table->name = nla_strdup(name, GFP_KERNEL);
+	table->name = nla_strdup(attr, GFP_KERNEL);
 	if (table->name == NULL)
 		goto err_strdup;
 
@@ -855,8 +859,9 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
-	struct nft_table *table;
 	int family = nfmsg->nfgen_family;
+	const struct nlattr *attr;
+	struct nft_table *table;
 	struct nft_ctx ctx;
 
 	nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
@@ -864,15 +869,18 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 	    (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
 		return nft_flush(&ctx, family);
 
-	if (nla[NFTA_TABLE_HANDLE])
-		table = nft_table_lookup_byhandle(net, nla[NFTA_TABLE_HANDLE],
-						  genmask);
-	else
-		table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family,
-					 genmask);
+	if (nla[NFTA_TABLE_HANDLE]) {
+		attr = nla[NFTA_TABLE_HANDLE];
+		table = nft_table_lookup_byhandle(net, attr, genmask);
+	} else {
+		attr = nla[NFTA_TABLE_NAME];
+		table = nft_table_lookup(net, attr, family, genmask);
+	}
 
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, attr);
 		return PTR_ERR(table);
+	}
 
 	if (nlh->nlmsg_flags & NLM_F_NONREC &&
 	    table->use > 0)
@@ -1164,12 +1172,16 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 	}
 
 	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
-	if (IS_ERR(chain))
+	if (IS_ERR(chain)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
 		return PTR_ERR(chain);
+	}
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -1531,9 +1543,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 			      struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nlattr * uninitialized_var(name);
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
+	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	u8 policy = NF_ACCEPT;
@@ -1544,34 +1556,45 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
 	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	chain = NULL;
-	name = nla[NFTA_CHAIN_NAME];
+	attr = nla[NFTA_CHAIN_NAME];
 
 	if (nla[NFTA_CHAIN_HANDLE]) {
 		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
 		chain = nft_chain_lookup_byhandle(table, handle, genmask);
-		if (IS_ERR(chain))
+		if (IS_ERR(chain)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]);
 			return PTR_ERR(chain);
+		}
+		attr = nla[NFTA_CHAIN_HANDLE];
 	} else {
-		chain = nft_chain_lookup(table, name, genmask);
+		chain = nft_chain_lookup(table, attr, genmask);
 		if (IS_ERR(chain)) {
-			if (PTR_ERR(chain) != -ENOENT)
+			if (PTR_ERR(chain) != -ENOENT) {
+				NL_SET_BAD_ATTR(extack, attr);
 				return PTR_ERR(chain);
+			}
 			chain = NULL;
 		}
 	}
 
 	if (nla[NFTA_CHAIN_POLICY]) {
 		if (chain != NULL &&
-		    !nft_is_base_chain(chain))
+		    !nft_is_base_chain(chain)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
 			return -EOPNOTSUPP;
+		}
 
 		if (chain == NULL &&
-		    nla[NFTA_CHAIN_HOOK] == NULL)
+		    nla[NFTA_CHAIN_HOOK] == NULL) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
 			return -EOPNOTSUPP;
+		}
 
 		policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
 		switch (policy) {
@@ -1586,8 +1609,10 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
 	if (chain != NULL) {
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+			NL_SET_BAD_ATTR(extack, attr);
 			return -EEXIST;
+		}
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
@@ -1604,27 +1629,34 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
+	int family = nfmsg->nfgen_family;
+	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule;
-	int family = nfmsg->nfgen_family;
 	struct nft_ctx ctx;
 	u64 handle;
 	u32 use;
 	int err;
 
 	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	if (nla[NFTA_CHAIN_HANDLE]) {
-		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		attr = nla[NFTA_CHAIN_HANDLE];
+		handle = be64_to_cpu(nla_get_be64(attr));
 		chain = nft_chain_lookup_byhandle(table, handle, genmask);
 	} else {
-		chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+		attr = nla[NFTA_CHAIN_NAME];
+		chain = nft_chain_lookup(table, attr, genmask);
 	}
-	if (IS_ERR(chain))
+	if (IS_ERR(chain)) {
+		NL_SET_BAD_ATTR(extack, attr);
 		return PTR_ERR(chain);
+	}
 
 	if (nlh->nlmsg_flags & NLM_F_NONREC &&
 	    chain->use > 0)
@@ -1646,8 +1678,10 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 	/* There are rules and elements that are still holding references to us,
 	 * we cannot do a recursive removal in this case.
 	 */
-	if (use > 0)
+	if (use > 0) {
+		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
+	}
 
 	return nft_delchain(&ctx);
 }
@@ -2157,16 +2191,22 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 	}
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
-	if (IS_ERR(chain))
+	if (IS_ERR(chain)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 		return PTR_ERR(chain);
+	}
 
 	rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
-	if (IS_ERR(rule))
+	if (IS_ERR(rule)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
 		return PTR_ERR(rule);
+	}
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -2230,21 +2270,29 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
-	if (IS_ERR(chain))
+	if (IS_ERR(chain)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 		return PTR_ERR(chain);
+	}
 
 	if (nla[NFTA_RULE_HANDLE]) {
 		handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
 		rule = __nft_rule_lookup(chain, handle);
-		if (IS_ERR(rule))
+		if (IS_ERR(rule)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
 			return PTR_ERR(rule);
+		}
 
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
 			return -EEXIST;
+		}
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			old_rule = rule;
 		else
@@ -2264,8 +2312,10 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 
 		pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
 		old_rule = __nft_rule_lookup(chain, pos_handle);
-		if (IS_ERR(old_rule))
+		if (IS_ERR(old_rule)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
 			return PTR_ERR(old_rule);
+		}
 	}
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
@@ -2399,13 +2449,17 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 	struct nft_ctx ctx;
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	if (nla[NFTA_RULE_CHAIN]) {
 		chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
-		if (IS_ERR(chain))
+		if (IS_ERR(chain)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 			return PTR_ERR(chain);
+		}
 	}
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
@@ -2413,14 +2467,18 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 	if (chain) {
 		if (nla[NFTA_RULE_HANDLE]) {
 			rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
-			if (IS_ERR(rule))
+			if (IS_ERR(rule)) {
+				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
 				return PTR_ERR(rule);
+			}
 
 			err = nft_delrule(&ctx, rule);
 		} else if (nla[NFTA_RULE_ID]) {
 			rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
-			if (IS_ERR(rule))
+			if (IS_ERR(rule)) {
+				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
 				return PTR_ERR(rule);
+			}
 
 			err = nft_delrule(&ctx, rule);
 		} else {
@@ -2588,6 +2646,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
 				     const struct sk_buff *skb,
 				     const struct nlmsghdr *nlh,
 				     const struct nlattr * const nla[],
+				     struct netlink_ext_ack *extack,
 				     u8 genmask)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2597,8 +2656,10 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
 	if (nla[NFTA_SET_TABLE] != NULL) {
 		table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
 					 genmask);
-		if (IS_ERR(table))
+		if (IS_ERR(table)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
 			return PTR_ERR(table);
+		}
 	}
 
 	nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
@@ -2910,7 +2971,8 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
 	int err;
 
 	/* Verify existence before starting dump */
-	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
+	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
+					genmask);
 	if (err < 0)
 		return err;
 
@@ -3090,20 +3152,27 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
 	table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
 	set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set)) {
-		if (PTR_ERR(set) != -ENOENT)
+		if (PTR_ERR(set) != -ENOENT) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
 			return PTR_ERR(set);
+		}
 	} else {
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
 			return -EEXIST;
+		}
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
+
 		return 0;
 	}
 
@@ -3204,6 +3273,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
+	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
 	int err;
@@ -3213,21 +3283,28 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 	if (nla[NFTA_SET_TABLE] == NULL)
 		return -EINVAL;
 
-	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
+	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
+					genmask);
 	if (err < 0)
 		return err;
 
-	if (nla[NFTA_SET_HANDLE])
-		set = nft_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE],
-					      genmask);
-	else
-		set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+	if (nla[NFTA_SET_HANDLE]) {
+		attr = nla[NFTA_SET_HANDLE];
+		set = nft_set_lookup_byhandle(ctx.table, attr, genmask);
+	} else {
+		attr = nla[NFTA_SET_NAME];
+		set = nft_set_lookup(ctx.table, attr, genmask);
+	}
 
+	if (IS_ERR(set)) {
+		NL_SET_BAD_ATTR(extack, attr);
+		return PTR_ERR(set);
+	}
 	if (!list_empty(&set->bindings) ||
-	    (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0))
+	    (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
+		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
+	}
 
 	return nft_delset(&ctx, set);
 }
@@ -3355,6 +3432,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
 				      const struct sk_buff *skb,
 				      const struct nlmsghdr *nlh,
 				      const struct nlattr * const nla[],
+				      struct netlink_ext_ack *extack,
 				      u8 genmask)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -3363,8 +3441,10 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
 
 	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
 				 genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
 	return 0;
@@ -3694,7 +3774,8 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 	struct nft_ctx ctx;
 	int rem, err = 0;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+					 genmask);
 	if (err < 0)
 		return err;
 
@@ -4048,7 +4129,8 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
 		return -EINVAL;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+					 genmask);
 	if (err < 0)
 		return err;
 
@@ -4236,7 +4318,8 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
 	struct nft_ctx ctx;
 	int rem, err = 0;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+					 genmask);
 	if (err < 0)
 		return err;
 
@@ -4491,20 +4574,24 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 		return -EINVAL;
 
 	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
 	obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
-		if (err != -ENOENT)
+		if (err != -ENOENT) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
 			return err;
-
+		}
 	} else {
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
 			return -EEXIST;
-
+		}
 		return 0;
 	}
 
@@ -4716,13 +4803,17 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 		return -EINVAL;
 
 	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
 	obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
-	if (IS_ERR(obj))
+	if (IS_ERR(obj)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
 		return PTR_ERR(obj);
+	}
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -4761,6 +4852,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
+	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_object *obj;
 	struct nft_ctx ctx;
@@ -4771,20 +4863,28 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
 		return -EINVAL;
 
 	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	if (nla[NFTA_OBJ_HANDLE])
-		obj = nft_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
-					      objtype, genmask);
-	else
-		obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype,
-				     genmask);
-	if (IS_ERR(obj))
+	if (nla[NFTA_OBJ_HANDLE]) {
+		attr = nla[NFTA_OBJ_HANDLE];
+		obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask);
+	} else {
+		attr = nla[NFTA_OBJ_NAME];
+		obj = nft_obj_lookup(table, attr, objtype, genmask);
+	}
+
+	if (IS_ERR(obj)) {
+		NL_SET_BAD_ATTR(extack, attr);
 		return PTR_ERR(obj);
-	if (obj->use > 0)
+	}
+	if (obj->use > 0) {
+		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
+	}
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
@@ -5046,18 +5146,24 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 
 	table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
 				 genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
 		return PTR_ERR(table);
+	}
 
 	flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
 					 genmask);
 	if (IS_ERR(flowtable)) {
 		err = PTR_ERR(flowtable);
-		if (err != -ENOENT)
+		if (err != -ENOENT) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
 			return err;
+		}
 	} else {
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
 			return -EEXIST;
+		}
 
 		return 0;
 	}
@@ -5153,6 +5259,7 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
 	struct nft_flowtable *flowtable;
+	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_ctx ctx;
 
@@ -5163,21 +5270,27 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 
 	table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
 				 genmask);
-	if (IS_ERR(table))
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
 		return PTR_ERR(table);
+	}
 
-	if (nla[NFTA_FLOWTABLE_HANDLE])
-		flowtable = nft_flowtable_lookup_byhandle(table,
-							  nla[NFTA_FLOWTABLE_HANDLE],
-							  genmask);
-	else
-		flowtable = nft_flowtable_lookup(table,
-						 nla[NFTA_FLOWTABLE_NAME],
-						 genmask);
-	if (IS_ERR(flowtable))
-                return PTR_ERR(flowtable);
-	if (flowtable->use > 0)
+	if (nla[NFTA_FLOWTABLE_HANDLE]) {
+		attr = nla[NFTA_FLOWTABLE_HANDLE];
+		flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask);
+	} else {
+		attr = nla[NFTA_FLOWTABLE_NAME];
+		flowtable = nft_flowtable_lookup(table, attr, genmask);
+	}
+
+	if (IS_ERR(flowtable)) {
+		NL_SET_BAD_ATTR(extack, attr);
+		return PTR_ERR(flowtable);
+	}
+	if (flowtable->use > 0) {
+		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
+	}
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 25/51] netfilter: nf_flow_table: fix offloading connections with SNAT+DNAT
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Pass all NAT types to the flow offload struct, otherwise parts of the
address/port pair do not get translated properly, causing connection
stalls

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 0699981a8511..eb0d1658ac05 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -84,7 +84,7 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
 
 	if (ct->status & IPS_SRC_NAT)
 		flow->flags |= FLOW_OFFLOAD_SNAT;
-	else if (ct->status & IPS_DST_NAT)
+	if (ct->status & IPS_DST_NAT)
 		flow->flags |= FLOW_OFFLOAD_DNAT;
 
 	return flow;
-- 
2.11.0

^ permalink raw reply related

* [PATCH 24/51] netfilter: nf_flow_table: add missing condition for TCP state check
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Avoid looking at unrelated fields in UDP packets

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_ip.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 692c75ef5cb7..82451b7e0acb 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,11 +15,14 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 
-static int nf_flow_tcp_state_check(struct flow_offload *flow,
-				   struct sk_buff *skb, unsigned int thoff)
+static int nf_flow_state_check(struct flow_offload *flow, int proto,
+			       struct sk_buff *skb, unsigned int thoff)
 {
 	struct tcphdr *tcph;
 
+	if (proto != IPPROTO_TCP)
+		return 0;
+
 	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
 		return -1;
 
@@ -248,7 +251,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 		return NF_DROP;
 
 	thoff = ip_hdr(skb)->ihl * 4;
-	if (nf_flow_tcp_state_check(flow, skb, thoff))
+	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
 		return NF_ACCEPT;
 
 	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
@@ -460,7 +463,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
 
-	if (nf_flow_tcp_state_check(flow, skb, sizeof(*ip6h)))
+	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
+				sizeof(*ip6h)))
 		return NF_ACCEPT;
 
 	if (skb_try_make_writable(skb, sizeof(*ip6h)))
-- 
2.11.0

^ permalink raw reply related

* [PATCH 22/51] netfilter: nf_flow_table: add support for sending flows back to the slow path
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Since conntrack hasn't seen any packets from the offloaded flow in a
while, and the timeout for offloaded flows is set to an extremely long
value, we need to fix up the state before we can send a flow back to the
slow path.

For TCP, reset td_maxwin in both directions, which makes it resync its
state on the next packets.

Use the regular timeout for TCP and UDP established connections.

This allows the slow path to take over again once the offload state has
been torn down

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_core.c | 50 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index ff5e17a15963..0699981a8511 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -100,6 +100,43 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
 }
 EXPORT_SYMBOL_GPL(flow_offload_alloc);
 
+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+{
+	tcp->state = TCP_CONNTRACK_ESTABLISHED;
+	tcp->seen[0].td_maxwin = 0;
+	tcp->seen[1].td_maxwin = 0;
+}
+
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+{
+	const struct nf_conntrack_l4proto *l4proto;
+	struct net *net = nf_ct_net(ct);
+	unsigned int *timeouts;
+	unsigned int timeout;
+	int l4num;
+
+	l4num = nf_ct_protonum(ct);
+	if (l4num == IPPROTO_TCP)
+		flow_offload_fixup_tcp(&ct->proto.tcp);
+
+	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+	if (!l4proto)
+		return;
+
+	timeouts = l4proto->get_timeouts(net);
+	if (!timeouts)
+		return;
+
+	if (l4num == IPPROTO_TCP)
+		timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+	else if (l4num == IPPROTO_UDP)
+		timeout = timeouts[UDP_CT_REPLIED];
+	else
+		return;
+
+	ct->timeout = nfct_time_stamp + timeout;
+}
+
 void flow_offload_free(struct flow_offload *flow)
 {
 	struct flow_offload_entry *e;
@@ -107,7 +144,8 @@ void flow_offload_free(struct flow_offload *flow)
 	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
 	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
 	e = container_of(flow, struct flow_offload_entry, flow);
-	nf_ct_delete(e->ct, 0, 0);
+	if (flow->flags & FLOW_OFFLOAD_DYING)
+		nf_ct_delete(e->ct, 0, 0);
 	nf_ct_put(e->ct);
 	kfree_rcu(e, rcu_head);
 }
@@ -164,6 +202,8 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
 static void flow_offload_del(struct nf_flowtable *flow_table,
 			     struct flow_offload *flow)
 {
+	struct flow_offload_entry *e;
+
 	rhashtable_remove_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
 			       nf_flow_offload_rhash_params);
@@ -171,12 +211,20 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
 			       nf_flow_offload_rhash_params);
 
+	e = container_of(flow, struct flow_offload_entry, flow);
+	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
+
 	flow_offload_free(flow);
 }
 
 void flow_offload_teardown(struct flow_offload *flow)
 {
+	struct flow_offload_entry *e;
+
 	flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+
+	e = container_of(flow, struct flow_offload_entry, flow);
+	flow_offload_fixup_ct_state(e->ct);
 }
 EXPORT_SYMBOL_GPL(flow_offload_teardown);
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 23/51] netfilter: nf_flow_table: tear down TCP flows if RST or FIN was seen
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Allow the slow path to handle the shutdown of the connection with proper
timeouts. The packet containing RST/FIN is also sent to the slow path
and the TCP conntrack module will update its state.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_ip.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index dc570fb7641d..692c75ef5cb7 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,6 +15,23 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 
+static int nf_flow_tcp_state_check(struct flow_offload *flow,
+				   struct sk_buff *skb, unsigned int thoff)
+{
+	struct tcphdr *tcph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
+		return -1;
+
+	tcph = (void *)(skb_network_header(skb) + thoff);
+	if (unlikely(tcph->fin || tcph->rst)) {
+		flow_offload_teardown(flow);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
 			      __be32 addr, __be32 new_addr)
 {
@@ -119,10 +136,9 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 }
 
 static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			  enum flow_offload_tuple_dir dir)
+			  unsigned int thoff, enum flow_offload_tuple_dir dir)
 {
 	struct iphdr *iph = ip_hdr(skb);
-	unsigned int thoff = iph->ihl * 4;
 
 	if (flow->flags & FLOW_OFFLOAD_SNAT &&
 	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
@@ -202,6 +218,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	struct flow_offload *flow;
 	struct net_device *outdev;
 	const struct rtable *rt;
+	unsigned int thoff;
 	struct iphdr *iph;
 	__be32 nexthop;
 
@@ -230,8 +247,12 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (skb_try_make_writable(skb, sizeof(*iph)))
 		return NF_DROP;
 
+	thoff = ip_hdr(skb)->ihl * 4;
+	if (nf_flow_tcp_state_check(flow, skb, thoff))
+		return NF_ACCEPT;
+
 	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-	    nf_flow_nat_ip(flow, skb, dir) < 0)
+	    nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
 		return NF_DROP;
 
 	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
@@ -439,6 +460,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
 
+	if (nf_flow_tcp_state_check(flow, skb, sizeof(*ip6h)))
+		return NF_ACCEPT;
+
 	if (skb_try_make_writable(skb, sizeof(*ip6h)))
 		return NF_DROP;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 21/51] netfilter: nf_flow_table: in flow_offload_lookup, skip entries being deleted
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Preparation for sending flows back to the slow path

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_core.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 5a81e4f771e9..ff5e17a15963 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -184,8 +184,21 @@ struct flow_offload_tuple_rhash *
 flow_offload_lookup(struct nf_flowtable *flow_table,
 		    struct flow_offload_tuple *tuple)
 {
-	return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
-				      nf_flow_offload_rhash_params);
+	struct flow_offload_tuple_rhash *tuplehash;
+	struct flow_offload *flow;
+	int dir;
+
+	tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+					   nf_flow_offload_rhash_params);
+	if (!tuplehash)
+		return NULL;
+
+	dir = tuplehash->tuple.dir;
+	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+	if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
+		return NULL;
+
+	return tuplehash;
 }
 EXPORT_SYMBOL_GPL(flow_offload_lookup);
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 20/51] netfilter: nf_flow_table: add a new flow state for tearing down offloading
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

On cleanup, this will be treated differently from FLOW_OFFLOAD_DYING:

If FLOW_OFFLOAD_DYING is set, the connection is going away, so both the
offload state and the connection tracking entry will be deleted.

If FLOW_OFFLOAD_TEARDOWN is set, the connection remains alive, but
the offload state is torn down. This is useful for cases that require
more complex state tracking / timeout handling on TCP, or if the
connection has been idle for too long.

Support for sending flows back to the slow path will be implemented in
a following patch

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  2 ++
 net/netfilter/nf_flow_table_core.c    | 22 ++++++++++++++--------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 5aa49524ebef..ba9fa4592f2b 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -68,6 +68,7 @@ struct flow_offload_tuple_rhash {
 #define FLOW_OFFLOAD_SNAT	0x1
 #define FLOW_OFFLOAD_DNAT	0x2
 #define FLOW_OFFLOAD_DYING	0x4
+#define FLOW_OFFLOAD_TEARDOWN	0x8
 
 struct flow_offload {
 	struct flow_offload_tuple_rhash		tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -103,6 +104,7 @@ void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
 int nf_flow_table_init(struct nf_flowtable *flow_table);
 void nf_flow_table_free(struct nf_flowtable *flow_table);
 
+void flow_offload_teardown(struct flow_offload *flow);
 static inline void flow_offload_dead(struct flow_offload *flow)
 {
 	flow->flags |= FLOW_OFFLOAD_DYING;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 0d38f20fd226..5a81e4f771e9 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -174,6 +174,12 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 	flow_offload_free(flow);
 }
 
+void flow_offload_teardown(struct flow_offload *flow)
+{
+	flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+}
+EXPORT_SYMBOL_GPL(flow_offload_teardown);
+
 struct flow_offload_tuple_rhash *
 flow_offload_lookup(struct nf_flowtable *flow_table,
 		    struct flow_offload_tuple *tuple)
@@ -226,11 +232,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
 	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
 }
 
-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
-{
-	return flow->flags & FLOW_OFFLOAD_DYING;
-}
-
 static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
 {
 	struct flow_offload_tuple_rhash *tuplehash;
@@ -258,7 +259,8 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
 		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
 
 		if (nf_flow_has_expired(flow) ||
-		    nf_flow_is_dying(flow))
+		    (flow->flags & (FLOW_OFFLOAD_DYING |
+				    FLOW_OFFLOAD_TEARDOWN)))
 			flow_offload_del(flow_table, flow);
 	}
 out:
@@ -419,10 +421,14 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
 {
 	struct net_device *dev = data;
 
-	if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+	if (!dev) {
+		flow_offload_teardown(flow);
 		return;
+	}
 
-	flow_offload_dead(flow);
+	if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+	    flow->tuplehash[1].tuple.iifidx == dev->ifindex)
+		flow_offload_dead(flow);
 }
 
 static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
-- 
2.11.0

^ permalink raw reply related

* [PATCH 18/51] netfilter: nf_flow_table: track flow tables in nf_flow_table directly
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Avoids having nf_flow_table depend on nftables (useful for future
iptables backport work)

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  1 +
 include/net/netfilter/nf_tables.h     |  3 ---
 net/netfilter/nf_flow_table_core.c    | 21 ++++++++++++++++++---
 net/netfilter/nf_tables_api.c         | 17 -----------------
 4 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index f876e32a60b8..ab408adba688 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -21,6 +21,7 @@ struct nf_flowtable_type {
 };
 
 struct nf_flowtable {
+	struct list_head		list;
 	struct rhashtable		rhashtable;
 	const struct nf_flowtable_type	*type;
 	struct delayed_work		gc_work;
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index cd368d1b8cb8..2f2062ae1c45 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1109,9 +1109,6 @@ struct nft_flowtable {
 struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
 						 const struct nlattr *nla,
 						 u8 genmask);
-void nft_flow_table_iterate(struct net *net,
-			    void (*iter)(struct nf_flowtable *flowtable, void *data),
-			    void *data);
 
 void nft_register_flowtable_type(struct nf_flowtable_type *type);
 void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 09d1be669c39..e761359b56a9 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -18,6 +18,9 @@ struct flow_offload_entry {
 	struct rcu_head		rcu_head;
 };
 
+static DEFINE_MUTEX(flowtable_lock);
+static LIST_HEAD(flowtables);
+
 static void
 flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
 		      struct nf_flow_route *route,
@@ -410,6 +413,10 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
 	queue_delayed_work(system_power_efficient_wq,
 			   &flowtable->gc_work, HZ);
 
+	mutex_lock(&flowtable_lock);
+	list_add(&flowtable->list, &flowtables);
+	mutex_unlock(&flowtable_lock);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_init);
@@ -425,20 +432,28 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
 }
 
 static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
-					  void *data)
+					  struct net_device *dev)
 {
-	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
 	flush_delayed_work(&flowtable->gc_work);
 }
 
 void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
 {
-	nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+	struct nf_flowtable *flowtable;
+
+	mutex_lock(&flowtable_lock);
+	list_for_each_entry(flowtable, &flowtables, list)
+		nf_flow_table_iterate_cleanup(flowtable, dev);
+	mutex_unlock(&flowtable_lock);
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
 
 void nf_flow_table_free(struct nf_flowtable *flow_table)
 {
+	mutex_lock(&flowtable_lock);
+	list_del(&flow_table->list);
+	mutex_unlock(&flowtable_lock);
 	cancel_delayed_work_sync(&flow_table->gc_work);
 	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
 	WARN_ON(!nf_flow_offload_gc_step(flow_table));
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 517bb93c00fb..16b67f54b3d2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5060,23 +5060,6 @@ static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
 	return ERR_PTR(-ENOENT);
 }
 
-void nft_flow_table_iterate(struct net *net,
-			    void (*iter)(struct nf_flowtable *flowtable, void *data),
-			    void *data)
-{
-	struct nft_flowtable *flowtable;
-	const struct nft_table *table;
-
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_for_each_entry(table, &net->nft.tables, list) {
-		list_for_each_entry(flowtable, &table->flowtables, list) {
-			iter(&flowtable->data, data);
-		}
-	}
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-}
-EXPORT_SYMBOL_GPL(nft_flow_table_iterate);
-
 static void nft_unregister_flowtable_net_hooks(struct net *net,
 					       struct nft_flowtable *flowtable)
 {
-- 
2.11.0

^ permalink raw reply related

* [PATCH 19/51] netfilter: nf_flow_table: make flow_offload_dead inline
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

It is too trivial to keep as a separate exported function

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h | 5 ++++-
 net/netfilter/nf_flow_table_core.c    | 6 ------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ab408adba688..5aa49524ebef 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -103,7 +103,10 @@ void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
 int nf_flow_table_init(struct nf_flowtable *flow_table);
 void nf_flow_table_free(struct nf_flowtable *flow_table);
 
-void flow_offload_dead(struct flow_offload *flow);
+static inline void flow_offload_dead(struct flow_offload *flow)
+{
+	flow->flags |= FLOW_OFFLOAD_DYING;
+}
 
 int nf_flow_snat_port(const struct flow_offload *flow,
 		      struct sk_buff *skb, unsigned int thoff,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index e761359b56a9..0d38f20fd226 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -113,12 +113,6 @@ void flow_offload_free(struct flow_offload *flow)
 }
 EXPORT_SYMBOL_GPL(flow_offload_free);
 
-void flow_offload_dead(struct flow_offload *flow)
-{
-	flow->flags |= FLOW_OFFLOAD_DYING;
-}
-EXPORT_SYMBOL_GPL(flow_offload_dead);
-
 static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
 {
 	const struct flow_offload_tuple *tuple = data;
-- 
2.11.0

^ permalink raw reply related

* [PATCH 16/51] netfilter: nf_flow_table: move init code to nf_flow_table_core.c
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Reduces duplication of .gc and .params in flowtable type definitions and
makes the API clearer

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h   |   6 +-
 net/ipv4/netfilter/nf_flow_table_ipv4.c |   3 +-
 net/ipv6/netfilter/nf_flow_table_ipv6.c |   3 +-
 net/netfilter/nf_flow_table_core.c      | 102 +++++++++++++++++++-------------
 net/netfilter/nf_flow_table_inet.c      |   3 +-
 net/netfilter/nf_tables_api.c           |  22 +++----
 6 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 76ee5c81b752..f876e32a60b8 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -14,9 +14,8 @@ struct nf_flowtable;
 struct nf_flowtable_type {
 	struct list_head		list;
 	int				family;
-	void				(*gc)(struct work_struct *work);
+	int				(*init)(struct nf_flowtable *ft);
 	void				(*free)(struct nf_flowtable *ft);
-	const struct rhashtable_params	*params;
 	nf_hookfn			*hook;
 	struct module			*owner;
 };
@@ -100,9 +99,8 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
 
 void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
 
+int nf_flow_table_init(struct nf_flowtable *flow_table);
 void nf_flow_table_free(struct nf_flowtable *flow_table);
-void nf_flow_offload_work_gc(struct work_struct *work);
-extern const struct rhashtable_params nf_flow_offload_rhash_params;
 
 void flow_offload_dead(struct flow_offload *flow);
 
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index b6e43ff0c7b7..e1e56d7123d2 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -7,8 +7,7 @@
 
 static struct nf_flowtable_type flowtable_ipv4 = {
 	.family		= NFPROTO_IPV4,
-	.params		= &nf_flow_offload_rhash_params,
-	.gc		= nf_flow_offload_work_gc,
+	.init		= nf_flow_table_init,
 	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_ip_hook,
 	.owner		= THIS_MODULE,
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index f1804ce8d561..c511d206bf9b 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -8,8 +8,7 @@
 
 static struct nf_flowtable_type flowtable_ipv6 = {
 	.family		= NFPROTO_IPV6,
-	.params		= &nf_flow_offload_rhash_params,
-	.gc		= nf_flow_offload_work_gc,
+	.init		= nf_flow_table_init,
 	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_ipv6_hook,
 	.owner		= THIS_MODULE,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 7403a0dfddf7..09d1be669c39 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -116,16 +116,50 @@ void flow_offload_dead(struct flow_offload *flow)
 }
 EXPORT_SYMBOL_GPL(flow_offload_dead);
 
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+{
+	const struct flow_offload_tuple *tuple = data;
+
+	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+	const struct flow_offload_tuple_rhash *tuplehash = data;
+
+	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+}
+
+static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+					const void *ptr)
+{
+	const struct flow_offload_tuple *tuple = arg->key;
+	const struct flow_offload_tuple_rhash *x = ptr;
+
+	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+		return 1;
+
+	return 0;
+}
+
+static const struct rhashtable_params nf_flow_offload_rhash_params = {
+	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
+	.hashfn			= flow_offload_hash,
+	.obj_hashfn		= flow_offload_hash_obj,
+	.obj_cmpfn		= flow_offload_hash_cmp,
+	.automatic_shrinking	= true,
+};
+
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 {
 	flow->timeout = (u32)jiffies;
 
 	rhashtable_insert_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-			       *flow_table->type->params);
+			       nf_flow_offload_rhash_params);
 	rhashtable_insert_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
-			       *flow_table->type->params);
+			       nf_flow_offload_rhash_params);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(flow_offload_add);
@@ -135,10 +169,10 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
 {
 	rhashtable_remove_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-			       *flow_table->type->params);
+			       nf_flow_offload_rhash_params);
 	rhashtable_remove_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
-			       *flow_table->type->params);
+			       nf_flow_offload_rhash_params);
 
 	flow_offload_free(flow);
 }
@@ -148,7 +182,7 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
 		    struct flow_offload_tuple *tuple)
 {
 	return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
-				      *flow_table->type->params);
+				      nf_flow_offload_rhash_params);
 }
 EXPORT_SYMBOL_GPL(flow_offload_lookup);
 
@@ -237,7 +271,7 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
 	return 1;
 }
 
-void nf_flow_offload_work_gc(struct work_struct *work)
+static void nf_flow_offload_work_gc(struct work_struct *work)
 {
 	struct nf_flowtable *flow_table;
 
@@ -245,42 +279,6 @@ void nf_flow_offload_work_gc(struct work_struct *work)
 	nf_flow_offload_gc_step(flow_table);
 	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
 }
-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
-
-static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
-{
-	const struct flow_offload_tuple *tuple = data;
-
-	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
-}
-
-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
-{
-	const struct flow_offload_tuple_rhash *tuplehash = data;
-
-	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
-}
-
-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
-					const void *ptr)
-{
-	const struct flow_offload_tuple *tuple = arg->key;
-	const struct flow_offload_tuple_rhash *x = ptr;
-
-	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
-		return 1;
-
-	return 0;
-}
-
-const struct rhashtable_params nf_flow_offload_rhash_params = {
-	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
-	.hashfn			= flow_offload_hash,
-	.obj_hashfn		= flow_offload_hash_obj,
-	.obj_cmpfn		= flow_offload_hash_cmp,
-	.automatic_shrinking	= true,
-};
-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
 
 static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
 				__be16 port, __be16 new_port)
@@ -398,6 +396,24 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
 }
 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
 
+int nf_flow_table_init(struct nf_flowtable *flowtable)
+{
+	int err;
+
+	INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+
+	err = rhashtable_init(&flowtable->rhashtable,
+			      &nf_flow_offload_rhash_params);
+	if (err < 0)
+		return err;
+
+	queue_delayed_work(system_power_efficient_wq,
+			   &flowtable->gc_work, HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_init);
+
 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
 {
 	struct net_device *dev = data;
@@ -423,8 +439,10 @@ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
 
 void nf_flow_table_free(struct nf_flowtable *flow_table)
 {
+	cancel_delayed_work_sync(&flow_table->gc_work);
 	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
 	WARN_ON(!nf_flow_offload_gc_step(flow_table));
+	rhashtable_destroy(&flow_table->rhashtable);
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_free);
 
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 375a1881d93d..99771aa7e7ea 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -22,8 +22,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
 
 static struct nf_flowtable_type flowtable_inet = {
 	.family		= NFPROTO_INET,
-	.params		= &nf_flow_offload_rhash_params,
-	.gc		= nf_flow_offload_work_gc,
+	.init		= nf_flow_table_init,
 	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_inet_hook,
 	.owner		= THIS_MODULE,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9134cc429ad4..6cd9955916e5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5150,14 +5150,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 	}
 
 	flowtable->data.type = type;
-	err = rhashtable_init(&flowtable->data.rhashtable, type->params);
+	err = type->init(&flowtable->data);
 	if (err < 0)
 		goto err3;
 
 	err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
 					     flowtable);
 	if (err < 0)
-		goto err3;
+		goto err4;
 
 	for (i = 0; i < flowtable->ops_len; i++) {
 		if (!flowtable->ops[i].dev)
@@ -5171,37 +5171,35 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 				if (flowtable->ops[i].dev == ft->ops[k].dev &&
 				    flowtable->ops[i].pf == ft->ops[k].pf) {
 					err = -EBUSY;
-					goto err4;
+					goto err5;
 				}
 			}
 		}
 
 		err = nf_register_net_hook(net, &flowtable->ops[i]);
 		if (err < 0)
-			goto err4;
+			goto err5;
 	}
 
 	err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
 	if (err < 0)
-		goto err5;
-
-	INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc);
-	queue_delayed_work(system_power_efficient_wq,
-			   &flowtable->data.gc_work, HZ);
+		goto err6;
 
 	list_add_tail_rcu(&flowtable->list, &table->flowtables);
 	table->use++;
 
 	return 0;
-err5:
+err6:
 	i = flowtable->ops_len;
-err4:
+err5:
 	for (k = i - 1; k >= 0; k--) {
 		kfree(flowtable->dev_name[k]);
 		nf_unregister_net_hook(net, &flowtable->ops[k]);
 	}
 
 	kfree(flowtable->ops);
+err4:
+	flowtable->data.type->free(&flowtable->data);
 err3:
 	module_put(type->owner);
 err2:
@@ -5485,11 +5483,9 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 
 static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 {
-	cancel_delayed_work_sync(&flowtable->data.gc_work);
 	kfree(flowtable->ops);
 	kfree(flowtable->name);
 	flowtable->data.type->free(&flowtable->data);
-	rhashtable_destroy(&flowtable->data.rhashtable);
 	module_put(flowtable->data.type->owner);
 }
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 17/51] netfilter: nf_flow_table: fix priv pointer for netdev hook
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

The offload ip hook expects a pointer to the flowtable, not to the
rhashtable. Since the rhashtable is the first member, this is safe for
the moment, but breaks as soon as the structure layout changes

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6cd9955916e5..517bb93c00fb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5019,7 +5019,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 		flowtable->ops[i].pf		= NFPROTO_NETDEV;
 		flowtable->ops[i].hooknum	= hooknum;
 		flowtable->ops[i].priority	= priority;
-		flowtable->ops[i].priv		= &flowtable->data.rhashtable;
+		flowtable->ops[i].priv		= &flowtable->data;
 		flowtable->ops[i].hook		= flowtable->data.type->hook;
 		flowtable->ops[i].dev		= dev_array[i];
 		flowtable->dev_name[i]		= kstrdup(dev_array[i]->name,
-- 
2.11.0

^ permalink raw reply related

* [PATCH 14/51] netfilter: nf_flow_table: move ipv6 offload hook code to nf_flow_table
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Useful as preparation for adding iptables support for offload.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/nf_flow_table_ipv6.c | 232 --------------------------------
 net/netfilter/nf_flow_table_ip.c        | 215 +++++++++++++++++++++++++++++
 2 files changed, 215 insertions(+), 232 deletions(-)

diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index 0e6328490142..f1804ce8d561 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -3,240 +3,8 @@
 #include <linux/module.h>
 #include <linux/netfilter.h>
 #include <linux/rhashtable.h>
-#include <linux/ipv6.h>
-#include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_tables.h>
-/* For layer 4 checksum field offset. */
-#include <linux/tcp.h>
-#include <linux/udp.h>
-
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
-				struct in6_addr *addr,
-				struct in6_addr *new_addr)
-{
-	struct tcphdr *tcph;
-
-	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
-	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-		return -1;
-
-	tcph = (void *)(skb_network_header(skb) + thoff);
-	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
-				  new_addr->s6_addr32, true);
-
-	return 0;
-}
-
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
-				struct in6_addr *addr,
-				struct in6_addr *new_addr)
-{
-	struct udphdr *udph;
-
-	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
-	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
-		return -1;
-
-	udph = (void *)(skb_network_header(skb) + thoff);
-	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
-					  new_addr->s6_addr32, true);
-		if (!udph->check)
-			udph->check = CSUM_MANGLED_0;
-	}
-
-	return 0;
-}
-
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
-				    unsigned int thoff, struct in6_addr *addr,
-				    struct in6_addr *new_addr)
-{
-	switch (ip6h->nexthdr) {
-	case IPPROTO_TCP:
-		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
-		break;
-	case IPPROTO_UDP:
-		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
-		break;
-	}
-
-	return 0;
-}
-
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
-			     struct sk_buff *skb, struct ipv6hdr *ip6h,
-			     unsigned int thoff,
-			     enum flow_offload_tuple_dir dir)
-{
-	struct in6_addr addr, new_addr;
-
-	switch (dir) {
-	case FLOW_OFFLOAD_DIR_ORIGINAL:
-		addr = ip6h->saddr;
-		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
-		ip6h->saddr = new_addr;
-		break;
-	case FLOW_OFFLOAD_DIR_REPLY:
-		addr = ip6h->daddr;
-		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
-		ip6h->daddr = new_addr;
-		break;
-	default:
-		return -1;
-	}
-
-	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
-}
-
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
-			     struct sk_buff *skb, struct ipv6hdr *ip6h,
-			     unsigned int thoff,
-			     enum flow_offload_tuple_dir dir)
-{
-	struct in6_addr addr, new_addr;
-
-	switch (dir) {
-	case FLOW_OFFLOAD_DIR_ORIGINAL:
-		addr = ip6h->daddr;
-		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
-		ip6h->daddr = new_addr;
-		break;
-	case FLOW_OFFLOAD_DIR_REPLY:
-		addr = ip6h->saddr;
-		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
-		ip6h->saddr = new_addr;
-		break;
-	default:
-		return -1;
-	}
-
-	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
-}
-
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
-			    struct sk_buff *skb,
-			    enum flow_offload_tuple_dir dir)
-{
-	struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	unsigned int thoff = sizeof(*ip6h);
-
-	if (flow->flags & FLOW_OFFLOAD_SNAT &&
-	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-	     nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
-		return -1;
-	if (flow->flags & FLOW_OFFLOAD_DNAT &&
-	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-	     nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
-		return -1;
-
-	return 0;
-}
-
-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
-			      struct flow_offload_tuple *tuple)
-{
-	struct flow_ports *ports;
-	struct ipv6hdr *ip6h;
-	unsigned int thoff;
-
-	if (!pskb_may_pull(skb, sizeof(*ip6h)))
-		return -1;
-
-	ip6h = ipv6_hdr(skb);
-
-	if (ip6h->nexthdr != IPPROTO_TCP &&
-	    ip6h->nexthdr != IPPROTO_UDP)
-		return -1;
-
-	thoff = sizeof(*ip6h);
-	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
-		return -1;
-
-	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
-
-	tuple->src_v6		= ip6h->saddr;
-	tuple->dst_v6		= ip6h->daddr;
-	tuple->src_port		= ports->source;
-	tuple->dst_port		= ports->dest;
-	tuple->l3proto		= AF_INET6;
-	tuple->l4proto		= ip6h->nexthdr;
-	tuple->iifidx		= dev->ifindex;
-
-	return 0;
-}
-
-/* Based on ip_exceeds_mtu(). */
-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
-{
-	if (skb->len <= mtu)
-		return false;
-
-	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
-		return false;
-
-	return true;
-}
-
-unsigned int
-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
-			  const struct nf_hook_state *state)
-{
-	struct flow_offload_tuple_rhash *tuplehash;
-	struct nf_flowtable *flow_table = priv;
-	struct flow_offload_tuple tuple = {};
-	enum flow_offload_tuple_dir dir;
-	struct flow_offload *flow;
-	struct net_device *outdev;
-	struct in6_addr *nexthop;
-	struct ipv6hdr *ip6h;
-	struct rt6_info *rt;
-
-	if (skb->protocol != htons(ETH_P_IPV6))
-		return NF_ACCEPT;
-
-	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
-		return NF_ACCEPT;
-
-	tuplehash = flow_offload_lookup(flow_table, &tuple);
-	if (tuplehash == NULL)
-		return NF_ACCEPT;
-
-	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
-	if (!outdev)
-		return NF_ACCEPT;
-
-	dir = tuplehash->tuple.dir;
-	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
-
-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
-		return NF_ACCEPT;
-
-	if (skb_try_make_writable(skb, sizeof(*ip6h)))
-		return NF_DROP;
-
-	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-	    nf_flow_nat_ipv6(flow, skb, dir) < 0)
-		return NF_DROP;
-
-	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
-	ip6h = ipv6_hdr(skb);
-	ip6h->hop_limit--;
-
-	skb->dev = outdev;
-	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
-	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
-
-	return NF_STOLEN;
-}
-EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
 
 static struct nf_flowtable_type flowtable_ipv6 = {
 	.family		= NFPROTO_IPV6,
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 103263e0c7c2..dc570fb7641d 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -4,8 +4,11 @@
 #include <linux/netfilter.h>
 #include <linux/rhashtable.h>
 #include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/netdevice.h>
 #include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 /* For layer 4 checksum field offset. */
@@ -242,3 +245,215 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+				struct in6_addr *addr,
+				struct in6_addr *new_addr)
+{
+	struct tcphdr *tcph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+		return -1;
+
+	tcph = (void *)(skb_network_header(skb) + thoff);
+	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+				  new_addr->s6_addr32, true);
+
+	return 0;
+}
+
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+				struct in6_addr *addr,
+				struct in6_addr *new_addr)
+{
+	struct udphdr *udph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
+		return -1;
+
+	udph = (void *)(skb_network_header(skb) + thoff);
+	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+					  new_addr->s6_addr32, true);
+		if (!udph->check)
+			udph->check = CSUM_MANGLED_0;
+	}
+
+	return 0;
+}
+
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+				    unsigned int thoff, struct in6_addr *addr,
+				    struct in6_addr *new_addr)
+{
+	switch (ip6h->nexthdr) {
+	case IPPROTO_TCP:
+		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+			return NF_DROP;
+		break;
+	case IPPROTO_UDP:
+		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+			return NF_DROP;
+		break;
+	}
+
+	return 0;
+}
+
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+			     struct sk_buff *skb, struct ipv6hdr *ip6h,
+			     unsigned int thoff,
+			     enum flow_offload_tuple_dir dir)
+{
+	struct in6_addr addr, new_addr;
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		addr = ip6h->saddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+		ip6h->saddr = new_addr;
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		addr = ip6h->daddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+		ip6h->daddr = new_addr;
+		break;
+	default:
+		return -1;
+	}
+
+	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+			     struct sk_buff *skb, struct ipv6hdr *ip6h,
+			     unsigned int thoff,
+			     enum flow_offload_tuple_dir dir)
+{
+	struct in6_addr addr, new_addr;
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		addr = ip6h->daddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+		ip6h->daddr = new_addr;
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		addr = ip6h->saddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+		ip6h->saddr = new_addr;
+		break;
+	default:
+		return -1;
+	}
+
+	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+			    struct sk_buff *skb,
+			    enum flow_offload_tuple_dir dir)
+{
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	unsigned int thoff = sizeof(*ip6h);
+
+	if (flow->flags & FLOW_OFFLOAD_SNAT &&
+	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+	     nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+		return -1;
+	if (flow->flags & FLOW_OFFLOAD_DNAT &&
+	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+	     nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+		return -1;
+
+	return 0;
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+			      struct flow_offload_tuple *tuple)
+{
+	struct flow_ports *ports;
+	struct ipv6hdr *ip6h;
+	unsigned int thoff;
+
+	if (!pskb_may_pull(skb, sizeof(*ip6h)))
+		return -1;
+
+	ip6h = ipv6_hdr(skb);
+
+	if (ip6h->nexthdr != IPPROTO_TCP &&
+	    ip6h->nexthdr != IPPROTO_UDP)
+		return -1;
+
+	thoff = sizeof(*ip6h);
+	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+		return -1;
+
+	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+	tuple->src_v6		= ip6h->saddr;
+	tuple->dst_v6		= ip6h->daddr;
+	tuple->src_port		= ports->source;
+	tuple->dst_port		= ports->dest;
+	tuple->l3proto		= AF_INET6;
+	tuple->l4proto		= ip6h->nexthdr;
+	tuple->iifidx		= dev->ifindex;
+
+	return 0;
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+			  const struct nf_hook_state *state)
+{
+	struct flow_offload_tuple_rhash *tuplehash;
+	struct nf_flowtable *flow_table = priv;
+	struct flow_offload_tuple tuple = {};
+	enum flow_offload_tuple_dir dir;
+	struct flow_offload *flow;
+	struct net_device *outdev;
+	struct in6_addr *nexthop;
+	struct ipv6hdr *ip6h;
+	struct rt6_info *rt;
+
+	if (skb->protocol != htons(ETH_P_IPV6))
+		return NF_ACCEPT;
+
+	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+		return NF_ACCEPT;
+
+	tuplehash = flow_offload_lookup(flow_table, &tuple);
+	if (tuplehash == NULL)
+		return NF_ACCEPT;
+
+	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+	if (!outdev)
+		return NF_ACCEPT;
+
+	dir = tuplehash->tuple.dir;
+	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+
+	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+		return NF_ACCEPT;
+
+	if (skb_try_make_writable(skb, sizeof(*ip6h)))
+		return NF_DROP;
+
+	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+	    nf_flow_nat_ipv6(flow, skb, dir) < 0)
+		return NF_DROP;
+
+	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+	ip6h = ipv6_hdr(skb);
+	ip6h->hop_limit--;
+
+	skb->dev = outdev;
+	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+	return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
-- 
2.11.0

^ permalink raw reply related

* [PATCH 15/51] netfilter: nf_flow_table: relax mixed ipv4/ipv6 flowtable dependencies
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Since the offload hook code was moved, this table no longer depends on
the IPv4 and IPv6 flowtable modules

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 704b3832dbad..d20664b02ae4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -666,8 +666,7 @@ endif # NF_TABLES
 
 config NF_FLOW_TABLE_INET
 	tristate "Netfilter flow table mixed IPv4/IPv6 module"
-	depends on NF_FLOW_TABLE_IPV4
-	depends on NF_FLOW_TABLE_IPV6
+	depends on NF_FLOW_TABLE
 	help
           This option adds the flow table mixed IPv4/IPv6 support.
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 13/51] netfilter: nf_flow_table: move ip header check out of nf_flow_exceeds_mtu
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Allows the function to be shared with the IPv6 hook code

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_ip.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 034fda963392..103263e0c7c2 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -182,9 +182,6 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	if (skb->len <= mtu)
 		return false;
 
-	if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
-		return false;
-
 	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 		return false;
 
@@ -223,7 +220,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 	rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
 
-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
+	    (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
 		return NF_ACCEPT;
 
 	if (skb_try_make_writable(skb, sizeof(*iph)))
-- 
2.11.0

^ permalink raw reply related

* [PATCH 12/51] netfilter: nf_flow_table: move ipv4 offload hook code to nf_flow_table
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Allows some minor code sharing with the ipv6 hook code and is also
useful as preparation for adding iptables support for offload

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_flow_table_ipv4.c | 241 -------------------------------
 net/netfilter/Makefile                  |   2 +-
 net/netfilter/nf_flow_table_ip.c        | 246 ++++++++++++++++++++++++++++++++
 3 files changed, 247 insertions(+), 242 deletions(-)
 create mode 100644 net/netfilter/nf_flow_table_ip.c

diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index 461b1815e633..b6e43ff0c7b7 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -2,249 +2,8 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/netfilter.h>
-#include <linux/rhashtable.h>
-#include <linux/ip.h>
-#include <linux/netdevice.h>
-#include <net/ip.h>
-#include <net/neighbour.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_tables.h>
-/* For layer 4 checksum field offset. */
-#include <linux/tcp.h>
-#include <linux/udp.h>
-
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
-			      __be32 addr, __be32 new_addr)
-{
-	struct tcphdr *tcph;
-
-	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
-	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-		return -1;
-
-	tcph = (void *)(skb_network_header(skb) + thoff);
-	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
-
-	return 0;
-}
-
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
-			      __be32 addr, __be32 new_addr)
-{
-	struct udphdr *udph;
-
-	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
-	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
-		return -1;
-
-	udph = (void *)(skb_network_header(skb) + thoff);
-	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-		inet_proto_csum_replace4(&udph->check, skb, addr,
-					 new_addr, true);
-		if (!udph->check)
-			udph->check = CSUM_MANGLED_0;
-	}
-
-	return 0;
-}
-
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
-				  unsigned int thoff, __be32 addr,
-				  __be32 new_addr)
-{
-	switch (iph->protocol) {
-	case IPPROTO_TCP:
-		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
-		break;
-	case IPPROTO_UDP:
-		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
-		break;
-	}
-
-	return 0;
-}
-
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			   struct iphdr *iph, unsigned int thoff,
-			   enum flow_offload_tuple_dir dir)
-{
-	__be32 addr, new_addr;
-
-	switch (dir) {
-	case FLOW_OFFLOAD_DIR_ORIGINAL:
-		addr = iph->saddr;
-		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
-		iph->saddr = new_addr;
-		break;
-	case FLOW_OFFLOAD_DIR_REPLY:
-		addr = iph->daddr;
-		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
-		iph->daddr = new_addr;
-		break;
-	default:
-		return -1;
-	}
-	csum_replace4(&iph->check, addr, new_addr);
-
-	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
-}
-
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			   struct iphdr *iph, unsigned int thoff,
-			   enum flow_offload_tuple_dir dir)
-{
-	__be32 addr, new_addr;
-
-	switch (dir) {
-	case FLOW_OFFLOAD_DIR_ORIGINAL:
-		addr = iph->daddr;
-		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
-		iph->daddr = new_addr;
-		break;
-	case FLOW_OFFLOAD_DIR_REPLY:
-		addr = iph->saddr;
-		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
-		iph->saddr = new_addr;
-		break;
-	default:
-		return -1;
-	}
-	csum_replace4(&iph->check, addr, new_addr);
-
-	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
-}
-
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			  enum flow_offload_tuple_dir dir)
-{
-	struct iphdr *iph = ip_hdr(skb);
-	unsigned int thoff = iph->ihl * 4;
-
-	if (flow->flags & FLOW_OFFLOAD_SNAT &&
-	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-	     nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
-		return -1;
-	if (flow->flags & FLOW_OFFLOAD_DNAT &&
-	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-	     nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
-		return -1;
-
-	return 0;
-}
-
-static bool ip_has_options(unsigned int thoff)
-{
-	return thoff != sizeof(struct iphdr);
-}
-
-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
-			    struct flow_offload_tuple *tuple)
-{
-	struct flow_ports *ports;
-	unsigned int thoff;
-	struct iphdr *iph;
-
-	if (!pskb_may_pull(skb, sizeof(*iph)))
-		return -1;
-
-	iph = ip_hdr(skb);
-	thoff = iph->ihl * 4;
-
-	if (ip_is_fragment(iph) ||
-	    unlikely(ip_has_options(thoff)))
-		return -1;
-
-	if (iph->protocol != IPPROTO_TCP &&
-	    iph->protocol != IPPROTO_UDP)
-		return -1;
-
-	thoff = iph->ihl * 4;
-	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
-		return -1;
-
-	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
-
-	tuple->src_v4.s_addr	= iph->saddr;
-	tuple->dst_v4.s_addr	= iph->daddr;
-	tuple->src_port		= ports->source;
-	tuple->dst_port		= ports->dest;
-	tuple->l3proto		= AF_INET;
-	tuple->l4proto		= iph->protocol;
-	tuple->iifidx		= dev->ifindex;
-
-	return 0;
-}
-
-/* Based on ip_exceeds_mtu(). */
-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
-{
-	if (skb->len <= mtu)
-		return false;
-
-	if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
-		return false;
-
-	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
-		return false;
-
-	return true;
-}
-
-unsigned int
-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
-			const struct nf_hook_state *state)
-{
-	struct flow_offload_tuple_rhash *tuplehash;
-	struct nf_flowtable *flow_table = priv;
-	struct flow_offload_tuple tuple = {};
-	enum flow_offload_tuple_dir dir;
-	struct flow_offload *flow;
-	struct net_device *outdev;
-	const struct rtable *rt;
-	struct iphdr *iph;
-	__be32 nexthop;
-
-	if (skb->protocol != htons(ETH_P_IP))
-		return NF_ACCEPT;
-
-	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
-		return NF_ACCEPT;
-
-	tuplehash = flow_offload_lookup(flow_table, &tuple);
-	if (tuplehash == NULL)
-		return NF_ACCEPT;
-
-	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
-	if (!outdev)
-		return NF_ACCEPT;
-
-	dir = tuplehash->tuple.dir;
-	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
-
-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
-		return NF_ACCEPT;
-
-	if (skb_try_make_writable(skb, sizeof(*iph)))
-		return NF_DROP;
-
-	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-	    nf_flow_nat_ip(flow, skb, dir) < 0)
-		return NF_DROP;
-
-	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
-	iph = ip_hdr(skb);
-	ip_decrease_ttl(iph);
-
-	skb->dev = outdev;
-	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
-	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
-
-	return NF_STOLEN;
-}
-EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
 
 static struct nf_flowtable_type flowtable_ipv4 = {
 	.family		= NFPROTO_IPV4,
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 700c5d51e405..3103ed1efe17 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -111,7 +111,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV)	+= nft_fwd_netdev.o
 
 # flow table infrastructure
 obj-$(CONFIG_NF_FLOW_TABLE)	+= nf_flow_table.o
-nf_flow_table-objs := nf_flow_table_core.o
+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
 
 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
 
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
new file mode 100644
index 000000000000..034fda963392
--- /dev/null
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -0,0 +1,246 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+			      __be32 addr, __be32 new_addr)
+{
+	struct tcphdr *tcph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+		return -1;
+
+	tcph = (void *)(skb_network_header(skb) + thoff);
+	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+
+	return 0;
+}
+
+static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+			      __be32 addr, __be32 new_addr)
+{
+	struct udphdr *udph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
+		return -1;
+
+	udph = (void *)(skb_network_header(skb) + thoff);
+	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+		inet_proto_csum_replace4(&udph->check, skb, addr,
+					 new_addr, true);
+		if (!udph->check)
+			udph->check = CSUM_MANGLED_0;
+	}
+
+	return 0;
+}
+
+static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+				  unsigned int thoff, __be32 addr,
+				  __be32 new_addr)
+{
+	switch (iph->protocol) {
+	case IPPROTO_TCP:
+		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+			return NF_DROP;
+		break;
+	case IPPROTO_UDP:
+		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+			return NF_DROP;
+		break;
+	}
+
+	return 0;
+}
+
+static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+			   struct iphdr *iph, unsigned int thoff,
+			   enum flow_offload_tuple_dir dir)
+{
+	__be32 addr, new_addr;
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		addr = iph->saddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+		iph->saddr = new_addr;
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		addr = iph->daddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+		iph->daddr = new_addr;
+		break;
+	default:
+		return -1;
+	}
+	csum_replace4(&iph->check, addr, new_addr);
+
+	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+			   struct iphdr *iph, unsigned int thoff,
+			   enum flow_offload_tuple_dir dir)
+{
+	__be32 addr, new_addr;
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		addr = iph->daddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+		iph->daddr = new_addr;
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		addr = iph->saddr;
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+		iph->saddr = new_addr;
+		break;
+	default:
+		return -1;
+	}
+	csum_replace4(&iph->check, addr, new_addr);
+
+	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+}
+
+static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+			  enum flow_offload_tuple_dir dir)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	unsigned int thoff = iph->ihl * 4;
+
+	if (flow->flags & FLOW_OFFLOAD_SNAT &&
+	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+	     nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+		return -1;
+	if (flow->flags & FLOW_OFFLOAD_DNAT &&
+	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+	     nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+		return -1;
+
+	return 0;
+}
+
+static bool ip_has_options(unsigned int thoff)
+{
+	return thoff != sizeof(struct iphdr);
+}
+
+static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+			    struct flow_offload_tuple *tuple)
+{
+	struct flow_ports *ports;
+	unsigned int thoff;
+	struct iphdr *iph;
+
+	if (!pskb_may_pull(skb, sizeof(*iph)))
+		return -1;
+
+	iph = ip_hdr(skb);
+	thoff = iph->ihl * 4;
+
+	if (ip_is_fragment(iph) ||
+	    unlikely(ip_has_options(thoff)))
+		return -1;
+
+	if (iph->protocol != IPPROTO_TCP &&
+	    iph->protocol != IPPROTO_UDP)
+		return -1;
+
+	thoff = iph->ihl * 4;
+	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+		return -1;
+
+	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+	tuple->src_v4.s_addr	= iph->saddr;
+	tuple->dst_v4.s_addr	= iph->daddr;
+	tuple->src_port		= ports->source;
+	tuple->dst_port		= ports->dest;
+	tuple->l3proto		= AF_INET;
+	tuple->l4proto		= iph->protocol;
+	tuple->iifidx		= dev->ifindex;
+
+	return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu)
+		return false;
+
+	if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
+		return false;
+
+	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
+		return false;
+
+	return true;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+			const struct nf_hook_state *state)
+{
+	struct flow_offload_tuple_rhash *tuplehash;
+	struct nf_flowtable *flow_table = priv;
+	struct flow_offload_tuple tuple = {};
+	enum flow_offload_tuple_dir dir;
+	struct flow_offload *flow;
+	struct net_device *outdev;
+	const struct rtable *rt;
+	struct iphdr *iph;
+	__be32 nexthop;
+
+	if (skb->protocol != htons(ETH_P_IP))
+		return NF_ACCEPT;
+
+	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+		return NF_ACCEPT;
+
+	tuplehash = flow_offload_lookup(flow_table, &tuple);
+	if (tuplehash == NULL)
+		return NF_ACCEPT;
+
+	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+	if (!outdev)
+		return NF_ACCEPT;
+
+	dir = tuplehash->tuple.dir;
+	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+	rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+
+	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+		return NF_ACCEPT;
+
+	if (skb_try_make_writable(skb, sizeof(*iph)))
+		return NF_DROP;
+
+	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+	    nf_flow_nat_ip(flow, skb, dir) < 0)
+		return NF_DROP;
+
+	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+	iph = ip_hdr(skb);
+	ip_decrease_ttl(iph);
+
+	skb->dev = outdev;
+	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+
+	return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
-- 
2.11.0

^ permalink raw reply related

* [PATCH 11/51] netfilter: nf_flow_table: rename nf_flow_table.c to nf_flow_table_core.c
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Preparation for adding more code to the same module

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Makefile                                  | 2 ++
 net/netfilter/{nf_flow_table.c => nf_flow_table_core.c} | 0
 2 files changed, 2 insertions(+)
 rename net/netfilter/{nf_flow_table.c => nf_flow_table_core.c} (100%)

diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index fd32bd2c9521..700c5d51e405 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -111,6 +111,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV)	+= nft_fwd_netdev.o
 
 # flow table infrastructure
 obj-$(CONFIG_NF_FLOW_TABLE)	+= nf_flow_table.o
+nf_flow_table-objs := nf_flow_table_core.o
+
 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
 
 # generic X tables 
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table_core.c
similarity index 100%
rename from net/netfilter/nf_flow_table.c
rename to net/netfilter/nf_flow_table_core.c
-- 
2.11.0

^ permalink raw reply related

* [PATCH 10/51] netfilter: nf_flow_table: cache mtu in struct flow_offload_tuple
From: Pablo Neira Ayuso @ 2018-05-06 22:46 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20180506224709.29100-1-pablo@netfilter.org>

From: Felix Fietkau <nbd@nbd.name>

Reduces the number of cache lines touched in the offload forwarding
path. This is safe because PMTU limits are bypassed for the forwarding
path (see commit f87c10a8aa1e for more details).

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h   |  2 ++
 net/ipv4/netfilter/nf_flow_table_ipv4.c | 17 +++--------------
 net/ipv6/netfilter/nf_flow_table_ipv6.c | 17 +++--------------
 net/netfilter/nf_flow_table.c           |  8 ++++++--
 4 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 09ba67598991..76ee5c81b752 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -55,6 +55,8 @@ struct flow_offload_tuple {
 
 	int				oifidx;
 
+	u16				mtu;
+
 	struct dst_entry		*dst_cache;
 };
 
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index 0cd46bffa469..461b1815e633 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -178,7 +178,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 }
 
 /* Based on ip_exceeds_mtu(). */
-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 {
 	if (skb->len <= mtu)
 		return false;
@@ -192,17 +192,6 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	return true;
 }
 
-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
-{
-	u32 mtu;
-
-	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
-	if (__nf_flow_exceeds_mtu(skb, mtu))
-		return true;
-
-	return false;
-}
-
 unsigned int
 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 			const struct nf_hook_state *state)
@@ -233,9 +222,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-
 	rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
-	if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+
+	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
 
 	if (skb_try_make_writable(skb, sizeof(*iph)))
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index 207cb35569b1..0e6328490142 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -173,7 +173,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 }
 
 /* Based on ip_exceeds_mtu(). */
-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 {
 	if (skb->len <= mtu)
 		return false;
@@ -184,17 +184,6 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	return true;
 }
 
-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
-{
-	u32 mtu;
-
-	mtu = ip6_dst_mtu_forward(&rt->dst);
-	if (__nf_flow_exceeds_mtu(skb, mtu))
-		return true;
-
-	return false;
-}
-
 unsigned int
 nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 			  const struct nf_hook_state *state)
@@ -225,9 +214,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-
 	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
-	if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+
+	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
 
 	if (skb_try_make_writable(skb, sizeof(*ip6h)))
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c
index db0673a40b97..7403a0dfddf7 100644
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -4,6 +4,8 @@
 #include <linux/netfilter.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/ip6_route.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack.h>
@@ -23,6 +25,7 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
 {
 	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
 	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+	struct dst_entry *dst = route->tuple[dir].dst;
 
 	ft->dir = dir;
 
@@ -30,10 +33,12 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
 	case NFPROTO_IPV4:
 		ft->src_v4 = ctt->src.u3.in;
 		ft->dst_v4 = ctt->dst.u3.in;
+		ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
 		break;
 	case NFPROTO_IPV6:
 		ft->src_v6 = ctt->src.u3.in6;
 		ft->dst_v6 = ctt->dst.u3.in6;
+		ft->mtu = ip6_dst_mtu_forward(dst);
 		break;
 	}
 
@@ -44,8 +49,7 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
 
 	ft->iifidx = route->tuple[dir].ifindex;
 	ft->oifidx = route->tuple[!dir].ifindex;
-
-	ft->dst_cache = route->tuple[dir].dst;
+	ft->dst_cache = dst;
 }
 
 struct flow_offload *
-- 
2.11.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox