* [PATCH 06/50] netfilter: built-in NAT support for UDPlite
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Davide Caratti <dcaratti@redhat.com>
CONFIG_NF_NAT_PROTO_UDPLITE is no more a tristate. When set to y, NAT
support for UDPlite protocol is built-in into nf_nat.ko.
footprint test:
(nf_nat_proto_) |udplite || nf_nat
--------------------------+--------++--------
no builtin | 408048 || 2241312
UDPLITE builtin | - || 2577256
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_nat_l4proto.h | 3 +++
net/netfilter/Kconfig | 2 +-
net/netfilter/Makefile | 5 ++---
net/netfilter/nf_nat_core.c | 4 ++++
net/netfilter/nf_nat_proto_udplite.c | 35 +---------------------------------
5 files changed, 11 insertions(+), 38 deletions(-)
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 2cbaf3856e21..3923150f2a1e 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -60,6 +60,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
#ifdef CONFIG_NF_NAT_PROTO_SCTP
extern const struct nf_nat_l4proto nf_nat_l4proto_sctp;
#endif
+#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
+extern const struct nf_nat_l4proto nf_nat_l4proto_udplite;
+#endif
bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ad72edf1f6ec..496e1dcbd003 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -389,7 +389,7 @@ config NF_NAT_PROTO_DCCP
default NF_NAT && NF_CT_PROTO_DCCP
config NF_NAT_PROTO_UDPLITE
- tristate
+ bool
depends on NF_NAT && NF_CT_PROTO_UDPLITE
default NF_NAT && NF_CT_PROTO_UDPLITE
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 02ef6decf94d..3b97d89df2cd 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,8 +45,10 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
+# NAT protocols (nf_nat)
nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
# generic transport layer logging
obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
@@ -57,9 +59,6 @@ obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
obj-$(CONFIG_NF_NAT) += nf_nat.o
obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
-# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-
# NAT helpers
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 80858bd110cc..94b14c5a8b17 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -690,6 +690,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
&nf_nat_l4proto_sctp);
#endif
+#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE],
+ &nf_nat_l4proto_udplite);
+#endif
mutex_unlock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
index 58340c97bd83..366bfbfd82a1 100644
--- a/net/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -8,11 +8,9 @@
*/
#include <linux/types.h>
-#include <linux/init.h>
#include <linux/udp.h>
#include <linux/netfilter.h>
-#include <linux/module.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
@@ -64,7 +62,7 @@ udplite_manip_pkt(struct sk_buff *skb,
return true;
}
-static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
+const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.l4proto = IPPROTO_UDPLITE,
.manip_pkt = udplite_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
@@ -73,34 +71,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
-
-static int __init nf_nat_proto_udplite_init(void)
-{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
- if (err < 0)
- goto err1;
- err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
- if (err < 0)
- goto err2;
- return 0;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
-err1:
- return err;
-}
-
-static void __exit nf_nat_proto_udplite_fini(void)
-{
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
-}
-
-module_init(nf_nat_proto_udplite_init);
-module_exit(nf_nat_proto_udplite_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
--
2.1.4
^ permalink raw reply related
* [PATCH 00/50] Netfilter/IPVS updates for net-next
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
Hi David,
The following patchset contains a large Netfilter update for net-next,
to summarise:
1) Add support for stateful objects. This series provides a nf_tables
native alternative to the extended accounting infrastructure for
nf_tables. Two initial stateful objects are supported: counters and
quotas. Objects are identified by a user-defined name, you can fetch
and reset them anytime. You can also use a maps to allow fast lookups
using any arbitrary key combination. More info at:
http://marc.info/?l=netfilter-devel&m=148029128323837&w=2
2) On-demand registration of nf_conntrack and defrag hooks per netns.
Register nf_conntrack hooks if we have a stateful ruleset, ie.
state-based filtering or NAT. The new nf_conntrack_default_on sysctl
enables this from newly created netnamespaces. Default behaviour is not
modified. Patches from Florian Westphal.
3) Allocate 4k chunks and then use these for x_tables counter allocation
requests, this improves ruleset load time and also datapath ruleset
evaluation, patches from Florian Westphal.
4) Add support for ebpf to the existing x_tables bpf extension.
From Willem de Bruijn.
5) Update layer 4 checksum if any of the pseudoheader fields is updated.
This provides a limited form of 1:1 stateless NAT that make sense in
specific scenario, eg. load balancing.
6) Add support to flush sets in nf_tables. This series comes with a new
set->ops->deactivate_one() indirection given that we have to walk
over the list of set elements, then deactivate them one by one.
The existing set->ops->deactivate() performs an element lookup that
we don't need.
7) Two patches to avoid cloning packets, thus speed up packet forwarding
via nft_fwd from ingress. From Florian Westphal.
8) Two IPVS patches via Simon Horman: Decrement ttl in all modes to
prevent infinite loops, patch from Dwip Banerjee. And one minor
refactoring from Gao feng.
9) Revisit recent log support for nf_tables netdev families: One patch
to ensure that we correctly handle non-ethernet packets. Another
patch to add missing logger definition for netdev. Patches from
Liping Zhang.
10) Three patches for nft_fib, one to address insufficient register
initialization and another to solve incorrect (although harmless)
byteswap operation. Moreover update xt_rpfilter and nft_fib to match
lbcast packets with zeronet as source, eg. DHCP Discover packets
(0.0.0.0 -> 255.255.255.255). Also from Liping Zhang.
11) Built-in DCCP, SCTP and UDPlite conntrack and NAT support, from
Davide Caratti. While DCCP is rather hopeless lately, and UDPlite has
been broken in many-cast mode for some little time, let's give them a
chance by placing them at the same level as other existing protocols.
Thus, users don't explicitly have to modprobe support for this and
NAT rules work for them. Some people point to the lack of support in
SOHO Linux-based routers that make deployment of new protocols harder.
I guess other middleboxes outthere on the Internet are also to blame.
Anyway, let's see if this has any impact in the midrun.
12) Skip software SCTP software checksum calculation if the NIC comes
with SCTP checksum offload support. From Davide Caratti.
13) Initial core factoring to prepare conversion to hook array. Three
patches from Aaron Conole.
14) Gao Feng made a wrong conversion to switch in the xt_multiport
extension in a patch coming in the previous batch. Fix it in this
batch.
15) Get vmalloc call in sync with kmalloc flags to avoid a warning
and likely OOM killer intervention from x_tables. From Marcelo
Ricardo Leitner.
16) Update Arturo Borrero's email address in all source code headers.
You can pull these changes from:
git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
Thanks!
----------------------------------------------------------------
The following changes since commit adc176c5472214971d77c1a61c83db9b01e9cdc7:
ipv6 addrconf: Implemented enhanced DAD (RFC7527) (2016-12-03 23:21:37 -0500)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git HEAD
for you to fetch changes up to 73c25fb139337ac4fe1695ae3c056961855594db:
netfilter: nft_quota: allow to restore consumed quota (2016-12-07 14:40:53 +0100)
----------------------------------------------------------------
Aaron Conole (3):
netfilter: introduce accessor functions for hook entries
netfilter: decouple nf_hook_entry and nf_hook_ops
netfilter: convert while loops to for loops
Arturo Borrero Gonzalez (1):
netfilter: update Arturo Borrero Gonzalez email address
Davide Caratti (8):
netfilter: built-in NAT support for DCCP
netfilter: built-in NAT support for SCTP
netfilter: built-in NAT support for UDPlite
netfilter: nf_conntrack_tuple_common.h: fix #include
netfilter: conntrack: built-in support for DCCP
netfilter: conntrack: built-in support for SCTP
netfilter: conntrack: built-in support for UDPlite
netfilter: nat: skip checksum on offload SCTP packets
Dwip Banerjee (1):
ipvs: Decrement ttl
Florian Westphal (12):
netfilter: conntrack: remove unused init_net hook
netfilter: add and use nf_ct_netns_get/put
netfilter: nat: add dependencies on conntrack module
netfilter: nf_tables: add conntrack dependencies for nat/masq/redir expressions
netfilter: conntrack: register hooks in netns when needed by ruleset
netfilter: conntrack: add nf_conntrack_default_on sysctl
netfilter: defrag: only register defrag functionality if needed
netfilter: x_tables: pass xt_counters struct instead of packet counter
netfilter: x_tables: pass xt_counters struct to counter allocator
netfilter: x_tables: pack percpu counter allocations
netfilter: ingress: translate 0 nf_hook_slow retval to -1
netfilter: add and use nf_fwd_netdev_egress
Gao Feng (2):
ipvs: Use IS_ERR_OR_NULL(svc) instead of IS_ERR(svc) || svc == NULL
netfilter: xt_multiport: Fix wrong unmatch result with multiple ports
Liping Zhang (5):
netfilter: nf_log: do not assume ethernet header in netdev family
netfilter: nfnetlink_log: add "nf-logger-5-1" module alias name
netfilter: nft_fib: convert htonl to ntohl properly
netfilter: nft_fib_ipv4: initialize *dest to zero
netfilter: rpfilter: bypass ipv4 lbcast packets with zeronet source
Marcelo Ricardo Leitner (1):
netfilter: x_tables: avoid warn and OOM killer on vmalloc call
Pablo Neira Ayuso (17):
Merge tag 'ipvs-for-v4.10' of https://git.kernel.org/.../horms/ipvs-next
netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields
netfilter: nf_tables: add stateful objects
netfilter: nft_counter: add stateful object type
netfilter: nft_quota: add stateful object type
netfilter: nf_tables: add stateful object reference expression
netfilter: nft_quota: dump consumed quota
netfilter: nf_tables: atomic dump and reset for stateful objects
netfilter: nf_tables: notify internal updates of stateful objects
netfilter: nft_quota: add depleted flag for objects
netfilter: nf_tables: add stateful object reference to set elements
netfilter: nft_objref: support for stateful object maps
netfilter: nf_tables: allow to filter stateful object dumps by type
netfilter: nf_tables: constify struct nft_ctx * parameter in nft_trans_alloc()
netfilter: nft_set: introduce nft_{hash, rbtree}_deactivate_one()
netfilter: nf_tables: support for set flushing
netfilter: nft_quota: allow to restore consumed quota
Willem de Bruijn (1):
netfilter: xt_bpf: support ebpf
Documentation/networking/nf_conntrack-sysctl.txt | 11 +
include/linux/netfilter.h | 31 +-
include/linux/netfilter/nf_conntrack_dccp.h | 2 +-
include/linux/netfilter/x_tables.h | 38 +-
include/linux/netfilter_ingress.h | 7 +-
include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 9 +
include/net/netfilter/ipv4/nf_defrag_ipv4.h | 3 +-
include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 9 +
include/net/netfilter/ipv6/nf_defrag_ipv6.h | 3 +-
include/net/netfilter/nf_conntrack.h | 4 +
include/net/netfilter/nf_conntrack_l3proto.h | 16 +-
include/net/netfilter/nf_dup_netdev.h | 1 +
include/net/netfilter/nf_log.h | 4 +-
include/net/netfilter/nf_nat_l4proto.h | 9 +
include/net/netfilter/nf_tables.h | 101 ++-
include/net/netfilter/nf_tables_core.h | 1 +
include/net/netns/conntrack.h | 43 ++
include/net/netns/netfilter.h | 6 +
.../linux/netfilter/nf_conntrack_tuple_common.h | 3 +
include/uapi/linux/netfilter/nf_tables.h | 70 ++
include/uapi/linux/netfilter/xt_bpf.h | 21 +
net/bridge/br_netfilter_hooks.c | 8 +-
net/bridge/netfilter/nf_log_bridge.c | 3 +-
net/ipv4/netfilter/arp_tables.c | 16 +-
net/ipv4/netfilter/ip_tables.c | 16 +-
net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 +-
net/ipv4/netfilter/ipt_MASQUERADE.c | 8 +-
net/ipv4/netfilter/ipt_SYNPROXY.c | 4 +-
net/ipv4/netfilter/ipt_rpfilter.c | 8 +-
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 71 +-
net/ipv4/netfilter/nf_defrag_ipv4.c | 41 +-
net/ipv4/netfilter/nft_fib_ipv4.c | 17 +-
net/ipv4/netfilter/nft_masq_ipv4.c | 11 +-
net/ipv4/netfilter/nft_redir_ipv4.c | 11 +-
net/ipv6/netfilter/ip6_tables.c | 17 +-
net/ipv6/netfilter/ip6t_SYNPROXY.c | 4 +-
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 70 +-
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 42 +-
net/ipv6/netfilter/nft_fib_ipv6.c | 2 +-
net/ipv6/netfilter/nft_masq_ipv6.c | 11 +-
net/ipv6/netfilter/nft_redir_ipv6.c | 11 +-
net/netfilter/Kconfig | 30 +-
net/netfilter/Makefile | 18 +-
net/netfilter/core.c | 16 +-
net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
net/netfilter/ipvs/ip_vs_xmit.c | 54 ++
net/netfilter/nf_conntrack_proto.c | 73 ++-
net/netfilter/nf_conntrack_proto_dccp.c | 79 +--
net/netfilter/nf_conntrack_proto_sctp.c | 76 +--
net/netfilter/nf_conntrack_proto_udplite.c | 79 +--
net/netfilter/nf_conntrack_standalone.c | 10 +
net/netfilter/nf_dup_netdev.c | 33 +-
net/netfilter/nf_log_common.c | 3 +-
net/netfilter/nf_log_netdev.c | 3 +-
net/netfilter/nf_nat_core.c | 12 +
net/netfilter/nf_nat_proto_dccp.c | 36 +-
net/netfilter/nf_nat_proto_sctp.c | 40 +-
net/netfilter/nf_nat_proto_udplite.c | 35 +-
net/netfilter/nf_queue.c | 5 +-
net/netfilter/nf_tables_api.c | 718 ++++++++++++++++++++-
net/netfilter/nfnetlink_log.c | 1 +
net/netfilter/nft_counter.c | 186 +++++-
net/netfilter/nft_ct.c | 26 +-
net/netfilter/nft_fib.c | 2 +-
net/netfilter/nft_fwd_netdev.c | 4 +-
net/netfilter/nft_masq.c | 6 +-
net/netfilter/nft_nat.c | 11 +-
net/netfilter/nft_objref.c | 226 +++++++
net/netfilter/nft_payload.c | 107 ++-
net/netfilter/nft_quota.c | 158 ++++-
net/netfilter/nft_redir.c | 6 +-
net/netfilter/nft_set_hash.c | 25 +-
net/netfilter/nft_set_rbtree.c | 12 +-
net/netfilter/x_tables.c | 58 +-
net/netfilter/xt_CONNSECMARK.c | 4 +-
net/netfilter/xt_CT.c | 6 +-
net/netfilter/xt_NETMAP.c | 11 +-
net/netfilter/xt_REDIRECT.c | 12 +-
net/netfilter/xt_TPROXY.c | 15 +-
net/netfilter/xt_bpf.c | 96 ++-
net/netfilter/xt_connbytes.c | 4 +-
net/netfilter/xt_connlabel.c | 6 +-
net/netfilter/xt_connlimit.c | 6 +-
net/netfilter/xt_connmark.c | 8 +-
net/netfilter/xt_conntrack.c | 4 +-
net/netfilter/xt_helper.c | 4 +-
net/netfilter/xt_multiport.c | 26 +-
net/netfilter/xt_nat.c | 18 +-
net/netfilter/xt_socket.c | 33 +-
net/netfilter/xt_state.c | 4 +-
90 files changed, 2513 insertions(+), 661 deletions(-)
create mode 100644 net/netfilter/nft_objref.c
^ permalink raw reply
* [PATCH 49/50] netfilter: xt_bpf: support ebpf
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Willem de Bruijn <willemb@google.com>
Add support for attaching an eBPF object by file descriptor.
The iptables binary can be called with a path to an elf object or a
pinned bpf object. Also pass the mode and path to the kernel to be
able to return it later for iptables dump and save.
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/uapi/linux/netfilter/xt_bpf.h | 21 ++++++++
net/netfilter/xt_bpf.c | 96 +++++++++++++++++++++++++++++------
2 files changed, 101 insertions(+), 16 deletions(-)
diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
index 1fad2c27ac32..b97725af2ac0 100644
--- a/include/uapi/linux/netfilter/xt_bpf.h
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -2,9 +2,11 @@
#define _XT_BPF_H
#include <linux/filter.h>
+#include <linux/limits.h>
#include <linux/types.h>
#define XT_BPF_MAX_NUM_INSTR 64
+#define XT_BPF_PATH_MAX (XT_BPF_MAX_NUM_INSTR * sizeof(struct sock_filter))
struct bpf_prog;
@@ -16,4 +18,23 @@ struct xt_bpf_info {
struct bpf_prog *filter __attribute__((aligned(8)));
};
+enum xt_bpf_modes {
+ XT_BPF_MODE_BYTECODE,
+ XT_BPF_MODE_FD_PINNED,
+ XT_BPF_MODE_FD_ELF,
+};
+
+struct xt_bpf_info_v1 {
+ __u16 mode;
+ __u16 bpf_program_num_elem;
+ __s32 fd;
+ union {
+ struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
+ char path[XT_BPF_PATH_MAX];
+ };
+
+ /* only used in the kernel */
+ struct bpf_prog *filter __attribute__((aligned(8)));
+};
+
#endif /*_XT_BPF_H */
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index dffee9d47ec4..2dedaa23ab0a 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
+#include <linux/bpf.h>
#include <linux/netfilter/xt_bpf.h>
#include <linux/netfilter/x_tables.h>
@@ -20,15 +21,15 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_bpf");
MODULE_ALIAS("ip6t_bpf");
-static int bpf_mt_check(const struct xt_mtchk_param *par)
+static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
+ struct bpf_prog **ret)
{
- struct xt_bpf_info *info = par->matchinfo;
struct sock_fprog_kern program;
- program.len = info->bpf_program_num_elem;
- program.filter = info->bpf_program;
+ program.len = len;
+ program.filter = insns;
- if (bpf_prog_create(&info->filter, &program)) {
+ if (bpf_prog_create(ret, &program)) {
pr_info("bpf: check failed: parse error\n");
return -EINVAL;
}
@@ -36,6 +37,42 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
return 0;
}
+static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
+{
+ struct bpf_prog *prog;
+
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ *ret = prog;
+ return 0;
+}
+
+static int bpf_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_bpf_info *info = par->matchinfo;
+
+ return __bpf_mt_check_bytecode(info->bpf_program,
+ info->bpf_program_num_elem,
+ &info->filter);
+}
+
+static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ if (info->mode == XT_BPF_MODE_BYTECODE)
+ return __bpf_mt_check_bytecode(info->bpf_program,
+ info->bpf_program_num_elem,
+ &info->filter);
+ else if (info->mode == XT_BPF_MODE_FD_PINNED ||
+ info->mode == XT_BPF_MODE_FD_ELF)
+ return __bpf_mt_check_fd(info->fd, &info->filter);
+ else
+ return -EINVAL;
+}
+
static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
@@ -43,31 +80,58 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
return BPF_PROG_RUN(info->filter, skb);
}
+static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_bpf_info_v1 *info = par->matchinfo;
+
+ return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb);
+}
+
static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
+
+ bpf_prog_destroy(info->filter);
+}
+
+static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ const struct xt_bpf_info_v1 *info = par->matchinfo;
+
bpf_prog_destroy(info->filter);
}
-static struct xt_match bpf_mt_reg __read_mostly = {
- .name = "bpf",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = bpf_mt_check,
- .match = bpf_mt,
- .destroy = bpf_mt_destroy,
- .matchsize = sizeof(struct xt_bpf_info),
- .me = THIS_MODULE,
+static struct xt_match bpf_mt_reg[] __read_mostly = {
+ {
+ .name = "bpf",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = bpf_mt_check,
+ .match = bpf_mt,
+ .destroy = bpf_mt_destroy,
+ .matchsize = sizeof(struct xt_bpf_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "bpf",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = bpf_mt_check_v1,
+ .match = bpf_mt_v1,
+ .destroy = bpf_mt_destroy_v1,
+ .matchsize = sizeof(struct xt_bpf_info_v1),
+ .me = THIS_MODULE,
+ },
};
static int __init bpf_mt_init(void)
{
- return xt_register_match(&bpf_mt_reg);
+ return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
}
static void __exit bpf_mt_exit(void)
{
- xt_unregister_match(&bpf_mt_reg);
+ xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg));
}
module_init(bpf_mt_init);
--
2.1.4
^ permalink raw reply related
* [PATCH 47/50] netfilter: nf_tables: support for set flushing
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
This patch adds support for set flushing, that consists of walking over
the set elements if the NFTA_SET_ELEM_LIST_ELEMENTS attribute is set.
This patch requires the following changes:
1) Add set->ops->deactivate_one() operation: This allows us to
deactivate an element from the set element walk path, given we can
skip the lookup that happens in ->deactivate().
2) Add a new nft_trans_alloc_gfp() function since we need to allocate
transactions using GFP_ATOMIC given the set walk path happens with
held rcu_read_lock.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 6 ++++-
net/netfilter/nf_tables_api.c | 55 ++++++++++++++++++++++++++++++++++-----
net/netfilter/nft_set_hash.c | 1 +
net/netfilter/nft_set_rbtree.c | 1 +
4 files changed, 56 insertions(+), 7 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 85f0f03f1e87..924325c46aab 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -259,7 +259,8 @@ struct nft_expr;
* @lookup: look up an element within the set
* @insert: insert new element into set
* @activate: activate new element in the next generation
- * @deactivate: deactivate element in the next generation
+ * @deactivate: lookup for element and deactivate it in the next generation
+ * @deactivate_one: deactivate element in the next generation
* @remove: remove element from set
* @walk: iterate over all set elemeennts
* @privsize: function to return size of set private data
@@ -294,6 +295,9 @@ struct nft_set_ops {
void * (*deactivate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
+ bool (*deactivate_one)(const struct net *net,
+ const struct nft_set *set,
+ void *priv);
void (*remove)(const struct nft_set *set,
const struct nft_set_elem *elem);
void (*walk)(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b42059795819..a019a87e58ee 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -111,12 +111,12 @@ static void nft_ctx_init(struct nft_ctx *ctx,
ctx->seq = nlh->nlmsg_seq;
}
-static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
- int msg_type, u32 size)
+static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
+ int msg_type, u32 size, gfp_t gfp)
{
struct nft_trans *trans;
- trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+ trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
if (trans == NULL)
return NULL;
@@ -126,6 +126,12 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
return trans;
}
+static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
+ int msg_type, u32 size)
+{
+ return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
+}
+
static void nft_trans_destroy(struct nft_trans *trans)
{
list_del(&trans->list);
@@ -3876,6 +3882,34 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
return err;
}
+static int nft_flush_set(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_iter *iter,
+ const struct nft_set_elem *elem)
+{
+ struct nft_trans *trans;
+ int err;
+
+ trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
+ sizeof(struct nft_trans_elem), GFP_ATOMIC);
+ if (!trans)
+ return -ENOMEM;
+
+ if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) {
+ err = -ENOENT;
+ goto err1;
+ }
+
+ nft_trans_elem_set(trans) = (struct nft_set *)set;
+ nft_trans_elem(trans) = *((struct nft_set_elem *)elem);
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+err1:
+ kfree(trans);
+ return err;
+}
+
static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -3886,9 +3920,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct nft_ctx ctx;
int rem, err = 0;
- if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
- return -EINVAL;
-
err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
@@ -3900,6 +3931,18 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
+ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
+ struct nft_set_dump_args args = {
+ .iter = {
+ .genmask = genmask,
+ .fn = nft_flush_set,
+ },
+ };
+ set->ops->walk(&ctx, set, &args.iter);
+
+ return args.iter.err;
+ }
+
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
if (err < 0)
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 73f7687c5656..1e20e2bbb6d9 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -397,6 +397,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
+ .deactivate_one = nft_hash_deactivate_one,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.update = nft_hash_update,
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5580bb64dc0f..08376e50f6cd 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -304,6 +304,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
.deactivate = nft_rbtree_deactivate,
+ .deactivate_one = nft_rbtree_deactivate_one,
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
--
2.1.4
^ permalink raw reply related
* [PATCH 43/50] netfilter: rpfilter: bypass ipv4 lbcast packets with zeronet source
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Liping Zhang <zlpnobody@gmail.com>
Otherwise, DHCP Discover packets(0.0.0.0->255.255.255.255) may be
dropped incorrectly.
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/ipv4/netfilter/ipt_rpfilter.c | 8 +++++---
net/ipv4/netfilter/nft_fib_ipv4.c | 13 +++++++------
2 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 59b49945b481..f273098e48fd 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -83,10 +83,12 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
return true ^ invert;
iph = ip_hdr(skb);
- if (ipv4_is_multicast(iph->daddr)) {
- if (ipv4_is_zeronet(iph->saddr))
- return ipv4_is_local_multicast(iph->daddr) ^ invert;
+ if (ipv4_is_zeronet(iph->saddr)) {
+ if (ipv4_is_lbcast(iph->daddr) ||
+ ipv4_is_local_multicast(iph->daddr))
+ return true ^ invert;
}
+
flow.flowi4_iif = LOOPBACK_IFINDEX;
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 258136364f5e..965b1a161369 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -101,12 +101,13 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
iph = ip_hdr(pkt->skb);
- if (ipv4_is_multicast(iph->daddr) &&
- ipv4_is_zeronet(iph->saddr) &&
- ipv4_is_local_multicast(iph->daddr)) {
- nft_fib_store_result(dest, priv->result, pkt,
- get_ifindex(pkt->skb->dev));
- return;
+ if (ipv4_is_zeronet(iph->saddr)) {
+ if (ipv4_is_lbcast(iph->daddr) ||
+ ipv4_is_local_multicast(iph->daddr)) {
+ nft_fib_store_result(dest, priv->result, pkt,
+ get_ifindex(pkt->skb->dev));
+ return;
+ }
}
if (priv->flags & NFTA_FIB_F_MARK)
--
2.1.4
^ permalink raw reply related
* [PATCH 42/50] netfilter: nf_tables: allow to filter stateful object dumps by type
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
This patch adds the netlink code to filter out dump of stateful objects,
through the NFTA_OBJ_TYPE netlink attribute.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_tables_api.c | 50 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b4db5bf4c135..b04d4ee1d533 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4183,12 +4183,18 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
return -1;
}
+struct nft_obj_filter {
+ char table[NFT_OBJ_MAXNAMELEN];
+ u32 type;
+};
+
static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
const struct nft_af_info *afi;
const struct nft_table *table;
unsigned int idx = 0, s_idx = cb->args[0];
+ struct nft_obj_filter *filter = cb->data;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_object *obj;
@@ -4213,6 +4219,13 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter->table[0] &&
+ strcmp(filter->table, table->name))
+ goto cont;
+ if (filter->type != NFT_OBJECT_UNSPEC &&
+ obj->type->type != filter->type)
+ goto cont;
+
if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWOBJ,
@@ -4233,6 +4246,31 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+
+ return 0;
+}
+
+static struct nft_obj_filter *
+nft_obj_filter_alloc(const struct nlattr * const nla[])
+{
+ struct nft_obj_filter *filter;
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ if (!filter)
+ return ERR_PTR(-ENOMEM);
+
+ if (nla[NFTA_OBJ_TABLE])
+ nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
+ NFT_TABLE_MAXNAMELEN);
+ if (nla[NFTA_OBJ_TYPE])
+ filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+
+ return filter;
+}
+
static int nf_tables_getobj(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -4251,7 +4289,19 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_obj,
+ .done = nf_tables_dump_obj_done,
};
+
+ if (nla[NFTA_OBJ_TABLE] ||
+ nla[NFTA_OBJ_TYPE]) {
+ struct nft_obj_filter *filter;
+
+ filter = nft_obj_filter_alloc(nla);
+ if (IS_ERR(filter))
+ return -ENOMEM;
+
+ c.data = filter;
+ }
return netlink_dump_start(nlsk, skb, nlh, &c);
}
--
2.1.4
^ permalink raw reply related
* [PATCH 37/50] netfilter: nf_tables: atomic dump and reset for stateful objects
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
This patch adds a new NFT_MSG_GETOBJ_RESET command perform an atomic
dump-and-reset of the stateful object. This also comes with add support
for atomic dump and reset for counter and quota objects.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_tables.h | 3 +-
include/uapi/linux/netfilter/nf_tables.h | 2 ++
net/netfilter/nf_tables_api.c | 29 ++++++++++++-----
net/netfilter/nft_counter.c | 56 +++++++++++++++++++++++++++-----
net/netfilter/nft_quota.c | 18 ++++++----
5 files changed, 85 insertions(+), 23 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 903cd618f50e..6f7d6a1dc09c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -997,7 +997,8 @@ struct nft_object_type {
struct nft_object *obj);
void (*destroy)(struct nft_object *obj);
int (*dump)(struct sk_buff *skb,
- const struct nft_object *obj);
+ struct nft_object *obj,
+ bool reset);
};
int nft_register_obj(struct nft_object_type *obj_type);
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 3d47582caa80..399eac1eee91 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -89,6 +89,7 @@ enum nft_verdicts {
* @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes)
* @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes)
* @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes)
+ * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes)
*/
enum nf_tables_msg_types {
NFT_MSG_NEWTABLE,
@@ -112,6 +113,7 @@ enum nf_tables_msg_types {
NFT_MSG_NEWOBJ,
NFT_MSG_GETOBJ,
NFT_MSG_DELOBJ,
+ NFT_MSG_GETOBJ_RESET,
NFT_MSG_MAX,
};
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2ae717c5dcb8..bfc015af366a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3972,14 +3972,14 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type,
}
static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
- const struct nft_object *obj)
+ struct nft_object *obj, bool reset)
{
struct nlattr *nest;
nest = nla_nest_start(skb, attr);
if (!nest)
goto nla_put_failure;
- if (obj->type->dump(skb, obj) < 0)
+ if (obj->type->dump(skb, obj, reset) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
return 0;
@@ -4096,7 +4096,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq, int event, u32 flags,
int family, const struct nft_table *table,
- const struct nft_object *obj)
+ struct nft_object *obj, bool reset)
{
struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
@@ -4115,7 +4115,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
- nft_object_dump(skb, NFTA_OBJ_DATA, obj))
+ nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4131,10 +4131,14 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
const struct nft_af_info *afi;
const struct nft_table *table;
- const struct nft_object *obj;
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct nft_object *obj;
+ bool reset = false;
+
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ reset = true;
rcu_read_lock();
cb->seq = net->nft.base_seq;
@@ -4156,7 +4160,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq,
NFT_MSG_NEWOBJ,
NLM_F_MULTI | NLM_F_APPEND,
- afi->family, table, obj) < 0)
+ afi->family, table, obj, reset) < 0)
goto done;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -4183,6 +4187,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
const struct nft_table *table;
struct nft_object *obj;
struct sk_buff *skb2;
+ bool reset = false;
u32 objtype;
int err;
@@ -4214,9 +4219,12 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
if (!skb2)
return -ENOMEM;
+ if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ reset = true;
+
err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
- family, table, obj);
+ family, table, obj, reset);
if (err < 0)
goto err;
@@ -4291,7 +4299,7 @@ static int nf_tables_obj_notify(const struct nft_ctx *ctx,
err = nf_tables_fill_obj_info(skb, ctx->net, ctx->portid, ctx->seq,
event, 0, ctx->afi->family, ctx->table,
- obj);
+ obj, false);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -4482,6 +4490,11 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
+ [NFT_MSG_GETOBJ_RESET] = {
+ .call = nf_tables_getobj,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
};
static void nft_chain_commit_update(struct nft_trans *trans)
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 6f3dd429f865..f6a02c5071c2 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -100,10 +100,10 @@ static void nft_counter_obj_destroy(struct nft_object *obj)
nft_counter_do_destroy(priv);
}
-static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+static void nft_counter_fetch(struct nft_counter_percpu __percpu *counter,
struct nft_counter *total)
{
- const struct nft_counter_percpu *cpu_stats;
+ struct nft_counter_percpu *cpu_stats;
u64 bytes, packets;
unsigned int seq;
int cpu;
@@ -122,12 +122,52 @@ static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
}
}
+static u64 __nft_counter_reset(u64 *counter)
+{
+ u64 ret, old;
+
+ do {
+ old = *counter;
+ ret = cmpxchg64(counter, old, 0);
+ } while (ret != old);
+
+ return ret;
+}
+
+static void nft_counter_reset(struct nft_counter_percpu __percpu *counter,
+ struct nft_counter *total)
+{
+ struct nft_counter_percpu *cpu_stats;
+ u64 bytes, packets;
+ unsigned int seq;
+ int cpu;
+
+ memset(total, 0, sizeof(*total));
+ for_each_possible_cpu(cpu) {
+ bytes = packets = 0;
+
+ cpu_stats = per_cpu_ptr(counter, cpu);
+ do {
+ seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ packets += __nft_counter_reset(&cpu_stats->counter.packets);
+ bytes += __nft_counter_reset(&cpu_stats->counter.bytes);
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+
+ total->packets += packets;
+ total->bytes += bytes;
+ }
+}
+
static int nft_counter_do_dump(struct sk_buff *skb,
- const struct nft_counter_percpu_priv *priv)
+ const struct nft_counter_percpu_priv *priv,
+ bool reset)
{
struct nft_counter total;
- nft_counter_fetch(priv->counter, &total);
+ if (reset)
+ nft_counter_reset(priv->counter, &total);
+ else
+ nft_counter_fetch(priv->counter, &total);
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
NFTA_COUNTER_PAD) ||
@@ -141,11 +181,11 @@ static int nft_counter_do_dump(struct sk_buff *skb,
}
static int nft_counter_obj_dump(struct sk_buff *skb,
- const struct nft_object *obj)
+ struct nft_object *obj, bool reset)
{
- const struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
- return nft_counter_do_dump(skb, priv);
+ return nft_counter_do_dump(skb, priv, reset);
}
static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
@@ -178,7 +218,7 @@ static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- return nft_counter_do_dump(skb, priv);
+ return nft_counter_do_dump(skb, priv, false);
}
static int nft_counter_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0d344209803a..5d25f57497cb 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -83,12 +83,17 @@ static int nft_quota_obj_init(const struct nlattr * const tb[],
return nft_quota_do_init(tb, priv);
}
-static int nft_quota_do_dump(struct sk_buff *skb, const struct nft_quota *priv)
+static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
+ bool reset)
{
u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
u64 consumed;
- consumed = atomic64_read(&priv->consumed);
+ if (reset)
+ consumed = atomic64_xchg(&priv->consumed, 0);
+ else
+ consumed = atomic64_read(&priv->consumed);
+
/* Since we inconditionally increment consumed quota for each packet
* that we see, don't go over the quota boundary in what we send to
* userspace.
@@ -108,11 +113,12 @@ static int nft_quota_do_dump(struct sk_buff *skb, const struct nft_quota *priv)
return -1;
}
-static int nft_quota_obj_dump(struct sk_buff *skb, const struct nft_object *obj)
+static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
+ bool reset)
{
struct nft_quota *priv = nft_obj_data(obj);
- return nft_quota_do_dump(skb, priv);
+ return nft_quota_do_dump(skb, priv, reset);
}
static struct nft_object_type nft_quota_obj __read_mostly = {
@@ -146,9 +152,9 @@ static int nft_quota_init(const struct nft_ctx *ctx,
static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
- const struct nft_quota *priv = nft_expr_priv(expr);
+ struct nft_quota *priv = nft_expr_priv(expr);
- return nft_quota_do_dump(skb, priv);
+ return nft_quota_do_dump(skb, priv, false);
}
static struct nft_expr_type nft_quota_type;
--
2.1.4
^ permalink raw reply related
* [PATCH 35/50] netfilter: nf_tables: add stateful object reference expression
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
This new expression allows us to refer to existing stateful objects from
rules.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/uapi/linux/netfilter/nf_tables.h | 14 ++++
net/netfilter/Kconfig | 6 ++
net/netfilter/Makefile | 1 +
net/netfilter/nft_objref.c | 112 +++++++++++++++++++++++++++++++
4 files changed, 133 insertions(+)
create mode 100644 net/netfilter/nft_objref.c
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index ad0577ba5d2a..1043ce4250c5 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1138,6 +1138,20 @@ enum nft_fwd_attributes {
#define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1)
/**
+ * enum nft_objref_attributes - nf_tables stateful object expression netlink attributes
+ *
+ * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: nft_register)
+ * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING)
+ */
+enum nft_objref_attributes {
+ NFTA_OBJREF_UNSPEC,
+ NFTA_OBJREF_IMM_TYPE,
+ NFTA_OBJREF_IMM_NAME,
+ __NFTA_OBJREF_MAX
+};
+#define NFTA_OBJREF_MAX (__NFTA_OBJREF_MAX - 1)
+
+/**
* enum nft_gen_attributes - nf_tables ruleset generation attributes
*
* @NFTA_GEN_ID: Ruleset generation ID (NLA_U32)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index def4be06cda6..63729b489c2c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -551,6 +551,12 @@ config NFT_NAT
This option adds the "nat" expression that you can use to perform
typical Network Address Translation (NAT) packet transformations.
+config NFT_OBJREF
+ tristate "Netfilter nf_tables stateful object reference module"
+ help
+ This option adds the "objref" expression that allows you to refer to
+ stateful objects, such as counters and quotas.
+
config NFT_QUEUE
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e4c8c1d7aaed..ca30d1960f1d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
+obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
new file mode 100644
index 000000000000..23820f796aad
--- /dev/null
+++ b/net/netfilter/nft_objref.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2012-2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr))
+
+static void nft_objref_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ obj->type->eval(obj, regs, pkt);
+}
+
+static int nft_objref_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
+ u32 objtype;
+
+ if (!tb[NFTA_OBJREF_IMM_NAME] ||
+ !tb[NFTA_OBJREF_IMM_TYPE])
+ return -EINVAL;
+
+ objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
+ obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
+ genmask);
+ if (IS_ERR(obj))
+ return -ENOENT;
+
+ nft_objref_priv(expr) = obj;
+ obj->use++;
+
+ return 0;
+}
+
+static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_object *obj = nft_objref_priv(expr);
+
+ if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
+ nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static void nft_objref_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_object *obj = nft_objref_priv(expr);
+
+ obj->use--;
+}
+
+static struct nft_expr_type nft_objref_type;
+static const struct nft_expr_ops nft_objref_ops = {
+ .type = &nft_objref_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)),
+ .eval = nft_objref_eval,
+ .init = nft_objref_init,
+ .destroy = nft_objref_destroy,
+ .dump = nft_objref_dump,
+};
+
+static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = {
+ [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING },
+ [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 },
+};
+
+static struct nft_expr_type nft_objref_type __read_mostly = {
+ .name = "objref",
+ .ops = &nft_objref_ops,
+ .policy = nft_objref_policy,
+ .maxattr = NFTA_OBJREF_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_objref_module_init(void)
+{
+ return nft_register_expr(&nft_objref_type);
+}
+
+static void __exit nft_objref_module_exit(void)
+{
+ nft_unregister_expr(&nft_objref_type);
+}
+
+module_init(nft_objref_module_init);
+module_exit(nft_objref_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("objref");
--
2.1.4
^ permalink raw reply related
* [PATCH 33/50] netfilter: nft_counter: add stateful object type
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
Register a new percpu counter stateful object type into the stateful
object infrastructure.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/uapi/linux/netfilter/nf_tables.h | 1 +
net/netfilter/nft_counter.c | 140 +++++++++++++++++++++++++------
2 files changed, 114 insertions(+), 27 deletions(-)
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 18e30dbc8c3f..e352ef65d753 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1186,6 +1186,7 @@ enum nft_fib_flags {
};
#define NFT_OBJECT_UNSPEC 0
+#define NFT_OBJECT_COUNTER 1
/**
* enum nft_object_attributes - nf_tables stateful object netlink attributes
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 77db8358ab14..6f3dd429f865 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -31,11 +31,10 @@ struct nft_counter_percpu_priv {
struct nft_counter_percpu __percpu *counter;
};
-static void nft_counter_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
- struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
struct nft_counter_percpu *this_cpu;
local_bh_disable();
@@ -47,6 +46,60 @@ static void nft_counter_eval(const struct nft_expr *expr,
local_bh_enable();
}
+static inline void nft_counter_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ nft_counter_do_eval(priv, regs, pkt);
+}
+
+static int nft_counter_do_init(const struct nlattr * const tb[],
+ struct nft_counter_percpu_priv *priv)
+{
+ struct nft_counter_percpu __percpu *cpu_stats;
+ struct nft_counter_percpu *this_cpu;
+
+ cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ if (tb[NFTA_COUNTER_PACKETS]) {
+ this_cpu->counter.packets =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ }
+ if (tb[NFTA_COUNTER_BYTES]) {
+ this_cpu->counter.bytes =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ }
+ preempt_enable();
+ priv->counter = cpu_stats;
+ return 0;
+}
+
+static int nft_counter_obj_init(const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ return nft_counter_do_init(tb, priv);
+}
+
+static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv)
+{
+ free_percpu(priv->counter);
+}
+
+static void nft_counter_obj_destroy(struct nft_object *obj)
+{
+ struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ nft_counter_do_destroy(priv);
+}
+
static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
struct nft_counter *total)
{
@@ -69,9 +122,9 @@ static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
}
}
-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_counter_do_dump(struct sk_buff *skb,
+ const struct nft_counter_percpu_priv *priv)
{
- struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
struct nft_counter total;
nft_counter_fetch(priv->counter, &total);
@@ -87,36 +140,54 @@ static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
return -1;
}
+static int nft_counter_obj_dump(struct sk_buff *skb,
+ const struct nft_object *obj)
+{
+ const struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
+
+ return nft_counter_do_dump(skb, priv);
+}
+
static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
+static struct nft_object_type nft_counter_obj __read_mostly = {
+ .type = NFT_OBJECT_COUNTER,
+ .size = sizeof(struct nft_counter_percpu_priv),
+ .maxattr = NFTA_COUNTER_MAX,
+ .policy = nft_counter_policy,
+ .eval = nft_counter_obj_eval,
+ .init = nft_counter_obj_init,
+ .destroy = nft_counter_obj_destroy,
+ .dump = nft_counter_obj_dump,
+ .owner = THIS_MODULE,
+};
+
+static void nft_counter_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+
+ nft_counter_do_eval(priv, regs, pkt);
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+
+ return nft_counter_do_dump(skb, priv);
+}
+
static int nft_counter_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- struct nft_counter_percpu __percpu *cpu_stats;
- struct nft_counter_percpu *this_cpu;
- cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
- if (cpu_stats == NULL)
- return -ENOMEM;
-
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
- if (tb[NFTA_COUNTER_PACKETS]) {
- this_cpu->counter.packets =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
- }
- if (tb[NFTA_COUNTER_BYTES]) {
- this_cpu->counter.bytes =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
- }
- preempt_enable();
- priv->counter = cpu_stats;
- return 0;
+ return nft_counter_do_init(tb, priv);
}
static void nft_counter_destroy(const struct nft_ctx *ctx,
@@ -124,7 +195,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- free_percpu(priv->counter);
+ nft_counter_do_destroy(priv);
}
static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
@@ -174,12 +245,26 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
static int __init nft_counter_module_init(void)
{
- return nft_register_expr(&nft_counter_type);
+ int err;
+
+ err = nft_register_obj(&nft_counter_obj);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_counter_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_counter_obj);
+ return err;
}
static void __exit nft_counter_module_exit(void)
{
nft_unregister_expr(&nft_counter_type);
+ nft_unregister_obj(&nft_counter_obj);
}
module_init(nft_counter_module_init);
@@ -188,3 +273,4 @@ module_exit(nft_counter_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("counter");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER);
--
2.1.4
^ permalink raw reply related
* [PATCH 26/50] netfilter: nft_fib: convert htonl to ntohl properly
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Liping Zhang <zlpnobody@gmail.com>
Acctually ntohl and htonl are identical, so this doesn't affect
anything, but it is conceptually wrong.
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/ipv4/netfilter/nft_fib_ipv4.c | 2 +-
net/ipv6/netfilter/nft_fib_ipv6.c | 2 +-
net/netfilter/nft_fib.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 1b49966484b3..bfffa742f397 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -198,7 +198,7 @@ nft_fib4_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_FIB_RESULT])
return ERR_PTR(-EINVAL);
- result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+ result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
switch (result) {
case NFT_FIB_RESULT_OIF:
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index d526bb594956..c947aad8bcc6 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -235,7 +235,7 @@ nft_fib6_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_FIB_RESULT])
return ERR_PTR(-EINVAL);
- result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+ result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
switch (result) {
case NFT_FIB_RESULT_OIF:
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 249c9b80c150..29a4906adc27 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -86,7 +86,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
return -EINVAL;
- priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+ priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
switch (priv->result) {
--
2.1.4
^ permalink raw reply related
* [PATCH 25/50] netfilter: x_tables: pack percpu counter allocations
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Florian Westphal <fw@strlen.de>
instead of allocating each xt_counter individually, allocate 4k chunks
and then use these for counter allocation requests.
This should speed up rule evaluation by increasing data locality,
also speeds up ruleset loading because we reduce calls to the percpu
allocator.
As Eric points out we can't use PAGE_SIZE, page_allocator would fail on
arches with 64k page size.
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/linux/netfilter/x_tables.h | 7 ++++++-
net/ipv4/netfilter/arp_tables.c | 9 ++++++---
net/ipv4/netfilter/ip_tables.c | 9 ++++++---
net/ipv6/netfilter/ip6_tables.c | 9 ++++++---
net/netfilter/x_tables.c | 33 ++++++++++++++++++++++++---------
5 files changed, 48 insertions(+), 19 deletions(-)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 05a94bd32c55..5117e4d2ddfa 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -403,8 +403,13 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
return ret;
}
+struct xt_percpu_counter_alloc_state {
+ unsigned int off;
+ const char __percpu *mem;
+};
-bool xt_percpu_counter_alloc(struct xt_counters *counters);
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+ struct xt_counters *counter);
void xt_percpu_counter_free(struct xt_counters *cnt);
static inline struct xt_counters *
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 808deb275ceb..1258a9ab62ef 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -411,13 +411,14 @@ static inline int check_target(struct arpt_entry *e, const char *name)
}
static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+ struct xt_percpu_counter_alloc_state *alloc_state)
{
struct xt_entry_target *t;
struct xt_target *target;
int ret;
- if (!xt_percpu_counter_alloc(&e->counters))
+ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
return -ENOMEM;
t = arpt_get_target(e);
@@ -525,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
static int translate_table(struct xt_table_info *newinfo, void *entry0,
const struct arpt_replace *repl)
{
+ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
struct arpt_entry *iter;
unsigned int *offsets;
unsigned int i;
@@ -587,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
- ret = find_check_entry(iter, repl->name, repl->size);
+ ret = find_check_entry(iter, repl->name, repl->size,
+ &alloc_state);
if (ret != 0)
break;
++i;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a48430d3420f..308b456723f0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -531,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
static int
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
- unsigned int size)
+ unsigned int size,
+ struct xt_percpu_counter_alloc_state *alloc_state)
{
struct xt_entry_target *t;
struct xt_target *target;
@@ -540,7 +541,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
- if (!xt_percpu_counter_alloc(&e->counters))
+ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
return -ENOMEM;
j = 0;
@@ -676,6 +677,7 @@ static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ipt_replace *repl)
{
+ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
struct ipt_entry *iter;
unsigned int *offsets;
unsigned int i;
@@ -735,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
- ret = find_check_entry(iter, net, repl->name, repl->size);
+ ret = find_check_entry(iter, net, repl->name, repl->size,
+ &alloc_state);
if (ret != 0)
break;
++i;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index a5a92083fd62..d56d8ac09a94 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -562,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
static int
find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
- unsigned int size)
+ unsigned int size,
+ struct xt_percpu_counter_alloc_state *alloc_state)
{
struct xt_entry_target *t;
struct xt_target *target;
@@ -571,7 +572,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
- if (!xt_percpu_counter_alloc(&e->counters))
+ if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
return -ENOMEM;
j = 0;
@@ -705,6 +706,7 @@ static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ip6t_replace *repl)
{
+ struct xt_percpu_counter_alloc_state alloc_state = { 0 };
struct ip6t_entry *iter;
unsigned int *offsets;
unsigned int i;
@@ -764,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
- ret = find_check_entry(iter, net, repl->name, repl->size);
+ ret = find_check_entry(iter, net, repl->name, repl->size,
+ &alloc_state);
if (ret != 0)
break;
++i;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index be5e83047594..f6ce4a7036e6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+#define XT_PCPU_BLOCK_SIZE 4096
struct compat_delta {
unsigned int offset; /* offset in kernel */
@@ -1618,6 +1619,7 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
/**
* xt_percpu_counter_alloc - allocate x_tables rule counter
*
+ * @state: pointer to xt_percpu allocation state
* @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
*
* On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
@@ -1626,21 +1628,34 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
* Rule evaluation needs to use xt_get_this_cpu_counter() helper
* to fetch the real percpu counter.
*
+ * To speed up allocation and improve data locality, a 4kb block is
+ * allocated.
+ *
+ * xt_percpu_counter_alloc_state contains the base address of the
+ * allocated page and the current sub-offset.
+ *
* returns false on error.
*/
-bool xt_percpu_counter_alloc(struct xt_counters *counter)
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+ struct xt_counters *counter)
{
- void __percpu *res;
+ BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
if (nr_cpu_ids <= 1)
return true;
- res = __alloc_percpu(sizeof(struct xt_counters),
- sizeof(struct xt_counters));
- if (!res)
- return false;
-
- counter->pcnt = (__force unsigned long)res;
+ if (!state->mem) {
+ state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
+ XT_PCPU_BLOCK_SIZE);
+ if (!state->mem)
+ return false;
+ }
+ counter->pcnt = (__force unsigned long)(state->mem + state->off);
+ state->off += sizeof(*counter);
+ if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
+ state->mem = NULL;
+ state->off = 0;
+ }
return true;
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
@@ -1649,7 +1664,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
{
unsigned long pcnt = counters->pcnt;
- if (nr_cpu_ids > 1)
+ if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
free_percpu((void __percpu *)pcnt);
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
--
2.1.4
^ permalink raw reply related
* [PATCH 23/50] netfilter: x_tables: pass xt_counters struct instead of packet counter
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Florian Westphal <fw@strlen.de>
On SMP we overload the packet counter (unsigned long) to contain
percpu offset. Hide this from callers and pass xt_counters address
instead.
Preparation patch to allocate the percpu counters in page-sized batch
chunks.
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/linux/netfilter/x_tables.h | 6 +-----
net/ipv4/netfilter/arp_tables.c | 4 ++--
net/ipv4/netfilter/ip_tables.c | 4 ++--
net/ipv6/netfilter/ip6_tables.c | 5 ++---
net/netfilter/x_tables.c | 9 +++++++++
5 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index cd4eaf8df445..6e61edeb68e3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -430,11 +430,7 @@ static inline unsigned long xt_percpu_counter_alloc(void)
return 0;
}
-static inline void xt_percpu_counter_free(u64 pcnt)
-{
- if (nr_cpu_ids > 1)
- free_percpu((void __percpu *) (unsigned long) pcnt);
-}
+void xt_percpu_counter_free(struct xt_counters *cnt);
static inline struct xt_counters *
xt_get_this_cpu_counter(struct xt_counters *cnt)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 848a0704b28f..019f8e8dda6d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -439,7 +439,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
err:
module_put(t->u.kernel.target->me);
out:
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
return ret;
}
@@ -519,7 +519,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
}
/* Checks and translates the user-supplied table segment (held in
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 46815c8a60d7..acc9a0c45bdf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -582,7 +582,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
cleanup_match(ematch, net);
}
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
return ret;
}
@@ -670,7 +670,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
}
/* Checks and translates the user-supplied table segment (held in
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6ff42b8301cc..88b56a98905b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -612,7 +612,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
cleanup_match(ematch, net);
}
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
return ret;
}
@@ -699,8 +699,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
-
- xt_percpu_counter_free(e->counters.pcnt);
+ xt_percpu_counter_free(&e->counters);
}
/* Checks and translates the user-supplied table segment (held in
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ad818e52859b..0580029eb0ee 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1615,6 +1615,15 @@ void xt_proto_fini(struct net *net, u_int8_t af)
}
EXPORT_SYMBOL_GPL(xt_proto_fini);
+void xt_percpu_counter_free(struct xt_counters *counters)
+{
+ unsigned long pcnt = counters->pcnt;
+
+ if (nr_cpu_ids > 1)
+ free_percpu((void __percpu *)pcnt);
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
+
static int __net_init xt_net_init(struct net *net)
{
int i;
--
2.1.4
^ permalink raw reply related
* [PATCH 15/50] netfilter: nat: add dependencies on conntrack module
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Florian Westphal <fw@strlen.de>
MASQUERADE, S/DNAT and REDIRECT already call functions that depend on the
conntrack module.
However, since the conntrack hooks are now registered in a lazy fashion
(i.e., only when needed) a symbol reference is not enough.
Thus, when something is added to a nat table, make sure that it will see
packets by calling nf_ct_netns_get() which will register the conntrack
hooks in the current netns.
An alternative would be to add these dependencies to the NAT table.
However, that has problems when using non-modular builds -- we might
register e.g. ipv6 conntrack before its initcall has run, leading to NULL
deref crashes since its per-netns storage has not yet been allocated.
Adding the dependency in the modules instead has the advantage that nat
table also does not register its hooks until rules are added.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/ipv4/netfilter/ipt_MASQUERADE.c | 8 +++++++-
net/netfilter/xt_NETMAP.c | 11 +++++++++--
net/netfilter/xt_REDIRECT.c | 12 ++++++++++--
net/netfilter/xt_nat.c | 18 +++++++++++++++++-
4 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 34cfb9b0bc0a..a03e4e7ef5f9 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -41,7 +41,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
pr_debug("bad rangesize %u\n", mr->rangesize);
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
}
static unsigned int
@@ -59,6 +59,11 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
xt_out(par));
}
+static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
+}
+
static struct xt_target masquerade_tg_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV4,
@@ -67,6 +72,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
.table = "nat",
.hooks = 1 << NF_INET_POST_ROUTING,
.checkentry = masquerade_tg_check,
+ .destroy = masquerade_tg_destroy,
.me = THIS_MODULE,
};
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index 94d0b5411192..e45a01255e70 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -60,7 +60,12 @@ static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
return -EINVAL;
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void netmap_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
static unsigned int
@@ -111,7 +116,7 @@ static int netmap_tg4_check(const struct xt_tgchk_param *par)
pr_debug("bad rangesize %u.\n", mr->rangesize);
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
}
static struct xt_target netmap_tg_reg[] __read_mostly = {
@@ -127,6 +132,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_LOCAL_IN),
.checkentry = netmap_tg6_checkentry,
+ .destroy = netmap_tg_destroy,
.me = THIS_MODULE,
},
{
@@ -141,6 +147,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_LOCAL_IN),
.checkentry = netmap_tg4_check,
+ .destroy = netmap_tg_destroy,
.me = THIS_MODULE,
},
};
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 651dce65a30b..98a4c6d4f1cb 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -40,7 +40,13 @@ static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
if (range->flags & NF_NAT_RANGE_MAP_IPS)
return -EINVAL;
- return 0;
+
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void redirect_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
/* FIXME: Take multiple ranges --RR */
@@ -56,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par)
pr_debug("bad rangesize %u.\n", mr->rangesize);
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
}
static unsigned int
@@ -72,6 +78,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = {
.revision = 0,
.table = "nat",
.checkentry = redirect_tg6_checkentry,
+ .destroy = redirect_tg_destroy,
.target = redirect_tg6,
.targetsize = sizeof(struct nf_nat_range),
.hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -85,6 +92,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = {
.table = "nat",
.target = redirect_tg4,
.checkentry = redirect_tg4_check,
+ .destroy = redirect_tg_destroy,
.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.hooks = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_OUT),
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index bea7464cc43f..8107b3eb865f 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -23,7 +23,17 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
par->target->name);
return -EINVAL;
}
- return 0;
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static int xt_nat_checkentry(const struct xt_tgchk_param *par)
+{
+ return nf_ct_netns_get(par->net, par->family);
+}
+
+static void xt_nat_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_netns_put(par->net, par->family);
}
static void xt_nat_convert_range(struct nf_nat_range *dst,
@@ -106,6 +116,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
.name = "SNAT",
.revision = 0,
.checkentry = xt_nat_checkentry_v0,
+ .destroy = xt_nat_destroy,
.target = xt_snat_target_v0,
.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.family = NFPROTO_IPV4,
@@ -118,6 +129,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
.name = "DNAT",
.revision = 0,
.checkentry = xt_nat_checkentry_v0,
+ .destroy = xt_nat_destroy,
.target = xt_dnat_target_v0,
.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.family = NFPROTO_IPV4,
@@ -129,6 +141,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
{
.name = "SNAT",
.revision = 1,
+ .checkentry = xt_nat_checkentry,
+ .destroy = xt_nat_destroy,
.target = xt_snat_target_v1,
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
@@ -139,6 +153,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
{
.name = "DNAT",
.revision = 1,
+ .checkentry = xt_nat_checkentry,
+ .destroy = xt_nat_destroy,
.target = xt_dnat_target_v1,
.targetsize = sizeof(struct nf_nat_range),
.table = "nat",
--
2.1.4
^ permalink raw reply related
* [PATCH 11/50] netfilter: conntrack: built-in support for SCTP
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Davide Caratti <dcaratti@redhat.com>
CONFIG_NF_CT_PROTO_SCTP is no more a tristate. When set to y, connection
tracking support for SCTP protocol is built-in into nf_conntrack.ko.
footprint test:
$ ls -l net/netfilter/nf_conntrack{_proto_sctp,}.ko \
net/ipv4/netfilter/nf_conntrack_ipv4.ko \
net/ipv6/netfilter/nf_conntrack_ipv6.ko
(builtin)|| sctp | ipv4 | ipv6 | nf_conntrack
---------++--------+--------+--------+--------------
none || 498243 | 828755 | 828676 | 6141434
SCTP || - | 829254 | 829175 | 6547872
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 +
include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 +
include/net/netns/conntrack.h | 13 +++++
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 +
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 3 +
net/netfilter/Kconfig | 7 +--
net/netfilter/Makefile | 2 +-
net/netfilter/nf_conntrack_proto_sctp.c | 76 +++-----------------------
8 files changed, 38 insertions(+), 72 deletions(-)
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index c2f155fd9299..5f1fc15a51fb 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -18,6 +18,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
#ifdef CONFIG_NF_CT_PROTO_DCCP
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
+#endif
int nf_conntrack_ipv4_compat_init(void);
void nf_conntrack_ipv4_compat_fini(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 5ec66c0d21c4..f70d191a8820 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -9,6 +9,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
#ifdef CONFIG_NF_CT_PROTO_DCCP
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
+#endif
#include <linux/sysctl.h>
extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 440b781baf0b..17724c62de97 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -9,6 +9,9 @@
#ifdef CONFIG_NF_CT_PROTO_DCCP
#include <linux/netfilter/nf_conntrack_dccp.h>
#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+#include <linux/netfilter/nf_conntrack_sctp.h>
+#endif
#include <linux/seqlock.h>
struct ctl_table_header;
@@ -59,6 +62,13 @@ struct nf_dccp_net {
};
#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+struct nf_sctp_net {
+ struct nf_proto_net pn;
+ unsigned int timeouts[SCTP_CONNTRACK_MAX];
+};
+#endif
+
struct nf_ip_net {
struct nf_generic_net generic;
struct nf_tcp_net tcp;
@@ -68,6 +78,9 @@ struct nf_ip_net {
#ifdef CONFIG_NF_CT_PROTO_DCCP
struct nf_dccp_net dccp;
#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ struct nf_sctp_net sctp;
+#endif
};
struct ct_pcpu {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index cb3cf770b00c..0a9d354ef314 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -343,6 +343,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp4,
#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ &nf_conntrack_l4proto_sctp4,
+#endif
};
static int ipv4_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index f52338d02951..1d8daafb1685 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -343,6 +343,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
#ifdef CONFIG_NF_CT_PROTO_DCCP
&nf_conntrack_l4proto_dccp6,
#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ &nf_conntrack_l4proto_sctp6,
+#endif
};
static int ipv6_net_init(struct net *net)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 27a3d8c8f8ce..29c0bf0a315d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -159,15 +159,14 @@ config NF_CT_PROTO_GRE
tristate
config NF_CT_PROTO_SCTP
- tristate 'SCTP protocol connection tracking support'
+ bool 'SCTP protocol connection tracking support'
depends on NETFILTER_ADVANCED
- default IP_SCTP
+ default y
help
With this option enabled, the layer 3 independent connection
tracking code will be able to do state tracking on SCTP connections.
- If you want to compile it as a module, say M here and read
- <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+ If unsure, say Y.
config NF_CT_PROTO_UDPLITE
tristate 'UDP-Lite protocol connection tracking support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index bbd0cc08eff0..6545c28ab746 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -6,6 +6,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -18,7 +19,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
-obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
# netlink interface for nf_conntrack
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index d096c2d6b87b..a0efde38da44 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -15,7 +15,6 @@
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
-#include <linux/module.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/sctp.h>
@@ -144,15 +143,9 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
}
};
-static unsigned int sctp_net_id __read_mostly;
-struct sctp_net {
- struct nf_proto_net pn;
- unsigned int timeouts[SCTP_CONNTRACK_MAX];
-};
-
-static inline struct sctp_net *sctp_pernet(struct net *net)
+static inline struct nf_sctp_net *sctp_pernet(struct net *net)
{
- return net_generic(net, sctp_net_id);
+ return &net->ct.nf_ct_proto.sctp;
}
static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -600,7 +593,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
- struct sctp_net *sn = sctp_pernet(net);
+ struct nf_sctp_net *sn = sctp_pernet(net);
int i;
/* set default SCTP timeouts. */
@@ -708,7 +701,7 @@ static struct ctl_table sctp_sysctl_table[] = {
#endif
static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct sctp_net *sn)
+ struct nf_sctp_net *sn)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table)
@@ -735,7 +728,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int sctp_init_net(struct net *net, u_int16_t proto)
{
- struct sctp_net *sn = sctp_pernet(net);
+ struct nf_sctp_net *sn = sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
if (!pn->users) {
@@ -748,7 +741,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
return sctp_kmemdup_sysctl_table(pn, sn);
}
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
.name = "sctp",
@@ -778,11 +771,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.nla_policy = sctp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .net_id = &sctp_net_id,
.init_net = sctp_init_net,
};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
.name = "sctp",
@@ -812,57 +805,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
#endif
- .net_id = &sctp_net_id,
.init_net = sctp_init_net,
};
-
-static struct nf_conntrack_l4proto *sctp_proto[] = {
- &nf_conntrack_l4proto_sctp4,
- &nf_conntrack_l4proto_sctp6,
-};
-
-static int sctp_net_init(struct net *net)
-{
- return nf_ct_l4proto_pernet_register(net, sctp_proto,
- ARRAY_SIZE(sctp_proto));
-}
-
-static void sctp_net_exit(struct net *net)
-{
- nf_ct_l4proto_pernet_unregister(net, sctp_proto,
- ARRAY_SIZE(sctp_proto));
-}
-
-static struct pernet_operations sctp_net_ops = {
- .init = sctp_net_init,
- .exit = sctp_net_exit,
- .id = &sctp_net_id,
- .size = sizeof(struct sctp_net),
-};
-
-static int __init nf_conntrack_proto_sctp_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&sctp_net_ops);
- if (ret < 0)
- return ret;
- ret = nf_ct_l4proto_register(sctp_proto, ARRAY_SIZE(sctp_proto));
- if (ret < 0)
- unregister_pernet_subsys(&sctp_net_ops);
- return ret;
-}
-
-static void __exit nf_conntrack_proto_sctp_fini(void)
-{
- nf_ct_l4proto_unregister(sctp_proto, ARRAY_SIZE(sctp_proto));
- unregister_pernet_subsys(&sctp_net_ops);
-}
-
-module_init(nf_conntrack_proto_sctp_init);
-module_exit(nf_conntrack_proto_sctp_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
-MODULE_ALIAS("ip_conntrack_proto_sctp");
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6);
--
2.1.4
^ permalink raw reply related
* [PATCH 10/50] netfilter: conntrack: built-in support for DCCP
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Davide Caratti <dcaratti@redhat.com>
CONFIG_NF_CT_PROTO_DCCP is no more a tristate. When set to y, connection
tracking support for DCCP protocol is built-in into nf_conntrack.ko.
footprint test:
$ ls -l net/netfilter/nf_conntrack{_proto_dccp,}.ko \
net/ipv4/netfilter/nf_conntrack_ipv4.ko \
net/ipv6/netfilter/nf_conntrack_ipv6.ko
(builtin)|| dccp | ipv4 | ipv6 | nf_conntrack
---------++--------+--------+--------+--------------
none || 469140 | 828755 | 828676 | 6141434
DCCP || - | 830566 | 829935 | 6533526
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/linux/netfilter/nf_conntrack_dccp.h | 2 +-
include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 +
include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 +
include/net/netns/conntrack.h | 14 +++++
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 +
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 3 +
net/netfilter/Kconfig | 6 +-
net/netfilter/Makefile | 3 +-
net/netfilter/nf_conntrack_proto_dccp.c | 79 ++++----------------------
9 files changed, 41 insertions(+), 75 deletions(-)
diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h
index 40dcc82058d1..ff721d7325cf 100644
--- a/include/linux/netfilter/nf_conntrack_dccp.h
+++ b/include/linux/netfilter/nf_conntrack_dccp.h
@@ -25,7 +25,7 @@ enum ct_dccp_roles {
#define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1)
#ifdef __KERNEL__
-#include <net/netfilter/nf_conntrack_tuple.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
struct nf_ct_dccp {
u_int8_t role[IP_CT_DIR_MAX];
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 981c327374da..c2f155fd9299 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -15,6 +15,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+#endif
int nf_conntrack_ipv4_compat_init(void);
void nf_conntrack_ipv4_compat_fini(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index a4c993685795..5ec66c0d21c4 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -6,6 +6,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
+#endif
#include <linux/sysctl.h>
extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 3d06d94d2e52..440b781baf0b 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -6,6 +6,9 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+#include <linux/netfilter/nf_conntrack_dccp.h>
+#endif
#include <linux/seqlock.h>
struct ctl_table_header;
@@ -48,12 +51,23 @@ struct nf_icmp_net {
unsigned int timeout;
};
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+struct nf_dccp_net {
+ struct nf_proto_net pn;
+ int dccp_loose;
+ unsigned int dccp_timeout[CT_DCCP_MAX + 1];
+};
+#endif
+
struct nf_ip_net {
struct nf_generic_net generic;
struct nf_tcp_net tcp;
struct nf_udp_net udp;
struct nf_icmp_net icmp;
struct nf_icmp_net icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ struct nf_dccp_net dccp;
+#endif
};
struct ct_pcpu {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7130ed5dc1fa..cb3cf770b00c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
&nf_conntrack_l4proto_tcp4,
&nf_conntrack_l4proto_udp4,
&nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ &nf_conntrack_l4proto_dccp4,
+#endif
};
static int ipv4_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 500be28ff563..f52338d02951 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ &nf_conntrack_l4proto_dccp6,
+#endif
};
static int ipv6_net_init(struct net *net)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 496e1dcbd003..27a3d8c8f8ce 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -146,14 +146,14 @@ config NF_CONNTRACK_LABELS
to connection tracking entries. It selected by the connlabel match.
config NF_CT_PROTO_DCCP
- tristate 'DCCP protocol connection tracking support'
+ bool 'DCCP protocol connection tracking support'
depends on NETFILTER_ADVANCED
- default IP_DCCP
+ default y
help
With this option enabled, the layer 3 independent connection
tracking code will be able to do state tracking on DCCP connections.
- If unsure, say 'N'.
+ If unsure, say Y.
config NF_CT_PROTO_GRE
tristate
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3b97d89df2cd..bbd0cc08eff0 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -5,6 +5,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -16,8 +17,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
-# SCTP protocol connection tracking
-obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 073b047314dc..b68ce6ac13b3 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -9,7 +9,6 @@
*
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/spinlock.h>
@@ -384,17 +383,9 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
},
};
-/* this module per-net specifics */
-static unsigned int dccp_net_id __read_mostly;
-struct dccp_net {
- struct nf_proto_net pn;
- int dccp_loose;
- unsigned int dccp_timeout[CT_DCCP_MAX + 1];
-};
-
-static inline struct dccp_net *dccp_pernet(struct net *net)
+static inline struct nf_dccp_net *dccp_pernet(struct net *net)
{
- return net_generic(net, dccp_net_id);
+ return &net->ct.nf_ct_proto.dccp;
}
static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -424,7 +415,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
- struct dccp_net *dn;
+ struct nf_dccp_net *dn;
struct dccp_hdr _dh, *dh;
const char *msg;
u_int8_t state;
@@ -719,7 +710,7 @@ static int dccp_nlattr_size(void)
static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- struct dccp_net *dn = dccp_pernet(net);
+ struct nf_dccp_net *dn = dccp_pernet(net);
unsigned int *timeouts = data;
int i;
@@ -820,7 +811,7 @@ static struct ctl_table dccp_sysctl_table[] = {
#endif /* CONFIG_SYSCTL */
static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
- struct dccp_net *dn)
+ struct nf_dccp_net *dn)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table)
@@ -850,7 +841,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
static int dccp_init_net(struct net *net, u_int16_t proto)
{
- struct dccp_net *dn = dccp_pernet(net);
+ struct nf_dccp_net *dn = dccp_pernet(net);
struct nf_proto_net *pn = &dn->pn;
if (!pn->users) {
@@ -868,7 +859,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto)
return dccp_kmemdup_sysctl_table(net, pn, dn);
}
-static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
.name = "dccp",
@@ -898,11 +889,11 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
.nla_policy = dccp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .net_id = &dccp_net_id,
.init_net = dccp_init_net,
};
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
-static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
.name = "dccp",
@@ -932,56 +923,6 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
.nla_policy = dccp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .net_id = &dccp_net_id,
.init_net = dccp_init_net,
};
-
-static struct nf_conntrack_l4proto *dccp_proto[] = {
- &dccp_proto4,
- &dccp_proto6,
-};
-
-static __net_init int dccp_net_init(struct net *net)
-{
- return nf_ct_l4proto_pernet_register(net, dccp_proto,
- ARRAY_SIZE(dccp_proto));
-}
-
-static __net_exit void dccp_net_exit(struct net *net)
-{
- nf_ct_l4proto_pernet_unregister(net, dccp_proto,
- ARRAY_SIZE(dccp_proto));
-}
-
-static struct pernet_operations dccp_net_ops = {
- .init = dccp_net_init,
- .exit = dccp_net_exit,
- .id = &dccp_net_id,
- .size = sizeof(struct dccp_net),
-};
-
-static int __init nf_conntrack_proto_dccp_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&dccp_net_ops);
- if (ret < 0)
- return ret;
- ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto));
- if (ret < 0)
- unregister_pernet_subsys(&dccp_net_ops);
- return ret;
-}
-
-static void __exit nf_conntrack_proto_dccp_fini(void)
-{
- nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto));
- unregister_pernet_subsys(&dccp_net_ops);
-}
-
-module_init(nf_conntrack_proto_dccp_init);
-module_exit(nf_conntrack_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP connection tracking protocol helper");
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
--
2.1.4
^ permalink raw reply related
* [PATCH 05/50] netfilter: built-in NAT support for SCTP
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Davide Caratti <dcaratti@redhat.com>
CONFIG_NF_NAT_PROTO_SCTP is no more a tristate. When set to y, NAT
support for SCTP protocol is built-in into nf_nat.ko.
footprint test:
(nf_nat_proto_) | sctp || nf_nat
--------------------------+--------++--------
no builtin | 428344 || 2241312
SCTP builtin | - || 2597032
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_nat_l4proto.h | 3 +++
net/netfilter/Kconfig | 2 +-
net/netfilter/Makefile | 2 +-
net/netfilter/nf_nat_core.c | 4 ++++
net/netfilter/nf_nat_proto_sctp.c | 35 +---------------------------------
5 files changed, 10 insertions(+), 36 deletions(-)
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 92b147be00ef..2cbaf3856e21 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -57,6 +57,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
#ifdef CONFIG_NF_NAT_PROTO_DCCP
extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
#endif
+#ifdef CONFIG_NF_NAT_PROTO_SCTP
+extern const struct nf_nat_l4proto nf_nat_l4proto_sctp;
+#endif
bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 13092e5cd245..ad72edf1f6ec 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -394,7 +394,7 @@ config NF_NAT_PROTO_UDPLITE
default NF_NAT && NF_CT_PROTO_UDPLITE
config NF_NAT_PROTO_SCTP
- tristate
+ bool
default NF_NAT && NF_CT_PROTO_SCTP
depends on NF_NAT && NF_CT_PROTO_SCTP
select LIBCRC32C
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 9ea0c98e51e6..02ef6decf94d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -46,6 +46,7 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
# generic transport layer logging
obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
@@ -58,7 +59,6 @@ obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
# NAT helpers
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 69b121d11275..80858bd110cc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -686,6 +686,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
&nf_nat_l4proto_dccp);
#endif
+#ifdef CONFIG_NF_NAT_PROTO_SCTP
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
+ &nf_nat_l4proto_sctp);
+#endif
mutex_unlock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index cbc7ade1487b..2e14108ff697 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -7,9 +7,7 @@
*/
#include <linux/types.h>
-#include <linux/init.h>
#include <linux/sctp.h>
-#include <linux/module.h>
#include <net/sctp/checksum.h>
#include <net/netfilter/nf_nat_l4proto.h>
@@ -54,7 +52,7 @@ sctp_manip_pkt(struct sk_buff *skb,
return true;
}
-static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
+const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.l4proto = IPPROTO_SCTP,
.manip_pkt = sctp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
@@ -63,34 +61,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
-
-static int __init nf_nat_proto_sctp_init(void)
-{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
- if (err < 0)
- goto err1;
- err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
- if (err < 0)
- goto err2;
- return 0;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-err1:
- return err;
-}
-
-static void __exit nf_nat_proto_sctp_exit(void)
-{
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-}
-
-module_init(nf_nat_proto_sctp_init);
-module_exit(nf_nat_proto_sctp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SCTP NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
--
2.1.4
^ permalink raw reply related
* [PATCH 04/50] netfilter: built-in NAT support for DCCP
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Davide Caratti <dcaratti@redhat.com>
CONFIG_NF_NAT_PROTO_DCCP is no more a tristate. When set to y, NAT
support for DCCP protocol is built-in into nf_nat.ko.
footprint test:
(nf_nat_proto_) | dccp || nf_nat
--------------------------+--------++--------
no builtin | 409800 || 2241312
DCCP builtin | - || 2578968
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/net/netfilter/nf_nat_l4proto.h | 3 +++
net/netfilter/Kconfig | 2 +-
net/netfilter/Makefile | 3 ++-
net/netfilter/nf_nat_core.c | 4 ++++
net/netfilter/nf_nat_proto_dccp.c | 36 +---------------------------------
5 files changed, 11 insertions(+), 37 deletions(-)
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 12f4cc841b6e..92b147be00ef 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -54,6 +54,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_udp;
extern const struct nf_nat_l4proto nf_nat_l4proto_icmp;
extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6;
extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
+#ifdef CONFIG_NF_NAT_PROTO_DCCP
+extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
+#endif
bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 44410d30d461..13092e5cd245 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -384,7 +384,7 @@ config NF_NAT_NEEDED
default y
config NF_NAT_PROTO_DCCP
- tristate
+ bool
depends on NF_NAT && NF_CT_PROTO_DCCP
default NF_NAT && NF_CT_PROTO_DCCP
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5bbf767672ec..9ea0c98e51e6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,6 +45,8 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
+nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+
# generic transport layer logging
obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
@@ -55,7 +57,6 @@ obj-$(CONFIG_NF_NAT) += nf_nat.o
obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5b9c884a452e..69b121d11275 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -682,6 +682,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
&nf_nat_l4proto_tcp);
RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
&nf_nat_l4proto_udp);
+#ifdef CONFIG_NF_NAT_PROTO_DCCP
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
+ &nf_nat_l4proto_dccp);
+#endif
mutex_unlock(&nf_nat_proto_mutex);
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 15c47b246d0d..269fcd5dc34c 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -10,8 +10,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/dccp.h>
@@ -73,7 +71,7 @@ dccp_manip_pkt(struct sk_buff *skb,
return true;
}
-static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
+const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.l4proto = IPPROTO_DCCP,
.manip_pkt = dccp_manip_pkt,
.in_range = nf_nat_l4proto_in_range,
@@ -82,35 +80,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
.nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
-
-static int __init nf_nat_proto_dccp_init(void)
-{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
- if (err < 0)
- goto err1;
- err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
- if (err < 0)
- goto err2;
- return 0;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-err1:
- return err;
-}
-
-static void __exit nf_nat_proto_dccp_fini(void)
-{
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-
-}
-
-module_init(nf_nat_proto_dccp_init);
-module_exit(nf_nat_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP NAT protocol helper");
-MODULE_LICENSE("GPL");
--
2.1.4
^ permalink raw reply related
* [PATCH 03/50] netfilter: update Arturo Borrero Gonzalez email address
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Arturo Borrero Gonzalez <arturo@debian.org>
The email address has changed, let's update the copyright statements.
Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/ipv4/netfilter/nft_masq_ipv4.c | 4 ++--
net/ipv4/netfilter/nft_redir_ipv4.c | 4 ++--
net/ipv6/netfilter/nft_masq_ipv6.c | 4 ++--
net/ipv6/netfilter/nft_redir_ipv6.c | 4 ++--
net/netfilter/nft_masq.c | 4 ++--
net/netfilter/nft_redir.c | 4 ++--
6 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 4f697e431811..4d69f99b8707 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -77,5 +77,5 @@ module_init(nft_masq_ipv4_module_init);
module_exit(nft_masq_ipv4_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index 16df0493c5ce..62c18e68ac58 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -71,5 +71,5 @@ module_init(nft_redir_ipv4_module_init);
module_exit(nft_redir_ipv4_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index a2aff1277b40..93d758f70334 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -78,5 +78,5 @@ module_init(nft_masq_ipv6_module_init);
module_exit(nft_masq_ipv6_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index bfcd5af6bc15..2850fcd8583f 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -72,5 +72,5 @@ module_init(nft_redir_ipv6_module_init);
module_exit(nft_redir_ipv6_module_exit);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 81b5ad6165ac..bf92de01410f 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -105,4 +105,4 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
EXPORT_SYMBOL_GPL(nft_masq_dump);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 03f7bf40ae75..967e09b099b2 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -108,4 +108,4 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
EXPORT_SYMBOL_GPL(nft_redir_dump);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
--
2.1.4
^ permalink raw reply related
* [PATCH 02/50] ipvs: Decrement ttl
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Dwip Banerjee <dwip@linux.vnet.ibm.com>
We decrement the IP ttl in all the modes in order to prevent infinite
route loops. The changes were done based on Julian Anastasov's
suggestions in a prior thread.
The ttl based check/discard and the actual decrement are done in
__ip_vs_get_out_rt() and in __ip_vs_get_out_rt_v6(), for the IPv6
case. decrement_ttl() implements the actual functionality for the
two cases.
Signed-off-by: Dwip Banerjee <dwip@linux.vnet.ibm.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_xmit.c | 54 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 01d3d894de46..4e1a98fcc8c3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -254,6 +254,54 @@ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
return true;
}
+static inline bool decrement_ttl(struct netns_ipvs *ipvs,
+ int skb_af,
+ struct sk_buff *skb)
+{
+ struct net *net = ipvs->net;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ struct dst_entry *dst = skb_dst(skb);
+
+ /* check and decrement ttl */
+ if (ipv6_hdr(skb)->hop_limit <= 1) {
+ /* Force OUTPUT device used as source address */
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+ ICMPV6_EXC_HOPLIMIT, 0);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
+
+ return false;
+ }
+
+ /* don't propagate ttl change to cloned packets */
+ if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ return false;
+
+ ipv6_hdr(skb)->hop_limit--;
+ } else
+#endif
+ {
+ if (ip_hdr(skb)->ttl <= 1) {
+ /* Tell the sender its packet died... */
+ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
+ icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+ return false;
+ }
+
+ /* don't propagate ttl change to cloned packets */
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ return false;
+
+ /* Decrease ttl */
+ ip_decrease_ttl(ip_hdr(skb));
+ }
+
+ return true;
+}
+
/* Get route to destination or remote server */
static int
__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
@@ -326,6 +374,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
return local;
}
+ if (!decrement_ttl(ipvs, skb_af, skb))
+ goto err_put;
+
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
mtu = dst_mtu(&rt->dst);
} else {
@@ -473,6 +524,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
return local;
}
+ if (!decrement_ttl(ipvs, skb_af, skb))
+ goto err_put;
+
/* MTU checking */
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
mtu = dst_mtu(&rt->dst);
--
2.1.4
^ permalink raw reply related
* [PATCH 01/50] ipvs: Use IS_ERR_OR_NULL(svc) instead of IS_ERR(svc) || svc == NULL
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>
From: Gao Feng <fgao@ikuai8.com>
This minor refactoring does not change the logic of function
ip_vs_genl_dump_dests.
Signed-off-by: Gao Feng <fgao@ikuai8.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6b85ded4f91d..217e0105b5e0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3260,7 +3260,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
- if (IS_ERR(svc) || svc == NULL)
+ if (IS_ERR_OR_NULL(svc))
goto out_err;
/* Dump the destinations */
--
2.1.4
^ permalink raw reply related
* RE: [net-next 20/20] i40e: don't allow i40e_vsi_(add|kill)_vlan to operate when VID<1
From: Keller, Jacob E @ 2016-12-07 21:50 UTC (permalink / raw)
To: Sergei Shtylyov, Kirsher, Jeffrey T, davem@davemloft.net
Cc: netdev@vger.kernel.org, nhorman@redhat.com, sassmann@redhat.com,
jogreene@redhat.com, guru.anbalagane@oracle.com
In-Reply-To: <8f82cd1d-0118-7b37-1a05-fa7b77d4e75c@cogentembedded.com>
> -----Original Message-----
> From: Sergei Shtylyov [mailto:sergei.shtylyov@cogentembedded.com]
> Sent: Wednesday, December 07, 2016 2:11 AM
> To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; davem@davemloft.net
> Cc: Keller, Jacob E <jacob.e.keller@intel.com>; netdev@vger.kernel.org;
> nhorman@redhat.com; sassmann@redhat.com; jogreene@redhat.com;
> guru.anbalagane@oracle.com
> Subject: Re: [net-next 20/20] i40e: don't allow i40e_vsi_(add|kill)_vlan to operate
> when VID<1
>
> Hello!
> > + if (!(vid > 0) || vsi->info.pvid)
>
> Why not just '!vid'?
Left over artifact of this previously being a signed value. We can fix this.
Thanks,
Jake
> > -void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
> > +void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
> > {
> > + if (!(vid > 0) || vsi->info.pvid)
>
> Likewise.
Same here. Can get this fixed.
Thanks,
Jake
>
> > + return;
> > +
> > spin_lock_bh(&vsi->mac_filter_hash_lock);
> > i40e_rm_vlan_all_mac(vsi, vid);
> > spin_unlock_bh(&vsi->mac_filter_hash_lock);
>
> MBR, Sergei
^ permalink raw reply
* Re: [PATCH 2/2] net: ethernet: stmmac: remove private tx queue lock
From: Lino Sanfilippo @ 2016-12-07 21:43 UTC (permalink / raw)
To: Pavel Machek
Cc: bh74.an, ks.giri, vipul.pandya, peppe.cavallaro, alexandre.torgue,
davem, linux-kernel, netdev
In-Reply-To: <20161207213757.GC2250@amd>
Hi Pavel,
On 07.12.2016 22:37, Pavel Machek wrote:
> On Wed 2016-12-07 21:05:38, Lino Sanfilippo wrote:
>> The driver uses a private lock for synchronization between the xmit
>> function and the xmit completion handler, but since the NETIF_F_LLTX flag
>> is not set, the xmit function is also called with the xmit_lock held.
>>
>> On the other hand the xmit completion handler first takes the private lock
>> and (in case that the tx queue has been stopped) the xmit_lock, leading to
>> a reverse locking order and the potential danger of a deadlock.
>>
>> Fix this by removing the private lock completely and synchronizing the xmit
>> function and completion handler solely by means of the xmit_lock. By doing
>> this remove also the now unnecessary double check for a stopped tx queue.
>>
>
> FYI, here's modified version. I believe _bh versions are needed, and
> I'm testing that version now. (Oh and I also ported it to net-next).
>
> It survived 30 minutes of testing so far...
>
First off, thanks for testing.
Hmm. I dont understand why _bh would be needed. We call that function from
BH context only (napi poll and timer).
Any idea?
Lino
^ permalink raw reply
* Re: [PATCH 2/2] net: ethernet: stmmac: remove private tx queue lock
From: Pavel Machek @ 2016-12-07 21:37 UTC (permalink / raw)
To: Lino Sanfilippo
Cc: bh74.an, ks.giri, vipul.pandya, peppe.cavallaro, alexandre.torgue,
davem, linux-kernel, netdev
In-Reply-To: <1481141138-19466-3-git-send-email-LinoSanfilippo@gmx.de>
[-- Attachment #1: Type: text/plain, Size: 5197 bytes --]
On Wed 2016-12-07 21:05:38, Lino Sanfilippo wrote:
> The driver uses a private lock for synchronization between the xmit
> function and the xmit completion handler, but since the NETIF_F_LLTX flag
> is not set, the xmit function is also called with the xmit_lock held.
>
> On the other hand the xmit completion handler first takes the private lock
> and (in case that the tx queue has been stopped) the xmit_lock, leading to
> a reverse locking order and the potential danger of a deadlock.
>
> Fix this by removing the private lock completely and synchronizing the xmit
> function and completion handler solely by means of the xmit_lock. By doing
> this remove also the now unnecessary double check for a stopped tx queue.
>
FYI, here's modified version. I believe _bh versions are needed, and
I'm testing that version now. (Oh and I also ported it to net-next).
It survived 30 minutes of testing so far...
Best regards,
Pavel
Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Pavel Machek <pavel@denx.de>
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index dbacb80..eab04ae 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -64,7 +64,6 @@ struct stmmac_priv {
dma_addr_t dma_tx_phy;
int tx_coalesce;
int hwts_tx_en;
- spinlock_t tx_lock;
bool tx_path_in_lpi_mode;
struct timer_list txtimer;
bool tso;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 982c952..7415bc2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1308,7 +1308,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
unsigned int bytes_compl = 0, pkts_compl = 0;
unsigned int entry = priv->dirty_tx;
- spin_lock(&priv->tx_lock);
+ netif_tx_lock_bh(priv->dev);
priv->xstats.tx_clean++;
@@ -1378,23 +1378,18 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
netdev_completed_queue(priv->dev, pkts_compl, bytes_compl);
- if (unlikely(netif_queue_stopped(priv->dev) &&
- stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
- netif_tx_lock(priv->dev);
- if (netif_queue_stopped(priv->dev) &&
- stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {
- netif_dbg(priv, tx_done, priv->dev,
- "%s: restart transmit\n", __func__);
- netif_wake_queue(priv->dev);
- }
- netif_tx_unlock(priv->dev);
+ if (netif_queue_stopped(priv->dev) &&
+ stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {
+ netif_dbg(priv, tx_done, priv->dev,
+ "%s: restart transmit\n", __func__);
+ netif_wake_queue(priv->dev);
}
if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
stmmac_enable_eee_mode(priv);
mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
}
- spin_unlock(&priv->tx_lock);
+ netif_tx_unlock_bh(priv->dev);
}
static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
@@ -2006,8 +2001,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
u8 proto_hdr_len;
int i;
- spin_lock(&priv->tx_lock);
-
/* Compute header lengths */
proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -2021,7 +2014,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
"%s: Tx Ring full when queue awake\n",
__func__);
}
- spin_unlock(&priv->tx_lock);
return NETDEV_TX_BUSY;
}
@@ -2156,11 +2148,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
STMMAC_CHAN0);
- spin_unlock(&priv->tx_lock);
return NETDEV_TX_OK;
dma_map_err:
- spin_unlock(&priv->tx_lock);
dev_err(priv->device, "Tx dma map failed\n");
dev_kfree_skb(skb);
priv->dev->stats.tx_dropped++;
@@ -2192,10 +2182,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
return stmmac_tso_xmit(skb, dev);
}
- spin_lock(&priv->tx_lock);
-
if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
- spin_unlock(&priv->tx_lock);
if (!netif_queue_stopped(dev)) {
netif_stop_queue(dev);
/* This is a hard error, log it. */
@@ -2366,11 +2353,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
STMMAC_CHAN0);
- spin_unlock(&priv->tx_lock);
return NETDEV_TX_OK;
dma_map_err:
- spin_unlock(&priv->tx_lock);
netdev_err(priv->dev, "Tx DMA map failed\n");
dev_kfree_skb(skb);
priv->dev->stats.tx_dropped++;
@@ -3357,7 +3342,6 @@ int stmmac_dvr_probe(struct device *device,
netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
spin_lock_init(&priv->lock);
- spin_lock_init(&priv->tx_lock);
ret = register_netdev(ndev);
if (ret) {
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]
^ permalink raw reply related
* net-next closing, README
From: David Miller @ 2016-12-07 21:28 UTC (permalink / raw)
To: netdev
The merge window is about to open soon, and next week I will be
having sporadic internet access while travelling around, therefore
I am closing net-next up tonight.
Therefore, please do not submit any new features or cleanups for
net-next. Bug fixes for problems introduced in net-next are fine,
however.
Thank you.
^ permalink raw reply
* Re: [patch] ser_gigaset: return -ENOMEM on error instead of success
From: Paul Bolle @ 2016-12-07 21:08 UTC (permalink / raw)
To: Tilman Schmidt, Dan Carpenter
Cc: Karsten Keil, David S. Miller, gigaset307x-common, netdev,
kernel-janitors
In-Reply-To: <d8eb1f69-e68e-7792-00de-b14e6bac1606@imap.cc>
Hi Tilman,
On Wed, 2016-12-07 at 21:57 +0100, Tilman Schmidt wrote:
> Not much of a mess, I reckon. Everything that has been allocated and
> registered up to that point is properly deallocated and unregistered.
> The code just fails to tell the kernel that module initialization has
> failed, so the module remains loaded even though it can never be
> called because it isn't hooked anywhere. That's a nuisance and a
> waste of RAM, but not much more.
Yes.
But then the removal of the module, which is the only reasonable thing to do
after all this has happened, seems to trigger a WARN in driver_unregister().
And it's that WARN that I think requires the entire stable song and dance.
Otherwise it would be, as far as I can tell, a hard to hit problem in an
obscure driver without any side effects.
Paul Bolle
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox