* [PATCH 37/79] IPVS: netns awarness to lblc sheduler
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
var sysctl_ip_vs_lblc_expiration moved to ipvs struct as
sysctl_lblc_expiration
procfs updated to handle this.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/netns/ip_vs.h | 4 +++
net/netfilter/ipvs/ip_vs_lblc.c | 50 ++++++++++++++++++++++++++------------
2 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 51a92ee..d14581c 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -29,6 +29,10 @@ struct netns_ipvs {
struct list_head rs_table[IP_VS_RTAB_SIZE];
+ /* ip_vs_lblc */
+ int sysctl_lblc_expiration;
+ struct ctl_table_header *lblc_ctl_header;
+ struct ctl_table *lblc_ctl_table;
/* ip_vs_lblcr */
int sysctl_lblcr_expiration;
struct ctl_table_header *lblcr_ctl_header;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 84278fb..d5bec33 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -70,7 +70,6 @@
* entries that haven't been touched for a day.
*/
#define COUNT_FOR_FULL_EXPIRATION 30
-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
/*
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table {
static ctl_table vs_vars_table[] = {
{
.procname = "lblc_expiration",
- .data = &sysctl_ip_vs_lblc_expiration,
+ .data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = {
{ }
};
-static struct ctl_table_header * sysctl_header;
-
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
list_del(&en->list);
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
- en->lastuse + sysctl_ip_vs_lblc_expiration))
+ en->lastuse +
+ ipvs->sysctl_lblc_expiration))
continue;
ip_vs_lblc_free(en);
@@ -548,23 +547,43 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
*/
static int __net_init __ip_vs_lblc_init(struct net *net)
{
- if (!net_eq(net, &init_net)) /* netns not enabled yet */
- return -EPERM;
-
- sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
- vs_vars_table);
- if (!sysctl_header)
- return -ENOMEM;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!net_eq(net, &init_net)) {
+ ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+ sizeof(vs_vars_table),
+ GFP_KERNEL);
+ if (ipvs->lblc_ctl_table == NULL)
+ goto err_dup;
+ } else
+ ipvs->lblc_ctl_table = vs_vars_table;
+ ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+ ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+ ipvs->lblc_ctl_header =
+ register_net_sysctl_table(net, net_vs_ctl_path,
+ ipvs->lblc_ctl_table);
+ if (!ipvs->lblc_ctl_header)
+ goto err_reg;
return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblc_ctl_table);
+
+err_dup:
+ return -ENOMEM;
}
static void __net_exit __ip_vs_lblc_exit(struct net *net)
{
- if (!net_eq(net, &init_net)) /* netns not enabled yet */
- return;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ unregister_net_sysctl_table(ipvs->lblc_ctl_header);
- unregister_net_sysctl_table(sysctl_header);
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblc_ctl_table);
}
static struct pernet_operations ip_vs_lblc_ops = {
@@ -586,7 +605,6 @@ static int __init ip_vs_lblc_init(void)
return ret;
}
-
static void __exit ip_vs_lblc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
--
1.7.2.3
^ permalink raw reply related
* [PATCH 39/79] IPVS: netns preparation for proto_tcp
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
In this phase (one), all local vars will be moved to ipvs struct.
Remaining work, add param struct net *net to a couple of
functions that is common for all protos and use all
ip_vs_proto_data
*v3
Removed unused function as sugested by Simon
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 2 +-
include/net/netns/ip_vs.h | 8 +++
net/netfilter/ipvs/ip_vs_ftp.c | 8 ++-
net/netfilter/ipvs/ip_vs_proto.c | 13 ++++-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 97 ++++++++++++++++++----------------
5 files changed, 79 insertions(+), 49 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 88d4e40..3c45a00 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -807,7 +807,7 @@ extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
extern const char * ip_vs_state_name(__u16 proto, int state);
-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_check_template(struct ip_vs_conn *ct);
extern void ip_vs_random_dropentry(void);
extern int ip_vs_conn_init(void);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 6f4e089..ac77363 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -31,6 +31,14 @@ struct netns_ipvs {
/* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+ /* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ #define TCP_APP_TAB_BITS 4
+ #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
+ #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
+ struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+ spinlock_t tcp_app_lock;
+#endif
/* ip_vs_lblc */
int sysctl_lblc_expiration;
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 0e762f3..b38ae94 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
+ struct net *net;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@@ -257,8 +258,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* would be adjusted twice.
*/
+ net = skb_net(skb);
cp->app_data = NULL;
- ip_vs_tcp_conn_listen(n_cp);
+ ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return ret;
}
@@ -287,6 +289,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
+ struct net *net;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@@ -378,7 +381,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Move tunnel to listen state
*/
- ip_vs_tcp_conn_listen(n_cp);
+ net = skb_net(skb);
+ ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return 1;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 576e296..320c6a6 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -307,12 +307,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
*/
static int __net_init __ip_vs_protocol_init(struct net *net)
{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
return 0;
}
static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
{
- /* empty */
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd;
+ int i;
+
+ /* unregister all the ipvs proto data for this netns */
+ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+ while ((pd = ipvs->proto_data_table[i]) != NULL)
+ unregister_ip_vs_proto_netns(net, pd);
+ }
}
static struct pernet_operations ipvs_proto_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index c175d31..9d9df3d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
*
+ * Network name space (netns) aware.
+ * Global data moved to netns i.e struct netns_ipvs
+ * tcp_timeouts table has copy per netns in a hash table per
+ * protocol ip_vs_proto_data and is handled by netns
*/
#define KMSG_COMPONENT "IPVS"
@@ -345,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
/*
* Timeout table[state]
*/
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_NONE] = 2*HZ,
[IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
[IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
@@ -460,13 +464,6 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
tcp_state_table = (on? tcp_states_dos : tcp_states);
}
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
- tcp_state_name_table, sname, to);
-}
-
static inline int tcp_state_idx(struct tcphdr *th)
{
if (th->rst)
@@ -487,6 +484,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
int state_idx;
int new_state = IP_VS_TCP_S_CLOSE;
int state_off = tcp_state_off[direction];
+ struct ip_vs_proto_data *pd; /* Temp fix */
/*
* Update state offset to INPUT_ONLY if necessary
@@ -542,10 +540,13 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
}
}
- cp->timeout = pp->timeout_table[cp->state = new_state];
+ pd = ip_vs_proto_data_get(&init_net, pp->protocol);
+ if (likely(pd))
+ cp->timeout = pd->timeout_table[cp->state = new_state];
+ else /* What to do ? */
+ cp->timeout = tcp_timeouts[cp->state = new_state];
}
-
/*
* Handle state transitions
*/
@@ -573,17 +574,6 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
return 1;
}
-
-/*
- * Hash table for TCP application incarnations
- */
-#define TCP_APP_TAB_BITS 4
-#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
-#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
static inline __u16 tcp_app_hashkey(__be16 port)
{
return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -597,21 +587,23 @@ static int tcp_register_app(struct ip_vs_app *inc)
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
- spin_lock_bh(&tcp_app_lock);
- list_for_each_entry(i, &tcp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->tcp_app_lock);
+ list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &tcp_apps[hash]);
- atomic_inc(&ip_vs_protocol_tcp.appcnt);
+ list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+ atomic_inc(&pd->pp->appcnt);
out:
- spin_unlock_bh(&tcp_app_lock);
+ spin_unlock_bh(&ipvs->tcp_app_lock);
return ret;
}
@@ -619,16 +611,20 @@ static int tcp_register_app(struct ip_vs_app *inc)
static void
tcp_unregister_app(struct ip_vs_app *inc)
{
- spin_lock_bh(&tcp_app_lock);
- atomic_dec(&ip_vs_protocol_tcp.appcnt);
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
+
+ spin_lock_bh(&ipvs->tcp_app_lock);
+ atomic_dec(&pd->pp->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&tcp_app_lock);
+ spin_unlock_bh(&ipvs->tcp_app_lock);
}
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -640,12 +636,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- spin_lock(&tcp_app_lock);
- list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+ spin_lock(&ipvs->tcp_app_lock);
+ list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&tcp_app_lock);
+ spin_unlock(&ipvs->tcp_app_lock);
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -662,7 +658,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&tcp_app_lock);
+ spin_unlock(&ipvs->tcp_app_lock);
out:
return result;
@@ -672,24 +668,34 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/*
* Set LISTEN timeout. (ip_vs_conn_put will setup timer)
*/
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
spin_lock(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
- cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+ cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+ : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
spin_unlock(&cp->lock);
}
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ * timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(tcp_apps);
- pp->timeout_table = tcp_timeouts;
-}
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+ spin_lock_init(&ipvs->tcp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
+ sizeof(tcp_timeouts));
+}
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
+ kfree(pd->timeout_table);
}
@@ -699,8 +705,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
.appcnt = ATOMIC_INIT(0),
- .init = ip_vs_tcp_init,
- .exit = ip_vs_tcp_exit,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __ip_vs_tcp_init,
+ .exit_netns = __ip_vs_tcp_exit,
.register_app = tcp_register_app,
.unregister_app = tcp_unregister_app,
.conn_schedule = tcp_conn_schedule,
@@ -714,5 +722,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.app_conn_bind = tcp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = tcp_timeout_change,
- .set_state_timeout = tcp_set_state_timeout,
};
--
1.7.2.3
^ permalink raw reply related
* [PATCH 40/79] IPVS: netns preparation for proto_udp
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
In this phase (one), all local vars will be moved to ipvs struct.
Remaining work, add param struct net *net to a couple of
functions that is common for all protos and use ip_vs_proto_data
*v3
Removed unused function set_state_timeout()
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/netns/ip_vs.h | 8 +++
net/netfilter/ipvs/ip_vs_proto.c | 3 +
net/netfilter/ipvs/ip_vs_proto_udp.c | 86 +++++++++++++++++-----------------
3 files changed, 54 insertions(+), 43 deletions(-)
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index ac77363..62b1448 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -39,6 +39,14 @@ struct netns_ipvs {
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
spinlock_t tcp_app_lock;
#endif
+ /* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ #define UDP_APP_TAB_BITS 4
+ #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
+ #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
+ struct list_head udp_apps[UDP_APP_TAB_SIZE];
+ spinlock_t udp_app_lock;
+#endif
/* ip_vs_lblc */
int sysctl_lblc_expiration;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 320c6a6..cdc4142 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -310,6 +310,9 @@ static int __net_init __ip_vs_protocol_init(struct net *net)
#ifdef CONFIG_IP_VS_PROTO_TCP
register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 5ab54f6..71a4721 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ * Network name space (netns) aware.
*
*/
@@ -345,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
return 1;
}
-
-/*
- * Note: the caller guarantees that only one of register_app,
- * unregister_app or app_conn_bind is called each time.
- */
-
-#define UDP_APP_TAB_BITS 4
-#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
-#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
static inline __u16 udp_app_hashkey(__be16 port)
{
return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -371,22 +359,24 @@ static int udp_register_app(struct ip_vs_app *inc)
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
hash = udp_app_hashkey(port);
- spin_lock_bh(&udp_app_lock);
- list_for_each_entry(i, &udp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->udp_app_lock);
+ list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &udp_apps[hash]);
- atomic_inc(&ip_vs_protocol_udp.appcnt);
+ list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+ atomic_inc(&pd->pp->appcnt);
out:
- spin_unlock_bh(&udp_app_lock);
+ spin_unlock_bh(&ipvs->udp_app_lock);
return ret;
}
@@ -394,15 +384,19 @@ static int udp_register_app(struct ip_vs_app *inc)
static void
udp_unregister_app(struct ip_vs_app *inc)
{
- spin_lock_bh(&udp_app_lock);
- atomic_dec(&ip_vs_protocol_udp.appcnt);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
+
+ spin_lock_bh(&ipvs->udp_app_lock);
+ atomic_dec(&pd->pp->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&udp_app_lock);
+ spin_unlock_bh(&ipvs->udp_app_lock);
}
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -414,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- spin_lock(&udp_app_lock);
- list_for_each_entry(inc, &udp_apps[hash], p_list) {
+ spin_lock(&ipvs->udp_app_lock);
+ list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&udp_app_lock);
+ spin_unlock(&ipvs->udp_app_lock);
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -436,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&udp_app_lock);
+ spin_unlock(&ipvs->udp_app_lock);
out:
return result;
}
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
[IP_VS_UDP_S_NORMAL] = 5*60*HZ,
[IP_VS_UDP_S_LAST] = 2*HZ,
};
@@ -453,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
[IP_VS_UDP_S_LAST] = "BUG!",
};
-
-static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
- udp_state_name_table, sname, to);
-}
-
static const char * udp_state_name(int state)
{
if (state >= IP_VS_UDP_S_LAST)
@@ -473,18 +459,31 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
- cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+ struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */
+
+ pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
+ if (unlikely(!pd)) {
+ pr_err("UDP no ns data\n");
+ return 0;
+ }
+
+ cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
return 1;
}
-static void udp_init(struct ip_vs_protocol *pp)
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(udp_apps);
- pp->timeout_table = udp_timeouts;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
+ spin_lock_init(&ipvs->udp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
+ sizeof(udp_timeouts));
}
-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
+ kfree(pd->timeout_table);
}
@@ -493,8 +492,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
.protocol = IPPROTO_UDP,
.num_states = IP_VS_UDP_S_LAST,
.dont_defrag = 0,
- .init = udp_init,
- .exit = udp_exit,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __udp_init,
+ .exit_netns = __udp_exit,
.conn_schedule = udp_conn_schedule,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
@@ -508,5 +509,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
.app_conn_bind = udp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL,
- .set_state_timeout = udp_set_state_timeout,
};
--
1.7.2.3
^ permalink raw reply related
* [PATCH 42/79] IPVS: netns preparation for proto_ah_esp
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
In this phase (one), all local vars will be moved to ipvs struct.
Remaining work, add param struct net *net to a couple of
functions that common for all protos.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_proto.c | 6 ++++++
net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 20 ++++----------------
2 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 001b2f8..9f609d4 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -316,6 +316,12 @@ static int __net_init __ip_vs_protocol_init(struct net *net)
#ifdef CONFIG_IP_VS_PROTO_SCTP
register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 3a04611..b8b37fa 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -117,26 +117,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
}
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
- /* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
- /* nothing to do now */
-}
-
-
#ifdef CONFIG_IP_VS_PROTO_AH
struct ip_vs_protocol ip_vs_protocol_ah = {
.name = "AH",
.protocol = IPPROTO_AH,
.num_states = 1,
.dont_defrag = 1,
- .init = ah_esp_init,
- .exit = ah_esp_exit,
+ .init = NULL,
+ .exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
@@ -159,8 +147,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
.protocol = IPPROTO_ESP,
.num_states = 1,
.dont_defrag = 1,
- .init = ah_esp_init,
- .exit = ah_esp_exit,
+ .init = NULL,
+ .exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
--
1.7.2.3
^ permalink raw reply related
* [PATCH 43/79] IPVS: netns, use ip_vs_proto_data as param.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
ip_vs_protocol *pp is replaced by ip_vs_proto_data *pd in
function call in ip_vs_protocol struct i.e. :,
- timeout_change()
- state_transition()
ip_vs_protocol_timeout_change() got ipvs as param, due to above
and a upcoming patch - defence work
Most of this changes are triggered by Julians comment:
"tcp_timeout_change should work with the new struct ip_vs_proto_data
so that tcp_state_table will go to pd->state_table
and set_tcp_state will get pd instead of pp"
*v3
Mostly comments from Julian
The pp -> pd conversion should start from functions like
ip_vs_out() that use pp = ip_vs_proto_get(iph.protocol),
now they should use ip_vs_proto_data_get(net, iph.protocol).
conn_in_get() and conn_out_get() unused param *pp, removed.
*v4
ip_vs_protocol_timeout_change() walk the proto_data path.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 18 ++-----
net/netfilter/ipvs/ip_vs_conn.c | 2 -
net/netfilter/ipvs/ip_vs_core.c | 77 +++++++++++++++++++------------
net/netfilter/ipvs/ip_vs_ctl.c | 55 ++++++++++++++--------
net/netfilter/ipvs/ip_vs_proto.c | 21 ++++++---
net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 10 ++--
net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++----
net/netfilter/ipvs/ip_vs_proto_tcp.c | 27 +++++------
net/netfilter/ipvs/ip_vs_proto_udp.c | 11 ++---
net/netfilter/xt_ipvs.c | 2 +-
10 files changed, 129 insertions(+), 110 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3c45a00..464ea36 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -372,13 +372,12 @@ struct ip_vs_protocol {
void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
int (*conn_schedule)(int af, struct sk_buff *skb,
- struct ip_vs_protocol *pp,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn *
(*conn_in_get)(int af,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -386,7 +385,6 @@ struct ip_vs_protocol {
struct ip_vs_conn *
(*conn_out_get)(int af,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -404,7 +402,7 @@ struct ip_vs_protocol {
int (*state_transition)(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp);
+ struct ip_vs_proto_data *pd);
int (*register_app)(struct ip_vs_app *inc);
@@ -417,9 +415,7 @@ struct ip_vs_protocol {
int offset,
const char *msg);
- void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
-
- int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+ void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};
/*
@@ -778,7 +774,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -786,7 +781,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -917,7 +911,7 @@ static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
*/
extern int ip_vs_protocol_init(void);
extern void ip_vs_protocol_cleanup(void);
-extern void ip_vs_protocol_timeout_change(int flags);
+extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
extern int *ip_vs_create_timeout_table(int *table, int size);
extern int
ip_vs_set_state_timeout(int *table, int num, const char *const *names,
@@ -947,9 +941,9 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp, int *ignored);
+ struct ip_vs_proto_data *pd, int *ignored);
extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp);
+ struct ip_vs_proto_data *pd);
/*
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 7a0e79e..a7aba6a 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -329,7 +329,6 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off, int inverse)
{
@@ -428,7 +427,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off, int inverse)
{
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d0616ea..9317aff 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -177,11 +177,11 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
static inline int
ip_vs_set_state(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
- if (unlikely(!pp->state_transition))
+ if (unlikely(!pd->pp->state_transition))
return 0;
- return pp->state_transition(cp, direction, skb, pp);
+ return pd->pp->state_transition(cp, direction, skb, pd);
}
static inline int
@@ -378,8 +378,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
*/
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp, int *ignored)
+ struct ip_vs_proto_data *pd, int *ignored)
{
+ struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
@@ -408,7 +409,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
* Do not schedule replies from local real server.
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+ (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling reply for existing connection");
__ip_vs_conn_put(cp);
@@ -479,11 +480,12 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
* no destination is available for a new connection.
*/
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
int unicast;
+
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -530,10 +532,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_in_stats(cp, skb);
/* set state */
- cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+ cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
/* transmit the first SYN packet */
- ret = cp->packet_xmit(skb, cp, pp);
+ ret = cp->packet_xmit(skb, cp, pd->pp);
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
@@ -840,7 +842,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
@@ -917,7 +919,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
@@ -956,9 +958,11 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
* Used for NAT and local client.
*/
static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
+ struct ip_vs_protocol *pp = pd->pp;
+
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl))
@@ -1007,7 +1011,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
- ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+ ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT))
ip_vs_notrack(skb);
@@ -1034,6 +1038,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
EnterFunction(11);
@@ -1079,9 +1084,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
- pp = ip_vs_proto_get(iph.protocol);
- if (unlikely(!pp))
+ pd = ip_vs_proto_data_get(net, iph.protocol);
+ if (unlikely(!pd))
return NF_ACCEPT;
+ pp = pd->pp;
/* reassemble IP fragments */
#ifdef CONFIG_IP_VS_IPV6
@@ -1107,10 +1113,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
if (likely(cp))
- return handle_response(af, skb, pp, cp, iph.len);
+ return handle_response(af, skb, pd, cp, iph.len);
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
@@ -1236,12 +1242,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
static int
ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
{
+ struct net *net = NULL;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
unsigned int offset, ihl, verdict;
union nf_inet_addr snet;
@@ -1283,9 +1291,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(cih->protocol);
- if (!pp)
+ net = skb_net(skb);
+ pd = ip_vs_proto_data_get(net, cih->protocol);
+ if (!pd)
return NF_ACCEPT;
+ pp = pd->pp;
/* Is the embedded protocol header present? */
if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1299,10 +1309,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
- cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
if (cp) {
snet.ip = iph->saddr;
return handle_response_icmp(AF_INET, skb, &snet,
@@ -1346,6 +1356,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
static int
ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
{
+ struct net *net = NULL;
struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
struct ipv6hdr _ciph, *cih; /* The ip header contained
@@ -1353,6 +1364,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
unsigned int offset, verdict;
union nf_inet_addr snet;
struct rt6_info *rt;
@@ -1395,9 +1407,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(cih->nexthdr);
- if (!pp)
+ net = skb_net(skb);
+ pd = ip_vs_proto_data_get(net, cih->nexthdr);
+ if (!pd)
return NF_ACCEPT;
+ pp = pd->pp;
/* Is the embedded protocol header present? */
/* TODO: we don't support fragmentation at the moment anyways */
@@ -1411,10 +1425,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
- cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
if (cp) {
ipv6_addr_copy(&snet.in6, &iph->saddr);
return handle_response_icmp(AF_INET6, skb, &snet,
@@ -1457,8 +1471,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
{
+ struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, restart, pkts;
@@ -1514,20 +1530,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
+ net = skb_net(skb);
/* Protocol supported? */
- pp = ip_vs_proto_get(iph.protocol);
- if (unlikely(!pp))
+ pd = ip_vs_proto_data_get(net, iph.protocol);
+ if (unlikely(!pd))
return NF_ACCEPT;
-
+ pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+ cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
if (unlikely(!cp)) {
int v;
- if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+ if (!pp->conn_schedule(af, skb, pd, &v, &cp))
return v;
}
@@ -1555,7 +1572,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
}
ip_vs_in_stats(cp, skb);
- restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+ restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
if (cp->packet_xmit)
ret = cp->packet_xmit(skb, cp, pp);
/* do not touch skb anymore */
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 2d7c96b..88474f1 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
#include <linux/mutex.h>
#include <net/net_namespace.h>
+#include <linux/nsproxy.h>
#include <net/ip.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
@@ -125,7 +126,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
*/
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
{
struct sysinfo i;
static int old_secure_tcp = 0;
@@ -239,7 +240,8 @@ static void update_defense_level(void)
}
old_secure_tcp = sysctl_ip_vs_secure_tcp;
if (to_change >= 0)
- ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
+ ip_vs_protocol_timeout_change(ipvs,
+ sysctl_ip_vs_secure_tcp > 1);
spin_unlock(&ip_vs_securetcp_lock);
local_bh_enable();
@@ -255,7 +257,10 @@ static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
static void defense_work_handler(struct work_struct *work)
{
- update_defense_level();
+ struct net *net = &init_net;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ update_defense_level(ipvs);
if (atomic_read(&ip_vs_dropentry))
ip_vs_random_dropentry();
@@ -1502,6 +1507,7 @@ static int
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ struct net *net = current->nsproxy->net_ns;
int *valp = table->data;
int val = *valp;
int rc;
@@ -1512,7 +1518,7 @@ proc_do_defense_mode(ctl_table *table, int write,
/* Restore the correct value */
*valp = val;
} else {
- update_defense_level();
+ update_defense_level(net_ipvs(net));
}
}
return rc;
@@ -2033,8 +2039,10 @@ static const struct file_operations ip_vs_stats_fops = {
/*
* Set timeout values for tcp tcpfin udp in the timeout_table.
*/
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
{
+ struct ip_vs_proto_data *pd;
+
IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
u->tcp_timeout,
u->tcp_fin_timeout,
@@ -2042,19 +2050,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
#ifdef CONFIG_IP_VS_PROTO_TCP
if (u->tcp_timeout) {
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
= u->tcp_timeout * HZ;
}
if (u->tcp_fin_timeout) {
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
= u->tcp_fin_timeout * HZ;
}
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
if (u->udp_timeout) {
- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd->timeout_table[IP_VS_UDP_S_NORMAL]
= u->udp_timeout * HZ;
}
#endif
@@ -2158,7 +2169,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
- ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+ ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
@@ -2370,17 +2381,19 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
}
static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
{
+ struct ip_vs_proto_data *pd;
+
#ifdef CONFIG_IP_VS_PROTO_TCP
- u->tcp_timeout =
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
- u->tcp_fin_timeout =
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+ u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
u->udp_timeout =
- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+ pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
@@ -2521,7 +2534,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
}
@@ -3092,11 +3105,11 @@ static int ip_vs_genl_del_daemon(struct nlattr **attrs)
return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
}
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3108,7 +3121,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
- return ip_vs_set_timeout(&t);
+ return ip_vs_set_timeout(net, &t);
}
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3129,7 +3142,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
ret = ip_vs_flush(net);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
- ret = ip_vs_genl_set_config(info->attrs);
+ ret = ip_vs_genl_set_config(net, info->attrs);
goto out;
} else if (cmd == IPVS_CMD_NEW_DAEMON ||
cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3281,7 +3294,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 9f609d4..6ac986c 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -152,9 +152,8 @@ EXPORT_SYMBOL(ip_vs_proto_get);
* get ip_vs_protocol object data by netns and proto
*/
struct ip_vs_proto_data *
-ip_vs_proto_data_get(struct net *net, unsigned short proto)
+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd;
unsigned hash = IP_VS_PROTO_HASH(proto);
@@ -165,20 +164,28 @@ ip_vs_proto_data_get(struct net *net, unsigned short proto)
return NULL;
}
+
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ return __ipvs_proto_data_get(ipvs, proto);
+}
EXPORT_SYMBOL(ip_vs_proto_data_get);
/*
* Propagate event for state change to all protocols
*/
-void ip_vs_protocol_timeout_change(int flags)
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
{
- struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
int i;
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
- for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
- if (pp->timeout_change)
- pp->timeout_change(pp, flags);
+ for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+ if (pd->pp->timeout_change)
+ pd->pp->timeout_change(pd, flags);
}
}
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index b8b37fa..28039cb 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -55,7 +55,7 @@ ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
}
static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
@@ -72,7 +72,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
- pp->name,
+ ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
@@ -83,7 +83,6 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse)
@@ -97,7 +96,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
- pp->name,
+ ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
@@ -107,7 +106,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
/*
@@ -137,7 +136,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
.app_conn_bind = NULL,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
- .set_state_timeout = NULL,
};
#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index f826dd1..19bc379 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,7 +9,7 @@
#include <net/ip_vs.h>
static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
struct net *net;
@@ -47,10 +47,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pp);
+ *verdict = ip_vs_leave(svc, skb, pd);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -907,14 +907,13 @@ static const char *sctp_state_name(int state)
}
static inline int
-set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
int direction, const struct sk_buff *skb)
{
sctp_chunkhdr_t _sctpch, *sch;
unsigned char chunk_type;
int event, next_state;
int ihl;
- struct ip_vs_proto_data *pd;
#ifdef CONFIG_IP_VS_IPV6
ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
@@ -966,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
IP_VS_DBG_BUF(8, "%s %s %s:%d->"
"%s:%d state: %s->%s conn->refcnt:%d\n",
- pp->name,
+ pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -990,7 +989,6 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
}
}
}
- pd = ip_vs_proto_data_get(&init_net, pp->protocol); /* tmp fix */
if (likely(pd))
cp->timeout = pd->timeout_table[cp->state = next_state];
else /* What to do ? */
@@ -1001,12 +999,12 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
static int
sctp_state_transition(struct ip_vs_conn *cp, int direction,
- const struct sk_buff *skb, struct ip_vs_protocol *pp)
+ const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
int ret = 0;
spin_lock(&cp->lock);
- ret = set_sctp_state(pp, cp, direction, skb);
+ ret = set_sctp_state(pd, cp, direction, skb);
spin_unlock(&cp->lock);
return ret;
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 9d9df3d..d7c2455 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -32,7 +32,7 @@
#include <net/ip_vs.h>
static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
struct net *net;
@@ -68,10 +68,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pp);
+ *verdict = ip_vs_leave(svc, skb, pd);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -448,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
{
int on = (flags & 1); /* secure_tcp */
@@ -461,7 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
** for most if not for all of the applications. Something
** like "capabilities" (flags) for each object.
*/
- tcp_state_table = (on? tcp_states_dos : tcp_states);
+ pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
}
static inline int tcp_state_idx(struct tcphdr *th)
@@ -478,13 +475,12 @@ static inline int tcp_state_idx(struct tcphdr *th)
}
static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
int direction, struct tcphdr *th)
{
int state_idx;
int new_state = IP_VS_TCP_S_CLOSE;
int state_off = tcp_state_off[direction];
- struct ip_vs_proto_data *pd; /* Temp fix */
/*
* Update state offset to INPUT_ONLY if necessary
@@ -502,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
goto tcp_state_out;
}
- new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+ new_state =
+ pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
tcp_state_out:
if (new_state != cp->state) {
@@ -510,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
"%s:%d state: %s->%s conn->refcnt:%d\n",
- pp->name,
+ pd->pp->name,
((state_off == TCP_DIR_OUTPUT) ?
"output " : "input "),
th->syn ? 'S' : '.',
@@ -540,7 +537,6 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
}
}
- pd = ip_vs_proto_data_get(&init_net, pp->protocol);
if (likely(pd))
cp->timeout = pd->timeout_table[cp->state = new_state];
else /* What to do ? */
@@ -553,7 +549,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
static int
tcp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
struct tcphdr _tcph, *th;
@@ -568,7 +564,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
return 0;
spin_lock(&cp->lock);
- set_tcp_state(pp, cp, direction, th);
+ set_tcp_state(pd, cp, direction, th);
spin_unlock(&cp->lock);
return 1;
@@ -691,6 +687,7 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
spin_lock_init(&ipvs->tcp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts));
+ pd->tcp_state_table = tcp_states;
}
static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 71a4721..aa85df2 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -29,7 +29,7 @@
#include <net/ip6_checksum.h>
static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
struct net *net;
@@ -64,10 +64,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pp);
+ *verdict = ip_vs_leave(svc, skb, pd);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -457,11 +457,8 @@ static const char * udp_state_name(int state)
static int
udp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
- struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */
-
- pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
if (unlikely(!pd)) {
pr_err("UDP no ns data\n");
return 0;
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 9127a3d..bb10b07 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
+ cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
if (unlikely(cp == NULL)) {
match = false;
goto out;
--
1.7.2.3
^ permalink raw reply related
* [PATCH 44/79] IPVS: netns, common protocol changes and use of appcnt.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
appcnt and timeout_table moved from struct ip_vs_protocol to
ip_vs proto_data.
struct net *net added as first param to
- register_app()
- unregister_app()
- app_conn_bind()
- ip_vs_conn_new()
[horms@verge.net.au: removed cosmetic-change-only hunk]
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 2 -
net/netfilter/ipvs/ip_vs_conn.c | 6 ++--
net/netfilter/ipvs/ip_vs_proto_sctp.c | 4 +-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 5 +--
net/netfilter/ipvs/ip_vs_proto_udp.c | 4 +-
net/netfilter/ipvs/ip_vs_sync.c | 55 ++++++++++++++++++---------------
6 files changed, 39 insertions(+), 37 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 464ea36..cc6ae62 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -360,8 +360,6 @@ struct ip_vs_protocol {
u16 protocol;
u16 num_states;
int dont_defrag;
- atomic_t appcnt; /* counter of proto app incs */
- int *timeout_table; /* protocol timeout table */
void (*init)(struct ip_vs_protocol *pp);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a7aba6a..b2024c9 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -804,7 +804,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
- struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol);
cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
@@ -863,8 +863,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
#endif
ip_vs_bind_xmit(cp);
- if (unlikely(pp && atomic_read(&pp->appcnt)))
- ip_vs_bind_app(cp, pp);
+ if (unlikely(pd && atomic_read(&pd->appcnt)))
+ ip_vs_bind_app(cp, pd->pp);
/*
* Allow conntrack to be preserved. By default, conntrack
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 19bc379..0f14f79 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1035,7 +1035,7 @@ static int sctp_register_app(struct ip_vs_app *inc)
}
}
list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
- atomic_inc(&pd->pp->appcnt);
+ atomic_inc(&pd->appcnt);
out:
spin_unlock_bh(&ipvs->sctp_app_lock);
@@ -1048,7 +1048,7 @@ static void sctp_unregister_app(struct ip_vs_app *inc)
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP);
spin_lock_bh(&ipvs->sctp_app_lock);
- atomic_dec(&pd->pp->appcnt);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
spin_unlock_bh(&ipvs->sctp_app_lock);
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index d7c2455..290b380 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -596,7 +596,7 @@ static int tcp_register_app(struct ip_vs_app *inc)
}
}
list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
- atomic_inc(&pd->pp->appcnt);
+ atomic_inc(&pd->appcnt);
out:
spin_unlock_bh(&ipvs->tcp_app_lock);
@@ -611,7 +611,7 @@ tcp_unregister_app(struct ip_vs_app *inc)
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
spin_lock_bh(&ipvs->tcp_app_lock);
- atomic_dec(&pd->pp->appcnt);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
spin_unlock_bh(&ipvs->tcp_app_lock);
}
@@ -701,7 +701,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.protocol = IPPROTO_TCP,
.num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
- .appcnt = ATOMIC_INIT(0),
.init = NULL,
.exit = NULL,
.init_netns = __ip_vs_tcp_init,
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index aa85df2..3719837 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -373,7 +373,7 @@ static int udp_register_app(struct ip_vs_app *inc)
}
}
list_add(&inc->p_list, &ipvs->udp_apps[hash]);
- atomic_inc(&pd->pp->appcnt);
+ atomic_inc(&pd->appcnt);
out:
spin_unlock_bh(&ipvs->udp_app_lock);
@@ -388,7 +388,7 @@ udp_unregister_app(struct ip_vs_app *inc)
struct netns_ipvs *ipvs = net_ipvs(&init_net);
spin_lock_bh(&ipvs->udp_app_lock);
- atomic_dec(&pd->pp->appcnt);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
spin_unlock_bh(&ipvs->udp_app_lock);
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 662aa2c..6831e8f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -725,17 +725,16 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
* Param: ...
* timeout is in sec.
*/
-static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
- unsigned state, unsigned protocol, unsigned type,
+static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+ unsigned int flags, unsigned int state,
+ unsigned int protocol, unsigned int type,
const union nf_inet_addr *daddr, __be16 dport,
unsigned long timeout, __u32 fwmark,
- struct ip_vs_sync_conn_options *opt,
- struct ip_vs_protocol *pp)
+ struct ip_vs_sync_conn_options *opt)
{
struct ip_vs_dest *dest;
struct ip_vs_conn *cp;
-
if (!(flags & IP_VS_CONN_F_TEMPLATE))
cp = ip_vs_conn_in_get(param);
else
@@ -821,17 +820,23 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
timeout = MAX_SCHEDULE_TIMEOUT / HZ;
cp->timeout = timeout*HZ;
- } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
- cp->timeout = pp->timeout_table[state];
- else
- cp->timeout = (3*60*HZ);
+ } else {
+ struct ip_vs_proto_data *pd;
+
+ pd = ip_vs_proto_data_get(net, protocol);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
+ cp->timeout = pd->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ);
+ }
ip_vs_conn_put(cp);
}
/*
* Process received multicast message for Version 0
*/
-static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
+static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+ const size_t buflen)
{
struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
struct ip_vs_sync_conn_v0 *s;
@@ -879,7 +884,6 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
}
} else {
/* protocol in templates is not used for state/timeout */
- pp = NULL;
if (state > 0) {
IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
state);
@@ -894,9 +898,9 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
s->vport, ¶m);
/* Send timeout as Zero */
- ip_vs_proc_conn(¶m, flags, state, s->protocol, AF_INET,
+ ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
(union nf_inet_addr *)&s->daddr, s->dport,
- 0, 0, opt, pp);
+ 0, 0, opt);
}
}
@@ -945,7 +949,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
/*
* Process a Version 1 sync. connection
*/
-static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
+static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
{
struct ip_vs_sync_conn_options opt;
union ip_vs_sync_conn *s;
@@ -1043,7 +1047,6 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
}
} else {
/* protocol in templates is not used for state/timeout */
- pp = NULL;
if (state > 0) {
IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
state);
@@ -1058,18 +1061,18 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
}
/* If only IPv4, just silent skip IPv6 */
if (af == AF_INET)
- ip_vs_proc_conn(¶m, flags, state, s->v4.protocol, af,
+ ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
- (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
- pp);
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+ );
#ifdef CONFIG_IP_VS_IPV6
else
- ip_vs_proc_conn(¶m, flags, state, s->v6.protocol, af,
+ ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
- (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
- pp);
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+ );
#endif
return 0;
/* Error exit */
@@ -1083,7 +1086,8 @@ out:
* ip_vs_conn entries.
* Handles Version 0 & 1
*/
-static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
+static void ip_vs_process_message(struct net *net, __u8 *buffer,
+ const size_t buflen)
{
struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
__u8 *p, *msg_end;
@@ -1136,7 +1140,8 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
return;
}
/* Process a single sync_conn */
- if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) {
+ retc = ip_vs_proc_sync_conn(net, p, msg_end);
+ if (retc < 0) {
IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
retc);
return;
@@ -1146,7 +1151,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
}
} else {
/* Old type of message */
- ip_vs_process_message_v0(buffer, buflen);
+ ip_vs_process_message_v0(net, buffer, buflen);
return;
}
}
@@ -1500,7 +1505,7 @@ static int sync_thread_backup(void *data)
/* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(tinfo->buf, len);
+ ip_vs_process_message(&init_net, tinfo->buf, len);
local_bh_enable();
}
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 45/79] IPVS: netns awareness to ip_vs_app
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
All variables moved to struct ipvs,
most external changes fixed (i.e. init_net removed)
in ip_vs_protocol param struct net *net added to:
- register_app()
- unregister_app()
This affected almost all proto_xxx.c files
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 12 +++---
include/net/netns/ip_vs.h | 5 ++
net/netfilter/ipvs/ip_vs_app.c | 73 +++++++++++++++++++-------------
net/netfilter/ipvs/ip_vs_ftp.c | 8 ++--
net/netfilter/ipvs/ip_vs_proto_sctp.c | 12 +++---
net/netfilter/ipvs/ip_vs_proto_tcp.c | 12 +++---
net/netfilter/ipvs/ip_vs_proto_udp.c | 12 +++---
7 files changed, 76 insertions(+), 58 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index cc6ae62..0cdd8ce 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -402,9 +402,9 @@ struct ip_vs_protocol {
const struct sk_buff *skb,
struct ip_vs_proto_data *pd);
- int (*register_app)(struct ip_vs_app *inc);
+ int (*register_app)(struct net *net, struct ip_vs_app *inc);
- void (*unregister_app)(struct ip_vs_app *inc);
+ void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
int (*app_conn_bind)(struct ip_vs_conn *cp);
@@ -871,12 +871,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
* (from ip_vs_app.c)
*/
#define IP_VS_APP_MAX_PORTS 8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
+ __u16 proto, __u16 port);
extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 58bd3fd..03f7fe1 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -28,6 +28,11 @@ struct netns_ipvs {
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
struct list_head rs_table[IP_VS_RTAB_SIZE];
+ /* ip_vs_app */
+ struct list_head app_list;
+ struct mutex app_mutex;
+ struct lock_class_key app_key; /* mutex debuging */
+
/* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 40b09cc..286f465 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
/*
* Get an ip_vs_app object
*/
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
* Allocate/initialize app incarnation and register it in proto apps.
*/
static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
{
struct ip_vs_protocol *pp;
struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
}
}
- ret = pp->register_app(inc);
+ ret = pp->register_app(net, inc);
if (ret)
goto out;
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
* Release app incarnation
*/
static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_protocol *pp;
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
return;
if (pp->unregister_app)
- pp->unregister_app(inc);
+ pp->unregister_app(net, inc);
IP_VS_DBG(9, "%s App %s:%u unregistered\n",
pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
* Register an application incarnation in protocol applications
*/
int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
int result;
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
- result = ip_vs_app_inc_new(app, proto, port);
+ result = ip_vs_app_inc_new(net, app, proto, port);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
return result;
}
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
/*
* ip_vs_app registration routine
*/
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
- list_add(&app->a_list, &ip_vs_app_list);
+ list_add(&app->a_list, &ipvs->app_list);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
return 0;
}
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
*/
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *inc, *nxt;
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
- ip_vs_app_inc_release(inc);
+ ip_vs_app_inc_release(net, inc);
}
list_del(&app->a_list);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
/*
* Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
*/
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
{
return pp->app_conn_bind(cp);
}
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
* /proc/net/ip_vs_app entry function
*/
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
struct ip_vs_app *app, *inc;
- list_for_each_entry(app, &ip_vs_app_list, a_list) {
+ list_for_each_entry(app, &ipvs->app_list, a_list) {
list_for_each_entry(inc, &app->incs_list, a_list) {
if (pos-- == 0)
return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
- mutex_lock(&__ip_vs_app_mutex);
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ mutex_lock(&ipvs->app_mutex);
- return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_app *inc, *app;
struct list_head *e;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
++*pos;
if (v == SEQ_START_TOKEN)
- return ip_vs_app_idx(0);
+ return ip_vs_app_idx(ipvs, 0);
inc = v;
app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return list_entry(e, struct ip_vs_app, a_list);
/* go on to next application */
- for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+ for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
app = list_entry(e, struct ip_vs_app, a_list);
list_for_each_entry(inc, &app->incs_list, a_list) {
return inc;
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
- mutex_unlock(&__ip_vs_app_mutex);
+ struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
+
+ mutex_unlock(&ipvs->app_mutex);
}
static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
static int ip_vs_app_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_app_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ip_vs_app_fops = {
@@ -571,9 +580,13 @@ static const struct file_operations ip_vs_app_fops = {
static int __net_init __ip_vs_app_init(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
+ INIT_LIST_HEAD(&ipvs->app_list);
+ __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index b38ae94..77b0036 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -414,14 +414,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
- ret = register_ip_vs_app(app);
+ ret = register_ip_vs_app(net, app);
if (ret)
return ret;
for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
if (!ports[i])
continue;
- ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+ ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
if (ret)
break;
pr_info("%s: loaded support on port[%d] = %d\n",
@@ -429,7 +429,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
}
if (ret)
- unregister_ip_vs_app(app);
+ unregister_ip_vs_app(net, app);
return ret;
}
@@ -443,7 +443,7 @@ static void __ip_vs_ftp_exit(struct net *net)
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return;
- unregister_ip_vs_app(app);
+ unregister_ip_vs_app(net, app);
}
static struct pernet_operations ip_vs_ftp_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 0f14f79..569e77b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1016,14 +1016,14 @@ static inline __u16 sctp_app_hashkey(__be16 port)
& SCTP_APP_TAB_MASK;
}
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
hash = sctp_app_hashkey(port);
@@ -1042,10 +1042,10 @@ out:
return ret;
}
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
spin_lock_bh(&ipvs->sctp_app_lock);
atomic_dec(&pd->appcnt);
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 290b380..757aaaf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -577,14 +577,14 @@ static inline __u16 tcp_app_hashkey(__be16 port)
}
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
@@ -605,10 +605,10 @@ static int tcp_register_app(struct ip_vs_app *inc)
static void
-tcp_unregister_app(struct ip_vs_app *inc)
+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
spin_lock_bh(&ipvs->tcp_app_lock);
atomic_dec(&pd->appcnt);
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 3719837..1dc3941 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -353,14 +353,14 @@ static inline __u16 udp_app_hashkey(__be16 port)
}
-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
hash = udp_app_hashkey(port);
@@ -382,10 +382,10 @@ static int udp_register_app(struct ip_vs_app *inc)
static void
-udp_unregister_app(struct ip_vs_app *inc)
+udp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
spin_lock_bh(&ipvs->udp_app_lock);
atomic_dec(&pd->appcnt);
--
1.7.2.3
^ permalink raw reply related
* [PATCH 47/79] IPVS: netns awareness to ip_vs_sync
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
All global variables moved to struct ipvs,
most external changes fixed (i.e. init_net removed)
in sync_buf create + 4 replaced by sizeof(struct..)
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 14 +-
include/net/netns/ip_vs.h | 16 ++
net/netfilter/ipvs/ip_vs_core.c | 15 +-
net/netfilter/ipvs/ip_vs_ctl.c | 52 ++++---
net/netfilter/ipvs/ip_vs_sync.c | 334 +++++++++++++++++++++------------------
5 files changed, 240 insertions(+), 191 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index c08927b..4265b5e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -958,7 +958,7 @@ extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern int sysctl_ip_vs_sync_ver;
-extern void ip_vs_sync_switch_mode(int mode);
+extern void ip_vs_sync_switch_mode(struct net *net, int mode);
extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
@@ -987,14 +987,10 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
* IPVS sync daemon data and function prototypes
* (from ip_vs_sync.c)
*/
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+ __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_sync_init(void);
extern void ip_vs_sync_cleanup(void);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index db02401..aba78f3 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -74,6 +74,22 @@ struct netns_ipvs {
struct list_head est_list; /* estimator list */
spinlock_t est_lock;
struct timer_list est_timer; /* Estimation timer */
+ /* ip_vs_sync */
+ struct list_head sync_queue;
+ spinlock_t sync_lock;
+ struct ip_vs_sync_buff *sync_buff;
+ spinlock_t sync_buff_lock;
+ struct sockaddr_in sync_mcast_addr;
+ struct task_struct *master_thread;
+ struct task_struct *backup_thread;
+ int send_mesg_maxlen;
+ int recv_mesg_maxlen;
+ volatile int sync_state;
+ volatile int master_syncid;
+ volatile int backup_syncid;
+ /* multicast interface name */
+ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
};
#endif /* IP_VS_H_ */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 9317aff..5531d56 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1471,12 +1471,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
{
- struct net *net = NULL;
+ struct net *net;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, restart, pkts;
+ struct netns_ipvs *ipvs;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1556,7 +1557,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
}
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+ net = skb_net(skb);
+ ipvs = net_ipvs(net);
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
@@ -1589,12 +1591,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*
* For ONE_PKT let ip_vs_sync_conn() do the filter work.
*/
+
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
pkts = sysctl_ip_vs_sync_threshold[0];
else
pkts = atomic_add_return(1, &cp->in_pkts);
- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(pkts % sysctl_ip_vs_sync_threshold[1]
@@ -1603,13 +1606,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
- ip_vs_sync_conn(cp);
+ ip_vs_sync_conn(net, cp);
goto out;
}
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
- else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(pkts % sysctl_ip_vs_sync_threshold[1]
@@ -1619,7 +1622,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
(cp->state == IP_VS_TCP_S_CLOSE) ||
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
- ip_vs_sync_conn(cp);
+ ip_vs_sync_conn(net, cp);
out:
cp->old_state = cp->state;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c89beb8..03f8631 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1559,7 +1559,8 @@ proc_do_sync_mode(ctl_table *table, int write,
/* Restore the correct value */
*valp = val;
} else {
- ip_vs_sync_switch_mode(val);
+ struct net *net = current->nsproxy->net_ns;
+ ip_vs_sync_switch_mode(net, val);
}
}
return rc;
@@ -2174,11 +2175,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+ ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+ dm->syncid);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- ret = stop_sync_thread(dm->state);
+ ret = stop_sync_thread(net, dm->state);
goto out_unlock;
}
@@ -2424,6 +2426,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
int ret = 0;
unsigned int copylen;
struct net *net = sock_net(sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
BUG_ON(!net);
if (!capable(CAP_NET_ADMIN))
@@ -2546,15 +2549,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
struct ip_vs_daemon_user d[2];
memset(&d, 0, sizeof(d));
- if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+ if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
- d[0].syncid = ip_vs_master_syncid;
+ strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+ sizeof(d[0].mcast_ifn));
+ d[0].syncid = ipvs->master_syncid;
}
- if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+ if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
- d[1].syncid = ip_vs_backup_syncid;
+ strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+ sizeof(d[1].mcast_ifn));
+ d[1].syncid = ipvs->backup_syncid;
}
if (copy_to_user(user, &d, sizeof(d)) != 0)
ret = -EFAULT;
@@ -3061,20 +3066,23 @@ nla_put_failure:
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ struct net *net = skb_net(skb);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
mutex_lock(&__ip_vs_mutex);
- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+ if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
- ip_vs_master_mcast_ifn,
- ip_vs_master_syncid, cb) < 0)
+ ipvs->master_mcast_ifn,
+ ipvs->master_syncid, cb) < 0)
goto nla_put_failure;
cb->args[0] = 1;
}
- if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+ if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
- ip_vs_backup_mcast_ifn,
- ip_vs_backup_syncid, cb) < 0)
+ ipvs->backup_mcast_ifn,
+ ipvs->backup_syncid, cb) < 0)
goto nla_put_failure;
cb->args[1] = 1;
@@ -3086,24 +3094,26 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
{
if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
- return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+ return start_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
}
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
{
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
- return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ return stop_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
}
static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
@@ -3159,9 +3169,9 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
}
if (cmd == IPVS_CMD_NEW_DAEMON)
- ret = ip_vs_genl_new_daemon(daemon_attrs);
+ ret = ip_vs_genl_new_daemon(net, daemon_attrs);
else
- ret = ip_vs_genl_del_daemon(daemon_attrs);
+ ret = ip_vs_genl_del_daemon(net, daemon_attrs);
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 6831e8f..c29e73d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -192,6 +192,7 @@ union ip_vs_sync_conn {
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
struct ip_vs_sync_thread_data {
+ struct net *net;
struct socket *sock;
char *buf;
};
@@ -259,10 +260,6 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */
};
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
-
struct ip_vs_sync_buff {
struct list_head list;
unsigned long firstuse;
@@ -273,28 +270,6 @@ struct ip_vs_sync_buff {
unsigned char *end;
};
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
/* multicast addr */
static struct sockaddr_in mcast_addr = {
.sin_family = AF_INET,
@@ -324,20 +299,20 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
put_unaligned_be32(ho->previous_delta, &no->previous_delta);
}
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
{
struct ip_vs_sync_buff *sb;
- spin_lock_bh(&ip_vs_sync_lock);
- if (list_empty(&ip_vs_sync_queue)) {
+ spin_lock_bh(&ipvs->sync_lock);
+ if (list_empty(&ipvs->sync_queue)) {
sb = NULL;
} else {
- sb = list_entry(ip_vs_sync_queue.next,
+ sb = list_entry(ipvs->sync_queue.next,
struct ip_vs_sync_buff,
list);
list_del(&sb->list);
}
- spin_unlock_bh(&ip_vs_sync_lock);
+ spin_unlock_bh(&ipvs->sync_lock);
return sb;
}
@@ -345,25 +320,27 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void)
/*
* Create a new sync buffer for Version 1 proto.
*/
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
{
struct ip_vs_sync_buff *sb;
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+ sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ if (!sb->mesg) {
kfree(sb);
return NULL;
}
sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */
sb->mesg->version = SYNC_PROTO_VER;
- sb->mesg->syncid = ip_vs_master_syncid;
+ sb->mesg->syncid = ipvs->master_syncid;
sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
sb->mesg->nr_conns = 0;
sb->mesg->spare = 0;
sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
- sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+ sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
sb->firstuse = jiffies;
return sb;
@@ -375,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
kfree(sb);
}
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs)
{
- spin_lock(&ip_vs_sync_lock);
- if (ip_vs_sync_state & IP_VS_STATE_MASTER)
- list_add_tail(&sb->list, &ip_vs_sync_queue);
+ struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+
+ spin_lock(&ipvs->sync_lock);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&sb->list, &ipvs->sync_queue);
else
ip_vs_sync_buff_release(sb);
- spin_unlock(&ip_vs_sync_lock);
+ spin_unlock(&ipvs->sync_lock);
}
/*
@@ -390,18 +369,18 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
* than the specified time or the specified time is zero.
*/
static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
{
struct ip_vs_sync_buff *sb;
- spin_lock_bh(&curr_sb_lock);
- if (curr_sb && (time == 0 ||
- time_before(jiffies - curr_sb->firstuse, time))) {
- sb = curr_sb;
- curr_sb = NULL;
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ if (ipvs->sync_buff && (time == 0 ||
+ time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
+ sb = ipvs->sync_buff;
+ ipvs->sync_buff = NULL;
} else
sb = NULL;
- spin_unlock_bh(&curr_sb_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return sb;
}
@@ -409,33 +388,37 @@ get_curr_sync_buff(unsigned long time)
* Switch mode from sending version 0 or 1
* - must handle sync_buf
*/
-void ip_vs_sync_switch_mode(int mode) {
+void ip_vs_sync_switch_mode(struct net *net, int mode)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
- if (!ip_vs_sync_state & IP_VS_STATE_MASTER)
+ if (!ipvs->sync_state & IP_VS_STATE_MASTER)
return;
- if (mode == sysctl_ip_vs_sync_ver || !curr_sb)
+ if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff)
return;
- spin_lock_bh(&curr_sb_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
/* Buffer empty ? then let buf_create do the job */
- if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
- kfree(curr_sb);
- curr_sb = NULL;
+ if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
+ kfree(ipvs->sync_buff);
+ ipvs->sync_buff = NULL;
} else {
- spin_lock_bh(&ip_vs_sync_lock);
- if (ip_vs_sync_state & IP_VS_STATE_MASTER)
- list_add_tail(&curr_sb->list, &ip_vs_sync_queue);
+ spin_lock_bh(&ipvs->sync_lock);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&ipvs->sync_buff->list,
+ &ipvs->sync_queue);
else
- ip_vs_sync_buff_release(curr_sb);
- spin_unlock_bh(&ip_vs_sync_lock);
+ ip_vs_sync_buff_release(ipvs->sync_buff);
+ spin_unlock_bh(&ipvs->sync_lock);
}
- spin_unlock_bh(&curr_sb_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
}
/*
* Create a new sync buffer for Version 0 proto.
*/
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
{
struct ip_vs_sync_buff *sb;
struct ip_vs_sync_mesg_v0 *mesg;
@@ -443,16 +426,17 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+ sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ if (!sb->mesg) {
kfree(sb);
return NULL;
}
mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
mesg->nr_conns = 0;
- mesg->syncid = ip_vs_master_syncid;
- mesg->size = 4;
- sb->head = (unsigned char *)mesg + 4;
- sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen;
+ mesg->syncid = ipvs->master_syncid;
+ mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
+ sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
+ sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
sb->firstuse = jiffies;
return sb;
}
@@ -461,8 +445,9 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
* Version 0 , could be switched in by sys_ctl.
* Add an ip_vs_conn information into the current sync_buff.
*/
-void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
+void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg_v0 *m;
struct ip_vs_sync_conn_v0 *s;
int len;
@@ -473,10 +458,12 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
return;
- spin_lock(&curr_sb_lock);
- if (!curr_sb) {
- if (!(curr_sb=ip_vs_sync_buff_create_v0())) {
- spin_unlock(&curr_sb_lock);
+ spin_lock(&ipvs->sync_buff_lock);
+ if (!ipvs->sync_buff) {
+ ipvs->sync_buff =
+ ip_vs_sync_buff_create_v0(ipvs);
+ if (!ipvs->sync_buff) {
+ spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@@ -484,8 +471,8 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
- m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg;
- s = (struct ip_vs_sync_conn_v0 *)curr_sb->head;
+ m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
+ s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
/* copy members */
s->reserved = 0;
@@ -506,18 +493,18 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
m->nr_conns++;
m->size += len;
- curr_sb->head += len;
+ ipvs->sync_buff->head += len;
/* check if there is a space for next one */
- if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) {
- sb_queue_tail(curr_sb);
- curr_sb = NULL;
+ if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
+ sb_queue_tail(ipvs);
+ ipvs->sync_buff = NULL;
}
- spin_unlock(&curr_sb_lock);
+ spin_unlock(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
if (cp->control)
- ip_vs_sync_conn(cp->control);
+ ip_vs_sync_conn(net, cp->control);
}
/*
@@ -525,8 +512,9 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
* Called by ip_vs_in.
* Sending Version 1 messages
*/
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m;
union ip_vs_sync_conn *s;
__u8 *p;
@@ -534,7 +522,7 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
/* Handle old version of the protocol */
if (sysctl_ip_vs_sync_ver == 0) {
- ip_vs_sync_conn_v0(cp);
+ ip_vs_sync_conn_v0(net, cp);
return;
}
/* Do not sync ONE PACKET */
@@ -551,7 +539,7 @@ sloop:
pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
}
- spin_lock(&curr_sb_lock);
+ spin_lock(&ipvs->sync_buff_lock);
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
@@ -570,26 +558,27 @@ sloop:
/* check if there is a space for this one */
pad = 0;
- if (curr_sb) {
- pad = (4 - (size_t)curr_sb->head) & 3;
- if (curr_sb->head + len + pad > curr_sb->end) {
- sb_queue_tail(curr_sb);
- curr_sb = NULL;
+ if (ipvs->sync_buff) {
+ pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
+ if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
+ sb_queue_tail(ipvs);
+ ipvs->sync_buff = NULL;
pad = 0;
}
}
- if (!curr_sb) {
- if (!(curr_sb=ip_vs_sync_buff_create())) {
- spin_unlock(&curr_sb_lock);
+ if (!ipvs->sync_buff) {
+ ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
+ if (!ipvs->sync_buff) {
+ spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
}
- m = curr_sb->mesg;
- p = curr_sb->head;
- curr_sb->head += pad + len;
+ m = ipvs->sync_buff->mesg;
+ p = ipvs->sync_buff->head;
+ ipvs->sync_buff->head += pad + len;
m->size += pad + len;
/* Add ev. padding from prev. sync_conn */
while (pad--)
@@ -647,7 +636,7 @@ sloop:
}
}
- spin_unlock(&curr_sb_lock);
+ spin_unlock(&ipvs->sync_buff_lock);
control:
/* synchronize its controller if it has */
@@ -699,7 +688,8 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
buff[pe_name_len]=0;
p->pe = __ip_vs_pe_getbyname(buff);
if (!p->pe) {
- IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff);
+ IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
+ buff);
return 1;
}
} else {
@@ -748,7 +738,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* If it is not found the connection will remain unbound
* but still handled.
*/
- dest = ip_vs_find_dest(&init_net, type, daddr, dport, param->vaddr,
+ dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
param->vport, protocol, fwmark);
/* Set the approprite ativity flag */
@@ -1089,6 +1079,7 @@ out:
static void ip_vs_process_message(struct net *net, __u8 *buffer,
const size_t buflen)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
__u8 *p, *msg_end;
int i, nr_conns;
@@ -1105,7 +1096,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
return;
}
/* SyncID sanity check */
- if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) {
+ if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
return;
}
@@ -1190,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
{
struct net_device *dev;
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -1210,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
* Set the maximum length of sync message according to the
* specified interface's MTU.
*/
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct net_device *dev;
int num;
if (sync_state == IP_VS_STATE_MASTER) {
- if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+ dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
+ if (!dev)
return -ENODEV;
num = (dev->mtu - sizeof(struct iphdr) -
sizeof(struct udphdr) -
SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
- sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+ ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
IP_VS_DBG(7, "setting the maximum length of sync sending "
- "message %d.\n", sync_send_mesg_maxlen);
+ "message %d.\n", ipvs->send_mesg_maxlen);
} else if (sync_state == IP_VS_STATE_BACKUP) {
- if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+ dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
+ if (!dev)
return -ENODEV;
- sync_recv_mesg_maxlen = dev->mtu -
+ ipvs->recv_mesg_maxlen = dev->mtu -
sizeof(struct iphdr) - sizeof(struct udphdr);
IP_VS_DBG(7, "setting the maximum length of sync receiving "
- "message %d.\n", sync_recv_mesg_maxlen);
+ "message %d.\n", ipvs->recv_mesg_maxlen);
}
return 0;
@@ -1248,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state)
static int
join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
{
+ struct net *net = sock_net(sk);
struct ip_mreqn mreq;
struct net_device *dev;
int ret;
@@ -1255,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
memset(&mreq, 0, sizeof(mreq));
memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -1272,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
+ struct net *net = sock_net(sock->sk);
struct net_device *dev;
__be32 addr;
struct sockaddr_in sin;
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -1298,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
/*
* Set up sending multicast socket over UDP
*/
-static struct socket * make_send_sock(void)
+static struct socket *make_send_sock(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct socket *sock;
int result;
@@ -1310,7 +1311,7 @@ static struct socket * make_send_sock(void)
return ERR_PTR(result);
}
- result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+ result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
goto error;
@@ -1319,7 +1320,7 @@ static struct socket * make_send_sock(void)
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
- result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+ result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error binding address of the mcast interface\n");
goto error;
@@ -1343,8 +1344,9 @@ static struct socket * make_send_sock(void)
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket * make_receive_sock(void)
+static struct socket *make_receive_sock(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct socket *sock;
int result;
@@ -1368,7 +1370,7 @@ static struct socket * make_receive_sock(void)
/* join the multicast group */
result = join_mcast_group(sock->sk,
(struct in_addr *) &mcast_addr.sin_addr,
- ip_vs_backup_mcast_ifn);
+ ipvs->backup_mcast_ifn);
if (result < 0) {
pr_err("Error joining to the multicast group\n");
goto error;
@@ -1439,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
+ struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
struct ip_vs_sync_buff *sb;
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
- ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+ ipvs->master_mcast_ifn, ipvs->master_syncid);
while (!kthread_should_stop()) {
- while ((sb = sb_dequeue())) {
+ while ((sb = sb_dequeue(ipvs))) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
- /* check if entries stay in curr_sb for 2 seconds */
- sb = get_curr_sync_buff(2 * HZ);
+ /* check if entries stay in ipvs->sync_buff for 2 seconds */
+ sb = get_curr_sync_buff(ipvs, 2 * HZ);
if (sb) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
@@ -1462,14 +1465,13 @@ static int sync_thread_master(void *data)
}
/* clean up the sync_buff queue */
- while ((sb=sb_dequeue())) {
+ while ((sb = sb_dequeue(ipvs)))
ip_vs_sync_buff_release(sb);
- }
/* clean up the current sync_buff */
- if ((sb = get_curr_sync_buff(0))) {
+ sb = get_curr_sync_buff(ipvs, 0);
+ if (sb)
ip_vs_sync_buff_release(sb);
- }
/* release the sending multicast socket */
sock_release(tinfo->sock);
@@ -1482,11 +1484,12 @@ static int sync_thread_master(void *data)
static int sync_thread_backup(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
+ struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n",
- ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+ ipvs->backup_mcast_ifn, ipvs->backup_syncid);
while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -1496,7 +1499,7 @@ static int sync_thread_backup(void *data)
/* do we have data now? */
while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
len = ip_vs_receive(tinfo->sock, tinfo->buf,
- sync_recv_mesg_maxlen);
+ ipvs->recv_mesg_maxlen);
if (len <= 0) {
pr_err("receiving message error\n");
break;
@@ -1505,7 +1508,7 @@ static int sync_thread_backup(void *data)
/* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(&init_net, tinfo->buf, len);
+ ip_vs_process_message(tinfo->net, tinfo->buf, len);
local_bh_enable();
}
}
@@ -1519,11 +1522,12 @@ static int sync_thread_backup(void *data)
}
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **realtask, *task;
struct socket *sock;
+ struct netns_ipvs *ipvs = net_ipvs(net);
char *name, *buf = NULL;
int (*threadfn)(void *data);
int result = -ENOMEM;
@@ -1533,27 +1537,27 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
sizeof(struct ip_vs_sync_conn_v0));
if (state == IP_VS_STATE_MASTER) {
- if (sync_master_thread)
+ if (ipvs->master_thread)
return -EEXIST;
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_master_mcast_ifn));
- ip_vs_master_syncid = syncid;
- realtask = &sync_master_thread;
- name = "ipvs_syncmaster";
+ strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+ sizeof(ipvs->master_mcast_ifn));
+ ipvs->master_syncid = syncid;
+ realtask = &ipvs->master_thread;
+ name = "ipvs_master:%d";
threadfn = sync_thread_master;
- sock = make_send_sock();
+ sock = make_send_sock(net);
} else if (state == IP_VS_STATE_BACKUP) {
- if (sync_backup_thread)
+ if (ipvs->backup_thread)
return -EEXIST;
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_backup_mcast_ifn));
- ip_vs_backup_syncid = syncid;
- realtask = &sync_backup_thread;
- name = "ipvs_syncbackup";
+ strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+ sizeof(ipvs->backup_mcast_ifn));
+ ipvs->backup_syncid = syncid;
+ realtask = &ipvs->backup_thread;
+ name = "ipvs_backup:%d";
threadfn = sync_thread_backup;
- sock = make_receive_sock();
+ sock = make_receive_sock(net);
} else {
return -EINVAL;
}
@@ -1563,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
goto out;
}
- set_sync_mesg_maxlen(state);
+ set_sync_mesg_maxlen(net, state);
if (state == IP_VS_STATE_BACKUP) {
- buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+ buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
if (!buf)
goto outsocket;
}
@@ -1574,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
if (!tinfo)
goto outbuf;
+ tinfo->net = net;
tinfo->sock = sock;
tinfo->buf = buf;
- task = kthread_run(threadfn, tinfo, name);
+ task = kthread_run(threadfn, tinfo, name, ipvs->gen);
if (IS_ERR(task)) {
result = PTR_ERR(task);
goto outtinfo;
@@ -1585,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
/* mark as active */
*realtask = task;
- ip_vs_sync_state |= state;
+ ipvs->sync_state |= state;
/* increase the module use count */
ip_vs_use_count_inc();
@@ -1603,16 +1608,18 @@ out:
}
-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
if (state == IP_VS_STATE_MASTER) {
- if (!sync_master_thread)
+ if (!ipvs->master_thread)
return -ESRCH;
pr_info("stopping master sync thread %d ...\n",
- task_pid_nr(sync_master_thread));
+ task_pid_nr(ipvs->master_thread));
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
@@ -1620,21 +1627,21 @@ int stop_sync_thread(int state)
* progress of stopping the master sync daemon.
*/
- spin_lock_bh(&ip_vs_sync_lock);
- ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
- spin_unlock_bh(&ip_vs_sync_lock);
- kthread_stop(sync_master_thread);
- sync_master_thread = NULL;
+ spin_lock_bh(&ipvs->sync_lock);
+ ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock_bh(&ipvs->sync_lock);
+ kthread_stop(ipvs->master_thread);
+ ipvs->master_thread = NULL;
} else if (state == IP_VS_STATE_BACKUP) {
- if (!sync_backup_thread)
+ if (!ipvs->backup_thread)
return -ESRCH;
pr_info("stopping backup sync thread %d ...\n",
- task_pid_nr(sync_backup_thread));
+ task_pid_nr(ipvs->backup_thread));
- ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
- kthread_stop(sync_backup_thread);
- sync_backup_thread = NULL;
+ ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+ kthread_stop(ipvs->backup_thread);
+ ipvs->backup_thread = NULL;
} else {
return -EINVAL;
}
@@ -1650,12 +1657,29 @@ int stop_sync_thread(int state)
*/
static int __net_init __ip_vs_sync_init(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!net_eq(net, &init_net)) /* netns not enabled yet */
+ return -EPERM;
+
+ INIT_LIST_HEAD(&ipvs->sync_queue);
+ spin_lock_init(&ipvs->sync_lock);
+ spin_lock_init(&ipvs->sync_buff_lock);
+
+ ipvs->sync_mcast_addr.sin_family = AF_INET;
+ ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+ ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
return 0;
}
static void __ip_vs_sync_cleanup(struct net *net)
{
+ if (!net_eq(net, &init_net)) /* netns not enabled yet */
+ return;
+ stop_sync_thread(net, IP_VS_STATE_MASTER);
+ stop_sync_thread(net, IP_VS_STATE_BACKUP);
}
+
static struct pernet_operations ipvs_sync_ops = {
.init = __ip_vs_sync_init,
.exit = __ip_vs_sync_cleanup,
--
1.7.2.3
^ permalink raw reply related
* [PATCH 49/79] IPVS: netns, connection hash got net as param.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Connection hash table is now name space aware.
i.e. net ptr >> 8 is xor:ed to the hash,
and this is the first param to be compared.
The net struct is 0xa40 in size ( a little bit smaller for 32 bit arch:s)
and cache-line aligned, so a ptr >> 5 might be a more clever solution ?
All lookups where net is compared uses net_eq() which returns 1 when netns
is disabled, and the compiler seems to do something clever in that case.
ip_vs_conn_fill_param() have *net as first param now.
Three new inlines added to keep conn struct smaller
when names space is disabled.
- ip_vs_conn_net()
- ip_vs_conn_net_set()
- ip_vs_conn_net_eq()
*v3
moved net compare to the end in "fast path"
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 53 ++++++++++++---
include/net/netns/ip_vs.h | 2 +
net/netfilter/ipvs/ip_vs_conn.c | 112 +++++++++++++++++++------------
net/netfilter/ipvs/ip_vs_core.c | 15 +++--
net/netfilter/ipvs/ip_vs_ftp.c | 14 ++--
net/netfilter/ipvs/ip_vs_nfct.c | 6 +-
net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 15 +++--
net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +-
net/netfilter/ipvs/ip_vs_sync.c | 13 ++--
11 files changed, 153 insertions(+), 83 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 605d5db..f82c0ff 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -477,6 +477,7 @@ extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
unsigned short proto);
struct ip_vs_conn_param {
+ struct net *net;
const union nf_inet_addr *caddr;
const union nf_inet_addr *vaddr;
__be16 cport;
@@ -494,17 +495,19 @@ struct ip_vs_conn_param {
*/
struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */
-
+#ifdef CONFIG_NET_NS
+ struct net *net; /* Name space */
+#endif
/* Protocol, addresses and port numbers */
- u16 af; /* address family */
- union nf_inet_addr caddr; /* client address */
- union nf_inet_addr vaddr; /* virtual address */
- union nf_inet_addr daddr; /* destination address */
- volatile __u32 flags; /* status flags */
- __u32 fwmark; /* Fire wall mark from skb */
- __be16 cport;
- __be16 vport;
- __be16 dport;
+ u16 af; /* address family */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __u32 fwmark; /* Fire wall mark from skb */
+ union nf_inet_addr caddr; /* client address */
+ union nf_inet_addr vaddr; /* virtual address */
+ union nf_inet_addr daddr; /* destination address */
+ volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
/* counter and timer */
@@ -547,6 +550,33 @@ struct ip_vs_conn {
__u8 pe_data_len;
};
+/*
+ * To save some memory in conn table when name space is disabled.
+ */
+static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
+{
+#ifdef CONFIG_NET_NS
+ return cp->net;
+#else
+ return &init_net;
+#endif
+}
+static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
+{
+#ifdef CONFIG_NET_NS
+ cp->net = net;
+#endif
+}
+
+static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
+ struct net *net)
+{
+#ifdef CONFIG_NET_NS
+ return cp->net == net;
+#else
+ return 1;
+#endif
+}
/*
* Extended internal versions of struct ip_vs_service_user and
@@ -796,13 +826,14 @@ enum {
IP_VS_DIR_LAST,
};
-static inline void ip_vs_conn_fill_param(int af, int protocol,
+static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
const union nf_inet_addr *caddr,
__be16 cport,
const union nf_inet_addr *vaddr,
__be16 vport,
struct ip_vs_conn_param *p)
{
+ p->net = net;
p->af = af;
p->protocol = protocol;
p->caddr = caddr;
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index bd1dad8..1acfb33 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -66,6 +66,8 @@ struct netns_ipvs {
struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
seqcount_t *ustats_seq; /* u64 read retry */
+ /* ip_vs_conn */
+ atomic_t conn_count; /* connection counter */
/* ip_vs_lblc */
int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b2024c9..0d5e4fe 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -64,9 +64,6 @@ static struct list_head *ip_vs_conn_tab __read_mostly;
/* SLAB cache for IPVS connections */
static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
-/* counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
/* counter for no client port connections */
static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
@@ -76,7 +73,7 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
/*
* Fine locking granularity for big connection hash table
*/
-#define CT_LOCKARRAY_BITS 4
+#define CT_LOCKARRAY_BITS 5
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
const union nf_inet_addr *addr,
__be16 port)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
- return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
- (__force u32)port, proto, ip_vs_conn_rnd)
- & ip_vs_conn_tab_mask;
+ return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+ (__force u32)port, proto, ip_vs_conn_rnd) ^
+ ((size_t)net>>8)) & ip_vs_conn_tab_mask;
#endif
- return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
- ip_vs_conn_rnd)
- & ip_vs_conn_tab_mask;
+ return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+ ip_vs_conn_rnd) ^
+ ((size_t)net>>8)) & ip_vs_conn_tab_mask;
}
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,15 +163,15 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
port = p->vport;
}
- return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
+ return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
}
static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
- NULL, 0, &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+ &cp->caddr, cp->cport, NULL, 0, &p);
if (cp->pe) {
p.pe = cp->pe;
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
}
/*
- * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
* returns bool success.
*/
static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af &&
+ p->cport == cp->cport && p->vport == cp->vport &&
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
- p->cport == cp->cport && p->vport == cp->vport &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
- p->protocol == cp->protocol) {
+ p->protocol == cp->protocol &&
+ ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */
atomic_inc(&cp->refcnt);
ct_read_unlock(hash);
@@ -313,17 +311,18 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
+ struct net *net = skb_net(skb);
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
return 1;
if (likely(!inverse))
- ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
- &iph->daddr, pptr[1], p);
+ ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+ pptr[0], &iph->daddr, pptr[1], p);
else
- ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
- &iph->saddr, pptr[0], p);
+ ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+ pptr[1], &iph->saddr, pptr[0], p);
return 0;
}
@@ -352,6 +351,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (!ip_vs_conn_net_eq(cp, p->net))
+ continue;
if (p->pe_data && p->pe->ct_match) {
if (p->pe == cp->pe && p->pe->ct_match(p, cp))
goto out;
@@ -403,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af &&
+ p->vport == cp->cport && p->cport == cp->dport &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
- p->vport == cp->cport && p->cport == cp->dport &&
- p->protocol == cp->protocol) {
+ p->protocol == cp->protocol &&
+ ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */
atomic_inc(&cp->refcnt);
ret = cp;
@@ -609,8 +611,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
- dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport,
- &cp->vaddr, cp->vport,
+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+ cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark);
ip_vs_bind_dest(cp, dest);
return dest;
@@ -728,6 +730,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
cp->timeout = 60*HZ;
@@ -770,7 +773,7 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt);
- atomic_dec(&ip_vs_conn_count);
+ atomic_dec(&ipvs->conn_count);
kmem_cache_free(ip_vs_conn_cachep, cp);
return;
@@ -804,7 +807,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol);
+ struct netns_ipvs *ipvs = net_ipvs(p->net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+ p->protocol);
cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
@@ -814,6 +819,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
INIT_LIST_HEAD(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+ ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
cp->protocol = p->protocol;
ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -844,7 +850,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0);
- atomic_inc(&ip_vs_conn_count);
+ atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt);
@@ -886,17 +892,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
* /proc/net/ip_vs_conn entries
*/
#ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+ struct seq_net_private p;
+ struct list_head *l;
+};
static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
{
int idx;
struct ip_vs_conn *cp;
+ struct ip_vs_iter_state *iter = seq->private;
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
if (pos-- == 0) {
- seq->private = &ip_vs_conn_tab[idx];
+ iter->l = &ip_vs_conn_tab[idx];
return cp;
}
}
@@ -908,14 +919,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
{
- seq->private = NULL;
+ struct ip_vs_iter_state *iter = seq->private;
+
+ iter->l = NULL;
return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
}
static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_conn *cp = v;
- struct list_head *e, *l = seq->private;
+ struct ip_vs_iter_state *iter = seq->private;
+ struct list_head *e, *l = iter->l;
int idx;
++*pos;
@@ -932,18 +946,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
while (++idx < ip_vs_conn_tab_size) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
- seq->private = &ip_vs_conn_tab[idx];
+ iter->l = &ip_vs_conn_tab[idx];
return cp;
}
ct_read_unlock_bh(idx);
}
- seq->private = NULL;
+ iter->l = NULL;
return NULL;
}
static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
{
- struct list_head *l = seq->private;
+ struct ip_vs_iter_state *iter = seq->private;
+ struct list_head *l = iter->l;
if (l)
ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -957,9 +972,12 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
else {
const struct ip_vs_conn *cp = v;
+ struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
+ if (!ip_vs_conn_net_eq(cp, net))
+ return 0;
if (cp->pe_data) {
pe_data[0] = ' ';
len = strlen(cp->pe->name);
@@ -1004,7 +1022,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
static int ip_vs_conn_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_conn_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+ sizeof(struct ip_vs_iter_state));
}
static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1050,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
else {
const struct ip_vs_conn *cp = v;
+ struct net *net = seq_file_net(seq);
+
+ if (!ip_vs_conn_net_eq(cp, net))
+ return 0;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
@@ -1067,7 +1090,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_conn_sync_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ip_vs_iter_state));
}
static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1168,10 +1192,11 @@ void ip_vs_random_dropentry(void)
/*
* Flush all the connection entries in the ip_vs_conn_tab
*/
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
@@ -1181,7 +1206,8 @@ static void ip_vs_conn_flush(void)
ct_write_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+ if (!ip_vs_conn_net_eq(cp, net))
+ continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
if (cp->control) {
@@ -1194,7 +1220,7 @@ static void ip_vs_conn_flush(void)
/* the counter may be not NULL, because maybe some conn entries
are run by slow timer handler or unhashed but still referred */
- if (atomic_read(&ip_vs_conn_count) != 0) {
+ if (atomic_read(&ipvs->conn_count) != 0) {
schedule();
goto flush_again;
}
@@ -1204,8 +1230,11 @@ static void ip_vs_conn_flush(void)
*/
int __net_init __ip_vs_conn_init(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
+ atomic_set(&ipvs->conn_count, 0);
proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
@@ -1217,6 +1246,8 @@ static void __net_exit __ip_vs_conn_cleanup(struct net *net)
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return;
+ /* flush all the connection entries first */
+ ip_vs_conn_flush(net);
proc_net_remove(net, "ip_vs_conn");
proc_net_remove(net, "ip_vs_conn_sync");
}
@@ -1277,9 +1308,6 @@ int __init ip_vs_conn_init(void)
void ip_vs_conn_cleanup(void)
{
unregister_pernet_subsys(&ipvs_conn_ops);
- /* flush all the connection entries first */
- ip_vs_conn_flush();
-
/* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep);
vfree(ip_vs_conn_tab);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7e6a2a0..7205b49 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -205,7 +205,8 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
- ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+ ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+ vport, p);
p->pe = svc->pe;
if (p->pe && p->pe->fill_param)
return p->pe->fill_param(p, skb);
@@ -348,8 +349,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port,
- &iph.daddr, dst_port, ¶m);
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+ src_port, &iph.daddr, dst_port, ¶m);
cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
@@ -464,8 +465,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
- pptr[0], &iph.daddr, pptr[1], &p);
+
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+ &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+ &p);
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
@@ -532,7 +535,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->af, iph.protocol,
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
&iph.saddr, pptr[0],
&iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 77b0036..6a04f9a 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -198,13 +198,15 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, iph->protocol,
- &from, port, &cp->caddr, 0, &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ iph->protocol, &from, port,
+ &cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p);
}
if (!n_cp) {
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+ AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
n_cp = ip_vs_conn_new(&p, &from, port,
IP_VS_CONN_F_NO_CPORT |
@@ -361,9 +363,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
- &cp->vaddr, htons(ntohs(cp->vport)-1),
- &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ iph->protocol, &to, port, &cp->vaddr,
+ htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
n_cp = ip_vs_conn_new(&p, &cp->daddr,
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 4680647..f454c80 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
struct nf_conntrack_tuple *orig, new_reply;
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = nf_ct_net(ct);
if (exp->tuple.src.l3num != PF_INET)
return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+ ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
- h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
+ h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+ &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
/* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 28039cb..5b8eb8b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,15 +41,16 @@ struct isakmp_hdr {
#define PORT_ISAKMP 500
static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
- int inverse, struct ip_vs_conn_param *p)
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+ const struct ip_vs_iphdr *iph, int inverse,
+ struct ip_vs_conn_param *p)
{
if (likely(!inverse))
- ip_vs_conn_fill_param(af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p);
else
- ip_vs_conn_fill_param(af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p);
}
@@ -61,8 +62,9 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_in_get(&p);
if (!cp) {
/*
@@ -89,8 +91,9 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_out_get(&p);
if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 569e77b..550365a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1055,7 +1055,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 757aaaf..d8b3f9f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -620,7 +620,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 1dc3941..581157b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -396,7 +396,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index c29e73d..f85e47d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -660,21 +660,21 @@ control:
* fill_param used by version 1
*/
static inline int
-ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
+ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
struct ip_vs_conn_param *p,
__u8 *pe_data, unsigned int pe_data_len,
__u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
- ip_vs_conn_fill_param(af, sc->v6.protocol,
+ ip_vs_conn_fill_param(net, af, sc->v6.protocol,
(const union nf_inet_addr *)&sc->v6.caddr,
sc->v6.cport,
(const union nf_inet_addr *)&sc->v6.vaddr,
sc->v6.vport, p);
else
#endif
- ip_vs_conn_fill_param(af, sc->v4.protocol,
+ ip_vs_conn_fill_param(net, af, sc->v4.protocol,
(const union nf_inet_addr *)&sc->v4.caddr,
sc->v4.cport,
(const union nf_inet_addr *)&sc->v4.vaddr,
@@ -881,7 +881,7 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer,
}
}
- ip_vs_conn_fill_param(AF_INET, s->protocol,
+ ip_vs_conn_fill_param(net, AF_INET, s->protocol,
(const union nf_inet_addr *)&s->caddr,
s->cport,
(const union nf_inet_addr *)&s->vaddr,
@@ -1043,9 +1043,8 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
state = 0;
}
}
- if (ip_vs_conn_fill_param_sync(af, s, ¶m,
- pe_data, pe_data_len,
- pe_name, pe_name_len)) {
+ if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
+ pe_data_len, pe_name, pe_name_len)) {
retc = 50;
goto out;
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 50/79] IPVS: netns, ip_vs_ctl local vars moved to ipvs struct.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Moving global vars to ipvs struct, except for svc table lock.
Next patch for ctl will be drop-rate handling.
*v3
__ip_vs_mutex remains global
ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 27 ++--
include/net/netns/ip_vs.h | 37 ++++-
net/netfilter/ipvs/ip_vs_conn.c | 7 +-
net/netfilter/ipvs/ip_vs_core.c | 34 +++--
net/netfilter/ipvs/ip_vs_ctl.c | 291 +++++++++++++++++----------------
net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +-
net/netfilter/ipvs/ip_vs_sync.c | 9 +-
9 files changed, 230 insertions(+), 181 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index f82c0ff..af9acf4 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -41,7 +41,7 @@ static inline struct netns_ipvs *net_ipvs(struct net* net)
* Get net ptr from skb in traffic cases
* use skb_sknet when call is from userland (ioctl or netlink)
*/
-static inline struct net *skb_net(struct sk_buff *skb)
+static inline struct net *skb_net(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
#ifdef CONFIG_IP_VS_DEBUG
@@ -69,7 +69,7 @@ static inline struct net *skb_net(struct sk_buff *skb)
#endif
}
-static inline struct net *skb_sknet(struct sk_buff *skb)
+static inline struct net *skb_sknet(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
#ifdef CONFIG_IP_VS_DEBUG
@@ -1023,13 +1023,6 @@ extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* IPVS control data and functions (from ip_vs_ctl.c)
*/
-extern int sysctl_ip_vs_cache_bypass;
-extern int sysctl_ip_vs_expire_nodest_conn;
-extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
-extern int sysctl_ip_vs_nat_icmp_send;
-extern int sysctl_ip_vs_conntrack;
-extern int sysctl_ip_vs_snat_reroute;
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern int sysctl_ip_vs_sync_ver;
@@ -1119,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6
extern int ip_vs_drop_rate;
extern int ip_vs_drop_counter;
-static __inline__ int ip_vs_todrop(void)
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
- if (!ip_vs_drop_rate) return 0;
- if (--ip_vs_drop_counter > 0) return 0;
- ip_vs_drop_counter = ip_vs_drop_rate;
+ if (!ipvs->drop_rate)
+ return 0;
+ if (--ipvs->drop_counter > 0)
+ return 0;
+ ipvs->drop_counter = ipvs->drop_rate;
return 1;
}
@@ -1211,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
* Netfilter connection tracking
* (from ip_vs_nfct.c)
*/
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
- return sysctl_ip_vs_conntrack;
+ return ipvs->sysctl_conntrack;
}
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1226,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
#else
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
return 0;
}
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 1acfb33..c4b1abf 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -61,13 +61,46 @@ struct netns_ipvs {
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
spinlock_t sctp_app_lock;
#endif
+ /* ip_vs_conn */
+ atomic_t conn_count; /* connection counter */
+
/* ip_vs_ctl */
struct ip_vs_stats *tot_stats; /* Statistics & est. */
struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
seqcount_t *ustats_seq; /* u64 read retry */
- /* ip_vs_conn */
- atomic_t conn_count; /* connection counter */
+ int num_services; /* no of virtual services */
+ /* 1/rate drop and drop-entry variables */
+ int drop_rate;
+ int drop_counter;
+ atomic_t dropentry;
+ /* locks in ctl.c */
+ spinlock_t dropentry_lock; /* drop entry handling */
+ spinlock_t droppacket_lock; /* drop packet handling */
+ spinlock_t securetcp_lock; /* state and timeout tables */
+ rwlock_t rs_lock; /* real services table */
+ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+ struct lock_class_key ctl_key; /* ctl_mutex debuging */
+ /* sys-ctl struct */
+ struct ctl_table_header *sysctl_hdr;
+ struct ctl_table *sysctl_tbl;
+ /* sysctl variables */
+ int sysctl_amemthresh;
+ int sysctl_am_droprate;
+ int sysctl_drop_entry;
+ int sysctl_drop_packet;
+ int sysctl_secure_tcp;
+#ifdef CONFIG_IP_VS_NFCT
+ int sysctl_conntrack;
+#endif
+ int sysctl_snat_reroute;
+ int sysctl_sync_ver;
+ int sysctl_cache_bypass;
+ int sysctl_expire_nodest_conn;
+ int sysctl_expire_quiescent_template;
+ int sysctl_sync_threshold[2];
+ int sysctl_nat_icmp_send;
+
/* ip_vs_lblc */
int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 0d5e4fe..5ba205a 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
int ip_vs_check_template(struct ip_vs_conn *ct)
{
struct ip_vs_dest *dest = ct->dest;
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
/*
* Checking the dest server status.
*/
if ((dest == NULL) ||
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- (sysctl_ip_vs_expire_quiescent_template &&
+ (ipvs->sysctl_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
@@ -879,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
* IP_VS_CONN_F_ONE_PACKET too.
*/
- if (ip_vs_conntrack_enabled())
+ if (ip_vs_conntrack_enabled(ipvs))
cp->flags |= IP_VS_CONN_F_NFCT;
/* Hash it in the ip_vs_conn_tab finally */
@@ -1198,7 +1199,7 @@ static void ip_vs_conn_flush(struct net *net)
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
- flush_again:
+flush_again:
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
/*
* Lock is actually needed in this loop.
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7205b49..a7c59a7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -499,6 +499,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd)
{
+ struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
int unicast;
@@ -521,7 +522,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+ ipvs = net_ipvs(skb_net(skb));
+ if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -733,6 +735,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
unsigned int offset, unsigned int ihl)
{
+ struct netns_ipvs *ipvs;
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -754,6 +757,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (!skb_make_writable(skb, offset))
goto out;
+ ipvs = net_ipvs(skb_net(skb));
+
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -763,11 +768,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto out;
} else
#endif
- if ((sysctl_ip_vs_snat_reroute ||
+ if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto out;
@@ -979,6 +984,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
struct ip_vs_protocol *pp = pd->pp;
+ struct netns_ipvs *ipvs;
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
@@ -1014,13 +1020,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
+ ipvs = net_ipvs(skb_net(skb));
+
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto drop;
} else
#endif
- if ((sysctl_ip_vs_snat_reroute ||
+ if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
@@ -1057,6 +1065,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs;
EnterFunction(11);
@@ -1131,10 +1140,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if the packet belongs to an existing entry
*/
cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+ ipvs = net_ipvs(net);
if (likely(cp))
return handle_response(af, skb, pd, cp, iph.len);
- if (sysctl_ip_vs_nat_icmp_send &&
+ if (ipvs->sysctl_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
@@ -1580,7 +1590,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
- if (sysctl_ip_vs_expire_nodest_conn) {
+ if (ipvs->sysctl_expire_nodest_conn) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
@@ -1610,15 +1620,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
- pkts = sysctl_ip_vs_sync_threshold[0];
+ pkts = ipvs->sysctl_sync_threshold[0];
else
pkts = atomic_add_return(1, &cp->in_pkts);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % ipvs->sysctl_sync_threshold[1]
+ == ipvs->sysctl_sync_threshold[0])) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
@@ -1632,8 +1642,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % ipvs->sysctl_sync_threshold[1]
+ == ipvs->sysctl_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index cbd58c6..183ac18 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -58,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
/* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;
@@ -142,73 +107,73 @@ static void update_defense_level(struct netns_ipvs *ipvs)
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
- nomem = (availmem < sysctl_ip_vs_amemthresh);
+ nomem = (availmem < ipvs->sysctl_amemthresh);
local_bh_disable();
/* drop_entry */
- spin_lock(&__ip_vs_dropentry_lock);
- switch (sysctl_ip_vs_drop_entry) {
+ spin_lock(&ipvs->dropentry_lock);
+ switch (ipvs->sysctl_drop_entry) {
case 0:
- atomic_set(&ip_vs_dropentry, 0);
+ atomic_set(&ipvs->dropentry, 0);
break;
case 1:
if (nomem) {
- atomic_set(&ip_vs_dropentry, 1);
- sysctl_ip_vs_drop_entry = 2;
+ atomic_set(&ipvs->dropentry, 1);
+ ipvs->sysctl_drop_entry = 2;
} else {
- atomic_set(&ip_vs_dropentry, 0);
+ atomic_set(&ipvs->dropentry, 0);
}
break;
case 2:
if (nomem) {
- atomic_set(&ip_vs_dropentry, 1);
+ atomic_set(&ipvs->dropentry, 1);
} else {
- atomic_set(&ip_vs_dropentry, 0);
- sysctl_ip_vs_drop_entry = 1;
+ atomic_set(&ipvs->dropentry, 0);
+ ipvs->sysctl_drop_entry = 1;
};
break;
case 3:
- atomic_set(&ip_vs_dropentry, 1);
+ atomic_set(&ipvs->dropentry, 1);
break;
}
- spin_unlock(&__ip_vs_dropentry_lock);
+ spin_unlock(&ipvs->dropentry_lock);
/* drop_packet */
- spin_lock(&__ip_vs_droppacket_lock);
- switch (sysctl_ip_vs_drop_packet) {
+ spin_lock(&ipvs->droppacket_lock);
+ switch (ipvs->sysctl_drop_packet) {
case 0:
- ip_vs_drop_rate = 0;
+ ipvs->drop_rate = 0;
break;
case 1:
if (nomem) {
- ip_vs_drop_rate = ip_vs_drop_counter
- = sysctl_ip_vs_amemthresh /
- (sysctl_ip_vs_amemthresh-availmem);
- sysctl_ip_vs_drop_packet = 2;
+ ipvs->drop_rate = ipvs->drop_counter
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
+ ipvs->sysctl_drop_packet = 2;
} else {
- ip_vs_drop_rate = 0;
+ ipvs->drop_rate = 0;
}
break;
case 2:
if (nomem) {
- ip_vs_drop_rate = ip_vs_drop_counter
- = sysctl_ip_vs_amemthresh /
- (sysctl_ip_vs_amemthresh-availmem);
+ ipvs->drop_rate = ipvs->drop_counter
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
} else {
- ip_vs_drop_rate = 0;
- sysctl_ip_vs_drop_packet = 1;
+ ipvs->drop_rate = 0;
+ ipvs->sysctl_drop_packet = 1;
}
break;
case 3:
- ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+ ipvs->drop_rate = ipvs->sysctl_am_droprate;
break;
}
- spin_unlock(&__ip_vs_droppacket_lock);
+ spin_unlock(&ipvs->droppacket_lock);
/* secure_tcp */
- spin_lock(&ip_vs_securetcp_lock);
- switch (sysctl_ip_vs_secure_tcp) {
+ spin_lock(&ipvs->securetcp_lock);
+ switch (ipvs->sysctl_secure_tcp) {
case 0:
if (old_secure_tcp >= 2)
to_change = 0;
@@ -217,7 +182,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
if (nomem) {
if (old_secure_tcp < 2)
to_change = 1;
- sysctl_ip_vs_secure_tcp = 2;
+ ipvs->sysctl_secure_tcp = 2;
} else {
if (old_secure_tcp >= 2)
to_change = 0;
@@ -230,7 +195,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
} else {
if (old_secure_tcp >= 2)
to_change = 0;
- sysctl_ip_vs_secure_tcp = 1;
+ ipvs->sysctl_secure_tcp = 1;
}
break;
case 3:
@@ -238,11 +203,11 @@ static void update_defense_level(struct netns_ipvs *ipvs)
to_change = 1;
break;
}
- old_secure_tcp = sysctl_ip_vs_secure_tcp;
+ old_secure_tcp = ipvs->sysctl_secure_tcp;
if (to_change >= 0)
ip_vs_protocol_timeout_change(ipvs,
- sysctl_ip_vs_secure_tcp > 1);
- spin_unlock(&ip_vs_securetcp_lock);
+ ipvs->sysctl_secure_tcp > 1);
+ spin_unlock(&ipvs->securetcp_lock);
local_bh_enable();
}
@@ -260,7 +225,7 @@ static void defense_work_handler(struct work_struct *work)
struct netns_ipvs *ipvs = net_ipvs(&init_net);
update_defense_level(ipvs);
- if (atomic_read(&ip_vs_dropentry))
+ if (atomic_read(&ipvs->dropentry))
ip_vs_random_dropentry();
schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
@@ -602,7 +567,7 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
*/
hash = ip_vs_rs_hashkey(af, daddr, dport);
- read_lock(&__ip_vs_rs_lock);
+ read_lock(&ipvs->rs_lock);
list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
if ((dest->af == af)
&& ip_vs_addr_equal(af, &dest->addr, daddr)
@@ -610,11 +575,11 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
&& ((dest->protocol == protocol) ||
dest->vfwmark)) {
/* HIT */
- read_unlock(&__ip_vs_rs_lock);
+ read_unlock(&ipvs->rs_lock);
return dest;
}
}
- read_unlock(&__ip_vs_rs_lock);
+ read_unlock(&ipvs->rs_lock);
return NULL;
}
@@ -788,9 +753,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
* Put the real service in rs_table if not present.
* For now only for NAT!
*/
- write_lock_bh(&__ip_vs_rs_lock);
+ write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_hash(ipvs, dest);
- write_unlock_bh(&__ip_vs_rs_lock);
+ write_unlock_bh(&ipvs->rs_lock);
}
atomic_set(&dest->conn_flags, conn_flags);
@@ -1022,14 +987,16 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
*/
static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
ip_vs_kill_estimator(net, &dest->stats);
/*
* Remove it from the d-linked list with the real services.
*/
- write_lock_bh(&__ip_vs_rs_lock);
+ write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_unhash(dest);
- write_unlock_bh(&__ip_vs_rs_lock);
+ write_unlock_bh(&ipvs->rs_lock);
/*
* Decrease the refcnt of the dest, and free the dest
@@ -1092,7 +1059,6 @@ static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
- struct net *net = svc->net;
__be16 dport = udest->port;
EnterFunction(2);
@@ -1121,7 +1087,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Delete the destination
*/
- __ip_vs_del_dest(net, dest);
+ __ip_vs_del_dest(svc->net, dest);
LeaveFunction(2);
@@ -1140,6 +1106,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
+ struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
@@ -1219,7 +1186,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
- ip_vs_num_services++;
+ ipvs->num_services++;
/* Hash the service into the service table */
write_lock_bh(&__ip_vs_svc_lock);
@@ -1359,12 +1326,13 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
struct ip_vs_pe *old_pe;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
pr_info("%s: enter\n", __func__);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
- ip_vs_num_services--;
+ ipvs->num_services--;
ip_vs_kill_estimator(svc->net, &svc->stats);
@@ -1589,42 +1557,31 @@ proc_do_sync_mode(ctl_table *table, int write,
/*
* IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ * Do not change order or insert new entries without
+ * align with netns init in __ip_vs_control_init()
*/
static struct ctl_table vs_vars[] = {
{
.procname = "amemthresh",
- .data = &sysctl_ip_vs_amemthresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#ifdef CONFIG_IP_VS_DEBUG
- {
- .procname = "debug_level",
- .data = &sysctl_ip_vs_debug_level,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#endif
{
.procname = "am_droprate",
- .data = &sysctl_ip_vs_am_droprate,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "drop_entry",
- .data = &sysctl_ip_vs_drop_entry,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
},
{
.procname = "drop_packet",
- .data = &sysctl_ip_vs_drop_packet,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
@@ -1632,7 +1589,6 @@ static struct ctl_table vs_vars[] = {
#ifdef CONFIG_IP_VS_NFCT
{
.procname = "conntrack",
- .data = &sysctl_ip_vs_conntrack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -1640,25 +1596,62 @@ static struct ctl_table vs_vars[] = {
#endif
{
.procname = "secure_tcp",
- .data = &sysctl_ip_vs_secure_tcp,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
},
{
.procname = "snat_reroute",
- .data = &sysctl_ip_vs_snat_reroute,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.procname = "sync_version",
- .data = &sysctl_ip_vs_sync_ver,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_do_sync_mode,
},
+ {
+ .procname = "cache_bypass",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_nodest_conn",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_quiescent_template",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sync_threshold",
+ .maxlen =
+ sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+ .mode = 0644,
+ .proc_handler = proc_do_sync_threshold,
+ },
+ {
+ .procname = "nat_icmp_send",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_IP_VS_DEBUG
+ {
+ .procname = "debug_level",
+ .data = &sysctl_ip_vs_debug_level,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#if 0
{
.procname = "timeout_established",
@@ -1745,41 +1738,6 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
- {
- .procname = "cache_bypass",
- .data = &sysctl_ip_vs_cache_bypass,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "expire_nodest_conn",
- .data = &sysctl_ip_vs_expire_nodest_conn,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "expire_quiescent_template",
- .data = &sysctl_ip_vs_expire_quiescent_template,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "sync_threshold",
- .data = &sysctl_ip_vs_sync_threshold,
- .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
- .mode = 0644,
- .proc_handler = proc_do_sync_threshold,
- },
- {
- .procname = "nat_icmp_send",
- .data = &sysctl_ip_vs_nat_icmp_send,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
@@ -1791,8 +1749,6 @@ const struct ctl_path net_vs_ctl_path[] = {
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
-static struct ctl_table_header * sysctl_header;
-
#ifdef CONFIG_PROC_FS
struct ip_vs_iter {
@@ -2543,7 +2499,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
struct ip_vs_getinfo info;
info.version = IP_VS_VERSION_CODE;
info.size = ip_vs_conn_tab_size;
- info.num_services = ip_vs_num_services;
+ info.num_services = ipvs->num_services;
if (copy_to_user(user, &info, sizeof(info)) != 0)
ret = -EFAULT;
}
@@ -3014,7 +2970,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
- struct net *net;
+ struct net *net = skb_sknet(skb);
mutex_lock(&__ip_vs_mutex);
@@ -3023,7 +2979,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
goto out_err;
- net = skb_sknet(skb);
+
svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
@@ -3215,8 +3171,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
struct net *net;
+ struct netns_ipvs *ipvs;
net = skb_sknet(skb);
+ ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
@@ -3326,8 +3284,10 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
void *reply;
int ret, cmd, reply_cmd;
struct net *net;
+ struct netns_ipvs *ipvs;
net = skb_sknet(skb);
+ ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3530,9 +3490,21 @@ int __net_init __ip_vs_control_init(struct net *net)
{
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ctl_table *tbl;
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
+
+ atomic_set(&ipvs->dropentry, 0);
+ spin_lock_init(&ipvs->dropentry_lock);
+ spin_lock_init(&ipvs->droppacket_lock);
+ spin_lock_init(&ipvs->securetcp_lock);
+ ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+ /* Initialize rs_table */
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
/* procfs stats */
ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
if (ipvs->tot_stats == NULL) {
@@ -3553,14 +3525,51 @@ int __net_init __ip_vs_control_init(struct net *net)
proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
&ip_vs_stats_percpu_fops);
- sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
+
+ if (!net_eq(net, &init_net)) {
+ tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_dup;
+ } else
+ tbl = vs_vars;
+ /* Initialize sysctl defaults */
+ idx = 0;
+ ipvs->sysctl_amemthresh = 1024;
+ tbl[idx++].data = &ipvs->sysctl_amemthresh;
+ ipvs->sysctl_am_droprate = 10;
+ tbl[idx++].data = &ipvs->sysctl_am_droprate;
+ tbl[idx++].data = &ipvs->sysctl_drop_entry;
+ tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+ tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+ tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+ ipvs->sysctl_snat_reroute = 1;
+ tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+ ipvs->sysctl_sync_ver = 1;
+ tbl[idx++].data = &ipvs->sysctl_sync_ver;
+ tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+ tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+ tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+ ipvs->sysctl_sync_threshold[0] = 3;
+ ipvs->sysctl_sync_threshold[1] = 50;
+ tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+ tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+
+ ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
vs_vars);
- if (sysctl_header == NULL)
+ if (ipvs->sysctl_hdr == NULL)
goto err_reg;
ip_vs_new_estimator(net, ipvs->tot_stats);
+ ipvs->sysctl_tbl = tbl;
return 0;
err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(tbl);
+err_dup:
free_percpu(ipvs->cpustats);
err_alloc:
kfree(ipvs->tot_stats);
@@ -3575,7 +3584,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
return;
ip_vs_kill_estimator(net, ipvs->tot_stats);
- unregister_net_sysctl_table(sysctl_header);
+ unregister_net_sysctl_table(ipvs->sysctl_hdr);
proc_net_remove(net, "ip_vs_stats_percpu");
proc_net_remove(net, "ip_vs_stats");
proc_net_remove(net, "ip_vs");
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 550365a..fb2d04a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -34,7 +34,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
&iph.daddr, sh->dest))) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index d8b3f9f..c0cc341 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -54,7 +54,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
&iph.daddr, th->dest))) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 581157b..f1282cb 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -50,7 +50,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (svc) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index f85e47d..b178056 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode)
if (!ipvs->sync_state & IP_VS_STATE_MASTER)
return;
- if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff)
+ if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
return;
spin_lock_bh(&ipvs->sync_buff_lock);
@@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
unsigned int len, pe_name_len, pad;
/* Handle old version of the protocol */
- if (sysctl_ip_vs_sync_ver == 0) {
+ if (ipvs->sysctl_sync_ver == 0) {
ip_vs_sync_conn_v0(net, cp);
return;
}
@@ -650,7 +650,7 @@ control:
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
int pkts = atomic_add_return(1, &cp->in_pkts);
- if (pkts % sysctl_ip_vs_sync_threshold[1] != 1)
+ if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
return;
}
goto sloop;
@@ -724,6 +724,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
{
struct ip_vs_dest *dest;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = net_ipvs(net);
if (!(flags & IP_VS_CONN_F_TEMPLATE))
cp = ip_vs_conn_in_get(param);
@@ -794,7 +795,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+ atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
cp->state = state;
cp->old_state = cp->state;
/*
--
1.7.2.3
^ permalink raw reply related
* [PATCH 51/79] IPVS: netns, defense work timer.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
This patch makes defense work timer per name-space,
A net ptr had to be added to the ipvs struct,
since it's needed by defense_work_handler.
[ horms@verge.net.au: Use cancel_delayed_work_sync() instead of
cancel_rearming_delayed_work(). Found during
merge conflict resoliution ]
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 2 +-
include/net/netns/ip_vs.h | 3 +++
net/netfilter/ipvs/ip_vs_conn.c | 5 +++--
net/netfilter/ipvs/ip_vs_core.c | 1 +
net/netfilter/ipvs/ip_vs_ctl.c | 20 +++++++++-----------
5 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index af9acf4..fbe660f 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -877,7 +877,7 @@ extern const char * ip_vs_state_name(__u16 proto, int state);
extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_random_dropentry(struct net *net);
extern int ip_vs_conn_init(void);
extern void ip_vs_conn_cleanup(void);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index c4b1abf..4133261 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -71,6 +71,7 @@ struct netns_ipvs {
int num_services; /* no of virtual services */
/* 1/rate drop and drop-entry variables */
+ struct delayed_work defense_work; /* Work handler */
int drop_rate;
int drop_counter;
atomic_t dropentry;
@@ -129,6 +130,8 @@ struct netns_ipvs {
/* multicast interface name */
char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ /* net name space ptr */
+ struct net *net; /* Needed by timer routines */
};
#endif /* IP_VS_H_ */
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 5ba205a..28bdaf7 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1138,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
}
/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
@@ -1158,7 +1158,8 @@ void ip_vs_random_dropentry(void)
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
-
+ if (!ip_vs_conn_net_eq(cp, net))
+ continue;
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
case IP_VS_TCP_S_SYN_RECV:
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a7c59a7..bdda346 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1884,6 +1884,7 @@ static int __net_init __ip_vs_init(struct net *net)
pr_err("%s(): no memory.\n", __func__);
return -ENOMEM;
}
+ ipvs->net = net;
/* Counters used for creating unique names */
ipvs->gen = atomic_read(&ipvs_netns_cnt);
atomic_inc(&ipvs_netns_cnt);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 183ac18..6a963d4 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -217,18 +217,16 @@ static void update_defense_level(struct netns_ipvs *ipvs)
* Timer for checking the defense
*/
#define DEFENSE_TIMER_PERIOD 1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
static void defense_work_handler(struct work_struct *work)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct netns_ipvs *ipvs =
+ container_of(work, struct netns_ipvs, defense_work.work);
update_defense_level(ipvs);
if (atomic_read(&ipvs->dropentry))
- ip_vs_random_dropentry();
-
- schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+ ip_vs_random_dropentry(ipvs->net);
+ schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
int
@@ -3564,6 +3562,9 @@ int __net_init __ip_vs_control_init(struct net *net)
goto err_reg;
ip_vs_new_estimator(net, ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
+ /* Schedule defense work */
+ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+ schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
return 0;
err_reg:
@@ -3588,6 +3589,8 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
proc_net_remove(net, "ip_vs_stats_percpu");
proc_net_remove(net, "ip_vs_stats");
proc_net_remove(net, "ip_vs");
+ cancel_delayed_work_sync(&ipvs->defense_work);
+ cancel_work_sync(&ipvs->defense_work.work);
free_percpu(ipvs->cpustats);
kfree(ipvs->tot_stats);
}
@@ -3631,9 +3634,6 @@ int __init ip_vs_control_init(void)
goto err_net;
}
- /* Hook the defense timer */
- schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
LeaveFunction(2);
return 0;
@@ -3648,8 +3648,6 @@ void ip_vs_control_cleanup(void)
{
EnterFunction(2);
ip_vs_trash_cleanup();
- cancel_delayed_work_sync(&defense_work);
- cancel_work_sync(&defense_work.work);
unregister_pernet_subsys(&ipvs_control_ops);
ip_vs_genl_unregister();
nf_unregister_sockopt(&ip_vs_sockopts);
--
1.7.2.3
^ permalink raw reply related
* [PATCH 54/79] IPVS: netns, misc init_net removal in core.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
init_net removed in __ip_vs_addr_is_local_v6, and got net as param.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 6 ++++--
net/netfilter/ipvs/ip_vs_ctl.c | 9 +++++----
2 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index bdda346..9e10c7a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -499,6 +499,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd)
{
+ struct net *net;
struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
@@ -511,18 +512,19 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_service_put(svc);
return NF_DROP;
}
+ net = skb_net(skb);
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
else
#endif
- unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+ unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- ipvs = net_ipvs(skb_net(skb));
+ ipvs = net_ipvs(net);
if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 65f5de4..edf2b6d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -71,7 +71,8 @@ int ip_vs_get_debug_level(void)
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+ const struct in6_addr *addr)
{
struct rt6_info *rt;
struct flowi fl = {
@@ -80,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
};
- rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
return 1;
@@ -810,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
- !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+ !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
return -EINVAL;
} else
#endif
{
- atype = inet_addr_type(&init_net, udest->addr.ip);
+ atype = inet_addr_type(svc->net, udest->addr.ip);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 61/79] netfilter: nf_conntrack: use is_vmalloc_addr()
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Patrick McHardy <kaber@trash.net>
Use is_vmalloc_addr() in nf_ct_free_hashtable() and get rid of
the vmalloc flags to indicate that a hash table has been allocated
using vmalloc().
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/net/netfilter/nf_conntrack.h | 4 ++--
include/net/netns/conntrack.h | 2 --
include/net/netns/ipv4.h | 1 -
net/ipv4/netfilter/nf_nat_core.c | 6 ++----
net/netfilter/nf_conntrack_core.c | 26 +++++++++-----------------
net/netfilter/nf_conntrack_expect.c | 9 +++------
net/netfilter/nf_conntrack_helper.c | 10 +++-------
7 files changed, 19 insertions(+), 39 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 2bc344c..d0d1337 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -202,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto);
* Allocate a hashtable of hlist_head (if nulls == 0),
* or hlist_nulls_head (if nulls == 1)
*/
-extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
+extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
-extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
+extern void nf_ct_free_hashtable(void *hash, unsigned int size);
extern struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, u16 zone,
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index d4958d4..5cf8a8c 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -28,8 +28,6 @@ struct netns_ct {
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *event_sysctl_header;
#endif
- int hash_vmalloc;
- int expect_vmalloc;
char *slabname;
};
#endif
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d68c3f1..e2e2ef5 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -43,7 +43,6 @@ struct netns_ipv4 {
struct xt_table *nat_table;
struct hlist_head *nat_bysource;
unsigned int nat_htable_size;
- int nat_vmalloced;
#endif
int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index eb55835..6972cee 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -682,8 +682,7 @@ static int __net_init nf_nat_net_init(struct net *net)
{
/* Leave them the same for the moment. */
net->ipv4.nat_htable_size = net->ct.htable_size;
- net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
- &net->ipv4.nat_vmalloced, 0);
+ net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
if (!net->ipv4.nat_bysource)
return -ENOMEM;
return 0;
@@ -705,8 +704,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
nf_ct_iterate_cleanup(net, &clean_nat, NULL);
synchronize_rcu();
- nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
- net->ipv4.nat_htable_size);
+ nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
}
static struct pernet_operations nf_nat_net_ops = {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e95ac42..dc2ff2c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1202,9 +1202,9 @@ static int kill_all(struct nf_conn *i, void *data)
return 1;
}
-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, unsigned int size)
{
- if (vmalloced)
+ if (is_vmalloc_addr(hash))
vfree(hash);
else
free_pages((unsigned long)hash,
@@ -1271,8 +1271,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
goto i_see_dead_people;
}
- nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
- net->ct.htable_size);
+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_ecache_fini(net);
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
@@ -1301,21 +1300,18 @@ void nf_conntrack_cleanup(struct net *net)
}
}
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
{
struct hlist_nulls_head *hash;
unsigned int nr_slots, i;
size_t sz;
- *vmalloced = 0;
-
BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
sz = nr_slots * sizeof(struct hlist_nulls_head);
hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
get_order(sz));
if (!hash) {
- *vmalloced = 1;
printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL);
@@ -1331,7 +1327,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
{
- int i, bucket, vmalloced, old_vmalloced;
+ int i, bucket;
unsigned int hashsize, old_size;
struct hlist_nulls_head *hash, *old_hash;
struct nf_conntrack_tuple_hash *h;
@@ -1348,7 +1344,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
if (!hashsize)
return -EINVAL;
- hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
+ hash = nf_ct_alloc_hashtable(&hashsize, 1);
if (!hash)
return -ENOMEM;
@@ -1370,15 +1366,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
}
}
old_size = init_net.ct.htable_size;
- old_vmalloced = init_net.ct.hash_vmalloc;
old_hash = init_net.ct.hash;
init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
- init_net.ct.hash_vmalloc = vmalloced;
init_net.ct.hash = hash;
spin_unlock_bh(&nf_conntrack_lock);
- nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+ nf_ct_free_hashtable(old_hash, old_size);
return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1491,8 +1485,7 @@ static int nf_conntrack_init_net(struct net *net)
}
net->ct.htable_size = nf_conntrack_htable_size;
- net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
- &net->ct.hash_vmalloc, 1);
+ net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
if (!net->ct.hash) {
ret = -ENOMEM;
printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1515,8 +1508,7 @@ err_ecache:
err_acct:
nf_conntrack_expect_fini(net);
err_expect:
- nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
- net->ct.htable_size);
+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
err_hash:
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4a9ed23..cd1e8e0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -639,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
}
net->ct.expect_count = 0;
- net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
- &net->ct.expect_vmalloc, 0);
+ net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
if (net->ct.expect_hash == NULL)
goto err1;
@@ -662,8 +661,7 @@ err3:
if (net_eq(net, &init_net))
kmem_cache_destroy(nf_ct_expect_cachep);
err2:
- nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
- nf_ct_expect_hsize);
+ nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
err1:
return err;
}
@@ -675,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
rcu_barrier(); /* Wait for call_rcu() before destroy */
kmem_cache_destroy(nf_ct_expect_cachep);
}
- nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
- nf_ct_expect_hsize);
+ nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 767bbe9..1bdfea3 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
static struct hlist_head *nf_ct_helper_hash __read_mostly;
static unsigned int nf_ct_helper_hsize __read_mostly;
static unsigned int nf_ct_helper_count __read_mostly;
-static int nf_ct_helper_vmalloc;
/* Stupid hash, but collision free for the default registrations of the
@@ -267,8 +266,7 @@ int nf_conntrack_helper_init(void)
int err;
nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
- nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
- &nf_ct_helper_vmalloc, 0);
+ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
if (!nf_ct_helper_hash)
return -ENOMEM;
@@ -279,14 +277,12 @@ int nf_conntrack_helper_init(void)
return 0;
err1:
- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
- nf_ct_helper_hsize);
+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
return err;
}
void nf_conntrack_helper_fini(void)
{
nf_ct_extend_unregister(&helper_extend);
- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
- nf_ct_helper_hsize);
+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 64/79] netfilter: xtables: add missing aliases for autoloading via iptables
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
net/netfilter/xt_IDLETIMER.c | 2 ++
net/netfilter/xt_LED.c | 2 ++
net/netfilter/xt_cpu.c | 2 ++
3 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index be1f22e..3bdd443 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
MODULE_DESCRIPTION("Xtables: idle time monitor");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ipt_IDLETIMER");
+MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index a414050..993de2b 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+MODULE_ALIAS("ipt_LED");
+MODULE_ALIAS("ip6t_LED");
static LIST_HEAD(xt_led_triggers);
static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index b39db8a..c7a2e54 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
MODULE_DESCRIPTION("Xtables: CPU match");
+MODULE_ALIAS("ipt_cpu");
+MODULE_ALIAS("ip6t_cpu");
static int cpu_mt_check(const struct xt_mtchk_param *par)
{
--
1.7.2.3
^ permalink raw reply related
* [PATCH 67/79] netfilter: xtables: use __uXX guarded types for userspace exports
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
include/linux/netfilter_bridge/ebt_802_3.h | 24 ++++++++++++------------
include/linux/netfilter_bridge/ebt_among.h | 2 +-
include/linux/netfilter_bridge/ebt_arp.h | 4 ++--
include/linux/netfilter_bridge/ebt_ip.h | 12 ++++++------
include/linux/netfilter_bridge/ebt_ip6.h | 16 ++++++++--------
include/linux/netfilter_bridge/ebt_limit.h | 8 ++++----
include/linux/netfilter_bridge/ebt_log.h | 6 +++---
include/linux/netfilter_bridge/ebt_mark_m.h | 4 ++--
include/linux/netfilter_bridge/ebt_nflog.h | 10 +++++-----
include/linux/netfilter_bridge/ebt_pkttype.h | 4 ++--
include/linux/netfilter_bridge/ebt_stp.h | 24 ++++++++++++------------
include/linux/netfilter_bridge/ebt_ulog.h | 2 +-
include/linux/netfilter_bridge/ebt_vlan.h | 8 ++++----
include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 14 +++++++-------
include/linux/netfilter_ipv4/ipt_ECN.h | 6 +++---
include/linux/netfilter_ipv4/ipt_SAME.h | 6 +++---
include/linux/netfilter_ipv4/ipt_TTL.h | 4 ++--
include/linux/netfilter_ipv4/ipt_addrtype.h | 14 +++++++-------
include/linux/netfilter_ipv4/ipt_ah.h | 4 ++--
include/linux/netfilter_ipv4/ipt_ecn.h | 8 ++++----
include/linux/netfilter_ipv4/ipt_ttl.h | 4 ++--
include/linux/netfilter_ipv6/ip6t_HL.h | 4 ++--
include/linux/netfilter_ipv6/ip6t_REJECT.h | 2 +-
include/linux/netfilter_ipv6/ip6t_ah.h | 8 ++++----
include/linux/netfilter_ipv6/ip6t_frag.h | 8 ++++----
include/linux/netfilter_ipv6/ip6t_hl.h | 4 ++--
| 6 +++---
include/linux/netfilter_ipv6/ip6t_mh.h | 4 ++--
include/linux/netfilter_ipv6/ip6t_opts.h | 10 +++++-----
include/linux/netfilter_ipv6/ip6t_rt.h | 12 ++++++------
30 files changed, 121 insertions(+), 121 deletions(-)
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
index c73ef0b..c427764 100644
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -24,24 +24,24 @@
/* ui has one byte ctrl, ni has two */
struct hdr_ui {
- uint8_t dsap;
- uint8_t ssap;
- uint8_t ctrl;
- uint8_t orig[3];
+ __u8 dsap;
+ __u8 ssap;
+ __u8 ctrl;
+ __u8 orig[3];
__be16 type;
};
struct hdr_ni {
- uint8_t dsap;
- uint8_t ssap;
+ __u8 dsap;
+ __u8 ssap;
__be16 ctrl;
- uint8_t orig[3];
+ __u8 orig[3];
__be16 type;
};
struct ebt_802_3_hdr {
- uint8_t daddr[6];
- uint8_t saddr[6];
+ __u8 daddr[6];
+ __u8 saddr[6];
__be16 len;
union {
struct hdr_ui ui;
@@ -59,10 +59,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
#endif
struct ebt_802_3_info {
- uint8_t sap;
+ __u8 sap;
__be16 type;
- uint8_t bitmask;
- uint8_t invflags;
+ __u8 bitmask;
+ __u8 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h
index 0009558..686c961 100644
--- a/include/linux/netfilter_bridge/ebt_among.h
+++ b/include/linux/netfilter_bridge/ebt_among.h
@@ -30,7 +30,7 @@
*/
struct ebt_mac_wormhash_tuple {
- uint32_t cmp[2];
+ __u32 cmp[2];
__be32 ip;
};
diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h
index cbf4843..e62b5af 100644
--- a/include/linux/netfilter_bridge/ebt_arp.h
+++ b/include/linux/netfilter_bridge/ebt_arp.h
@@ -27,8 +27,8 @@ struct ebt_arp_info
unsigned char smmsk[ETH_ALEN];
unsigned char dmaddr[ETH_ALEN];
unsigned char dmmsk[ETH_ALEN];
- uint8_t bitmask;
- uint8_t invflags;
+ __u8 bitmask;
+ __u8 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h
index 6a708fb..d99de58 100644
--- a/include/linux/netfilter_bridge/ebt_ip.h
+++ b/include/linux/netfilter_bridge/ebt_ip.h
@@ -31,12 +31,12 @@ struct ebt_ip_info {
__be32 daddr;
__be32 smsk;
__be32 dmsk;
- uint8_t tos;
- uint8_t protocol;
- uint8_t bitmask;
- uint8_t invflags;
- uint16_t sport[2];
- uint16_t dport[2];
+ __u8 tos;
+ __u8 protocol;
+ __u8 bitmask;
+ __u8 invflags;
+ __u16 sport[2];
+ __u16 dport[2];
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h
index 22af18a..998e9d5 100644
--- a/include/linux/netfilter_bridge/ebt_ip6.h
+++ b/include/linux/netfilter_bridge/ebt_ip6.h
@@ -31,17 +31,17 @@ struct ebt_ip6_info {
struct in6_addr daddr;
struct in6_addr smsk;
struct in6_addr dmsk;
- uint8_t tclass;
- uint8_t protocol;
- uint8_t bitmask;
- uint8_t invflags;
+ __u8 tclass;
+ __u8 protocol;
+ __u8 bitmask;
+ __u8 invflags;
union {
- uint16_t sport[2];
- uint8_t icmpv6_type[2];
+ __u16 sport[2];
+ __u8 icmpv6_type[2];
};
union {
- uint16_t dport[2];
- uint8_t icmpv6_code[2];
+ __u16 dport[2];
+ __u8 icmpv6_code[2];
};
};
diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h
index 4bf76b7..721d51f 100644
--- a/include/linux/netfilter_bridge/ebt_limit.h
+++ b/include/linux/netfilter_bridge/ebt_limit.h
@@ -10,13 +10,13 @@
seconds, or one every 59 hours. */
struct ebt_limit_info {
- u_int32_t avg; /* Average secs between packets * scale */
- u_int32_t burst; /* Period multiplier for upper limit. */
+ __u32 avg; /* Average secs between packets * scale */
+ __u32 burst; /* Period multiplier for upper limit. */
/* Used internally by the kernel */
unsigned long prev;
- u_int32_t credit;
- u_int32_t credit_cap, cost;
+ __u32 credit;
+ __u32 credit_cap, cost;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h
index cc2cdfb..564beb4 100644
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -10,9 +10,9 @@
#define EBT_LOG_WATCHER "log"
struct ebt_log_info {
- uint8_t loglevel;
- uint8_t prefix[EBT_LOG_PREFIX_SIZE];
- uint32_t bitmask;
+ __u8 loglevel;
+ __u8 prefix[EBT_LOG_PREFIX_SIZE];
+ __u32 bitmask;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h
index 9ceb10e..97b96c4 100644
--- a/include/linux/netfilter_bridge/ebt_mark_m.h
+++ b/include/linux/netfilter_bridge/ebt_mark_m.h
@@ -6,8 +6,8 @@
#define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
struct ebt_mark_m_info {
unsigned long mark, mask;
- uint8_t invert;
- uint8_t bitmask;
+ __u8 invert;
+ __u8 bitmask;
};
#define EBT_MARK_MATCH "mark_m"
diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h
index 0528178..477315b 100644
--- a/include/linux/netfilter_bridge/ebt_nflog.h
+++ b/include/linux/netfilter_bridge/ebt_nflog.h
@@ -10,11 +10,11 @@
#define EBT_NFLOG_DEFAULT_THRESHOLD 1
struct ebt_nflog_info {
- u_int32_t len;
- u_int16_t group;
- u_int16_t threshold;
- u_int16_t flags;
- u_int16_t pad;
+ __u32 len;
+ __u16 group;
+ __u16 threshold;
+ __u16 flags;
+ __u16 pad;
char prefix[EBT_NFLOG_PREFIX_SIZE];
};
diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h
index 51a7998..7c0fb0f 100644
--- a/include/linux/netfilter_bridge/ebt_pkttype.h
+++ b/include/linux/netfilter_bridge/ebt_pkttype.h
@@ -2,8 +2,8 @@
#define __LINUX_BRIDGE_EBT_PKTTYPE_H
struct ebt_pkttype_info {
- uint8_t pkt_type;
- uint8_t invert;
+ __u8 pkt_type;
+ __u8 invert;
};
#define EBT_PKTTYPE_MATCH "pkttype"
diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h
index e503a0a..13a0bd4 100644
--- a/include/linux/netfilter_bridge/ebt_stp.h
+++ b/include/linux/netfilter_bridge/ebt_stp.h
@@ -21,24 +21,24 @@
#define EBT_STP_MATCH "stp"
struct ebt_stp_config_info {
- uint8_t flags;
- uint16_t root_priol, root_priou;
+ __u8 flags;
+ __u16 root_priol, root_priou;
char root_addr[6], root_addrmsk[6];
- uint32_t root_costl, root_costu;
- uint16_t sender_priol, sender_priou;
+ __u32 root_costl, root_costu;
+ __u16 sender_priol, sender_priou;
char sender_addr[6], sender_addrmsk[6];
- uint16_t portl, portu;
- uint16_t msg_agel, msg_ageu;
- uint16_t max_agel, max_ageu;
- uint16_t hello_timel, hello_timeu;
- uint16_t forward_delayl, forward_delayu;
+ __u16 portl, portu;
+ __u16 msg_agel, msg_ageu;
+ __u16 max_agel, max_ageu;
+ __u16 hello_timel, hello_timeu;
+ __u16 forward_delayl, forward_delayu;
};
struct ebt_stp_info {
- uint8_t type;
+ __u8 type;
struct ebt_stp_config_info config;
- uint16_t bitmask;
- uint16_t invflags;
+ __u16 bitmask;
+ __u16 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h
index b677e26..de35a51 100644
--- a/include/linux/netfilter_bridge/ebt_ulog.h
+++ b/include/linux/netfilter_bridge/ebt_ulog.h
@@ -10,7 +10,7 @@
#define EBT_ULOG_VERSION 1
struct ebt_ulog_info {
- uint32_t nlgroup;
+ __u32 nlgroup;
unsigned int cprange;
unsigned int qthreshold;
char prefix[EBT_ULOG_PREFIX_LEN];
diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h
index 1d98be4..48dffc1 100644
--- a/include/linux/netfilter_bridge/ebt_vlan.h
+++ b/include/linux/netfilter_bridge/ebt_vlan.h
@@ -8,12 +8,12 @@
#define EBT_VLAN_MATCH "vlan"
struct ebt_vlan_info {
- uint16_t id; /* VLAN ID {1-4095} */
- uint8_t prio; /* VLAN User Priority {0-7} */
+ __u16 id; /* VLAN ID {1-4095} */
+ __u8 prio; /* VLAN User Priority {0-7} */
__be16 encap; /* VLAN Encapsulated frame code {0-65535} */
- uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg,
+ __u8 bitmask; /* Args bitmask bit 1=1 - ID arg,
bit 2=1 User-Priority arg, bit 3=1 encap*/
- uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg,
+ __u8 invflags; /* Inverse bitmask bit 1=1 - inversed ID arg,
bit 2=1 - inversed Pirority arg */
};
diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
index e5a3687..3114f06 100644
--- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
+++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
@@ -17,15 +17,15 @@ struct clusterip_config;
struct ipt_clusterip_tgt_info {
- u_int32_t flags;
+ __u32 flags;
/* only relevant for new ones */
- u_int8_t clustermac[6];
- u_int16_t num_total_nodes;
- u_int16_t num_local_nodes;
- u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
- u_int32_t hash_mode;
- u_int32_t hash_initval;
+ __u8 clustermac[6];
+ __u16 num_total_nodes;
+ __u16 num_local_nodes;
+ __u16 local_nodes[CLUSTERIP_MAX_NODES];
+ __u32 hash_mode;
+ __u32 hash_initval;
/* Used internally by the kernel */
struct clusterip_config *config;
diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h
index 7ca4591..c6e3e01 100644
--- a/include/linux/netfilter_ipv4/ipt_ECN.h
+++ b/include/linux/netfilter_ipv4/ipt_ECN.h
@@ -19,11 +19,11 @@
#define IPT_ECN_OP_MASK 0xce
struct ipt_ECN_info {
- u_int8_t operation; /* bitset of operations */
- u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
+ __u8 operation; /* bitset of operations */
+ __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
union {
struct {
- u_int8_t ece:1, cwr:1; /* TCP ECT bits */
+ __u8 ece:1, cwr:1; /* TCP ECT bits */
} tcp;
} proto;
};
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h
index 2529660..fa0ebec 100644
--- a/include/linux/netfilter_ipv4/ipt_SAME.h
+++ b/include/linux/netfilter_ipv4/ipt_SAME.h
@@ -7,9 +7,9 @@
struct ipt_same_info {
unsigned char info;
- u_int32_t rangesize;
- u_int32_t ipnum;
- u_int32_t *iparray;
+ __u32 rangesize;
+ __u32 ipnum;
+ __u32 *iparray;
/* hangs off end. */
struct nf_nat_range range[IPT_SAME_MAX_RANGE];
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h
index ee6611e..f6250e4 100644
--- a/include/linux/netfilter_ipv4/ipt_TTL.h
+++ b/include/linux/netfilter_ipv4/ipt_TTL.h
@@ -13,8 +13,8 @@ enum {
#define IPT_TTL_MAXMODE IPT_TTL_DEC
struct ipt_TTL_info {
- u_int8_t mode;
- u_int8_t ttl;
+ __u8 mode;
+ __u8 ttl;
};
diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h
index 446de6a..f29c3cf 100644
--- a/include/linux/netfilter_ipv4/ipt_addrtype.h
+++ b/include/linux/netfilter_ipv4/ipt_addrtype.h
@@ -9,17 +9,17 @@ enum {
};
struct ipt_addrtype_info_v1 {
- u_int16_t source; /* source-type mask */
- u_int16_t dest; /* dest-type mask */
- u_int32_t flags;
+ __u16 source; /* source-type mask */
+ __u16 dest; /* dest-type mask */
+ __u32 flags;
};
/* revision 0 */
struct ipt_addrtype_info {
- u_int16_t source; /* source-type mask */
- u_int16_t dest; /* dest-type mask */
- u_int32_t invert_source;
- u_int32_t invert_dest;
+ __u16 source; /* source-type mask */
+ __u16 dest; /* dest-type mask */
+ __u32 invert_source;
+ __u32 invert_dest;
};
#endif
diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h
index 2e555b4..8fea283 100644
--- a/include/linux/netfilter_ipv4/ipt_ah.h
+++ b/include/linux/netfilter_ipv4/ipt_ah.h
@@ -2,8 +2,8 @@
#define _IPT_AH_H
struct ipt_ah {
- u_int32_t spis[2]; /* Security Parameter Index */
- u_int8_t invflags; /* Inverse flags */
+ __u32 spis[2]; /* Security Parameter Index */
+ __u8 invflags; /* Inverse flags */
};
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h
index 9945baa..78b98aa 100644
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -20,12 +20,12 @@
/* match info */
struct ipt_ecn_info {
- u_int8_t operation;
- u_int8_t invert;
- u_int8_t ip_ect;
+ __u8 operation;
+ __u8 invert;
+ __u8 ip_ect;
union {
struct {
- u_int8_t ect;
+ __u8 ect;
} tcp;
} proto;
};
diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h
index ee24fd8..93d9a06 100644
--- a/include/linux/netfilter_ipv4/ipt_ttl.h
+++ b/include/linux/netfilter_ipv4/ipt_ttl.h
@@ -13,8 +13,8 @@ enum {
struct ipt_ttl_info {
- u_int8_t mode;
- u_int8_t ttl;
+ __u8 mode;
+ __u8 ttl;
};
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h
index afb7813..81cdaf0 100644
--- a/include/linux/netfilter_ipv6/ip6t_HL.h
+++ b/include/linux/netfilter_ipv6/ip6t_HL.h
@@ -14,8 +14,8 @@ enum {
#define IP6T_HL_MAXMODE IP6T_HL_DEC
struct ip6t_HL_info {
- u_int8_t mode;
- u_int8_t hop_limit;
+ __u8 mode;
+ __u8 hop_limit;
};
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h
index 6be6504..b999aa4 100644
--- a/include/linux/netfilter_ipv6/ip6t_REJECT.h
+++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h
@@ -12,7 +12,7 @@ enum ip6t_reject_with {
};
struct ip6t_reject_info {
- u_int32_t with; /* reject type */
+ __u32 with; /* reject type */
};
#endif /*_IP6T_REJECT_H*/
diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h
index 17a745c..a602c16 100644
--- a/include/linux/netfilter_ipv6/ip6t_ah.h
+++ b/include/linux/netfilter_ipv6/ip6t_ah.h
@@ -2,10 +2,10 @@
#define _IP6T_AH_H
struct ip6t_ah {
- u_int32_t spis[2]; /* Security Parameter Index */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t hdrres; /* Test of the Reserved Filed */
- u_int8_t invflags; /* Inverse flags */
+ __u32 spis[2]; /* Security Parameter Index */
+ __u32 hdrlen; /* Header Length */
+ __u8 hdrres; /* Test of the Reserved Filed */
+ __u8 invflags; /* Inverse flags */
};
#define IP6T_AH_SPI 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h
index 3724d08..538b31e 100644
--- a/include/linux/netfilter_ipv6/ip6t_frag.h
+++ b/include/linux/netfilter_ipv6/ip6t_frag.h
@@ -2,10 +2,10 @@
#define _IP6T_FRAG_H
struct ip6t_frag {
- u_int32_t ids[2]; /* Security Parameter Index */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
+ __u32 ids[2]; /* Security Parameter Index */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
};
#define IP6T_FRAG_IDS 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h
index 5ef91b8..c6fddcb 100644
--- a/include/linux/netfilter_ipv6/ip6t_hl.h
+++ b/include/linux/netfilter_ipv6/ip6t_hl.h
@@ -14,8 +14,8 @@ enum {
struct ip6t_hl_info {
- u_int8_t mode;
- u_int8_t hop_limit;
+ __u8 mode;
+ __u8 hop_limit;
};
--git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
index 01dfd44..73d53bd 100644
--- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h
+++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
@@ -9,9 +9,9 @@ on whether they contain certain headers */
#define __IPV6HEADER_H
struct ip6t_ipv6header_info {
- u_int8_t matchflags;
- u_int8_t invflags;
- u_int8_t modeflag;
+ __u8 matchflags;
+ __u8 invflags;
+ __u8 modeflag;
};
#define MASK_HOPOPTS 128
diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h
index 18549bc..98c8cf6 100644
--- a/include/linux/netfilter_ipv6/ip6t_mh.h
+++ b/include/linux/netfilter_ipv6/ip6t_mh.h
@@ -3,8 +3,8 @@
/* MH matching stuff */
struct ip6t_mh {
- u_int8_t types[2]; /* MH type range */
- u_int8_t invflags; /* Inverse flags */
+ __u8 types[2]; /* MH type range */
+ __u8 invflags; /* Inverse flags */
};
/* Values for "invflags" field in struct ip6t_mh. */
diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h
index 62d89bc..405d309 100644
--- a/include/linux/netfilter_ipv6/ip6t_opts.h
+++ b/include/linux/netfilter_ipv6/ip6t_opts.h
@@ -4,11 +4,11 @@
#define IP6T_OPTS_OPTSNR 16
struct ip6t_opts {
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
- u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */
- u_int8_t optsnr; /* Nr of OPts */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
+ __u16 opts[IP6T_OPTS_OPTSNR]; /* opts */
+ __u8 optsnr; /* Nr of OPts */
};
#define IP6T_OPTS_LEN 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h
index ab91bfd..e8dad20 100644
--- a/include/linux/netfilter_ipv6/ip6t_rt.h
+++ b/include/linux/netfilter_ipv6/ip6t_rt.h
@@ -6,13 +6,13 @@
#define IP6T_RT_HOPS 16
struct ip6t_rt {
- u_int32_t rt_type; /* Routing Type */
- u_int32_t segsleft[2]; /* Segments Left */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
+ __u32 rt_type; /* Routing Type */
+ __u32 segsleft[2]; /* Segments Left */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */
- u_int8_t addrnr; /* Nr of Addresses */
+ __u8 addrnr; /* Nr of Addresses */
};
#define IP6T_RT_TYP 0x01
--
1.7.2.3
^ permalink raw reply related
* [PATCH 68/79] netfilter: xtables: add missing header files to export list
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
include/linux/netfilter/Kbuild | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9f11fbc..fc4e0aa 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -56,6 +56,8 @@ header-y += xt_rateest.h
header-y += xt_realm.h
header-y += xt_recent.h
header-y += xt_sctp.h
+header-y += xt_secmark.h
+header-y += xt_socket.h
header-y += xt_state.h
header-y += xt_statistic.h
header-y += xt_string.h
--
1.7.2.3
^ permalink raw reply related
* [PATCH 71/79] netfilter: Kconfig: NFQUEUE is useless without NETFILTER_NETLINK_QUEUE
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Florian Westphal <fw@strlen.de>
NFLOG already does the same thing for NETFILTER_NETLINK_LOG.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
net/netfilter/Kconfig | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 93918f0..e2480bd 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -487,6 +487,7 @@ config NETFILTER_XT_TARGET_NFLOG
config NETFILTER_XT_TARGET_NFQUEUE
tristate '"NFQUEUE" target Support'
depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK_QUEUE
help
This target replaced the old obsolete QUEUE target.
--
1.7.2.3
^ permalink raw reply related
* [PATCH 73/79] netfilter: nfnetlink_queue: do not free skb on error
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Florian Westphal <fw@strlen.de>
Move free responsibility from nf_queue to caller.
This enables more flexible error handling; we can now accept the skb
instead of freeing it.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
net/netfilter/core.c | 7 +++++--
net/netfilter/nf_queue.c | 17 ++++++++++-------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 91d66d2f..0c5b796 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -181,8 +181,11 @@ next_hook:
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_BITS);
- if (ret == -ECANCELED)
- goto next_hook;
+ if (ret < 0) {
+ if (ret == -ECANCELED)
+ goto next_hook;
+ kfree_skb(skb);
+ }
ret = 0;
}
rcu_read_unlock();
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index ad25c7e..5c4b730 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -163,9 +163,8 @@ static int __nf_queue(struct sk_buff *skb,
/* If it's going away, ignore hook. */
if (!try_module_get(entry->elem->owner)) {
- rcu_read_unlock();
- kfree(entry);
- return -ECANCELED;
+ status = -ECANCELED;
+ goto err_unlock;
}
/* Bump dev refs so they don't vanish while packet is out */
if (indev)
@@ -198,7 +197,6 @@ static int __nf_queue(struct sk_buff *skb,
err_unlock:
rcu_read_unlock();
err:
- kfree_skb(skb);
kfree(entry);
return status;
}
@@ -229,7 +227,6 @@ int nf_queue(struct sk_buff *skb,
}
segs = skb_gso_segment(skb, 0);
- kfree_skb(skb);
/* Does not use PTR_ERR to limit the number of error codes that can be
* returned by nf_queue. For instance, callers rely on -ECANCELED to mean
* 'ignore this hook'.
@@ -253,8 +250,11 @@ int nf_queue(struct sk_buff *skb,
segs = nskb;
} while (segs);
+ /* also free orig skb if only some segments were queued */
if (unlikely(err && queued))
err = 0;
+ if (err == 0)
+ kfree_skb(skb);
return err;
}
@@ -300,8 +300,11 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
err = __nf_queue(skb, elem, entry->pf, entry->hook,
entry->indev, entry->outdev, entry->okfn,
verdict >> NF_VERDICT_BITS);
- if (err == -ECANCELED)
- goto next_hook;
+ if (err < 0) {
+ if (err == -ECANCELED)
+ goto next_hook;
+ kfree_skb(skb);
+ }
break;
case NF_STOLEN:
default:
--
1.7.2.3
^ permalink raw reply related
* [PATCH 74/79] netfilter: reduce NF_VERDICT_MASK to 0xff
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Florian Westphal <fw@strlen.de>
NF_VERDICT_MASK is currently 0xffff. This is because the upper
16 bits are used to store errno (for NF_DROP) or the queue number
(NF_QUEUE verdict).
As there are up to 0xffff different queues available, there is no more
room to store additional flags.
At the moment there are only 6 different verdicts, i.e. we can reduce
NF_VERDICT_MASK to 0xff to allow storing additional flags in the 0xff00 space.
NF_VERDICT_BITS would then be reduced to 8, but because the value is
exported to userspace, this might cause breakage; e.g.:
e.g. 'queuenr = (1 << NF_VERDICT_BITS) | NF_QUEUE' would now break.
Thus, remove NF_VERDICT_BITS usage in the kernel and move the old value
to the 'userspace compat' section.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/linux/netfilter.h | 20 +++++++++++++++-----
net/netfilter/core.c | 4 ++--
net/netfilter/nf_queue.c | 2 +-
3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 0ab7ca7..78b73cc 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -24,16 +24,19 @@
#define NF_MAX_VERDICT NF_STOP
/* we overload the higher bits for encoding auxiliary data such as the queue
- * number. Not nice, but better than additional function arguments. */
-#define NF_VERDICT_MASK 0x0000ffff
-#define NF_VERDICT_BITS 16
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
#define NF_VERDICT_QMASK 0xffff0000
#define NF_VERDICT_QBITS 16
-#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
-#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
/* only for userspace compatibility */
#ifndef __KERNEL__
@@ -41,6 +44,9 @@
<= 0x2000 is used for protocol-flags. */
#define NFC_UNKNOWN 0x4000
#define NFC_ALTERED 0x8000
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
#endif
enum nf_inet_hooks {
@@ -72,6 +78,10 @@ union nf_inet_addr {
#ifdef __KERNEL__
#ifdef CONFIG_NETFILTER
+static inline int NF_DROP_GETERR(int verdict)
+{
+ return -(verdict >> NF_VERDICT_QBITS);
+}
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 0c5b796..4d88e45 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -175,12 +175,12 @@ next_hook:
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
kfree_skb(skb);
- ret = -(verdict >> NF_VERDICT_BITS);
+ ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
- verdict >> NF_VERDICT_BITS);
+ verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ECANCELED)
goto next_hook;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5c4b730..ce1150d4a 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -299,7 +299,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
case NF_QUEUE:
err = __nf_queue(skb, elem, entry->pf, entry->hook,
entry->indev, entry->outdev, entry->okfn,
- verdict >> NF_VERDICT_BITS);
+ verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ECANCELED)
goto next_hook;
--
1.7.2.3
^ permalink raw reply related
* [PATCH 75/79] netfilter: allow NFQUEUE bypass if no listener is available
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Florian Westphal <fw@strlen.de>
If an skb is to be NF_QUEUE'd, but no program has opened the queue, the
packet is dropped.
This adds a v2 target revision of xt_NFQUEUE that allows packets to
continue through the ruleset instead.
Because the actual queueing happens outside of the target context, the
'bypass' flag has to be communicated back to the netfilter core.
Unfortunately the only choice to do this without adding a new function
argument is to use the target function return value (i.e. the verdict).
In the NF_QUEUE case, the upper 16bit already contain the queue number
to use. The previous patch reduced NF_VERDICT_MASK to 0xff, i.e.
we now have extra room for a new flag.
If a hook issued a NF_QUEUE verdict, then the netfilter core will
continue packet processing if the queueing hook
returns -ESRCH (== "this queue does not exist") and the new
NF_VERDICT_FLAG_QUEUE_BYPASS flag is set in the verdict value.
Note: If the queue exists, but userspace does not consume packets fast
enough, the skb will still be dropped.
Signed-off-by: Florian Westphal <fwestphal@astaro.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/linux/netfilter.h | 1 +
include/linux/netfilter/xt_NFQUEUE.h | 6 ++++++
net/netfilter/core.c | 3 +++
net/netfilter/nf_queue.c | 7 ++++++-
net/netfilter/xt_NFQUEUE.c | 28 +++++++++++++++++++++++++---
5 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 78b73cc..eeec00a 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -29,6 +29,7 @@
#define NF_VERDICT_MASK 0x000000ff
/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000
/* queue number (NF_QUEUE) or errno (NF_DROP) */
#define NF_VERDICT_QMASK 0xffff0000
diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h
index 2584f4a..9eafdbb 100644
--- a/include/linux/netfilter/xt_NFQUEUE.h
+++ b/include/linux/netfilter/xt_NFQUEUE.h
@@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 {
__u16 queues_total;
};
+struct xt_NFQ_info_v2 {
+ __u16 queuenum;
+ __u16 queues_total;
+ __u16 bypass;
+};
+
#endif /* _XT_NFQ_TARGET_H */
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 4d88e45..1e00bf7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -184,6 +184,9 @@ next_hook:
if (ret < 0) {
if (ret == -ECANCELED)
goto next_hook;
+ if (ret == -ESRCH &&
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+ goto next_hook;
kfree_skb(skb);
}
ret = 0;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index ce1150d4a..5ab22e2 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -138,8 +138,10 @@ static int __nf_queue(struct sk_buff *skb,
rcu_read_lock();
qh = rcu_dereference(queue_handler[pf]);
- if (!qh)
+ if (!qh) {
+ status = -ESRCH;
goto err_unlock;
+ }
afinfo = nf_get_afinfo(pf);
if (!afinfo)
@@ -303,6 +305,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
if (err < 0) {
if (err == -ECANCELED)
goto next_hook;
+ if (err == -ESRCH &&
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+ goto next_hook;
kfree_skb(skb);
}
break;
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 3962770..d4f4b5d 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -83,9 +83,20 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
return NF_QUEUE_NR(queue);
}
-static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+static unsigned int
+nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
- const struct xt_NFQ_info_v1 *info = par->targinfo;
+ const struct xt_NFQ_info_v2 *info = par->targinfo;
+ unsigned int ret = nfqueue_tg_v1(skb, par);
+
+ if (info->bypass)
+ ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+ return ret;
+}
+
+static int nfqueue_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct xt_NFQ_info_v2 *info = par->targinfo;
u32 maxid;
if (unlikely(!rnd_inited)) {
@@ -102,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
info->queues_total, maxid);
return -ERANGE;
}
+ if (par->target->revision == 2 && info->bypass > 1)
+ return -EINVAL;
return 0;
}
@@ -117,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
.name = "NFQUEUE",
.revision = 1,
.family = NFPROTO_UNSPEC,
- .checkentry = nfqueue_tg_v1_check,
+ .checkentry = nfqueue_tg_check,
.target = nfqueue_tg_v1,
.targetsize = sizeof(struct xt_NFQ_info_v1),
.me = THIS_MODULE,
},
+ {
+ .name = "NFQUEUE",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfqueue_tg_check,
+ .target = nfqueue_tg_v2,
+ .targetsize = sizeof(struct xt_NFQ_info_v2),
+ .me = THIS_MODULE,
+ },
};
static int __init nfqueue_tg_init(void)
--
1.7.2.3
^ permalink raw reply related
* [PATCH 76/79] netfilter: ipt_CLUSTERIP: remove "no conntrack!"
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Eric Dumazet <eric.dumazet@gmail.com>
When a packet is meant to be handled by another node of the cluster,
silently drop it instead of flooding kernel log.
Note : INVALID packets are also dropped without notice.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
net/ipv4/netfilter/ipt_CLUSTERIP.c | 7 +------
1 files changed, 1 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1e26a48..403ca57 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
* that the ->target() function isn't called after ->destroy() */
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL) {
- pr_info("no conntrack!\n");
- /* FIXME: need to drop invalid ones, since replies
- * to outgoing connections of other nodes will be
- * marked as INVALID */
+ if (ct == NULL)
return NF_DROP;
- }
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */
--
1.7.2.3
^ permalink raw reply related
* [PATCH 78/79] netfilter: nf_conntrack_tstamp: add flow-based timestamp extension
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Pablo Neira Ayuso <pablo@netfilter.org>
This patch adds flow-based timestamping for conntracks. This
conntrack extension is disabled by default. Basically, we use
two 64-bits variables to store the creation timestamp once the
conntrack has been confirmed and the other to store the deletion
time. This extension is disabled by default, to enable it, you
have to:
echo 1 > /proc/sys/net/netfilter/nf_conntrack_timestamp
This patch allows to save memory for user-space flow-based
loogers such as ulogd2. In short, ulogd2 does not need to
keep a hashtable with the conntrack in user-space to know
when they were created and destroyed, instead we use the
kernel timestamp. If we want to have a sane IPFIX implementation
in user-space, this nanosecs resolution timestamps are also
useful. Other custom user-space applications can benefit from
this via libnetfilter_conntrack.
This patch modifies the /proc output to display the delta time
in seconds since the flow start. You can also obtain the
flow-start date by means of the conntrack-tools.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/linux/netfilter/nfnetlink_conntrack.h | 9 ++
include/net/netfilter/nf_conntrack_extend.h | 4 +
include/net/netfilter/nf_conntrack_timestamp.h | 53 +++++++++++
include/net/netns/conntrack.h | 2 +
net/netfilter/Kconfig | 11 ++
net/netfilter/Makefile | 1 +
net/netfilter/nf_conntrack_core.c | 26 +++++
net/netfilter/nf_conntrack_netlink.c | 46 +++++++++-
net/netfilter/nf_conntrack_standalone.c | 41 ++++++++
net/netfilter/nf_conntrack_timestamp.c | 120 ++++++++++++++++++++++++
10 files changed, 312 insertions(+), 1 deletions(-)
create mode 100644 include/net/netfilter/nf_conntrack_timestamp.h
create mode 100644 net/netfilter/nf_conntrack_timestamp.c
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 19711e3..debf1ae 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -42,6 +42,7 @@ enum ctattr_type {
CTA_SECMARK, /* obsolete */
CTA_ZONE,
CTA_SECCTX,
+ CTA_TIMESTAMP,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
@@ -127,6 +128,14 @@ enum ctattr_counters {
};
#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
+enum ctattr_tstamp {
+ CTA_TIMESTAMP_UNSPEC,
+ CTA_TIMESTAMP_START,
+ CTA_TIMESTAMP_STOP,
+ __CTA_TIMESTAMP_MAX
+};
+#define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1)
+
enum ctattr_nat {
CTA_NAT_UNSPEC,
CTA_NAT_MINIP,
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 1a9f96d..2dcf317 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -17,6 +17,9 @@ enum nf_ct_ext_id {
#ifdef CONFIG_NF_CONNTRACK_ZONES
NF_CT_EXT_ZONE,
#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ NF_CT_EXT_TSTAMP,
+#endif
NF_CT_EXT_NUM,
};
@@ -25,6 +28,7 @@ enum nf_ct_ext_id {
#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
+#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
new file mode 100644
index 0000000..f17dcb6
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -0,0 +1,53 @@
+#ifndef _NF_CONNTRACK_TSTAMP_H
+#define _NF_CONNTRACK_TSTAMP_H
+
+#include <net/net_namespace.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_tstamp {
+ u_int64_t start;
+ u_int64_t stop;
+};
+
+static inline
+struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP);
+#else
+ return NULL;
+#endif
+}
+
+static inline
+struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ struct net *net = nf_ct_net(ct);
+
+ if (!net->ct.sysctl_tstamp)
+ return NULL;
+
+ return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
+#else
+ return NULL;
+#endif
+};
+
+static inline bool nf_ct_tstamp_enabled(struct net *net)
+{
+ return net->ct.sysctl_tstamp != 0;
+}
+
+static inline void nf_ct_set_tstamp(struct net *net, bool enable)
+{
+ net->ct.sysctl_tstamp = enable;
+}
+
+extern int nf_conntrack_tstamp_init(struct net *net);
+extern void nf_conntrack_tstamp_fini(struct net *net);
+
+#endif /* _NF_CONNTRACK_TSTAMP_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 5cf8a8c..341eb08 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -21,11 +21,13 @@ struct netns_ct {
int sysctl_events;
unsigned int sysctl_events_retry_timeout;
int sysctl_acct;
+ int sysctl_tstamp;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
+ struct ctl_table_header *tstamp_sysctl_header;
struct ctl_table_header *event_sysctl_header;
#endif
char *slabname;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 939b504..faf7412 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
If unsure, say `N'.
+config NF_CONNTRACK_TIMESTAMP
+ bool 'Connection tracking timestamping'
+ depends on NETFILTER_ADVANCED
+ help
+ This option enables support for connection tracking timestamping.
+ This allows you to store the flow start-time and to obtain
+ the flow-stop time (once it has been destroyed) via Connection
+ tracking events.
+
+ If unsure, say `N'.
+
config NF_CT_PROTO_DCCP
tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
depends on EXPERIMENTAL
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 2c2628d..9ae6878 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,7 @@
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
obj-$(CONFIG_NETFILTER) = netfilter.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f47ac67..1909311 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -43,6 +43,7 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
static void death_by_timeout(unsigned long ul_conntrack)
{
struct nf_conn *ct = (void *)ul_conntrack;
+ struct nf_conn_tstamp *tstamp;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp && tstamp->stop == 0)
+ tstamp->stop = ktime_to_ns(ktime_get_real());
if (!test_bit(IPS_DYING_BIT, &ct->status) &&
unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct nf_conn_help *help;
+ struct nf_conn_tstamp *tstamp;
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
@@ -488,6 +495,14 @@ __nf_conntrack_confirm(struct sk_buff *skb)
atomic_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
+ /* set conntrack timestamp, if enabled. */
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp) {
+ if (skb->tstamp.tv64 == 0)
+ __net_timestamp((struct sk_buff *)skb);
+
+ tstamp->start = ktime_to_ns(skb->tstamp);
+ }
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
* guarantee that no other CPU can find the conntrack before the above
@@ -746,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1186,6 +1202,11 @@ struct __nf_ct_flush_report {
static int kill_report(struct nf_conn *i, void *data)
{
struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+ struct nf_conn_tstamp *tstamp;
+
+ tstamp = nf_conn_tstamp_find(i);
+ if (tstamp && tstamp->stop == 0)
+ tstamp->stop = ktime_to_ns(ktime_get_real());
/* If we fail to deliver the event, death_by_timeout() will retry */
if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1497,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net)
ret = nf_conntrack_acct_init(net);
if (ret < 0)
goto err_acct;
+ ret = nf_conntrack_tstamp_init(net);
+ if (ret < 0)
+ goto err_tstamp;
ret = nf_conntrack_ecache_init(net);
if (ret < 0)
goto err_ecache;
@@ -1504,6 +1528,8 @@ static int nf_conntrack_init_net(struct net *net)
return 0;
err_ecache:
+ nf_conntrack_tstamp_fini(net);
+err_tstamp:
nf_conntrack_acct_fini(net);
err_acct:
nf_conntrack_expect_fini(net);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9eabaa6..715d56c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -42,6 +42,7 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@ nla_put_failure:
return -1;
}
+static int
+ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+ struct nlattr *nest_count;
+ const struct nf_conn_tstamp *tstamp;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (!tstamp)
+ return 0;
+
+ nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
+ if (!nest_count)
+ goto nla_put_failure;
+
+ NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start));
+ if (tstamp->stop != 0) {
+ NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP,
+ cpu_to_be64(tstamp->stop));
+ }
+ nla_nest_end(skb, nest_count);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
#ifdef CONFIG_NF_CONNTRACK_MARK
static inline int
ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
ctnetlink_dump_timeout(skb, ct) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0 ||
ctnetlink_dump_protoinfo(skb, ct) < 0 ||
ctnetlink_dump_helpinfo(skb, ct) < 0 ||
ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct)
}
static inline size_t
+ctnetlink_timestamp_size(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
+ return 0;
+ return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
+#else
+ return 0;
+#endif
+}
+
+static inline size_t
ctnetlink_nlmsg_size(const struct nf_conn *ct)
{
return NLMSG_ALIGN(sizeof(struct nfgenmsg))
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
+ nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+ nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
+ ctnetlink_counters_size(ct)
+ + ctnetlink_timestamp_size(ct)
+ nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+ nla_total_size(0) /* CTA_PROTOINFO */
+ nla_total_size(0) /* CTA_HELP */
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
if (events & (1 << IPCT_DESTROY)) {
if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+ ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0)
goto nla_put_failure;
} else {
if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -1360,6 +1403,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
/* we must add conntrack extensions before confirmation. */
ct->status |= IPS_CONFIRMED;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 8257bf6..69107fd 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
#include <linux/rculist_nulls.h>
MODULE_LICENSE("GPL");
@@ -46,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple);
struct ct_iter_state {
struct seq_net_private p;
unsigned int bucket;
+ u_int64_t time_now;
};
static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -96,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
+ struct ct_iter_state *st = seq->private;
+
+ st->time_now = ktime_to_ns(ktime_get_real());
rcu_read_lock();
return ct_get_idx(seq, *pos);
}
@@ -135,6 +140,39 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+static u_int64_t ct_delta_time(u_int64_t time_now, const struct nf_conn *ct)
+{
+ struct nf_conn_tstamp *tstamp;
+
+ tstamp = nf_conn_tstamp_find(ct);
+ if (tstamp) {
+ u_int64_t delta_time = time_now - tstamp->start;
+ return delta_time > 0 ? div_s64(delta_time, NSEC_PER_SEC) : 0;
+ }
+ return -1;
+}
+
+static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+ struct ct_iter_state *st = s->private;
+ u_int64_t delta_time;
+
+ delta_time = ct_delta_time(st->time_now, ct);
+ if (delta_time < 0)
+ return 0;
+
+ return seq_printf(s, "delta-time=%llu ",
+ (unsigned long long)delta_time);
+}
+#else
+static inline int
+ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+ return 0;
+}
+#endif
+
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -203,6 +241,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
#endif
+ if (ct_show_delta_time(s, ct))
+ goto release;
+
if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
goto release;
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644
index 0000000..af7dd31
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,120 @@
+/*
+ * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+
+static int nf_ct_tstamp __read_mostly;
+
+module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
+MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table tstamp_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_timestamp",
+ .data = &init_net.ct.sysctl_tstamp,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type tstamp_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_tstamp),
+ .align = __alignof__(struct nf_conn_tstamp),
+ .id = NF_CT_EXT_TSTAMP,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out;
+
+ table[0].data = &net->ct.sysctl_tstamp;
+
+ net->ct.tstamp_sysctl_header = register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.tstamp_sysctl_header) {
+ printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
+ goto out_register;
+ }
+ return 0;
+
+out_register:
+ kfree(table);
+out:
+ return -ENOMEM;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->ct.tstamp_sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
+ kfree(table);
+}
+#else
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+ return 0;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_tstamp_init(struct net *net)
+{
+ int ret;
+
+ net->ct.sysctl_tstamp = nf_ct_tstamp;
+
+ if (net_eq(net, &init_net)) {
+ ret = nf_ct_extend_register(&tstamp_extend);
+ if (ret < 0) {
+ printk(KERN_ERR "nf_ct_tstamp: Unable to register "
+ "extension\n");
+ goto out_extend_register;
+ }
+ }
+
+ ret = nf_conntrack_tstamp_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
+
+ return 0;
+
+out_sysctl:
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&tstamp_extend);
+out_extend_register:
+ return ret;
+}
+
+void nf_conntrack_tstamp_fini(struct net *net)
+{
+ nf_conntrack_tstamp_fini_sysctl(net);
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&tstamp_extend);
+}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 79/79] netfilter: nf_conntrack: fix lifetime display for disabled connections
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Patrick McHardy <kaber@trash.net>
When no tstamp extension exists, ct_delta_time() returns -1, which is
then assigned to an u64 and tested for negative values to decide
whether to display the lifetime. This obviously doesn't work, use
a s64 and merge the two minor functions into one.
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
net/netfilter/nf_conntrack_standalone.c | 29 ++++++++++++-----------------
1 files changed, 12 insertions(+), 17 deletions(-)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 69107fd..0ae1428 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -141,29 +141,24 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
-static u_int64_t ct_delta_time(u_int64_t time_now, const struct nf_conn *ct)
+static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
+ struct ct_iter_state *st = s->private;
struct nf_conn_tstamp *tstamp;
+ s64 delta_time;
tstamp = nf_conn_tstamp_find(ct);
if (tstamp) {
- u_int64_t delta_time = time_now - tstamp->start;
- return delta_time > 0 ? div_s64(delta_time, NSEC_PER_SEC) : 0;
+ delta_time = st->time_now - tstamp->start;
+ if (delta_time > 0)
+ delta_time = div_s64(delta_time, NSEC_PER_SEC);
+ else
+ delta_time = 0;
+
+ return seq_printf(s, "delta-time=%llu ",
+ (unsigned long long)delta_time);
}
- return -1;
-}
-
-static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
-{
- struct ct_iter_state *st = s->private;
- u_int64_t delta_time;
-
- delta_time = ct_delta_time(st->time_now, ct);
- if (delta_time < 0)
- return 0;
-
- return seq_printf(s, "delta-time=%llu ",
- (unsigned long long)delta_time);
+ return 0;
}
#else
static inline int
--
1.7.2.3
^ permalink raw reply related
* [PATCH 01/79] netfilter: nf_conntrack: don't always initialize ct->proto
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Changli Gao <xiaosuo@gmail.com>
ct->proto is big(60 bytes) due to structure ip_ct_tcp, and we don't need
to initialize the whole for all the other protocols. This patch moves
proto to the end of structure nf_conn, and pushes the initialization down
to the individual protocols.
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/net/netfilter/nf_conntrack.h | 6 +++---
net/netfilter/nf_conntrack_core.c | 3 ++-
net/netfilter/nf_conntrack_netlink.c | 1 +
net/netfilter/nf_conntrack_proto_dccp.c | 3 +++
net/netfilter/nf_conntrack_proto_sctp.c | 1 +
net/netfilter/nf_conntrack_proto_tcp.c | 14 +++-----------
6 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index caf17db..abfff1e 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -116,14 +116,14 @@ struct nf_conn {
u_int32_t secmark;
#endif
- /* Storage reserved for other modules: */
- union nf_conntrack_proto proto;
-
/* Extensions */
struct nf_ct_ext *ext;
#ifdef CONFIG_NET_NS
struct net *ct_net;
#endif
+
+ /* Storage reserved for other modules, must be the last member */
+ union nf_conntrack_proto proto;
};
static inline struct nf_conn *
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 27a5ea6..0ba7d48 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -651,7 +651,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
* and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
*/
memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
- sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+ offsetof(struct nf_conn, proto) -
+ offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
spin_lock_init(&ct->lock);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b729ace..7f59be8 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1375,6 +1375,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
}
#endif
+ memset(&ct->proto, 0, sizeof(ct->proto));
if (cda[CTA_PROTOINFO]) {
err = ctnetlink_change_protoinfo(ct, cda);
if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5292560..9ae57c5 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
ct->proto.dccp.state = CT_DCCP_NONE;
+ ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+ ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+ ct->proto.dccp.handshake_seq = 0;
return true;
out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6049c2..6f4ee70 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
test_bit(SCTP_CID_COOKIE_ACK, map))
return false;
+ memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
new_state = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3fb2b73..6f38d0e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
BUG_ON(th == NULL);
/* Don't need lock here: this conntrack not in circulation yet */
- new_state
- = tcp_conntracks[0][get_conntrack_index(th)]
- [TCP_CONNTRACK_NONE];
+ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
/* Invalid: delete conntrack */
if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
}
if (new_state == TCP_CONNTRACK_SYN_SENT) {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
/* SYN packet */
ct->proto.tcp.seen[0].td_end =
segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.tcp.seen[0].td_end;
tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
- ct->proto.tcp.seen[1].flags = 0;
} else if (nf_ct_tcp_loose == 0) {
/* Don't try to pick up connections. */
return false;
} else {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
/*
* We are in the middle of a connection,
* its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.tcp.seen[0].td_maxend =
ct->proto.tcp.seen[0].td_end +
ct->proto.tcp.seen[0].td_maxwin;
- ct->proto.tcp.seen[0].td_scale = 0;
/* We assume SACK and liberal window checking to handle
* window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
IP_CT_TCP_FLAG_BE_LIBERAL;
}
- ct->proto.tcp.seen[1].td_end = 0;
- ct->proto.tcp.seen[1].td_maxend = 0;
- ct->proto.tcp.seen[1].td_maxwin = 0;
- ct->proto.tcp.seen[1].td_scale = 0;
-
/* tcp_packet will set them */
- ct->proto.tcp.state = TCP_CONNTRACK_NONE;
ct->proto.tcp.last_index = TCP_NONE_SET;
pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
--
1.7.2.3
^ permalink raw reply related
* [PATCH 00/79] netfilter: netfilter update
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
following is a first batch of netfilter patches for 2.6.39, containing:
- cleanups and removal of atomic operations from Changli Gao
- RCU annotations, removal of atomic operations and cleanups from Eric
- a new queue bypass option for nfnetlink_queue from Florian Westphal
- arp_tables support for xt_CLASSIFY from Frederic Leroy
- a new backup protocol for IPVS from and Schillstrom
- IPVS netns support from Hans Schillstrom
- type and header cleanups from Jan
- missing module aliases from Jan
- a new SNMP conntrack helper from Jiri Olsa
- optional timestamping for conntrack entries to simplify userspace IPFIX,
implementations, from Pablo
- a new AUDIT target for logging packets through the audit framework,
from Thomas
- Improvements for the IPVS persistence engine from Simon
ipset from Jozsef will follow soon, once I'm done reviewing it.
Please pull from
git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6.git master
Thanks!
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox