* [rfc v2 01/10] ipvs network name space aware: include files
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 02/10] ipvs network name space aware: app Simon Horman
` (10 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs-netns-1.patch --]
[-- Type: text/plain, Size: 14882 bytes --]
This part contains the include files
where include/net/netns/ip_vs.h is new and contains all moved vars.
SUMMARY
include/net/ip_vs.h | 136 ++++---
include/net/net_namespace.h | 2 +
include/net/netns/ip_vs.h | 112 +++++
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
---
* Simon Horman
- Remove now unused sysctl variable declarations from include/net/ip_vs.h
- Handle conntract and snat_reroute sysctls
Index: lvs-test-2.6/include/net/ip_vs.h
===================================================================
--- lvs-test-2.6.orig/include/net/ip_vs.h 2010-10-22 21:48:31.000000000 +0200
+++ lvs-test-2.6/include/net/ip_vs.h 2010-10-22 21:48:57.000000000 +0200
@@ -290,6 +290,7 @@ struct iphdr;
struct ip_vs_conn;
struct ip_vs_app;
struct sk_buff;
+struct ip_vs_proto_data;
struct ip_vs_protocol {
struct ip_vs_protocol *next;
@@ -304,6 +305,10 @@ struct ip_vs_protocol {
void (*exit)(struct ip_vs_protocol *pp);
+ void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
+ void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
int (*conn_schedule)(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp);
@@ -339,11 +344,11 @@ struct ip_vs_protocol {
const struct sk_buff *skb,
struct ip_vs_protocol *pp);
- int (*register_app)(struct ip_vs_app *inc);
+ int (*register_app)(struct net *net, struct ip_vs_app *inc);
- void (*unregister_app)(struct ip_vs_app *inc);
+ void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
- int (*app_conn_bind)(struct ip_vs_conn *cp);
+ int (*app_conn_bind)(struct net *net, struct ip_vs_conn *cp);
void (*debug_packet)(int af, struct ip_vs_protocol *pp,
const struct sk_buff *skb,
@@ -352,10 +357,24 @@ struct ip_vs_protocol {
void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
- int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+ /*
+ int (*set_state_timeout)(struct ip_vs_protocol *pp,
+ char *sname,
+ int to); Not used -Hans S */
+};
+/*
+ * protocol data per netns
+ */
+struct ip_vs_proto_data {
+ struct ip_vs_proto_data *next;
+ struct ip_vs_protocol *pp;
+ int *timeout_table; /* protocol timeout table */
+ atomic_t appcnt; /* counter of proto app incs. */
};
-extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_proto_data * ip_vs_proto_data_get(struct net *net,
+ unsigned short proto);
struct ip_vs_conn_param {
const union nf_inet_addr *caddr;
@@ -368,6 +387,8 @@ struct ip_vs_conn_param {
const struct ip_vs_pe *pe;
char *pe_data;
__u8 pe_data_len;
+
+ struct net *net;
};
/*
@@ -414,6 +435,8 @@ struct ip_vs_conn {
int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp);
+ struct net *net; /* netns ptr needed in timer */
+
/* Note: we can group the following members into a structure,
in order to save more space, and the following members are
only used in VS/NAT anyway */
@@ -674,7 +697,7 @@ enum {
IP_VS_DIR_LAST,
};
-static inline void ip_vs_conn_fill_param(int af, int protocol,
+static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
const union nf_inet_addr *caddr,
__be16 cport,
const union nf_inet_addr *vaddr,
@@ -689,6 +712,7 @@ static inline void ip_vs_conn_fill_param
p->vport = vport;
p->pe = NULL;
p->pe_data = NULL;
+ p->net = net;
}
struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
@@ -714,7 +738,8 @@ static inline void __ip_vs_conn_put(stru
atomic_dec(&cp->refcnt);
}
extern void ip_vs_conn_put(struct ip_vs_conn *cp);
-extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
+extern void ip_vs_conn_fill_cport(struct net *net, struct ip_vs_conn *cp,
+ __be16 cport);
struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr,
@@ -724,9 +749,9 @@ extern void ip_vs_conn_expire_now(struct
extern const char * ip_vs_state_name(__u16 proto, int state);
-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
-extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_check_template(struct net *net, struct ip_vs_conn *ct);
+extern void ip_vs_random_dropentry(struct net *net);
extern int ip_vs_conn_init(void);
extern void ip_vs_conn_cleanup(void);
@@ -796,12 +821,15 @@ ip_vs_control_add(struct ip_vs_conn *cp,
* (from ip_vs_app.c)
*/
#define IP_VS_APP_MAX_PORTS 8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
-extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern int ip_vs_bind_app(struct net *net, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp);
extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net,
+ struct ip_vs_app *app,
+ __u16 proto,
+ __u16 port);
extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
@@ -823,7 +851,7 @@ extern void ip_vs_pe_put(struct ip_vs_pe
extern int ip_vs_protocol_init(void);
extern void ip_vs_protocol_cleanup(void);
extern void ip_vs_protocol_timeout_change(int flags);
-extern int *ip_vs_create_timeout_table(int *table, int size);
+extern int *ip_vs_create_timeout_table(const int *table, int size);
extern int
ip_vs_set_state_timeout(int *table, int num, const char *const *names,
const char *name, int to);
@@ -856,22 +884,14 @@ ip_vs_schedule(struct ip_vs_service *svc
extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp);
-
/*
* IPVS control data and functions (from ip_vs_ctl.c)
*/
-extern int sysctl_ip_vs_cache_bypass;
-extern int sysctl_ip_vs_expire_nodest_conn;
-extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
-extern int sysctl_ip_vs_nat_icmp_send;
-extern int sysctl_ip_vs_conntrack;
-extern int sysctl_ip_vs_snat_reroute;
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -880,7 +900,7 @@ static inline void ip_vs_service_put(str
}
extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);
extern int ip_vs_use_count_inc(void);
@@ -888,23 +908,22 @@ extern void ip_vs_use_count_dec(void);
extern int ip_vs_control_init(void);
extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
+ip_vs_find_dest(struct net *net, int af,
+ const union nf_inet_addr *daddr, __be16 dport,
const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
-extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
-
+extern struct ip_vs_dest *ip_vs_try_bind_dest(struct net *net,
+ struct ip_vs_conn *cp);
/*
* IPVS sync daemon data and function prototypes
* (from ip_vs_sync.c)
*/
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+ __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_sync_init(void);
+extern void ip_vs_sync_cleanup(void);
/*
@@ -912,8 +931,8 @@ extern void ip_vs_sync_conn(struct ip_vs
*/
extern int ip_vs_estimator_init(void);
extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
/*
@@ -929,8 +948,8 @@ extern int ip_vs_tunnel_xmit
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern int ip_vs_dr_xmit
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
-extern int ip_vs_icmp_xmit
-(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
+extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp, int offset);
extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
#ifdef CONFIG_IP_VS_IPV6
Index: lvs-test-2.6/include/net/net_namespace.h
===================================================================
--- lvs-test-2.6.orig/include/net/net_namespace.h 2010-10-22 21:48:31.000000000 +0200
+++ lvs-test-2.6/include/net/net_namespace.h 2010-10-22 21:48:40.000000000 +0200
@@ -15,6 +15,7 @@
#include <net/netns/ipv4.h>
#include <net/netns/ipv6.h>
#include <net/netns/dccp.h>
+#include <net/netns/ip_vs.h>
#include <net/netns/x_tables.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netns/conntrack.h>
@@ -91,6 +92,7 @@ struct net {
struct sk_buff_head wext_nlevents;
#endif
struct net_generic *gen;
+ struct netns_ipvs *ipvs;
};
Index: lvs-test-2.6/include/net/netns/ip_vs.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ lvs-test-2.6/include/net/netns/ip_vs.h 2010-10-22 21:48:40.000000000 +0200
@@ -0,0 +1,117 @@
+#ifndef __NETNS_IP_VS_H_
+#define __NETNS_IP_VS_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/list_nulls.h>
+#include <linux/ip_vs.h>
+#include <asm/atomic.h>
+#include <linux/in.h>
+
+struct ip_vs_stats;
+struct ip_vs_sync_buff;
+struct ctl_table_header;
+
+struct netns_ipvs {
+ int inc; /* incarnation */
+ /* ip_vs_app */
+ struct list_head app_list;
+ struct mutex app_mutex;
+ struct lock_class_key app_key; /* Grrr, for mutex debuging */
+ /* ip_vs_conn */
+ unsigned char conn_cname[20]; /* Connection hash name */
+ struct list_head *conn_tab; /* Connection hash: for in and output packets */
+ struct kmem_cache *conn_cachep; /* SLAB cache for IPVS connections */
+ atomic_t conn_count; /* counter for current IPVS connections */
+ atomic_t conn_no_cport_cnt; /* counter for no client port connections */
+ unsigned int conn_rnd; /* random value for IPVS connection hash */
+ /* ip_vs_ctl */
+ struct ip_vs_stats *ctl_stats; /* Statistics & estimator */
+ /* Hash table: for virtual service lookups */
+ #define IP_VS_SVC_TAB_BITS 8
+ #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
+ #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
+ /* the service table hashed by <protocol, addr, port> */
+ struct list_head ctl_svc_table[IP_VS_SVC_TAB_SIZE];
+ /* the service table hashed by fwmark */
+ struct list_head ctl_fwm_table[IP_VS_SVC_TAB_SIZE];
+ /* Hash table: for real service lookups */
+ #define IP_VS_RTAB_BITS 4
+ #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+ #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+ struct list_head ctl_rtable[IP_VS_RTAB_SIZE]; /* Hash table: for real service */
+ struct list_head ctl_dest_trash; /* Trash for destinations */
+ atomic_t ctl_ftpsvc_counter;
+ atomic_t ctl_nullsvc_counter;
+ /* sys-ctl struct */
+ struct ctl_table_header *sysctl_hdr;
+ struct ctl_table *sysctl_tbl;
+ /* sysctl variables */
+ int sysctl_amemthresh;
+ int sysctl_am_droprate;
+ int sysctl_drop_entry;
+ int sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+ int sysctl_conntrack;
+#endif
+ int sysctl_secure_tcp;
+ int sysctl_snat_reroute;
+ int sysctl_cache_bypass;
+ int sysctl_expire_nodest_conn;
+ int sysctl_expire_quiescent_template;
+ int sysctl_sync_threshold[2];
+ int sysctl_nat_icmp_send;
+
+ /* ip_vs_proto */
+ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
+ struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+ /* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ #define TCP_APP_TAB_BITS 4
+ #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
+ #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
+ struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+ spinlock_t tcp_app_lock;
+#endif
+ /* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ #define UDP_APP_TAB_BITS 4
+ #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
+ #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
+ struct list_head udp_apps[UDP_APP_TAB_SIZE];
+ spinlock_t udp_app_lock;
+#endif
+ /* ip_vs_proto_sctp */
+ #define SCTP_APP_TAB_BITS 4
+ #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
+ #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
+ /* Hash table for SCTP application incarnations */
+ struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
+ spinlock_t sctp_app_lock;
+
+ /* ip_vs_est */
+ struct list_head est_list; /* estimator list */
+ spinlock_t est_lock;
+ /* ip_vs_sync */
+ struct list_head sync_queue;
+ spinlock_t sync_lock;
+ struct ip_vs_sync_buff *sync_buff;
+ spinlock_t sync_buff_lock;
+ struct sockaddr_in sync_mcast_addr;
+ /* sync daemon tasks */
+ struct task_struct *sync_master_thread;
+ struct task_struct *sync_backup_thread;
+ /* the maximum length of sync (sending/receiving) message */
+ int sync_send_mesg_maxlen;
+ int sync_recv_mesg_maxlen;
+
+ volatile int sync_state;
+ volatile int master_syncid;
+ volatile int backup_syncid;
+ /* multicast interface name */
+ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+
+};
+
+#endif /*__NETNS_IP_VS_H_*/
^ permalink raw reply [flat|nested] 14+ messages in thread* [rfc v2 02/10] ipvs network name space aware: app
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
2010-10-22 20:09 ` [rfc v2 01/10] ipvs network name space aware: include files Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 03/10] ipvs network name space aware: conn Simon Horman
` (9 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel, Daniel Lezcano
Cc: Hans Schillstrom, Julian Anastasov, Wensong Zhang
[-- Attachment #1: ipvs-netns-2.patch --]
[-- Type: text/plain, Size: 7472 bytes --]
This is patch 2/9 previous was just a copy of patch 1/9
This patch just contains ip_vs_app.c
There is nothing special whith this file,
just the normal,
- moving to vars to struct ipvs
- adding per netns init and exit
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_app.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_app.c 2010-10-22 21:33:42.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_app.c 2010-10-22 21:33:52.000000000 +0200
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
/*
* Get an ip_vs_app object
*/
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct
* Allocate/initialize app incarnation and register it in proto apps.
*/
static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
{
struct ip_vs_protocol *pp;
struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app,
}
}
- ret = pp->register_app(inc);
+ ret = pp->register_app(net, inc);
if (ret)
goto out;
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app,
* Release app incarnation
*/
static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_protocol *pp;
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *
return;
if (pp->unregister_app)
- pp->unregister_app(inc);
+ pp->unregister_app(net, inc);
IP_VS_DBG(9, "%s App %s:%u unregistered\n",
pp->name, inc->name, ntohs(inc->port));
@@ -167,16 +163,16 @@ void ip_vs_app_inc_put(struct ip_vs_app
/*
* Register an application incarnation in protocol applications
*/
-int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
{
int result;
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&net->ipvs->app_mutex);
- result = ip_vs_app_inc_new(app, proto, port);
+ result = ip_vs_app_inc_new(net, app, proto, port);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&net->ipvs->app_mutex);
return result;
}
@@ -185,16 +181,16 @@ register_ip_vs_app_inc(struct ip_vs_app
/*
* ip_vs_app registration routine
*/
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
/* increase the module use count */
ip_vs_use_count_inc();
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&net->ipvs->app_mutex);
- list_add(&app->a_list, &ip_vs_app_list);
+ list_add(&app->a_list, &net->ipvs->app_list);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&net->ipvs->app_mutex);
return 0;
}
@@ -204,19 +200,19 @@ int register_ip_vs_app(struct ip_vs_app
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
*/
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
struct ip_vs_app *inc, *nxt;
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&net->ipvs->app_mutex);
list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
- ip_vs_app_inc_release(inc);
+ ip_vs_app_inc_release(net, inc);
}
list_del(&app->a_list);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&net->ipvs->app_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
@@ -226,9 +222,9 @@ void unregister_ip_vs_app(struct ip_vs_a
/*
* Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
*/
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct net *net, struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
{
- return pp->app_conn_bind(cp);
+ return pp->app_conn_bind(net, cp);
}
@@ -481,11 +477,12 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *
* /proc/net/ip_vs_app entry function
*/
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct net *net, loff_t pos)
{
struct ip_vs_app *app, *inc;
+ struct netns_ipvs *ipvs = net->ipvs;
- list_for_each_entry(app, &ip_vs_app_list, a_list) {
+ list_for_each_entry(app, &ipvs->app_list, a_list) {
list_for_each_entry(inc, &app->incs_list, a_list) {
if (pos-- == 0)
return inc;
@@ -497,19 +494,22 @@ static struct ip_vs_app *ip_vs_app_idx(l
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
- mutex_lock(&__ip_vs_app_mutex);
+ struct net *net = seq_file_net(seq);
+ mutex_lock(&net->ipvs->app_mutex);
- return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ip_vs_app_idx(net, *pos - 1) : SEQ_START_TOKEN;
}
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_app *inc, *app;
struct list_head *e;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net->ipvs;
++*pos;
if (v == SEQ_START_TOKEN)
- return ip_vs_app_idx(0);
+ return ip_vs_app_idx(net, 0);
inc = v;
app = inc->app;
@@ -518,7 +518,7 @@ static void *ip_vs_app_seq_next(struct s
return list_entry(e, struct ip_vs_app, a_list);
/* go on to next application */
- for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+ for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
app = list_entry(e, struct ip_vs_app, a_list);
list_for_each_entry(inc, &app->incs_list, a_list) {
return inc;
@@ -529,7 +529,9 @@ static void *ip_vs_app_seq_next(struct s
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
- mutex_unlock(&__ip_vs_app_mutex);
+ struct net *net = seq_file_net(seq);
+
+ mutex_unlock(&net->ipvs->app_mutex);
}
static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +559,8 @@ static const struct seq_operations ip_vs
static int ip_vs_app_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_app_seq_ops);
+
+ return seq_open_net(inode,file, &ip_vs_app_seq_ops, sizeof(struct seq_net_private));
}
static const struct file_operations ip_vs_app_fops = {
@@ -565,19 +568,38 @@ static const struct file_operations ip_v
.open = ip_vs_app_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_private,
};
#endif
-int __init ip_vs_app_init(void)
+static int __net_init __ip_vs_app_init(struct net *net)
{
- /* we will replace it with proc_net_ipvs_create() soon */
- proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+ INIT_LIST_HEAD(&net->ipvs->app_list);
+ __mutex_init(&net->ipvs->app_mutex,"ipvs->app_mutex", &net->ipvs->app_key);
+ proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
return 0;
}
+static void __net_exit __ip_vs_app_cleanup(struct net *net)
+{
+ proc_net_remove(net, "ip_vs_app");
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+ .init = __ip_vs_app_init,
+ .exit = __ip_vs_app_cleanup,
+};
+
+int __init ip_vs_app_init(void)
+{
+ int rv;
+
+ rv = register_pernet_subsys(&ip_vs_app_ops);
+ return rv;
+}
+
void ip_vs_app_cleanup(void)
{
- proc_net_remove(&init_net, "ip_vs_app");
+ unregister_pernet_subsys(&ip_vs_app_ops);
}
^ permalink raw reply [flat|nested] 14+ messages in thread* [rfc v2 03/10] ipvs network name space aware: conn
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
2010-10-22 20:09 ` [rfc v2 01/10] ipvs network name space aware: include files Simon Horman
2010-10-22 20:09 ` [rfc v2 02/10] ipvs network name space aware: app Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-26 22:35 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 04/10] ipvs network name space aware: core Simon Horman
` (8 subsequent siblings)
11 siblings, 1 reply; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs-netns-3.patch --]
[-- Type: text/plain, Size: 19853 bytes --]
This patch just contains ip_vs_conn.c
and does the normal
- moving to vars to struct ipvs
- adding per netns init and exit
proc_fs required some extra work with adding/chaning private data to get the net ptr.
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_conn.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_conn.c 2010-10-22 21:33:39.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_conn.c 2010-10-22 21:34:55.000000000 +0200
@@ -56,23 +56,12 @@ MODULE_PARM_DESC(conn_tab_bits, "Set con
int ip_vs_conn_tab_size;
int ip_vs_conn_tab_mask;
-/*
- * Connection hash table: for input and output packets lookups of IPVS
- */
-static struct list_head *ip_vs_conn_tab;
-
-/* SLAB cache for IPVS connections */
-static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
-
-/* counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
-/* counter for no client port connections */
-static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
-
/* random value for IPVS connection hash */
static unsigned int ip_vs_conn_rnd;
+/* cache name cnt */
+static atomic_t conn_cache_nr = ATOMIC_INIT(0);
+
/*
* Fine locking granularity for big connection hash table
*/
@@ -173,8 +162,8 @@ static unsigned int ip_vs_conn_hashkey_c
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
- NULL, 0, &p);
+ ip_vs_conn_fill_param(NULL, cp->af, cp->protocol, &cp->caddr,
+ cp->cport, NULL, 0, &p);
if (cp->dest && cp->dest->svc->pe) {
p.pe = cp->dest->svc->pe;
@@ -189,7 +178,7 @@ static unsigned int ip_vs_conn_hashkey_c
* Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
* returns bool success.
*/
-static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
+static inline int ip_vs_conn_hash(struct net *net, struct ip_vs_conn *cp)
{
unsigned hash;
int ret;
@@ -204,7 +193,7 @@ static inline int ip_vs_conn_hash(struct
spin_lock(&cp->lock);
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
- list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
+ list_add(&cp->c_list, &net->ipvs->conn_tab[hash]);
cp->flags |= IP_VS_CONN_F_HASHED;
atomic_inc(&cp->refcnt);
ret = 1;
@@ -262,12 +251,13 @@ __ip_vs_conn_in_get(const struct ip_vs_c
{
unsigned hash;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = p->net->ipvs;
hash = ip_vs_conn_hashkey_param(p, false);
ct_read_lock(hash);
- list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ list_for_each_entry(cp, &ipvs->conn_tab[hash], c_list) {
if (cp->af == p->af &&
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
@@ -286,12 +276,13 @@ __ip_vs_conn_in_get(const struct ip_vs_c
return NULL;
}
-struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
+struct ip_vs_conn *
+ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
{
struct ip_vs_conn *cp;
cp = __ip_vs_conn_in_get(p);
- if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
+ if (!cp && atomic_read(&p->net->ipvs->conn_no_cport_cnt)) {
struct ip_vs_conn_param cport_zero_p = *p;
cport_zero_p.cport = 0;
cp = __ip_vs_conn_in_get(&cport_zero_p);
@@ -313,16 +304,19 @@ ip_vs_conn_fill_param_proto(int af, cons
struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
+ struct net *net = dev_net(skb->dev);
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
return 1;
if (likely(!inverse))
- ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
+ ip_vs_conn_fill_param(net, af, iph->protocol,
+ &iph->saddr, pptr[0],
&iph->daddr, pptr[1], p);
else
- ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
+ ip_vs_conn_fill_param(net, af, iph->protocol,
+ &iph->daddr, pptr[1],
&iph->saddr, pptr[0], p);
return 0;
}
@@ -347,12 +341,13 @@ struct ip_vs_conn *ip_vs_ct_in_get(const
{
unsigned hash;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = p->net->ipvs;
hash = ip_vs_conn_hashkey_param(p, false);
ct_read_lock(hash);
- list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ list_for_each_entry(cp, &ipvs->conn_tab[hash], c_list) {
if (p->pe_data && p->pe->ct_match) {
if (p->pe->ct_match(p, cp))
goto out;
@@ -394,6 +389,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(co
{
unsigned hash;
struct ip_vs_conn *cp, *ret=NULL;
+ struct netns_ipvs *ipvs = p->net->ipvs;
/*
* Check for "full" addressed entries
@@ -402,7 +398,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(co
ct_read_lock(hash);
- list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ list_for_each_entry(cp, &ipvs->conn_tab[hash], c_list) {
if (cp->af == p->af &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
@@ -457,19 +453,19 @@ void ip_vs_conn_put(struct ip_vs_conn *c
/*
* Fill a no_client_port connection with a client port number
*/
-void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
+void ip_vs_conn_fill_cport(struct net *net, struct ip_vs_conn *cp, __be16 cport)
{
if (ip_vs_conn_unhash(cp)) {
spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
- atomic_dec(&ip_vs_conn_no_cport_cnt);
+ atomic_dec(&net->ipvs->conn_no_cport_cnt);
cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
cp->cport = cport;
}
spin_unlock(&cp->lock);
/* hash on new dport */
- ip_vs_conn_hash(cp);
+ ip_vs_conn_hash(net, cp);
}
}
@@ -606,12 +602,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, s
* Check if there is a destination for the connection, if so
* bind the connection to the destination.
*/
-struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+struct ip_vs_dest *ip_vs_try_bind_dest(struct net *net, struct ip_vs_conn *cp)
{
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
- dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+ dest = ip_vs_find_dest(net, cp->af, &cp->daddr, cp->dport,
&cp->vaddr, cp->vport,
cp->protocol);
ip_vs_bind_dest(cp, dest);
@@ -683,7 +679,7 @@ static inline void ip_vs_unbind_dest(str
* If available, return 1, otherwise invalidate this connection
* template and return 0.
*/
-int ip_vs_check_template(struct ip_vs_conn *ct)
+int ip_vs_check_template(struct net *net, struct ip_vs_conn *ct)
{
struct ip_vs_dest *dest = ct->dest;
@@ -692,7 +688,7 @@ int ip_vs_check_template(struct ip_vs_co
*/
if ((dest == NULL) ||
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- (sysctl_ip_vs_expire_quiescent_template &&
+ (net->ipvs->sysctl_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
@@ -713,7 +709,7 @@ int ip_vs_check_template(struct ip_vs_co
ct->dport = htons(0xffff);
ct->vport = htons(0xffff);
ct->cport = 0;
- ip_vs_conn_hash(ct);
+ ip_vs_conn_hash(net, ct);
}
}
@@ -763,22 +759,22 @@ static void ip_vs_conn_expire(unsigned l
ip_vs_control_del(cp);
if (cp->flags & IP_VS_CONN_F_NFCT)
- ip_vs_conn_drop_conntrack(/cp);
+ ip_vs_conn_drop_conntrack(cp);
kfree(cp->pe_data);
if (unlikely(cp->app != NULL))
ip_vs_unbind_app(cp);
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
- atomic_dec(&ip_vs_conn_no_cport_cnt);
- atomic_dec(&ip_vs_conn_count);
+ atomic_dec(&cp->net->ipvs->conn_no_cport_cnt);
+ atomic_dec(&cp->net->ipvs->conn_count);
- kmem_cache_free(ip_vs_conn_cachep, cp);
+ kmem_cache_free(cp->net->ipvs->conn_cachep, cp);
return;
}
/* hash it back to the table */
- ip_vs_conn_hash(cp);
+ ip_vs_conn_hash(cp->net, cp);
expire_later:
IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
@@ -795,9 +791,9 @@ void ip_vs_conn_expire_now(struct ip_vs_
mod_timer(&cp->timer, jiffies);
}
-
/*
- * Create a new connection entry and hash it into the ip_vs_conn_tab
+ * Create a new connection entry and hash it into the ip_vs_conn_tab,
+ * netns ptr will be stored in ip_vs_con here.
*/
struct ip_vs_conn *
ip_vs_conn_new(const struct ip_vs_conn_param *p,
@@ -805,9 +801,12 @@ ip_vs_conn_new(const struct ip_vs_conn_p
struct ip_vs_dest *dest)
{
struct ip_vs_conn *cp;
- struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+ p->protocol);
+ struct ip_vs_protocol *pp;
+ struct netns_ipvs *ipvs = p->net->ipvs;
- cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+ cp = kmem_cache_zalloc(ipvs->conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
IP_VS_ERR_RL("%s(): no memory\n", __func__);
return NULL;
@@ -842,9 +841,9 @@ ip_vs_conn_new(const struct ip_vs_conn_p
atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0);
- atomic_inc(&ip_vs_conn_count);
+ atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
- atomic_inc(&ip_vs_conn_no_cport_cnt);
+ atomic_inc(&ipvs->conn_no_cport_cnt);
/* Bind the connection with a destination server */
ip_vs_bind_dest(cp, dest);
@@ -861,8 +860,12 @@ ip_vs_conn_new(const struct ip_vs_conn_p
#endif
ip_vs_bind_xmit(cp);
- if (unlikely(pp && atomic_read(&pp->appcnt)))
- ip_vs_bind_app(cp, pp);
+ cp->net = p->net; /* netns ptr needed in timer */
+ if (pd) {
+ pp = pd->pp;
+ if (unlikely(pp && atomic_read(&pd->appcnt)))
+ ip_vs_bind_app(p->net, cp, pp);
+ }
/*
* Allow conntrack to be preserved. By default, conntrack
@@ -875,11 +878,27 @@ ip_vs_conn_new(const struct ip_vs_conn_p
cp->flags |= IP_VS_CONN_F_NFCT;
/* Hash it in the ip_vs_conn_tab finally */
- ip_vs_conn_hash(cp);
+ ip_vs_conn_hash(p->net, cp);
return cp;
}
+struct ipvs_private {
+ struct seq_net_private p;
+ void *private;
+};
+
+static inline void ipvs_seq_priv_set(struct seq_file *seq, void *data)
+{
+ struct ipvs_private *ipriv=(struct ipvs_private *)seq->private;
+ ipriv->private = data;
+}
+
+static inline void *ipvs_seq_priv_get(struct seq_file *seq)
+{
+ return ((struct ipvs_private *)seq->private)->private;
+}
+
/*
* /proc/net/ip_vs_conn entries
*/
@@ -889,13 +908,15 @@ static void *ip_vs_conn_array(struct seq
{
int idx;
struct ip_vs_conn *cp;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net->ipvs;
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
ct_read_lock_bh(idx);
- list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+ list_for_each_entry(cp, &ipvs->conn_tab[idx], c_list) {
if (pos-- == 0) {
- seq->private = &ip_vs_conn_tab[idx];
- return cp;
+ ipvs_seq_priv_set(seq, &ipvs->conn_tab[idx]);
+ return cp;
}
}
ct_read_unlock_bh(idx);
@@ -906,15 +927,17 @@ static void *ip_vs_conn_array(struct seq
static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
{
- seq->private = NULL;
+ ipvs_seq_priv_set(seq, NULL);
return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
}
-
+ /* netns: conn_tab OK */
static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_conn *cp = v;
- struct list_head *e, *l = seq->private;
+ struct list_head *e, *l = ipvs_seq_priv_get(seq);
int idx;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net->ipvs;
++*pos;
if (v == SEQ_START_TOKEN)
@@ -924,27 +947,28 @@ static void *ip_vs_conn_seq_next(struct
if ((e = cp->c_list.next) != l)
return list_entry(e, struct ip_vs_conn, c_list);
- idx = l - ip_vs_conn_tab;
+ idx = l - ipvs->conn_tab;
ct_read_unlock_bh(idx);
while (++idx < ip_vs_conn_tab_size) {
ct_read_lock_bh(idx);
- list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
- seq->private = &ip_vs_conn_tab[idx];
+ list_for_each_entry(cp, &ipvs->conn_tab[idx], c_list) {
+ ipvs_seq_priv_set(seq, &ipvs->conn_tab[idx]);
return cp;
}
ct_read_unlock_bh(idx);
}
- seq->private = NULL;
+ ipvs_seq_priv_set(seq, NULL);
return NULL;
}
-
+/* netns: conn_tab OK */
static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
{
- struct list_head *l = seq->private;
+ struct list_head *l = ipvs_seq_priv_get(seq);
+ struct net *net = seq_file_net(seq);
if (l)
- ct_read_unlock_bh(l - ip_vs_conn_tab);
+ ct_read_unlock_bh(l - net->ipvs->conn_tab);
}
static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
@@ -1004,7 +1028,16 @@ static const struct seq_operations ip_vs
static int ip_vs_conn_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_conn_seq_ops);
+ int ret;
+ struct ipvs_private *priv;
+
+ ret = seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+ sizeof(struct ipvs_private));
+ if (!ret) {
+ priv = ((struct seq_file *)file->private_data)->private;
+ priv->private = NULL;
+ }
+ return ret;
}
static const struct file_operations ip_vs_conn_fops = {
@@ -1012,7 +1045,8 @@ static const struct file_operations ip_v
.open = ip_vs_conn_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_private,
+
};
static const char *ip_vs_origin_name(unsigned flags)
@@ -1067,7 +1101,17 @@ static const struct seq_operations ip_vs
static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_conn_sync_seq_ops);
+ int ret;
+ struct ipvs_private *ipriv;
+
+ ret = seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ipvs_private));
+ if (!ret) {
+ ipriv = ((struct seq_file *)file->private_data)->private;
+ ipriv->private = NULL;
+ }
+ return ret;
+// return seq_open(file, &ip_vs_conn_sync_seq_ops);
}
static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1075,7 +1119,7 @@ static const struct file_operations ip_v
.open = ip_vs_conn_sync_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_private,
};
#endif
@@ -1112,11 +1156,14 @@ static inline int todrop_entry(struct ip
return 1;
}
-/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+/* Called from keventd and must protect itself from softirqs
+ * netns: conn_tab OK
+ */
+void ip_vs_random_dropentry(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = net->ipvs;
/*
* Randomly scan 1/32 of the whole table every second
@@ -1129,7 +1176,7 @@ void ip_vs_random_dropentry(void)
*/
ct_write_lock_bh(hash);
- list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ list_for_each_entry(cp, &ipvs->conn_tab[hash], c_list) {
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
@@ -1167,11 +1214,13 @@ void ip_vs_random_dropentry(void)
/*
* Flush all the connection entries in the ip_vs_conn_tab
+ * netns: conn_tab OK
*/
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = net->ipvs;
flush_again:
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
@@ -1180,7 +1229,7 @@ static void ip_vs_conn_flush(void)
*/
ct_write_lock_bh(idx);
- list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+ list_for_each_entry(cp, &ipvs->conn_tab[idx], c_list) {
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
@@ -1194,16 +1243,17 @@ static void ip_vs_conn_flush(void)
/* the counter may be not NULL, because maybe some conn entries
are run by slow timer handler or unhashed but still referred */
- if (atomic_read(&ip_vs_conn_count) != 0) {
+ if (atomic_read(&ipvs->conn_count) != 0) {
schedule();
goto flush_again;
}
}
-int __init ip_vs_conn_init(void)
+int __net_init __ip_vs_conn_init(struct net *net)
{
int idx;
+ struct netns_ipvs *ipvs = net->ipvs;
/* Compute size and mask */
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1212,19 +1262,26 @@ int __init ip_vs_conn_init(void)
/*
* Allocate the connection hash table and initialize its list heads
*/
- ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size *
+ ipvs->conn_tab = vmalloc(ip_vs_conn_tab_size *
sizeof(struct list_head));
- if (!ip_vs_conn_tab)
+ if (!ipvs->conn_tab)
return -ENOMEM;
/* Allocate ip_vs_conn slab cache */
- ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
+ /* Todo: find a better way to name the cache */
+ snprintf(ipvs->conn_cname, sizeof(ipvs->conn_cname)-1,
+ "ipvs_conn_%d", atomic_read(&conn_cache_nr) );
+ atomic_inc(&conn_cache_nr);
+
+ ipvs->conn_cachep = kmem_cache_create(ipvs->conn_cname,
sizeof(struct ip_vs_conn), 0,
SLAB_HWCACHE_ALIGN, NULL);
- if (!ip_vs_conn_cachep) {
- vfree(ip_vs_conn_tab);
+ if (!ipvs->conn_cachep) {
+ vfree(ipvs->conn_tab);
return -ENOMEM;
}
+ atomic_set(&ipvs->conn_count, 0);
+ atomic_set(&ipvs->conn_no_cport_cnt, 0);
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
@@ -1234,31 +1291,46 @@ int __init ip_vs_conn_init(void)
sizeof(struct ip_vs_conn));
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
- INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
+ INIT_LIST_HEAD(&ipvs->conn_tab[idx]);
}
for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
}
- proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
- proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
-
- /* calculate the random value for connection hash */
- get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+ proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+ proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
return 0;
}
+/* Cleanup and release all netns related ... */
+static void __net_exit __ip_vs_conn_cleanup(struct net *net) {
+ /* flush all the connection entries first */
+ ip_vs_conn_flush(net);
+ /* Release the empty cache */
+ kmem_cache_destroy(net->ipvs->conn_cachep);
+ proc_net_remove(net, "ip_vs_conn");
+ proc_net_remove(net, "ip_vs_conn_sync");
+ vfree(net->ipvs->conn_tab);
+}
+static struct pernet_operations ipvs_conn_ops = {
+ .init = __ip_vs_conn_init,
+ .exit = __ip_vs_conn_cleanup,
+};
-void ip_vs_conn_cleanup(void)
+int __init ip_vs_conn_init(void)
{
- /* flush all the connection entries first */
- ip_vs_conn_flush();
+ int rv;
- /* Release the empty cache */
- kmem_cache_destroy(ip_vs_conn_cachep);
- proc_net_remove(&init_net, "ip_vs_conn");
- proc_net_remove(&init_net, "ip_vs_conn_sync");
- vfree(ip_vs_conn_tab);
+ rv = register_pernet_subsys(&ipvs_conn_ops);
+
+ /* calculate the random value for connection hash */
+ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+ return rv;
+}
+
+void ip_vs_conn_cleanup(void)
+{
+ unregister_pernet_subsys(&ipvs_conn_ops);
}
Index: lvs-test-2.6/include/net/ip_vs.h
===================================================================
--- lvs-test-2.6.orig/include/net/ip_vs.h 2010-10-22 21:34:49.000000000 +0200
+++ lvs-test-2.6/include/net/ip_vs.h 2010-10-22 21:35:00.000000000 +0200
@@ -1060,9 +1060,9 @@ static inline void ip_vs_notrack(struct
* Netfilter connection tracking
* (from ip_vs_nfct.c)
*/
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct net *net)
{
- return sysctl_ip_vs_conntrack;
+ return net->ipvs->sysctl_conntrack;
}
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1075,7 +1075,7 @@ extern void ip_vs_conn_drop_conntrack(st
#else
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct net *net)
{
return 0;
}
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [rfc v2 03/10] ipvs network name space aware: conn
2010-10-22 20:09 ` [rfc v2 03/10] ipvs network name space aware: conn Simon Horman
@ 2010-10-26 22:35 ` Simon Horman
0 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-26 22:35 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
On Fri, Oct 22, 2010 at 10:09:37PM +0200, Simon Horman wrote:
>
> This patch just contains ip_vs_conn.c
> and does the normal
> - moving to vars to struct ipvs
> - adding per netns init and exit
>
> proc_fs required some extra work with adding/chaning private data to get the net ptr.
>
> Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
Sorry, I messed this patch up a bit and will repost.
* I still have not addressed any of the problems beyond the
original scope of my post, which was to rebase Hans's changes.
In particular I have not addressed any of the issues that
Julian raised in response to my patches. Hans, are you planning
to look into that or should I take another stab at things?
^ permalink raw reply [flat|nested] 14+ messages in thread
* [rfc v2 04/10] ipvs network name space aware: core
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (2 preceding siblings ...)
2010-10-22 20:09 ` [rfc v2 03/10] ipvs network name space aware: conn Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 05/10] ipvs: Add ipvs_skbnet Simon Horman
` (7 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs-netns-4.patch --]
[-- Type: text/plain, Size: 14166 bytes --]
This patch just contains ip_vs_core.c
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_core.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_core.c 2010-10-22 21:38:58.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_core.c 2010-10-22 21:42:03.000000000 +0200
@@ -68,6 +68,8 @@ EXPORT_SYMBOL(ip_vs_conn_put);
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
/* ID used in ICMP lookups */
#define icmp_id(icmph) (((icmph)->un).echo.id)
@@ -108,6 +110,8 @@ static inline void
ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
+ struct net *net = dev_net(skb->dev);
+
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
spin_lock(&dest->stats.lock);
dest->stats.ustats.inpkts++;
@@ -119,10 +123,10 @@ ip_vs_in_stats(struct ip_vs_conn *cp, st
dest->svc->stats.ustats.inbytes += skb->len;
spin_unlock(&dest->svc->stats.lock);
- spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.ustats.inpkts++;
- ip_vs_stats.ustats.inbytes += skb->len;
- spin_unlock(&ip_vs_stats.lock);
+ spin_lock(&net->ipvs->ctl_stats->lock);
+ net->ipvs->ctl_stats->ustats.inpkts++;
+ net->ipvs->ctl_stats->ustats.inbytes += skb->len;
+ spin_unlock(&net->ipvs->ctl_stats->lock);
}
}
@@ -131,7 +135,10 @@ static inline void
ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
+ struct net *net = dev_net(skb->dev);
+
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
+ struct ip_vs_stats *ctl_stats = net->ipvs->ctl_stats;
spin_lock(&dest->stats.lock);
dest->stats.ustats.outpkts++;
dest->stats.ustats.outbytes += skb->len;
@@ -142,16 +149,16 @@ ip_vs_out_stats(struct ip_vs_conn *cp, s
dest->svc->stats.ustats.outbytes += skb->len;
spin_unlock(&dest->svc->stats.lock);
- spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.ustats.outpkts++;
- ip_vs_stats.ustats.outbytes += skb->len;
- spin_unlock(&ip_vs_stats.lock);
+ spin_lock(&ctl_stats->lock);
+ net->ipvs->ctl_stats->ustats.outpkts++;
+ net->ipvs->ctl_stats->ustats.outbytes += skb->len;
+ spin_unlock(&ctl_stats->lock);
}
}
static inline void
-ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
+ip_vs_conn_stats(struct net *net, struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
spin_lock(&cp->dest->stats.lock);
cp->dest->stats.ustats.conns++;
@@ -161,9 +168,9 @@ ip_vs_conn_stats(struct ip_vs_conn *cp,
svc->stats.ustats.conns++;
spin_unlock(&svc->stats.lock);
- spin_lock(&ip_vs_stats.lock);
- ip_vs_stats.ustats.conns++;
- spin_unlock(&ip_vs_stats.lock);
+ spin_lock(&net->ipvs->ctl_stats->lock);
+ net->ipvs->ctl_stats->ustats.conns++;
+ spin_unlock(&net->ipvs->ctl_stats->lock);
}
@@ -178,13 +185,15 @@ ip_vs_set_state(struct ip_vs_conn *cp, i
}
static inline void
-ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
+ip_vs_conn_fill_param_persist(struct net *net,
+ const struct ip_vs_service *svc,
struct sk_buff *skb, int protocol,
const union nf_inet_addr *caddr, __be16 cport,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
- ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+ ip_vs_conn_fill_param(net, svc->af, protocol, caddr, cport,
+ vaddr, vport, p);
p->pe = svc->pe;
if (p->pe && p->pe->fill_param)
p->pe->fill_param(p, skb);
@@ -211,6 +220,7 @@ ip_vs_sched_persist(struct ip_vs_service
struct ip_vs_conn_param param;
union nf_inet_addr snet; /* source network of the client,
after masking */
+ struct net *net = dev_net(skb->dev);
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
@@ -268,13 +278,13 @@ ip_vs_sched_persist(struct ip_vs_service
vaddr = &fwmark;
}
}
- ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+ ip_vs_conn_fill_param_persist(net, svc, skb, protocol, &snet, 0,
vaddr, vport, ¶m);
}
/* Check if a template already exists */
ct = ip_vs_ct_in_get(¶m);
- if (!ct || !ip_vs_check_template(ct)) {
+ if (!ct || !ip_vs_check_template(net, ct)) {
/* No template found or the dest of the connection
* template is not available.
*/
@@ -317,7 +327,7 @@ ip_vs_sched_persist(struct ip_vs_service
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
+ ip_vs_conn_fill_param(net, svc->af, iph.protocol, &iph.saddr, ports[0],
&iph.daddr, ports[1], ¶m);
cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest);
if (cp == NULL) {
@@ -331,7 +341,7 @@ ip_vs_sched_persist(struct ip_vs_service
ip_vs_control_add(cp, ct);
ip_vs_conn_put(ct);
- ip_vs_conn_stats(cp, svc);
+ ip_vs_conn_stats(net, cp, svc);
return cp;
}
@@ -351,6 +361,7 @@ ip_vs_schedule(struct ip_vs_service *svc
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
unsigned int flags;
+ struct net *net = dev_net(skb->dev);
*ignored = 1;
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
@@ -419,7 +430,7 @@ ip_vs_schedule(struct ip_vs_service *svc
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
+ ip_vs_conn_fill_param(net, svc->af, iph.protocol, &iph.saddr,
pptr[0], &iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
@@ -436,7 +447,7 @@ ip_vs_schedule(struct ip_vs_service *svc
IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));
- ip_vs_conn_stats(cp, svc);
+ ip_vs_conn_stats(net, cp, svc);
return cp;
}
@@ -452,6 +463,8 @@ int ip_vs_leave(struct ip_vs_service *sv
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
int unicast;
+ struct net *net = dev_net(skb->dev);
+
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -465,12 +478,12 @@ int ip_vs_leave(struct ip_vs_service *sv
unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
else
#endif
- unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+ unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+ if (net->ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,7 +497,7 @@ int ip_vs_leave(struct ip_vs_service *sv
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->af, iph.protocol,
+ ip_vs_conn_fill_param(net, svc->af, iph.protocol,
&iph.saddr, pptr[0],
&iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
@@ -683,6 +696,7 @@ static int handle_response_icmp(int af,
unsigned int offset, unsigned int ihl)
{
unsigned int verdict = NF_DROP;
+ struct net *net = dev_net(skb->dev);
if (IP_VS_FWD_METHOD(cp) != 0) {
pr_err("shouldn't reach here, because the box is on the "
@@ -712,11 +726,12 @@ static int handle_response_icmp(int af,
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (net->ipvs->sysctl_snat_reroute &&
+ ip6_route_me_harder(skb) != 0)
goto out;
} else
#endif
- if ((sysctl_ip_vs_snat_reroute ||
+ if ((net->ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto out;
@@ -927,6 +942,8 @@ static unsigned int
handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int ihl)
{
+ struct net *net = dev_net(skb->dev);
+
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl))
@@ -963,11 +980,12 @@ handle_response(int af, struct sk_buff *
*/
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (net->ipvs->sysctl_snat_reroute &&
+ ip6_route_me_harder(skb) != 0)
goto drop;
} else
#endif
- if ((sysctl_ip_vs_snat_reroute ||
+ if ((net->ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
@@ -1002,6 +1020,7 @@ ip_vs_out(unsigned int hooknum, struct s
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
+ struct net *net = dev_net(skb->dev);
EnterFunction(11);
@@ -1077,7 +1096,7 @@ ip_vs_out(unsigned int hooknum, struct s
if (likely(cp))
return handle_response(af, skb, pp, cp, iph.len);
- if (sysctl_ip_vs_nat_icmp_send &&
+ if (net->ipvs->sysctl_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1106,7 @@ ip_vs_out(unsigned int hooknum, struct s
sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_lookup_real_service(af, iph.protocol,
+ if (ip_vs_lookup_real_service(net, af, iph.protocol,
&iph.saddr,
pptr[0])) {
/*
@@ -1427,6 +1446,7 @@ ip_vs_in(unsigned int hooknum, struct sk
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
int ret, restart, pkts;
+ struct net *net = dev_net(skb->dev);
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1510,7 +1530,7 @@ ip_vs_in(unsigned int hooknum, struct sk
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
- if (sysctl_ip_vs_expire_nodest_conn) {
+ if (net->ipvs->sysctl_expire_nodest_conn) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
@@ -1537,33 +1557,33 @@ ip_vs_in(unsigned int hooknum, struct sk
* encorage the standby servers to update the connections timeout
*/
pkts = atomic_add_return(1, &cp->in_pkts);
- if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ if (af == AF_INET && (net->ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % net->ipvs->sysctl_sync_threshold[1]
+ == net->ipvs->sysctl_sync_threshold[0])) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
- ip_vs_sync_conn(cp);
+ ip_vs_sync_conn(net, cp);
goto out;
}
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
else if (af == AF_INET &&
- (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ (net->ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % net->ipvs->sysctl_sync_threshold[1]
+ == net->ipvs->sysctl_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
- ip_vs_sync_conn(cp);
+ ip_vs_sync_conn(net,cp);
out:
cp->old_state = cp->state;
@@ -1782,7 +1802,37 @@ static struct nf_hook_ops ip_vs_ops[] __
},
#endif
};
+/*
+ * Initialize IP Virtual Server netns mem.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = 0;
+ ipvs = kzalloc(sizeof(struct netns_ipvs), GFP_ATOMIC);
+ if( ipvs == NULL ) {
+ pr_err("%s(): no memory.\n", __func__);
+ return -ENOMEM;
+ }
+ ipvs->inc = atomic_read(&ipvs_netns_cnt);
+ atomic_inc(&ipvs_netns_cnt);
+ IP_VS_DBG(10, "Creating new netns *net=%p *ipvs=%p size=%lu\n",
+ net, ipvs, sizeof(struct netns_ipvs));
+ net->ipvs = ipvs;
+
+ return 0;
+}
+
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+ IP_VS_DBG(10, "ipvs netns %p released\n", net);
+ kfree(net->ipvs);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+ .init = __ip_vs_init,
+ .exit = __ip_vs_cleanup,
+};
/*
* Initialize IP Virtual Server
@@ -1791,6 +1841,10 @@ static int __init ip_vs_init(void)
{
int ret;
+ ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */
+ if( ret < 0 )
+ return ret;
+
ip_vs_estimator_init();
ret = ip_vs_control_init();
@@ -1813,15 +1867,22 @@ static int __init ip_vs_init(void)
goto cleanup_app;
}
+ ret = ip_vs_sync_init();
+ if (ret < 0) {
+ pr_err("can't setup sync data.\n");
+ goto cleanup_conn;
+ }
ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
if (ret < 0) {
pr_err("can't register hooks.\n");
- goto cleanup_conn;
+ goto cleanup_sync;
}
pr_info("ipvs loaded.\n");
return ret;
+ cleanup_sync:
+ ip_vs_sync_cleanup();
cleanup_conn:
ip_vs_conn_cleanup();
cleanup_app:
@@ -1831,17 +1892,20 @@ static int __init ip_vs_init(void)
ip_vs_control_cleanup();
cleanup_estimator:
ip_vs_estimator_cleanup();
+ unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
return ret;
}
static void __exit ip_vs_cleanup(void)
{
nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ ip_vs_sync_cleanup();
ip_vs_conn_cleanup();
ip_vs_app_cleanup();
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
ip_vs_estimator_cleanup();
+ unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
pr_info("ipvs unloaded.\n");
}
^ permalink raw reply [flat|nested] 14+ messages in thread* [rfc v2 05/10] ipvs: Add ipvs_skbnet
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (3 preceding siblings ...)
2010-10-22 20:09 ` [rfc v2 04/10] ipvs network name space aware: core Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 06/10] ipvs network name space aware: ctl Simon Horman
` (6 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs_skbnet.patch --]
[-- Type: text/plain, Size: 648 bytes --]
As suggested by Julian Anastasov
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Index: lvs-test-2.6/include/net/ip_vs.h
===================================================================
--- lvs-test-2.6.orig/include/net/ip_vs.h 2010-10-22 21:16:52.000000000 +0200
+++ lvs-test-2.6/include/net/ip_vs.h 2010-10-22 21:16:56.000000000 +0200
@@ -1097,6 +1097,11 @@ static inline void ip_vs_conn_drop_connt
/* CONFIG_IP_VS_NFCT */
#endif
+static inline struct net *ipvs_skbnet(struct sk_buff *skb)
+{
+ return dev_net(skb->dev ? : skb_dst(skb)->dev);
+}
+
#endif /* __KERNEL__ */
#endif /* _NET_IP_VS_H */
^ permalink raw reply [flat|nested] 14+ messages in thread* [rfc v2 06/10] ipvs network name space aware: ctl
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (4 preceding siblings ...)
2010-10-22 20:09 ` [rfc v2 05/10] ipvs: Add ipvs_skbnet Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 07/10] ipvs network name space aware: est Simon Horman
` (5 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs-netns-5.patch --]
[-- Type: text/plain, Size: 59289 bytes --]
This patch just contains ip_vs_ctl
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_ctl.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_ctl.c 2010-10-22 21:38:58.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_ctl.c 2010-10-22 21:44:35.000000000 +0200
@@ -38,6 +38,7 @@
#include <linux/mutex.h>
#include <net/net_namespace.h>
+#include <linux/nsproxy.h>
#include <net/ip.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
@@ -77,23 +78,6 @@ static atomic_t ip_vs_dropentry = ATOMIC
/* number of virtual services */
static int ip_vs_num_services = 0;
-/* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-
-
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;
@@ -105,7 +89,8 @@ int ip_vs_get_debug_level(void)
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+ const struct in6_addr *addr)
{
struct rt6_info *rt;
struct flowi fl = {
@@ -116,7 +101,7 @@ static int __ip_vs_addr_is_local_v6(cons
.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
};
- rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
return 1;
@@ -127,8 +112,9 @@ static int __ip_vs_addr_is_local_v6(cons
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
*/
-static void update_defense_level(void)
+static void update_defense_level(struct net *net)
{
+ struct netns_ipvs *ipvs = net->ipvs;
struct sysinfo i;
static int old_secure_tcp = 0;
int availmem;
@@ -143,20 +129,20 @@ static void update_defense_level(void)
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
- nomem = (availmem < sysctl_ip_vs_amemthresh);
+ nomem = (availmem < ipvs->sysctl_amemthresh);
local_bh_disable();
/* drop_entry */
spin_lock(&__ip_vs_dropentry_lock);
- switch (sysctl_ip_vs_drop_entry) {
+ switch (ipvs->sysctl_drop_entry) {
case 0:
atomic_set(&ip_vs_dropentry, 0);
break;
case 1:
if (nomem) {
atomic_set(&ip_vs_dropentry, 1);
- sysctl_ip_vs_drop_entry = 2;
+ ipvs->sysctl_drop_entry = 2;
} else {
atomic_set(&ip_vs_dropentry, 0);
}
@@ -166,7 +152,7 @@ static void update_defense_level(void)
atomic_set(&ip_vs_dropentry, 1);
} else {
atomic_set(&ip_vs_dropentry, 0);
- sysctl_ip_vs_drop_entry = 1;
+ ipvs->sysctl_drop_entry = 1;
};
break;
case 3:
@@ -177,16 +163,16 @@ static void update_defense_level(void)
/* drop_packet */
spin_lock(&__ip_vs_droppacket_lock);
- switch (sysctl_ip_vs_drop_packet) {
+ switch (ipvs->sysctl_drop_packet) {
case 0:
ip_vs_drop_rate = 0;
break;
case 1:
if (nomem) {
ip_vs_drop_rate = ip_vs_drop_counter
- = sysctl_ip_vs_amemthresh /
- (sysctl_ip_vs_amemthresh-availmem);
- sysctl_ip_vs_drop_packet = 2;
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
+ ipvs->sysctl_drop_packet = 2;
} else {
ip_vs_drop_rate = 0;
}
@@ -194,22 +180,22 @@ static void update_defense_level(void)
case 2:
if (nomem) {
ip_vs_drop_rate = ip_vs_drop_counter
- = sysctl_ip_vs_amemthresh /
- (sysctl_ip_vs_amemthresh-availmem);
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
} else {
ip_vs_drop_rate = 0;
- sysctl_ip_vs_drop_packet = 1;
+ ipvs->sysctl_drop_packet = 1;
}
break;
case 3:
- ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+ ip_vs_drop_rate = ipvs->sysctl_am_droprate;
break;
}
spin_unlock(&__ip_vs_droppacket_lock);
/* secure_tcp */
spin_lock(&ip_vs_securetcp_lock);
- switch (sysctl_ip_vs_secure_tcp) {
+ switch (ipvs->sysctl_secure_tcp) {
case 0:
if (old_secure_tcp >= 2)
to_change = 0;
@@ -218,7 +204,7 @@ static void update_defense_level(void)
if (nomem) {
if (old_secure_tcp < 2)
to_change = 1;
- sysctl_ip_vs_secure_tcp = 2;
+ ipvs->sysctl_secure_tcp = 2;
} else {
if (old_secure_tcp >= 2)
to_change = 0;
@@ -231,7 +217,7 @@ static void update_defense_level(void)
} else {
if (old_secure_tcp >= 2)
to_change = 0;
- sysctl_ip_vs_secure_tcp = 1;
+ ipvs->sysctl_secure_tcp = 1;
}
break;
case 3:
@@ -239,9 +225,9 @@ static void update_defense_level(void)
to_change = 1;
break;
}
- old_secure_tcp = sysctl_ip_vs_secure_tcp;
+ old_secure_tcp = ipvs->sysctl_secure_tcp;
if (to_change >= 0)
- ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
+ ip_vs_protocol_timeout_change(ipvs->sysctl_secure_tcp>1);
spin_unlock(&ip_vs_securetcp_lock);
local_bh_enable();
@@ -257,9 +243,16 @@ static DECLARE_DELAYED_WORK(defense_work
static void defense_work_handler(struct work_struct *work)
{
- update_defense_level();
- if (atomic_read(&ip_vs_dropentry))
- ip_vs_random_dropentry();
+ struct net *net;
+
+ for_each_net(net)
+ update_defense_level(net);
+
+ if (atomic_read(&ip_vs_dropentry)) {
+ /* Should another sched period be used to reduce peak load ?*/
+ for_each_net(net)
+ ip_vs_random_dropentry(net);
+ }
schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
}
@@ -276,40 +269,6 @@ ip_vs_use_count_dec(void)
module_put(THIS_MODULE);
}
-
-/*
- * Hash table: for virtual service lookups
- */
-#define IP_VS_SVC_TAB_BITS 8
-#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
-#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
-
-/* the service table hashed by <protocol, addr, port> */
-static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
-/* the service table hashed by fwmark */
-static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
-
-/*
- * Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- * Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- * FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
-
/*
* Returns hash value for virtual service
*/
@@ -340,10 +299,10 @@ static __inline__ unsigned ip_vs_svc_fwm
/*
* Hashes a service in the ip_vs_svc_table by <proto,addr,port>
- * or in the ip_vs_svc_fwm_table by fwmark.
+ * or in the net->ipvs->ctl_fwm_table by fwmark.
* Should be called with locked tables.
*/
-static int ip_vs_svc_hash(struct ip_vs_service *svc)
+static int ip_vs_svc_hash(struct net *net, struct ip_vs_service *svc)
{
unsigned hash;
@@ -359,13 +318,13 @@ static int ip_vs_svc_hash(struct ip_vs_s
*/
hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
svc->port);
- list_add(&svc->s_list, &ip_vs_svc_table[hash]);
+ list_add(&svc->s_list, &net->ipvs->ctl_svc_table[hash]);
} else {
/*
- * Hash it by fwmark in ip_vs_svc_fwm_table
+ * Hash it by fwmark in net->ipvs->ctl_fwm_table
*/
hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
- list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
+ list_add(&svc->f_list, &net->ipvs->ctl_fwm_table[hash]);
}
svc->flags |= IP_VS_SVC_F_HASHED;
@@ -376,7 +335,7 @@ static int ip_vs_svc_hash(struct ip_vs_s
/*
- * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ * Unhashes a service from net->ipvs->ctl_svc_table/net->ipvs->ctl_fwm_table.
* Should be called with locked tables.
*/
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -388,10 +347,10 @@ static int ip_vs_svc_unhash(struct ip_vs
}
if (svc->fwmark == 0) {
- /* Remove it from the ip_vs_svc_table table */
+ /* Remove it from the net->ipvs->ctl_svc_table table */
list_del(&svc->s_list);
} else {
- /* Remove it from the ip_vs_svc_fwm_table table */
+ /* Remove it from the net->ipvs->ctl_fwm_table table */
list_del(&svc->f_list);
}
@@ -405,16 +364,17 @@ static int ip_vs_svc_unhash(struct ip_vs
* Get service by {proto,addr,port} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
- __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport)
{
unsigned hash;
struct ip_vs_service *svc;
+ struct netns_ipvs *ipvs = net->ipvs;
/* Check for "full" addressed entries */
hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
- list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
+ list_for_each_entry(svc, &ipvs->ctl_fwm_table[hash], s_list){
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
@@ -432,15 +392,16 @@ __ip_vs_service_find(int af, __u16 proto
* Get service by {fwmark} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
{
unsigned hash;
struct ip_vs_service *svc;
+ struct netns_ipvs *ipvs = net->ipvs;
/* Check for fwmark addressed entries */
hash = ip_vs_svc_fwm_hashkey(fwmark);
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
+ list_for_each_entry(svc, &ipvs->ctl_fwm_table[hash], f_list) {
if (svc->fwmark == fwmark && svc->af == af) {
/* HIT */
return svc;
@@ -451,7 +412,7 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmar
}
struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
@@ -461,32 +422,33 @@ ip_vs_service_get(int af, __u32 fwmark,
/*
* Check the table hashed by fwmark first
*/
- if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
+ if (fwmark && (svc = __ip_vs_svc_fwm_find(net, af, fwmark)))
goto out;
/*
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, vport);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
if (svc == NULL
&& protocol == IPPROTO_TCP
- && atomic_read(&ip_vs_ftpsvc_counter)
+ && atomic_read(&net->ipvs->ctl_ftpsvc_counter)
&& (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
/*
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr,
+ FTPPORT);
}
if (svc == NULL
- && atomic_read(&ip_vs_nullsvc_counter)) {
+ && atomic_read(&net->ipvs->ctl_nullsvc_counter)) {
/*
* Check if the catch-all port (port zero) exists
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, 0);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
}
out:
@@ -547,10 +509,10 @@ static inline unsigned ip_vs_rs_hashkey(
}
/*
- * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ * Hashes ip_vs_dest in net->ipvs->ctl_rtable by <proto,addr,port>.
* should be called with locked tables.
*/
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct net *net, struct ip_vs_dest *dest)
{
unsigned hash;
@@ -564,19 +526,19 @@ static int ip_vs_rs_hash(struct ip_vs_de
*/
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
- list_add(&dest->d_list, &ip_vs_rtable[hash]);
+ list_add(&dest->d_list, &net->ipvs->ctl_rtable[hash]);
return 1;
}
/*
- * UNhashes ip_vs_dest from ip_vs_rtable.
+ * UNhashes ip_vs_dest from net->ipvs->ctl_rtable.
* should be called with locked tables.
*/
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
/*
- * Remove it from the ip_vs_rtable table.
+ * Remove it from the net->ipvs->ctl_rtable table.
*/
if (!list_empty(&dest->d_list)) {
list_del(&dest->d_list);
@@ -590,12 +552,13 @@ static int ip_vs_rs_unhash(struct ip_vs_
* Lookup real service by <proto,addr,port> in the real service table.
*/
struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr,
__be16 dport)
{
unsigned hash;
struct ip_vs_dest *dest;
+ struct netns_ipvs *ipvs = net->ipvs;
/*
* Check for "full" addressed entries
@@ -604,7 +567,7 @@ ip_vs_lookup_real_service(int af, __u16
hash = ip_vs_rs_hashkey(af, daddr, dport);
read_lock(&__ip_vs_rs_lock);
- list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+ list_for_each_entry(dest, &ipvs->ctl_rtable[hash], d_list) {
if ((dest->af == af)
&& ip_vs_addr_equal(af, &dest->addr, daddr)
&& (dest->port == dport)
@@ -654,15 +617,15 @@ ip_vs_lookup_dest(struct ip_vs_service *
* ip_vs_lookup_real_service() looked promissing, but
* seems not working as expected.
*/
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
- __be16 dport,
- const union nf_inet_addr *vaddr,
- __be16 vport, __u16 protocol)
+struct ip_vs_dest *
+ip_vs_find_dest(struct net *net, int af,
+ const union nf_inet_addr *daddr, __be16 dport,
+ const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol)
{
struct ip_vs_dest *dest;
struct ip_vs_service *svc;
- svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+ svc = ip_vs_service_get(net, af, 0, protocol, vaddr, vport);
if (!svc)
return NULL;
dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -683,15 +646,16 @@ struct ip_vs_dest *ip_vs_find_dest(int a
* scheduling.
*/
static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_trash_get_dest(struct net *net, struct ip_vs_service *svc,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest, *nxt;
+ struct netns_ipvs *ipvs = net->ipvs;
/*
* Find the destination in trash
*/
- list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+ list_for_each_entry_safe(dest, nxt, &ipvs->ctl_dest_trash, n_list) {
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
@@ -739,11 +703,12 @@ ip_vs_trash_get_dest(struct ip_vs_servic
* are expired, and the refcnt of each destination in the trash must
* be 1, so we simply release them here.
*/
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
{
struct ip_vs_dest *dest, *nxt;
+ struct netns_ipvs *ipvs = net->ipvs;
- list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+ list_for_each_entry_safe(dest, nxt, &ipvs->ctl_dest_trash, n_list) {
list_del(&dest->n_list);
ip_vs_dst_reset(dest);
__ip_vs_unbind_svc(dest);
@@ -752,8 +717,7 @@ static void ip_vs_trash_cleanup(void)
}
-static void
-ip_vs_zero_stats(struct ip_vs_stats *stats)
+static void ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
@@ -767,7 +731,8 @@ ip_vs_zero_stats(struct ip_vs_stats *sta
* Update a destination in the given service
*/
static void
-__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
+__ip_vs_update_dest(struct net *net, struct ip_vs_service *svc,
+ struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
int conn_flags;
@@ -782,11 +747,11 @@ __ip_vs_update_dest(struct ip_vs_service
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
} else {
/*
- * Put the real service in ip_vs_rtable if not present.
- * For now only for NAT!
+ * Put the real service in net->ipvs->ctl_rtable if not present.
+ * For now only for NAT!
*/
write_lock_bh(&__ip_vs_rs_lock);
- ip_vs_rs_hash(dest);
+ ip_vs_rs_hash(net, dest);
write_unlock_bh(&__ip_vs_rs_lock);
}
atomic_set(&dest->conn_flags, conn_flags);
@@ -815,7 +780,7 @@ __ip_vs_update_dest(struct ip_vs_service
spin_unlock(&dest->dst_lock);
if (add)
- ip_vs_new_estimator(&dest->stats);
+ ip_vs_new_estimator(net, &dest->stats);
write_lock_bh(&__ip_vs_svc_lock);
@@ -839,8 +804,8 @@ __ip_vs_update_dest(struct ip_vs_service
* Create a destination for the given service
*/
static int
-ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
- struct ip_vs_dest **dest_p)
+ip_vs_new_dest(struct net *net, struct ip_vs_service *svc,
+ struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p)
{
struct ip_vs_dest *dest;
unsigned atype;
@@ -852,12 +817,12 @@ ip_vs_new_dest(struct ip_vs_service *svc
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
- !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+ !__ip_vs_addr_is_local_v6(net, &udest->addr.in6))
return -EINVAL;
} else
#endif
{
- atype = inet_addr_type(&init_net, udest->addr.ip);
+ atype = inet_addr_type(net, udest->addr.ip);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
}
@@ -884,7 +849,7 @@ ip_vs_new_dest(struct ip_vs_service *svc
INIT_LIST_HEAD(&dest->d_list);
spin_lock_init(&dest->dst_lock);
spin_lock_init(&dest->stats.lock);
- __ip_vs_update_dest(svc, dest, udest, 1);
+ __ip_vs_update_dest(net, svc, dest, udest, 1);
*dest_p = dest;
@@ -896,8 +861,8 @@ ip_vs_new_dest(struct ip_vs_service *svc
/*
* Add a destination into an existing service
*/
-static int
-ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+static int ip_vs_add_dest(struct net *net, struct ip_vs_service *svc,
+ struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
union nf_inet_addr daddr;
@@ -933,7 +898,7 @@ ip_vs_add_dest(struct ip_vs_service *svc
* Check if the dest already exists in the trash and
* is from the same service
*/
- dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+ dest = ip_vs_trash_get_dest(net, svc, &daddr, dport);
if (dest != NULL) {
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
@@ -949,13 +914,13 @@ ip_vs_add_dest(struct ip_vs_service *svc
*/
list_del(&dest->n_list);
- __ip_vs_update_dest(svc, dest, udest, 1);
+ __ip_vs_update_dest(net, svc, dest, udest, 1);
ret = 0;
} else {
/*
* Allocate and initialize the dest structure
*/
- ret = ip_vs_new_dest(svc, udest, &dest);
+ ret = ip_vs_new_dest(net, svc, udest, &dest);
}
LeaveFunction(2);
@@ -966,8 +931,8 @@ ip_vs_add_dest(struct ip_vs_service *svc
/*
* Edit a destination in the given service
*/
-static int
-ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+static int ip_vs_edit_dest(struct net *net, struct ip_vs_service *svc,
+ struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
union nf_inet_addr daddr;
@@ -998,7 +963,7 @@ ip_vs_edit_dest(struct ip_vs_service *sv
return -ENOENT;
}
- __ip_vs_update_dest(svc, dest, udest, 0);
+ __ip_vs_update_dest(net, svc, dest, udest, 0);
LeaveFunction(2);
return 0;
@@ -1008,9 +973,9 @@ ip_vs_edit_dest(struct ip_vs_service *sv
/*
* Delete a destination (must be already unlinked from the service)
*/
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
{
- ip_vs_kill_estimator(&dest->stats);
+ ip_vs_kill_estimator(net, &dest->stats);
/*
* Remove it from the d-linked list with the real services.
@@ -1043,7 +1008,7 @@ static void __ip_vs_del_dest(struct ip_v
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- list_add(&dest->n_list, &ip_vs_dest_trash);
+ list_add(&dest->n_list, &net->ipvs->ctl_dest_trash);
atomic_inc(&dest->refcnt);
}
}
@@ -1075,8 +1040,8 @@ static void __ip_vs_unlink_dest(struct i
/*
* Delete a destination server in the given service
*/
-static int
-ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
+static int ip_vs_del_dest(struct net *net, struct ip_vs_service *svc,
+ struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
__be16 dport = udest->port;
@@ -1107,7 +1072,7 @@ ip_vs_del_dest(struct ip_vs_service *svc
/*
* Delete the destination
*/
- __ip_vs_del_dest(dest);
+ __ip_vs_del_dest(net, dest);
LeaveFunction(2);
@@ -1119,7 +1084,7 @@ ip_vs_del_dest(struct ip_vs_service *svc
* Add a service into the service hash table
*/
static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
struct ip_vs_service **svc_p)
{
int ret = 0;
@@ -1191,11 +1156,11 @@ ip_vs_add_service(struct ip_vs_service_u
/* Update the virtual service counters */
if (svc->port == FTPPORT)
- atomic_inc(&ip_vs_ftpsvc_counter);
+ atomic_inc(&net->ipvs->ctl_ftpsvc_counter);
else if (svc->port == 0)
- atomic_inc(&ip_vs_nullsvc_counter);
+ atomic_inc(&net->ipvs->ctl_nullsvc_counter);
- ip_vs_new_estimator(&svc->stats);
+ ip_vs_new_estimator(net, &svc->stats);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
@@ -1203,7 +1168,7 @@ ip_vs_add_service(struct ip_vs_service_u
/* Hash the service into the service table */
write_lock_bh(&__ip_vs_svc_lock);
- ip_vs_svc_hash(svc);
+ ip_vs_svc_hash(net, svc);
write_unlock_bh(&__ip_vs_svc_lock);
*svc_p = svc;
@@ -1331,7 +1296,7 @@ ip_vs_edit_service(struct ip_vs_service
* - The service must be unlinked, unlocked and not referenced!
* - We are called under _bh lock
*/
-static void __ip_vs_del_service(struct ip_vs_service *svc)
+static void __ip_vs_del_service(struct net *net, struct ip_vs_service *svc)
{
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
@@ -1343,7 +1308,7 @@ static void __ip_vs_del_service(struct i
if (svc->af == AF_INET)
ip_vs_num_services--;
- ip_vs_kill_estimator(&svc->stats);
+ ip_vs_kill_estimator(net, &svc->stats);
/* Unbind scheduler */
old_sched = svc->scheduler;
@@ -1366,16 +1331,16 @@ static void __ip_vs_del_service(struct i
*/
list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
__ip_vs_unlink_dest(svc, dest, 0);
- __ip_vs_del_dest(dest);
+ __ip_vs_del_dest(net, dest);
}
/*
* Update the virtual service counters
*/
if (svc->port == FTPPORT)
- atomic_dec(&ip_vs_ftpsvc_counter);
+ atomic_dec(&net->ipvs->ctl_ftpsvc_counter);
else if (svc->port == 0)
- atomic_dec(&ip_vs_nullsvc_counter);
+ atomic_dec(&net->ipvs->ctl_nullsvc_counter);
/*
* Free the service if nobody refers to it
@@ -1395,7 +1360,7 @@ static void __ip_vs_del_service(struct i
/*
* Unlink a service from list and try to delete it if its refcnt reached 0
*/
-static void ip_vs_unlink_service(struct ip_vs_service *svc)
+static void ip_vs_unlink_service(struct net *net, struct ip_vs_service *svc)
{
/*
* Unhash it from the service table
@@ -1409,7 +1374,7 @@ static void ip_vs_unlink_service(struct
*/
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
- __ip_vs_del_service(svc);
+ __ip_vs_del_service(net, svc);
write_unlock_bh(&__ip_vs_svc_lock);
}
@@ -1417,11 +1382,11 @@ static void ip_vs_unlink_service(struct
/*
* Delete a service from the service list
*/
-static int ip_vs_del_service(struct ip_vs_service *svc)
+static int ip_vs_del_service(struct net *net, struct ip_vs_service *svc)
{
if (svc == NULL)
return -EEXIST;
- ip_vs_unlink_service(svc);
+ ip_vs_unlink_service(net, svc);
return 0;
}
@@ -1430,17 +1395,19 @@ static int ip_vs_del_service(struct ip_v
/*
* Flush all the virtual services
*/
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
{
int idx;
struct ip_vs_service *svc, *nxt;
+ struct netns_ipvs *ipvs = net->ipvs;
/*
* Flush the service table hashed by <protocol,addr,port>
*/
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
- ip_vs_unlink_service(svc);
+ list_for_each_entry_safe(svc, nxt,
+ &ipvs->ctl_fwm_table[idx], s_list) {
+ ip_vs_unlink_service(net, svc);
}
}
@@ -1449,8 +1416,8 @@ static int ip_vs_flush(void)
*/
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry_safe(svc, nxt,
- &ip_vs_svc_fwm_table[idx], f_list) {
- ip_vs_unlink_service(svc);
+ &ipvs->ctl_fwm_table[idx], f_list) {
+ ip_vs_unlink_service(net, svc);
}
}
@@ -1474,24 +1441,25 @@ static int ip_vs_zero_service(struct ip_
return 0;
}
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
{
int idx;
struct ip_vs_service *svc;
+ struct netns_ipvs *ipvs = net->ipvs;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ list_for_each_entry(svc, &ipvs->ctl_svc_table[idx], s_list) {
ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ list_for_each_entry(svc, &ipvs->ctl_fwm_table[idx], f_list) {
ip_vs_zero_service(svc);
}
}
- ip_vs_zero_stats(&ip_vs_stats);
+ ip_vs_zero_stats(ipvs->ctl_stats);
return 0;
}
@@ -1500,6 +1468,7 @@ static int
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ struct net *net = current->nsproxy->net_ns;
int *valp = table->data;
int val = *valp;
int rc;
@@ -1510,7 +1479,7 @@ proc_do_defense_mode(ctl_table *table, i
/* Restore the correct value */
*valp = val;
} else {
- update_defense_level();
+ update_defense_level(net);
}
}
return rc;
@@ -1539,42 +1508,31 @@ proc_do_sync_threshold(ctl_table *table,
/*
* IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ * Do not change order or insert new entries without
+ * align with netns init in __ip_vs_control_init()
*/
static struct ctl_table vs_vars[] = {
{
.procname = "amemthresh",
- .data = &sysctl_ip_vs_amemthresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#ifdef CONFIG_IP_VS_DEBUG
- {
- .procname = "debug_level",
- .data = &sysctl_ip_vs_debug_level,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#endif
{
.procname = "am_droprate",
- .data = &sysctl_ip_vs_am_droprate,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "drop_entry",
- .data = &sysctl_ip_vs_drop_entry,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
},
{
.procname = "drop_packet",
- .data = &sysctl_ip_vs_drop_packet,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
@@ -1582,7 +1540,6 @@ static struct ctl_table vs_vars[] = {
#ifdef CONFIG_IP_VS_NFCT
{
.procname = "conntrack",
- .data = &sysctl_ip_vs_conntrack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -1590,18 +1547,46 @@ static struct ctl_table vs_vars[] = {
#endif
{
.procname = "secure_tcp",
- .data = &sysctl_ip_vs_secure_tcp,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
},
{
.procname = "snat_reroute",
- .data = &sysctl_ip_vs_snat_reroute,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .procname = "cache_bypass",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_nodest_conn",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_quiescent_template",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sync_threshold",
+ .maxlen = sizeof(int) * 2,
+ .mode = 0644,
+ .proc_handler = proc_do_sync_threshold,
+ },
+ {
+ .procname = "nat_icmp_send",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#if 0
{
.procname = "timeout_established",
@@ -1688,41 +1673,15 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
+#ifdef CONFIG_IP_VS_DEBUG
{
- .procname = "cache_bypass",
- .data = &sysctl_ip_vs_cache_bypass,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "expire_nodest_conn",
- .data = &sysctl_ip_vs_expire_nodest_conn,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "expire_quiescent_template",
- .data = &sysctl_ip_vs_expire_quiescent_template,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "sync_threshold",
- .data = &sysctl_ip_vs_sync_threshold,
- .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
- .mode = 0644,
- .proc_handler = proc_do_sync_threshold,
- },
- {
- .procname = "nat_icmp_send",
- .data = &sysctl_ip_vs_nat_icmp_send,
+ .procname = "debug_level",
+ .data = &sysctl_ip_vs_debug_level,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#endif
{ }
};
@@ -1734,11 +1693,10 @@ const struct ctl_path net_vs_ctl_path[]
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
-static struct ctl_table_header * sysctl_header;
-
#ifdef CONFIG_PROC_FS
struct ip_vs_iter {
+ struct seq_net_private p; /* Do not move this, netns depends upon it*/
struct list_head *table;
int bucket;
};
@@ -1768,12 +1726,14 @@ static struct ip_vs_service *ip_vs_info_
struct ip_vs_iter *iter = seq->private;
int idx;
struct ip_vs_service *svc;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net->ipvs;
/* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ list_for_each_entry(svc, &ipvs->ctl_svc_table[idx], s_list) {
if (pos-- == 0){
- iter->table = ip_vs_svc_table;
+ iter->table = ipvs->ctl_svc_table;
iter->bucket = idx;
return svc;
}
@@ -1782,9 +1742,9 @@ static struct ip_vs_service *ip_vs_info_
/* keep looking in fwmark */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ list_for_each_entry(svc, &ipvs->ctl_fwm_table[idx], f_list) {
if (pos-- == 0) {
- iter->table = ip_vs_svc_fwm_table;
+ iter->table = ipvs->ctl_fwm_table;
iter->bucket = idx;
return svc;
}
@@ -1808,6 +1768,8 @@ static void *ip_vs_info_seq_next(struct
struct list_head *e;
struct ip_vs_iter *iter;
struct ip_vs_service *svc;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net->ipvs;
++*pos;
if (v == SEQ_START_TOKEN)
@@ -1816,31 +1778,31 @@ static void *ip_vs_info_seq_next(struct
svc = v;
iter = seq->private;
- if (iter->table == ip_vs_svc_table) {
+ if (iter->table == ipvs->ctl_svc_table) {
/* next service in table hashed by protocol */
- if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
+ if ((e = svc->s_list.next) != &ipvs->ctl_svc_table[iter->bucket])
return list_entry(e, struct ip_vs_service, s_list);
while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
- list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
+ list_for_each_entry(svc, &ipvs->ctl_svc_table[iter->bucket],
s_list) {
return svc;
}
}
- iter->table = ip_vs_svc_fwm_table;
+ iter->table = ipvs->ctl_fwm_table;
iter->bucket = -1;
goto scan_fwmark;
}
/* next service in hashed by fwmark */
- if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
+ if ((e = svc->f_list.next) != &ipvs->ctl_fwm_table[iter->bucket])
return list_entry(e, struct ip_vs_service, f_list);
scan_fwmark:
while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
+ list_for_each_entry(svc, &ipvs->ctl_fwm_table[iter->bucket],
f_list)
return svc;
}
@@ -1869,8 +1831,9 @@ static int ip_vs_info_seq_show(struct se
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
+ struct net *net = seq_file_net(seq);
- if (iter->table == ip_vs_svc_table) {
+ if (iter->table == net->ipvs->ctl_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
seq_printf(seq, "%s [%pI6]:%04X %s ",
@@ -1937,7 +1900,7 @@ static const struct seq_operations ip_vs
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ip_vs_info_seq_ops,
+ return seq_open_net(inode, file, &ip_vs_info_seq_ops,
sizeof(struct ip_vs_iter));
}
@@ -1951,13 +1914,12 @@ static const struct file_operations ip_v
#endif
-struct ip_vs_stats ip_vs_stats = {
- .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
#ifdef CONFIG_PROC_FS
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
+ /* single_open_net returns net in private */
+ struct net *net = (struct net *)seq->private;
+ struct ip_vs_stats *ctl_stats = net->ipvs->ctl_stats;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@@ -1965,29 +1927,29 @@ static int ip_vs_stats_show(struct seq_f
seq_printf(seq,
" Conns Packets Packets Bytes Bytes\n");
- spin_lock_bh(&ip_vs_stats.lock);
- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
- ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
- (unsigned long long) ip_vs_stats.ustats.inbytes,
- (unsigned long long) ip_vs_stats.ustats.outbytes);
+ spin_lock_bh(&ctl_stats->lock);
+ seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ctl_stats->ustats.conns,
+ ctl_stats->ustats.inpkts, ctl_stats->ustats.outpkts,
+ (unsigned long long) ctl_stats->ustats.inbytes,
+ (unsigned long long) ctl_stats->ustats.outbytes);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq,"%8X %8X %8X %16X %16X\n",
- ip_vs_stats.ustats.cps,
- ip_vs_stats.ustats.inpps,
- ip_vs_stats.ustats.outpps,
- ip_vs_stats.ustats.inbps,
- ip_vs_stats.ustats.outbps);
- spin_unlock_bh(&ip_vs_stats.lock);
+ ctl_stats->ustats.cps,
+ ctl_stats->ustats.inpps,
+ ctl_stats->ustats.outpps,
+ ctl_stats->ustats.inbps,
+ ctl_stats->ustats.outbps);
+ spin_unlock_bh(&ctl_stats->lock);
return 0;
}
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, ip_vs_stats_show, NULL);
+ return single_open_net(inode, file, ip_vs_stats_show);
}
static const struct file_operations ip_vs_stats_fops = {
@@ -1995,7 +1957,7 @@ static const struct file_operations ip_v
.open = ip_vs_stats_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = single_release_net,
};
#endif
@@ -2003,29 +1965,32 @@ static const struct file_operations ip_v
/*
* Set timeout values for tcp tcpfin udp in the timeout_table.
*/
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
{
+ struct ip_vs_proto_data *pd;
IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
u->tcp_timeout,
u->tcp_fin_timeout,
u->udp_timeout);
#ifdef CONFIG_IP_VS_PROTO_TCP
- if (u->tcp_timeout) {
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
- = u->tcp_timeout * HZ;
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ if (u->tcp_timeout && pd) {
+ pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
+ = u->tcp_timeout * HZ;
}
- if (u->tcp_fin_timeout) {
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+ if (u->tcp_fin_timeout && pd) {
+ pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
= u->tcp_fin_timeout * HZ;
}
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
if (u->udp_timeout) {
- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
- = u->udp_timeout * HZ;
+ if( (pd = ip_vs_proto_data_get(net, IPPROTO_UDP)) )
+ pd->timeout_table[IP_VS_UDP_S_NORMAL]
+ = u->udp_timeout * HZ;
}
#endif
return 0;
@@ -2096,6 +2061,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cm
struct ip_vs_service *svc;
struct ip_vs_dest_user *udest_compat;
struct ip_vs_dest_user_kern udest;
+ struct net *net = sock_net(sk);
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -2123,19 +2089,19 @@ do_ip_vs_set_ctl(struct sock *sk, int cm
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
- ret = ip_vs_flush();
+ ret = ip_vs_flush(net);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
- ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+ ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+ ret = start_sync_thread(net, dm->state, dm->mcast_ifn, dm->syncid);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- ret = stop_sync_thread(dm->state);
+ ret = stop_sync_thread(net, dm->state);
goto out_unlock;
}
@@ -2150,7 +2116,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cm
if (cmd == IP_VS_SO_SET_ZERO) {
/* if no service address is set, zero counters in all */
if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
- ret = ip_vs_zero_all();
+ ret = ip_vs_zero_all(net);
goto out_unlock;
}
}
@@ -2167,10 +2133,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cm
/* Lookup the exact service by <protocol, addr, port> or fwmark */
if (usvc.fwmark == 0)
- svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+ svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
- svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+ svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
if (cmd != IP_VS_SO_SET_ADD
&& (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2183,13 +2149,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cm
if (svc != NULL)
ret = -EEXIST;
else
- ret = ip_vs_add_service(&usvc, &svc);
+ ret = ip_vs_add_service(net, &usvc, &svc);
break;
case IP_VS_SO_SET_EDIT:
ret = ip_vs_edit_service(svc, &usvc);
break;
case IP_VS_SO_SET_DEL:
- ret = ip_vs_del_service(svc);
+ ret = ip_vs_del_service(net, svc);
if (!ret)
goto out_unlock;
break;
@@ -2197,13 +2163,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cm
ret = ip_vs_zero_service(svc);
break;
case IP_VS_SO_SET_ADDDEST:
- ret = ip_vs_add_dest(svc, &udest);
+ ret = ip_vs_add_dest(net, svc, &udest);
break;
case IP_VS_SO_SET_EDITDEST:
- ret = ip_vs_edit_dest(svc, &udest);
+ ret = ip_vs_edit_dest(net, svc, &udest);
break;
case IP_VS_SO_SET_DELDEST:
- ret = ip_vs_del_dest(svc, &udest);
+ ret = ip_vs_del_dest(net, svc, &udest);
break;
default:
ret = -EINVAL;
@@ -2223,7 +2189,7 @@ static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
spin_lock_bh(&src->lock);
- memcpy(dst, &src->ustats, sizeof(*dst));
+ memcpy(dst, &src->ustats, sizeof(struct ip_vs_stats_user));
spin_unlock_bh(&src->lock);
}
@@ -2243,16 +2209,17 @@ ip_vs_copy_service(struct ip_vs_service_
}
static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net, const struct ip_vs_get_services *get,
struct ip_vs_get_services __user *uptr)
{
int idx, count=0;
struct ip_vs_service *svc;
struct ip_vs_service_entry entry;
+ struct netns_ipvs *ipvs = net->ipvs;
int ret = 0;
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ list_for_each_entry(svc, &ipvs->ctl_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET)
continue;
@@ -2271,7 +2238,7 @@ __ip_vs_get_service_entries(const struct
}
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ list_for_each_entry(svc, &ipvs->ctl_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET)
continue;
@@ -2293,7 +2260,7 @@ __ip_vs_get_service_entries(const struct
}
static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
struct ip_vs_get_dests __user *uptr)
{
struct ip_vs_service *svc;
@@ -2301,9 +2268,9 @@ __ip_vs_get_dest_entries(const struct ip
int ret = 0;
if (get->fwmark)
- svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
else
- svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+ svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
get->port);
if (svc) {
@@ -2338,17 +2305,23 @@ __ip_vs_get_dest_entries(const struct ip
}
static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
{
+ struct ip_vs_proto_data *pd;
+
#ifdef CONFIG_IP_VS_PROTO_TCP
- u->tcp_timeout =
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
- u->tcp_fin_timeout =
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ if (pd) {
+ u->tcp_timeout=pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+ u->tcp_fin_timeout=pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+ } else {
+ u->tcp_timeout = 0;
+ u->tcp_fin_timeout = 0;
+ }
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
- u->udp_timeout =
- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ u->udp_timeout = (pd ? pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ : 0);
#endif
}
@@ -2377,6 +2350,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cm
unsigned char arg[128];
int ret = 0;
unsigned int copylen;
+ struct net *net = sock_net(sk);
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -2439,7 +2413,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cm
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_service_entries(get, user);
+ ret = __ip_vs_get_service_entries(net, get, user);
}
break;
@@ -2452,9 +2426,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cm
entry = (struct ip_vs_service_entry *)arg;
addr.ip = entry->addr;
if (entry->fwmark)
- svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, AF_INET,
+ entry->fwmark);
else
- svc = __ip_vs_service_find(AF_INET, entry->protocol,
+ svc = __ip_vs_service_find(net, AF_INET,
+ entry->protocol,
&addr, entry->port);
if (svc) {
ip_vs_copy_service(entry, svc);
@@ -2478,7 +2454,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cm
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_dest_entries(get, user);
+ ret = __ip_vs_get_dest_entries(net, get, user);
}
break;
@@ -2486,7 +2462,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cm
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
}
@@ -2497,15 +2473,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cm
struct ip_vs_daemon_user d[2];
memset(&d, 0, sizeof(d));
- if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+ if (net->ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
- d[0].syncid = ip_vs_master_syncid;
+ strlcpy(d[0].mcast_ifn, net->ipvs->master_mcast_ifn, sizeof(d[0].mcast_ifn));
+ d[0].syncid = net->ipvs->master_syncid;
}
- if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+ if (net->ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
- d[1].syncid = ip_vs_backup_syncid;
+ strlcpy(d[1].mcast_ifn, net->ipvs->backup_mcast_ifn, sizeof(d[1].mcast_ifn));
+ d[1].syncid = net->ipvs->backup_syncid;
}
if (copy_to_user(user, &d, sizeof(d)) != 0)
ret = -EFAULT;
@@ -2544,6 +2520,7 @@ static struct genl_family ip_vs_genl_fam
.name = IPVS_GENL_NAME,
.version = IPVS_GENL_VERSION,
.maxattr = IPVS_CMD_MAX,
+ .netnsok = true, /* Make ipvsadm to work on netns */
};
/* Policy used for first-level command attributes */
@@ -2698,10 +2675,15 @@ static int ip_vs_genl_dump_services(stru
int idx = 0, i;
int start = cb->args[0];
struct ip_vs_service *svc;
+ struct net *net = ipvs_skbnet(skb);
+ struct netns_ipvs *ipvs;
+ if (!net)
+ net = dev_net(skb->dev);
+ ipvs = net->ipvs;
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
- list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+ list_for_each_entry(svc, &ipvs->ctl_svc_table[i], s_list) {
if (++idx <= start)
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -2712,7 +2694,7 @@ static int ip_vs_genl_dump_services(stru
}
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+ list_for_each_entry(svc, &net->ipvs->ctl_fwm_table[i], f_list) {
if (++idx <= start)
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -2729,7 +2711,8 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net,
+ struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
struct ip_vs_service **ret_svc)
{
@@ -2772,9 +2755,9 @@ static int ip_vs_genl_parse_service(stru
}
if (usvc->fwmark)
- svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
else
- svc = __ip_vs_service_find(usvc->af, usvc->protocol,
+ svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
*ret_svc = svc;
@@ -2811,13 +2794,14 @@ static int ip_vs_genl_parse_service(stru
return 0;
}
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+ struct nlattr *nla)
{
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
int ret;
- ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
+ ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
return ret ? ERR_PTR(ret) : svc;
}
@@ -2885,7 +2869,10 @@ static int ip_vs_genl_dump_dests(struct
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+ struct net *net = ipvs_skbnet(skb);
+ if (!net)
+ net = dev_net(skb->dev);
mutex_lock(&__ip_vs_mutex);
/* Try to find the service for which to dump destinations */
@@ -2893,7 +2880,7 @@ static int ip_vs_genl_dump_dests(struct
IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
goto out_err;
- svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+ svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
@@ -3007,20 +2994,22 @@ nla_put_failure:
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
+
mutex_lock(&__ip_vs_mutex);
- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+ if ((net->ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
- ip_vs_master_mcast_ifn,
- ip_vs_master_syncid, cb) < 0)
+ net->ipvs->master_mcast_ifn,
+ net->ipvs->master_syncid, cb) < 0)
goto nla_put_failure;
cb->args[0] = 1;
}
- if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+ if ((net->ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
- ip_vs_backup_mcast_ifn,
- ip_vs_backup_syncid, cb) < 0)
+ net->ipvs->backup_mcast_ifn,
+ net->ipvs->backup_syncid, cb) < 0)
goto nla_put_failure;
cb->args[1] = 1;
@@ -3032,31 +3021,33 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
{
if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
- return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+ return start_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
}
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
{
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
- return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ return stop_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
}
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3068,7 +3059,7 @@ static int ip_vs_genl_set_config(struct
if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
- return ip_vs_set_timeout(&t);
+ return ip_vs_set_timeout(net, &t);
}
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3078,16 +3069,19 @@ static int ip_vs_genl_set_cmd(struct sk_
struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
+ struct net *net = ipvs_skbnet(skb);
+ if (!net)
+ net = dev_net(skb->dev);
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
- ret = ip_vs_flush();
+ ret = ip_vs_flush(net);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
- ret = ip_vs_genl_set_config(info->attrs);
+ ret = ip_vs_genl_set_config(net, info->attrs);
goto out;
} else if (cmd == IPVS_CMD_NEW_DAEMON ||
cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3103,13 +3097,13 @@ static int ip_vs_genl_set_cmd(struct sk_
}
if (cmd == IPVS_CMD_NEW_DAEMON)
- ret = ip_vs_genl_new_daemon(daemon_attrs);
+ ret = ip_vs_genl_new_daemon(net, daemon_attrs);
else
- ret = ip_vs_genl_del_daemon(daemon_attrs);
+ ret = ip_vs_genl_del_daemon(net, daemon_attrs);
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
- ret = ip_vs_zero_all();
+ ret = ip_vs_zero_all(net);
goto out;
}
@@ -3119,7 +3113,7 @@ static int ip_vs_genl_set_cmd(struct sk_
if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
need_full_svc = 1;
- ret = ip_vs_genl_parse_service(&usvc,
+ ret = ip_vs_genl_parse_service(net, &usvc,
info->attrs[IPVS_CMD_ATTR_SERVICE],
need_full_svc, &svc);
if (ret)
@@ -3149,7 +3143,7 @@ static int ip_vs_genl_set_cmd(struct sk_
switch (cmd) {
case IPVS_CMD_NEW_SERVICE:
if (svc == NULL)
- ret = ip_vs_add_service(&usvc, &svc);
+ ret = ip_vs_add_service(net, &usvc, &svc);
else
ret = -EEXIST;
break;
@@ -3157,17 +3151,17 @@ static int ip_vs_genl_set_cmd(struct sk_
ret = ip_vs_edit_service(svc, &usvc);
break;
case IPVS_CMD_DEL_SERVICE:
- ret = ip_vs_del_service(svc);
+ ret = ip_vs_del_service(net, svc);
/* do not use svc, it can be freed */
break;
case IPVS_CMD_NEW_DEST:
- ret = ip_vs_add_dest(svc, &udest);
+ ret = ip_vs_add_dest(net, svc, &udest);
break;
case IPVS_CMD_SET_DEST:
- ret = ip_vs_edit_dest(svc, &udest);
+ ret = ip_vs_edit_dest(net, svc, &udest);
break;
case IPVS_CMD_DEL_DEST:
- ret = ip_vs_del_dest(svc, &udest);
+ ret = ip_vs_del_dest(net, svc, &udest);
break;
case IPVS_CMD_ZERO:
ret = ip_vs_zero_service(svc);
@@ -3187,7 +3181,10 @@ static int ip_vs_genl_get_cmd(struct sk_
struct sk_buff *msg;
void *reply;
int ret, cmd, reply_cmd;
+ struct net *net = ipvs_skbnet(skb);
+ if (unlikely(!net))
+ net = dev_net(skb->dev);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3216,7 +3213,7 @@ static int ip_vs_genl_get_cmd(struct sk_
{
struct ip_vs_service *svc;
- svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+ svc = ip_vs_genl_find_service(net, info->attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc)) {
ret = PTR_ERR(svc);
goto out_err;
@@ -3236,7 +3233,7 @@ static int ip_vs_genl_get_cmd(struct sk_
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3382,44 +3379,133 @@ static void ip_vs_genl_unregister(void)
/* End of Generic Netlink interface definitions */
+/*
+ * per netns intit/exit func.
+ */
+int /*__net_init*/ __ip_vs_control_init(struct net *net)
+{
+ int idx;
+ struct netns_ipvs *ipvs = net->ipvs;
+ struct ctl_table *tbl;
+
+ ipvs->ctl_stats=kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
+ if (ipvs->ctl_stats == NULL) {
+ pr_err("%s(): no memory.\n", __func__);
+ return -ENOMEM;
+ }
+
+ proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+ proc_net_fops_create(net, "ip_vs_stats",0, &ip_vs_stats_fops);
+ if (net != &init_net) {
+ tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_dup;
+ } else
+ tbl = vs_vars;
+ /* Initialize sysctl defaults */
+ idx = 0;
+ ipvs->sysctl_amemthresh = 1024;
+ tbl[idx++].data = &ipvs->sysctl_amemthresh;
+ ipvs->sysctl_am_droprate = 10;
+ tbl[idx++].data = &ipvs->sysctl_am_droprate;
+ ipvs->sysctl_drop_entry = 0;
+ tbl[idx++].data = &ipvs->sysctl_drop_entry;
+ ipvs->sysctl_drop_packet = 0;
+ tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+ tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+ ipvs->sysctl_secure_tcp = 0;
+ tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+ ipvs->sysctl_snat_reroute = 1;
+ tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+ ipvs->sysctl_cache_bypass = 0;
+ tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+ ipvs->sysctl_expire_nodest_conn = 0;
+ tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+ ipvs->sysctl_expire_quiescent_template = 0;
+ tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+ ipvs->sysctl_sync_threshold[0] = 3;
+ ipvs->sysctl_sync_threshold[1] = 50;
+ tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+ ipvs->sysctl_nat_icmp_send = 0;
+ tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+ ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, tbl);
+ if (ipvs->sysctl_hdr == NULL)
+ goto err_reg;
+ ipvs->sysctl_tbl = tbl;
+ /* Initialize net->ipvs->ctl_svc_table, net->ipvs->ctl_fwm_table, net->ipvs->ctl_rtable */
+ spin_lock_init(&ipvs->ctl_stats->lock);
+
+ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+ INIT_LIST_HEAD(&ipvs->ctl_svc_table[idx]);
+ INIT_LIST_HEAD(&ipvs->ctl_fwm_table[idx]);
+ }
+
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
+ INIT_LIST_HEAD(&ipvs->ctl_rtable[idx]);
+ }
+ INIT_LIST_HEAD(&ipvs->ctl_dest_trash);
+ atomic_set(&ipvs->ctl_ftpsvc_counter, 0);
+ atomic_set(&ipvs->ctl_nullsvc_counter, 0);
+ ip_vs_new_estimator(net, ipvs->ctl_stats);
+ return 0;
+
+err_reg:
+ if (net != &init_net)
+ kfree(tbl);
+err_dup:
+ kfree(ipvs->ctl_stats);
+ return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+ ip_vs_kill_estimator(net, net->ipvs->ctl_stats);
+ unregister_sysctl_table(net->ipvs->sysctl_hdr);
+ proc_net_remove(net, "ip_vs_stats");
+ proc_net_remove(net, "ip_vs");
+ ip_vs_trash_cleanup(net);
+ cancel_rearming_delayed_work(&defense_work);
+ cancel_work_sync(&defense_work.work);
+ kfree(net->ipvs->ctl_stats);
+ if ( net != &init_net )
+ kfree(net->ipvs->sysctl_tbl);
+}
+
+static struct pernet_operations ipvs_control_ops = {
+ .init = __ip_vs_control_init,
+ .exit = __ip_vs_control_cleanup,
+};
int __init ip_vs_control_init(void)
{
int ret;
- int idx;
EnterFunction(2);
- /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
- for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
- INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
- }
- for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
- INIT_LIST_HEAD(&ip_vs_rtable[idx]);
- }
+ ret = register_pernet_subsys(&ipvs_control_ops);
+ if (ret)
+ return ret;
smp_wmb();
ret = nf_register_sockopt(&ip_vs_sockopts);
if (ret) {
pr_err("cannot register sockopt.\n");
+ unregister_pernet_subsys(&ipvs_control_ops);
return ret;
}
ret = ip_vs_genl_register();
if (ret) {
pr_err("cannot register Generic Netlink interface.\n");
+ unregister_pernet_subsys(&ipvs_control_ops);
nf_unregister_sockopt(&ip_vs_sockopts);
return ret;
}
- proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
- proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
- sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
- ip_vs_new_estimator(&ip_vs_stats);
-
/* Hook the defense timer */
schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
@@ -3431,13 +3517,7 @@ int __init ip_vs_control_init(void)
void ip_vs_control_cleanup(void)
{
EnterFunction(2);
- ip_vs_trash_cleanup();
- cancel_rearming_delayed_work(&defense_work);
- cancel_work_sync(&defense_work.work);
- ip_vs_kill_estimator(&ip_vs_stats);
- unregister_sysctl_table(sysctl_header);
- proc_net_remove(&init_net, "ip_vs_stats");
- proc_net_remove(&init_net, "ip_vs");
+ unregister_pernet_subsys(&ipvs_control_ops);
ip_vs_genl_unregister();
nf_unregister_sockopt(&ip_vs_sockopts);
LeaveFunction(2);
^ permalink raw reply [flat|nested] 14+ messages in thread* [rfc v2 07/10] ipvs network name space aware: est
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (5 preceding siblings ...)
2010-10-22 20:09 ` [rfc v2 06/10] ipvs network name space aware: ctl Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 08/10] ipvs network name space aware: ftp Simon Horman
` (4 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs-netns-6.patch --]
[-- Type: text/plain, Size: 5906 bytes --]
This patch just contains ip_vs_est.c
There is one estimator i.e not one per netns
When est runs it loops all netns
for_each_net(net) { ... }
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ff28801..e8c185d 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,13 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
*
+ * Network name space (netns) aware.
+ * Global data moved to netns i.e struct netns_ipvs
+ * Affected data: est_list and est_lock.
+ * estimation_timer() runs with a common timer, but
+ * do update every netns on timeout.
*/
#define KMSG_COMPONENT "IPVS"
@@ -45,13 +50,13 @@
rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
* A lot code is taken from net/sched/estimator.c
+
+ * netns: estimation_timer runs every netns
*/
static void estimation_timer(unsigned long arg);
-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
static void estimation_timer(unsigned long arg)
@@ -62,50 +67,55 @@ static void estimation_timer(unsigned long arg)
u32 n_inpkts, n_outpkts;
u64 n_inbytes, n_outbytes;
u32 rate;
-
- spin_lock(&est_lock);
- list_for_each_entry(e, &est_list, list) {
- s = container_of(e, struct ip_vs_stats, est);
-
- spin_lock(&s->lock);
- n_conns = s->ustats.conns;
- n_inpkts = s->ustats.inpkts;
- n_outpkts = s->ustats.outpkts;
- n_inbytes = s->ustats.inbytes;
- n_outbytes = s->ustats.outbytes;
-
- /* scaled by 2^10, but divided 2 seconds */
- rate = (n_conns - e->last_conns)<<9;
- e->last_conns = n_conns;
- e->cps += ((long)rate - (long)e->cps)>>2;
- s->ustats.cps = (e->cps+0x1FF)>>10;
-
- rate = (n_inpkts - e->last_inpkts)<<9;
- e->last_inpkts = n_inpkts;
- e->inpps += ((long)rate - (long)e->inpps)>>2;
- s->ustats.inpps = (e->inpps+0x1FF)>>10;
-
- rate = (n_outpkts - e->last_outpkts)<<9;
- e->last_outpkts = n_outpkts;
- e->outpps += ((long)rate - (long)e->outpps)>>2;
- s->ustats.outpps = (e->outpps+0x1FF)>>10;
-
- rate = (n_inbytes - e->last_inbytes)<<4;
- e->last_inbytes = n_inbytes;
- e->inbps += ((long)rate - (long)e->inbps)>>2;
- s->ustats.inbps = (e->inbps+0xF)>>5;
-
- rate = (n_outbytes - e->last_outbytes)<<4;
- e->last_outbytes = n_outbytes;
- e->outbps += ((long)rate - (long)e->outbps)>>2;
- s->ustats.outbps = (e->outbps+0xF)>>5;
- spin_unlock(&s->lock);
+ struct net *net;
+ struct netns_ipvs *ipvs;
+
+ for_each_net(net) {
+ ipvs = net->ipvs;
+ spin_lock(&ipvs->est_lock);
+ list_for_each_entry(e, &ipvs->est_list, list) {
+ s = container_of(e, struct ip_vs_stats, est);
+
+ spin_lock(&s->lock);
+ n_conns = s->ustats.conns;
+ n_inpkts = s->ustats.inpkts;
+ n_outpkts = s->ustats.outpkts;
+ n_inbytes = s->ustats.inbytes;
+ n_outbytes = s->ustats.outbytes;
+
+ /* scaled by 2^10, but divided 2 seconds */
+ rate = (n_conns - e->last_conns)<<9;
+ e->last_conns = n_conns;
+ e->cps += ((long)rate - (long)e->cps)>>2;
+ s->ustats.cps = (e->cps+0x1FF)>>10;
+
+ rate = (n_inpkts - e->last_inpkts)<<9;
+ e->last_inpkts = n_inpkts;
+ e->inpps += ((long)rate - (long)e->inpps)>>2;
+ s->ustats.inpps = (e->inpps+0x1FF)>>10;
+
+ rate = (n_outpkts - e->last_outpkts)<<9;
+ e->last_outpkts = n_outpkts;
+ e->outpps += ((long)rate - (long)e->outpps)>>2;
+ s->ustats.outpps = (e->outpps+0x1FF)>>10;
+
+ rate = (n_inbytes - e->last_inbytes)<<4;
+ e->last_inbytes = n_inbytes;
+ e->inbps += ((long)rate - (long)e->inbps)>>2;
+ s->ustats.inbps = (e->inbps+0xF)>>5;
+
+ rate = (n_outbytes - e->last_outbytes)<<4;
+ e->last_outbytes = n_outbytes;
+ e->outbps += ((long)rate - (long)e->outbps)>>2;
+ s->ustats.outbps = (e->outbps+0xF)>>5;
+ spin_unlock(&s->lock);
+ }
+ spin_unlock(&ipvs->est_lock);
}
- spin_unlock(&est_lock);
mod_timer(&est_timer, jiffies + 2*HZ);
}
-void ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
@@ -126,18 +136,18 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
est->last_outbytes = stats->ustats.outbytes;
est->outbps = stats->ustats.outbps<<5;
- spin_lock_bh(&est_lock);
- list_add(&est->list, &est_list);
- spin_unlock_bh(&est_lock);
+ spin_lock_bh(&net->ipvs->est_lock);
+ list_add(&est->list, &net->ipvs->est_list);
+ spin_unlock_bh(&net->ipvs->est_lock);
}
-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
- spin_lock_bh(&est_lock);
+ spin_lock_bh(&net->ipvs->est_lock);
list_del(&est->list);
- spin_unlock_bh(&est_lock);
+ spin_unlock_bh(&net->ipvs->est_lock);
}
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@@ -156,14 +166,31 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
est->inbps = 0;
est->outbps = 0;
}
+static int __net_init __ip_vs_estimator_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->ipvs->est_list);
+ spin_lock_init(&net->ipvs->est_lock);
+ return 0;
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+ .init = __ip_vs_estimator_init,
+// .exit = __ip_vs_estimator_cleanup,
+};
int __init ip_vs_estimator_init(void)
{
+ int rv;
+
+ rv = register_pernet_subsys(&ip_vs_app_ops);
+ if(rv < 0)
+ return rv;
mod_timer(&est_timer, jiffies + 2 * HZ);
- return 0;
+ return rv;
}
void ip_vs_estimator_cleanup(void)
{
del_timer_sync(&est_timer);
+ unregister_pernet_subsys(&ip_vs_app_ops);
}
--
Regards
Hans Schillstrom <hans.schillstrom@ericsson.com>
^ permalink raw reply related [flat|nested] 14+ messages in thread* [rfc v2 08/10] ipvs network name space aware: ftp
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (6 preceding siblings ...)
2010-10-22 20:09 ` [rfc v2 07/10] ipvs network name space aware: est Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 09/10] ipvs network name space aware: proto Simon Horman
` (3 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs-netns-7.patch --]
[-- Type: text/plain, Size: 4604 bytes --]
This patch just contains ip_vs_ftp.c
minor changes.
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_ftp.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_ftp.c 2010-10-22 20:28:10.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_ftp.c 2010-10-22 20:30:58.000000000 +0200
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_ap
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
+ struct net *net = dev_net(skb->dev);
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@@ -197,13 +198,14 @@ static int ip_vs_ftp_out(struct ip_vs_ap
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, iph->protocol,
+ ip_vs_conn_fill_param(net, AF_INET, iph->protocol,
&from, port, &cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p);
}
if (!n_cp) {
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+ ip_vs_conn_fill_param(net, AF_INET, IPPROTO_TCP,
+ &cp->caddr,
0, &cp->vaddr, port, &p);
n_cp = ip_vs_conn_new(&p, &from, port,
IP_VS_CONN_F_NO_CPORT |
@@ -258,7 +260,7 @@ static int ip_vs_ftp_out(struct ip_vs_ap
*/
cp->app_data = NULL;
- ip_vs_tcp_conn_listen(n_cp);
+ ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return ret;
}
@@ -287,6 +289,7 @@ static int ip_vs_ftp_in(struct ip_vs_app
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
+ struct net *net = dev_net(skb->dev);
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@@ -358,7 +361,7 @@ static int ip_vs_ftp_in(struct ip_vs_app
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
+ ip_vs_conn_fill_param(net, AF_INET, iph->protocol, &to, port,
&cp->vaddr, htons(ntohs(cp->vport)-1),
&p);
n_cp = ip_vs_conn_in_get(&p);
@@ -377,7 +380,7 @@ static int ip_vs_ftp_in(struct ip_vs_app
/*
* Move tunnel to listen state
*/
- ip_vs_tcp_conn_listen(n_cp);
+ ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return 1;
@@ -398,23 +401,22 @@ static struct ip_vs_app ip_vs_ftp = {
.pkt_in = ip_vs_ftp_in,
};
-
/*
- * ip_vs_ftp initialization
+ * per netns ip_vs_ftp initialization
*/
-static int __init ip_vs_ftp_init(void)
+static int __net_init __ip_vs_ftp_init(struct net *net)
{
int i, ret;
struct ip_vs_app *app = &ip_vs_ftp;
- ret = register_ip_vs_app(app);
+ ret = register_ip_vs_app(net, app);
if (ret)
return ret;
for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
if (!ports[i])
continue;
- ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+ ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
if (ret)
break;
pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +424,39 @@ static int __init ip_vs_ftp_init(void)
}
if (ret)
- unregister_ip_vs_app(app);
+ unregister_ip_vs_app(net, app);
return ret;
}
+/*
+ * netns exit
+ */
+static void __ip_vs_ftp_exit(struct net *net)
+{
+ struct ip_vs_app *app = &ip_vs_ftp;
+
+ unregister_ip_vs_app(net, app);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+ .init = __ip_vs_ftp_init,
+ .exit = __ip_vs_ftp_exit,
+};
+int __init ip_vs_ftp_init(void)
+{
+ int rv;
+
+ rv = register_pernet_subsys(&ip_vs_ftp_ops);
+ return rv;
+}
/*
* ip_vs_ftp finish.
*/
static void __exit ip_vs_ftp_exit(void)
{
- unregister_ip_vs_app(&ip_vs_ftp);
+ unregister_pernet_subsys(&ip_vs_ftp_ops);
}
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_nfct.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_nfct.c 2010-10-22 20:28:10.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_nfct.c 2010-10-22 20:29:28.000000000 +0200
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(s
struct nf_conntrack_tuple *orig, new_reply;
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = nf_ct_net(ct);
if (exp->tuple.src.l3num != PF_INET)
return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(s
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+ ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p);
^ permalink raw reply [flat|nested] 14+ messages in thread* [rfc v2 09/10] ipvs network name space aware: proto
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (7 preceding siblings ...)
2010-10-22 20:09 ` [rfc v2 08/10] ipvs network name space aware: ftp Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-22 20:09 ` [rfc v2 10/10] ipvs network name space aware: sync and xmit Simon Horman
` (2 subsequent siblings)
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs-netns-8.patch --]
[-- Type: text/plain, Size: 30032 bytes --]
This patch contains all proto files
All timeouts are moved to ipvs struct.
Global "timeout tables" are used as default values only.
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_proto.c 2010-10-22 20:21:44.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto.c 2010-10-22 20:24:53.000000000 +0200
@@ -38,7 +38,6 @@
* ipvs protocol table.
*/
-#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
#define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1))
static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
@@ -60,6 +59,30 @@ static int __used __init register_ip_vs_
return 0;
}
+/*
+ * register an ipvs protocols netns related data
+ */
+static int
+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp )
+{
+ unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+ struct ip_vs_proto_data *pd =
+ kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+
+ if (!pd) {
+ pr_err("%s(): no memory.\n", __func__);
+ return -ENOMEM;
+ }
+ pd->pp=pp; /* For speed issues */
+ pd->next = net->ipvs->proto_data_table[hash];
+ net->ipvs->proto_data_table[hash] = pd;
+ atomic_set(&pd->appcnt,0); /* Init app counter */
+
+ if (pp->init_netns != NULL)
+ pp->init_netns(net, pd);
+
+ return 0;
+}
/*
* unregister an ipvs protocol
@@ -81,6 +104,28 @@ static int unregister_ip_vs_protocol(str
return -ESRCH;
}
+/*
+ * unregister an ipvs protocols netns data
+ */
+static int
+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+{
+ struct ip_vs_proto_data **pd_p;
+ unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+ pd_p = &net->ipvs->proto_data_table[hash];
+ for (; *pd_p; pd_p = &(*pd_p)->next) {
+ if (*pd_p == pd) {
+ *pd_p = pd->next;
+ if (pd->pp->exit_netns != NULL)
+ pd->pp->exit_netns(net, pd);
+ kfree(pd);
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
/*
@@ -100,6 +145,24 @@ struct ip_vs_protocol * ip_vs_proto_get(
}
EXPORT_SYMBOL(ip_vs_proto_get);
+/*
+ * get ip_vs_protocol object data by netns and proto
+ */
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+ struct ip_vs_proto_data *pd;
+ unsigned hash = IP_VS_PROTO_HASH(proto);
+ struct netns_ipvs *ipvs = net->ipvs;
+
+ for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
+ if (pd->pp->protocol == proto)
+ return pd;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);
/*
* Propagate event for state change to all protocols
@@ -118,8 +181,7 @@ void ip_vs_protocol_timeout_change(int f
}
-int *
-ip_vs_create_timeout_table(int *table, int size)
+int *ip_vs_create_timeout_table(const int *table, int size)
{
return kmemdup(table, size, GFP_ATOMIC);
}
@@ -235,7 +297,44 @@ ip_vs_tcpudp_debug_packet(int af, struct
#endif
ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
}
+static int __net_init __ip_vs_protocol_init(struct net *net)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
+ return 0;
+}
+
+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+{
+ struct ip_vs_proto_data *pd;
+ int i;
+ struct netns_ipvs *ipvs = net->ipvs;
+
+ /* unregister all the ipvs proto data for this netns */
+ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+ while ((pd = ipvs->proto_data_table[i]) != NULL)
+ unregister_ip_vs_proto_netns(net, pd);
+ }
+}
+
+static struct pernet_operations ipvs_proto_ops = {
+ .init = __ip_vs_protocol_init,
+ .exit = __ip_vs_protocol_cleanup,
+};
int __init ip_vs_protocol_init(void)
{
@@ -266,7 +365,7 @@ int __init ip_vs_protocol_init(void)
#endif
pr_info("Registered protocols (%s)\n", &protocols[2]);
- return 0;
+ return register_pernet_subsys(&ipvs_proto_ops);
}
@@ -275,6 +374,7 @@ void ip_vs_protocol_cleanup(void)
struct ip_vs_protocol *pp;
int i;
+ unregister_pernet_subsys(&ipvs_proto_ops);
/* unregister all the ipvs protocols */
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
while ((pp = ip_vs_proto_table[i]) != NULL)
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_proto_ah_esp.c 2010-10-22 20:21:44.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto_ah_esp.c 2010-10-22 20:27:12.000000000 +0200
@@ -41,15 +41,16 @@ struct isakmp_hdr {
#define PORT_ISAKMP 500
static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+ const struct ip_vs_iphdr *iph,
int inverse, struct ip_vs_conn_param *p)
{
if (likely(!inverse))
- ip_vs_conn_fill_param(af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p);
else
- ip_vs_conn_fill_param(af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p);
}
@@ -61,8 +62,9 @@ ah_esp_conn_in_get(int af, const struct
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = dev_net(skb->dev);
- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_in_get(&p);
if (!cp) {
/*
@@ -90,8 +92,9 @@ ah_esp_conn_out_get(int af, const struct
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = dev_net(skb->dev);
- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_out_get(&p);
if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
@@ -149,7 +152,6 @@ struct ip_vs_protocol ip_vs_protocol_ah
.app_conn_bind = NULL,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
- .set_state_timeout = NULL,
};
#endif
@@ -159,8 +161,8 @@ struct ip_vs_protocol ip_vs_protocol_esp
.protocol = IPPROTO_ESP,
.num_states = 1,
.dont_defrag = 1,
- .init = ah_esp_init,
- .exit = ah_esp_exit,
+ .init = NULL,
+ .exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto_sctp.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_proto_sctp.c 2010-10-22 20:21:44.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto_sctp.c 2010-10-22 20:24:53.000000000 +0200
@@ -16,6 +16,7 @@ sctp_conn_schedule(int af, struct sk_buf
sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph;
struct ip_vs_iphdr iph;
+ struct net *net = dev_net(skb->dev);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -29,7 +30,7 @@ sctp_conn_schedule(int af, struct sk_buf
return 0;
if ((sch->type == SCTP_CID_INIT) &&
- (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+ (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
&iph.daddr, sh->dest))) {
int ignored;
@@ -226,7 +227,7 @@ static enum ipvs_sctp_event_t sctp_event
IP_VS_SCTP_EVE_SHUT_COM_CLI,
};
-static struct ipvs_sctp_nextstate
+static const struct ipvs_sctp_nextstate
sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = {
/*
* STATE : IP_VS_SCTP_S_NONE
@@ -855,7 +856,7 @@ static struct ipvs_sctp_nextstate
/*
* Timeout table[state]
*/
-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
[IP_VS_SCTP_S_NONE] = 2 * HZ,
[IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ,
[IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ,
@@ -903,6 +904,7 @@ static void sctp_timeout_change(struct i
{
}
+/*
static int
sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
@@ -910,7 +912,7 @@ sctp_set_state_timeout(struct ip_vs_prot
return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
sctp_state_name_table, sname, to);
}
-
+*/
static inline int
set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
int direction, const struct sk_buff *skb)
@@ -919,6 +921,8 @@ set_sctp_state(struct ip_vs_protocol *pp
unsigned char chunk_type;
int event, next_state;
int ihl;
+ struct net *net = dev_net(skb->dev);
+ struct ip_vs_proto_data *pd;
#ifdef CONFIG_IP_VS_IPV6
ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
@@ -994,10 +998,13 @@ set_sctp_state(struct ip_vs_protocol *pp
}
}
}
+ pd = ip_vs_proto_data_get(net, pp->protocol);
+ if(likely(pd))
+ cp->timeout = pd->timeout_table[cp->state = next_state];
+ else /* What to do ? */
+ cp->timeout = sctp_timeouts[cp->state = next_state];
- cp->timeout = pp->timeout_table[cp->state = next_state];
-
- return 1;
+ return 1;
}
static int
@@ -1013,59 +1020,54 @@ sctp_state_transition(struct ip_vs_conn
return ret;
}
-/*
- * Hash table for SCTP application incarnations
- */
-#define SCTP_APP_TAB_BITS 4
-#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
-#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
-
-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(sctp_app_lock);
-
static inline __u16 sctp_app_hashkey(__be16 port)
{
return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
& SCTP_APP_TAB_MASK;
}
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net->ipvs;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
hash = sctp_app_hashkey(port);
- spin_lock_bh(&sctp_app_lock);
- list_for_each_entry(i, &sctp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->sctp_app_lock);
+ list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &sctp_apps[hash]);
- atomic_inc(&ip_vs_protocol_sctp.appcnt);
+ list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+ atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&sctp_app_lock);
+ spin_unlock_bh(&ipvs->sctp_app_lock);
return ret;
}
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- spin_lock_bh(&sctp_app_lock);
- atomic_dec(&ip_vs_protocol_sctp.appcnt);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+
+ spin_lock_bh(&net->ipvs->sctp_app_lock);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&sctp_app_lock);
+ spin_unlock_bh(&net->ipvs->sctp_app_lock);
}
-static int sctp_app_conn_bind(struct ip_vs_conn *cp)
+static int sctp_app_conn_bind(struct net *net, struct ip_vs_conn *cp)
{
int hash;
struct ip_vs_app *inc;
int result = 0;
+ struct netns_ipvs *ipvs = net->ipvs;
/* Default binding: bind app only for NAT */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
@@ -1073,12 +1075,12 @@ static int sctp_app_conn_bind(struct ip_
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- spin_lock(&sctp_app_lock);
- list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+ spin_lock(&ipvs->sctp_app_lock);
+ list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&sctp_app_lock);
+ spin_unlock(&ipvs->sctp_app_lock);
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -1094,43 +1096,50 @@ static int sctp_app_conn_bind(struct ip_
goto out;
}
}
- spin_unlock(&sctp_app_lock);
+ spin_unlock(&ipvs->sctp_app_lock);
out:
return result;
}
-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ * timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(sctp_apps);
- pp->timeout_table = sctp_timeouts;
+ ip_vs_init_hash_table(net->ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+ spin_lock_init(&net->ipvs->tcp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table(sctp_timeouts,
+ sizeof(sctp_timeouts));
}
-
-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
-
+ kfree(pd->timeout_table);
}
+
struct ip_vs_protocol ip_vs_protocol_sctp = {
- .name = "SCTP",
- .protocol = IPPROTO_SCTP,
- .num_states = IP_VS_SCTP_S_LAST,
- .dont_defrag = 0,
- .appcnt = ATOMIC_INIT(0),
- .init = ip_vs_sctp_init,
- .exit = ip_vs_sctp_exit,
- .register_app = sctp_register_app,
+ .name = "SCTP",
+ .protocol = IPPROTO_SCTP,
+ .num_states = IP_VS_SCTP_S_LAST,
+ .dont_defrag = 0,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __ip_vs_sctp_init,
+ .exit_netns = __ip_vs_sctp_exit,
+ .register_app = sctp_register_app,
.unregister_app = sctp_unregister_app,
- .conn_schedule = sctp_conn_schedule,
- .conn_in_get = ip_vs_conn_in_get_proto,
- .conn_out_get = ip_vs_conn_out_get_proto,
- .snat_handler = sctp_snat_handler,
- .dnat_handler = sctp_dnat_handler,
- .csum_check = sctp_csum_check,
- .state_name = sctp_state_name,
+ .conn_schedule = sctp_conn_schedule,
+ .conn_in_get = ip_vs_conn_in_get_proto,
+ .conn_out_get = ip_vs_conn_out_get_proto,
+ .snat_handler = sctp_snat_handler,
+ .dnat_handler = sctp_dnat_handler,
+ .csum_check = sctp_csum_check,
+ .state_name = sctp_state_name,
.state_transition = sctp_state_transition,
- .app_conn_bind = sctp_app_conn_bind,
- .debug_packet = ip_vs_tcpudp_debug_packet,
+ .app_conn_bind = sctp_app_conn_bind,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = sctp_timeout_change,
- .set_state_timeout = sctp_set_state_timeout,
+/* .set_state_timeout = sctp_set_state_timeout, */
};
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto_tcp.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_proto_tcp.c 2010-10-22 20:21:44.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto_tcp.c 2010-10-22 20:24:53.000000000 +0200
@@ -9,7 +9,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *
+ * Network name space (netns) aware.
+ * Global data moved to netns i.e struct netns_ipvs
+ * tcp_timeouts table has copy per netns in a hash table per
+ * protocol ip_vs_proto_data and is handled by netns
*
*/
@@ -34,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
struct ip_vs_iphdr iph;
+ struct net *net = dev_net(skb->dev);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -45,8 +51,8 @@ tcp_conn_schedule(int af, struct sk_buff
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
if (th->syn &&
- (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
- th->dest))) {
+ (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+ &iph.daddr, th->dest))) {
int ignored;
if (ip_vs_todrop()) {
@@ -338,7 +344,7 @@ static const int tcp_state_off[IP_VS_DIR
/*
* Timeout table[state]
*/
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_NONE] = 2*HZ,
[IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
[IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
@@ -452,13 +458,13 @@ static void tcp_timeout_change(struct ip
*/
tcp_state_table = (on? tcp_states_dos : tcp_states);
}
-
+/* Removed not used
static int
tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
tcp_state_name_table, sname, to);
-}
+} */
static inline int tcp_state_idx(struct tcphdr *th)
{
@@ -474,12 +480,13 @@ static inline int tcp_state_idx(struct t
}
static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct net *net, struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
int direction, struct tcphdr *th)
{
int state_idx;
int new_state = IP_VS_TCP_S_CLOSE;
int state_off = tcp_state_off[direction];
+ struct ip_vs_proto_data *pd;
/*
* Update state offset to INPUT_ONLY if necessary
@@ -534,8 +541,12 @@ set_tcp_state(struct ip_vs_protocol *pp,
}
}
}
-
- cp->timeout = pp->timeout_table[cp->state = new_state];
+ pd = ip_vs_proto_data_get(net, pp->protocol);
+ if(likely(pd))
+ cp->timeout = pd->timeout_table[cp->state = new_state];
+ else /* What to do ? */
+ cp->timeout = tcp_timeouts[cp->state = new_state];
+ IP_VS_DBG(8, "%s() timeout=%lu, pd=%p def=%d\n", __func__, cp->timeout, pd->timeout_table, tcp_timeouts[new_state]);
}
@@ -547,6 +558,7 @@ tcp_state_transition(struct ip_vs_conn *
const struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
+ struct net *net = dev_net(skb->dev);
struct tcphdr _tcph, *th;
#ifdef CONFIG_IP_VS_IPV6
@@ -560,7 +572,7 @@ tcp_state_transition(struct ip_vs_conn *
return 0;
spin_lock(&cp->lock);
- set_tcp_state(pp, cp, direction, th);
+ set_tcp_state(net, pp, cp, direction, th);
spin_unlock(&cp->lock);
return 1;
@@ -570,12 +582,6 @@ tcp_state_transition(struct ip_vs_conn *
/*
* Hash table for TCP application incarnations
*/
-#define TCP_APP_TAB_BITS 4
-#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
-#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
static inline __u16 tcp_app_hashkey(__be16 port)
{
@@ -584,47 +590,50 @@ static inline __u16 tcp_app_hashkey(__be
}
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net->ipvs;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
- spin_lock_bh(&tcp_app_lock);
- list_for_each_entry(i, &tcp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->tcp_app_lock);
+ list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &tcp_apps[hash]);
- atomic_inc(&ip_vs_protocol_tcp.appcnt);
+ list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+ atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&tcp_app_lock);
+ spin_unlock_bh(&ipvs->tcp_app_lock);
return ret;
}
-static void
-tcp_unregister_app(struct ip_vs_app *inc)
+static void tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- spin_lock_bh(&tcp_app_lock);
- atomic_dec(&ip_vs_protocol_tcp.appcnt);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
+ spin_lock_bh(&net->ipvs->tcp_app_lock);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&tcp_app_lock);
+ spin_unlock_bh(&net->ipvs->tcp_app_lock);
}
-static int
-tcp_app_conn_bind(struct ip_vs_conn *cp)
+static int tcp_app_conn_bind(struct net *net, struct ip_vs_conn *cp)
{
int hash;
struct ip_vs_app *inc;
int result = 0;
+ struct netns_ipvs *ipvs = net->ipvs;
/* Default binding: bind app only for NAT */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
@@ -633,12 +642,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- spin_lock(&tcp_app_lock);
- list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+ spin_lock(&ipvs->tcp_app_lock);
+ list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&tcp_app_lock);
+ spin_unlock(&ipvs->tcp_app_lock);
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -655,7 +664,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&tcp_app_lock);
+ spin_unlock(&ipvs->tcp_app_lock);
out:
return result;
@@ -665,24 +674,32 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/*
* Set LISTEN timeout. (ip_vs_conn_put will setup timer)
*/
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
spin_lock(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
- cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+ cp->timeout = ( pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+ : tcp_timeouts[IP_VS_TCP_S_LISTEN] );
spin_unlock(&cp->lock);
}
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ * timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(tcp_apps);
- pp->timeout_table = tcp_timeouts;
+ ip_vs_init_hash_table(net->ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+ spin_lock_init(&net->ipvs->tcp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table(tcp_timeouts,
+ sizeof(tcp_timeouts));
}
-
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
+ kfree(pd->timeout_table);
}
@@ -691,9 +708,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp
.protocol = IPPROTO_TCP,
.num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
- .appcnt = ATOMIC_INIT(0),
- .init = ip_vs_tcp_init,
- .exit = ip_vs_tcp_exit,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __ip_vs_tcp_init,
+ .exit_netns = __ip_vs_tcp_exit,
.register_app = tcp_register_app,
.unregister_app = tcp_unregister_app,
.conn_schedule = tcp_conn_schedule,
@@ -707,5 +725,5 @@ struct ip_vs_protocol ip_vs_protocol_tcp
.app_conn_bind = tcp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = tcp_timeout_change,
- .set_state_timeout = tcp_set_state_timeout,
+/* .set_state_timeout = tcp_set_state_timeout, */
};
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto_udp.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_proto_udp.c 2010-10-22 20:21:44.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_proto_udp.c 2010-10-22 20:24:53.000000000 +0200
@@ -9,7 +9,10 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *
+ * Network name space (netns) aware.
+ * Global data moved to netns i.e struct netns_ipvs
*
*/
@@ -34,6 +37,7 @@ udp_conn_schedule(int af, struct sk_buff
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
struct ip_vs_iphdr iph;
+ struct net *net = dev_net(skb->dev);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -43,7 +47,7 @@ udp_conn_schedule(int af, struct sk_buff
return 0;
}
- svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+ svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
&iph.daddr, uh->dest);
if (svc) {
int ignored;
@@ -344,13 +348,6 @@ udp_csum_check(int af, struct sk_buff *s
* unregister_app or app_conn_bind is called each time.
*/
-#define UDP_APP_TAB_BITS 4
-#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
-#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
static inline __u16 udp_app_hashkey(__be16 port)
{
return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,47 +355,50 @@ static inline __u16 udp_app_hashkey(__be
}
-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net->ipvs;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
hash = udp_app_hashkey(port);
-
- spin_lock_bh(&udp_app_lock);
- list_for_each_entry(i, &udp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->udp_app_lock);
+ list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &udp_apps[hash]);
- atomic_inc(&ip_vs_protocol_udp.appcnt);
+ list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+ atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&udp_app_lock);
+ spin_unlock_bh(&ipvs->udp_app_lock);
return ret;
}
-static void
-udp_unregister_app(struct ip_vs_app *inc)
+static void udp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- spin_lock_bh(&udp_app_lock);
- atomic_dec(&ip_vs_protocol_udp.appcnt);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+
+ spin_lock_bh(&net->ipvs->udp_app_lock);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&udp_app_lock);
+ spin_unlock_bh(&net->ipvs->udp_app_lock);
}
-static int udp_app_conn_bind(struct ip_vs_conn *cp)
+static int udp_app_conn_bind(struct net *net, struct ip_vs_conn *cp)
{
int hash;
struct ip_vs_app *inc;
int result = 0;
+ struct netns_ipvs *ipvs = net->ipvs;
/* Default binding: bind app only for NAT */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
@@ -407,12 +407,12 @@ static int udp_app_conn_bind(struct ip_v
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- spin_lock(&udp_app_lock);
- list_for_each_entry(inc, &udp_apps[hash], p_list) {
+ spin_lock(&ipvs->udp_app_lock);
+ list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&udp_app_lock);
+ spin_unlock(&ipvs->udp_app_lock);
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -429,14 +429,14 @@ static int udp_app_conn_bind(struct ip_v
goto out;
}
}
- spin_unlock(&udp_app_lock);
+ spin_unlock(&ipvs->udp_app_lock);
out:
return result;
}
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
[IP_VS_UDP_S_NORMAL] = 5*60*HZ,
[IP_VS_UDP_S_LAST] = 2*HZ,
};
@@ -446,14 +446,20 @@ static const char *const udp_state_name_
[IP_VS_UDP_S_LAST] = "BUG!",
};
-
+/*
static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+udp_set_state_timeout(struct net *net, struct ip_vs_protocol *pp, char *sname,
+ int to)
{
- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
- udp_state_name_table, sname, to);
+ struct ip_vs_proto_data *pd=ip_vs_proto_data_get(net, IPPROTO_UDP);
+ if (pd)
+ return ip_vs_set_state_timeout(pd->timeout_table,
+ IP_VS_UDP_S_LAST,
+ udp_state_name_table, sname, to);
+ else
+ return -ENOENT;
}
-
+*/
static const char * udp_state_name(int state)
{
if (state >= IP_VS_UDP_S_LAST)
@@ -466,28 +472,40 @@ udp_state_transition(struct ip_vs_conn *
const struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
- cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+ struct net *net = dev_net(skb->dev);
+ struct ip_vs_proto_data *pd=ip_vs_proto_data_get(net, IPPROTO_UDP);
+ if(unlikely(pd))
+ return 0;
+
+ cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
return 1;
}
-
-static void udp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ * timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(udp_apps);
- pp->timeout_table = udp_timeouts;
+ ip_vs_init_hash_table(net->ipvs->udp_apps, UDP_APP_TAB_SIZE);
+ spin_lock_init(&net->ipvs->udp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table(udp_timeouts,
+ sizeof(udp_timeouts));
}
-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
+ kfree(pd->timeout_table);
}
-
struct ip_vs_protocol ip_vs_protocol_udp = {
.name = "UDP",
.protocol = IPPROTO_UDP,
.num_states = IP_VS_UDP_S_LAST,
.dont_defrag = 0,
- .init = udp_init,
- .exit = udp_exit,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __udp_init,
+ .exit_netns = __udp_exit,
.conn_schedule = udp_conn_schedule,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
@@ -501,5 +519,5 @@ struct ip_vs_protocol ip_vs_protocol_udp
.app_conn_bind = udp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL,
- .set_state_timeout = udp_set_state_timeout,
+/* .set_state_timeout = udp_set_state_timeout, */
};
^ permalink raw reply [flat|nested] 14+ messages in thread* [rfc v2 10/10] ipvs network name space aware: sync and xmit
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (8 preceding siblings ...)
2010-10-22 20:09 ` [rfc v2 09/10] ipvs network name space aware: proto Simon Horman
@ 2010-10-22 20:09 ` Simon Horman
2010-10-23 8:44 ` [rfc v2 00/10] ipvs network name space (netns) aware Hans Schillstrom
2010-10-23 9:04 ` Julian Anastasov
11 siblings, 0 replies; 14+ messages in thread
From: Simon Horman @ 2010-10-22 20:09 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Daniel Lezcano, Wensong Zhang
[-- Attachment #1: ipvs-netns-9.patch --]
[-- Type: text/plain, Size: 24661 bytes --]
This patch contains ip_vs_sync.c and ip_vs_xmit.c
There is one sync daemon per netns, and a number is prepended to its name.
(a kind of incarnation counter)
Part of the netns migration in ip_vs_xmit.c was done in the IPv6 tunnel patch,
so make sure that "[patch v4] ipvs: IPv6 tunnel mode" is applied
Signed-off-by:Hans Schillstrom <hans.schillstrom@ericsson.com>
---
* Add ipvs_skbnet() as suggested by Julian Anastasov
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_sync.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_sync.c 2010-10-22 21:38:57.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_sync.c 2010-10-22 21:39:08.000000000 +0200
@@ -74,6 +74,7 @@ struct ip_vs_sync_conn_options {
struct ip_vs_sync_thread_data {
struct socket *sock;
char *buf;
+ struct net *net;
};
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
@@ -113,9 +114,6 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */
};
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
struct ip_vs_sync_buff {
struct list_head list;
@@ -127,70 +125,41 @@ struct ip_vs_sync_buff {
unsigned char *end;
};
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
-/* multicast addr */
-static struct sockaddr_in mcast_addr = {
- .sin_family = AF_INET,
- .sin_port = cpu_to_be16(IP_VS_SYNC_PORT),
- .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
-};
-
-
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct net *net)
{
struct ip_vs_sync_buff *sb;
+ struct netns_ipvs *ipvs = net->ipvs;
- spin_lock_bh(&ip_vs_sync_lock);
- if (list_empty(&ip_vs_sync_queue)) {
+ spin_lock_bh(&ipvs->sync_lock);
+ if (list_empty(&ipvs->sync_queue)) {
sb = NULL;
} else {
- sb = list_entry(ip_vs_sync_queue.next,
+ sb = list_entry(ipvs->sync_queue.next,
struct ip_vs_sync_buff,
list);
list_del(&sb->list);
}
- spin_unlock_bh(&ip_vs_sync_lock);
+ spin_unlock_bh(&ipvs->sync_lock);
return sb;
}
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(struct net *net)
{
struct ip_vs_sync_buff *sb;
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+ if (!(sb->mesg=kmalloc(net->ipvs->sync_send_mesg_maxlen, GFP_ATOMIC))) {
kfree(sb);
return NULL;
}
sb->mesg->nr_conns = 0;
- sb->mesg->syncid = ip_vs_master_syncid;
+ sb->mesg->syncid = net->ipvs->master_syncid;
sb->mesg->size = 4;
sb->head = (unsigned char *)sb->mesg + 4;
- sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+ sb->end = (unsigned char *)sb->mesg + net->ipvs->sync_send_mesg_maxlen;
sb->firstuse = jiffies;
return sb;
}
@@ -201,14 +170,16 @@ static inline void ip_vs_sync_buff_relea
kfree(sb);
}
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct net *net, struct ip_vs_sync_buff *sb)
{
- spin_lock(&ip_vs_sync_lock);
- if (ip_vs_sync_state & IP_VS_STATE_MASTER)
- list_add_tail(&sb->list, &ip_vs_sync_queue);
+ struct netns_ipvs *ipvs = net->ipvs;
+
+ spin_lock(&ipvs->sync_lock);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&sb->list, &ipvs->sync_queue);
else
ip_vs_sync_buff_release(sb);
- spin_unlock(&ip_vs_sync_lock);
+ spin_unlock(&ipvs->sync_lock);
}
/*
@@ -216,18 +187,19 @@ static inline void sb_queue_tail(struct
* than the specified time or the specified time is zero.
*/
static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct net *net, unsigned long time)
{
struct ip_vs_sync_buff *sb;
+ struct netns_ipvs *ipvs = net->ipvs;
- spin_lock_bh(&curr_sb_lock);
- if (curr_sb && (time == 0 ||
- time_before(jiffies - curr_sb->firstuse, time))) {
- sb = curr_sb;
- curr_sb = NULL;
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ if (ipvs->sync_buff && (time == 0 ||
+ time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
+ sb = ipvs->sync_buff;
+ ipvs->sync_buff = NULL;
} else
sb = NULL;
- spin_unlock_bh(&curr_sb_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return sb;
}
@@ -236,16 +208,17 @@ get_curr_sync_buff(unsigned long time)
* Add an ip_vs_conn information into the current sync_buff.
* Called by ip_vs_in.
*/
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
{
struct ip_vs_sync_mesg *m;
struct ip_vs_sync_conn *s;
int len;
+ struct netns_ipvs *ipvs = net->ipvs;
- spin_lock(&curr_sb_lock);
- if (!curr_sb) {
- if (!(curr_sb=ip_vs_sync_buff_create())) {
- spin_unlock(&curr_sb_lock);
+ spin_lock(&ipvs->sync_buff_lock);
+ if (!ipvs->sync_buff) {
+ if (!(ipvs->sync_buff=ip_vs_sync_buff_create(net))) {
+ spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@@ -253,8 +226,8 @@ void ip_vs_sync_conn(struct ip_vs_conn *
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
- m = curr_sb->mesg;
- s = (struct ip_vs_sync_conn *)curr_sb->head;
+ m = ipvs->sync_buff->mesg;
+ s = (struct ip_vs_sync_conn *)ipvs->sync_buff->head;
/* copy members */
s->protocol = cp->protocol;
@@ -274,28 +247,28 @@ void ip_vs_sync_conn(struct ip_vs_conn *
m->nr_conns++;
m->size += len;
- curr_sb->head += len;
+ ipvs->sync_buff->head += len;
/* check if there is a space for next one */
- if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
- sb_queue_tail(curr_sb);
- curr_sb = NULL;
+ if (ipvs->sync_buff->head+FULL_CONN_SIZE > ipvs->sync_buff->end) {
+ sb_queue_tail(net, ipvs->sync_buff);
+ ipvs->sync_buff = NULL;
}
- spin_unlock(&curr_sb_lock);
+ spin_unlock(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
if (cp->control)
- ip_vs_sync_conn(cp->control);
+ ip_vs_sync_conn(net, cp->control);
}
static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
+ip_vs_conn_fill_param_sync(struct net *net, int af, int protocol,
const union nf_inet_addr *caddr, __be16 cport,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
/* XXX: Need to take into account persistence engine */
- ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+ ip_vs_conn_fill_param(net, af, protocol, caddr, cport, vaddr, vport, p);
return 0;
}
@@ -303,13 +276,15 @@ ip_vs_conn_fill_param_sync(int af, int p
* Process received multicast message and create the corresponding
* ip_vs_conn entries.
*/
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void
+ip_vs_process_message(struct net *net, const char *buffer, const size_t buflen)
{
struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
struct ip_vs_sync_conn *s;
struct ip_vs_sync_conn_options *opt;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
struct ip_vs_dest *dest;
struct ip_vs_conn_param param;
char *p;
@@ -329,7 +304,7 @@ static void ip_vs_process_message(const
}
/* SyncID sanity check */
- if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
+ if (net->ipvs->backup_syncid != 0 && m->syncid != net->ipvs->backup_syncid) {
IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
m->syncid);
return;
@@ -382,7 +357,8 @@ static void ip_vs_process_message(const
}
{
- if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
+ if (ip_vs_conn_fill_param_sync(net, AF_INET,
+ s->protocol,
(union nf_inet_addr *)&s->caddr,
s->cport,
(union nf_inet_addr *)&s->vaddr,
@@ -401,7 +377,7 @@ static void ip_vs_process_message(const
* If it is not found the connection will remain unbound
* but still handled.
*/
- dest = ip_vs_find_dest(AF_INET,
+ dest = ip_vs_find_dest(net, AF_INET,
(union nf_inet_addr *)&s->daddr,
s->dport,
(union nf_inet_addr *)&s->vaddr,
@@ -429,7 +405,7 @@ static void ip_vs_process_message(const
return;
}
} else if (!cp->dest) {
- dest = ip_vs_try_bind_dest(cp);
+ dest = ip_vs_try_bind_dest(net, cp);
if (dest)
atomic_dec(&dest->refcnt);
} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
@@ -460,7 +436,7 @@ static void ip_vs_process_message(const
if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+ atomic_set(&cp->in_pkts, net->ipvs->sysctl_sync_threshold[0]);
cp->state = state;
cp->old_state = cp->state;
/*
@@ -469,8 +445,9 @@ static void ip_vs_process_message(const
* virtual service. If needed, we can do it for
* non-fwmark persistent services.
*/
- if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
- cp->timeout = pp->timeout_table[state];
+ pd = ip_vs_proto_data_get(net,cp->protocol);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table )
+ cp->timeout = pd->timeout_table[state];
else
cp->timeout = (3*60*HZ);
ip_vs_conn_put(cp);
@@ -511,8 +488,9 @@ static int set_mcast_if(struct sock *sk,
{
struct net_device *dev;
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ if ((dev = __dev_get_by_name(net, ifname)) == NULL)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +509,31 @@ static int set_mcast_if(struct sock *sk,
* Set the maximum length of sync message according to the
* specified interface's MTU.
*/
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
{
struct net_device *dev;
int num;
+ struct netns_ipvs *ipvs = net->ipvs;
if (sync_state == IP_VS_STATE_MASTER) {
- if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+ if ((dev = __dev_get_by_name(net, ipvs->master_mcast_ifn)) == NULL)
return -ENODEV;
num = (dev->mtu - sizeof(struct iphdr) -
sizeof(struct udphdr) -
SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
- sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+ ipvs->sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
IP_VS_DBG(7, "setting the maximum length of sync sending "
- "message %d.\n", sync_send_mesg_maxlen);
+ "message %d.\n", ipvs->sync_send_mesg_maxlen);
} else if (sync_state == IP_VS_STATE_BACKUP) {
- if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+ if ((dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn)) == NULL)
return -ENODEV;
- sync_recv_mesg_maxlen = dev->mtu -
+ ipvs->sync_recv_mesg_maxlen = dev->mtu -
sizeof(struct iphdr) - sizeof(struct udphdr);
IP_VS_DBG(7, "setting the maximum length of sync receiving "
- "message %d.\n", sync_recv_mesg_maxlen);
+ "message %d.\n", ipvs->sync_recv_mesg_maxlen);
}
return 0;
@@ -572,11 +551,12 @@ join_mcast_group(struct sock *sk, struct
struct ip_mreqn mreq;
struct net_device *dev;
int ret;
+ struct net *net = sock_net(sk);
memset(&mreq, 0, sizeof(mreq));
memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ if ((dev = __dev_get_by_name(net, ifname)) == NULL)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -596,8 +576,9 @@ static int bind_mcastif_addr(struct sock
struct net_device *dev;
__be32 addr;
struct sockaddr_in sin;
+ struct net *net = sock_net(sock->sk);
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ if ((dev = __dev_get_by_name(net, ifname)) == NULL)
return -ENODEV;
addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,19 +600,19 @@ static int bind_mcastif_addr(struct sock
/*
* Set up sending multicast socket over UDP
*/
-static struct socket * make_send_sock(void)
+static struct socket * make_send_sock(struct net *net)
{
struct socket *sock;
int result;
- /* First create a socket */
- result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ /* First create a socket in current netns */
+ result = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
}
- result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+ result = set_mcast_if(sock->sk, net->ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
goto error;
@@ -640,13 +621,14 @@ static struct socket * make_send_sock(vo
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
- result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+ result = bind_mcastif_addr(sock, net->ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error binding address of the mcast interface\n");
goto error;
}
- result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+ result = sock->ops->connect(sock,
+ (struct sockaddr *) &net->ipvs->sync_mcast_addr,
sizeof(struct sockaddr), 0);
if (result < 0) {
pr_err("Error connecting to the multicast addr\n");
@@ -664,13 +646,13 @@ static struct socket * make_send_sock(vo
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket * make_receive_sock(void)
+static struct socket * make_receive_sock(struct net *net)
{
struct socket *sock;
int result;
- /* First create a socket */
- result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ /* First create a socket in current netns */
+ result = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
@@ -679,7 +661,8 @@ static struct socket * make_receive_sock
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = 1;
- result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+ result = sock->ops->bind(sock,
+ (struct sockaddr *) &net->ipvs->sync_mcast_addr,
sizeof(struct sockaddr));
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
@@ -688,8 +671,8 @@ static struct socket * make_receive_sock
/* join the multicast group */
result = join_mcast_group(sock->sk,
- (struct in_addr *) &mcast_addr.sin_addr,
- ip_vs_backup_mcast_ifn);
+ (struct in_addr *) &net->ipvs->sync_mcast_addr.sin_addr,
+ net->ipvs->backup_mcast_ifn);
if (result < 0) {
pr_err("Error joining to the multicast group\n");
goto error;
@@ -764,16 +747,17 @@ static int sync_thread_master(void *data
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
- ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+ tinfo->net->ipvs->master_mcast_ifn,
+ tinfo->net->ipvs->master_syncid);
while (!kthread_should_stop()) {
- while ((sb = sb_dequeue())) {
+ while ((sb = sb_dequeue(tinfo->net))) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
/* check if entries stay in curr_sb for 2 seconds */
- sb = get_curr_sync_buff(2 * HZ);
+ sb = get_curr_sync_buff(tinfo->net, 2 * HZ);
if (sb) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
@@ -783,12 +767,12 @@ static int sync_thread_master(void *data
}
/* clean up the sync_buff queue */
- while ((sb=sb_dequeue())) {
+ while ((sb=sb_dequeue(tinfo->net))) {
ip_vs_sync_buff_release(sb);
}
/* clean up the current sync_buff */
- if ((sb = get_curr_sync_buff(0))) {
+ if ((sb = get_curr_sync_buff(tinfo->net, 0))) {
ip_vs_sync_buff_release(sb);
}
@@ -807,7 +791,8 @@ static int sync_thread_backup(void *data
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n",
- ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+ tinfo->net->ipvs->backup_mcast_ifn,
+ tinfo->net->ipvs->backup_syncid);
while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,16 +802,15 @@ static int sync_thread_backup(void *data
/* do we have data now? */
while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
len = ip_vs_receive(tinfo->sock, tinfo->buf,
- sync_recv_mesg_maxlen);
+ tinfo->net->ipvs->sync_recv_mesg_maxlen);
if (len <= 0) {
pr_err("receiving message error\n");
break;
}
-
- /* disable bottom half, because it accesses the data
+ /* disable bottom half per netns, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(tinfo->buf, len);
+ ip_vs_process_message(tinfo->net, tinfo->buf, len);
local_bh_enable();
}
}
@@ -840,41 +824,43 @@ static int sync_thread_backup(void *data
}
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **realtask, *task;
struct socket *sock;
+ struct netns_ipvs *ipvs = net->ipvs;
char *name, *buf = NULL;
int (*threadfn)(void *data);
int result = -ENOMEM;
- IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
+ IP_VS_DBG(7, "%s(): pid %d inc:%d\n", __func__, task_pid_nr(current),
+ ipvs->inc);
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
sizeof(struct ip_vs_sync_conn));
if (state == IP_VS_STATE_MASTER) {
- if (sync_master_thread)
+ if (ipvs->sync_master_thread)
return -EEXIST;
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_master_mcast_ifn));
- ip_vs_master_syncid = syncid;
- realtask = &sync_master_thread;
- name = "ipvs_syncmaster";
+ strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+ sizeof(ipvs->master_mcast_ifn));
+ ipvs->master_syncid = syncid;
+ realtask = &ipvs->sync_master_thread;
+ name = "ipvs_master:%d";
threadfn = sync_thread_master;
- sock = make_send_sock();
+ sock = make_send_sock(net);
} else if (state == IP_VS_STATE_BACKUP) {
- if (sync_backup_thread)
+ if (ipvs->sync_backup_thread)
return -EEXIST;
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_backup_mcast_ifn));
- ip_vs_backup_syncid = syncid;
- realtask = &sync_backup_thread;
- name = "ipvs_syncbackup";
+ strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+ sizeof(ipvs->backup_mcast_ifn));
+ ipvs->backup_syncid = syncid;
+ realtask = &ipvs->sync_backup_thread;
+ name = "ipvs_backup:%d";
threadfn = sync_thread_backup;
- sock = make_receive_sock();
+ sock = make_receive_sock(net);
} else {
return -EINVAL;
}
@@ -884,9 +870,9 @@ int start_sync_thread(int state, char *m
goto out;
}
- set_sync_mesg_maxlen(state);
+ set_sync_mesg_maxlen(net, state);
if (state == IP_VS_STATE_BACKUP) {
- buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+ buf = kmalloc(ipvs->sync_recv_mesg_maxlen, GFP_KERNEL);
if (!buf)
goto outsocket;
}
@@ -897,16 +883,17 @@ int start_sync_thread(int state, char *m
tinfo->sock = sock;
tinfo->buf = buf;
+ tinfo->net = net;
- task = kthread_run(threadfn, tinfo, name);
+ task = kthread_run(threadfn, tinfo, name, ipvs->inc);
if (IS_ERR(task)) {
result = PTR_ERR(task);
goto outtinfo;
}
-
+ IP_VS_DBG(1, "kthread %s started (%d)\n", name, task->pid);
/* mark as active */
*realtask = task;
- ip_vs_sync_state |= state;
+ ipvs->sync_state |= state;
/* increase the module use count */
ip_vs_use_count_inc();
@@ -924,16 +911,19 @@ out:
}
-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
{
+ struct netns_ipvs *ipvs = net->ipvs;
+
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
if (state == IP_VS_STATE_MASTER) {
- if (!sync_master_thread)
+ if (!ipvs->sync_master_thread)
return -ESRCH;
- pr_info("stopping master sync thread %d ...\n",
- task_pid_nr(sync_master_thread));
+ pr_info("stopping master sync thread %d inc:%d...\n",
+ task_pid_nr(ipvs->sync_master_thread),
+ ipvs->inc);
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,21 +931,22 @@ int stop_sync_thread(int state)
* progress of stopping the master sync daemon.
*/
- spin_lock_bh(&ip_vs_sync_lock);
- ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
- spin_unlock_bh(&ip_vs_sync_lock);
- kthread_stop(sync_master_thread);
- sync_master_thread = NULL;
+ spin_lock_bh(&ipvs->sync_lock);
+ ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock_bh(&ipvs->sync_lock);
+ kthread_stop(ipvs->sync_master_thread);
+ ipvs->sync_master_thread = NULL;
} else if (state == IP_VS_STATE_BACKUP) {
- if (!sync_backup_thread)
+ if (!ipvs->sync_backup_thread)
return -ESRCH;
- pr_info("stopping backup sync thread %d ...\n",
- task_pid_nr(sync_backup_thread));
-
- ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
- kthread_stop(sync_backup_thread);
- sync_backup_thread = NULL;
+ pr_info("stopping backup sync thread %d inc:%d...\n",
+ task_pid_nr(ipvs->sync_backup_thread),
+ ipvs->inc);
+
+ ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+ kthread_stop(ipvs->sync_backup_thread);
+ ipvs->sync_backup_thread = NULL;
} else {
return -EINVAL;
}
@@ -965,3 +956,41 @@ int stop_sync_thread(int state)
return 0;
}
+
+/*
+ * Initialize data struct for each netns
+ */
+static int __net_init __ip_vs_sync_init(struct net *net)
+{
+ struct netns_ipvs *ipvs = net->ipvs;
+ INIT_LIST_HEAD(&ipvs->sync_queue);
+ spin_lock_init(&ipvs->sync_lock);
+ spin_lock_init(&ipvs->sync_buff_lock);
+
+ ipvs->sync_mcast_addr.sin_family = AF_INET;
+ ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+ ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+ return 0;
+}
+
+static void __ip_vs_sync_cleanup(struct net *net)
+{
+ stop_sync_thread(net, net->ipvs->sync_state &
+ (IP_VS_STATE_MASTER | IP_VS_STATE_BACKUP));
+ return;
+}
+static struct pernet_operations ipvs_sync_ops = {
+ .init = __ip_vs_sync_init,
+ .exit = __ip_vs_sync_cleanup,
+};
+
+
+int __init ip_vs_sync_init(void)
+{
+ return register_pernet_subsys(&ipvs_sync_ops);
+}
+
+void __exit ip_vs_sync_cleanup(void)
+{
+ unregister_pernet_subsys(&ipvs_sync_ops);
+}
Index: lvs-test-2.6/net/netfilter/ipvs/ip_vs_xmit.c
===================================================================
--- lvs-test-2.6.orig/net/netfilter/ipvs/ip_vs_xmit.c 2010-10-22 21:38:57.000000000 +0200
+++ lvs-test-2.6/net/netfilter/ipvs/ip_vs_xmit.c 2010-10-22 21:39:08.000000000 +0200
@@ -522,13 +522,16 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
+ struct net *net;
p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
- ip_vs_conn_fill_cport(cp, *p);
+ net = ipvs_skbnet(skb);
+ ip_vs_conn_fill_cport(net, cp, *p);
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
+ IP_VS_DBG(10, "%s() dst:%x\n", __func__, iph->daddr);
if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(iph->tos), 1|2|4)))
goto tx_error_icmp;
@@ -634,11 +637,13 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, s
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
+ struct net *net;
p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
- ip_vs_conn_fill_cport(cp, *p);
+ net = ipvs_skbnet(skb);
+ ip_vs_conn_fill_cport(net, cp, *p);
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [rfc v2 00/10] ipvs network name space (netns) aware
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (9 preceding siblings ...)
2010-10-22 20:09 ` [rfc v2 10/10] ipvs network name space aware: sync and xmit Simon Horman
@ 2010-10-23 8:44 ` Hans Schillstrom
2010-10-23 9:04 ` Julian Anastasov
11 siblings, 0 replies; 14+ messages in thread
From: Hans Schillstrom @ 2010-10-23 8:44 UTC (permalink / raw)
To: Simon Horman
Cc: lvs-devel, netdev, netfilter-devel, Hans Schillstrom,
Julian Anastasov, Daniel Lezcano, Wensong Zhang
Hi Simon
Thanx a lot. for your work, I will use this a the new base
On Friday, October 22, 2010 22:09:34 Simon Horman wrote:
> Hi Hans,
>
> this is a re-base of your patch-set against the current nf-next-2.6 tree,
> which includes all the changes currently queued for 2.6.37-rc1 and nothing
> else.
>
> I also removed the BUG_ON() statements and incorported various
> suggestions that were made in response to your original post.
>
> It is compile tested only (partly because I am in an areoplane).
>
> I have not re-split the patches into logical units.
> Having worked with these patches a bit, I really think
> that split needs to occur.
Daniel will help me with that, i.e. we will split the work between us.
The plan is to do it in small steps
>
> For the benefit of others, your original cover email is below,
> updated as appropriate.
>
> -----
>
> This patch series adds network name space (netns) support to the LVS.
>
> REVISION
>
> This is version 2
>
> OVERVIEW
>
> The patch doesn't remove or add any functionality except for netns.
> For users that don't use network name space (netns) this patch is
> completely transparent.
>
> No it's possible to run LVS in a Linux container (see lxc-tools)
> i.e. a light weight virtualization. For example it's possible to run
> one or several lvs on a real server in their own network name spaces.
> >From the LVS point of view it looks like it runs on it's own machine.
>
> IMPLEMENTATION
> Basic requirements for netns awareness
> - Global variables has to be moved to dyn. allocated memory.
>
> Most global variables now resides in a struct ipvs { } in netns/ip_vs.h.
> What is moved and what is not ?
>
> Some cache aligned locks are still in global, module init params and some debug_level.
>
> Algorithm files they are untouched.
>
> QUESTIONS
> Drop rate in ip_vs_ctl per netns or grand total ?
> Should more lock variables be moved (or less) ?
>
> Include files,
> A new file added include/net/netns/ip_vs.h containg all netns specific data.
> include/net/net_namespce.h, pointer to "struct ipvs" added.
> include/net/ip_vs.h a new struct added, and many prototypes changed.
>
> * ip_vs_core.c
> All netns init origins from this file - ip_vs_init()
>
> * ip_vs_conn.c
> Lock array for conn table is kept due to performance,
> (or am I wrong here ?).
> "static struct ip_vs_aligned_lock
> __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;"
>
> * ip_vs_ctl.c
> drop_ rate is still global
>
> TESTING
> This patch have been running for a month now with three LVS/machine
> one in root name-space and two in other name-space.
> Both IPv4 & IPv6 have been tested in all three modes DR/TUN and NAT
> Only a limited set of algos have been used (read rr).
>
> Backup have been there all the time and a switch has been performed a couple of times.
>
> Not tested yet:
> Drop level, DOS, schedulers, performance ....
> Netns exit after usage of LVS (due to a bug in netdev/ipip somewhere tunl0 and
>
> --
> To unsubscribe from this list: send the line "unsubscribe lvs-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
Mvh
Hasse Schillstrom <hans@schillstrom.com>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [rfc v2 00/10] ipvs network name space (netns) aware
2010-10-22 20:09 [rfc v2 00/10] ipvs network name space (netns) aware Simon Horman
` (10 preceding siblings ...)
2010-10-23 8:44 ` [rfc v2 00/10] ipvs network name space (netns) aware Hans Schillstrom
@ 2010-10-23 9:04 ` Julian Anastasov
11 siblings, 0 replies; 14+ messages in thread
From: Julian Anastasov @ 2010-10-23 9:04 UTC (permalink / raw)
To: Simon Horman
Cc: lvs-devel, netdev, netfilter-devel, Hans Schillstrom,
Daniel Lezcano, Wensong Zhang
Hello,
On Fri, 22 Oct 2010, Simon Horman wrote:
> Hi Hans,
>
> this is a re-base of your patch-set against the current nf-next-2.6 tree,
> which includes all the changes currently queued for 2.6.37-rc1 and nothing
> else.
>
> I also removed the BUG_ON() statements and incorported various
> suggestions that were made in response to your original post.
>
> It is compile tested only (partly because I am in an areoplane).
>
> I have not re-split the patches into logical units.
> Having worked with these patches a bit, I really think
> that split needs to occur.
>
> For the benefit of others, your original cover email is below,
> updated as appropriate.
>
> -----
>
> This patch series adds network name space (netns) support to the LVS.
>
> REVISION
>
> This is version 2
>
> OVERVIEW
>
> The patch doesn't remove or add any functionality except for netns.
> For users that don't use network name space (netns) this patch is
> completely transparent.
>
> No it's possible to run LVS in a Linux container (see lxc-tools)
> i.e. a light weight virtualization. For example it's possible to run
> one or several lvs on a real server in their own network name spaces.
>> From the LVS point of view it looks like it runs on it's own machine.
>
> IMPLEMENTATION
> Basic requirements for netns awareness
> - Global variables has to be moved to dyn. allocated memory.
>
> Most global variables now resides in a struct ipvs { } in netns/ip_vs.h.
> What is moved and what is not ?
>
> Some cache aligned locks are still in global, module init params and some debug_level.
>
> Algorithm files they are untouched.
>
> QUESTIONS
> Drop rate in ip_vs_ctl per netns or grand total ?
If different containers can have different memory limit
we should restrict their memory with per-ns limits
and variables, i.e. DoS logic per-ns.
> Should more lock variables be moved (or less) ?
>
> Include files,
> A new file added include/net/netns/ip_vs.h containg all netns specific data.
> include/net/net_namespce.h, pointer to "struct ipvs" added.
> include/net/ip_vs.h a new struct added, and many prototypes changed.
>
> * ip_vs_core.c
> All netns init origins from this file - ip_vs_init()
>
> * ip_vs_conn.c
> Lock array for conn table is kept due to performance,
> (or am I wrong here ?).
> "static struct ip_vs_aligned_lock
> __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;"
>
> * ip_vs_ctl.c
> drop_ rate is still global
May be should be per-ns
> TESTING
> This patch have been running for a month now with three LVS/machine
> one in root name-space and two in other name-space.
> Both IPv4 & IPv6 have been tested in all three modes DR/TUN and NAT
> Only a limited set of algos have been used (read rr).
>
> Backup have been there all the time and a switch has been performed a couple of times.
>
> Not tested yet:
> Drop level, DOS, schedulers, performance ....
> Netns exit after usage of LVS (due to a bug in netdev/ipip somewhere tunl0 and
Main points:
- May be we have to use global table for connections and to
filter by cp->net
- We have to use ip_vs_proto_data_get in many places where
pp = ip_vs_proto_get(protocol) was used. Then when pp
is needed we can use pd->pp->XXX
- tcp_timeout_change should work with the new struct ip_vs_proto_data
so that tcp_state_table will go to pd->state_table
and set_tcp_state will get pd instead of pp
- ipvs_skbnet must be used only for traffic after the
check for !skb_dst(skb)
Other notes:
rfc v2 01/10:
set_state_timeout: infrastructure is there but never added
to ipvsadm. If we keep it, it should be per-ns
Functions that can use cp->net and do not need argument:
ip_vs_conn_fill_cport
ip_vs_tcp_conn_listen
ip_vs_bind_app?
ip_vs_unbind_app
rfc v2 02/10
rfc v2 03/10
ip_vs_conn_hash: use cp->net
ip_vs_conn_unhash: use cp->net
ip_vs_conn_fill_param_proto: use ipvs_skbnet(skb)
ip_vs_conn_fill_cport: use cp->net
ip_vs_try_bind_dest: use cp->net
ip_vs_check_template: use ct->net
ip_vs_conn_new: assign cp->net from p->net early before
using it for ip_vs_bind_app, etc
Why not using global ip_vs_conn_tab[], we have cp->net
rfc v2 04/10
ip_vs_in_stats: use cp->net
ip_vs_out_stats: use cp->net
ip_vs_conn_stats: use cp->net
ip_vs_sched_persist: use ipvs_skbnet
ip_vs_schedule: use ipvs_skbnet
handle_response_icmp: use ipvs_skbnet
handle_response: use cp->net
ip_vs_out: assign net with ipvs_skbnet after
'if (unlikely(!skb_dst(skb)))' check
ip_vs_in: assign net with ipvs_skbnet before if-block for
ip_vs_in_icmp_v6 after skb_dst check
ip_vs_sync_conn: use cp->net
rfc v2 05/10
ipvs_skbnet will be used only from skbs containing traffic,
i.e. replace dev_net(skb->dev) with ipvs_skbnet(skb)
when used for traffic
rfc v2 06/10
sysctl_drop_entry is per net but update_defense_level
changes global ip_vs_dropentry?
ip_vs_protocol_timeout_change: where is net? It must call
pp->timeout_change for every struct ip_vs_proto_data
ip_vs_genl_dump_services: DO NOT USE ipvs_skbnet, may be
from skb->sk? sock_net(skb->sk) ?
ip_vs_genl_dump_dests: DO NOT USE ipvs_skbnet
ip_vs_genl_set_cmd: DO NOT USE ipvs_skbnet
ip_vs_genl_get_cmd: DO NOT USE ipvs_skbnet
rfc v2 07/10
rfc v2 08/10
ip_vs_ftp_out: use ipvs_skbnet
ip_vs_ftp_in: use ipvs_skbnet
rfc v2 09/10
register_ip_vs_proto_netns result is not checked in
__ip_vs_protocol_init
ah_esp_conn_in_get: use ipvs_skbnet
ah_esp_conn_out_get: use ipvs_skbnet
sctp_conn_schedule: use ipvs_skbnet
set_sctp_state: use cp->net
sctp_app_conn_bind: use cp->net
tcp_conn_schedule: use ipvs_skbnet
set_tcp_state: use cp->net
tcp_app_conn_bind: use cp->net
ip_vs_tcp_conn_listen: use cp->net
udp_conn_schedule: use ipvs_skbnet
udp_app_conn_bind: use cp->net
udp_state_transition: use cp->net
rfc v2 10/10
ip_vs_sync_conn: use cp->net
ip_vs_nat_xmit*: ip_vs_conn_fill_cport should use cp->net
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply [flat|nested] 14+ messages in thread