* [patch v2 ] IPVS: Conditionally include sysctl code
@ 2011-03-05 23:45 Simon Horman
2011-03-05 23:45 ` [PATCH 01/18] ipvs: move struct netns_ipvs Simon Horman
` (17 more replies)
0 siblings, 18 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom
In the case where CONFIG_SYSCTL is undefined related
control code in IPVS is unnecessary. This patch series
attempts to make the inclusion of all such code
conditional on CONFIG_SYSCTL.
The first 4 patches in this series are Julian's series
"[PATCH 0/4] ipvs: changes for stats", which are required
for many of the subsequent changes.
The changes are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-test-2.6.git config-sysctl2
At this point I would like to merge both Julian's patches
and my changes.
If this series is successful I will examine what if any similar
changes are appropriate for the case where CONFIG_PROC_FS is undefined.
Julian Anastasov (4):
ipvs: move struct netns_ipvs
ipvs: reorganize tot_stats
ipvs: zero percpu stats
ipvs: remove unused seqcount stats
Simon Horman (14):
IPVS: Add ip_vs_route_me_harder()
IPVS: Add sysctl_snat_reroute()
IPVS: Add sysctl_nat_icmp_send()
IPVS: Add {sysctl_sync_threshold,period}()
IPVS: Add sysctl_sync_ver()
IPVS: Add sysctl_expire_nodest_conn()
IPVS: Add expire_quiescent_template()
IPVS: Conditionally use sysctl_lblc{r}_expiration
IPVS: ip_vs_todrop() becomes a noop when CONFIG_SYSCTL is undefined
IPVS: Conditional ip_vs_conntrack_enabled()
IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined
IPVS: Conditionally define and use ip_vs_lblc{r}_table
IPVS: Add __ip_vs_control_{init,cleanup}_sysctl()
IPVS: Conditionally include sysctl members of struct netns_ipvs
include/net/ip_vs.h | 189 ++++++++++++++++++++++++++++++++++---
include/net/net_namespace.h | 2 +-
include/net/netns/ip_vs.h | 143 ----------------------------
net/netfilter/ipvs/ip_vs_conn.c | 13 ++-
net/netfilter/ipvs/ip_vs_core.c | 104 +++++++++++++--------
net/netfilter/ipvs/ip_vs_ctl.c | 158 ++++++++++++++++++++-----------
net/netfilter/ipvs/ip_vs_est.c | 3 +-
net/netfilter/ipvs/ip_vs_lblc.c | 31 +++++--
net/netfilter/ipvs/ip_vs_lblcr.c | 35 +++++--
net/netfilter/ipvs/ip_vs_sync.c | 8 +-
10 files changed, 404 insertions(+), 282 deletions(-)
delete mode 100644 include/net/netns/ip_vs.h
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 01/18] ipvs: move struct netns_ipvs
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 02/18] ipvs: reorganize tot_stats Simon Horman
` (16 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
From: Julian Anastasov <ja@ssi.bg>
Remove include/net/netns/ip_vs.h because it depends on
structures from include/net/ip_vs.h. As ipvs is pointer in
struct net it is better to move struct netns_ipvs into
include/net/ip_vs.h, so that we can easily use other structures
in struct netns_ipvs.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 122 ++++++++++++++++++++++++++++++++++++
include/net/net_namespace.h | 2 +-
include/net/netns/ip_vs.h | 143 -------------------------------------------
3 files changed, 123 insertions(+), 144 deletions(-)
delete mode 100644 include/net/netns/ip_vs.h
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index e74da41e..7280661 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -803,6 +803,128 @@ struct ip_vs_app {
void (*timeout_change)(struct ip_vs_app *app, int flags);
};
+/* IPVS in network namespace */
+struct netns_ipvs {
+ int gen; /* Generation */
+ /*
+ * Hash table: for real service lookups
+ */
+ #define IP_VS_RTAB_BITS 4
+ #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+ #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+ struct list_head rs_table[IP_VS_RTAB_SIZE];
+ /* ip_vs_app */
+ struct list_head app_list;
+ struct mutex app_mutex;
+ struct lock_class_key app_key; /* mutex debuging */
+
+ /* ip_vs_proto */
+ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
+ struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+ /* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ #define TCP_APP_TAB_BITS 4
+ #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
+ #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
+ struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+ spinlock_t tcp_app_lock;
+#endif
+ /* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ #define UDP_APP_TAB_BITS 4
+ #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
+ #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
+ struct list_head udp_apps[UDP_APP_TAB_SIZE];
+ spinlock_t udp_app_lock;
+#endif
+ /* ip_vs_proto_sctp */
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+ #define SCTP_APP_TAB_BITS 4
+ #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
+ #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
+ /* Hash table for SCTP application incarnations */
+ struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
+ spinlock_t sctp_app_lock;
+#endif
+ /* ip_vs_conn */
+ atomic_t conn_count; /* connection counter */
+
+ /* ip_vs_ctl */
+ struct ip_vs_stats *tot_stats; /* Statistics & est. */
+ struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
+ seqcount_t *ustats_seq; /* u64 read retry */
+
+ int num_services; /* no of virtual services */
+ /* 1/rate drop and drop-entry variables */
+ struct delayed_work defense_work; /* Work handler */
+ int drop_rate;
+ int drop_counter;
+ atomic_t dropentry;
+ /* locks in ctl.c */
+ spinlock_t dropentry_lock; /* drop entry handling */
+ spinlock_t droppacket_lock; /* drop packet handling */
+ spinlock_t securetcp_lock; /* state and timeout tables */
+ rwlock_t rs_lock; /* real services table */
+ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+ struct lock_class_key ctl_key; /* ctl_mutex debuging */
+ /* Trash for destinations */
+ struct list_head dest_trash;
+ /* Service counters */
+ atomic_t ftpsvc_counter;
+ atomic_t nullsvc_counter;
+
+ /* sys-ctl struct */
+ struct ctl_table_header *sysctl_hdr;
+ struct ctl_table *sysctl_tbl;
+ /* sysctl variables */
+ int sysctl_amemthresh;
+ int sysctl_am_droprate;
+ int sysctl_drop_entry;
+ int sysctl_drop_packet;
+ int sysctl_secure_tcp;
+#ifdef CONFIG_IP_VS_NFCT
+ int sysctl_conntrack;
+#endif
+ int sysctl_snat_reroute;
+ int sysctl_sync_ver;
+ int sysctl_cache_bypass;
+ int sysctl_expire_nodest_conn;
+ int sysctl_expire_quiescent_template;
+ int sysctl_sync_threshold[2];
+ int sysctl_nat_icmp_send;
+
+ /* ip_vs_lblc */
+ int sysctl_lblc_expiration;
+ struct ctl_table_header *lblc_ctl_header;
+ struct ctl_table *lblc_ctl_table;
+ /* ip_vs_lblcr */
+ int sysctl_lblcr_expiration;
+ struct ctl_table_header *lblcr_ctl_header;
+ struct ctl_table *lblcr_ctl_table;
+ /* ip_vs_est */
+ struct list_head est_list; /* estimator list */
+ spinlock_t est_lock;
+ struct timer_list est_timer; /* Estimation timer */
+ /* ip_vs_sync */
+ struct list_head sync_queue;
+ spinlock_t sync_lock;
+ struct ip_vs_sync_buff *sync_buff;
+ spinlock_t sync_buff_lock;
+ struct sockaddr_in sync_mcast_addr;
+ struct task_struct *master_thread;
+ struct task_struct *backup_thread;
+ int send_mesg_maxlen;
+ int recv_mesg_maxlen;
+ volatile int sync_state;
+ volatile int master_syncid;
+ volatile int backup_syncid;
+ /* multicast interface name */
+ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ /* net name space ptr */
+ struct net *net; /* Needed by timer routines */
+};
/*
* IPVS core functions
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index b3b4a34..3ae4919 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -20,7 +20,6 @@
#include <net/netns/conntrack.h>
#endif
#include <net/netns/xfrm.h>
-#include <net/netns/ip_vs.h>
struct proc_dir_entry;
struct net_device;
@@ -28,6 +27,7 @@ struct sock;
struct ctl_table_header;
struct net_generic;
struct sock;
+struct netns_ipvs;
#define NETDEV_HASHBITS 8
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
deleted file mode 100644
index 259ebac..0000000
--- a/include/net/netns/ip_vs.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * IP Virtual Server
- * Data structure for network namspace
- *
- */
-
-#ifndef IP_VS_H_
-#define IP_VS_H_
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/list_nulls.h>
-#include <linux/ip_vs.h>
-#include <asm/atomic.h>
-#include <linux/in.h>
-
-struct ip_vs_stats;
-struct ip_vs_sync_buff;
-struct ctl_table_header;
-
-struct netns_ipvs {
- int gen; /* Generation */
- /*
- * Hash table: for real service lookups
- */
- #define IP_VS_RTAB_BITS 4
- #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
- #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
- struct list_head rs_table[IP_VS_RTAB_SIZE];
- /* ip_vs_app */
- struct list_head app_list;
- struct mutex app_mutex;
- struct lock_class_key app_key; /* mutex debuging */
-
- /* ip_vs_proto */
- #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
- struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
- /* ip_vs_proto_tcp */
-#ifdef CONFIG_IP_VS_PROTO_TCP
- #define TCP_APP_TAB_BITS 4
- #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
- #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
- struct list_head tcp_apps[TCP_APP_TAB_SIZE];
- spinlock_t tcp_app_lock;
-#endif
- /* ip_vs_proto_udp */
-#ifdef CONFIG_IP_VS_PROTO_UDP
- #define UDP_APP_TAB_BITS 4
- #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
- #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
- struct list_head udp_apps[UDP_APP_TAB_SIZE];
- spinlock_t udp_app_lock;
-#endif
- /* ip_vs_proto_sctp */
-#ifdef CONFIG_IP_VS_PROTO_SCTP
- #define SCTP_APP_TAB_BITS 4
- #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
- #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
- /* Hash table for SCTP application incarnations */
- struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
- spinlock_t sctp_app_lock;
-#endif
- /* ip_vs_conn */
- atomic_t conn_count; /* connection counter */
-
- /* ip_vs_ctl */
- struct ip_vs_stats *tot_stats; /* Statistics & est. */
- struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
- seqcount_t *ustats_seq; /* u64 read retry */
-
- int num_services; /* no of virtual services */
- /* 1/rate drop and drop-entry variables */
- struct delayed_work defense_work; /* Work handler */
- int drop_rate;
- int drop_counter;
- atomic_t dropentry;
- /* locks in ctl.c */
- spinlock_t dropentry_lock; /* drop entry handling */
- spinlock_t droppacket_lock; /* drop packet handling */
- spinlock_t securetcp_lock; /* state and timeout tables */
- rwlock_t rs_lock; /* real services table */
- /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
- struct lock_class_key ctl_key; /* ctl_mutex debuging */
- /* Trash for destinations */
- struct list_head dest_trash;
- /* Service counters */
- atomic_t ftpsvc_counter;
- atomic_t nullsvc_counter;
-
- /* sys-ctl struct */
- struct ctl_table_header *sysctl_hdr;
- struct ctl_table *sysctl_tbl;
- /* sysctl variables */
- int sysctl_amemthresh;
- int sysctl_am_droprate;
- int sysctl_drop_entry;
- int sysctl_drop_packet;
- int sysctl_secure_tcp;
-#ifdef CONFIG_IP_VS_NFCT
- int sysctl_conntrack;
-#endif
- int sysctl_snat_reroute;
- int sysctl_sync_ver;
- int sysctl_cache_bypass;
- int sysctl_expire_nodest_conn;
- int sysctl_expire_quiescent_template;
- int sysctl_sync_threshold[2];
- int sysctl_nat_icmp_send;
-
- /* ip_vs_lblc */
- int sysctl_lblc_expiration;
- struct ctl_table_header *lblc_ctl_header;
- struct ctl_table *lblc_ctl_table;
- /* ip_vs_lblcr */
- int sysctl_lblcr_expiration;
- struct ctl_table_header *lblcr_ctl_header;
- struct ctl_table *lblcr_ctl_table;
- /* ip_vs_est */
- struct list_head est_list; /* estimator list */
- spinlock_t est_lock;
- struct timer_list est_timer; /* Estimation timer */
- /* ip_vs_sync */
- struct list_head sync_queue;
- spinlock_t sync_lock;
- struct ip_vs_sync_buff *sync_buff;
- spinlock_t sync_buff_lock;
- struct sockaddr_in sync_mcast_addr;
- struct task_struct *master_thread;
- struct task_struct *backup_thread;
- int send_mesg_maxlen;
- int recv_mesg_maxlen;
- volatile int sync_state;
- volatile int master_syncid;
- volatile int backup_syncid;
- /* multicast interface name */
- char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
- char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
- /* net name space ptr */
- struct net *net; /* Needed by timer routines */
-};
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 02/18] ipvs: reorganize tot_stats
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
2011-03-05 23:45 ` [PATCH 01/18] ipvs: move struct netns_ipvs Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 03/18] ipvs: zero percpu stats Simon Horman
` (15 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
From: Julian Anastasov <ja@ssi.bg>
The global tot_stats contains cpustats field just like the
stats for dest and svc, so better use it to simplify the usage
in estimation_timer. As tot_stats is registered as estimator
we can remove the special ip_vs_read_cpu_stats call for
tot_stats. Fix ip_vs_read_cpu_stats to be called under
stats lock because it is still used as synchronization between
estimation timer and user context (the stats readers).
Also, make sure ip_vs_stats_percpu_show reads properly
the u64 stats from user context.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 3 +-
net/netfilter/ipvs/ip_vs_core.c | 6 ++--
net/netfilter/ipvs/ip_vs_ctl.c | 45 ++++++++++++++++++++------------------
net/netfilter/ipvs/ip_vs_est.c | 3 +-
4 files changed, 29 insertions(+), 28 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7280661..f17841d 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -851,8 +851,7 @@ struct netns_ipvs {
atomic_t conn_count; /* connection counter */
/* ip_vs_ctl */
- struct ip_vs_stats *tot_stats; /* Statistics & est. */
- struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
+ struct ip_vs_stats tot_stats; /* Statistics & est. */
seqcount_t *ustats_seq; /* u64 read retry */
int num_services; /* no of virtual services */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 2d1f932..6f4940e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -132,7 +132,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- s = this_cpu_ptr(ipvs->cpustats);
+ s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
@@ -162,7 +162,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- s = this_cpu_ptr(ipvs->cpustats);
+ s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
@@ -183,7 +183,7 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.conns++;
- s = this_cpu_ptr(ipvs->cpustats);
+ s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.conns++;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index f0369d6..a2a67ad 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1481,7 +1481,7 @@ static int ip_vs_zero_all(struct net *net)
}
}
- ip_vs_zero_stats(net_ipvs(net)->tot_stats);
+ ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
return 0;
}
@@ -1963,7 +1963,7 @@ static const struct file_operations ip_vs_info_fops = {
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
- struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+ struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@@ -2007,7 +2007,8 @@ static const struct file_operations ip_vs_stats_fops = {
static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
- struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+ struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
+ struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
int i;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2017,11 +2018,20 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
"CPU Conns Packets Packets Bytes Bytes\n");
for_each_possible_cpu(i) {
- struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+ struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
+ unsigned int start;
+ __u64 inbytes, outbytes;
+
+ do {
+ start = u64_stats_fetch_begin_bh(&u->syncp);
+ inbytes = u->ustats.inbytes;
+ outbytes = u->ustats.outbytes;
+ } while (u64_stats_fetch_retry_bh(&u->syncp, start));
+
seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
- i, u->ustats.conns, u->ustats.inpkts,
- u->ustats.outpkts, (__u64)u->ustats.inbytes,
- (__u64)u->ustats.outbytes);
+ i, u->ustats.conns, u->ustats.inpkts,
+ u->ustats.outpkts, (__u64)inbytes,
+ (__u64)outbytes);
}
spin_lock_bh(&tot_stats->lock);
@@ -3505,17 +3515,12 @@ int __net_init __ip_vs_control_init(struct net *net)
atomic_set(&ipvs->nullsvc_counter, 0);
/* procfs stats */
- ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
- if (ipvs->tot_stats == NULL) {
- pr_err("%s(): no memory.\n", __func__);
- return -ENOMEM;
- }
- ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
- if (!ipvs->cpustats) {
+ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+ if (!ipvs->tot_stats.cpustats) {
pr_err("%s() alloc_percpu failed\n", __func__);
goto err_alloc;
}
- spin_lock_init(&ipvs->tot_stats->lock);
+ spin_lock_init(&ipvs->tot_stats.lock);
proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
@@ -3563,7 +3568,7 @@ int __net_init __ip_vs_control_init(struct net *net)
goto err_dup;
}
#endif
- ip_vs_new_estimator(net, ipvs->tot_stats);
+ ip_vs_new_estimator(net, &ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
@@ -3571,9 +3576,8 @@ int __net_init __ip_vs_control_init(struct net *net)
return 0;
err_dup:
- free_percpu(ipvs->cpustats);
+ free_percpu(ipvs->tot_stats.cpustats);
err_alloc:
- kfree(ipvs->tot_stats);
return -ENOMEM;
}
@@ -3582,7 +3586,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_trash_cleanup(net);
- ip_vs_kill_estimator(net, ipvs->tot_stats);
+ ip_vs_kill_estimator(net, &ipvs->tot_stats);
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
#ifdef CONFIG_SYSCTL
@@ -3591,8 +3595,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
proc_net_remove(net, "ip_vs_stats_percpu");
proc_net_remove(net, "ip_vs_stats");
proc_net_remove(net, "ip_vs");
- free_percpu(ipvs->cpustats);
- kfree(ipvs->tot_stats);
+ free_percpu(ipvs->tot_stats.cpustats);
}
static struct pernet_operations ipvs_control_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 88bd716..b3751cf 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -101,13 +101,12 @@ static void estimation_timer(unsigned long arg)
struct netns_ipvs *ipvs;
ipvs = net_ipvs(net);
- ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
- ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
spin_lock(&s->lock);
+ ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
n_conns = s->ustats.conns;
n_inpkts = s->ustats.inpkts;
n_outpkts = s->ustats.outpkts;
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 03/18] ipvs: zero percpu stats
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
2011-03-05 23:45 ` [PATCH 01/18] ipvs: move struct netns_ipvs Simon Horman
2011-03-05 23:45 ` [PATCH 02/18] ipvs: reorganize tot_stats Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-06 9:06 ` Eric Dumazet
2011-03-05 23:45 ` [PATCH 04/18] ipvs: remove unused seqcount stats Simon Horman
` (14 subsequent siblings)
17 siblings, 1 reply; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
From: Julian Anastasov <ja@ssi.bg>
Zero the new percpu stats because we copy from there.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_ctl.c | 17 +++++++++++++++++
1 files changed, 17 insertions(+), 0 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a2a67ad..fd74527 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -715,8 +715,25 @@ static void ip_vs_trash_cleanup(struct net *net)
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
+ struct ip_vs_cpu_stats *cpustats = stats->cpustats;
+ int i;
+
spin_lock_bh(&stats->lock);
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
+ unsigned int start;
+
+ /* Do not pretend to be writer, it is enough to
+ * sync with writers that modify the u64 counters
+ * because under stats->lock we are the only reader.
+ */
+ do {
+ start = u64_stats_fetch_begin(&u->syncp);
+ memset(&u->ustats, 0, sizeof(u->ustats));
+ } while (u64_stats_fetch_retry(&u->syncp, start));
+ }
+
memset(&stats->ustats, 0, sizeof(stats->ustats));
ip_vs_zero_estimator(stats);
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 04/18] ipvs: remove unused seqcount stats
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (2 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 03/18] ipvs: zero percpu stats Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 05/18] IPVS: Add ip_vs_route_me_harder() Simon Horman
` (13 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
From: Julian Anastasov <ja@ssi.bg>
Remove ustats_seq, IPVS_STAT_INC and IPVS_STAT_ADD
because they are not used. They were replaced with u64_stats.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 17 -----------------
1 files changed, 0 insertions(+), 17 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index f17841d..efd68dc 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -376,22 +376,6 @@ struct ip_vs_stats {
spinlock_t lock; /* spin lock */
};
-/*
- * Helper Macros for per cpu
- * ipvs->tot_stats->ustats.count
- */
-#define IPVS_STAT_INC(ipvs, count) \
- __this_cpu_inc((ipvs)->ustats->count)
-
-#define IPVS_STAT_ADD(ipvs, count, value) \
- do {\
- write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
- raw_smp_processor_id())); \
- __this_cpu_add((ipvs)->ustats->count, value); \
- write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
- raw_smp_processor_id())); \
- } while (0)
-
struct dst_entry;
struct iphdr;
struct ip_vs_conn;
@@ -852,7 +836,6 @@ struct netns_ipvs {
/* ip_vs_ctl */
struct ip_vs_stats tot_stats; /* Statistics & est. */
- seqcount_t *ustats_seq; /* u64 read retry */
int num_services; /* no of virtual services */
/* 1/rate drop and drop-entry variables */
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 05/18] IPVS: Add ip_vs_route_me_harder()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (3 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 04/18] ipvs: remove unused seqcount stats Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 06/18] IPVS: Add sysctl_snat_reroute() Simon Horman
` (12 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
Add ip_vs_route_me_harder() to avoid repeating the same code twice.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 48 +++++++++++++++++---------------------
1 files changed, 22 insertions(+), 26 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 6f4940e..299c7f3 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -631,6 +631,24 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
}
#endif
+static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
+{
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
+ return 1;
+ } else
+#endif
+ if ((ipvs->sysctl_snat_reroute ||
+ skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
+ ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ return 1;
+
+ return 0;
+}
+
/*
* Packet has been made sufficiently writable in caller
* - inout: 1=in->out, 0=out->in
@@ -737,7 +755,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
unsigned int offset, unsigned int ihl)
{
- struct netns_ipvs *ipvs;
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -759,8 +776,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (!skb_make_writable(skb, offset))
goto out;
- ipvs = net_ipvs(skb_net(skb));
-
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -768,16 +783,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
-#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6) {
- if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
- goto out;
- } else
-#endif
- if ((ipvs->sysctl_snat_reroute ||
- skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
- ip_route_me_harder(skb, RTN_LOCAL) != 0)
- goto out;
+ if (ip_vs_route_me_harder(af, skb))
+ goto out;
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);
@@ -985,7 +992,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
struct ip_vs_protocol *pp = pd->pp;
- struct netns_ipvs *ipvs;
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
@@ -1021,18 +1027,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
- ipvs = net_ipvs(skb_net(skb));
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 06/18] IPVS: Add sysctl_snat_reroute()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (4 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 05/18] IPVS: Add ip_vs_route_me_harder() Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 07/18] IPVS: Add sysctl_nat_icmp_send() Simon Horman
` (11 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In preparation for not including sysctl_snat_reroute in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 20 ++++++++++++++++----
1 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 299c7f3..1d8a2a2 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -599,6 +599,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
return NF_DROP;
}
+#ifdef CONFIG_SYSCTL
+
+static int sysctl_snat_reroute(struct sk_buff *skb)
+{
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ return ipvs->sysctl_snat_reroute;
+}
+
+#else
+
+static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
+
+#endif
+
__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -633,15 +647,13 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
-
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0)
return 1;
} else
#endif
- if ((ipvs->sysctl_snat_reroute ||
+ if ((sysctl_snat_reroute(skb) ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
return 1;
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 07/18] IPVS: Add sysctl_nat_icmp_send()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (5 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 06/18] IPVS: Add sysctl_snat_reroute() Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 08/18] IPVS: Add {sysctl_sync_threshold,period}() Simon Horman
` (10 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In preparation for not including sysctl_nat_icmp_send in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1d8a2a2..c9b8372 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -607,9 +607,16 @@ static int sysctl_snat_reroute(struct sk_buff *skb)
return ipvs->sysctl_snat_reroute;
}
+static int sysctl_nat_icmp_send(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ return ipvs->sysctl_nat_icmp_send;
+}
+
#else
static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
+static int sysctl_nat_icmp_send(struct net *net) { return 0; }
#endif
@@ -1074,7 +1081,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
- struct netns_ipvs *ipvs;
EnterFunction(11);
@@ -1149,11 +1155,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if the packet belongs to an existing entry
*/
cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
- ipvs = net_ipvs(net);
if (likely(cp))
return handle_response(af, skb, pd, cp, iph.len);
- if (ipvs->sysctl_nat_icmp_send &&
+ if (sysctl_nat_icmp_send(net) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 08/18] IPVS: Add {sysctl_sync_threshold,period}()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (6 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 07/18] IPVS: Add sysctl_nat_icmp_send() Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 09/18] IPVS: Add sysctl_sync_ver() Simon Horman
` (9 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In preparation for not including sysctl_sync_threshold in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 29 +++++++++++++++++++++++++++++
net/netfilter/ipvs/ip_vs_core.c | 10 +++++-----
net/netfilter/ipvs/ip_vs_ctl.c | 4 ++--
net/netfilter/ipvs/ip_vs_sync.c | 4 ++--
4 files changed, 38 insertions(+), 9 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index efd68dc..4975524 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -908,6 +908,35 @@ struct netns_ipvs {
struct net *net; /* Needed by timer routines */
};
+#define DEFAULT_SYNC_THRESHOLD 3
+#define DEFAULT_SYNC_PERIOD 50
+
+#ifdef CONFIG_SYSCTL
+
+static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_threshold[0];
+}
+
+static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_threshold[1];
+}
+
+#else
+
+static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
+{
+ return DEFAULT_SYNC_THRESHOLD;
+}
+
+static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
+{
+ return DEFAULT_SYNC_PERIOD;
+}
+
+#endif
+
/*
* IPVS core functions
* (from ip_vs_core.c)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c9b8372..6a0053d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1613,15 +1613,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
- pkts = ipvs->sysctl_sync_threshold[0];
+ pkts = sysctl_sync_threshold(ipvs);
else
pkts = atomic_add_return(1, &cp->in_pkts);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
- (pkts % ipvs->sysctl_sync_threshold[1]
- == ipvs->sysctl_sync_threshold[0])) ||
+ (pkts % sysctl_sync_period(ipvs)
+ == sysctl_sync_threshold(ipvs))) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
@@ -1635,8 +1635,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (pkts % ipvs->sysctl_sync_threshold[1]
- == ipvs->sysctl_sync_threshold[0])) ||
+ (pkts % sysctl_sync_period(ipvs)
+ == sysctl_sync_threshold(ipvs))) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fd74527..8cdbf91 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3569,8 +3569,8 @@ int __net_init __ip_vs_control_init(struct net *net)
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
- ipvs->sysctl_sync_threshold[0] = 3;
- ipvs->sysctl_sync_threshold[1] = 50;
+ ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+ ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index fecf24d..d037763 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -650,7 +650,7 @@ control:
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
int pkts = atomic_add_return(1, &cp->in_pkts);
- if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
+ if (pkts % sysctl_sync_period(ipvs) != 1)
return;
}
goto sloop;
@@ -795,7 +795,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
+ atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
cp->state = state;
cp->old_state = cp->state;
/*
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 09/18] IPVS: Add sysctl_sync_ver()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (7 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 08/18] IPVS: Add {sysctl_sync_threshold,period}() Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 10/18] IPVS: Add sysctl_expire_nodest_conn() Simon Horman
` (8 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In preparation for not including sysctl_sync_ver in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 11 +++++++++++
net/netfilter/ipvs/ip_vs_sync.c | 4 ++--
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 4975524..29cbe39 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -910,6 +910,7 @@ struct netns_ipvs {
#define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_PERIOD 50
+#define DEFAULT_SYNC_VER 1
#ifdef CONFIG_SYSCTL
@@ -923,6 +924,11 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
return ipvs->sysctl_sync_threshold[1];
}
+static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_sync_ver;
+}
+
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -935,6 +941,11 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
return DEFAULT_SYNC_PERIOD;
}
+static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
+{
+ return DEFAULT_SYNC_VER;
+}
+
#endif
/*
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d037763..175d8ee 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode)
if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
return;
- if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
+ if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
return;
spin_lock_bh(&ipvs->sync_buff_lock);
@@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
unsigned int len, pe_name_len, pad;
/* Handle old version of the protocol */
- if (ipvs->sysctl_sync_ver == 0) {
+ if (sysctl_sync_ver(ipvs) == 0) {
ip_vs_sync_conn_v0(net, cp);
return;
}
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 10/18] IPVS: Add sysctl_expire_nodest_conn()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (8 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 09/18] IPVS: Add sysctl_sync_ver() Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 11/18] IPVS: Add expire_quiescent_template() Simon Horman
` (7 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In preparation for not including sysctl_expire_nodest_conn in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 6a0053d..d418bc6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -613,10 +613,16 @@ static int sysctl_nat_icmp_send(struct net *net)
return ipvs->sysctl_nat_icmp_send;
}
+static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_expire_nodest_conn;
+}
+
#else
static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
static int sysctl_nat_icmp_send(struct net *net) { return 0; }
+static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
#endif
@@ -1583,7 +1589,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
- if (ipvs->sysctl_expire_nodest_conn) {
+ if (sysctl_expire_nodest_conn(ipvs)) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 11/18] IPVS: Add expire_quiescent_template()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (9 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 10/18] IPVS: Add sysctl_expire_nodest_conn() Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 12/18] IPVS: Conditionally use sysctl_lblc{r}_expiration Simon Horman
` (6 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In preparation for not including sysctl_expire_quiescent_template in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_conn.c | 13 +++++++++++--
1 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9c2a517..f289306 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -680,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
atomic_dec(&dest->refcnt);
}
+static int expire_quiescent_template(struct netns_ipvs *ipvs,
+ struct ip_vs_dest *dest)
+{
+#ifdef CONFIG_SYSCTL
+ return ipvs->sysctl_expire_quiescent_template &&
+ (atomic_read(&dest->weight) == 0);
+#else
+ return 0;
+#endif
+}
/*
* Checking if the destination of a connection template is available.
@@ -696,8 +706,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
*/
if ((dest == NULL) ||
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- (ipvs->sysctl_expire_quiescent_template &&
- (atomic_read(&dest->weight) == 0))) {
+ expire_quiescent_template(ipvs, dest)) {
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
"-> d:%s:%d\n",
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 12/18] IPVS: Conditionally use sysctl_lblc{r}_expiration
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (10 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 11/18] IPVS: Add expire_quiescent_template() Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 13/18] IPVS: ip_vs_todrop() becomes a noop when CONFIG_SYSCTL is undefined Simon Horman
` (5 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
In preparation for not including sysctl_lblc{r}_expiration in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_lblc.c | 16 +++++++++++++---
net/netfilter/ipvs/ip_vs_lblcr.c | 21 +++++++++++++++------
2 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 6bf7a80..51a27f5 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -63,6 +63,8 @@
#define CHECK_EXPIRE_INTERVAL (60*HZ)
#define ENTRY_TIMEOUT (6*60*HZ)
+#define DEFAULT_EXPIRATION (24*60*60*HZ)
+
/*
* It is for full expiration check.
* When there is no partial expiration check (garbage collection)
@@ -238,6 +240,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
}
}
+static int sysctl_lblc_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ return ipvs->sysctl_lblc_expiration;
+#else
+ return DEFAULT_EXPIRATION;
+#endif
+}
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
@@ -245,7 +256,6 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -254,7 +264,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse +
- ipvs->sysctl_lblc_expiration))
+ sysctl_lblc_expiration(svc)))
continue;
ip_vs_lblc_free(en);
@@ -550,7 +560,7 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
} else
ipvs->lblc_ctl_table = vs_vars_table;
- ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+ ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0063176..7fb9190 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -63,6 +63,8 @@
#define CHECK_EXPIRE_INTERVAL (60*HZ)
#define ENTRY_TIMEOUT (6*60*HZ)
+#define DEFAULT_EXPIRATION (24*60*60*HZ)
+
/*
* It is for full expiration check.
* When there is no partial expiration check (garbage collection)
@@ -410,6 +412,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
}
}
+static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ return ipvs->sysctl_lblcr_expiration;
+#else
+ return DEFAULT_EXPIRATION;
+#endif
+}
static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
@@ -417,15 +428,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
- if (time_after(en->lastuse
- + ipvs->sysctl_lblcr_expiration, now))
+ if (time_after(en->lastuse +
+ sysctl_lblcr_expiration(svc), now))
continue;
ip_vs_lblcr_free(en);
@@ -650,7 +660,6 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
if (en) {
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
@@ -662,7 +671,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
time_after(jiffies, en->set.lastmod +
- ipvs->sysctl_lblcr_expiration)) {
+ sysctl_lblcr_expiration(svc))) {
struct ip_vs_dest *m;
write_lock(&en->set.lock);
@@ -746,7 +755,7 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
} else
ipvs->lblcr_ctl_table = vs_vars_table;
- ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
+ ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
#ifdef CONFIG_SYSCTL
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 13/18] IPVS: ip_vs_todrop() becomes a noop when CONFIG_SYSCTL is undefined
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (11 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 12/18] IPVS: Conditionally use sysctl_lblc{r}_expiration Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 14/18] IPVS: Conditional ip_vs_conntrack_enabled() Simon Horman
` (4 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 29cbe39..18b3842 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1250,6 +1250,7 @@ extern int ip_vs_icmp_xmit_v6
int offset);
#endif
+#ifdef CONFIG_SYSCTL
/*
* This is a simple mechanism to ignore packets when
* we are loaded. Just set ip_vs_drop_rate to 'n' and
@@ -1265,6 +1266,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
ipvs->drop_counter = ipvs->drop_rate;
return 1;
}
+#else
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
+#endif
/*
* ip_vs_fwd_tag returns the forwarding tag of the connection
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 14/18] IPVS: Conditional ip_vs_conntrack_enabled()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (12 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 13/18] IPVS: ip_vs_todrop() becomes a noop when CONFIG_SYSCTL is undefined Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 15/18] IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined Simon Horman
` (3 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
ip_vs_conntrack_enabled() becomes a noop when CONFIG_SYSCTL is undefined.
In preparation for not including sysctl_conntrack in
struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 18b3842..7ef0813 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1356,7 +1356,11 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
*/
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
+#ifdef CONFIG_SYSCTL
return ipvs->sysctl_conntrack;
+#else
+ return 0;
+#endif
}
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 15/18] IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (13 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 14/18] IPVS: Conditional ip_vs_conntrack_enabled() Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 16/18] IPVS: Conditionally define and use ip_vs_lblc{r}_table Simon Horman
` (2 subsequent siblings)
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
Much of ip_vs_leave() is unnecessary if CONFIG_SYSCTL is undefined.
I tried an approach of breaking the now #ifdef'ed portions out
into a separate function. However this appeared to grow the
compiled code on x86_64 by about 200 bytes in the case where
CONFIG_SYSCTL is defined. So I have gone with the simpler though
less elegant #ifdef'ed solution for now.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 9 +++++++--
1 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d418bc6..07accf6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -499,11 +499,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd)
{
- struct net *net;
- struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
+#ifdef CONFIG_SYSCTL
+ struct net *net;
+ struct netns_ipvs *ipvs;
int unicast;
+#endif
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
@@ -512,6 +514,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_service_put(svc);
return NF_DROP;
}
+
+#ifdef CONFIG_SYSCTL
net = skb_net(skb);
#ifdef CONFIG_IP_VS_IPV6
@@ -563,6 +567,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_put(cp);
return ret;
}
+#endif
/*
* When the virtual ftp service is presented, packets destined
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 16/18] IPVS: Conditionally define and use ip_vs_lblc{r}_table
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (14 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 15/18] IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:45 ` [PATCH 17/18] IPVS: Add __ip_vs_control_{init,cleanup}_sysctl() Simon Horman
2011-03-05 23:46 ` [PATCH 18/18] IPVS: Conditionally include sysctl members of struct netns_ipvs Simon Horman
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
ip_vs_lblc_table and ip_vs_lblcr_table, and code that uses them
are unnecessary when CONFIG_SYSCTL is undefined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_lblc.c | 15 ++++++++++-----
net/netfilter/ipvs/ip_vs_lblcr.c | 14 ++++++++++----
2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 51a27f5..f276df9 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -114,7 +114,7 @@ struct ip_vs_lblc_table {
/*
* IPVS LBLC sysctl table
*/
-
+#ifdef CONFIG_SYSCTL
static ctl_table vs_vars_table[] = {
{
.procname = "lblc_expiration",
@@ -125,6 +125,7 @@ static ctl_table vs_vars_table[] = {
},
{ }
};
+#endif
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
@@ -548,6 +549,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
/*
* per netns init.
*/
+#ifdef CONFIG_SYSCTL
static int __net_init __ip_vs_lblc_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -563,7 +565,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
-#ifdef CONFIG_SYSCTL
ipvs->lblc_ctl_header =
register_net_sysctl_table(net, net_vs_ctl_path,
ipvs->lblc_ctl_table);
@@ -572,7 +573,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
kfree(ipvs->lblc_ctl_table);
return -ENOMEM;
}
-#endif
return 0;
}
@@ -581,14 +581,19 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
-#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipvs->lblc_ctl_header);
-#endif
if (!net_eq(net, &init_net))
kfree(ipvs->lblc_ctl_table);
}
+#else
+
+static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
+
+#endif
+
static struct pernet_operations ip_vs_lblc_ops = {
.init = __ip_vs_lblc_init,
.exit = __ip_vs_lblc_exit,
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 7fb9190..cb1c991 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -285,6 +285,7 @@ struct ip_vs_lblcr_table {
};
+#ifdef CONFIG_SYSCTL
/*
* IPVS LBLCR sysctl table
*/
@@ -299,6 +300,7 @@ static ctl_table vs_vars_table[] = {
},
{ }
};
+#endif
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
@@ -743,6 +745,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
/*
* per netns init.
*/
+#ifdef CONFIG_SYSCTL
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -758,7 +761,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
-#ifdef CONFIG_SYSCTL
ipvs->lblcr_ctl_header =
register_net_sysctl_table(net, net_vs_ctl_path,
ipvs->lblcr_ctl_table);
@@ -767,7 +769,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
kfree(ipvs->lblcr_ctl_table);
return -ENOMEM;
}
-#endif
return 0;
}
@@ -776,14 +777,19 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
-#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
-#endif
if (!net_eq(net, &init_net))
kfree(ipvs->lblcr_ctl_table);
}
+#else
+
+static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
+
+#endif
+
static struct pernet_operations ip_vs_lblcr_ops = {
.init = __ip_vs_lblcr_init,
.exit = __ip_vs_lblcr_exit,
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 17/18] IPVS: Add __ip_vs_control_{init,cleanup}_sysctl()
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (15 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 16/18] IPVS: Conditionally define and use ip_vs_lblc{r}_table Simon Horman
@ 2011-03-05 23:45 ` Simon Horman
2011-03-05 23:46 ` [PATCH 18/18] IPVS: Conditionally include sysctl members of struct netns_ipvs Simon Horman
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:45 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
Break out the portions of __ip_vs_control_init() and
__ip_vs_control_cleanup() that aren't necessary when
CONFIG_SYSCTL is undefined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_ctl.c | 98 +++++++++++++++++++++++++---------------
1 files changed, 62 insertions(+), 36 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 8cdbf91..4ddae3d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -88,6 +88,8 @@ static int __ip_vs_addr_is_local_v6(struct net *net,
return 0;
}
#endif
+
+#ifdef CONFIG_SYSCTL
/*
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
@@ -229,6 +231,7 @@ static void defense_work_handler(struct work_struct *work)
ip_vs_random_dropentry(ipvs->net);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
+#endif
int
ip_vs_use_count_inc(void)
@@ -1502,7 +1505,7 @@ static int ip_vs_zero_all(struct net *net)
return 0;
}
-
+#ifdef CONFIG_SYSCTL
static int
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1524,7 +1527,6 @@ proc_do_defense_mode(ctl_table *table, int write,
return rc;
}
-
static int
proc_do_sync_threshold(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1758,6 +1760,7 @@ const struct ctl_path net_vs_ctl_path[] = {
{ }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
+#endif
#ifdef CONFIG_PROC_FS
@@ -3511,7 +3514,8 @@ static void ip_vs_genl_unregister(void)
/*
* per netns init/exit func.
*/
-int __net_init __ip_vs_control_init(struct net *net)
+#ifdef CONFIG_SYSCTL
+int __net_init __ip_vs_control_init_sysctl(struct net *net)
{
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3521,33 +3525,11 @@ int __net_init __ip_vs_control_init(struct net *net)
spin_lock_init(&ipvs->dropentry_lock);
spin_lock_init(&ipvs->droppacket_lock);
spin_lock_init(&ipvs->securetcp_lock);
- ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
-
- /* Initialize rs_table */
- for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
- INIT_LIST_HEAD(&ipvs->rs_table[idx]);
-
- INIT_LIST_HEAD(&ipvs->dest_trash);
- atomic_set(&ipvs->ftpsvc_counter, 0);
- atomic_set(&ipvs->nullsvc_counter, 0);
-
- /* procfs stats */
- ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
- if (!ipvs->tot_stats.cpustats) {
- pr_err("%s() alloc_percpu failed\n", __func__);
- goto err_alloc;
- }
- spin_lock_init(&ipvs->tot_stats.lock);
-
- proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
- proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
- proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
- &ip_vs_stats_percpu_fops);
if (!net_eq(net, &init_net)) {
tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
if (tbl == NULL)
- goto err_dup;
+ return -ENOMEM;
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
@@ -3576,25 +3558,73 @@ int __net_init __ip_vs_control_init(struct net *net)
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
-#ifdef CONFIG_SYSCTL
ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
tbl);
if (ipvs->sysctl_hdr == NULL) {
if (!net_eq(net, &init_net))
kfree(tbl);
- goto err_dup;
+ return -ENOMEM;
}
-#endif
ip_vs_new_estimator(net, &ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+
return 0;
+}
+
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ cancel_delayed_work_sync(&ipvs->defense_work);
+ cancel_work_sync(&ipvs->defense_work.work);
+ unregister_net_sysctl_table(ipvs->sysctl_hdr);
+}
-err_dup:
+#else
+
+int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net) { }
+
+#endif
+
+int __net_init __ip_vs_control_init(struct net *net)
+{
+ int idx;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+ /* Initialize rs_table */
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+ INIT_LIST_HEAD(&ipvs->dest_trash);
+ atomic_set(&ipvs->ftpsvc_counter, 0);
+ atomic_set(&ipvs->nullsvc_counter, 0);
+
+ /* procfs stats */
+ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+ if (!ipvs->tot_stats.cpustats) {
+ pr_err("%s() alloc_percpu failed\n", __func__);
+ return -ENOMEM;
+ }
+ spin_lock_init(&ipvs->tot_stats.lock);
+
+ proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+ proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+ proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+ &ip_vs_stats_percpu_fops);
+
+ if (__ip_vs_control_init_sysctl(net))
+ goto err;
+
+ return 0;
+
+err:
free_percpu(ipvs->tot_stats.cpustats);
-err_alloc:
return -ENOMEM;
}
@@ -3604,11 +3634,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
ip_vs_trash_cleanup(net);
ip_vs_kill_estimator(net, &ipvs->tot_stats);
- cancel_delayed_work_sync(&ipvs->defense_work);
- cancel_work_sync(&ipvs->defense_work.work);
-#ifdef CONFIG_SYSCTL
- unregister_net_sysctl_table(ipvs->sysctl_hdr);
-#endif
+ __ip_vs_control_cleanup_sysctl(net);
proc_net_remove(net, "ip_vs_stats_percpu");
proc_net_remove(net, "ip_vs_stats");
proc_net_remove(net, "ip_vs");
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 18/18] IPVS: Conditionally include sysctl members of struct netns_ipvs
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
` (16 preceding siblings ...)
2011-03-05 23:45 ` [PATCH 17/18] IPVS: Add __ip_vs_control_{init,cleanup}_sysctl() Simon Horman
@ 2011-03-05 23:46 ` Simon Horman
17 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-05 23:46 UTC (permalink / raw)
To: netdev, netfilter-devel, netfilter, lvs-devel
Cc: Julian Anastasov, Hans Schillstrom, Simon Horman
There is now no need to include sysctl members of struct netns_ipvs
unless CONFIG_SYSCTL is defined.
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 21 +++++++++++++--------
1 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7ef0813..9b92190 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -838,6 +838,17 @@ struct netns_ipvs {
struct ip_vs_stats tot_stats; /* Statistics & est. */
int num_services; /* no of virtual services */
+
+ rwlock_t rs_lock; /* real services table */
+ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+ struct lock_class_key ctl_key; /* ctl_mutex debuging */
+ /* Trash for destinations */
+ struct list_head dest_trash;
+ /* Service counters */
+ atomic_t ftpsvc_counter;
+ atomic_t nullsvc_counter;
+
+#ifdef CONFIG_SYSCTL
/* 1/rate drop and drop-entry variables */
struct delayed_work defense_work; /* Work handler */
int drop_rate;
@@ -847,18 +858,12 @@ struct netns_ipvs {
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
spinlock_t securetcp_lock; /* state and timeout tables */
- rwlock_t rs_lock; /* real services table */
- /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
- struct lock_class_key ctl_key; /* ctl_mutex debuging */
- /* Trash for destinations */
- struct list_head dest_trash;
- /* Service counters */
- atomic_t ftpsvc_counter;
- atomic_t nullsvc_counter;
/* sys-ctl struct */
struct ctl_table_header *sysctl_hdr;
struct ctl_table *sysctl_tbl;
+#endif
+
/* sysctl variables */
int sysctl_amemthresh;
int sysctl_am_droprate;
--
1.7.2.3
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH 03/18] ipvs: zero percpu stats
2011-03-05 23:45 ` [PATCH 03/18] ipvs: zero percpu stats Simon Horman
@ 2011-03-06 9:06 ` Eric Dumazet
2011-03-06 12:18 ` Julian Anastasov
0 siblings, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2011-03-06 9:06 UTC (permalink / raw)
To: Simon Horman
Cc: netdev, netfilter-devel, netfilter, lvs-devel, Julian Anastasov,
Hans Schillstrom
Le dimanche 06 mars 2011 à 08:45 +0900, Simon Horman a écrit :
> From: Julian Anastasov <ja@ssi.bg>
>
> Zero the new percpu stats because we copy from there.
>
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> Signed-off-by: Simon Horman <horms@verge.net.au>
> ---
> net/netfilter/ipvs/ip_vs_ctl.c | 17 +++++++++++++++++
> 1 files changed, 17 insertions(+), 0 deletions(-)
>
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index a2a67ad..fd74527 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -715,8 +715,25 @@ static void ip_vs_trash_cleanup(struct net *net)
> static void
> ip_vs_zero_stats(struct ip_vs_stats *stats)
> {
> + struct ip_vs_cpu_stats *cpustats = stats->cpustats;
> + int i;
> +
> spin_lock_bh(&stats->lock);
>
> + for_each_possible_cpu(i) {
> + struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
> + unsigned int start;
> +
> + /* Do not pretend to be writer, it is enough to
> + * sync with writers that modify the u64 counters
> + * because under stats->lock we are the only reader.
> + */
> + do {
> + start = u64_stats_fetch_begin(&u->syncp);
> + memset(&u->ustats, 0, sizeof(u->ustats));
> + } while (u64_stats_fetch_retry(&u->syncp, start));
Sorry this makes no sense to me.
This code _is_ a writer, and hardly a hot path.
Why try to pretend its a reader and confuse people ?
Either :
- Another writer can modify the counters in same time, and we must
synchronize with them (we are a writer after all)
- Another reader can read the counters in same time, and we must let
them catch we might have cleared half of their values.
- No reader or writer can access data, no synch is needed, a pure
memset() is OK.
> + }
> +
> memset(&stats->ustats, 0, sizeof(stats->ustats));
> ip_vs_zero_estimator(stats);
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 03/18] ipvs: zero percpu stats
2011-03-06 9:06 ` Eric Dumazet
@ 2011-03-06 12:18 ` Julian Anastasov
2011-03-10 1:34 ` Simon Horman
2011-03-13 10:57 ` Eric Dumazet
0 siblings, 2 replies; 26+ messages in thread
From: Julian Anastasov @ 2011-03-06 12:18 UTC (permalink / raw)
To: Eric Dumazet
Cc: Simon Horman, netdev, netfilter-devel, netfilter, lvs-devel,
Hans Schillstrom
Hello,
On Sun, 6 Mar 2011, Eric Dumazet wrote:
>> Zero the new percpu stats because we copy from there.
>>
>> Signed-off-by: Julian Anastasov <ja@ssi.bg>
>> Signed-off-by: Simon Horman <horms@verge.net.au>
>> ---
>> net/netfilter/ipvs/ip_vs_ctl.c | 17 +++++++++++++++++
>> 1 files changed, 17 insertions(+), 0 deletions(-)
>>
>> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
>> index a2a67ad..fd74527 100644
>> --- a/net/netfilter/ipvs/ip_vs_ctl.c
>> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
>> @@ -715,8 +715,25 @@ static void ip_vs_trash_cleanup(struct net *net)
>> static void
>> ip_vs_zero_stats(struct ip_vs_stats *stats)
>> {
>> + struct ip_vs_cpu_stats *cpustats = stats->cpustats;
>> + int i;
>> +
>> spin_lock_bh(&stats->lock);
>>
>> + for_each_possible_cpu(i) {
>> + struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
>> + unsigned int start;
>> +
>> + /* Do not pretend to be writer, it is enough to
>> + * sync with writers that modify the u64 counters
>> + * because under stats->lock we are the only reader.
>> + */
>> + do {
>> + start = u64_stats_fetch_begin(&u->syncp);
>> + memset(&u->ustats, 0, sizeof(u->ustats));
>> + } while (u64_stats_fetch_retry(&u->syncp, start));
>
>
> Sorry this makes no sense to me.
Hm, yes, the comment is a little bit misleading.
I fixed it below...
> This code _is_ a writer, and hardly a hot path.
Yes, the picture is as follows:
- in 2.6.38-rc we remove the global spin lock (stats->lock)
from packet processing which is a hot path, adding percpu
counters instead
- we need protection for percpu counters and for the sum
- the chain is: interrupts increment percpu counters, the
estimation timer reads them and creates sum every 2 seconds,
then user context can read the sum or even to show the percpu
counters, not to forget the zeroing of sum and counters
The players in detail:
- packet processing:
- softirq context, hot path
- increments counters by using u64_stats_update_begin and
u64_stats_update_end, does not wait readers or zeroing
- sum not touched, stats->lock usage removed in 2.6.38-rc
- 2-second estimation timer:
- funcs: estimation_timer()
- timer context, softirq
- reads percpu counters with u64_stats_fetch_begin and
u64_stats_fetch_retry to sync with counter incrementing
- uses spin_lock (stats->lock) to protect the written sum
which is later read by user context: provides
at least u64 atomicity but additionally the relation
between packets and bytes
- sum readers:
- funcs: ip_vs_stats_show(), ip_vs_stats_percpu_show(),
ip_vs_copy_stats(), ip_vs_genl_fill_stats()
- user context, not a hot path
- uses spin_lock_bh (stats->lock) for atomic reading of
the sum created by estimation_timer()
- show percpu counters:
- funcs: ip_vs_stats_percpu_show()
- user context, not a hot path
- uses u64_stats_fetch_begin_bh and u64_stats_fetch_retry_bh
to synchronize with counter incrementing
- still missing: should use spin_lock_bh (stats->lock)
to synchronize with ip_vs_zero_stats() that modifies
percpu counters.
- zero stats and percpu counters
- funcs: ip_vs_zero_stats()
- user context, not a hot path
- uses spin_lock_bh (stats->lock) while modifying
sum but also while zeroing percpu counters because
we are a hidden writer which does not allow other
percpu counter readers at the same time but we are
still synchronized with percpu counter incrementing
without delaying it
To summarize, I see 2 solutions, in order of preference:
1. all players except packet processing should use stats->lock
when reading/writing sum or when reading/zeroing percpu
counters. Use u64_stats to avoid delays in incrementing.
2. Use seqlock instead of u64_stats if we want to treat the
percpu counters zeroing as writer. This returns us before
2.6.38-rc where we used global stats->lock even for counter
incrementing. Except that now we can use percpu seqlock
just to register the zeroing as writer.
> Why try to pretend its a reader and confuse people ?
>
> Either :
>
> - Another writer can modify the counters in same time, and we must
> synchronize with them (we are a writer after all)
Global mutex allows only one zeroing at a time.
But zeroing runs in parallel with incrementing, so we
have 2 writers for a per-CPU state. This sounds like
above solution 2 with percpu seqlock? But it adds extra
spin_lock in hot path, even if it is percpu. It only
saves the spin_lock_bh while reading percpu counters in
ip_vs_stats_percpu_show(). That is why a prefer solution 1.
> - Another reader can read the counters in same time, and we must let
> them catch we mihjt have cleared half of their values.
Yes, zeroing can run in parallel with /proc reading,
that is why I now try to serialize all readers with the
stats spin lock to guarantee u64 atomicity.
> - No reader or writer can access data, no synch is needed, a pure
> memset() is OK.
Packet processing can damage the counters while we
do memset, so we need at least u64_stats_fetch_* to sync
with incrementing.
>> + }
>> +
>> memset(&stats->ustats, 0, sizeof(stats->ustats));
>> ip_vs_zero_estimator(stats);
So, here is solution 1 fully implemented:
==============
Zero the new percpu stats because we copy from there.
Use the stats spin lock to synchronize the percpu zeroing with
the percpu reading, both in user context and not in a hot path.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
diff -urp lvs-test-2.6-8a80c79/linux/net/netfilter/ipvs/ip_vs_ctl.c linux/net/netfilter/ipvs/ip_vs_ctl.c
--- lvs-test-2.6-8a80c79/linux/net/netfilter/ipvs/ip_vs_ctl.c 2011-03-06 13:39:59.000000000 +0200
+++ linux/net/netfilter/ipvs/ip_vs_ctl.c 2011-03-06 13:44:56.108275455 +0200
@@ -713,8 +713,25 @@ static void ip_vs_trash_cleanup(struct n
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
+ struct ip_vs_cpu_stats *cpustats = stats->cpustats;
+ int i;
+
spin_lock_bh(&stats->lock);
+ for_each_possible_cpu(i) {
+ struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
+ unsigned int start;
+
+ /* Do not pretend to be writer, it is enough to
+ * sync with writers that modify the u64 counters
+ * because under stats->lock there are no other readers
+ */
+ do {
+ start = u64_stats_fetch_begin(&u->syncp);
+ memset(&u->ustats, 0, sizeof(u->ustats));
+ } while (u64_stats_fetch_retry(&u->syncp, start));
+ }
+
memset(&stats->ustats, 0, sizeof(stats->ustats));
ip_vs_zero_estimator(stats);
@@ -2015,16 +2032,19 @@ static int ip_vs_stats_percpu_show(struc
seq_printf(seq,
"CPU Conns Packets Packets Bytes Bytes\n");
+ /* Use spin lock early to synchronize with percpu zeroing */
+ spin_lock_bh(&tot_stats->lock);
+
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
unsigned int start;
__u64 inbytes, outbytes;
do {
- start = u64_stats_fetch_begin_bh(&u->syncp);
+ start = u64_stats_fetch_begin(&u->syncp);
inbytes = u->ustats.inbytes;
outbytes = u->ustats.outbytes;
- } while (u64_stats_fetch_retry_bh(&u->syncp, start));
+ } while (u64_stats_fetch_retry(&u->syncp, start));
seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
i, u->ustats.conns, u->ustats.inpkts,
@@ -2032,7 +2052,6 @@ static int ip_vs_stats_percpu_show(struc
(__u64)outbytes);
}
- spin_lock_bh(&tot_stats->lock);
seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
tot_stats->ustats.conns, tot_stats->ustats.inpkts,
tot_stats->ustats.outpkts,
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 03/18] ipvs: zero percpu stats
2011-03-06 12:18 ` Julian Anastasov
@ 2011-03-10 1:34 ` Simon Horman
2011-03-10 2:53 ` David Miller
2011-03-13 10:57 ` Eric Dumazet
1 sibling, 1 reply; 26+ messages in thread
From: Simon Horman @ 2011-03-10 1:34 UTC (permalink / raw)
To: Julian Anastasov
Cc: Eric Dumazet, netdev, netfilter-devel, netfilter, lvs-devel,
Hans Schillstrom
On Sun, Mar 06, 2011 at 02:18:35PM +0200, Julian Anastasov wrote:
[ snip ]
> Zero the new percpu stats because we copy from there.
> Use the stats spin lock to synchronize the percpu zeroing with
> the percpu reading, both in user context and not in a hot path.
>
> Signed-off-by: Julian Anastasov <ja@ssi.bg>
Eric, do you have any thoughts on this?
It seems clean to me.
> ---
>
> diff -urp lvs-test-2.6-8a80c79/linux/net/netfilter/ipvs/ip_vs_ctl.c linux/net/netfilter/ipvs/ip_vs_ctl.c
> --- lvs-test-2.6-8a80c79/linux/net/netfilter/ipvs/ip_vs_ctl.c 2011-03-06 13:39:59.000000000 +0200
> +++ linux/net/netfilter/ipvs/ip_vs_ctl.c 2011-03-06 13:44:56.108275455 +0200
> @@ -713,8 +713,25 @@ static void ip_vs_trash_cleanup(struct n
> static void
> ip_vs_zero_stats(struct ip_vs_stats *stats)
> {
> + struct ip_vs_cpu_stats *cpustats = stats->cpustats;
> + int i;
> +
> spin_lock_bh(&stats->lock);
>
> + for_each_possible_cpu(i) {
> + struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
> + unsigned int start;
> +
> + /* Do not pretend to be writer, it is enough to
> + * sync with writers that modify the u64 counters
> + * because under stats->lock there are no other readers
> + */
> + do {
> + start = u64_stats_fetch_begin(&u->syncp);
> + memset(&u->ustats, 0, sizeof(u->ustats));
> + } while (u64_stats_fetch_retry(&u->syncp, start));
> + }
> +
> memset(&stats->ustats, 0, sizeof(stats->ustats));
> ip_vs_zero_estimator(stats);
>
> @@ -2015,16 +2032,19 @@ static int ip_vs_stats_percpu_show(struc
> seq_printf(seq,
> "CPU Conns Packets Packets Bytes Bytes\n");
>
> + /* Use spin lock early to synchronize with percpu zeroing */
> + spin_lock_bh(&tot_stats->lock);
> +
> for_each_possible_cpu(i) {
> struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
> unsigned int start;
> __u64 inbytes, outbytes;
>
> do {
> - start = u64_stats_fetch_begin_bh(&u->syncp);
> + start = u64_stats_fetch_begin(&u->syncp);
> inbytes = u->ustats.inbytes;
> outbytes = u->ustats.outbytes;
> - } while (u64_stats_fetch_retry_bh(&u->syncp, start));
> + } while (u64_stats_fetch_retry(&u->syncp, start));
>
> seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
> i, u->ustats.conns, u->ustats.inpkts,
> @@ -2032,7 +2052,6 @@ static int ip_vs_stats_percpu_show(struc
> (__u64)outbytes);
> }
>
> - spin_lock_bh(&tot_stats->lock);
> seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
> tot_stats->ustats.conns, tot_stats->ustats.inpkts,
> tot_stats->ustats.outpkts,
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 03/18] ipvs: zero percpu stats
2011-03-10 1:34 ` Simon Horman
@ 2011-03-10 2:53 ` David Miller
2011-03-10 4:27 ` Simon Horman
0 siblings, 1 reply; 26+ messages in thread
From: David Miller @ 2011-03-10 2:53 UTC (permalink / raw)
To: horms; +Cc: ja, eric.dumazet, netdev, netfilter-devel, netfilter, lvs-devel,
hans
From: Simon Horman <horms@verge.net.au>
Date: Thu, 10 Mar 2011 10:34:42 +0900
> On Sun, Mar 06, 2011 at 02:18:35PM +0200, Julian Anastasov wrote:
>
> [ snip ]
>
>> Zero the new percpu stats because we copy from there.
>> Use the stats spin lock to synchronize the percpu zeroing with
>> the percpu reading, both in user context and not in a hot path.
>>
>> Signed-off-by: Julian Anastasov <ja@ssi.bg>
>
> Eric, do you have any thoughts on this?
> It seems clean to me.
Eric is away until this weekend, so don't be alarmed by a
late response :-)
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 03/18] ipvs: zero percpu stats
2011-03-10 2:53 ` David Miller
@ 2011-03-10 4:27 ` Simon Horman
0 siblings, 0 replies; 26+ messages in thread
From: Simon Horman @ 2011-03-10 4:27 UTC (permalink / raw)
To: David Miller
Cc: ja, eric.dumazet, netdev, netfilter-devel, netfilter, lvs-devel,
hans
On Wed, Mar 09, 2011 at 06:53:44PM -0800, David Miller wrote:
> From: Simon Horman <horms@verge.net.au>
> Date: Thu, 10 Mar 2011 10:34:42 +0900
>
> > On Sun, Mar 06, 2011 at 02:18:35PM +0200, Julian Anastasov wrote:
> >
> > [ snip ]
> >
> >> Zero the new percpu stats because we copy from there.
> >> Use the stats spin lock to synchronize the percpu zeroing with
> >> the percpu reading, both in user context and not in a hot path.
> >>
> >> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> >
> > Eric, do you have any thoughts on this?
> > It seems clean to me.
>
> Eric is away until this weekend, so don't be alarmed by a
> late response :-)
Thanks, I'll wait longer :-)
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 03/18] ipvs: zero percpu stats
2011-03-06 12:18 ` Julian Anastasov
2011-03-10 1:34 ` Simon Horman
@ 2011-03-13 10:57 ` Eric Dumazet
2011-03-13 23:29 ` Julian Anastasov
1 sibling, 1 reply; 26+ messages in thread
From: Eric Dumazet @ 2011-03-13 10:57 UTC (permalink / raw)
To: Julian Anastasov
Cc: Simon Horman, netdev, netfilter-devel, netfilter, lvs-devel,
Hans Schillstrom
Le dimanche 06 mars 2011 à 14:18 +0200, Julian Anastasov a écrit :
> Hello,
>
> On Sun, 6 Mar 2011, Eric Dumazet wrote:
>
> >> Zero the new percpu stats because we copy from there.
> >>
> >> Signed-off-by: Julian Anastasov <ja@ssi.bg>
> >> Signed-off-by: Simon Horman <horms@verge.net.au>
> >> ---
> >> net/netfilter/ipvs/ip_vs_ctl.c | 17 +++++++++++++++++
> >> 1 files changed, 17 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> >> index a2a67ad..fd74527 100644
> >> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> >> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> >> @@ -715,8 +715,25 @@ static void ip_vs_trash_cleanup(struct net *net)
> >> static void
> >> ip_vs_zero_stats(struct ip_vs_stats *stats)
> >> {
> >> + struct ip_vs_cpu_stats *cpustats = stats->cpustats;
> >> + int i;
> >> +
> >> spin_lock_bh(&stats->lock);
> >>
> >> + for_each_possible_cpu(i) {
> >> + struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
> >> + unsigned int start;
> >> +
> >> + /* Do not pretend to be writer, it is enough to
> >> + * sync with writers that modify the u64 counters
> >> + * because under stats->lock we are the only reader.
> >> + */
> >> + do {
> >> + start = u64_stats_fetch_begin(&u->syncp);
> >> + memset(&u->ustats, 0, sizeof(u->ustats));
> >> + } while (u64_stats_fetch_retry(&u->syncp, start));
> >
> >
> > Sorry this makes no sense to me.
>
> Hm, yes, the comment is a little bit misleading.
> I fixed it below...
>
> > This code _is_ a writer, and hardly a hot path.
>
> Yes, the picture is as follows:
>
> - in 2.6.38-rc we remove the global spin lock (stats->lock)
> from packet processing which is a hot path, adding percpu
> counters instead
>
> - we need protection for percpu counters and for the sum
>
> - the chain is: interrupts increment percpu counters, the
> estimation timer reads them and creates sum every 2 seconds,
> then user context can read the sum or even to show the percpu
> counters, not to forget the zeroing of sum and counters
>
> The players in detail:
>
> - packet processing:
> - softirq context, hot path
> - increments counters by using u64_stats_update_begin and
> u64_stats_update_end, does not wait readers or zeroing
> - sum not touched, stats->lock usage removed in 2.6.38-rc
>
> - 2-second estimation timer:
> - funcs: estimation_timer()
> - timer context, softirq
> - reads percpu counters with u64_stats_fetch_begin and
> u64_stats_fetch_retry to sync with counter incrementing
> - uses spin_lock (stats->lock) to protect the written sum
> which is later read by user context: provides
> at least u64 atomicity but additionally the relation
> between packets and bytes
>
> - sum readers:
> - funcs: ip_vs_stats_show(), ip_vs_stats_percpu_show(),
> ip_vs_copy_stats(), ip_vs_genl_fill_stats()
> - user context, not a hot path
> - uses spin_lock_bh (stats->lock) for atomic reading of
> the sum created by estimation_timer()
>
> - show percpu counters:
> - funcs: ip_vs_stats_percpu_show()
> - user context, not a hot path
> - uses u64_stats_fetch_begin_bh and u64_stats_fetch_retry_bh
> to synchronize with counter incrementing
> - still missing: should use spin_lock_bh (stats->lock)
> to synchronize with ip_vs_zero_stats() that modifies
> percpu counters.
>
> - zero stats and percpu counters
> - funcs: ip_vs_zero_stats()
> - user context, not a hot path
> - uses spin_lock_bh (stats->lock) while modifying
> sum but also while zeroing percpu counters because
> we are a hidden writer which does not allow other
> percpu counter readers at the same time but we are
> still synchronized with percpu counter incrementing
> without delaying it
>
> To summarize, I see 2 solutions, in order of preference:
>
> 1. all players except packet processing should use stats->lock
> when reading/writing sum or when reading/zeroing percpu
> counters. Use u64_stats to avoid delays in incrementing.
>
> 2. Use seqlock instead of u64_stats if we want to treat the
> percpu counters zeroing as writer. This returns us before
> 2.6.38-rc where we used global stats->lock even for counter
> incrementing. Except that now we can use percpu seqlock
> just to register the zeroing as writer.
>
> > Why try to pretend its a reader and confuse people ?
> >
> > Either :
> >
> > - Another writer can modify the counters in same time, and we must
> > synchronize with them (we are a writer after all)
>
> Global mutex allows only one zeroing at a time.
> But zeroing runs in parallel with incrementing, so we
> have 2 writers for a per-CPU state. This sounds like
> above solution 2 with percpu seqlock? But it adds extra
> spin_lock in hot path, even if it is percpu. It only
> saves the spin_lock_bh while reading percpu counters in
> ip_vs_stats_percpu_show(). That is why a prefer solution 1.
>
> > - Another reader can read the counters in same time, and we must let
> > them catch we might have cleared half of their values.
>
> Yes, zeroing can run in parallel with /proc reading,
> that is why I now try to serialize all readers with the
> stats spin lock to guarantee u64 atomicity.
>
> > - No reader or writer can access data, no synch is needed, a pure
> > memset() is OK.
>
> Packet processing can damage the counters while we
> do memset, so we need at least u64_stats_fetch_* to sync
> with incrementing.
>
OK I now understand what you wanted to do.
Problem is you do synchronize your memset() with a concurrent writer but
one way only. (You detect a writer did some changes on the counters
while you memset() them), but a writer has no way to detect your writes
(could be partially committed to main memory) : It could read a
corrupted value.
I feel memory barriers are wrong and not really fixable without slowing
down the hot path.
As implied in include/linux/u64_stats_sync.h file, a "writer" should be
alone :)
One other way to handle that (and let hot path packet processing without
extra locking) would be to never memset() this data, but use a separate
"summed" value as a relative point, and substract this sum to the
current one (all this in slow path, so not a problem)
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 03/18] ipvs: zero percpu stats
2011-03-13 10:57 ` Eric Dumazet
@ 2011-03-13 23:29 ` Julian Anastasov
0 siblings, 0 replies; 26+ messages in thread
From: Julian Anastasov @ 2011-03-13 23:29 UTC (permalink / raw)
To: Eric Dumazet
Cc: Simon Horman, netdev, netfilter-devel, netfilter, lvs-devel,
Hans Schillstrom
[-- Attachment #1: Type: TEXT/PLAIN, Size: 9424 bytes --]
Hello,
On Sun, 13 Mar 2011, Eric Dumazet wrote:
> Le dimanche 06 mars 2011 à 14:18 +0200, Julian Anastasov a écrit :
>> Packet processing can damage the counters while we
>> do memset, so we need at least u64_stats_fetch_* to sync
>> with incrementing.
>>
>
> OK I now understand what you wanted to do.
>
> Problem is you do synchronize your memset() with a concurrent writer but
> one way only. (You detect a writer did some changes on the counters
> while you memset() them), but a writer has no way to detect your writes
> (could be partially committed to main memory) : It could read a
> corrupted value.
You mean such worst case is possible: incrementing
remembers old values and uses them for next packet incrementing.
> I feel memory barriers are wrong and not really fixable without slowing
> down the hot path.
>
> As implied in include/linux/u64_stats_sync.h file, a "writer" should be
> alone :)
>
> One other way to handle that (and let hot path packet processing without
> extra locking) would be to never memset() this data, but use a separate
> "summed" value as a relative point, and substract this sum to the
> current one (all this in slow path, so not a problem)
Good idea. Thanks! Here is a new version that
does not reset percpu counters but maintains proper
values after zeroing for the other stats and rates
as before. For now I decided not to show zeroed values
for percpu values.
===========================================================
Currently, the new percpu counters are not zeroed and
the zero commands do not work as expected, we still show the old
sum of percpu values. OTOH, we can not reset the percpu counters
from user context without causing the incrementing to use old
and bogus values.
So, as Eric Dumazet suggested fix that by moving all overhead
to stats reading in user context. Do not introduce overhead in
timer context (estimator) and incrementing (packet handling in
softirqs).
The new ustats0 field holds the zero point for all
counter values, the rates always use 0 as base value as before.
When showing the values to user space just give the difference
between counters and the base values. The only drawback is that
percpu stats are not zeroed, they are accessible only from /proc
and are new interface, so it should not be a compatibility problem
as long as the sum stats and rates are correct after zeroing.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
diff -urp lvs-test-2.6-8a80c79/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
--- lvs-test-2.6-8a80c79/linux/include/net/ip_vs.h 2011-03-14 00:27:47.000000000 +0200
+++ linux/include/net/ip_vs.h 2011-03-14 00:28:59.645249612 +0200
@@ -374,6 +374,7 @@ struct ip_vs_stats {
struct ip_vs_estimator est; /* estimator */
struct ip_vs_cpu_stats *cpustats; /* per cpu counters */
spinlock_t lock; /* spin lock */
+ struct ip_vs_stats_user ustats0; /* reset values */
};
/*
diff -urp lvs-test-2.6-8a80c79/linux/net/netfilter/ipvs/ip_vs_ctl.c linux/net/netfilter/ipvs/ip_vs_ctl.c
--- lvs-test-2.6-8a80c79/linux/net/netfilter/ipvs/ip_vs_ctl.c 2011-03-14 00:27:47.000000000 +0200
+++ linux/net/netfilter/ipvs/ip_vs_ctl.c 2011-03-14 00:32:05.799251081 +0200
@@ -709,13 +709,51 @@ static void ip_vs_trash_cleanup(struct n
}
}
+static void
+ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+#define IP_VS_SHOW_STATS_RATE(r) dst->r = src->ustats.r
+
+ spin_lock_bh(&src->lock);
+
+ IP_VS_SHOW_STATS_COUNTER(conns);
+ IP_VS_SHOW_STATS_COUNTER(inpkts);
+ IP_VS_SHOW_STATS_COUNTER(outpkts);
+ IP_VS_SHOW_STATS_COUNTER(inbytes);
+ IP_VS_SHOW_STATS_COUNTER(outbytes);
+
+ IP_VS_SHOW_STATS_RATE(cps);
+ IP_VS_SHOW_STATS_RATE(inpps);
+ IP_VS_SHOW_STATS_RATE(outpps);
+ IP_VS_SHOW_STATS_RATE(inbps);
+ IP_VS_SHOW_STATS_RATE(outbps);
+
+ spin_unlock_bh(&src->lock);
+}
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
- memset(&stats->ustats, 0, sizeof(stats->ustats));
+ /* get current counters as zero point, rates are zeroed */
+
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+#define IP_VS_ZERO_STATS_RATE(r) stats->ustats.r = 0
+
+ IP_VS_ZERO_STATS_COUNTER(conns);
+ IP_VS_ZERO_STATS_COUNTER(inpkts);
+ IP_VS_ZERO_STATS_COUNTER(outpkts);
+ IP_VS_ZERO_STATS_COUNTER(inbytes);
+ IP_VS_ZERO_STATS_COUNTER(outbytes);
+
+ IP_VS_ZERO_STATS_RATE(cps);
+ IP_VS_ZERO_STATS_RATE(inpps);
+ IP_VS_ZERO_STATS_RATE(outpps);
+ IP_VS_ZERO_STATS_RATE(inbps);
+ IP_VS_ZERO_STATS_RATE(outbps);
+
ip_vs_zero_estimator(stats);
spin_unlock_bh(&stats->lock);
@@ -1961,7 +1999,7 @@ static const struct file_operations ip_v
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
- struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
+ struct ip_vs_stats_user show;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@@ -1969,22 +2007,18 @@ static int ip_vs_stats_show(struct seq_f
seq_printf(seq,
" Conns Packets Packets Bytes Bytes\n");
- spin_lock_bh(&tot_stats->lock);
- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
- tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
- (unsigned long long) tot_stats->ustats.inbytes,
- (unsigned long long) tot_stats->ustats.outbytes);
+ ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
+ seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
+ show.inpkts, show.outpkts,
+ (unsigned long long) show.inbytes,
+ (unsigned long long) show.outbytes);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq,"%8X %8X %8X %16X %16X\n",
- tot_stats->ustats.cps,
- tot_stats->ustats.inpps,
- tot_stats->ustats.outpps,
- tot_stats->ustats.inbps,
- tot_stats->ustats.outbps);
- spin_unlock_bh(&tot_stats->lock);
+ seq_printf(seq, "%8X %8X %8X %16X %16X\n",
+ show.cps, show.inpps, show.outpps,
+ show.inbps, show.outbps);
return 0;
}
@@ -2296,14 +2330,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cm
static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
-{
- spin_lock_bh(&src->lock);
- memcpy(dst, &src->ustats, sizeof(*dst));
- spin_unlock_bh(&src->lock);
-}
-
-static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
dst->protocol = src->protocol;
@@ -2689,31 +2715,29 @@ static const struct nla_policy ip_vs_des
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
struct ip_vs_stats *stats)
{
+ struct ip_vs_stats_user ustats;
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
if (!nl_stats)
return -EMSGSIZE;
- spin_lock_bh(&stats->lock);
-
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
- NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
- NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
- NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
+ ip_vs_copy_stats(&ustats, stats);
- spin_unlock_bh(&stats->lock);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
+ NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
+ NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
nla_nest_end(skb, nl_stats);
return 0;
nla_put_failure:
- spin_unlock_bh(&stats->lock);
nla_nest_cancel(skb, nl_stats);
return -EMSGSIZE;
}
diff -urp lvs-test-2.6-8a80c79/linux/net/netfilter/ipvs/ip_vs_est.c linux/net/netfilter/ipvs/ip_vs_est.c
--- lvs-test-2.6-8a80c79/linux/net/netfilter/ipvs/ip_vs_est.c 2011-03-14 00:27:47.000000000 +0200
+++ linux/net/netfilter/ipvs/ip_vs_est.c 2011-03-14 00:28:59.648248944 +0200
@@ -184,13 +184,14 @@ void ip_vs_kill_estimator(struct net *ne
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
+ struct ip_vs_stats_user *u = &stats->ustats;
- /* set counters zero, caller must hold the stats->lock lock */
- est->last_inbytes = 0;
- est->last_outbytes = 0;
- est->last_conns = 0;
- est->last_inpkts = 0;
- est->last_outpkts = 0;
+ /* reset counters, caller must hold the stats->lock lock */
+ est->last_inbytes = u->inbytes;
+ est->last_outbytes = u->outbytes;
+ est->last_conns = u->conns;
+ est->last_inpkts = u->inpkts;
+ est->last_outpkts = u->outpkts;
est->cps = 0;
est->inpps = 0;
est->outpps = 0;
^ permalink raw reply [flat|nested] 26+ messages in thread
end of thread, other threads:[~2011-03-13 23:29 UTC | newest]
Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-05 23:45 [patch v2 ] IPVS: Conditionally include sysctl code Simon Horman
2011-03-05 23:45 ` [PATCH 01/18] ipvs: move struct netns_ipvs Simon Horman
2011-03-05 23:45 ` [PATCH 02/18] ipvs: reorganize tot_stats Simon Horman
2011-03-05 23:45 ` [PATCH 03/18] ipvs: zero percpu stats Simon Horman
2011-03-06 9:06 ` Eric Dumazet
2011-03-06 12:18 ` Julian Anastasov
2011-03-10 1:34 ` Simon Horman
2011-03-10 2:53 ` David Miller
2011-03-10 4:27 ` Simon Horman
2011-03-13 10:57 ` Eric Dumazet
2011-03-13 23:29 ` Julian Anastasov
2011-03-05 23:45 ` [PATCH 04/18] ipvs: remove unused seqcount stats Simon Horman
2011-03-05 23:45 ` [PATCH 05/18] IPVS: Add ip_vs_route_me_harder() Simon Horman
2011-03-05 23:45 ` [PATCH 06/18] IPVS: Add sysctl_snat_reroute() Simon Horman
2011-03-05 23:45 ` [PATCH 07/18] IPVS: Add sysctl_nat_icmp_send() Simon Horman
2011-03-05 23:45 ` [PATCH 08/18] IPVS: Add {sysctl_sync_threshold,period}() Simon Horman
2011-03-05 23:45 ` [PATCH 09/18] IPVS: Add sysctl_sync_ver() Simon Horman
2011-03-05 23:45 ` [PATCH 10/18] IPVS: Add sysctl_expire_nodest_conn() Simon Horman
2011-03-05 23:45 ` [PATCH 11/18] IPVS: Add expire_quiescent_template() Simon Horman
2011-03-05 23:45 ` [PATCH 12/18] IPVS: Conditionally use sysctl_lblc{r}_expiration Simon Horman
2011-03-05 23:45 ` [PATCH 13/18] IPVS: ip_vs_todrop() becomes a noop when CONFIG_SYSCTL is undefined Simon Horman
2011-03-05 23:45 ` [PATCH 14/18] IPVS: Conditional ip_vs_conntrack_enabled() Simon Horman
2011-03-05 23:45 ` [PATCH 15/18] IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined Simon Horman
2011-03-05 23:45 ` [PATCH 16/18] IPVS: Conditionally define and use ip_vs_lblc{r}_table Simon Horman
2011-03-05 23:45 ` [PATCH 17/18] IPVS: Add __ip_vs_control_{init,cleanup}_sysctl() Simon Horman
2011-03-05 23:46 ` [PATCH 18/18] IPVS: Conditionally include sysctl members of struct netns_ipvs Simon Horman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).