From: Julian Anastasov <ja@ssi.bg>
To: Simon Horman <horms@verge.net.au>
Cc: lvs-devel@vger.kernel.org, netfilter-devel@vger.kernel.org,
Dust Li <dust.li@linux.alibaba.com>,
Jiejian Wu <jiejian@linux.alibaba.com>,
rcu@vger.kernel.org
Subject: [PATCHv4 net-next 06/14] ipvs: use more counters to avoid service lookups
Date: Tue, 28 May 2024 11:02:26 +0300
Message-ID: <20240528080234.10148-7-ja@ssi.bg>
In-Reply-To: <20240528080234.10148-1-ja@ssi.bg>

When a new connection is created we can look up services multiple
times to support fallback options. We already keep some counters that
allow specific lookups to be skipped, since every lookup costs CPU
cycles for hash calculation, etc.

Add more counters for fwmark/non-fwmark services (fwm_services and
nonfwm_services) and make all counters per address family.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
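A note for reviewers, not part of the change itself: the patch combines
two simple techniques, per-family counter arrays indexed via
ip_vs_af_index() and gating each hash walk on its counter so that a
zero count skips the hash computation entirely. The userspace sketch
below illustrates the pattern; all names in it are invented, and the
real logic lives in ip_vs_service_find() in the diff.

/* Hypothetical userspace sketch -- none of these names exist in IPVS. */
#include <stdatomic.h>
#include <stddef.h>
#include <sys/socket.h>		/* AF_INET6 */

enum { MY_AF_INET, MY_AF_INET6, MY_AF_MAX };

/* Map an address family to a compact array index. */
static int my_af_index(int af)
{
	return af == AF_INET6 ? MY_AF_INET6 : MY_AF_INET;
}

struct my_tables {
	atomic_int fwm_services[MY_AF_MAX];	/* fwmark-keyed services */
	atomic_int nonfwm_services[MY_AF_MAX];	/* <proto,addr,port> services */
};

/* Stubs standing in for the real hash-table walks. */
static void *my_fwm_find(struct my_tables *t, int af, unsigned int fwmark)
{
	(void)t; (void)af; (void)fwmark;
	return NULL;
}

static void *my_addr_find(struct my_tables *t, int af, int proto,
			  const void *vaddr, unsigned short vport)
{
	(void)t; (void)af; (void)proto; (void)vaddr; (void)vport;
	return NULL;
}

static void *my_service_find(struct my_tables *t, int af, unsigned int fwmark,
			     int proto, const void *vaddr, unsigned short vport)
{
	int af_id = my_af_index(af);
	void *svc = NULL;

	/* A zero counter proves the table holds no entries of this
	 * kind for this family, so skip the hash calculation.
	 */
	if (fwmark && atomic_load(&t->fwm_services[af_id]))
		svc = my_fwm_find(t, af, fwmark);
	if (!svc && atomic_load(&t->nonfwm_services[af_id]))
		svc = my_addr_find(t, af, proto, vaddr, vport);
	return svc;
}

The kernel version uses atomic_t/atomic_read() and falls back further
to the FTP and null-port (catch-all) services, each behind its own
per-family counter, as the diff shows.
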
include/net/ip_vs.h | 24 ++++++---
net/netfilter/ipvs/ip_vs_core.c | 2 +-
net/netfilter/ipvs/ip_vs_ctl.c | 86 +++++++++++++++++++--------------
3 files changed, 69 insertions(+), 43 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index b0a9f67a5c33..6b9b32257e10 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -271,6 +271,18 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
pr_err(msg, ##__VA_ARGS__); \
} while (0)
+/* For arrays per family */
+enum {
+ IP_VS_AF_INET,
+ IP_VS_AF_INET6,
+ IP_VS_AF_MAX
+};
+
+static inline int ip_vs_af_index(int af)
+{
+ return af == AF_INET6 ? IP_VS_AF_INET6 : IP_VS_AF_INET;
+}
+
/* The port number of FTP service (in network order). */
#define FTPPORT cpu_to_be16(21)
#define FTPDATA cpu_to_be16(20)
@@ -940,17 +952,17 @@ struct netns_ipvs {
/* ip_vs_ctl */
struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */
- int num_services; /* no of virtual services */
- int num_services6; /* IPv6 virtual services */
-
/* Trash for destinations */
struct list_head dest_trash;
spinlock_t dest_trash_lock;
struct timer_list dest_trash_timer; /* expiration timer */
/* Service counters */
- atomic_t ftpsvc_counter;
- atomic_t nullsvc_counter;
- atomic_t conn_out_counter;
+ atomic_t num_services[IP_VS_AF_MAX]; /* Services */
+ atomic_t fwm_services[IP_VS_AF_MAX]; /* Services */
+ atomic_t nonfwm_services[IP_VS_AF_MAX];/* Services */
+ atomic_t ftpsvc_counter[IP_VS_AF_MAX]; /* FTPPORT */
+ atomic_t nullsvc_counter[IP_VS_AF_MAX];/* Zero port */
+ atomic_t conn_out_counter[IP_VS_AF_MAX];/* out conn */
#ifdef CONFIG_SYSCTL
/* delayed work for expiring no dest connections */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c7a8a08b7308..141da3c14f57 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1404,7 +1404,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
return handle_response(af, skb, pd, cp, &iph, hooknum);
/* Check for real-server-started requests */
- if (atomic_read(&ipvs->conn_out_counter)) {
+ if (atomic_read(&ipvs->conn_out_counter[ip_vs_af_index(af)])) {
/* Currently only for UDP:
* connection oriented protocols typically use
* ephemeral ports for outgoing connections, so
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index d377df9c7a37..3d950b1099a6 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -437,35 +437,42 @@ struct ip_vs_service *
ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
- struct ip_vs_service *svc;
+ struct ip_vs_service *svc = NULL;
+ int af_id = ip_vs_af_index(af);
/*
* Check the table hashed by fwmark first
*/
- if (fwmark) {
+ if (fwmark && atomic_read(&ipvs->fwm_services[af_id])) {
svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
if (svc)
goto out;
}
+ if (!atomic_read(&ipvs->nonfwm_services[af_id]))
+ goto out;
+
/*
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
+ if (svc)
+ goto out;
- if (!svc && protocol == IPPROTO_TCP &&
- atomic_read(&ipvs->ftpsvc_counter) &&
+ if (protocol == IPPROTO_TCP &&
+ atomic_read(&ipvs->ftpsvc_counter[af_id]) &&
(vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) {
/*
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
+ if (svc)
+ goto out;
}
- if (svc == NULL
- && atomic_read(&ipvs->nullsvc_counter)) {
+ if (atomic_read(&ipvs->nullsvc_counter[af_id])) {
/*
* Check if the catch-all port (port zero) exists
*/
@@ -1352,6 +1359,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
{
int ret = 0;
struct ip_vs_scheduler *sched = NULL;
+ int af_id = ip_vs_af_index(u->af);
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
int ret_hooks = -1;
@@ -1396,8 +1404,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
}
#endif
- if ((u->af == AF_INET && !ipvs->num_services) ||
- (u->af == AF_INET6 && !ipvs->num_services6)) {
+ if (!atomic_read(&ipvs->num_services[af_id])) {
ret = ip_vs_register_hooks(ipvs, u->af);
if (ret < 0)
goto out_err;
@@ -1448,17 +1455,17 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
/* Update the virtual service counters */
if (svc->port == FTPPORT)
- atomic_inc(&ipvs->ftpsvc_counter);
- else if (svc->port == 0)
- atomic_inc(&ipvs->nullsvc_counter);
+ atomic_inc(&ipvs->ftpsvc_counter[af_id]);
+ else if (!svc->port && !svc->fwmark)
+ atomic_inc(&ipvs->nullsvc_counter[af_id]);
if (svc->pe && svc->pe->conn_out)
- atomic_inc(&ipvs->conn_out_counter);
+ atomic_inc(&ipvs->conn_out_counter[af_id]);
- /* Count only IPv4 services for old get/setsockopt interface */
- if (svc->af == AF_INET)
- ipvs->num_services++;
- else if (svc->af == AF_INET6)
- ipvs->num_services6++;
+ if (svc->fwmark)
+ atomic_inc(&ipvs->fwm_services[af_id]);
+ else
+ atomic_inc(&ipvs->nonfwm_services[af_id]);
+ atomic_inc(&ipvs->num_services[af_id]);
/* Hash the service into the service table */
ip_vs_svc_hash(svc);
@@ -1503,6 +1510,8 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
struct ip_vs_pe *pe = NULL, *old_pe = NULL;
int ret = 0;
bool new_pe_conn_out, old_pe_conn_out;
+ struct netns_ipvs *ipvs = svc->ipvs;
+ int af_id = ip_vs_af_index(svc->af);
/*
* Lookup the scheduler, by 'u->sched_name'
@@ -1571,9 +1580,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
new_pe_conn_out = (pe && pe->conn_out) ? true : false;
old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
if (new_pe_conn_out && !old_pe_conn_out)
- atomic_inc(&svc->ipvs->conn_out_counter);
+ atomic_inc(&ipvs->conn_out_counter[af_id]);
if (old_pe_conn_out && !new_pe_conn_out)
- atomic_dec(&svc->ipvs->conn_out_counter);
+ atomic_dec(&ipvs->conn_out_counter[af_id]);
}
out:
@@ -1593,16 +1602,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
struct ip_vs_scheduler *old_sched;
struct ip_vs_pe *old_pe;
struct netns_ipvs *ipvs = svc->ipvs;
+ int af_id = ip_vs_af_index(svc->af);
- if (svc->af == AF_INET) {
- ipvs->num_services--;
- if (!ipvs->num_services)
- ip_vs_unregister_hooks(ipvs, svc->af);
- } else if (svc->af == AF_INET6) {
- ipvs->num_services6--;
- if (!ipvs->num_services6)
- ip_vs_unregister_hooks(ipvs, svc->af);
- }
+ atomic_dec(&ipvs->num_services[af_id]);
+ if (!atomic_read(&ipvs->num_services[af_id]))
+ ip_vs_unregister_hooks(ipvs, svc->af);
+ if (svc->fwmark)
+ atomic_dec(&ipvs->fwm_services[af_id]);
+ else
+ atomic_dec(&ipvs->nonfwm_services[af_id]);
ip_vs_stop_estimator(svc->ipvs, &svc->stats);
@@ -1614,7 +1622,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
/* Unbind persistence engine, keep svc->pe */
old_pe = rcu_dereference_protected(svc->pe, 1);
if (old_pe && old_pe->conn_out)
- atomic_dec(&ipvs->conn_out_counter);
+ atomic_dec(&ipvs->conn_out_counter[af_id]);
ip_vs_pe_put(old_pe);
/*
@@ -1629,9 +1637,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
* Update the virtual service counters
*/
if (svc->port == FTPPORT)
- atomic_dec(&ipvs->ftpsvc_counter);
- else if (svc->port == 0)
- atomic_dec(&ipvs->nullsvc_counter);
+ atomic_dec(&ipvs->ftpsvc_counter[af_id]);
+ else if (!svc->port && !svc->fwmark)
+ atomic_dec(&ipvs->nullsvc_counter[af_id]);
/*
* Free the service if nobody refers to it
@@ -2960,7 +2968,8 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
struct ip_vs_getinfo info;
info.version = IP_VS_VERSION_CODE;
info.size = ip_vs_conn_tab_size;
- info.num_services = ipvs->num_services;
+ info.num_services =
+ atomic_read(&ipvs->num_services[IP_VS_AF_INET]);
if (copy_to_user(user, &info, sizeof(info)) != 0)
ret = -EFAULT;
}
@@ -4309,9 +4318,14 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
INIT_LIST_HEAD(&ipvs->dest_trash);
spin_lock_init(&ipvs->dest_trash_lock);
timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
- atomic_set(&ipvs->ftpsvc_counter, 0);
- atomic_set(&ipvs->nullsvc_counter, 0);
- atomic_set(&ipvs->conn_out_counter, 0);
+ for (idx = 0; idx < IP_VS_AF_MAX; idx++) {
+ atomic_set(&ipvs->num_services[idx], 0);
+ atomic_set(&ipvs->fwm_services[idx], 0);
+ atomic_set(&ipvs->nonfwm_services[idx], 0);
+ atomic_set(&ipvs->ftpsvc_counter[idx], 0);
+ atomic_set(&ipvs->nullsvc_counter[idx], 0);
+ atomic_set(&ipvs->conn_out_counter[idx], 0);
+ }
INIT_DELAYED_WORK(&ipvs->est_reload_work, est_reload_work_handler);
--
2.44.0