From: Steffen Klassert <steffen.klassert@secunet.com>
To: David Miller <davem@davemloft.net>, Jakub Kicinski <kuba@kernel.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>,
Steffen Klassert <steffen.klassert@secunet.com>,
<netdev@vger.kernel.org>
Subject: [PATCH 02/11] xfrm: Cache used outbound xfrm states at the policy.
Date: Fri, 15 Nov 2024 09:33:34 +0100 [thread overview]
Message-ID: <20241115083343.2340827-3-steffen.klassert@secunet.com> (raw)
In-Reply-To: <20241115083343.2340827-1-steffen.klassert@secunet.com>
Now that we can have percpu xfrm states, the number of active
states might increase. To get better lookup performance,
we cache the used xfrm states at the policy for outbound
IPsec traffic.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Tested-by: Antony Antony <antony.antony@secunet.com>
Tested-by: Tobias Brunner <tobias@strongswan.org>
---
include/net/xfrm.h | 4 +++
net/xfrm/xfrm_policy.c | 12 +++++++++
net/xfrm/xfrm_state.c | 55 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 71 insertions(+)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index f5275618e744..0b394c5fb5f3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -184,6 +184,7 @@ struct xfrm_state {
};
struct hlist_node byspi;
struct hlist_node byseq;
+ struct hlist_node state_cache;
refcount_t refcnt;
spinlock_t lock;
@@ -537,6 +538,7 @@ struct xfrm_policy_queue {
* @xp_net: network namespace the policy lives in
* @bydst: hlist node for SPD hash table or rbtree list
* @byidx: hlist node for index hash table
+ * @state_cache_list: hlist head for policy cached xfrm states
* @lock: serialize changes to policy structure members
* @refcnt: reference count, freed once it reaches 0
* @pos: kernel internal tie-breaker to determine age of policy
@@ -567,6 +569,8 @@ struct xfrm_policy {
struct hlist_node bydst;
struct hlist_node byidx;
+ struct hlist_head state_cache_list;
+
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a2ea9dbac90b..8a1b83191a6c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -434,6 +434,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
+ INIT_HLIST_HEAD(&policy->state_cache_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -475,6 +476,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ struct net *net = xp_net(policy);
+ struct xfrm_state *x;
+
xfrm_dev_policy_delete(policy);
write_lock_bh(&policy->lock);
@@ -490,6 +494,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
if (del_timer(&policy->timer))
xfrm_pol_put(policy);
+ /* XXX: Flush state cache */
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+ hlist_del_init_rcu(&x->state_cache);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
xfrm_pol_put(policy);
}
@@ -3275,6 +3286,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
dst_release(dst);
dst = dst_orig;
}
+
ok:
xfrm_pols_put(pols, drop_pols);
if (dst && dst->xfrm &&
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ebef07b80afa..a2047825f6c8 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -665,6 +665,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
refcount_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
INIT_LIST_HEAD(&x->km.all);
+ INIT_HLIST_NODE(&x->state_cache);
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
@@ -744,12 +745,15 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD;
+
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
if (x->km.seq)
hlist_del_rcu(&x->byseq);
+ if (!hlist_unhashed(&x->state_cache))
+ hlist_del_rcu(&x->state_cache);
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@@ -1222,6 +1226,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned int sequence;
struct km_event c;
unsigned int pcpu_id;
+ bool cached = false;
/* We need the cpu id just as a lookup key,
* we don't require it to be stable.
@@ -1234,6 +1239,46 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, encap_family,
+ &best, &acquire_in_progress, &error);
+ }
+
+ if (best)
+ goto cached;
+
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, family,
+ &best, &acquire_in_progress, &error);
+ }
+
+cached:
+ cached = true;
+ if (best)
+ goto found;
+ else if (error)
+ best = NULL;
+ else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
+ WARN_ON(1);
+
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1383,6 +1428,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
XFRM_STATE_INSERT(bysrc, &x->bysrc,
net->xfrm.state_bysrc + h,
x->xso.type);
+ INIT_HLIST_NODE(&x->state_cache);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
XFRM_STATE_INSERT(byspi, &x->byspi,
@@ -1431,6 +1477,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
} else {
*err = acquire_in_progress ? -EAGAIN : error;
}
+
+ if (x && x->km.state == XFRM_STATE_VALID && !cached &&
+ (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache))
+ hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
rcu_read_unlock();
if (to_put)
xfrm_state_put(to_put);
--
2.34.1
next prev parent reply other threads:[~2024-11-15 8:33 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-15 8:33 [PATCH 0/11] pull request (net-next): ipsec-next 2024-11-15 Steffen Klassert
2024-11-15 8:33 ` [PATCH 01/11] xfrm: Add support for per cpu xfrm state handling Steffen Klassert
2024-11-16 2:09 ` Jakub Kicinski
2024-11-18 6:23 ` Steffen Klassert
2024-11-18 12:00 ` patchwork-bot+netdevbpf
2024-11-15 8:33 ` Steffen Klassert [this message]
2024-11-15 8:33 ` [PATCH 03/11] xfrm: Add an inbound percpu state cache Steffen Klassert
2024-11-15 8:33 ` [PATCH 04/11] xfrm: Restrict percpu SA attribute to specific netlink message types Steffen Klassert
2024-11-15 8:33 ` [PATCH 05/11] xfrm: Convert xfrm_get_tos() to dscp_t Steffen Klassert
2024-11-15 8:33 ` [PATCH 06/11] xfrm: Convert xfrm_bundle_create() " Steffen Klassert
2024-11-15 8:33 ` [PATCH 07/11] xfrm: Convert xfrm_dst_lookup() " Steffen Klassert
2024-11-15 8:33 ` [PATCH 08/11] xfrm: Convert struct xfrm_dst_lookup_params -> tos " Steffen Klassert
2024-11-15 8:33 ` [PATCH 09/11] xfrm: Add error handling when nla_put_u32() returns an error Steffen Klassert
2024-11-15 8:33 ` [PATCH 10/11] xfrm: replace deprecated strncpy with strscpy_pad Steffen Klassert
2024-11-15 8:33 ` [PATCH 11/11] xfrm: Fix acquire state insertion Steffen Klassert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241115083343.2340827-3-steffen.klassert@secunet.com \
--to=steffen.klassert@secunet.com \
--cc=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).