All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Kristian Evensen <kristian.evensen@gmail.com>,
	Florian Westphal <fw@strlen.de>,
	Steffen Klassert <steffen.klassert@secunet.com>
Subject: [PATCH 4.14 71/71] xfrm: policy: remove pcpu policy cache
Date: Thu, 22 Aug 2019 10:19:46 -0700	[thread overview]
Message-ID: <20190822171730.781081830@linuxfoundation.org> (raw)
In-Reply-To: <20190822171726.131957995@linuxfoundation.org>

From: Florian Westphal <fw@strlen.de>

commit e4db5b61c572475bbbcf63e3c8a2606bfccf2c9d upstream.

Kristian Evensen says:
  In a project I am involved in, we are running ipsec (Strongswan) on
  different mt7621-based routers. Each router is configured as an
  initiator and has around ~30 tunnels to different responders (running
  on misc. devices). Before the flow cache was removed (kernel 4.9), we
  got a combined throughput of around 70Mbit/s for all tunnels on one
  router. However, we recently switched to kernel 4.14 (4.14.48), and
  the total throughput is somewhere around 57Mbit/s (best-case). I.e., a
  drop of around 20%. Reverting the flow cache removal restores, as
  expected, performance levels to that of kernel 4.9.

When pcpu xdst exists, it has to be validated first before it can be
used.

A negative hit thus increases cost vs. no-cache.

As number of tunnels increases, hit rate decreases so this pcpu caching
isn't a viable strategy.

Furthermore, the xdst cache also needs to run with BH off, so when
removing this the bh disable/enable pairs can be removed too.

Kristian tested a 4.14.y backport of this change and reported
increased performance:

  In our tests, the throughput reduction has been reduced from around -20%
  to -5%. We also see that the overall throughput is independent of the
  number of tunnels, while before the throughput was reduced as the number
  of tunnels increased.

Reported-by: Kristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>


---
 include/net/xfrm.h     |    1 
 net/xfrm/xfrm_device.c |   10 ---
 net/xfrm/xfrm_policy.c |  138 -------------------------------------------------
 net/xfrm/xfrm_state.c  |    5 -
 4 files changed, 3 insertions(+), 151 deletions(-)

--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -323,7 +323,6 @@ int xfrm_policy_register_afinfo(const st
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
 		      const struct km_event *c);
-void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -153,12 +153,6 @@ static int xfrm_dev_register(struct net_
 	return NOTIFY_DONE;
 }
 
-static int xfrm_dev_unregister(struct net_device *dev)
-{
-	xfrm_policy_cache_flush();
-	return NOTIFY_DONE;
-}
-
 static int xfrm_dev_feat_change(struct net_device *dev)
 {
 	if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
@@ -178,7 +172,6 @@ static int xfrm_dev_down(struct net_devi
 	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
@@ -190,9 +183,6 @@ static int xfrm_dev_event(struct notifie
 	case NETDEV_REGISTER:
 		return xfrm_dev_register(dev);
 
-	case NETDEV_UNREGISTER:
-		return xfrm_dev_unregister(dev);
-
 	case NETDEV_FEAT_CHANGE:
 		return xfrm_dev_feat_change(dev);
 
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -45,8 +45,6 @@ struct xfrm_flo {
 	u8 flags;
 };
 
-static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
-static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
@@ -1715,108 +1713,6 @@ static int xfrm_expand_policies(const st
 
 }
 
-static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
-{
-	this_cpu_write(xfrm_last_dst, xdst);
-	if (old)
-		dst_release(&old->u.dst);
-}
-
-static void __xfrm_pcpu_work_fn(void)
-{
-	struct xfrm_dst *old;
-
-	old = this_cpu_read(xfrm_last_dst);
-	if (old && !xfrm_bundle_ok(old))
-		xfrm_last_dst_update(NULL, old);
-}
-
-static void xfrm_pcpu_work_fn(struct work_struct *work)
-{
-	local_bh_disable();
-	rcu_read_lock();
-	__xfrm_pcpu_work_fn();
-	rcu_read_unlock();
-	local_bh_enable();
-}
-
-void xfrm_policy_cache_flush(void)
-{
-	struct xfrm_dst *old;
-	bool found = 0;
-	int cpu;
-
-	might_sleep();
-
-	local_bh_disable();
-	rcu_read_lock();
-	for_each_possible_cpu(cpu) {
-		old = per_cpu(xfrm_last_dst, cpu);
-		if (old && !xfrm_bundle_ok(old)) {
-			if (smp_processor_id() == cpu) {
-				__xfrm_pcpu_work_fn();
-				continue;
-			}
-			found = true;
-			break;
-		}
-	}
-
-	rcu_read_unlock();
-	local_bh_enable();
-
-	if (!found)
-		return;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		bool bundle_release;
-
-		rcu_read_lock();
-		old = per_cpu(xfrm_last_dst, cpu);
-		bundle_release = old && !xfrm_bundle_ok(old);
-		rcu_read_unlock();
-
-		if (!bundle_release)
-			continue;
-
-		if (cpu_online(cpu)) {
-			schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
-			continue;
-		}
-
-		rcu_read_lock();
-		old = per_cpu(xfrm_last_dst, cpu);
-		if (old && !xfrm_bundle_ok(old)) {
-			per_cpu(xfrm_last_dst, cpu) = NULL;
-			dst_release(&old->u.dst);
-		}
-		rcu_read_unlock();
-	}
-
-	put_online_cpus();
-}
-
-static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
-				struct xfrm_state * const xfrm[],
-				int num)
-{
-	const struct dst_entry *dst = &xdst->u.dst;
-	int i;
-
-	if (xdst->num_xfrms != num)
-		return false;
-
-	for (i = 0; i < num; i++) {
-		if (!dst || dst->xfrm != xfrm[i])
-			return false;
-		dst = dst->child;
-	}
-
-	return xfrm_bundle_ok(xdst);
-}
-
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 			       const struct flowi *fl, u16 family,
@@ -1824,7 +1720,7 @@ xfrm_resolve_and_create_bundle(struct xf
 {
 	struct net *net = xp_net(pols[0]);
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
-	struct xfrm_dst *xdst, *old;
+	struct xfrm_dst *xdst;
 	struct dst_entry *dst;
 	int err;
 
@@ -1839,21 +1735,6 @@ xfrm_resolve_and_create_bundle(struct xf
 		return ERR_PTR(err);
 	}
 
-	xdst = this_cpu_read(xfrm_last_dst);
-	if (xdst &&
-	    xdst->u.dst.dev == dst_orig->dev &&
-	    xdst->num_pols == num_pols &&
-	    memcmp(xdst->pols, pols,
-		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
-	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
-		dst_hold(&xdst->u.dst);
-		while (err > 0)
-			xfrm_state_put(xfrm[--err]);
-		return xdst;
-	}
-
-	old = xdst;
-
 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
 	if (IS_ERR(dst)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1866,9 +1747,6 @@ xfrm_resolve_and_create_bundle(struct xf
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
 
-	atomic_set(&xdst->u.dst.__refcnt, 2);
-	xfrm_last_dst_update(xdst, old);
-
 	return xdst;
 }
 
@@ -2069,11 +1947,8 @@ xfrm_bundle_lookup(struct net *net, cons
 	if (num_xfrms <= 0)
 		goto make_dummy_bundle;
 
-	local_bh_disable();
 	xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
 					      xflo->dst_orig);
-	local_bh_enable();
-
 	if (IS_ERR(xdst)) {
 		err = PTR_ERR(xdst);
 		if (err != -EAGAIN)
@@ -2160,11 +2035,9 @@ struct dst_entry *xfrm_lookup(struct net
 				goto no_transform;
 			}
 
-			local_bh_disable();
 			xdst = xfrm_resolve_and_create_bundle(
 					pols, num_pols, fl,
 					family, dst_orig);
-			local_bh_enable();
 
 			if (IS_ERR(xdst)) {
 				xfrm_pols_put(pols, num_pols);
@@ -2992,15 +2865,6 @@ static struct pernet_operations __net_in
 
 void __init xfrm_init(void)
 {
-	int i;
-
-	xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
-				       GFP_KERNEL);
-	BUG_ON(!xfrm_pcpu_work);
-
-	for (i = 0; i < NR_CPUS; i++)
-		INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
-
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -735,10 +735,9 @@ restart:
 	}
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-	if (cnt) {
+	if (cnt)
 		err = 0;
-		xfrm_policy_cache_flush();
-	}
+
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);



  parent reply	other threads:[~2019-08-22 17:34 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-22 17:18 [PATCH 4.14 00/71] 4.14.140-stable review Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 01/71] scsi: mpt3sas: Use 63-bit DMA addressing on SAS35 HBA Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 02/71] sh: kernel: hw_breakpoint: Fix missing break in switch statement Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 03/71] mm/usercopy: use memory range to be accessed for wraparound check Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 04/71] mm/memcontrol.c: fix use after free in mem_cgroup_iter() Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 05/71] bpf: get rid of pure_initcall dependency to enable jits Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 06/71] bpf: restrict access to core bpf sysctls Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 07/71] bpf: add bpf_jit_limit knob to restrict unpriv allocations Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 08/71] x86/mm: Use WRITE_ONCE() when setting PTEs Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 09/71] xtensa: add missing isync to the cpu_reset TLB code Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 10/71] ALSA: hda - Apply workaround for another AMD chip 1022:1487 Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 11/71] ALSA: hda - Fix a memory leak bug Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 12/71] ALSA: hda - Add a generic reboot_notify Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 13/71] ALSA: hda - Let all conexant codec enter D3 when rebooting Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 14/71] HID: holtek: test for sanity of intfdata Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 15/71] HID: hiddev: avoid opening a disconnected device Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 16/71] HID: hiddev: do cleanup in failure of opening a device Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 17/71] Input: kbtab - sanity check for endpoint type Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 18/71] Input: iforce - add sanity checks Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 19/71] net: usb: pegasus: fix improper read if get_registers() fail Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 20/71] netfilter: ebtables: also count base chain policies Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 21/71] clk: at91: generated: Truncate divisor to GENERATED_MAX_DIV + 1 Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 22/71] clk: renesas: cpg-mssr: Fix reset control race condition Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 23/71] xen/pciback: remove set but not used variable old_state Greg Kroah-Hartman
2019-08-22 17:18 ` [PATCH 4.14 24/71] irqchip/gic-v3-its: Free unused vpt_page when alloc vpe table fail Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 25/71] irqchip/irq-imx-gpcv2: Forward irq type to parent Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 26/71] perf header: Fix divide by zero error if f_header.attr_size==0 Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 27/71] perf header: Fix use of unitialized value warning Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 28/71] libata: zpodd: Fix small read overflow in zpodd_get_mech_type() Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 29/71] drm/bridge: lvds-encoder: Fix build error while CONFIG_DRM_KMS_HELPER=m Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 30/71] scsi: hpsa: correct scsi command status issue after reset Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 31/71] scsi: qla2xxx: Fix possible fcport null-pointer dereferences Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 32/71] ata: libahci: do not complain in case of deferred probe Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 33/71] kbuild: modpost: handle KBUILD_EXTRA_SYMBOLS only for external modules Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 34/71] arm64/efi: fix variable si set but not used Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 35/71] arm64: unwind: Prohibit probing on return_address() Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 36/71] arm64/mm: fix variable pud set but not used Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 37/71] IB/core: Add mitigation for Spectre V1 Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 38/71] IB/mad: Fix use-after-free in ib mad completion handling Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 39/71] drm: msm: Fix add_gpu_components Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 40/71] ocfs2: remove set but not used variable last_hash Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 41/71] asm-generic: fix -Wtype-limits compiler warnings Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 42/71] KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 43/71] staging: comedi: dt3000: Fix signed integer overflow divider * base Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 44/71] staging: comedi: dt3000: Fix rounding up of timer divisor Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 45/71] iio: adc: max9611: Fix temperature reading in probe Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 46/71] USB: core: Fix races in character device registration and deregistraion Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 47/71] usb: gadget: udc: renesas_usb3: Fix sysfs interface of "role" Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 48/71] usb: cdc-acm: make sure a refcount is taken early enough Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 49/71] USB: CDC: fix sanity checks in CDC union parser Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 50/71] USB: serial: option: add D-Link DWM-222 device ID Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 51/71] USB: serial: option: Add support for ZTE MF871A Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 52/71] USB: serial: option: add the BroadMobi BM818 card Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 53/71] USB: serial: option: Add Motorola modem UARTs Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 54/71] bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 55/71] Revert "tcp: Clear sk_send_head after purging the write queue" Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 56/71] arm64: compat: Allow single-byte watchpoints on all addresses Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 57/71] arm64: ftrace: Ensure module ftrace trampoline is coherent with I-side Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 58/71] netfilter: conntrack: Use consistent ct id hash calculation Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 59/71] Input: psmouse - fix build error of multiple definition Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 60/71] iommu/amd: Move iommu_init_pci() to .init section Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 61/71] bnx2x: Fix VFs VLAN reconfiguration in reload Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 62/71] net/mlx4_en: fix a memory leak bug Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 63/71] net/packet: fix race in tpacket_snd() Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 64/71] sctp: fix the transport error_count check Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 65/71] xen/netback: Reset nr_frags before freeing skb Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 66/71] net/mlx5e: Only support tx/rx pause setting for port owner Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 67/71] net/mlx5e: Use flow keys dissector to parse packets for ARFS Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 68/71] team: Add vlan tx offload to hw_enc_features Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 69/71] bonding: " Greg Kroah-Hartman
2019-08-22 17:19 ` [PATCH 4.14 70/71] mmc: sdhci-of-arasan: Do now show error message in case of deffered probe Greg Kroah-Hartman
2019-08-22 17:19 ` Greg Kroah-Hartman [this message]
2019-08-22 21:17 ` [PATCH 4.14 00/71] 4.14.140-stable review kernelci.org bot
2019-08-22 23:23   ` Kevin Hilman
2019-08-22 23:39     ` Greg Kroah-Hartman
2019-08-23  2:07 ` Jon Hunter
2019-08-23  2:07   ` Jon Hunter
2019-08-23  8:04 ` Naresh Kamboju
2019-08-23 14:28 ` Guenter Roeck
2019-08-24 17:55 ` shuah

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190822171730.781081830@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=fw@strlen.de \
    --cc=kristian.evensen@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=steffen.klassert@secunet.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.