stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: lizf@kernel.org
To: stable@vger.kernel.org
Cc: linux-kernel@vger.kernel.org,
	Marcelo Ricardo Leitner <mleitner@redhat.com>,
	Ben Hutchings <ben@decadent.org.uk>,
	Zefan Li <lizefan@huawei.com>
Subject: [PATCH 3.4 84/91] ipv4: move route garbage collector to work queue
Date: Thu, 27 Nov 2014 16:43:07 +0800	[thread overview]
Message-ID: <1417077794-9299-84-git-send-email-lizf@kernel.org> (raw)
In-Reply-To: <1417077368-9217-1-git-send-email-lizf@kernel.org>

From: Marcelo Ricardo Leitner <mleitner@redhat.com>

3.4.105-rc1 review patch.  If anyone has any objections, please let me know.

------------------


Currently the route garbage collector gets called by dst_alloc() if it
have more entries than the threshold. But it's an expensive call, that
don't really need to be done by then.

Another issue with current way is that it allows running the garbage
collector with the same start parameters on multiple CPUs at once, which
is not optimal. A system may even soft lockup if the cache is big enough
as the garbage collectors will be fighting over the hash lock entries.

This patch thus moves the garbage collector to run asynchronously on a
work queue, much similar to how rt_expire_check runs.

There is one condition left that allows multiple executions, which is
handled by the next patch.

Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Zefan Li <lizefan@huawei.com>
---
 net/ipv4/route.c | 43 +++++++++++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fd68433..90de2f9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -150,6 +150,9 @@ static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(struct dst_ops *ops);
 
+static void __rt_garbage_collect(struct work_struct *w);
+static DECLARE_WORK(rt_gc_worker, __rt_garbage_collect);
+
 static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 			    int how)
 {
@@ -977,7 +980,7 @@ static void rt_emergency_hash_rebuild(struct net *net)
    and when load increases it reduces to limit cache size.
  */
 
-static int rt_garbage_collect(struct dst_ops *ops)
+static void __do_rt_garbage_collect(int elasticity, int min_interval)
 {
 	static unsigned long expire = RT_GC_TIMEOUT;
 	static unsigned long last_gc;
@@ -996,7 +999,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
 
 	RT_CACHE_STAT_INC(gc_total);
 
-	if (now - last_gc < ip_rt_gc_min_interval &&
+	if (now - last_gc < min_interval &&
 	    entries < ip_rt_max_size) {
 		RT_CACHE_STAT_INC(gc_ignored);
 		goto out;
@@ -1004,7 +1007,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
 
 	entries = dst_entries_get_slow(&ipv4_dst_ops);
 	/* Calculate number of entries, which we want to expire now. */
-	goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
+	goal = entries - (elasticity << rt_hash_log);
 	if (goal <= 0) {
 		if (equilibrium < ipv4_dst_ops.gc_thresh)
 			equilibrium = ipv4_dst_ops.gc_thresh;
@@ -1021,7 +1024,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
 		equilibrium = entries - goal;
 	}
 
-	if (now - last_gc >= ip_rt_gc_min_interval)
+	if (now - last_gc >= min_interval)
 		last_gc = now;
 
 	if (goal <= 0) {
@@ -1086,15 +1089,33 @@ static int rt_garbage_collect(struct dst_ops *ops)
 	if (net_ratelimit())
 		pr_warn("dst cache overflow\n");
 	RT_CACHE_STAT_INC(gc_dst_overflow);
-	return 1;
+	return;
 
 work_done:
-	expire += ip_rt_gc_min_interval;
+	expire += min_interval;
 	if (expire > ip_rt_gc_timeout ||
 	    dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
 	    dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
 		expire = ip_rt_gc_timeout;
-out:	return 0;
+out:	return;
+}
+
+static void __rt_garbage_collect(struct work_struct *w)
+{
+	__do_rt_garbage_collect(ip_rt_gc_elasticity, ip_rt_gc_min_interval);
+}
+
+static int rt_garbage_collect(struct dst_ops *ops)
+{
+	if (!work_pending(&rt_gc_worker))
+		schedule_work(&rt_gc_worker);
+
+	if (dst_entries_get_fast(&ipv4_dst_ops) >= ip_rt_max_size ||
+	    dst_entries_get_slow(&ipv4_dst_ops) >= ip_rt_max_size) {
+		RT_CACHE_STAT_INC(gc_dst_overflow);
+		return 1;
+	}
+	return 0;
 }
 
 /*
@@ -1288,13 +1309,7 @@ restart:
 			   it is most likely it holds some neighbour records.
 			 */
 			if (attempts-- > 0) {
-				int saved_elasticity = ip_rt_gc_elasticity;
-				int saved_int = ip_rt_gc_min_interval;
-				ip_rt_gc_elasticity	= 1;
-				ip_rt_gc_min_interval	= 0;
-				rt_garbage_collect(&ipv4_dst_ops);
-				ip_rt_gc_min_interval	= saved_int;
-				ip_rt_gc_elasticity	= saved_elasticity;
+				__do_rt_garbage_collect(1, 0);
 				goto restart;
 			}
 
-- 
1.9.1


  parent reply	other threads:[~2014-11-27  8:43 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-27  8:36 [PATCH 3.4 00/91] 3.4.105-rc1 review lizf
2014-11-27  8:41 ` [PATCH 3.4 01/91] KVM: s390: Fix user triggerable bug in dead code lizf
2014-11-27  8:41 ` [PATCH 3.4 02/91] regmap: Fix handling of volatile registers for format_write() chips lizf
2014-11-27  8:41 ` [PATCH 3.4 03/91] drm/i915: Remove bogus __init annotation from DMI callbacks lizf
2014-11-27  8:41 ` [PATCH 3.4 04/91] get rid of propagate_umount() mistakenly treating slaves as busy lizf
2014-11-27  8:41 ` [PATCH 3.4 05/91] drm/vmwgfx: Fix a potential infinite spin waiting for fifo idle lizf
2014-11-27  8:41 ` [PATCH 3.4 06/91] ALSA: hda - Fix COEF setups for ALC1150 codec lizf
2014-11-27  8:41 ` [PATCH 3.4 07/91] ACPI / cpuidle: fix deadlock between cpuidle_lock and cpu_hotplug.lock lizf
2014-11-27  8:41 ` [PATCH 3.4 08/91] regulatory: add NUL to alpha2 lizf
2014-11-27  8:41 ` [PATCH 3.4 09/91] percpu: fix pcpu_alloc_pages() failure path lizf
2014-11-27  8:41 ` [PATCH 3.4 10/91] percpu: perform tlb flush after pcpu_map_pages() failure lizf
2014-11-27  8:41 ` [PATCH 3.4 11/91] percpu: free percpu allocation info for uniprocessor system lizf
2014-11-27  8:41 ` [PATCH 3.4 12/91] cgroup: reject cgroup names with ' ' lizf
2014-11-27  8:41 ` [PATCH 3.4 13/91] rtlwifi: rtl8192cu: Add new ID lizf
2014-11-27  8:41 ` [PATCH 3.4 14/91] ahci: Add Device IDs for Intel 9 Series PCH lizf
2014-11-27  8:41 ` [PATCH 3.4 15/91] ata_piix: " lizf
2014-11-27  8:41 ` [PATCH 3.4 16/91] USB: ftdi_sio: add support for NOVITUS Bono E thermal printer lizf
2014-11-27  8:42 ` [PATCH 3.4 17/91] USB: sierra: avoid CDC class functions on "68A3" devices lizf
2014-11-27  8:42 ` [PATCH 3.4 18/91] USB: sierra: add 1199:68AA device ID lizf
2014-11-27  8:42 ` [PATCH 3.4 19/91] xen/manage: Always freeze/thaw processes when suspend/resuming lizf
2014-11-27  8:42 ` [PATCH 3.4 20/91] block: Fix dev_t minor allocation lifetime lizf
2014-11-27  8:42 ` [PATCH 3.4 21/91] usb: dwc3: core: fix order of PM runtime calls lizf
2014-11-27  8:42 ` [PATCH 3.4 22/91] ahci: add pcid for Marvel 0x9182 controller lizf
2014-11-27  8:42 ` [PATCH 3.4 23/91] drm/radeon: add connector quirk for fujitsu board lizf
2014-11-27  8:42 ` [PATCH 3.4 24/91] usb: host: xhci: fix compliance mode workaround lizf
2014-11-27  8:42 ` [PATCH 3.4 25/91] Input: elantech - fix detection of touchpad on ASUS s301l lizf
2014-11-27  8:42 ` [PATCH 3.4 26/91] USB: ftdi_sio: Add support for GE Healthcare Nemo Tracker device lizf
2014-11-27  8:42 ` [PATCH 3.4 27/91] uwb: init beacon cache entry before registering uwb device lizf
2014-11-27  8:42 ` [PATCH 3.4 28/91] Input: synaptics - add support for ForcePads lizf
2014-11-27  8:42 ` [PATCH 3.4 29/91] libceph: gracefully handle large reply messages from the mon lizf
2014-11-27  8:42 ` [PATCH 3.4 30/91] libceph: add process_one_ticket() helper lizf
2014-11-27  8:42 ` [PATCH 3.4 31/91] libceph: do not hard code max auth ticket len lizf
2014-11-27  8:42 ` [PATCH 3.4 32/91] Input: serport - add compat handling for SPIOCSTYPE ioctl lizf
2014-11-27  8:42 ` [PATCH 3.4 33/91] usb: hub: take hub->hdev reference when processing from eventlist lizf
2014-11-27  8:42 ` [PATCH 3.4 34/91] storage: Add single-LUN quirk for Jaz USB Adapter lizf
2014-11-27  8:42 ` [PATCH 3.4 35/91] xhci: Fix null pointer dereference if xhci initialization fails lizf
2014-11-27  8:42 ` [PATCH 3.4 36/91] futex: Unlock hb->lock in futex_wait_requeue_pi() error path lizf
2014-11-27  8:42 ` [PATCH 3.4 37/91] alarmtimer: Return relative times in timer_gettime lizf
2014-11-27  8:42 ` [PATCH 3.4 38/91] alarmtimer: Do not signal SIGEV_NONE timers lizf
2014-11-27  8:42 ` [PATCH 3.4 39/91] alarmtimer: Lock k_itimer during timer callback lizf
2014-11-27  8:42 ` [PATCH 3.4 40/91] don't bugger nd->seq on set_root_rcu() from follow_dotdot_rcu() lizf
2014-11-27  8:42 ` [PATCH 3.4 41/91] jiffies: Fix timeval conversion to jiffies lizf
2014-11-27  8:42 ` [PATCH 3.4 42/91] MIPS: ZBOOT: add missing <linux/string.h> include lizf
2014-11-27  8:42 ` [PATCH 3.4 43/91] perf: Fix a race condition in perf_remove_from_context() lizf
2014-12-01 18:43   ` Sukadev Bhattiprolu
2014-12-02  1:22     ` Zefan Li
2014-11-27  8:42 ` [PATCH 3.4 44/91] ASoC: samsung-i2s: Check secondary DAI exists before referencing lizf
2014-11-27  8:42 ` [PATCH 3.4 45/91] Input: i8042 - add Fujitsu U574 to no_timeout dmi table lizf
2014-11-27  8:42 ` [PATCH 3.4 46/91] Input: i8042 - add nomux quirk for Avatar AVIU-145A6 lizf
2014-11-27  8:42 ` [PATCH 3.4 47/91] iscsi-target: Fix memory corruption in iscsit_logout_post_handler_diffcid lizf
2014-11-27  8:42 ` [PATCH 3.4 48/91] iscsi-target: avoid NULL pointer in iscsi_copy_param_list failure lizf
2014-11-27  8:42 ` [PATCH 3.4 49/91] NFSv4: Fix another bug in the close/open_downgrade code lizf
2014-11-27  8:42 ` [PATCH 3.4 50/91] libiscsi: fix potential buffer overrun in __iscsi_conn_send_pdu lizf
2014-11-27  8:42 ` [PATCH 3.4 51/91] USB: storage: Add quirk for Adaptec USBConnect 2000 USB-to-SCSI Adapter lizf
2014-11-27  8:42 ` [PATCH 3.4 52/91] USB: storage: Add quirk for Ariston Technologies iConnect USB to SCSI adapter lizf
2014-11-27  8:42 ` [PATCH 3.4 53/91] USB: storage: Add quirks for Entrega/Xircom USB to SCSI converters lizf
2014-11-27  8:42 ` [PATCH 3.4 54/91] can: flexcan: mark TX mailbox as TX_INACTIVE lizf
2014-11-27  8:42 ` [PATCH 3.4 55/91] can: flexcan: correctly initialize mailboxes lizf
2014-11-27  8:42 ` [PATCH 3.4 56/91] can: flexcan: implement workaround for errata ERR005829 lizf
2014-11-27  8:42 ` [PATCH 3.4 57/91] can: flexcan: put TX mailbox into TX_INACTIVE mode after tx-complete lizf
2014-11-27  8:42 ` [PATCH 3.4 58/91] can: at91_can: add missing prepare and unprepare of the clock lizf
2014-11-27  8:42 ` [PATCH 3.4 59/91] ALSA: pcm: fix fifo_size frame calculation lizf
2014-11-27  8:42 ` [PATCH 3.4 60/91] Fix nasty 32-bit overflow bug in buffer i/o code lizf
2014-11-27  8:42 ` [PATCH 3.4 61/91] parisc: Only use -mfast-indirect-calls option for 32-bit kernel builds lizf
2014-11-27  8:42 ` [PATCH 3.4 62/91] sched: Fix unreleased llc_shared_mask bit during CPU hotplug lizf
2014-11-27  8:42 ` [PATCH 3.4 63/91] sched: add macros to define bitops for task atomic flags lizf
2014-11-27  8:42 ` [PATCH 3.4 64/91] cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be " lizf
2014-11-27  8:42 ` [PATCH 3.4 65/91] MIPS: mcount: Adjust stack pointer for static trace in MIPS32 lizf
2014-11-27  8:42 ` [PATCH 3.4 66/91] nilfs2: fix data loss with mmap() lizf
2014-11-27  8:42 ` [PATCH 3.4 67/91] ocfs2/dlm: do not get resource spinlock if lockres is new lizf
2014-11-27  8:42 ` [PATCH 3.4 68/91] shmem: fix nlink for rename overwrite directory lizf
2014-11-27  8:42 ` [PATCH 3.4 69/91] ARM: 8165/1: alignment: don't break misaligned NEON load/store lizf
2014-11-27  8:42 ` [PATCH 3.4 70/91] ASoC: core: fix possible ZERO_SIZE_PTR pointer dereferencing error lizf
2014-11-27  8:42 ` [PATCH 3.4 71/91] mm: migrate: Close race between migration completion and mprotect lizf
2014-11-27  8:42 ` [PATCH 3.4 72/91] perf: fix perf bug in fork() lizf
2014-11-27  8:42 ` [PATCH 3.4 73/91] init/Kconfig: Hide printk log config if CONFIG_PRINTK=n lizf
2014-11-27  8:42 ` [PATCH 3.4 74/91] genhd: fix leftover might_sleep() in blk_free_devt() lizf
2014-11-27  8:42 ` [PATCH 3.4 75/91] nl80211: clear skb cb before passing to netlink lizf
2014-11-27  8:42 ` [PATCH 3.4 76/91] ext4: propagate errors up to ext4_find_entry()'s callers lizf
2014-11-27  8:43 ` [PATCH 3.4 77/91] ext4: avoid trying to kfree an ERR_PTR pointer lizf
2014-11-27  8:43 ` [PATCH 3.4 78/91] NFS: fix stable regression lizf
2014-11-27  8:43 ` [PATCH 3.4 79/91] perf: Handle compat ioctl lizf
2014-11-27  8:43 ` [PATCH 3.4 80/91] bluetooth: hci_ldisc: fix deadlock condition lizf
2014-11-27  8:43 ` [PATCH 3.4 81/91] mnt: Only change user settable mount flags in remount lizf
2014-11-27  8:43 ` [PATCH 3.4 82/91] dm crypt: fix access beyond the end of allocated space lizf
2014-11-27  8:43 ` [PATCH 3.4 83/91] Fix spurious request sense in error handling lizf
2014-11-27  8:43 ` lizf [this message]
2014-11-27  8:43 ` [PATCH 3.4 85/91] ipv4: avoid parallel route cache gc executions lizf
2014-11-27  8:43 ` [PATCH 3.4 86/91] ipv4: disable bh while doing route gc lizf
2014-11-27  8:43 ` [PATCH 3.4 87/91] rtl8192ce: Fix null dereference in watchdog lizf
2014-11-27  8:43 ` [PATCH 3.4 88/91] ipv6: reuse ip6_frag_id from ip6_ufo_append_data lizf
2014-11-27  8:43 ` [PATCH 3.4 89/91] net: Do not enable tx-nocache-copy by default lizf
2014-11-27  8:43 ` [PATCH 3.4 90/91] ixgbevf: Prevent RX/TX statistics getting reset to zero lizf
2014-11-27  8:43 ` [PATCH 3.4 91/91] l2tp: fix race while getting PMTU on PPP pseudo-wire lizf
2014-11-27  9:07 ` [PATCH 3.4 00/91] 3.4.105-rc1 review Zefan Li
2014-11-27 20:15 ` Guenter Roeck
2014-11-29  5:54   ` Zefan Li
  -- strict thread matches above, loose matches on Subject: below --
2015-01-28  4:07 [PATCH 3.4 000/177] 3.4.106-rc1 review lizf
2015-01-28  4:08 ` [PATCH 3.4 84/91] ipv4: move route garbage collector to work queue lizf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1417077794-9299-84-git-send-email-lizf@kernel.org \
    --to=lizf@kernel.org \
    --cc=ben@decadent.org.uk \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan@huawei.com \
    --cc=mleitner@redhat.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).