public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Jan Beulich <jbeulich@suse.com>,
	Juergen Gross <jgross@suse.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 5.4 51/68] xen/balloon: use a kernel thread instead of a workqueue
Date: Mon, 27 Sep 2021 19:02:47 +0200	[thread overview]
Message-ID: <20210927170221.723839763@linuxfoundation.org> (raw)
In-Reply-To: <20210927170219.901812470@linuxfoundation.org>

From: Juergen Gross <jgross@suse.com>

[ Upstream commit 8480ed9c2bbd56fc86524998e5f2e3e22f5038f6 ]

Today the Xen ballooning is done via delayed work in a workqueue. This
might result in workqueue hangups being reported when large amounts of
memory are being ballooned in one go (here 16GB):

BUG: workqueue lockup - pool cpus=6 node=0 flags=0x0 nice=0 stuck for 64s!
Showing busy workqueues and worker pools:
workqueue events: flags=0x0
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=2/256 refcnt=3
    in-flight: 229:balloon_process
    pending: cache_reap
workqueue events_freezable_power_: flags=0x84
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
    pending: disk_events_workfn
workqueue mm_percpu_wq: flags=0x8
  pwq 12: cpus=6 node=0 flags=0x0 nice=0 active=1/256 refcnt=2
    pending: vmstat_update
pool 12: cpus=6 node=0 flags=0x0 nice=0 hung=64s workers=3 idle: 2222 43

This can easily be avoided by using a dedicated kernel thread for doing
the ballooning work.

Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/20210827123206.15429-1-jgross@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/xen/balloon.c | 62 +++++++++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 17 deletions(-)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index ebb05517b6aa..2762d246991b 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -43,6 +43,8 @@
 #include <linux/sched.h>
 #include <linux/cred.h>
 #include <linux/errno.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/memblock.h>
 #include <linux/pagemap.h>
@@ -117,7 +119,7 @@ static struct ctl_table xen_root[] = {
 #define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
 
 /*
- * balloon_process() state:
+ * balloon_thread() state:
  *
  * BP_DONE: done or nothing to do,
  * BP_WAIT: wait to be rescheduled,
@@ -132,6 +134,8 @@ enum bp_state {
 	BP_ECANCELED
 };
 
+/* Main waiting point for xen-balloon thread. */
+static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq);
 
 static DEFINE_MUTEX(balloon_mutex);
 
@@ -146,10 +150,6 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)];
 static LIST_HEAD(ballooned_pages);
 static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 
-/* Main work function, always executed in process context. */
-static void balloon_process(struct work_struct *work);
-static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
-
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -383,7 +383,7 @@ static void xen_online_page(struct page *page, unsigned int order)
 static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v)
 {
 	if (val == MEM_ONLINE)
-		schedule_delayed_work(&balloon_worker, 0);
+		wake_up(&balloon_thread_wq);
 
 	return NOTIFY_OK;
 }
@@ -508,18 +508,43 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 }
 
 /*
- * As this is a work item it is guaranteed to run as a single instance only.
+ * Stop waiting if either state is not BP_EAGAIN and ballooning action is
+ * needed, or if the credit has changed while state is BP_EAGAIN.
+ */
+static bool balloon_thread_cond(enum bp_state state, long credit)
+{
+	if (state != BP_EAGAIN)
+		credit = 0;
+
+	return current_credit() != credit || kthread_should_stop();
+}
+
+/*
+ * As this is a kthread it is guaranteed to run as a single instance only.
  * We may of course race updates of the target counts (which are protected
  * by the balloon lock), or with changes to the Xen hard limit, but we will
  * recover from these in time.
  */
-static void balloon_process(struct work_struct *work)
+static int balloon_thread(void *unused)
 {
 	enum bp_state state = BP_DONE;
 	long credit;
+	unsigned long timeout;
+
+	set_freezable();
+	for (;;) {
+		if (state == BP_EAGAIN)
+			timeout = balloon_stats.schedule_delay * HZ;
+		else
+			timeout = 3600 * HZ;
+		credit = current_credit();
 
+		wait_event_interruptible_timeout(balloon_thread_wq,
+				 balloon_thread_cond(state, credit), timeout);
+
+		if (kthread_should_stop())
+			return 0;
 
-	do {
 		mutex_lock(&balloon_mutex);
 
 		credit = current_credit();
@@ -546,12 +571,7 @@ static void balloon_process(struct work_struct *work)
 		mutex_unlock(&balloon_mutex);
 
 		cond_resched();
-
-	} while (credit && state == BP_DONE);
-
-	/* Schedule more work if there is some still to be done. */
-	if (state == BP_EAGAIN)
-		schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
+	}
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
@@ -559,7 +579,7 @@ void balloon_set_new_target(unsigned long target)
 {
 	/* No need for lock. Not read-modify-write updates. */
 	balloon_stats.target_pages = target;
-	schedule_delayed_work(&balloon_worker, 0);
+	wake_up(&balloon_thread_wq);
 }
 EXPORT_SYMBOL_GPL(balloon_set_new_target);
 
@@ -664,7 +684,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
 
 	/* The balloon may be too large now. Shrink it if needed. */
 	if (current_credit())
-		schedule_delayed_work(&balloon_worker, 0);
+		wake_up(&balloon_thread_wq);
 
 	mutex_unlock(&balloon_mutex);
 }
@@ -696,6 +716,8 @@ static void __init balloon_add_region(unsigned long start_pfn,
 
 static int __init balloon_init(void)
 {
+	struct task_struct *task;
+
 	if (!xen_domain())
 		return -ENODEV;
 
@@ -739,6 +761,12 @@ static int __init balloon_init(void)
 	}
 #endif
 
+	task = kthread_run(balloon_thread, NULL, "xen-balloon");
+	if (IS_ERR(task)) {
+		pr_err("xen-balloon thread could not be started, ballooning will not work!\n");
+		return PTR_ERR(task);
+	}
+
 	/* Init the xen-balloon driver. */
 	xen_balloon_init();
 
-- 
2.33.0




  parent reply	other threads:[~2021-09-27 17:06 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-27 17:01 [PATCH 5.4 00/68] 5.4.150-rc1 review Greg Kroah-Hartman
2021-09-27 17:01 ` [PATCH 5.4 01/68] ocfs2: drop acl cache for directories too Greg Kroah-Hartman
2021-09-27 17:01 ` [PATCH 5.4 02/68] usb: gadget: r8a66597: fix a loop in set_feature() Greg Kroah-Hartman
2021-09-27 17:01 ` [PATCH 5.4 03/68] usb: dwc2: gadget: Fix ISOC flow for BDMA and Slave Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 04/68] usb: dwc2: gadget: Fix ISOC transfer complete handling for DDMA Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 05/68] usb: musb: tusb6010: uninitialized data in tusb_fifo_write_unaligned() Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 06/68] cifs: fix incorrect check for null pointer in header_assemble Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 07/68] xen/x86: fix PV trap handling on secondary processors Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 08/68] usb-storage: Add quirk for ScanLogic SL11R-IDE older than 2.6c Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 09/68] USB: serial: cp210x: add ID for GW Instek GDM-834x Digital Multimeter Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 10/68] USB: cdc-acm: fix minor-number release Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 11/68] binder: make sure fd closes complete Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 12/68] staging: greybus: uart: fix tty use after free Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 13/68] Re-enable UAS for LaCie Rugged USB3-FW with fk quirk Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 14/68] usb: core: hcd: Add support for deferring roothub registration Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 15/68] USB: serial: mos7840: remove duplicated 0xac24 device ID Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 16/68] USB: serial: option: add Telit LN920 compositions Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 17/68] USB: serial: option: remove duplicate USB device ID Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 18/68] USB: serial: option: add device id for Foxconn T99W265 Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 19/68] mcb: fix error handling in mcb_alloc_bus() Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 20/68] erofs: fix up erofs_lookup tracepoint Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 21/68] btrfs: prevent __btrfs_dump_space_info() to underflow its free space Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 22/68] xhci: Set HCD flag to defer primary roothub registration Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 23/68] serial: mvebu-uart: fix drivers tx_empty callback Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 24/68] net: hso: fix muxed tty registration Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 25/68] afs: Fix incorrect triggering of sillyrename on 3rd-party invalidation Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 26/68] platform/x86/intel: punit_ipc: Drop wrong use of ACPI_PTR() Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 27/68] enetc: Fix illegal access when reading affinity_hint Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 28/68] bnxt_en: Fix TX timeout when TX ring size is set to the smallest Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 29/68] net/smc: add missing error check in smc_clc_prfx_set() Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 30/68] gpio: uniphier: Fix void functions to remove return value Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 31/68] qed: rdma - dont wait for resources under hw error recovery flow Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 32/68] net/mlx4_en: Dont allow aRFS for encapsulated packets Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 33/68] scsi: iscsi: Adjust iface sysfs attr detection Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 34/68] tty: synclink_gt, drop unneeded forward declarations Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 35/68] tty: synclink_gt: rename a conflicting function name Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 36/68] fpga: machxo2-spi: Return an error on failure Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 37/68] fpga: machxo2-spi: Fix missing error code in machxo2_write_complete() Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 38/68] thermal/core: Potential buffer overflow in thermal_build_list_of_policies() Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 39/68] cifs: fix a sign extension bug Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 40/68] scsi: qla2xxx: Restore initiator in dual mode Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 41/68] scsi: lpfc: Use correct scnprintf() limit Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 42/68] irqchip/goldfish-pic: Select GENERIC_IRQ_CHIP to fix build Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 43/68] irqchip/gic-v3-its: Fix potential VPE leak on error Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 44/68] md: fix a lock order reversal in md_alloc Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 45/68] blktrace: Fix uaf in blk_trace access after removing by sysfs Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 46/68] net: macb: fix use after free on rmmod Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 47/68] net: stmmac: allow CSR clock of 300MHz Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 48/68] m68k: Double cast io functions to unsigned long Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 49/68] ipv6: delay fib6_sernum increase in fib6_add Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 50/68] bpf: Add oversize check before call kvcalloc() Greg Kroah-Hartman
2021-09-27 17:02 ` Greg Kroah-Hartman [this message]
2021-09-27 17:02 ` [PATCH 5.4 52/68] nvme-multipath: fix ANA state updates when a namespace is not present Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 53/68] sparc32: page align size in arch_dma_alloc Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 54/68] blk-cgroup: fix UAF by grabbing blkcg lock before destroying blkg pd Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 55/68] compiler.h: Introduce absolute_pointer macro Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 56/68] net: i825xx: Use absolute_pointer for memcpy from fixed memory location Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 57/68] sparc: avoid stringop-overread errors Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 58/68] qnx4: " Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 59/68] parisc: Use absolute_pointer() to define PAGE0 Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 60/68] arm64: Mark __stack_chk_guard as __ro_after_init Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 61/68] alpha: Declare virt_to_phys and virt_to_bus parameter as pointer to volatile Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 62/68] net: 6pack: Fix tx timeout and slot time Greg Kroah-Hartman
2021-09-27 17:02 ` [PATCH 5.4 63/68] spi: Fix tegra20 build with CONFIG_PM=n Greg Kroah-Hartman
2021-09-27 17:03 ` [PATCH 5.4 64/68] EDAC/synopsys: Fix wrong value type assignment for edac_mode Greg Kroah-Hartman
2021-09-27 17:03 ` [PATCH 5.4 65/68] thermal/drivers/int340x: Do not set a wrong tcc offset on resume Greg Kroah-Hartman
2021-09-27 17:03 ` [PATCH 5.4 66/68] arm64: dts: marvell: armada-37xx: Extend PCIe MEM space Greg Kroah-Hartman
2021-09-27 17:03 ` [PATCH 5.4 67/68] xen/balloon: fix balloon kthread freezing Greg Kroah-Hartman
2021-09-27 17:03 ` [PATCH 5.4 68/68] qnx4: work around gcc false positive warning bug Greg Kroah-Hartman
2021-09-27 17:49 ` [PATCH 5.4 00/68] 5.4.150-rc1 review Florian Fainelli
2021-09-27 22:59 ` Shuah Khan
2021-09-28  7:00 ` Jon Hunter
2021-09-28  9:26 ` Naresh Kamboju
2021-09-28 12:19 ` Sudip Mukherjee
2021-09-29  1:32 ` Samuel Zou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210927170221.723839763@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=boris.ostrovsky@oracle.com \
    --cc=jbeulich@suse.com \
    --cc=jgross@suse.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox