public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] iommu: Avoid softlockup and rcu stall in fq_flush_timeout().
@ 2023-02-16  7:11 Peng Zhang
  2023-04-10 22:22 ` Jerry Snitselaar
  0 siblings, 1 reply; 8+ messages in thread
From: Peng Zhang @ 2023-02-16  7:11 UTC (permalink / raw)
  To: robin.murphy, joro, will
  Cc: iommu, linux-kernel, Peng Zhang, Li Bin, Xie XiuQi,
	Yang Yingliang

There is a soft lockup under fio pressure testing with SMMU enabled:
watchdog: BUG: soft lockup - CPU#81 stuck for 22s!  [swapper/81:0]
...
Call trace:
 fq_flush_timeout+0xc0/0x110
 call_timer_fn+0x34/0x178
 expire_timers+0xec/0x158
 run_timer_softirq+0xc0/0x1f8
 __do_softirq+0x120/0x324
 irq_exit+0x11c/0x140
 __handle_domain_irq+0x6c/0xc0
 gic_handle_irq+0x6c/0x170
 el1_irq+0xb8/0x140
 arch_cpu_idle+0x38/0x1c0
 default_idle_call+0x24/0x44
 do_idle+0x1f4/0x2d8
 cpu_startup_entry+0x2c/0x30
 secondary_start_kernel+0x17c/0x1c8

An RCU stall may also be triggered:

rcu: INFO: rcu_sched self-detected stall on CPU
NMI backtrace for cpu 21
CPU: 21 PID: 118 Comm: ksoftirqd/21
...
Call trace:
 fq_flush_timeout+0x6d/0x90
 ? fq_ring_free+0xc0/0xc0
 call_timer_fn+0x2b/0x120
 run_timer_softirq+0x1a6/0x420
 ? finish_task_switch+0x80/0x280
 __do_softirq+0xda/0x2da
 ? sort_range+0x20/0x20
 run_ksoftirqd+0x26/0x40
 smpboot_thread_fn+0xb8/0x150
 kthread+0x110/0x130
 ? __kthread_cancel_work+0x40/0x40
 ret_from_fork+0x1f/0x30

This is because the timer callback fq_flush_timeout() may run for more than
10ms, and timers may be processed continuously in softirq context, which
triggers the soft lockup and the RCU stall. Move the fq_ring_free() work for
each CPU, which may take a long time, into a workqueue item instead, so that
it no longer runs in softirq context and the soft lockup and RCU stall are
avoided.

This patch is modified from the patch[1] of openEuler.

[1] https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/C3KYS4BXTDMVVBQNEQYNAOGOQWFFINHJ/

Signed-off-by: Li Bin <huawei.libin@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Peng Zhang <zhangpeng.00@bytedance.com>
---
 drivers/iommu/dma-iommu.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 99b2646cb5c7..bc4c979d7091 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -59,6 +59,8 @@ struct iommu_dma_cookie {
 			struct timer_list	fq_timer;
 			/* 1 when timer is active, 0 when not */
 			atomic_t		fq_timer_on;
+			/* The work to free iova */
+			struct work_struct free_iova_work;
 		};
 		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
 		dma_addr_t		msi_iova;
@@ -155,20 +157,10 @@ static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
 static void fq_flush_timeout(struct timer_list *t)
 {
 	struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
-	int cpu;
 
 	atomic_set(&cookie->fq_timer_on, 0);
 	fq_flush_iotlb(cookie);
-
-	for_each_possible_cpu(cpu) {
-		unsigned long flags;
-		struct iova_fq *fq;
-
-		fq = per_cpu_ptr(cookie->fq, cpu);
-		spin_lock_irqsave(&fq->lock, flags);
-		fq_ring_free(cookie, fq);
-		spin_unlock_irqrestore(&fq->lock, flags);
-	}
+	schedule_work(&cookie->free_iova_work);
 }
 
 static void queue_iova(struct iommu_dma_cookie *cookie,
@@ -227,6 +219,7 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
 		return;
 
 	del_timer_sync(&cookie->fq_timer);
+	cancel_work_sync(&cookie->free_iova_work);
 	/* The IOVAs will be torn down separately, so just free our queued pages */
 	for_each_possible_cpu(cpu) {
 		struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
@@ -238,6 +231,23 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
 	free_percpu(cookie->fq);
 }
 
+static void free_iova_work_func(struct work_struct *work)
+{
+	struct iommu_dma_cookie *cookie;
+	int cpu;
+
+	cookie = container_of(work, struct iommu_dma_cookie, free_iova_work);
+	for_each_possible_cpu(cpu) {
+		unsigned long flags;
+		struct iova_fq *fq;
+
+		fq = per_cpu_ptr(cookie->fq, cpu);
+		spin_lock_irqsave(&fq->lock, flags);
+		fq_ring_free(cookie, fq);
+		spin_unlock_irqrestore(&fq->lock, flags);
+	}
+}
+
 /* sysfs updates are serialised by the mutex of the group owning @domain */
 int iommu_dma_init_fq(struct iommu_domain *domain)
 {
@@ -271,6 +281,7 @@ int iommu_dma_init_fq(struct iommu_domain *domain)
 
 	cookie->fq = queue;
 
+	INIT_WORK(&cookie->free_iova_work, free_iova_work_func);
 	timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
 	atomic_set(&cookie->fq_timer_on, 0);
 	/*
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread
[parent not found: <20230216084902.1486-1-hdanton@sina.com>]

end of thread, other threads:[~2023-05-22 15:36 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-02-16  7:11 [PATCH] iommu: Avoid softlockup and rcu stall in fq_flush_timeout() Peng Zhang
2023-04-10 22:22 ` Jerry Snitselaar
2023-04-18 10:41   ` Vasant Hegde
2023-04-28  5:44     ` Vasant Hegde
2023-05-22 14:58       ` Joerg Roedel
2023-05-22 15:18         ` Jerry Snitselaar
2023-05-22 15:35           ` John Garry
     [not found] <20230216084902.1486-1-hdanton@sina.com>
2023-02-16 11:02 ` Peng Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox