From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Sasha Levin <sashal@kernel.org>,
Philip Yang <Philip.Yang@amd.com>, Ruili Ji <ruili.ji@amd.com>,
airlied@linux.ie, Felix Kuehling <Felix.Kuehling@amd.com>,
Xinhui.Pan@amd.com, amd-gfx@lists.freedesktop.org,
dri-devel@lists.freedesktop.org, daniel@ffwll.ch,
Alex Deucher <alexander.deucher@amd.com>,
christian.koenig@amd.com
Subject: [PATCH AUTOSEL 5.16 016/109] drm/amdkfd: Ensure mm remain valid in svm deferred_list work
Date: Fri, 1 Apr 2022 10:31:23 -0400 [thread overview]
Message-ID: <20220401143256.1950537-16-sashal@kernel.org> (raw)
In-Reply-To: <20220401143256.1950537-1-sashal@kernel.org>
From: Philip Yang <Philip.Yang@amd.com>
[ Upstream commit 367c9b0f1b8750a704070e7ae85234d591290434 ]
svm_deferred_list work should continue to handle deferred_range_list
which maybe split to child range to avoid child range leak, and remove
ranges mmu interval notifier to avoid mm mm_count leak. So taking mm
reference when adding range to deferred list, to ensure mm is valid in
the scheduled deferred_list_work, and drop the mm referrence after range
is handled.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reported-by: Ruili Ji <ruili.ji@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 62 ++++++++++++++++------------
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c0b8f4ff80b8..ea1c5aaf659a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1926,10 +1926,9 @@ svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
}
static void
-svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
+ struct mm_struct *mm)
{
- struct mm_struct *mm = prange->work_item.mm;
-
switch (prange->work_item.op) {
case SVM_OP_NULL:
pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -2013,34 +2012,41 @@ static void svm_range_deferred_list_work(struct work_struct *work)
pr_debug("enter svms 0x%p\n", svms);
p = container_of(svms, struct kfd_process, svms);
- /* Avoid mm is gone when inserting mmu notifier */
- mm = get_task_mm(p->lead_thread);
- if (!mm) {
- pr_debug("svms 0x%p process mm gone\n", svms);
- return;
- }
-retry:
- mmap_write_lock(mm);
-
- /* Checking for the need to drain retry faults must be inside
- * mmap write lock to serialize with munmap notifiers.
- */
- if (unlikely(atomic_read(&svms->drain_pagefaults))) {
- mmap_write_unlock(mm);
- svm_range_drain_retry_fault(svms);
- goto retry;
- }
spin_lock(&svms->deferred_list_lock);
while (!list_empty(&svms->deferred_range_list)) {
prange = list_first_entry(&svms->deferred_range_list,
struct svm_range, deferred_list);
- list_del_init(&prange->deferred_list);
spin_unlock(&svms->deferred_list_lock);
pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
prange->start, prange->last, prange->work_item.op);
+ mm = prange->work_item.mm;
+retry:
+ mmap_write_lock(mm);
+
+ /* Checking for the need to drain retry faults must be inside
+ * mmap write lock to serialize with munmap notifiers.
+ */
+ if (unlikely(atomic_read(&svms->drain_pagefaults))) {
+ mmap_write_unlock(mm);
+ svm_range_drain_retry_fault(svms);
+ goto retry;
+ }
+
+ /* Remove from deferred_list must be inside mmap write lock, for
+ * two race cases:
+ * 1. unmap_from_cpu may change work_item.op and add the range
+ * to deferred_list again, cause use after free bug.
+ * 2. svm_range_list_lock_and_flush_work may hold mmap write
+ * lock and continue because deferred_list is empty, but
+ * deferred_list work is actually waiting for mmap lock.
+ */
+ spin_lock(&svms->deferred_list_lock);
+ list_del_init(&prange->deferred_list);
+ spin_unlock(&svms->deferred_list_lock);
+
mutex_lock(&svms->lock);
mutex_lock(&prange->migrate_mutex);
while (!list_empty(&prange->child_list)) {
@@ -2051,19 +2057,20 @@ static void svm_range_deferred_list_work(struct work_struct *work)
pr_debug("child prange 0x%p op %d\n", pchild,
pchild->work_item.op);
list_del_init(&pchild->child_list);
- svm_range_handle_list_op(svms, pchild);
+ svm_range_handle_list_op(svms, pchild, mm);
}
mutex_unlock(&prange->migrate_mutex);
- svm_range_handle_list_op(svms, prange);
+ svm_range_handle_list_op(svms, prange, mm);
mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+
+ /* Pairs with mmget in svm_range_add_list_work */
+ mmput(mm);
spin_lock(&svms->deferred_list_lock);
}
spin_unlock(&svms->deferred_list_lock);
-
- mmap_write_unlock(mm);
- mmput(mm);
pr_debug("exit svms 0x%p\n", svms);
}
@@ -2081,6 +2088,9 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op = op;
} else {
prange->work_item.op = op;
+
+ /* Pairs with mmput in deferred_list_work */
+ mmget(mm);
prange->work_item.mm = mm;
list_add_tail(&prange->deferred_list,
&prange->svms->deferred_range_list);
--
2.34.1
next prev parent reply other threads:[~2022-04-01 14:33 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20220401143256.1950537-1-sashal@kernel.org>
2022-04-01 14:31 ` [PATCH AUTOSEL 5.16 003/109] drm/amd/display: Add signal type check when verify stream backends same Sasha Levin
2022-04-01 14:31 ` [PATCH AUTOSEL 5.16 007/109] drm/amd/amdgpu/amdgpu_cs: fix refcount leak of a dma_fence obj Sasha Levin
2022-04-01 14:31 ` [PATCH AUTOSEL 5.16 008/109] drm/amd/display: Fix memory leak Sasha Levin
2022-04-01 14:31 ` [PATCH AUTOSEL 5.16 009/109] drm/amd/display: Use PSR version selected during set_psr_caps Sasha Levin
2022-04-01 14:31 ` [PATCH AUTOSEL 5.16 015/109] drm/amdkfd: Don't take process mutex for svm ioctls Sasha Levin
2022-04-01 14:31 ` Sasha Levin [this message]
2022-04-01 14:31 ` [PATCH AUTOSEL 5.16 017/109] drm/amdkfd: svm range restore work deadlock when process exit Sasha Levin
2022-04-01 14:31 ` [PATCH AUTOSEL 5.16 036/109] drm/amdgpu: Fix recursive locking warning Sasha Levin
2022-04-01 14:31 ` [PATCH AUTOSEL 5.16 050/109] drm/amd/display: reset lane settings after each PHY repeater LT Sasha Levin
2022-04-01 14:32 ` [PATCH AUTOSEL 5.16 062/109] drm/amdkfd: make CRAT table missing message informational only Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220401143256.1950537-16-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=Felix.Kuehling@amd.com \
--cc=Philip.Yang@amd.com \
--cc=Xinhui.Pan@amd.com \
--cc=airlied@linux.ie \
--cc=alexander.deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=christian.koenig@amd.com \
--cc=daniel@ffwll.ch \
--cc=dri-devel@lists.freedesktop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=ruili.ji@amd.com \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).