* [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
@ 2019-01-10 17:02 Yang, Philip
[not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Yang, Philip @ 2019-01-10 17:02 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip
Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
DRM_AMDGPU_USERPTR Kconfig.
It supports both KFD userptr and gfx userptr paths.
The depdent HMM patchset from Jérôme Glisse are all merged into 4.20.0
kernel now.
Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++---------------
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
4 files changed, 55 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e5489069..960a63355705 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- select MMU_NOTIFIER
+ select HMM_MIRROR
help
- This option selects CONFIG_MMU_NOTIFIER if it isn't already
- selected to enabled full userptr support.
+ This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+ isn't already selected to enabled full userptr support.
config DRM_AMDGPU_GART_DEBUGFS
bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f76bcb9c45e4..675efc850ff4 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -172,7 +172,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
include $(FULL_AMD_PATH)/powerplay/Makefile
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b39496..5d518d2bb9be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
#include <linux/interval_tree.h>
#include <drm/drmP.h>
#include <drm/drm.h>
@@ -58,7 +58,6 @@
*
* @adev: amdgpu device pointer
* @mm: process address space
- * @mn: MMU notifier structure
* @type: type of MMU notifier
* @work: destruction work item
* @node: hash table node to find structure by adev and mn
@@ -66,6 +65,7 @@
* @objects: interval tree containing amdgpu_mn_nodes
* @read_lock: mutex for recursive locking of @lock
* @recursion: depth of recursion
+ * @mirror: HMM mirror function support
*
* Data for each amdgpu device and process address space.
*/
@@ -73,7 +73,6 @@ struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
- struct mmu_notifier mn;
enum amdgpu_mn_type type;
/* only used on destruction */
@@ -87,6 +86,9 @@ struct amdgpu_mn {
struct rb_root_cached objects;
struct mutex read_lock;
atomic_t recursion;
+
+ /* HMM mirror */
+ struct hmm_mirror mirror;
};
/**
@@ -103,7 +105,7 @@ struct amdgpu_mn_node {
};
/**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
*
* @work: previously sheduled work item
*
@@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
}
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+ hmm_mirror_unregister(&amn->mirror);
kfree(amn);
}
/**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
*
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
*
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
*/
-static void amdgpu_mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
INIT_WORK(&amn->work, amdgpu_mn_destroy);
schedule_work(&amn->work);
}
-
/**
* amdgpu_mn_lock - take the write side lock for this notifier
*
@@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
/**
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
@@ -278,28 +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
amdgpu_mn_invalidate_node(node, start, end);
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
/**
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
@@ -326,59 +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
start, end))
- amdgpu_amdkfd_evict_userptr(mem, mm);
+ amdgpu_amdkfd_evict_userptr(mem, amn->mm);
}
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
*/
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
- amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+ .release = amdgpu_hmm_mirror_release
},
[AMDGPU_MN_TYPE_HSA] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+ .release = amdgpu_hmm_mirror_release
},
};
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
/**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
@@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
amn->mm = mm;
init_rwsem(&amn->lock);
amn->type = type;
- amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
mutex_init(&amn->read_lock);
atomic_set(&amn->recursion, 0);
- r = __mmu_notifier_register(&amn->mn, mm);
+ amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+ r = hmm_mirror_register(&amn->mirror, mm);
if (r)
goto free_amn;
@@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
}
/**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..0a51fd00021c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
AMDGPU_MN_TYPE_HSA,
};
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread[parent not found: <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>]
* [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 [not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org> @ 2019-01-10 17:02 ` Yang, Philip 2019-01-10 17:02 ` [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6 Yang, Philip 1 sibling, 0 replies; 8+ messages in thread From: Yang, Philip @ 2019-01-10 17:02 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip There is circular lock between gfx and kfd path with HMM change: lock(dqm) -> bo::reserve -> amdgpu_mn_lock To avoid this, move init/unint_mqd() out of lock(dqm), to remove nested locking between mmap_sem and bo::reserve. The locking order is: bo::reserve -> amdgpu_mn_lock(p->mn) Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153 Signed-off-by: Philip Yang <Philip.Yang@amd.com> --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8372556b52eb..efe0d3c0215b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) - goto out_unlock; + goto out; q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = @@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: + * lock(dqm) -> bo::reserve + */ mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out_deallocate_doorbell; } + /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = (q->properties.queue_size > 0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, @@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_doorbell; + dqm_lock(dqm); + list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) { @@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - +out: return retval; } @@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; failed: @@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Lastly, free mqd resources. + * Do uninit_mqd() after dqm_unlock to avoid circular locking. + */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1645,7 +1648,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } out: - dqm_unlock(dqm); return retval; } -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6 [not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org> 2019-01-10 17:02 ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip @ 2019-01-10 17:02 ` Yang, Philip [not found] ` <20190110170228.10917-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org> 1 sibling, 1 reply; 8+ messages in thread From: Yang, Philip @ 2019-01-10 17:02 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip Use HMM helper function hmm_vma_fault() to get physical pages backing userptr and start CPU page table update track of those pages. Then use hmm_vma_range_done() to check if those pages are updated before amdgpu_cs_submit for gfx or before user queues are resumed for kfd. If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart from scratch, for kfd, restore worker is rescheduled to retry. HMM simplify the CPU page table concurrent update check, so remove guptasklock, mmu_invalidations, last_set_pages fields from amdgpu_ttm_tt struct. HMM does not pin the page (increase page ref count), so remove related operations like release_pages(), put_page(), mark_page_dirty(). Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 Signed-off-by: Philip Yang <Philip.Yang@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++------- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 158 +++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 ++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- 9 files changed, 198 insertions(+), 277 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 0b31a1859023..0e1711a75b68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,7 +61,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d7b10d79f1de..ae2d838d31ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages - * should not be allocated - */ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); i = 0; @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; - if (!mem->user_pages) { - mem->user_pages = - kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", - __func__); - return -ENOMEM; - } - } else if (mem->user_pages[0]) { - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); - } - /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - mem->user_pages); + bo->tbo.ttm->pages); if (ret) { - mem->user_pages[0] = NULL; + bo->tbo.ttm->pages[0] = NULL; pr_info("%s: Failed to get user pages: %d\n", __func__, ret); /* Pretend it succeeded. It will fail later @@ -1882,12 +1837,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * stalled user mode queues. */ } - - /* Mark the BO as valid unless it was invalidated - * again concurrently - */ - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) - return -EAGAIN; } return 0; @@ -1917,7 +1866,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) GFP_KERNEL); if (!pd_bo_list_entries) { pr_err("%s: Failed to allocate PD BO list entries\n", __func__); - return -ENOMEM; + ret = -ENOMEM; + goto out_no_mem; } INIT_LIST_HEAD(&resv_list); @@ -1941,7 +1891,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) - goto out; + goto out_free; amdgpu_sync_create(&sync); @@ -1967,10 +1917,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) bo = mem->bo; - /* Copy pages array and validate the BO if we got user pages */ - if (mem->user_pages[0]) { - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, - mem->user_pages); + /* Validate the BO if we got user pages */ + if (bo->tbo.ttm->pages[0]) { amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) { @@ -1979,16 +1927,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - /* Validate succeeded, now the BO owns the pages, free - * our copy of the pointer array. Put this BO back on - * the userptr_valid_list. If we need to revalidate - * it, we need to start from scratch. - */ - kvfree(mem->user_pages); - mem->user_pages = NULL; list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); + /* Stop HMM track the userptr update. We dont check the return + * value for concurrent CPU page table update because we will + * reschedule the restore worker if process_info->evicted_bos + * is updated. + */ + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -2022,8 +1970,15 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); -out: +out_free: kfree(pd_bo_list_entries); +out_no_mem: + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + bo = mem->bo; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 7c5f5d1601e6..a130e766cbdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; - int user_invalidated; + bool user_invalidated; }; struct amdgpu_bo_list { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1c49b8266d69..451a1fab17d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.bo = &bo->tbo; /* One for TTM and one for the CS job */ p->uf_entry.tv.num_shared = 2; - p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -539,14 +538,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (usermm && usermm != current->mm) return -EPERM; - /* Check if we have user pages and nobody bound the BO already */ - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - lobj->user_pages) { + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && + lobj->user_invalidated && lobj->user_pages) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); binding_userptr = true; @@ -577,7 +576,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; - unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -613,79 +611,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); - while (1) { - struct list_head need_pages; - - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto error_free_pages; - } - - INIT_LIST_HEAD(&need_pages); - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, - &e->user_invalidated) && e->user_pages) { - - /* We acquired a page array, but somebody - * invalidated it. Free it and try again - */ - release_pages(e->user_pages, - bo->tbo.ttm->num_pages); - kvfree(e->user_pages); - e->user_pages = NULL; - } - - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - !e->user_pages) { - list_del(&e->tv.head); - list_add(&e->tv.head, &need_pages); - - amdgpu_bo_unreserve(bo); - } + /* Get userptr backing pages. If pages are updated after registered + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do + * amdgpu_ttm_backend_bind() to flush and invalidate new pages + */ + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + bool userpage_invalidated = false; + int i; + + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + DRM_ERROR("calloc failure\n"); + return -ENOMEM; } - if (list_empty(&need_pages)) - break; - - /* Unreserve everything again. */ - ttm_eu_backoff_reservation(&p->ticket, &p->validated); - - /* We tried too many times, just abort */ - if (!--tries) { - r = -EDEADLK; - DRM_ERROR("deadlock in %s\n", __func__); - goto error_free_pages; + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); + if (r) { + kvfree(e->user_pages); + e->user_pages = NULL; + return r; } - /* Fill the page arrays for all userptrs. */ - list_for_each_entry(e, &need_pages, tv.head) { - struct ttm_tt *ttm = e->tv.bo->ttm; - - e->user_pages = kvmalloc_array(ttm->num_pages, - sizeof(struct page*), - GFP_KERNEL | __GFP_ZERO); - if (!e->user_pages) { - r = -ENOMEM; - DRM_ERROR("calloc failure in %s\n", __func__); - goto error_free_pages; - } - - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); - if (r) { - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - kvfree(e->user_pages); - e->user_pages = NULL; - goto error_free_pages; + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + userpage_invalidated = true; + break; } } + e->user_invalidated = userpage_invalidated; + } - /* And try again. */ - list_splice(&need_pages, &p->validated); + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + goto out; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -754,17 +718,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -error_free_pages: - - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - if (!e->user_pages) - continue; - - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); - kvfree(e->user_pages); - } - +out: return r; } @@ -1213,8 +1167,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; - - int r; + int r = 0; job = p->job; p->job = NULL; @@ -1223,15 +1176,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock */ + /* No memory allocation is allowed while holding the mn lock. + * p->mn is hold until amdgpu_cs_submit is finished and fence is added + * to BOs. + */ amdgpu_mn_lock(p->mn); + + /* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - r = -ERESTARTSYS; - goto error_abort; - } + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } + if (r) { + r = -ERESTARTSYS; + goto error_abort; } job->owner = p->filp; @@ -1278,14 +1237,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - union drm_amdgpu_cs *cs = data; - struct amdgpu_cs_parser parser = {}; - bool reserved_buffers = false; + union drm_amdgpu_cs *cs; + struct amdgpu_cs_parser parser; + bool reserved_buffers; + int tries = 10; int i, r; if (!adev->accel_working) return -EBUSY; +restart: + memset(&parser, 0, sizeof(parser)); + cs = data; + reserved_buffers = false; + parser.adev = adev; parser.filp = filp; @@ -1327,6 +1292,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + + if (r == -ERESTARTSYS) { + if (!--tries) { + DRM_ERROR("Possible deadlock? Retry too many times\n"); + return -EDEADLK; + } + goto restart; + } + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index f4f00217546e..92025993bfb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_reserve(bo, true); if (r) - goto free_pages; + goto user_pages_done; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); if (r) - goto free_pages; + goto user_pages_done; } r = drm_gem_handle_create(filp, gobj, &handle); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put_unlocked(gobj); if (r) - return r; + goto user_pages_done; args->handle = handle; - return 0; -free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +user_pages_done: + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); release_object: drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 5d518d2bb9be..9d40f3001f3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); - - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); } } @@ -513,3 +511,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mutex_unlock(&adev->mn_lock); } +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + +void amdgpu_hmm_init_range(struct hmm_range *range) +{ + if (range) { + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->pfns = NULL; + INIT_LIST_HEAD(&range->list); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index 0a51fd00021c..4803e216e174 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -25,9 +25,10 @@ #define __AMDGPU_MN_H__ /* - * MMU Notifier + * HMM mirror */ struct amdgpu_mn; +struct hmm_range; enum amdgpu_mn_type { AMDGPU_MN_TYPE_GFX, @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); +void amdgpu_hmm_init_range(struct hmm_range *range); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c91ec3101d00..3d074f4b02f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include <linux/pagemap.h> #include <linux/debugfs.h> #include <linux/iommu.h> +#include <linux/hmm.h> #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, /* * TTM backend functions. */ -struct amdgpu_ttm_gup_task_list { - struct list_head list; - struct task_struct *task; -}; - struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; @@ -718,85 +714,96 @@ struct amdgpu_ttm_tt { uint32_t userflags; spinlock_t guptasklock; struct list_head guptasks; - atomic_t mmu_invalidations; - uint32_t last_set_pages; + struct hmm_range range; }; /** - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR - * pointer to memory + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user + * memory and start HMM tracking CPU page table update * - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). - * This provides a wrapper around the get_user_pages() call to provide - * device accessible pages that back user memory. + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only + * once afterwards to stop HMM tracking */ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; - unsigned int flags = 0; - unsigned pinned = 0; - int r; + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; + struct hmm_range *range = >t->range; + int r = 0, i; if (!mm) /* Happens during process shutdown */ return -ESRCH; - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - flags |= FOLL_WRITE; + amdgpu_hmm_init_range(range); down_read(&mm->mmap_sem); - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* - * check that we only use anonymous memory to prevent problems - * with writeback - */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; + range->vma = find_vma(mm, gtt->userptr); + if (!range_in_vma(range->vma, gtt->userptr, end)) + r = -EFAULT; + else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + range->vma->vm_file) + r = -EPERM; + if (r) + goto out; - vma = find_vma(mm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&mm->mmap_sem); - return -EPERM; - } + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), + GFP_KERNEL); + if (range->pfns == NULL) { + r = -ENOMEM; + goto out; } + range->start = gtt->userptr; + range->end = end; - /* loop enough times using contiguous pages of memory */ - do { - unsigned num_pages = ttm->num_pages - pinned; - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **p = pages + pinned; - struct amdgpu_ttm_gup_task_list guptask; + range->pfns[0] = range->flags[HMM_PFN_VALID]; + range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? + 0 : range->flags[HMM_PFN_WRITE]; + for (i = 1; i < ttm->num_pages; i++) + range->pfns[i] = range->pfns[0]; - guptask.task = current; - spin_lock(>t->guptasklock); - list_add(&guptask.list, >t->guptasks); - spin_unlock(>t->guptasklock); + /* This may trigger page table update */ + r = hmm_vma_fault(range, true); + if (r) + goto out_free_pfns; - if (mm == current->mm) - r = get_user_pages(userptr, num_pages, flags, p, NULL); - else - r = get_user_pages_remote(gtt->usertask, - mm, userptr, num_pages, - flags, p, NULL, NULL); + up_read(&mm->mmap_sem); - spin_lock(>t->guptasklock); - list_del(&guptask.list); - spin_unlock(>t->guptasklock); + for (i = 0; i < ttm->num_pages; i++) + pages[i] = hmm_pfn_to_page(range, range->pfns[i]); - if (r < 0) - goto release_pages; + return 0; - pinned += r; +out_free_pfns: + kvfree(range->pfns); + range->pfns = NULL; +out: + up_read(&mm->mmap_sem); + return r; +} - } while (pinned < ttm->num_pages); +/** + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * Check if the pages backing this ttm range have been invalidated + * + * Returns: true if pages are still valid + */ +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + bool r = false; - up_read(&mm->mmap_sem); - return 0; + if (!gtt || !gtt->userptr) + return false; + + WARN_ONCE(!gtt->range.pfns, "No user pages to check\n"); + if (gtt->range.pfns) { + r = hmm_vma_range_done(>t->range); + kvfree(gtt->range.pfns); + gtt->range.pfns = NULL; + } -release_pages: - release_pages(pages, pinned); - up_read(&mm->mmap_sem); return r; } @@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) */ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned i; - gtt->last_set_pages = atomic_read(>t->mmu_invalidations); - for (i = 0; i < ttm->num_pages; ++i) { - if (ttm->pages[i]) - put_page(ttm->pages[i]); - + for (i = 0; i < ttm->num_pages; ++i) ttm->pages[i] = pages ? pages[i] : NULL; - } } /** @@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - /* mark the pages as dirty */ - amdgpu_ttm_tt_mark_user_pages(ttm); - sg_free_table(ttm->sg); + + if (gtt->range.pfns && + ttm->pages[0] == hmm_pfn_to_page(>t->range, gtt->range.pfns[0])) + WARN_ONCE(1, "Missing get_user_page_done\n"); } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, spin_lock_init(>t->guptasklock); INIT_LIST_HEAD(>t->guptasks); - atomic_set(>t->mmu_invalidations, 0); - gtt->last_set_pages = 0; return 0; } @@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct amdgpu_ttm_gup_task_list *entry; unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; - /* Search the lists of tasks that hold this mapping and see - * if current is one of them. If it is return false. - */ - spin_lock(>t->guptasklock); - list_for_each_entry(entry, >t->guptasks, list) { - if (entry->task == current) { - spin_unlock(>t->guptasklock); - return false; - } - } - spin_unlock(>t->guptasklock); - - atomic_inc(>t->mmu_invalidations); - return true; } /** - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? - */ -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, - int *last_invalidated) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - int prev_invalidated = *last_invalidated; - - *last_invalidated = atomic_read(>t->mmu_invalidations); - return prev_invalidated != *last_invalidated; -} - -/** - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object - * been invalidated since the last time they've been set? + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL || !gtt->userptr) return false; - return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; + return true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index b5b2d101f7db..8988c87fff9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 8+ messages in thread
[parent not found: <20190110170228.10917-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>]
* Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6 [not found] ` <20190110170228.10917-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org> @ 2019-01-14 15:33 ` Yang, Philip 0 siblings, 0 replies; 8+ messages in thread From: Yang, Philip @ 2019-01-14 15:33 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org Ping Christian, any comments for the GEM and CS part changes? Thanks. Philip On 2019-01-10 12:02 p.m., Yang, Philip wrote: > Use HMM helper function hmm_vma_fault() to get physical pages backing > userptr and start CPU page table update track of those pages. Then use > hmm_vma_range_done() to check if those pages are updated before > amdgpu_cs_submit for gfx or before user queues are resumed for kfd. > > If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart > from scratch, for kfd, restore worker is rescheduled to retry. > > HMM simplify the CPU page table concurrent update check, so remove > guptasklock, mmu_invalidations, last_set_pages fields from > amdgpu_ttm_tt struct. > > HMM does not pin the page (increase page ref count), so remove related > operations like release_pages(), put_page(), mark_page_dirty(). > > Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 > Signed-off-by: Philip Yang <Philip.Yang@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 - > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++------- > drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 158 +++++++--------- > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 ++++++++---------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- > 9 files changed, 198 insertions(+), 277 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 0b31a1859023..0e1711a75b68 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -61,7 +61,6 @@ struct kgd_mem { > > atomic_t invalid; > struct amdkfd_process_info *process_info; > - struct page **user_pages; > > struct amdgpu_sync sync; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index d7b10d79f1de..ae2d838d31ea 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, > goto out; > } > > - /* If no restore worker is running concurrently, user_pages > - * should not be allocated > - */ > - WARN(mem->user_pages, "Leaking user_pages array"); > - > - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, > - sizeof(struct page *), > - GFP_KERNEL | __GFP_ZERO); > - if (!mem->user_pages) { > - pr_err("%s: Failed to allocate pages array\n", __func__); > - ret = -ENOMEM; > - goto unregister_out; > - } > - > - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); > + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); > if (ret) { > pr_err("%s: Failed to get user pages: %d\n", __func__, ret); > - goto free_out; > + goto unregister_out; > } > > - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); > - > ret = amdgpu_bo_reserve(bo, true); > if (ret) { > pr_err("%s: Failed to reserve BO\n", __func__); > @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, > amdgpu_bo_unreserve(bo); > > release_out: > - if (ret) > - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); > -free_out: > - kvfree(mem->user_pages); > - mem->user_pages = NULL; > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > unregister_out: > if (ret) > amdgpu_mn_unregister(bo); > @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, > ctx->kfd_bo.priority = 0; > ctx->kfd_bo.tv.bo = &bo->tbo; > ctx->kfd_bo.tv.num_shared = 1; > - ctx->kfd_bo.user_pages = NULL; > list_add(&ctx->kfd_bo.tv.head, &ctx->list); > > amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); > @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, > ctx->kfd_bo.priority = 0; > ctx->kfd_bo.tv.bo = &bo->tbo; > ctx->kfd_bo.tv.num_shared = 1; > - ctx->kfd_bo.user_pages = NULL; > list_add(&ctx->kfd_bo.tv.head, &ctx->list); > > i = 0; > @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > list_del(&bo_list_entry->head); > mutex_unlock(&process_info->lock); > > - /* Free user pages if necessary */ > - if (mem->user_pages) { > - pr_debug("%s: Freeing user_pages array\n", __func__); > - if (mem->user_pages[0]) > - release_pages(mem->user_pages, > - mem->bo->tbo.ttm->num_pages); > - kvfree(mem->user_pages); > - } > - > ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); > if (unlikely(ret)) > return ret; > @@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, > > bo = mem->bo; > > - if (!mem->user_pages) { > - mem->user_pages = > - kvmalloc_array(bo->tbo.ttm->num_pages, > - sizeof(struct page *), > - GFP_KERNEL | __GFP_ZERO); > - if (!mem->user_pages) { > - pr_err("%s: Failed to allocate pages array\n", > - __func__); > - return -ENOMEM; > - } > - } else if (mem->user_pages[0]) { > - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); > - } > - > /* Get updated user pages */ > ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, > - mem->user_pages); > + bo->tbo.ttm->pages); > if (ret) { > - mem->user_pages[0] = NULL; > + bo->tbo.ttm->pages[0] = NULL; > pr_info("%s: Failed to get user pages: %d\n", > __func__, ret); > /* Pretend it succeeded. It will fail later > @@ -1882,12 +1837,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, > * stalled user mode queues. > */ > } > - > - /* Mark the BO as valid unless it was invalidated > - * again concurrently > - */ > - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) > - return -EAGAIN; > } > > return 0; > @@ -1917,7 +1866,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > GFP_KERNEL); > if (!pd_bo_list_entries) { > pr_err("%s: Failed to allocate PD BO list entries\n", __func__); > - return -ENOMEM; > + ret = -ENOMEM; > + goto out_no_mem; > } > > INIT_LIST_HEAD(&resv_list); > @@ -1941,7 +1891,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); > WARN(!list_empty(&duplicates), "Duplicates should be empty"); > if (ret) > - goto out; > + goto out_free; > > amdgpu_sync_create(&sync); > > @@ -1967,10 +1917,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > > bo = mem->bo; > > - /* Copy pages array and validate the BO if we got user pages */ > - if (mem->user_pages[0]) { > - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, > - mem->user_pages); > + /* Validate the BO if we got user pages */ > + if (bo->tbo.ttm->pages[0]) { > amdgpu_bo_placement_from_domain(bo, mem->domain); > ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > if (ret) { > @@ -1979,16 +1927,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > } > } > > - /* Validate succeeded, now the BO owns the pages, free > - * our copy of the pointer array. Put this BO back on > - * the userptr_valid_list. If we need to revalidate > - * it, we need to start from scratch. > - */ > - kvfree(mem->user_pages); > - mem->user_pages = NULL; > list_move_tail(&mem->validate_list.head, > &process_info->userptr_valid_list); > > + /* Stop HMM track the userptr update. We dont check the return > + * value for concurrent CPU page table update because we will > + * reschedule the restore worker if process_info->evicted_bos > + * is updated. > + */ > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > + > /* Update mapping. If the BO was not validated > * (because we couldn't get user pages), this will > * clear the page table entries, which will result in > @@ -2022,8 +1970,15 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > ttm_eu_backoff_reservation(&ticket, &resv_list); > amdgpu_sync_wait(&sync, false); > amdgpu_sync_free(&sync); > -out: > +out_free: > kfree(pd_bo_list_entries); > +out_no_mem: > + list_for_each_entry_safe(mem, tmp_mem, > + &process_info->userptr_inval_list, > + validate_list.head) { > + bo = mem->bo; > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > + } > > return ret; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > index 7c5f5d1601e6..a130e766cbdb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { > struct amdgpu_bo_va *bo_va; > uint32_t priority; > struct page **user_pages; > - int user_invalidated; > + bool user_invalidated; > }; > > struct amdgpu_bo_list { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 1c49b8266d69..451a1fab17d6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, > p->uf_entry.tv.bo = &bo->tbo; > /* One for TTM and one for the CS job */ > p->uf_entry.tv.num_shared = 2; > - p->uf_entry.user_pages = NULL; > > drm_gem_object_put_unlocked(gobj); > > @@ -539,14 +538,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, > if (usermm && usermm != current->mm) > return -EPERM; > > - /* Check if we have user pages and nobody bound the BO already */ > - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && > - lobj->user_pages) { > + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && > + lobj->user_invalidated && lobj->user_pages) { > amdgpu_bo_placement_from_domain(bo, > AMDGPU_GEM_DOMAIN_CPU); > r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > if (r) > return r; > + > amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, > lobj->user_pages); > binding_userptr = true; > @@ -577,7 +576,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > struct amdgpu_bo *gds; > struct amdgpu_bo *gws; > struct amdgpu_bo *oa; > - unsigned tries = 10; > int r; > > INIT_LIST_HEAD(&p->validated); > @@ -613,79 +611,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) > list_add(&p->uf_entry.tv.head, &p->validated); > > - while (1) { > - struct list_head need_pages; > - > - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, > - &duplicates); > - if (unlikely(r != 0)) { > - if (r != -ERESTARTSYS) > - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); > - goto error_free_pages; > - } > - > - INIT_LIST_HEAD(&need_pages); > - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); > - > - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, > - &e->user_invalidated) && e->user_pages) { > - > - /* We acquired a page array, but somebody > - * invalidated it. Free it and try again > - */ > - release_pages(e->user_pages, > - bo->tbo.ttm->num_pages); > - kvfree(e->user_pages); > - e->user_pages = NULL; > - } > - > - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && > - !e->user_pages) { > - list_del(&e->tv.head); > - list_add(&e->tv.head, &need_pages); > - > - amdgpu_bo_unreserve(bo); > - } > + /* Get userptr backing pages. If pages are updated after registered > + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do > + * amdgpu_ttm_backend_bind() to flush and invalidate new pages > + */ > + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); > + bool userpage_invalidated = false; > + int i; > + > + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, > + sizeof(struct page *), > + GFP_KERNEL | __GFP_ZERO); > + if (!e->user_pages) { > + DRM_ERROR("calloc failure\n"); > + return -ENOMEM; > } > > - if (list_empty(&need_pages)) > - break; > - > - /* Unreserve everything again. */ > - ttm_eu_backoff_reservation(&p->ticket, &p->validated); > - > - /* We tried too many times, just abort */ > - if (!--tries) { > - r = -EDEADLK; > - DRM_ERROR("deadlock in %s\n", __func__); > - goto error_free_pages; > + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); > + if (r) { > + kvfree(e->user_pages); > + e->user_pages = NULL; > + return r; > } > > - /* Fill the page arrays for all userptrs. */ > - list_for_each_entry(e, &need_pages, tv.head) { > - struct ttm_tt *ttm = e->tv.bo->ttm; > - > - e->user_pages = kvmalloc_array(ttm->num_pages, > - sizeof(struct page*), > - GFP_KERNEL | __GFP_ZERO); > - if (!e->user_pages) { > - r = -ENOMEM; > - DRM_ERROR("calloc failure in %s\n", __func__); > - goto error_free_pages; > - } > - > - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); > - if (r) { > - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); > - kvfree(e->user_pages); > - e->user_pages = NULL; > - goto error_free_pages; > + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { > + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { > + userpage_invalidated = true; > + break; > } > } > + e->user_invalidated = userpage_invalidated; > + } > > - /* And try again. */ > - list_splice(&need_pages, &p->validated); > + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, > + &duplicates); > + if (unlikely(r != 0)) { > + if (r != -ERESTARTSYS) > + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); > + goto out; > } > > amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, > @@ -754,17 +718,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > error_validate: > if (r) > ttm_eu_backoff_reservation(&p->ticket, &p->validated); > - > -error_free_pages: > - > - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > - if (!e->user_pages) > - continue; > - > - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); > - kvfree(e->user_pages); > - } > - > +out: > return r; > } > > @@ -1213,8 +1167,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > struct amdgpu_bo_list_entry *e; > struct amdgpu_job *job; > uint64_t seq; > - > - int r; > + int r = 0; > > job = p->job; > p->job = NULL; > @@ -1223,15 +1176,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > if (r) > goto error_unlock; > > - /* No memory allocation is allowed while holding the mn lock */ > + /* No memory allocation is allowed while holding the mn lock. > + * p->mn is hold until amdgpu_cs_submit is finished and fence is added > + * to BOs. > + */ > amdgpu_mn_lock(p->mn); > + > + /* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */ > amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); > > - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { > - r = -ERESTARTSYS; > - goto error_abort; > - } > + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > + } > + if (r) { > + r = -ERESTARTSYS; > + goto error_abort; > } > > job->owner = p->filp; > @@ -1278,14 +1237,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > { > struct amdgpu_device *adev = dev->dev_private; > - union drm_amdgpu_cs *cs = data; > - struct amdgpu_cs_parser parser = {}; > - bool reserved_buffers = false; > + union drm_amdgpu_cs *cs; > + struct amdgpu_cs_parser parser; > + bool reserved_buffers; > + int tries = 10; > int i, r; > > if (!adev->accel_working) > return -EBUSY; > > +restart: > + memset(&parser, 0, sizeof(parser)); > + cs = data; > + reserved_buffers = false; > + > parser.adev = adev; > parser.filp = filp; > > @@ -1327,6 +1292,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > > out: > amdgpu_cs_parser_fini(&parser, r, reserved_buffers); > + > + if (r == -ERESTARTSYS) { > + if (!--tries) { > + DRM_ERROR("Possible deadlock? Retry too many times\n"); > + return -EDEADLK; > + } > + goto restart; > + } > + > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > index f4f00217546e..92025993bfb1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > @@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, > > r = amdgpu_bo_reserve(bo, true); > if (r) > - goto free_pages; > + goto user_pages_done; > > amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); > r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > amdgpu_bo_unreserve(bo); > if (r) > - goto free_pages; > + goto user_pages_done; > } > > r = drm_gem_handle_create(filp, gobj, &handle); > - /* drop reference from allocate - handle holds it now */ > - drm_gem_object_put_unlocked(gobj); > if (r) > - return r; > + goto user_pages_done; > > args->handle = handle; > - return 0; > > -free_pages: > - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); > +user_pages_done: > + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > > release_object: > drm_gem_object_put_unlocked(gobj); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > index 5d518d2bb9be..9d40f3001f3c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, > true, false, MAX_SCHEDULE_TIMEOUT); > if (r <= 0) > DRM_ERROR("(%ld) failed to wait for user bo\n", r); > - > - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); > } > } > > @@ -513,3 +511,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) > mutex_unlock(&adev->mn_lock); > } > > +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ > +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { > + (1 << 0), /* HMM_PFN_VALID */ > + (1 << 1), /* HMM_PFN_WRITE */ > + 0 /* HMM_PFN_DEVICE_PRIVATE */ > +}; > + > +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { > + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ > + 0, /* HMM_PFN_NONE */ > + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ > +}; > + > +void amdgpu_hmm_init_range(struct hmm_range *range) > +{ > + if (range) { > + range->flags = hmm_range_flags; > + range->values = hmm_range_values; > + range->pfn_shift = PAGE_SHIFT; > + range->pfns = NULL; > + INIT_LIST_HEAD(&range->list); > + } > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > index 0a51fd00021c..4803e216e174 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > @@ -25,9 +25,10 @@ > #define __AMDGPU_MN_H__ > > /* > - * MMU Notifier > + * HMM mirror > */ > struct amdgpu_mn; > +struct hmm_range; > > enum amdgpu_mn_type { > AMDGPU_MN_TYPE_GFX, > @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > enum amdgpu_mn_type type); > int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); > void amdgpu_mn_unregister(struct amdgpu_bo *bo); > +void amdgpu_hmm_init_range(struct hmm_range *range); > #else > static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} > static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index c91ec3101d00..3d074f4b02f4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -43,6 +43,7 @@ > #include <linux/pagemap.h> > #include <linux/debugfs.h> > #include <linux/iommu.h> > +#include <linux/hmm.h> > #include "amdgpu.h" > #include "amdgpu_object.h" > #include "amdgpu_trace.h" > @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, > /* > * TTM backend functions. > */ > -struct amdgpu_ttm_gup_task_list { > - struct list_head list; > - struct task_struct *task; > -}; > - > struct amdgpu_ttm_tt { > struct ttm_dma_tt ttm; > u64 offset; > @@ -718,85 +714,96 @@ struct amdgpu_ttm_tt { > uint32_t userflags; > spinlock_t guptasklock; > struct list_head guptasks; > - atomic_t mmu_invalidations; > - uint32_t last_set_pages; > + struct hmm_range range; > }; > > /** > - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR > - * pointer to memory > + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user > + * memory and start HMM tracking CPU page table update > * > - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). > - * This provides a wrapper around the get_user_pages() call to provide > - * device accessible pages that back user memory. > + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only > + * once afterwards to stop HMM tracking > */ > int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) > { > struct amdgpu_ttm_tt *gtt = (void *)ttm; > struct mm_struct *mm = gtt->usertask->mm; > - unsigned int flags = 0; > - unsigned pinned = 0; > - int r; > + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; > + struct hmm_range *range = >t->range; > + int r = 0, i; > > if (!mm) /* Happens during process shutdown */ > return -ESRCH; > > - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) > - flags |= FOLL_WRITE; > + amdgpu_hmm_init_range(range); > > down_read(&mm->mmap_sem); > > - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { > - /* > - * check that we only use anonymous memory to prevent problems > - * with writeback > - */ > - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; > - struct vm_area_struct *vma; > + range->vma = find_vma(mm, gtt->userptr); > + if (!range_in_vma(range->vma, gtt->userptr, end)) > + r = -EFAULT; > + else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && > + range->vma->vm_file) > + r = -EPERM; > + if (r) > + goto out; > > - vma = find_vma(mm, gtt->userptr); > - if (!vma || vma->vm_file || vma->vm_end < end) { > - up_read(&mm->mmap_sem); > - return -EPERM; > - } > + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), > + GFP_KERNEL); > + if (range->pfns == NULL) { > + r = -ENOMEM; > + goto out; > } > + range->start = gtt->userptr; > + range->end = end; > > - /* loop enough times using contiguous pages of memory */ > - do { > - unsigned num_pages = ttm->num_pages - pinned; > - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; > - struct page **p = pages + pinned; > - struct amdgpu_ttm_gup_task_list guptask; > + range->pfns[0] = range->flags[HMM_PFN_VALID]; > + range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? > + 0 : range->flags[HMM_PFN_WRITE]; > + for (i = 1; i < ttm->num_pages; i++) > + range->pfns[i] = range->pfns[0]; > > - guptask.task = current; > - spin_lock(>t->guptasklock); > - list_add(&guptask.list, >t->guptasks); > - spin_unlock(>t->guptasklock); > + /* This may trigger page table update */ > + r = hmm_vma_fault(range, true); > + if (r) > + goto out_free_pfns; > > - if (mm == current->mm) > - r = get_user_pages(userptr, num_pages, flags, p, NULL); > - else > - r = get_user_pages_remote(gtt->usertask, > - mm, userptr, num_pages, > - flags, p, NULL, NULL); > + up_read(&mm->mmap_sem); > > - spin_lock(>t->guptasklock); > - list_del(&guptask.list); > - spin_unlock(>t->guptasklock); > + for (i = 0; i < ttm->num_pages; i++) > + pages[i] = hmm_pfn_to_page(range, range->pfns[i]); > > - if (r < 0) > - goto release_pages; > + return 0; > > - pinned += r; > +out_free_pfns: > + kvfree(range->pfns); > + range->pfns = NULL; > +out: > + up_read(&mm->mmap_sem); > + return r; > +} > > - } while (pinned < ttm->num_pages); > +/** > + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change > + * Check if the pages backing this ttm range have been invalidated > + * > + * Returns: true if pages are still valid > + */ > +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) > +{ > + struct amdgpu_ttm_tt *gtt = (void *)ttm; > + bool r = false; > > - up_read(&mm->mmap_sem); > - return 0; > + if (!gtt || !gtt->userptr) > + return false; > + > + WARN_ONCE(!gtt->range.pfns, "No user pages to check\n"); > + if (gtt->range.pfns) { > + r = hmm_vma_range_done(>t->range); > + kvfree(gtt->range.pfns); > + gtt->range.pfns = NULL; > + } > > -release_pages: > - release_pages(pages, pinned); > - up_read(&mm->mmap_sem); > return r; > } > > @@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) > */ > void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) > { > - struct amdgpu_ttm_tt *gtt = (void *)ttm; > unsigned i; > > - gtt->last_set_pages = atomic_read(>t->mmu_invalidations); > - for (i = 0; i < ttm->num_pages; ++i) { > - if (ttm->pages[i]) > - put_page(ttm->pages[i]); > - > + for (i = 0; i < ttm->num_pages; ++i) > ttm->pages[i] = pages ? pages[i] : NULL; > - } > } > > /** > @@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) > /* unmap the pages mapped to the device */ > dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); > > - /* mark the pages as dirty */ > - amdgpu_ttm_tt_mark_user_pages(ttm); > - > sg_free_table(ttm->sg); > + > + if (gtt->range.pfns && > + ttm->pages[0] == hmm_pfn_to_page(>t->range, gtt->range.pfns[0])) > + WARN_ONCE(1, "Missing get_user_page_done\n"); > } > > int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, > @@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, > > spin_lock_init(>t->guptasklock); > INIT_LIST_HEAD(>t->guptasks); > - atomic_set(>t->mmu_invalidations, 0); > - gtt->last_set_pages = 0; > > return 0; > } > @@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, > unsigned long end) > { > struct amdgpu_ttm_tt *gtt = (void *)ttm; > - struct amdgpu_ttm_gup_task_list *entry; > unsigned long size; > > if (gtt == NULL || !gtt->userptr) > @@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, > if (gtt->userptr > end || gtt->userptr + size <= start) > return false; > > - /* Search the lists of tasks that hold this mapping and see > - * if current is one of them. If it is return false. > - */ > - spin_lock(>t->guptasklock); > - list_for_each_entry(entry, >t->guptasks, list) { > - if (entry->task == current) { > - spin_unlock(>t->guptasklock); > - return false; > - } > - } > - spin_unlock(>t->guptasklock); > - > - atomic_inc(>t->mmu_invalidations); > - > return true; > } > > /** > - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? > - */ > -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, > - int *last_invalidated) > -{ > - struct amdgpu_ttm_tt *gtt = (void *)ttm; > - int prev_invalidated = *last_invalidated; > - > - *last_invalidated = atomic_read(>t->mmu_invalidations); > - return prev_invalidated != *last_invalidated; > -} > - > -/** > - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object > - * been invalidated since the last time they've been set? > + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? > */ > -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) > +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) > { > struct amdgpu_ttm_tt *gtt = (void *)ttm; > > if (gtt == NULL || !gtt->userptr) > return false; > > - return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; > + return true; > } > > /** > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index b5b2d101f7db..8988c87fff9d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); > int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); > > int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); > +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); > void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); > void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); > int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, > @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, > unsigned long end); > bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, > int *last_invalidated); > -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); > +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); > bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); > uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); > uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, > _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 0/3] Use HMM to replace get_user_pages
@ 2019-02-04 15:06 Yang, Philip
[not found] ` <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Yang, Philip @ 2019-02-04 15:06 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip
Hi Christian,
This patch is rebased to lastest HMM. Please review the GEM and CS part changes
in patch 3/3.
Regards,
Philip Yang (3):
drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
drm/amdkfd: avoid HMM change cause circular lock dependency v2
drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6
drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 -
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 158 +++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 164 +++++++++--------
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 6 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 ++++++++----------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +-
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++--
12 files changed, 282 insertions(+), 374 deletions(-)
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread[parent not found: <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>]
* [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 [not found] ` <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org> @ 2019-02-04 15:06 ` Yang, Philip 0 siblings, 0 replies; 8+ messages in thread From: Yang, Philip @ 2019-02-04 15:06 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip There is circular lock between gfx and kfd path with HMM change: lock(dqm) -> bo::reserve -> amdgpu_mn_lock To avoid this, move init/unint_mqd() out of lock(dqm), to remove nested locking between mmap_sem and bo::reserve. The locking order is: bo::reserve -> amdgpu_mn_lock(p->mn) Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153 Signed-off-by: Philip Yang <Philip.Yang@amd.com> --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8372556b52eb..efe0d3c0215b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) - goto out_unlock; + goto out; q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = @@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: + * lock(dqm) -> bo::reserve + */ mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out_deallocate_doorbell; } + /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = (q->properties.queue_size > 0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, @@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_doorbell; + dqm_lock(dqm); + list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) { @@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - +out: return retval; } @@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; failed: @@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Lastly, free mqd resources. + * Do uninit_mqd() after dqm_unlock to avoid circular locking. + */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1645,7 +1648,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } out: - dqm_unlock(dqm); return retval; } -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 0/3] Use HMM to replace get_user_pages
@ 2019-02-04 18:22 Yang, Philip
[not found] ` <20190204182237.2641-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Yang, Philip @ 2019-02-04 18:22 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip
Hi Christian,
This patch is rebased to lastest HMM. Please review the GEM and CS part changes
in patch 3/3.
Thanks,
Philip Yang (3):
drm/amdgpu: use HMM mirror callback to replace mmu notifier v7
drm/amdkfd: avoid HMM change cause circular lock dependency v2
drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6
drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 -
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 158 +++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 179 +++++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 6 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 +++++++----------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +-
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++--
12 files changed, 285 insertions(+), 386 deletions(-)
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread[parent not found: <20190204182237.2641-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>]
* [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 [not found] ` <20190204182237.2641-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org> @ 2019-02-04 18:23 ` Yang, Philip [not found] ` <20190204182237.2641-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Yang, Philip @ 2019-02-04 18:23 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip There is circular lock between gfx and kfd path with HMM change: lock(dqm) -> bo::reserve -> amdgpu_mn_lock To avoid this, move init/unint_mqd() out of lock(dqm), to remove nested locking between mmap_sem and bo::reserve. The locking order is: bo::reserve -> amdgpu_mn_lock(p->mn) Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153 Signed-off-by: Philip Yang <Philip.Yang@amd.com> --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8372556b52eb..efe0d3c0215b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) - goto out_unlock; + goto out; q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = @@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: + * lock(dqm) -> bo::reserve + */ mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out_deallocate_doorbell; } + /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = (q->properties.queue_size > 0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, @@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_doorbell; + dqm_lock(dqm); + list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) { @@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - +out: return retval; } @@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; failed: @@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Lastly, free mqd resources. + * Do uninit_mqd() after dqm_unlock to avoid circular locking. + */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1645,7 +1648,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } out: - dqm_unlock(dqm); return retval; } -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 8+ messages in thread
[parent not found: <20190204182237.2641-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>]
* Re: [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 [not found] ` <20190204182237.2641-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org> @ 2019-02-05 11:42 ` Christian König 0 siblings, 0 replies; 8+ messages in thread From: Christian König @ 2019-02-05 11:42 UTC (permalink / raw) To: Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org Am 04.02.19 um 19:23 schrieb Yang, Philip: > There is circular lock between gfx and kfd path with HMM change: > lock(dqm) -> bo::reserve -> amdgpu_mn_lock > > To avoid this, move init/unint_mqd() out of lock(dqm), to remove nested > locking between mmap_sem and bo::reserve. The locking order > is: bo::reserve -> amdgpu_mn_lock(p->mn) In general this sounds correct to me, but apart from that I don't know the code well enough to fully judge. > > Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153 > Signed-off-by: Philip Yang <Philip.Yang@amd.com> Acked-by: Christian König <christian.koenig@amd.com> > --- > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++--------- > 1 file changed, 17 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 8372556b52eb..efe0d3c0215b 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, > int retval; > struct mqd_manager *mqd_mgr; > > - retval = 0; > - > - dqm_lock(dqm); > - > if (dqm->total_queue_count >= max_num_of_queues_per_device) { > pr_warn("Can't create new usermode queue because %d queues were already created\n", > dqm->total_queue_count); > retval = -EPERM; > - goto out_unlock; > + goto out; > } > > if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { > retval = allocate_sdma_queue(dqm, &q->sdma_id); > if (retval) > - goto out_unlock; > + goto out; > q->properties.sdma_queue_id = > q->sdma_id / get_num_sdma_engines(dqm); > q->properties.sdma_engine_id = > @@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, > if (retval) > goto out_deallocate_sdma_queue; > > + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: > + * lock(dqm) -> bo::reserve > + */ > mqd_mgr = dqm->ops.get_mqd_manager(dqm, > get_mqd_type_from_queue_type(q->properties.type)); > > @@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, > retval = -ENOMEM; > goto out_deallocate_doorbell; > } > + > /* > * Eviction state logic: we only mark active queues as evicted > * to avoid the overhead of restoring inactive queues later > @@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, > q->properties.is_evicted = (q->properties.queue_size > 0 && > q->properties.queue_percent > 0 && > q->properties.queue_address != 0); > - > dqm->asic_ops.init_sdma_vm(dqm, q, qpd); > - > q->properties.tba_addr = qpd->tba_addr; > q->properties.tma_addr = qpd->tma_addr; > retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, > @@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, > if (retval) > goto out_deallocate_doorbell; > > + dqm_lock(dqm); > + > list_add(&q->list, &qpd->queues_list); > qpd->queue_count++; > if (q->properties.is_active) { > @@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, > out_deallocate_sdma_queue: > if (q->properties.type == KFD_QUEUE_TYPE_SDMA) > deallocate_sdma_queue(dqm, q->sdma_id); > -out_unlock: > - dqm_unlock(dqm); > - > +out: > return retval; > } > > @@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, > qpd->reset_wavefronts = true; > } > > - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); > - > /* > * Unconditionally decrement this counter, regardless of the queue's > * type > @@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, > > dqm_unlock(dqm); > > + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ > + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); > + > return retval; > > failed: > @@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, > qpd->reset_wavefronts = false; > } > > - /* lastly, free mqd resources */ > + dqm_unlock(dqm); > + > + /* Lastly, free mqd resources. > + * Do uninit_mqd() after dqm_unlock to avoid circular locking. > + */ > list_for_each_entry_safe(q, next, &qpd->queues_list, list) { > mqd_mgr = dqm->ops.get_mqd_manager(dqm, > get_mqd_type_from_queue_type(q->properties.type)); > @@ -1645,7 +1648,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, > } > > out: > - dqm_unlock(dqm); > return retval; > } > _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 0/3] Use HMM to replace get_user_pages
@ 2019-02-06 16:26 Yang, Philip
[not found] ` <20190206162556.11512-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Yang, Philip @ 2019-02-06 16:26 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip
Hi Christian,
Resend patch 1/3, 2/3, added Reviewed-by in comments.
Change in patch 3/3, amdgpu_cs_submit, amdgpu_cs_ioctl return -EAGAIN
to user space to retry cs_ioctl.
Regards,
Philip
Philip Yang (3):
drm/amdgpu: use HMM mirror callback to replace mmu notifier v7
drm/amdkfd: avoid HMM change cause circular lock dependency v2
drm/amdgpu: replace get_user_pages with HMM address mirror helpers v8
drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 -
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 138 +++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 179 +++++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 6 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 178 +++++++----------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +-
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++--
12 files changed, 269 insertions(+), 387 deletions(-)
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread[parent not found: <20190206162556.11512-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>]
* [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 [not found] ` <20190206162556.11512-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org> @ 2019-02-06 16:26 ` Yang, Philip 0 siblings, 0 replies; 8+ messages in thread From: Yang, Philip @ 2019-02-06 16:26 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; +Cc: Yang, Philip There is circular lock between gfx and kfd path with HMM change: lock(dqm) -> bo::reserve -> amdgpu_mn_lock To avoid this, move init/unint_mqd() out of lock(dqm), to remove nested locking between mmap_sem and bo::reserve. The locking order is: bo::reserve -> amdgpu_mn_lock(p->mn) Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153 Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8372556b52eb..efe0d3c0215b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) - goto out_unlock; + goto out; q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); q->properties.sdma_engine_id = @@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_sdma_queue; + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: + * lock(dqm) -> bo::reserve + */ mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = -ENOMEM; goto out_deallocate_doorbell; } + /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = (q->properties.queue_size > 0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, @@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_doorbell; + dqm_lock(dqm); + list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) { @@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - +out: return retval; } @@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; failed: @@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Lastly, free mqd resources. + * Do uninit_mqd() after dqm_unlock to avoid circular locking. + */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { mqd_mgr = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1645,7 +1648,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } out: - dqm_unlock(dqm); return retval; } -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2019-02-06 16:26 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-01-10 17:02 [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Yang, Philip
[not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-01-10 17:02 ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
2019-01-10 17:02 ` [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6 Yang, Philip
[not found] ` <20190110170228.10917-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-01-14 15:33 ` Yang, Philip
-- strict thread matches above, loose matches on Subject: below --
2019-02-04 15:06 [PATCH 0/3] Use HMM to replace get_user_pages Yang, Philip
[not found] ` <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-04 15:06 ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
2019-02-04 18:22 [PATCH 0/3] Use HMM to replace get_user_pages Yang, Philip
[not found] ` <20190204182237.2641-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-04 18:23 ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
[not found] ` <20190204182237.2641-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-05 11:42 ` Christian König
2019-02-06 16:26 [PATCH 0/3] Use HMM to replace get_user_pages Yang, Philip
[not found] ` <20190206162556.11512-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-06 16:26 ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox