* [PATCH 0/2] Split out dma fence array and invalidate media_gt TLBs in PT code
@ 2024-08-26 16:54 Matthew Brost
2024-08-26 16:54 ` [PATCH 1/2] dma-buf: Split out dma fence array create into alloc and arm Matthew Brost
2024-08-26 16:54 ` [PATCH 2/2] drm/xe: Invalidate media_gt TLBs in PT code Matthew Brost
0 siblings, 2 replies; 3+ messages in thread
From: Matthew Brost @ 2024-08-26 16:54 UTC (permalink / raw)
To: intel-xe, linux-media, dri-devel
Cc: thomas.hellstrom, sumit.semwal, christian.koenig
Respin of [1] [2] based on CI and review feedback.
Matt
Matthew Brost (2):
dma-buf: Split out dma fence array create into alloc and arm
drm/xe: Invalidate media_gt TLBs in PT code
drivers/dma-buf/dma-fence-array.c | 19 +++--
drivers/gpu/drm/xe/xe_pt.c | 117 ++++++++++++++++++++++++------
include/linux/dma-fence-array.h | 9 +--
3 files changed, 109 insertions(+), 36 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 1/2] dma-buf: Split out dma fence array create into alloc and arm
2024-08-26 16:54 [PATCH 0/2] Split out dma fence array and invalidate media_gt TLBs in PT code Matthew Brost
@ 2024-08-26 16:54 ` Matthew Brost
2024-08-26 16:54 ` [PATCH 2/2] drm/xe: Invalidate media_gt TLBs in PT code Matthew Brost
1 sibling, 0 replies; 3+ messages in thread
From: Matthew Brost @ 2024-08-26 16:54 UTC (permalink / raw)
To: intel-xe, linux-media, dri-devel
Cc: thomas.hellstrom, sumit.semwal, christian.koenig
Useful to preallocate a dma fence array and then initialize (arm) it in
the path of reclaim or dma fence signaling.
v2:
- s/arm/init (Christian)
- Drop !array warn (Christian)
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Christian König <christian.koenig@amd.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/dma-buf/dma-fence-array.c | 19 +++++++++----------
include/linux/dma-fence-array.h | 9 ++++-----
2 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
index b03e0a87a5cd..0659e6b29b3c 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -158,7 +158,7 @@ struct dma_fence_array *dma_fence_array_alloc(int num_fences)
EXPORT_SYMBOL(dma_fence_array_alloc);
/**
- * dma_fence_array_arm - Arm a custom fence array
+ * dma_fence_array_init - Arm a custom fence array
* @array: [in] dma fence array to arm
* @num_fences: [in] number of fences to add in the array
* @fences: [in] array containing the fences
@@ -169,13 +169,12 @@ EXPORT_SYMBOL(dma_fence_array_alloc);
* Implementation of @dma_fence_array_create without allocation. Useful to arm a
* preallocated dma fence fence in the path of reclaim or dma fence signaling.
*/
-void dma_fence_array_arm(struct dma_fence_array *array,
- int num_fences,
- struct dma_fence **fences,
- u64 context, unsigned seqno,
- bool signal_on_any)
+void dma_fence_array_init(struct dma_fence_array *array,
+ int num_fences, struct dma_fence **fences,
+ u64 context, unsigned seqno,
+ bool signal_on_any)
{
- WARN_ON(!array || !num_fences || !fences);
+ WARN_ON(!num_fences || !fences);
array->num_fences = num_fences;
@@ -203,7 +202,7 @@ void dma_fence_array_arm(struct dma_fence_array *array,
while (num_fences--)
WARN_ON(dma_fence_is_container(fences[num_fences]));
}
-EXPORT_SYMBOL(dma_fence_array_arm);
+EXPORT_SYMBOL(dma_fence_array_init);
/**
* dma_fence_array_create - Create a custom fence array
@@ -235,8 +234,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
if (!array)
return NULL;
- dma_fence_array_arm(array, num_fences, fences,
- context, seqno, signal_on_any);
+ dma_fence_array_init(array, num_fences, fences,
+ context, seqno, signal_on_any);
return array;
}
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
index 3466ffc4b803..079b3dec0a16 100644
--- a/include/linux/dma-fence-array.h
+++ b/include/linux/dma-fence-array.h
@@ -80,11 +80,10 @@ to_dma_fence_array(struct dma_fence *fence)
++(index), fence = dma_fence_array_next(head, index))
struct dma_fence_array *dma_fence_array_alloc(int num_fences);
-void dma_fence_array_arm(struct dma_fence_array *array,
- int num_fences,
- struct dma_fence **fences,
- u64 context, unsigned seqno,
- bool signal_on_any);
+void dma_fence_array_init(struct dma_fence_array *array,
+ int num_fences, struct dma_fence **fences,
+ u64 context, unsigned seqno,
+ bool signal_on_any);
struct dma_fence_array *dma_fence_array_create(int num_fences,
struct dma_fence **fences,
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] drm/xe: Invalidate media_gt TLBs in PT code
2024-08-26 16:54 [PATCH 0/2] Split out dma fence array and invalidate media_gt TLBs in PT code Matthew Brost
2024-08-26 16:54 ` [PATCH 1/2] dma-buf: Split out dma fence array create into alloc and arm Matthew Brost
@ 2024-08-26 16:54 ` Matthew Brost
1 sibling, 0 replies; 3+ messages in thread
From: Matthew Brost @ 2024-08-26 16:54 UTC (permalink / raw)
To: intel-xe, linux-media, dri-devel
Cc: thomas.hellstrom, sumit.semwal, christian.koenig
Testing on LNL has shown that the media GT's TLBs need to be invalidated
via the GuC; update the PT code accordingly.
v2:
- Do dma_fence_get before first call of invalidation_fence_init (Himal)
- No need to check for valid chain fence (Himal)
v3:
- Use dma-fence-array
Fixes: 3330361543fc ("drm/xe/lnl: Add LNL platform definition")
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_pt.c | 117 ++++++++++++++++++++++++++++++-------
1 file changed, 96 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 579ed31b46db..2e35444a85b0 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -3,6 +3,8 @@
* Copyright © 2022 Intel Corporation
*/
+#include <linux/dma-fence-array.h>
+
#include "xe_pt.h"
#include "regs/xe_gtt_defs.h"
@@ -1627,9 +1629,11 @@ xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops,
static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma)
{
+ int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0;
+
if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv,
- xe->info.tile_count);
+ xe->info.tile_count << shift);
return 0;
}
@@ -1816,6 +1820,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
struct xe_vm_pgtable_update_ops *pt_update_ops =
&vops->pt_update_ops[tile->id];
struct xe_vma_op *op;
+ int shift = tile->media_gt ? 1 : 0;
int err;
lockdep_assert_held(&vops->vm->lock);
@@ -1824,7 +1829,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
xe_pt_update_ops_init(pt_update_ops);
err = dma_resv_reserve_fences(xe_vm_resv(vops->vm),
- tile_to_xe(tile)->info.tile_count);
+ tile_to_xe(tile)->info.tile_count << shift);
if (err)
return err;
@@ -1849,13 +1854,20 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma *vma, struct dma_fence *fence)
+ struct xe_vma *vma, struct dma_fence *fence,
+ struct dma_fence *fence2)
{
- if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
pt_update_ops->wait_vm_bookkeep ?
DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP);
+ if (fence2)
+ dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
vma->tile_present |= BIT(tile->id);
vma->tile_staged &= ~BIT(tile->id);
if (xe_vma_is_userptr(vma)) {
@@ -1875,13 +1887,20 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma *vma, struct dma_fence *fence)
+ struct xe_vma *vma, struct dma_fence *fence,
+ struct dma_fence *fence2)
{
- if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) {
dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
pt_update_ops->wait_vm_bookkeep ?
DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP);
+ if (fence2)
+ dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
vma->tile_present &= ~BIT(tile->id);
if (!vma->tile_present) {
list_del_init(&vma->combined_links.rebind);
@@ -1898,7 +1917,8 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
static void op_commit(struct xe_vm *vm,
struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
- struct xe_vma_op *op, struct dma_fence *fence)
+ struct xe_vma_op *op, struct dma_fence *fence,
+ struct dma_fence *fence2)
{
xe_vm_assert_held(vm);
@@ -1907,26 +1927,28 @@ static void op_commit(struct xe_vm *vm,
if (!op->map.immediate && xe_vm_in_fault_mode(vm))
break;
- bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence);
+ bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
+ fence2);
break;
case DRM_GPUVA_OP_REMAP:
unbind_op_commit(vm, tile, pt_update_ops,
- gpuva_to_vma(op->base.remap.unmap->va), fence);
+ gpuva_to_vma(op->base.remap.unmap->va), fence,
+ fence2);
if (op->remap.prev)
bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
- fence);
+ fence, fence2);
if (op->remap.next)
bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
- fence);
+ fence, fence2);
break;
case DRM_GPUVA_OP_UNMAP:
unbind_op_commit(vm, tile, pt_update_ops,
- gpuva_to_vma(op->base.unmap.va), fence);
+ gpuva_to_vma(op->base.unmap.va), fence, fence2);
break;
case DRM_GPUVA_OP_PREFETCH:
bind_op_commit(vm, tile, pt_update_ops,
- gpuva_to_vma(op->base.prefetch.va), fence);
+ gpuva_to_vma(op->base.prefetch.va), fence, fence2);
break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
@@ -1963,7 +1985,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
struct xe_vm_pgtable_update_ops *pt_update_ops =
&vops->pt_update_ops[tile->id];
struct dma_fence *fence;
- struct invalidation_fence *ifence = NULL;
+ struct invalidation_fence *ifence = NULL, *mfence = NULL;
+ struct dma_fence **fences = NULL;
+ struct dma_fence_array *cf = NULL;
struct xe_range_fence *rfence;
struct xe_vma_op *op;
int err = 0, i;
@@ -1996,6 +2020,23 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
err = -ENOMEM;
goto kill_vm_tile1;
}
+ if (tile->media_gt) {
+ mfence = kzalloc(sizeof(*ifence), GFP_KERNEL);
+ if (!mfence) {
+ err = -ENOMEM;
+ goto free_ifence;
+ }
+ fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
+ if (!fences) {
+ err = -ENOMEM;
+ goto free_ifence;
+ }
+ cf = dma_fence_array_alloc(2);
+ if (!cf) {
+ err = -ENOMEM;
+ goto free_ifence;
+ }
+ }
}
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
@@ -2027,19 +2068,50 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
/* tlb invalidation must be done before signaling rebind */
if (ifence) {
+ if (mfence)
+ dma_fence_get(fence);
invalidation_fence_init(tile->primary_gt, ifence, fence,
pt_update_ops->start,
pt_update_ops->last, vm->usm.asid);
- fence = &ifence->base.base;
+ if (mfence) {
+ invalidation_fence_init(tile->media_gt, mfence, fence,
+ pt_update_ops->start,
+ pt_update_ops->last, vm->usm.asid);
+ fences[0] = &ifence->base.base;
+ fences[1] = &mfence->base.base;
+ dma_fence_array_arm(cf, 2, fences,
+ vm->composite_fence_ctx,
+ vm->composite_fence_seqno++,
+ false);
+ fence = &cf->base;
+ } else {
+ fence = &ifence->base.base;
+ }
}
- dma_resv_add_fence(xe_vm_resv(vm), fence,
- pt_update_ops->wait_vm_bookkeep ?
- DMA_RESV_USAGE_KERNEL :
- DMA_RESV_USAGE_BOOKKEEP);
+ if (!mfence) {
+ dma_resv_add_fence(xe_vm_resv(vm), fence,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
- list_for_each_entry(op, &vops->list, link)
- op_commit(vops->vm, tile, pt_update_ops, op, fence);
+ list_for_each_entry(op, &vops->list, link)
+ op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
+ } else {
+ dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ list_for_each_entry(op, &vops->list, link)
+ op_commit(vops->vm, tile, pt_update_ops, op,
+ &ifence->base.base, &mfence->base.base);
+ }
if (pt_update_ops->needs_userptr_lock)
up_read(&vm->userptr.notifier_lock);
@@ -2049,6 +2121,9 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
free_rfence:
kfree(rfence);
free_ifence:
+ kfree(cf);
+ kfree(fences);
+ kfree(mfence);
kfree(ifence);
kill_vm_tile1:
if (err != -EAGAIN && tile->id)
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-08-26 16:54 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed
-- links below jump to the message on this page --)
2024-08-26 16:54 [PATCH 0/2] Split out dma fence array and invalidate media_gt TLBs in PT code Matthew Brost
2024-08-26 16:54 ` [PATCH 1/2] dma-buf: Split out dma fence array create into alloc and arm Matthew Brost
2024-08-26 16:54 ` [PATCH 2/2] drm/xe: Invalidate media_gt TLBs in PT code Matthew Brost
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox