* [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events
@ 2024-06-03 8:46 Pierre-Eric Pelloux-Prayer
2024-06-03 8:46 ` [PATCH 2/2] amdgpu: don't dereference a NULL resource in sysfs code Pierre-Eric Pelloux-Prayer
2024-06-03 9:58 ` [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events Christian König
0 siblings, 2 replies; 7+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2024-06-03 8:46 UTC (permalink / raw)
To: alexander.deucher, christian.koenig, amd-gfx; +Cc: Pierre-Eric Pelloux-Prayer
These 2 traces events are tied to a specific VM so in order for them
to be useful for a tool we need to trace the amdgpu_vm as well.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 20 ++++++++++++--------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++----
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index f539b1d00234..c84050d318d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -243,10 +243,11 @@ TRACE_EVENT(amdgpu_vm_grab_id,
);
TRACE_EVENT(amdgpu_vm_bo_map,
- TP_PROTO(struct amdgpu_bo_va *bo_va,
+ TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
struct amdgpu_bo_va_mapping *mapping),
- TP_ARGS(bo_va, mapping),
+ TP_ARGS(vm, bo_va, mapping),
TP_STRUCT__entry(
+ __field(struct amdgpu_vm *, vm)
__field(struct amdgpu_bo *, bo)
__field(long, start)
__field(long, last)
@@ -255,22 +256,24 @@ TRACE_EVENT(amdgpu_vm_bo_map,
),
TP_fast_assign(
+ __entry->vm = vm;
__entry->bo = bo_va ? bo_va->base.bo : NULL;
__entry->start = mapping->start;
__entry->last = mapping->last;
__entry->offset = mapping->offset;
__entry->flags = mapping->flags;
),
- TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
- __entry->bo, __entry->start, __entry->last,
+ TP_printk("vm=%p bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
+ __entry->vm, __entry->bo, __entry->start, __entry->last,
__entry->offset, __entry->flags)
);
TRACE_EVENT(amdgpu_vm_bo_unmap,
- TP_PROTO(struct amdgpu_bo_va *bo_va,
+ TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
struct amdgpu_bo_va_mapping *mapping),
- TP_ARGS(bo_va, mapping),
+ TP_ARGS(vm, bo_va, mapping),
TP_STRUCT__entry(
+ __field(struct amdgpu_vm *, vm)
__field(struct amdgpu_bo *, bo)
__field(long, start)
__field(long, last)
@@ -279,14 +282,15 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
),
TP_fast_assign(
+ __entry->vm = vm;
__entry->bo = bo_va ? bo_va->base.bo : NULL;
__entry->start = mapping->start;
__entry->last = mapping->last;
__entry->offset = mapping->offset;
__entry->flags = mapping->flags;
),
- TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
- __entry->bo, __entry->start, __entry->last,
+ TP_printk("vm=%p bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
+ __entry->vm, __entry->bo, __entry->start, __entry->last,
__entry->offset, __entry->flags)
);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3abfa66d72a2..e04928d2e26a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1642,7 +1642,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
amdgpu_vm_bo_moved(&bo_va->base);
- trace_amdgpu_vm_bo_map(bo_va, mapping);
+ trace_amdgpu_vm_bo_map(vm, bo_va, mapping);
}
/* Validate operation parameters to prevent potential abuse */
@@ -1834,7 +1834,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
list_del(&mapping->list);
amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
- trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
if (valid)
list_add(&mapping->list, &vm->freed);
@@ -1929,7 +1929,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
tmp->bo_va = NULL;
list_add(&tmp->list, &vm->freed);
- trace_amdgpu_vm_bo_unmap(NULL, tmp);
+ trace_amdgpu_vm_bo_unmap(vm, NULL, tmp);
}
/* Insert partial mapping before the range */
@@ -2056,7 +2056,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
list_del(&mapping->list);
amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
- trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
list_add(&mapping->list, &vm->freed);
}
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/2] amdgpu: don't dereference a NULL resource in sysfs code
2024-06-03 8:46 [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events Pierre-Eric Pelloux-Prayer
@ 2024-06-03 8:46 ` Pierre-Eric Pelloux-Prayer
2024-06-06 7:14 ` Christian König
2024-06-03 9:58 ` [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events Christian König
1 sibling, 1 reply; 7+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2024-06-03 8:46 UTC (permalink / raw)
To: alexander.deucher, christian.koenig, amd-gfx; +Cc: Pierre-Eric Pelloux-Prayer
dma_resv_trylock being successful doesn't guarantee that bo->tbo.base.resv
is not NULL, so check its validity before using it.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 63 +++++++++++-----------
1 file changed, 33 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 1eadcad1856d..6faeb9e4a572 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1594,36 +1594,39 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
u64 size;
if (dma_resv_trylock(bo->tbo.base.resv)) {
-
- switch (bo->tbo.resource->mem_type) {
- case TTM_PL_VRAM:
- if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
- placement = "VRAM VISIBLE";
- else
- placement = "VRAM";
- break;
- case TTM_PL_TT:
- placement = "GTT";
- break;
- case AMDGPU_PL_GDS:
- placement = "GDS";
- break;
- case AMDGPU_PL_GWS:
- placement = "GWS";
- break;
- case AMDGPU_PL_OA:
- placement = "OA";
- break;
- case AMDGPU_PL_PREEMPT:
- placement = "PREEMPTIBLE";
- break;
- case AMDGPU_PL_DOORBELL:
- placement = "DOORBELL";
- break;
- case TTM_PL_SYSTEM:
- default:
- placement = "CPU";
- break;
+ if (!bo->tbo.resource) {
+ placement = "NONE";
+ } else {
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_VRAM:
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ placement = "VRAM VISIBLE";
+ else
+ placement = "VRAM";
+ break;
+ case TTM_PL_TT:
+ placement = "GTT";
+ break;
+ case AMDGPU_PL_GDS:
+ placement = "GDS";
+ break;
+ case AMDGPU_PL_GWS:
+ placement = "GWS";
+ break;
+ case AMDGPU_PL_OA:
+ placement = "OA";
+ break;
+ case AMDGPU_PL_PREEMPT:
+ placement = "PREEMPTIBLE";
+ break;
+ case AMDGPU_PL_DOORBELL:
+ placement = "DOORBELL";
+ break;
+ case TTM_PL_SYSTEM:
+ default:
+ placement = "CPU";
+ break;
+ }
}
dma_resv_unlock(bo->tbo.base.resv);
} else {
--
2.40.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events
2024-06-03 8:46 [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events Pierre-Eric Pelloux-Prayer
2024-06-03 8:46 ` [PATCH 2/2] amdgpu: don't dereference a NULL resource in sysfs code Pierre-Eric Pelloux-Prayer
@ 2024-06-03 9:58 ` Christian König
2024-06-03 11:52 ` Pierre-Eric Pelloux-Prayer
1 sibling, 1 reply; 7+ messages in thread
From: Christian König @ 2024-06-03 9:58 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, alexander.deucher, amd-gfx
Am 03.06.24 um 10:46 schrieb Pierre-Eric Pelloux-Prayer:
> These 2 traces events are tied to a specific VM so in order for them
> to be useful for a tool we need to trace the amdgpu_vm as well.
The bo_va already contains the VM pointer the map/unmap operation
belongs to.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 20 ++++++++++++--------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++----
> 2 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index f539b1d00234..c84050d318d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -243,10 +243,11 @@ TRACE_EVENT(amdgpu_vm_grab_id,
> );
>
> TRACE_EVENT(amdgpu_vm_bo_map,
> - TP_PROTO(struct amdgpu_bo_va *bo_va,
> + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
> struct amdgpu_bo_va_mapping *mapping),
> - TP_ARGS(bo_va, mapping),
> + TP_ARGS(vm, bo_va, mapping),
> TP_STRUCT__entry(
> + __field(struct amdgpu_vm *, vm)
> __field(struct amdgpu_bo *, bo)
> __field(long, start)
> __field(long, last)
> @@ -255,22 +256,24 @@ TRACE_EVENT(amdgpu_vm_bo_map,
> ),
>
> TP_fast_assign(
> + __entry->vm = vm;
> __entry->bo = bo_va ? bo_va->base.bo : NULL;
> __entry->start = mapping->start;
> __entry->last = mapping->last;
> __entry->offset = mapping->offset;
> __entry->flags = mapping->flags;
> ),
> - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
> - __entry->bo, __entry->start, __entry->last,
> + TP_printk("vm=%p bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
> + __entry->vm, __entry->bo, __entry->start, __entry->last,
> __entry->offset, __entry->flags)
> );
>
> TRACE_EVENT(amdgpu_vm_bo_unmap,
> - TP_PROTO(struct amdgpu_bo_va *bo_va,
> + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
> struct amdgpu_bo_va_mapping *mapping),
> - TP_ARGS(bo_va, mapping),
> + TP_ARGS(vm, bo_va, mapping),
> TP_STRUCT__entry(
> + __field(struct amdgpu_vm *, vm)
> __field(struct amdgpu_bo *, bo)
> __field(long, start)
> __field(long, last)
> @@ -279,14 +282,15 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
> ),
>
> TP_fast_assign(
> + __entry->vm = vm;
> __entry->bo = bo_va ? bo_va->base.bo : NULL;
> __entry->start = mapping->start;
> __entry->last = mapping->last;
> __entry->offset = mapping->offset;
> __entry->flags = mapping->flags;
> ),
> - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
> - __entry->bo, __entry->start, __entry->last,
> + TP_printk("vm=%p bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
> + __entry->vm, __entry->bo, __entry->start, __entry->last,
> __entry->offset, __entry->flags)
> );
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 3abfa66d72a2..e04928d2e26a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1642,7 +1642,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
> if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
> amdgpu_vm_bo_moved(&bo_va->base);
>
> - trace_amdgpu_vm_bo_map(bo_va, mapping);
> + trace_amdgpu_vm_bo_map(vm, bo_va, mapping);
> }
>
> /* Validate operation parameters to prevent potential abuse */
> @@ -1834,7 +1834,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> list_del(&mapping->list);
> amdgpu_vm_it_remove(mapping, &vm->va);
> mapping->bo_va = NULL;
> - trace_amdgpu_vm_bo_unmap(bo_va, mapping);
> + trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
>
> if (valid)
> list_add(&mapping->list, &vm->freed);
> @@ -1929,7 +1929,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>
> tmp->bo_va = NULL;
> list_add(&tmp->list, &vm->freed);
> - trace_amdgpu_vm_bo_unmap(NULL, tmp);
> + trace_amdgpu_vm_bo_unmap(vm, NULL, tmp);
That bo_va is NULL here is probably a bug and should be fixed.
Regards,
Christian.
> }
>
> /* Insert partial mapping before the range */
> @@ -2056,7 +2056,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
> list_del(&mapping->list);
> amdgpu_vm_it_remove(mapping, &vm->va);
> mapping->bo_va = NULL;
> - trace_amdgpu_vm_bo_unmap(bo_va, mapping);
> + trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
> list_add(&mapping->list, &vm->freed);
> }
> list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events
2024-06-03 9:58 ` [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events Christian König
@ 2024-06-03 11:52 ` Pierre-Eric Pelloux-Prayer
2024-06-03 14:12 ` Christian König
0 siblings, 1 reply; 7+ messages in thread
From: Pierre-Eric Pelloux-Prayer @ 2024-06-03 11:52 UTC (permalink / raw)
To: Christian König, Pierre-Eric Pelloux-Prayer,
alexander.deucher, amd-gfx
Hi Christia,
Le 03/06/2024 à 11:58, Christian König a écrit :
> Am 03.06.24 um 10:46 schrieb Pierre-Eric Pelloux-Prayer:
>> These 2 traces events are tied to a specific VM so in order for them
>> to be useful for a tool we need to trace the amdgpu_vm as well.
>
> The bo_va already contains the VM pointer the map/unmap operation
> belongs to.
>
Indeed, I've missed that. I'll fix that in v2.
>>
>> Signed-off-by: Pierre-Eric Pelloux-Prayer
>> <pierre-eric.pelloux-prayer@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 20 ++++++++++++--------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++----
>> 2 files changed, 16 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> index f539b1d00234..c84050d318d6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> @@ -243,10 +243,11 @@ TRACE_EVENT(amdgpu_vm_grab_id,
>> );
>> TRACE_EVENT(amdgpu_vm_bo_map,
>> - TP_PROTO(struct amdgpu_bo_va *bo_va,
>> + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
>> struct amdgpu_bo_va_mapping *mapping),
>> - TP_ARGS(bo_va, mapping),
>> + TP_ARGS(vm, bo_va, mapping),
>> TP_STRUCT__entry(
>> + __field(struct amdgpu_vm *, vm)
>> __field(struct amdgpu_bo *, bo)
>> __field(long, start)
>> __field(long, last)
>> @@ -255,22 +256,24 @@ TRACE_EVENT(amdgpu_vm_bo_map,
>> ),
>> TP_fast_assign(
>> + __entry->vm = vm;
>> __entry->bo = bo_va ? bo_va->base.bo : NULL;
>> __entry->start = mapping->start;
>> __entry->last = mapping->last;
>> __entry->offset = mapping->offset;
>> __entry->flags = mapping->flags;
>> ),
>> - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx,
>> flags=%llx",
>> - __entry->bo, __entry->start, __entry->last,
>> + TP_printk("vm=%p bo=%p, start=%lx, last=%lx, offset=%010llx,
>> flags=%llx",
>> + __entry->vm, __entry->bo, __entry->start, __entry->last,
>> __entry->offset, __entry->flags)
>> );
>> TRACE_EVENT(amdgpu_vm_bo_unmap,
>> - TP_PROTO(struct amdgpu_bo_va *bo_va,
>> + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
>> struct amdgpu_bo_va_mapping *mapping),
>> - TP_ARGS(bo_va, mapping),
>> + TP_ARGS(vm, bo_va, mapping),
>> TP_STRUCT__entry(
>> + __field(struct amdgpu_vm *, vm)
>> __field(struct amdgpu_bo *, bo)
>> __field(long, start)
>> __field(long, last)
>> @@ -279,14 +282,15 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
>> ),
>> TP_fast_assign(
>> + __entry->vm = vm;
>> __entry->bo = bo_va ? bo_va->base.bo : NULL;
>> __entry->start = mapping->start;
>> __entry->last = mapping->last;
>> __entry->offset = mapping->offset;
>> __entry->flags = mapping->flags;
>> ),
>> - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx,
>> flags=%llx",
>> - __entry->bo, __entry->start, __entry->last,
>> + TP_printk("vm=%p bo=%p, start=%lx, last=%lx, offset=%010llx,
>> flags=%llx",
>> + __entry->vm, __entry->bo, __entry->start, __entry->last,
>> __entry->offset, __entry->flags)
>> );
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 3abfa66d72a2..e04928d2e26a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1642,7 +1642,7 @@ static void amdgpu_vm_bo_insert_map(struct
>> amdgpu_device *adev,
>> if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
>> amdgpu_vm_bo_moved(&bo_va->base);
>> - trace_amdgpu_vm_bo_map(bo_va, mapping);
>> + trace_amdgpu_vm_bo_map(vm, bo_va, mapping);
>> }
>> /* Validate operation parameters to prevent potential abuse */
>> @@ -1834,7 +1834,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
>> list_del(&mapping->list);
>> amdgpu_vm_it_remove(mapping, &vm->va);
>> mapping->bo_va = NULL;
>> - trace_amdgpu_vm_bo_unmap(bo_va, mapping);
>> + trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
>> if (valid)
>> list_add(&mapping->list, &vm->freed);
>> @@ -1929,7 +1929,7 @@ int amdgpu_vm_bo_clear_mappings(struct
>> amdgpu_device *adev,
>> tmp->bo_va = NULL;
>> list_add(&tmp->list, &vm->freed);
>> - trace_amdgpu_vm_bo_unmap(NULL, tmp);
>> + trace_amdgpu_vm_bo_unmap(vm, NULL, tmp);
>
> That bo_va is NULL here is probably a bug and should be fixed.
Would something like this work?
trace_amdgpu_vm_bo_unmap(tmp->bo_va, tmp);
tmp->bo_va = NULL;
list_add(&tmp->list, &vm->freed);
Thanks,
Pierre-Eric
>
> Regards,
> Christian.
>
>> }
>> /* Insert partial mapping before the range */
>> @@ -2056,7 +2056,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
>> list_del(&mapping->list);
>> amdgpu_vm_it_remove(mapping, &vm->va);
>> mapping->bo_va = NULL;
>> - trace_amdgpu_vm_bo_unmap(bo_va, mapping);
>> + trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
>> list_add(&mapping->list, &vm->freed);
>> }
>> list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events
2024-06-03 11:52 ` Pierre-Eric Pelloux-Prayer
@ 2024-06-03 14:12 ` Christian König
2024-06-06 6:52 ` Pelloux-Prayer, Pierre-Eric
0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2024-06-03 14:12 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, Christian König,
Pierre-Eric Pelloux-Prayer, alexander.deucher, amd-gfx
Am 03.06.24 um 13:52 schrieb Pierre-Eric Pelloux-Prayer:
> Hi Christia,
>
> Le 03/06/2024 à 11:58, Christian König a écrit :
>> Am 03.06.24 um 10:46 schrieb Pierre-Eric Pelloux-Prayer:
>>> These 2 traces events are tied to a specific VM so in order for them
>>> to be useful for a tool we need to trace the amdgpu_vm as well.
>>
>> The bo_va already contains the VM pointer the map/unmap operation
>> belongs to.
>>
>
> Indeed, I've missed that. I'll fix that in v2.
>
>>>
>>> Signed-off-by: Pierre-Eric Pelloux-Prayer
>>> <pierre-eric.pelloux-prayer@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 20 ++++++++++++--------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++----
>>> 2 files changed, 16 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> index f539b1d00234..c84050d318d6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> @@ -243,10 +243,11 @@ TRACE_EVENT(amdgpu_vm_grab_id,
>>> );
>>> TRACE_EVENT(amdgpu_vm_bo_map,
>>> - TP_PROTO(struct amdgpu_bo_va *bo_va,
>>> + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
>>> struct amdgpu_bo_va_mapping *mapping),
>>> - TP_ARGS(bo_va, mapping),
>>> + TP_ARGS(vm, bo_va, mapping),
>>> TP_STRUCT__entry(
>>> + __field(struct amdgpu_vm *, vm)
>>> __field(struct amdgpu_bo *, bo)
>>> __field(long, start)
>>> __field(long, last)
>>> @@ -255,22 +256,24 @@ TRACE_EVENT(amdgpu_vm_bo_map,
>>> ),
>>> TP_fast_assign(
>>> + __entry->vm = vm;
>>> __entry->bo = bo_va ? bo_va->base.bo : NULL;
>>> __entry->start = mapping->start;
>>> __entry->last = mapping->last;
>>> __entry->offset = mapping->offset;
>>> __entry->flags = mapping->flags;
>>> ),
>>> - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx,
>>> flags=%llx",
>>> - __entry->bo, __entry->start, __entry->last,
>>> + TP_printk("vm=%p bo=%p, start=%lx, last=%lx,
>>> offset=%010llx, flags=%llx",
>>> + __entry->vm, __entry->bo, __entry->start, __entry->last,
>>> __entry->offset, __entry->flags)
>>> );
>>> TRACE_EVENT(amdgpu_vm_bo_unmap,
>>> - TP_PROTO(struct amdgpu_bo_va *bo_va,
>>> + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
>>> struct amdgpu_bo_va_mapping *mapping),
>>> - TP_ARGS(bo_va, mapping),
>>> + TP_ARGS(vm, bo_va, mapping),
>>> TP_STRUCT__entry(
>>> + __field(struct amdgpu_vm *, vm)
>>> __field(struct amdgpu_bo *, bo)
>>> __field(long, start)
>>> __field(long, last)
>>> @@ -279,14 +282,15 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
>>> ),
>>> TP_fast_assign(
>>> + __entry->vm = vm;
>>> __entry->bo = bo_va ? bo_va->base.bo : NULL;
>>> __entry->start = mapping->start;
>>> __entry->last = mapping->last;
>>> __entry->offset = mapping->offset;
>>> __entry->flags = mapping->flags;
>>> ),
>>> - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx,
>>> flags=%llx",
>>> - __entry->bo, __entry->start, __entry->last,
>>> + TP_printk("vm=%p bo=%p, start=%lx, last=%lx,
>>> offset=%010llx, flags=%llx",
>>> + __entry->vm, __entry->bo, __entry->start, __entry->last,
>>> __entry->offset, __entry->flags)
>>> );
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 3abfa66d72a2..e04928d2e26a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1642,7 +1642,7 @@ static void amdgpu_vm_bo_insert_map(struct
>>> amdgpu_device *adev,
>>> if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
>>> amdgpu_vm_bo_moved(&bo_va->base);
>>> - trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> + trace_amdgpu_vm_bo_map(vm, bo_va, mapping);
>>> }
>>> /* Validate operation parameters to prevent potential abuse */
>>> @@ -1834,7 +1834,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
>>> *adev,
>>> list_del(&mapping->list);
>>> amdgpu_vm_it_remove(mapping, &vm->va);
>>> mapping->bo_va = NULL;
>>> - trace_amdgpu_vm_bo_unmap(bo_va, mapping);
>>> + trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
>>> if (valid)
>>> list_add(&mapping->list, &vm->freed);
>>> @@ -1929,7 +1929,7 @@ int amdgpu_vm_bo_clear_mappings(struct
>>> amdgpu_device *adev,
>>> tmp->bo_va = NULL;
>>> list_add(&tmp->list, &vm->freed);
>>> - trace_amdgpu_vm_bo_unmap(NULL, tmp);
>>> + trace_amdgpu_vm_bo_unmap(vm, NULL, tmp);
>>
>> That bo_va is NULL here is probably a bug and should be fixed.
>
> Would something like this work?
>
> trace_amdgpu_vm_bo_unmap(tmp->bo_va, tmp);
> tmp->bo_va = NULL;
> list_add(&tmp->list, &vm->freed);
It's not 100% accurate because only parts of the mapping is unmapped,
but yes I think that should work.
Regards,
Christian.
>
> Thanks,
> Pierre-Eric
>
>
>>
>> Regards,
>> Christian.
>>
>>> }
>>> /* Insert partial mapping before the range */
>>> @@ -2056,7 +2056,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
>>> list_del(&mapping->list);
>>> amdgpu_vm_it_remove(mapping, &vm->va);
>>> mapping->bo_va = NULL;
>>> - trace_amdgpu_vm_bo_unmap(bo_va, mapping);
>>> + trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
>>> list_add(&mapping->list, &vm->freed);
>>> }
>>> list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events
2024-06-03 14:12 ` Christian König
@ 2024-06-06 6:52 ` Pelloux-Prayer, Pierre-Eric
0 siblings, 0 replies; 7+ messages in thread
From: Pelloux-Prayer, Pierre-Eric @ 2024-06-06 6:52 UTC (permalink / raw)
To: Christian König, Pierre-Eric Pelloux-Prayer,
Koenig, Christian, Deucher, Alexander,
amd-gfx@lists.freedesktop.org
[-- Attachment #1: Type: text/plain, Size: 6853 bytes --]
[AMD Official Use Only - AMD Internal Distribution Only]
Let's drop this patch: the amdgpu_vm_*_ptes events already contain all the info I need.
Thanks,
Pierre-Eric
________________________________
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Monday, June 3, 2024 4:12 PM
To: Pierre-Eric Pelloux-Prayer <pierre-eric@damsy.net>; Koenig, Christian <Christian.Koenig@amd.com>; Pelloux-Prayer, Pierre-Eric <Pierre-eric.Pelloux-prayer@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events
Am 03.06.24 um 13:52 schrieb Pierre-Eric Pelloux-Prayer:
> Hi Christia,
>
> Le 03/06/2024 à 11:58, Christian König a écrit :
>> Am 03.06.24 um 10:46 schrieb Pierre-Eric Pelloux-Prayer:
>>> These 2 traces events are tied to a specific VM so in order for them
>>> to be useful for a tool we need to trace the amdgpu_vm as well.
>>
>> The bo_va already contains the VM pointer the map/unmap operation
>> belongs to.
>>
>
> Indeed, I've missed that. I'll fix that in v2.
>
>>>
>>> Signed-off-by: Pierre-Eric Pelloux-Prayer
>>> <pierre-eric.pelloux-prayer@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 20 ++++++++++++--------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++----
>>> 2 files changed, 16 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> index f539b1d00234..c84050d318d6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> @@ -243,10 +243,11 @@ TRACE_EVENT(amdgpu_vm_grab_id,
>>> );
>>> TRACE_EVENT(amdgpu_vm_bo_map,
>>> - TP_PROTO(struct amdgpu_bo_va *bo_va,
>>> + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
>>> struct amdgpu_bo_va_mapping *mapping),
>>> - TP_ARGS(bo_va, mapping),
>>> + TP_ARGS(vm, bo_va, mapping),
>>> TP_STRUCT__entry(
>>> + __field(struct amdgpu_vm *, vm)
>>> __field(struct amdgpu_bo *, bo)
>>> __field(long, start)
>>> __field(long, last)
>>> @@ -255,22 +256,24 @@ TRACE_EVENT(amdgpu_vm_bo_map,
>>> ),
>>> TP_fast_assign(
>>> + __entry->vm = vm;
>>> __entry->bo = bo_va ? bo_va->base.bo : NULL;
>>> __entry->start = mapping->start;
>>> __entry->last = mapping->last;
>>> __entry->offset = mapping->offset;
>>> __entry->flags = mapping->flags;
>>> ),
>>> - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx,
>>> flags=%llx",
>>> - __entry->bo, __entry->start, __entry->last,
>>> + TP_printk("vm=%p bo=%p, start=%lx, last=%lx,
>>> offset=%010llx, flags=%llx",
>>> + __entry->vm, __entry->bo, __entry->start, __entry->last,
>>> __entry->offset, __entry->flags)
>>> );
>>> TRACE_EVENT(amdgpu_vm_bo_unmap,
>>> - TP_PROTO(struct amdgpu_bo_va *bo_va,
>>> + TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va,
>>> struct amdgpu_bo_va_mapping *mapping),
>>> - TP_ARGS(bo_va, mapping),
>>> + TP_ARGS(vm, bo_va, mapping),
>>> TP_STRUCT__entry(
>>> + __field(struct amdgpu_vm *, vm)
>>> __field(struct amdgpu_bo *, bo)
>>> __field(long, start)
>>> __field(long, last)
>>> @@ -279,14 +282,15 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
>>> ),
>>> TP_fast_assign(
>>> + __entry->vm = vm;
>>> __entry->bo = bo_va ? bo_va->base.bo : NULL;
>>> __entry->start = mapping->start;
>>> __entry->last = mapping->last;
>>> __entry->offset = mapping->offset;
>>> __entry->flags = mapping->flags;
>>> ),
>>> - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx,
>>> flags=%llx",
>>> - __entry->bo, __entry->start, __entry->last,
>>> + TP_printk("vm=%p bo=%p, start=%lx, last=%lx,
>>> offset=%010llx, flags=%llx",
>>> + __entry->vm, __entry->bo, __entry->start, __entry->last,
>>> __entry->offset, __entry->flags)
>>> );
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 3abfa66d72a2..e04928d2e26a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1642,7 +1642,7 @@ static void amdgpu_vm_bo_insert_map(struct
>>> amdgpu_device *adev,
>>> if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
>>> amdgpu_vm_bo_moved(&bo_va->base);
>>> - trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> + trace_amdgpu_vm_bo_map(vm, bo_va, mapping);
>>> }
>>> /* Validate operation parameters to prevent potential abuse */
>>> @@ -1834,7 +1834,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device
>>> *adev,
>>> list_del(&mapping->list);
>>> amdgpu_vm_it_remove(mapping, &vm->va);
>>> mapping->bo_va = NULL;
>>> - trace_amdgpu_vm_bo_unmap(bo_va, mapping);
>>> + trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
>>> if (valid)
>>> list_add(&mapping->list, &vm->freed);
>>> @@ -1929,7 +1929,7 @@ int amdgpu_vm_bo_clear_mappings(struct
>>> amdgpu_device *adev,
>>> tmp->bo_va = NULL;
>>> list_add(&tmp->list, &vm->freed);
>>> - trace_amdgpu_vm_bo_unmap(NULL, tmp);
>>> + trace_amdgpu_vm_bo_unmap(vm, NULL, tmp);
>>
>> That bo_va is NULL here is probably a bug and should be fixed.
>
> Would something like this work?
>
> trace_amdgpu_vm_bo_unmap(tmp->bo_va, tmp);
> tmp->bo_va = NULL;
> list_add(&tmp->list, &vm->freed);
It's not 100% accurate because only parts of the mapping is unmapped,
but yes I think that should work.
Regards,
Christian.
>
> Thanks,
> Pierre-Eric
>
>
>>
>> Regards,
>> Christian.
>>
>>> }
>>> /* Insert partial mapping before the range */
>>> @@ -2056,7 +2056,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
>>> list_del(&mapping->list);
>>> amdgpu_vm_it_remove(mapping, &vm->va);
>>> mapping->bo_va = NULL;
>>> - trace_amdgpu_vm_bo_unmap(bo_va, mapping);
>>> + trace_amdgpu_vm_bo_unmap(vm, bo_va, mapping);
>>> list_add(&mapping->list, &vm->freed);
>>> }
>>> list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
[-- Attachment #2: Type: text/html, Size: 14438 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] amdgpu: don't dereference a NULL resource in sysfs code
2024-06-03 8:46 ` [PATCH 2/2] amdgpu: don't dereference a NULL resource in sysfs code Pierre-Eric Pelloux-Prayer
@ 2024-06-06 7:14 ` Christian König
0 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2024-06-06 7:14 UTC (permalink / raw)
To: Pierre-Eric Pelloux-Prayer, alexander.deucher, christian.koenig,
amd-gfx
Am 03.06.24 um 10:46 schrieb Pierre-Eric Pelloux-Prayer:
> dma_resv_trylock being successful doesn't guarantee that bo->tbo.base.resv
> is not NULL, so check its validity before using it.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Please make sure that checkpatch doesn't complain about anything.
With that done the patch is Reviewed-by: Christian König
<christian.koenig@amd.com>
Regards,
Christian.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 63 +++++++++++-----------
> 1 file changed, 33 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 1eadcad1856d..6faeb9e4a572 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1594,36 +1594,39 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
> u64 size;
>
> if (dma_resv_trylock(bo->tbo.base.resv)) {
> -
> - switch (bo->tbo.resource->mem_type) {
> - case TTM_PL_VRAM:
> - if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
> - placement = "VRAM VISIBLE";
> - else
> - placement = "VRAM";
> - break;
> - case TTM_PL_TT:
> - placement = "GTT";
> - break;
> - case AMDGPU_PL_GDS:
> - placement = "GDS";
> - break;
> - case AMDGPU_PL_GWS:
> - placement = "GWS";
> - break;
> - case AMDGPU_PL_OA:
> - placement = "OA";
> - break;
> - case AMDGPU_PL_PREEMPT:
> - placement = "PREEMPTIBLE";
> - break;
> - case AMDGPU_PL_DOORBELL:
> - placement = "DOORBELL";
> - break;
> - case TTM_PL_SYSTEM:
> - default:
> - placement = "CPU";
> - break;
> + if (!bo->tbo.resource) {
> + placement = "NONE";
> + } else {
> + switch (bo->tbo.resource->mem_type) {
> + case TTM_PL_VRAM:
> + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
> + placement = "VRAM VISIBLE";
> + else
> + placement = "VRAM";
> + break;
> + case TTM_PL_TT:
> + placement = "GTT";
> + break;
> + case AMDGPU_PL_GDS:
> + placement = "GDS";
> + break;
> + case AMDGPU_PL_GWS:
> + placement = "GWS";
> + break;
> + case AMDGPU_PL_OA:
> + placement = "OA";
> + break;
> + case AMDGPU_PL_PREEMPT:
> + placement = "PREEMPTIBLE";
> + break;
> + case AMDGPU_PL_DOORBELL:
> + placement = "DOORBELL";
> + break;
> + case TTM_PL_SYSTEM:
> + default:
> + placement = "CPU";
> + break;
> + }
> }
> dma_resv_unlock(bo->tbo.base.resv);
> } else {
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2024-06-06 7:14 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-03 8:46 [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events Pierre-Eric Pelloux-Prayer
2024-06-03 8:46 ` [PATCH 2/2] amdgpu: don't dereference a NULL resource in sysfs code Pierre-Eric Pelloux-Prayer
2024-06-06 7:14 ` Christian König
2024-06-03 9:58 ` [PATCH 1/2] amdgpu: add the amdgpu_vm ptr in the vm_bo_map/unmap events Christian König
2024-06-03 11:52 ` Pierre-Eric Pelloux-Prayer
2024-06-03 14:12 ` Christian König
2024-06-06 6:52 ` Pelloux-Prayer, Pierre-Eric
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox