* [PATCH] drm/amdkfd: Accounting pdd vram_usage for svm
@ 2024-10-04 20:28 Philip Yang
2024-10-09 21:20 ` Felix Kuehling
0 siblings, 1 reply; 4+ messages in thread
From: Philip Yang @ 2024-10-04 20:28 UTC (permalink / raw)
To: amd-gfx; +Cc: Felix.Kuehling, Philip Yang
Per process device data pdd->vram_usage is used by rocm-smi to report
VRAM usage, this is currently missing the svm_bo usage accounting, so
"rocm-smi --showpids" per process report is incorrect.
Add pdd->vram_usage accounting for svm_bo and change type to atomic64_t
because it is updated outside process mutex now.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++---
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++--
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 22 ++++++++++++++++++++++
4 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a1f191a5984b..065d87841459 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
size >>= 1;
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+ atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
}
mutex_unlock(&p->mutex);
@@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
kfd_process_device_remove_obj_handle(
pdd, GET_IDR_HANDLE(args->handle));
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+ atomic64_sub(size, &pdd->vram_usage);
err_unlock:
err_pdd:
@@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
bo_bucket->restored_offset = offset;
/* Update the VRAM usage count */
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+ atomic64_add(bo_bucket->size, &pdd->vram_usage);
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6a5bf88cc232..9e5ca0b93b2a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -775,7 +775,7 @@ struct kfd_process_device {
enum kfd_pdd_bound bound;
/* VRAM usage */
- uint64_t vram_usage;
+ atomic64_t vram_usage;
struct attribute attr_vram;
char vram_filename[MAX_SYSFS_FILENAME_LEN];
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 7909dfd158be..4810521736a9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
} else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
- return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
} else if (strncmp(attr->name, "sdma_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_sdma);
@@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
pdd->runtime_inuse = false;
- pdd->vram_usage = 0;
+ atomic64_set(&pdd->vram_usage, 0);
pdd->sdma_past_activity_counter = 0;
pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 857ec6f23bba..61891ea6b1ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -379,6 +379,7 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
static void svm_range_bo_release(struct kref *kref)
{
struct svm_range_bo *svm_bo;
+ struct mm_struct *mm = NULL;
svm_bo = container_of(kref, struct svm_range_bo, kref);
pr_debug("svm_bo 0x%p\n", svm_bo);
@@ -405,6 +406,22 @@ static void svm_range_bo_release(struct kref *kref)
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
+
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ struct kfd_process_device *pdd;
+ struct kfd_process *p;
+
+ mm = svm_bo->eviction_fence->mm;
+ p = kfd_lookup_process_by_mm(mm);
+ if (p) {
+ pdd = kfd_get_process_device_data(svm_bo->node, p);
+ if (pdd)
+ atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+ kfd_unref_process(p);
+ }
+ mmput(mm);
+ }
+
if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
/* We're not in the eviction worker. Signal the fence. */
dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -532,6 +549,7 @@ int
svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear)
{
+ struct kfd_process_device *pdd;
struct amdgpu_bo_param bp;
struct svm_range_bo *svm_bo;
struct amdgpu_bo_user *ubo;
@@ -623,6 +641,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
list_add(&prange->svm_bo_list, &svm_bo->range_list);
spin_unlock(&svm_bo->list_lock);
+ pdd = svm_range_get_pdd_by_node(prange, node);
+ if (pdd)
+ atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
return 0;
reserve_bo_failed:
--
2.43.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdkfd: Accounting pdd vram_usage for svm
2024-10-04 20:28 [PATCH] drm/amdkfd: Accounting pdd vram_usage for svm Philip Yang
@ 2024-10-09 21:20 ` Felix Kuehling
2024-10-11 13:23 ` Philip Yang
0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2024-10-09 21:20 UTC (permalink / raw)
To: Philip Yang, amd-gfx
On 2024-10-04 16:28, Philip Yang wrote:
> Per process device data pdd->vram_usage is used by rocm-smi to report
> VRAM usage, this is currently missing the svm_bo usage accounting, so
> "rocm-smi --showpids" per process report is incorrect.
>
> Add pdd->vram_usage accounting for svm_bo and change type to atomic64_t
> because it is updated outside process mutex now.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++---
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++--
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 22 ++++++++++++++++++++++
> 4 files changed, 28 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index a1f191a5984b..065d87841459 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>
> if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
> size >>= 1;
> - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
> + atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
> }
>
> mutex_unlock(&p->mutex);
> @@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
> kfd_process_device_remove_obj_handle(
> pdd, GET_IDR_HANDLE(args->handle));
>
> - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
> + atomic64_sub(size, &pdd->vram_usage);
>
> err_unlock:
> err_pdd:
> @@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
> } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> bo_bucket->restored_offset = offset;
> /* Update the VRAM usage count */
> - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
> + atomic64_add(bo_bucket->size, &pdd->vram_usage);
> }
> return 0;
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 6a5bf88cc232..9e5ca0b93b2a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -775,7 +775,7 @@ struct kfd_process_device {
> enum kfd_pdd_bound bound;
>
> /* VRAM usage */
> - uint64_t vram_usage;
> + atomic64_t vram_usage;
> struct attribute attr_vram;
> char vram_filename[MAX_SYSFS_FILENAME_LEN];
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 7909dfd158be..4810521736a9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
> } else if (strncmp(attr->name, "vram_", 5) == 0) {
> struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
> attr_vram);
> - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
> + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
> } else if (strncmp(attr->name, "sdma_", 5) == 0) {
> struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
> attr_sdma);
> @@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
> pdd->bound = PDD_UNBOUND;
> pdd->already_dequeued = false;
> pdd->runtime_inuse = false;
> - pdd->vram_usage = 0;
> + atomic64_set(&pdd->vram_usage, 0);
> pdd->sdma_past_activity_counter = 0;
> pdd->user_gpu_id = dev->id;
> atomic64_set(&pdd->evict_duration_counter, 0);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 857ec6f23bba..61891ea6b1ac 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -379,6 +379,7 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
> static void svm_range_bo_release(struct kref *kref)
> {
> struct svm_range_bo *svm_bo;
> + struct mm_struct *mm = NULL;
>
> svm_bo = container_of(kref, struct svm_range_bo, kref);
> pr_debug("svm_bo 0x%p\n", svm_bo);
> @@ -405,6 +406,22 @@ static void svm_range_bo_release(struct kref *kref)
> spin_lock(&svm_bo->list_lock);
> }
> spin_unlock(&svm_bo->list_lock);
> +
> + if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
> + struct kfd_process_device *pdd;
> + struct kfd_process *p;
Move struct mm_struct *mm here as well. It's only needed in this block
and should not be used outside.
> +
> + mm = svm_bo->eviction_fence->mm;
> + p = kfd_lookup_process_by_mm(mm);
> + if (p) {
> + pdd = kfd_get_process_device_data(svm_bo->node, p);
> + if (pdd)
> + atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
> + kfd_unref_process(p);
> + }
> + mmput(mm);
> + }
> +
> if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
> /* We're not in the eviction worker. Signal the fence. */
> dma_fence_signal(&svm_bo->eviction_fence->base);
> @@ -532,6 +549,7 @@ int
> svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
> bool clear)
> {
> + struct kfd_process_device *pdd;
> struct amdgpu_bo_param bp;
> struct svm_range_bo *svm_bo;
> struct amdgpu_bo_user *ubo;
> @@ -623,6 +641,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
> list_add(&prange->svm_bo_list, &svm_bo->range_list);
> spin_unlock(&svm_bo->list_lock);
>
> + pdd = svm_range_get_pdd_by_node(prange, node);
> + if (pdd)
> + atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
> +
Would it make sense to save the pdd pointer in the svm_bo struct? The
effort to look up the mm, process and pdd in svm_range_bo_release seems
quite high.
You could replace svm_bo->node with svm_bo->pdd. Then you can still get
the node with svm_bo->pdd->dev without growing the size of the
structure. This assumes that the svm_bo cannot outlive the pdd.
Regards,
Felix
> return 0;
>
> reserve_bo_failed:
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdkfd: Accounting pdd vram_usage for svm
2024-10-09 21:20 ` Felix Kuehling
@ 2024-10-11 13:23 ` Philip Yang
2024-10-11 14:42 ` Felix Kuehling
0 siblings, 1 reply; 4+ messages in thread
From: Philip Yang @ 2024-10-11 13:23 UTC (permalink / raw)
To: Felix Kuehling, Philip Yang, amd-gfx
[-- Attachment #1: Type: text/html, Size: 13643 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdkfd: Accounting pdd vram_usage for svm
2024-10-11 13:23 ` Philip Yang
@ 2024-10-11 14:42 ` Felix Kuehling
0 siblings, 0 replies; 4+ messages in thread
From: Felix Kuehling @ 2024-10-11 14:42 UTC (permalink / raw)
To: Philip Yang, Philip Yang, amd-gfx
On 2024-10-11 9:23, Philip Yang wrote:
>
> On 2024-10-09 17:20, Felix Kuehling wrote:
>>
>> On 2024-10-04 16:28, Philip Yang wrote:
>>> Per process device data pdd->vram_usage is used by rocm-smi to report
>>> VRAM usage, this is currently missing the svm_bo usage accounting, so
>>> "rocm-smi --showpids" per process report is incorrect.
>>>
>>> Add pdd->vram_usage accounting for svm_bo and change type to atomic64_t
>>> because it is updated outside process mutex now.
>>>
>>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++---
>>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
>>> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++--
>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 22 ++++++++++++++++++++++
>>> 4 files changed, 28 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index a1f191a5984b..065d87841459 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>>> if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
>>> size >>= 1;
>>> - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
>>> + atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
>>> }
>>> mutex_unlock(&p->mutex);
>>> @@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
>>> kfd_process_device_remove_obj_handle(
>>> pdd, GET_IDR_HANDLE(args->handle));
>>> - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
>>> + atomic64_sub(size, &pdd->vram_usage);
>>> err_unlock:
>>> err_pdd:
>>> @@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
>>> } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>>> bo_bucket->restored_offset = offset;
>>> /* Update the VRAM usage count */
>>> - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
>>> + atomic64_add(bo_bucket->size, &pdd->vram_usage);
>>> }
>>> return 0;
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index 6a5bf88cc232..9e5ca0b93b2a 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -775,7 +775,7 @@ struct kfd_process_device {
>>> enum kfd_pdd_bound bound;
>>> /* VRAM usage */
>>> - uint64_t vram_usage;
>>> + atomic64_t vram_usage;
>>> struct attribute attr_vram;
>>> char vram_filename[MAX_SYSFS_FILENAME_LEN];
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index 7909dfd158be..4810521736a9 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
>>> } else if (strncmp(attr->name, "vram_", 5) == 0) {
>>> struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
>>> attr_vram);
>>> - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
>>> + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
>>> } else if (strncmp(attr->name, "sdma_", 5) == 0) {
>>> struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
>>> attr_sdma);
>>> @@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
>>> pdd->bound = PDD_UNBOUND;
>>> pdd->already_dequeued = false;
>>> pdd->runtime_inuse = false;
>>> - pdd->vram_usage = 0;
>>> + atomic64_set(&pdd->vram_usage, 0);
>>> pdd->sdma_past_activity_counter = 0;
>>> pdd->user_gpu_id = dev->id;
>>> atomic64_set(&pdd->evict_duration_counter, 0);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 857ec6f23bba..61891ea6b1ac 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -379,6 +379,7 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
>>> static void svm_range_bo_release(struct kref *kref)
>>> {
>>> struct svm_range_bo *svm_bo;
>>> + struct mm_struct *mm = NULL;
>>> svm_bo = container_of(kref, struct svm_range_bo, kref);
>>> pr_debug("svm_bo 0x%p\n", svm_bo);
>>> @@ -405,6 +406,22 @@ static void svm_range_bo_release(struct kref *kref)
>>> spin_lock(&svm_bo->list_lock);
>>> }
>>> spin_unlock(&svm_bo->list_lock);
>>> +
>>> + if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
>>> + struct kfd_process_device *pdd;
>>> + struct kfd_process *p;
>>
>> Move struct mm_struct *mm here as well. It's only needed in this block and should not be used outside.
> yes, mm is only used here. If changing svm_bo->node to svm_bo->pdd, the entire block will be dropped.
>>
>>
>>> +
>>> + mm = svm_bo->eviction_fence->mm;
>>> + p = kfd_lookup_process_by_mm(mm);
>>> + if (p) {
>>> + pdd = kfd_get_process_device_data(svm_bo->node, p);
>>> + if (pdd)
>>> + atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
>>> + kfd_unref_process(p);
>>> + }
>>> + mmput(mm);
>>> + }
>>> +
>>> if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
>>> /* We're not in the eviction worker. Signal the fence. */
>>> dma_fence_signal(&svm_bo->eviction_fence->base);
>>> @@ -532,6 +549,7 @@ int
>>> svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>>> bool clear)
>>> {
>>> + struct kfd_process_device *pdd;
>>> struct amdgpu_bo_param bp;
>>> struct svm_range_bo *svm_bo;
>>> struct amdgpu_bo_user *ubo;
>>> @@ -623,6 +641,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>>> list_add(&prange->svm_bo_list, &svm_bo->range_list);
>>> spin_unlock(&svm_bo->list_lock);
>>> + pdd = svm_range_get_pdd_by_node(prange, node);
>>> + if (pdd)
>>> + atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
>>> +
>>
>> Would it make sense to save the pdd pointer in the svm_bo struct? The effort to look up the mm, process and pdd in svm_range_bo_release seems quite high.
> Thanks for the good idea.
>>
>> You could replace svm_bo->node with svm_bo->pdd. Then you can still get the node with svm_bo->pdd->dev without growing the size of the structure. This assumes that the svm_bo cannot outlive the pdd.
>
> yes, svm_range_list_fini is called before calling kfd_process_destroy_pdds after process exit, so svm_bo->pdd will always be valid. I will send new patch series.
I think that's OK. kfd_process_destroy_pdds happens in the cleanup worker that runs after the mm_struct is gone. So all the page references should be gone.
But there could be issues if a page was shared with another process that holds on to page reference that still point to pdds of processes that don't exist any more.
Regards,
Felix
>
> Regards,
>
> Philip
>
>>
>> Regards,
>> Felix
>>
>>
>>> return 0;
>>> reserve_bo_failed:
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-10-11 14:42 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-04 20:28 [PATCH] drm/amdkfd: Accounting pdd vram_usage for svm Philip Yang
2024-10-09 21:20 ` Felix Kuehling
2024-10-11 13:23 ` Philip Yang
2024-10-11 14:42 ` Felix Kuehling
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.