* [PATCH] drm/amdgpu: Fix multi-level page table bugs for large BOs v3
@ 2017-03-29 17:22 Felix Kuehling
[not found] ` <1490808171-21714-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2017-03-29 17:22 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
christian.koenig-5C7GfCeVMHo, Jerry.Zhang-5C7GfCeVMHo,
David1.Zhou-5C7GfCeVMHo
Cc: michel-otUistvHUpPR7s880joybQ, Felix Kuehling
Fix the start/end address calculation for address ranges that span
multiple page directories in amdgpu_vm_alloc_levels.
Add error messages if page tables aren't found. Otherwise the page
table update would just fail silently.
v2:
* Change WARN_ON to WARN_ON_ONCE
* Move masking of high address bits to caller
* Add range-check for "from" and "to"
v3:
* Replace WARN_ON_ONCE in get_pt with pr_err in caller
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 818747f..c11d15e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -275,13 +275,18 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
}
- from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
- to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
+ from = saddr >> shift;
+ to = eaddr >> shift;
+ if (from >= amdgpu_vm_num_entries(adev, level) ||
+ to >= amdgpu_vm_num_entries(adev, level))
+ return -EINVAL;
if (to > parent->last_entry_used)
parent->last_entry_used = to;
++level;
+ saddr = saddr & ((1 << shift) - 1);
+ eaddr = eaddr & ((1 << shift) - 1);
/* walk over the address space and allocate the page tables */
for (pt_idx = from; pt_idx <= to; ++pt_idx) {
@@ -312,8 +317,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
}
if (level < adev->vm_manager.num_level) {
- r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
- eaddr, level);
+ uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
+ uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
+ ((1 << shift) - 1);
+ r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
+ sub_eaddr, level);
if (r)
return r;
}
@@ -990,8 +998,10 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
/* initialize the variables */
addr = start;
pt = amdgpu_vm_get_pt(params, addr);
- if (!pt)
+ if (!pt) {
+ pr_err("PT not found, aborting update_ptes\n");
return;
+ }
if (params->shadow) {
if (!pt->shadow)
@@ -1015,8 +1025,10 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
/* walk over the address space and update the page tables */
while (addr < end) {
pt = amdgpu_vm_get_pt(params, addr);
- if (!pt)
+ if (!pt) {
+ pr_err("PT not found, aborting update_ptes\n");
return;
+ }
if (params->shadow) {
if (!pt->shadow)
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: Fix multi-level page table bugs for large BOs v3
[not found] ` <1490808171-21714-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-29 17:40 ` Christian König
[not found] ` <3beac896-9cb5-de7f-a94e-2a0f4621b537-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2017-03-29 17:40 UTC (permalink / raw)
To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
Jerry.Zhang-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
Cc: michel-otUistvHUpPR7s880joybQ
Am 29.03.2017 um 19:22 schrieb Felix Kuehling:
> Fix the start/end address calculation for address ranges that span
> multiple page directories in amdgpu_vm_alloc_levels.
>
> Add error messages if page tables aren't found. Otherwise the page
> table update would just fail silently.
>
> v2:
> * Change WARN_ON to WARN_ON_ONCE
> * Move masking of high address bits to caller
> * Add range-check for "from" and "to"
> v3:
> * Replace WARN_ON_ONCE in get_pt with pr_err in caller
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Mhm, I still don't like that we modify saddr/eaddr here because I wanted
to resolve the recursion sooner or later.
But anyway that is work for a future patch, this one is Reviewed-by:
Christian König <christian.koenig@amd.com>.
Regards,
Christian.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++++++++++------
> 1 file changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 818747f..c11d15e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -275,13 +275,18 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
> }
>
> - from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
> - to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
> + from = saddr >> shift;
> + to = eaddr >> shift;
> + if (from >= amdgpu_vm_num_entries(adev, level) ||
> + to >= amdgpu_vm_num_entries(adev, level))
> + return -EINVAL;
>
> if (to > parent->last_entry_used)
> parent->last_entry_used = to;
>
> ++level;
> + saddr = saddr & ((1 << shift) - 1);
> + eaddr = eaddr & ((1 << shift) - 1);
>
> /* walk over the address space and allocate the page tables */
> for (pt_idx = from; pt_idx <= to; ++pt_idx) {
> @@ -312,8 +317,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> }
>
> if (level < adev->vm_manager.num_level) {
> - r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
> - eaddr, level);
> + uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
> + uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
> + ((1 << shift) - 1);
> + r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
> + sub_eaddr, level);
> if (r)
> return r;
> }
> @@ -990,8 +998,10 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> /* initialize the variables */
> addr = start;
> pt = amdgpu_vm_get_pt(params, addr);
> - if (!pt)
> + if (!pt) {
> + pr_err("PT not found, aborting update_ptes\n");
> return;
> + }
>
> if (params->shadow) {
> if (!pt->shadow)
> @@ -1015,8 +1025,10 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> /* walk over the address space and update the page tables */
> while (addr < end) {
> pt = amdgpu_vm_get_pt(params, addr);
> - if (!pt)
> + if (!pt) {
> + pr_err("PT not found, aborting update_ptes\n");
> return;
> + }
>
> if (params->shadow) {
> if (!pt->shadow)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: Fix multi-level page table bugs for large BOs v3
[not found] ` <3beac896-9cb5-de7f-a94e-2a0f4621b537-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-29 18:07 ` Felix Kuehling
[not found] ` <7ea76115-90cc-3153-a410-d39bc291f697-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2017-03-29 18:07 UTC (permalink / raw)
To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
Jerry.Zhang-5C7GfCeVMHo, David1.Zhou-5C7GfCeVMHo
Cc: michel-otUistvHUpPR7s880joybQ
On 17-03-29 01:40 PM, Christian König wrote:
> Am 29.03.2017 um 19:22 schrieb Felix Kuehling:
>> Fix the start/end address calculation for address ranges that span
>> multiple page directories in amdgpu_vm_alloc_levels.
>>
>> Add error messages if page tables aren't found. Otherwise the page
>> table update would just fail silently.
>>
>> v2:
>> * Change WARN_ON to WARN_ON_ONCE
>> * Move masking of high address bits to caller
>> * Add range-check for "from" and "to"
>> v3:
>> * Replace WARN_ON_ONCE in get_pt with pr_err in caller
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>
> Mhm, I still don't like that we modify saddr/eaddr here because I
> wanted to resolve the recursion sooner or later.
>
> But anyway that is work for a future patch, this one is Reviewed-by:
> Christian König <christian.koenig@amd.com>.
Thank you, I appreciate it. I realize it's probably not perfect. But I'm
just looking at the intention of the original code and trying to make it
work correctly for KFD so we can continue integrating amdgpu changes
(and important fixes, such as VM fault handling) into the KFD branch.
That's also why I'm a bit in a rush. This was actually blocking KFD.
We take a different approach from hybrid. We do a nightly git-merge from
amd-staging-4.9 and running it through our automated pre-submission
tests. If the merge doesn't have conflicts that process is completely
automated (except for someone pressing the Submit button in the end). If
the testing fails, we have to block integrations until the problem is
fixed upstream. So I'm fixing the problem upstream. ;)
Regards,
Felix
>
> Regards,
> Christian.
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++++++++++------
>> 1 file changed, 18 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 818747f..c11d15e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -275,13 +275,18 @@ static int amdgpu_vm_alloc_levels(struct
>> amdgpu_device *adev,
>> memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
>> }
>> - from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
>> - to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
>> + from = saddr >> shift;
>> + to = eaddr >> shift;
>> + if (from >= amdgpu_vm_num_entries(adev, level) ||
>> + to >= amdgpu_vm_num_entries(adev, level))
>> + return -EINVAL;
>> if (to > parent->last_entry_used)
>> parent->last_entry_used = to;
>> ++level;
>> + saddr = saddr & ((1 << shift) - 1);
>> + eaddr = eaddr & ((1 << shift) - 1);
>> /* walk over the address space and allocate the page tables */
>> for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>> @@ -312,8 +317,11 @@ static int amdgpu_vm_alloc_levels(struct
>> amdgpu_device *adev,
>> }
>> if (level < adev->vm_manager.num_level) {
>> - r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
>> - eaddr, level);
>> + uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
>> + uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>> + ((1 << shift) - 1);
>> + r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
>> + sub_eaddr, level);
>> if (r)
>> return r;
>> }
>> @@ -990,8 +998,10 @@ static void amdgpu_vm_update_ptes(struct
>> amdgpu_pte_update_params *params,
>> /* initialize the variables */
>> addr = start;
>> pt = amdgpu_vm_get_pt(params, addr);
>> - if (!pt)
>> + if (!pt) {
>> + pr_err("PT not found, aborting update_ptes\n");
>> return;
>> + }
>> if (params->shadow) {
>> if (!pt->shadow)
>> @@ -1015,8 +1025,10 @@ static void amdgpu_vm_update_ptes(struct
>> amdgpu_pte_update_params *params,
>> /* walk over the address space and update the page tables */
>> while (addr < end) {
>> pt = amdgpu_vm_get_pt(params, addr);
>> - if (!pt)
>> + if (!pt) {
>> + pr_err("PT not found, aborting update_ptes\n");
>> return;
>> + }
>> if (params->shadow) {
>> if (!pt->shadow)
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: Fix multi-level page table bugs for large BOs v3
[not found] ` <7ea76115-90cc-3153-a410-d39bc291f697-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-30 1:59 ` Zhang, Jerry (Junwei)
0 siblings, 0 replies; 4+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-03-30 1:59 UTC (permalink / raw)
To: Felix Kuehling, Christian König,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, David1.Zhou-5C7GfCeVMHo
Cc: michel-otUistvHUpPR7s880joybQ
On 03/30/2017 02:07 AM, Felix Kuehling wrote:
> On 17-03-29 01:40 PM, Christian König wrote:
>> Am 29.03.2017 um 19:22 schrieb Felix Kuehling:
>>> Fix the start/end address calculation for address ranges that span
>>> multiple page directories in amdgpu_vm_alloc_levels.
>>>
>>> Add error messages if page tables aren't found. Otherwise the page
>>> table update would just fail silently.
>>>
>>> v2:
>>> * Change WARN_ON to WARN_ON_ONCE
>>> * Move masking of high address bits to caller
>>> * Add range-check for "from" and "to"
>>> v3:
>>> * Replace WARN_ON_ONCE in get_pt with pr_err in caller
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>
>> Mhm, I still don't like that we modify saddr/eaddr here because I
>> wanted to resolve the recursion sooner or later.
>>
>> But anyway that is work for a future patch, this one is Reviewed-by:
>> Christian König <christian.koenig@amd.com>.
>
> Thank you, I appreciate it. I realize it's probably not perfect. But I'm
> just looking at the intention of the original code and trying to make it
> work correctly for KFD so we can continue integrating amdgpu changes
> (and important fixes, such as VM fault handling) into the KFD branch.
> That's also why I'm a bit in a rush. This was actually blocking KFD.
Anyway, let's make it work for all of us.
It will become better in coding, I believe. :)
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>
> We take a different approach from hybrid. We do a nightly git-merge from
> amd-staging-4.9 and running it through our automated pre-submission
> tests. If the merge doesn't have conflicts that process is completely
> automated (except for someone pressing the Submit button in the end). If
> the testing fails, we have to block integrations until the problem is
> fixed upstream. So I'm fixing the problem upstream. ;)
>
> Regards,
> Felix
>
>>
>> Regards,
>> Christian.
>>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++++++++++------
>>> 1 file changed, 18 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 818747f..c11d15e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -275,13 +275,18 @@ static int amdgpu_vm_alloc_levels(struct
>>> amdgpu_device *adev,
>>> memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
>>> }
>>> - from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>> - to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
>>> + from = saddr >> shift;
>>> + to = eaddr >> shift;
>>> + if (from >= amdgpu_vm_num_entries(adev, level) ||
>>> + to >= amdgpu_vm_num_entries(adev, level))
>>> + return -EINVAL;
>>> if (to > parent->last_entry_used)
>>> parent->last_entry_used = to;
>>> ++level;
>>> + saddr = saddr & ((1 << shift) - 1);
>>> + eaddr = eaddr & ((1 << shift) - 1);
>>> /* walk over the address space and allocate the page tables */
>>> for (pt_idx = from; pt_idx <= to; ++pt_idx) {
>>> @@ -312,8 +317,11 @@ static int amdgpu_vm_alloc_levels(struct
>>> amdgpu_device *adev,
>>> }
>>> if (level < adev->vm_manager.num_level) {
>>> - r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
>>> - eaddr, level);
>>> + uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
>>> + uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
>>> + ((1 << shift) - 1);
>>> + r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
>>> + sub_eaddr, level);
>>> if (r)
>>> return r;
>>> }
>>> @@ -990,8 +998,10 @@ static void amdgpu_vm_update_ptes(struct
>>> amdgpu_pte_update_params *params,
>>> /* initialize the variables */
>>> addr = start;
>>> pt = amdgpu_vm_get_pt(params, addr);
>>> - if (!pt)
>>> + if (!pt) {
>>> + pr_err("PT not found, aborting update_ptes\n");
>>> return;
>>> + }
>>> if (params->shadow) {
>>> if (!pt->shadow)
>>> @@ -1015,8 +1025,10 @@ static void amdgpu_vm_update_ptes(struct
>>> amdgpu_pte_update_params *params,
>>> /* walk over the address space and update the page tables */
>>> while (addr < end) {
>>> pt = amdgpu_vm_get_pt(params, addr);
>>> - if (!pt)
>>> + if (!pt) {
>>> + pr_err("PT not found, aborting update_ptes\n");
>>> return;
>>> + }
>>> if (params->shadow) {
>>> if (!pt->shadow)
>>
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-03-30 1:59 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-03-29 17:22 [PATCH] drm/amdgpu: Fix multi-level page table bugs for large BOs v3 Felix Kuehling
[not found] ` <1490808171-21714-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2017-03-29 17:40 ` Christian König
[not found] ` <3beac896-9cb5-de7f-a94e-2a0f4621b537-5C7GfCeVMHo@public.gmane.org>
2017-03-29 18:07 ` Felix Kuehling
[not found] ` <7ea76115-90cc-3153-a410-d39bc291f697-5C7GfCeVMHo@public.gmane.org>
2017-03-30 1:59 ` Zhang, Jerry (Junwei)
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.