* [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at()
@ 2023-07-27 7:56 Lang Yu
2023-07-27 7:56 ` [PATCH 2/2] drm/amdgpu: use amdgpu_bo_create_kernel_at() to create large TMR for APU Lang Yu
2023-07-27 8:00 ` [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at() Christian König
0 siblings, 2 replies; 8+ messages in thread
From: Lang Yu @ 2023-07-27 7:56 UTC (permalink / raw)
To: amd-gfx
Cc: Alex Deucher, Yifan Zhang, Lang Yu, Christian Koenig,
Arunpravin Paneer Selvam
amdgpu_bo_create_kernel_at() is used to create a physical
contiguous VRAM BO at the specific offset. It calls
amdgpu_bo_create_reserved() to create a VRAM BO first,
then frees its old memory and allocates new memory at
the specific offset. But amdgpu_bo_create_reserved() would
fail if requested VRAM BO size is too large(>128MB) on
APU which usually has a small default VRAM size(512MB).
That is because VRAM fragmentation and DRM_BUDDY_RANGE_ALLOCATION
is not natively supported by amdgpu_bo_create().
The approach is using amdgpu_bo_create_reserved() to
create a BO in CPU domain first, it will always succeed.
Then use amdgpu_bo_pin_restricted() to move the BO to
requested domain and location.
Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 30 ++++++++--------------
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 15 +++++------
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
4 files changed, 21 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ff73cc11d47e..331daa47a444 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -358,6 +358,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
* @offset: offset of the BO
* @size: size of the BO
* @bo_ptr: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
* @cpu_addr: optional CPU address mapping
*
* Creates a kernel BO at a specific offset in VRAM.
@@ -367,42 +368,33 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
*/
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
uint64_t offset, uint64_t size,
- struct amdgpu_bo **bo_ptr, void **cpu_addr)
+ struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr)
{
- struct ttm_operation_ctx ctx = { false, false };
- unsigned int i;
int r;
offset &= PAGE_MASK;
size = ALIGN(size, PAGE_SIZE);
r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
- cpu_addr);
+ AMDGPU_GEM_DOMAIN_CPU,
+ bo_ptr, NULL, cpu_addr);
if (r)
return r;
if ((*bo_ptr) == NULL)
return 0;
- /*
- * Remove the original mem node and create a new one at the request
- * position.
- */
- if (cpu_addr)
- amdgpu_bo_kunmap(*bo_ptr);
-
- ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
+ amdgpu_bo_unpin(*bo_ptr);
- for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
- (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
- (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
- }
- r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
- &(*bo_ptr)->tbo.resource, &ctx);
+ r = amdgpu_bo_pin_restricted(*bo_ptr, AMDGPU_GEM_DOMAIN_VRAM,
+ offset, offset + size);
if (r)
goto error;
+ if (gpu_addr)
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
+
if (cpu_addr) {
r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 5d3440d719e4..8f5b5664a1b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -315,7 +315,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
u64 *gpu_addr, void **cpu_addr);
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
uint64_t offset, uint64_t size,
- struct amdgpu_bo **bo_ptr, void **cpu_addr);
+ struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr);
int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo_user **ubo_ptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7c6dd3de1867..a210c243dac0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1619,7 +1619,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
adev->mman.fw_vram_usage_start_offset,
adev->mman.fw_vram_usage_size,
&adev->mman.fw_vram_usage_reserved_bo,
- &adev->mman.fw_vram_usage_va);
+ NULL, &adev->mman.fw_vram_usage_va);
}
/**
@@ -1644,7 +1644,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
adev->mman.drv_vram_usage_start_offset,
adev->mman.drv_vram_usage_size,
&adev->mman.drv_vram_usage_reserved_bo,
- &adev->mman.drv_vram_usage_va);
+ NULL, &adev->mman.drv_vram_usage_va);
}
/*
@@ -1729,8 +1729,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
- &ctx->c2p_bo,
- NULL);
+ &ctx->c2p_bo, NULL, NULL);
if (ret) {
DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
amdgpu_ttm_training_reserve_vram_fini(adev);
@@ -1742,7 +1741,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
if (!adev->gmc.is_app_apu) {
ret = amdgpu_bo_create_kernel_at(
adev, adev->gmc.real_vram_size - reserve_size,
- reserve_size, &adev->mman.fw_reserved_memory, NULL);
+ reserve_size, &adev->mman.fw_reserved_memory, NULL, NULL);
if (ret) {
DRM_ERROR("alloc tmr failed(%d)!\n", ret);
amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
@@ -1885,14 +1884,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
r = amdgpu_bo_create_kernel_at(adev, 0,
adev->mman.stolen_vga_size,
&adev->mman.stolen_vga_memory,
- NULL);
+ NULL, NULL);
if (r)
return r;
r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
adev->mman.stolen_extended_size,
&adev->mman.stolen_extended_memory,
- NULL);
+ NULL, NULL);
if (r)
return r;
@@ -1901,7 +1900,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->mman.stolen_reserved_offset,
adev->mman.stolen_reserved_size,
&adev->mman.stolen_reserved_memory,
- NULL);
+ NULL, NULL);
if (r)
return r;
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 41aa853a07d2..b93b42b916ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -397,7 +397,7 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
*/
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE,
- &bo, NULL))
+ &bo, NULL, NULL))
DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
data->bps_bo[i] = bo;
--
2.25.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 2/2] drm/amdgpu: use amdgpu_bo_create_kernel_at() to create large TMR for APU
2023-07-27 7:56 [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at() Lang Yu
@ 2023-07-27 7:56 ` Lang Yu
2023-07-27 8:01 ` Christian König
2023-07-27 8:00 ` [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at() Christian König
1 sibling, 1 reply; 8+ messages in thread
From: Lang Yu @ 2023-07-27 7:56 UTC (permalink / raw)
To: amd-gfx
Cc: Alex Deucher, Yifan Zhang, Lang Yu, Christian Koenig,
Arunpravin Paneer Selvam
TMR requires physical contiguous memory, amdgpu_bo_create_kernel()
can't satisfy large(>128MB) physical contiguous memory allocation
request with default 512MB VRAM on APU.
When requested TMR size > 128MB, use amdgpu_bo_create_kernel_at()
to create the TMR BO at offset 64MB.
Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 42 ++++++++++++++++---------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 15217e33b51d..2eac82113d34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -783,13 +783,38 @@ static bool psp_boottime_tmr(struct psp_context *psp)
}
}
+static inline int psp_create_tmr_bo(struct psp_context *psp, int tmr_size)
+{
+ void *tmr_buf;
+ void **pptr;
+ int ret;
+
+ if (psp->tmr_bo)
+ return 0;
+
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+
+ if (psp->adev->flags & AMD_IS_APU && tmr_size > 0x8000000)
+ ret = amdgpu_bo_create_kernel_at(psp->adev, 0x4000000, tmr_size,
+ &psp->tmr_bo, &psp->tmr_mc_addr,
+ pptr);
+ else
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
+ PSP_TMR_ALIGNMENT,
+ AMDGPU_HAS_VRAM(psp->adev) ?
+ AMDGPU_GEM_DOMAIN_VRAM :
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->tmr_bo, &psp->tmr_mc_addr,
+ pptr);
+
+ return ret;
+}
+
/* Set up Trusted Memory Region */
static int psp_tmr_init(struct psp_context *psp)
{
int ret = 0;
int tmr_size;
- void *tmr_buf;
- void **pptr;
/*
* According to HW engineer, they prefer the TMR address be "naturally
@@ -813,18 +838,7 @@ static int psp_tmr_init(struct psp_context *psp)
}
}
- if (!psp->tmr_bo) {
- pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
- ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
- PSP_TMR_ALIGNMENT,
- AMDGPU_HAS_VRAM(psp->adev) ?
- AMDGPU_GEM_DOMAIN_VRAM :
- AMDGPU_GEM_DOMAIN_GTT,
- &psp->tmr_bo, &psp->tmr_mc_addr,
- pptr);
- }
-
- return ret;
+ return psp_create_tmr_bo(psp, tmr_size);
}
static bool psp_skip_tmr(struct psp_context *psp)
--
2.25.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH 2/2] drm/amdgpu: use amdgpu_bo_create_kernel_at() to create large TMR for APU
2023-07-27 7:56 ` [PATCH 2/2] drm/amdgpu: use amdgpu_bo_create_kernel_at() to create large TMR for APU Lang Yu
@ 2023-07-27 8:01 ` Christian König
2023-07-27 8:24 ` Lang Yu
0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2023-07-27 8:01 UTC (permalink / raw)
To: Lang Yu, amd-gfx; +Cc: Alex Deucher, Yifan Zhang, Arunpravin Paneer Selvam
Am 27.07.23 um 09:56 schrieb Lang Yu:
> TMR requires physical contiguous memory, amdgpu_bo_create_kernel()
> can't satisfy large(>128MB) physical contiguous memory allocation
> request with default 512MB VRAM on APU.
>
> When requested TMR size > 128MB, use amdgpu_bo_create_kernel_at()
> to create the TMR BO at offset 64MB.
Well complete NAK, you have misunderstood what
amdgpu_bo_create_kernel_at() is doing.
Regards,
Christian.
>
> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 42 ++++++++++++++++---------
> 1 file changed, 28 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 15217e33b51d..2eac82113d34 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -783,13 +783,38 @@ static bool psp_boottime_tmr(struct psp_context *psp)
> }
> }
>
> +static inline int psp_create_tmr_bo(struct psp_context *psp, int tmr_size)
> +{
> + void *tmr_buf;
> + void **pptr;
> + int ret;
> +
> + if (psp->tmr_bo)
> + return 0;
> +
> + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
> +
> + if (psp->adev->flags & AMD_IS_APU && tmr_size > 0x8000000)
> + ret = amdgpu_bo_create_kernel_at(psp->adev, 0x4000000, tmr_size,
> + &psp->tmr_bo, &psp->tmr_mc_addr,
> + pptr);
> + else
> + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
> + PSP_TMR_ALIGNMENT,
> + AMDGPU_HAS_VRAM(psp->adev) ?
> + AMDGPU_GEM_DOMAIN_VRAM :
> + AMDGPU_GEM_DOMAIN_GTT,
> + &psp->tmr_bo, &psp->tmr_mc_addr,
> + pptr);
> +
> + return ret;
> +}
> +
> /* Set up Trusted Memory Region */
> static int psp_tmr_init(struct psp_context *psp)
> {
> int ret = 0;
> int tmr_size;
> - void *tmr_buf;
> - void **pptr;
>
> /*
> * According to HW engineer, they prefer the TMR address be "naturally
> @@ -813,18 +838,7 @@ static int psp_tmr_init(struct psp_context *psp)
> }
> }
>
> - if (!psp->tmr_bo) {
> - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
> - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
> - PSP_TMR_ALIGNMENT,
> - AMDGPU_HAS_VRAM(psp->adev) ?
> - AMDGPU_GEM_DOMAIN_VRAM :
> - AMDGPU_GEM_DOMAIN_GTT,
> - &psp->tmr_bo, &psp->tmr_mc_addr,
> - pptr);
> - }
> -
> - return ret;
> + return psp_create_tmr_bo(psp, tmr_size);
> }
>
> static bool psp_skip_tmr(struct psp_context *psp)
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH 2/2] drm/amdgpu: use amdgpu_bo_create_kernel_at() to create large TMR for APU
2023-07-27 8:01 ` Christian König
@ 2023-07-27 8:24 ` Lang Yu
0 siblings, 0 replies; 8+ messages in thread
From: Lang Yu @ 2023-07-27 8:24 UTC (permalink / raw)
To: Christian König
Cc: Alex Deucher, Yifan Zhang, amd-gfx, Arunpravin Paneer Selvam
On 07/27/ , Christian König wrote:
> Am 27.07.23 um 09:56 schrieb Lang Yu:
> > TMR requires physical contiguous memory, amdgpu_bo_create_kernel()
> > can't satisfy large(>128MB) physical contiguous memory allocation
> > request with default 512MB VRAM on APU.
> >
> > When requested TMR size > 128MB, use amdgpu_bo_create_kernel_at()
> > to create the TMR BO at offset 64MB.
>
> Well complete NAK, you have misunderstood what amdgpu_bo_create_kernel_at()
> is doing.
OK. Either taking over or creating memory area, amdgpu_bo_create series
functions should satisfy the client's memory request.
Anyway, amdgpu_bo_create_kernel_at() just works.
Regards,
Lang
> Regards,
> Christian.
>
> >
> > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 42 ++++++++++++++++---------
> > 1 file changed, 28 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > index 15217e33b51d..2eac82113d34 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> > @@ -783,13 +783,38 @@ static bool psp_boottime_tmr(struct psp_context *psp)
> > }
> > }
> > +static inline int psp_create_tmr_bo(struct psp_context *psp, int tmr_size)
> > +{
> > + void *tmr_buf;
> > + void **pptr;
> > + int ret;
> > +
> > + if (psp->tmr_bo)
> > + return 0;
> > +
> > + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
> > +
> > + if (psp->adev->flags & AMD_IS_APU && tmr_size > 0x8000000)
> > + ret = amdgpu_bo_create_kernel_at(psp->adev, 0x4000000, tmr_size,
> > + &psp->tmr_bo, &psp->tmr_mc_addr,
> > + pptr);
> > + else
> > + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
> > + PSP_TMR_ALIGNMENT,
> > + AMDGPU_HAS_VRAM(psp->adev) ?
> > + AMDGPU_GEM_DOMAIN_VRAM :
> > + AMDGPU_GEM_DOMAIN_GTT,
> > + &psp->tmr_bo, &psp->tmr_mc_addr,
> > + pptr);
> > +
> > + return ret;
> > +}
> > +
> > /* Set up Trusted Memory Region */
> > static int psp_tmr_init(struct psp_context *psp)
> > {
> > int ret = 0;
> > int tmr_size;
> > - void *tmr_buf;
> > - void **pptr;
> > /*
> > * According to HW engineer, they prefer the TMR address be "naturally
> > @@ -813,18 +838,7 @@ static int psp_tmr_init(struct psp_context *psp)
> > }
> > }
> > - if (!psp->tmr_bo) {
> > - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
> > - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
> > - PSP_TMR_ALIGNMENT,
> > - AMDGPU_HAS_VRAM(psp->adev) ?
> > - AMDGPU_GEM_DOMAIN_VRAM :
> > - AMDGPU_GEM_DOMAIN_GTT,
> > - &psp->tmr_bo, &psp->tmr_mc_addr,
> > - pptr);
> > - }
> > -
> > - return ret;
> > + return psp_create_tmr_bo(psp, tmr_size);
> > }
> > static bool psp_skip_tmr(struct psp_context *psp)
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at()
2023-07-27 7:56 [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at() Lang Yu
2023-07-27 7:56 ` [PATCH 2/2] drm/amdgpu: use amdgpu_bo_create_kernel_at() to create large TMR for APU Lang Yu
@ 2023-07-27 8:00 ` Christian König
2023-07-27 8:15 ` Lang Yu
1 sibling, 1 reply; 8+ messages in thread
From: Christian König @ 2023-07-27 8:00 UTC (permalink / raw)
To: Lang Yu, amd-gfx; +Cc: Alex Deucher, Yifan Zhang, Arunpravin Paneer Selvam
Am 27.07.23 um 09:56 schrieb Lang Yu:
> amdgpu_bo_create_kernel_at() is used to create a physical
> contiguous VRAM BO at the specific offset. It calls
> amdgpu_bo_create_reserved() to create a VRAM BO first,
> then frees its old memory and allocates new memory at
> the specific offset. But amdgpu_bo_create_reserved() would
> fail if requested VRAM BO size is too large(>128MB) on
> APU which usually has a small default VRAM size(512MB).
>
> That is because VRAM fragmentation and DRM_BUDDY_RANGE_ALLOCATION
> is not natively supported by amdgpu_bo_create().
>
> The approach is using amdgpu_bo_create_reserved() to
> create a BO in CPU domain first, it will always succeed.
> Then use amdgpu_bo_pin_restricted() to move the BO to
> requested domain and location.
That won't work like that.
The amdgpu_bo_create_kernel_at() function is used to take over specific
memory areas from the BIOS and *not* to create a large VRAM BO.
Allocating the initial dummy in the CPU domain is a good idea to avoid
overlap, but you are messing this up quite a bit here and will probably
break tons of stuff.
Regards,
Christian.
>
> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 30 ++++++++--------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 15 +++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
> 4 files changed, 21 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index ff73cc11d47e..331daa47a444 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -358,6 +358,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> * @offset: offset of the BO
> * @size: size of the BO
> * @bo_ptr: used to initialize BOs in structures
> + * @gpu_addr: GPU addr of the pinned BO
> * @cpu_addr: optional CPU address mapping
> *
> * Creates a kernel BO at a specific offset in VRAM.
> @@ -367,42 +368,33 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> */
> int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
> uint64_t offset, uint64_t size,
> - struct amdgpu_bo **bo_ptr, void **cpu_addr)
> + struct amdgpu_bo **bo_ptr,
> + u64 *gpu_addr, void **cpu_addr)
> {
> - struct ttm_operation_ctx ctx = { false, false };
> - unsigned int i;
> int r;
>
> offset &= PAGE_MASK;
> size = ALIGN(size, PAGE_SIZE);
>
> r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
> - cpu_addr);
> + AMDGPU_GEM_DOMAIN_CPU,
> + bo_ptr, NULL, cpu_addr);
> if (r)
> return r;
>
> if ((*bo_ptr) == NULL)
> return 0;
>
> - /*
> - * Remove the original mem node and create a new one at the request
> - * position.
> - */
> - if (cpu_addr)
> - amdgpu_bo_kunmap(*bo_ptr);
> -
> - ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
> + amdgpu_bo_unpin(*bo_ptr);
>
> - for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
> - (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
> - (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
> - }
> - r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
> - &(*bo_ptr)->tbo.resource, &ctx);
> + r = amdgpu_bo_pin_restricted(*bo_ptr, AMDGPU_GEM_DOMAIN_VRAM,
> + offset, offset + size);
> if (r)
> goto error;
>
> + if (gpu_addr)
> + *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
> +
> if (cpu_addr) {
> r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 5d3440d719e4..8f5b5664a1b6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -315,7 +315,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> u64 *gpu_addr, void **cpu_addr);
> int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
> uint64_t offset, uint64_t size,
> - struct amdgpu_bo **bo_ptr, void **cpu_addr);
> + struct amdgpu_bo **bo_ptr,
> + u64 *gpu_addr, void **cpu_addr);
> int amdgpu_bo_create_user(struct amdgpu_device *adev,
> struct amdgpu_bo_param *bp,
> struct amdgpu_bo_user **ubo_ptr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 7c6dd3de1867..a210c243dac0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1619,7 +1619,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
> adev->mman.fw_vram_usage_start_offset,
> adev->mman.fw_vram_usage_size,
> &adev->mman.fw_vram_usage_reserved_bo,
> - &adev->mman.fw_vram_usage_va);
> + NULL, &adev->mman.fw_vram_usage_va);
> }
>
> /**
> @@ -1644,7 +1644,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
> adev->mman.drv_vram_usage_start_offset,
> adev->mman.drv_vram_usage_size,
> &adev->mman.drv_vram_usage_reserved_bo,
> - &adev->mman.drv_vram_usage_va);
> + NULL, &adev->mman.drv_vram_usage_va);
> }
>
> /*
> @@ -1729,8 +1729,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
> ret = amdgpu_bo_create_kernel_at(adev,
> ctx->c2p_train_data_offset,
> ctx->train_data_size,
> - &ctx->c2p_bo,
> - NULL);
> + &ctx->c2p_bo, NULL, NULL);
> if (ret) {
> DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
> amdgpu_ttm_training_reserve_vram_fini(adev);
> @@ -1742,7 +1741,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
> if (!adev->gmc.is_app_apu) {
> ret = amdgpu_bo_create_kernel_at(
> adev, adev->gmc.real_vram_size - reserve_size,
> - reserve_size, &adev->mman.fw_reserved_memory, NULL);
> + reserve_size, &adev->mman.fw_reserved_memory, NULL, NULL);
> if (ret) {
> DRM_ERROR("alloc tmr failed(%d)!\n", ret);
> amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
> @@ -1885,14 +1884,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> r = amdgpu_bo_create_kernel_at(adev, 0,
> adev->mman.stolen_vga_size,
> &adev->mman.stolen_vga_memory,
> - NULL);
> + NULL, NULL);
> if (r)
> return r;
>
> r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
> adev->mman.stolen_extended_size,
> &adev->mman.stolen_extended_memory,
> - NULL);
> + NULL, NULL);
>
> if (r)
> return r;
> @@ -1901,7 +1900,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> adev->mman.stolen_reserved_offset,
> adev->mman.stolen_reserved_size,
> &adev->mman.stolen_reserved_memory,
> - NULL);
> + NULL, NULL);
> if (r)
> return r;
> } else {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 41aa853a07d2..b93b42b916ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -397,7 +397,7 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
> */
> if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
> AMDGPU_GPU_PAGE_SIZE,
> - &bo, NULL))
> + &bo, NULL, NULL))
> DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
>
> data->bps_bo[i] = bo;
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at()
2023-07-27 8:00 ` [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at() Christian König
@ 2023-07-27 8:15 ` Lang Yu
2023-07-27 11:23 ` Christian König
0 siblings, 1 reply; 8+ messages in thread
From: Lang Yu @ 2023-07-27 8:15 UTC (permalink / raw)
To: Christian König
Cc: Alex Deucher, Yifan Zhang, amd-gfx, Arunpravin Paneer Selvam
On 07/27/ , Christian König wrote:
> Am 27.07.23 um 09:56 schrieb Lang Yu:
> > amdgpu_bo_create_kernel_at() is used to create a physical
> > contiguous VRAM BO at the specific offset. It calls
> > amdgpu_bo_create_reserved() to create a VRAM BO first,
> > then frees its old memory and allocates new memory at
> > the specific offset. But amdgpu_bo_create_reserved() would
> > fail if requested VRAM BO size is too large(>128MB) on
> > APU which usually has a small default VRAM size(512MB).
> >
> > That is because VRAM fragmentation and DRM_BUDDY_RANGE_ALLOCATION
> > is not natively supported by amdgpu_bo_create().
> >
> > The approach is using amdgpu_bo_create_reserved() to
> > create a BO in CPU domain first, it will always succeed.
> > Then use amdgpu_bo_pin_restricted() to move the BO to
> > requested domain and location.
>
> That won't work like that.
>
> The amdgpu_bo_create_kernel_at() function is used to take over specific
> memory areas from the BIOS and *not* to create a large VRAM BO.
Literally, it is creating a VRAM BO.
> Allocating the initial dummy in the CPU domain is a good idea to avoid
> overlap, but you are messing this up quite a bit here and will probably
> break tons of stuff.
I don't see it breaks something.amdgpu_bo_create() doesn't support
DRM_BUDDY_RANGE_ALLOCATION. Actually it works, this approach is just using
DRM_BUDDY_RANGE_ALLOCATION to satify such allocation request.
Reagrds,
Lang
> Regards,
> Christian.
>
>
> >
> > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 30 ++++++++--------------
> > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 ++-
> > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 15 +++++------
> > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
> > 4 files changed, 21 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > index ff73cc11d47e..331daa47a444 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > @@ -358,6 +358,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> > * @offset: offset of the BO
> > * @size: size of the BO
> > * @bo_ptr: used to initialize BOs in structures
> > + * @gpu_addr: GPU addr of the pinned BO
> > * @cpu_addr: optional CPU address mapping
> > *
> > * Creates a kernel BO at a specific offset in VRAM.
> > @@ -367,42 +368,33 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> > */
> > int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
> > uint64_t offset, uint64_t size,
> > - struct amdgpu_bo **bo_ptr, void **cpu_addr)
> > + struct amdgpu_bo **bo_ptr,
> > + u64 *gpu_addr, void **cpu_addr)
> > {
> > - struct ttm_operation_ctx ctx = { false, false };
> > - unsigned int i;
> > int r;
> > offset &= PAGE_MASK;
> > size = ALIGN(size, PAGE_SIZE);
> > r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
> > - AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
> > - cpu_addr);
> > + AMDGPU_GEM_DOMAIN_CPU,
> > + bo_ptr, NULL, cpu_addr);
> > if (r)
> > return r;
> > if ((*bo_ptr) == NULL)
> > return 0;
> > - /*
> > - * Remove the original mem node and create a new one at the request
> > - * position.
> > - */
> > - if (cpu_addr)
> > - amdgpu_bo_kunmap(*bo_ptr);
> > -
> > - ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
> > + amdgpu_bo_unpin(*bo_ptr);
> > - for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
> > - (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
> > - (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
> > - }
> > - r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
> > - &(*bo_ptr)->tbo.resource, &ctx);
> > + r = amdgpu_bo_pin_restricted(*bo_ptr, AMDGPU_GEM_DOMAIN_VRAM,
> > + offset, offset + size);
> > if (r)
> > goto error;
> > + if (gpu_addr)
> > + *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
> > +
> > if (cpu_addr) {
> > r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
> > if (r)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > index 5d3440d719e4..8f5b5664a1b6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > @@ -315,7 +315,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> > u64 *gpu_addr, void **cpu_addr);
> > int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
> > uint64_t offset, uint64_t size,
> > - struct amdgpu_bo **bo_ptr, void **cpu_addr);
> > + struct amdgpu_bo **bo_ptr,
> > + u64 *gpu_addr, void **cpu_addr);
> > int amdgpu_bo_create_user(struct amdgpu_device *adev,
> > struct amdgpu_bo_param *bp,
> > struct amdgpu_bo_user **ubo_ptr);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 7c6dd3de1867..a210c243dac0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -1619,7 +1619,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
> > adev->mman.fw_vram_usage_start_offset,
> > adev->mman.fw_vram_usage_size,
> > &adev->mman.fw_vram_usage_reserved_bo,
> > - &adev->mman.fw_vram_usage_va);
> > + NULL, &adev->mman.fw_vram_usage_va);
> > }
> > /**
> > @@ -1644,7 +1644,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
> > adev->mman.drv_vram_usage_start_offset,
> > adev->mman.drv_vram_usage_size,
> > &adev->mman.drv_vram_usage_reserved_bo,
> > - &adev->mman.drv_vram_usage_va);
> > + NULL, &adev->mman.drv_vram_usage_va);
> > }
> > /*
> > @@ -1729,8 +1729,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
> > ret = amdgpu_bo_create_kernel_at(adev,
> > ctx->c2p_train_data_offset,
> > ctx->train_data_size,
> > - &ctx->c2p_bo,
> > - NULL);
> > + &ctx->c2p_bo, NULL, NULL);
> > if (ret) {
> > DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
> > amdgpu_ttm_training_reserve_vram_fini(adev);
> > @@ -1742,7 +1741,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
> > if (!adev->gmc.is_app_apu) {
> > ret = amdgpu_bo_create_kernel_at(
> > adev, adev->gmc.real_vram_size - reserve_size,
> > - reserve_size, &adev->mman.fw_reserved_memory, NULL);
> > + reserve_size, &adev->mman.fw_reserved_memory, NULL, NULL);
> > if (ret) {
> > DRM_ERROR("alloc tmr failed(%d)!\n", ret);
> > amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
> > @@ -1885,14 +1884,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> > r = amdgpu_bo_create_kernel_at(adev, 0,
> > adev->mman.stolen_vga_size,
> > &adev->mman.stolen_vga_memory,
> > - NULL);
> > + NULL, NULL);
> > if (r)
> > return r;
> > r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
> > adev->mman.stolen_extended_size,
> > &adev->mman.stolen_extended_memory,
> > - NULL);
> > + NULL, NULL);
> > if (r)
> > return r;
> > @@ -1901,7 +1900,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> > adev->mman.stolen_reserved_offset,
> > adev->mman.stolen_reserved_size,
> > &adev->mman.stolen_reserved_memory,
> > - NULL);
> > + NULL, NULL);
> > if (r)
> > return r;
> > } else {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > index 41aa853a07d2..b93b42b916ce 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > @@ -397,7 +397,7 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
> > */
> > if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
> > AMDGPU_GPU_PAGE_SIZE,
> > - &bo, NULL))
> > + &bo, NULL, NULL))
> > DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
> > data->bps_bo[i] = bo;
>
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at()
2023-07-27 8:15 ` Lang Yu
@ 2023-07-27 11:23 ` Christian König
2023-07-27 12:04 ` Lang Yu
0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2023-07-27 11:23 UTC (permalink / raw)
To: Lang Yu; +Cc: Alex Deucher, Yifan Zhang, amd-gfx, Arunpravin Paneer Selvam
Am 27.07.23 um 10:15 schrieb Lang Yu:
> On 07/27/ , Christian König wrote:
>> Am 27.07.23 um 09:56 schrieb Lang Yu:
>>> amdgpu_bo_create_kernel_at() is used to create a physical
>>> contiguous VRAM BO at the specific offset. It calls
>>> amdgpu_bo_create_reserved() to create a VRAM BO first,
>>> then frees its old memory and allocates new memory at
>>> the specific offset. But amdgpu_bo_create_reserved() would
>>> fail if requested VRAM BO size is too large(>128MB) on
>>> APU which usually has a small default VRAM size(512MB).
>>>
>>> That is because VRAM fragmentation and DRM_BUDDY_RANGE_ALLOCATION
>>> is not natively supported by amdgpu_bo_create().
>>>
>>> The approach is using amdgpu_bo_create_reserved() to
>>> create a BO in CPU domain first, it will always succeed.
>>> Then use amdgpu_bo_pin_restricted() to move the BO to
>>> requested domain and location.
>> That won't work like that.
>>
>> The amdgpu_bo_create_kernel_at() function is used to take over specific
>> memory areas from the BIOS and *not* to create a large VRAM BO.
>
> Literally, it is creating a VRAM BO.
No, it doesn't.
amdgpu_bo_create_kernel_at() creates a BO for a predefined offset in VRAM.
E.g. the BIOS says to us you can find the table at offsets 0x12345678000
in VRAM and during driver load we create a buffer object for this so
that we can map or copy it away.
>
>> Allocating the initial dummy in the CPU domain is a good idea to avoid
>> overlap, but you are messing this up quite a bit here and will probably
>> break tons of stuff.
> I don't see it breaks something.amdgpu_bo_create() doesn't support
> DRM_BUDDY_RANGE_ALLOCATION. Actually it works, this approach is just using
> DRM_BUDDY_RANGE_ALLOCATION to satify such allocation request.
The big difference between amdgpu_bo_create_kernel_at() and
amdgpu_bo_create_kernel() is that the allocated VRAM is not initialized
to zero.
E.g. we keep the original content the BIOS has placed there for us. With
your modifications that content would be overwritten.
Regards,
Christian.
>
> Reagrds,
> Lang
>
>> Regards,
>> Christian.
>>
>>
>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 30 ++++++++--------------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 ++-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 15 +++++------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
>>> 4 files changed, 21 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index ff73cc11d47e..331daa47a444 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -358,6 +358,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
>>> * @offset: offset of the BO
>>> * @size: size of the BO
>>> * @bo_ptr: used to initialize BOs in structures
>>> + * @gpu_addr: GPU addr of the pinned BO
>>> * @cpu_addr: optional CPU address mapping
>>> *
>>> * Creates a kernel BO at a specific offset in VRAM.
>>> @@ -367,42 +368,33 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
>>> */
>>> int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
>>> uint64_t offset, uint64_t size,
>>> - struct amdgpu_bo **bo_ptr, void **cpu_addr)
>>> + struct amdgpu_bo **bo_ptr,
>>> + u64 *gpu_addr, void **cpu_addr)
>>> {
>>> - struct ttm_operation_ctx ctx = { false, false };
>>> - unsigned int i;
>>> int r;
>>> offset &= PAGE_MASK;
>>> size = ALIGN(size, PAGE_SIZE);
>>> r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
>>> - cpu_addr);
>>> + AMDGPU_GEM_DOMAIN_CPU,
>>> + bo_ptr, NULL, cpu_addr);
>>> if (r)
>>> return r;
>>> if ((*bo_ptr) == NULL)
>>> return 0;
>>> - /*
>>> - * Remove the original mem node and create a new one at the request
>>> - * position.
>>> - */
>>> - if (cpu_addr)
>>> - amdgpu_bo_kunmap(*bo_ptr);
>>> -
>>> - ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
>>> + amdgpu_bo_unpin(*bo_ptr);
>>> - for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
>>> - (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
>>> - (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
>>> - }
>>> - r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
>>> - &(*bo_ptr)->tbo.resource, &ctx);
>>> + r = amdgpu_bo_pin_restricted(*bo_ptr, AMDGPU_GEM_DOMAIN_VRAM,
>>> + offset, offset + size);
>>> if (r)
>>> goto error;
>>> + if (gpu_addr)
>>> + *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
>>> +
>>> if (cpu_addr) {
>>> r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
>>> if (r)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> index 5d3440d719e4..8f5b5664a1b6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>> @@ -315,7 +315,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
>>> u64 *gpu_addr, void **cpu_addr);
>>> int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
>>> uint64_t offset, uint64_t size,
>>> - struct amdgpu_bo **bo_ptr, void **cpu_addr);
>>> + struct amdgpu_bo **bo_ptr,
>>> + u64 *gpu_addr, void **cpu_addr);
>>> int amdgpu_bo_create_user(struct amdgpu_device *adev,
>>> struct amdgpu_bo_param *bp,
>>> struct amdgpu_bo_user **ubo_ptr);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 7c6dd3de1867..a210c243dac0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -1619,7 +1619,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
>>> adev->mman.fw_vram_usage_start_offset,
>>> adev->mman.fw_vram_usage_size,
>>> &adev->mman.fw_vram_usage_reserved_bo,
>>> - &adev->mman.fw_vram_usage_va);
>>> + NULL, &adev->mman.fw_vram_usage_va);
>>> }
>>> /**
>>> @@ -1644,7 +1644,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
>>> adev->mman.drv_vram_usage_start_offset,
>>> adev->mman.drv_vram_usage_size,
>>> &adev->mman.drv_vram_usage_reserved_bo,
>>> - &adev->mman.drv_vram_usage_va);
>>> + NULL, &adev->mman.drv_vram_usage_va);
>>> }
>>> /*
>>> @@ -1729,8 +1729,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
>>> ret = amdgpu_bo_create_kernel_at(adev,
>>> ctx->c2p_train_data_offset,
>>> ctx->train_data_size,
>>> - &ctx->c2p_bo,
>>> - NULL);
>>> + &ctx->c2p_bo, NULL, NULL);
>>> if (ret) {
>>> DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
>>> amdgpu_ttm_training_reserve_vram_fini(adev);
>>> @@ -1742,7 +1741,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
>>> if (!adev->gmc.is_app_apu) {
>>> ret = amdgpu_bo_create_kernel_at(
>>> adev, adev->gmc.real_vram_size - reserve_size,
>>> - reserve_size, &adev->mman.fw_reserved_memory, NULL);
>>> + reserve_size, &adev->mman.fw_reserved_memory, NULL, NULL);
>>> if (ret) {
>>> DRM_ERROR("alloc tmr failed(%d)!\n", ret);
>>> amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
>>> @@ -1885,14 +1884,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>> r = amdgpu_bo_create_kernel_at(adev, 0,
>>> adev->mman.stolen_vga_size,
>>> &adev->mman.stolen_vga_memory,
>>> - NULL);
>>> + NULL, NULL);
>>> if (r)
>>> return r;
>>> r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
>>> adev->mman.stolen_extended_size,
>>> &adev->mman.stolen_extended_memory,
>>> - NULL);
>>> + NULL, NULL);
>>> if (r)
>>> return r;
>>> @@ -1901,7 +1900,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>> adev->mman.stolen_reserved_offset,
>>> adev->mman.stolen_reserved_size,
>>> &adev->mman.stolen_reserved_memory,
>>> - NULL);
>>> + NULL, NULL);
>>> if (r)
>>> return r;
>>> } else {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> index 41aa853a07d2..b93b42b916ce 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>>> @@ -397,7 +397,7 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
>>> */
>>> if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
>>> AMDGPU_GPU_PAGE_SIZE,
>>> - &bo, NULL))
>>> + &bo, NULL, NULL))
>>> DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
>>> data->bps_bo[i] = bo;
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at()
2023-07-27 11:23 ` Christian König
@ 2023-07-27 12:04 ` Lang Yu
0 siblings, 0 replies; 8+ messages in thread
From: Lang Yu @ 2023-07-27 12:04 UTC (permalink / raw)
To: Christian König
Cc: Alex Deucher, Yifan Zhang, amd-gfx, Arunpravin Paneer Selvam
On 07/27/ , Christian König wrote:
> Am 27.07.23 um 10:15 schrieb Lang Yu:
> > On 07/27/ , Christian König wrote:
> > > Am 27.07.23 um 09:56 schrieb Lang Yu:
> > > > amdgpu_bo_create_kernel_at() is used to create a physical
> > > > contiguous VRAM BO at the specific offset. It calls
> > > > amdgpu_bo_create_reserved() to create a VRAM BO first,
> > > > then frees its old memory and allocates new memory at
> > > > the specific offset. But amdgpu_bo_create_reserved() would
> > > > fail if requested VRAM BO size is too large(>128MB) on
> > > > APU which usually has a small default VRAM size(512MB).
> > > >
> > > > That is because VRAM fragmentation and DRM_BUDDY_RANGE_ALLOCATION
> > > > is not natively supported by amdgpu_bo_create().
> > > >
> > > > The approach is using amdgpu_bo_create_reserved() to
> > > > create a BO in CPU domain first, it will always succeed.
> > > > Then use amdgpu_bo_pin_restricted() to move the BO to
> > > > requested domain and location.
> > > That won't work like that.
> > >
> > > The amdgpu_bo_create_kernel_at() function is used to take over specific
> > > memory areas from the BIOS and *not* to create a large VRAM BO.
> > Literally, it is creating a VRAM BO.
>
> No, it doesn't.
>
> amdgpu_bo_create_kernel_at() creates a BO for a predefined offset in VRAM.
>
> E.g. the BIOS says to us you can find the table at offsets 0x12345678000 in
> VRAM and during driver load we create a buffer object for this so that we
> can map or copy it away.
>
> >
> > > Allocating the initial dummy in the CPU domain is a good idea to avoid
> > > overlap, but you are messing this up quite a bit here and will probably
> > > break tons of stuff.
> > I don't see it breaks something.amdgpu_bo_create() doesn't support
> > DRM_BUDDY_RANGE_ALLOCATION. Actually it works, this approach is just using
> > DRM_BUDDY_RANGE_ALLOCATION to satify such allocation request.
>
> The big difference between amdgpu_bo_create_kernel_at() and
> amdgpu_bo_create_kernel() is that the allocated VRAM is not initialized to
> zero.
>
> E.g. we keep the original content the BIOS has placed there for us. With
> your modifications that content would be overwritten.
Thanks, I got it. But it looks like only ttm_bo_type_device would be
initialized to zero.
switch (bo->type) {
case ttm_bo_type_device:
if (zero_alloc)
page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
break;
case ttm_bo_type_kernel:
break;
case ttm_bo_type_sg:
page_flags |= TTM_TT_FLAG_EXTERNAL;
break;
default:
pr_err("Illegal buffer object type\n");
return -EINVAL;
}
Yes, it would be overwritten. That's not expected.
Regards,
Lang
> Regards,
> Christian.
>
> >
> > Reagrds,
> > Lang
> > > Regards,
> > > Christian.
> > >
> > >
> > > > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > > > ---
> > > > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 30 ++++++++--------------
> > > > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 ++-
> > > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 15 +++++------
> > > > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
> > > > 4 files changed, 21 insertions(+), 29 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > > > index ff73cc11d47e..331daa47a444 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > > > @@ -358,6 +358,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> > > > * @offset: offset of the BO
> > > > * @size: size of the BO
> > > > * @bo_ptr: used to initialize BOs in structures
> > > > + * @gpu_addr: GPU addr of the pinned BO
> > > > * @cpu_addr: optional CPU address mapping
> > > > *
> > > > * Creates a kernel BO at a specific offset in VRAM.
> > > > @@ -367,42 +368,33 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> > > > */
> > > > int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
> > > > uint64_t offset, uint64_t size,
> > > > - struct amdgpu_bo **bo_ptr, void **cpu_addr)
> > > > + struct amdgpu_bo **bo_ptr,
> > > > + u64 *gpu_addr, void **cpu_addr)
> > > > {
> > > > - struct ttm_operation_ctx ctx = { false, false };
> > > > - unsigned int i;
> > > > int r;
> > > > offset &= PAGE_MASK;
> > > > size = ALIGN(size, PAGE_SIZE);
> > > > r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
> > > > - AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
> > > > - cpu_addr);
> > > > + AMDGPU_GEM_DOMAIN_CPU,
> > > > + bo_ptr, NULL, cpu_addr);
> > > > if (r)
> > > > return r;
> > > > if ((*bo_ptr) == NULL)
> > > > return 0;
> > > > - /*
> > > > - * Remove the original mem node and create a new one at the request
> > > > - * position.
> > > > - */
> > > > - if (cpu_addr)
> > > > - amdgpu_bo_kunmap(*bo_ptr);
> > > > -
> > > > - ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
> > > > + amdgpu_bo_unpin(*bo_ptr);
> > > > - for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
> > > > - (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
> > > > - (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
> > > > - }
> > > > - r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
> > > > - &(*bo_ptr)->tbo.resource, &ctx);
> > > > + r = amdgpu_bo_pin_restricted(*bo_ptr, AMDGPU_GEM_DOMAIN_VRAM,
> > > > + offset, offset + size);
> > > > if (r)
> > > > goto error;
> > > > + if (gpu_addr)
> > > > + *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
> > > > +
> > > > if (cpu_addr) {
> > > > r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
> > > > if (r)
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > > > index 5d3440d719e4..8f5b5664a1b6 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > > > @@ -315,7 +315,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
> > > > u64 *gpu_addr, void **cpu_addr);
> > > > int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
> > > > uint64_t offset, uint64_t size,
> > > > - struct amdgpu_bo **bo_ptr, void **cpu_addr);
> > > > + struct amdgpu_bo **bo_ptr,
> > > > + u64 *gpu_addr, void **cpu_addr);
> > > > int amdgpu_bo_create_user(struct amdgpu_device *adev,
> > > > struct amdgpu_bo_param *bp,
> > > > struct amdgpu_bo_user **ubo_ptr);
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > > > index 7c6dd3de1867..a210c243dac0 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > > > @@ -1619,7 +1619,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
> > > > adev->mman.fw_vram_usage_start_offset,
> > > > adev->mman.fw_vram_usage_size,
> > > > &adev->mman.fw_vram_usage_reserved_bo,
> > > > - &adev->mman.fw_vram_usage_va);
> > > > + NULL, &adev->mman.fw_vram_usage_va);
> > > > }
> > > > /**
> > > > @@ -1644,7 +1644,7 @@ static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
> > > > adev->mman.drv_vram_usage_start_offset,
> > > > adev->mman.drv_vram_usage_size,
> > > > &adev->mman.drv_vram_usage_reserved_bo,
> > > > - &adev->mman.drv_vram_usage_va);
> > > > + NULL, &adev->mman.drv_vram_usage_va);
> > > > }
> > > > /*
> > > > @@ -1729,8 +1729,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
> > > > ret = amdgpu_bo_create_kernel_at(adev,
> > > > ctx->c2p_train_data_offset,
> > > > ctx->train_data_size,
> > > > - &ctx->c2p_bo,
> > > > - NULL);
> > > > + &ctx->c2p_bo, NULL, NULL);
> > > > if (ret) {
> > > > DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
> > > > amdgpu_ttm_training_reserve_vram_fini(adev);
> > > > @@ -1742,7 +1741,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
> > > > if (!adev->gmc.is_app_apu) {
> > > > ret = amdgpu_bo_create_kernel_at(
> > > > adev, adev->gmc.real_vram_size - reserve_size,
> > > > - reserve_size, &adev->mman.fw_reserved_memory, NULL);
> > > > + reserve_size, &adev->mman.fw_reserved_memory, NULL, NULL);
> > > > if (ret) {
> > > > DRM_ERROR("alloc tmr failed(%d)!\n", ret);
> > > > amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
> > > > @@ -1885,14 +1884,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> > > > r = amdgpu_bo_create_kernel_at(adev, 0,
> > > > adev->mman.stolen_vga_size,
> > > > &adev->mman.stolen_vga_memory,
> > > > - NULL);
> > > > + NULL, NULL);
> > > > if (r)
> > > > return r;
> > > > r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
> > > > adev->mman.stolen_extended_size,
> > > > &adev->mman.stolen_extended_memory,
> > > > - NULL);
> > > > + NULL, NULL);
> > > > if (r)
> > > > return r;
> > > > @@ -1901,7 +1900,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> > > > adev->mman.stolen_reserved_offset,
> > > > adev->mman.stolen_reserved_size,
> > > > &adev->mman.stolen_reserved_memory,
> > > > - NULL);
> > > > + NULL, NULL);
> > > > if (r)
> > > > return r;
> > > > } else {
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > > > index 41aa853a07d2..b93b42b916ce 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > > > @@ -397,7 +397,7 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
> > > > */
> > > > if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
> > > > AMDGPU_GPU_PAGE_SIZE,
> > > > - &bo, NULL))
> > > > + &bo, NULL, NULL))
> > > > DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
> > > > data->bps_bo[i] = bo;
>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2023-07-27 12:04 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-07-27 7:56 [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at() Lang Yu
2023-07-27 7:56 ` [PATCH 2/2] drm/amdgpu: use amdgpu_bo_create_kernel_at() to create large TMR for APU Lang Yu
2023-07-27 8:01 ` Christian König
2023-07-27 8:24 ` Lang Yu
2023-07-27 8:00 ` [PATCH 1/2] drm/amdgpu: refine amdgpu_bo_create_kernel_at() Christian König
2023-07-27 8:15 ` Lang Yu
2023-07-27 11:23 ` Christian König
2023-07-27 12:04 ` Lang Yu
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.