* [PATCH 1/6] drm/amdgpu: cleanup MES command submission
2024-06-04 16:04 Rate limit improvements for TTM Christian König
@ 2024-06-04 16:04 ` Christian König
2024-06-04 16:04 ` [PATCH 2/6] drm/ttm: add TTM_PL_FLAG_TRESHOLD Christian König
` (4 subsequent siblings)
5 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2024-06-04 16:04 UTC (permalink / raw)
To: tursulin, friedrich.vock; +Cc: dri-devel, amd-gfx
The approach of having a separate WB slot for each submission doesn't
really work well and for example breaks GPU reset.
Use a status query packet for the fence update instead since those
should always succeed we can use the fence of the original packet to
signal the state of the operation.
Only compile tested.
While at it cleanup the coding style.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 76 ++++++++++++++++----------
1 file changed, 48 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 0d1407f25005..32d4519541c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -154,18 +154,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size,
int api_status_off)
{
- int ndw = size / 4;
- signed long r;
- union MESAPI__MISC *x_pkt = pkt;
- struct MES_API_STATUS *api_status;
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+ signed long timeout = 3000000; /* 3000 ms */
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
- unsigned long flags;
- signed long timeout = 3000000; /* 3000 ms */
+ struct MES_API_STATUS *api_status;
+ union MESAPI__MISC *x_pkt = pkt;
const char *op_str, *misc_op_str;
- u32 fence_offset;
- u64 fence_gpu_addr;
- u64 *fence_ptr;
+ unsigned long flags;
+ u64 status_gpu_addr;
+ u32 status_offset;
+ u64 *status_ptr;
+ signed long r;
int ret;
if (x_pkt->header.opcode >= MES_SCH_API_MAX)
@@ -177,28 +177,38 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
timeout = 15 * 600 * 1000;
}
- BUG_ON(size % 4 != 0);
- ret = amdgpu_device_wb_get(adev, &fence_offset);
+ ret = amdgpu_device_wb_get(adev, &status_offset);
if (ret)
return ret;
- fence_gpu_addr =
- adev->wb.gpu_addr + (fence_offset * 4);
- fence_ptr = (u64 *)&adev->wb.wb[fence_offset];
- *fence_ptr = 0;
+
+ status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+ status_ptr = (u64 *)&adev->wb.wb[status_offset];
+ *status_ptr = 0;
spin_lock_irqsave(&mes->ring_lock, flags);
- if (amdgpu_ring_alloc(ring, ndw)) {
- spin_unlock_irqrestore(&mes->ring_lock, flags);
- amdgpu_device_wb_free(adev, fence_offset);
- return -ENOMEM;
- }
+ r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+ if (r)
+ goto error_unlock_free;
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
- api_status->api_completion_fence_addr = fence_gpu_addr;
+ api_status->api_completion_fence_addr = status_gpu_addr;
api_status->api_completion_fence_value = 1;
- amdgpu_ring_write_multiple(ring, pkt, ndw);
+ amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_status_pkt.api_status.api_completion_fence_addr =
+ ring->fence_drv.gpu_addr;
+ mes_status_pkt.api_status.api_completion_fence_value =
+ ++ring->fence_drv.sync_seq;
+
+ amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+ sizeof(mes_status_pkt) / 4);
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags);
@@ -206,15 +216,16 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
if (misc_op_str)
- dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str);
+ dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
+ misc_op_str);
else if (op_str)
dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
else
- dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
+ dev_dbg(adev->dev, "MES msg=%d was emitted\n",
+ x_pkt->header.opcode);
- r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout);
- amdgpu_device_wb_free(adev, fence_offset);
- if (r < 1) {
+ r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+ if (r < 1 || !*status_ptr) {
if (misc_op_str)
dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
@@ -229,10 +240,19 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
while (halt_if_hws_hang)
schedule();
- return -ETIMEDOUT;
+ r = -ETIMEDOUT;
+ goto error_wb_free;
}
+ amdgpu_device_wb_free(adev, status_offset);
return 0;
+
+error_unlock_free:
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
+
+error_wb_free:
+ amdgpu_device_wb_free(adev, status_offset);
+ return r;
}
static int convert_to_mes_queue_type(int queue_type)
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 2/6] drm/ttm: add TTM_PL_FLAG_TRESHOLD
2024-06-04 16:04 Rate limit improvements for TTM Christian König
2024-06-04 16:04 ` [PATCH 1/6] drm/amdgpu: cleanup MES command submission Christian König
@ 2024-06-04 16:04 ` Christian König
2024-06-05 1:12 ` kernel test robot
2024-06-04 16:05 ` [PATCH 3/6] drm/amdgpu: enable GTT fallback handling for dGPUs only Christian König
` (3 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2024-06-04 16:04 UTC (permalink / raw)
To: tursulin, friedrich.vock; +Cc: dri-devel, amd-gfx
This adds support to enable a placement only when a certain treshold of
moved bytes is reached. It's a context flag which will be handled
together with TTM_PL_FLAG_DESIRED and TTM_PL_FLAG_FALLBACK.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 5 ++---
drivers/gpu/drm/ttm/ttm_resource.c | 35 ++++++++++++++++++++++++++++--
include/drm/ttm/ttm_bo.h | 3 +++
include/drm/ttm/ttm_placement.h | 15 +++++++++++++
include/drm/ttm/ttm_resource.h | 2 ++
5 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6396dece0db1..6cd2e32bb5db 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -764,8 +764,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
if (!man || !ttm_resource_manager_used(man))
continue;
- if (place->flags & (force_space ? TTM_PL_FLAG_DESIRED :
- TTM_PL_FLAG_FALLBACK))
+ if (!ttm_place_applicable(place, ctx, force_space))
continue;
do {
@@ -864,7 +863,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
do {
/* Check whether we need to move buffer. */
if (bo->resource &&
- ttm_resource_compatible(bo->resource, placement,
+ ttm_resource_compatible(bo->resource, placement, ctx,
force_space))
return 0;
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 4a66b851b67d..74a6bfc74dbe 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -292,6 +292,37 @@ bool ttm_resource_intersects(struct ttm_device *bdev,
return man->func->intersects(man, res, place, size);
}
+/**
+ * ttm_place_applicable - check if place is applicable
+ *
+ * @place: place to check
+ * @ctx: the operation context
+ * @evicting: true if TTM is evicting resources
+ *
+ * Return true if the place is currently applicable.
+ */
+bool ttm_place_applicable(const struct ttm_place *place,
+ struct ttm_operation_ctx *ctx,
+ bool evicting)
+{
+
+ /* When no flag is given we always consider the place applicable */
+ if (!(place->flags & TTM_PL_FLAG_CTX_MASK))
+ return true;
+
+ if (place->flags & TTM_PL_FLAG_FALLBACK && evicting)
+ return true;
+
+ if (place->flags & TTM_PL_FLAG_DESIRED && !evicting)
+ return true;
+
+ if (place->flags & TTM_PL_FLAG_MOVE_THRESHOLD &&
+ ctx->bytes_moved < ctx->move_threshold)
+ return true;
+
+ return false;
+}
+
/**
* ttm_resource_compatible - check if resource is compatible with placement
*
@@ -303,6 +334,7 @@ bool ttm_resource_intersects(struct ttm_device *bdev,
*/
bool ttm_resource_compatible(struct ttm_resource *res,
struct ttm_placement *placement,
+ struct ttm_operation_ctx *ctx,
bool evicting)
{
struct ttm_buffer_object *bo = res->bo;
@@ -319,8 +351,7 @@ bool ttm_resource_compatible(struct ttm_resource *res,
if (res->mem_type != place->mem_type)
continue;
- if (place->flags & (evicting ? TTM_PL_FLAG_DESIRED :
- TTM_PL_FLAG_FALLBACK))
+ if (!ttm_place_applicable(place, ctx, evicting))
continue;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS &&
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 6ccf96c91f3a..a85be2140970 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -176,6 +176,8 @@ struct ttm_bo_kmap_obj {
* faults. Should only be used by TTM internally.
* @resv: Reservation object to allow reserved evictions with.
* @bytes_moved: Statistics on how many bytes have been moved.
+ * @move_threshold: When @bytes_moved >= @move_threshold placements with
+ * @TTM_PL_FLAG_MOVE_TRESHOLD are used as well.
*
* Context for TTM operations like changing buffer placement or general memory
* allocation.
@@ -188,6 +190,7 @@ struct ttm_operation_ctx {
bool force_alloc;
struct dma_resv *resv;
uint64_t bytes_moved;
+ uint64_t move_threshold;
};
/**
diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h
index b510a4812609..cf809749585d 100644
--- a/include/drm/ttm/ttm_placement.h
+++ b/include/drm/ttm/ttm_placement.h
@@ -48,6 +48,8 @@
* placement that can handle such scenarios is a good idea.
*/
+struct ttm_operation_ctx;
+
#define TTM_PL_SYSTEM 0
#define TTM_PL_TT 1
#define TTM_PL_VRAM 2
@@ -70,6 +72,15 @@
/* Placement is only used during eviction */
#define TTM_PL_FLAG_FALLBACK (1 << 4)
+/* Placement can only be used if threshold of moved bytes is reached */
+#define TTM_PL_FLAG_MOVE_THRESHOLD (1 << 5)
+
+/* Placement flags which depend on TTMs operation ctx. Fulfilling any flag is
+ * enough to consider the placement applicable.
+ */
+#define TTM_PL_FLAG_CTX_MASK (TTM_PL_FLAG_DESIRED | TTM_PL_FLAG_FALLBACK | \
+ TTM_PL_FLAG_MOVE_THRESHOLD)
+
/**
* struct ttm_place
*
@@ -100,4 +111,8 @@ struct ttm_placement {
const struct ttm_place *placement;
};
+bool ttm_place_applicable(const struct ttm_place *place,
+ struct ttm_operation_ctx *ctx,
+ bool evicting);
+
#endif
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 69769355139f..6ca6b7b82fb8 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -44,6 +44,7 @@ struct ttm_resource;
struct ttm_place;
struct ttm_buffer_object;
struct ttm_placement;
+struct ttm_operation_ctx;
struct iosys_map;
struct io_mapping;
struct sg_table;
@@ -370,6 +371,7 @@ bool ttm_resource_intersects(struct ttm_device *bdev,
size_t size);
bool ttm_resource_compatible(struct ttm_resource *res,
struct ttm_placement *placement,
+ struct ttm_operation_ctx *ctx,
bool evicting);
void ttm_resource_set_bo(struct ttm_resource *res,
struct ttm_buffer_object *bo);
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH 2/6] drm/ttm: add TTM_PL_FLAG_TRESHOLD
2024-06-04 16:04 ` [PATCH 2/6] drm/ttm: add TTM_PL_FLAG_TRESHOLD Christian König
@ 2024-06-05 1:12 ` kernel test robot
0 siblings, 0 replies; 14+ messages in thread
From: kernel test robot @ 2024-06-05 1:12 UTC (permalink / raw)
To: Christian König, tursulin, friedrich.vock
Cc: oe-kbuild-all, dri-devel, amd-gfx
Hi Christian,
kernel test robot noticed the following build warnings:
[auto build test WARNING on drm/drm-next]
[also build test WARNING on drm-exynos/exynos-drm-next drm-intel/for-linux-next drm-intel/for-linux-next-fixes drm-misc/drm-misc-next drm-tip/drm-tip linus/master v6.10-rc2 next-20240604]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/drm-ttm-add-TTM_PL_FLAG_TRESHOLD/20240605-040913
base: git://anongit.freedesktop.org/drm/drm drm-next
patch link: https://lore.kernel.org/r/20240604160503.43359-3-christian.koenig%40amd.com
patch subject: [PATCH 2/6] drm/ttm: add TTM_PL_FLAG_TRESHOLD
config: parisc-defconfig (https://download.01.org/0day-ci/archive/20240605/202406050819.et54U72l-lkp@intel.com/config)
compiler: hppa-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240605/202406050819.et54U72l-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406050819.et54U72l-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/gpu/drm/ttm/ttm_resource.c:339: warning: Function parameter or struct member 'ctx' not described in 'ttm_resource_compatible'
vim +339 drivers/gpu/drm/ttm/ttm_resource.c
46299051794a9c Christian König 2024-06-04 325
544432703b2fe7 Arunpravin Paneer Selvam 2022-08-20 326 /**
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 327 * ttm_resource_compatible - check if resource is compatible with placement
544432703b2fe7 Arunpravin Paneer Selvam 2022-08-20 328 *
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 329 * @res: the resource to check
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 330 * @placement: the placement to check against
cc941c70df3927 Christian König 2023-12-06 331 * @evicting: true if the caller is doing evictions
544432703b2fe7 Arunpravin Paneer Selvam 2022-08-20 332 *
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 333 * Returns true if the placement is compatible.
544432703b2fe7 Arunpravin Paneer Selvam 2022-08-20 334 */
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 335 bool ttm_resource_compatible(struct ttm_resource *res,
cc941c70df3927 Christian König 2023-12-06 336 struct ttm_placement *placement,
46299051794a9c Christian König 2024-06-04 337 struct ttm_operation_ctx *ctx,
cc941c70df3927 Christian König 2023-12-06 338 bool evicting)
98cca519df6da6 Christian König 2021-08-30 @339 {
544432703b2fe7 Arunpravin Paneer Selvam 2022-08-20 340 struct ttm_buffer_object *bo = res->bo;
544432703b2fe7 Arunpravin Paneer Selvam 2022-08-20 341 struct ttm_device *bdev = bo->bdev;
98cca519df6da6 Christian König 2021-08-30 342 unsigned i;
98cca519df6da6 Christian König 2021-08-30 343
98cca519df6da6 Christian König 2021-08-30 344 if (res->placement & TTM_PL_FLAG_TEMPORARY)
98cca519df6da6 Christian König 2021-08-30 345 return false;
98cca519df6da6 Christian König 2021-08-30 346
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 347 for (i = 0; i < placement->num_placement; i++) {
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 348 const struct ttm_place *place = &placement->placement[i];
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 349 struct ttm_resource_manager *man;
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 350
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 351 if (res->mem_type != place->mem_type)
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 352 continue;
98cca519df6da6 Christian König 2021-08-30 353
46299051794a9c Christian König 2024-06-04 354 if (!ttm_place_applicable(place, ctx, evicting))
cc941c70df3927 Christian König 2023-12-06 355 continue;
cc941c70df3927 Christian König 2023-12-06 356
cc941c70df3927 Christian König 2023-12-06 357 if (place->flags & TTM_PL_FLAG_CONTIGUOUS &&
cc941c70df3927 Christian König 2023-12-06 358 !(res->placement & TTM_PL_FLAG_CONTIGUOUS))
cc941c70df3927 Christian König 2023-12-06 359 continue;
cc941c70df3927 Christian König 2023-12-06 360
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 361 man = ttm_manager_type(bdev, res->mem_type);
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 362 if (man->func->compatible &&
a78a8da51b36c7 Somalapuram Amaranath 2023-11-13 363 !man->func->compatible(man, res, place, bo->base.size))
98cca519df6da6 Christian König 2021-08-30 364 continue;
98cca519df6da6 Christian König 2021-08-30 365
98cca519df6da6 Christian König 2021-08-30 366 return true;
98cca519df6da6 Christian König 2021-08-30 367 }
98cca519df6da6 Christian König 2021-08-30 368 return false;
98cca519df6da6 Christian König 2021-08-30 369 }
98cca519df6da6 Christian König 2021-08-30 370
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 3/6] drm/amdgpu: enable GTT fallback handling for dGPUs only
2024-06-04 16:04 Rate limit improvements for TTM Christian König
2024-06-04 16:04 ` [PATCH 1/6] drm/amdgpu: cleanup MES command submission Christian König
2024-06-04 16:04 ` [PATCH 2/6] drm/ttm: add TTM_PL_FLAG_TRESHOLD Christian König
@ 2024-06-04 16:05 ` Christian König
2024-11-12 15:12 ` Alex Deucher
2024-06-04 16:05 ` [PATCH 4/6] drm/amdgpu: re-order AMDGPU_GEM_DOMAIN_DOORBELL handling Christian König
` (2 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2024-06-04 16:05 UTC (permalink / raw)
To: tursulin, friedrich.vock; +Cc: dri-devel, amd-gfx
That is just a waste of time on APUs.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8d8c39be6129..f7b534c55c43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -180,7 +180,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
* When GTT is just an alternative to VRAM make sure that we
* only use it as fallback and still try to fill up VRAM first.
*/
- if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+ if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
+ !(adev->flags & AMD_IS_APU))
places[c].flags |= TTM_PL_FLAG_FALLBACK;
c++;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH 3/6] drm/amdgpu: enable GTT fallback handling for dGPUs only
2024-06-04 16:05 ` [PATCH 3/6] drm/amdgpu: enable GTT fallback handling for dGPUs only Christian König
@ 2024-11-12 15:12 ` Alex Deucher
0 siblings, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2024-11-12 15:12 UTC (permalink / raw)
To: Christian König; +Cc: tursulin, friedrich.vock, dri-devel, amd-gfx
On Tue, Jun 4, 2024 at 12:12 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> That is just a waste of time on APUs.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3704
Fixes: 216c1282dde3 ("drm/amdgpu: use GTT only as fallback for VRAM|GTT")
and pushed.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 8d8c39be6129..f7b534c55c43 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -180,7 +180,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
> * When GTT is just an alternative to VRAM make sure that we
> * only use it as fallback and still try to fill up VRAM first.
> */
> - if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
> + if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
> + !(adev->flags & AMD_IS_APU))
> places[c].flags |= TTM_PL_FLAG_FALLBACK;
> c++;
> }
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 4/6] drm/amdgpu: re-order AMDGPU_GEM_DOMAIN_DOORBELL handling
2024-06-04 16:04 Rate limit improvements for TTM Christian König
` (2 preceding siblings ...)
2024-06-04 16:05 ` [PATCH 3/6] drm/amdgpu: enable GTT fallback handling for dGPUs only Christian König
@ 2024-06-04 16:05 ` Christian König
2024-06-04 16:05 ` [PATCH 5/6] drm/amdgpu: always enable move threshold for BOs Christian König
2024-06-04 16:05 ` [PATCH 6/6] drm/amdgpu: Re-validate evicted buffers v2 Christian König
5 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2024-06-04 16:05 UTC (permalink / raw)
To: tursulin, friedrich.vock; +Cc: dri-devel, amd-gfx
That should probably come last.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index f7b534c55c43..8c92065c2d52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -161,14 +161,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
- if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
- places[c].fpfn = 0;
- places[c].lpfn = 0;
- places[c].mem_type = AMDGPU_PL_DOORBELL;
- places[c].flags = 0;
- c++;
- }
-
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
places[c].lpfn = 0;
@@ -218,6 +210,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
+ if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_DOORBELL;
+ places[c].flags = 0;
+ c++;
+ }
+
if (!c) {
places[c].fpfn = 0;
places[c].lpfn = 0;
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 5/6] drm/amdgpu: always enable move threshold for BOs
2024-06-04 16:04 Rate limit improvements for TTM Christian König
` (3 preceding siblings ...)
2024-06-04 16:05 ` [PATCH 4/6] drm/amdgpu: re-order AMDGPU_GEM_DOMAIN_DOORBELL handling Christian König
@ 2024-06-04 16:05 ` Christian König
2024-06-11 16:24 ` Tvrtko Ursulin
2024-06-04 16:05 ` [PATCH 6/6] drm/amdgpu: Re-validate evicted buffers v2 Christian König
5 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2024-06-04 16:05 UTC (permalink / raw)
To: tursulin, friedrich.vock; +Cc: dri-devel, amd-gfx
This should prevent buffer moves when the threshold is reached during
CS.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 36 ++++++++--------------
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 22 +++++++++----
2 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ec888fc6ead8..9a217932a4fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -784,7 +784,6 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
.no_wait_gpu = false,
.resv = bo->tbo.base.resv
};
- uint32_t domain;
int r;
if (bo->tbo.pin_count)
@@ -796,37 +795,28 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
if (p->bytes_moved < p->bytes_moved_threshold &&
(!bo->tbo.base.dma_buf ||
list_empty(&bo->tbo.base.dma_buf->attachments))) {
+
+ /* And don't move a CPU_ACCESS_REQUIRED BO to limited
+ * visible VRAM if we've depleted our allowance to do
+ * that.
+ */
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
- /* And don't move a CPU_ACCESS_REQUIRED BO to limited
- * visible VRAM if we've depleted our allowance to do
- * that.
- */
- if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
- domain = bo->preferred_domains;
- else
- domain = bo->allowed_domains;
- } else {
- domain = bo->preferred_domains;
- }
- } else {
- domain = bo->allowed_domains;
+ (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+ p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+ ctx.move_threshold = p->bytes_moved_vis_threshold -
+ p->bytes_moved_vis;
+ else
+ ctx.move_threshold = p->bytes_moved_vis_threshold -
+ p->bytes_moved;
}
-retry:
- amdgpu_bo_placement_from_domain(bo, domain);
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
amdgpu_res_cpu_visible(adev, bo->tbo.resource))
p->bytes_moved_vis += ctx.bytes_moved;
-
- if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
- domain = bo->allowed_domains;
- goto retry;
- }
-
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8c92065c2d52..cae1a5420c58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -168,13 +168,23 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
AMDGPU_PL_PREEMPT : TTM_PL_TT;
places[c].flags = 0;
- /*
- * When GTT is just an alternative to VRAM make sure that we
- * only use it as fallback and still try to fill up VRAM first.
- */
+
if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
- !(adev->flags & AMD_IS_APU))
- places[c].flags |= TTM_PL_FLAG_FALLBACK;
+ !(adev->flags & AMD_IS_APU)) {
+ /*
+ * When GTT is just an alternative to VRAM make sure that we
+ * only use it as fallback and still try to fill up VRAM first.
+ */
+ if (abo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT)
+ places[c].flags |= TTM_PL_FLAG_FALLBACK;
+
+ /*
+ * Enable GTT when the threshold of moved bytes is
+ * reached. This prevents any non essential buffer move
+ * when the links are already saturated.
+ */
+ places[c].flags |= TTM_PL_FLAG_MOVE_THRESHOLD;
+ }
c++;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH 5/6] drm/amdgpu: always enable move threshold for BOs
2024-06-04 16:05 ` [PATCH 5/6] drm/amdgpu: always enable move threshold for BOs Christian König
@ 2024-06-11 16:24 ` Tvrtko Ursulin
2024-06-14 9:53 ` Christian König
0 siblings, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2024-06-11 16:24 UTC (permalink / raw)
To: Christian König, friedrich.vock; +Cc: dri-devel, amd-gfx
Hi Christian,
On 04/06/2024 17:05, Christian König wrote:
> This should prevent buffer moves when the threshold is reached during
> CS.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 36 ++++++++--------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 22 +++++++++----
> 2 files changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index ec888fc6ead8..9a217932a4fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -784,7 +784,6 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
> .no_wait_gpu = false,
> .resv = bo->tbo.base.resv
> };
> - uint32_t domain;
> int r;
>
> if (bo->tbo.pin_count)
> @@ -796,37 +795,28 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
> if (p->bytes_moved < p->bytes_moved_threshold &&
> (!bo->tbo.base.dma_buf ||
> list_empty(&bo->tbo.base.dma_buf->attachments))) {
> +
> + /* And don't move a CPU_ACCESS_REQUIRED BO to limited
> + * visible VRAM if we've depleted our allowance to do
> + * that.
> + */
> if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
> - (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
> - /* And don't move a CPU_ACCESS_REQUIRED BO to limited
> - * visible VRAM if we've depleted our allowance to do
> - * that.
> - */
> - if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
> - domain = bo->preferred_domains;
> - else
> - domain = bo->allowed_domains;
> - } else {
> - domain = bo->preferred_domains;
> - }
> - } else {
> - domain = bo->allowed_domains;
> + (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
> + p->bytes_moved_vis < p->bytes_moved_vis_threshold)
> + ctx.move_threshold = p->bytes_moved_vis_threshold -
> + p->bytes_moved_vis;
> + else
> + ctx.move_threshold = p->bytes_moved_vis_threshold -
> + p->bytes_moved;
> }
>
> -retry:
> - amdgpu_bo_placement_from_domain(bo, domain);
> + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
> r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>
> p->bytes_moved += ctx.bytes_moved;
> if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
> amdgpu_res_cpu_visible(adev, bo->tbo.resource))
> p->bytes_moved_vis += ctx.bytes_moved;
> -
> - if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
> - domain = bo->allowed_domains;
> - goto retry;
> - }
> -
> return r;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 8c92065c2d52..cae1a5420c58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -168,13 +168,23 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
> abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
> AMDGPU_PL_PREEMPT : TTM_PL_TT;
> places[c].flags = 0;
> - /*
> - * When GTT is just an alternative to VRAM make sure that we
> - * only use it as fallback and still try to fill up VRAM first.
> - */
> +
> if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
> - !(adev->flags & AMD_IS_APU))
> - places[c].flags |= TTM_PL_FLAG_FALLBACK;
> + !(adev->flags & AMD_IS_APU)) {
> + /*
> + * When GTT is just an alternative to VRAM make sure that we
> + * only use it as fallback and still try to fill up VRAM first.
> + */
> + if (abo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT)
> + places[c].flags |= TTM_PL_FLAG_FALLBACK;
> +
> + /*
> + * Enable GTT when the threshold of moved bytes is
> + * reached. This prevents any non essential buffer move
> + * when the links are already saturated.
> + */
> + places[c].flags |= TTM_PL_FLAG_MOVE_THRESHOLD;
> + }
For the APU case I *think* this works, but for discrete I am not sure yet.
As a side note and disclaimer, the TTM "resource compatible" logic has a
half-life of about one week in my brain until I need to almost re-figure
it all out. I don't know if it just me, but I find it really
non-intuitive and almost like double, triple, or even quadruple negation
way of thinking about things.
It is not helping that with this proposal you set threshold on just one
of the possible object placements which further increases the asymmetry.
For me intuitive thing would be that thresholds apply to the act of
changing the current placement directly. Not indirectly via playing with
one of the placement flags dynamically.
Anyway, lets see.. So you set TTM_PL_FLAG_MOVE_THRESHOLD and
TTM_PL_FLAG_FALLBACK on the GTT placement, with the logic that it will
be considered compatible while under the migration budget?
(Side note, the fact both flags are set I also find very difficult to
mentally model.)
Say a buffer was evicted to GTT already. What then brings it back to VRAM?
The first subsequent ttm_bo_validate pass (!evicting) says GTT is fine
(applicable) while ctx->bytes_moved < ctx->move_threshold, no? Isn't
that the opposite of what would be required and causes nothing to be
migrated back in? What am I missing?
Regards,
Tvrtko
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH 5/6] drm/amdgpu: always enable move threshold for BOs
2024-06-11 16:24 ` Tvrtko Ursulin
@ 2024-06-14 9:53 ` Christian König
2024-06-14 16:06 ` Tvrtko Ursulin
0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2024-06-14 9:53 UTC (permalink / raw)
To: Tvrtko Ursulin, friedrich.vock; +Cc: dri-devel, amd-gfx
>> if (domain & abo->preferred_domains &
>> AMDGPU_GEM_DOMAIN_VRAM &&
>> - !(adev->flags & AMD_IS_APU))
>> - places[c].flags |= TTM_PL_FLAG_FALLBACK;
>> + !(adev->flags & AMD_IS_APU)) {
>> + /*
>> + * When GTT is just an alternative to VRAM make sure
>> that we
>> + * only use it as fallback and still try to fill up VRAM
>> first.
>> + */
>> + if (abo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT)
>> + places[c].flags |= TTM_PL_FLAG_FALLBACK;
>> +
>> + /*
>> + * Enable GTT when the threshold of moved bytes is
>> + * reached. This prevents any non essential buffer move
>> + * when the links are already saturated.
>> + */
>> + places[c].flags |= TTM_PL_FLAG_MOVE_THRESHOLD;
>> + }
>
> For the APU case I *think* this works, but for discrete I am not sure
> yet.
Agree, APUs are basically already fine as they are. VRAM is just used so
that it isn't wasted there.
>
> As a side note and disclaimer, the TTM "resource compatible" logic has
> a half-life of about one week in my brain until I need to almost
> re-figure it all out. I don't know if it just me, but I find it really
> non-intuitive and almost like double, triple, or even quadruple
> negation way of thinking about things.
Yeah I was also going back and forth between the different approaches
multiple times and just ended up in this implementation because it
seemed to do what I wanted to have.
It's certainly not very intuitive what's going on here.
>
> It is not helping that with this proposal you set threshold on just
> one of the possible object placements which further increases the
> asymmetry. For me intuitive thing would be that thresholds apply to
> the act of changing the current placement directly. Not indirectly via
> playing with one of the placement flags dynamically.
Interesting idea, how would the handling then be? Currently we have only
the stages - 'don't evict' and 'evict'. Should we make it something more
like 'don't move', 'move', 'evict' ?
>
>
> Anyway, lets see.. So you set TTM_PL_FLAG_MOVE_THRESHOLD and
> TTM_PL_FLAG_FALLBACK on the GTT placement, with the logic that it will
> be considered compatible while under the migration budget?
>
> (Side note, the fact both flags are set I also find very difficult to
> mentally model.)
>
> Say a buffer was evicted to GTT already. What then brings it back to
> VRAM?
>
> The first subsequent ttm_bo_validate pass (!evicting) says GTT is fine
> (applicable) while ctx->bytes_moved < ctx->move_threshold, no? Isn't
> that the opposite of what would be required and causes nothing to be
> migrated back in? What am I missing?
The flag says that GTT is fine when ctx->bytes_moved >=
ctx->move_threshold. The logic is exactly inverted to what you described.
This way a BO will be moved back into VRAM as long as bytes moved
doesn't exceed the threshold.
Setting both flags has the effect of saying: It's ok that the BO stays
in GTT when you either above the move threshold or would have to evict
something.
Regards,
Christian.
>
> Regards,
>
> Tvrtko
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH 5/6] drm/amdgpu: always enable move threshold for BOs
2024-06-14 9:53 ` Christian König
@ 2024-06-14 16:06 ` Tvrtko Ursulin
2024-06-28 8:13 ` Tvrtko Ursulin
0 siblings, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2024-06-14 16:06 UTC (permalink / raw)
To: Christian König, friedrich.vock; +Cc: dri-devel, amd-gfx
On 14/06/2024 10:53, Christian König wrote:
>
>>> if (domain & abo->preferred_domains &
>>> AMDGPU_GEM_DOMAIN_VRAM &&
>>> - !(adev->flags & AMD_IS_APU))
>>> - places[c].flags |= TTM_PL_FLAG_FALLBACK;
>>> + !(adev->flags & AMD_IS_APU)) {
>>> + /*
>>> + * When GTT is just an alternative to VRAM make sure
>>> that we
>>> + * only use it as fallback and still try to fill up VRAM
>>> first.
>>> + */
>>> + if (abo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT)
>>> + places[c].flags |= TTM_PL_FLAG_FALLBACK;
>>> +
>>> + /*
>>> + * Enable GTT when the threshold of moved bytes is
>>> + * reached. This prevents any non essential buffer move
>>> + * when the links are already saturated.
>>> + */
>>> + places[c].flags |= TTM_PL_FLAG_MOVE_THRESHOLD;
>>> + }
>>
>> For the APU case I *think* this works, but for discrete I am not sure
>> yet.
>
> Agree, APUs are basically already fine as they are. VRAM is just used so
> that it isn't wasted there.
Well yeah it works, but because re-validation is broken so it cannot hit
the broken migration budget. ;)
>> As a side note and disclaimer, the TTM "resource compatible" logic has
>> a half-life of about one week in my brain until I need to almost
>> re-figure it all out. I don't know if it just me, but I find it really
>> non-intuitive and almost like double, triple, or even quadruple
>> negation way of thinking about things.
>
> Yeah I was also going back and forth between the different approaches
> multiple times and just ended up in this implementation because it
> seemed to do what I wanted to have.
>
> It's certainly not very intuitive what's going on here.
>
>>
>> It is not helping that with this proposal you set threshold on just
>> one of the possible object placements which further increases the
>> asymmetry. For me intuitive thing would be that thresholds apply to
>> the act of changing the current placement directly. Not indirectly via
>> playing with one of the placement flags dynamically.
>
> Interesting idea, how would the handling then be? Currently we have only
> the stages - 'don't evict' and 'evict'. Should we make it something more
> like 'don't move', 'move', 'evict' ?
Intuitively I would think "don't move" aligns with the "out of migration
budget" concept.
Since in this patch you add move_threshold to ttm_operation_context
could it simply be used as the overall criteria if it is set?
In a way like:
1. If the current placement is from the list of userspace supplied
valid ones, and
2. Migration limit has been set, and
3. It is spent.
-> Then just don't migrate, return "all is good" from ttm_bo_validate.
Though I am not sure at the moment how that would interact with the
amdgpu_evict_flags and placements userspace did not specify.
>> Anyway, lets see.. So you set TTM_PL_FLAG_MOVE_THRESHOLD and
>> TTM_PL_FLAG_FALLBACK on the GTT placement, with the logic that it will
>> be considered compatible while under the migration budget?
>>
>> (Side note, the fact both flags are set I also find very difficult to
>> mentally model.)
>>
>> Say a buffer was evicted to GTT already. What then brings it back to
>> VRAM?
>>
>> The first subsequent ttm_bo_validate pass (!evicting) says GTT is fine
>> (applicable) while ctx->bytes_moved < ctx->move_threshold, no? Isn't
>> that the opposite of what would be required and causes nothing to be
>> migrated back in? What am I missing?
>
> The flag says that GTT is fine when ctx->bytes_moved >=
> ctx->move_threshold. The logic is exactly inverted to what you described.
>
> This way a BO will be moved back into VRAM as long as bytes moved
> doesn't exceed the threshold.
I'm afraid I need to sketch it out... If buffer is currently in GTT and
placements are VRAM+GTT.
ttm_bo_validate(evicting=false)
1st iteration:
res=GTT != place=VRAM
continue
2nd iteration:
res=GTT == place=GTT+FALLBACK+THRESHOLD
ttm_place_applicable(GTT)
moved < threshold
return true
Buffer stays in GTT while under migration budget -> wrong, no? Or am I
still confused?
Regards,
Tvrtko
> Setting both flags has the effect of saying: It's ok that the BO stays
> in GTT when you either above the move threshold or would have to evict
> something.
>
> Regards,
> Christian.
>
>>
>> Regards,
>>
>> Tvrtko
>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH 5/6] drm/amdgpu: always enable move threshold for BOs
2024-06-14 16:06 ` Tvrtko Ursulin
@ 2024-06-28 8:13 ` Tvrtko Ursulin
0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2024-06-28 8:13 UTC (permalink / raw)
To: Christian König, friedrich.vock; +Cc: dri-devel, amd-gfx
Hey Christian,
Any thoughts on the below reply? Did I get it wrong or I found a
legitimate issue?
Regards,
Tvrtko
On 14/06/2024 17:06, Tvrtko Ursulin wrote:
>
> On 14/06/2024 10:53, Christian König wrote:
>>
>>>> if (domain & abo->preferred_domains &
>>>> AMDGPU_GEM_DOMAIN_VRAM &&
>>>> - !(adev->flags & AMD_IS_APU))
>>>> - places[c].flags |= TTM_PL_FLAG_FALLBACK;
>>>> + !(adev->flags & AMD_IS_APU)) {
>>>> + /*
>>>> + * When GTT is just an alternative to VRAM make sure
>>>> that we
>>>> + * only use it as fallback and still try to fill up
>>>> VRAM first.
>>>> + */
>>>> + if (abo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT)
>>>> + places[c].flags |= TTM_PL_FLAG_FALLBACK;
>>>> +
>>>> + /*
>>>> + * Enable GTT when the threshold of moved bytes is
>>>> + * reached. This prevents any non essential buffer move
>>>> + * when the links are already saturated.
>>>> + */
>>>> + places[c].flags |= TTM_PL_FLAG_MOVE_THRESHOLD;
>>>> + }
>>>
>>> For the APU case I *think* this works, but for discrete I am not sure
>>> yet.
>>
>> Agree, APUs are basically already fine as they are. VRAM is just used
>> so that it isn't wasted there.
>
> Well yeah it works, but because re-validation is broken so it cannot hit
> the broken migration budget. ;)
>
>>> As a side note and disclaimer, the TTM "resource compatible" logic
>>> has a half-life of about one week in my brain until I need to almost
>>> re-figure it all out. I don't know if it just me, but I find it
>>> really non-intuitive and almost like double, triple, or even
>>> quadruple negation way of thinking about things.
>>
>> Yeah I was also going back and forth between the different approaches
>> multiple times and just ended up in this implementation because it
>> seemed to do what I wanted to have.
>>
>> It's certainly not very intuitive what's going on here.
>>
>>>
>>> It is not helping that with this proposal you set threshold on just
>>> one of the possible object placements which further increases the
>>> asymmetry. For me intuitive thing would be that thresholds apply to
>>> the act of changing the current placement directly. Not indirectly
>>> via playing with one of the placement flags dynamically.
>>
>> Interesting idea, how would the handling then be? Currently we have
>> only the stages - 'don't evict' and 'evict'. Should we make it
>> something more like 'don't move', 'move', 'evict' ?
>
> Intuitively I would think "don't move" aligns with the "out of migration
> budget" concept.
>
> Since in this patch you add move_threshold to ttm_operation_context
> could it simply be used as the overall criteria if it is set?
>
> In a way like:
>
> 1. If the current placement is from the list of userspace supplied
> valid ones, and
> 2. Migration limit has been set, and
> 3. It is spent.
>
> -> Then just don't migrate, return "all is good" from ttm_bo_validate.
>
> Though I am not sure at the moment how that would interact with the
> amdgpu_evict_flags and placements userspace did not specify.
>
>>> Anyway, lets see.. So you set TTM_PL_FLAG_MOVE_THRESHOLD and
>>> TTM_PL_FLAG_FALLBACK on the GTT placement, with the logic that it
>>> will be considered compatible while under the migration budget?
>>>
>>> (Side note, the fact both flags are set I also find very difficult to
>>> mentally model.)
>>>
>>> Say a buffer was evicted to GTT already. What then brings it back to
>>> VRAM?
>>>
>>> The first subsequent ttm_bo_validate pass (!evicting) says GTT is
>>> fine (applicable) while ctx->bytes_moved < ctx->move_threshold, no?
>>> Isn't that the opposite of what would be required and causes nothing
>>> to be migrated back in? What am I missing?
>>
>> The flag says that GTT is fine when ctx->bytes_moved >=
>> ctx->move_threshold. The logic is exactly inverted to what you described.
>>
>> This way a BO will be moved back into VRAM as long as bytes moved
>> doesn't exceed the threshold.
>
> I'm afraid I need to sketch it out... If buffer is currently in GTT and
> placements are VRAM+GTT.
>
> ttm_bo_validate(evicting=false)
>
> 1st iteration:
> res=GTT != place=VRAM
> continue
>
> 2nd iteration:
> res=GTT == place=GTT+FALLBACK+THRESHOLD
>
> ttm_place_applicable(GTT)
> moved < threshold
> return true
>
> Buffer stays in GTT while under migration budget -> wrong, no? Or am I
> still confused?
>
> Regards,
>
> Tvrtko
>
>> Setting both flags has the effect of saying: It's ok that the BO stays
>> in GTT when you either above the move threshold or would have to evict
>> something.
>>
>> Regards,
>> Christian.
>>
>>>
>>> Regards,
>>>
>>> Tvrtko
>>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 6/6] drm/amdgpu: Re-validate evicted buffers v2
2024-06-04 16:04 Rate limit improvements for TTM Christian König
` (4 preceding siblings ...)
2024-06-04 16:05 ` [PATCH 5/6] drm/amdgpu: always enable move threshold for BOs Christian König
@ 2024-06-04 16:05 ` Christian König
2024-08-13 15:07 ` Tvrtko Ursulin
5 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2024-06-04 16:05 UTC (permalink / raw)
To: tursulin, friedrich.vock; +Cc: dri-devel, amd-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Currently the driver appears to be thinking that it will be attempting to
re-validate the evicted buffers on the next submission if they are not in
their preferred placement.
That however appears not to be true for the very common case of buffers
with allowed placements of VRAM+GTT. Simply because the check can only
detect if the current placement is *none* of the preferred ones, happily
leaving VRAM+GTT buffers in the GTT placement "forever".
Fix it by extending the VRAM+GTT special case to the re-validation logic.
v2: re-work the criteria to consider if something is in it's preferred
placement or not and also disable the handling on APUs.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4e2391c83d7c..1a470dafa93d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1242,15 +1242,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
return r;
}
- /* If the BO is not in its preferred location add it back to
- * the evicted list so that it gets validated again on the
- * next command submission.
- */
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
- uint32_t mem_type = bo->tbo.resource->mem_type;
-
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
+ /*
+ * If the preferred location is VRAM but we placed it into GTT
+ * add it back to the evicted list so that it gets validated
+ * again on the next command submission.
+ */
+ if (!(adev->flags & AMD_IS_APU) &&
+ bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
+ bo->tbo.resource->mem_type != TTM_PL_VRAM)
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
--
2.34.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH 6/6] drm/amdgpu: Re-validate evicted buffers v2
2024-06-04 16:05 ` [PATCH 6/6] drm/amdgpu: Re-validate evicted buffers v2 Christian König
@ 2024-08-13 15:07 ` Tvrtko Ursulin
0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2024-08-13 15:07 UTC (permalink / raw)
To: Christian König, friedrich.vock; +Cc: dri-devel, amd-gfx
I was waiting for some replies elsewhere on this thread. Anwyay.. for
the below, because I don't understand how come an important fix like
this is not garnering more attention:
On 04/06/2024 17:05, Christian König wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Since you pretty much changed my logic you could also "demote" me to
Reported-by:. In which case I could theoretically even provide an r-b. :)
>
> Currently the driver appears to be thinking that it will be attempting to
> re-validate the evicted buffers on the next submission if they are not in
> their preferred placement.
>
> That however appears not to be true for the very common case of buffers
> with allowed placements of VRAM+GTT. Simply because the check can only
> detect if the current placement is *none* of the preferred ones, happily
> leaving VRAM+GTT buffers in the GTT placement "forever".
>
> Fix it by extending the VRAM+GTT special case to the re-validation logic.
>
> v2: re-work the criteria to consider if something is in it's preferred
> placement or not and also disable the handling on APUs.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++--------
> 1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 4e2391c83d7c..1a470dafa93d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1242,15 +1242,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> return r;
> }
>
> - /* If the BO is not in its preferred location add it back to
> - * the evicted list so that it gets validated again on the
> - * next command submission.
> - */
> if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
> - uint32_t mem_type = bo->tbo.resource->mem_type;
> -
> - if (!(bo->preferred_domains &
> - amdgpu_mem_type_to_domain(mem_type)))
> + /*
> + * If the preferred location is VRAM but we placed it into GTT
> + * add it back to the evicted list so that it gets validated
> + * again on the next command submission.
> + */
> + if (!(adev->flags & AMD_IS_APU) &&
> + bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
> + bo->tbo.resource->mem_type != TTM_PL_VRAM)
I think this logic works to fix discrete and APU apart that I don't like
the special casing things so much. Which is perhaps a consequence of too
many dynamic games with placements. :(
That said, my version also had a little bit of special casing so perhaps
the battle for clean design has been lost.
So ack from me for this version. Not sure it is good to merge it though
without also fixing the migration throttling and that is a more
difficult one (the part where I was awaiting a reply).
Regards,
Tvrtko
> amdgpu_vm_bo_evicted(&bo_va->base);
> else
> amdgpu_vm_bo_idle(&bo_va->base);
^ permalink raw reply [flat|nested] 14+ messages in thread