* [PATCH 1/9] drm/amd: Add new kfd-->kgd interface for gart usage
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:39 ` [PATCH 2/9] drm/radeon: Impl. new gtt allocate/free functions Oded Gabbay
` (9 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
This patch adds two new functions to the kfd-->kgd interface:
init_gtt_mem_allocation, which allocates a buffer on the gart aperture that is
large enough for amdkfd's needs, such as mqds, hpds, kernel queue, fence and runlists. This function
is only called once per GPU device. The size of the allocated buffer is
based on the maximum number of HSA processes and maximum number of queues
per HSA process (two amdkfd kernel module parameters).
free_gtt_mem, which frees a buffer that was allocated on the gart aperture.
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 47b5519..47d0b87 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -122,6 +122,11 @@ struct kgd2kfd_calls {
*
* @free_mem: Frees a buffer that was allocated by amdkfd's sa manager
*
+ * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture.
+ * The buffer can be used for mqds, hpds, kernel queue, fence and runlists
+ *
+ * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture
+ *
* @get_vmem_size: Retrieves (physical) size of VRAM
*
* @get_gpu_clock_counter: Retrieves GPU clock counter
@@ -160,8 +165,12 @@ struct kfd2kgd_calls {
void (*fini_sa_manager)(struct kgd_dev *kgd);
int (*allocate_mem)(struct kgd_dev *kgd, size_t size, size_t alignment,
enum kgd_memory_pool pool, struct kgd_mem **mem);
+ int (*init_gtt_mem_allocation)(struct kgd_dev *kgd, size_t size,
+ void **mem_obj, uint64_t *gpu_addr,
+ void **cpu_ptr);
void (*free_mem)(struct kgd_dev *kgd, struct kgd_mem *mem);
+ void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj);
uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd);
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 2/9] drm/radeon: Impl. new gtt allocate/free functions
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
2014-12-31 13:39 ` [PATCH 1/9] drm/amd: Add new kfd-->kgd interface for gart usage Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:39 ` [PATCH 3/9] drm/amdkfd: Add gtt sa related data to kfd_dev struct Oded Gabbay
` (8 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
This patch adds the implementation of the gtt interface functions.
The allocate function will allocate a single bo, pin and map it to kernel
memory. It will return the gpu address and cpu ptr as arguments.
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/drm/radeon/radeon_kfd.c | 85 +++++++++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index d3e78b4..ddd2afd 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -37,6 +37,8 @@ struct kgd_mem {
struct radeon_sa_bo *sa_bo;
uint64_t gpu_addr;
void *ptr;
+ struct radeon_bo *bo;
+ void *cpu_ptr;
};
static int init_sa_manager(struct kgd_dev *kgd, unsigned int size);
@@ -47,6 +49,12 @@ static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,
static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem);
+static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+ void **mem_obj, uint64_t *gpu_addr,
+ void **cpu_ptr);
+
+static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+
static uint64_t get_vmem_size(struct kgd_dev *kgd);
static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
@@ -84,6 +92,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.fini_sa_manager = fini_sa_manager,
.allocate_mem = allocate_mem,
.free_mem = free_mem,
+ .init_gtt_mem_allocation = alloc_gtt_mem,
+ .free_gtt_mem = free_gtt_mem,
.get_vmem_size = get_vmem_size,
.get_gpu_clock_counter = get_gpu_clock_counter,
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
@@ -278,6 +288,81 @@ static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem)
kfree(mem);
}
+/**
+ * alloc_gtt_mem - create, pin and kernel-map one GTT buffer object for amdkfd
+ * @kgd: opaque device handle; used as a struct radeon_device
+ * @size: size in bytes passed to radeon_bo_create()
+ * @mem_obj: out; receives the tracking object to hand to free_gtt_mem()
+ * @gpu_addr: out; receives the address reported by radeon_bo_pin()
+ * @cpu_ptr: out; receives the mapping reported by radeon_bo_kmap()
+ *
+ * Return: 0 on success, -ENOMEM if the tracking object cannot be allocated,
+ * otherwise the error of the radeon_bo_* call that failed. On any failure
+ * everything acquired so far is released and *mem_obj is set to NULL.
+ */
+static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+			void **mem_obj, uint64_t *gpu_addr,
+			void **cpu_ptr)
+{
+	struct radeon_device *rdev = (struct radeon_device *)kgd;
+	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
+	int r;
+
+	BUG_ON(kgd == NULL);
+	BUG_ON(mem_obj == NULL);
+	BUG_ON(gpu_addr == NULL);
+	BUG_ON(cpu_ptr == NULL);
+
+	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if ((*mem) == NULL)
+		return -ENOMEM;
+
+	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
+				RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
+	if (r) {
+		dev_err(rdev->dev,
+			"failed to allocate BO for amdkfd (%d)\n", r);
+		/* no bo exists yet, only the tracking object must go */
+		goto allocate_mem_create_bo_failed;
+	}
+
+	/* map the buffer */
+	r = radeon_bo_reserve((*mem)->bo, true);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+		goto allocate_mem_reserve_bo_failed;
+	}
+
+	r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
+				&(*mem)->gpu_addr);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+		goto allocate_mem_pin_bo_failed;
+	}
+	*gpu_addr = (*mem)->gpu_addr;
+
+	r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
+	if (r) {
+		dev_err(rdev->dev,
+			"(%d) failed to map bo to kernel for amdkfd\n", r);
+		goto allocate_mem_kmap_bo_failed;
+	}
+	*cpu_ptr = (*mem)->cpu_ptr;
+
+	radeon_bo_unreserve((*mem)->bo);
+
+	return 0;
+
+	/* unwind in reverse order of acquisition */
+allocate_mem_kmap_bo_failed:
+	radeon_bo_unpin((*mem)->bo);
+allocate_mem_pin_bo_failed:
+	radeon_bo_unreserve((*mem)->bo);
+allocate_mem_reserve_bo_failed:
+	radeon_bo_unref(&(*mem)->bo);
+allocate_mem_create_bo_failed:
+	/*
+	 * The tracking object used to be leaked on every error path; free it
+	 * and clear the caller's handle so it cannot be used or freed again.
+	 */
+	kfree(*mem);
+	*mem = NULL;
+
+	return r;
+}
+
+/*
+ * free_gtt_mem - release a buffer obtained from alloc_gtt_mem()
+ * @kgd: device handle (not used here)
+ * @mem_obj: tracking object that alloc_gtt_mem() stored in its mem_obj output
+ *
+ * With the bo reserved, the kernel mapping and the pin are dropped; then the
+ * bo reference and the tracking object itself are released.
+ */
+static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
+{
+	struct kgd_mem *gtt_obj = mem_obj;
+
+	BUG_ON(!gtt_obj);
+
+	radeon_bo_reserve(gtt_obj->bo, true);
+	radeon_bo_kunmap(gtt_obj->bo);
+	radeon_bo_unpin(gtt_obj->bo);
+	radeon_bo_unreserve(gtt_obj->bo);
+	radeon_bo_unref(&gtt_obj->bo);
+	kfree(gtt_obj);
+}
+
static uint64_t get_vmem_size(struct kgd_dev *kgd)
{
struct radeon_device *rdev = (struct radeon_device *)kgd;
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 3/9] drm/amdkfd: Add gtt sa related data to kfd_dev struct
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
2014-12-31 13:39 ` [PATCH 1/9] drm/amd: Add new kfd-->kgd interface for gart usage Oded Gabbay
2014-12-31 13:39 ` [PATCH 2/9] drm/radeon: Impl. new gtt allocate/free functions Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:39 ` [PATCH 4/9] drm/amdkfd: Add kfd gtt sub-allocator functions Oded Gabbay
` (7 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
This patch adds new fields to kfd_dev struct that are necessary for the new kfd
gtt sa module
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f9fb81e..685e4e1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -110,6 +110,13 @@ struct kfd_device_info {
uint16_t mqd_size_aligned;
};
+struct kfd_mem_obj { /* one sub-allocation made by kfd_gtt_sa_allocate() */
+ uint32_t range_start; /* bitmap index of the first chunk in the range */
+ uint32_t range_end; /* bitmap index of the last chunk (inclusive) */
+ uint64_t gpu_addr; /* GPU address of the first chunk */
+ uint32_t *cpu_ptr; /* CPU pointer to the first chunk */
+};
+
struct kfd_dev {
struct kgd_dev *kgd;
@@ -135,6 +142,14 @@ struct kfd_dev {
struct kgd2kfd_shared_resources shared_resources;
+ void *gtt_mem;
+ uint64_t gtt_start_gpu_addr;
+ void *gtt_start_cpu_ptr;
+ void *gtt_sa_bitmap;
+ struct mutex gtt_sa_lock;
+ unsigned int gtt_sa_chunk_size;
+ unsigned int gtt_sa_num_of_chunks;
+
void *interrupt_ring;
size_t interrupt_ring_size;
atomic_t interrupt_ring_rptr;
@@ -162,12 +177,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd);
extern const struct kfd2kgd_calls *kfd2kgd;
-struct kfd_mem_obj {
- void *bo;
- uint64_t gpu_addr;
- uint32_t *cpu_ptr;
-};
-
enum kfd_mempool {
KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 4/9] drm/amdkfd: Add kfd gtt sub-allocator functions
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
` (2 preceding siblings ...)
2014-12-31 13:39 ` [PATCH 3/9] drm/amdkfd: Add gtt sa related data to kfd_dev struct Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:39 ` [PATCH 5/9] drm/amdkfd: Fixed calculation of gart buffer size Oded Gabbay
` (6 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
This patch adds new kfd gtt sub-allocator functions that service the amdkfd
driver when it wants to use gtt memory.
The sub-allocator uses a bitmap to handle the memory area that was transferred
to it during init. It divides the memory area into chunks, according to chunk
size parameter.
The allocation function will allocate contiguous chunks from that memory area,
according to the requested size. If the requested size is smaller than the
chunk size, a single chunk will be allocated.
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 179 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 ++
2 files changed, 186 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 43884eb..c1ec162 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -66,6 +66,10 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x131D, &kaveri_device_info }, /* Kaveri */
};
+static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+ unsigned int chunk_size);
+static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
+
static const struct kfd_device_info *lookup_device_info(unsigned short did)
{
size_t i;
@@ -306,3 +310,178 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock(&kfd->interrupt_lock);
}
}
+
+/*
+ * kfd_gtt_sa_init - set up the bitmap sub-allocator over kfd->gtt_mem
+ * @kfd: device whose gtt_sa_* fields are initialised
+ * @buf_size: size in bytes of the buffer to manage
+ * @chunk_size: allocation granularity in bytes; must be non-zero
+ *
+ * The buffer is divided into buf_size / chunk_size chunks, each tracked by
+ * one bit (clear means free). A tail smaller than one chunk is not managed.
+ *
+ * Return: 0 on success, -ENOMEM if the bitmap cannot be allocated.
+ */
+static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+				unsigned int chunk_size)
+{
+	BUG_ON(!kfd);
+	BUG_ON(!kfd->gtt_mem);
+	/* checked explicitly: it is used as a divisor below */
+	BUG_ON(chunk_size == 0);
+	BUG_ON(buf_size < chunk_size);
+
+	kfd->gtt_sa_chunk_size = chunk_size;
+	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
+
+	/*
+	 * find_next_zero_bit(), set_bit() and clear_bit() access the map in
+	 * units of unsigned long, so size it in longs and round up. Dividing
+	 * the chunk count by BITS_PER_BYTE rounds down and leaves the map too
+	 * short whenever the count is not a multiple of BITS_PER_LONG.
+	 */
+	kfd->gtt_sa_bitmap = kcalloc(BITS_TO_LONGS(kfd->gtt_sa_num_of_chunks),
+					sizeof(unsigned long), GFP_KERNEL);
+
+	if (!kfd->gtt_sa_bitmap)
+		return -ENOMEM;
+
+	pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
+			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
+
+	mutex_init(&kfd->gtt_sa_lock);
+
+	return 0;
+}
+
+/* Undo kfd_gtt_sa_init(): free the chunk bitmap and destroy its mutex. */
+static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
+{
+	kfree(kfd->gtt_sa_bitmap);
+	mutex_destroy(&kfd->gtt_sa_lock);
+}
+
+/*
+ * Translate a chunk index into a GPU address.
+ * @start_addr: GPU address of chunk 0
+ * @bit_num: chunk index (bit position in the allocator bitmap)
+ * @chunk_size: size of one chunk in bytes
+ *
+ * Return: start_addr + bit_num * chunk_size, computed in 64 bits.
+ */
+static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
+						unsigned int bit_num,
+						unsigned int chunk_size)
+{
+	/* widen first so the product is not truncated to 32 bits */
+	return start_addr + (uint64_t) bit_num * chunk_size;
+}
+
+/*
+ * Translate a chunk index into a CPU pointer.
+ * @start_addr: CPU address of chunk 0
+ * @bit_num: chunk index (bit position in the allocator bitmap)
+ * @chunk_size: size of one chunk in bytes
+ *
+ * Return: pointer located bit_num * chunk_size bytes past start_addr.
+ */
+static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
+						unsigned int bit_num,
+						unsigned int chunk_size)
+{
+	/*
+	 * Byte-wise pointer arithmetic keeps the value pointer-sized on every
+	 * architecture; going through uint64_t is a size-mismatched cast on
+	 * 32-bit builds.
+	 */
+	return (uint32_t *) ((char *) start_addr +
+				(size_t) bit_num * chunk_size);
+}
+
+/*
+ * kfd_gtt_sa_allocate - reserve a contiguous run of chunks in the GTT buffer
+ * @kfd: device that owns the sub-allocator
+ * @size: number of bytes requested; whole chunks are handed out
+ * @mem_obj: out; on success points to a newly allocated descriptor that must
+ *           be released with kfd_gtt_sa_free()
+ *
+ * The bitmap is scanned from bit 0, under gtt_sa_lock, for the first run of
+ * free chunks that covers @size. A request no larger than one chunk takes the
+ * first free chunk.
+ *
+ * Return: 0 on success; -EINVAL if @size is 0; -ENOMEM if @size exceeds the
+ * managed area, the descriptor cannot be allocated, or no suitable run of
+ * free chunks exists (in that last case *mem_obj is reset to NULL).
+ */
+int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
+			struct kfd_mem_obj **mem_obj)
+{
+	unsigned int found, start_search, cur_size;
+
+	BUG_ON(!kfd);
+
+	if (size == 0)
+		return -EINVAL;
+
+	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
+		return -ENOMEM;
+
+	*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if ((*mem_obj) == NULL)
+		return -ENOMEM;
+
+	pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);
+
+	start_search = 0;
+
+	mutex_lock(&kfd->gtt_sa_lock);
+
+kfd_gtt_restart_search:
+	/* Find the first chunk that is free */
+	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
+					kfd->gtt_sa_num_of_chunks,
+					start_search);
+
+	pr_debug("kfd: found = %d\n", found);
+
+	/* If there wasn't any free chunk, bail out */
+	if (found == kfd->gtt_sa_num_of_chunks)
+		goto kfd_gtt_no_free_chunk;
+
+	/* Update fields of mem_obj */
+	(*mem_obj)->range_start = found;
+	(*mem_obj)->range_end = found;
+	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
+					kfd->gtt_start_gpu_addr,
+					found,
+					kfd->gtt_sa_chunk_size);
+	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
+					kfd->gtt_start_cpu_ptr,
+					found,
+					kfd->gtt_sa_chunk_size);
+
+	pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
+			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
+
+	/* If we need only one chunk, mark it as allocated and get out */
+	if (size <= kfd->gtt_sa_chunk_size) {
+		pr_debug("kfd: single bit\n");
+		set_bit(found, kfd->gtt_sa_bitmap);
+		goto kfd_gtt_out;
+	}
+
+	/* Otherwise, try to see if we have enough contiguous chunks */
+	cur_size = size - kfd->gtt_sa_chunk_size;
+	do {
+		(*mem_obj)->range_end =
+			find_next_zero_bit(kfd->gtt_sa_bitmap,
+					kfd->gtt_sa_num_of_chunks, ++found);
+		/*
+		 * If the next free chunk is not contiguous, then we need to
+		 * restart our search past the run we have been extending
+		 * (which turned out to be too short).
+		 */
+		if ((*mem_obj)->range_end != found) {
+			start_search = found;
+			goto kfd_gtt_restart_search;
+		}
+
+		/*
+		 * If we reached end of buffer, bail out with error
+		 */
+		if (found == kfd->gtt_sa_num_of_chunks)
+			goto kfd_gtt_no_free_chunk;
+
+		/* Check if we don't need another chunk */
+		if (cur_size <= kfd->gtt_sa_chunk_size)
+			cur_size = 0;
+		else
+			cur_size -= kfd->gtt_sa_chunk_size;
+
+	} while (cur_size > 0);
+
+	pr_debug("kfd: range_start = %d, range_end = %d\n",
+		(*mem_obj)->range_start, (*mem_obj)->range_end);
+
+	/* Mark the chunks as allocated */
+	for (found = (*mem_obj)->range_start;
+		found <= (*mem_obj)->range_end;
+		found++)
+		set_bit(found, kfd->gtt_sa_bitmap);
+
+kfd_gtt_out:
+	mutex_unlock(&kfd->gtt_sa_lock);
+	return 0;
+
+kfd_gtt_no_free_chunk:
+	pr_debug("kfd: allocation failed with mem_obj = %p\n", *mem_obj);
+	mutex_unlock(&kfd->gtt_sa_lock);
+	/*
+	 * Free the descriptor itself. mem_obj is the address of the caller's
+	 * pointer variable, which did not come from kmalloc() and must never
+	 * be handed to kfree().
+	 */
+	kfree(*mem_obj);
+	*mem_obj = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * kfd_gtt_sa_free - give a sub-allocation back to the GTT sub-allocator
+ * @kfd: device that owns the sub-allocator
+ * @mem_obj: descriptor produced by kfd_gtt_sa_allocate(); freed here
+ *
+ * Clears bits range_start..range_end (inclusive) of the chunk bitmap while
+ * holding gtt_sa_lock, then frees the descriptor.
+ *
+ * Return: always 0.
+ */
+int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
+{
+	unsigned int chunk;
+
+	BUG_ON(!kfd);
+	BUG_ON(!mem_obj);
+
+	pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
+		mem_obj, mem_obj->range_start, mem_obj->range_end);
+
+	mutex_lock(&kfd->gtt_sa_lock);
+
+	/* Mark the chunks as free */
+	chunk = mem_obj->range_start;
+	while (chunk <= mem_obj->range_end) {
+		clear_bit(chunk, kfd->gtt_sa_bitmap);
+		chunk++;
+	}
+
+	mutex_unlock(&kfd->gtt_sa_lock);
+
+	kfree(mem_obj);
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 685e4e1..2e39b1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -510,6 +510,13 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int queue_id);
+/* GTT Sub-Allocator */
+
+int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
+ struct kfd_mem_obj **mem_obj);
+
+int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);
+
extern struct device *kfd_device;
/* Topology */
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 5/9] drm/amdkfd: Fixed calculation of gart buffer size
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
` (3 preceding siblings ...)
2014-12-31 13:39 ` [PATCH 4/9] drm/amdkfd: Add kfd gtt sub-allocator functions Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:39 ` [PATCH 6/9] drm/amdkfd: Allocate gart memory using new interface Oded Gabbay
` (5 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
This patch makes the gart's buffer size calculation more accurate. This buffer
is needed per GPU.
It takes into account maximum number of MQDs, runlist packets, kernel queues
and reserves 512KB for other misc allocations.
The total size is just shy of 4MB, for 32 processes and 128 queues per
process, which are the defaults for amdkfd kernel module parameters.
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index c1ec162..163b1a4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
#define MQD_SIZE_ALIGNED 768
@@ -177,16 +178,31 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
max_num_of_queues_per_process *
kfd->device_info->mqd_size_aligned;
- /* add another 512KB for all other allocations on gart */
+ /*
+ * calculate max size of runlist packet.
+ * There can be only 2 packets at once
+ */
+ size += (max_num_of_processes * sizeof(struct pm4_map_process) +
+ max_num_of_processes * max_num_of_queues_per_process *
+ sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
+
+ /* Add size of HIQ & DIQ */
+ size += KFD_KERNEL_QUEUE_SIZE * 2;
+
+ /* add another 512KB for all other allocations on gart (HPD, fences) */
size += 512 * 1024;
if (kfd2kgd->init_sa_manager(kfd->kgd, size)) {
dev_err(kfd_device,
- "Error initializing sa manager for device (%x:%x)\n",
- kfd->pdev->vendor, kfd->pdev->device);
+ "Could not allocate %d bytes for device (%x:%x)\n",
+ size, kfd->pdev->vendor, kfd->pdev->device);
goto out;
}
+ dev_info(kfd_device,
+ "Allocated %d bytes on gart for device(%x:%x)\n",
+ size, kfd->pdev->vendor, kfd->pdev->device);
+
kfd_doorbell_init(kfd);
if (kfd_topology_add_device(kfd) != 0) {
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 6/9] drm/amdkfd: Allocate gart memory using new interface
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
` (4 preceding siblings ...)
2014-12-31 13:39 ` [PATCH 5/9] drm/amdkfd: Fixed calculation of gart buffer size Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:39 ` [PATCH 7/9] drm/amdkfd: Using new gtt sa in amdkfd Oded Gabbay
` (4 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
This patch changes the calls to allocate the gart memory for amdkfd from the
old interface (radeon_sa) to the new one (kfd_gtt_sa)
The new gart sub-allocator is initialized with chunk size equal to 512 bytes.
This is because the KV MQD is 512 Bytes and most of the sub-allocations are
MQDs.
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 163b1a4..ac94dc3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -192,7 +192,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
/* add another 512KB for all other allocations on gart (HPD, fences) */
size += 512 * 1024;
- if (kfd2kgd->init_sa_manager(kfd->kgd, size)) {
+ if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem,
+ &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) {
dev_err(kfd_device,
"Could not allocate %d bytes for device (%x:%x)\n",
size, kfd->pdev->vendor, kfd->pdev->device);
@@ -203,6 +204,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
"Allocated %d bytes on gart for device(%x:%x)\n",
size, kfd->pdev->vendor, kfd->pdev->device);
+ /* Initialize GTT sa with 512 byte chunk size */
+ if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
+ dev_err(kfd_device,
+ "Error initializing gtt sub-allocator\n");
+ goto kfd_gtt_sa_init_error;
+ }
+
kfd_doorbell_init(kfd);
if (kfd_topology_add_device(kfd) != 0) {
@@ -261,7 +269,9 @@ device_iommu_pasid_error:
kfd_interrupt_error:
kfd_topology_remove_device(kfd);
kfd_topology_add_device_error:
- kfd2kgd->fini_sa_manager(kfd->kgd);
+ kfd_gtt_sa_fini(kfd);
+kfd_gtt_sa_init_error:
+ kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
dev_err(kfd_device,
"device (%x:%x) NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -276,6 +286,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
amd_iommu_free_device(kfd->pdev);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
+ kfd_gtt_sa_fini(kfd);
+ kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
}
kfree(kfd);
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 7/9] drm/amdkfd: Using new gtt sa in amdkfd
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
` (5 preceding siblings ...)
2014-12-31 13:39 ` [PATCH 6/9] drm/amdkfd: Allocate gart memory using new interface Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:39 ` [PATCH 8/9] drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface Oded Gabbay
` (3 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
This patch changes the calls throughout the amdkfd driver from the old kfd-->kgd
interface to the new kfd gtt sa inside amdkfd.
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 23 ++++--------
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 41 ++++++++--------------
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 16 +++------
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 10 ++----
4 files changed, 30 insertions(+), 60 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f44d673..a6ea5cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -500,11 +500,8 @@ static int init_pipelines(struct device_queue_manager *dqm,
* because it contains no data when there are no active queues.
*/
- err = kfd2kgd->allocate_mem(dqm->dev->kgd,
- CIK_HPD_EOP_BYTES * pipes_num,
- PAGE_SIZE,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &dqm->pipeline_mem);
+ err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
+ &dqm->pipeline_mem);
if (err) {
pr_err("kfd: error allocate vidmem num pipes: %d\n",
@@ -519,8 +516,7 @@ static int init_pipelines(struct device_queue_manager *dqm,
mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
if (mqd == NULL) {
- kfd2kgd->free_mem(dqm->dev->kgd,
- (struct kgd_mem *) dqm->pipeline_mem);
+ kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
return -ENOMEM;
}
@@ -594,8 +590,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm)
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
kfree(dqm->mqds[i]);
mutex_destroy(&dqm->lock);
- kfd2kgd->free_mem(dqm->dev->kgd,
- (struct kgd_mem *) dqm->pipeline_mem);
+ kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}
static int start_nocpsch(struct device_queue_manager *dqm)
@@ -681,11 +676,8 @@ static int start_cpsch(struct device_queue_manager *dqm)
pr_debug("kfd: allocating fence memory\n");
/* allocate fence memory on the gart */
- retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
- sizeof(*dqm->fence_addr),
- 32,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &dqm->fence_mem);
+ retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
+ &dqm->fence_mem);
if (retval != 0)
goto fail_allocate_vidmem;
@@ -721,8 +713,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
pdd = qpd_to_pdd(node->qpd);
pdd->bound = false;
}
- kfd2kgd->free_mem(dqm->dev->kgd,
- (struct kgd_mem *) dqm->fence_mem);
+ kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
pm_uninit(&dqm->packets);
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 9350714..0fd8bb7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -72,11 +72,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
if (prop.doorbell_ptr == NULL)
goto err_get_kernel_doorbell;
- retval = kfd2kgd->allocate_mem(dev->kgd,
- queue_size,
- PAGE_SIZE,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &kq->pq);
+ retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0)
goto err_pq_allocate_vidmem;
@@ -84,11 +80,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->pq_kernel_addr = kq->pq->cpu_ptr;
kq->pq_gpu_addr = kq->pq->gpu_addr;
- retval = kfd2kgd->allocate_mem(dev->kgd,
- sizeof(*kq->rptr_kernel),
- 32,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &kq->rptr_mem);
+ retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
+ &kq->rptr_mem);
if (retval != 0)
goto err_rptr_allocate_vidmem;
@@ -96,11 +89,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd2kgd->allocate_mem(dev->kgd,
- sizeof(*kq->wptr_kernel),
- 32,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &kq->wptr_mem);
+ retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
+ &kq->wptr_mem);
if (retval != 0)
goto err_wptr_allocate_vidmem;
@@ -145,11 +135,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
} else {
/* allocate fence for DIQ */
- retval = kfd2kgd->allocate_mem(dev->kgd,
- sizeof(uint32_t),
- 32,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &kq->fence_mem_obj);
+ retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
+ &kq->fence_mem_obj);
if (retval != 0)
goto err_alloc_fence;
@@ -165,11 +152,11 @@ err_alloc_fence:
err_init_mqd:
uninit_queue(kq->queue);
err_init_queue:
- kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->wptr_mem);
+ kfd_gtt_sa_free(dev, kq->wptr_mem);
err_wptr_allocate_vidmem:
- kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->rptr_mem);
+ kfd_gtt_sa_free(dev, kq->rptr_mem);
err_rptr_allocate_vidmem:
- kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->pq);
+ kfd_gtt_sa_free(dev, kq->pq);
err_pq_allocate_vidmem:
pr_err("kfd: error init pq\n");
kfd_release_kernel_doorbell(dev, prop.doorbell_ptr);
@@ -190,10 +177,12 @@ static void uninitialize(struct kernel_queue *kq)
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
kq->queue->pipe,
kq->queue->queue);
+ else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
+ kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
- kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->rptr_mem);
- kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->wptr_mem);
- kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->pq);
+ kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
+ kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
+ kfd_gtt_sa_free(kq->dev, kq->pq);
kfd_release_kernel_doorbell(kq->dev,
kq->queue->properties.doorbell_ptr);
uninit_queue(kq->queue);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index adc3147..ce082cc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -51,11 +51,8 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
pr_debug("kfd: In func %s\n", __func__);
- retval = kfd2kgd->allocate_mem(mm->dev->kgd,
- sizeof(struct cik_mqd),
- 256,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) mqd_mem_obj);
+ retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
+ mqd_mem_obj);
if (retval != 0)
return -ENOMEM;
@@ -115,7 +112,7 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
BUG_ON(!mm || !mqd);
- kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj);
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
@@ -207,11 +204,8 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
pr_debug("kfd: In func %s\n", __func__);
- retval = kfd2kgd->allocate_mem(mm->dev->kgd,
- sizeof(struct cik_mqd),
- 256,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) mqd_mem_obj);
+ retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
+ mqd_mem_obj);
if (retval != 0)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 5ce9233..3cda952 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -97,11 +97,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
- retval = kfd2kgd->allocate_mem(pm->dqm->dev->kgd,
- *rl_buffer_size,
- PAGE_SIZE,
- KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
- (struct kgd_mem **) &pm->ib_buffer_obj);
+ retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
+ &pm->ib_buffer_obj);
if (retval != 0) {
pr_err("kfd: failed to allocate runlist IB\n");
@@ -557,8 +554,7 @@ void pm_release_ib(struct packet_manager *pm)
mutex_lock(&pm->lock);
if (pm->allocated) {
- kfd2kgd->free_mem(pm->dqm->dev->kgd,
- (struct kgd_mem *) pm->ib_buffer_obj);
+ kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
pm->allocated = false;
}
mutex_unlock(&pm->lock);
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 8/9] drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
` (6 preceding siblings ...)
2014-12-31 13:39 ` [PATCH 7/9] drm/amdkfd: Using new gtt sa in amdkfd Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:39 ` [PATCH 9/9] drm/amd: Remove old radeon_sa funcs " Oded Gabbay
` (2 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/drm/radeon/radeon_kfd.c | 99 +------------------------------------
1 file changed, 1 insertion(+), 98 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index ddd2afd..56bfab3 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -34,20 +34,11 @@
#define CIK_PIPE_PER_MEC (4)
struct kgd_mem {
- struct radeon_sa_bo *sa_bo;
- uint64_t gpu_addr;
- void *ptr;
struct radeon_bo *bo;
+ uint64_t gpu_addr;
void *cpu_ptr;
};
-static int init_sa_manager(struct kgd_dev *kgd, unsigned int size);
-static void fini_sa_manager(struct kgd_dev *kgd);
-
-static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,
- enum kgd_memory_pool pool, struct kgd_mem **mem);
-
-static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem);
static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
@@ -88,10 +79,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
uint32_t queue_id);
static const struct kfd2kgd_calls kfd2kgd = {
- .init_sa_manager = init_sa_manager,
- .fini_sa_manager = fini_sa_manager,
- .allocate_mem = allocate_mem,
- .free_mem = free_mem,
.init_gtt_mem_allocation = alloc_gtt_mem,
.free_gtt_mem = free_gtt_mem,
.get_vmem_size = get_vmem_size,
@@ -204,90 +191,6 @@ int radeon_kfd_resume(struct radeon_device *rdev)
return r;
}
-static u32 pool_to_domain(enum kgd_memory_pool p)
-{
- switch (p) {
- case KGD_POOL_FRAMEBUFFER: return RADEON_GEM_DOMAIN_VRAM;
- default: return RADEON_GEM_DOMAIN_GTT;
- }
-}
-
-static int init_sa_manager(struct kgd_dev *kgd, unsigned int size)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
- int r;
-
- BUG_ON(kgd == NULL);
-
- r = radeon_sa_bo_manager_init(rdev, &rdev->kfd_bo,
- size,
- RADEON_GPU_PAGE_SIZE,
- RADEON_GEM_DOMAIN_GTT,
- RADEON_GEM_GTT_WC);
-
- if (r)
- return r;
-
- r = radeon_sa_bo_manager_start(rdev, &rdev->kfd_bo);
- if (r)
- radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo);
-
- return r;
-}
-
-static void fini_sa_manager(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- BUG_ON(kgd == NULL);
-
- radeon_sa_bo_manager_suspend(rdev, &rdev->kfd_bo);
- radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo);
-}
-
-static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,
- enum kgd_memory_pool pool, struct kgd_mem **mem)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
- u32 domain;
- int r;
-
- BUG_ON(kgd == NULL);
-
- domain = pool_to_domain(pool);
- if (domain != RADEON_GEM_DOMAIN_GTT) {
- dev_err(rdev->dev,
- "Only allowed to allocate gart memory for kfd\n");
- return -EINVAL;
- }
-
- *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
- if ((*mem) == NULL)
- return -ENOMEM;
-
- r = radeon_sa_bo_new(rdev, &rdev->kfd_bo, &(*mem)->sa_bo, size,
- alignment);
- if (r) {
- dev_err(rdev->dev, "failed to get memory for kfd (%d)\n", r);
- return r;
- }
-
- (*mem)->ptr = radeon_sa_bo_cpu_addr((*mem)->sa_bo);
- (*mem)->gpu_addr = radeon_sa_bo_gpu_addr((*mem)->sa_bo);
-
- return 0;
-}
-
-static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- BUG_ON(kgd == NULL);
-
- radeon_sa_bo_free(rdev, &mem->sa_bo, NULL);
- kfree(mem);
-}
-
static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr)
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 9/9] drm/amd: Remove old radeon_sa funcs from kfd-->kgd interface
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
` (7 preceding siblings ...)
2014-12-31 13:39 ` [PATCH 8/9] drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface Oded Gabbay
@ 2014-12-31 13:39 ` Oded Gabbay
2014-12-31 13:49 ` [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Christian König
2015-01-06 16:51 ` Alex Deucher
10 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 13:39 UTC (permalink / raw)
To: dri-devel, Alexander.Deucher; +Cc: Christian Koenig
Reviewed-by: Alexey Skidanov <Alexey.skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 47d0b87..44838cf 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -110,18 +110,6 @@ struct kgd2kfd_calls {
/**
* struct kfd2kgd_calls
*
- * @init_sa_manager: Initialize an instance of the sa manager, used by
- * amdkfd for all system memory allocations that are mapped to the GART
- * address space
- *
- * @fini_sa_manager: Releases all memory allocations for amdkfd that are
- * handled by kgd sa manager
- *
- * @allocate_mem: Allocate a buffer from amdkfd's sa manager. The buffer can
- * be used for mqds, hpds, kernel queue, fence and runlists
- *
- * @free_mem: Frees a buffer that was allocated by amdkfd's sa manager
- *
* @init_gtt_mem_allocation: Allocate a buffer on the gart aperture.
* The buffer can be used for mqds, hpds, kernel queue, fence and runlists
*
@@ -160,16 +148,10 @@ struct kgd2kfd_calls {
*
*/
struct kfd2kgd_calls {
- /* Memory management. */
- int (*init_sa_manager)(struct kgd_dev *kgd, unsigned int size);
- void (*fini_sa_manager)(struct kgd_dev *kgd);
- int (*allocate_mem)(struct kgd_dev *kgd, size_t size, size_t alignment,
- enum kgd_memory_pool pool, struct kgd_mem **mem);
int (*init_gtt_mem_allocation)(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr);
- void (*free_mem)(struct kgd_dev *kgd, struct kgd_mem *mem);
void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj);
uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
--
2.1.0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] Replace use of radeon_sa with a new sub allocator
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
` (8 preceding siblings ...)
2014-12-31 13:39 ` [PATCH 9/9] drm/amd: Remove old radeon_sa funcs " Oded Gabbay
@ 2014-12-31 13:49 ` Christian König
2014-12-31 14:06 ` Oded Gabbay
2015-01-06 16:51 ` Alex Deucher
10 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2014-12-31 13:49 UTC (permalink / raw)
To: Oded Gabbay, dri-devel, Alexander.Deucher
Am 31.12.2014 um 14:39 schrieb Oded Gabbay:
> Background:
>
> amdkfd needs GART memory for several things, such as runlist packets,
> MQDs, HPDs and more. Unfortunately, all of this memory must be always
> pinned (due to several reasons which were discussed during the
> initial review of amdkfd).
In general this seems to be a good idea, but so far I still haven't seen a
good explanation of why all of that memory must be pinned. So please
summarize that once more.
Regards,
Christian.
>
> Current Solution:
>
> The current (short/mid-term) solution that was proposed by Jerome.G, is
> to limit the amount of memory to a small size, roughly 4MB and allocate
> this buffer at the start of the GART. To accommodate this, amdkfd has
> two kernel module parameters, maximum number of HSA processes and
> maximum number of queues per process, which require under 4MB of GART
> memory when using their defaults, 32 and 128 respectively.
>
> Until now, amdkfd used the radeon sub-allocator module (radeon_sa)
> to handle the sub-allocation of memory from this large buffer to
> different modules inside the amdkfd.
>
> However, while running OpenCL conformance test suite, we found that
> radeon_sa module is not suitable for this kind of task, due to its
> design:
> 1. Every allocation increments its internal pointer so the next
> allocation is *always* done ahead of the previous allocation. This
> causes the internal pointer to wrap-around when it reaches the end of
> the buffer.
>
> 2. When encountering an area that is already allocated, the module
> waits for that area to be freed. If it is not freed in a timely manner
> (or has no fence), the allocation fails. Simply put, it can't "skip"
> the allocated area.
>
> Now, this is most probably good for graphics, but for amdkfd needs,
> the combination of the two behaviors mentioned above eventually causes
> a denial-of-service. This is because some memory allocations
> are *always* present and *never* freed (such as HPDs).
> Therefore, given enough time and workload, the radeon_sa eventually
> wraps around, encounters an already allocated area and gets stuck.
>
> Proposed new solution:
>
> To solve this, I have written a simple sub-allocator module inside
> amdkfd. It allocates fixed-size contiguous chunks (1 or more) and uses
> a bitmap to manage the allocations. The next allocation is always
> being searched for from the start of the GART buffer, and the module
> knows how to skip allocated chunks.
>
> Because most allocations are MQDs, and MQDs are 512 Bytes in size, I
> set the default chunk size to be 512 Bytes.
>
> The basic GART memory allocation is still being done in the
> amdkfd <--> radeon interface, and it still occupies less than 4MB.
>
> I have chosen to implement a new allocator instead of changing
> radeon_sa because the behavior of radeon_sa is very appropriate for
> graphics, where allocations do not stay forever. Also, amdkfd doesn't
> actually need the flexibility and features radeon_sa provides.
>
> Oded
>
> Oded Gabbay (9):
> drm/amd: Add new kfd-->kgd interface for gart usage
> drm/radeon: Impl. new gtt allocate/free functions
> drm/amdkfd: Add gtt sa related data to kfd_dev struct
> drm/amdkfd: Add kfd gtt sub-allocator functions
> drm/amdkfd: Fixed calculation of gart buffer size
> drm/amdkfd: Allocate gart memory using new interface
> drm/amdkfd: Using new gtt sa in amdkfd
> drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface
> drm/amd: Remove old radeon_sa funcs from kfd-->kgd interface
>
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 217 ++++++++++++++++++++-
> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 23 +--
> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 41 ++--
> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 16 +-
> drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 10 +-
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 ++-
> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 23 +--
> drivers/gpu/drm/radeon/radeon_kfd.c | 128 ++++++------
> 8 files changed, 329 insertions(+), 157 deletions(-)
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] Replace use of radeon_sa with a new sub allocator
2014-12-31 13:49 ` [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Christian König
@ 2014-12-31 14:06 ` Oded Gabbay
2014-12-31 17:07 ` Christian König
0 siblings, 1 reply; 15+ messages in thread
From: Oded Gabbay @ 2014-12-31 14:06 UTC (permalink / raw)
To: Christian König, dri-devel, Alexander.Deucher
On 12/31/2014 03:49 PM, Christian König wrote:
> Am 31.12.2014 um 14:39 schrieb Oded Gabbay:
>> Background:
>>
>> amdkfd needs GART memory for several things, such as runlist packets,
>> MQDs, HPDs and more. Unfortunately, all of this memory must be always
>> pinned (due to several reasons which were discussed during the
>> initial review of amdkfd).
>
> In general seems to be a good idea, but so far I still don't have seen a
> good explanation why all those memory must be pinned. So please summarize
> that one once more.
>
> Regards,
> Christian.
>
ok, once more :)
The bulk of the allocations in the GART is for MQDs. MQDs represent active
user-mode queues, which are on the current runlist. It is important to
remember that an active queue is not necessarily a scheduled/running
queue, especially if there is over-subscription of queues or more than a
single HSA process.
Because the scheduling of the user-mode queues is done by the CP firmware,
amdkfd doesn't have any indication of whether a queue is scheduled or not.
If the CP tries to schedule a queue and its MQD is not present, the CP will
probably get stuck permanently, as it will load garbage from the GART
(the address of the MQD is given to the CP inside the runlist packet).
In addition, there are a couple of small allocations which also should
always be pinned - runlist packets (2 packets) and HPDs. runlist packets can
be quite large, depending on number of processes and queues.
A few solutions were proposed, but in the end Jerome agreed there is no harm
in limiting the total memory consumption to around 4MB.
The long-term solution, which I will be working on, hopefully soon, is to
create a mechanism through which radeon/ttm can ask amdkfd to clear
GART/VRAM memory due to memory pressure. Then, amdkfd will preempt the
running queues and wait until the memory pressure is over. Then it will
reschedule the queues. But I'm getting ahead of myself. I hope to send an
RFC about that in the next couple of weeks.
Oded
>>
>> Current Solution:
>>
>> The current (short/mid-term) solution that was proposed by Jerome.G, is
>> to limit the amount of memory to a small size, roughly 4MB and allocate
>> this buffer at the start of the GART. To accommodate this, amdkfd has
>> two kernel module parameters, maximum number of HSA processes and
>> maximum number of queues per process, which require under 4MB of GART
>> memory when using their defaults, 32 and 128 respectively.
>>
>> Until now, amdkfd used the radeon sub-allocator module (radeon_sa)
>> to handle the sub-allocation of memory from this large buffer to
>> different modules inside the amdkfd.
>>
>> However, while running OpenCL conformance test suite, we found that
>> radeon_sa module is not suitable for this kind of task, due to its
>> design:
>> 1. Every allocation increments its internal pointer so the next
>> allocation is *always* done ahead of the previous allocation. This
>> causes the internal pointer to wrap-around when it reaches the end of
>> the buffer.
>>
>> 2. When encountering an area that is already allocated, the module
>> waits for that area to be freed. If it is not freed in a timely manner
>> (or has no fence), the allocation fails. Simply put, it can't "skip"
>> the allocated area.
>>
>> Now, this is most probably good for graphics, but for amdkfd needs,
>> the combination of the two behaviors mentioned above eventually causes
>> a denial-of-service. This is because some memory allocations
>> are *always* present and *never* freed (such as HPDs).
>> Therefore, given enough time and workload, the radeon_sa eventually
>> wraps around, encounters an already allocated area and gets stuck.
>>
>> Proposed new solution:
>>
>> To solve this, I have written a simple sub-allocator module inside
>> amdkfd. It allocates fixed-size contiguous chunks (1 or more) and uses
>> a bitmap to manage the allocations. The next allocation is always
>> being searched for from the start of the GART buffer, and the module
>> knows how to skip allocated chunks.
>>
>> Because most allocations are MQDs, and MQDs are 512 Bytes in size, I
>> set the default chunk size to be 512 Bytes.
>>
>> The basic GART memory allocation is still being done in the
>> amdkfd <--> radeon interface, and it still occupies less than 4MB.
>>
>> I have chosen to implement a new allocator instead of changing
>> radeon_sa because the behavior of radeon_sa is very appropriate for
>> graphics, where allocations do not stay forever. Also, amdkfd doesn't
>> actually need the flexibility and features radeon_sa provides.
>>
>> Oded
>>
>> Oded Gabbay (9):
>> drm/amd: Add new kfd-->kgd interface for gart usage
>> drm/radeon: Impl. new gtt allocate/free functions
>> drm/amdkfd: Add gtt sa related data to kfd_dev struct
>> drm/amdkfd: Add kfd gtt sub-allocator functions
>> drm/amdkfd: Fixed calculation of gart buffer size
>> drm/amdkfd: Allocate gart memory using new interface
>> drm/amdkfd: Using new gtt sa in amdkfd
>> drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface
>> drm/amd: Remove old radeon_sa funcs from kfd-->kgd interface
>>
>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 217
>> ++++++++++++++++++++-
>> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 23 +--
>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 41 ++--
>> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 16 +-
>> drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 10 +-
>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 ++-
>> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 23 +--
>> drivers/gpu/drm/radeon/radeon_kfd.c | 128 ++++++------
>> 8 files changed, 329 insertions(+), 157 deletions(-)
>>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] Replace use of radeon_sa with a new sub allocator
2014-12-31 14:06 ` Oded Gabbay
@ 2014-12-31 17:07 ` Christian König
2015-01-01 7:46 ` Oded Gabbay
0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2014-12-31 17:07 UTC (permalink / raw)
To: Oded Gabbay, dri-devel, Alexander.Deucher
> The long-term solution
That was the part that I missed in the description. Please note
somewhere that we still need to improve this.
Apart from that the patches look fine to me, but I need more time to
review them in detail.
Regards,
Christian.
Am 31.12.2014 um 15:06 schrieb Oded Gabbay:
>
> On 12/31/2014 03:49 PM, Christian König wrote:
>> Am 31.12.2014 um 14:39 schrieb Oded Gabbay:
>>> Background:
>>>
>>> amdkfd needs GART memory for several things, such as runlist packets,
>>> MQDs, HPDs and more. Unfortunately, all of this memory must be always
>>> pinned (due to several reasons which were discussed during the
>>> initial review of amdkfd).
>> In general seems to be a good idea, but so far I still don't have seen a
>> good explanation why all those memory must be pinned. So please summarize
>> that one once more.
>>
>> Regards,
>> Christian.
>>
> ok, once more :)
>
> The bulk of the allocations in the GART is for MQDs. MQDs represent active
> user-mode queues, which are on the current runlist. It is important to
> remember that active queues doesn't necessarily mean scheduled/running
> queues, especially if there is over-subscription of queues or more than a
> single HSA process.
>
> Because the scheduling of the user-mode queues is done by the CP firmware,
> amdkfd doesn't have any indication if the queue is scheduled or not. If the
> CP will try to schedule a queue, and its MQD is not present, this will
> probably stuck the CP permanently, as it will load garbage from the GART
> (the address of the MQD is given to the CP inside the runlist packet).
>
> In addition, there are a couple of small allocations which also should
> always be pinned - runlist packets (2 packets) and HPDs. runlist packets can
> be quite large, depending on number of processes and queues.
>
> A few solutions were proposed, but at the end Jerome agreed there is no harm
> when limiting the total memory consumption to around 4MB.
>
> The long-term solution, which I will be working on, hopefully soon, is to
> create a mechanism through which radeon/ttm can ask amdkfd to clear
> GART/VRAM memory due to memory pressure. Then, amdkfd will preempt the
> running queues and wait until the memory pressure is over. Then it will
> reschedule the queues. But I'm getting ahead of myself. I hope to send an
> RFC about that in the next couple of weeks.
>
> Oded
>
>
>
>>> Current Solution:
>>>
>>> The current (short/mid-term) solution that was proposed by Jerome.G, is
>>> to limit the amount of memory to a small size, roughly 4MB and allocate
>>> this buffer at the start of the GART. To accommodate this, amdkfd has
>>> two kernel module parameters, maximum number of HSA processes and
>>> maximum number of queues per process, which require under 4MB of GART
>>> memory when using their defaults, 32 and 128 respectively.
>>>
>>> Until now, amdkfd used the radeon sub-allocator module (radeon_sa)
>>> to handle the sub-allocation of memory from this large buffer to
>>> different modules inside the amdkfd.
>>>
>>> However, while running OpenCL conformance test suite, we found that
>>> radeon_sa module is not suitable for this kind of task, due to its
>>> design:
>>> 1. Every allocation increments its internal pointer so the next
>>> allocation is *always* done ahead of the previous allocation. This
>>> causes the internal pointer to wrap-around when it reaches the end of
>>> the buffer.
>>>
>>> 2. When encountering an area that is already allocated, the module
>>> waits for that area to be freed. If it is not freed in a timely manner
>>> (or has no fence), the allocation fails. Simply put, it can't "skip"
>>> the allocated area.
>>>
>>> Now, this is most probably good for graphics, but for amdkfd needs,
>>> the combination of the two behaviors mentioned above eventually causes
>>> a denial-of-service. This is because some memory allocations
>>> are *always* present and *never* freed (such as HPDs).
>>> Therefore, given enough time and workload, the radeon_sa eventually
>>> wraps around, encounters an already allocated area and gets stuck.
>>>
>>> Proposed new solution:
>>>
>>> To solve this, I have written a simple sub-allocator module inside
>>> amdkfd. It allocates fixed-size contiguous chunks (1 or more) and uses
>>> a bitmap to manage the allocations. The next allocation is always
>>> being searched for from the start of the GART buffer, and the module
>>> knows how to skip allocated chunks.
>>>
>>> Because most allocations are MQDs, and MQDs are 512 Bytes in size, I
>>> set the default chunk size to be 512 Bytes.
>>>
>>> The basic GART memory allocation is still being done in the
>>> amdkfd <--> radeon interface, and it still occupies less than 4MB.
>>>
>>> I have chosen to implement a new allocator instead of changing
>>> radeon_sa because the behavior of radeon_sa is very appropriate for
>>> graphics, where allocations do not stay forever. Also, amdkfd doesn't
>>> actually need the flexibility and features radeon_sa provides.
>>>
>>> Oded
>>>
>>> Oded Gabbay (9):
>>> drm/amd: Add new kfd-->kgd interface for gart usage
>>> drm/radeon: Impl. new gtt allocate/free functions
>>> drm/amdkfd: Add gtt sa related data to kfd_dev struct
>>> drm/amdkfd: Add kfd gtt sub-allocator functions
>>> drm/amdkfd: Fixed calculation of gart buffer size
>>> drm/amdkfd: Allocate gart memory using new interface
>>> drm/amdkfd: Using new gtt sa in amdkfd
>>> drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface
>>> drm/amd: Remove old radeon_sa funcs from kfd-->kgd interface
>>>
>>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 217
>>> ++++++++++++++++++++-
>>> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 23 +--
>>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 41 ++--
>>> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 16 +-
>>> drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 10 +-
>>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 ++-
>>> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 23 +--
>>> drivers/gpu/drm/radeon/radeon_kfd.c | 128 ++++++------
>>> 8 files changed, 329 insertions(+), 157 deletions(-)
>>>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] Replace use of radeon_sa with a new sub allocator
2014-12-31 17:07 ` Christian König
@ 2015-01-01 7:46 ` Oded Gabbay
0 siblings, 0 replies; 15+ messages in thread
From: Oded Gabbay @ 2015-01-01 7:46 UTC (permalink / raw)
To: Christian König, dri-devel, Alexander.Deucher
On 12/31/2014 07:07 PM, Christian König wrote:
>> The long-term solution
> That was the part that I missed in the description. Please note somewhere that
> we still need to improve this.
OK, I'll add it to the commit message of the relevant patch (and to the cover letter).
>
> Apart from that the patches look fine to me, but I need more time to review them
> in detail.
Thanks! I hope we can push it to 3.20
>
> Regards,
> Christian.
>
> Am 31.12.2014 um 15:06 schrieb Oded Gabbay:
>>
>> On 12/31/2014 03:49 PM, Christian König wrote:
>>> Am 31.12.2014 um 14:39 schrieb Oded Gabbay:
>>>> Background:
>>>>
>>>> amdkfd needs GART memory for several things, such as runlist packets,
>>>> MQDs, HPDs and more. Unfortunately, all of this memory must be always
>>>> pinned (due to several reasons which were discussed during the
>>>> initial review of amdkfd).
>>> In general seems to be a good idea, but so far I still don't have seen a
>>> good explanation why all those memory must be pinned. So please summarize
>>> that one once more.
>>>
>>> Regards,
>>> Christian.
>>>
>> ok, once more :)
>>
>> The bulk of the allocations in the GART is for MQDs. MQDs represent active
>> user-mode queues, which are on the current runlist. It is important to
>> remember that active queues doesn't necessarily mean scheduled/running
>> queues, especially if there is over-subscription of queues or more than a
>> single HSA process.
>>
>> Because the scheduling of the user-mode queues is done by the CP firmware,
>> amdkfd doesn't have any indication if the queue is scheduled or not. If the
>> CP will try to schedule a queue, and its MQD is not present, this will
>> probably stuck the CP permanently, as it will load garbage from the GART
>> (the address of the MQD is given to the CP inside the runlist packet).
>>
>> In addition, there are a couple of small allocations which also should
>> always be pinned - runlist packets (2 packets) and HPDs. runlist packets can
>> be quite large, depending on number of processes and queues.
>>
>> A few solutions were proposed, but at the end Jerome agreed there is no harm
>> when limiting the total memory consumption to around 4MB.
>>
>> The long-term solution, which I will be working on, hopefully soon, is to
>> create a mechanism through which radeon/ttm can ask amdkfd to clear
>> GART/VRAM memory due to memory pressure. Then, amdkfd will preempt the
>> running queues and wait until the memory pressure is over. Then it will
>> reschedule the queues. But I'm getting ahead of myself. I hope to send an
>> RFC about that in the next couple of weeks.
>>
>> Oded
>>
>>
>>
>>>> Current Solution:
>>>>
>>>> The current (short/mid-term) solution that was proposed by Jerome.G, is
>>>> to limit the amount of memory to a small size, roughly 4MB and allocate
>>>> this buffer at the start of the GART. To accommodate this, amdkfd has
>>>> two kernel module parameters, maximum number of HSA processes and
>>>> maximum number of queues per process, which require under 4MB of GART
>>>> memory when using their defaults, 32 and 128 respectively.
>>>>
>>>> Until now, amdkfd used the radeon sub-allocator module (radeon_sa)
>>>> to handle the sub-allocation of memory from this large buffer to
>>>> different modules inside the amdkfd.
>>>>
>>>> However, while running OpenCL conformance test suite, we found that
>>>> radeon_sa module is not suitable for this kind of task, due to its
>>>> design:
>>>> 1. Every allocation increments its internal pointer so the next
>>>> allocation is *always* done ahead of the previous allocation. This
>>>> causes the internal pointer to wrap-around when it reaches the end of
>>>> the buffer.
>>>>
>>>> 2. When encountering an area that is already allocated, the module
>>>> waits for that area to be freed. If it is not freed in a timely manner
>>>> (or has no fence), the allocation fails. Simply put, it can't "skip"
>>>> the allocated area.
>>>>
>>>> Now, this is most probably good for graphics, but for amdkfd needs,
>>>> the combination of the two behaviors mentioned above eventually causes
>>>> a denial-of-service. This is because some memory allocations
>>>> are *always* present and *never* freed (such as HPDs).
>>>> Therefore, given enough time and workload, the radeon_sa eventually
>>>> wraps around, encounters an already allocated area and gets stuck.
>>>>
>>>> Proposed new solution:
>>>>
>>>> To solve this, I have written a simple sub-allocator module inside
>>>> amdkfd. It allocates fixed-size contiguous chunks (1 or more) and uses
>>>> a bitmap to manage the allocations. The next allocation is always
>>>> being searched for from the start of the GART buffer, and the module
>>>> knows how to skip allocated chunks.
>>>>
>>>> Because most allocations are MQDs, and MQDs are 512 Bytes in size, I
>>>> set the default chunk size to be 512 Bytes.
>>>>
>>>> The basic GART memory allocation is still being done in the
>>>> amdkfd <--> radeon interface, and it still occupies less than 4MB.
>>>>
>>>> I have chosen to implement a new allocator instead of changing
>>>> radeon_sa because the behavior of radeon_sa is very appropriate for
>>>> graphics, where allocations do not stay forever. Also, amdkfd doesn't
>>>> actually need the flexibility and features radeon_sa provides.
>>>>
>>>> Oded
>>>>
>>>> Oded Gabbay (9):
>>>> drm/amd: Add new kfd-->kgd interface for gart usage
>>>> drm/radeon: Impl. new gtt allocate/free functions
>>>> drm/amdkfd: Add gtt sa related data to kfd_dev struct
>>>> drm/amdkfd: Add kfd gtt sub-allocator functions
>>>> drm/amdkfd: Fixed calculation of gart buffer size
>>>> drm/amdkfd: Allocate gart memory using new interface
>>>> drm/amdkfd: Using new gtt sa in amdkfd
>>>> drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface
>>>> drm/amd: Remove old radeon_sa funcs from kfd-->kgd interface
>>>>
>>>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 217
>>>> ++++++++++++++++++++-
>>>> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 23 +--
>>>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 41 ++--
>>>> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 16 +-
>>>> drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 10 +-
>>>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 ++-
>>>> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 23 +--
>>>> drivers/gpu/drm/radeon/radeon_kfd.c | 128 ++++++------
>>>> 8 files changed, 329 insertions(+), 157 deletions(-)
>>>>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/9] Replace use of radeon_sa with a new sub allocator
2014-12-31 13:39 [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Oded Gabbay
` (9 preceding siblings ...)
2014-12-31 13:49 ` [PATCH 0/9] Replace use of radeon_sa with a new sub allocator Christian König
@ 2015-01-06 16:51 ` Alex Deucher
10 siblings, 0 replies; 15+ messages in thread
From: Alex Deucher @ 2015-01-06 16:51 UTC (permalink / raw)
To: Oded Gabbay
Cc: Deucher, Alexander, Christian Koenig,
Mailing list - DRI developers
On Wed, Dec 31, 2014 at 8:39 AM, Oded Gabbay <oded.gabbay@amd.com> wrote:
> Background:
>
> amdkfd needs GART memory for several things, such as runlist packets,
> MQDs, HPDs and more. Unfortunately, all of this memory must always be
> pinned (for several reasons which were discussed during the
> initial review of amdkfd).
>
> Current Solution:
>
> The current (short/mid-term) solution that was proposed by Jerome.G, is
> to limit the amount of memory to a small size, roughly 4MB and allocate
> this buffer at the start of the GART. To accommodate this, amdkfd has
> two kernel module parameters, maximum number of HSA processes and
> maximum number of queues per process, which require under 4MB of GART
> memory when using their defaults, 32 and 128 respectively.
>
> Until now, amdkfd used the radeon sub-allocator module (radeon_sa)
> to handle the sub-allocation of memory from this large buffer to
> different modules inside the amdkfd.
>
> However, while running OpenCL conformance test suite, we found that
> radeon_sa module is not suitable for this kind of task, due to its
> design:
> 1. Every allocation increments its internal pointer so the next
> allocation is *always* done ahead of the previous allocation. This
> causes the internal pointer to wrap-around when it reaches the end of
> the buffer.
>
> 2. When encountering an area that is already allocated, the module
> waits for that area to be freed. If it is not freed in a timely manner
> (or has no fence), the allocation fails. Simply put, it can't "skip"
> the allocated area.
>
> Now, this is most probably good for graphics, but for amdkfd needs,
> the combination of the two behaviors mentioned above eventually causes
> a denial-of-service. This is because some memory allocations
> are *always* present and *never* freed (such as HPDs).
> Therefore, given enough time and workload, the radeon_sa eventually
> wraps around, encounters an already allocated area and gets stuck.
>
> Proposed new solution:
>
> To solve this, I have written a simple sub-allocator module inside
> amdkfd. It allocates fixed-size contiguous chunks (1 or more) and uses
> a bitmap to manage the allocations. The search for free chunks always
> begins at the start of the GART buffer, and the module
> knows how to skip allocated chunks.
>
> Because most allocations are MQDs, and MQDs are 512 Bytes in size, I
> set the default chunk size to be 512 Bytes.
>
> The basic GART memory allocation is still being done in the
> amdkfd <--> radeon interface, and it still occupies less than 4MB.
>
> I have chosen to implement a new allocator instead of changing
> radeon_sa because the behavior of radeon_sa is very appropriate for
> graphics, where allocations do not stay forever. Also, amdkfd doesn't
> actually need the flexibility and features radeon_sa provides.
>
For the series:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> Oded
>
> Oded Gabbay (9):
> drm/amd: Add new kfd-->kgd interface for gart usage
> drm/radeon: Impl. new gtt allocate/free functions
> drm/amdkfd: Add gtt sa related data to kfd_dev struct
> drm/amdkfd: Add kfd gtt sub-allocator functions
> drm/amdkfd: Fixed calculation of gart buffer size
> drm/amdkfd: Allocate gart memory using new interface
> drm/amdkfd: Using new gtt sa in amdkfd
> drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface
> drm/amd: Remove old radeon_sa funcs from kfd-->kgd interface
>
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 217 ++++++++++++++++++++-
> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 23 +--
> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 41 ++--
> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 16 +-
> drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 10 +-
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 ++-
> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 23 +--
> drivers/gpu/drm/radeon/radeon_kfd.c | 128 ++++++------
> 8 files changed, 329 insertions(+), 157 deletions(-)
>
> --
> 2.1.0
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 15+ messages in thread