From: Graham Sider <Graham.Sider@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Mukul.Joshi@amd.com, Felix.Kuehling@amd.com,
Graham Sider <Graham.Sider@amd.com>,
Philip.Yang@amd.com
Subject: [PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription
Date: Fri, 10 Jun 2022 13:13:15 -0400 [thread overview]
Message-ID: <20220610171316.2668611-2-Graham.Sider@amd.com> (raw)
In-Reply-To: <20220610171316.2668611-1-Graham.Sider@amd.com>
Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
GART for usermode queues in order to support oversubscription. In the
case that work is submitted to an unmapped queue, MES must have a GART
wptr address to determine whether the queue should be mapped.
This change is accompanied with changes in MES and is applicable for
MES_VERSION >= 3.
v2: Update MES_VERSION check from 2 to 3.
Signed-off-by: Graham Sider <Graham.Sider@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 39 ++++++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 ++++++++++++++++++-
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 9 +++-
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 2 +
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++
.../amd/amdkfd/kfd_process_queue_manager.c | 19 +++++---
7 files changed, 110 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 429b16ba10bf..dba26d1e3be9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size);
void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem);
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index efab923056f4..2d452655eb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2030,6 +2030,45 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
}
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+ int ret;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto err_reserve_bo_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto err_pin_bo_failed;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (ret) {
+ pr_err("Failed to bind bo to GART. ret %d\n", ret);
+ goto err_map_bo_gart_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, bo->kfd_bo->process_info->eviction_fence);
+ list_del_init(&bo->kfd_bo->validate_list.head);
+
+ amdgpu_bo_unreserve(bo);
+
+ return 0;
+
+err_map_bo_gart_failed:
+ amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+ amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+ return ret;
+}
+
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
struct kgd_mem *mem, void **kptr, uint64_t *size)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e9766e165c38..58d5ebed1b32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -290,6 +290,11 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
+ struct amdgpu_bo_va_mapping *wptr_mapping;
+ struct interval_tree_node *wptr_node;
+ struct amdgpu_vm *wptr_vm;
+ struct amdgpu_bo *wptr_bo = NULL;
+
memset(&q_properties, 0, sizeof(struct queue_properties));
pr_debug("Creating queue ioctl\n");
@@ -316,12 +321,44 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
+ /* Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
+ * GART for usermode queues in order to support oversubscription. In the
+ * case that work is submitted to an unmapped queue, MES must have a GART
+ * wptr address to determine whether the queue should be mapped.
+ */
+ if (dev->shared_resources.enable_mes && (dev->adev->mes.sched_version & 0xff) >= 3) {
+ wptr_vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+ if (err)
+ goto err_wptr_bo_reserve;
+
+ wptr_node = interval_tree_iter_first(&wptr_vm->va,
+ args->write_pointer_address >> PAGE_SHIFT,
+ args->write_pointer_address >> PAGE_SHIFT);
+ if (!wptr_node) {
+ pr_err("Failed to lookup wptr bo\n");
+ err = -EINVAL;
+ goto err_wptr_bo_lookup;
+ }
+
+ wptr_mapping = container_of((struct rb_node *)wptr_node, struct amdgpu_bo_va_mapping, rb);
+ wptr_bo = wptr_mapping->bo_va->base.bo;
+
+ amdgpu_bo_unreserve(wptr_vm->root.bo);
+
+ err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
+ if (err) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto err_wptr_bo_gart_map;
+ }
+ }
+
pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
p->pasid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL,
- &doorbell_offset_in_process);
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
+ NULL, NULL, NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -353,7 +390,11 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0);
return 0;
+err_wptr_bo_lookup:
+ amdgpu_bo_unreserve(wptr_vm->root.bo);
err_create_queue:
+err_wptr_bo_gart_map:
+err_wptr_bo_reserve:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b39d89c52887..d8de2fbdfc7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -208,6 +208,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct mes_add_queue_input queue_input;
int r, queue_type;
+ uint64_t wptr_addr_off;
if (dqm->is_hws_hang)
return -EIO;
@@ -227,7 +228,13 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
queue_input.doorbell_offset = q->properties.doorbell_off;
queue_input.mqd_addr = q->gart_mqd_addr;
- queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+
+ if (q->wptr_bo) {
+ wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
+ queue_input.wptr_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
+ } else
+ queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+
queue_input.paging = false;
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index f1654b4da856..35e74bdd81da 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -377,6 +377,8 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
m->sdmax_rlcx_doorbell_offset =
q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index a5d3963537d7..dcddee0d6f06 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -639,6 +639,8 @@ struct queue {
void *gang_ctx_bo;
uint64_t gang_ctx_gpu_addr;
void *gang_ctx_cpu_ptr;
+
+ struct amdgpu_bo *wptr_bo;
};
enum KFD_MQD_TYPE {
@@ -1404,6 +1406,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index f99e09dc43ea..ede1462a55c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -190,7 +190,8 @@ void pqm_uninit(struct process_queue_manager *pqm)
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, unsigned int qid)
+ struct file *f, struct amdgpu_bo *wptr_bo,
+ unsigned int qid)
{
int retval;
@@ -221,6 +222,9 @@ static int init_user_queue(struct process_queue_manager *pqm,
goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+
+ if (wptr_bo)
+ (*q)->wptr_bo = amdgpu_bo_ref(wptr_bo);
}
pr_debug("PQM After init queue");
@@ -237,6 +241,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct file *f,
struct queue_properties *properties,
unsigned int *qid,
+ struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -299,7 +304,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -320,7 +325,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -457,9 +462,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
pdd->qpd.num_gws = 0;
}
- if (dev->shared_resources.enable_mes)
+ if (dev->shared_resources.enable_mes) {
amdgpu_amdkfd_free_gtt_mem(dev->adev,
pqn->q->gang_ctx_bo);
+ if (pqn->q->wptr_bo)
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+
+ }
uninit_queue(pqn->q);
}
@@ -900,7 +909,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
+ ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
--
2.25.1
next prev parent reply other threads:[~2022-06-10 17:13 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-06-10 17:13 [PATCH v2 1/3] drm/amdgpu: Fetch MES scheduler/KIQ versions Graham Sider
2022-06-10 17:13 ` Graham Sider [this message]
2022-06-10 23:06 ` [PATCH v2 2/3] drm/amdkfd: Enable GFX11 usermode queue oversubscription Felix Kuehling
2022-06-13 13:33 ` Sider, Graham
2022-06-11 2:37 ` Lang Yu
2022-06-13 13:35 ` Sider, Graham
2022-06-10 17:13 ` [PATCH v2 3/3] drm/amdgpu: Update mes_v11_api_def.h Graham Sider
2022-06-10 23:06 ` Felix Kuehling
2022-06-10 23:07 ` [PATCH v2 1/3] drm/amdgpu: Fetch MES scheduler/KIQ versions Felix Kuehling
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220610171316.2668611-2-Graham.Sider@amd.com \
--to=graham.sider@amd.com \
--cc=Felix.Kuehling@amd.com \
--cc=Mukul.Joshi@amd.com \
--cc=Philip.Yang@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.