Re: [PATCH V6 07/10] accel/amdxdna: Add command execution

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Lizhi Hou <lizhi.hou@amd.com>
To: Matthew Brost <matthew.brost@intel.com>
Cc: <ogabbay@kernel.org>, <quic_jhugo@quicinc.com>,
	<dri-devel@lists.freedesktop.org>, <linux-kernel@vger.kernel.org>,
	<min.ma@amd.com>, <max.zhen@amd.com>, <sonal.santan@amd.com>,
	<king.tam@amd.com>
Subject: Re: [PATCH V6 07/10] accel/amdxdna: Add command execution
Date: Wed, 6 Nov 2024 10:23:21 -0800	[thread overview]
Message-ID: <a100b35f-ab8a-e7f6-325d-b29953c756c5@amd.com> (raw)
In-Reply-To: <ZyhqSDch6JY48FUU@lstrano-desk.jf.intel.com>


On 11/3/24 22:31, Matthew Brost wrote:
> On Wed, Oct 30, 2024 at 08:51:44AM -0700, Lizhi Hou wrote:
>> Add interfaces for user application to submit command and wait for its
>> completion.
>>
>> Co-developed-by: Min Ma <min.ma@amd.com>
>> Signed-off-by: Min Ma <min.ma@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/aie2_ctx.c              | 664 +++++++++++++++++-
>>   drivers/accel/amdxdna/aie2_message.c          | 343 +++++++++
>>   drivers/accel/amdxdna/aie2_pci.c              |   5 +
>>   drivers/accel/amdxdna/aie2_pci.h              |  35 +
>>   drivers/accel/amdxdna/aie2_psp.c              |   2 +
>>   drivers/accel/amdxdna/aie2_smu.c              |   2 +
>>   drivers/accel/amdxdna/amdxdna_ctx.c           | 330 ++++++++-
>>   drivers/accel/amdxdna/amdxdna_ctx.h           | 111 +++
>>   drivers/accel/amdxdna/amdxdna_gem.c           |  10 +
>>   drivers/accel/amdxdna/amdxdna_gem.h           |   1 +
>>   .../accel/amdxdna/amdxdna_mailbox_helper.c    |   5 +
>>   drivers/accel/amdxdna/amdxdna_pci_drv.c       |   5 +
>>   drivers/accel/amdxdna/amdxdna_pci_drv.h       |   4 +
>>   drivers/accel/amdxdna/amdxdna_sysfs.c         |   5 +
>>   drivers/accel/amdxdna/npu1_regs.c             |   1 +
>>   drivers/accel/amdxdna/npu2_regs.c             |   1 +
>>   drivers/accel/amdxdna/npu4_regs.c             |   1 +
>>   drivers/accel/amdxdna/npu5_regs.c             |   1 +
>>   include/trace/events/amdxdna.h                |  41 ++
>>   include/uapi/drm/amdxdna_accel.h              |  38 +
>>   20 files changed, 1596 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
>> index 617fc05077d9..c3ac668e16ab 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -8,8 +8,12 @@
>>   #include <drm/drm_gem.h>
>>   #include <drm/drm_gem_shmem_helper.h>
>>   #include <drm/drm_print.h>
>> +#include <drm/drm_syncobj.h>
>> +#include <linux/hmm.h>
>>   #include <linux/types.h>
>> +#include <trace/events/amdxdna.h>
>>   
>> +#include "aie2_msg_priv.h"
>>   #include "aie2_pci.h"
>>   #include "aie2_solver.h"
>>   #include "amdxdna_ctx.h"
>> @@ -17,6 +21,337 @@
>>   #include "amdxdna_mailbox.h"
>>   #include "amdxdna_pci_drv.h"
>>   
>> +bool force_cmdlist;
>> +module_param(force_cmdlist, bool, 0600);
>> +MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
>> +
>> +#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */
>> +
>> +static struct amdxdna_sched_job *
>> +aie2_hwctx_get_job(struct amdxdna_hwctx *hwctx, u64 seq)
>> +{
>> +	int idx;
>> +
>> +	/* Special sequence number for oldest fence if exist */
>> +	if (seq == AMDXDNA_INVALID_CMD_HANDLE) {
>> +		idx = get_job_idx(hwctx->priv->seq);
>> +		goto out;
>> +	}
>> +
>> +	if (seq >= hwctx->priv->seq)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	if (seq + HWCTX_MAX_CMDS < hwctx->priv->seq)
>> +		return NULL;
>> +
>> +	idx = get_job_idx(seq);
>> +
>> +out:
>> +	return hwctx->priv->pending[idx];
>> +}
>> +
>> +/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
>> +static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
>> +			    struct drm_sched_job *bad_job)
>> +{
>> +	drm_sched_stop(&hwctx->priv->sched, bad_job);
>> +	aie2_destroy_context(xdna->dev_handle, hwctx);
>> +}
>> +
>> +static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
>> +{
>> +	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
>> +	int ret;
>> +
>> +	ret = aie2_create_context(xdna->dev_handle, hwctx);
>> +	if (ret) {
>> +		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
>> +		goto out;
>> +	}
>> +
>> +	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
>> +				heap->mem.userptr, heap->mem.size);
>> +	if (ret) {
>> +		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
>> +		goto out;
>> +	}
>> +
>> +	if (hwctx->status != HWCTX_STAT_READY) {
>> +		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
>> +		goto out;
>> +	}
>> +
>> +	ret = aie2_config_cu(hwctx);
>> +	if (ret) {
>> +		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
>> +		goto out;
>> +	}
>> +
>> +out:
>> +	drm_sched_start(&hwctx->priv->sched);
>> +	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
>> +	return ret;
>> +}
>> +
>> +void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map)
>> +{
>> +	struct amdxdna_dev *xdna = client->xdna;
>> +	struct amdxdna_hwctx *hwctx;
>> +	int next = 0;
>> +
>> +	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> +	mutex_lock(&client->hwctx_lock);
>> +	idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
>> +		/* check if the HW context uses the error column */
>> +		if (!(col_map & amdxdna_hwctx_col_map(hwctx)))
>> +			continue;
>> +
>> +		aie2_hwctx_stop(xdna, hwctx, NULL);
>> +		hwctx->old_status = hwctx->status;
>> +		hwctx->status = HWCTX_STAT_STOP;
>> +		XDNA_DBG(xdna, "Stop %s", hwctx->name);
>> +	}
>> +	mutex_unlock(&client->hwctx_lock);
>> +}
>> +
>> +void aie2_restart_ctx(struct amdxdna_client *client)
>> +{
>> +	struct amdxdna_dev *xdna = client->xdna;
>> +	struct amdxdna_hwctx *hwctx;
>> +	int next = 0;
>> +
>> +	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> +	mutex_lock(&client->hwctx_lock);
>> +	idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
>> +		if (hwctx->status != HWCTX_STAT_STOP)
>> +			continue;
>> +
>> +		hwctx->status = hwctx->old_status;
>> +		XDNA_DBG(xdna, "Resetting %s", hwctx->name);
>> +		aie2_hwctx_restart(xdna, hwctx);
>> +	}
>> +	mutex_unlock(&client->hwctx_lock);
>> +}
>> +
>> +static int aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
>> +{
>> +	struct amdxdna_sched_job *job;
>> +
>> +	mutex_lock(&hwctx->priv->io_lock);
>> +	if (!hwctx->priv->seq) {
>> +		mutex_unlock(&hwctx->priv->io_lock);
>> +		return 0;
>> +	}
>> +
>> +	job = aie2_hwctx_get_job(hwctx, hwctx->priv->seq - 1);
>> +	if (IS_ERR_OR_NULL(job)) {
>> +		mutex_unlock(&hwctx->priv->io_lock);
>> +		XDNA_WARN(hwctx->client->xdna, "Corrupted pending list");
>> +		return 0;
>> +	}
>> +	mutex_unlock(&hwctx->priv->io_lock);
>> +
>> +	wait_event(hwctx->priv->job_free_wq, !job->fence);
>> +
>> +	return 0;
>> +}
>> +
>> +static void
>> +aie2_sched_notify(struct amdxdna_sched_job *job)
>> +{
>> +	struct dma_fence *fence = job->fence;
>> +
>> +	job->hwctx->priv->completed++;
>> +	dma_fence_signal(fence);
>> +	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
>> +	dma_fence_put(fence);
>> +	mmput(job->mm);
>> +	amdxdna_job_put(job);
>> +}
>> +
>> +static int
>> +aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
>> +{
>> +	struct amdxdna_sched_job *job = handle;
>> +	struct amdxdna_gem_obj *cmd_abo;
>> +	u32 ret = 0;
>> +	u32 status;
>> +
>> +	cmd_abo = job->cmd_bo;
>> +
>> +	if (unlikely(!data))
>> +		goto out;
>> +
>> +	if (unlikely(size != sizeof(u32))) {
>> +		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	status = *data;
>> +	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
>> +	if (status == AIE2_STATUS_SUCCESS)
>> +		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
>> +	else
>> +		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
>> +
>> +out:
>> +	aie2_sched_notify(job);
>> +	return ret;
>> +}
>> +
>> +static int
>> +aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
>> +{
>> +	struct amdxdna_sched_job *job = handle;
>> +	u32 ret = 0;
>> +	u32 status;
>> +
>> +	if (unlikely(!data))
>> +		goto out;
>> +
>> +	if (unlikely(size != sizeof(u32))) {
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	status = *data;
>> +	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
>> +
>> +out:
>> +	aie2_sched_notify(job);
>> +	return ret;
>> +}
>> +
>> +static int
>> +aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
>> +{
>> +	struct amdxdna_sched_job *job = handle;
>> +	struct amdxdna_gem_obj *cmd_abo;
>> +	struct cmd_chain_resp *resp;
>> +	struct amdxdna_dev *xdna;
>> +	u32 fail_cmd_status;
>> +	u32 fail_cmd_idx;
>> +	u32 ret = 0;
>> +
>> +	cmd_abo = job->cmd_bo;
>> +	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
>> +		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	resp = (struct cmd_chain_resp *)data;
>> +	xdna = job->hwctx->client->xdna;
>> +	XDNA_DBG(xdna, "Status 0x%x", resp->status);
>> +	if (resp->status == AIE2_STATUS_SUCCESS) {
>> +		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
>> +		goto out;
>> +	}
>> +
>> +	/* Slow path to handle error, read from ringbuf on BAR */
>> +	fail_cmd_idx = resp->fail_cmd_idx;
>> +	fail_cmd_status = resp->fail_cmd_status;
>> +	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
>> +		 fail_cmd_idx, fail_cmd_status);
>> +
>> +	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
>> +		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +	amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
>> +
>> +	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
>> +		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
>> +
>> +		cc->error_index = fail_cmd_idx;
>> +		if (cc->error_index >= cc->command_count)
>> +			cc->error_index = 0;
>> +	}
>> +out:
>> +	aie2_sched_notify(job);
>> +	return ret;
>> +}
>> +
>> +static struct dma_fence *
>> +aie2_sched_job_run(struct drm_sched_job *sched_job)
>> +{
>> +	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>> +	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>> +	struct amdxdna_hwctx *hwctx = job->hwctx;
>> +	struct dma_fence *fence;
>> +	int ret;
>> +
>> +	if (!mmget_not_zero(job->mm))
>> +		return ERR_PTR(-ESRCH);
>> +
>> +	kref_get(&job->refcnt);
>> +	fence = dma_fence_get(job->fence);
>> +
>> +	if (unlikely(!cmd_abo)) {
>> +		ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
>> +		goto out;
>> +	}
>> +
>> +	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
>> +
>> +	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
>> +		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
>> +	else if (force_cmdlist)
>> +		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
>> +	else
>> +		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
>> +
>> +out:
>> +	if (ret) {
>> +		dma_fence_put(job->fence);
>> +		amdxdna_job_put(job);
>> +		mmput(job->mm);
>> +		fence = ERR_PTR(ret);
>> +	}
>> +	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
>> +
>> +	return fence;
>> +}
>> +
>> +static void aie2_sched_job_free(struct drm_sched_job *sched_job)
>> +{
>> +	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>> +	struct amdxdna_hwctx *hwctx = job->hwctx;
>> +
>> +	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
>> +	drm_sched_job_cleanup(sched_job);
>> +	job->fence = NULL;
>> +	amdxdna_job_put(job);
>> +
>> +	wake_up(&hwctx->priv->job_free_wq);
>> +}
>> +
>> +static enum drm_gpu_sched_stat
>> +aie2_sched_job_timedout(struct drm_sched_job *sched_job)
>> +{
>> +	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
>> +	struct amdxdna_hwctx *hwctx = job->hwctx;
>> +	struct amdxdna_dev *xdna;
>> +
>> +	xdna = hwctx->client->xdna;
>> +	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
>> +	mutex_lock(&xdna->dev_lock);
>> +	aie2_hwctx_stop(xdna, hwctx, sched_job);
>> +
>> +	aie2_hwctx_restart(xdna, hwctx);
>> +	mutex_unlock(&xdna->dev_lock);
>> +
>> +	return DRM_GPU_SCHED_STAT_NOMINAL;
>> +}
>> +
>> +const struct drm_sched_backend_ops sched_ops = {
>> +	.run_job = aie2_sched_job_run,
>> +	.free_job = aie2_sched_job_free,
>> +	.timedout_job = aie2_sched_job_timedout,
>> +};
>> +
>>   static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
>>   {
>>   	struct amdxdna_dev *xdna = hwctx->client->xdna;
>> @@ -126,13 +461,66 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
>>   		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
>>   }
>>   
>> +static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
>> +{
>> +	struct amdxdna_dev *xdna = hwctx->client->xdna;
>> +	struct drm_file *filp = hwctx->client->filp;
>> +	struct drm_syncobj *syncobj;
>> +	u32 hdl;
>> +	int ret;
>> +
>> +	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
>> +
>> +	ret = drm_syncobj_create(&syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, NULL);
>> +	if (ret) {
>> +		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
>> +		return ret;
>> +	}
>> +	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
>> +	if (ret) {
>> +		drm_syncobj_put(syncobj);
>> +		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
>> +		return ret;
>> +	}
>> +	hwctx->priv->syncobj = syncobj;
>> +	hwctx->syncobj_hdl = hdl;
>> +
>> +	return 0;
>> +}
>> +
>> +static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
>> +{
>> +	/*
>> +	 * The syncobj_hdl is owned by user space and will be cleaned up
>> +	 * separately.
>> +	 */
>> +	drm_syncobj_put(hwctx->priv->syncobj);
>> +}
>> +
>> +static void aie2_ctx_syncobj_add_fence(struct amdxdna_hwctx *hwctx,
>> +				       struct dma_fence *ofence, u64 seq)
>> +{
>> +	struct drm_syncobj *syncobj = hwctx->priv->syncobj;
>> +	struct dma_fence_chain *chain;
>> +
>> +	if (!syncobj)
>> +		return;
>> +
>> +	chain = dma_fence_chain_alloc();
>> +	if (!chain)
>> +		return;
>> +
>> +	drm_syncobj_add_point(syncobj, chain, ofence, seq);
>> +}
>> +
>>   int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>   {
>>   	struct amdxdna_client *client = hwctx->client;
>>   	struct amdxdna_dev *xdna = client->xdna;
>> +	struct drm_gpu_scheduler *sched;
>>   	struct amdxdna_hwctx_priv *priv;
>>   	struct amdxdna_gem_obj *heap;
>> -	int ret;
>> +	int i, ret;
>>   
>>   	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
>>   	if (!priv)
>> @@ -157,10 +545,48 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>   		goto put_heap;
>>   	}
>>   
>> +	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
>> +		struct amdxdna_gem_obj *abo;
>> +		struct amdxdna_drm_create_bo args = {
>> +			.flags = 0,
>> +			.type = AMDXDNA_BO_DEV,
>> +			.vaddr = 0,
>> +			.size = MAX_CHAIN_CMDBUF_SIZE,
>> +		};
>> +
>> +		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
>> +		if (IS_ERR(abo)) {
>> +			ret = PTR_ERR(abo);
>> +			goto free_cmd_bufs;
>> +		}
>> +
>> +		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
>> +			 i, abo->mem.dev_addr, abo->mem.size);
>> +		priv->cmd_buf[i] = abo;
>> +	}
>> +
>> +	sched = &priv->sched;
>> +	mutex_init(&priv->io_lock);
>> +	ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
>> +			     HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
>> +			     NULL, NULL, hwctx->name, xdna->ddev.dev);
>> +	if (ret) {
>> +		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
>> +		goto free_cmd_bufs;
>> +	}
>> +
>> +	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
>> +				    &sched, 1, NULL);
>> +	if (ret) {
>> +		XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
>> +		goto free_sched;
>> +	}
>> +	init_waitqueue_head(&priv->job_free_wq);
>> +
>>   	ret = aie2_hwctx_col_list(hwctx);
>>   	if (ret) {
>>   		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
>> -		goto unpin;
>> +		goto free_entity;
>>   	}
>>   
>>   	ret = aie2_alloc_resource(hwctx);
>> @@ -175,6 +601,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>   		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
>>   		goto release_resource;
>>   	}
>> +
>> +	ret = aie2_ctx_syncobj_create(hwctx);
>> +	if (ret) {
>> +		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
>> +		goto release_resource;
>> +	}
>> +
>>   	hwctx->status = HWCTX_STAT_INIT;
>>   
>>   	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
>> @@ -185,7 +618,16 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>   	aie2_release_resource(hwctx);
>>   free_col_list:
>>   	kfree(hwctx->col_list);
>> -unpin:
>> +free_entity:
>> +	drm_sched_entity_destroy(&priv->entity);
>> +free_sched:
>> +	drm_sched_fini(&priv->sched);
>> +free_cmd_bufs:
>> +	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
>> +		if (!priv->cmd_buf[i])
>> +			continue;
>> +		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
>> +	}
>>   	amdxdna_gem_unpin(heap);
>>   put_heap:
>>   	drm_gem_object_put(to_gobj(heap));
>> @@ -196,11 +638,44 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>   
>>   void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>   {
>> +	struct amdxdna_sched_job *job;
>> +	struct amdxdna_dev *xdna;
>> +	int idx;
>> +
>> +	xdna = hwctx->client->xdna;
>> +	aie2_ctx_syncobj_destroy(hwctx);
>> +	drm_sched_wqueue_stop(&hwctx->priv->sched);
>> +
>> +	/* Now, scheduler will not send command to device. */
>>   	aie2_release_resource(hwctx);
>>   
>> +	/*
>> +	 * All submitted commands are aborted.
>> +	 * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run()
>> +	 * will return NODEV if it is called.
>> +	 */
>> +	drm_sched_wqueue_start(&hwctx->priv->sched);
>> +
>> +	aie2_hwctx_wait_for_idle(hwctx);
>> +	drm_sched_entity_destroy(&hwctx->priv->entity);
>> +	drm_sched_fini(&hwctx->priv->sched);
>> +
>> +	for (idx = 0; idx < HWCTX_MAX_CMDS; idx++) {
>> +		job = hwctx->priv->pending[idx];
>> +		if (!job)
>> +			continue;
>> +
>> +		dma_fence_put(job->out_fence);
>> +		amdxdna_job_put(job);
>> +	}
>> +	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
>> +
>> +	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
>> +		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
>>   	amdxdna_gem_unpin(hwctx->priv->heap);
>>   	drm_gem_object_put(to_gobj(hwctx->priv->heap));
>>   
>> +	mutex_destroy(&hwctx->priv->io_lock);
>>   	kfree(hwctx->col_list);
>>   	kfree(hwctx->priv);
>>   	kfree(hwctx->cus);
>> @@ -267,3 +742,186 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
>>   		return -EOPNOTSUPP;
>>   	}
>>   }
>> +
>> +static int aie2_populate_range(struct amdxdna_gem_obj *abo)
>> +{
>> +	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
>> +	struct mm_struct *mm = abo->mem.notifier.mm;
>> +	struct hmm_range range = { 0 };
>> +	unsigned long timeout;
>> +	int ret;
>> +
>> +	XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
>> +		       abo->mem.userptr, abo->mem.size);
>> +	range.notifier = &abo->mem.notifier;
>> +	range.start = abo->mem.userptr;
>> +	range.end = abo->mem.userptr + abo->mem.size;
>> +	range.hmm_pfns = abo->mem.pfns;
>> +	range.default_flags = HMM_PFN_REQ_FAULT;
>> +
>> +	if (!mmget_not_zero(mm))
>> +		return -EFAULT;
>> +
>> +	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
>> +again:
>> +	range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
>> +	mmap_read_lock(mm);
>> +	ret = hmm_range_fault(&range);
>> +	mmap_read_unlock(mm);
>> +	if (ret) {
>> +		if (time_after(jiffies, timeout)) {
>> +			ret = -ETIME;
>> +			goto put_mm;
>> +		}
>> +
>> +		if (ret == -EBUSY)
>> +			goto again;
>> +
>> +		goto put_mm;
>> +	}
>> +
>> +	mutex_lock(&abo->mem.notify_lock);
>> +	if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
>> +		mutex_unlock(&abo->mem.notify_lock);
>> +		goto again;
>> +	}
>> +	abo->mem.map_invalid = false;
>> +	mutex_unlock(&abo->mem.notify_lock);
>> +
>> +put_mm:
>> +	mmput(mm);
>> +	return ret;
>> +}
>> +
>> +static int aie2_hwctx_push_job(struct amdxdna_sched_job *job, u64 *seq)
>> +{
>> +	struct amdxdna_hwctx *hwctx = job->hwctx;
>> +	struct amdxdna_sched_job *other;
>> +	struct dma_fence *fence;
>> +	long ret;
>> +	int idx;
>> +
>> +again:
>> +	mutex_lock(&hwctx->priv->io_lock);
>> +	idx = get_job_idx(hwctx->priv->seq);
>> +	/* When pending list full, hwctx->seq points to oldest fence */
>> +	other = hwctx->priv->pending[idx];
>> +	if (other && !dma_fence_is_signaled(other->out_fence)) {
>> +		fence = dma_fence_get(other->out_fence);
>> +		mutex_unlock(&hwctx->priv->io_lock);
>> +
>> +		ret = dma_fence_wait_timeout(fence, true, MAX_SCHEDULE_TIMEOUT);
>> +		dma_fence_put(fence);
>> +		if (!ret)
>> +			return -ETIME;
>> +		else if (ret < 0)
>> +			return ret;
>> +		goto again;
>> +	}
>> +
>> +	if (other) {
>> +		dma_fence_put(other->out_fence);
>> +		amdxdna_job_put(other);
>> +	}
>> +
>> +	hwctx->priv->pending[idx] = job;
>> +	job->seq = hwctx->priv->seq++;
>> +	*seq = job->seq;
>> +	kref_get(&job->refcnt);
>> +
>> +	fence = dma_fence_get(job->out_fence);
>> +	drm_sched_entity_push_job(&job->base);
>> +	mutex_unlock(&hwctx->priv->io_lock);
>> +
>> +	aie2_ctx_syncobj_add_fence(hwctx, fence, *seq);
>> +	dma_fence_put(fence);
>> +	return 0;
>> +}
>> +
>> +int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
>> +{
>> +	struct amdxdna_dev *xdna = hwctx->client->xdna;
>> +	struct ww_acquire_ctx acquire_ctx;
>> +	struct amdxdna_gem_obj *abo;
>> +	unsigned long timeout = 0;
>> +	int ret, i;
>> +
>> +	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
>> +	if (ret) {
>> +		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
>> +		return ret;
>> +	}
>> +
>> +	drm_sched_job_arm(&job->base);
> Again drive by comments.
>
> This looks wildly dangerous. This typically should be called once
> holding all locks at the point in which you cannot fail. I get that
> you signal the jobs fence on failure but that doesn't seem like a great
> idea nor do I think how the schedule is designed.
>
> The flow typically is:
>
> acquire all locks and setup job...
> arm
> install fences
> push
>
> ^^^ With not being able to to fail between arn & push.
>
> Your flow is...
>
> arm
> acquire locks...
> install fences
> drop locks...
> acquire different locks...
> push
> drops different locks...
>
> Seems dangerous, I would reconsider.

Ok, I worked on this and will send out v7 patches to follow the 
suggested flow.


Thanks,

Lizhi

next prev parent reply	other threads:[~2024-11-06 18:23 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-30 15:51 [PATCH V6 00/10] AMD XDNA driver Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 01/10] accel/amdxdna: Add documentation for AMD NPU accelerator driver Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 02/10] accel/amdxdna: Add a new driver for AMD AI Engine Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 03/10] accel/amdxdna: Support hardware mailbox Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 04/10] accel/amdxdna: Add hardware resource solver Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 05/10] accel/amdxdna: Add hardware context Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 06/10] accel/amdxdna: Add GEM buffer object management Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 07/10] accel/amdxdna: Add command execution Lizhi Hou
2024-11-04  6:31   ` Matthew Brost
2024-11-06 18:23     ` Lizhi Hou [this message]
2024-10-30 15:51 ` [PATCH V6 08/10] accel/amdxdna: Add suspend and resume Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 09/10] accel/amdxdna: Add error handling Lizhi Hou
2024-10-30 15:51 ` [PATCH V6 10/10] accel/amdxdna: Add query functions Lizhi Hou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a100b35f-ab8a-e7f6-325d-b29953c756c5@amd.com \
    --to=lizhi.hou@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=king.tam@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthew.brost@intel.com \
    --cc=max.zhen@amd.com \
    --cc=min.ma@amd.com \
    --cc=ogabbay@kernel.org \
    --cc=quic_jhugo@quicinc.com \
    --cc=sonal.santan@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.