* [PATCH V1] accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported
@ 2025-10-31 1:47 Lizhi Hou
2025-10-31 5:10 ` Mario Limonciello (AMD) (kernel.org)
0 siblings, 1 reply; 4+ messages in thread
From: Lizhi Hou @ 2025-10-31 1:47 UTC (permalink / raw)
To: ogabbay, quic_jhugo, maciej.falkowski, dri-devel
Cc: Lizhi Hou, linux-kernel, max.zhen, sonal.santan,
mario.limonciello
MSG_OP_CHAIN_EXEC_NPU is a unified mailbox message that replaces
MSG_OP_CHAIN_EXEC_BUFFER_CF and MSG_OP_CHAIN_EXEC_DPU.
Add driver logic to check the firmware version and, if MSG_OP_CHAIN_EXEC_NPU
is supported, use it to submit firmware commands.
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie2_message.c | 443 +++++++++++++++++---------
drivers/accel/amdxdna/aie2_msg_priv.h | 42 ++-
drivers/accel/amdxdna/aie2_pci.c | 13 +
drivers/accel/amdxdna/aie2_pci.h | 29 ++
drivers/accel/amdxdna/amdxdna_ctx.c | 6 +-
drivers/accel/amdxdna/amdxdna_ctx.h | 11 +-
drivers/accel/amdxdna/npu1_regs.c | 6 +
drivers/accel/amdxdna/npu2_regs.c | 1 +
drivers/accel/amdxdna/npu4_regs.c | 6 +
drivers/accel/amdxdna/npu5_regs.c | 1 +
drivers/accel/amdxdna/npu6_regs.c | 1 +
11 files changed, 392 insertions(+), 167 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 3a4c845d783a..4751a8aff0f7 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -27,6 +27,8 @@
#define DECLARE_AIE2_MSG(name, op) \
DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
+#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
+
static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
struct xdna_mailbox_msg *msg)
{
@@ -479,177 +481,291 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
}
-int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
- int (*notify_cb)(void *, void __iomem *, size_t))
+static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op)
{
- struct mailbox_channel *chann = hwctx->priv->mbox_chann;
- struct amdxdna_dev *xdna = hwctx->client->xdna;
- struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
- union {
- struct execute_buffer_req ebuf;
- struct exec_dpu_req dpu;
- } req;
- struct xdna_mailbox_msg msg;
- u32 payload_len;
- void *payload;
- int cu_idx;
- int ret;
- u32 op;
+ struct execute_buffer_req *cu_req = req;
+ u32 cmd_len;
+ void *cmd;
- if (!chann)
- return -ENODEV;
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (cmd_len > sizeof(cu_req->payload))
+ return -EINVAL;
- payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
- if (!payload) {
- XDNA_ERR(xdna, "Invalid command, cannot get payload");
+ cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (cu_req->cu_idx == INVALID_CU_IDX)
return -EINVAL;
- }
- cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
- if (cu_idx < 0) {
- XDNA_DBG(xdna, "Invalid cu idx");
+ memcpy(cu_req->payload, cmd, cmd_len);
+
+ *size = sizeof(*cu_req);
+ *msg_op = MSG_OP_EXECUTE_BUFFER_CF;
+ return 0;
+}
+
+static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op)
+{
+ struct exec_dpu_req *dpu_req = req;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
return -EINVAL;
- }
- op = amdxdna_cmd_get_op(cmd_abo);
- switch (op) {
- case ERT_START_CU:
- if (unlikely(payload_len > sizeof(req.ebuf.payload)))
- XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
- req.ebuf.cu_idx = cu_idx;
- memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
- msg.send_size = sizeof(req.ebuf);
- msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
- break;
- case ERT_START_NPU: {
- struct amdxdna_cmd_start_npu *sn = payload;
-
- if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
- XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
- req.dpu.inst_buf_addr = sn->buffer;
- req.dpu.inst_size = sn->buffer_size;
- req.dpu.inst_prop_cnt = sn->prop_count;
- req.dpu.cu_idx = cu_idx;
- memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
- msg.send_size = sizeof(req.dpu);
- msg.opcode = MSG_OP_EXEC_DPU;
- break;
- }
- default:
- XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
+ dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (dpu_req->cu_idx == INVALID_CU_IDX)
return -EINVAL;
- }
- msg.handle = job;
- msg.notify_cb = notify_cb;
- msg.send_data = (u8 *)&req;
- print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
- 0x40, false);
- ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
- if (ret) {
- XDNA_ERR(xdna, "Send message failed");
- return ret;
- }
+ dpu_req->inst_buf_addr = sn->buffer;
+ dpu_req->inst_size = sn->buffer_size;
+ dpu_req->inst_prop_cnt = sn->prop_count;
+ memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
+ *size = sizeof(*dpu_req);
+ *msg_op = MSG_OP_EXEC_DPU;
return 0;
}
+static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+ struct cmd_chain_req *chain_req = req;
+
+ chain_req->buf_addr = slot_addr;
+ chain_req->buf_size = size;
+ chain_req->count = cmd_cnt;
+}
+
+static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
+{
+ struct cmd_chain_npu_req *npu_chain_req = req;
+
+ npu_chain_req->flags = 0;
+ npu_chain_req->reserved = 0;
+ npu_chain_req->buf_addr = slot_addr;
+ npu_chain_req->buf_size = size;
+ npu_chain_req->count = cmd_cnt;
+}
+
static int
-aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
- struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
- int cu_idx = amdxdna_cmd_get_cu_idx(abo);
- u32 payload_len;
- void *payload;
+ struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
+ u32 cmd_len;
+ void *cmd;
- if (cu_idx < 0)
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (*size < sizeof(*cf_slot) + cmd_len)
return -EINVAL;
- payload = amdxdna_cmd_get_payload(abo, &payload_len);
- if (!payload)
+ cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (cf_slot->cu_idx == INVALID_CU_IDX)
return -EINVAL;
- if (!slot_has_space(*buf, offset, payload_len))
- return -ENOSPC;
-
- buf->cu_idx = cu_idx;
- buf->arg_cnt = payload_len / sizeof(u32);
- memcpy(buf->args, payload, payload_len);
- /* Accurate buf size to hint firmware to do necessary copy */
- *size = sizeof(*buf) + payload_len;
+ cf_slot->arg_cnt = cmd_len / sizeof(u32);
+ memcpy(cf_slot->args, cmd, cmd_len);
+ /* Accurate slot size to hint firmware to do necessary copy */
+ *size = sizeof(*cf_slot) + cmd_len;
return 0;
}
static int
-aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
{
- struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
- int cu_idx = amdxdna_cmd_get_cu_idx(abo);
+ struct cmd_chain_slot_dpu *dpu_slot = slot;
struct amdxdna_cmd_start_npu *sn;
- u32 payload_len;
- void *payload;
+ u32 cmd_len;
u32 arg_sz;
- if (cu_idx < 0)
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*sn);
+ if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
return -EINVAL;
- payload = amdxdna_cmd_get_payload(abo, &payload_len);
- if (!payload)
+ if (*size < sizeof(*dpu_slot) + arg_sz)
return -EINVAL;
- sn = payload;
- arg_sz = payload_len - sizeof(*sn);
- if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+
+ dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (dpu_slot->cu_idx == INVALID_CU_IDX)
return -EINVAL;
- if (!slot_has_space(*buf, offset, arg_sz))
- return -ENOSPC;
+ dpu_slot->inst_buf_addr = sn->buffer;
+ dpu_slot->inst_size = sn->buffer_size;
+ dpu_slot->inst_prop_cnt = sn->prop_count;
+ dpu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(dpu_slot->args, sn->prop_args, arg_sz);
+
+ /* Accurate slot size to hint firmware to do necessary copy */
+ *size = sizeof(*dpu_slot) + arg_sz;
+ return 0;
+}
+
+static u32 aie2_get_chain_msg_op(u32 cmd_op)
+{
+ switch (cmd_op) {
+ case ERT_START_CU:
+ return MSG_OP_CHAIN_EXEC_BUFFER_CF;
+ case ERT_START_NPU:
+ return MSG_OP_CHAIN_EXEC_DPU;
+ default:
+ break;
+ }
- buf->inst_buf_addr = sn->buffer;
- buf->inst_size = sn->buffer_size;
- buf->inst_prop_cnt = sn->prop_count;
- buf->cu_idx = cu_idx;
- buf->arg_cnt = arg_sz / sizeof(u32);
- memcpy(buf->args, sn->prop_args, arg_sz);
+ return MSG_OP_MAX_OPCODE;
+}
- /* Accurate buf size to hint firmware to do necessary copy */
- *size = sizeof(*buf) + arg_sz;
+static struct aie2_exec_msg_ops legacy_exec_message_ops = {
+ .init_cu_req = aie2_init_exec_cu_req,
+ .init_dpu_req = aie2_init_exec_dpu_req,
+ .init_chain_req = aie2_init_exec_chain_req,
+ .fill_cf_slot = aie2_cmdlist_fill_cf,
+ .fill_dpu_slot = aie2_cmdlist_fill_dpu,
+ .get_chain_msg_op = aie2_get_chain_msg_op,
+};
+
+static int
+aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ u32 cmd_len;
+ void *cmd;
+
+ cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ if (*size < sizeof(*npu_slot) + cmd_len)
+ return -EINVAL;
+ /* Zero the slot header first; clearing it after cu_idx is set would wipe cu_idx */
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
+ npu_slot->arg_cnt = cmd_len / sizeof(u32);
+ memcpy(npu_slot->args, cmd, cmd_len);
+
+ *size = sizeof(*npu_slot) + cmd_len;
return 0;
}
static int
-aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
- struct amdxdna_gem_obj *abo, u32 *size)
+aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
+{
+ struct cmd_chain_slot_npu *npu_slot = slot;
+ struct amdxdna_cmd_start_npu *sn;
+ u32 cmd_len;
+ u32 arg_sz;
+
+ sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
+ arg_sz = cmd_len - sizeof(*sn);
+ if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (*size < sizeof(*npu_slot) + arg_sz)
+ return -EINVAL;
+ /* Zero the slot header first; clearing it after cu_idx is set would wipe cu_idx */
+ memset(npu_slot, 0, sizeof(*npu_slot));
+ npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
+ if (npu_slot->cu_idx == INVALID_CU_IDX)
+ return -EINVAL;
+
+ npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
+ npu_slot->inst_buf_addr = sn->buffer;
+ npu_slot->inst_size = sn->buffer_size;
+ npu_slot->inst_prop_cnt = sn->prop_count;
+ npu_slot->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(npu_slot->args, sn->prop_args, arg_sz);
+
+ *size = sizeof(*npu_slot) + arg_sz;
+ return 0;
+}
+
+static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
+{
+ return MSG_OP_CHAIN_EXEC_NPU;
+}
+
+static struct aie2_exec_msg_ops npu_exec_message_ops = {
+ .init_cu_req = aie2_init_exec_cu_req,
+ .init_dpu_req = aie2_init_exec_dpu_req,
+ .init_chain_req = aie2_init_npu_chain_req,
+ .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
+ .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
+ .get_chain_msg_op = aie2_get_npu_chain_msg_op,
+};
+
+static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
+ size_t *size, u32 *msg_op)
{
- u32 this_op = amdxdna_cmd_get_op(abo);
- void *cmd_buf = cmdbuf_abo->mem.kva;
+ struct amdxdna_dev *xdna = cmd_abo->client->xdna;
int ret;
+ u32 op;
- if (this_op != op) {
- ret = -EINVAL;
- goto done;
- }
+ op = amdxdna_cmd_get_op(cmd_abo);
switch (op) {
case ERT_START_CU:
- ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
+ ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
+ if (ret) {
+ XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
+ return ret;
+ }
break;
case ERT_START_NPU:
- ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
+ ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
+ if (ret) {
+ XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
+ return ret;
+ }
+
break;
default:
+ XDNA_INFO(xdna, "Unsupported op %d", op);
ret = -EOPNOTSUPP;
+ break;
}
-done:
- if (ret) {
- XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
- op, ret);
+ return ret;
+}
+
+static int
+aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
+ size_t *size, u32 *cmd_op)
+{
+ struct amdxdna_dev *xdna = cmd_abo->client->xdna;
+ int ret;
+ u32 op;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ if (*cmd_op == ERT_INVALID_CMD)
+ *cmd_op = op;
+ else if (op != *cmd_op)
+ return -EINVAL;
+
+ switch (op) {
+ case ERT_START_CU:
+ ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
+ break;
+ case ERT_START_NPU:
+ ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
+ break;
+ default:
+ XDNA_INFO(xdna, "Unsupported op %d", op);
+ ret = -EOPNOTSUPP;
+ break;
}
+
return ret;
}
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
+{
+ if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
+ ndev->exec_msg_ops = &npu_exec_message_ops;
+ else
+ ndev->exec_msg_ops = &legacy_exec_message_ops;
+}
+
static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
@@ -658,29 +774,36 @@ aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
return job->hwctx->priv->cmd_buf[idx];
}
-static void
-aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
- struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, void __iomem *, size_t))
{
- req->buf_addr = cmdbuf_abo->mem.dev_addr;
- req->buf_size = size;
- req->count = cnt;
- drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
- XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
- req->buf_addr, size, cnt);
-}
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct xdna_mailbox_msg msg;
+ union exec_req req;
+ int ret;
-static inline u32
-aie2_cmd_op_to_msg_op(u32 op)
-{
- switch (op) {
- case ERT_START_CU:
- return MSG_OP_CHAIN_EXEC_BUFFER_CF;
- case ERT_START_NPU:
- return MSG_OP_CHAIN_EXEC_DPU;
- default:
- return MSG_OP_MAX_OPCODE;
+ if (!chann)
+ return -ENODEV;
+
+ ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
+ if (ret)
+ return ret;
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
+ 0x40, false);
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
}
+
+ return 0;
}
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
@@ -691,12 +814,13 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
struct amdxdna_client *client = hwctx->client;
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_cmd_chain *payload;
struct xdna_mailbox_msg msg;
- struct cmd_chain_req req;
+ union exec_chain_req req;
u32 payload_len;
u32 offset = 0;
- u32 size;
+ size_t size;
int ret;
u32 op;
u32 i;
@@ -707,41 +831,42 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
payload_len < struct_size(payload, data, payload->command_count))
return -EINVAL;
+ op = ERT_INVALID_CMD;
for (i = 0; i < payload->command_count; i++) {
u32 boh = (u32)(payload->data[i]);
struct amdxdna_gem_obj *abo;
abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
if (!abo) {
- XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
+ XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
return -ENOENT;
}
- /* All sub-cmd should have same op, use the first one. */
- if (i == 0)
- op = amdxdna_cmd_get_op(abo);
-
- ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
+ size = cmdbuf_abo->mem.size - offset;
+ ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
+ abo, &size, &op);
amdxdna_gem_put_obj(abo);
if (ret)
- return -EINVAL;
+ return ret;
offset += size;
}
+ msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
/* The offset is the accumulated total size of the cmd buffer */
- aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
+ EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+ offset, payload->command_count);
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
- msg.opcode = aie2_cmd_op_to_msg_op(op);
- if (msg.opcode == MSG_OP_MAX_OPCODE)
- return -EOPNOTSUPP;
msg.handle = job;
msg.notify_cb = notify_cb;
msg.send_data = (u8 *)&req;
msg.send_size = sizeof(req);
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
if (ret) {
- XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ XDNA_ERR(xdna, "Send message failed");
return ret;
}
@@ -754,23 +879,27 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
{
struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
struct xdna_mailbox_msg msg;
- struct cmd_chain_req req;
- u32 size;
+ union exec_chain_req req;
+ u32 op = ERT_INVALID_CMD;
+ size_t size;
int ret;
- u32 op;
- op = amdxdna_cmd_get_op(cmd_abo);
- ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
+ size = cmdbuf_abo->mem.size;
+ ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op);
if (ret)
return ret;
- aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
-
- msg.opcode = aie2_cmd_op_to_msg_op(op);
+ msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
if (msg.opcode == MSG_OP_MAX_OPCODE)
return -EOPNOTSUPP;
+
+ EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
+ size, 1);
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
+
msg.handle = job;
msg.notify_cb = notify_cb;
msg.send_data = (u8 *)&req;
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 2dbea1d09980..947daa63f064 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -20,6 +20,7 @@ enum aie2_msg_opcode {
MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
MSG_OP_CHAIN_EXEC_DPU = 0x13,
MSG_OP_CONFIG_DEBUG_BO = 0x14,
+ MSG_OP_CHAIN_EXEC_NPU = 0x18,
MSG_OP_MAX_XRT_OPCODE,
MSG_OP_SUSPEND = 0x101,
MSG_OP_RESUME = 0x102,
@@ -172,6 +173,16 @@ struct exec_dpu_req {
__u32 payload[35];
} __packed;
+enum exec_npu_type {
+ EXEC_NPU_TYPE_NON_ELF = 0x1,
+ EXEC_NPU_TYPE_PARTIAL_ELF = 0x2,
+};
+
+union exec_req {
+ struct execute_buffer_req ebuf;
+ struct exec_dpu_req dpu_req;
+};
+
struct execute_buffer_resp {
enum aie2_msg_status status;
} __packed;
@@ -343,9 +354,6 @@ struct async_event_msg_resp {
} __packed;
#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
-#define slot_has_space(slot, offset, payload_size) \
- (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
- sizeof(typeof(slot)))
struct cmd_chain_slot_execbuf_cf {
__u32 cu_idx;
@@ -363,12 +371,40 @@ struct cmd_chain_slot_dpu {
__u32 args[] __counted_by(arg_cnt);
};
+#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
+struct cmd_chain_slot_npu {
+ enum exec_npu_type type;
+ u64 inst_buf_addr;
+ u64 save_buf_addr;
+ u64 restore_buf_addr;
+ u32 inst_size;
+ u32 save_size;
+ u32 restore_size;
+ u32 inst_prop_cnt;
+ u32 cu_idx;
+ u32 arg_cnt;
+ u32 args[] __counted_by(arg_cnt);
+} __packed;
+
struct cmd_chain_req {
__u64 buf_addr;
__u32 buf_size;
__u32 count;
} __packed;
+struct cmd_chain_npu_req {
+ u32 flags;
+ u32 reserved;
+ u64 buf_addr;
+ u32 buf_size;
+ u32 count;
+} __packed;
+
+union exec_chain_req {
+ struct cmd_chain_npu_req npu_req;
+ struct cmd_chain_req req;
+};
+
struct cmd_chain_resp {
enum aie2_msg_status status;
__u32 fail_cmd_idx;
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 80313a2a98d4..d7ccbdaf47f5 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -55,6 +55,7 @@ struct mgmt_mbox_chann_info {
static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
{
+ const struct aie2_fw_feature_tbl *feature;
struct amdxdna_dev *xdna = ndev->xdna;
/*
@@ -78,6 +79,17 @@ static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 f
XDNA_ERR(xdna, "Firmware minor version smaller than supported");
return -EINVAL;
}
+
+ for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor;
+ feature++) {
+ if (fw_minor < feature->min_minor)
+ continue;
+ if (feature->max_minor > 0 && fw_minor > feature->max_minor)
+ continue;
+
+ set_bit(feature->feature, &ndev->feature_mask);
+ }
+
return 0;
}
@@ -587,6 +599,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
}
release_firmware(fw);
+ aie2_msg_init(ndev);
amdxdna_pm_init(xdna);
return 0;
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index cfe42b0d4242..d0a3cb1fe8be 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -156,6 +156,17 @@ enum aie2_dev_status {
AIE2_DEV_START,
};
+struct aie2_exec_msg_ops {
+ int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op);
+ int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
+ size_t *size, u32 *msg_op);
+ void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
+ int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
+ u32 (*get_chain_msg_op)(u32 cmd_op);
+};
+
struct amdxdna_dev_hdl {
struct amdxdna_dev *xdna;
const struct amdxdna_dev_priv *priv;
@@ -173,6 +184,8 @@ struct amdxdna_dev_hdl {
u32 total_col;
struct aie_version version;
struct aie_metadata metadata;
+ unsigned long feature_mask;
+ struct aie2_exec_msg_ops *exec_msg_ops;
/* power management and clock*/
enum amdxdna_power_mode_type pw_mode;
@@ -208,12 +221,26 @@ struct aie2_hw_ops {
int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
};
+enum aie2_fw_feature {
+ AIE2_NPU_COMMAND,
+ AIE2_FEATURE_MAX
+};
+
+struct aie2_fw_feature_tbl {
+ enum aie2_fw_feature feature;
+ u32 max_minor;
+ u32 min_minor;
+};
+
+#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask)
+
struct amdxdna_dev_priv {
const char *fw_path;
u64 protocol_major;
u64 protocol_minor;
const struct rt_config *rt_config;
const struct dpm_clk_freq *dpm_clk_tbl;
+ const struct aie2_fw_feature_tbl *fw_feature_tbl;
#define COL_ALIGN_NONE 0
#define COL_ALIGN_NATURE 1
@@ -239,6 +266,7 @@ extern const struct dpm_clk_freq npu1_dpm_clk_table[];
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
extern const struct rt_config npu1_default_rt_cfg[];
extern const struct rt_config npu4_default_rt_cfg[];
+extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
/* aie2_smu.c */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
@@ -263,6 +291,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
struct amdxdna_drm_get_array *args);
/* aie2_message.c */
+void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index d18182c59668..878cc955f56d 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
return &cmd->data[num_masks];
}
-int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
{
struct amdxdna_cmd *cmd = abo->mem.kva;
u32 num_masks, i;
u32 *cu_mask;
if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
- return -1;
+ return INVALID_CU_IDX;
num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
cu_mask = cmd->data;
@@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
return ffs(cu_mask[i]) - 1;
}
- return -1;
+ return INVALID_CU_IDX;
}
/*
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index 919c654dfea6..1aa2b938e07b 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -13,9 +13,10 @@
struct amdxdna_hwctx_priv;
enum ert_cmd_opcode {
- ERT_START_CU = 0,
- ERT_CMD_CHAIN = 19,
- ERT_START_NPU = 20,
+ ERT_INVALID_CMD = ~0U,
+ ERT_START_CU = 0,
+ ERT_CMD_CHAIN = 19,
+ ERT_START_NPU = 20,
};
enum ert_cmd_state {
@@ -64,6 +65,8 @@ struct amdxdna_cmd {
u32 data[];
};
+#define INVALID_CU_IDX (~0U)
+
struct amdxdna_hwctx {
struct amdxdna_client *client;
struct amdxdna_hwctx_priv *priv;
@@ -150,7 +153,7 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
}
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
-int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index 23feb5f6fad3..ffc2e7c7b523 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -63,12 +63,18 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
{ 0 }
};
+static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
+ { .feature = AIE2_NPU_COMMAND, .min_minor = 8 },
+ { 0 }
+};
+
static const struct amdxdna_dev_priv npu1_dev_priv = {
.fw_path = "amdnpu/1502_00/npu.sbin",
.protocol_major = 0x5,
.protocol_minor = 0x7,
.rt_config = npu1_default_rt_cfg,
.dpm_clk_tbl = npu1_dpm_clk_table,
+ .fw_feature_tbl = npu1_fw_feature_table,
.col_align = COL_ALIGN_NONE,
.mbox_dev_addr = NPU1_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index 67c2ae931c62..5fbfdcc3762d 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu2_dev_priv = {
.protocol_minor = 0x6,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU2_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index fac6c1b0b74b..79aba12acfde 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -83,12 +83,18 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
{ 0 }
};
+const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
+ { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
+ { 0 }
+};
+
static const struct amdxdna_dev_priv npu4_dev_priv = {
.fw_path = "amdnpu/17f0_10/npu.sbin",
.protocol_major = 0x6,
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU4_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index c91e1fa76ff5..c5e259ab9f49 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU5_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index 773f738915a7..2de63b44d6e7 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
.protocol_minor = 12,
.rt_config = npu4_default_rt_cfg,
.dpm_clk_tbl = npu4_dpm_clk_table,
+ .fw_feature_tbl = npu4_fw_feature_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU6_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [PATCH V1] accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported
2025-10-31 1:47 [PATCH V1] accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported Lizhi Hou
@ 2025-10-31 5:10 ` Mario Limonciello (AMD) (kernel.org)
2025-10-31 15:15 ` Lizhi Hou
0 siblings, 1 reply; 4+ messages in thread
From: Mario Limonciello (AMD) (kernel.org) @ 2025-10-31 5:10 UTC (permalink / raw)
To: Lizhi Hou, ogabbay, quic_jhugo, maciej.falkowski, dri-devel
Cc: linux-kernel, max.zhen, sonal.santan
On 10/30/2025 8:47 PM, Lizhi Hou wrote:
> MSG_OP_CHAIN_EXEC_NPU is a unified mailbox message that replaces
> MSG_OP_CHAIN_EXEC_BUFFER_CF and MSG_OP_CHAIN_EXEC_DPU.
>
> Add driver logic to check firmware version, and if MSG_OP_CHAIN_EXEC_NPU
> is supported, uses it to submit firmware commands.
>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Two small nits below to me. Otherwise
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/accel/amdxdna/aie2_message.c | 443 +++++++++++++++++---------
> drivers/accel/amdxdna/aie2_msg_priv.h | 42 ++-
> drivers/accel/amdxdna/aie2_pci.c | 13 +
> drivers/accel/amdxdna/aie2_pci.h | 29 ++
> drivers/accel/amdxdna/amdxdna_ctx.c | 6 +-
> drivers/accel/amdxdna/amdxdna_ctx.h | 11 +-
> drivers/accel/amdxdna/npu1_regs.c | 6 +
> drivers/accel/amdxdna/npu2_regs.c | 1 +
> drivers/accel/amdxdna/npu4_regs.c | 6 +
> drivers/accel/amdxdna/npu5_regs.c | 1 +
> drivers/accel/amdxdna/npu6_regs.c | 1 +
> 11 files changed, 392 insertions(+), 167 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 3a4c845d783a..4751a8aff0f7 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -27,6 +27,8 @@
> #define DECLARE_AIE2_MSG(name, op) \
> DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
>
> +#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
> +
> static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
> struct xdna_mailbox_msg *msg)
> {
> @@ -479,177 +481,291 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
> return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
> }
>
> -int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> - int (*notify_cb)(void *, void __iomem *, size_t))
> +static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
> + size_t *size, u32 *msg_op)
> {
> - struct mailbox_channel *chann = hwctx->priv->mbox_chann;
> - struct amdxdna_dev *xdna = hwctx->client->xdna;
> - struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
> - union {
> - struct execute_buffer_req ebuf;
> - struct exec_dpu_req dpu;
> - } req;
> - struct xdna_mailbox_msg msg;
> - u32 payload_len;
> - void *payload;
> - int cu_idx;
> - int ret;
> - u32 op;
> + struct execute_buffer_req *cu_req = req;
> + u32 cmd_len;
> + void *cmd;
>
> - if (!chann)
> - return -ENODEV;
> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
> + if (cmd_len > sizeof(cu_req->payload))
> + return -EINVAL;
>
> - payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
> - if (!payload) {
> - XDNA_ERR(xdna, "Invalid command, cannot get payload");
> + cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
> + if (cu_req->cu_idx == INVALID_CU_IDX)
> return -EINVAL;
> - }
>
> - cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
> - if (cu_idx < 0) {
> - XDNA_DBG(xdna, "Invalid cu idx");
> + memcpy(cu_req->payload, cmd, cmd_len);
> +
> + *size = sizeof(*cu_req);
> + *msg_op = MSG_OP_EXECUTE_BUFFER_CF;
> + return 0;
> +}
> +
> +static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo, void *req,
> + size_t *size, u32 *msg_op)
> +{
> + struct exec_dpu_req *dpu_req = req;
> + struct amdxdna_cmd_start_npu *sn;
> + u32 cmd_len;
> +
> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
> + if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
> return -EINVAL;
> - }
>
> - op = amdxdna_cmd_get_op(cmd_abo);
> - switch (op) {
> - case ERT_START_CU:
> - if (unlikely(payload_len > sizeof(req.ebuf.payload)))
> - XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
> - req.ebuf.cu_idx = cu_idx;
> - memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
> - msg.send_size = sizeof(req.ebuf);
> - msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
> - break;
> - case ERT_START_NPU: {
> - struct amdxdna_cmd_start_npu *sn = payload;
> -
> - if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
> - XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
> - req.dpu.inst_buf_addr = sn->buffer;
> - req.dpu.inst_size = sn->buffer_size;
> - req.dpu.inst_prop_cnt = sn->prop_count;
> - req.dpu.cu_idx = cu_idx;
> - memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
> - msg.send_size = sizeof(req.dpu);
> - msg.opcode = MSG_OP_EXEC_DPU;
> - break;
> - }
> - default:
> - XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
> + dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
> + if (dpu_req->cu_idx == INVALID_CU_IDX)
> return -EINVAL;
> - }
> - msg.handle = job;
> - msg.notify_cb = notify_cb;
> - msg.send_data = (u8 *)&req;
> - print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
> - 0x40, false);
>
> - ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
> - if (ret) {
> - XDNA_ERR(xdna, "Send message failed");
> - return ret;
> - }
> + dpu_req->inst_buf_addr = sn->buffer;
> + dpu_req->inst_size = sn->buffer_size;
> + dpu_req->inst_prop_cnt = sn->prop_count;
> + memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
>
> + *size = sizeof(*dpu_req);
> + *msg_op = MSG_OP_EXEC_DPU;
> return 0;
> }
>
> +static void aie2_init_exec_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
> +{
> + struct cmd_chain_req *chain_req = req;
> +
> + chain_req->buf_addr = slot_addr;
> + chain_req->buf_size = size;
> + chain_req->count = cmd_cnt;
> +}
> +
> +static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t size, u32 cmd_cnt)
> +{
> + struct cmd_chain_npu_req *npu_chain_req = req;
> +
> + npu_chain_req->flags = 0;
> + npu_chain_req->reserved = 0;
> + npu_chain_req->buf_addr = slot_addr;
> + npu_chain_req->buf_size = size;
> + npu_chain_req->count = cmd_cnt;
> +}
> +
> static int
> -aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
> - struct amdxdna_gem_obj *abo, u32 *size)
> +aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
> {
> - struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
> - int cu_idx = amdxdna_cmd_get_cu_idx(abo);
> - u32 payload_len;
> - void *payload;
> + struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
> + u32 cmd_len;
> + void *cmd;
>
> - if (cu_idx < 0)
> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
> + if (*size < sizeof(*cf_slot) + cmd_len)
> return -EINVAL;
>
> - payload = amdxdna_cmd_get_payload(abo, &payload_len);
> - if (!payload)
> + cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
> + if (cf_slot->cu_idx == INVALID_CU_IDX)
> return -EINVAL;
>
> - if (!slot_has_space(*buf, offset, payload_len))
> - return -ENOSPC;
> -
> - buf->cu_idx = cu_idx;
> - buf->arg_cnt = payload_len / sizeof(u32);
> - memcpy(buf->args, payload, payload_len);
> - /* Accurate buf size to hint firmware to do necessary copy */
> - *size = sizeof(*buf) + payload_len;
> + cf_slot->arg_cnt = cmd_len / sizeof(u32);
> + memcpy(cf_slot->args, cmd, cmd_len);
> + /* Accurate slot size to hint firmware to do necessary copy */
> + *size = sizeof(*cf_slot) + cmd_len;
> return 0;
> }
>
> static int
> -aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
> - struct amdxdna_gem_obj *abo, u32 *size)
> +aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
> {
> - struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
> - int cu_idx = amdxdna_cmd_get_cu_idx(abo);
> + struct cmd_chain_slot_dpu *dpu_slot = slot;
> struct amdxdna_cmd_start_npu *sn;
> - u32 payload_len;
> - void *payload;
> + u32 cmd_len;
> u32 arg_sz;
>
> - if (cu_idx < 0)
> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
> + arg_sz = cmd_len - sizeof(*sn);
> + if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
> return -EINVAL;
>
> - payload = amdxdna_cmd_get_payload(abo, &payload_len);
> - if (!payload)
> + if (*size < sizeof(*dpu_slot) + arg_sz)
> return -EINVAL;
> - sn = payload;
> - arg_sz = payload_len - sizeof(*sn);
> - if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
> +
> + dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
> + if (dpu_slot->cu_idx == INVALID_CU_IDX)
> return -EINVAL;
>
> - if (!slot_has_space(*buf, offset, arg_sz))
> - return -ENOSPC;
> + dpu_slot->inst_buf_addr = sn->buffer;
> + dpu_slot->inst_size = sn->buffer_size;
> + dpu_slot->inst_prop_cnt = sn->prop_count;
> + dpu_slot->arg_cnt = arg_sz / sizeof(u32);
> + memcpy(dpu_slot->args, sn->prop_args, arg_sz);
> +
> + /* Accurate slot size to hint firmware to do necessary copy */
> + *size = sizeof(*dpu_slot) + arg_sz;
> + return 0;
> +}
> +
> +static u32 aie2_get_chain_msg_op(u32 cmd_op)
> +{
> + switch (cmd_op) {
> + case ERT_START_CU:
> + return MSG_OP_CHAIN_EXEC_BUFFER_CF;
> + case ERT_START_NPU:
> + return MSG_OP_CHAIN_EXEC_DPU;
> + default:
> + break;
> + }
>
> - buf->inst_buf_addr = sn->buffer;
> - buf->inst_size = sn->buffer_size;
> - buf->inst_prop_cnt = sn->prop_count;
> - buf->cu_idx = cu_idx;
> - buf->arg_cnt = arg_sz / sizeof(u32);
> - memcpy(buf->args, sn->prop_args, arg_sz);
> + return MSG_OP_MAX_OPCODE;
> +}
>
> - /* Accurate buf size to hint firmware to do necessary copy */
> - *size = sizeof(*buf) + arg_sz;
> +static struct aie2_exec_msg_ops legacy_exec_message_ops = {
> + .init_cu_req = aie2_init_exec_cu_req,
> + .init_dpu_req = aie2_init_exec_dpu_req,
> + .init_chain_req = aie2_init_exec_chain_req,
> + .fill_cf_slot = aie2_cmdlist_fill_cf,
> + .fill_dpu_slot = aie2_cmdlist_fill_dpu,
> + .get_chain_msg_op = aie2_get_chain_msg_op,
> +};
> +
> +static int
> +aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
> +{
> + struct cmd_chain_slot_npu *npu_slot = slot;
> + u32 cmd_len;
> + void *cmd;
> +
> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
> + if (*size < sizeof(*npu_slot) + cmd_len)
> + return -EINVAL;
> +
> + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
> + if (npu_slot->cu_idx == INVALID_CU_IDX)
> + return -EINVAL;
> +
> + memset(npu_slot, 0, sizeof(*npu_slot));
> + npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
> + npu_slot->arg_cnt = cmd_len / sizeof(u32);
> + memcpy(npu_slot->args, cmd, cmd_len);
> +
> + *size = sizeof(*npu_slot) + cmd_len;
> return 0;
> }
>
> static int
> -aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
> - struct amdxdna_gem_obj *abo, u32 *size)
> +aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size)
> +{
> + struct cmd_chain_slot_npu *npu_slot = slot;
> + struct amdxdna_cmd_start_npu *sn;
> + u32 cmd_len;
> + u32 arg_sz;
> +
> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
> + arg_sz = cmd_len - sizeof(*sn);
> + if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
> + return -EINVAL;
> +
> + if (*size < sizeof(*npu_slot) + arg_sz)
> + return -EINVAL;
> +
> + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
> + if (npu_slot->cu_idx == INVALID_CU_IDX)
> + return -EINVAL;
> +
> + memset(npu_slot, 0, sizeof(*npu_slot));
> + npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
> + npu_slot->inst_buf_addr = sn->buffer;
> + npu_slot->inst_size = sn->buffer_size;
> + npu_slot->inst_prop_cnt = sn->prop_count;
> + npu_slot->arg_cnt = arg_sz / sizeof(u32);
> + memcpy(npu_slot->args, sn->prop_args, arg_sz);
> +
> + *size = sizeof(*npu_slot) + arg_sz;
> + return 0;
> +}
> +
> +static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
> +{
> + return MSG_OP_CHAIN_EXEC_NPU;
> +}
> +
> +static struct aie2_exec_msg_ops npu_exec_message_ops = {
> + .init_cu_req = aie2_init_exec_cu_req,
> + .init_dpu_req = aie2_init_exec_dpu_req,
> + .init_chain_req = aie2_init_npu_chain_req,
> + .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
> + .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
> + .get_chain_msg_op = aie2_get_npu_chain_msg_op,
> +};
> +
> +static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj *cmd_abo,
> + size_t *size, u32 *msg_op)
> {
> - u32 this_op = amdxdna_cmd_get_op(abo);
> - void *cmd_buf = cmdbuf_abo->mem.kva;
> + struct amdxdna_dev *xdna = cmd_abo->client->xdna;
> int ret;
> + u32 op;
>
> - if (this_op != op) {
> - ret = -EINVAL;
> - goto done;
> - }
>
> + op = amdxdna_cmd_get_op(cmd_abo);
> switch (op) {
> case ERT_START_CU:
> - ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
> + ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size, msg_op);
> + if (ret) {
> + XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
> + return ret;
> + }
> break;
> case ERT_START_NPU:
> - ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
> + ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size, msg_op);
> + if (ret) {
> + XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
> + return ret;
> + }
> +
> break;
> default:
> + XDNA_INFO(xdna, "Unsupported op %d", op);
Shouldn't this be XDNA_ERR()?
> ret = -EOPNOTSUPP;
> + break;
> }
>
> -done:
> - if (ret) {
> - XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
> - op, ret);
> + return ret;
> +}
> +
> +static int
> +aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
> + size_t *size, u32 *cmd_op)
> +{
> + struct amdxdna_dev *xdna = cmd_abo->client->xdna;
> + int ret;
> + u32 op;
> +
> + op = amdxdna_cmd_get_op(cmd_abo);
> + if (*cmd_op == ERT_INVALID_CMD)
> + *cmd_op = op;
> + else if (op != *cmd_op)
> + return -EINVAL;
> +
> + switch (op) {
> + case ERT_START_CU:
> + ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
> + break;
> + case ERT_START_NPU:
> + ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
> + break;
> + default:
> + XDNA_INFO(xdna, "Unsupported op %d", op);
> + ret = -EOPNOTSUPP;
> + break;
> }
> +
> return ret;
> }
>
> +void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
> +{
> + if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
> + ndev->exec_msg_ops = &npu_exec_message_ops;
> + else
> + ndev->exec_msg_ops = &legacy_exec_message_ops;
> +}
> +
> static inline struct amdxdna_gem_obj *
> aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
> {
> @@ -658,29 +774,36 @@ aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
> return job->hwctx->priv->cmd_buf[idx];
> }
>
> -static void
> -aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
> - struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
> +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> + int (*notify_cb)(void *, void __iomem *, size_t))
> {
> - req->buf_addr = cmdbuf_abo->mem.dev_addr;
> - req->buf_size = size;
> - req->count = cnt;
> - drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
> - XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
> - req->buf_addr, size, cnt);
> -}
> + struct mailbox_channel *chann = hwctx->priv->mbox_chann;
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
> + struct xdna_mailbox_msg msg;
> + union exec_req req;
> + int ret;
>
> -static inline u32
> -aie2_cmd_op_to_msg_op(u32 op)
> -{
> - switch (op) {
> - case ERT_START_CU:
> - return MSG_OP_CHAIN_EXEC_BUFFER_CF;
> - case ERT_START_NPU:
> - return MSG_OP_CHAIN_EXEC_DPU;
> - default:
> - return MSG_OP_MAX_OPCODE;
> + if (!chann)
> + return -ENODEV;
> +
> + ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size, &msg.opcode);
> + if (ret)
> + return ret;
> +
> + msg.handle = job;
> + msg.notify_cb = notify_cb;
> + msg.send_data = (u8 *)&req;
> + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
> + 0x40, false);
> +
> + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
> + if (ret) {
> + XDNA_ERR(xdna, "Send message failed");
> + return ret;
> }
> +
> + return 0;
> }
>
> int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
> @@ -691,12 +814,13 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
> struct amdxdna_client *client = hwctx->client;
> struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
> + struct amdxdna_dev *xdna = client->xdna;
> struct amdxdna_cmd_chain *payload;
> struct xdna_mailbox_msg msg;
> - struct cmd_chain_req req;
> + union exec_chain_req req;
> u32 payload_len;
> u32 offset = 0;
> - u32 size;
> + size_t size;
> int ret;
> u32 op;
> u32 i;
> @@ -707,41 +831,42 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
> payload_len < struct_size(payload, data, payload->command_count))
> return -EINVAL;
>
> + op = ERT_INVALID_CMD;
> for (i = 0; i < payload->command_count; i++) {
> u32 boh = (u32)(payload->data[i]);
> struct amdxdna_gem_obj *abo;
>
> abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
> if (!abo) {
> - XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
> + XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
> return -ENOENT;
> }
>
> - /* All sub-cmd should have same op, use the first one. */
> - if (i == 0)
> - op = amdxdna_cmd_get_op(abo);
> -
> - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
> + size = cmdbuf_abo->mem.size - offset;
> + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
> + abo, &size, &op);
> amdxdna_gem_put_obj(abo);
> if (ret)
> - return -EINVAL;
> + return ret;
>
> offset += size;
> }
> + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
> + if (msg.opcode == MSG_OP_MAX_OPCODE)
> + return -EOPNOTSUPP;
>
> /* The offset is the accumulated total size of the cmd buffer */
> - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
> + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
> + offset, payload->command_count);
> + drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
>
> - msg.opcode = aie2_cmd_op_to_msg_op(op);
> - if (msg.opcode == MSG_OP_MAX_OPCODE)
> - return -EOPNOTSUPP;
> msg.handle = job;
> msg.notify_cb = notify_cb;
> msg.send_data = (u8 *)&req;
> msg.send_size = sizeof(req);
> ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
> if (ret) {
> - XDNA_ERR(hwctx->client->xdna, "Send message failed");
> + XDNA_ERR(xdna, "Send message failed");
> return ret;
> }
>
> @@ -754,23 +879,27 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
> {
> struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
> struct xdna_mailbox_msg msg;
> - struct cmd_chain_req req;
> - u32 size;
> + union exec_chain_req req;
> + u32 op = ERT_INVALID_CMD;
> + size_t size;
> int ret;
> - u32 op;
>
> - op = amdxdna_cmd_get_op(cmd_abo);
> - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
> + size = cmdbuf_abo->mem.size;
> + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo, &size, &op);
> if (ret)
> return ret;
>
> - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
> -
> - msg.opcode = aie2_cmd_op_to_msg_op(op);
> + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
> if (msg.opcode == MSG_OP_MAX_OPCODE)
> return -EOPNOTSUPP;
> +
> + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
> + size, 1);
> + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
> +
> msg.handle = job;
> msg.notify_cb = notify_cb;
> msg.send_data = (u8 *)&req;
> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
> index 2dbea1d09980..947daa63f064 100644
> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
> @@ -20,6 +20,7 @@ enum aie2_msg_opcode {
> MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
> MSG_OP_CHAIN_EXEC_DPU = 0x13,
> MSG_OP_CONFIG_DEBUG_BO = 0x14,
> + MSG_OP_CHAIN_EXEC_NPU = 0x18,
> MSG_OP_MAX_XRT_OPCODE,
> MSG_OP_SUSPEND = 0x101,
> MSG_OP_RESUME = 0x102,
> @@ -172,6 +173,16 @@ struct exec_dpu_req {
> __u32 payload[35];
> } __packed;
>
> +enum exec_npu_type {
> + EXEC_NPU_TYPE_NON_ELF = 0x1,
> + EXEC_NPU_TYPE_PARTIAL_ELF = 0x2,
> +};
> +
> +union exec_req {
> + struct execute_buffer_req ebuf;
> + struct exec_dpu_req dpu_req;
> +};
> +
> struct execute_buffer_resp {
> enum aie2_msg_status status;
> } __packed;
> @@ -343,9 +354,6 @@ struct async_event_msg_resp {
> } __packed;
>
> #define MAX_CHAIN_CMDBUF_SIZE SZ_4K
> -#define slot_has_space(slot, offset, payload_size) \
> - (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
> - sizeof(typeof(slot)))
>
> struct cmd_chain_slot_execbuf_cf {
> __u32 cu_idx;
> @@ -363,12 +371,40 @@ struct cmd_chain_slot_dpu {
> __u32 args[] __counted_by(arg_cnt);
> };
>
> +#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
> +struct cmd_chain_slot_npu {
> + enum exec_npu_type type;
> + u64 inst_buf_addr;
> + u64 save_buf_addr;
> + u64 restore_buf_addr;
> + u32 inst_size;
> + u32 save_size;
> + u32 restore_size;
> + u32 inst_prop_cnt;
> + u32 cu_idx;
> + u32 arg_cnt;
> + u32 args[] __counted_by(arg_cnt);
> +} __packed;
> +
> struct cmd_chain_req {
> __u64 buf_addr;
> __u32 buf_size;
> __u32 count;
> } __packed;
>
> +struct cmd_chain_npu_req {
> + u32 flags;
> + u32 reserved;
> + u64 buf_addr;
> + u32 buf_size;
> + u32 count;
> +} __packed;
> +
> +union exec_chain_req {
> + struct cmd_chain_npu_req npu_req;
> + struct cmd_chain_req req;
> +};
> +
> struct cmd_chain_resp {
> enum aie2_msg_status status;
> __u32 fail_cmd_idx;
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 80313a2a98d4..d7ccbdaf47f5 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -55,6 +55,7 @@ struct mgmt_mbox_chann_info {
>
> static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
> {
> + const struct aie2_fw_feature_tbl *feature;
> struct amdxdna_dev *xdna = ndev->xdna;
>
> /*
> @@ -78,6 +79,17 @@ static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 f
> XDNA_ERR(xdna, "Firmware minor version smaller than supported");
> return -EINVAL;
> }
> +
> + for (feature = ndev->priv->fw_feature_tbl; feature && feature->min_minor;
> + feature++) {
> + if (fw_minor < feature->min_minor)
> + continue;
> + if (feature->max_minor > 0 && fw_minor > feature->max_minor)
> + continue;
> +
> + set_bit(feature->feature, &ndev->feature_mask);
> + }
> +
> return 0;
> }
>
> @@ -587,6 +599,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
> }
>
> release_firmware(fw);
> + aie2_msg_init(ndev);
> amdxdna_pm_init(xdna);
> return 0;
>
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index cfe42b0d4242..d0a3cb1fe8be 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -156,6 +156,17 @@ enum aie2_dev_status {
> AIE2_DEV_START,
> };
>
> +struct aie2_exec_msg_ops {
> + int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
> + size_t *size, u32 *msg_op);
> + int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
> + size_t *size, u32 *msg_op);
> + void (*init_chain_req)(void *req, u64 slot_addr, size_t size, u32 cmd_cnt);
> + int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
> + int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *size);
> + u32 (*get_chain_msg_op)(u32 cmd_op);
> +};
> +
> struct amdxdna_dev_hdl {
> struct amdxdna_dev *xdna;
> const struct amdxdna_dev_priv *priv;
> @@ -173,6 +184,8 @@ struct amdxdna_dev_hdl {
> u32 total_col;
> struct aie_version version;
> struct aie_metadata metadata;
> + unsigned long feature_mask;
> + struct aie2_exec_msg_ops *exec_msg_ops;
>
> /* power management and clock*/
> enum amdxdna_power_mode_type pw_mode;
> @@ -208,12 +221,26 @@ struct aie2_hw_ops {
> int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
> };
>
> +enum aie2_fw_feature {
> + AIE2_NPU_COMMAND,
> + AIE2_FEATURE_MAX
> +};
> +
> +struct aie2_fw_feature_tbl {
> + enum aie2_fw_feature feature;
> + u32 max_minor;
> + u32 min_minor;
> +};
> +
> +#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature, &(ndev)->feature_mask)
> +
> struct amdxdna_dev_priv {
> const char *fw_path;
> u64 protocol_major;
> u64 protocol_minor;
> const struct rt_config *rt_config;
> const struct dpm_clk_freq *dpm_clk_tbl;
> + const struct aie2_fw_feature_tbl *fw_feature_tbl;
>
> #define COL_ALIGN_NONE 0
> #define COL_ALIGN_NATURE 1
> @@ -239,6 +266,7 @@ extern const struct dpm_clk_freq npu1_dpm_clk_table[];
> extern const struct dpm_clk_freq npu4_dpm_clk_table[];
> extern const struct rt_config npu1_default_rt_cfg[];
> extern const struct rt_config npu4_default_rt_cfg[];
> +extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
>
> /* aie2_smu.c */
> int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
> @@ -263,6 +291,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
> struct amdxdna_drm_get_array *args);
>
> /* aie2_message.c */
> +void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
> int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
> int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> index d18182c59668..878cc955f56d 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
> return &cmd->data[num_masks];
> }
>
> -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
> +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
> {
> struct amdxdna_cmd *cmd = abo->mem.kva;
> u32 num_masks, i;
> u32 *cu_mask;
>
> if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
> - return -1;
> + return INVALID_CU_IDX;
>
> num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
> cu_mask = cmd->data;
> @@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
> return ffs(cu_mask[i]) - 1;
> }
>
> - return -1;
> + return INVALID_CU_IDX;
> }
>
> /*
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index 919c654dfea6..1aa2b938e07b 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -13,9 +13,10 @@
> struct amdxdna_hwctx_priv;
>
> enum ert_cmd_opcode {
> - ERT_START_CU = 0,
> - ERT_CMD_CHAIN = 19,
> - ERT_START_NPU = 20,
> + ERT_INVALID_CMD = ~0U,
~0U > 20, shouldn't this be at the end of the enum?
> + ERT_START_CU = 0,
> + ERT_CMD_CHAIN = 19,
> + ERT_START_NPU = 20,
> };
>
> enum ert_cmd_state {
> @@ -64,6 +65,8 @@ struct amdxdna_cmd {
> u32 data[];
> };
>
> +#define INVALID_CU_IDX (~0U)
> +
> struct amdxdna_hwctx {
> struct amdxdna_client *client;
> struct amdxdna_hwctx_priv *priv;
> @@ -150,7 +153,7 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
> }
>
> void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
> -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
> +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
>
> void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
> void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
> index 23feb5f6fad3..ffc2e7c7b523 100644
> --- a/drivers/accel/amdxdna/npu1_regs.c
> +++ b/drivers/accel/amdxdna/npu1_regs.c
> @@ -63,12 +63,18 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
> { 0 }
> };
>
> +static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
> + { .feature = AIE2_NPU_COMMAND, .min_minor = 8 },
> + { 0 }
> +};
> +
> static const struct amdxdna_dev_priv npu1_dev_priv = {
> .fw_path = "amdnpu/1502_00/npu.sbin",
> .protocol_major = 0x5,
> .protocol_minor = 0x7,
> .rt_config = npu1_default_rt_cfg,
> .dpm_clk_tbl = npu1_dpm_clk_table,
> + .fw_feature_tbl = npu1_fw_feature_table,
> .col_align = COL_ALIGN_NONE,
> .mbox_dev_addr = NPU1_MBOX_BAR_BASE,
> .mbox_size = 0, /* Use BAR size */
> diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
> index 67c2ae931c62..5fbfdcc3762d 100644
> --- a/drivers/accel/amdxdna/npu2_regs.c
> +++ b/drivers/accel/amdxdna/npu2_regs.c
> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu2_dev_priv = {
> .protocol_minor = 0x6,
> .rt_config = npu4_default_rt_cfg,
> .dpm_clk_tbl = npu4_dpm_clk_table,
> + .fw_feature_tbl = npu4_fw_feature_table,
> .col_align = COL_ALIGN_NATURE,
> .mbox_dev_addr = NPU2_MBOX_BAR_BASE,
> .mbox_size = 0, /* Use BAR size */
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index fac6c1b0b74b..79aba12acfde 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -83,12 +83,18 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
> { 0 }
> };
>
> +const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
> + { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
> + { 0 }
> +};
> +
> static const struct amdxdna_dev_priv npu4_dev_priv = {
> .fw_path = "amdnpu/17f0_10/npu.sbin",
> .protocol_major = 0x6,
> .protocol_minor = 12,
> .rt_config = npu4_default_rt_cfg,
> .dpm_clk_tbl = npu4_dpm_clk_table,
> + .fw_feature_tbl = npu4_fw_feature_table,
> .col_align = COL_ALIGN_NATURE,
> .mbox_dev_addr = NPU4_MBOX_BAR_BASE,
> .mbox_size = 0, /* Use BAR size */
> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
> index c91e1fa76ff5..c5e259ab9f49 100644
> --- a/drivers/accel/amdxdna/npu5_regs.c
> +++ b/drivers/accel/amdxdna/npu5_regs.c
> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
> .protocol_minor = 12,
> .rt_config = npu4_default_rt_cfg,
> .dpm_clk_tbl = npu4_dpm_clk_table,
> + .fw_feature_tbl = npu4_fw_feature_table,
> .col_align = COL_ALIGN_NATURE,
> .mbox_dev_addr = NPU5_MBOX_BAR_BASE,
> .mbox_size = 0, /* Use BAR size */
> diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
> index 773f738915a7..2de63b44d6e7 100644
> --- a/drivers/accel/amdxdna/npu6_regs.c
> +++ b/drivers/accel/amdxdna/npu6_regs.c
> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
> .protocol_minor = 12,
> .rt_config = npu4_default_rt_cfg,
> .dpm_clk_tbl = npu4_dpm_clk_table,
> + .fw_feature_tbl = npu4_fw_feature_table,
> .col_align = COL_ALIGN_NATURE,
> .mbox_dev_addr = NPU6_MBOX_BAR_BASE,
> .mbox_size = 0, /* Use BAR size */
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH V1] accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported
2025-10-31 5:10 ` Mario Limonciello (AMD) (kernel.org)
@ 2025-10-31 15:15 ` Lizhi Hou
2025-11-03 18:10 ` Lizhi Hou
0 siblings, 1 reply; 4+ messages in thread
From: Lizhi Hou @ 2025-10-31 15:15 UTC (permalink / raw)
To: Mario Limonciello (AMD) (kernel.org), ogabbay, quic_jhugo,
maciej.falkowski, dri-devel
Cc: linux-kernel, max.zhen, sonal.santan
On 10/30/25 22:10, Mario Limonciello (AMD) (kernel.org) wrote:
>
>
> On 10/30/2025 8:47 PM, Lizhi Hou wrote:
>> MSG_OP_CHAIN_EXEC_NPU is a unified mailbox message that replaces
>> MSG_OP_CHAIN_EXEC_BUFFER_CF and MSG_OP_CHAIN_EXEC_DPU.
>>
>> Add driver logic to check firmware version, and if MSG_OP_CHAIN_EXEC_NPU
>> is supported, uses it to submit firmware commands.
>>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>
> Two small nits below to me. Otherwise
>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Thanks a lot. And I will fix the nits when I merge.
Lizhi
>
>> ---
>> drivers/accel/amdxdna/aie2_message.c | 443 +++++++++++++++++---------
>> drivers/accel/amdxdna/aie2_msg_priv.h | 42 ++-
>> drivers/accel/amdxdna/aie2_pci.c | 13 +
>> drivers/accel/amdxdna/aie2_pci.h | 29 ++
>> drivers/accel/amdxdna/amdxdna_ctx.c | 6 +-
>> drivers/accel/amdxdna/amdxdna_ctx.h | 11 +-
>> drivers/accel/amdxdna/npu1_regs.c | 6 +
>> drivers/accel/amdxdna/npu2_regs.c | 1 +
>> drivers/accel/amdxdna/npu4_regs.c | 6 +
>> drivers/accel/amdxdna/npu5_regs.c | 1 +
>> drivers/accel/amdxdna/npu6_regs.c | 1 +
>> 11 files changed, 392 insertions(+), 167 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_message.c
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 3a4c845d783a..4751a8aff0f7 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -27,6 +27,8 @@
>> #define DECLARE_AIE2_MSG(name, op) \
>> DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
>> +#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
>> +
>> static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
>> struct xdna_mailbox_msg *msg)
>> {
>> @@ -479,177 +481,291 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>> return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>> }
>> -int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>> amdxdna_sched_job *job,
>> - int (*notify_cb)(void *, void __iomem *, size_t))
>> +static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo,
>> void *req,
>> + size_t *size, u32 *msg_op)
>> {
>> - struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>> - struct amdxdna_dev *xdna = hwctx->client->xdna;
>> - struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>> - union {
>> - struct execute_buffer_req ebuf;
>> - struct exec_dpu_req dpu;
>> - } req;
>> - struct xdna_mailbox_msg msg;
>> - u32 payload_len;
>> - void *payload;
>> - int cu_idx;
>> - int ret;
>> - u32 op;
>> + struct execute_buffer_req *cu_req = req;
>> + u32 cmd_len;
>> + void *cmd;
>> - if (!chann)
>> - return -ENODEV;
>> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>> + if (cmd_len > sizeof(cu_req->payload))
>> + return -EINVAL;
>> - payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
>> - if (!payload) {
>> - XDNA_ERR(xdna, "Invalid command, cannot get payload");
>> + cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>> + if (cu_req->cu_idx == INVALID_CU_IDX)
>> return -EINVAL;
>> - }
>> - cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
>> - if (cu_idx < 0) {
>> - XDNA_DBG(xdna, "Invalid cu idx");
>> + memcpy(cu_req->payload, cmd, cmd_len);
>> +
>> + *size = sizeof(*cu_req);
>> + *msg_op = MSG_OP_EXECUTE_BUFFER_CF;
>> + return 0;
>> +}
>> +
>> +static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo,
>> void *req,
>> + size_t *size, u32 *msg_op)
>> +{
>> + struct exec_dpu_req *dpu_req = req;
>> + struct amdxdna_cmd_start_npu *sn;
>> + u32 cmd_len;
>> +
>> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>> + if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
>> return -EINVAL;
>> - }
>> - op = amdxdna_cmd_get_op(cmd_abo);
>> - switch (op) {
>> - case ERT_START_CU:
>> - if (unlikely(payload_len > sizeof(req.ebuf.payload)))
>> - XDNA_DBG(xdna, "Invalid ebuf payload len: %d",
>> payload_len);
>> - req.ebuf.cu_idx = cu_idx;
>> - memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
>> - msg.send_size = sizeof(req.ebuf);
>> - msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
>> - break;
>> - case ERT_START_NPU: {
>> - struct amdxdna_cmd_start_npu *sn = payload;
>> -
>> - if (unlikely(payload_len - sizeof(*sn) >
>> sizeof(req.dpu.payload)))
>> - XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
>> - req.dpu.inst_buf_addr = sn->buffer;
>> - req.dpu.inst_size = sn->buffer_size;
>> - req.dpu.inst_prop_cnt = sn->prop_count;
>> - req.dpu.cu_idx = cu_idx;
>> - memcpy(req.dpu.payload, sn->prop_args,
>> sizeof(req.dpu.payload));
>> - msg.send_size = sizeof(req.dpu);
>> - msg.opcode = MSG_OP_EXEC_DPU;
>> - break;
>> - }
>> - default:
>> - XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
>> + dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>> + if (dpu_req->cu_idx == INVALID_CU_IDX)
>> return -EINVAL;
>> - }
>> - msg.handle = job;
>> - msg.notify_cb = notify_cb;
>> - msg.send_data = (u8 *)&req;
>> - print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
>> - 0x40, false);
>> - ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>> - if (ret) {
>> - XDNA_ERR(xdna, "Send message failed");
>> - return ret;
>> - }
>> + dpu_req->inst_buf_addr = sn->buffer;
>> + dpu_req->inst_size = sn->buffer_size;
>> + dpu_req->inst_prop_cnt = sn->prop_count;
>> + memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
>> + *size = sizeof(*dpu_req);
>> + *msg_op = MSG_OP_EXEC_DPU;
>> return 0;
>> }
>> +static void aie2_init_exec_chain_req(void *req, u64 slot_addr,
>> size_t size, u32 cmd_cnt)
>> +{
>> + struct cmd_chain_req *chain_req = req;
>> +
>> + chain_req->buf_addr = slot_addr;
>> + chain_req->buf_size = size;
>> + chain_req->count = cmd_cnt;
>> +}
>> +
>> +static void aie2_init_npu_chain_req(void *req, u64 slot_addr, size_t
>> size, u32 cmd_cnt)
>> +{
>> + struct cmd_chain_npu_req *npu_chain_req = req;
>> +
>> + npu_chain_req->flags = 0;
>> + npu_chain_req->reserved = 0;
>> + npu_chain_req->buf_addr = slot_addr;
>> + npu_chain_req->buf_size = size;
>> + npu_chain_req->count = cmd_cnt;
>> +}
>> +
>> static int
>> -aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
>> - struct amdxdna_gem_obj *abo, u32 *size)
>> +aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot,
>> size_t *size)
>> {
>> - struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
>> - int cu_idx = amdxdna_cmd_get_cu_idx(abo);
>> - u32 payload_len;
>> - void *payload;
>> + struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
>> + u32 cmd_len;
>> + void *cmd;
>> - if (cu_idx < 0)
>> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>> + if (*size < sizeof(*cf_slot) + cmd_len)
>> return -EINVAL;
>> - payload = amdxdna_cmd_get_payload(abo, &payload_len);
>> - if (!payload)
>> + cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>> + if (cf_slot->cu_idx == INVALID_CU_IDX)
>> return -EINVAL;
>> - if (!slot_has_space(*buf, offset, payload_len))
>> - return -ENOSPC;
>> -
>> - buf->cu_idx = cu_idx;
>> - buf->arg_cnt = payload_len / sizeof(u32);
>> - memcpy(buf->args, payload, payload_len);
>> - /* Accurate buf size to hint firmware to do necessary copy */
>> - *size = sizeof(*buf) + payload_len;
>> + cf_slot->arg_cnt = cmd_len / sizeof(u32);
>> + memcpy(cf_slot->args, cmd, cmd_len);
>> + /* Accurate slot size to hint firmware to do necessary copy */
>> + *size = sizeof(*cf_slot) + cmd_len;
>> return 0;
>> }
>> static int
>> -aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
>> - struct amdxdna_gem_obj *abo, u32 *size)
>> +aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot,
>> size_t *size)
>> {
>> - struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
>> - int cu_idx = amdxdna_cmd_get_cu_idx(abo);
>> + struct cmd_chain_slot_dpu *dpu_slot = slot;
>> struct amdxdna_cmd_start_npu *sn;
>> - u32 payload_len;
>> - void *payload;
>> + u32 cmd_len;
>> u32 arg_sz;
>> - if (cu_idx < 0)
>> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>> + arg_sz = cmd_len - sizeof(*sn);
>> + if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
>> return -EINVAL;
>> - payload = amdxdna_cmd_get_payload(abo, &payload_len);
>> - if (!payload)
>> + if (*size < sizeof(*dpu_slot) + arg_sz)
>> return -EINVAL;
>> - sn = payload;
>> - arg_sz = payload_len - sizeof(*sn);
>> - if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
>> +
>> + dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>> + if (dpu_slot->cu_idx == INVALID_CU_IDX)
>> return -EINVAL;
>> - if (!slot_has_space(*buf, offset, arg_sz))
>> - return -ENOSPC;
>> + dpu_slot->inst_buf_addr = sn->buffer;
>> + dpu_slot->inst_size = sn->buffer_size;
>> + dpu_slot->inst_prop_cnt = sn->prop_count;
>> + dpu_slot->arg_cnt = arg_sz / sizeof(u32);
>> + memcpy(dpu_slot->args, sn->prop_args, arg_sz);
>> +
>> + /* Accurate slot size to hint firmware to do necessary copy */
>> + *size = sizeof(*dpu_slot) + arg_sz;
>> + return 0;
>> +}
>> +
>> +static u32 aie2_get_chain_msg_op(u32 cmd_op)
>> +{
>> + switch (cmd_op) {
>> + case ERT_START_CU:
>> + return MSG_OP_CHAIN_EXEC_BUFFER_CF;
>> + case ERT_START_NPU:
>> + return MSG_OP_CHAIN_EXEC_DPU;
>> + default:
>> + break;
>> + }
>> - buf->inst_buf_addr = sn->buffer;
>> - buf->inst_size = sn->buffer_size;
>> - buf->inst_prop_cnt = sn->prop_count;
>> - buf->cu_idx = cu_idx;
>> - buf->arg_cnt = arg_sz / sizeof(u32);
>> - memcpy(buf->args, sn->prop_args, arg_sz);
>> + return MSG_OP_MAX_OPCODE;
>> +}
>> - /* Accurate buf size to hint firmware to do necessary copy */
>> - *size = sizeof(*buf) + arg_sz;
>> +static struct aie2_exec_msg_ops legacy_exec_message_ops = {
>> + .init_cu_req = aie2_init_exec_cu_req,
>> + .init_dpu_req = aie2_init_exec_dpu_req,
>> + .init_chain_req = aie2_init_exec_chain_req,
>> + .fill_cf_slot = aie2_cmdlist_fill_cf,
>> + .fill_dpu_slot = aie2_cmdlist_fill_dpu,
>> + .get_chain_msg_op = aie2_get_chain_msg_op,
>> +};
>> +
>> +static int
>> +aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot,
>> size_t *size)
>> +{
>> + struct cmd_chain_slot_npu *npu_slot = slot;
>> + u32 cmd_len;
>> + void *cmd;
>> +
>> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>> + if (*size < sizeof(*npu_slot) + cmd_len)
>> + return -EINVAL;
>> +
>> + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>> + if (npu_slot->cu_idx == INVALID_CU_IDX)
>> + return -EINVAL;
>> +
>> + memset(npu_slot, 0, sizeof(*npu_slot));
>> + npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
>> + npu_slot->arg_cnt = cmd_len / sizeof(u32);
>> + memcpy(npu_slot->args, cmd, cmd_len);
>> +
>> + *size = sizeof(*npu_slot) + cmd_len;
>> return 0;
>> }
>> static int
>> -aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj
>> *cmdbuf_abo, u32 offset,
>> - struct amdxdna_gem_obj *abo, u32 *size)
>> +aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void
>> *slot, size_t *size)
>> +{
>> + struct cmd_chain_slot_npu *npu_slot = slot;
>> + struct amdxdna_cmd_start_npu *sn;
>> + u32 cmd_len;
>> + u32 arg_sz;
>> +
>> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>> + arg_sz = cmd_len - sizeof(*sn);
>> + if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
>> + return -EINVAL;
>> +
>> + if (*size < sizeof(*npu_slot) + arg_sz)
>> + return -EINVAL;
>> +
>> + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>> + if (npu_slot->cu_idx == INVALID_CU_IDX)
>> + return -EINVAL;
>> +
>> + memset(npu_slot, 0, sizeof(*npu_slot));
>> + npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
>> + npu_slot->inst_buf_addr = sn->buffer;
>> + npu_slot->inst_size = sn->buffer_size;
>> + npu_slot->inst_prop_cnt = sn->prop_count;
>> + npu_slot->arg_cnt = arg_sz / sizeof(u32);
>> + memcpy(npu_slot->args, sn->prop_args, arg_sz);
>> +
>> + *size = sizeof(*npu_slot) + arg_sz;
>> + return 0;
>> +}
>> +
>> +static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
>> +{
>> + return MSG_OP_CHAIN_EXEC_NPU;
>> +}
>> +
>> +static struct aie2_exec_msg_ops npu_exec_message_ops = {
>> + .init_cu_req = aie2_init_exec_cu_req,
>> + .init_dpu_req = aie2_init_exec_dpu_req,
>> + .init_chain_req = aie2_init_npu_chain_req,
>> + .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
>> + .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
>> + .get_chain_msg_op = aie2_get_npu_chain_msg_op,
>> +};
>> +
>> +static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj
>> *cmd_abo,
>> + size_t *size, u32 *msg_op)
>> {
>> - u32 this_op = amdxdna_cmd_get_op(abo);
>> - void *cmd_buf = cmdbuf_abo->mem.kva;
>> + struct amdxdna_dev *xdna = cmd_abo->client->xdna;
>> int ret;
>> + u32 op;
>> - if (this_op != op) {
>> - ret = -EINVAL;
>> - goto done;
>> - }
>> + op = amdxdna_cmd_get_op(cmd_abo);
>> switch (op) {
>> case ERT_START_CU:
>> - ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo,
>> size);
>> + ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size,
>> msg_op);
>> + if (ret) {
>> + XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
>> + return ret;
>> + }
>> break;
>> case ERT_START_NPU:
>> - ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo,
>> size);
>> + ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size,
>> msg_op);
>> + if (ret) {
>> + XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
>> + return ret;
>> + }
>> +
>> break;
>> default:
>> + XDNA_INFO(xdna, "Unsupported op %d", op);
>
> Shouldn't this be XDNA_ERR()?
>
>> ret = -EOPNOTSUPP;
>> + break;
>> }
>> -done:
>> - if (ret) {
>> - XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d
>> ret %d",
>> - op, ret);
>> + return ret;
>> +}
>> +
>> +static int
>> +aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
>> + size_t *size, u32 *cmd_op)
>> +{
>> + struct amdxdna_dev *xdna = cmd_abo->client->xdna;
>> + int ret;
>> + u32 op;
>> +
>> + op = amdxdna_cmd_get_op(cmd_abo);
>> + if (*cmd_op == ERT_INVALID_CMD)
>> + *cmd_op = op;
>> + else if (op != *cmd_op)
>> + return -EINVAL;
>> +
>> + switch (op) {
>> + case ERT_START_CU:
>> + ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
>> + break;
>> + case ERT_START_NPU:
>> + ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
>> + break;
>> + default:
>> + XDNA_INFO(xdna, "Unsupported op %d", op);
>> + ret = -EOPNOTSUPP;
>> + break;
>> }
>> +
>> return ret;
>> }
>> +void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
>> +{
>> + if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
>> + ndev->exec_msg_ops = &npu_exec_message_ops;
>> + else
>> + ndev->exec_msg_ops = &legacy_exec_message_ops;
>> +}
>> +
>> static inline struct amdxdna_gem_obj *
>> aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
>> {
>> @@ -658,29 +774,36 @@ aie2_cmdlist_get_cmd_buf(struct
>> amdxdna_sched_job *job)
>> return job->hwctx->priv->cmd_buf[idx];
>> }
>> -static void
>> -aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
>> - struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
>> +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>> amdxdna_sched_job *job,
>> + int (*notify_cb)(void *, void __iomem *, size_t))
>> {
>> - req->buf_addr = cmdbuf_abo->mem.dev_addr;
>> - req->buf_size = size;
>> - req->count = cnt;
>> - drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
>> - XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size
>> 0x%x count %d",
>> - req->buf_addr, size, cnt);
>> -}
>> + struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>> + struct amdxdna_dev *xdna = hwctx->client->xdna;
>> + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>> + struct xdna_mailbox_msg msg;
>> + union exec_req req;
>> + int ret;
>> -static inline u32
>> -aie2_cmd_op_to_msg_op(u32 op)
>> -{
>> - switch (op) {
>> - case ERT_START_CU:
>> - return MSG_OP_CHAIN_EXEC_BUFFER_CF;
>> - case ERT_START_NPU:
>> - return MSG_OP_CHAIN_EXEC_DPU;
>> - default:
>> - return MSG_OP_MAX_OPCODE;
>> + if (!chann)
>> + return -ENODEV;
>> +
>> + ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size,
>> &msg.opcode);
>> + if (ret)
>> + return ret;
>> +
>> + msg.handle = job;
>> + msg.notify_cb = notify_cb;
>> + msg.send_data = (u8 *)&req;
>> + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
>> + 0x40, false);
>> +
>> + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>> + if (ret) {
>> + XDNA_ERR(xdna, "Send message failed");
>> + return ret;
>> }
>> +
>> + return 0;
>> }
>> int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
>> @@ -691,12 +814,13 @@ int aie2_cmdlist_multi_execbuf(struct
>> amdxdna_hwctx *hwctx,
>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>> struct amdxdna_client *client = hwctx->client;
>> struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>> + struct amdxdna_dev *xdna = client->xdna;
>> struct amdxdna_cmd_chain *payload;
>> struct xdna_mailbox_msg msg;
>> - struct cmd_chain_req req;
>> + union exec_chain_req req;
>> u32 payload_len;
>> u32 offset = 0;
>> - u32 size;
>> + size_t size;
>> int ret;
>> u32 op;
>> u32 i;
>> @@ -707,41 +831,42 @@ int aie2_cmdlist_multi_execbuf(struct
>> amdxdna_hwctx *hwctx,
>> payload_len < struct_size(payload, data,
>> payload->command_count))
>> return -EINVAL;
>> + op = ERT_INVALID_CMD;
>> for (i = 0; i < payload->command_count; i++) {
>> u32 boh = (u32)(payload->data[i]);
>> struct amdxdna_gem_obj *abo;
>> abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
>> if (!abo) {
>> - XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
>> + XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
>> return -ENOENT;
>> }
>> - /* All sub-cmd should have same op, use the first one. */
>> - if (i == 0)
>> - op = amdxdna_cmd_get_op(abo);
>> -
>> - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset,
>> abo, &size);
>> + size = cmdbuf_abo->mem.size - offset;
>> + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
>> + abo, &size, &op);
>> amdxdna_gem_put_obj(abo);
>> if (ret)
>> - return -EINVAL;
>> + return ret;
>> offset += size;
>> }
>> + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
>> + if (msg.opcode == MSG_OP_MAX_OPCODE)
>> + return -EOPNOTSUPP;
>> /* The offset is the accumulated total size of the cmd buffer */
>> - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset,
>> payload->command_count);
>> + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
>> + offset, payload->command_count);
>> + drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
>> - msg.opcode = aie2_cmd_op_to_msg_op(op);
>> - if (msg.opcode == MSG_OP_MAX_OPCODE)
>> - return -EOPNOTSUPP;
>> msg.handle = job;
>> msg.notify_cb = notify_cb;
>> msg.send_data = (u8 *)&req;
>> msg.send_size = sizeof(req);
>> ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>> if (ret) {
>> - XDNA_ERR(hwctx->client->xdna, "Send message failed");
>> + XDNA_ERR(xdna, "Send message failed");
>> return ret;
>> }
>> @@ -754,23 +879,27 @@ int aie2_cmdlist_single_execbuf(struct
>> amdxdna_hwctx *hwctx,
>> {
>> struct amdxdna_gem_obj *cmdbuf_abo =
>> aie2_cmdlist_get_cmd_buf(job);
>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>> + struct amdxdna_dev *xdna = hwctx->client->xdna;
>> struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>> struct xdna_mailbox_msg msg;
>> - struct cmd_chain_req req;
>> - u32 size;
>> + union exec_chain_req req;
>> + u32 op = ERT_INVALID_CMD;
>> + size_t size;
>> int ret;
>> - u32 op;
>> - op = amdxdna_cmd_get_op(cmd_abo);
>> - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo,
>> &size);
>> + size = cmdbuf_abo->mem.size;
>> + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo,
>> &size, &op);
>> if (ret)
>> return ret;
>> - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
>> -
>> - msg.opcode = aie2_cmd_op_to_msg_op(op);
>> + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
>> if (msg.opcode == MSG_OP_MAX_OPCODE)
>> return -EOPNOTSUPP;
>> +
>> + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
>> + size, 1);
>> + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
>> +
>> msg.handle = job;
>> msg.notify_cb = notify_cb;
>> msg.send_data = (u8 *)&req;
>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h
>> b/drivers/accel/amdxdna/aie2_msg_priv.h
>> index 2dbea1d09980..947daa63f064 100644
>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>> @@ -20,6 +20,7 @@ enum aie2_msg_opcode {
>> MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
>> MSG_OP_CHAIN_EXEC_DPU = 0x13,
>> MSG_OP_CONFIG_DEBUG_BO = 0x14,
>> + MSG_OP_CHAIN_EXEC_NPU = 0x18,
>> MSG_OP_MAX_XRT_OPCODE,
>> MSG_OP_SUSPEND = 0x101,
>> MSG_OP_RESUME = 0x102,
>> @@ -172,6 +173,16 @@ struct exec_dpu_req {
>> __u32 payload[35];
>> } __packed;
>> +enum exec_npu_type {
>> + EXEC_NPU_TYPE_NON_ELF = 0x1,
>> + EXEC_NPU_TYPE_PARTIAL_ELF = 0x2,
>> +};
>> +
>> +union exec_req {
>> + struct execute_buffer_req ebuf;
>> + struct exec_dpu_req dpu_req;
>> +};
>> +
>> struct execute_buffer_resp {
>> enum aie2_msg_status status;
>> } __packed;
>> @@ -343,9 +354,6 @@ struct async_event_msg_resp {
>> } __packed;
>> #define MAX_CHAIN_CMDBUF_SIZE SZ_4K
>> -#define slot_has_space(slot, offset, payload_size) \
>> - (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
>> - sizeof(typeof(slot)))
>> struct cmd_chain_slot_execbuf_cf {
>> __u32 cu_idx;
>> @@ -363,12 +371,40 @@ struct cmd_chain_slot_dpu {
>> __u32 args[] __counted_by(arg_cnt);
>> };
>> +#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
>> +struct cmd_chain_slot_npu {
>> + enum exec_npu_type type;
>> + u64 inst_buf_addr;
>> + u64 save_buf_addr;
>> + u64 restore_buf_addr;
>> + u32 inst_size;
>> + u32 save_size;
>> + u32 restore_size;
>> + u32 inst_prop_cnt;
>> + u32 cu_idx;
>> + u32 arg_cnt;
>> + u32 args[] __counted_by(arg_cnt);
>> +} __packed;
>> +
>> struct cmd_chain_req {
>> __u64 buf_addr;
>> __u32 buf_size;
>> __u32 count;
>> } __packed;
>> +struct cmd_chain_npu_req {
>> + u32 flags;
>> + u32 reserved;
>> + u64 buf_addr;
>> + u32 buf_size;
>> + u32 count;
>> +} __packed;
>> +
>> +union exec_chain_req {
>> + struct cmd_chain_npu_req npu_req;
>> + struct cmd_chain_req req;
>> +};
>> +
>> struct cmd_chain_resp {
>> enum aie2_msg_status status;
>> __u32 fail_cmd_idx;
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 80313a2a98d4..d7ccbdaf47f5 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -55,6 +55,7 @@ struct mgmt_mbox_chann_info {
>> static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32
>> fw_major, u32 fw_minor)
>> {
>> + const struct aie2_fw_feature_tbl *feature;
>> struct amdxdna_dev *xdna = ndev->xdna;
>> /*
>> @@ -78,6 +79,17 @@ static int aie2_check_protocol(struct
>> amdxdna_dev_hdl *ndev, u32 fw_major, u32 f
>> XDNA_ERR(xdna, "Firmware minor version smaller than
>> supported");
>> return -EINVAL;
>> }
>> +
>> + for (feature = ndev->priv->fw_feature_tbl; feature &&
>> feature->min_minor;
>> + feature++) {
>> + if (fw_minor < feature->min_minor)
>> + continue;
>> + if (feature->max_minor > 0 && fw_minor > feature->max_minor)
>> + continue;
>> +
>> + set_bit(feature->feature, &ndev->feature_mask);
>> + }
>> +
>> return 0;
>> }
>> @@ -587,6 +599,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
>> }
>> release_firmware(fw);
>> + aie2_msg_init(ndev);
>> amdxdna_pm_init(xdna);
>> return 0;
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index cfe42b0d4242..d0a3cb1fe8be 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -156,6 +156,17 @@ enum aie2_dev_status {
>> AIE2_DEV_START,
>> };
>> +struct aie2_exec_msg_ops {
>> + int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
>> + size_t *size, u32 *msg_op);
>> + int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
>> + size_t *size, u32 *msg_op);
>> + void (*init_chain_req)(void *req, u64 slot_addr, size_t size,
>> u32 cmd_cnt);
>> + int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot,
>> size_t *size);
>> + int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot,
>> size_t *size);
>> + u32 (*get_chain_msg_op)(u32 cmd_op);
>> +};
>> +
>> struct amdxdna_dev_hdl {
>> struct amdxdna_dev *xdna;
>> const struct amdxdna_dev_priv *priv;
>> @@ -173,6 +184,8 @@ struct amdxdna_dev_hdl {
>> u32 total_col;
>> struct aie_version version;
>> struct aie_metadata metadata;
>> + unsigned long feature_mask;
>> + struct aie2_exec_msg_ops *exec_msg_ops;
>> /* power management and clock*/
>> enum amdxdna_power_mode_type pw_mode;
>> @@ -208,12 +221,26 @@ struct aie2_hw_ops {
>> int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>> };
>> +enum aie2_fw_feature {
>> + AIE2_NPU_COMMAND,
>> + AIE2_FEATURE_MAX
>> +};
>> +
>> +struct aie2_fw_feature_tbl {
>> + enum aie2_fw_feature feature;
>> + u32 max_minor;
>> + u32 min_minor;
>> +};
>> +
>> +#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature,
>> &(ndev)->feature_mask)
>> +
>> struct amdxdna_dev_priv {
>> const char *fw_path;
>> u64 protocol_major;
>> u64 protocol_minor;
>> const struct rt_config *rt_config;
>> const struct dpm_clk_freq *dpm_clk_tbl;
>> + const struct aie2_fw_feature_tbl *fw_feature_tbl;
>> #define COL_ALIGN_NONE 0
>> #define COL_ALIGN_NATURE 1
>> @@ -239,6 +266,7 @@ extern const struct dpm_clk_freq
>> npu1_dpm_clk_table[];
>> extern const struct dpm_clk_freq npu4_dpm_clk_table[];
>> extern const struct rt_config npu1_default_rt_cfg[];
>> extern const struct rt_config npu4_default_rt_cfg[];
>> +extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
>> /* aie2_smu.c */
>> int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
>> @@ -263,6 +291,7 @@ int aie2_get_array_async_error(struct
>> amdxdna_dev_hdl *ndev,
>> struct amdxdna_drm_get_array *args);
>> /* aie2_message.c */
>> +void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
>> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
>> int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
>> int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type,
>> u64 value);
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c
>> b/drivers/accel/amdxdna/amdxdna_ctx.c
>> index d18182c59668..878cc955f56d 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>> @@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct
>> amdxdna_gem_obj *abo, u32 *size)
>> return &cmd->data[num_masks];
>> }
>> -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
>> +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
>> {
>> struct amdxdna_cmd *cmd = abo->mem.kva;
>> u32 num_masks, i;
>> u32 *cu_mask;
>> if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
>> - return -1;
>> + return INVALID_CU_IDX;
>> num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK,
>> cmd->header);
>> cu_mask = cmd->data;
>> @@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj
>> *abo)
>> return ffs(cu_mask[i]) - 1;
>> }
>> - return -1;
>> + return INVALID_CU_IDX;
>> }
>> /*
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index 919c654dfea6..1aa2b938e07b 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -13,9 +13,10 @@
>> struct amdxdna_hwctx_priv;
>> enum ert_cmd_opcode {
>> - ERT_START_CU = 0,
>> - ERT_CMD_CHAIN = 19,
>> - ERT_START_NPU = 20,
>> + ERT_INVALID_CMD = ~0U,
> ~0U > 20, shouldn't this be at the end of the enum?
>> + ERT_START_CU = 0,
>> + ERT_CMD_CHAIN = 19,
>> + ERT_START_NPU = 20,
>> };
>> enum ert_cmd_state {
>> @@ -64,6 +65,8 @@ struct amdxdna_cmd {
>> u32 data[];
>> };
>> +#define INVALID_CU_IDX (~0U)
>> +
>> struct amdxdna_hwctx {
>> struct amdxdna_client *client;
>> struct amdxdna_hwctx_priv *priv;
>> @@ -150,7 +153,7 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
>> }
>> void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32
>> *size);
>> -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
>> +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
>> void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
>> void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
>> diff --git a/drivers/accel/amdxdna/npu1_regs.c
>> b/drivers/accel/amdxdna/npu1_regs.c
>> index 23feb5f6fad3..ffc2e7c7b523 100644
>> --- a/drivers/accel/amdxdna/npu1_regs.c
>> +++ b/drivers/accel/amdxdna/npu1_regs.c
>> @@ -63,12 +63,18 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
>> { 0 }
>> };
>> +static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
>> + { .feature = AIE2_NPU_COMMAND, .min_minor = 8 },
>> + { 0 }
>> +};
>> +
>> static const struct amdxdna_dev_priv npu1_dev_priv = {
>> .fw_path = "amdnpu/1502_00/npu.sbin",
>> .protocol_major = 0x5,
>> .protocol_minor = 0x7,
>> .rt_config = npu1_default_rt_cfg,
>> .dpm_clk_tbl = npu1_dpm_clk_table,
>> + .fw_feature_tbl = npu1_fw_feature_table,
>> .col_align = COL_ALIGN_NONE,
>> .mbox_dev_addr = NPU1_MBOX_BAR_BASE,
>> .mbox_size = 0, /* Use BAR size */
>> diff --git a/drivers/accel/amdxdna/npu2_regs.c
>> b/drivers/accel/amdxdna/npu2_regs.c
>> index 67c2ae931c62..5fbfdcc3762d 100644
>> --- a/drivers/accel/amdxdna/npu2_regs.c
>> +++ b/drivers/accel/amdxdna/npu2_regs.c
>> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu2_dev_priv = {
>> .protocol_minor = 0x6,
>> .rt_config = npu4_default_rt_cfg,
>> .dpm_clk_tbl = npu4_dpm_clk_table,
>> + .fw_feature_tbl = npu4_fw_feature_table,
>> .col_align = COL_ALIGN_NATURE,
>> .mbox_dev_addr = NPU2_MBOX_BAR_BASE,
>> .mbox_size = 0, /* Use BAR size */
>> diff --git a/drivers/accel/amdxdna/npu4_regs.c
>> b/drivers/accel/amdxdna/npu4_regs.c
>> index fac6c1b0b74b..79aba12acfde 100644
>> --- a/drivers/accel/amdxdna/npu4_regs.c
>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>> @@ -83,12 +83,18 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>> { 0 }
>> };
>> +const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
>> + { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
>> + { 0 }
>> +};
>> +
>> static const struct amdxdna_dev_priv npu4_dev_priv = {
>> .fw_path = "amdnpu/17f0_10/npu.sbin",
>> .protocol_major = 0x6,
>> .protocol_minor = 12,
>> .rt_config = npu4_default_rt_cfg,
>> .dpm_clk_tbl = npu4_dpm_clk_table,
>> + .fw_feature_tbl = npu4_fw_feature_table,
>> .col_align = COL_ALIGN_NATURE,
>> .mbox_dev_addr = NPU4_MBOX_BAR_BASE,
>> .mbox_size = 0, /* Use BAR size */
>> diff --git a/drivers/accel/amdxdna/npu5_regs.c
>> b/drivers/accel/amdxdna/npu5_regs.c
>> index c91e1fa76ff5..c5e259ab9f49 100644
>> --- a/drivers/accel/amdxdna/npu5_regs.c
>> +++ b/drivers/accel/amdxdna/npu5_regs.c
>> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
>> .protocol_minor = 12,
>> .rt_config = npu4_default_rt_cfg,
>> .dpm_clk_tbl = npu4_dpm_clk_table,
>> + .fw_feature_tbl = npu4_fw_feature_table,
>> .col_align = COL_ALIGN_NATURE,
>> .mbox_dev_addr = NPU5_MBOX_BAR_BASE,
>> .mbox_size = 0, /* Use BAR size */
>> diff --git a/drivers/accel/amdxdna/npu6_regs.c
>> b/drivers/accel/amdxdna/npu6_regs.c
>> index 773f738915a7..2de63b44d6e7 100644
>> --- a/drivers/accel/amdxdna/npu6_regs.c
>> +++ b/drivers/accel/amdxdna/npu6_regs.c
>> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
>> .protocol_minor = 12,
>> .rt_config = npu4_default_rt_cfg,
>> .dpm_clk_tbl = npu4_dpm_clk_table,
>> + .fw_feature_tbl = npu4_fw_feature_table,
>> .col_align = COL_ALIGN_NATURE,
>> .mbox_dev_addr = NPU6_MBOX_BAR_BASE,
>> .mbox_size = 0, /* Use BAR size */
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH V1] accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported
2025-10-31 15:15 ` Lizhi Hou
@ 2025-11-03 18:10 ` Lizhi Hou
0 siblings, 0 replies; 4+ messages in thread
From: Lizhi Hou @ 2025-11-03 18:10 UTC (permalink / raw)
To: Mario Limonciello (AMD) (kernel.org), ogabbay, quic_jhugo,
maciej.falkowski, dri-devel
Cc: linux-kernel, max.zhen, sonal.santan
Applied to drm-misc-next.
On 10/31/25 08:15, Lizhi Hou wrote:
>
> On 10/30/25 22:10, Mario Limonciello (AMD) (kernel.org) wrote:
>>
>>
>> On 10/30/2025 8:47 PM, Lizhi Hou wrote:
>>> MSG_OP_CHAIN_EXEC_NPU is a unified mailbox message that replaces
>>> MSG_OP_CHAIN_EXEC_BUFFER_CF and MSG_OP_CHAIN_EXEC_DPU.
>>>
>>> Add driver logic to check firmware version, and if
>>> MSG_OP_CHAIN_EXEC_NPU
>>> is supported, use it to submit firmware commands.
>>>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>
>> Two small nits below to me. Otherwise
>>
>> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>
> Thanks a lot. And I will fix the nits when I merge.
>
>
> Lizhi
>
>>
>>> ---
>>> drivers/accel/amdxdna/aie2_message.c | 443
>>> +++++++++++++++++---------
>>> drivers/accel/amdxdna/aie2_msg_priv.h | 42 ++-
>>> drivers/accel/amdxdna/aie2_pci.c | 13 +
>>> drivers/accel/amdxdna/aie2_pci.h | 29 ++
>>> drivers/accel/amdxdna/amdxdna_ctx.c | 6 +-
>>> drivers/accel/amdxdna/amdxdna_ctx.h | 11 +-
>>> drivers/accel/amdxdna/npu1_regs.c | 6 +
>>> drivers/accel/amdxdna/npu2_regs.c | 1 +
>>> drivers/accel/amdxdna/npu4_regs.c | 6 +
>>> drivers/accel/amdxdna/npu5_regs.c | 1 +
>>> drivers/accel/amdxdna/npu6_regs.c | 1 +
>>> 11 files changed, 392 insertions(+), 167 deletions(-)
>>>
>>> diff --git a/drivers/accel/amdxdna/aie2_message.c
>>> b/drivers/accel/amdxdna/aie2_message.c
>>> index 3a4c845d783a..4751a8aff0f7 100644
>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>> @@ -27,6 +27,8 @@
>>> #define DECLARE_AIE2_MSG(name, op) \
>>> DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
>>> +#define EXEC_MSG_OPS(xdna) ((xdna)->dev_handle->exec_msg_ops)
>>> +
>>> static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
>>> struct xdna_mailbox_msg *msg)
>>> {
>>> @@ -479,177 +481,291 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>> return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>>> }
>>> -int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>> amdxdna_sched_job *job,
>>> - int (*notify_cb)(void *, void __iomem *, size_t))
>>> +static int aie2_init_exec_cu_req(struct amdxdna_gem_obj *cmd_bo,
>>> void *req,
>>> + size_t *size, u32 *msg_op)
>>> {
>>> - struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>>> - struct amdxdna_dev *xdna = hwctx->client->xdna;
>>> - struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>>> - union {
>>> - struct execute_buffer_req ebuf;
>>> - struct exec_dpu_req dpu;
>>> - } req;
>>> - struct xdna_mailbox_msg msg;
>>> - u32 payload_len;
>>> - void *payload;
>>> - int cu_idx;
>>> - int ret;
>>> - u32 op;
>>> + struct execute_buffer_req *cu_req = req;
>>> + u32 cmd_len;
>>> + void *cmd;
>>> - if (!chann)
>>> - return -ENODEV;
>>> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>>> + if (cmd_len > sizeof(cu_req->payload))
>>> + return -EINVAL;
>>> - payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
>>> - if (!payload) {
>>> - XDNA_ERR(xdna, "Invalid command, cannot get payload");
>>> + cu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>>> + if (cu_req->cu_idx == INVALID_CU_IDX)
>>> return -EINVAL;
>>> - }
>>> - cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
>>> - if (cu_idx < 0) {
>>> - XDNA_DBG(xdna, "Invalid cu idx");
>>> + memcpy(cu_req->payload, cmd, cmd_len);
>>> +
>>> + *size = sizeof(*cu_req);
>>> + *msg_op = MSG_OP_EXECUTE_BUFFER_CF;
>>> + return 0;
>>> +}
>>> +
>>> +static int aie2_init_exec_dpu_req(struct amdxdna_gem_obj *cmd_bo,
>>> void *req,
>>> + size_t *size, u32 *msg_op)
>>> +{
>>> + struct exec_dpu_req *dpu_req = req;
>>> + struct amdxdna_cmd_start_npu *sn;
>>> + u32 cmd_len;
>>> +
>>> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>>> + if (cmd_len - sizeof(*sn) > sizeof(dpu_req->payload))
>>> return -EINVAL;
>>> - }
>>> - op = amdxdna_cmd_get_op(cmd_abo);
>>> - switch (op) {
>>> - case ERT_START_CU:
>>> - if (unlikely(payload_len > sizeof(req.ebuf.payload)))
>>> - XDNA_DBG(xdna, "Invalid ebuf payload len: %d",
>>> payload_len);
>>> - req.ebuf.cu_idx = cu_idx;
>>> - memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
>>> - msg.send_size = sizeof(req.ebuf);
>>> - msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
>>> - break;
>>> - case ERT_START_NPU: {
>>> - struct amdxdna_cmd_start_npu *sn = payload;
>>> -
>>> - if (unlikely(payload_len - sizeof(*sn) >
>>> sizeof(req.dpu.payload)))
>>> - XDNA_DBG(xdna, "Invalid dpu payload len: %d",
>>> payload_len);
>>> - req.dpu.inst_buf_addr = sn->buffer;
>>> - req.dpu.inst_size = sn->buffer_size;
>>> - req.dpu.inst_prop_cnt = sn->prop_count;
>>> - req.dpu.cu_idx = cu_idx;
>>> - memcpy(req.dpu.payload, sn->prop_args,
>>> sizeof(req.dpu.payload));
>>> - msg.send_size = sizeof(req.dpu);
>>> - msg.opcode = MSG_OP_EXEC_DPU;
>>> - break;
>>> - }
>>> - default:
>>> - XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
>>> + dpu_req->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>>> + if (dpu_req->cu_idx == INVALID_CU_IDX)
>>> return -EINVAL;
>>> - }
>>> - msg.handle = job;
>>> - msg.notify_cb = notify_cb;
>>> - msg.send_data = (u8 *)&req;
>>> - print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
>>> - 0x40, false);
>>> - ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>>> - if (ret) {
>>> - XDNA_ERR(xdna, "Send message failed");
>>> - return ret;
>>> - }
>>> + dpu_req->inst_buf_addr = sn->buffer;
>>> + dpu_req->inst_size = sn->buffer_size;
>>> + dpu_req->inst_prop_cnt = sn->prop_count;
>>> + memcpy(dpu_req->payload, sn->prop_args, cmd_len - sizeof(*sn));
>>> + *size = sizeof(*dpu_req);
>>> + *msg_op = MSG_OP_EXEC_DPU;
>>> return 0;
>>> }
>>> +static void aie2_init_exec_chain_req(void *req, u64 slot_addr,
>>> size_t size, u32 cmd_cnt)
>>> +{
>>> + struct cmd_chain_req *chain_req = req;
>>> +
>>> + chain_req->buf_addr = slot_addr;
>>> + chain_req->buf_size = size;
>>> + chain_req->count = cmd_cnt;
>>> +}
>>> +
>>> +static void aie2_init_npu_chain_req(void *req, u64 slot_addr,
>>> size_t size, u32 cmd_cnt)
>>> +{
>>> + struct cmd_chain_npu_req *npu_chain_req = req;
>>> +
>>> + npu_chain_req->flags = 0;
>>> + npu_chain_req->reserved = 0;
>>> + npu_chain_req->buf_addr = slot_addr;
>>> + npu_chain_req->buf_size = size;
>>> + npu_chain_req->count = cmd_cnt;
>>> +}
>>> +
>>> static int
>>> -aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
>>> - struct amdxdna_gem_obj *abo, u32 *size)
>>> +aie2_cmdlist_fill_cf(struct amdxdna_gem_obj *cmd_bo, void *slot,
>>> size_t *size)
>>> {
>>> - struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
>>> - int cu_idx = amdxdna_cmd_get_cu_idx(abo);
>>> - u32 payload_len;
>>> - void *payload;
>>> + struct cmd_chain_slot_execbuf_cf *cf_slot = slot;
>>> + u32 cmd_len;
>>> + void *cmd;
>>> - if (cu_idx < 0)
>>> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>>> + if (*size < sizeof(*cf_slot) + cmd_len)
>>> return -EINVAL;
>>> - payload = amdxdna_cmd_get_payload(abo, &payload_len);
>>> - if (!payload)
>>> + cf_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>>> + if (cf_slot->cu_idx == INVALID_CU_IDX)
>>> return -EINVAL;
>>> - if (!slot_has_space(*buf, offset, payload_len))
>>> - return -ENOSPC;
>>> -
>>> - buf->cu_idx = cu_idx;
>>> - buf->arg_cnt = payload_len / sizeof(u32);
>>> - memcpy(buf->args, payload, payload_len);
>>> - /* Accurate buf size to hint firmware to do necessary copy */
>>> - *size = sizeof(*buf) + payload_len;
>>> + cf_slot->arg_cnt = cmd_len / sizeof(u32);
>>> + memcpy(cf_slot->args, cmd, cmd_len);
>>> + /* Accurate slot size to hint firmware to do necessary copy */
>>> + *size = sizeof(*cf_slot) + cmd_len;
>>> return 0;
>>> }
>>> static int
>>> -aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
>>> - struct amdxdna_gem_obj *abo, u32 *size)
>>> +aie2_cmdlist_fill_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot,
>>> size_t *size)
>>> {
>>> - struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
>>> - int cu_idx = amdxdna_cmd_get_cu_idx(abo);
>>> + struct cmd_chain_slot_dpu *dpu_slot = slot;
>>> struct amdxdna_cmd_start_npu *sn;
>>> - u32 payload_len;
>>> - void *payload;
>>> + u32 cmd_len;
>>> u32 arg_sz;
>>> - if (cu_idx < 0)
>>> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>>> + arg_sz = cmd_len - sizeof(*sn);
>>> + if (cmd_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
>>> return -EINVAL;
>>> - payload = amdxdna_cmd_get_payload(abo, &payload_len);
>>> - if (!payload)
>>> + if (*size < sizeof(*dpu_slot) + arg_sz)
>>> return -EINVAL;
>>> - sn = payload;
>>> - arg_sz = payload_len - sizeof(*sn);
>>> - if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
>>> +
>>> + dpu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>>> + if (dpu_slot->cu_idx == INVALID_CU_IDX)
>>> return -EINVAL;
>>> - if (!slot_has_space(*buf, offset, arg_sz))
>>> - return -ENOSPC;
>>> + dpu_slot->inst_buf_addr = sn->buffer;
>>> + dpu_slot->inst_size = sn->buffer_size;
>>> + dpu_slot->inst_prop_cnt = sn->prop_count;
>>> + dpu_slot->arg_cnt = arg_sz / sizeof(u32);
>>> + memcpy(dpu_slot->args, sn->prop_args, arg_sz);
>>> +
>>> + /* Accurate slot size to hint firmware to do necessary copy */
>>> + *size = sizeof(*dpu_slot) + arg_sz;
>>> + return 0;
>>> +}
>>> +
>>> +static u32 aie2_get_chain_msg_op(u32 cmd_op)
>>> +{
>>> + switch (cmd_op) {
>>> + case ERT_START_CU:
>>> + return MSG_OP_CHAIN_EXEC_BUFFER_CF;
>>> + case ERT_START_NPU:
>>> + return MSG_OP_CHAIN_EXEC_DPU;
>>> + default:
>>> + break;
>>> + }
>>> - buf->inst_buf_addr = sn->buffer;
>>> - buf->inst_size = sn->buffer_size;
>>> - buf->inst_prop_cnt = sn->prop_count;
>>> - buf->cu_idx = cu_idx;
>>> - buf->arg_cnt = arg_sz / sizeof(u32);
>>> - memcpy(buf->args, sn->prop_args, arg_sz);
>>> + return MSG_OP_MAX_OPCODE;
>>> +}
>>> - /* Accurate buf size to hint firmware to do necessary copy */
>>> - *size = sizeof(*buf) + arg_sz;
>>> +static struct aie2_exec_msg_ops legacy_exec_message_ops = {
>>> + .init_cu_req = aie2_init_exec_cu_req,
>>> + .init_dpu_req = aie2_init_exec_dpu_req,
>>> + .init_chain_req = aie2_init_exec_chain_req,
>>> + .fill_cf_slot = aie2_cmdlist_fill_cf,
>>> + .fill_dpu_slot = aie2_cmdlist_fill_dpu,
>>> + .get_chain_msg_op = aie2_get_chain_msg_op,
>>> +};
>>> +
>>> +static int
>>> +aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void
>>> *slot, size_t *size)
>>> +{
>>> + struct cmd_chain_slot_npu *npu_slot = slot;
>>> + u32 cmd_len;
>>> + void *cmd;
>>> +
>>> + cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>>> + if (*size < sizeof(*npu_slot) + cmd_len)
>>> + return -EINVAL;
>>> +
>>> + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>>> + if (npu_slot->cu_idx == INVALID_CU_IDX)
>>> + return -EINVAL;
>>> +
>>> + memset(npu_slot, 0, sizeof(*npu_slot));
>>> + npu_slot->type = EXEC_NPU_TYPE_NON_ELF;
>>> + npu_slot->arg_cnt = cmd_len / sizeof(u32);
>>> + memcpy(npu_slot->args, cmd, cmd_len);
>>> +
>>> + *size = sizeof(*npu_slot) + cmd_len;
>>> return 0;
>>> }
>>> static int
>>> -aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj
>>> *cmdbuf_abo, u32 offset,
>>> - struct amdxdna_gem_obj *abo, u32 *size)
>>> +aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void
>>> *slot, size_t *size)
>>> +{
>>> + struct cmd_chain_slot_npu *npu_slot = slot;
>>> + struct amdxdna_cmd_start_npu *sn;
>>> + u32 cmd_len;
>>> + u32 arg_sz;
>>> +
>>> + sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
>>> + arg_sz = cmd_len - sizeof(*sn);
>>> + if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
>>> + return -EINVAL;
>>> +
>>> + if (*size < sizeof(*npu_slot) + arg_sz)
>>> + return -EINVAL;
>>> +
>>> + npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
>>> + if (npu_slot->cu_idx == INVALID_CU_IDX)
>>> + return -EINVAL;
>>> +
>>> + memset(npu_slot, 0, sizeof(*npu_slot));
>>> + npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF;
>>> + npu_slot->inst_buf_addr = sn->buffer;
>>> + npu_slot->inst_size = sn->buffer_size;
>>> + npu_slot->inst_prop_cnt = sn->prop_count;
>>> + npu_slot->arg_cnt = arg_sz / sizeof(u32);
>>> + memcpy(npu_slot->args, sn->prop_args, arg_sz);
>>> +
>>> + *size = sizeof(*npu_slot) + arg_sz;
>>> + return 0;
>>> +}
>>> +
>>> +static u32 aie2_get_npu_chain_msg_op(u32 cmd_op)
>>> +{
>>> + return MSG_OP_CHAIN_EXEC_NPU;
>>> +}
>>> +
>>> +static struct aie2_exec_msg_ops npu_exec_message_ops = {
>>> + .init_cu_req = aie2_init_exec_cu_req,
>>> + .init_dpu_req = aie2_init_exec_dpu_req,
>>> + .init_chain_req = aie2_init_npu_chain_req,
>>> + .fill_cf_slot = aie2_cmdlist_fill_npu_cf,
>>> + .fill_dpu_slot = aie2_cmdlist_fill_npu_dpu,
>>> + .get_chain_msg_op = aie2_get_npu_chain_msg_op,
>>> +};
>>> +
>>> +static int aie2_init_exec_req(void *req, struct amdxdna_gem_obj
>>> *cmd_abo,
>>> + size_t *size, u32 *msg_op)
>>> {
>>> - u32 this_op = amdxdna_cmd_get_op(abo);
>>> - void *cmd_buf = cmdbuf_abo->mem.kva;
>>> + struct amdxdna_dev *xdna = cmd_abo->client->xdna;
>>> int ret;
>>> + u32 op;
>>> - if (this_op != op) {
>>> - ret = -EINVAL;
>>> - goto done;
>>> - }
>>> + op = amdxdna_cmd_get_op(cmd_abo);
>>> switch (op) {
>>> case ERT_START_CU:
>>> - ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo,
>>> size);
>>> + ret = EXEC_MSG_OPS(xdna)->init_cu_req(cmd_abo, req, size,
>>> msg_op);
>>> + if (ret) {
>>> + XDNA_DBG(xdna, "Init CU req failed ret %d", ret);
>>> + return ret;
>>> + }
>>> break;
>>> case ERT_START_NPU:
>>> - ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo,
>>> size);
>>> + ret = EXEC_MSG_OPS(xdna)->init_dpu_req(cmd_abo, req, size,
>>> msg_op);
>>> + if (ret) {
>>> + XDNA_DBG(xdna, "Init DPU req failed ret %d", ret);
>>> + return ret;
>>> + }
>>> +
>>> break;
>>> default:
>>> + XDNA_INFO(xdna, "Unsupported op %d", op);
>>
>> Shouldn't this be XDNA_ERR()?
>>
>>> ret = -EOPNOTSUPP;
>>> + break;
>>> }
>>> -done:
>>> - if (ret) {
>>> - XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d
>>> ret %d",
>>> - op, ret);
>>> + return ret;
>>> +}
>>> +
>>> +static int
>>> +aie2_cmdlist_fill_slot(void *slot, struct amdxdna_gem_obj *cmd_abo,
>>> + size_t *size, u32 *cmd_op)
>>> +{
>>> + struct amdxdna_dev *xdna = cmd_abo->client->xdna;
>>> + int ret;
>>> + u32 op;
>>> +
>>> + op = amdxdna_cmd_get_op(cmd_abo);
>>> + if (*cmd_op == ERT_INVALID_CMD)
>>> + *cmd_op = op;
>>> + else if (op != *cmd_op)
>>> + return -EINVAL;
>>> +
>>> + switch (op) {
>>> + case ERT_START_CU:
>>> + ret = EXEC_MSG_OPS(xdna)->fill_cf_slot(cmd_abo, slot, size);
>>> + break;
>>> + case ERT_START_NPU:
>>> + ret = EXEC_MSG_OPS(xdna)->fill_dpu_slot(cmd_abo, slot, size);
>>> + break;
>>> + default:
>>> + XDNA_INFO(xdna, "Unsupported op %d", op);
>>> + ret = -EOPNOTSUPP;
>>> + break;
>>> }
>>> +
>>> return ret;
>>> }
>>> +void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
>>> +{
>>> + if (AIE2_FEATURE_ON(ndev, AIE2_NPU_COMMAND))
>>> + ndev->exec_msg_ops = &npu_exec_message_ops;
>>> + else
>>> + ndev->exec_msg_ops = &legacy_exec_message_ops;
>>> +}
>>> +
>>> static inline struct amdxdna_gem_obj *
>>> aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
>>> {
>>> @@ -658,29 +774,36 @@ aie2_cmdlist_get_cmd_buf(struct
>>> amdxdna_sched_job *job)
>>> return job->hwctx->priv->cmd_buf[idx];
>>> }
>>> -static void
>>> -aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
>>> - struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32
>>> cnt)
>>> +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>> amdxdna_sched_job *job,
>>> + int (*notify_cb)(void *, void __iomem *, size_t))
>>> {
>>> - req->buf_addr = cmdbuf_abo->mem.dev_addr;
>>> - req->buf_size = size;
>>> - req->count = cnt;
>>> - drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
>>> - XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx
>>> size 0x%x count %d",
>>> - req->buf_addr, size, cnt);
>>> -}
>>> + struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>>> + struct amdxdna_dev *xdna = hwctx->client->xdna;
>>> + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>>> + struct xdna_mailbox_msg msg;
>>> + union exec_req req;
>>> + int ret;
>>> -static inline u32
>>> -aie2_cmd_op_to_msg_op(u32 op)
>>> -{
>>> - switch (op) {
>>> - case ERT_START_CU:
>>> - return MSG_OP_CHAIN_EXEC_BUFFER_CF;
>>> - case ERT_START_NPU:
>>> - return MSG_OP_CHAIN_EXEC_DPU;
>>> - default:
>>> - return MSG_OP_MAX_OPCODE;
>>> + if (!chann)
>>> + return -ENODEV;
>>> +
>>> + ret = aie2_init_exec_req(&req, cmd_abo, &msg.send_size,
>>> &msg.opcode);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + msg.handle = job;
>>> + msg.notify_cb = notify_cb;
>>> + msg.send_data = (u8 *)&req;
>>> + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
>>> + 0x40, false);
>>> +
>>> + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>>> + if (ret) {
>>> + XDNA_ERR(xdna, "Send message failed");
>>> + return ret;
>>> }
>>> +
>>> + return 0;
>>> }
>>> int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
>>> @@ -691,12 +814,13 @@ int aie2_cmdlist_multi_execbuf(struct
>>> amdxdna_hwctx *hwctx,
>>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>>> struct amdxdna_client *client = hwctx->client;
>>> struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>>> + struct amdxdna_dev *xdna = client->xdna;
>>> struct amdxdna_cmd_chain *payload;
>>> struct xdna_mailbox_msg msg;
>>> - struct cmd_chain_req req;
>>> + union exec_chain_req req;
>>> u32 payload_len;
>>> u32 offset = 0;
>>> - u32 size;
>>> + size_t size;
>>> int ret;
>>> u32 op;
>>> u32 i;
>>> @@ -707,41 +831,42 @@ int aie2_cmdlist_multi_execbuf(struct
>>> amdxdna_hwctx *hwctx,
>>> payload_len < struct_size(payload, data,
>>> payload->command_count))
>>> return -EINVAL;
>>> + op = ERT_INVALID_CMD;
>>> for (i = 0; i < payload->command_count; i++) {
>>> u32 boh = (u32)(payload->data[i]);
>>> struct amdxdna_gem_obj *abo;
>>> abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
>>> if (!abo) {
>>> - XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
>>> + XDNA_ERR(xdna, "Failed to find cmd BO %d", boh);
>>> return -ENOENT;
>>> }
>>> - /* All sub-cmd should have same op, use the first one. */
>>> - if (i == 0)
>>> - op = amdxdna_cmd_get_op(abo);
>>> -
>>> - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset,
>>> abo, &size);
>>> + size = cmdbuf_abo->mem.size - offset;
>>> + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva + offset,
>>> + abo, &size, &op);
>>> amdxdna_gem_put_obj(abo);
>>> if (ret)
>>> - return -EINVAL;
>>> + return ret;
>>> offset += size;
>>> }
>>> + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
>>> + if (msg.opcode == MSG_OP_MAX_OPCODE)
>>> + return -EOPNOTSUPP;
>>> /* The offset is the accumulated total size of the cmd
>>> buffer */
>>> - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset,
>>> payload->command_count);
>>> + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
>>> + offset, payload->command_count);
>>> + drm_clflush_virt_range(cmdbuf_abo->mem.kva, offset);
>>> - msg.opcode = aie2_cmd_op_to_msg_op(op);
>>> - if (msg.opcode == MSG_OP_MAX_OPCODE)
>>> - return -EOPNOTSUPP;
>>> msg.handle = job;
>>> msg.notify_cb = notify_cb;
>>> msg.send_data = (u8 *)&req;
>>> msg.send_size = sizeof(req);
>>> ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>>> if (ret) {
>>> - XDNA_ERR(hwctx->client->xdna, "Send message failed");
>>> + XDNA_ERR(xdna, "Send message failed");
>>> return ret;
>>> }
>>> @@ -754,23 +879,27 @@ int aie2_cmdlist_single_execbuf(struct
>>> amdxdna_hwctx *hwctx,
>>> {
>>> struct amdxdna_gem_obj *cmdbuf_abo =
>>> aie2_cmdlist_get_cmd_buf(job);
>>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>>> + struct amdxdna_dev *xdna = hwctx->client->xdna;
>>> struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
>>> struct xdna_mailbox_msg msg;
>>> - struct cmd_chain_req req;
>>> - u32 size;
>>> + union exec_chain_req req;
>>> + u32 op = ERT_INVALID_CMD;
>>> + size_t size;
>>> int ret;
>>> - u32 op;
>>> - op = amdxdna_cmd_get_op(cmd_abo);
>>> - ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo,
>>> &size);
>>> + size = cmdbuf_abo->mem.size;
>>> + ret = aie2_cmdlist_fill_slot(cmdbuf_abo->mem.kva, cmd_abo,
>>> &size, &op);
>>> if (ret)
>>> return ret;
>>> - aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
>>> -
>>> - msg.opcode = aie2_cmd_op_to_msg_op(op);
>>> + msg.opcode = EXEC_MSG_OPS(xdna)->get_chain_msg_op(op);
>>> if (msg.opcode == MSG_OP_MAX_OPCODE)
>>> return -EOPNOTSUPP;
>>> +
>>> + EXEC_MSG_OPS(xdna)->init_chain_req(&req, cmdbuf_abo->mem.dev_addr,
>>> + size, 1);
>>> + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
>>> +
>>> msg.handle = job;
>>> msg.notify_cb = notify_cb;
>>> msg.send_data = (u8 *)&req;
>>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h
>>> b/drivers/accel/amdxdna/aie2_msg_priv.h
>>> index 2dbea1d09980..947daa63f064 100644
>>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>>> @@ -20,6 +20,7 @@ enum aie2_msg_opcode {
>>> MSG_OP_CHAIN_EXEC_BUFFER_CF = 0x12,
>>> MSG_OP_CHAIN_EXEC_DPU = 0x13,
>>> MSG_OP_CONFIG_DEBUG_BO = 0x14,
>>> + MSG_OP_CHAIN_EXEC_NPU = 0x18,
>>> MSG_OP_MAX_XRT_OPCODE,
>>> MSG_OP_SUSPEND = 0x101,
>>> MSG_OP_RESUME = 0x102,
>>> @@ -172,6 +173,16 @@ struct exec_dpu_req {
>>> __u32 payload[35];
>>> } __packed;
>>> +enum exec_npu_type {
>>> + EXEC_NPU_TYPE_NON_ELF = 0x1,
>>> + EXEC_NPU_TYPE_PARTIAL_ELF = 0x2,
>>> +};
>>> +
>>> +union exec_req {
>>> + struct execute_buffer_req ebuf;
>>> + struct exec_dpu_req dpu_req;
>>> +};
>>> +
>>> struct execute_buffer_resp {
>>> enum aie2_msg_status status;
>>> } __packed;
>>> @@ -343,9 +354,6 @@ struct async_event_msg_resp {
>>> } __packed;
>>> #define MAX_CHAIN_CMDBUF_SIZE SZ_4K
>>> -#define slot_has_space(slot, offset, payload_size) \
>>> - (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
>>> - sizeof(typeof(slot)))
>>> struct cmd_chain_slot_execbuf_cf {
>>> __u32 cu_idx;
>>> @@ -363,12 +371,40 @@ struct cmd_chain_slot_dpu {
>>> __u32 args[] __counted_by(arg_cnt);
>>> };
>>> +#define MAX_NPU_ARGS_SIZE (26 * sizeof(__u32))
>>> +struct cmd_chain_slot_npu {
>>> + enum exec_npu_type type;
>>> + u64 inst_buf_addr;
>>> + u64 save_buf_addr;
>>> + u64 restore_buf_addr;
>>> + u32 inst_size;
>>> + u32 save_size;
>>> + u32 restore_size;
>>> + u32 inst_prop_cnt;
>>> + u32 cu_idx;
>>> + u32 arg_cnt;
>>> + u32 args[] __counted_by(arg_cnt);
>>> +} __packed;
>>> +
>>> struct cmd_chain_req {
>>> __u64 buf_addr;
>>> __u32 buf_size;
>>> __u32 count;
>>> } __packed;
>>> +struct cmd_chain_npu_req {
>>> + u32 flags;
>>> + u32 reserved;
>>> + u64 buf_addr;
>>> + u32 buf_size;
>>> + u32 count;
>>> +} __packed;
>>> +
>>> +union exec_chain_req {
>>> + struct cmd_chain_npu_req npu_req;
>>> + struct cmd_chain_req req;
>>> +};
>>> +
>>> struct cmd_chain_resp {
>>> enum aie2_msg_status status;
>>> __u32 fail_cmd_idx;
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c
>>> b/drivers/accel/amdxdna/aie2_pci.c
>>> index 80313a2a98d4..d7ccbdaf47f5 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>> @@ -55,6 +55,7 @@ struct mgmt_mbox_chann_info {
>>> static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32
>>> fw_major, u32 fw_minor)
>>> {
>>> + const struct aie2_fw_feature_tbl *feature;
>>> struct amdxdna_dev *xdna = ndev->xdna;
>>> /*
>>> @@ -78,6 +79,17 @@ static int aie2_check_protocol(struct
>>> amdxdna_dev_hdl *ndev, u32 fw_major, u32 f
>>> XDNA_ERR(xdna, "Firmware minor version smaller than
>>> supported");
>>> return -EINVAL;
>>> }
>>> +
>>> + for (feature = ndev->priv->fw_feature_tbl; feature &&
>>> feature->min_minor;
>>> + feature++) {
>>> + if (fw_minor < feature->min_minor)
>>> + continue;
>>> + if (feature->max_minor > 0 && fw_minor > feature->max_minor)
>>> + continue;
>>> +
>>> + set_bit(feature->feature, &ndev->feature_mask);
>>> + }
>>> +
>>> return 0;
>>> }
>>> @@ -587,6 +599,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>> }
>>> release_firmware(fw);
>>> + aie2_msg_init(ndev);
>>> amdxdna_pm_init(xdna);
>>> return 0;
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h
>>> b/drivers/accel/amdxdna/aie2_pci.h
>>> index cfe42b0d4242..d0a3cb1fe8be 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>> @@ -156,6 +156,17 @@ enum aie2_dev_status {
>>> AIE2_DEV_START,
>>> };
>>> +struct aie2_exec_msg_ops {
>>> + int (*init_cu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
>>> + size_t *size, u32 *msg_op);
>>> + int (*init_dpu_req)(struct amdxdna_gem_obj *cmd_bo, void *req,
>>> + size_t *size, u32 *msg_op);
>>> + void (*init_chain_req)(void *req, u64 slot_addr, size_t size,
>>> u32 cmd_cnt);
>>> + int (*fill_cf_slot)(struct amdxdna_gem_obj *cmd_bo, void *slot,
>>> size_t *size);
>>> + int (*fill_dpu_slot)(struct amdxdna_gem_obj *cmd_bo, void
>>> *slot, size_t *size);
>>> + u32 (*get_chain_msg_op)(u32 cmd_op);
>>> +};
>>> +
>>> struct amdxdna_dev_hdl {
>>> struct amdxdna_dev *xdna;
>>> const struct amdxdna_dev_priv *priv;
>>> @@ -173,6 +184,8 @@ struct amdxdna_dev_hdl {
>>> u32 total_col;
>>> struct aie_version version;
>>> struct aie_metadata metadata;
>>> + unsigned long feature_mask;
>>> + struct aie2_exec_msg_ops *exec_msg_ops;
>>> /* power management and clock*/
>>> enum amdxdna_power_mode_type pw_mode;
>>> @@ -208,12 +221,26 @@ struct aie2_hw_ops {
>>> int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>>> };
>>> +enum aie2_fw_feature {
>>> + AIE2_NPU_COMMAND,
>>> + AIE2_FEATURE_MAX
>>> +};
>>> +
>>> +struct aie2_fw_feature_tbl {
>>> + enum aie2_fw_feature feature;
>>> + u32 max_minor;
>>> + u32 min_minor;
>>> +};
>>> +
>>> +#define AIE2_FEATURE_ON(ndev, feature) test_bit(feature,
>>> &(ndev)->feature_mask)
>>> +
>>> struct amdxdna_dev_priv {
>>> const char *fw_path;
>>> u64 protocol_major;
>>> u64 protocol_minor;
>>> const struct rt_config *rt_config;
>>> const struct dpm_clk_freq *dpm_clk_tbl;
>>> + const struct aie2_fw_feature_tbl *fw_feature_tbl;
>>> #define COL_ALIGN_NONE 0
>>> #define COL_ALIGN_NATURE 1
>>> @@ -239,6 +266,7 @@ extern const struct dpm_clk_freq
>>> npu1_dpm_clk_table[];
>>> extern const struct dpm_clk_freq npu4_dpm_clk_table[];
>>> extern const struct rt_config npu1_default_rt_cfg[];
>>> extern const struct rt_config npu4_default_rt_cfg[];
>>> +extern const struct aie2_fw_feature_tbl npu4_fw_feature_table[];
>>> /* aie2_smu.c */
>>> int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
>>> @@ -263,6 +291,7 @@ int aie2_get_array_async_error(struct
>>> amdxdna_dev_hdl *ndev,
>>> struct amdxdna_drm_get_array *args);
>>> /* aie2_message.c */
>>> +void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
>>> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
>>> int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
>>> int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type,
>>> u64 value);
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c
>>> b/drivers/accel/amdxdna/amdxdna_ctx.c
>>> index d18182c59668..878cc955f56d 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>> @@ -113,14 +113,14 @@ void *amdxdna_cmd_get_payload(struct
>>> amdxdna_gem_obj *abo, u32 *size)
>>> return &cmd->data[num_masks];
>>> }
>>> -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
>>> +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
>>> {
>>> struct amdxdna_cmd *cmd = abo->mem.kva;
>>> u32 num_masks, i;
>>> u32 *cu_mask;
>>> if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
>>> - return -1;
>>> + return INVALID_CU_IDX;
>>> num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK,
>>> cmd->header);
>>> cu_mask = cmd->data;
>>> @@ -129,7 +129,7 @@ int amdxdna_cmd_get_cu_idx(struct
>>> amdxdna_gem_obj *abo)
>>> return ffs(cu_mask[i]) - 1;
>>> }
>>> - return -1;
>>> + return INVALID_CU_IDX;
>>> }
>>> /*
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h
>>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>>> index 919c654dfea6..1aa2b938e07b 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>>> @@ -13,9 +13,10 @@
>>> struct amdxdna_hwctx_priv;
>>> enum ert_cmd_opcode {
>>> - ERT_START_CU = 0,
>>> - ERT_CMD_CHAIN = 19,
>>> - ERT_START_NPU = 20,
>>> + ERT_INVALID_CMD = ~0U,
>> ~0U > 20, shouldn't this be at the end of the enum?
>>> + ERT_START_CU = 0,
>>> + ERT_CMD_CHAIN = 19,
>>> + ERT_START_NPU = 20,
>>> };
>>> enum ert_cmd_state {
>>> @@ -64,6 +65,8 @@ struct amdxdna_cmd {
>>> u32 data[];
>>> };
>>> +#define INVALID_CU_IDX (~0U)
>>> +
>>> struct amdxdna_hwctx {
>>> struct amdxdna_client *client;
>>> struct amdxdna_hwctx_priv *priv;
>>> @@ -150,7 +153,7 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
>>> }
>>> void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32
>>> *size);
>>> -int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
>>> +u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
>>> void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
>>> void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
>>> diff --git a/drivers/accel/amdxdna/npu1_regs.c
>>> b/drivers/accel/amdxdna/npu1_regs.c
>>> index 23feb5f6fad3..ffc2e7c7b523 100644
>>> --- a/drivers/accel/amdxdna/npu1_regs.c
>>> +++ b/drivers/accel/amdxdna/npu1_regs.c
>>> @@ -63,12 +63,18 @@ const struct dpm_clk_freq npu1_dpm_clk_table[] = {
>>> { 0 }
>>> };
>>> +static const struct aie2_fw_feature_tbl npu1_fw_feature_table[] = {
>>> + { .feature = AIE2_NPU_COMMAND, .min_minor = 8 },
>>> + { 0 }
>>> +};
>>> +
>>> static const struct amdxdna_dev_priv npu1_dev_priv = {
>>> .fw_path = "amdnpu/1502_00/npu.sbin",
>>> .protocol_major = 0x5,
>>> .protocol_minor = 0x7,
>>> .rt_config = npu1_default_rt_cfg,
>>> .dpm_clk_tbl = npu1_dpm_clk_table,
>>> + .fw_feature_tbl = npu1_fw_feature_table,
>>> .col_align = COL_ALIGN_NONE,
>>> .mbox_dev_addr = NPU1_MBOX_BAR_BASE,
>>> .mbox_size = 0, /* Use BAR size */
>>> diff --git a/drivers/accel/amdxdna/npu2_regs.c
>>> b/drivers/accel/amdxdna/npu2_regs.c
>>> index 67c2ae931c62..5fbfdcc3762d 100644
>>> --- a/drivers/accel/amdxdna/npu2_regs.c
>>> +++ b/drivers/accel/amdxdna/npu2_regs.c
>>> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu2_dev_priv
>>> = {
>>> .protocol_minor = 0x6,
>>> .rt_config = npu4_default_rt_cfg,
>>> .dpm_clk_tbl = npu4_dpm_clk_table,
>>> + .fw_feature_tbl = npu4_fw_feature_table,
>>> .col_align = COL_ALIGN_NATURE,
>>> .mbox_dev_addr = NPU2_MBOX_BAR_BASE,
>>> .mbox_size = 0, /* Use BAR size */
>>> diff --git a/drivers/accel/amdxdna/npu4_regs.c
>>> b/drivers/accel/amdxdna/npu4_regs.c
>>> index fac6c1b0b74b..79aba12acfde 100644
>>> --- a/drivers/accel/amdxdna/npu4_regs.c
>>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>>> @@ -83,12 +83,18 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>> { 0 }
>>> };
>>> +const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
>>> + { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
>>> + { 0 }
>>> +};
>>> +
>>> static const struct amdxdna_dev_priv npu4_dev_priv = {
>>> .fw_path = "amdnpu/17f0_10/npu.sbin",
>>> .protocol_major = 0x6,
>>> .protocol_minor = 12,
>>> .rt_config = npu4_default_rt_cfg,
>>> .dpm_clk_tbl = npu4_dpm_clk_table,
>>> + .fw_feature_tbl = npu4_fw_feature_table,
>>> .col_align = COL_ALIGN_NATURE,
>>> .mbox_dev_addr = NPU4_MBOX_BAR_BASE,
>>> .mbox_size = 0, /* Use BAR size */
>>> diff --git a/drivers/accel/amdxdna/npu5_regs.c
>>> b/drivers/accel/amdxdna/npu5_regs.c
>>> index c91e1fa76ff5..c5e259ab9f49 100644
>>> --- a/drivers/accel/amdxdna/npu5_regs.c
>>> +++ b/drivers/accel/amdxdna/npu5_regs.c
>>> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv
>>> = {
>>> .protocol_minor = 12,
>>> .rt_config = npu4_default_rt_cfg,
>>> .dpm_clk_tbl = npu4_dpm_clk_table,
>>> + .fw_feature_tbl = npu4_fw_feature_table,
>>> .col_align = COL_ALIGN_NATURE,
>>> .mbox_dev_addr = NPU5_MBOX_BAR_BASE,
>>> .mbox_size = 0, /* Use BAR size */
>>> diff --git a/drivers/accel/amdxdna/npu6_regs.c
>>> b/drivers/accel/amdxdna/npu6_regs.c
>>> index 773f738915a7..2de63b44d6e7 100644
>>> --- a/drivers/accel/amdxdna/npu6_regs.c
>>> +++ b/drivers/accel/amdxdna/npu6_regs.c
>>> @@ -67,6 +67,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv
>>> = {
>>> .protocol_minor = 12,
>>> .rt_config = npu4_default_rt_cfg,
>>> .dpm_clk_tbl = npu4_dpm_clk_table,
>>> + .fw_feature_tbl = npu4_fw_feature_table,
>>> .col_align = COL_ALIGN_NATURE,
>>> .mbox_dev_addr = NPU6_MBOX_BAR_BASE,
>>> .mbox_size = 0, /* Use BAR size */
>>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-11-03 18:10 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-31 1:47 [PATCH V1] accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported Lizhi Hou
2025-10-31 5:10 ` Mario Limonciello (AMD) (kernel.org)
2025-10-31 15:15 ` Lizhi Hou
2025-11-03 18:10 ` Lizhi Hou
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).