* [PATCH v10 1/9] iommu/arm-smmu-v3: Issue a batch of commands to the same cmdq
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 2/9] iommu/arm-smmu-v3: Enforce arm_smmu_cmdq_build_sync_cmd Nicolin Chen
` (7 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
The driver calls the arm_smmu_get_cmdq() helper in several different
places, which is fine today since the helper always returns the single
SMMU CMDQ. However, with the NVIDIA CMDQV extension or SMMU ECMDQ, there
can be multiple cmdqs in the system to select from, and in either case a
batch of commands must be issued to the same cmdq. Thus, the cmdq has to
be decided in the higher-level callers.
Add a cmdq pointer in arm_smmu_cmdq_batch structure, and decide the cmdq
when initializing the batch. Pass its pointer down to the bottom function.
Update __arm_smmu_cmdq_issue_cmd() accordingly for single command issuers.
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 44 +++++++++++++--------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 +
2 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index a31460f9f3d42..f409ead589ffc 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -592,11 +592,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
unsigned long flags;
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
int ret = 0;
/*
@@ -627,11 +627,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
int ret = 0;
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
queue_poll_init(smmu, &qp);
@@ -651,10 +651,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 prod = llq->prod;
int ret = 0;
@@ -701,12 +701,13 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
}
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
- return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
+ return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
- return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
+ return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
@@ -743,13 +744,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
* CPU will appear before any of the commands from the other CPU.
*/
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
u64 *cmds, int n, bool sync)
{
u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod;
unsigned long flags;
bool owner;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
struct arm_smmu_ll_queue llq, head;
int ret = 0;
@@ -763,7 +764,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags);
- if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+ if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags);
}
@@ -839,7 +840,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
if (sync) {
llq.prod = queue_inc_prod_n(&llq, n);
- ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+ ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
if (ret) {
dev_err_ratelimited(smmu->dev,
"CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
@@ -874,7 +875,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
return -EINVAL;
}
- return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
+ return arm_smmu_cmdq_issue_cmdlist(
+ smmu, arm_smmu_get_cmdq(smmu), cmd, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -889,6 +891,13 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}
+static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds)
+{
+ cmds->num = 0;
+ cmds->cmdq = arm_smmu_get_cmdq(smmu);
+}
+
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *cmd)
@@ -897,12 +906,14 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
cmds->num = 0;
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, false);
cmds->num = 0;
}
@@ -919,7 +930,8 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds)
{
- return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
+ return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
}
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
@@ -1170,7 +1182,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
},
};
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu, &cmds);
for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
@@ -2021,7 +2033,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(master->smmu, &cmds);
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
@@ -2059,7 +2071,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
if (!atomic_read(&smmu_domain->nr_ats_masters))
return 0;
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master_domain, &smmu_domain->devices,
@@ -2141,7 +2153,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
num_pages++;
}
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu, &cmds);
while (iova < end) {
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 14bca41a981b4..c1454e9758c48 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -570,6 +570,7 @@ struct arm_smmu_cmdq {
struct arm_smmu_cmdq_batch {
u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
+ struct arm_smmu_cmdq *cmdq;
int num;
};
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v10 2/9] iommu/arm-smmu-v3: Enforce arm_smmu_cmdq_build_sync_cmd
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 1/9] iommu/arm-smmu-v3: Issue a batch of commands to the same cmdq Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 3/9] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_build_sync_cmd Nicolin Chen
` (6 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
There is an existing arm_smmu_cmdq_build_sync_cmd(), so the driver should
call it everywhere rather than going through arm_smmu_cmdq_build_cmd()
separately. This helps the following patch that adds a CS_NONE option.
Note that this changes the type of CMD_SYNC in __arm_smmu_cmdq_skip_err,
in the ARM_SMMU_OPT_MSIPOLL=true case, from a non-MSI one to an MSI one.
This has been verified to still work using a hacked test:
nvme: Adding to iommu group 10
nvme: --------hacking-----------
arm-smmu-v3: unexpected global error reported (0x00000001),
this could be serious
arm-smmu-v3: CMDQ error (cons 0x01000022): Illegal command
arm-smmu-v3: skipping command in error state:
arm-smmu-v3: 0x0000000000000000
arm-smmu-v3: 0x0000000000000000
nvme: -------recovered----------
nvme nvme0: 72/0/0 default/read/poll queues
nvme0n1: p1 p2
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 36 ++++++++-------------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 --
2 files changed, 13 insertions(+), 26 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index f409ead589ffc..f481d7be3d4ec 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -329,16 +329,6 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
break;
- case CMDQ_OP_CMD_SYNC:
- if (ent->sync.msiaddr) {
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
- cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
- } else {
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
- }
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
- cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
- break;
default:
return -ENOENT;
}
@@ -354,20 +344,23 @@ static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
struct arm_smmu_queue *q, u32 prod)
{
- struct arm_smmu_cmdq_ent ent = {
- .opcode = CMDQ_OP_CMD_SYNC,
- };
+ cmd[1] = 0;
+ cmd[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
+ FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
+ FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+
+ if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+ return;
+ }
/*
* Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
* payload, so the write will zero the entire command on that platform.
*/
- if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
- ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
- q->ent_dwords * 8;
- }
-
- arm_smmu_cmdq_build_cmd(cmd, &ent);
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
+ cmd[1] |= (q->base_dma + Q_IDX(&q->llq, prod) * q->ent_dwords * 8) &
+ CMDQ_SYNC_1_MSIADDR_MASK;
}
static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
@@ -384,9 +377,6 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
u64 cmd[CMDQ_ENT_DWORDS];
u32 cons = readl_relaxed(q->cons_reg);
u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
- struct arm_smmu_cmdq_ent cmd_sync = {
- .opcode = CMDQ_OP_CMD_SYNC,
- };
dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
@@ -420,7 +410,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
/* Convert the erroneous command into a CMD_SYNC */
- arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
+ arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, cons);
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index c1454e9758c48..6c5739f6b90f5 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -518,9 +518,6 @@ struct arm_smmu_cmdq_ent {
} resume;
#define CMDQ_OP_CMD_SYNC 0x46
- struct {
- u64 msiaddr;
- } sync;
};
};
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v10 3/9] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_build_sync_cmd
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 1/9] iommu/arm-smmu-v3: Issue a batch of commands to the same cmdq Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 2/9] iommu/arm-smmu-v3: Enforce arm_smmu_cmdq_build_sync_cmd Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 4/9] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_init Nicolin Chen
` (5 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
The CMDQV extension on NVIDIA Tegra241 SoC only supports CS_NONE in the
CS field of CMD_SYNC, unlike the standard SMMU CMDQ. Pass in the cmdq
pointer directly, so the function can identify a different cmdq
implementation.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index f481d7be3d4ec..d11362e9fc8a0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -342,8 +342,10 @@ static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
}
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q, u32 prod)
+ struct arm_smmu_cmdq *cmdq, u32 prod)
{
+ struct arm_smmu_queue *q = &cmdq->q;
+
cmd[1] = 0;
cmd[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
@@ -364,7 +366,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
}
static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q)
+ struct arm_smmu_cmdq *cmdq)
{
static const char * const cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
@@ -372,6 +374,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
[CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
[CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
};
+ struct arm_smmu_queue *q = &cmdq->q;
int i;
u64 cmd[CMDQ_ENT_DWORDS];
@@ -410,14 +413,14 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
/* Convert the erroneous command into a CMD_SYNC */
- arm_smmu_cmdq_build_sync_cmd(cmd, smmu, q, cons);
+ arm_smmu_cmdq_build_sync_cmd(cmd, smmu, cmdq, cons);
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
- __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
+ __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
}
/*
@@ -780,7 +783,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
if (sync) {
prod = queue_inc_prod_n(&llq, n);
- arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+ arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/*
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v10 4/9] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_init
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
` (2 preceding siblings ...)
2024-07-28 22:44 ` [PATCH v10 3/9] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_build_sync_cmd Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 5/9] iommu/arm-smmu-v3: Make symbols public for CONFIG_TEGRA241_CMDQV Nicolin Chen
` (4 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
Pass in the cmdq pointer, so that this function can be used for cmdqs
other than &smmu->cmdq.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index d11362e9fc8a0..e664c40b14aee 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3555,9 +3555,9 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
return 0;
}
-static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
+static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
{
- struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
atomic_set(&cmdq->owner_prod, 0);
@@ -3582,7 +3582,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
if (ret)
return ret;
- ret = arm_smmu_cmdq_init(smmu);
+ ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
if (ret)
return ret;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v10 5/9] iommu/arm-smmu-v3: Make symbols public for CONFIG_TEGRA241_CMDQV
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
` (3 preceding siblings ...)
2024-07-28 22:44 ` [PATCH v10 4/9] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_init Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 6/9] iommu/arm-smmu-v3: Add ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY Nicolin Chen
` (3 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
The symbols __arm_smmu_cmdq_skip_err(), arm_smmu_init_one_queue(), and
arm_smmu_cmdq_init() need to be used by the tegra241-cmdqv compilation
unit in a following patch.
Remove the static and put prototypes in the header.
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 ++++++++----------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 +++++++++
2 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e664c40b14aee..df11490958606 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -365,8 +365,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
CMDQ_SYNC_1_MSIADDR_MASK;
}
-static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq *cmdq)
+void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
{
static const char * const cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
@@ -3512,12 +3512,10 @@ static struct iommu_dirty_ops arm_smmu_dirty_ops = {
};
/* Probing and initialisation functions */
-static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q,
- void __iomem *page,
- unsigned long prod_off,
- unsigned long cons_off,
- size_t dwords, const char *name)
+int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
+ struct arm_smmu_queue *q, void __iomem *page,
+ unsigned long prod_off, unsigned long cons_off,
+ size_t dwords, const char *name)
{
size_t qsz;
@@ -3555,8 +3553,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
return 0;
}
-static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq *cmdq)
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
{
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 6c5739f6b90f5..6c5dc2f10a330 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -813,6 +813,15 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
unsigned long iova, size_t size);
+void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq);
+int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
+ struct arm_smmu_queue *q, void __iomem *page,
+ unsigned long prod_off, unsigned long cons_off,
+ size_t dwords, const char *name);
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq);
+
#ifdef CONFIG_ARM_SMMU_V3_SVA
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v10 6/9] iommu/arm-smmu-v3: Add ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
` (4 preceding siblings ...)
2024-07-28 22:44 ` [PATCH v10 5/9] iommu/arm-smmu-v3: Make symbols public for CONFIG_TEGRA241_CMDQV Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 7/9] iommu/arm-smmu-v3: Add struct arm_smmu_impl Nicolin Chen
` (2 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
CS field of CMD_SYNC. Add a new SMMU option to accommodate that.
Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 17 ++++++++++++++++-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 +++++----
2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index df11490958606..e764236a92166 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -341,6 +341,15 @@ static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
return &smmu->cmdq;
}
+static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
+{
+ if (cmdq == &smmu->cmdq)
+ return false;
+
+ return smmu->options & ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY;
+}
+
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq, u32 prod)
{
@@ -351,6 +360,11 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) {
+ cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
+ return;
+ }
+
if (!(smmu->options & ARM_SMMU_OPT_MSIPOLL)) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
return;
@@ -697,7 +711,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
- if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
+ if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
+ !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 6c5dc2f10a330..71818f5860364 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -658,10 +658,11 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_HD (1 << 22)
u32 features;
-#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
-#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
-#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
-#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
+#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
+#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
+#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
+#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
+#define ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY (1 << 4)
u32 options;
struct arm_smmu_cmdq cmdq;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v10 7/9] iommu/arm-smmu-v3: Add struct arm_smmu_impl
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
` (5 preceding siblings ...)
2024-07-28 22:44 ` [PATCH v10 6/9] iommu/arm-smmu-v3: Add ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 8/9] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV Nicolin Chen
2024-07-28 22:44 ` [PATCH v10 9/9] iommu/tegra241-cmdqv: Limit CMDs for guest owned VINTF Nicolin Chen
8 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
NVIDIA Tegra241 implemented SMMU in a slightly different way that supports
a CMDQV extension feature as a secondary CMDQ for virtualization cases.
Mimicking the arm-smmu (v2) driver, introduce a new struct arm_smmu_impl to
accommodate impl routines.
Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 67 +++++++++++++++++----
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 19 ++++++
2 files changed, 74 insertions(+), 12 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e764236a92166..18d940c65e2ca 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -338,7 +338,12 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
{
- return &smmu->cmdq;
+ struct arm_smmu_cmdq *cmdq = NULL;
+
+ if (smmu->impl && smmu->impl->get_secondary_cmdq)
+ cmdq = smmu->impl->get_secondary_cmdq(smmu);
+
+ return cmdq ?: &smmu->cmdq;
}
static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
@@ -4044,6 +4049,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
return ret;
}
+ if (smmu->impl && smmu->impl->device_reset) {
+ ret = smmu->impl->device_reset(smmu);
+ if (ret) {
+ dev_err(smmu->dev, "failed to reset impl\n");
+ return ret;
+ }
+ }
+
return 0;
}
@@ -4347,8 +4360,23 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
}
-static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
- struct arm_smmu_device *smmu)
+static struct arm_smmu_device *
+arm_smmu_impl_acpi_probe(struct arm_smmu_device *smmu,
+ struct acpi_iort_node *node)
+{
+ /*
+ * DSDT might hold some SMMU extension, so we have no option but to go
+ * through the ACPI tables unconditionally. On success, this returns a
+ * copy of smmu struct holding an impl pointer. Otherwise, an impl may
+ * choose to return an ERR_PTR as an error out, or to return the pass-
+ * in smmu pointer as a fallback to the standard SMMU.
+ */
+ return arm_smmu_impl_acpi_dsdt_probe(smmu, node);
+}
+
+static struct arm_smmu_device *
+arm_smmu_device_acpi_probe(struct platform_device *pdev,
+ struct arm_smmu_device *smmu)
{
struct acpi_iort_smmu_v3 *iort_smmu;
struct device *dev = smmu->dev;
@@ -4372,18 +4400,20 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
smmu->features |= ARM_SMMU_FEAT_HA;
}
- return 0;
+ return arm_smmu_impl_acpi_probe(smmu, node);
}
#else
-static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
- struct arm_smmu_device *smmu)
+static struct arm_smmu_device *
+arm_smmu_device_acpi_probe(struct platform_device *pdev,
+ struct arm_smmu_device *smmu)
{
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
}
#endif
-static int arm_smmu_device_dt_probe(struct platform_device *pdev,
- struct arm_smmu_device *smmu)
+static struct arm_smmu_device *
+arm_smmu_device_dt_probe(struct platform_device *pdev,
+ struct arm_smmu_device *smmu)
{
struct device *dev = &pdev->dev;
u32 cells;
@@ -4401,7 +4431,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
if (of_dma_is_coherent(dev->of_node))
smmu->features |= ARM_SMMU_FEAT_COHERENCY;
- return ret;
+ return ret ? ERR_PTR(ret) : smmu;
}
static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
@@ -4453,6 +4483,14 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}
+static void arm_smmu_impl_remove(void *data)
+{
+ struct arm_smmu_device *smmu = data;
+
+ if (smmu->impl && smmu->impl->device_remove)
+ smmu->impl->device_remove(smmu);
+}
+
static int arm_smmu_device_probe(struct platform_device *pdev)
{
int irq, ret;
@@ -4467,10 +4505,14 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
smmu->dev = dev;
if (dev->of_node) {
- ret = arm_smmu_device_dt_probe(pdev, smmu);
+ smmu = arm_smmu_device_dt_probe(pdev, smmu);
} else {
- ret = arm_smmu_device_acpi_probe(pdev, smmu);
+ smmu = arm_smmu_device_acpi_probe(pdev, smmu);
}
+ if (IS_ERR(smmu))
+ return PTR_ERR(smmu);
+
+ ret = devm_add_action_or_reset(dev, arm_smmu_impl_remove, smmu);
if (ret)
return ret;
@@ -4560,6 +4602,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev)
{
struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
+ arm_smmu_impl_remove(smmu);
iommu_device_unregister(&smmu->iommu);
iommu_device_sysfs_remove(&smmu->iommu);
arm_smmu_device_disable(smmu);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 71818f5860364..38d4a84e2c821 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -14,6 +14,9 @@
#include <linux/mmzone.h>
#include <linux/sizes.h>
+struct arm_smmu_device;
+struct acpi_iort_node;
+
/* MMIO registers */
#define ARM_SMMU_IDR0 0x0
#define IDR0_ST_LVL GENMASK(28, 27)
@@ -627,9 +630,25 @@ struct arm_smmu_strtab_cfg {
u32 strtab_base_cfg;
};
+struct arm_smmu_impl {
+ int (*device_reset)(struct arm_smmu_device *smmu);
+ void (*device_remove)(struct arm_smmu_device *smmu);
+ struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu);
+};
+
+static inline struct arm_smmu_device *
+arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu,
+ struct acpi_iort_node *node)
+{
+ return smmu;
+}
+
/* An SMMUv3 instance */
struct arm_smmu_device {
struct device *dev;
+ /* An SMMUv3 implementation */
+ const struct arm_smmu_impl *impl;
+
void __iomem *base;
void __iomem *page1;
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v10 8/9] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
` (6 preceding siblings ...)
2024-07-28 22:44 ` [PATCH v10 7/9] iommu/arm-smmu-v3: Add struct arm_smmu_impl Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
2024-07-29 14:01 ` kernel test robot
2024-07-28 22:44 ` [PATCH v10 9/9] iommu/tegra241-cmdqv: Limit CMDs for guest owned VINTF Nicolin Chen
8 siblings, 1 reply; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
From: Nate Watterson <nwatterson@nvidia.com>
NVIDIA's Tegra241 SoC has CMDQ-Virtualization (CMDQV) hardware, extending
the standard ARM SMMU v3 IP to support multiple VCMDQs with virtualization
capabilities. In terms of command queue, they are very like a standard SMMU
CMDQ (or ECMDQs), but only support CS_NONE in the CS field of CMD_SYNC.
Add a new tegra241-cmdqv driver, and insert its structure pointer into the
existing arm_smmu_device, and then add related function calls in the SMMUv3
driver to interact with the CMDQV driver.
In the CMDQV driver, add a minimal part for the in-kernel support: reserve
VINTF0 for in-kernel use, and assign some of the VCMDQs to the VINTF0, and
select one VCMDQ based on the current CPU ID to execute supported commands.
This multi-queue design for in-kernel use gives some limited improvements:
up to 20% reduction of invalidation time was measured by a multi-threaded
DMA unmap benchmark, compared to a single queue.
The other part of the CMDQV driver will be user-space support that allows a
hypervisor running on the host OS to talk to the driver for virtualization
use cases, letting VMs use VCMDQs without trapping, i.e. no VM Exits.
This is designed based on IOMMUFD, and its RFC series is also under review.
It will provide a guest OS a bigger improvement: 70% to 90% reductions of
TLB invalidation time were measured by DMA unmap tests running in a guest,
compared to nested SMMU CMDQ (with trappings).
As the initial version, the CMDQV driver only supports ACPI configurations.
Signed-off-by: Nate Watterson <nwatterson@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
MAINTAINERS | 1 +
drivers/iommu/Kconfig | 11 +
drivers/iommu/arm/arm-smmu-v3/Makefile | 1 +
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 +
.../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 867 ++++++++++++++++++
5 files changed, 889 insertions(+)
create mode 100644 drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 42decde383206..fb9ab8909618b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22471,6 +22471,7 @@ M: Thierry Reding <thierry.reding@gmail.com>
R: Krishna Reddy <vdumpa@nvidia.com>
L: linux-tegra@vger.kernel.org
S: Supported
+F: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
F: drivers/iommu/tegra*
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index a82f10054aec8..22addaedf64df 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -424,6 +424,17 @@ config ARM_SMMU_V3_KUNIT_TEST
Enable this option to unit-test arm-smmu-v3 driver functions.
If unsure, say N.
+
+config TEGRA241_CMDQV
+ bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
+ depends on ACPI
+ help
+ Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
+ CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
+ support, except with virtualization capabilities.
+
+ Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same
+ CMDQ-V extension.
endif
config S390_IOMMU
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 355173d1441d2..dc98c88b48c82 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -2,5 +2,6 @@
obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
arm_smmu_v3-y := arm-smmu-v3.o
arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
+arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o
obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 38d4a84e2c821..367f5e160af4e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -636,10 +636,19 @@ struct arm_smmu_impl {
struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu);
};
+#ifdef CONFIG_TEGRA241_CMDQV
+struct arm_smmu_device *
+tegra241_cmdqv_acpi_dsdt_probe(struct arm_smmu_device *smmu,
+ struct acpi_iort_node *node);
+#endif
+
static inline struct arm_smmu_device *
arm_smmu_impl_acpi_dsdt_probe(struct arm_smmu_device *smmu,
struct acpi_iort_node *node)
{
+#ifdef CONFIG_TEGRA241_CMDQV
+ smmu = tegra241_cmdqv_acpi_dsdt_probe(smmu, node);
+#endif
return smmu;
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
new file mode 100644
index 0000000000000..891302fb20901
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2021-2024 NVIDIA CORPORATION & AFFILIATES. */
+
+#define dev_fmt(fmt) "tegra241_cmdqv: " fmt
+
+#include <linux/acpi.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+
+#include <acpi/acpixf.h>
+
+#include "arm-smmu-v3.h"
+
+#define TEGRA241_CMDQV_HID "NVDA200C"
+
+/* CMDQV register page base and size defines */
+#define TEGRA241_CMDQV_CONFIG_BASE (0)
+#define TEGRA241_CMDQV_CONFIG_SIZE (SZ_64K)
+#define TEGRA241_VCMDQ_PAGE0_BASE (TEGRA241_CMDQV_CONFIG_BASE + SZ_64K)
+#define TEGRA241_VCMDQ_PAGE1_BASE (TEGRA241_VCMDQ_PAGE0_BASE + SZ_64K)
+#define TEGRA241_VINTF_PAGE_BASE (TEGRA241_VCMDQ_PAGE1_BASE + SZ_64K)
+
+/* CMDQV global base regs */
+#define TEGRA241_CMDQV_CONFIG 0x0000
+#define CMDQV_EN BIT(0)
+
+#define TEGRA241_CMDQV_PARAM 0x0004
+#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8)
+#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4)
+
+#define TEGRA241_CMDQV_STATUS 0x0008
+#define CMDQV_ENABLED BIT(0)
+
+#define TEGRA241_CMDQV_VINTF_ERR_MAP 0x0014
+#define TEGRA241_CMDQV_VINTF_INT_MASK 0x001C
+#define TEGRA241_CMDQV_CMDQ_ERR_MAP(m) (0x0024 + 0x4*(m))
+
+#define TEGRA241_CMDQV_CMDQ_ALLOC(q) (0x0200 + 0x4*(q))
+#define CMDQV_CMDQ_ALLOC_VINTF GENMASK(20, 15)
+#define CMDQV_CMDQ_ALLOC_LVCMDQ GENMASK(7, 1)
+#define CMDQV_CMDQ_ALLOCATED BIT(0)
+
+/* VINTF base regs */
+#define TEGRA241_VINTF(v) (0x1000 + 0x100*(v))
+
+#define TEGRA241_VINTF_CONFIG 0x0000
+#define VINTF_HYP_OWN BIT(17)
+#define VINTF_VMID GENMASK(16, 1)
+#define VINTF_EN BIT(0)
+
+#define TEGRA241_VINTF_STATUS 0x0004
+#define VINTF_STATUS GENMASK(3, 1)
+#define VINTF_ENABLED BIT(0)
+
+#define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \
+ (0x00C0 + 0x8*(m))
+#define LVCMDQ_ERR_MAP_NUM_64 2
+
+/* VCMDQ base regs */
+/* -- PAGE0 -- */
+#define TEGRA241_VCMDQ_PAGE0(q) (TEGRA241_VCMDQ_PAGE0_BASE + 0x80*(q))
+
+#define TEGRA241_VCMDQ_CONS 0x00000
+#define VCMDQ_CONS_ERR GENMASK(30, 24)
+
+#define TEGRA241_VCMDQ_PROD 0x00004
+
+#define TEGRA241_VCMDQ_CONFIG 0x00008
+#define VCMDQ_EN BIT(0)
+
+#define TEGRA241_VCMDQ_STATUS 0x0000C
+#define VCMDQ_ENABLED BIT(0)
+
+#define TEGRA241_VCMDQ_GERROR 0x00010
+#define TEGRA241_VCMDQ_GERRORN 0x00014
+
+/* -- PAGE1 -- */
+#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q))
+#define VCMDQ_ADDR GENMASK(47, 5)
+#define VCMDQ_LOG2SIZE GENMASK(4, 0)
+
+#define TEGRA241_VCMDQ_BASE 0x00000
+#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008
+
+/* VINTF logical-VCMDQ pages */
+#define TEGRA241_VINTFi_PAGE0(i) (TEGRA241_VINTF_PAGE_BASE + SZ_128K*(i))
+#define TEGRA241_VINTFi_PAGE1(i) (TEGRA241_VINTFi_PAGE0(i) + SZ_64K)
+#define TEGRA241_VINTFi_LVCMDQ_PAGE0(i, q) \
+ (TEGRA241_VINTFi_PAGE0(i) + 0x80*(q))
+#define TEGRA241_VINTFi_LVCMDQ_PAGE1(i, q) \
+ (TEGRA241_VINTFi_PAGE1(i) + 0x80*(q))
+
+/* MMIO helpers */
+#define REG_CMDQV(_cmdqv, _regname) \
+ ((_cmdqv)->base + TEGRA241_CMDQV_##_regname)
+#define REG_VINTF(_vintf, _regname) \
+ ((_vintf)->base + TEGRA241_VINTF_##_regname)
+#define REG_VCMDQ_PAGE0(_vcmdq, _regname) \
+ ((_vcmdq)->page0 + TEGRA241_VCMDQ_##_regname)
+#define REG_VCMDQ_PAGE1(_vcmdq, _regname) \
+ ((_vcmdq)->page1 + TEGRA241_VCMDQ_##_regname)
+
+
+static bool disable_cmdqv;
+module_param(disable_cmdqv, bool, 0444);
+MODULE_PARM_DESC(disable_cmdqv,
+ "This allows to disable CMDQV HW and use default SMMU internal CMDQ.");
+
+static bool bypass_vcmdq;
+module_param(bypass_vcmdq, bool, 0444);
+MODULE_PARM_DESC(bypass_vcmdq,
+ "This allows to bypass VCMDQ for debugging use or perf comparison.");
+
+/**
+ * struct tegra241_vcmdq - Virtual Command Queue
+ * @idx: Global index in the CMDQV
+ * @lidx: Local index in the VINTF
+ * @enabled: Enable status
+ * @cmdqv: Parent CMDQV pointer
+ * @vintf: Parent VINTF pointer
+ * @cmdq: Command Queue struct
+ * @page0: MMIO Page0 base address
+ * @page1: MMIO Page1 base address
+ */
+struct tegra241_vcmdq {
+ u16 idx;
+ u16 lidx;
+
+ bool enabled;
+
+ struct tegra241_cmdqv *cmdqv;
+ struct tegra241_vintf *vintf;
+ struct arm_smmu_cmdq cmdq;
+
+ void __iomem *page0;
+ void __iomem *page1;
+};
+
+/**
+ * struct tegra241_vintf - Virtual Interface
+ * @idx: Global index in the CMDQV
+ * @enabled: Enable status
+ * @cmdqv: Parent CMDQV pointer
+ * @lvcmdqs: List of logical VCMDQ pointers
+ * @base: MMIO base address
+ */
+struct tegra241_vintf {
+ u16 idx;
+
+ bool enabled;
+
+ struct tegra241_cmdqv *cmdqv;
+ struct tegra241_vcmdq **lvcmdqs;
+
+ void __iomem *base;
+};
+
+/**
+ * struct tegra241_cmdqv - CMDQ-V for SMMUv3
+ * @smmu: SMMUv3 device
+ * @base: MMIO base address
+ * @irq: IRQ number
+ * @num_vintfs: Total number of VINTFs
+ * @num_vcmdqs: Total number of VCMDQs
+ * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF
+ * @vintf_ids: VINTF id allocator
+ * @vintfs: List of VINTFs
+ */
+struct tegra241_cmdqv {
+ struct arm_smmu_device smmu;
+
+ void __iomem *base;
+ int irq;
+
+ /* CMDQV Hardware Params */
+ u16 num_vintfs;
+ u16 num_vcmdqs;
+ u16 num_lvcmdqs_per_vintf;
+
+ struct ida vintf_ids;
+
+ struct tegra241_vintf **vintfs;
+};
+
+/* Config and Polling Helpers */
+
+static inline int tegra241_cmdqv_write_config(struct tegra241_cmdqv *cmdqv,
+ void __iomem *addr_config,
+ void __iomem *addr_status,
+ u32 regval, const char *header,
+ bool *out_enabled)
+{
+ bool en = regval & BIT(0);
+ int ret;
+
+ writel(regval, addr_config);
+ ret = readl_poll_timeout(addr_status, regval,
+ en ? regval & BIT(0) : !(regval & BIT(0)),
+ 1, ARM_SMMU_POLL_TIMEOUT_US);
+ if (ret)
+ dev_err(cmdqv->smmu.dev, "%sfailed to %sable, STATUS=0x%08X\n",
+ header, en ? "en" : "dis", regval);
+ if (out_enabled)
+ WRITE_ONCE(*out_enabled, regval & BIT(0));
+ return ret;
+}
+
+static inline int cmdqv_write_config(struct tegra241_cmdqv *cmdqv, u32 regval)
+{
+ return tegra241_cmdqv_write_config(cmdqv,
+ REG_CMDQV(cmdqv, CONFIG),
+ REG_CMDQV(cmdqv, STATUS),
+ regval, "CMDQV: ", NULL);
+}
+
+static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval)
+{
+ char header[16];
+
+ snprintf(header, 16, "VINTF%u: ", vintf->idx);
+ return tegra241_cmdqv_write_config(vintf->cmdqv,
+ REG_VINTF(vintf, CONFIG),
+ REG_VINTF(vintf, STATUS),
+ regval, header, &vintf->enabled);
+}
+
+static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq,
+ char *header, int hlen)
+{
+ WARN_ON(hlen < 32);
+ if (WARN_ON(!vcmdq->vintf))
+ return "";
+ snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ",
+ vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx);
+ return header;
+}
+
+static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval)
+{
+ char header[32], *h = lvcmdq_error_header(vcmdq, header, 32);
+
+ return tegra241_cmdqv_write_config(vcmdq->cmdqv,
+ REG_VCMDQ_PAGE0(vcmdq, CONFIG),
+ REG_VCMDQ_PAGE0(vcmdq, STATUS),
+ regval, h, &vcmdq->enabled);
+}
+
+/* ISR Functions */
+
+static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf)
+{
+ int i;
+
+ for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) {
+ u64 map = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i)));
+
+ while (map) {
+ unsigned long lidx = __ffs64(map); /* already a 0-based bit index */
+ struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
+ u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR));
+
+ __arm_smmu_cmdq_skip_err(&vintf->cmdqv->smmu, &vcmdq->cmdq);
+ writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN));
+ map &= ~BIT_ULL(lidx);
+ }
+ }
+}
+
+static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid)
+{
+ struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid;
+ void __iomem *reg_vintf_map = REG_CMDQV(cmdqv, VINTF_ERR_MAP);
+ char err_str[256];
+ u64 vintf_map;
+
+ /* Use readl_relaxed() as register addresses are not 64-bit aligned */
+ vintf_map = (u64)readl_relaxed(reg_vintf_map + 0x4) << 32 |
+ (u64)readl_relaxed(reg_vintf_map);
+
+ snprintf(err_str, sizeof(err_str),
+ "vintf_map: %016llx, vcmdq_map %08x:%08x:%08x:%08x", vintf_map,
+ readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(3))),
+ readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(2))),
+ readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(1))),
+ readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(0))));
+
+ dev_warn(cmdqv->smmu.dev, "unexpected error reported. %s\n", err_str);
+
+ /* Handle VINTF0 and its LVCMDQs */
+ if (vintf_map & BIT_ULL(0)) {
+ tegra241_vintf0_handle_error(cmdqv->vintfs[0]);
+ vintf_map &= ~BIT_ULL(0);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/* Command Queue Function */
+
+static struct arm_smmu_cmdq *
+tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(smmu, struct tegra241_cmdqv, smmu);
+ struct tegra241_vintf *vintf = cmdqv->vintfs[0];
+ struct tegra241_vcmdq *vcmdq;
+ u16 lidx;
+
+ if (READ_ONCE(bypass_vcmdq))
+ return NULL;
+
+ /* Use SMMU CMDQ if VINTF0 is uninitialized */
+ if (!READ_ONCE(vintf->enabled))
+ return NULL;
+
+ /*
+ * Select a LVCMDQ to use. Each cmdq has its own lock, so as a
+ * temporary solution spread the issuers across LVCMDQs by CPU
+ * ID rather than letting them all spin on the same lock. The
+ * CPU ID is only a load-balancing hint, so raw_smp_processor_id()
+ * is enough and stays valid if the caller is preemptible.
+ */
+ lidx = raw_smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf;
+ vcmdq = vintf->lvcmdqs[lidx];
+ if (!vcmdq || !READ_ONCE(vcmdq->enabled))
+ return NULL;
+ return &vcmdq->cmdq;
+}
+
+/* HW Reset Functions */
+
+static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq)
+{
+ char header[32], *h = lvcmdq_error_header(vcmdq, header, 32);
+ u32 gerrorn, gerror;
+
+ if (vcmdq_write_config(vcmdq, 0)) {
+ dev_err(vcmdq->cmdqv->smmu.dev,
+ "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h,
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)),
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)),
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS)));
+ }
+ writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD));
+ writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS));
+ writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE));
+ writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE));
+
+ gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN));
+ gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR));
+ if (gerror != gerrorn) {
+ dev_warn(vcmdq->cmdqv->smmu.dev,
+ "%suncleared error detected, resetting\n", h);
+ writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN));
+ }
+
+ dev_dbg(vcmdq->cmdqv->smmu.dev, "%sdeinited\n", h);
+}
+
+static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
+{
+ char header[32], *h = lvcmdq_error_header(vcmdq, header, 32);
+ int ret;
+
+ /* Reset VCMDQ */
+ tegra241_vcmdq_hw_deinit(vcmdq);
+
+ /* Configure and enable VCMDQ */
+ writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE));
+
+ ret = vcmdq_write_config(vcmdq, VCMDQ_EN);
+ if (ret) {
+ dev_err(vcmdq->cmdqv->smmu.dev,
+ "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h,
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)),
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)),
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS)));
+ return ret;
+ }
+
+ dev_dbg(vcmdq->cmdqv->smmu.dev, "%sinited\n", h);
+ return 0;
+}
+
+static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf)
+{
+ u16 lidx;
+
+ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++)
+ if (vintf->lvcmdqs && vintf->lvcmdqs[lidx])
+ tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]);
+ vintf_write_config(vintf, 0);
+}
+
+static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own)
+{
+ u32 regval;
+ u16 lidx;
+ int ret;
+
+ /* Reset VINTF */
+ tegra241_vintf_hw_deinit(vintf);
+
+ /* Configure and enable VINTF */
+ regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own);
+ writel(regval, REG_VINTF(vintf, CONFIG));
+
+ ret = vintf_write_config(vintf, regval | VINTF_EN);
+ if (ret)
+ return ret;
+
+ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) {
+ if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) {
+ ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]);
+ if (ret) {
+ tegra241_vintf_hw_deinit(vintf);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(smmu, struct tegra241_cmdqv, smmu);
+ u16 qidx, lidx, idx;
+ u32 regval;
+ int ret;
+
+ /* Reset CMDQV */
+ regval = readl_relaxed(REG_CMDQV(cmdqv, CONFIG));
+ ret = cmdqv_write_config(cmdqv, regval & ~CMDQV_EN);
+ if (ret)
+ return ret;
+ ret = cmdqv_write_config(cmdqv, regval | CMDQV_EN);
+ if (ret)
+ return ret;
+
+ /* Assign preallocated global VCMDQs to each VINTF as LVCMDQs */
+ for (idx = 0, qidx = 0; idx < cmdqv->num_vintfs; idx++) {
+ for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) {
+ regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx);
+ regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx);
+ regval |= CMDQV_CMDQ_ALLOCATED;
+ writel_relaxed(regval,
+ REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++)));
+ }
+ }
+
+ return tegra241_vintf_hw_init(cmdqv->vintfs[0], true);
+}
+
+/* VCMDQ Resource Helpers */
+
+static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
+{
+ struct arm_smmu_queue *q = &vcmdq->cmdq.q;
+ size_t nents = 1 << q->llq.max_n_shift;
+ size_t qsz = nents << CMDQ_ENT_SZ_SHIFT;
+
+ if (!q->base)
+ return;
+ dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma);
+}
+
+static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
+{
+ struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
+ struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq;
+ struct arm_smmu_queue *q = &cmdq->q;
+ char name[16];
+ int ret;
+
+ snprintf(name, 16, "vcmdq%u", vcmdq->idx);
+
+ q->llq.max_n_shift = ilog2(SZ_64K >> CMDQ_ENT_SZ_SHIFT);
+
+ /* Use the common helper to init the VCMDQ, and then... */
+ ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0,
+ TEGRA241_VCMDQ_PROD, TEGRA241_VCMDQ_CONS,
+ CMDQ_ENT_DWORDS, name);
+ if (ret)
+ return ret;
+
+ /* ...override q_base to write VCMDQ_BASE registers */
+ q->q_base = q->base_dma & VCMDQ_ADDR;
+ q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift);
+
+ return arm_smmu_cmdq_init(smmu, cmdq);
+}
+
+/* VINTF Logical VCMDQ Resource Helpers */
+
+static void tegra241_vintf_deinit_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
+{
+ vintf->lvcmdqs[lidx] = NULL;
+}
+
+static int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx,
+ struct tegra241_vcmdq *vcmdq)
+{
+ struct tegra241_cmdqv *cmdqv = vintf->cmdqv;
+ u16 idx = vintf->idx;
+
+ vcmdq->idx = idx * cmdqv->num_lvcmdqs_per_vintf + lidx;
+ vcmdq->lidx = lidx;
+ vcmdq->cmdqv = cmdqv;
+ vcmdq->vintf = vintf;
+ vcmdq->page0 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE0(idx, lidx);
+ vcmdq->page1 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE1(idx, lidx);
+
+ vintf->lvcmdqs[lidx] = vcmdq;
+ return 0;
+}
+
+static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
+{
+ struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
+ char header[32];
+
+ tegra241_vcmdq_free_smmu_cmdq(vcmdq);
+ tegra241_vintf_deinit_lvcmdq(vintf, lidx);
+
+ dev_dbg(vintf->cmdqv->smmu.dev,
+ "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 32));
+ kfree(vcmdq);
+}
+
+static struct tegra241_vcmdq *
+tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
+{
+ struct tegra241_cmdqv *cmdqv = vintf->cmdqv;
+ struct tegra241_vcmdq *vcmdq;
+ char header[32];
+ int ret;
+
+ vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL);
+ if (!vcmdq)
+ return ERR_PTR(-ENOMEM);
+
+ ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq);
+ if (ret)
+ goto free_vcmdq;
+
+ /* Build an arm_smmu_cmdq for each LVCMDQ */
+ ret = tegra241_vcmdq_alloc_smmu_cmdq(vcmdq);
+ if (ret)
+ goto deinit_lvcmdq;
+
+ dev_dbg(cmdqv->smmu.dev,
+ "%sallocated\n", lvcmdq_error_header(vcmdq, header, 32));
+ return vcmdq;
+
+deinit_lvcmdq:
+ tegra241_vintf_deinit_lvcmdq(vintf, lidx);
+free_vcmdq:
+ kfree(vcmdq);
+ return ERR_PTR(ret);
+}
+
+/* VINTF Resource Helpers */
+
+static void tegra241_cmdqv_deinit_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
+{
+ kfree(cmdqv->vintfs[idx]->lvcmdqs);
+ ida_free(&cmdqv->vintf_ids, idx);
+ cmdqv->vintfs[idx] = NULL;
+}
+
+static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx,
+ struct tegra241_vintf *vintf)
+{
+
+ u16 idx;
+ int ret;
+
+ ret = ida_alloc_max(&cmdqv->vintf_ids, max_idx, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ idx = ret;
+
+ vintf->idx = idx;
+ vintf->cmdqv = cmdqv;
+ vintf->base = cmdqv->base + TEGRA241_VINTF(idx);
+
+ vintf->lvcmdqs = kcalloc(cmdqv->num_lvcmdqs_per_vintf,
+ sizeof(*vintf->lvcmdqs), GFP_KERNEL);
+ if (!vintf->lvcmdqs) {
+ ida_free(&cmdqv->vintf_ids, idx);
+ return -ENOMEM;
+ }
+
+ cmdqv->vintfs[idx] = vintf;
+ return ret;
+}
+
+/* Remove Helpers */
+
+static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
+{
+ tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]);
+ tegra241_vintf_free_lvcmdq(vintf, lidx);
+}
+
+static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
+{
+ struct tegra241_vintf *vintf = cmdqv->vintfs[idx];
+ u16 lidx;
+
+ /* Remove LVCMDQ resources */
+ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++)
+ if (vintf->lvcmdqs[lidx])
+ tegra241_vintf_remove_lvcmdq(vintf, lidx);
+
+ /* Remove VINTF resources */
+ tegra241_vintf_hw_deinit(vintf);
+
+ dev_dbg(cmdqv->smmu.dev, "VINTF%u: deallocated\n", vintf->idx);
+ tegra241_cmdqv_deinit_vintf(cmdqv, idx);
+ kfree(vintf);
+}
+
+static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(smmu, struct tegra241_cmdqv, smmu);
+ u16 idx;
+
+ /* Remove VINTF resources */
+ for (idx = 0; idx < cmdqv->num_vintfs; idx++) {
+ if (cmdqv->vintfs[idx]) {
+ /* Only vintf0 should remain at this stage */
+ WARN_ON(idx > 0);
+ tegra241_cmdqv_remove_vintf(cmdqv, idx);
+ }
+ }
+
+ /* Remove cmdqv resources */
+ ida_destroy(&cmdqv->vintf_ids);
+
+ if (cmdqv->irq > 0)
+ free_irq(cmdqv->irq, cmdqv);
+ iounmap(cmdqv->base);
+ kfree(cmdqv->vintfs);
+}
+
+static struct arm_smmu_impl tegra241_cmdqv_impl = {
+ .get_secondary_cmdq = tegra241_cmdqv_get_cmdq,
+ .device_reset = tegra241_cmdqv_hw_reset,
+ .device_remove = tegra241_cmdqv_remove,
+};
+
+/* Probe Functions */
+
+static int tegra241_cmdqv_acpi_is_memory(struct acpi_resource *res, void *data)
+{
+ struct resource_win win;
+
+ return !acpi_dev_resource_address_space(res, &win);
+}
+
+static int tegra241_cmdqv_acpi_get_irqs(struct acpi_resource *ares, void *data)
+{
+ struct resource r;
+ int *irq = data;
+
+ if (*irq <= 0 && acpi_dev_resource_interrupt(ares, 0, &r))
+ *irq = r.start;
+ return 1; /* No need to add resource to the list */
+}
+
+static struct resource *
+tegra241_cmdqv_find_acpi_resource(struct arm_smmu_device *smmu,
+ struct acpi_iort_node *node, int *irq)
+{
+ struct device *dev = smmu->dev;
+ struct list_head resource_list;
+ struct resource_entry *rentry;
+ struct resource *res = NULL;
+ struct acpi_device *adev;
+ const char *match_uid;
+ int ret;
+
+ if (acpi_disabled)
+ return NULL;
+
+ /* Look for a device in the DSDT whose _UID matches the SMMU node ID */
+ match_uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
+ adev = acpi_dev_get_first_match_dev(TEGRA241_CMDQV_HID, match_uid, -1);
+ kfree(match_uid);
+
+ if (!adev)
+ return NULL;
+
+ dev_info(dev, "found companion CMDQV device, %s\n",
+ dev_name(&adev->dev));
+
+ INIT_LIST_HEAD(&resource_list);
+ ret = acpi_dev_get_resources(adev, &resource_list,
+ tegra241_cmdqv_acpi_is_memory, NULL);
+ if (ret < 0) {
+ dev_err(dev, "failed to get memory resource: %d\n", ret);
+ goto put_dev;
+ }
+
+ rentry = list_first_entry_or_null(&resource_list,
+ struct resource_entry, node);
+ if (!rentry) {
+ dev_err(dev, "failed to get memory resource entry\n");
+ goto free_list;
+ }
+
+ /* Caller must free the res */
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ goto free_list;
+
+ *res = *rentry->res;
+
+ acpi_dev_free_resource_list(&resource_list);
+
+ INIT_LIST_HEAD(&resource_list);
+
+ if (irq)
+ ret = acpi_dev_get_resources(adev, &resource_list,
+ tegra241_cmdqv_acpi_get_irqs, irq);
+ if (ret < 0 || !irq || *irq <= 0)
+ dev_warn(dev, "no interrupt. errors will not be reported\n");
+
+free_list:
+ acpi_dev_free_resource_list(&resource_list);
+put_dev:
+ put_device(&adev->dev);
+
+ return res;
+}
+
+struct dentry *cmdqv_debugfs_dir;
+
+static struct arm_smmu_device *
+tegra241_cmdqv_probe(struct arm_smmu_device *smmu,
+ struct resource *res, int irq)
+{
+ struct tegra241_cmdqv *cmdqv = NULL;
+ struct tegra241_vintf *vintf;
+ void __iomem *base;
+ u32 regval;
+ int lidx;
+ int ret;
+
+ base = ioremap(res->start, resource_size(res));
+ if (!base) { /* ioremap() reports failure as NULL, never an ERR_PTR */
+ dev_err(smmu->dev, "failed to ioremap\n");
+ return smmu; /* nothing was mapped, so skip the iounmap path */
+ }
+
+ regval = readl(base + TEGRA241_CMDQV_CONFIG);
+ if (disable_cmdqv) {
+ dev_info(smmu->dev, "Detected disable_cmdqv=true\n");
+ writel(regval & ~CMDQV_EN, base + TEGRA241_CMDQV_CONFIG);
+ goto iounmap;
+ }
+
+ cmdqv = devm_krealloc(smmu->dev, smmu, sizeof(*cmdqv), GFP_KERNEL);
+ if (!cmdqv)
+ goto iounmap;
+ smmu = &cmdqv->smmu;
+
+ cmdqv->irq = irq;
+ cmdqv->base = base;
+
+ if (cmdqv->irq > 0) {
+ ret = request_irq(irq, tegra241_cmdqv_isr, 0,
+ "tegra241-cmdqv", cmdqv);
+ if (ret) {
+ dev_err(smmu->dev, "failed to request irq (%d): %d\n",
+ cmdqv->irq, ret);
+ goto iounmap;
+ }
+ }
+
+ regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM));
+ cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval);
+ cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval);
+ cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs;
+
+ cmdqv->vintfs = kcalloc(cmdqv->num_vintfs,
+ sizeof(*cmdqv->vintfs), GFP_KERNEL);
+ if (!cmdqv->vintfs)
+ goto free_irq;
+
+ ida_init(&cmdqv->vintf_ids);
+
+ vintf = kzalloc(sizeof(*vintf), GFP_KERNEL);
+ if (!vintf)
+ goto destroy_ids;
+
+ /* Init VINTF0 for in-kernel use */
+ ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf);
+ if (ret) {
+ dev_err(smmu->dev, "failed to init vintf0: %d\n", ret);
+ goto free_vintf;
+ }
+
+ /* Preallocate logical VCMDQs to VINTF0 */
+ for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) {
+ struct tegra241_vcmdq *vcmdq;
+
+ vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx);
+ if (IS_ERR(vcmdq))
+ goto free_lvcmdq;
+ }
+
+#ifdef CONFIG_IOMMU_DEBUGFS
+ if (!cmdqv_debugfs_dir) {
+ cmdqv_debugfs_dir =
+ debugfs_create_dir("tegra241_cmdqv", iommu_debugfs_dir);
+ debugfs_create_bool("bypass_vcmdq", 0644,
+ cmdqv_debugfs_dir, &bypass_vcmdq);
+ }
+#endif
+
+ cmdqv->smmu.impl = &tegra241_cmdqv_impl;
+ cmdqv->smmu.options |= ARM_SMMU_OPT_SECONDARY_CMDQ_CS_NONE_ONLY;
+
+ return &cmdqv->smmu;
+
+free_lvcmdq:
+ for (lidx--; lidx >= 0; lidx--)
+ tegra241_vintf_free_lvcmdq(vintf, lidx);
+ tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
+free_vintf:
+ kfree(vintf);
+destroy_ids:
+ ida_destroy(&cmdqv->vintf_ids);
+ kfree(cmdqv->vintfs);
+free_irq:
+ if (cmdqv->irq > 0)
+ free_irq(cmdqv->irq, cmdqv);
+iounmap:
+ iounmap(base);
+ dev_info(smmu->dev, "Falling back to standard SMMU CMDQ\n");
+ return smmu;
+}
+
+struct arm_smmu_device *
+tegra241_cmdqv_acpi_dsdt_probe(struct arm_smmu_device *smmu,
+ struct acpi_iort_node *node)
+{
+ struct resource *res;
+ int irq = 0; /* the resource walk only fills it in while it is <= 0 */
+
+ /* Keep the pointer smmu intact if !res */
+ res = tegra241_cmdqv_find_acpi_resource(smmu, node, &irq);
+ if (!res)
+ return smmu;
+
+ smmu = tegra241_cmdqv_probe(smmu, res, irq);
+ kfree(res);
+ return smmu;
+}
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH v10 8/9] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV
2024-07-28 22:44 ` [PATCH v10 8/9] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV Nicolin Chen
@ 2024-07-29 14:01 ` kernel test robot
2024-07-29 16:55 ` Nicolin Chen
0 siblings, 1 reply; 12+ messages in thread
From: kernel test robot @ 2024-07-29 14:01 UTC (permalink / raw)
To: Nicolin Chen, will
Cc: llvm, oe-kbuild-all, robin.murphy, joro, jgg, thierry.reding,
vdumpa, jonathanh, linux-kernel, iommu, linux-arm-kernel,
linux-tegra
Hi Nicolin,
kernel test robot noticed the following build warnings:
[auto build test WARNING on linus/master]
[also build test WARNING on v6.11-rc1 next-20240729]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Nicolin-Chen/iommu-arm-smmu-v3-Issue-a-batch-of-commands-to-the-same-cmdq/20240729-072957
base: linus/master
patch link: https://lore.kernel.org/r/ca671f4d090546c21a0aba6fa4ddda8da26d4474.1722206275.git.nicolinc%40nvidia.com
patch subject: [PATCH v10 8/9] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV
config: arm64-allmodconfig (https://download.01.org/0day-ci/archive/20240729/202407292157.BauV7TPf-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project ccae7b461be339e717d02f99ac857cf0bc7d17fc)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240729/202407292157.BauV7TPf-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202407292157.BauV7TPf-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:186: warning: Function parameter or struct member 'vintfs' not described in 'tegra241_cmdqv'
>> drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:186: warning: Excess struct member 'vtinfs' description in 'tegra241_cmdqv'
vim +186 drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
160
161 /**
162 * struct tegra241_cmdqv - CMDQ-V for SMMUv3
163 * @smmu: SMMUv3 device
164 * @base: MMIO base address
165 * @irq: IRQ number
166 * @num_vintfs: Total number of VINTFs
167 * @num_vcmdqs: Total number of VCMDQs
168 * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF
169 * @vintf_ids: VINTF id allocator
170 * @vtinfs: List of VINTFs
171 */
172 struct tegra241_cmdqv {
173 struct arm_smmu_device smmu;
174
175 void __iomem *base;
176 int irq;
177
178 /* CMDQV Hardware Params */
179 u16 num_vintfs;
180 u16 num_vcmdqs;
181 u16 num_lvcmdqs_per_vintf;
182
183 struct ida vintf_ids;
184
185 struct tegra241_vintf **vintfs;
> 186 };
187
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH v10 8/9] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV
2024-07-29 14:01 ` kernel test robot
@ 2024-07-29 16:55 ` Nicolin Chen
0 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-29 16:55 UTC (permalink / raw)
To: kernel test robot
Cc: will, llvm, oe-kbuild-all, robin.murphy, joro, jgg,
thierry.reding, vdumpa, jonathanh, linux-kernel, iommu,
linux-arm-kernel, linux-tegra
On Mon, Jul 29, 2024 at 10:01:10PM +0800, kernel test robot wrote:
> All warnings (new ones prefixed by >>):
>
> >> drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:186: warning: Function parameter or struct member 'vintfs' not described in 'tegra241_cmdqv'
> >> drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:186: warning: Excess struct member 'vtinfs' description in 'tegra241_cmdqv'
>
>
> vim +186 drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
>
> 160
> 161 /**
> 162 * struct tegra241_cmdqv - CMDQ-V for SMMUv3
> 163 * @smmu: SMMUv3 device
> 164 * @base: MMIO base address
> 165 * @irq: IRQ number
> 166 * @num_vintfs: Total number of VINTFs
> 167 * @num_vcmdqs: Total number of VCMDQs
> 168 * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF
> 169 * @vintf_ids: VINTF id allocator
> 170 * @vtinfs: List of VINTFs
s/vtinfs/vintfs
Will include the typo fix in next ver.
Thanks
Nicolin
> 171 */
> 172 struct tegra241_cmdqv {
> 173 struct arm_smmu_device smmu;
> 174
> 175 void __iomem *base;
> 176 int irq;
> 177
> 178 /* CMDQV Hardware Params */
> 179 u16 num_vintfs;
> 180 u16 num_vcmdqs;
> 181 u16 num_lvcmdqs_per_vintf;
> 182
> 183 struct ida vintf_ids;
> 184
> 185 struct tegra241_vintf **vintfs;
> > 186 };
> 187
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v10 9/9] iommu/tegra241-cmdqv: Limit CMDs for guest owned VINTF
2024-07-28 22:44 [PATCH v10 0/9] Add Tegra241 (Grace) CMDQV Support (part 1/2) Nicolin Chen
` (7 preceding siblings ...)
2024-07-28 22:44 ` [PATCH v10 8/9] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV Nicolin Chen
@ 2024-07-28 22:44 ` Nicolin Chen
8 siblings, 0 replies; 12+ messages in thread
From: Nicolin Chen @ 2024-07-28 22:44 UTC (permalink / raw)
To: will
Cc: robin.murphy, joro, jgg, thierry.reding, vdumpa, jonathanh,
linux-kernel, iommu, linux-arm-kernel, linux-tegra
When VCMDQs are assigned to a VINTF owned by a guest (HYP_OWN bit unset),
only TLB and ATC invalidation commands are supported by the VCMDQ HW. So,
add a new helper that checks the opcode of the input command when selecting
a queue, to make sure that the command is supported. Note that this assumes
the SMMUv3 driver only adds commands of the same type into a single
arm_smmu_cmdq_batch, as it does today.
Note that the HYP_OWN bit should never be set for a guest VM, regardless of
whether the guest kernel driver writes it or not, i.e. the hypervisor running
in the host OS should wire this bit to zero when trapping a write access to
the VINTF_CONFIG register from a guest kernel.
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 22 +++++++-----
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +-
.../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 35 ++++++++++++++++++-
3 files changed, 49 insertions(+), 11 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 18d940c65e2ca..8ff8e264d5e7c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -336,12 +336,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
return 0;
}
-static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
+ u8 opcode)
{
struct arm_smmu_cmdq *cmdq = NULL;
if (smmu->impl && smmu->impl->get_secondary_cmdq)
- cmdq = smmu->impl->get_secondary_cmdq(smmu);
+ cmdq = smmu->impl->get_secondary_cmdq(smmu, opcode);
return cmdq ?: &smmu->cmdq;
}
@@ -889,7 +890,7 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
}
return arm_smmu_cmdq_issue_cmdlist(
- smmu, arm_smmu_get_cmdq(smmu), cmd, 1, sync);
+ smmu, arm_smmu_get_cmdq(smmu, ent->opcode), cmd, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -905,10 +906,13 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
}
static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq_batch *cmds)
+ struct arm_smmu_cmdq_batch *cmds,
+ u8 opcode)
{
+ WARN_ON_ONCE(!opcode);
+
cmds->num = 0;
- cmds->cmdq = arm_smmu_get_cmdq(smmu);
+ cmds->cmdq = arm_smmu_get_cmdq(smmu, opcode);
}
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
@@ -1195,7 +1199,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
},
};
- arm_smmu_cmdq_batch_init(smmu, &cmds);
+ arm_smmu_cmdq_batch_init(smmu, &cmds, CMDQ_OP_CFGI_CD);
for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
@@ -2046,7 +2050,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
- arm_smmu_cmdq_batch_init(master->smmu, &cmds);
+ arm_smmu_cmdq_batch_init(master->smmu, &cmds, CMDQ_OP_ATC_INV);
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
@@ -2084,7 +2088,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
if (!atomic_read(&smmu_domain->nr_ats_masters))
return 0;
- arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds);
+ arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, CMDQ_OP_ATC_INV);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master_domain, &smmu_domain->devices,
@@ -2166,7 +2170,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
num_pages++;
}
- arm_smmu_cmdq_batch_init(smmu, &cmds);
+ arm_smmu_cmdq_batch_init(smmu, &cmds, cmd->opcode);
while (iova < end) {
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 367f5e160af4e..c7f34a5c31f3f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -633,7 +633,8 @@ struct arm_smmu_strtab_cfg {
struct arm_smmu_impl {
int (*device_reset)(struct arm_smmu_device *smmu);
void (*device_remove)(struct arm_smmu_device *smmu);
- struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu);
+ struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu,
+ u8 opcode);
};
#ifdef CONFIG_TEGRA241_CMDQV
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index 891302fb20901..13c57a5fb758c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -143,6 +143,7 @@ struct tegra241_vcmdq {
* struct tegra241_vintf - Virtual Interface
* @idx: Global index in the CMDQV
* @enabled: Enable status
+ * @hyp_own: Owned by hypervisor (in-kernel)
* @cmdqv: Parent CMDQV pointer
* @lvcmdqs: List of logical VCMDQ pointers
* @base: MMIO base address
@@ -151,6 +152,7 @@ struct tegra241_vintf {
u16 idx;
bool enabled;
+ bool hyp_own;
struct tegra241_cmdqv *cmdqv;
struct tegra241_vcmdq **lvcmdqs;
@@ -300,8 +302,25 @@ static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid)
/* Command Queue Function */
+static bool tegra241_vintf_support_cmd(struct tegra241_vintf *vintf, u8 opcode)
+{
+ /* Hypervisor-owned VINTF can execute any command in its VCMDQs */
+ if (READ_ONCE(vintf->hyp_own))
+ return true;
+
+ /* Guest-owned VINTF must check against the list of supported CMDs */
+ switch (opcode) {
+ case CMDQ_OP_TLBI_NH_ASID:
+ case CMDQ_OP_TLBI_NH_VA:
+ case CMDQ_OP_ATC_INV:
+ return true;
+ default:
+ return false;
+ }
+}
+
static struct arm_smmu_cmdq *
-tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu)
+tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, u8 opcode)
{
struct tegra241_cmdqv *cmdqv =
container_of(smmu, struct tegra241_cmdqv, smmu);
@@ -316,6 +335,10 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu)
if (!READ_ONCE(vintf->enabled))
return NULL;
+ /* Unsupported CMDs go through the smmu->cmdq pathway */
+ if (!tegra241_vintf_support_cmd(vintf, opcode))
+ return NULL;
+
/*
* Select a LVCMDQ to use. Here we use a temporal solution to
* balance out traffic on cmdq issuing: each cmdq has its own
@@ -405,12 +428,22 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own)
tegra241_vintf_hw_deinit(vintf);
/* Configure and enable VINTF */
+ /*
+ * Note that the HYP_OWN bit is wired to zero when running in a guest
+ * kernel, whether or not it is enabled here, as !HYP_OWN cmdq HWs only
+ * support a restricted set of commands.
+ */
regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own);
writel(regval, REG_VINTF(vintf, CONFIG));
ret = vintf_write_config(vintf, regval | VINTF_EN);
if (ret)
return ret;
+ /*
+ * As mentioned above, the HYP_OWN bit is wired to zero for a guest
+ * kernel, so read it back from HW to ensure hyp_own reflects that.
+ */
+ vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG)));
for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) {
if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) {
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread