From: Zhen Lei <thunder.leizhen@huawei.com>
To: Will Deacon <will@kernel.org>,
Robin Murphy <robin.murphy@arm.com>,
"Joerg Roedel" <joro@8bytes.org>,
linux-arm-kernel <linux-arm-kernel@lists.infradead.org>,
iommu <iommu@lists.linux-foundation.org>,
linux-kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH RFC 5/8] iommu/arm-smmu-v3: Add support for ECMDQ register mode
Date: Sat, 26 Jun 2021 19:01:27 +0800 [thread overview]
Message-ID: <20210626110130.2416-6-thunder.leizhen@huawei.com> (raw)
In-Reply-To: <20210626110130.2416-1-thunder.leizhen@huawei.com>
Ensure that each core exclusively occupies an ECMDQ and all of them are
enabled during initialization. During this initialization process, any
errors will result in a fallback to using normal CMDQ.
When GERROR is triggered by ECMDQ, all ECMDQs need to be traversed: the
ECMDQs with errors will be processed and the ECMDQs without errors will
be skipped directly.
Compared with register SMMU_CMDQ_PROD, register SMMU_ECMDQ_PROD has one
more 'EN' bit and one more 'ERRACK' bit. Therefore, an extra member
'ecmdq_prod' is added to record the values of these two bits. Each time
register SMMU_ECMDQ_PROD is updated, the value of 'ecmdq_prod' is ORed.
After the error indicated by SMMU_GERROR.CMDQP_ERR is fixed, the 'ERRACK'
bit needs to be toggled to resume the corresponding ECMDQ. Therefore, a
rwlock is used to protect the write operation to bit 'ERRACK' during error
handling and the read operation to bit 'ERRACK' during command insertion.
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 210 +++++++++++++++++++-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 36 ++++
2 files changed, 245 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 62b2742daab3257..d7b590e911a879d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -337,6 +337,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
{
+ if (smmu->ecmdq_enabled) {
+ struct arm_smmu_ecmdq *ecmdq;
+
+ ecmdq = *this_cpu_ptr(smmu->ecmdq);
+
+ return &ecmdq->cmdq;
+ }
+
return &smmu->cmdq;
}
@@ -421,6 +429,38 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
}
+static void arm_smmu_ecmdq_skip_err(struct arm_smmu_device *smmu)
+{
+ int i;
+ u32 prod, cons;
+ struct arm_smmu_queue *q;
+ struct arm_smmu_ecmdq *ecmdq;
+
+ for (i = 0; i < smmu->nr_ecmdq; i++) {
+ unsigned long flags;
+
+ ecmdq = *per_cpu_ptr(smmu->ecmdq, i);
+ q = &ecmdq->cmdq.q;
+
+ prod = readl_relaxed(q->prod_reg);
+ cons = readl_relaxed(q->cons_reg);
+ if (((prod ^ cons) & ECMDQ_CONS_ERR) == 0)
+ continue;
+
+ __arm_smmu_cmdq_skip_err(smmu, q);
+
+ write_lock_irqsave(&q->ecmdq_lock, flags);
+ q->ecmdq_prod &= ~ECMDQ_PROD_ERRACK;
+ q->ecmdq_prod |= cons & ECMDQ_CONS_ERR;
+
+ prod = readl_relaxed(q->prod_reg);
+ prod &= ~ECMDQ_PROD_ERRACK;
+ prod |= cons & ECMDQ_CONS_ERR;
+ writel(prod, q->prod_reg);
+ write_unlock_irqrestore(&q->ecmdq_lock, flags);
+ }
+}
+
/*
* Command queue locking.
* This is a form of bastardised rwlock with the following major changes:
@@ -817,7 +857,13 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
* d. Advance the hardware prod pointer
* Control dependency ordering from the entries becoming valid.
*/
- writel_relaxed(prod, cmdq->q.prod_reg);
+ if (smmu->ecmdq_enabled) {
+ read_lock(&cmdq->q.ecmdq_lock);
+ writel_relaxed(prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+ read_unlock(&cmdq->q.ecmdq_lock);
+ } else {
+ writel_relaxed(prod, cmdq->q.prod_reg);
+ }
/*
* e. Tell the next owner we're done
@@ -1675,6 +1721,9 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
if (active & GERROR_CMDQ_ERR)
arm_smmu_cmdq_skip_err(smmu);
+ if (active & GERROR_CMDQP_ERR)
+ arm_smmu_ecmdq_skip_err(smmu);
+
writel(gerror, smmu->base + ARM_SMMU_GERRORN);
return IRQ_HANDLED;
}
@@ -2941,6 +2990,20 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
return ret;
}
+static int arm_smmu_ecmdq_init(struct arm_smmu_cmdq *cmdq)
+{
+ unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+
+ atomic_set(&cmdq->owner_prod, 0);
+ atomic_set(&cmdq->lock, 0);
+
+ cmdq->valid_map = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
+ if (!cmdq->valid_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
int ret;
@@ -3304,6 +3367,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
+ int i;
int ret;
u32 reg, enables;
struct arm_smmu_cmdq_ent cmd;
@@ -3348,6 +3412,28 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
+ for (i = 0; i < smmu->nr_ecmdq; i++) {
+ struct arm_smmu_ecmdq *ecmdq;
+ struct arm_smmu_queue *q;
+
+ ecmdq = *per_cpu_ptr(smmu->ecmdq, i);
+ q = &ecmdq->cmdq.q;
+
+ writeq_relaxed(q->q_base, ecmdq->base + ARM_SMMU_ECMDQ_BASE);
+ writel_relaxed(q->llq.prod, ecmdq->base + ARM_SMMU_ECMDQ_PROD);
+ writel_relaxed(q->llq.cons, ecmdq->base + ARM_SMMU_ECMDQ_CONS);
+
+ /* enable ecmdq */
+ writel(ECMDQ_PROD_EN, q->prod_reg);
+ ret = readl_relaxed_poll_timeout(q->cons_reg, reg, reg & ECMDQ_CONS_ENACK,
+ 1, ARM_SMMU_POLL_TIMEOUT_US);
+ if (ret) {
+ dev_err(smmu->dev, "ecmdq[%d] enable failed\n", i);
+ smmu->ecmdq_enabled = 0;
+ break;
+ }
+ }
+
enables = CR0_CMDQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
ARM_SMMU_CR0ACK);
@@ -3437,6 +3523,115 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
return 0;
}
+static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
+{
+ int cpu;
+ struct arm_smmu_ecmdq *ecmdq;
+
+ if (num_possible_cpus() <= smmu->nr_ecmdq) {
+ ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
+ if (!ecmdq)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu)
+ *per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
+
+ /* A core requires at most one ECMDQ */
+ smmu->nr_ecmdq = num_possible_cpus();
+
+ return 0;
+ }
+
+ return -ENOSPC;
+}
+
+static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
+{
+ int ret, cpu;
+ u32 i, nump, numq, gap;
+ u32 reg, shift_increment;
+ u64 addr, smmu_dma_base;
+ void __iomem *cp_regs, *cp_base;
+
+ /* IDR6 */
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
+ nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
+ numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
+ smmu->nr_ecmdq = nump * numq;
+ gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
+
+ smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
+ cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
+ if (!cp_regs)
+ return -ENOMEM;
+
+ for (i = 0; i < nump; i++) {
+ u64 val, pre_addr;
+
+ val = readq_relaxed(cp_regs + 32 * i);
+ if (!(val & ECMDQ_CP_PRESET)) {
+ iounmap(cp_regs);
+ dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
+ return -EFAULT;
+ }
+
+ if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
+ iounmap(cp_regs);
+ dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
+ return -EFAULT;
+ }
+
+ pre_addr = val & ECMDQ_CP_ADDR;
+ }
+
+ addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
+ iounmap(cp_regs);
+
+ cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
+ if (!cp_base)
+ return -ENOMEM;
+
+ smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
+ if (!smmu->ecmdq)
+ return -ENOMEM;
+
+ ret = arm_smmu_ecmdq_layout(smmu);
+ if (ret)
+ return ret;
+
+ shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
+
+ addr = 0;
+ for_each_possible_cpu(cpu) {
+ struct arm_smmu_ecmdq *ecmdq;
+ struct arm_smmu_queue *q;
+
+ ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
+ ecmdq->base = cp_base + addr;
+
+ q = &ecmdq->cmdq.q;
+
+ q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
+ ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
+ ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
+ if (ret)
+ return ret;
+
+ q->ecmdq_prod = ECMDQ_PROD_EN;
+ rwlock_init(&q->ecmdq_lock);
+
+ ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
+ if (ret) {
+ dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
+ return ret;
+ }
+
+ addr += gap;
+ }
+
+ return 0;
+}
+
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
u32 reg;
@@ -3547,6 +3742,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
return -ENXIO;
}
+ if (reg & IDR1_ECMDQ)
+ smmu->features |= ARM_SMMU_FEAT_ECMDQ;
+
/* Queue sizes, capped to ensure natural alignment */
smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
@@ -3647,6 +3845,16 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
smmu->ias, smmu->oas, smmu->features);
+
+ if (smmu->features & ARM_SMMU_FEAT_ECMDQ) {
+ int err;
+
+ err = arm_smmu_ecmdq_probe(smmu);
+ if (err) {
+ dev_err(smmu->dev, "suppress ecmdq feature, errno=%d\n", err);
+ smmu->ecmdq_enabled = 0;
+ }
+ }
return 0;
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 4cb136f07914e83..3f3a867a4626fcd 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -41,6 +41,7 @@
#define IDR0_S2P (1 << 0)
#define ARM_SMMU_IDR1 0x4
+#define IDR1_ECMDQ (1 << 31)
#define IDR1_TABLES_PRESET (1 << 30)
#define IDR1_QUEUES_PRESET (1 << 29)
#define IDR1_REL (1 << 28)
@@ -107,6 +108,7 @@
#define ARM_SMMU_IRQ_CTRLACK 0x54
#define ARM_SMMU_GERROR 0x60
+#define GERROR_CMDQP_ERR (1 << 9)
#define GERROR_SFM_ERR (1 << 8)
#define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
#define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
@@ -152,6 +154,26 @@
#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
+#define ARM_SMMU_IDR6 0x190
+#define IDR6_LOG2NUMP GENMASK(27, 24)
+#define IDR6_LOG2NUMQ GENMASK(19, 16)
+#define IDR6_BA_DOORBELLS GENMASK(9, 0)
+
+#define ARM_SMMU_ECMDQ_BASE 0x00
+#define ARM_SMMU_ECMDQ_PROD 0x08
+#define ARM_SMMU_ECMDQ_CONS 0x0c
+#define ECMDQ_MAX_SZ_SHIFT 8
+#define ECMDQ_PROD_EN (1 << 31)
+#define ECMDQ_CONS_ENACK (1 << 31)
+#define ECMDQ_CONS_ERR (1 << 23)
+#define ECMDQ_PROD_ERRACK (1 << 23)
+
+#define ARM_SMMU_ECMDQ_CP_BASE 0x4000
+#define ECMDQ_CP_ADDR GENMASK_ULL(51, 12)
+#define ECMDQ_CP_CMDQGS GENMASK_ULL(2, 1)
+#define ECMDQ_CP_PRESET (1UL << 0)
+#define ECMDQ_CP_RRESET_SIZE 0x10000
+
#define ARM_SMMU_REG_SZ 0xe00
/* Common MSI config fields */
@@ -522,6 +544,8 @@ struct arm_smmu_ll_queue {
struct arm_smmu_queue {
struct arm_smmu_ll_queue llq;
int irq; /* Wired interrupt */
+ u32 ecmdq_prod;
+ rwlock_t ecmdq_lock;
__le64 *base;
dma_addr_t base_dma;
@@ -547,6 +571,11 @@ struct arm_smmu_cmdq {
atomic_t lock;
};
+struct arm_smmu_ecmdq {
+ struct arm_smmu_cmdq cmdq;
+ void __iomem *base;
+};
+
struct arm_smmu_cmdq_batch {
u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
int num;
@@ -640,6 +669,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_BTM (1 << 16)
#define ARM_SMMU_FEAT_SVA (1 << 17)
#define ARM_SMMU_FEAT_E2H (1 << 18)
+#define ARM_SMMU_FEAT_ECMDQ (1 << 19)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
@@ -647,6 +677,12 @@ struct arm_smmu_device {
#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
u32 options;
+ union {
+ u32 nr_ecmdq;
+ u32 ecmdq_enabled;
+ };
+ struct arm_smmu_ecmdq *__percpu *ecmdq;
+
struct arm_smmu_cmdq cmdq;
struct arm_smmu_evtq evtq;
struct arm_smmu_priq priq;
--
2.26.0.106.g9fadedd
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
next prev parent reply other threads:[~2021-06-26 11:02 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-26 11:01 [PATCH RFC 0/8] iommu/arm-smmu-v3: add support for ECMDQ register mode Zhen Lei
2021-06-26 11:01 ` [PATCH RFC 1/8] iommu/arm-smmu-v3: Use command queue batching helpers to improve performance Zhen Lei
2021-06-26 11:01 ` [PATCH RFC 2/8] iommu/arm-smmu-v3: Add and use static helper function arm_smmu_cmdq_issue_cmd_with_sync() Zhen Lei
2021-08-10 18:24 ` Will Deacon
2021-08-11 2:16 ` Leizhen (ThunderTown)
2021-08-11 10:09 ` Will Deacon
2021-08-11 10:31 ` John Garry
2021-08-11 10:33 ` Will Deacon
2021-08-11 11:15 ` John Garry
2021-06-26 11:01 ` [PATCH RFC 3/8] iommu/arm-smmu-v3: Add and use static helper function arm_smmu_get_cmdq() Zhen Lei
2021-06-26 11:01 ` [PATCH RFC 4/8] iommu/arm-smmu-v3: Extract reusable function __arm_smmu_cmdq_skip_err() Zhen Lei
2021-06-26 11:01 ` Zhen Lei [this message]
2021-06-26 11:01 ` [PATCH RFC 6/8] iommu/arm-smmu-v3: Ensure that a set of associated commands are inserted in the same ECMDQ Zhen Lei
2021-06-26 11:01 ` [PATCH RFC 7/8] iommu/arm-smmu-v3: Add arm_smmu_ecmdq_issue_cmdlist() for non-shared ECMDQ Zhen Lei
2021-06-26 11:01 ` [PATCH RFC 8/8] iommu/arm-smmu-v3: Add support for less than one ECMDQ per core Zhen Lei
2021-08-10 18:35 ` [PATCH RFC 0/8] iommu/arm-smmu-v3: add support for ECMDQ register mode Will Deacon
2021-08-11 2:07 ` Leizhen (ThunderTown)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210626110130.2416-6-thunder.leizhen@huawei.com \
--to=thunder.leizhen@huawei.com \
--cc=iommu@lists.linux-foundation.org \
--cc=joro@8bytes.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=robin.murphy@arm.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox