From: Will Deacon <will@kernel.org>
To: iommu@lists.linux-foundation.org
Cc: Vijay Kilary <vkilari@codeaurora.org>,
Jean-Philippe Brucker <jean-philippe.brucker@arm.com>,
Will Deacon <will@kernel.org>, Jan Glauber <jglauber@marvell.com>,
Alex Williamson <alex.williamson@redhat.com>,
Jayachandran Chandrasekharan Nair <jnair@marvell.com>,
Jon Masters <jcm@redhat.com>, Robin Murphy <robin.murphy@arm.com>
Subject: [RFC PATCH v2 19/19] iommu/arm-smmu-v3: Defer TLB invalidation until ->iotlb_sync()
Date: Thu, 11 Jul 2019 18:19:27 +0100 [thread overview]
Message-ID: <20190711171927.28803-20-will@kernel.org> (raw)
In-Reply-To: <20190711171927.28803-1-will@kernel.org>
Update the iommu_iotlb_gather structure passed to ->tlb_add_page() and
use this information to defer all TLB invalidation until ->iotlb_sync().
This drastically reduces contention on the command queue, since we can
insert our commands in batches rather than one-by-one.
Signed-off-by: Will Deacon <will@kernel.org>
---
drivers/iommu/arm-smmu-v3.c | 71 +++++++++++++++++++++++++++------------------
1 file changed, 42 insertions(+), 29 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 5ee2c6389df3..b71ab839db3d 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -303,6 +303,13 @@
#define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
+/*
+ * This is used to size the command queue and therefore must be at least
+ * BITS_PER_LONG so that the valid_map works correctly (it relies on the
+ * total number of queue entries being a multiple of BITS_PER_LONG).
+ */
+#define CMDQ_BATCH_ENTRIES BITS_PER_LONG
+
#define CMDQ_0_OP GENMASK_ULL(7, 0)
#define CMDQ_0_SSV (1UL << 11)
@@ -1930,15 +1937,17 @@ static void arm_smmu_tlb_inv_context(void *cookie)
arm_smmu_cmdq_issue_sync(smmu);
}
-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
+ size_t granule, bool leaf,
+ struct arm_smmu_domain *smmu_domain)
{
- struct arm_smmu_domain *smmu_domain = cookie;
+ u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
struct arm_smmu_device *smmu = smmu_domain->smmu;
+ unsigned long end = iova + size;
+ int i = 0;
struct arm_smmu_cmdq_ent cmd = {
.tlbi = {
.leaf = leaf,
- .addr = iova,
},
};
@@ -1950,37 +1959,41 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}
- do {
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- cmd.tlbi.addr += granule;
- } while (size -= granule);
+ while (iova < end) {
+ if (i == CMDQ_BATCH_ENTRIES) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
+ i = 0;
+ }
+
+ cmd.tlbi.addr = iova;
+ arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
+ iova += granule;
+ i++;
+ }
+
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
}
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
- arm_smmu_tlb_inv_range_nosync(iova, granule, granule, true, cookie);
+ struct arm_smmu_domain *smmu_domain = cookie;
+ struct iommu_domain *domain = &smmu_domain->domain;
+
+ iommu_iotlb_gather_add_page(domain, gather, iova, granule);
}
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
-
- arm_smmu_tlb_inv_range_nosync(iova, size, granule, false, cookie);
- arm_smmu_cmdq_issue_sync(smmu);
+ arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
}
static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
-
- arm_smmu_tlb_inv_range_nosync(iova, size, granule, true, cookie);
- arm_smmu_cmdq_issue_sync(smmu);
+ arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
}
static const struct iommu_flush_ops arm_smmu_flush_ops = {
@@ -2391,10 +2404,10 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
- struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (smmu)
- arm_smmu_cmdq_issue_sync(smmu);
+ arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
+ gather->pgsize, true, smmu_domain);
}
static phys_addr_t
@@ -3321,15 +3334,15 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
/* Queue sizes, capped to ensure natural alignment */
smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
- if (smmu->cmdq.q.llq.max_n_shift < ilog2(BITS_PER_LONG)) {
+ if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
/*
- * The cmdq valid_map relies on the total number of entries
- * being a multiple of BITS_PER_LONG. There's also no way
- * we can handle the weird alignment restrictions on the
- * base pointer for a unit-length queue.
+ * We don't support splitting up batches, so one batch of
+ * commands plus an extra sync needs to fit inside the command
+ * queue. There's also no way we can handle the weird alignment
+ * restrictions on the base pointer for a unit-length queue.
*/
- dev_err(smmu->dev, "command queue size < %d entries not supported\n",
- BITS_PER_LONG);
+ dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
+ CMDQ_BATCH_ENTRIES);
return -ENXIO;
}
--
2.11.0
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
next prev parent reply other threads:[~2019-07-11 17:30 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-11 17:19 [RFC PATCH v2 00/19] Try to reduce lock contention on the SMMUv3 command queue Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 01/19] iommu: Remove empty iommu_tlb_range_add() callback from iommu_ops Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 02/19] iommu/io-pgtable-arm: Remove redundant call to io_pgtable_tlb_sync() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 03/19] iommu/io-pgtable: Rename iommu_gather_ops to iommu_flush_ops Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 04/19] iommu: Introduce struct iommu_iotlb_gather for batching TLB flushes Will Deacon
2019-07-24 7:19 ` Joerg Roedel
2019-07-24 7:41 ` Will Deacon
2019-07-25 7:58 ` Joerg Roedel
2019-07-11 17:19 ` [RFC PATCH v2 05/19] iommu: Introduce iommu_iotlb_gather_add_page() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 06/19] iommu: Pass struct iommu_iotlb_gather to ->unmap() and ->iotlb_sync() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 07/19] iommu/io-pgtable: Introduce tlb_flush_walk() and tlb_flush_leaf() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 08/19] iommu/io-pgtable: Hook up ->tlb_flush_walk() and ->tlb_flush_leaf() in drivers Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 09/19] iommu/io-pgtable-arm: Call ->tlb_flush_walk() and ->tlb_flush_leaf() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 10/19] iommu/io-pgtable: Replace ->tlb_add_flush() with ->tlb_add_page() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 11/19] iommu/io-pgtable: Remove unused ->tlb_sync() callback Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 12/19] iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->unmap() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 13/19] iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->tlb_add_page() Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 14/19] iommu/arm-smmu-v3: Separate s/w and h/w views of prod and cons indexes Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 15/19] iommu/arm-smmu-v3: Drop unused 'q' argument from Q_OVF macro Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 16/19] iommu/arm-smmu-v3: Move low-level queue fields out of arm_smmu_queue Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 17/19] iommu/arm-smmu-v3: Operate directly on low-level queue where possible Will Deacon
2019-07-11 17:19 ` [RFC PATCH v2 18/19] iommu/arm-smmu-v3: Reduce contention during command-queue insertion Will Deacon
2019-07-19 11:04 ` John Garry
2019-07-24 12:15 ` Will Deacon
2019-07-24 14:03 ` John Garry
2019-07-24 14:07 ` Will Deacon
2019-07-24 8:20 ` John Garry
2019-07-24 14:33 ` Will Deacon
2019-07-25 11:31 ` John Garry
2019-07-11 17:19 ` Will Deacon [this message]
2019-07-19 4:25 ` [RFC PATCH v2 00/19] Try to reduce lock contention on the SMMUv3 command queue Ganapatrao Kulkarni
2019-07-24 12:28 ` Will Deacon
2019-07-24 9:58 ` John Garry
2019-07-24 12:20 ` Will Deacon
2019-07-24 14:25 ` John Garry
2019-07-24 14:48 ` Will Deacon
2019-07-25 10:11 ` John Garry
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190711171927.28803-20-will@kernel.org \
--to=will@kernel.org \
--cc=alex.williamson@redhat.com \
--cc=iommu@lists.linux-foundation.org \
--cc=jcm@redhat.com \
--cc=jean-philippe.brucker@arm.com \
--cc=jglauber@marvell.com \
--cc=jnair@marvell.com \
--cc=robin.murphy@arm.com \
--cc=vkilari@codeaurora.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.