From: john.garry@huawei.com (John Garry)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v5 1/2] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock
Date: Thu, 18 Oct 2018 11:55:47 +0100 [thread overview]
Message-ID: <2c4e00a2-cd53-a6f5-8561-97379dcf9c02@huawei.com> (raw)
In-Reply-To: <61b4c3e5f1322dfe96ca2062a7fe058298340996.1539782799.git.robin.murphy@arm.com>
On 17/10/2018 14:56, Robin Murphy wrote:
> Even without the MSI trick, we can still do a lot better than hogging
> the entire queue while it drains. All we actually need to do for the
> necessary guarantee of completion is wait for our particular command to
> have been consumed - as long as we keep track of where we inserted it,
> there is no need to block other CPUs from adding further commands in the
> meantime. There is one theoretical (but incredibly unlikely) edge case
> to avoid, where cons has wrapped twice to still appear 'behind' the sync
> position - this is easily disambiguated by adding a generation count to
> the queue to indicate when prod wraps, since cons cannot wrap twice
> without prod having wrapped at least once.
>
> This also makes it reasonable to separate the two conceptually different
> modes of polling such that command insertion - which really wants to be
> fair and have minimal latency - is not subject to exponential backoff,
> and returns to its original implementation.
>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>
> v5:
> - Rework to incorporate the back-to-back sync elision.
> - Refactor the generation count slightly to preemptively help with
> the HiSilicon MSI workaround.
> - Split the cleanup into a separate patch for ease of review (it could
> happily be squashed when applied).
>
> The fundamental logic is copied directly from v4, but I've dropped the
> previous tested-by since there are a fair few subtle changes in how it's
> integrated. Patches are based on Will's iommu/devel branch plus my "Fix
> big-endian CMD_SYNC writes" patch.
>
> Robin.
>
> drivers/iommu/arm-smmu-v3.c | 94 +++++++++++++++++++++++++++----------
> 1 file changed, 69 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 867ba548c2cc..da8a91d116bf 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -588,6 +588,7 @@ struct arm_smmu_device {
> struct arm_smmu_strtab_cfg strtab_cfg;
>
> u32 sync_count;
> + int cmdq_generation;
>
> /* IOMMU core code handle */
> struct iommu_device iommu;
> @@ -676,6 +677,17 @@ static bool queue_empty(struct arm_smmu_queue *q)
> Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
> }
>
> +static bool queue_behind(struct arm_smmu_queue *q, u32 idx)
> +{
> + return Q_IDX(q, q->cons) < Q_IDX(q, idx);
> +}
> +
> +static bool queue_ahead_not_wrapped(struct arm_smmu_queue *q, u32 idx)
> +{
> + return Q_IDX(q, q->cons) >= Q_IDX(q, idx) &&
> + Q_WRP(q, q->cons) == Q_WRP(q, idx);
> +}
> +
> static void queue_sync_cons(struct arm_smmu_queue *q)
> {
> q->cons = readl_relaxed(q->cons_reg);
> @@ -709,33 +721,19 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
> writel(q->prod, q->prod_reg);
> }
>
> -/*
> - * Wait for the SMMU to consume items. If sync is true, wait until the queue
> - * is empty. Otherwise, wait until there is at least one free slot.
> - */
> -static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
> +static int queue_poll_cons(struct arm_smmu_queue *q, bool wfe)
> {
> - ktime_t timeout;
> - unsigned int delay = 1, spin_cnt = 0;
> + ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
>
> - /* Wait longer if it's a CMD_SYNC */
> - timeout = ktime_add_us(ktime_get(), sync ?
> - ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
> - ARM_SMMU_POLL_TIMEOUT_US);
> -
> - while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
> + while (queue_sync_cons(q), queue_full(q)) {
> if (ktime_compare(ktime_get(), timeout) > 0)
> return -ETIMEDOUT;
>
> if (wfe) {
> wfe();
> - } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
> - cpu_relax();
> - continue;
> } else {
> - udelay(delay);
> - delay *= 2;
> - spin_cnt = 0;
> + cpu_relax();
> + udelay(1);
> }
> }
>
> @@ -905,8 +903,11 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
>
> smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
>
> + if (Q_IDX(q, q->prod + 1) == 0)
> + WRITE_ONCE(smmu->cmdq_generation, smmu->cmdq_generation + 1);
> +
> while (queue_insert_raw(q, cmd) == -ENOSPC) {
> - if (queue_poll_cons(q, false, wfe))
> + if (queue_poll_cons(q, wfe))
> dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
> }
> }
> @@ -945,6 +946,48 @@ static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
> return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
> }
>
> +static int arm_smmu_sync_poll_cons(struct arm_smmu_device *smmu, u32 sync_idx,
> + int sync_gen)
> +{
> + struct arm_smmu_queue *q = &smmu->cmdq.q;
> + bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
> + unsigned int delay = 1, spin_cnt = 0;
> + ktime_t timeout;
> +
> + timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
> + do {
> + queue_sync_cons(q);
> + /*
> + * If we see updates quickly enough, cons has passed sync_idx,
> + * but not yet wrapped. At worst, cons might have actually
> + * wrapped an even number of times, but that still guarantees
> + * the original sync must have been consumed.
> + */
> + if (queue_ahead_not_wrapped(q, sync_idx))
> + return 0;
> + /*
> + * Otherwise, cons may have passed sync_idx and wrapped one or
> + * more times to appear behind it again, but in that case prod
> + * must also be one or more generations ahead.
> + */
> + if (queue_behind(q, sync_idx) &&
> + READ_ONCE(smmu->cmdq_generation) != sync_gen)
> + return 0;
> +
> + if (wfe) {
> + wfe();
> + } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
> + cpu_relax();
> + } else {
> + udelay(delay);
> + delay *= 2;
> + spin_cnt = 0;
> + }
> + } while (ktime_before(ktime_get(), timeout));
> +
> + return -ETIMEDOUT;
> +}
> +
> static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
> {
> u64 cmd[CMDQ_ENT_DWORDS];
> @@ -976,18 +1019,19 @@ static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
> {
> u64 cmd[CMDQ_ENT_DWORDS];
> unsigned long flags;
> - bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
> struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
> - int ret;
> + int sync_idx, sync_gen;
>
> arm_smmu_cmdq_build_cmd(cmd, &ent);
>
> spin_lock_irqsave(&smmu->cmdq.lock, flags);
> - arm_smmu_cmdq_insert_cmd(smmu, cmd);
> - ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
> + if (smmu->prev_cmd_opcode != CMDQ_OP_CMD_SYNC)
> + arm_smmu_cmdq_insert_cmd(smmu, cmd);
Hi Robin,
If we did stop rebuilding the non-MSI command as I suggested, then we
would not have the case of building the command and then discarding it,
right?
Thanks,
John
> + sync_idx = smmu->cmdq.q.prod;
> + sync_gen = READ_ONCE(smmu->cmdq_generation);
> spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
>
> - return ret;
> + return arm_smmu_sync_poll_cons(smmu, sync_idx, sync_gen);
> }
>
> static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
>
next prev parent reply other threads:[~2018-10-18 10:55 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-17 13:56 [PATCH v5 1/2] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock Robin Murphy
2018-10-17 13:56 ` [PATCH v5 2/2] iommu/arm-smmu-v3: Reunify arm_smmu_cmdq_issue_sync() Robin Murphy
2018-10-17 14:38 ` John Garry
2018-10-18 11:18 ` Robin Murphy
2018-10-18 11:29 ` John Garry
2018-10-18 8:58 ` Andrew Murray
2018-10-22 12:29 ` John Garry
2018-10-18 8:56 ` [PATCH v5 1/2] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock Andrew Murray
2018-10-18 10:55 ` John Garry [this message]
2018-10-18 11:19 ` Robin Murphy
2018-10-18 11:48 ` John Garry
2018-10-19 14:30 ` John Garry
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2c4e00a2-cd53-a6f5-8561-97379dcf9c02@huawei.com \
--to=john.garry@huawei.com \
--cc=linux-arm-kernel@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox