public inbox for linux-arm-kernel@lists.infradead.org
 help / color / mirror / Atom feed
From: john.garry@huawei.com (John Garry)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v5 1/2] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock
Date: Thu, 18 Oct 2018 11:55:47 +0100	[thread overview]
Message-ID: <2c4e00a2-cd53-a6f5-8561-97379dcf9c02@huawei.com> (raw)
In-Reply-To: <61b4c3e5f1322dfe96ca2062a7fe058298340996.1539782799.git.robin.murphy@arm.com>

On 17/10/2018 14:56, Robin Murphy wrote:
> Even without the MSI trick, we can still do a lot better than hogging
> the entire queue while it drains. All we actually need to do for the
> necessary guarantee of completion is wait for our particular command to
> have been consumed - as long as we keep track of where we inserted it,
> there is no need to block other CPUs from adding further commands in the
> meantime. There is one theoretical (but incredibly unlikely) edge case
> to avoid, where cons has wrapped twice to still appear 'behind' the sync
> position - this is easily disambiguated by adding a generation count to
> the queue to indicate when prod wraps, since cons cannot wrap twice
> without prod having wrapped at least once.
>
> This also makes it reasonable to separate the two conceptually different
> modes of polling such that command insertion - which really wants to be
> fair and have minimal latency - is not subject to exponential backoff,
> and returns to its original implementation.
>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>
> v5:
>  - Rework to incorporate the back-to-back sync elision.
>  - Refactor the generation count slightly to preemptively help with
>    the HiSilicon MSI workaround.
>  - Split the cleanup into a separate patch for ease of review (it could
>    happily be squashed when applied).
>
> The fundamental logic is copied directly from v4, but I've dropped the
> previous tested-by since there are a fair few subtle changes in how it's
> integrated. Patches are based on Will's iommu/devel branch plus my "Fix
> big-endian CMD_SYNC writes" patch.
>
> Robin.
>
>  drivers/iommu/arm-smmu-v3.c | 94 +++++++++++++++++++++++++++----------
>  1 file changed, 69 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 867ba548c2cc..da8a91d116bf 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -588,6 +588,7 @@ struct arm_smmu_device {
>  	struct arm_smmu_strtab_cfg	strtab_cfg;
>
>  	u32				sync_count;
> +	int				cmdq_generation;
>
>  	/* IOMMU core code handle */
>  	struct iommu_device		iommu;
> @@ -676,6 +677,17 @@ static bool queue_empty(struct arm_smmu_queue *q)
>  	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
>  }
>
> +static bool queue_behind(struct arm_smmu_queue *q, u32 idx)
> +{
> +	return Q_IDX(q, q->cons) < Q_IDX(q, idx);
> +}
> +
> +static bool queue_ahead_not_wrapped(struct arm_smmu_queue *q, u32 idx)
> +{
> +	return Q_IDX(q, q->cons) >= Q_IDX(q, idx) &&
> +	       Q_WRP(q, q->cons) == Q_WRP(q, idx);
> +}
> +
>  static void queue_sync_cons(struct arm_smmu_queue *q)
>  {
>  	q->cons = readl_relaxed(q->cons_reg);
> @@ -709,33 +721,19 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>  	writel(q->prod, q->prod_reg);
>  }
>
> -/*
> - * Wait for the SMMU to consume items. If sync is true, wait until the queue
> - * is empty. Otherwise, wait until there is at least one free slot.
> - */
> -static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
> +static int queue_poll_cons(struct arm_smmu_queue *q, bool wfe)
>  {
> -	ktime_t timeout;
> -	unsigned int delay = 1, spin_cnt = 0;
> +	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
>
> -	/* Wait longer if it's a CMD_SYNC */
> -	timeout = ktime_add_us(ktime_get(), sync ?
> -					    ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
> -					    ARM_SMMU_POLL_TIMEOUT_US);
> -
> -	while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
> +	while (queue_sync_cons(q), queue_full(q)) {
>  		if (ktime_compare(ktime_get(), timeout) > 0)
>  			return -ETIMEDOUT;
>
>  		if (wfe) {
>  			wfe();
> -		} else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
> -			cpu_relax();
> -			continue;
>  		} else {
> -			udelay(delay);
> -			delay *= 2;
> -			spin_cnt = 0;
> +			cpu_relax();
> +			udelay(1);
>  		}
>  	}
>
> @@ -905,8 +903,11 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
>
>  	smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
>
> +	if (Q_IDX(q, q->prod + 1) == 0)
> +		WRITE_ONCE(smmu->cmdq_generation, smmu->cmdq_generation + 1);
> +
>  	while (queue_insert_raw(q, cmd) == -ENOSPC) {
> -		if (queue_poll_cons(q, false, wfe))
> +		if (queue_poll_cons(q, wfe))
>  			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
>  	}
>  }
> @@ -945,6 +946,48 @@ static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
>  	return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
>  }
>
> +static int arm_smmu_sync_poll_cons(struct arm_smmu_device *smmu, u32 sync_idx,
> +				   int sync_gen)
> +{
> +	struct arm_smmu_queue *q = &smmu->cmdq.q;
> +	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
> +	unsigned int delay = 1, spin_cnt = 0;
> +	ktime_t timeout;
> +
> +	timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
> +	do {
> +		queue_sync_cons(q);
> +		/*
> +		 * If we see updates quickly enough, cons has passed sync_idx,
> +		 * but not yet wrapped. At worst, cons might have actually
> +		 * wrapped an even number of times, but that still guarantees
> +		 * the original sync must have been consumed.
> +		 */
> +		if (queue_ahead_not_wrapped(q, sync_idx))
> +			return 0;
> +		/*
> +		 * Otherwise, cons may have passed sync_idx and wrapped one or
> +		 * more times to appear behind it again, but in that case prod
> +		 * must also be one or more generations ahead.
> +		 */
> +		if (queue_behind(q, sync_idx) &&
> +		    READ_ONCE(smmu->cmdq_generation) != sync_gen)
> +			return 0;
> +
> +		if (wfe) {
> +			wfe();
> +		} else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
> +			cpu_relax();
> +		} else {
> +			udelay(delay);
> +			delay *= 2;
> +			spin_cnt = 0;
> +		}
> +	} while (ktime_before(ktime_get(), timeout));
> +
> +	return -ETIMEDOUT;
> +}
> +
>  static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
>  {
>  	u64 cmd[CMDQ_ENT_DWORDS];
> @@ -976,18 +1019,19 @@ static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
>  {
>  	u64 cmd[CMDQ_ENT_DWORDS];
>  	unsigned long flags;
> -	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
>  	struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
> -	int ret;
> +	int sync_idx, sync_gen;
>
>  	arm_smmu_cmdq_build_cmd(cmd, &ent);
>
>  	spin_lock_irqsave(&smmu->cmdq.lock, flags);
> -	arm_smmu_cmdq_insert_cmd(smmu, cmd);
> -	ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
> +	if (smmu->prev_cmd_opcode != CMDQ_OP_CMD_SYNC)
> +		arm_smmu_cmdq_insert_cmd(smmu, cmd);

Hi Robin,

If we did stop rebuilding the non-MSI command as I suggested, then we 
would not have the case of building the command and then discarding it, 
right?

Thanks,
John

> +	sync_idx = smmu->cmdq.q.prod;
> +	sync_gen = READ_ONCE(smmu->cmdq_generation);
>  	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
>
> -	return ret;
> +	return arm_smmu_sync_poll_cons(smmu, sync_idx, sync_gen);
>  }
>
>  static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
>

  parent reply	other threads:[~2018-10-18 10:55 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-17 13:56 [PATCH v5 1/2] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock Robin Murphy
2018-10-17 13:56 ` [PATCH v5 2/2] iommu/arm-smmu-v3: Reunify arm_smmu_cmdq_issue_sync() Robin Murphy
2018-10-17 14:38   ` John Garry
2018-10-18 11:18     ` Robin Murphy
2018-10-18 11:29       ` John Garry
2018-10-18  8:58   ` Andrew Murray
2018-10-22 12:29   ` John Garry
2018-10-18  8:56 ` [PATCH v5 1/2] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock Andrew Murray
2018-10-18 10:55 ` John Garry [this message]
2018-10-18 11:19   ` Robin Murphy
2018-10-18 11:48     ` John Garry
2018-10-19 14:30       ` John Garry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2c4e00a2-cd53-a6f5-8561-97379dcf9c02@huawei.com \
    --to=john.garry@huawei.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox.