Linux IOMMU Development
 help / color / mirror / Atom feed
From: Nicolin Chen <nicolinc@nvidia.com>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: Robin Murphy <robin.murphy@arm.com>, <kevin.tian@intel.com>,
	<yi.l.liu@intel.com>, <eric.auger@redhat.com>,
	<baolu.lu@linux.intel.com>,
	<shameerali.kolothum.thodi@huawei.com>,
	<jean-philippe@linaro.org>, <iommu@lists.linux.dev>
Subject: Re: Cache Invalidation Solution for Nested IOMMU
Date: Tue, 4 Apr 2023 22:45:56 -0700	[thread overview]
Message-ID: <ZC0LFM9hxF9wY76w@Asurada-Nvidia> (raw)
In-Reply-To: <ZCxOMSvR+6rYbIfy@nvidia.com>

On Tue, Apr 04, 2023 at 01:20:01PM -0300, Jason Gunthorpe wrote:
> On Mon, Apr 03, 2023 at 05:02:09PM -0700, Nicolin Chen wrote:
> 
> > My preference is to have a mmap'd page, so the interface can
> > be reused later by VCMDQ too. Performance-wise, it should be
> > good enough, since it does batching, IMHO.
> 
> You can't reuse mmaping the queue page with vcmdq, so it doesn't seem
> meaningful to me.
> 
> There should be no mmap on the SW path. If you need a half step
> between an ioctl as a batch and a full vhost-like queue scheme then
> using iouring with pre-registered memory would be appropriate.

I've changed to a non-mmap approach in which the host kernel reads
the guest queue directly and inserts all invalidation commands
into the host queue.

The qsz could be as large as 128 x 64K pages. So, there has to
be a big array of pages getting pinned in the handler.

(The handler still needs a pathway to report errors. I will add
 that tomorrow.)

Does the implementation below look fine in general?

Thanks
Nicolin

[User Data]
/**
 * struct iommu_hwpt_invalidate_arm_smmuv3 - ARM SMMUv3 cache invalidation info
 * @cmdq_base: User space base virtual address of user command queue
 * @cmdq_entry_size: Entry size, in bytes, of user command queue
 * @cmdq_log2size: User command queue size as log 2 (entries)
 *                 Refer to LOG2SIZE field of SMMU_CMDQ_BASE register
 * @cmdq_prod: Producer index of user command queue
 * @cmdq_cons: Consumer index of user command queue; updated by the kernel
 *             handler to report how far the queue was consumed
 *
 * Passed from user space to describe a guest-owned SMMU command queue so the
 * host can read and sanitize its invalidation commands. This is a UAPI
 * structure: layout and field sizes must remain stable.
 */
struct iommu_hwpt_invalidate_arm_smmuv3 {
	__u64 cmdq_base;
	__u32 cmdq_entry_size;
	__u32 cmdq_log2size;
	__u32 cmdq_prod;
	__u32 cmdq_cons;
};

[Host Handler]
/*
 * arm_smmu_fix_user_cmd - Sanitize one guest command in place for host issue
 * @smmu_domain: Nested domain whose stage-2 config supplies the host VMID
 * @cmd: Pointer to the (at least 2-dword) command copied from the guest queue
 *
 * Rewrites guest-provided IDs with host-owned ones so the command is safe to
 * insert into the host command queue:
 *  - TLBI commands get their VMID field overwritten with the host's S2 VMID;
 *  - CFGI/ATC commands get the guest SID translated to the host stream ID
 *    via the smmu->user_streams xarray.
 *
 * Returns 0 on success, -ENODEV if the SID has no registered user stream,
 * or -EOPNOTSUPP for any opcode not handled here (caller skips those).
 */
static int arm_smmu_fix_user_cmd(struct arm_smmu_domain *smmu_domain, u64 *cmd)
{
	struct arm_smmu_stream *stream;

	switch (*cmd & CMDQ_0_OP) {
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Guest-visible NSNH_ALL is executed as NH_ALL in this VMID */
		*cmd &= ~CMDQ_0_OP;
		*cmd |= CMDQ_OP_TLBI_NH_ALL;
		fallthrough;
	case CMDQ_OP_TLBI_NH_VA:
	case CMDQ_OP_TLBI_NH_VAA:
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_NH_ASID:
		/* Force the host-assigned VMID regardless of what the guest set */
		*cmd &= ~CMDQ_TLBI_0_VMID;
		*cmd |= FIELD_PREP(CMDQ_TLBI_0_VMID,
				   smmu_domain->s2->s2_cfg.vmid);
		break;
	case CMDQ_OP_ATC_INV:
	case CMDQ_OP_CFGI_CD:
	case CMDQ_OP_CFGI_CD_ALL:
		/*
		 * NOTE(review): the SID for ATC_INV is extracted with
		 * CMDQ_CFGI_0_SID — confirm the SID field offset matches
		 * CMDQ_ATC_0_SID in the architecture, or use the ATC macro.
		 */
		xa_lock(&smmu_domain->smmu->user_streams);
		stream = xa_load(&smmu_domain->smmu->user_streams,
				 FIELD_GET(CMDQ_CFGI_0_SID, *cmd));
		xa_unlock(&smmu_domain->smmu->user_streams);
		/*
		 * NOTE(review): stream->id is read after dropping the xa
		 * lock — confirm the stream cannot be freed concurrently.
		 */
		if (!stream)
			return -ENODEV;
		*cmd &= ~CMDQ_CFGI_0_SID;
		*cmd |= FIELD_PREP(CMDQ_CFGI_0_SID, stream->id);
		break;
	default:
		/* Unsupported opcode: caller drops the command from the batch */
		return -EOPNOTSUPP;
	}
	pr_debug("Fixed user CMD: %016llx : %016llx\n", cmd[1], cmd[0]);

	return 0;
}

/*
 * arm_smmu_cache_invalidate_user - Replay a guest command queue on the host
 * @domain: Nested (IOMMU_DOMAIN_NESTED) domain being invalidated
 * @user_data: struct iommu_hwpt_invalidate_arm_smmuv3 from user space
 *
 * Pins the guest-resident command queue, walks it from cons to prod, fixes
 * each supported command via arm_smmu_fix_user_cmd(), and issues the whole
 * batch to the host command queue. On return, inv->cmdq_cons reports how far
 * the guest queue was consumed; a malformed command sets CERROR_ILL in the
 * cons error field.
 */
static void arm_smmu_cache_invalidate_user(struct iommu_domain *domain,
					   void *user_data)
{
	const u32 cons_err = FIELD_PREP(CMDQ_CONS_ERR, CMDQ_ERR_CERROR_ILL_IDX);
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct iommu_hwpt_invalidate_arm_smmuv3 *inv = user_data;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	/* Shadow queue descriptor overlaying the pinned guest memory */
	struct arm_smmu_queue q = {
		.llq = {
			.prod = inv->cmdq_prod,
			.cons = inv->cmdq_cons,
			.max_n_shift = inv->cmdq_log2size,
		},
		.ent_dwords = inv->cmdq_entry_size / sizeof(u64),
	};
	/* May be negative when prod has wrapped past cons; fixed up below */
	int ncmds = inv->cmdq_prod - inv->cmdq_cons;
	unsigned int nents = 1 << q.llq.max_n_shift;
	size_t qsz = nents * inv->cmdq_entry_size;
	/*
	 * NOTE(review): this rounds down and ignores the sub-page offset of
	 * cmdq_base — a queue that is not page-aligned or not a whole number
	 * of pages will be under-pinned. Should be a DIV_ROUND_UP over
	 * (offset + qsz) — TODO confirm and fix.
	 */
	unsigned long npages = qsz >> PAGE_SHIFT;
	struct page **pages;
	long pinned;
	u64 *cmds;
	int i = 0;
	int ret;

	/* Only valid for a fully set-up nested domain */
	if (!smmu || !smmu_domain->s2 || domain->type != IOMMU_DOMAIN_NESTED)
		return;
	if (WARN_ON(q.ent_dwords != CMDQ_ENT_DWORDS))
		return;
	/* Nothing to do if the guest queue has no pending commands */
	if (WARN_ON(queue_empty(&q.llq)))
		return;
	WARN_ON(q.llq.max_n_shift > smmu->cmdq.q.llq.max_n_shift);

	/* qsz can be up to 128 x 64K pages, so the page array is allocated */
	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return;

	/* Wrapped queue: actual command count is ncmds modulo queue size */
	if (ncmds <= 0)
		ncmds += nents;
	cmds = kcalloc(ncmds, inv->cmdq_entry_size, GFP_KERNEL);
	if (!cmds)
		goto out_free_pages;

	pinned = get_user_pages(inv->cmdq_base, npages, FOLL_GET, pages, NULL);
	if (pinned != npages)
		goto out_put_page;
	/*
	 * NOTE(review): indexing the whole queue through page_to_virt of the
	 * first page assumes the pinned pages are contiguous in the kernel
	 * direct map, which is only true if the guest queue is physically
	 * contiguous — confirm, or vmap() the page array instead.
	 */
	q.base = page_to_virt(pages[0]) + (inv->cmdq_base & (PAGE_SIZE - 1));

	/* Copy, sanitize, and collect each pending command into cmds[] */
	do {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		queue_read(cmd, Q_ENT(&q, q.llq.cons), q.ent_dwords);
		ret = arm_smmu_fix_user_cmd(smmu_domain, cmd);
		if (ret && ret != -EOPNOTSUPP) {
			/* Report CERROR_ILL at the faulting cons index */
			q.llq.cons |= cons_err;
			goto out_put_page;
		}
		/* Unsupported commands (-EOPNOTSUPP) are silently skipped */
		if (!ret)
			i++;
		queue_inc_cons(&q.llq);
	} while (!queue_empty(&q.llq));

	/* Issue the sanitized batch with a trailing CMD_SYNC */
	ret = arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
	/* FIXME return CMD_SYNC timeout */
out_put_page:
	/* i is reused as the unpin index here; the command count is done with */
	for (i = 0; i < pinned; i++)
		put_page(pages[i]);
	kfree(cmds);
out_free_pages:
	kfree(pages);
	/* Report consumed position (and any error flag) back to user space */
	inv->cmdq_cons = q.llq.cons;
}

  parent reply	other threads:[~2023-04-05  5:46 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-03  0:33 Cache Invalidation Solution for Nested IOMMU Nicolin Chen
2023-04-03  7:26 ` Liu, Yi L
2023-04-03  8:39   ` Tian, Kevin
2023-04-03 15:24     ` Nicolin Chen
2023-04-04  2:42       ` Tian, Kevin
2023-04-04  3:12         ` Nicolin Chen
2023-04-03 12:23   ` Jason Gunthorpe
2023-04-03  8:00 ` Tian, Kevin
2023-04-03 14:29   ` Nicolin Chen
2023-04-04  2:15     ` Tian, Kevin
2023-04-04  2:47       ` Nicolin Chen
2023-04-03 14:08 ` Jason Gunthorpe
2023-04-03 14:51   ` Nicolin Chen
2023-04-03 19:15     ` Robin Murphy
2023-04-04  0:02       ` Nicolin Chen
2023-04-04 16:20         ` Jason Gunthorpe
2023-04-04 16:50           ` Shameerali Kolothum Thodi
2023-04-05 11:57             ` Jason Gunthorpe
2023-04-06  6:23             ` Zhangfei Gao
2023-04-06  6:39               ` Nicolin Chen
2023-04-06 11:40               ` Jason Gunthorpe
2023-04-10  1:08                 ` Nicolin Chen
2023-04-11  9:07                   ` Jean-Philippe Brucker
2023-04-11 11:57                     ` Jason Gunthorpe
2023-04-11 18:39                       ` Nicolin Chen
2023-04-11 18:41                         ` Jason Gunthorpe
2023-04-11 19:02                           ` Nicolin Chen
2023-04-11 18:43                     ` Nicolin Chen
2023-04-12  2:47                   ` Zhangfei Gao
2023-04-12  5:47                     ` Nicolin Chen
2023-05-03 15:14                     ` Shameerali Kolothum Thodi
2023-05-03 23:44                       ` Nicolin Chen
2023-04-05  5:45           ` Nicolin Chen [this message]
2023-04-05 11:37             ` Jason Gunthorpe
2023-04-05 15:34               ` Nicolin Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZC0LFM9hxF9wY76w@Asurada-Nvidia \
    --to=nicolinc@nvidia.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=eric.auger@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=jean-philippe@linaro.org \
    --cc=jgg@nvidia.com \
    --cc=kevin.tian@intel.com \
    --cc=robin.murphy@arm.com \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=yi.l.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox