public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Cc: linux-kernel@vger.kernel.org, iommu@lists.linux.dev,
	joro@8bytes.org, robin.murphy@arm.com, vasant.hegde@amd.com,
	kevin.tian@intel.com, jon.grimm@amd.com, santosh.shukla@amd.com,
	pandoh@google.com, kumaranand@google.com
Subject: Re: [PATCH v6 0/9] iommu/amd: Use 128-bit cmpxchg operation to update DTE
Date: Wed, 16 Oct 2024 11:22:37 -0300	[thread overview]
Message-ID: <20241016142237.GP3559746@nvidia.com> (raw)
In-Reply-To: <20241016051756.4317-1-suravee.suthikulpanit@amd.com>

On Wed, Oct 16, 2024 at 05:17:47AM +0000, Suravee Suthikulpanit wrote:
> This series modifies current implementation to use 128-bit cmpxchg to
> update DTE when needed as specified in the AMD I/O Virtualization
> Technology (IOMMU) Specification.
> 
> Please note that I have verified with the hardware designer, and they have
> confirmed that the IOMMU hardware has always been implemented with 256-bit
> read. The next revision of the IOMMU spec will be updated to correctly
> describe this part.  Therefore, I have updated the implementation to avoid
> unnecessary flushing.
> 
> Changes in v6:
> 
> * Patch 2, 4, 7: Newly add
> 
> * Patch 3, 5, 6, 7, 9: Add READ_ONCE() per Uros.
> 
> * Patch 3:
>   - Modify write_dte_[higher|lower]128() to avoid copying old DTE in the loop.
> 
> * Patch 5:
>   - Use dev_data->dte_cache to restore persistent DTE bits in set_dte_entry().
>   - Simplify make_clear_dte():
>     - Remove bit preservation logic.
>     - Remove non-SNP check for setting TV since it should not be needed.
> 
> * Patch 6:
>   - Use find_dev_data(..., alias) since the dev_data might not have been allocated.
>   - Move dev_iommu_priv_set() to before setup_aliases().

I wanted to see how far this was from being split up neatly like ARM is,
and I came up with this, which seems pretty good to me. This would
probably be the next step to get to, then you'd lift the individual
set functions higher up the call chain into their respective attach
functions.

static void set_dte_identity(struct amd_iommu *iommu,
			       struct iommu_dev_data *dev_data,
			       struct dev_table_entry *target)
{
	/*
	 * SNP does not support TV=1/Mode=1 in any case, and can't do IDENTITY
	 */
	if (WARN_ON(amd_iommu_snp_en))
		return;

	/* mode is zero */
	target->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
	if (dev_data->ats_enabled)
		target->data[1] |= DTE_FLAG_IOTLB;
	/* ppr is not allowed for identity */

	target->data128[0] |= dev_data->dte_cache.data128[0];
	target->data128[1] |= dev_data->dte_cache.data128[1];
}

/*
 * Populate @target with the guest-CR3 (nested translation) fields for
 * @dev_data: valid bit, GV/TV, the split GCR3 table address, guest page
 * table level, domain ID, and the optional PPR/IOTLB bits.  No-op if the
 * device has no GCR3 table.  The persistent per-device DTE bits cached in
 * dev_data->dte_cache are OR-ed back in at the end.
 */
static void set_dte_gcr3_table(struct amd_iommu *iommu,
			       struct iommu_dev_data *dev_data,
			       struct dev_table_entry *target)
{
	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
	u64 gcr3;

	if (!gcr3_info->gcr3_tbl)
		return;

	pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n",
		 __func__, dev_data->devid, gcr3_info->glx,
		 (unsigned long long)gcr3_info->gcr3_tbl);

	gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);

	/* The GCR3 table address is split across three DTE fields */
	target->data[0] |= DTE_FLAG_GV | DTE_FLAG_TV | DTE_FLAG_IR |
			   DTE_FLAG_IW | DTE_FLAG_V |
			   FIELD_PREP(DTE_GLX, gcr3_info->glx) |
			   FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);
	if (pdom_is_v2_pgtbl_mode(dev_data->domain))
		target->data[0] |= DTE_FLAG_GIOV;

	target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
			   FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);

	/* Guest page table can only support 4 and 5 levels  */
	target->data[2] |= FIELD_PREP(
		DTE_GPT_LEVEL_MASK, (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL ?
					     GUEST_PGTABLE_5_LEVEL :
					     GUEST_PGTABLE_4_LEVEL));

	/* Use the local alias consistently (was dev_data->gcr3_info.domid) */
	target->data[1] |= gcr3_info->domid;
	if (dev_data->ppr)
		target->data[0] |= 1ULL << DEV_ENTRY_PPR;
	if (dev_data->ats_enabled)
		target->data[1] |= DTE_FLAG_IOTLB;

	/* Re-apply the persistent per-device bits */
	target->data128[0] |= dev_data->dte_cache.data128[0];
	target->data128[1] |= dev_data->dte_cache.data128[1];
}

static void set_dte_paging(struct amd_iommu *iommu,
			       struct iommu_dev_data *dev_data,
			       struct dev_table_entry *target)
{
	struct protection_domain *domain = dev_data->domain;

	target->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW |
			   iommu_virt_to_phys(domain->iop.root) |
			   ((domain->iop.mode & DEV_ENTRY_MODE_MASK)
			    << DEV_ENTRY_MODE_SHIFT) |
			   DTE_FLAG_V;
	if (dev_data->ppr)
		target->data[0] |= 1ULL << DEV_ENTRY_PPR;
	if (domain->dirty_tracking)
		target->data[0] |= DTE_FLAG_HAD;

	target->data[1] |= domain->id;
	if (dev_data->ats_enabled)
		target->data[1] |= DTE_FLAG_IOTLB;

	target->data128[0] |= dev_data->dte_cache.data128[0];
	target->data128[1] |= dev_data->dte_cache.data128[1];
}

/*
 * Build a fresh DTE for @dev_data and publish it atomically.  The entry
 * is composed off to the side in @new, then installed with a single
 * 256-bit update so the IOMMU never observes a half-written entry.
 * If a previous domain ID is being replaced (e.g. one inherited by a
 * kdump kernel), the stale translation-cache entries are flushed.
 */
static void set_dte_entry(struct amd_iommu *iommu,
			  struct iommu_dev_data *dev_data)
{
	u32 old_domid;
	struct dev_table_entry new = {};
	struct protection_domain *domain = dev_data->domain;
	struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];

	make_clear_dte(dev_data, dte, &new);

	/*
	 * gcr3_info is an embedded member of dev_data, so its address can
	 * never be NULL -- only the table pointer itself needs checking.
	 */
	if (dev_data->gcr3_info.gcr3_tbl)
		set_dte_gcr3_table(iommu, dev_data, &new);
	else if (domain->iop.mode == PAGE_MODE_NONE)
		set_dte_identity(iommu, dev_data, &new);
	else
		set_dte_paging(iommu, dev_data, &new);

	old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
	update_dte256(iommu, dev_data, &new);

	/*
	 * A kdump kernel might be replacing a domain ID that was copied from
	 * the previous kernel--if so, it needs to flush the translation cache
	 * entries for the old domain ID that is being overwritten
	 */
	if (old_domid)
		amd_iommu_flush_tlb_domid(iommu, old_domid);
}

  parent reply	other threads:[~2024-10-16 14:22 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-16  5:17 [PATCH v6 0/9] iommu/amd: Use 128-bit cmpxchg operation to update DTE Suravee Suthikulpanit
2024-10-16  5:17 ` [PATCH v6 1/9] iommu/amd: Disable AMD IOMMU if CMPXCHG16B feature is not supported Suravee Suthikulpanit
2024-10-16  5:17 ` [PATCH v6 2/9] asm/rwonce: Introduce [READ|WRITE]_ONCE() support for __int128 Suravee Suthikulpanit
2024-10-16 13:08   ` Jason Gunthorpe
2024-10-16  5:17 ` [PATCH v6 3/9] iommu/amd: Introduce helper function to update 256-bit DTE Suravee Suthikulpanit
2024-10-16  5:17 ` [PATCH v6 4/9] iommu/amd: Introduce per-device DTE cache to store persistent bits Suravee Suthikulpanit
2024-10-16 13:21   ` Jason Gunthorpe
2024-10-16  5:17 ` [PATCH v6 5/9] iommu/amd: Modify set_dte_entry() to use 256-bit DTE helpers Suravee Suthikulpanit
2024-10-16 13:52   ` Jason Gunthorpe
2024-10-16 14:07   ` Jason Gunthorpe
2024-10-16 14:12   ` Jason Gunthorpe
2024-10-16  5:17 ` [PATCH v6 6/9] iommu/amd: Introduce helper function get_dte256() Suravee Suthikulpanit
2024-10-16  5:17 ` [PATCH v6 7/9] iommu/amd: Move erratum 63 logic to write_dte_lower128() Suravee Suthikulpanit
2024-10-16 13:30   ` Jason Gunthorpe
2024-10-31  8:53     ` Suthikulpanit, Suravee
2024-10-31  8:53     ` Suthikulpanit, Suravee
2024-10-16  5:17 ` [PATCH v6 8/9] iommu/amd: Modify clear_dte_entry() to avoid in-place update Suravee Suthikulpanit
2024-10-16  5:17 ` [PATCH v6 9/9] iommu/amd: Lock DTE before updating the entry with WRITE_ONCE() Suravee Suthikulpanit
2024-10-16 14:22 ` Jason Gunthorpe [this message]
2024-10-31  9:15   ` [PATCH v6 0/9] iommu/amd: Use 128-bit cmpxchg operation to update DTE Suthikulpanit, Suravee
2024-10-31 11:33     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241016142237.GP3559746@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=iommu@lists.linux.dev \
    --cc=jon.grimm@amd.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kumaranand@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pandoh@google.com \
    --cc=robin.murphy@arm.com \
    --cc=santosh.shukla@amd.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=vasant.hegde@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox