From: Lu Baolu <baolu.lu@linux.intel.com>
To: Joerg Roedel <joro@8bytes.org>,
David Woodhouse <dwmw2@infradead.org>,
Alex Williamson <alex.williamson@redhat.com>
Cc: kevin.tian@intel.com, ashok.raj@intel.com, kvm@vger.kernel.org,
sanjay.k.kumar@intel.com, iommu@lists.linux-foundation.org,
linux-kernel@vger.kernel.org, yi.y.sun@intel.com
Subject: [PATCH v5 4/9] iommu/vt-d: Setup pasid entries for iova over first level
Date: Tue, 24 Dec 2019 15:44:57 +0800 [thread overview]
Message-ID: <20191224074502.5545-5-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20191224074502.5545-1-baolu.lu@linux.intel.com>
Intel VT-d in scalable mode supports two types of page tables for
IOVA translation: first level and second level. The IOMMU driver
can choose one from both for IOVA translation according to the use
case. This sets up the pasid entry if a domain is selected to use
the first-level page table for iova translation.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel-iommu.c | 57 +++++++++++++++++++++++++++++++++----
include/linux/intel-iommu.h | 16 +++++++----
2 files changed, 62 insertions(+), 11 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 35f65628202c..071cbc172ce8 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -571,6 +571,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}
+static inline bool domain_use_first_level(struct dmar_domain *domain)
+{
+ return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
+}
+
static inline int domain_pfn_supported(struct dmar_domain *domain,
unsigned long pfn)
{
@@ -932,6 +937,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+ if (domain_use_first_level(domain))
+ pteval |= DMA_FL_PTE_XD;
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
@@ -2281,17 +2288,20 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+ u64 attr;
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
- prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+ attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
+ if (domain_use_first_level(domain))
+ attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD;
if (!sg) {
sg_res = nr_pages;
- pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+ pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
}
while (nr_pages > 0) {
@@ -2303,7 +2313,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg_res = aligned_nrpages(sg->offset, sg->length);
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
sg->dma_length = sg->length;
- pteval = (sg_phys(sg) - pgoff) | prot;
+ pteval = (sg_phys(sg) - pgoff) | attr;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -2515,6 +2525,36 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
return NULL;
}
+static int domain_setup_first_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev,
+ int pasid)
+{
+ int flags = PASID_FLAG_SUPERVISOR_MODE;
+ struct dma_pte *pgd = domain->pgd;
+ int agaw, level;
+
+ /*
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
+ */
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ return -ENOMEM;
+ }
+
+ level = agaw_to_level(agaw);
+ if (level != 4 && level != 5)
+ return -EINVAL;
+
+ flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+
+ return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
+ domain->iommu_did[iommu->seq_id],
+ flags);
+}
+
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
int bus, int devfn,
struct device *dev,
@@ -2614,6 +2654,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
+ else if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ PASID_RID2PASID);
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
@@ -5374,8 +5417,12 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
goto attach_failed;
/* Setup the PASID entry for mediated devices: */
- ret = intel_pasid_setup_second_level(iommu, domain, dev,
- domain->default_pasid);
+ if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ domain->default_pasid);
+ else
+ ret = intel_pasid_setup_second_level(iommu, domain, dev,
+ domain->default_pasid);
if (ret)
goto table_failed;
spin_unlock(&iommu->lock);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index aaece25c055f..454c69712131 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -34,10 +34,13 @@
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
-#define DMA_PTE_LARGE_PAGE (1 << 7)
-#define DMA_PTE_SNP (1 << 11)
+#define DMA_PTE_READ BIT_ULL(0)
+#define DMA_PTE_WRITE BIT_ULL(1)
+#define DMA_PTE_LARGE_PAGE BIT_ULL(7)
+#define DMA_PTE_SNP BIT_ULL(11)
+
+#define DMA_FL_PTE_PRESENT BIT_ULL(0)
+#define DMA_FL_PTE_XD BIT_ULL(63)
#define CONTEXT_TT_MULTI_LEVEL 0
#define CONTEXT_TT_DEV_IOTLB 1
@@ -610,10 +613,11 @@ static inline void dma_clear_pte(struct dma_pte *pte)
static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
- return pte->val & VTD_PAGE_MASK;
+ return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD);
#else
/* Must have a full atomic 64-bit read */
- return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
+ return __cmpxchg64(&pte->val, 0ULL, 0ULL) &
+ VTD_PAGE_MASK & (~DMA_FL_PTE_XD);
#endif
}
--
2.17.1
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
WARNING: multiple messages have this Message-ID (diff)
From: Lu Baolu <baolu.lu@linux.intel.com>
To: Joerg Roedel <joro@8bytes.org>,
David Woodhouse <dwmw2@infradead.org>,
Alex Williamson <alex.williamson@redhat.com>
Cc: ashok.raj@intel.com, sanjay.k.kumar@intel.com,
jacob.jun.pan@linux.intel.com, kevin.tian@intel.com,
yi.l.liu@intel.com, yi.y.sun@intel.com,
Peter Xu <peterx@redhat.com>,
iommu@lists.linux-foundation.org, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, Lu Baolu <baolu.lu@linux.intel.com>
Subject: [PATCH v5 4/9] iommu/vt-d: Setup pasid entries for iova over first level
Date: Tue, 24 Dec 2019 15:44:57 +0800 [thread overview]
Message-ID: <20191224074502.5545-5-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20191224074502.5545-1-baolu.lu@linux.intel.com>
Intel VT-d in scalable mode supports two types of page tables for
IOVA translation: first level and second level. The IOMMU driver
can choose one from both for IOVA translation according to the use
case. This sets up the pasid entry if a domain is selected to use
the first-level page table for iova translation.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel-iommu.c | 57 +++++++++++++++++++++++++++++++++----
include/linux/intel-iommu.h | 16 +++++++----
2 files changed, 62 insertions(+), 11 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 35f65628202c..071cbc172ce8 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -571,6 +571,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}
+static inline bool domain_use_first_level(struct dmar_domain *domain)
+{
+ return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
+}
+
static inline int domain_pfn_supported(struct dmar_domain *domain,
unsigned long pfn)
{
@@ -932,6 +937,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+ if (domain_use_first_level(domain))
+ pteval |= DMA_FL_PTE_XD;
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
@@ -2281,17 +2288,20 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+ u64 attr;
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
- prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+ attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
+ if (domain_use_first_level(domain))
+ attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD;
if (!sg) {
sg_res = nr_pages;
- pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+ pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
}
while (nr_pages > 0) {
@@ -2303,7 +2313,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg_res = aligned_nrpages(sg->offset, sg->length);
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
sg->dma_length = sg->length;
- pteval = (sg_phys(sg) - pgoff) | prot;
+ pteval = (sg_phys(sg) - pgoff) | attr;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -2515,6 +2525,36 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
return NULL;
}
+static int domain_setup_first_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev,
+ int pasid)
+{
+ int flags = PASID_FLAG_SUPERVISOR_MODE;
+ struct dma_pte *pgd = domain->pgd;
+ int agaw, level;
+
+ /*
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
+ */
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ return -ENOMEM;
+ }
+
+ level = agaw_to_level(agaw);
+ if (level != 4 && level != 5)
+ return -EINVAL;
+
+ flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+
+ return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
+ domain->iommu_did[iommu->seq_id],
+ flags);
+}
+
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
int bus, int devfn,
struct device *dev,
@@ -2614,6 +2654,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
+ else if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ PASID_RID2PASID);
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
@@ -5374,8 +5417,12 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
goto attach_failed;
/* Setup the PASID entry for mediated devices: */
- ret = intel_pasid_setup_second_level(iommu, domain, dev,
- domain->default_pasid);
+ if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ domain->default_pasid);
+ else
+ ret = intel_pasid_setup_second_level(iommu, domain, dev,
+ domain->default_pasid);
if (ret)
goto table_failed;
spin_unlock(&iommu->lock);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index aaece25c055f..454c69712131 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -34,10 +34,13 @@
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
-#define DMA_PTE_LARGE_PAGE (1 << 7)
-#define DMA_PTE_SNP (1 << 11)
+#define DMA_PTE_READ BIT_ULL(0)
+#define DMA_PTE_WRITE BIT_ULL(1)
+#define DMA_PTE_LARGE_PAGE BIT_ULL(7)
+#define DMA_PTE_SNP BIT_ULL(11)
+
+#define DMA_FL_PTE_PRESENT BIT_ULL(0)
+#define DMA_FL_PTE_XD BIT_ULL(63)
#define CONTEXT_TT_MULTI_LEVEL 0
#define CONTEXT_TT_DEV_IOTLB 1
@@ -610,10 +613,11 @@ static inline void dma_clear_pte(struct dma_pte *pte)
static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
- return pte->val & VTD_PAGE_MASK;
+ return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD);
#else
/* Must have a full atomic 64-bit read */
- return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
+ return __cmpxchg64(&pte->val, 0ULL, 0ULL) &
+ VTD_PAGE_MASK & (~DMA_FL_PTE_XD);
#endif
}
--
2.17.1
next prev parent reply other threads:[~2019-12-24 7:46 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-24 7:44 [PATCH v5 0/9] Use 1st-level for IOVA translation Lu Baolu
2019-12-24 7:44 ` Lu Baolu
2019-12-24 7:44 ` [PATCH v5 1/9] iommu/vt-d: Identify domains using first level page table Lu Baolu
2019-12-24 7:44 ` Lu Baolu
2019-12-24 7:44 ` [PATCH v5 2/9] iommu/vt-d: Add set domain DOMAIN_ATTR_NESTING attr Lu Baolu
2019-12-24 7:44 ` Lu Baolu
2019-12-24 7:44 ` [PATCH v5 3/9] iommu/vt-d: Add PASID_FLAG_FL5LP for first-level pasid setup Lu Baolu
2019-12-24 7:44 ` Lu Baolu
2019-12-24 7:44 ` Lu Baolu [this message]
2019-12-24 7:44 ` [PATCH v5 4/9] iommu/vt-d: Setup pasid entries for iova over first level Lu Baolu
2019-12-24 7:44 ` [PATCH v5 5/9] iommu/vt-d: Flush PASID-based iotlb " Lu Baolu
2019-12-24 7:44 ` Lu Baolu
2019-12-24 7:44 ` [PATCH v5 6/9] iommu/vt-d: Make first level IOVA canonical Lu Baolu
2019-12-24 7:44 ` Lu Baolu
2019-12-24 7:45 ` [PATCH v5 7/9] iommu/vt-d: Update first level super page capability Lu Baolu
2019-12-24 7:45 ` Lu Baolu
2019-12-24 7:45 ` [PATCH v5 8/9] iommu/vt-d: Use iova over first level Lu Baolu
2019-12-24 7:45 ` Lu Baolu
2019-12-24 7:45 ` [PATCH v5 9/9] iommu/vt-d: debugfs: Add support to show page table internals Lu Baolu
2019-12-24 7:45 ` Lu Baolu
2020-01-01 23:38 ` [PATCH v5 0/9] Use 1st-level for IOVA translation Lu Baolu
2020-01-01 23:38 ` Lu Baolu
2020-01-02 2:31 ` Liu, Yi L
2020-01-02 2:31 ` Liu, Yi L
2020-01-02 2:32 ` Lu Baolu
2020-01-02 2:32 ` Lu Baolu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191224074502.5545-5-baolu.lu@linux.intel.com \
--to=baolu.lu@linux.intel.com \
--cc=alex.williamson@redhat.com \
--cc=ashok.raj@intel.com \
--cc=dwmw2@infradead.org \
--cc=iommu@lists.linux-foundation.org \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=sanjay.k.kumar@intel.com \
--cc=yi.y.sun@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.