All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lu Baolu <baolu.lu@linux.intel.com>
To: Joerg Roedel <joro@8bytes.org>, David Woodhouse <dwmw2@infradead.org>
Cc: ashok.raj@intel.com, sanjay.k.kumar@intel.com,
	jacob.jun.pan@intel.com, kevin.tian@intel.com,
	yi.l.liu@intel.com, yi.y.sun@intel.com, peterx@redhat.com,
	Jean-Philippe Brucker <jean-philippe.brucker@arm.com>,
	iommu@lists.linux-foundation.org, linux-kernel@vger.kernel.org,
	Lu Baolu <baolu.lu@linux.intel.com>,
	Jacob Pan <jacob.jun.pan@linux.intel.com>
Subject: [PATCH v6 02/12] iommu/vt-d: Manage scalalble mode PASID tables
Date: Mon, 10 Dec 2018 09:58:56 +0800	[thread overview]
Message-ID: <20181210015906.27929-3-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20181210015906.27929-1-baolu.lu@linux.intel.com>

In scalable mode, pasid structure is a two level table with
a pasid directory table and a pasid table. Any pasid entry
can be identified by a pasid value in below way.

   1
   9                       6 5      0
    .-----------------------.-------.
    |              PASID    |       |
    '-----------------------'-------'    .-------------.
             |                    |      |             |
             |                    |      |             |
             |                    |      |             |
             |     .-----------.  |      .-------------.
             |     |           |  |----->| PASID Entry |
             |     |           |  |      '-------------'
             |     |           |  |Plus  |             |
             |     .-----------.  |      |             |
             |---->| DIR Entry |-------->|             |
             |     '-----------'         '-------------'
.---------.  |Plus |           |
| Context |  |     |           |
|  Entry  |------->|           |
'---------'        '-----------'

This changes the pasid table APIs to support scalable mode
PASID directory and PASID table. It also adds a helper to
get the PASID table entry according to the pasid value.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Ashok Raj <ashok.raj@intel.com>
---
 drivers/iommu/intel-iommu.c | 23 ++++++----
 drivers/iommu/intel-pasid.c | 87 ++++++++++++++++++++++++++++++-------
 drivers/iommu/intel-pasid.h | 12 ++++-
 drivers/iommu/intel-svm.c   |  6 +--
 4 files changed, 97 insertions(+), 31 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index c0ffde6cb21a..73d7c15bf737 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -425,21 +425,24 @@ static LIST_HEAD(device_domain_list);
 
 /*
  * Iterate over elements in device_domain_list and call the specified
- * callback @fn against each element. This helper should only be used
- * in the context where the device_domain_lock has already been holden.
+ * callback @fn against each element.
  */
 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
 				     void *data), void *data)
 {
 	int ret = 0;
+	unsigned long flags;
 	struct device_domain_info *info;
 
-	assert_spin_locked(&device_domain_lock);
+	spin_lock_irqsave(&device_domain_lock, flags);
 	list_for_each_entry(info, &device_domain_list, global) {
 		ret = fn(info, data);
-		if (ret)
+		if (ret) {
+			spin_unlock_irqrestore(&device_domain_lock, flags);
 			return ret;
+		}
 	}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
 
 	return 0;
 }
@@ -2481,16 +2484,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	list_add(&info->global, &device_domain_list);
 	if (dev)
 		dev->archdata.iommu = info;
+	spin_unlock_irqrestore(&device_domain_lock, flags);
 
-	if (dev && dev_is_pci(dev) && info->pasid_supported) {
+	/* PASID table is mandatory for a PCI device in scalable mode. */
+	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
 		ret = intel_pasid_alloc_table(dev);
 		if (ret) {
-			pr_warn("No pasid table for %s, pasid disabled\n",
-				dev_name(dev));
-			info->pasid_supported = 0;
+			pr_err("PASID table allocation for %s failed\n",
+			       dev_name(dev));
+			dmar_remove_one_dev_info(domain, dev);
+			return NULL;
 		}
 	}
-	spin_unlock_irqrestore(&device_domain_lock, flags);
 
 	if (dev && domain_context_mapping(domain, dev)) {
 		pr_err("Domain context map for %s failed\n", dev_name(dev));
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index fe95c9bd4d33..fd3ccc0753b0 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -123,12 +123,13 @@ int intel_pasid_alloc_table(struct device *dev)
 	struct pasid_table *pasid_table;
 	struct pasid_table_opaque data;
 	struct page *pages;
-	size_t size, count;
+	int max_pasid = 0;
 	int ret, order;
+	int size;
 
+	might_sleep();
 	info = dev->archdata.iommu;
-	if (WARN_ON(!info || !dev_is_pci(dev) ||
-		    !info->pasid_supported || info->pasid_table))
+	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
 		return -EINVAL;
 
 	/* DMA alias device already has a pasid table, use it: */
@@ -138,23 +139,25 @@ int intel_pasid_alloc_table(struct device *dev)
 	if (ret)
 		goto attach_out;
 
-	pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC);
+	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
 	if (!pasid_table)
 		return -ENOMEM;
 	INIT_LIST_HEAD(&pasid_table->dev);
 
-	size = sizeof(struct pasid_entry);
-	count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id);
-	order = get_order(size * count);
+	if (info->pasid_supported)
+		max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
+				  intel_pasid_max_id);
+
+	size = max_pasid >> (PASID_PDE_SHIFT - 3);
+	order = size ? get_order(size) : 0;
 	pages = alloc_pages_node(info->iommu->node,
-				 GFP_ATOMIC | __GFP_ZERO,
-				 order);
+				 GFP_KERNEL | __GFP_ZERO, order);
 	if (!pages)
 		return -ENOMEM;
 
 	pasid_table->table = page_address(pages);
 	pasid_table->order = order;
-	pasid_table->max_pasid = count;
+	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
 
 attach_out:
 	device_attach_pasid_table(info, pasid_table);
@@ -162,14 +165,33 @@ int intel_pasid_alloc_table(struct device *dev)
 	return 0;
 }
 
+/* Get PRESENT bit of a PASID directory entry. */
+static inline bool
+pasid_pde_is_present(struct pasid_dir_entry *pde)
+{
+	return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
+}
+
+/* Get PASID table from a PASID directory entry. */
+static inline struct pasid_entry *
+get_pasid_table_from_pde(struct pasid_dir_entry *pde)
+{
+	if (!pasid_pde_is_present(pde))
+		return NULL;
+
+	return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
+}
+
 void intel_pasid_free_table(struct device *dev)
 {
 	struct device_domain_info *info;
 	struct pasid_table *pasid_table;
+	struct pasid_dir_entry *dir;
+	struct pasid_entry *table;
+	int i, max_pde;
 
 	info = dev->archdata.iommu;
-	if (!info || !dev_is_pci(dev) ||
-	    !info->pasid_supported || !info->pasid_table)
+	if (!info || !dev_is_pci(dev) || !info->pasid_table)
 		return;
 
 	pasid_table = info->pasid_table;
@@ -178,6 +200,14 @@ void intel_pasid_free_table(struct device *dev)
 	if (!list_empty(&pasid_table->dev))
 		return;
 
+	/* Free scalable mode PASID directory tables: */
+	dir = pasid_table->table;
+	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
+	for (i = 0; i < max_pde; i++) {
+		table = get_pasid_table_from_pde(&dir[i]);
+		free_pgtable_page(table);
+	}
+
 	free_pages((unsigned long)pasid_table->table, pasid_table->order);
 	kfree(pasid_table);
 }
@@ -206,17 +236,37 @@ int intel_pasid_get_dev_max_id(struct device *dev)
 
 struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
 {
+	struct device_domain_info *info;
 	struct pasid_table *pasid_table;
+	struct pasid_dir_entry *dir;
 	struct pasid_entry *entries;
+	int dir_index, index;
 
 	pasid_table = intel_pasid_get_table(dev);
 	if (WARN_ON(!pasid_table || pasid < 0 ||
 		    pasid >= intel_pasid_get_dev_max_id(dev)))
 		return NULL;
 
-	entries = pasid_table->table;
+	dir = pasid_table->table;
+	info = dev->archdata.iommu;
+	dir_index = pasid >> PASID_PDE_SHIFT;
+	index = pasid & PASID_PTE_MASK;
+
+	spin_lock(&pasid_lock);
+	entries = get_pasid_table_from_pde(&dir[dir_index]);
+	if (!entries) {
+		entries = alloc_pgtable_page(info->iommu->node);
+		if (!entries) {
+			spin_unlock(&pasid_lock);
+			return NULL;
+		}
+
+		WRITE_ONCE(dir[dir_index].val,
+			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
+	}
+	spin_unlock(&pasid_lock);
 
-	return &entries[pasid];
+	return &entries[index];
 }
 
 /*
@@ -224,7 +274,14 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
  */
 static inline void pasid_clear_entry(struct pasid_entry *pe)
 {
-	WRITE_ONCE(pe->val, 0);
+	WRITE_ONCE(pe->val[0], 0);
+	WRITE_ONCE(pe->val[1], 0);
+	WRITE_ONCE(pe->val[2], 0);
+	WRITE_ONCE(pe->val[3], 0);
+	WRITE_ONCE(pe->val[4], 0);
+	WRITE_ONCE(pe->val[5], 0);
+	WRITE_ONCE(pe->val[6], 0);
+	WRITE_ONCE(pe->val[7], 0);
 }
 
 void intel_pasid_clear_entry(struct device *dev, int pasid)
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 1fb5e12b029a..12f480c2bb8b 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -11,12 +11,20 @@
 #define __INTEL_PASID_H
 
 #define PASID_MIN			0x1
-#define PASID_MAX			0x20000
+#define PASID_MAX			0x100000
+#define PASID_PTE_MASK			0x3F
+#define PASID_PTE_PRESENT		1
+#define PDE_PFN_MASK			PAGE_MASK
+#define PASID_PDE_SHIFT			6
 
-struct pasid_entry {
+struct pasid_dir_entry {
 	u64 val;
 };
 
+struct pasid_entry {
+	u64 val[8];
+};
+
 /* The representative of a PASID table */
 struct pasid_table {
 	void			*table;		/* pasid table pointer */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 887150907526..9014b07ab975 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -65,8 +65,6 @@ int intel_svm_init(struct intel_iommu *iommu)
 
 	order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
 	if (ecap_dis(iommu->ecap)) {
-		/* Just making it explicit... */
-		BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
 		pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
 		if (pages)
 			iommu->pasid_state_table = page_address(pages);
@@ -405,9 +403,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 			pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
 
 		entry = intel_pasid_get_entry(dev, svm->pasid);
-		entry->val = pasid_entry_val;
-
-		wmb();
+		WRITE_ONCE(entry->val[0], pasid_entry_val);
 
 		/*
 		 * Flush PASID cache when a PASID table entry becomes
-- 
2.17.1

  parent reply	other threads:[~2018-12-10  1:58 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-10  1:58 [PATCH v6 00/12] iommu/vt-d: Add scalable mode support Lu Baolu
2018-12-10  1:58 ` Lu Baolu
     [not found] ` <20181210015906.27929-1-baolu.lu-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2018-12-10  1:58   ` [PATCH v6 01/12] iommu/vt-d: Enumerate the scalable mode capability Lu Baolu
2018-12-10  1:58     ` Lu Baolu
2018-12-10  1:58   ` [PATCH v6 05/12] iommu/vt-d: Reserve a domain id for FL and PT modes Lu Baolu
2018-12-10  1:58     ` Lu Baolu
2018-12-11  9:46   ` [PATCH v6 00/12] iommu/vt-d: Add scalable mode support Joerg Roedel
2018-12-11  9:46     ` Joerg Roedel
2018-12-10  1:58 ` Lu Baolu [this message]
2018-12-10  1:58 ` [PATCH v6 03/12] iommu/vt-d: Move page table helpers into header Lu Baolu
2018-12-10  1:58 ` [PATCH v6 04/12] iommu/vt-d: Add 256-bit invalidation descriptor support Lu Baolu
2018-12-10  1:59 ` [PATCH v6 06/12] iommu/vt-d: Add second level page table interface Lu Baolu
2018-12-10  1:59 ` [PATCH v6 07/12] iommu/vt-d: Setup pasid entry for RID2PASID support Lu Baolu
2018-12-10  1:59 ` [PATCH v6 08/12] iommu/vt-d: Pass pasid table to context mapping Lu Baolu
2018-12-10  1:59 ` [PATCH v6 09/12] iommu/vt-d: Setup context and enable RID2PASID support Lu Baolu
2018-12-10  1:59 ` [PATCH v6 10/12] iommu/vt-d: Add first level page table interface Lu Baolu
2018-12-10  1:59 ` [PATCH v6 11/12] iommu/vt-d: Shared virtual address in scalable mode Lu Baolu
2018-12-10  1:59 ` [PATCH v6 12/12] iommu/vt-d: Remove deferred invalidation Lu Baolu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181210015906.27929-3-baolu.lu@linux.intel.com \
    --to=baolu.lu@linux.intel.com \
    --cc=ashok.raj@intel.com \
    --cc=dwmw2@infradead.org \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jacob.jun.pan@intel.com \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jean-philippe.brucker@arm.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterx@redhat.com \
    --cc=sanjay.k.kumar@intel.com \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.