* [PATCH 0/22] Initial AMD IOMMUv2 support
@ 2011-12-05 13:34 Joerg Roedel
2011-12-05 13:34 ` [PATCH 01/22] iommu/amd: Convert dev_table_entry to u64 Joerg Roedel
` (23 more replies)
0 siblings, 24 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel
Hi,
here is a patch-set that adds initial support for the AMD IOMMU version
2 to the Linux kernel. The main feature of the new IOMMU version is
support for DMA demand-paging and multiple DMA address spaces per
device.
The patch-set consists of 2 parts. Patches 1-16 implement the hardware
access functions to setup and manage the IOMMUv2 functionality of the
hardware. Patches 17-22 implement a separate module which makes use of
these functions to implement a page-fault handler for devices which can
be used by real device drivers.
Any feedback appreciated.
Thanks,
Joerg
diff-stat:
Documentation/kernel-parameters.txt | 5 +
drivers/iommu/Kconfig | 13 +-
drivers/iommu/Makefile | 1 +
drivers/iommu/amd_iommu.c | 791 ++++++++++++++++++++++++++++--
drivers/iommu/amd_iommu_init.c | 113 ++++-
drivers/iommu/amd_iommu_proto.h | 24 +
drivers/iommu/amd_iommu_types.h | 109 ++++-
drivers/iommu/amd_iommu_v2.c | 951 +++++++++++++++++++++++++++++++++++
include/linux/amd-iommu.h | 95 ++++-
9 files changed, 2058 insertions(+), 44 deletions(-)
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 01/22] iommu/amd: Convert dev_table_entry to u64
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 02/22] iommu/amd: Fix wrong address masks in tlb flush code Joerg Roedel
` (22 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Convert the contents of 'struct dev_table_entry' to u64 to
allow updating the DTE with 64-bit writes as required by the
spec.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 18 ++++++++++--------
drivers/iommu/amd_iommu_init.c | 12 ++++++------
drivers/iommu/amd_iommu_types.h | 4 ++--
3 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4ee277a..661e2bb 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -365,8 +365,8 @@ static void dump_dte_entry(u16 devid)
{
int i;
- for (i = 0; i < 8; ++i)
- pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
+ for (i = 0; i < 4; ++i)
+ pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
amd_iommu_dev_table[devid].data[i]);
}
@@ -1583,19 +1583,22 @@ static bool dma_ops_domain(struct protection_domain *domain)
static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
{
u64 pte_root = virt_to_phys(domain->pt_root);
- u32 flags = 0;
+ u64 flags = 0;
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+ flags = amd_iommu_dev_table[devid].data[1];
+
if (ats)
flags |= DTE_FLAG_IOTLB;
- amd_iommu_dev_table[devid].data[3] |= flags;
- amd_iommu_dev_table[devid].data[2] = domain->id;
- amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
- amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+ flags &= ~(0xffffUL);
+ flags |= domain->id;
+
+ amd_iommu_dev_table[devid].data[1] = flags;
+ amd_iommu_dev_table[devid].data[0] = pte_root;
}
static void clear_dte_entry(u16 devid)
@@ -1603,7 +1606,6 @@ static void clear_dte_entry(u16 devid)
/* remove entry from the device table seen by the hardware */
amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
amd_iommu_dev_table[devid].data[1] = 0;
- amd_iommu_dev_table[devid].data[2] = 0;
amd_iommu_apply_erratum_63(devid);
}
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 82d2410..17e0f77 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -584,18 +584,18 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
/* sets a specific bit in the device table entry. */
static void set_dev_entry_bit(u16 devid, u8 bit)
{
- int i = (bit >> 5) & 0x07;
- int _bit = bit & 0x1f;
+ int i = (bit >> 6) & 0x03;
+ int _bit = bit & 0x3f;
- amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
+ amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
}
static int get_dev_entry_bit(u16 devid, u8 bit)
{
- int i = (bit >> 5) & 0x07;
- int _bit = bit & 0x1f;
+ int i = (bit >> 6) & 0x03;
+ int _bit = bit & 0x3f;
- return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
+ return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
}
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 5b9c507..f8dd9ae 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -230,7 +230,7 @@
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)
-#define DTE_FLAG_IOTLB 0x01
+#define DTE_FLAG_IOTLB (0x01UL << 32)
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
@@ -484,7 +484,7 @@ extern struct list_head amd_iommu_pd_list;
* Structure defining one entry in the device table
*/
struct dev_table_entry {
- u32 data[8];
+ u64 data[4];
};
/*
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 02/22] iommu/amd: Fix wrong address masks in tlb flush code
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
2011-12-05 13:34 ` [PATCH 01/22] iommu/amd: Convert dev_table_entry to u64 Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 03/22] iommu/amd: Get the maximum number of PASIDs supported Joerg Roedel
` (21 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Using PAGE_MASK is not sufficient because it masks out the
high bits too. But the IOMMU supports 64 bits and not only
52. So change the masks to only mask out the low 12 bits.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 4 ++--
drivers/iommu/amd_iommu_types.h | 1 +
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 661e2bb..f2eec1c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -551,7 +551,7 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
s = 1;
}
- address &= PAGE_MASK;
+ address &= IOMMU_IO_PAGE_MASK;
memset(cmd, 0, sizeof(*cmd));
cmd->data[1] |= domid;
@@ -582,7 +582,7 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
s = 1;
}
- address &= PAGE_MASK;
+ address &= IOMMU_IO_PAGE_MASK;
memset(cmd, 0, sizeof(*cmd));
cmd->data[0] = devid;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index f8dd9ae..a8a994b 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -233,6 +233,7 @@
#define DTE_FLAG_IOTLB (0x01UL << 32)
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
+#define IOMMU_IO_PAGE_MASK (~(0xfffULL))
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 03/22] iommu/amd: Get the maximum number of PASIDs supported
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
2011-12-05 13:34 ` [PATCH 01/22] iommu/amd: Convert dev_table_entry to u64 Joerg Roedel
2011-12-05 13:34 ` [PATCH 02/22] iommu/amd: Fix wrong address masks in tlb flush code Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 04/22] iommu/amd: Setup PPR log when supported by IOMMU Joerg Roedel
` (20 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Read the number of PASIDs supported by each IOMMU in the
system and take the smallest number as the maximum value
supported by the IOMMU driver.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu_init.c | 13 +++++++++++++
drivers/iommu/amd_iommu_types.h | 6 ++++++
2 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 17e0f77..fb4afd6 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -141,6 +141,8 @@ int amd_iommus_present;
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;
+u32 amd_iommu_max_pasids __read_mostly = ~0;
+
/*
* The ACPI table parsing functions set this variable on an error
*/
@@ -699,6 +701,17 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
iommu->features = ((u64)high << 32) | low;
+ if (iommu_feature(iommu, FEATURE_GT)) {
+ u32 pasids;
+ u64 shift;
+
+ shift = iommu->features & FEATURE_PASID_MASK;
+ shift >>= FEATURE_PASID_SHIFT;
+ pasids = (1 << shift);
+
+ amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
+ }
+
if (!is_rd890_iommu(iommu->dev))
return;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index a8a994b..0a62685 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -87,6 +87,9 @@
#define FEATURE_HE (1ULL<<8)
#define FEATURE_PC (1ULL<<9)
+#define FEATURE_PASID_SHIFT 32
+#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
+
/* MMIO status bits */
#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
@@ -550,6 +553,9 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;
*/
extern bool amd_iommu_unmap_flush;
+/* Smallest number of PASIDs supported by any IOMMU in the system */
+extern u32 amd_iommu_max_pasids;
+
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 04/22] iommu/amd: Setup PPR log when supported by IOMMU
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (2 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 03/22] iommu/amd: Get the maximum number of PASIDs supported Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 05/22] iommu/amd: Enable GT mode " Joerg Roedel
` (19 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Allocate and enable a log buffer for peripheral page faults
when the IOMMU supports this feature.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu_init.c | 51 +++++++++++++++++++++++++++++++++++++++
drivers/iommu/amd_iommu_types.h | 14 ++++++++++
2 files changed, 65 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index fb4afd6..60716ce 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -583,6 +583,46 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
}
+/* allocates the memory where the IOMMU will log its events to */
+static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
+{
+ iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(PPR_LOG_SIZE));
+
+ if (iommu->ppr_log == NULL)
+ return NULL;
+
+ return iommu->ppr_log;
+}
+
+static void iommu_enable_ppr_log(struct amd_iommu *iommu)
+{
+ u64 entry;
+
+ if (iommu->ppr_log == NULL)
+ return;
+
+ entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
+
+ memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
+ &entry, sizeof(entry));
+
+ /* set head and tail to zero manually */
+ writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+ writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+
+ iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
+ iommu_feature_enable(iommu, CONTROL_PPR_EN);
+}
+
+static void __init free_ppr_log(struct amd_iommu *iommu)
+{
+ if (iommu->ppr_log == NULL)
+ return;
+
+ free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
+}
+
/* sets a specific bit in the device table entry. */
static void set_dev_entry_bit(u16 devid, u8 bit)
{
@@ -914,6 +954,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_command_buffer(iommu);
free_event_buffer(iommu);
+ free_ppr_log(iommu);
iommu_unmap_mmio_space(iommu);
}
@@ -977,6 +1018,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
init_iommu_from_acpi(iommu, h);
init_iommu_devices(iommu);
+ if (iommu_feature(iommu, FEATURE_PPR)) {
+ iommu->ppr_log = alloc_ppr_log(iommu);
+ if (!iommu->ppr_log)
+ return -ENOMEM;
+ }
+
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
amd_iommu_np_cache = true;
@@ -1063,6 +1110,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
iommu->int_enabled = true;
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+ if (iommu->ppr_log != NULL)
+ iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
+
return 0;
}
@@ -1287,6 +1337,7 @@ static void enable_iommus(void)
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
+ iommu_enable_ppr_log(iommu);
iommu_set_exclusion_range(iommu);
iommu_init_msi(iommu);
iommu_enable(iommu);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 0a62685..59405c8 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -69,11 +69,14 @@
#define MMIO_EXCL_BASE_OFFSET 0x0020
#define MMIO_EXCL_LIMIT_OFFSET 0x0028
#define MMIO_EXT_FEATURES 0x0030
+#define MMIO_PPR_LOG_OFFSET 0x0038
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
#define MMIO_EVT_TAIL_OFFSET 0x2018
#define MMIO_STATUS_OFFSET 0x2020
+#define MMIO_PPR_HEAD_OFFSET 0x2030
+#define MMIO_PPR_TAIL_OFFSET 0x2038
/* Extended Feature Bits */
@@ -125,6 +128,7 @@
#define CONTROL_CMDBUF_EN 0x0cULL
#define CONTROL_PPFLOG_EN 0x0dULL
#define CONTROL_PPFINT_EN 0x0eULL
+#define CONTROL_PPR_EN 0x0fULL
/* command specific defines */
#define CMD_COMPL_WAIT 0x01
@@ -168,6 +172,13 @@
#define EVT_BUFFER_SIZE 8192 /* 512 entries */
#define EVT_LEN_MASK (0x9ULL << 56)
+/* Constants for PPR Log handling */
+#define PPR_LOG_ENTRIES 512
+#define PPR_LOG_SIZE_SHIFT 56
+#define PPR_LOG_SIZE_512 (0x9ULL << PPR_LOG_SIZE_SHIFT)
+#define PPR_ENTRY_SIZE 16
+#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES)
+
#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
@@ -435,6 +446,9 @@ struct amd_iommu {
/* MSI number for event interrupt */
u16 evt_msi_num;
+ /* Base of the PPR log, if present */
+ u8 *ppr_log;
+
/* true if interrupts for this IOMMU are already enabled */
bool int_enabled;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 05/22] iommu/amd: Enable GT mode when supported by IOMMU
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (3 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 04/22] iommu/amd: Setup PPR log when supported by IOMMU Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 06/22] iommu/amd: Add iommuv2 flag to struct amd_iommu Joerg Roedel
` (18 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
This feature needs to be enabled before IOMMUv2 DTEs can be
set up.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu_init.c | 9 +++++++++
drivers/iommu/amd_iommu_types.h | 1 +
2 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 60716ce..2c25ae3 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -623,6 +623,14 @@ static void __init free_ppr_log(struct amd_iommu *iommu)
free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
}
+static void iommu_enable_gt(struct amd_iommu *iommu)
+{
+ if (!iommu_feature(iommu, FEATURE_GT))
+ return;
+
+ iommu_feature_enable(iommu, CONTROL_GT_EN);
+}
+
/* sets a specific bit in the device table entry. */
static void set_dev_entry_bit(u16 devid, u8 bit)
{
@@ -1338,6 +1346,7 @@ static void enable_iommus(void)
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
iommu_enable_ppr_log(iommu);
+ iommu_enable_gt(iommu);
iommu_set_exclusion_range(iommu);
iommu_init_msi(iommu);
iommu_enable(iommu);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 59405c8..c26a16d 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -129,6 +129,7 @@
#define CONTROL_PPFLOG_EN 0x0dULL
#define CONTROL_PPFINT_EN 0x0eULL
#define CONTROL_PPR_EN 0x0fULL
+#define CONTROL_GT_EN 0x10ULL
/* command specific defines */
#define CMD_COMPL_WAIT 0x01
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 06/22] iommu/amd: Add iommuv2 flag to struct amd_iommu
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (4 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 05/22] iommu/amd: Enable GT mode " Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 07/22] iommu/amd: Put IOMMUv2 capable devices in pt_domain Joerg Roedel
` (17 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
In mixed IOMMU setups this flag indicates whether an IOMMU
supports the v2 features or not. This patch also adds a
global flag together with a function to query that flag from
other code. The flag shows if at least one IOMMUv2 is in the
system.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu_init.c | 15 +++++++++++++++
drivers/iommu/amd_iommu_proto.h | 3 +++
drivers/iommu/amd_iommu_types.h | 5 +++++
3 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 2c25ae3..d1e5067 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -25,6 +25,7 @@
#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/amd-iommu.h>
+#include <linux/export.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@@ -143,6 +144,8 @@ bool amd_iommu_iotlb_sup __read_mostly = true;
u32 amd_iommu_max_pasids __read_mostly = ~0;
+bool amd_iommu_v2_present __read_mostly;
+
/*
* The ACPI table parsing functions set this variable on an error
*/
@@ -760,6 +763,12 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
}
+ if (iommu_feature(iommu, FEATURE_GT) &&
+ iommu_feature(iommu, FEATURE_PPR)) {
+ iommu->is_iommu_v2 = true;
+ amd_iommu_v2_present = true;
+ }
+
if (!is_rd890_iommu(iommu->dev))
return;
@@ -1645,3 +1654,9 @@ IOMMU_INIT_FINISH(amd_iommu_detect,
gart_iommu_hole_init,
0,
0);
+
+bool amd_iommu_v2_supported(void)
+{
+ return amd_iommu_v2_present;
+}
+EXPORT_SYMBOL(amd_iommu_v2_supported);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 7ffaa64..3a46c30 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -31,6 +31,9 @@ extern int amd_iommu_init_devices(void);
extern void amd_iommu_uninit_devices(void);
extern void amd_iommu_init_notifier(void);
extern void amd_iommu_init_api(void);
+
+extern bool amd_iommu_v2_supported(void);
+
#ifndef CONFIG_AMD_IOMMU_STATS
static inline void amd_iommu_stats_init(void) { }
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index c26a16d..535cd07 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -415,6 +415,9 @@ struct amd_iommu {
/* Extended features */
u64 features;
+ /* IOMMUv2 */
+ bool is_iommu_v2;
+
/*
* Capability pointer. There could be more than one IOMMU per PCI
* device function if there are more than one AMD IOMMU capability
@@ -571,6 +574,8 @@ extern bool amd_iommu_unmap_flush;
/* Smallest number of PASIDs supported by any IOMMU in the system */
extern u32 amd_iommu_max_pasids;
+extern bool amd_iommu_v2_present;
+
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 07/22] iommu/amd: Put IOMMUv2 capable devices in pt_domain
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (5 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 06/22] iommu/amd: Add iommuv2 flag to struct amd_iommu Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 08/22] iommu/amd: Implement notifier for PPR faults Joerg Roedel
` (16 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
If the device starts to use IOMMUv2 features the dma handles
need to stay valid. The only sane way to do this is to use a
identity mapping for the device and not translate it by the
iommu. This is implemented with this patch. Since this lifts
the device-isolation there is also a new kernel parameter
which allows to disable that feature.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
Documentation/kernel-parameters.txt | 5 ++
drivers/iommu/amd_iommu.c | 94 +++++++++++++++++++++++++++++------
drivers/iommu/amd_iommu_init.c | 4 ++
drivers/iommu/amd_iommu_types.h | 4 ++
4 files changed, 91 insertions(+), 16 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a0c5c5f..d55f2ad 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -329,6 +329,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
is a lot of faster
off - do not initialize any AMD IOMMU found in
the system
+ force_isolation - Force device isolation for all
+ devices. The IOMMU driver is not
+ allowed anymore to lift isolation
+ requirements as needed. This option
+ does not override iommu=pt
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f2eec1c..bde2776 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -67,6 +67,7 @@ struct iommu_cmd {
};
static void update_domain(struct protection_domain *domain);
+static int __init alloc_passthrough_domain(void);
/****************************************************************************
*
@@ -147,6 +148,24 @@ static struct iommu_dev_data *get_dev_data(struct device *dev)
return dev->archdata.iommu;
}
+static bool pci_iommuv2_capable(struct pci_dev *pdev)
+{
+ static const int caps[] = {
+ PCI_EXT_CAP_ID_ATS,
+ PCI_PRI_CAP,
+ PCI_PASID_CAP,
+ };
+ int i, pos;
+
+ for (i = 0; i < 3; ++i) {
+ pos = pci_find_ext_capability(pdev, caps[i]);
+ if (pos == 0)
+ return false;
+ }
+
+ return true;
+}
+
/*
* In this function the list of preallocated protection domains is traversed to
* find the domain for a specific device
@@ -204,6 +223,7 @@ static bool check_device(struct device *dev)
static int iommu_init_device(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct iommu_dev_data *dev_data;
u16 alias;
@@ -228,6 +248,13 @@ static int iommu_init_device(struct device *dev)
dev_data->alias_data = alias_data;
}
+ if (pci_iommuv2_capable(pdev)) {
+ struct amd_iommu *iommu;
+
+ iommu = amd_iommu_rlookup_table[dev_data->devid];
+ dev_data->iommu_v2 = iommu->is_iommu_v2;
+ }
+
dev->archdata.iommu = dev_data;
return 0;
@@ -1762,7 +1789,7 @@ static void __detach_device(struct iommu_dev_data *dev_data)
* passthrough domain if it is detached from any other domain.
* Make sure we can deassign from the pt_domain itself.
*/
- if (iommu_pass_through &&
+ if (dev_data->passthrough &&
(dev_data->domain == NULL && domain != pt_domain))
__attach_device(dev_data, pt_domain);
}
@@ -1820,18 +1847,20 @@ static struct protection_domain *domain_for_device(struct device *dev)
static int device_change_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
- struct device *dev = data;
- u16 devid;
- struct protection_domain *domain;
struct dma_ops_domain *dma_domain;
+ struct protection_domain *domain;
+ struct iommu_dev_data *dev_data;
+ struct device *dev = data;
struct amd_iommu *iommu;
unsigned long flags;
+ u16 devid;
if (!check_device(dev))
return 0;
- devid = get_device_id(dev);
- iommu = amd_iommu_rlookup_table[devid];
+ devid = get_device_id(dev);
+ iommu = amd_iommu_rlookup_table[devid];
+ dev_data = get_dev_data(dev);
switch (action) {
case BUS_NOTIFY_UNBOUND_DRIVER:
@@ -1840,7 +1869,7 @@ static int device_change_notifier(struct notifier_block *nb,
if (!domain)
goto out;
- if (iommu_pass_through)
+ if (dev_data->passthrough)
break;
detach_device(dev);
break;
@@ -2436,8 +2465,9 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
*/
static void prealloc_protection_domains(void)
{
- struct pci_dev *dev = NULL;
+ struct iommu_dev_data *dev_data;
struct dma_ops_domain *dma_dom;
+ struct pci_dev *dev = NULL;
u16 devid;
for_each_pci_dev(dev) {
@@ -2446,6 +2476,16 @@ static void prealloc_protection_domains(void)
if (!check_device(&dev->dev))
continue;
+ dev_data = get_dev_data(&dev->dev);
+ if (!amd_iommu_force_isolation && dev_data->iommu_v2) {
+ /* Make sure passthrough domain is allocated */
+ alloc_passthrough_domain();
+ dev_data->passthrough = true;
+ attach_device(&dev->dev, pt_domain);
+ pr_info("AMD-Vi: Using passthough domain for device %s\n",
+ dev_name(&dev->dev));
+ }
+
/* Is there already any domain for it? */
if (domain_for_device(&dev->dev))
continue;
@@ -2476,6 +2516,7 @@ static struct dma_map_ops amd_iommu_dma_ops = {
static unsigned device_dma_ops_init(void)
{
+ struct iommu_dev_data *dev_data;
struct pci_dev *pdev = NULL;
unsigned unhandled = 0;
@@ -2485,7 +2526,12 @@ static unsigned device_dma_ops_init(void)
continue;
}
- pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
+ dev_data = get_dev_data(&pdev->dev);
+
+ if (!dev_data->passthrough)
+ pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
+ else
+ pdev->dev.archdata.dma_ops = &nommu_dma_ops;
}
return unhandled;
@@ -2612,6 +2658,20 @@ out_err:
return NULL;
}
+static int __init alloc_passthrough_domain(void)
+{
+ if (pt_domain != NULL)
+ return 0;
+
+ /* allocate passthrough domain */
+ pt_domain = protection_domain_alloc();
+ if (!pt_domain)
+ return -ENOMEM;
+
+ pt_domain->mode = PAGE_MODE_NONE;
+
+ return 0;
+}
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
@@ -2798,21 +2858,23 @@ static struct iommu_ops amd_iommu_ops = {
int __init amd_iommu_init_passthrough(void)
{
- struct amd_iommu *iommu;
+ struct iommu_dev_data *dev_data;
struct pci_dev *dev = NULL;
+ struct amd_iommu *iommu;
u16 devid;
+ int ret;
- /* allocate passthrough domain */
- pt_domain = protection_domain_alloc();
- if (!pt_domain)
- return -ENOMEM;
-
- pt_domain->mode |= PAGE_MODE_NONE;
+ ret = alloc_passthrough_domain();
+ if (ret)
+ return ret;
for_each_pci_dev(dev) {
if (!check_device(&dev->dev))
continue;
+ dev_data = get_dev_data(&dev->dev);
+ dev_data->passthrough = true;
+
devid = get_device_id(&dev->dev);
iommu = amd_iommu_rlookup_table[devid];
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index d1e5067..7c3fd57 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -146,6 +146,8 @@ u32 amd_iommu_max_pasids __read_mostly = ~0;
bool amd_iommu_v2_present __read_mostly;
+bool amd_iommu_force_isolation __read_mostly;
+
/*
* The ACPI table parsing functions set this variable on an error
*/
@@ -1642,6 +1644,8 @@ static int __init parse_amd_iommu_options(char *str)
amd_iommu_unmap_flush = true;
if (strncmp(str, "off", 3) == 0)
amd_iommu_disabled = true;
+ if (strncmp(str, "force_isolation", 15) == 0)
+ amd_iommu_force_isolation = true;
}
return 1;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 535cd07..6897645 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -331,6 +331,8 @@ struct iommu_dev_data {
struct protection_domain *domain; /* Domain the device is bound to */
atomic_t bind; /* Domain attach reverent count */
u16 devid; /* PCI Device ID */
+ bool iommu_v2; /* Device can make use of IOMMUv2 */
+ bool passthrough; /* Default for device is pt_domain */
struct {
bool enabled;
int qdep;
@@ -576,6 +578,8 @@ extern u32 amd_iommu_max_pasids;
extern bool amd_iommu_v2_present;
+extern bool amd_iommu_force_isolation;
+
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 08/22] iommu/amd: Implement notifier for PPR faults
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (6 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 07/22] iommu/amd: Put IOMMUv2 capable devices in pt_domain Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 09/22] iommu/amd: Add amd_iommu_domain_direct_map function Joerg Roedel
` (15 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Add a notifier to which a module can attach to get informed
about incoming PPR faults.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 90 ++++++++++++++++++++++++++++++++++++++-
drivers/iommu/amd_iommu_proto.h | 3 +
drivers/iommu/amd_iommu_types.h | 34 ++++++++++++++-
3 files changed, 125 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index bde2776..81a709a 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -17,6 +17,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
@@ -28,6 +29,8 @@
#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
@@ -59,6 +62,8 @@ static struct protection_domain *pt_domain;
static struct iommu_ops amd_iommu_ops;
+static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
+
/*
* general struct to manage commands send to an IOMMU
*/
@@ -488,12 +493,82 @@ static void iommu_poll_events(struct amd_iommu *iommu)
spin_unlock_irqrestore(&iommu->lock, flags);
}
+static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
+{
+ struct amd_iommu_fault fault;
+ volatile u64 *raw;
+ int i;
+
+ raw = (u64 *)(iommu->ppr_log + head);
+
+ /*
+ * Hardware bug: Interrupt may arrive before the entry is written to
+ * memory. If this happens we need to wait for the entry to arrive.
+ */
+ for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ if (PPR_REQ_TYPE(raw[0]) != 0)
+ break;
+ udelay(1);
+ }
+
+ if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
+ pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
+ return;
+ }
+
+ fault.address = raw[1];
+ fault.pasid = PPR_PASID(raw[0]);
+ fault.device_id = PPR_DEVID(raw[0]);
+ fault.tag = PPR_TAG(raw[0]);
+ fault.flags = PPR_FLAGS(raw[0]);
+
+ /*
+ * To detect the hardware bug we need to clear the entry
+ * to back to zero.
+ */
+ raw[0] = raw[1] = 0;
+
+ atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
+}
+
+static void iommu_poll_ppr_log(struct amd_iommu *iommu)
+{
+ unsigned long flags;
+ u32 head, tail;
+
+ if (iommu->ppr_log == NULL)
+ return;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+
+ while (head != tail) {
+
+ /* Handle PPR entry */
+ iommu_handle_ppr_entry(iommu, head);
+
+ /* Update and refresh ring-buffer state*/
+ head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
+ writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+ }
+
+ /* enable ppr interrupts again */
+ writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
irqreturn_t amd_iommu_int_thread(int irq, void *data)
{
struct amd_iommu *iommu;
- for_each_iommu(iommu)
+ for_each_iommu(iommu) {
iommu_poll_events(iommu);
+ iommu_poll_ppr_log(iommu);
+ }
return IRQ_HANDLED;
}
@@ -2888,3 +2963,16 @@ int __init amd_iommu_init_passthrough(void)
return 0;
}
+
+/* IOMMUv2 specific functions */
+int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&ppr_notifier, nb);
+}
+EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
+
+int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&ppr_notifier, nb);
+}
+EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 3a46c30..cfe2dfc 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -32,7 +32,10 @@ extern void amd_iommu_uninit_devices(void);
extern void amd_iommu_init_notifier(void);
extern void amd_iommu_init_api(void);
+/* IOMMUv2 specific functions */
extern bool amd_iommu_v2_supported(void);
+extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
+extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
#ifndef CONFIG_AMD_IOMMU_STATS
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 6897645..f626722 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -94,7 +94,8 @@
#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
/* MMIO status bits */
-#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04
+#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
+#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
/* event logging constants */
#define EVENT_ENTRY_SIZE 0x10
@@ -180,6 +181,16 @@
#define PPR_ENTRY_SIZE 16
#define PPR_LOG_SIZE (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES)
+#define PPR_REQ_TYPE(x) (((x) >> 60) & 0xfULL)
+#define PPR_FLAGS(x) (((x) >> 48) & 0xfffULL)
+#define PPR_DEVID(x) ((x) & 0xffffULL)
+#define PPR_TAG(x) (((x) >> 32) & 0x3ffULL)
+#define PPR_PASID1(x) (((x) >> 16) & 0xffffULL)
+#define PPR_PASID2(x) (((x) >> 42) & 0xfULL)
+#define PPR_PASID(x) ((PPR_PASID2(x) << 16) | PPR_PASID1(x))
+
+#define PPR_REQ_FAULT 0x01
+
#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
@@ -301,6 +312,27 @@ extern bool amd_iommu_iotlb_sup;
#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
+
+/*
+ * This struct is used to pass information about
+ * incoming PPR faults around.
+ */
+struct amd_iommu_fault {
+ u64 address; /* IO virtual address of the fault*/
+ u32 pasid; /* Address space identifier */
+ u16 device_id; /* Originating PCI device id */
+ u16 tag; /* PPR tag */
+ u16 flags; /* Fault flags */
+
+};
+
+#define PPR_FAULT_EXEC (1 << 1)
+#define PPR_FAULT_READ (1 << 2)
+#define PPR_FAULT_WRITE (1 << 5)
+#define PPR_FAULT_USER (1 << 6)
+#define PPR_FAULT_RSVD (1 << 7)
+#define PPR_FAULT_GN (1 << 8)
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 09/22] iommu/amd: Add amd_iommu_domain_direct_map function
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (7 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 08/22] iommu/amd: Implement notifier for PPR faults Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 10/22] iommu/amd: Add support for IOMMUv2 domain mode Joerg Roedel
` (14 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
This function can be used to switch a domain into
paging-mode 0. In this mode all devices can access physical
system memory directly without any remapping.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 38 ++++++++++++++++++++++++++++++++++++--
drivers/iommu/amd_iommu_proto.h | 3 +++
2 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 81a709a..f0b289f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1684,9 +1684,12 @@ static bool dma_ops_domain(struct protection_domain *domain)
static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
{
- u64 pte_root = virt_to_phys(domain->pt_root);
+ u64 pte_root = 0;
u64 flags = 0;
+ if (domain->mode != PAGE_MODE_NONE)
+ pte_root = virt_to_phys(domain->pt_root);
+
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@@ -2782,7 +2785,8 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
BUG_ON(domain->dev_cnt != 0);
- free_pagetable(domain);
+ if (domain->mode != PAGE_MODE_NONE)
+ free_pagetable(domain);
protection_domain_free(domain);
@@ -2846,6 +2850,9 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
int prot = 0;
int ret;
+ if (domain->mode == PAGE_MODE_NONE)
+ return -EINVAL;
+
if (iommu_prot & IOMMU_READ)
prot |= IOMMU_PROT_IR;
if (iommu_prot & IOMMU_WRITE)
@@ -2864,6 +2871,9 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
struct protection_domain *domain = dom->priv;
unsigned long page_size, unmap_size;
+ if (domain->mode == PAGE_MODE_NONE)
+ return -EINVAL;
+
page_size = 0x1000UL << gfp_order;
mutex_lock(&domain->api_lock);
@@ -2883,6 +2893,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
phys_addr_t paddr;
u64 *pte, __pte;
+ if (domain->mode == PAGE_MODE_NONE)
+ return iova;
+
pte = fetch_pte(domain, iova);
if (!pte || !IOMMU_PTE_PRESENT(*pte))
@@ -2976,3 +2989,24 @@ int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
return atomic_notifier_chain_unregister(&ppr_notifier, nb);
}
EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
+
+void amd_iommu_domain_direct_map(struct iommu_domain *dom)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ /* Update data structure */
+ domain->mode = PAGE_MODE_NONE;
+ domain->updated = true;
+
+ /* Make changes visible to IOMMUs */
+ update_domain(domain);
+
+ /* Page-table is not visible to IOMMU anymore, so free it */
+ free_pagetable(domain);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
+}
+EXPORT_SYMBOL(amd_iommu_domain_direct_map);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index cfe2dfc..2c4554e 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -33,9 +33,12 @@ extern void amd_iommu_init_notifier(void);
extern void amd_iommu_init_api(void);
/* IOMMUv2 specific functions */
+struct iommu_domain;
+
extern bool amd_iommu_v2_supported(void);
extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
+extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
#ifndef CONFIG_AMD_IOMMU_STATS
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 10/22] iommu/amd: Add support for IOMMUv2 domain mode
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (8 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 09/22] iommu/amd: Add amd_iommu_domain_direct_map function Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 11/22] iommu/amd: Implement IOMMUv2 TLB flushing routines Joerg Roedel
` (13 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
This patch adds support for protection domains that
implement two-level paging for devices.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/Kconfig | 4 +-
drivers/iommu/amd_iommu.c | 144 +++++++++++++++++++++++++++++++++++++-
drivers/iommu/amd_iommu_init.c | 9 +++
drivers/iommu/amd_iommu_proto.h | 1 +
drivers/iommu/amd_iommu_types.h | 27 +++++++
5 files changed, 180 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 5414253b..220dfc2 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -34,7 +34,9 @@ config AMD_IOMMU
bool "AMD IOMMU support"
select SWIOTLB
select PCI_MSI
- select PCI_IOV
+ select PCI_ATS
+ select PCI_PRI
+ select PCI_PASID
select IOMMU_API
depends on X86_64 && PCI && ACPI
---help---
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f0b289f..eaa1af2 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -63,6 +63,7 @@ static struct protection_domain *pt_domain;
static struct iommu_ops amd_iommu_ops;
static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
+int amd_iommu_max_glx_val = -1;
/*
* general struct to manage commands send to an IOMMU
@@ -1598,6 +1599,11 @@ static void free_pagetable(struct protection_domain *domain)
domain->pt_root = NULL;
}
+static void free_gcr3_table(struct protection_domain *domain)
+{
+ free_page((unsigned long)domain->gcr3_tbl);
+}
+
/*
* Free a domain, only used if something went wrong in the
* allocation path and we need to free an already allocated page table
@@ -1699,6 +1705,32 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
if (ats)
flags |= DTE_FLAG_IOTLB;
+ if (domain->flags & PD_IOMMUV2_MASK) {
+ u64 gcr3 = __pa(domain->gcr3_tbl);
+ u64 glx = domain->glx;
+ u64 tmp;
+
+ pte_root |= DTE_FLAG_GV;
+ pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
+
+ /* First mask out possible old values for GCR3 table */
+ tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
+ flags &= ~tmp;
+
+ tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+ flags &= ~tmp;
+
+ /* Encode GCR3 table into DTE */
+ tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
+ pte_root |= tmp;
+
+ tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
+ flags |= tmp;
+
+ tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
+ flags |= tmp;
+ }
+
flags &= ~(0xffffUL);
flags |= domain->id;
@@ -1803,6 +1835,46 @@ out_unlock:
return ret;
}
+
+static void pdev_iommuv2_disable(struct pci_dev *pdev)
+{
+ pci_disable_ats(pdev);
+ pci_disable_pri(pdev);
+ pci_disable_pasid(pdev);
+}
+
+static int pdev_iommuv2_enable(struct pci_dev *pdev)
+{
+ int ret;
+
+ /* Only allow access to user-accessible pages */
+ ret = pci_enable_pasid(pdev, 0);
+ if (ret)
+ goto out_err;
+
+ /* First reset the PRI state of the device */
+ ret = pci_reset_pri(pdev);
+ if (ret)
+ goto out_err;
+
+ /* FIXME: Hardcode number of outstanding requests for now */
+ ret = pci_enable_pri(pdev, 32);
+ if (ret)
+ goto out_err;
+
+ ret = pci_enable_ats(pdev, PAGE_SHIFT);
+ if (ret)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ pci_disable_pri(pdev);
+ pci_disable_pasid(pdev);
+
+ return ret;
+}
+
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
@@ -1817,7 +1889,17 @@ static int attach_device(struct device *dev,
dev_data = get_dev_data(dev);
- if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
+ if (domain->flags & PD_IOMMUV2_MASK) {
+ if (!dev_data->iommu_v2 || !dev_data->passthrough)
+ return -EINVAL;
+
+ if (pdev_iommuv2_enable(pdev) != 0)
+ return -EINVAL;
+
+ dev_data->ats.enabled = true;
+ dev_data->ats.qdep = pci_ats_queue_depth(pdev);
+ } else if (amd_iommu_iotlb_sup &&
+ pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev);
}
@@ -1877,20 +1959,24 @@ static void __detach_device(struct iommu_dev_data *dev_data)
*/
static void detach_device(struct device *dev)
{
+ struct protection_domain *domain;
struct iommu_dev_data *dev_data;
unsigned long flags;
dev_data = get_dev_data(dev);
+ domain = dev_data->domain;
/* lock device table */
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
__detach_device(dev_data);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
- if (dev_data->ats.enabled) {
+ if (domain->flags & PD_IOMMUV2_MASK)
+ pdev_iommuv2_disable(to_pci_dev(dev));
+ else if (dev_data->ats.enabled)
pci_disable_ats(to_pci_dev(dev));
- dev_data->ats.enabled = false;
- }
+
+ dev_data->ats.enabled = false;
}
/*
@@ -2788,6 +2874,9 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
if (domain->mode != PAGE_MODE_NONE)
free_pagetable(domain);
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
+
protection_domain_free(domain);
dom->priv = NULL;
@@ -3010,3 +3099,50 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
spin_unlock_irqrestore(&domain->lock, flags);
}
EXPORT_SYMBOL(amd_iommu_domain_direct_map);
+
+int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int levels, ret;
+
+ if (pasids <= 0 || pasids & ~PASID_MASK)
+ return -EINVAL;
+
+ /* Number of GCR3 table levels required */
+ for (levels = -1; pasids != 0; pasids >>= 9)
+ levels += 1;
+
+ if (levels > amd_iommu_max_glx_val)
+ return -EINVAL;
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ /*
+ * Save us all sanity checks whether devices already in the
+ * domain support IOMMUv2. Just force that the domain has no
+ * devices attached when it is switched into IOMMUv2 mode.
+ */
+ ret = -EBUSY;
+ if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
+ goto out;
+
+ ret = -ENOMEM;
+ domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (domain->gcr3_tbl == NULL)
+ goto out;
+
+ domain->glx = levels;
+ domain->flags |= PD_IOMMUV2_MASK;
+ domain->updated = true;
+
+ update_domain(domain);
+
+ ret = 0;
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 7c3fd57..c7a5d7e 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -755,6 +755,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
iommu->features = ((u64)high << 32) | low;
if (iommu_feature(iommu, FEATURE_GT)) {
+ int glxval;
u32 pasids;
u64 shift;
@@ -763,6 +764,14 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
pasids = (1 << shift);
amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
+
+ glxval = iommu->features & FEATURE_GLXVAL_MASK;
+ glxval >>= FEATURE_GLXVAL_SHIFT;
+
+ if (amd_iommu_max_glx_val == -1)
+ amd_iommu_max_glx_val = glxval;
+ else
+ amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
}
if (iommu_feature(iommu, FEATURE_GT) &&
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 2c4554e..d207b1d 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -39,6 +39,7 @@ extern bool amd_iommu_v2_supported(void);
extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
+extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
#ifndef CONFIG_AMD_IOMMU_STATS
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index f626722..905eca5 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -93,6 +93,11 @@
#define FEATURE_PASID_SHIFT 32
#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
+#define FEATURE_GLXVAL_SHIFT 14
+#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT)
+
+#define PASID_MASK 0x000fffff
+
/* MMIO status bits */
#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
@@ -257,6 +262,22 @@
#define IOMMU_PTE_IW (1ULL << 62)
#define DTE_FLAG_IOTLB (0x01UL << 32)
+#define DTE_FLAG_GV (0x01ULL << 55)
+#define DTE_GLX_SHIFT (56)
+#define DTE_GLX_MASK (3)
+
+#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL)
+#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
+#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0xfffffULL)
+
+#define DTE_GCR3_INDEX_A 0
+#define DTE_GCR3_INDEX_B 1
+#define DTE_GCR3_INDEX_C 1
+
+#define DTE_GCR3_SHIFT_A 58
+#define DTE_GCR3_SHIFT_B 16
+#define DTE_GCR3_SHIFT_C 43
+
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_IO_PAGE_MASK (~(0xfffULL))
@@ -284,6 +305,7 @@
domain for an IOMMU */
#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
translation */
+#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
@@ -345,6 +367,8 @@ struct protection_domain {
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
+ int glx; /* Number of levels for GCR3 table */
+ u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
bool updated; /* complete domain flush required */
unsigned dev_cnt; /* devices assigned to this domain */
@@ -612,6 +636,9 @@ extern bool amd_iommu_v2_present;
extern bool amd_iommu_force_isolation;
+/* Max levels of glxval supported */
+extern int amd_iommu_max_glx_val;
+
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 11/22] iommu/amd: Implement IOMMUv2 TLB flushing routines
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (9 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 10/22] iommu/amd: Add support for IOMMUv2 domain mode Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 12/22] iommu/amd: Implement functions to manage GCR3 table Joerg Roedel
` (12 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
The functions added with this patch make it possible to manage
the IOMMU and the device TLBs for all devices in an IOMMUv2
domain.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 136 +++++++++++++++++++++++++++++++++++++++
drivers/iommu/amd_iommu_proto.h | 3 +
drivers/iommu/amd_iommu_types.h | 1 +
3 files changed, 140 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index eaa1af2..bd18a04 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -698,6 +698,44 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}
+static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
+ u64 address, bool size)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ address &= ~(0xfffULL);
+
+ cmd->data[0] = pasid & PASID_MASK;
+ cmd->data[1] = domid;
+ cmd->data[2] = lower_32_bits(address);
+ cmd->data[3] = upper_32_bits(address);
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
+ if (size)
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+ CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+}
+
+static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
+ int qdep, u64 address, bool size)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ address &= ~(0xfffULL);
+
+ cmd->data[0] = devid;
+ cmd->data[0] |= (pasid & 0xff) << 16;
+ cmd->data[0] |= (qdep & 0xff) << 24;
+ cmd->data[1] = devid;
+ cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16;
+ cmd->data[2] = lower_32_bits(address);
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
+ cmd->data[3] = upper_32_bits(address);
+ if (size)
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+ CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+}
+
static void build_inv_all(struct iommu_cmd *cmd)
{
memset(cmd, 0, sizeof(*cmd));
@@ -3146,3 +3184,101 @@ out:
return ret;
}
EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
+
+static int __flush_pasid(struct protection_domain *domain, int pasid,
+ u64 address, bool size)
+{
+ struct iommu_dev_data *dev_data;
+ struct iommu_cmd cmd;
+ int i, ret;
+
+ if (!(domain->flags & PD_IOMMUV2_MASK))
+ return -EINVAL;
+
+ build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size);
+
+ /*
+ * IOMMU TLB needs to be flushed before Device TLB to
+ * prevent device TLB refill from IOMMU TLB
+ */
+ for (i = 0; i < amd_iommus_present; ++i) {
+ if (domain->dev_iommu[i] == 0)
+ continue;
+
+ ret = iommu_queue_command(amd_iommus[i], &cmd);
+ if (ret != 0)
+ goto out;
+ }
+
+ /* Wait until IOMMU TLB flushes are complete */
+ domain_flush_complete(domain);
+
+ /* Now flush device TLBs */
+ list_for_each_entry(dev_data, &domain->dev_list, list) {
+ struct amd_iommu *iommu;
+ int qdep;
+
+ BUG_ON(!dev_data->ats.enabled);
+
+ qdep = dev_data->ats.qdep;
+ iommu = amd_iommu_rlookup_table[dev_data->devid];
+
+ build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid,
+ qdep, address, size);
+
+ ret = iommu_queue_command(iommu, &cmd);
+ if (ret != 0)
+ goto out;
+ }
+
+ /* Wait until all device TLBs are flushed */
+ domain_flush_complete(domain);
+
+ ret = 0;
+
+out:
+
+ return ret;
+}
+
+static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
+ u64 address)
+{
+ return __flush_pasid(domain, pasid, address, false);
+}
+
+int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+ u64 address)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ ret = __amd_iommu_flush_page(domain, pasid, address);
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_flush_page);
+
+static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
+{
+ return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+ true);
+}
+
+int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ ret = __amd_iommu_flush_tlb(domain, pasid);
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_flush_tlb);
+
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index d207b1d..a92dc61 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -40,6 +40,9 @@ extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+ u64 address);
+extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
#ifndef CONFIG_AMD_IOMMU_STATS
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 905eca5..64a7642 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -148,6 +148,7 @@
#define CMD_COMPL_WAIT_INT_MASK 0x02
#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
+#define CMD_INV_IOMMU_PAGES_GN_MASK 0x04
#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 12/22] iommu/amd: Implement functions to manage GCR3 table
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (10 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 11/22] iommu/amd: Implement IOMMUv2 TLB flushing routines Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 13/22] iommu/amd: Implement function to send PPR completions Joerg Roedel
` (11 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
This patch adds functions necessary to set and clear the
GCR3 values associated with a particular PASID in an IOMMUv2
domain.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 130 +++++++++++++++++++++++++++++++++++++++
drivers/iommu/amd_iommu_proto.h | 4 +
drivers/iommu/amd_iommu_types.h | 1 +
3 files changed, 135 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index bd18a04..faf566c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1637,8 +1637,45 @@ static void free_pagetable(struct protection_domain *domain)
domain->pt_root = NULL;
}
+static void free_gcr3_tbl_level1(u64 *tbl)
+{
+ u64 *ptr;
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (!(tbl[i] & GCR3_VALID))
+ continue;
+
+ ptr = __va(tbl[i] & PAGE_MASK);
+
+ free_page((unsigned long)ptr);
+ }
+}
+
+static void free_gcr3_tbl_level2(u64 *tbl)
+{
+ u64 *ptr;
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (!(tbl[i] & GCR3_VALID))
+ continue;
+
+ ptr = __va(tbl[i] & PAGE_MASK);
+
+ free_gcr3_tbl_level1(ptr);
+ }
+}
+
static void free_gcr3_table(struct protection_domain *domain)
{
+ if (domain->glx == 2)
+ free_gcr3_tbl_level2(domain->gcr3_tbl);
+ else if (domain->glx == 1)
+ free_gcr3_tbl_level1(domain->gcr3_tbl);
+ else if (domain->glx != 0)
+ BUG();
+
free_page((unsigned long)domain->gcr3_tbl);
}
@@ -3282,3 +3319,96 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
}
EXPORT_SYMBOL(amd_iommu_flush_tlb);
+static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
+{
+ int index;
+ u64 *pte;
+
+ while (true) {
+
+ index = (pasid >> (9 * level)) & 0x1ff;
+ pte = &root[index];
+
+ if (level == 0)
+ break;
+
+ if (!(*pte & GCR3_VALID)) {
+ if (!alloc)
+ return NULL;
+
+ root = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (root == NULL)
+ return NULL;
+
+ *pte = __pa(root) | GCR3_VALID;
+ }
+
+ root = __va(*pte & PAGE_MASK);
+
+ level -= 1;
+ }
+
+ return pte;
+}
+
+static int __set_gcr3(struct protection_domain *domain, int pasid,
+ unsigned long cr3)
+{
+ u64 *pte;
+
+ if (domain->mode != PAGE_MODE_NONE)
+ return -EINVAL;
+
+ pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
+ if (pte == NULL)
+ return -ENOMEM;
+
+ *pte = (cr3 & PAGE_MASK) | GCR3_VALID;
+
+ return __amd_iommu_flush_tlb(domain, pasid);
+}
+
+static int __clear_gcr3(struct protection_domain *domain, int pasid)
+{
+ u64 *pte;
+
+ if (domain->mode != PAGE_MODE_NONE)
+ return -EINVAL;
+
+ pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
+ if (pte == NULL)
+ return 0;
+
+ *pte = 0;
+
+ return __amd_iommu_flush_tlb(domain, pasid);
+}
+
+int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+ unsigned long cr3)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ ret = __set_gcr3(domain, pasid, cr3);
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
+
+int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
+{
+ struct protection_domain *domain = dom->priv;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&domain->lock, flags);
+ ret = __clear_gcr3(domain, pasid);
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index a92dc61..a951a70 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -43,6 +43,10 @@ extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
u64 address);
extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
+extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+ unsigned long cr3);
+extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+
#ifndef CONFIG_AMD_IOMMU_STATS
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 64a7642..68937a4 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -279,6 +279,7 @@
#define DTE_GCR3_SHIFT_B 16
#define DTE_GCR3_SHIFT_C 43
+#define GCR3_VALID 0x01ULL
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_IO_PAGE_MASK (~(0xfffULL))
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 13/22] iommu/amd: Implement function to send PPR completions
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (11 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 12/22] iommu/amd: Implement functions to manage GCR3 table Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 14/22] iommu/amd: Add function to get IOMMUv2 domain for pdev Joerg Roedel
` (10 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
To send completions for PPR requests this patch adds a
function which can be used by the IOMMUv2 driver.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 51 +++++++++++++++++++++++++++++++++++++++
drivers/iommu/amd_iommu_proto.h | 6 ++++
drivers/iommu/amd_iommu_types.h | 6 ++++
3 files changed, 63 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index faf566c..e363180 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -736,6 +736,22 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
}
+static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
+ int status, int tag, bool gn)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ cmd->data[0] = devid;
+ if (gn) {
+ cmd->data[1] = pasid & PASID_MASK;
+ cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK;
+ }
+ cmd->data[3] = tag & 0x1ff;
+ cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
+
+ CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
+}
+
static void build_inv_all(struct iommu_cmd *cmd)
{
memset(cmd, 0, sizeof(*cmd));
@@ -1950,6 +1966,23 @@ out_err:
return ret;
}
+/* FIXME: Move this to PCI code */
+#define PCI_PRI_TLP_OFF (1 << 2)
+
+bool pci_pri_tlp_required(struct pci_dev *pdev)
+{
+ u16 control;
+ int pos;
+
+ pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+ if (!pos)
+ return false;
+
+ pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
+
+ return (control & PCI_PRI_TLP_OFF) ? true : false;
+}
+
/*
* If a device is not yet associated with a domain, this function does
* assigns it visible for the hardware
@@ -1973,6 +2006,7 @@ static int attach_device(struct device *dev,
dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev);
+ dev_data->pri_tlp = pci_pri_tlp_required(pdev);
} else if (amd_iommu_iotlb_sup &&
pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
dev_data->ats.enabled = true;
@@ -3412,3 +3446,20 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
return ret;
}
EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
+
+int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+ int status, int tag)
+{
+ struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu;
+ struct iommu_cmd cmd;
+
+ dev_data = get_dev_data(&pdev->dev);
+ iommu = amd_iommu_rlookup_table[dev_data->devid];
+
+ build_complete_ppr(&cmd, dev_data->devid, pasid, status,
+ tag, dev_data->pri_tlp);
+
+ return iommu_queue_command(iommu, &cmd);
+}
+EXPORT_SYMBOL(amd_iommu_complete_ppr);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index a951a70..bb5ecfe 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -47,6 +47,12 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
unsigned long cr3);
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+#define PPR_SUCCESS 0x0
+#define PPR_INVALID 0x1
+#define PPR_FAILURE 0xf
+
+extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+ int status, int tag);
#ifndef CONFIG_AMD_IOMMU_STATS
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 68937a4..e21c74a 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -142,6 +142,7 @@
#define CMD_INV_DEV_ENTRY 0x02
#define CMD_INV_IOMMU_PAGES 0x03
#define CMD_INV_IOTLB_PAGES 0x04
+#define CMD_COMPLETE_PPR 0x07
#define CMD_INV_ALL 0x08
#define CMD_COMPL_WAIT_STORE_MASK 0x01
@@ -150,6 +151,9 @@
#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
#define CMD_INV_IOMMU_PAGES_GN_MASK 0x04
+#define PPR_STATUS_MASK 0xf
+#define PPR_STATUS_SHIFT 12
+
#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
/* macros and definitions for device table entries */
@@ -395,6 +399,8 @@ struct iommu_dev_data {
bool enabled;
int qdep;
} ats; /* ATS state */
+ bool pri_tlp; /* PASID TLB required for
+ PPR completions */
};
/*
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 14/22] iommu/amd: Add function to get IOMMUv2 domain for pdev
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (12 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 13/22] iommu/amd: Implement function to send PPR completions Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 15/22] iommu/amd: Add device errata handling Joerg Roedel
` (9 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
The AMD IOMMUv2 driver needs to get the IOMMUv2 domain
associated with a particular device. This patch adds a
function to get this information.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 18 ++++++++++++++++++
drivers/iommu/amd_iommu_proto.h | 1 +
drivers/iommu/amd_iommu_types.h | 4 ++++
3 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e363180..dd66211 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2958,6 +2958,8 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
if (!domain->pt_root)
goto out_free;
+ domain->iommu_domain = dom;
+
dom->priv = domain;
return 0;
@@ -3463,3 +3465,19 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
return iommu_queue_command(iommu, &cmd);
}
EXPORT_SYMBOL(amd_iommu_complete_ppr);
+
+struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
+{
+ struct protection_domain *domain;
+
+ domain = get_domain(&pdev->dev);
+ if (IS_ERR(domain))
+ return NULL;
+
+ /* Only return IOMMUv2 domains */
+ if (!(domain->flags & PD_IOMMUV2_MASK))
+ return NULL;
+
+ return domain->iommu_domain;
+}
+EXPORT_SYMBOL(amd_iommu_get_v2_domain);
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index bb5ecfe..1a7f41c 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -46,6 +46,7 @@ extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
unsigned long cr3);
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
#define PPR_SUCCESS 0x0
#define PPR_INVALID 0x1
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index e21c74a..2ba7b9a 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -361,6 +361,8 @@ struct amd_iommu_fault {
#define PPR_FAULT_RSVD (1 << 7)
#define PPR_FAULT_GN (1 << 8)
+struct iommu_domain;
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -380,6 +382,8 @@ struct protection_domain {
unsigned dev_cnt; /* devices assigned to this domain */
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
void *priv; /* private data */
+ struct iommu_domain *iommu_domain; /* Pointer to generic
+ domain structure */
};
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 15/22] iommu/amd: Add device errata handling
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (13 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 14/22] iommu/amd: Add function to get IOMMUv2 domain for pdev Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 16/22] iommu/amd: Add stat counter for IOMMUv2 events Joerg Roedel
` (8 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Add infrastructure for errata handling and handle two known
errata in the IOMMUv2 code.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 57 ++++++++++++++++++++++++++++++++++++--
drivers/iommu/amd_iommu_types.h | 1 +
include/linux/amd-iommu.h | 18 ++++++++++++
3 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index dd66211..7bd1565 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -172,6 +172,15 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev)
return true;
}
+static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
+{
+ struct iommu_dev_data *dev_data;
+
+ dev_data = get_dev_data(&pdev->dev);
+
+ return dev_data->errata & (1 << erratum) ? true : false;
+}
+
/*
* In this function the list of preallocated protection domains is traversed to
* find the domain for a specific device
@@ -1934,9 +1943,33 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev)
pci_disable_pasid(pdev);
}
+/* FIXME: Change generic reset-function to do the same */
+static int pri_reset_while_enabled(struct pci_dev *pdev)
+{
+ u16 control;
+ int pos;
+
+ pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+ if (!pos)
+ return -EINVAL;
+
+ pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
+ control |= PCI_PRI_RESET;
+ pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+
+ return 0;
+}
+
static int pdev_iommuv2_enable(struct pci_dev *pdev)
{
- int ret;
+ bool reset_enable;
+ int reqs, ret;
+
+ /* FIXME: Hardcode number of outstanding requests for now */
+ reqs = 32;
+ if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
+ reqs = 1;
+ reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
/* Only allow access to user-accessible pages */
ret = pci_enable_pasid(pdev, 0);
@@ -1948,11 +1981,17 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
if (ret)
goto out_err;
- /* FIXME: Hardcode number of outstanding requests for now */
- ret = pci_enable_pri(pdev, 32);
+ /* Enable PRI */
+ ret = pci_enable_pri(pdev, reqs);
if (ret)
goto out_err;
+ if (reset_enable) {
+ ret = pri_reset_while_enabled(pdev);
+ if (ret)
+ goto out_err;
+ }
+
ret = pci_enable_ats(pdev, PAGE_SHIFT);
if (ret)
goto out_err;
@@ -3481,3 +3520,15 @@ struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
return domain->iommu_domain;
}
EXPORT_SYMBOL(amd_iommu_get_v2_domain);
+
+void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
+{
+ struct iommu_dev_data *dev_data;
+
+ if (!amd_iommu_v2_supported())
+ return;
+
+ dev_data = get_dev_data(&pdev->dev);
+ dev_data->errata |= (1 << erratum);
+}
+EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 2ba7b9a..90d149c 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -405,6 +405,7 @@ struct iommu_dev_data {
} ats; /* ATS state */
bool pri_tlp; /* PASID TLB required for
PPR completions */
+ u32 errata; /* Bitmap for errata to apply */
};
/*
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index a6863a2..4152c30 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -26,6 +26,24 @@
extern int amd_iommu_detect(void);
+
+/**
+ * amd_iommu_enable_device_erratum() - Enable erratum workaround for device
+ * in the IOMMUv2 driver
+ * @pdev: The PCI device the workaround is necessary for
+ * @erratum: The erratum workaround to enable
+ *
+ * Possible values for the erratum number are for now:
+ * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI
+ * is enabled
+ * - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI
+ * requests to one
+ */
+#define AMD_PRI_DEV_ERRATUM_ENABLE_RESET 0
+#define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE 1
+
+extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum);
+
#else
static inline int amd_iommu_detect(void) { return -ENODEV; }
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 16/22] iommu/amd: Add stat counter for IOMMUv2 events
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (14 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 15/22] iommu/amd: Add device errata handling Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 17/22] iommu/amd: Add driver stub for AMD IOMMUv2 support Joerg Roedel
` (7 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Add some interesting statistic counters for events when
IOMMUv2 is active.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu.c | 17 +++++++++++++++++
1 files changed, 17 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 7bd1565..88b16c0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -359,6 +359,11 @@ DECLARE_STATS_COUNTER(domain_flush_single);
DECLARE_STATS_COUNTER(domain_flush_all);
DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);
+DECLARE_STATS_COUNTER(complete_ppr);
+DECLARE_STATS_COUNTER(invalidate_iotlb);
+DECLARE_STATS_COUNTER(invalidate_iotlb_all);
+DECLARE_STATS_COUNTER(pri_requests);
+
static struct dentry *stats_dir;
static struct dentry *de_fflush;
@@ -393,6 +398,10 @@ static void amd_iommu_stats_init(void)
amd_iommu_stats_add(&domain_flush_all);
amd_iommu_stats_add(&alloced_io_mem);
amd_iommu_stats_add(&total_map_requests);
+ amd_iommu_stats_add(&complete_ppr);
+ amd_iommu_stats_add(&invalidate_iotlb);
+ amd_iommu_stats_add(&invalidate_iotlb_all);
+ amd_iommu_stats_add(&pri_requests);
}
#endif
@@ -509,6 +518,8 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
volatile u64 *raw;
int i;
+ INC_STATS_COUNTER(pri_requests);
+
raw = (u64 *)(iommu->ppr_log + head);
/*
@@ -3356,6 +3367,8 @@ out:
static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
u64 address)
{
+ INC_STATS_COUNTER(invalidate_iotlb);
+
return __flush_pasid(domain, pasid, address, false);
}
@@ -3376,6 +3389,8 @@ EXPORT_SYMBOL(amd_iommu_flush_page);
static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
{
+ INC_STATS_COUNTER(invalidate_iotlb_all);
+
return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
true);
}
@@ -3495,6 +3510,8 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
struct amd_iommu *iommu;
struct iommu_cmd cmd;
+ INC_STATS_COUNTER(complete_ppr);
+
dev_data = get_dev_data(&pdev->dev);
iommu = amd_iommu_rlookup_table[dev_data->devid];
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 17/22] iommu/amd: Add driver stub for AMD IOMMUv2 support
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (15 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 16/22] iommu/amd: Add stat counter for IOMMUv2 events Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 18/22] iommu/amd: Implement device acquisition code for IOMMUv2 Joerg Roedel
` (6 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Add a Kconfig option for the optional driver. Since it is
optional it can be compiled as a module and will only be
loaded when required by another driver.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/Kconfig | 8 ++++++++
drivers/iommu/Makefile | 1 +
drivers/iommu/amd_iommu_v2.c | 36 ++++++++++++++++++++++++++++++++++++
3 files changed, 45 insertions(+), 0 deletions(-)
create mode 100644 drivers/iommu/amd_iommu_v2.c
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 220dfc2..e608a36 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -60,6 +60,14 @@ config AMD_IOMMU_STATS
information to userspace via debugfs.
If unsure, say N.
+config AMD_IOMMU_V2
+ tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
+ depends on AMD_IOMMU && EXPERIMENTAL
+ ---help---
+ This option enables support for the AMD IOMMUv2 features of the IOMMU
+ hardware. Select this option if you want to use devices that support
the PCI PRI and PASID interface.
+
# Intel IOMMU support
config DMAR_TABLE
bool
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 2f44487..0e36b49 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
+obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
new file mode 100644
index 0000000..a19e07d
--- /dev/null
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
+
+static int __init amd_iommu_v2_init(void)
+{
+ pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
+
+ return 0;
+}
+
+static void __exit amd_iommu_v2_exit(void)
+{
+}
+
+module_init(amd_iommu_v2_init);
+module_exit(amd_iommu_v2_exit);
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 18/22] iommu/amd: Implement device acquisition code for IOMMUv2
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (16 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 17/22] iommu/amd: Add driver stub for AMD IOMMUv2 support Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel [this message: iommu/amd: Implement device acquisition code for IOMMUv2]
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 19/22] iommu/amd: Add routines to bind/unbind a pasid Joerg Roedel
` (5 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
This patch adds the amd_iommu_init_device() and
amd_iommu_free_device() functions which make a device and
the IOMMU ready for IOMMUv2 usage.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu_v2.c | 213 ++++++++++++++++++++++++++++++++++++++++++
include/linux/amd-iommu.h | 23 +++++-
2 files changed, 235 insertions(+), 1 deletions(-)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index a19e07d..e0ec952 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -16,20 +16,233 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/amd-iommu.h>
+#include <linux/mm_types.h>
#include <linux/module.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+
+#include "amd_iommu_proto.h"
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
+#define MAX_DEVICES 0x10000
+#define PRI_QUEUE_SIZE 512
+
+struct pri_queue {
+ atomic_t inflight;
+ bool finish;
+};
+
+struct pasid_state {
+ struct list_head list; /* For global state-list */
+ atomic_t count; /* Reference count */
+ struct task_struct *task; /* Task bound to this PASID */
+ struct mm_struct *mm; /* mm_struct for the faults */
+ struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
+ struct device_state *device_state; /* Link to our device_state */
+ int pasid; /* PASID index */
+};
+
+struct device_state {
+ atomic_t count;
+ struct pci_dev *pdev;
+ struct pasid_state **states;
+ struct iommu_domain *domain;
+ int pasid_levels;
+ int max_pasids;
+ spinlock_t lock;
+};
+
+struct device_state **state_table;
+static spinlock_t state_lock;
+
+/* List and lock for all pasid_states */
+static LIST_HEAD(pasid_state_list);
+static DEFINE_SPINLOCK(ps_lock);
+
+static u16 device_id(struct pci_dev *pdev)
+{
+ u16 devid;
+
+ devid = pdev->bus->number;
+ devid = (devid << 8) | pdev->devfn;
+
+ return devid;
+}
+
+static struct device_state *get_device_state(u16 devid)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+
+ spin_lock_irqsave(&state_lock, flags);
+ dev_state = state_table[devid];
+ if (dev_state != NULL)
+ atomic_inc(&dev_state->count);
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ return dev_state;
+}
+
+static void free_device_state(struct device_state *dev_state)
+{
+ iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
+ iommu_domain_free(dev_state->domain);
+ kfree(dev_state);
+}
+
+static void put_device_state(struct device_state *dev_state)
+{
+ if (atomic_dec_and_test(&dev_state->count))
+ free_device_state(dev_state);
+}
+
+int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+ int ret, tmp;
+ u16 devid;
+
+ might_sleep();
+
+ if (!amd_iommu_v2_supported())
+ return -ENODEV;
+
+ if (pasids <= 0 || pasids & ~PASID_MASK)
+ return -EINVAL;
+
+ devid = device_id(pdev);
+
+ dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
+ if (dev_state == NULL)
+ return -ENOMEM;
+
+ spin_lock_init(&dev_state->lock);
+ dev_state->pdev = pdev;
+
+ tmp = pasids;
+ while (tmp & ~0x1ff) {
+ dev_state->pasid_levels += 1;
+ tmp >>= 9;
+ }
+
+ atomic_set(&dev_state->count, 1);
+ dev_state->max_pasids = pasids;
+
+ ret = -ENOMEM;
+ dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
+ if (dev_state->states == NULL)
+ goto out_free_dev_state;
+
+ dev_state->domain = iommu_domain_alloc(&pci_bus_type);
+ if (dev_state->domain == NULL)
+ goto out_free_states;
+
+ amd_iommu_domain_direct_map(dev_state->domain);
+
+ ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
+ if (ret)
+ goto out_free_domain;
+
+ ret = iommu_attach_device(dev_state->domain, &pdev->dev);
+ if (ret != 0)
+ goto out_free_domain;
+
+ spin_lock_irqsave(&state_lock, flags);
+
+ if (state_table[devid] != NULL) {
+ spin_unlock_irqrestore(&state_lock, flags);
+ ret = -EBUSY;
+ goto out_free_domain;
+ }
+
+ state_table[devid] = dev_state;
+
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ return 0;
+
+out_free_domain:
+ iommu_domain_free(dev_state->domain);
+
+out_free_states:
+ free_page((unsigned long)dev_state->states);
+
+out_free_dev_state:
+ kfree(dev_state);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_init_device);
+
+void amd_iommu_free_device(struct pci_dev *pdev)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+ u16 devid;
+
+ if (!amd_iommu_v2_supported())
+ return;
+
+ devid = device_id(pdev);
+
+ spin_lock_irqsave(&state_lock, flags);
+
+ dev_state = state_table[devid];
+ if (dev_state == NULL) {
+ spin_unlock_irqrestore(&state_lock, flags);
+ return;
+ }
+
+ state_table[devid] = NULL;
+
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ put_device_state(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_free_device);
+
static int __init amd_iommu_v2_init(void)
{
+ size_t state_table_size;
+
pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
+ spin_lock_init(&state_lock);
+
+ state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+ state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(state_table_size));
+ if (state_table == NULL)
+ return -ENOMEM;
+
return 0;
}
static void __exit amd_iommu_v2_exit(void)
{
+ struct device_state *dev_state;
+ size_t state_table_size;
+ int i;
+
+ for (i = 0; i < MAX_DEVICES; ++i) {
+ dev_state = get_device_state(i);
+
+ if (dev_state == NULL)
+ continue;
+
+ WARN_ON_ONCE(1);
+
+ amd_iommu_free_device(dev_state->pdev);
+ put_device_state(dev_state);
+ }
+
+ state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+ free_pages((unsigned long)state_table, get_order(state_table_size));
}
module_init(amd_iommu_v2_init);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 4152c30..e8c7a2e 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -20,10 +20,12 @@
#ifndef _ASM_X86_AMD_IOMMU_H
#define _ASM_X86_AMD_IOMMU_H
-#include <linux/irqreturn.h>
+#include <linux/types.h>
#ifdef CONFIG_AMD_IOMMU
+struct pci_dev;
+
extern int amd_iommu_detect(void);
@@ -33,6 +35,7 @@ extern int amd_iommu_detect(void);
* @pdev: The PCI device the workaround is necessary for
* @erratum: The erratum workaround to enable
*
+ * The function needs to be called before amd_iommu_init_device().
* Possible values for the erratum number are for now:
* - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI
* is enabled
@@ -44,6 +47,24 @@ extern int amd_iommu_detect(void);
extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum);
+/**
+ * amd_iommu_init_device() - Init device for use with IOMMUv2 driver
+ * @pdev: The PCI device to initialize
+ * @pasids: Number of PASIDs to support for this device
+ *
+ * This function does all setup for the device pdev so that it can be
+ * used with IOMMUv2.
+ * Returns 0 on success or negative value on error.
+ */
+extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
+
+/**
+ * amd_iommu_free_device() - Free all IOMMUv2 related device resources
+ * and disable IOMMUv2 usage for this device
+ * @pdev: The PCI device to disable IOMMUv2 usage for
+ */
+extern void amd_iommu_free_device(struct pci_dev *pdev);
+
#else
static inline int amd_iommu_detect(void) { return -ENODEV; }
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 19/22] iommu/amd: Add routines to bind/unbind a pasid
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (17 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 18/22] iommu/amd: Implement device acquisition code for IOMMUv2 Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 20/22] iommu/amd: Implement IO page-fault handler Joerg Roedel
` (4 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
This patch adds routines to bind a specific process
address-space to a given PASID.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu_v2.c | 301 ++++++++++++++++++++++++++++++++++++++++++
include/linux/amd-iommu.h | 26 ++++
2 files changed, 327 insertions(+), 0 deletions(-)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index e0ec952..f54b991 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -19,6 +19,7 @@
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
#include <linux/module.h>
+#include <linux/sched.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/gfp.h>
@@ -63,6 +64,9 @@ static spinlock_t state_lock;
static LIST_HEAD(pasid_state_list);
static DEFINE_SPINLOCK(ps_lock);
+static void free_pasid_states(struct device_state *dev_state);
+static void unbind_pasid(struct device_state *dev_state, int pasid);
+
static u16 device_id(struct pci_dev *pdev)
{
u16 devid;
@@ -89,8 +93,16 @@ static struct device_state *get_device_state(u16 devid)
static void free_device_state(struct device_state *dev_state)
{
+ /*
+ * First detach device from domain - No more PRI requests will arrive
+ * from that device after it is unbound from the IOMMUv2 domain.
+ */
iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
+
+ /* Everything is down now, free the IOMMUv2 domain */
iommu_domain_free(dev_state->domain);
+
+ /* Finally get rid of the device-state */
kfree(dev_state);
}
@@ -100,6 +112,292 @@ static void put_device_state(struct device_state *dev_state)
free_device_state(dev_state);
}
+static void link_pasid_state(struct pasid_state *pasid_state)
+{
+ spin_lock(&ps_lock);
+ list_add_tail(&pasid_state->list, &pasid_state_list);
+ spin_unlock(&ps_lock);
+}
+
+static void __unlink_pasid_state(struct pasid_state *pasid_state)
+{
+ list_del(&pasid_state->list);
+}
+
+static void unlink_pasid_state(struct pasid_state *pasid_state)
+{
+ spin_lock(&ps_lock);
+ __unlink_pasid_state(pasid_state);
+ spin_unlock(&ps_lock);
+}
+
+/* Must be called under dev_state->lock */
+static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
+ int pasid, bool alloc)
+{
+ struct pasid_state **root, **ptr;
+ int level, index;
+
+ level = dev_state->pasid_levels;
+ root = dev_state->states;
+
+ while (true) {
+
+ index = (pasid >> (9 * level)) & 0x1ff;
+ ptr = &root[index];
+
+ if (level == 0)
+ break;
+
+ if (*ptr == NULL) {
+ if (!alloc)
+ return NULL;
+
+ *ptr = (void *)get_zeroed_page(GFP_ATOMIC);
+ if (*ptr == NULL)
+ return NULL;
+ }
+
+ root = (struct pasid_state **)*ptr;
+ level -= 1;
+ }
+
+ return ptr;
+}
+
+static int set_pasid_state(struct device_state *dev_state,
+ struct pasid_state *pasid_state,
+ int pasid)
+{
+ struct pasid_state **ptr;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&dev_state->lock, flags);
+ ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+ ret = -ENOMEM;
+ if (ptr == NULL)
+ goto out_unlock;
+
+ ret = -ENOMEM;
+ if (*ptr != NULL)
+ goto out_unlock;
+
+ *ptr = pasid_state;
+
+ ret = 0;
+
+out_unlock:
+ spin_unlock_irqrestore(&dev_state->lock, flags);
+
+ return ret;
+}
+
+static void clear_pasid_state(struct device_state *dev_state, int pasid)
+{
+ struct pasid_state **ptr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_state->lock, flags);
+ ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+ if (ptr == NULL)
+ goto out_unlock;
+
+ *ptr = NULL;
+
+out_unlock:
+ spin_unlock_irqrestore(&dev_state->lock, flags);
+}
+
+static struct pasid_state *get_pasid_state(struct device_state *dev_state,
+ int pasid)
+{
+ struct pasid_state **ptr, *ret = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev_state->lock, flags);
+ ptr = __get_pasid_state_ptr(dev_state, pasid, false);
+
+ if (ptr == NULL)
+ goto out_unlock;
+
+ ret = *ptr;
+ if (ret)
+ atomic_inc(&ret->count);
+
+out_unlock:
+ spin_unlock_irqrestore(&dev_state->lock, flags);
+
+ return ret;
+}
+
+static void free_pasid_state(struct pasid_state *pasid_state)
+{
+ kfree(pasid_state);
+}
+
+static void put_pasid_state(struct pasid_state *pasid_state)
+{
+ if (atomic_dec_and_test(&pasid_state->count)) {
+ put_device_state(pasid_state->device_state);
+ free_pasid_state(pasid_state);
+ }
+}
+
+static void unbind_pasid(struct device_state *dev_state, int pasid)
+{
+ struct pasid_state *pasid_state;
+
+ pasid_state = get_pasid_state(dev_state, pasid);
+ if (pasid_state == NULL)
+ return;
+
+ unlink_pasid_state(pasid_state);
+
+ amd_iommu_domain_clear_gcr3(dev_state->domain, pasid);
+ clear_pasid_state(dev_state, pasid);
+
+ put_pasid_state(pasid_state); /* Reference taken in this function */
+ put_pasid_state(pasid_state); /* Reference taken in bind() function */
+}
+
+static void free_pasid_states_level1(struct pasid_state **tbl)
+{
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (tbl[i] == NULL)
+ continue;
+
+ free_page((unsigned long)tbl[i]);
+ }
+}
+
+static void free_pasid_states_level2(struct pasid_state **tbl)
+{
+ struct pasid_state **ptr;
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (tbl[i] == NULL)
+ continue;
+
+ ptr = (struct pasid_state **)tbl[i];
+ free_pasid_states_level1(ptr);
+ }
+}
+
+static void free_pasid_states(struct device_state *dev_state)
+{
+ struct pasid_state *pasid_state;
+ int i;
+
+ for (i = 0; i < dev_state->max_pasids; ++i) {
+ pasid_state = get_pasid_state(dev_state, i);
+ if (pasid_state == NULL)
+ continue;
+
+ unbind_pasid(dev_state, i);
+ put_pasid_state(pasid_state);
+ }
+
+ if (dev_state->pasid_levels == 2)
+ free_pasid_states_level2(dev_state->states);
+ else if (dev_state->pasid_levels == 1)
+ free_pasid_states_level1(dev_state->states);
+ else if (dev_state->pasid_levels != 0)
+ BUG();
+
+ free_page((unsigned long)dev_state->states);
+}
+
+int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+ struct task_struct *task)
+{
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+ u16 devid;
+ int ret;
+
+ might_sleep();
+
+ if (!amd_iommu_v2_supported())
+ return -ENODEV;
+
+ devid = device_id(pdev);
+ dev_state = get_device_state(devid);
+
+ if (dev_state == NULL)
+ return -EINVAL;
+
+ ret = -EINVAL;
+ if (pasid < 0 || pasid >= dev_state->max_pasids)
+ goto out;
+
+ ret = -ENOMEM;
+ pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
+ if (pasid_state == NULL)
+ goto out;
+
+ atomic_set(&pasid_state->count, 1);
+ pasid_state->task = task;
+ pasid_state->mm = task->mm;
+ pasid_state->device_state = dev_state;
+ pasid_state->pasid = pasid;
+
+ ret = set_pasid_state(dev_state, pasid_state, pasid);
+ if (ret)
+ goto out_free;
+
+ ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
+ __pa(pasid_state->mm->pgd));
+ if (ret)
+ goto out_clear_state;
+
+ link_pasid_state(pasid_state);
+
+ return 0;
+
+out_clear_state:
+ clear_pasid_state(dev_state, pasid);
+
+out_free:
+ put_pasid_state(pasid_state);
+
+out:
+ put_device_state(dev_state);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_bind_pasid);
+
+void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
+{
+ struct device_state *dev_state;
+ u16 devid;
+
+ might_sleep();
+
+ if (!amd_iommu_v2_supported())
+ return;
+
+ devid = device_id(pdev);
+ dev_state = get_device_state(devid);
+ if (dev_state == NULL)
+ return;
+
+ if (pasid < 0 || pasid >= dev_state->max_pasids)
+ goto out;
+
+ unbind_pasid(dev_state, pasid);
+
+out:
+ put_device_state(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_unbind_pasid);
+
int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
{
struct device_state *dev_state;
@@ -202,6 +500,9 @@ void amd_iommu_free_device(struct pci_dev *pdev)
spin_unlock_irqrestore(&state_lock, flags);
+ /* Get rid of any remaining pasid states */
+ free_pasid_states(dev_state);
+
put_device_state(dev_state);
}
EXPORT_SYMBOL(amd_iommu_free_device);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index e8c7a2e..23e21e1 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -24,9 +24,13 @@
#ifdef CONFIG_AMD_IOMMU
+struct task_struct;
struct pci_dev;
extern int amd_iommu_detect(void);
+extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+ struct task_struct *task);
+extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
/**
@@ -65,6 +69,28 @@ extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
*/
extern void amd_iommu_free_device(struct pci_dev *pdev);
+/**
+ * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device
+ * @pdev: The PCI device to bind the task to
+ * @pasid: The PASID on the device the task should be bound to
+ * @task: the task to bind
+ *
+ * The function returns 0 on success or a negative value on error.
+ */
+extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+ struct task_struct *task);
+
+/**
+ * amd_iommu_unbind_pasid() - Unbind a PASID from its task on
+ * a device
+ * @pdev: The device of the PASID
+ * @pasid: The PASID to unbind
+ *
+ * When this function returns the device is no longer using the PASID
+ * and the PASID is no longer bound to its task.
+ */
+extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
+
#else
static inline int amd_iommu_detect(void) { return -ENODEV; }
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 20/22] iommu/amd: Implement IO page-fault handler
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (18 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 19/22] iommu/amd: Add routines to bind/unbind a pasid Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-14 16:13 ` Jerome Glisse
2011-12-05 13:34 ` [PATCH 21/22] iommu/amd: Implement notifiers for IOMMUv2 Joerg Roedel
` (3 subsequent siblings)
23 siblings, 1 reply; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Register the notifier for PPR faults and handle them as
necessary.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu_v2.c | 200 ++++++++++++++++++++++++++++++++++++++++--
1 files changed, 194 insertions(+), 6 deletions(-)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index f54b991..0f2ffff 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -21,9 +21,11 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/iommu.h>
+#include <linux/wait.h>
#include <linux/pci.h>
#include <linux/gfp.h>
+#include "amd_iommu_types.h"
#include "amd_iommu_proto.h"
MODULE_LICENSE("GPL v2");
@@ -35,6 +37,7 @@ MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
struct pri_queue {
atomic_t inflight;
bool finish;
+ int status;
};
struct pasid_state {
@@ -45,6 +48,8 @@ struct pasid_state {
struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
struct device_state *device_state; /* Link to our device_state */
int pasid; /* PASID index */
+ spinlock_t lock; /* Protect pri_queues */
+ wait_queue_head_t wq; /* To wait for count == 0 */
};
struct device_state {
@@ -55,6 +60,20 @@ struct device_state {
int pasid_levels;
int max_pasids;
spinlock_t lock;
+ wait_queue_head_t wq;
+};
+
+struct fault {
+ struct work_struct work;
+ struct device_state *dev_state;
+ struct pasid_state *state;
+ struct mm_struct *mm;
+ u64 address;
+ u16 devid;
+ u16 pasid;
+ u16 tag;
+ u16 finish;
+ u16 flags;
};
struct device_state **state_table;
@@ -64,6 +83,8 @@ static spinlock_t state_lock;
static LIST_HEAD(pasid_state_list);
static DEFINE_SPINLOCK(ps_lock);
+static struct workqueue_struct *iommu_wq;
+
static void free_pasid_states(struct device_state *dev_state);
static void unbind_pasid(struct device_state *dev_state, int pasid);
@@ -109,9 +130,20 @@ static void free_device_state(struct device_state *dev_state)
static void put_device_state(struct device_state *dev_state)
{
if (atomic_dec_and_test(&dev_state->count))
- free_device_state(dev_state);
+ wake_up(&dev_state->wq);
}
+static void put_device_state_wait(struct device_state *dev_state)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
+ if (!atomic_dec_and_test(&dev_state->count))
+ schedule();
+ finish_wait(&dev_state->wq, &wait);
+
+ free_device_state(dev_state);
+}
static void link_pasid_state(struct pasid_state *pasid_state)
{
spin_lock(&ps_lock);
@@ -242,10 +274,25 @@ static void put_pasid_state(struct pasid_state *pasid_state)
{
if (atomic_dec_and_test(&pasid_state->count)) {
put_device_state(pasid_state->device_state);
- free_pasid_state(pasid_state);
+ wake_up(&pasid_state->wq);
}
}
+static void put_pasid_state_wait(struct pasid_state *pasid_state)
+{
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
+
+ if (atomic_dec_and_test(&pasid_state->count))
+ put_device_state(pasid_state->device_state);
+ else
+ schedule();
+
+ finish_wait(&pasid_state->wq, &wait);
+ free_pasid_state(pasid_state);
+}
+
static void unbind_pasid(struct device_state *dev_state, int pasid)
{
struct pasid_state *pasid_state;
@@ -260,7 +307,7 @@ static void unbind_pasid(struct device_state *dev_state, int pasid)
clear_pasid_state(dev_state, pasid);
put_pasid_state(pasid_state); /* Reference taken in this function */
- put_pasid_state(pasid_state); /* Reference taken in bind() function */
+ put_pasid_state_wait(pasid_state); /* Reference from bind() function */
}
static void free_pasid_states_level1(struct pasid_state **tbl)
@@ -299,8 +346,8 @@ static void free_pasid_states(struct device_state *dev_state)
if (pasid_state == NULL)
continue;
- unbind_pasid(dev_state, i);
put_pasid_state(pasid_state);
+ unbind_pasid(dev_state, i);
}
if (dev_state->pasid_levels == 2)
@@ -313,6 +360,120 @@ static void free_pasid_states(struct device_state *dev_state)
free_page((unsigned long)dev_state->states);
}
+static void set_pri_tag_status(struct pasid_state *pasid_state,
+ u16 tag, int status)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pasid_state->lock, flags);
+ pasid_state->pri[tag].status = status;
+ spin_unlock_irqrestore(&pasid_state->lock, flags);
+}
+
+static void finish_pri_tag(struct device_state *dev_state,
+ struct pasid_state *pasid_state,
+ u16 tag)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pasid_state->lock, flags);
+ if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
+ pasid_state->pri[tag].finish) {
+ amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
+ pasid_state->pri[tag].status, tag);
+ pasid_state->pri[tag].finish = false;
+ pasid_state->pri[tag].status = PPR_SUCCESS;
+ }
+ spin_unlock_irqrestore(&pasid_state->lock, flags);
+}
+
+static void do_fault(struct work_struct *work)
+{
+ struct fault *fault = container_of(work, struct fault, work);
+ int npages, write;
+ struct page *page;
+
+ write = !!(fault->flags & PPR_FAULT_WRITE);
+
+ npages = get_user_pages(fault->state->task, fault->state->mm,
+ fault->address, 1, write, 0, &page, NULL);
+
+ if (npages == 1)
+ put_page(page);
+ else
+ set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+
+ finish_pri_tag(fault->dev_state, fault->state, fault->tag);
+
+ put_pasid_state(fault->state);
+
+ kfree(fault);
+}
+
+static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
+{
+ struct amd_iommu_fault *iommu_fault;
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+ unsigned long flags;
+ struct fault *fault;
+ bool finish;
+ u16 tag;
+ int ret;
+
+ iommu_fault = data;
+ tag = iommu_fault->tag & 0x1ff;
+ finish = (iommu_fault->tag >> 9) & 1;
+
+ ret = NOTIFY_DONE;
+ dev_state = get_device_state(iommu_fault->device_id);
+ if (dev_state == NULL)
+ goto out;
+
+ pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
+ if (pasid_state == NULL) {
+ /* We know the device but not the PASID -> send INVALID */
+ amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
+ PPR_INVALID, tag);
+ goto out_drop_state;
+ }
+
+ spin_lock_irqsave(&pasid_state->lock, flags);
+ atomic_inc(&pasid_state->pri[tag].inflight);
+ if (finish)
+ pasid_state->pri[tag].finish = true;
+ spin_unlock_irqrestore(&pasid_state->lock, flags);
+
+ fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
+ if (fault == NULL) {
+ /* We are OOM - send success and let the device re-fault */
+ finish_pri_tag(dev_state, pasid_state, tag);
+ goto out_drop_state;
+ }
+
+ fault->dev_state = dev_state;
+ fault->address = iommu_fault->address;
+ fault->state = pasid_state;
+ fault->tag = tag;
+ fault->finish = finish;
+ fault->flags = iommu_fault->flags;
+ INIT_WORK(&fault->work, do_fault);
+
+ queue_work(iommu_wq, &fault->work);
+
+ ret = NOTIFY_OK;
+
+out_drop_state:
+ put_device_state(dev_state);
+
+out:
+ return ret;
+}
+
+static struct notifier_block ppr_nb = {
+ .notifier_call = ppr_notifier,
+};
+
int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
struct task_struct *task)
{
@@ -342,6 +503,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
goto out;
atomic_set(&pasid_state->count, 1);
+ init_waitqueue_head(&pasid_state->wq);
pasid_state->task = task;
pasid_state->mm = task->mm;
pasid_state->device_state = dev_state;
@@ -420,6 +582,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
return -ENOMEM;
spin_lock_init(&dev_state->lock);
+ init_waitqueue_head(&dev_state->wq);
dev_state->pdev = pdev;
tmp = pasids;
@@ -503,13 +666,14 @@ void amd_iommu_free_device(struct pci_dev *pdev)
/* Get rid of any remaining pasid states */
free_pasid_states(dev_state);
- put_device_state(dev_state);
+ put_device_state_wait(dev_state);
}
EXPORT_SYMBOL(amd_iommu_free_device);
static int __init amd_iommu_v2_init(void)
{
size_t state_table_size;
+ int ret;
pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
@@ -521,7 +685,21 @@ static int __init amd_iommu_v2_init(void)
if (state_table == NULL)
return -ENOMEM;
+ ret = -ENOMEM;
+ iommu_wq = create_workqueue("amd_iommu_v2");
+ if (iommu_wq == NULL) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ amd_iommu_register_ppr_notifier(&ppr_nb);
+
return 0;
+
+out_free:
+ free_pages((unsigned long)state_table, get_order(state_table_size));
+
+ return ret;
}
static void __exit amd_iommu_v2_exit(void)
@@ -530,6 +708,14 @@ static void __exit amd_iommu_v2_exit(void)
size_t state_table_size;
int i;
+ amd_iommu_unregister_ppr_notifier(&ppr_nb);
+
+ flush_workqueue(iommu_wq);
+
+ /*
+ * The loop below might call flush_workqueue(), so call
+ * destroy_workqueue() after it
+ */
for (i = 0; i < MAX_DEVICES; ++i) {
dev_state = get_device_state(i);
@@ -538,10 +724,12 @@ static void __exit amd_iommu_v2_exit(void)
WARN_ON_ONCE(1);
- amd_iommu_free_device(dev_state->pdev);
put_device_state(dev_state);
+ amd_iommu_free_device(dev_state->pdev);
}
+ destroy_workqueue(iommu_wq);
+
state_table_size = MAX_DEVICES * sizeof(struct device_state *);
free_pages((unsigned long)state_table, get_order(state_table_size));
}
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 21/22] iommu/amd: Implement notifiers for IOMMUv2
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (19 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 20/22] iommu/amd: Implement IO page-fault handler Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 22/22] iommu/amd: Add invalid_ppr callback Joerg Roedel
` (2 subsequent siblings)
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
Since pages are not pinned anymore we need notifications
when the VMM changes the page-tables. Use mmu_notifiers for
that.
Also use the task_exit notifier from the profiling subsystem
to shut down all contexts related to this task.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/Kconfig | 3 +-
drivers/iommu/amd_iommu_v2.c | 178 +++++++++++++++++++++++++++++++++++++++--
2 files changed, 171 insertions(+), 10 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index e608a36..6bea696 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -62,7 +62,8 @@ config AMD_IOMMU_STATS
config AMD_IOMMU_V2
tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
- depends on AMD_IOMMU && EXPERIMENTAL
+ depends on AMD_IOMMU && PROFILING && EXPERIMENTAL
+ select MMU_NOTIFIER
---help---
This option enables support for the AMD IOMMUv2 features of the IOMMU
hardware. Select this option if you want to use devices that support
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 0f2ffff..ffe85e0 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -16,8 +16,10 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
+#include <linux/profile.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/iommu.h>
@@ -45,6 +47,7 @@ struct pasid_state {
atomic_t count; /* Reference count */
struct task_struct *task; /* Task bound to this PASID */
struct mm_struct *mm; /* mm_struct for the faults */
+ struct mmu_notifier mn; /* mmu_notifier handle */
struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
struct device_state *device_state; /* Link to our device_state */
int pasid; /* PASID index */
@@ -85,8 +88,16 @@ static DEFINE_SPINLOCK(ps_lock);
static struct workqueue_struct *iommu_wq;
+/*
+ * Empty page table - Used between
+ * mmu_notifier_invalidate_range_start and
+ * mmu_notifier_invalidate_range_end
+ */
+static u64 *empty_page_table;
+
static void free_pasid_states(struct device_state *dev_state);
static void unbind_pasid(struct device_state *dev_state, int pasid);
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data);
static u16 device_id(struct pci_dev *pdev)
{
@@ -144,6 +155,11 @@ static void put_device_state_wait(struct device_state *dev_state)
free_device_state(dev_state);
}
+
+static struct notifier_block profile_nb = {
+ .notifier_call = task_exit,
+};
+
static void link_pasid_state(struct pasid_state *pasid_state)
{
spin_lock(&ps_lock);
@@ -293,6 +309,23 @@ static void put_pasid_state_wait(struct pasid_state *pasid_state)
free_pasid_state(pasid_state);
}
+static void __unbind_pasid(struct pasid_state *pasid_state)
+{
+ struct iommu_domain *domain;
+
+ domain = pasid_state->device_state->domain;
+
+ amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
+ clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
+
+ /* Make sure no more pending faults are in the queue */
+ flush_workqueue(iommu_wq);
+
+ mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
+ put_pasid_state(pasid_state); /* Reference taken in bind() function */
+}
+
static void unbind_pasid(struct device_state *dev_state, int pasid)
{
struct pasid_state *pasid_state;
@@ -302,12 +335,8 @@ static void unbind_pasid(struct device_state *dev_state, int pasid)
return;
unlink_pasid_state(pasid_state);
-
- amd_iommu_domain_clear_gcr3(dev_state->domain, pasid);
- clear_pasid_state(dev_state, pasid);
-
- put_pasid_state(pasid_state); /* Reference taken in this function */
- put_pasid_state_wait(pasid_state); /* Reference from bind() function */
+ __unbind_pasid(pasid_state);
+ put_pasid_state_wait(pasid_state); /* Reference taken in this function */
}
static void free_pasid_states_level1(struct pasid_state **tbl)
@@ -360,6 +389,83 @@ static void free_pasid_states(struct device_state *dev_state)
free_page((unsigned long)dev_state->states);
}
+static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
+{
+ return container_of(mn, struct pasid_state, mn);
+}
+
+static void __mn_flush_page(struct mmu_notifier *mn,
+ unsigned long address)
+{
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+
+ pasid_state = mn_to_state(mn);
+ dev_state = pasid_state->device_state;
+
+ amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
+}
+
+static int mn_clear_flush_young(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ __mn_flush_page(mn, address);
+
+ return 0;
+}
+
+static void mn_change_pte(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address,
+ pte_t pte)
+{
+ __mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ __mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+
+ pasid_state = mn_to_state(mn);
+ dev_state = pasid_state->device_state;
+
+ amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+ __pa(empty_page_table));
+}
+
+static void mn_invalidate_range_end(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct pasid_state *pasid_state;
+ struct device_state *dev_state;
+
+ pasid_state = mn_to_state(mn);
+ dev_state = pasid_state->device_state;
+
+ amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+ __pa(pasid_state->mm->pgd));
+}
+
+static struct mmu_notifier_ops iommu_mn = {
+ .clear_flush_young = mn_clear_flush_young,
+ .change_pte = mn_change_pte,
+ .invalidate_page = mn_invalidate_page,
+ .invalidate_range_start = mn_invalidate_range_start,
+ .invalidate_range_end = mn_invalidate_range_end,
+};
+
static void set_pri_tag_status(struct pasid_state *pasid_state,
u16 tag, int status)
{
@@ -474,6 +580,47 @@ static struct notifier_block ppr_nb = {
.notifier_call = ppr_notifier,
};
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
+{
+ struct pasid_state *pasid_state;
+ struct task_struct *task;
+
+ task = data;
+
+ /*
+ * Using this notifier is a hack - but there is no other choice
+ * at the moment. What I really want is a sleeping notifier that
+ * is called when an MM goes down. But such a notifier doesn't
+ * exist yet. The notifier needs to sleep because it has to make
+ * sure that the device does not use the PASID and the address
+ * space anymore before it is destroyed. This includes waiting
+ * for pending PRI requests to pass the workqueue. The
+ * MMU-Notifiers would be a good fit, but they use RCU and so
+ * they are not allowed to sleep. Let's see how we can solve this
+ * in a more intelligent way in the future.
+ */
+again:
+ spin_lock(&ps_lock);
+ list_for_each_entry(pasid_state, &pasid_state_list, list) {
+ if (pasid_state->task != task)
+ continue;
+
+ /* Found one - remove it from list */
+ __unlink_pasid_state(pasid_state);
+
+ /* Drop Lock and unbind */
+ spin_unlock(&ps_lock);
+
+ __unbind_pasid(pasid_state);
+
+ /* Task may be in the list multiple times */
+ goto again;
+ }
+ spin_unlock(&ps_lock);
+
+ return NOTIFY_OK;
+}
+
int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
struct task_struct *task)
{
@@ -508,6 +655,9 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
pasid_state->mm = task->mm;
pasid_state->device_state = dev_state;
pasid_state->pasid = pasid;
+ pasid_state->mn.ops = &iommu_mn;
+
+ mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
ret = set_pasid_state(dev_state, pasid_state, pasid);
if (ret)
@@ -687,15 +837,22 @@ static int __init amd_iommu_v2_init(void)
ret = -ENOMEM;
iommu_wq = create_workqueue("amd_iommu_v2");
- if (iommu_wq == NULL) {
- ret = -ENOMEM;
+ if (iommu_wq == NULL)
goto out_free;
- }
+
+ ret = -ENOMEM;
+ empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
+ if (empty_page_table == NULL)
+ goto out_destroy_wq;
amd_iommu_register_ppr_notifier(&ppr_nb);
+ profile_event_register(PROFILE_TASK_EXIT, &profile_nb);
return 0;
+out_destroy_wq:
+ destroy_workqueue(iommu_wq);
+
out_free:
free_pages((unsigned long)state_table, get_order(state_table_size));
@@ -708,6 +865,7 @@ static void __exit amd_iommu_v2_exit(void)
size_t state_table_size;
int i;
+ profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
amd_iommu_unregister_ppr_notifier(&ppr_nb);
flush_workqueue(iommu_wq);
@@ -732,6 +890,8 @@ static void __exit amd_iommu_v2_exit(void)
state_table_size = MAX_DEVICES * sizeof(struct device_state *);
free_pages((unsigned long)state_table, get_order(state_table_size));
+
+ free_page((unsigned long)empty_page_table);
}
module_init(amd_iommu_v2_init);
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 22/22] iommu/amd: Add invalid_ppr callback
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (20 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 21/22] iommu/amd: Implement notifiers for IOMMUv2 Joerg Roedel
@ 2011-12-05 13:34 ` Joerg Roedel
2011-12-08 20:47 ` [PATCH 0/22] Initial AMD IOMMUv2 support Jerome Glisse
2011-12-14 14:26 ` Joerg Roedel
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-05 13:34 UTC (permalink / raw)
To: iommu; +Cc: linux-kernel, Joerg Roedel
This callback can be used to change the PRI response code
sent to a device when a PPR fault fails.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
drivers/iommu/amd_iommu_v2.c | 57 ++++++++++++++++++++++++++++++++++++++++-
include/linux/amd-iommu.h | 34 ++++++++++++++++++++++--
2 files changed, 86 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index ffe85e0..7f63a4d 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -62,6 +62,7 @@ struct device_state {
struct iommu_domain *domain;
int pasid_levels;
int max_pasids;
+ amd_iommu_invalid_ppr_cb inv_ppr_cb;
spinlock_t lock;
wait_queue_head_t wq;
};
@@ -504,10 +505,31 @@ static void do_fault(struct work_struct *work)
npages = get_user_pages(fault->state->task, fault->state->mm,
fault->address, 1, write, 0, &page, NULL);
- if (npages == 1)
+ if (npages == 1) {
put_page(page);
- else
+ } else if (fault->dev_state->inv_ppr_cb) {
+ int status;
+
+ status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
+ fault->pasid,
+ fault->address,
+ fault->flags);
+ switch (status) {
+ case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
+ set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
+ break;
+ case AMD_IOMMU_INV_PRI_RSP_INVALID:
+ set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+ break;
+ case AMD_IOMMU_INV_PRI_RSP_FAIL:
+ set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
+ break;
+ default:
+ BUG();
+ }
+ } else {
set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+ }
finish_pri_tag(fault->dev_state, fault->state, fault->tag);
@@ -820,6 +842,37 @@ void amd_iommu_free_device(struct pci_dev *pdev)
}
EXPORT_SYMBOL(amd_iommu_free_device);
+int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+ amd_iommu_invalid_ppr_cb cb)
+{
+ struct device_state *dev_state;
+ unsigned long flags;
+ u16 devid;
+ int ret;
+
+ if (!amd_iommu_v2_supported())
+ return -ENODEV;
+
+ devid = device_id(pdev);
+
+ spin_lock_irqsave(&state_lock, flags);
+
+ ret = -EINVAL;
+ dev_state = state_table[devid];
+ if (dev_state == NULL)
+ goto out_unlock;
+
+ dev_state->inv_ppr_cb = cb;
+
+ ret = 0;
+
+out_unlock:
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
+
static int __init amd_iommu_v2_init(void)
{
size_t state_table_size;
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 23e21e1..06688c4 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -28,9 +28,6 @@ struct task_struct;
struct pci_dev;
extern int amd_iommu_detect(void);
-extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
- struct task_struct *task);
-extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
/**
@@ -91,6 +88,37 @@ extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
*/
extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
+/**
+ * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
+ * PRI requests
+ * @pdev: The PCI device the call-back should be registered for
+ * @cb: The call-back function
+ *
+ * The IOMMUv2 driver invokes this call-back when it is unable to
+ * successfully handle a PRI request. The device driver can then decide
+ * which PRI response the device should see. Possible return values for
+ * the call-back are:
+ *
+ * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device
+ * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device
+ * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device,
+ * the device is required to disable
+ * PRI when it receives this response
+ *
+ * The function returns 0 on success or negative value on error.
+ */
+#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0
+#define AMD_IOMMU_INV_PRI_RSP_INVALID 1
+#define AMD_IOMMU_INV_PRI_RSP_FAIL 2
+
+typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
+ int pasid,
+ unsigned long address,
+ u16);
+
+extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+ amd_iommu_invalid_ppr_cb cb);
+
#else
static inline int amd_iommu_detect(void) { return -ENODEV; }
--
1.7.5.4
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 0/22] Initial AMD IOMMUv2 support
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (21 preceding siblings ...)
2011-12-05 13:34 ` [PATCH 22/22] iommu/amd: Add invalid_ppr callback Joerg Roedel
@ 2011-12-08 20:47 ` Jerome Glisse
2011-12-09 14:43 ` Joerg Roedel
2011-12-14 14:26 ` Joerg Roedel
23 siblings, 1 reply; 28+ messages in thread
From: Jerome Glisse @ 2011-12-08 20:47 UTC (permalink / raw)
To: Joerg Roedel; +Cc: iommu, linux-kernel
On Mon, Dec 05, 2011 at 02:34:15PM +0100, Joerg Roedel wrote:
> Hi,
>
> here is a patch-set that adds initial support for the AMD IOMMU version
> 2 to the Linux kernel. The main feature of the new IOMMU version is
> support for DMA demand-paging and multiple DMA address spaces per
> device.
> The patch-set consists of 2 parts. Patches 1-16 implement the hardware
> access functions to setup and manage the IOMMUv2 functionality of the
> hardware. Patches 17-22 implement a separate module which makes use of
> these functions to implement a page-fault handler for devices which can
> be used by real device drivers.
>
> Any feedback appreciated.
>
> Thanks,
>
> Joerg
>
What hw/chipset/CPU support this functionality ?
Cheers,
Jerome
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 0/22] Initial AMD IOMMUv2 support
2011-12-08 20:47 ` [PATCH 0/22] Initial AMD IOMMUv2 support Jerome Glisse
@ 2011-12-09 14:43 ` Joerg Roedel
0 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-09 14:43 UTC (permalink / raw)
To: Jerome Glisse; +Cc: iommu, linux-kernel
On Thu, Dec 08, 2011 at 03:47:39PM -0500, Jerome Glisse wrote:
> On Mon, Dec 05, 2011 at 02:34:15PM +0100, Joerg Roedel wrote:
> > Hi,
> >
> > here is a patch-set that adds initial support for the AMD IOMMU version
> > 2 to the Linux kernel. The main feature of the new IOMMU version is
> > support for DMA demand-paging and multiple DMA address spaces per
> > device.
> > The patch-set consists of 2 parts. Patches 1-16 implement the hardware
> > access functions to setup and manage the IOMMUv2 functionality of the
> > hardware. Patches 17-22 implement a seperate module which makes use
> > these functions to implement a page-fault handler for devices which can
> > be used by real device drivers.
>
> What hw/chipset/CPU support this functionality ?
This functionality will be supported in our Trinity APUs and South
Islands GPUs, which will be shipping early next year. Official product
names for these APUs and GPUs have not yet been announced.
Joerg
--
AMD Operating System Research Center
Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 0/22] Initial AMD IOMMUv2 support
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
` (22 preceding siblings ...)
2011-12-08 20:47 ` [PATCH 0/22] Initial AMD IOMMUv2 support Jerome Glisse
@ 2011-12-14 14:26 ` Joerg Roedel
23 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-14 14:26 UTC (permalink / raw)
To: Joerg Roedel; +Cc: iommu, linux-kernel
On Mon, Dec 05, 2011 at 02:34:15PM +0100, Joerg Roedel wrote:
> Hi,
>
> here is a patch-set that adds initial support for the AMD IOMMU version
> 2 to the Linux kernel. The main feature of the new IOMMU version is
> support for DMA demand-paging and multiple DMA address spaces per
> device.
> The patch-set consists of 2 parts. Patches 1-16 implement the hardware
> access functions to setup and manage the IOMMUv2 functionality of the
> hardware. Patches 17-22 implement a seperate module which makes use
> these functions to implement a page-fault handler for devices which can
> be used by real device drivers.
Removed patch 2 because it is unnecessary. Applied the rest to x86/amd.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 20/22] iommu/amd: Implement IO page-fault handler
2011-12-05 13:34 ` [PATCH 20/22] iommu/amd: Implement IO page-fault handler Joerg Roedel
@ 2011-12-14 16:13 ` Jerome Glisse
2011-12-14 16:17 ` Joerg Roedel
0 siblings, 1 reply; 28+ messages in thread
From: Jerome Glisse @ 2011-12-14 16:13 UTC (permalink / raw)
To: Joerg Roedel; +Cc: iommu, linux-kernel
On Mon, Dec 05, 2011 at 02:34:35PM +0100, Joerg Roedel wrote:
> Register the notifier for PPR faults and handle them as
> necessary.
>
> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
> ---
> drivers/iommu/amd_iommu_v2.c | 200 ++++++++++++++++++++++++++++++++++++++++--
> 1 files changed, 194 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
> index f54b991..0f2ffff 100644
> --- a/drivers/iommu/amd_iommu_v2.c
> +++ b/drivers/iommu/amd_iommu_v2.c
> @@ -21,9 +21,11 @@
> #include <linux/module.h>
> #include <linux/sched.h>
> #include <linux/iommu.h>
> +#include <linux/wait.h>
> #include <linux/pci.h>
> #include <linux/gfp.h>
>
> +#include "amd_iommu_types.h"
> #include "amd_iommu_proto.h"
>
> MODULE_LICENSE("GPL v2");
> @@ -35,6 +37,7 @@ MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
> struct pri_queue {
> atomic_t inflight;
> bool finish;
> + int status;
> };
>
> struct pasid_state {
> @@ -45,6 +48,8 @@ struct pasid_state {
> struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
> struct device_state *device_state; /* Link to our device_state */
> int pasid; /* PASID index */
> + spinlock_t lock; /* Protect pri_queues */
> + wait_queue_head_t wq; /* To wait for count == 0 */
> };
>
> struct device_state {
> @@ -55,6 +60,20 @@ struct device_state {
> int pasid_levels;
> int max_pasids;
> spinlock_t lock;
> + wait_queue_head_t wq;
> +};
> +
> +struct fault {
> + struct work_struct work;
> + struct device_state *dev_state;
> + struct pasid_state *state;
> + struct mm_struct *mm;
> + u64 address;
> + u16 devid;
> + u16 pasid;
> + u16 tag;
> + u16 finish;
> + u16 flags;
> };
>
> struct device_state **state_table;
> @@ -64,6 +83,8 @@ static spinlock_t state_lock;
> static LIST_HEAD(pasid_state_list);
> static DEFINE_SPINLOCK(ps_lock);
>
> +static struct workqueue_struct *iommu_wq;
> +
> static void free_pasid_states(struct device_state *dev_state);
> static void unbind_pasid(struct device_state *dev_state, int pasid);
>
> @@ -109,9 +130,20 @@ static void free_device_state(struct device_state *dev_state)
> static void put_device_state(struct device_state *dev_state)
> {
> if (atomic_dec_and_test(&dev_state->count))
> - free_device_state(dev_state);
> + wake_up(&dev_state->wq);
> }
>
> +static void put_device_state_wait(struct device_state *dev_state)
> +{
> + DEFINE_WAIT(wait);
> +
> + prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
> + if (!atomic_dec_and_test(&dev_state->count))
> + schedule();
> + finish_wait(&dev_state->wq, &wait);
> +
> + free_device_state(dev_state);
> +}
> static void link_pasid_state(struct pasid_state *pasid_state)
> {
> spin_lock(&ps_lock);
> @@ -242,10 +274,25 @@ static void put_pasid_state(struct pasid_state *pasid_state)
> {
> if (atomic_dec_and_test(&pasid_state->count)) {
> put_device_state(pasid_state->device_state);
> - free_pasid_state(pasid_state);
> + wake_up(&pasid_state->wq);
> }
> }
>
> +static void put_pasid_state_wait(struct pasid_state *pasid_state)
> +{
> + DEFINE_WAIT(wait);
> +
> + prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
> +
> + if (atomic_dec_and_test(&pasid_state->count))
> + put_device_state(pasid_state->device_state);
> + else
> + schedule();
> +
> + finish_wait(&pasid_state->wq, &wait);
> + free_pasid_state(pasid_state);
> +}
> +
> static void unbind_pasid(struct device_state *dev_state, int pasid)
> {
> struct pasid_state *pasid_state;
> @@ -260,7 +307,7 @@ static void unbind_pasid(struct device_state *dev_state, int pasid)
> clear_pasid_state(dev_state, pasid);
>
> put_pasid_state(pasid_state); /* Reference taken in this function */
> - put_pasid_state(pasid_state); /* Reference taken in bind() function */
> + put_pasid_state_wait(pasid_state); /* Reference from bind() function */
> }
>
> static void free_pasid_states_level1(struct pasid_state **tbl)
> @@ -299,8 +346,8 @@ static void free_pasid_states(struct device_state *dev_state)
> if (pasid_state == NULL)
> continue;
>
> - unbind_pasid(dev_state, i);
> put_pasid_state(pasid_state);
> + unbind_pasid(dev_state, i);
> }
>
> if (dev_state->pasid_levels == 2)
> @@ -313,6 +360,120 @@ static void free_pasid_states(struct device_state *dev_state)
> free_page((unsigned long)dev_state->states);
> }
>
> +static void set_pri_tag_status(struct pasid_state *pasid_state,
> + u16 tag, int status)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&pasid_state->lock, flags);
> + pasid_state->pri[tag].status = status;
> + spin_unlock_irqrestore(&pasid_state->lock, flags);
> +}
> +
> +static void finish_pri_tag(struct device_state *dev_state,
> + struct pasid_state *pasid_state,
> + u16 tag)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&pasid_state->lock, flags);
> + if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
> + pasid_state->pri[tag].finish) {
> + amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
> + pasid_state->pri[tag].status, tag);
> + pasid_state->pri[tag].finish = false;
> + pasid_state->pri[tag].status = PPR_SUCCESS;
> + }
> + spin_unlock_irqrestore(&pasid_state->lock, flags);
> +}
> +
> +static void do_fault(struct work_struct *work)
> +{
> + struct fault *fault = container_of(work, struct fault, work);
> + int npages, write;
> + struct page *page;
> +
> + write = !!(fault->flags & PPR_FAULT_WRITE);
> +
> + npages = get_user_pages(fault->state->task, fault->state->mm,
> + fault->address, 1, write, 0, &page, NULL);
> +
> + if (npages == 1)
> + put_page(page);
> + else
> + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
I might be missing something on how mm works in Linux, but can't the page
in a vma change at any time (like being migrated to a node or being evicted
...)? I guess my question is: when a fault happens, is it because the task's
page table has an invalid entry?
I was under the impression that there was a page table associated with
each iommu client, and thus that the task page table was never directly used.
But I haven't carefully read all the patches.
Cheers,
Jerome
> +
> + finish_pri_tag(fault->dev_state, fault->state, fault->tag);
> +
> + put_pasid_state(fault->state);
> +
> + kfree(fault);
> +}
> +
> +static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
> +{
> + struct amd_iommu_fault *iommu_fault;
> + struct pasid_state *pasid_state;
> + struct device_state *dev_state;
> + unsigned long flags;
> + struct fault *fault;
> + bool finish;
> + u16 tag;
> + int ret;
> +
> + iommu_fault = data;
> + tag = iommu_fault->tag & 0x1ff;
> + finish = (iommu_fault->tag >> 9) & 1;
> +
> + ret = NOTIFY_DONE;
> + dev_state = get_device_state(iommu_fault->device_id);
> + if (dev_state == NULL)
> + goto out;
> +
> + pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
> + if (pasid_state == NULL) {
> + /* We know the device but not the PASID -> send INVALID */
> + amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
> + PPR_INVALID, tag);
> + goto out_drop_state;
> + }
> +
> + spin_lock_irqsave(&pasid_state->lock, flags);
> + atomic_inc(&pasid_state->pri[tag].inflight);
> + if (finish)
> + pasid_state->pri[tag].finish = true;
> + spin_unlock_irqrestore(&pasid_state->lock, flags);
> +
> + fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
> + if (fault == NULL) {
> + /* We are OOM - send success and let the device re-fault */
> + finish_pri_tag(dev_state, pasid_state, tag);
> + goto out_drop_state;
> + }
> +
> + fault->dev_state = dev_state;
> + fault->address = iommu_fault->address;
> + fault->state = pasid_state;
> + fault->tag = tag;
> + fault->finish = finish;
> + fault->flags = iommu_fault->flags;
> + INIT_WORK(&fault->work, do_fault);
> +
> + queue_work(iommu_wq, &fault->work);
> +
> + ret = NOTIFY_OK;
> +
> +out_drop_state:
> + put_device_state(dev_state);
> +
> +out:
> + return ret;
> +}
> +
> +static struct notifier_block ppr_nb = {
> + .notifier_call = ppr_notifier,
> +};
> +
> int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
> struct task_struct *task)
> {
> @@ -342,6 +503,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
> goto out;
>
> atomic_set(&pasid_state->count, 1);
> + init_waitqueue_head(&pasid_state->wq);
> pasid_state->task = task;
> pasid_state->mm = task->mm;
> pasid_state->device_state = dev_state;
> @@ -420,6 +582,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
> return -ENOMEM;
>
> spin_lock_init(&dev_state->lock);
> + init_waitqueue_head(&dev_state->wq);
> dev_state->pdev = pdev;
>
> tmp = pasids;
> @@ -503,13 +666,14 @@ void amd_iommu_free_device(struct pci_dev *pdev)
> /* Get rid of any remaining pasid states */
> free_pasid_states(dev_state);
>
> - put_device_state(dev_state);
> + put_device_state_wait(dev_state);
> }
> EXPORT_SYMBOL(amd_iommu_free_device);
>
> static int __init amd_iommu_v2_init(void)
> {
> size_t state_table_size;
> + int ret;
>
> pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
>
> @@ -521,7 +685,21 @@ static int __init amd_iommu_v2_init(void)
> if (state_table == NULL)
> return -ENOMEM;
>
> + ret = -ENOMEM;
> + iommu_wq = create_workqueue("amd_iommu_v2");
> + if (iommu_wq == NULL) {
> + ret = -ENOMEM;
> + goto out_free;
> + }
> +
> + amd_iommu_register_ppr_notifier(&ppr_nb);
> +
> return 0;
> +
> +out_free:
> + free_pages((unsigned long)state_table, get_order(state_table_size));
> +
> + return ret;
> }
>
> static void __exit amd_iommu_v2_exit(void)
> @@ -530,6 +708,14 @@ static void __exit amd_iommu_v2_exit(void)
> size_t state_table_size;
> int i;
>
> + amd_iommu_unregister_ppr_notifier(&ppr_nb);
> +
> + flush_workqueue(iommu_wq);
> +
> + /*
> + * The loop below might call flush_workqueue(), so call
> + * destroy_workqueue() after it
> + */
> for (i = 0; i < MAX_DEVICES; ++i) {
> dev_state = get_device_state(i);
>
> @@ -538,10 +724,12 @@ static void __exit amd_iommu_v2_exit(void)
>
> WARN_ON_ONCE(1);
>
> - amd_iommu_free_device(dev_state->pdev);
> put_device_state(dev_state);
> + amd_iommu_free_device(dev_state->pdev);
> }
>
> + destroy_workqueue(iommu_wq);
> +
> state_table_size = MAX_DEVICES * sizeof(struct device_state *);
> free_pages((unsigned long)state_table, get_order(state_table_size));
> }
> --
> 1.7.5.4
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 20/22] iommu/amd: Implement IO page-fault handler
2011-12-14 16:13 ` Jerome Glisse
@ 2011-12-14 16:17 ` Joerg Roedel
0 siblings, 0 replies; 28+ messages in thread
From: Joerg Roedel @ 2011-12-14 16:17 UTC (permalink / raw)
To: Jerome Glisse; +Cc: Joerg Roedel, iommu, linux-kernel
On Wed, Dec 14, 2011 at 11:13:00AM -0500, Jerome Glisse wrote:
> On Mon, Dec 05, 2011 at 02:34:35PM +0100, Joerg Roedel wrote:
> > + npages = get_user_pages(fault->state->task, fault->state->mm,
> > + fault->address, 1, write, 0, &page, NULL);
> > +
> > + if (npages == 1)
> > + put_page(page);
> > + else
> > + set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
>
> I might be missing something on how mm work in linux but can't the page
> in a vma change anytime (like being migrated to a node or being evicted
> ...) I guess my question is when a fault happen is it because the task
> page table have an invalid entry ?
Yes, but this is not a problem. When the page-fault handler signals
completion of the page-fault back to the device it will start to re-walk
the page-table. If the page-walker detects that the page is not there it
will just signal the fault again. MMU-Notifiers take care of the
TLB management.
> I was under the impression that their was a page table associated with
> each iommu client. Thus that the task page table was never directly use.
> But i haven't carrefully read all the patches.
The IOMMUv2 can handle x86 long-mode page-tables (including
accessed/dirty bits). So it is easy to just re-use the task page-tables
in the IOMMUv2.
Regards,
Joerg
^ permalink raw reply [flat|nested] 28+ messages in thread
end of thread, other threads:[~2011-12-14 16:17 UTC | newest]
Thread overview: 28+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-12-05 13:34 [PATCH 0/22] Initial AMD IOMMUv2 support Joerg Roedel
2011-12-05 13:34 ` [PATCH 01/22] iommu/amd: Convert dev_table_entry to u64 Joerg Roedel
2011-12-05 13:34 ` [PATCH 02/22] iommu/amd: Fix wrong address masks in tlb flush code Joerg Roedel
2011-12-05 13:34 ` [PATCH 03/22] iommu/amd: Get the maximum number of PASIDs supported Joerg Roedel
2011-12-05 13:34 ` [PATCH 04/22] iommu/amd: Setup PPR log when supported by IOMMU Joerg Roedel
2011-12-05 13:34 ` [PATCH 05/22] iommu/amd: Enable GT mode " Joerg Roedel
2011-12-05 13:34 ` [PATCH 06/22] iommu/amd: Add iommuv2 flag to struct amd_iommu Joerg Roedel
2011-12-05 13:34 ` [PATCH 07/22] iommu/amd: Put IOMMUv2 capable devices in pt_domain Joerg Roedel
2011-12-05 13:34 ` [PATCH 08/22] iommu/amd: Implement notifier for PPR faults Joerg Roedel
2011-12-05 13:34 ` [PATCH 09/22] iommu/amd: Add amd_iommu_domain_direct_map function Joerg Roedel
2011-12-05 13:34 ` [PATCH 10/22] iommu/amd: Add support for IOMMUv2 domain mode Joerg Roedel
2011-12-05 13:34 ` [PATCH 11/22] iommu/amd: Implement IOMMUv2 TLB flushing routines Joerg Roedel
2011-12-05 13:34 ` [PATCH 12/22] iommu/amd: Implement functions to manage GCR3 table Joerg Roedel
2011-12-05 13:34 ` [PATCH 13/22] iommu/amd: Implement function to send PPR completions Joerg Roedel
2011-12-05 13:34 ` [PATCH 14/22] iommu/amd: Add function to get IOMMUv2 domain for pdev Joerg Roedel
2011-12-05 13:34 ` [PATCH 15/22] iommu/amd: Add device errata handling Joerg Roedel
2011-12-05 13:34 ` [PATCH 16/22] iommu/amd: Add stat counter for IOMMUv2 events Joerg Roedel
2011-12-05 13:34 ` [PATCH 17/22] iommu/amd: Add driver stub for AMD IOMMUv2 support Joerg Roedel
2011-12-05 13:34 ` [PATCH 18/22] iommu/amd: Implement device aquisition code for IOMMUv2 Joerg Roedel
2011-12-05 13:34 ` [PATCH 19/22] iommu/amd: Add routines to bind/unbind a pasid Joerg Roedel
2011-12-05 13:34 ` [PATCH 20/22] iommu/amd: Implement IO page-fault handler Joerg Roedel
2011-12-14 16:13 ` Jerome Glisse
2011-12-14 16:17 ` Joerg Roedel
2011-12-05 13:34 ` [PATCH 21/22] iommu/amd: Implement notifiers for IOMMUv2 Joerg Roedel
2011-12-05 13:34 ` [PATCH 22/22] iommu/amd: Add invalid_ppr callback Joerg Roedel
2011-12-08 20:47 ` [PATCH 0/22] Initial AMD IOMMUv2 support Jerome Glisse
2011-12-09 14:43 ` Joerg Roedel
2011-12-14 14:26 ` Joerg Roedel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).