* [PATCH v7 01/10] iommu/vt-d: Update iommu_attach_domain() and its callers
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 02/10] iommu/vt-d: Items required for kdump Li, Zhen-Hua
` (9 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Allow specification of the domain-id for the new domain.
This patch only adds the 'did' parameter to iommu_attach_domain()
and modifies all of its callers to specify the default value of -1
which says "no did specified, allocate a new one".
This is no functional change from current behaviour -- just enables
a functional change to be made in a later patch.
Bill Sumner:
Original version.
Li, Zhenhua:
Minor change, add change to function __iommu_attach_domain.
Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
---
drivers/iommu/intel-iommu.c | 34 ++++++++++++++++++++--------------
1 file changed, 20 insertions(+), 14 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 1232336..2dc6250 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1534,31 +1534,36 @@ static struct dmar_domain *alloc_domain(int flags)
}
static int __iommu_attach_domain(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+ struct intel_iommu *iommu,
+ int domain_number)
{
int num;
unsigned long ndomains;
ndomains = cap_ndoms(iommu->cap);
- num = find_first_zero_bit(iommu->domain_ids, ndomains);
- if (num < ndomains) {
- set_bit(num, iommu->domain_ids);
- iommu->domains[num] = domain;
- } else {
- num = -ENOSPC;
- }
+ if (domain_number < 0) {
+ num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ if (num < ndomains) {
+ set_bit(num, iommu->domain_ids);
+ iommu->domains[num] = domain;
+ } else {
+ num = -ENOSPC;
+ }
+ } else
+ num = domain_number;
return num;
}
static int iommu_attach_domain(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+ struct intel_iommu *iommu,
+ int domain_number)
{
int num;
unsigned long flags;
spin_lock_irqsave(&iommu->lock, flags);
- num = __iommu_attach_domain(domain, iommu);
+ num = __iommu_attach_domain(domain, iommu, domain_number);
spin_unlock_irqrestore(&iommu->lock, flags);
if (num < 0)
pr_err("IOMMU: no free domain ids\n");
@@ -1577,7 +1582,7 @@ static int iommu_attach_vm_domain(struct dmar_domain *domain,
if (iommu->domains[num] == domain)
return num;
- return __iommu_attach_domain(domain, iommu);
+ return __iommu_attach_domain(domain, iommu, -1);
}
static void iommu_detach_domain(struct dmar_domain *domain,
@@ -2231,6 +2236,7 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
u16 dma_alias;
unsigned long flags;
u8 bus, devfn;
+ int did = -1; /* Default to "no domain_id supplied" */
domain = find_domain(dev);
if (domain)
@@ -2264,7 +2270,7 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
domain = alloc_domain(0);
if (!domain)
return NULL;
- domain->id = iommu_attach_domain(domain, iommu);
+ domain->id = iommu_attach_domain(domain, iommu, did);
if (domain->id < 0) {
free_domain_mem(domain);
return NULL;
@@ -2442,7 +2448,7 @@ static int __init si_domain_init(int hw)
return -EFAULT;
for_each_active_iommu(iommu, drhd) {
- ret = iommu_attach_domain(si_domain, iommu);
+ ret = iommu_attach_domain(si_domain, iommu, -1);
if (ret < 0) {
domain_exit(si_domain);
return -EFAULT;
@@ -3866,7 +3872,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
iommu_enable_translation(iommu);
if (si_domain) {
- ret = iommu_attach_domain(si_domain, iommu);
+ ret = iommu_attach_domain(si_domain, iommu, -1);
if (ret < 0 || si_domain->id != ret)
goto disable_iommu;
domain_attach_iommu(si_domain, iommu);
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 02/10] iommu/vt-d: Items required for kdump
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 01/10] iommu/vt-d: Update iommu_attach_domain() and its callers Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 03/10] iommu/vt-d: Add domain-id functions Li, Zhen-Hua
` (8 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Add structure type domain_values_entry used for kdump;
Add context entry functions needed for kdump.
Bill Sumner:
Original version;
Li, Zhenhua:
Changed the name of new functions, make them consistent with current
context get/set functions.
Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
---
drivers/iommu/intel-iommu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 2dc6250..5ce2850 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -40,6 +40,7 @@
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
+#include <linux/crash_dump.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -208,6 +209,12 @@ get_context_addr_from_root(struct root_entry *root)
NULL);
}
+/* root->val masked with VTD_PAGE_MASK if @root is present, otherwise 0. */
+static inline unsigned long get_context_phys_from_root(struct root_entry *root)
+{
+	return !root_present(root) ? 0 : (root->val & VTD_PAGE_MASK);
+}
+
/*
* low 64 bits:
* 0: present
@@ -228,6 +235,32 @@ static inline bool context_present(struct context_entry *context)
{
return (context->lo & 1);
}
+
+static inline int context_fault_enable(struct context_entry *c)
+{
+	return !!(c->lo & 0x2);	/* bit 1 of the low 64 bits */
+}
+
+static inline int context_translation_type(struct context_entry *c)
+{
+	return (c->lo & 0xc) >> 2;	/* bits 3:2 of the low 64 bits */
+}
+
+static inline u64 context_address_root(struct context_entry *c)
+{
+	return c->lo >> VTD_PAGE_SHIFT;	/* low 64 bits, shifted down by the page shift */
+}
+
+static inline int context_address_width(struct context_entry *c)
+{
+	return c->hi & 0x7;	/* bits 2:0 of the high 64 bits */
+}
+
+static inline int context_domain_id(struct context_entry *c)
+{
+	return (c->hi & 0xffff00) >> 8;	/* bits 23:8 of the high 64 bits */
+}
+
static inline void context_set_present(struct context_entry *context)
{
context->lo |= 1;
@@ -313,6 +346,43 @@ static inline int first_pte_in_page(struct dma_pte *pte)
return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Fix Crashdump failure caused by leftover DMA through a hardware IOMMU
+ *
+ * Fixes the crashdump kernel to deal with an active iommu and legacy
+ * DMA from the (old) panicked kernel in a manner similar to how legacy
+ * DMA is handled when no hardware iommu was in use by the old kernel --
+ * allow the legacy DMA to continue into its current buffers.
+ *
+ * In the crashdump kernel, this code:
+ * 1. Skips disabling the IOMMU's translation of IO Virtual Addresses (IOVA).
+ * 2. Does not re-enable the IOMMU's translation.
+ * 3. Uses the old root entry table.
+ * 4. Leaves the current translations in-place so that legacy DMA will
+ * continue to use its current buffers.
+ * 5. Allocates to the device drivers in the crashdump kernel
+ * portions of the iova address ranges that are different
+ * from the iova address ranges that were being used by the old kernel
+ * at the time of the panic.
+ *
+ */
+
+struct domain_values_entry { /* values kept per domain id, for kdump */
+ struct list_head link; /* links entries into a per-iommu list */
+ struct iova_domain iovad; /* iova's that belong to this domain */
+ struct dma_pte *pgd; /* virtual address of the page-table root */
+ int did; /* domain id taken from a context entry */
+ int gaw; /* max guest address width */
+ int iommu_superpage; /* Level of superpages supported:
+ 0 == 4KiB (no superpages), 1 == 2MiB,
+ 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
+};
+
+#endif /* CONFIG_CRASH_DUMP */
+
/*
* This domain is a statically identity mapping domain.
* 1. This domain creats a static 1:1 mapping to all usable memory.
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 03/10] iommu/vt-d: Add domain-id functions
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 01/10] iommu/vt-d: Update iommu_attach_domain() and its callers Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 02/10] iommu/vt-d: Items required for kdump Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 04/10] iommu/vt-d: functions to copy data from old mem Li, Zhen-Hua
` (7 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Interfaces for when a new domain in the crashdump kernel needs some
values from the panicked kernel's context entries.
Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
---
drivers/iommu/intel-iommu.c | 62 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 5ce2850..c0bebd6 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -381,6 +381,13 @@ struct domain_values_entry {
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
};
+static struct domain_values_entry *intel_iommu_did_to_domain_values_entry(
+ int did, struct intel_iommu *iommu);
+
+static int intel_iommu_get_dids_from_old_kernel(struct intel_iommu *iommu);
+
+static int device_to_domain_id(struct intel_iommu *iommu, u8 bus, u8 devfn);
+
#endif /* CONFIG_CRASH_DUMP */
/*
@@ -4832,3 +4839,58 @@ static void __init check_tylersburg_isoch(void)
printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
vtisochctrl);
}
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Interfaces for when a new domain in the crashdump kernel needs some
+ * values from the panicked kernel's context entries
+ *
+ */
+static struct domain_values_entry *intel_iommu_did_to_domain_values_entry(
+		int did, struct intel_iommu *iommu)
+{
+	struct domain_values_entry *cur;	/* entry being examined */
+
+	list_for_each_entry(cur, &domain_values_list[iommu->seq_id], link)
+		if (did == cur->did)
+			return cur;
+	return NULL;	/* no entry with this did on this iommu's list */
+}
+
+/*
+ * Flag each domain-id found in the old kernel as in-use on this iommu, so a
+ * device that was absent from the old kernel gets a fresh, unused domain-id.
+ */
+static int intel_iommu_get_dids_from_old_kernel(struct intel_iommu *iommu)
+{
+	struct list_head *old_dids = &domain_values_list[iommu->seq_id];
+	struct domain_values_entry *entry;
+
+	pr_info("IOMMU:%d Domain ids from panicked kernel:\n", iommu->seq_id);
+	list_for_each_entry(entry, old_dids, link) {
+		set_bit(entry->did, iommu->domain_ids);
+		pr_info("DID did:%d(0x%4.4x)\n", entry->did, entry->did);
+	}
+
+	pr_info("----------------------------------------\n");
+	return 0;	/* always succeeds */
+}
+
+static int device_to_domain_id(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+	struct context_entry *table, *entry;
+	unsigned long irq_state;
+	int did = -1;	/* stays -1 unless a present context entry is found */
+
+	/* Read the context entry while holding iommu->lock. */
+	spin_lock_irqsave(&iommu->lock, irq_state);
+	table = get_context_addr_from_root(&iommu->root_entry[bus]);
+	entry = table ? &table[devfn] : NULL;
+	if (entry && context_present(entry))
+		did = context_domain_id(entry);
+	spin_unlock_irqrestore(&iommu->lock, irq_state);
+	return did;
+}
+
+#endif /* CONFIG_CRASH_DUMP */
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 04/10] iommu/vt-d: functions to copy data from old mem
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
` (2 preceding siblings ...)
2015-01-07 4:04 ` [PATCH v7 03/10] iommu/vt-d: Add domain-id functions Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 05/10] iommu/vt-d: Add functions to load and save old re Li, Zhen-Hua
` (6 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Add some functions to copy the data from old kernel.
These functions are used to copy context tables and page tables.
To avoid calling iounmap between spin_lock_irqsave and spin_unlock_irqrestore,
keep the mapped pointers on a linked list and iounmap them later, in
another place.
Li, Zhen-hua:
The functions and logic.
Takao Indoh:
Check if pfn is ram:
if (page_is_ram(pfn))
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
---
drivers/iommu/intel-iommu.c | 97 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/intel-iommu.h | 9 +++++
2 files changed, 106 insertions(+)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index c0bebd6..8a7ad72 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -388,6 +388,13 @@ static int intel_iommu_get_dids_from_old_kernel(struct intel_iommu *iommu);
static int device_to_domain_id(struct intel_iommu *iommu, u8 bus, u8 devfn);
+struct iommu_remapped_entry {
+ struct list_head list;
+ void __iomem *mem;
+};
+static LIST_HEAD(__iommu_remapped_mem);
+static DEFINE_MUTEX(__iommu_mem_list_lock);
+
#endif /* CONFIG_CRASH_DUMP */
/*
@@ -4843,6 +4850,96 @@ static void __init check_tylersburg_isoch(void)
#ifdef CONFIG_CRASH_DUMP
/*
+ * Copy memory from a physically-addressed area into a virtually-addressed area
+ */
+int __iommu_load_from_oldmem(void *to, unsigned long from, unsigned long size)
+{
+	unsigned long pfn = from >> VTD_PAGE_SHIFT;	/* frame holding 'from' */
+	unsigned long offset = from & (~VTD_PAGE_MASK);	/* offset in frame */
+	struct iommu_remapped_entry *mapped;
+	void __iomem *virt_mem;
+
+	/* Source page is RAM: read through its kernel address. */
+	if (page_is_ram(pfn)) {
+		memcpy(to, pfn_to_kaddr(pfn) + offset, (size_t)size);
+		return size;
+	}
+
+	/* Otherwise remap the source and copy through the mapping. */
+	mapped = kzalloc(sizeof(struct iommu_remapped_entry), GFP_KERNEL);
+	if (!mapped)
+		return -ENOMEM;
+
+	virt_mem = ioremap_cache((unsigned long)from, size);
+	if (!virt_mem) {
+		kfree(mapped);
+		return -ENOMEM;
+	}
+	memcpy(to, virt_mem, size);
+
+	/*
+	 * Queue the mapping; __iommu_free_mapped_mem() unmaps it later.
+	 */
+	mapped->mem = virt_mem;
+	mutex_lock(&__iommu_mem_list_lock);
+	list_add_tail(&mapped->list, &__iommu_remapped_mem);
+	mutex_unlock(&__iommu_mem_list_lock);
+	return size;
+}
+
+/*
+ * Copy memory from a virtually-addressed area into a physically-addressed area
+ */
+int __iommu_save_to_oldmem(unsigned long to, void *from, unsigned long size)
+{
+	unsigned long pfn = to >> VTD_PAGE_SHIFT;	/* frame holding 'to' */
+	unsigned long offset = to & (~VTD_PAGE_MASK);	/* offset in frame */
+	struct iommu_remapped_entry *mapped;
+	void __iomem *virt_mem;
+
+	/* Destination page is RAM: write through its kernel address. */
+	if (page_is_ram(pfn)) {
+		memcpy(pfn_to_kaddr(pfn) + offset, from, (size_t)size);
+		return size;
+	}
+
+	/* Otherwise remap the destination and copy through the mapping. */
+	mapped = kzalloc(sizeof(struct iommu_remapped_entry), GFP_KERNEL);
+	if (!mapped)
+		return -ENOMEM;
+
+	virt_mem = ioremap_cache((unsigned long)to, size);
+	if (!virt_mem) {
+		kfree(mapped);
+		return -ENOMEM;
+	}
+	memcpy(virt_mem, from, size);
+
+	/* Queue the mapping; __iommu_free_mapped_mem() unmaps it later. */
+	mapped->mem = virt_mem;
+	mutex_lock(&__iommu_mem_list_lock);
+	list_add_tail(&mapped->list, &__iommu_remapped_mem);
+	mutex_unlock(&__iommu_mem_list_lock);
+	return size;
+}
+
+/*
+ * iounmap() and free every mapping queued on __iommu_remapped_mem.
+ */
+int __iommu_free_mapped_mem(void)
+{
+	struct iommu_remapped_entry *cur, *next;
+
+	mutex_lock(&__iommu_mem_list_lock);
+	list_for_each_entry_safe(cur, next, &__iommu_remapped_mem, list) {
+		list_del(&cur->list);
+		iounmap(cur->mem);
+		kfree(cur);
+	}
+	mutex_unlock(&__iommu_mem_list_lock);
+	return 0;	/* always 0 */
+}
+/*
* Interfaces for when a new domain in the crashdump kernel needs some
* values from the panicked kernel's context entries
*
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index a65208a..8ffa523 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -26,6 +26,7 @@
#include <linux/iova.h>
#include <linux/io.h>
#include <linux/dma_remapping.h>
+#include <linux/crash_dump.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@@ -368,4 +369,12 @@ extern int dmar_ir_support(void);
extern const struct attribute_group *intel_iommu_groups[];
+#ifdef CONFIG_CRASH_DUMP
+extern int __iommu_load_from_oldmem(void *to, unsigned long from,
+ unsigned long size);
+extern int __iommu_save_to_oldmem(unsigned long to, void *from,
+ unsigned long size);
+extern int __iommu_free_mapped_mem(void);
+#endif /* CONFIG_CRASH_DUMP */
+
#endif
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 05/10] iommu/vt-d: Add functions to load and save old re
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
` (3 preceding siblings ...)
2015-01-07 4:04 ` [PATCH v7 04/10] iommu/vt-d: functions to copy data from old mem Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 06/10] iommu/vt-d: datatypes and functions used for kdump Li, Zhen-Hua
` (5 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Add functions to load root entry table from old kernel, and to save updated
root entry table.
Add two members to struct intel_iommu, to store the RTA of the old kernel and
its mapped virtual address.
We use the old RTA in the kdump kernel. There, iommu->root_entry is used as
a cache: its physical address is not saved to the RTA register, but when its
data is changed, we save the new data to the old root entry table.
Li, Zhen-hua:
The functions and logic.
Takao Indoh:
Add __iommu_flush_cache.
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
---
drivers/iommu/intel-iommu.c | 53 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/intel-iommu.h | 5 +++++
2 files changed, 58 insertions(+)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 8a7ad72..f3059b8 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -388,6 +388,10 @@ static int intel_iommu_get_dids_from_old_kernel(struct intel_iommu *iommu);
static int device_to_domain_id(struct intel_iommu *iommu, u8 bus, u8 devfn);
+static void __iommu_load_old_root_entry(struct intel_iommu *iommu);
+
+static void __iommu_update_old_root_entry(struct intel_iommu *iommu, int index);
+
struct iommu_remapped_entry {
struct list_head list;
void __iomem *mem;
@@ -4990,4 +4994,53 @@ static int device_to_domain_id(struct intel_iommu *iommu, u8 bus, u8 devfn)
return did;
}
+/*
+ * Copy one page from the mapped old root entry table into iommu->root_entry.
+ */
+static void __iommu_load_old_root_entry(struct intel_iommu *iommu)
+{
+	/* Nothing to do unless both tables and the old address are known. */
+	if (!iommu || !iommu->root_entry)
+		return;
+	if (!iommu->root_entry_old_virt || !iommu->root_entry_old_phys)
+		return;
+
+	memcpy(iommu->root_entry, iommu->root_entry_old_virt, PAGE_SIZE);
+	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
+}
+
+/*
+ * Write entries of the new root entry table (iommu->root_entry) back to the
+ * mapped old root entry table.  Call this whenever the new table is changed.
+ * @index: entry to copy, 0 .. ROOT_ENTRY_NR - 1, or -1 to copy the whole
+ *         table.  Any other value is ignored.
+ */
+static void __iommu_update_old_root_entry(struct intel_iommu *iommu, int index)
+{
+	unsigned long start;	/* byte offset into the table; too big for a u8 */
+	unsigned long size;
+	void __iomem *to;
+	void *from;
+
+	if (!iommu || !iommu->root_entry ||
+	    !iommu->root_entry_old_virt || !iommu->root_entry_old_phys)
+		return;
+
+	if (index < -1 || index >= ROOT_ENTRY_NR)
+		return;
+
+	if (index == -1) {
+		start = 0;
+		size = ROOT_ENTRY_NR * sizeof(struct root_entry);
+	} else {
+		start = index * sizeof(struct root_entry);
+		size = sizeof(struct root_entry);
+	}
+	to = iommu->root_entry_old_virt;
+	from = iommu->root_entry;
+	memcpy(to + start, from + start, size);
+
+	__iommu_flush_cache(iommu, to + start, size);
+}
+
#endif /* CONFIG_CRASH_DUMP */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 8ffa523..8e29b97 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -329,6 +329,11 @@ struct intel_iommu {
spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */
+#ifdef CONFIG_CRASH_DUMP
+ void __iomem *root_entry_old_virt; /* mapped from old root entry */
+ unsigned long root_entry_old_phys; /* root entry in old kernel */
+#endif
+
struct iommu_flush flush;
#endif
struct q_inval *qi; /* Queued invalidation info */
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 06/10] iommu/vt-d: datatypes and functions used for kdump
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
` (4 preceding siblings ...)
2015-01-07 4:04 ` [PATCH v7 05/10] iommu/vt-d: Add functions to load and save old re Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 07/10] iommu/vt-d: enable kdump support in iommu module Li, Zhen-Hua
` (4 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Add support functions to copy iommu translation tables from
the panicked kernel into the kdump kernel in the event of a crash.
Functions:
malloc new context table and copy old context table to the new one.
malloc new page table and copy old page table to the new one.
Bill Sumner:
Original version, the creation of the data types and functions.
Li, Zhenhua:
Minor change:
Replace the uses of context_get_* and context_put* with context_*
and context_set_*.
Update the name of the function that copies root entry table.
Use new function to copy old context entry tables and page tables.
Use "unsigned long" for physical address.
Change incorrect aw_shift[4] and a few comments in copy_context_entry().
Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
---
drivers/iommu/intel-iommu.c | 543 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 543 insertions(+)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f3059b8..d2c19a0 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -399,6 +399,62 @@ struct iommu_remapped_entry {
static LIST_HEAD(__iommu_remapped_mem);
static DEFINE_MUTEX(__iommu_mem_list_lock);
+/* ========================================================================
+ * Copy iommu translation tables from old kernel into new kernel.
+ * Entry to this set of functions is: intel_iommu_load_translation_tables()
+ * ------------------------------------------------------------------------
+ */
+
+/*
+ * Lists of domain_values_entry to hold domain values found during the copy.
+ * One list for each iommu in g_number_of_iommus.
+ */
+static struct list_head *domain_values_list;
+
+
+#define RET_BADCOPY -1 /* Return-code: Cannot copy translate tables */
+
+/*
+ * Struct copy_page_addr_parms is used to allow copy_page_addr()
+ * to accumulate values across multiple calls and returns.
+ */
+struct copy_page_addr_parms {
+ u32 first; /* flag: set until the first page has been seen */
+ u32 last; /* flag: all IOVAs have been seen */
+ u32 bus; /* last bus number we saw */
+ u32 devfn; /* last devfn we saw */
+ u32 shift; /* last shift we saw */
+ u64 pte; /* Page Table Entry that started the current range */
+ u64 next_addr; /* next-expected page_addr */
+
+ u64 page_addr; /* start address of the range being accumulated */
+ u64 page_size; /* bytes accumulated in that range so far */
+
+ struct domain_values_entry *dve; /* to accumulate iova ranges */
+};
+
+enum returns_from_copy_context_entry {
+RET_CCE_NOT_PRESENT = 1, /* context entry not present; skipped */
+RET_CCE_NEW_PAGE_TABLES,
+RET_CCE_PASS_THROUGH_1,
+RET_CCE_PASS_THROUGH_2, /* pass-through entry whose domain-id was already seen */
+RET_CCE_RESERVED_VALUE,
+RET_CCE_PREVIOUS_DID /* domain-id already seen; its page tables are reused */
+};
+
+static int copy_context_entry(struct intel_iommu *iommu, u32 bus, u32 devfn,
+ void *ppap, struct context_entry *ce);
+
+static int copy_context_entry_table(struct intel_iommu *iommu,
+ u32 bus, void *ppap,
+ unsigned long *context_new_p,
+ unsigned long context_old_phys);
+
+static int copy_root_entry_table(struct intel_iommu *iommu, void *ppap);
+
+static int intel_iommu_load_translation_tables(struct dmar_drhd_unit *drhd,
+ int g_num_of_iommus);
+
#endif /* CONFIG_CRASH_DUMP */
/*
@@ -5043,4 +5099,491 @@ static void __iommu_update_old_root_entry(struct intel_iommu *iommu, int index)
__iommu_flush_cache(iommu, to + start, size);
}
+/*
+ * Initial state for a copy_page_addr_parms: 'first' set, everything else zero.
+ */
+static struct copy_page_addr_parms copy_page_addr_parms_init = { .first = 1 };
+
+
+
+/*
+ * Lowest-level function in the 'Copy Page Tables' set
+ * Called once for each page_addr present in an iommu page-address table.
+ *
+ * Because of the depth-first traversal of the page-tables by the
+ * higher-level functions that call 'copy_page_addr', all pages
+ * of a domain will be presented in ascending order of IO Virtual Address.
+ *
+ * This function accumulates each contiguous range of these IOVAs and
+ * reserves it within the proper domain in the crashdump kernel when a
+ * non-contiguous range is detected, as determined by any of the following:
+ * 1. a change in the bus or device owning the presented page
+ * 2. a change in the page-size of the presented page (parameter shift)
+ * 3. a change in the low (non-address) bits of the page-table entry
+ * 4. a presented IOVA that does not match the expected next-page address
+ * 5. the 'last' flag is set, indicating that all IOVAs have been seen.
+ *
+ * Returns 0 on success (also when 'parms' is NULL, which is only logged),
+ * RET_BADCOPY if a range has no domain, -ENOMEM if it cannot be reserved.
+ */
+static int copy_page_addr(u64 page_addr, u32 shift, u32 bus, u32 devfn,
+				u64 pte, struct domain_values_entry *dve,
+				void *parms)
+{
+	struct copy_page_addr_parms *ppap = parms;
+	u64 page_size = ((u64)1 << shift);	/* page_size */
+	u64 pfn_lo;		/* For reserving IOVA range */
+	u64 pfn_hi;		/* For reserving IOVA range */
+
+	if (!ppap) {
+		pr_err("ERROR: ppap is NULL: 0x%3.3x(%3.3d) DevFn: 0x%3.3x(%3.3d) Page: 0x%16.16llx Size: 0x%16.16llx(%lld)\n",
+			bus, bus, devfn, devfn, page_addr,
+			page_size, page_size);
+		return 0;
+	}
+
+	/* If (only extending current addr range) */
+	if (ppap->first == 0 &&
+	    ppap->last == 0 &&
+	    ppap->bus == bus &&
+	    ppap->devfn == devfn &&
+	    ppap->shift == shift &&
+	    (ppap->pte & ~VTD_PAGE_MASK) == (pte & ~VTD_PAGE_MASK) &&
+	    ppap->next_addr == page_addr) {
+
+		/* Update page size and next-expected address */
+		ppap->next_addr += page_size;
+		ppap->page_size += page_size;
+		return 0;
+	}
+
+	if (!ppap->first) {
+		/* Close-out the accumulated IOVA address range */
+		if (!ppap->dve) {
+			pr_err("%s ERROR: ppap->dve is NULL -- needed to reserve range for B:D:F=%2.2x:%2.2x:%1.1x\n",
+				__func__,
+				ppap->bus, ppap->devfn >> 3, ppap->devfn & 0x7);
+			return RET_BADCOPY;
+		}
+		/* reserve_iova() takes an inclusive range: end on the last pfn */
+		pfn_lo = IOVA_PFN(ppap->page_addr);
+		pfn_hi = IOVA_PFN(ppap->page_addr + ppap->page_size - 1);
+		if (!reserve_iova(&ppap->dve->iovad, pfn_lo, pfn_hi))
+			return -ENOMEM;	/* range is not protected: fail the copy */
+	}
+
+	/* Prepare for a new IOVA address range */
+	ppap->first = 0;	/* Not first-time anymore */
+	ppap->bus = bus;
+	ppap->devfn = devfn;
+	ppap->shift = shift;
+	ppap->pte = pte;
+	ppap->next_addr = page_addr + page_size; /* Next-expected page_addr */
+	ppap->page_addr = page_addr;	/* Addr(new page) */
+	ppap->page_size = page_size;	/* Size(new page) */
+	ppap->dve = dve;	/* adr(device_values_entry for new range) */
+
+	return 0;
+}
+
+/*
+ * Recursive function to copy the tree of page tables (max 6 recursions)
+ * Parameter 'shift' controls the recursion
+ * On success returns 0 and stores the physical address of the new table in
+ * *dma_pte_new_p; on failure returns non-zero and frees the new table.
+ */
+static int copy_page_table(unsigned long *dma_pte_new_p,
+				unsigned long dma_pte_phys,
+				u32 shift, u64 page_addr,
+				struct intel_iommu *iommu,
+				u32 bus, u32 devfn,
+				struct domain_values_entry *dve, void *ppap)
+{
+	int ret;			/* Integer return code */
+	struct dma_pte *p;		/* Virtual adr(each entry) iterator */
+	struct dma_pte *pgt_new_virt;	/* Adr(dma_pte in new kernel) */
+	unsigned long dma_pte_next;	/* Adr(next table down) */
+	u64 u;				/* index(each entry in page_table) */
+
+	/* If (already done all levels -- problem) */
+	if (shift < 12) {
+		pr_err("ERROR %s shift < 12 %lx\n", __func__, dma_pte_phys);
+		pr_err("shift %d, page_addr %16.16llu bus %3.3u devfn %3.3u\n",
+			shift, page_addr, bus, devfn);
+		return RET_BADCOPY;
+	}
+
+	/* allocate a page table in the new kernel
+	 * copy contents from old kernel
+	 * then update each entry in the table in the new kernel
+	 */
+	pgt_new_virt = (struct dma_pte *)alloc_pgtable_page(iommu->node);
+	if (!pgt_new_virt)
+		return -ENOMEM;
+	ret = __iommu_load_from_oldmem(pgt_new_virt, dma_pte_phys,
+					VTD_PAGE_SIZE);
+	if (ret < 0)
+		goto free_table;
+
+	for (u = 0, p = pgt_new_virt; u < 512; u++, p++) {
+		if (((p->val & DMA_PTE_READ) == 0) &&
+		    ((p->val & DMA_PTE_WRITE) == 0))
+			continue;
+
+		if (dma_pte_superpage(p) || (shift == 12)) {
+			ret = copy_page_addr(page_addr | (u << shift),
+				shift, bus, devfn, p->val, dve, ppap);
+			if (ret)
+				goto free_table;
+			continue;
+		}
+
+		ret = copy_page_table(&dma_pte_next,
+				(p->val & VTD_PAGE_MASK),
+				shift-9, page_addr | (u << shift),
+				iommu, bus, devfn, dve, ppap);
+		if (ret)
+			goto free_table;
+
+		p->val &= ~VTD_PAGE_MASK;	/* Clear old and set new pgd */
+		p->val |= ((u64)dma_pte_next & VTD_PAGE_MASK);
+	}
+
+	__iommu_flush_cache(iommu, pgt_new_virt, VTD_PAGE_SIZE);
+	*dma_pte_new_p = virt_to_phys(pgt_new_virt);
+	return 0;
+
+free_table:
+	/* Never published; child tables copied so far are not unwound. */
+	free_pgtable_page(pgt_new_virt);
+	return ret;
+}
+
+
+/*
+ * Called once for each context_entry found in a copied context_entry_table
+ * Each context_entry represents one PCIe device handled by the IOMMU.
+ *
+ * The 'domain_values_list' contains one 'domain_values_entry' for each
+ * unique domain-id found while copying the context entries for each iommu.
+ *
+ * The Intel-iommu spec. requires that every context_entry that contains
+ * the same domain-id point to the same set of page translation tables.
+ * The hardware uses this to improve the use of its translation cache.
+ * In order to ensure that the copied translation tables abide by this
+ * requirement, this function keeps a list of domain-ids (dids) that
+ * have already been seen for this iommu. This function checks each entry
+ * already on the list for a domain-id that matches the domain-id in this
+ * context_entry. If found, this function places the address of the previous
+ * context's tree of page translation tables into this context_entry.
+ * If a matching previous entry is not found, a new 'domain_values_entry'
+ * structure is created for the domain-id in this context_entry and
+ * copy_page_table is called to duplicate its tree of page tables.
+ */
+static int copy_context_entry(struct intel_iommu *iommu, u32 bus, u32 devfn,
+ void *ppap, struct context_entry *ce)
+{
+ int ret = 0; /* Integer Return Code */
+ u32 shift = 0; /* bits to shift page_addr */
+ u64 page_addr = 0; /* Address of translated page */
+ unsigned long pgt_old_phys; /* Adr(page_table in the old kernel) */
+ unsigned long pgt_new_phys; /* Adr(page_table in the new kernel) */
+ u8 t; /* Translation-type from context */
+ u8 aw; /* Address-width from context */
+ u32 aw_shift[8] = {
+ 12+9+9, /* [000b] 30-bit AGAW (2-level page table) */
+ 12+9+9+9, /* [001b] 39-bit AGAW (3-level page table) */
+ 12+9+9+9+9, /* [010b] 48-bit AGAW (4-level page table) */
+ 12+9+9+9+9+9, /* [011b] 57-bit AGAW (5-level page table) */
+ 12+9+9+9+9+9+7, /* [100b] 64-bit AGAW (6-level page table) */
+ 0, /* [101b] Reserved */
+ 0, /* [110b] Reserved */
+ 0, /* [111b] Reserved */
+ };
+
+ struct domain_values_entry *dve = NULL;
+
+ if (!context_present(ce)) { /* If (context not present) */
+ ret = RET_CCE_NOT_PRESENT; /* Skip it */
+ goto exit;
+ }
+
+ t = context_translation_type(ce);
+ /* If we have seen this domain-id before on this iommu,
+ * give this context the same page-tables and we are done.
+ */
+ list_for_each_entry(dve, &domain_values_list[iommu->seq_id], link) {
+ if (dve->did == (int) context_domain_id(ce)) {
+ switch (t) {
+ case 0: /* page tables */
+ case 1: /* page tables */
+ context_set_address_root(ce,
+ virt_to_phys(dve->pgd));
+ ret = RET_CCE_PREVIOUS_DID;
+ break;
+
+ case 2: /* Pass through */
+ if (dve->pgd == NULL)
+ ret = RET_CCE_PASS_THROUGH_2;
+ else
+ ret = RET_BADCOPY;
+ break;
+
+ default: /* Bad value of 't'*/
+ ret = RET_BADCOPY;
+ break;
+ }
+ goto exit;
+ }
+ }
+
+ /* Since we now know that this is a new domain-id for this iommu,
+ * create a new entry, add it to the list, and handle its
+ * page tables.
+ */
+
+ dve = kcalloc(1, sizeof(struct domain_values_entry), GFP_KERNEL);
+ if (!dve) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ dve->did = (int) context_domain_id(ce);
+ dve->gaw = (int) agaw_to_width(context_address_width(ce));
+ dve->pgd = NULL;
+ init_iova_domain(&dve->iovad, DMA_32BIT_PFN);
+
+ list_add(&dve->link, &domain_values_list[iommu->seq_id]);
+
+
+ if (t == 0 || t == 1) { /* If (context has page tables) */
+ aw = context_address_width(ce);
+ shift = aw_shift[aw];
+
+ pgt_old_phys = context_address_root(ce) << VTD_PAGE_SHIFT;
+
+ ret = copy_page_table(&pgt_new_phys, pgt_old_phys,
+ shift-9, page_addr, iommu, bus, devfn, dve, ppap);
+
+ __iommu_flush_cache(iommu, phys_to_virt(pgt_new_phys),
+ VTD_PAGE_SIZE);
+
+ if (ret) /* if (problem) bail out */
+ goto exit;
+
+ context_set_address_root(ce, pgt_new_phys);
+ dve->pgd = phys_to_virt(pgt_new_phys);
+ ret = RET_CCE_NEW_PAGE_TABLES;
+ goto exit;
+ }
+
+ if (t == 2) { /* If (Identity mapped pass-through) */
+ ret = RET_CCE_PASS_THROUGH_1; /* REVISIT: Skip for now */
+ goto exit;
+ }
+
+ ret = RET_CCE_RESERVED_VALUE; /* Else ce->t is a Reserved value */
+ /* Note fall-through */
+
+exit: /* all returns come through here to insure good clean-up */
+ return ret;
+}
+
+
+/*
+ * Called once for each context_entry_table found in the root_entry_table.
+ *
+ * Allocates a context-entry table in the new kernel, fills it with the
+ * contents of the old kernel's table at @context_old_phys, and then lets
+ * copy_context_entry() fix up every present entry in the new table.
+ *
+ * @iommu:            iommu that owns the table
+ * @bus:              PCI bus number this context table describes
+ * @ppap:             opaque accumulator handed down to copy_page_addr()
+ * @context_new_p:    out: physical address of the new table (success only)
+ * @context_old_phys: physical address of the old kernel's context table
+ *
+ * Returns 0 on success, -ENOMEM if the new table cannot be allocated, or
+ * RET_BADCOPY if any entry could not be copied.  On failure the new table
+ * page is released and *@context_new_p is left untouched.
+ */
+static int copy_context_entry_table(struct intel_iommu *iommu,
+				    u32 bus, void *ppap,
+				    unsigned long *context_new_p,
+				    unsigned long context_old_phys)
+{
+	struct context_entry *context_new_virt;	/* adr(table in new kernel) */
+	struct context_entry *ce;		/* Iterator */
+	u32 devfn;				/* PCI Device & function */
+	int ret;
+
+	context_new_virt =
+		(struct context_entry *)alloc_pgtable_page(iommu->node);
+	if (!context_new_virt)
+		return -ENOMEM;
+
+	__iommu_load_from_oldmem(context_new_virt,
+				 context_old_phys,
+				 VTD_PAGE_SIZE);
+
+	for (devfn = 0, ce = context_new_virt; devfn < 256; devfn++, ce++) {
+
+		if (!context_present(ce))	/* Nothing for this devfn */
+			continue;
+
+		ret = copy_context_entry(iommu, bus, devfn, ppap, ce);
+		if (ret < 0)			/* hard error */
+			goto bad_copy;
+
+		switch (ret) {
+		case RET_CCE_NOT_PRESENT:
+		case RET_CCE_NEW_PAGE_TABLES:
+		case RET_CCE_PASS_THROUGH_1:
+		case RET_CCE_PASS_THROUGH_2:
+		case RET_CCE_PREVIOUS_DID:
+			continue;		/* entry handled, next devfn */
+		case RET_CCE_RESERVED_VALUE:
+		default:
+			goto bad_copy;
+		}
+	}
+
+	*context_new_p = virt_to_phys(context_new_virt);
+	return 0;
+
+bad_copy:
+	/*
+	 * The new table has not been published to any root entry yet,
+	 * so it can simply be given back instead of being leaked.
+	 */
+	free_pgtable_page(context_new_virt);
+	return RET_BADCOPY;
+}
+
+
+/*
+ * Highest-level function in the 'copy translation tables' set of functions.
+ *
+ * Walks the 256 root entries in iommu->root_entry.  For every present
+ * entry that refers to an old context table, a new context table is built
+ * by copy_context_entry_table(), flushed, and installed in the root entry.
+ *
+ * @iommu: iommu whose root-entry table is being rebuilt
+ * @ppap:  opaque accumulator handed down to copy_page_addr()
+ *
+ * Returns 0 on success, -ENOMEM if no old root-entry address has been
+ * recorded in iommu->root_entry_old_phys, or the first non-zero value
+ * returned by copy_context_entry_table().
+ */
+static int copy_root_entry_table(struct intel_iommu *iommu, void *ppap)
+{
+	struct root_entry *re;		/* Virt(iterator: new table) */
+	unsigned long context_old_phys;	/* Phys(context table entry) */
+	unsigned long context_new_phys;	/* Phys(new context_entry) */
+	u32 bus;			/* Index: root-entry-table */
+	int ret;
+
+	if (!iommu->root_entry_old_phys)
+		return -ENOMEM;
+
+	for (bus = 0, re = iommu->root_entry; bus < 256; bus += 1, re += 1) {
+		if (!root_present(re))
+			continue;
+
+		context_old_phys = get_context_phys_from_root(re);
+		if (!context_old_phys)
+			continue;
+
+		context_new_phys = 0;
+		ret = copy_context_entry_table(iommu, bus, ppap,
+					       &context_new_phys,
+					       context_old_phys);
+		if (ret)
+			return ret;
+
+		/*
+		 * Flush only after success: on failure context_new_phys is
+		 * still 0 and there is no new table to write back.
+		 */
+		__iommu_flush_cache(iommu,
+				    phys_to_virt(context_new_phys),
+				    VTD_PAGE_SIZE);
+
+		set_root_value(re, context_new_phys);
+	}
+
+	return 0;
+}
+/*
+ * Interface to the "copy translation tables" set of functions
+ * from mainline code.
+ *
+ * Reads the root-table address that is programmed in the hardware
+ * (DMAR_RTADDR_REG), maps the old root-entry table, loads it into
+ * iommu->root_entry and then replaces every context table and page table
+ * it refers to with a copy owned by this kernel.
+ *
+ * @drhd:            DMAR unit whose iommu is to be processed
+ * @g_num_of_iommus: number of iommus; sizes the per-iommu
+ *                   domain_values_list array on first use
+ *
+ * Returns 0 on success; -1 if the hardware holds no root-table address;
+ * -ENOMEM on allocation or mapping failure; otherwise the error returned
+ * by copy_root_entry_table().  iommu->lock is always released on return.
+ */
+static int intel_iommu_load_translation_tables(struct dmar_drhd_unit *drhd,
+		int g_num_of_iommus)
+{
+	struct intel_iommu *iommu;	/* Virt(iommu hardware registers) */
+	unsigned long long q;		/* quadword scratch */
+	int ret = 0;			/* Integer return code */
+	int i = 0;			/* Loop index */
+	unsigned long flags;
+
+	/* Structure so copy_page_addr() can accumulate things
+	 * over multiple calls and returns
+	 */
+	struct copy_page_addr_parms ppa_parms = copy_page_addr_parms_init;
+	struct copy_page_addr_parms *ppap = &ppa_parms;
+
+
+	iommu = drhd->iommu;
+	q = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
+	if (!q)
+		return -1;
+
+	/* If (list needs initializing) do it here */
+	if (!domain_values_list) {
+		domain_values_list =
+			kcalloc(g_num_of_iommus, sizeof(struct list_head),
+					GFP_KERNEL);
+
+		if (!domain_values_list) {
+			pr_err("Allocation failed for domain_values_list array\n");
+			return -ENOMEM;
+		}
+		for (i = 0; i < g_num_of_iommus; i++)
+			INIT_LIST_HEAD(&domain_values_list[i]);
+	}
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	/* Load the root-entry table from the old kernel
+	 * foreach context_entry_table in root_entry
+	 *    foreach context_entry in context_entry_table
+	 *       foreach level-1 page_table_entry in context_entry
+	 *          foreach level-2 page_table_entry in level 1 page_table_entry
+	 *             Above pattern continues up to 6 levels of page tables
+	 *                Sanity-check the entry
+	 *                Process the bus, devfn, page_address, page_size
+	 */
+	if (!iommu->root_entry) {
+		iommu->root_entry =
+			(struct root_entry *)alloc_pgtable_page(iommu->node);
+		if (!iommu->root_entry) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+	}
+
+	iommu->root_entry_old_phys = q & VTD_PAGE_MASK;
+	if (!iommu->root_entry_old_phys) {
+		pr_err("Could not read old root entry address.");
+		ret = -1;
+		goto out_unlock;	/* must not return with the lock held */
+	}
+
+	/*
+	 * NOTE(review): this mapping is created while iommu->lock is held
+	 * with interrupts off; confirm ioremap_cache() is safe to call in
+	 * that context or move it ahead of the lock.
+	 */
+	iommu->root_entry_old_virt = ioremap_cache(iommu->root_entry_old_phys,
+						VTD_PAGE_SIZE);
+	if (!iommu->root_entry_old_virt) {
+		pr_err("Could not map the old root entry.");
+		ret = -ENOMEM;
+		goto out_unlock;	/* must not return with the lock held */
+	}
+
+	__iommu_load_old_root_entry(iommu);
+	ret = copy_root_entry_table(iommu, ppap);
+	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
+	__iommu_update_old_root_entry(iommu, -1);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	__iommu_free_mapped_mem();
+
+	if (ret)
+		return ret;
+
+	/* Final call lets copy_page_addr() finish what it accumulated. */
+	ppa_parms.last = 1;
+	copy_page_addr(0, 0, 0, 0, 0, NULL, ppap);
+
+	return 0;
+
+out_unlock:
+	spin_unlock_irqrestore(&iommu->lock, flags);
+	return ret;
+}
+
#endif /* CONFIG_CRASH_DUMP */
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 07/10] iommu/vt-d: enable kdump support in iommu module
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
` (5 preceding siblings ...)
2015-01-07 4:04 ` [PATCH v7 06/10] iommu/vt-d: datatypes and functions used for kdump Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 08/10] iommu/vt-d: assign new page table for dma_map Li, Zhen-Hua
` (3 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Modify the operation of the following functions when called during crash dump:
device_to_domain_id
get_domain_for_dev
init_dmars
intel_iommu_init
Bill Sumner:
Original version.
Zhenhua:
Minor change,
The name of new calling functions.
Do not disable and re-enable TE in kdump kernel.
Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
---
drivers/iommu/intel-iommu.c | 135 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 120 insertions(+), 15 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index d2c19a0..8807710 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -907,6 +907,11 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
set_root_value(root, phy_addr);
set_root_present(root);
__iommu_flush_cache(iommu, root, sizeof(*root));
+
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel())
+ __iommu_update_old_root_entry(iommu, bus);
+#endif
}
spin_unlock_irqrestore(&iommu->lock, flags);
return &context[devfn];
@@ -958,7 +963,8 @@ static void free_context_table(struct intel_iommu *iommu)
spin_lock_irqsave(&iommu->lock, flags);
if (!iommu->root_entry) {
- goto out;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return;
}
for (i = 0; i < ROOT_ENTRY_NR; i++) {
root = &iommu->root_entry[i];
@@ -966,10 +972,23 @@ static void free_context_table(struct intel_iommu *iommu)
if (context)
free_pgtable_page(context);
}
+
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ iommu->root_entry_old_phys = 0;
+ root = iommu->root_entry_old_virt;
+ iommu->root_entry_old_virt = NULL;
+ }
+#endif
free_pgtable_page(iommu->root_entry);
iommu->root_entry = NULL;
-out:
+
spin_unlock_irqrestore(&iommu->lock, flags);
+
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel())
+ iounmap(root);
+#endif
}
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
@@ -2381,6 +2400,9 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
unsigned long flags;
u8 bus, devfn;
int did = -1; /* Default to "no domain_id supplied" */
+#ifdef CONFIG_CRASH_DUMP
+ struct domain_values_entry *dve = NULL;
+#endif /* CONFIG_CRASH_DUMP */
domain = find_domain(dev);
if (domain)
@@ -2414,6 +2436,24 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
domain = alloc_domain(0);
if (!domain)
return NULL;
+
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ /*
+ * if this device had a did in the old kernel
+ * use its values instead of generating new ones
+ */
+ did = device_to_domain_id(iommu, bus, devfn);
+ if (did > 0 || (did == 0 && !cap_caching_mode(iommu->cap)))
+ dve = intel_iommu_did_to_domain_values_entry(did,
+ iommu);
+ if (dve)
+ gaw = dve->gaw;
+ else
+ did = -1;
+ }
+#endif /* CONFIG_CRASH_DUMP */
+
domain->id = iommu_attach_domain(domain, iommu, did);
if (domain->id < 0) {
free_domain_mem(domain);
@@ -2425,6 +2465,18 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
return NULL;
}
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel() && dve) {
+
+ if (domain->pgd)
+ free_pgtable_page(domain->pgd);
+
+ domain->pgd = dve->pgd;
+
+ copy_reserved_iova(&dve->iovad, &domain->iovad);
+ }
+#endif /* CONFIG_CRASH_DUMP */
+
/* register PCI DMA alias device */
if (dev_is_pci(dev)) {
tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
@@ -2948,14 +3000,35 @@ static int __init init_dmars(void)
if (ret)
goto free_iommu;
- /*
- * TBD:
- * we could share the same root & context tables
- * among all IOMMU's. Need to Split it later.
- */
- ret = iommu_alloc_root_entry(iommu);
- if (ret)
- goto free_iommu;
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ pr_info("IOMMU Copying translate tables from panicked kernel\n");
+ ret = intel_iommu_load_translation_tables(drhd,
+ g_num_of_iommus);
+ if (ret) {
+ pr_err("IOMMU: Copy translate tables failed\n");
+
+ /* Best to stop trying */
+ goto free_iommu;
+ }
+ pr_info("IOMMU: root_cache:0x%12.12llx phys:0x%12.12llx\n",
+ (u64)iommu->root_entry,
+ (u64)iommu->root_entry_old_phys);
+ intel_iommu_get_dids_from_old_kernel(iommu);
+ } else {
+#endif /* CONFIG_CRASH_DUMP */
+ /*
+ * TBD:
+ * we could share the same root & context tables
+ * among all IOMMU's. Need to Split it later.
+ */
+ ret = iommu_alloc_root_entry(iommu);
+ if (ret)
+ goto free_iommu;
+#ifdef CONFIG_CRASH_DUMP
+ }
+#endif
+
if (!ecap_pass_through(iommu->ecap))
hw_pass_through = 0;
}
@@ -2972,6 +3045,16 @@ static int __init init_dmars(void)
check_tylersburg_isoch();
+#ifdef CONFIG_CRASH_DUMP
+ /*
+ * In the crashdump kernel: Skip setting-up new domains for
+ * si, rmrr, and the isa bus on the expectation that these
+ * translations were copied from the old kernel.
+ */
+ if (is_kdump_kernel())
+ goto skip_new_domains_for_si_rmrr_isa;
+#endif /* CONFIG_CRASH_DUMP */
+
/*
* If pass through is not set or not enabled, setup context entries for
* identity mappings for rmrr, gfx, and isa and may fall back to static
@@ -3012,6 +3095,10 @@ static int __init init_dmars(void)
iommu_prepare_isa();
+#ifdef CONFIG_CRASH_DUMP
+skip_new_domains_for_si_rmrr_isa:;
+#endif /* CONFIG_CRASH_DUMP */
+
/*
* for each drhd
* enable fault log
@@ -3040,7 +3127,15 @@ static int __init init_dmars(void)
iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
- iommu_enable_translation(iommu);
+
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ iommu_enable_translation(iommu);
+ } else
+#endif
+ iommu_enable_translation(iommu);
+
iommu_disable_protect_mem_regions(iommu);
}
@@ -4351,12 +4446,22 @@ int __init intel_iommu_init(void)
goto out_free_dmar;
}
+#ifdef CONFIG_CRASH_DUMP
/*
- * Disable translation if already enabled prior to OS handover.
+ * If (This is the crash kernel)
+ * Set: copy iommu translate tables from old kernel
+ * Skip disabling the iommu hardware translations
*/
- for_each_active_iommu(iommu, drhd)
- if (iommu->gcmd & DMA_GCMD_TE)
- iommu_disable_translation(iommu);
+ if (is_kdump_kernel()) {
+ pr_info("IOMMU Skip disabling iommu hardware translations\n");
+ } else
+#endif /* CONFIG_CRASH_DUMP */
+ /*
+ * Disable translation if already enabled prior to OS handover.
+ */
+ for_each_active_iommu(iommu, drhd)
+ if (iommu->gcmd & DMA_GCMD_TE)
+ iommu_disable_translation(iommu);
if (dmar_dev_scope_init() < 0) {
if (force_on)
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 08/10] iommu/vt-d: assign new page table for dma_map
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
` (6 preceding siblings ...)
2015-01-07 4:04 ` [PATCH v7 07/10] iommu/vt-d: enable kdump support in iommu module Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 09/10] iommu/vt-d: Copy functions for irte Li, Zhen-Hua
` (2 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
When a device driver issues the first dma_map command for a
device, we assign a new and empty page-table, thus removing all
mappings from the old kernel for the device.
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
---
drivers/iommu/intel-iommu.c | 56 ++++++++++++++++++++++++++++++++++++++-------
1 file changed, 48 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 8807710..57ae08b 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -44,6 +44,7 @@
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
+#include <linux/dma-mapping.h>
#include "irq_remapping.h"
@@ -455,6 +456,8 @@ static int copy_root_entry_table(struct intel_iommu *iommu, void *ppap);
static int intel_iommu_load_translation_tables(struct dmar_drhd_unit *drhd,
int g_num_of_iommus);
+static void unmap_device_dma(struct dmar_domain *domain, struct device *dev);
+
#endif /* CONFIG_CRASH_DUMP */
/*
@@ -3196,14 +3199,30 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
return NULL;
}
- /* make sure context mapping is ok */
- if (unlikely(!domain_context_mapped(dev))) {
- ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
- if (ret) {
- printk(KERN_ERR "Domain context map for %s failed",
- dev_name(dev));
- return NULL;
- }
+ /* if in kdump kernel, we need to unmap the mapped dma pages,
+ * detach this device first.
+ */
+ if (likely(domain_context_mapped(dev))) {
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ unmap_device_dma(domain, dev);
+ domain = get_domain_for_dev(dev,
+ DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ if (!domain) {
+ pr_err("Allocating domain for %s failed",
+ dev_name(dev));
+ return NULL;
+ }
+ } else
+#endif
+ return domain;
+ }
+
+ ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
+ if (ret) {
+ pr_err("Domain context map for %s failed",
+ dev_name(dev));
+ return NULL;
}
return domain;
@@ -5691,4 +5710,25 @@ static int intel_iommu_load_translation_tables(struct dmar_drhd_unit *drhd,
return 0;
}
+/*
+ * Drop the DMA state a device inherited from the old kernel.
+ *
+ * Looks up the device's context entry, converts the page-table root
+ * address stored there to a device address, and calls intel_unmap() on it
+ * if @domain's iova allocator has an iova covering that address.  The
+ * device is then removed from @domain.
+ *
+ * @domain: domain the device is currently attached to
+ * @dev:    device whose mappings are being torn down
+ */
+static void unmap_device_dma(struct dmar_domain *domain, struct device *dev)
+{
+	struct intel_iommu *iommu;
+	struct context_entry *ce = NULL;
+	struct iova *iova;
+	u8 bus, devfn;
+	phys_addr_t phys_addr;
+	dma_addr_t dev_addr;
+
+	/*
+	 * Either lookup may come back empty; without a context entry
+	 * there is no address to unmap, so only the detach below applies.
+	 */
+	iommu = device_to_iommu(dev, &bus, &devfn);
+	if (iommu)
+		ce = device_to_context_entry(iommu, bus, devfn);
+
+	if (ce) {
+		phys_addr = context_address_root(ce) << VTD_PAGE_SHIFT;
+		dev_addr = phys_to_dma(dev, phys_addr);
+
+		iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
+		if (iova)
+			intel_unmap(dev, dev_addr);
+	}
+
+	domain_remove_one_dev_info(domain, dev);
+}
+
#endif /* CONFIG_CRASH_DUMP */
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 09/10] iommu/vt-d: Copy functions for irte
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
` (7 preceding siblings ...)
2015-01-07 4:04 ` [PATCH v7 08/10] iommu/vt-d: assign new page table for dma_map Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:04 ` [PATCH v7 10/10] iommu/vt-d: Use old irte in kdump kernel Li, Zhen-Hua
2015-01-07 4:11 ` [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults " Li, ZhenHua
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Functions to copy the irte data from the old kernel into the kdump kernel.
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
---
drivers/iommu/intel_irq_remapping.c | 62 +++++++++++++++++++++++++++++++++++++
include/linux/intel-iommu.h | 4 +++
2 files changed, 66 insertions(+)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index a55b207..d37fd62 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -8,6 +8,7 @@
#include <linux/irq.h>
#include <linux/intel-iommu.h>
#include <linux/acpi.h>
+#include <linux/crash_dump.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
#include <asm/cpu.h>
@@ -17,6 +18,11 @@
#include "irq_remapping.h"
+#ifdef CONFIG_CRASH_DUMP
+static int __iommu_load_old_irte(struct intel_iommu *iommu);
+static int __iommu_update_old_irte(struct intel_iommu *iommu, int index);
+#endif /* CONFIG_CRASH_DUMP */
+
struct ioapic_scope {
struct intel_iommu *iommu;
unsigned int id;
@@ -1296,3 +1302,59 @@ int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
return ret;
}
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Copy the whole interrupt-remapping table of the old kernel (mapped at
+ * ir_table->base_old_virt) into this kernel's table at ir_table->base and
+ * flush the copy.
+ *
+ * Returns 0 on success, -1 if @iommu or any of the table pointers it
+ * needs is missing.
+ */
+static int __iommu_load_old_irte(struct intel_iommu *iommu)
+{
+	struct ir_table *tbl;
+
+	if (!iommu)
+		return -1;
+
+	tbl = iommu->ir_table;
+	if (!tbl)
+		return -1;
+
+	if (!tbl->base || !tbl->base_old_phys || !tbl->base_old_virt)
+		return -1;
+
+	memcpy(tbl->base,
+	       tbl->base_old_virt,
+	       INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte));
+
+	__iommu_flush_cache(iommu, tbl->base,
+			    INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte));
+
+	return 0;
+}
+
+/*
+ * Write this kernel's interrupt-remapping entries back into the old
+ * kernel's table (mapped at ir_table->base_old_virt) and flush them.
+ *
+ * @index: entry to write back, or -1 to write back the whole table.
+ *
+ * Returns 0 on success, -1 if @iommu or a required table pointer is
+ * missing, or if @index is out of range.
+ */
+static int __iommu_update_old_irte(struct intel_iommu *iommu, int index)
+{
+	struct ir_table *tbl;
+	void __iomem *dst;
+	void *src;
+	unsigned long len;
+	int offset;
+
+	if (!iommu)
+		return -1;
+
+	tbl = iommu->ir_table;
+	if (!tbl)
+		return -1;
+
+	if (!tbl->base || !tbl->base_old_phys || !tbl->base_old_virt)
+		return -1;
+
+	if (index < -1 || index >= INTR_REMAP_TABLE_ENTRIES)
+		return -1;
+
+	if (index != -1) {
+		/* A single entry */
+		offset = index * sizeof(struct irte);
+		len = sizeof(struct irte);
+	} else {
+		/* The complete table */
+		offset = 0;
+		len = INTR_REMAP_TABLE_ENTRIES * sizeof(struct irte);
+	}
+
+	dst = tbl->base_old_virt;
+	src = tbl->base;
+	memcpy(dst + offset, src + offset, len);
+
+	__iommu_flush_cache(iommu, dst + offset, len);
+
+	return 0;
+}
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 8e29b97..76c6ea5 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -290,6 +290,10 @@ struct q_inval {
struct ir_table {
struct irte *base;
unsigned long *bitmap;
+#ifdef CONFIG_CRASH_DUMP
+ void __iomem *base_old_virt; /* ioremap_cache() mapping of the old kernel's table */
+ unsigned long base_old_phys; /* old table address, read from DMAR_IRTA_REG */
+#endif
};
#endif
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* [PATCH v7 10/10] iommu/vt-d: Use old irte in kdump kernel
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
` (8 preceding siblings ...)
2015-01-07 4:04 ` [PATCH v7 09/10] iommu/vt-d: Copy functions for irte Li, Zhen-Hua
@ 2015-01-07 4:04 ` Li, Zhen-Hua
2015-01-07 4:11 ` [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults " Li, ZhenHua
10 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-07 4:04 UTC (permalink / raw)
To: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung
Cc: iommu, linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, zhen-hual, rwright
Fix the intr-remapping fault.
[1.594890] dmar: DRHD: handling fault status reg 2
[1.594894] dmar: INTR-REMAP: Request device [[41:00.0] fault index 4d
[1.594894] INTR-REMAP:[fault reason 34] Present field in the IRTE entry
is clear
Use old irte in kdump kernel, do not disable and re-enable interrupt
remapping.
Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
---
drivers/iommu/intel_irq_remapping.c | 42 ++++++++++++++++++++++++++++++++-----
1 file changed, 37 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index d37fd62..58356cb 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -198,6 +198,11 @@ static int modify_irte(int irq, struct irte *irte_modified)
set_64bit(&irte->low, irte_modified->low);
set_64bit(&irte->high, irte_modified->high);
+
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel())
+ __iommu_update_old_irte(iommu, index);
+#endif
__iommu_flush_cache(iommu, irte, sizeof(*irte));
rc = qi_flush_iec(iommu, index, 0);
@@ -259,6 +264,11 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
bitmap_release_region(iommu->ir_table->bitmap, index,
irq_iommu->irte_mask);
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel())
+ __iommu_update_old_irte(iommu, -1);
+#endif
+
return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
}
@@ -640,11 +650,20 @@ static int __init intel_enable_irq_remapping(void)
*/
dmar_fault(-1, iommu);
- /*
- * Disable intr remapping and queued invalidation, if already
- * enabled prior to OS handover.
- */
- iommu_disable_irq_remapping(iommu);
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ /* Do not disable irq remapping and then re-enable it again. */
+ } else {
+#endif
+ /*
+ * Disable intr remapping and queued invalidation,
+ * if already enabled prior to OS handover.
+ */
+ iommu_disable_irq_remapping(iommu);
+
+#ifdef CONFIG_CRASH_DUMP
+ }
+#endif
dmar_disable_qi(iommu);
}
@@ -687,7 +706,20 @@ static int __init intel_enable_irq_remapping(void)
if (intel_setup_irq_remapping(iommu))
goto error;
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ unsigned long long q;
+
+ q = dmar_readq(iommu->reg + DMAR_IRTA_REG);
+ iommu->ir_table->base_old_phys = q & VTD_PAGE_MASK;
+ iommu->ir_table->base_old_virt = ioremap_cache(
+ iommu->ir_table->base_old_phys,
+ INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte));
+ __iommu_load_old_irte(iommu);
+ } else
+#endif
iommu_set_irq_remapping(iommu, eim);
+
setup = 1;
}
--
2.0.0-rc0
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel
2015-01-07 4:04 [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel Li, Zhen-Hua
` (9 preceding siblings ...)
2015-01-07 4:04 ` [PATCH v7 10/10] iommu/vt-d: Use old irte in kdump kernel Li, Zhen-Hua
@ 2015-01-07 4:11 ` Li, ZhenHua
2015-01-07 5:02 ` Baoquan He
10 siblings, 1 reply; 18+ messages in thread
From: Li, ZhenHua @ 2015-01-07 4:11 UTC (permalink / raw)
To: Li, Zhen-Hua
Cc: dwmw2, indou.takao, bhe, joro, vgoyal, dyoung, iommu,
linux-kernel, linux-pci, kexec, alex.williamson, ddutile,
ishii.hironobu, bhelgaas, doug.hatch, jerry.hoemann, tom.vaden,
li.zhang6, lisa.mitchell, billsumnerlinux, rwright
Many thanks to Takao Indoh and Baoquan He, for your testing on more
different systems.
Calls to the flush functions have been added in this version.
The usage of __iommu_flush_cache function :
1. Fixes a dump on Takao's system.
2. Reduces the count of faults on Baoquan's system.
Regards
Zhenhua
On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
> This patchset is an update of Bill Sumner's patchset, implements a fix for:
> If a kernel boots with intel_iommu=on on a system that supports intel vt-d,
> when a panic happens, the kdump kernel will boot with these faults:
>
> dmar: DRHD: handling fault status reg 102
> dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff80000
> DMAR:[fault reason 01] Present bit in root entry is clear
>
> dmar: DRHD: handling fault status reg 2
> dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
> INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
>
> On some system, the interrupt remapping fault will also happen even if the
> intel_iommu is not set to on, because the interrupt remapping will be enabled
> when x2apic is needed by the system.
>
> The cause of the DMA fault is described in Bill's original version, and the
> INTR-Remap fault is caused by a similar reason. In short, the initialization
> of vt-d drivers causes the in-flight DMA and interrupt requests get wrong
> response.
>
> To fix this problem, we modify the behavior of the Intel VT-d driver in the
> crashdump kernel:
>
> For DMA Remapping:
> 1. To accept the vt-d hardware in an active state,
> 2. Do not disable and re-enable the translation, keep it enabled.
> 3. Use the old root entry table, do not rewrite the RTA register.
> 4. Malloc and use new context entry table and page table, copy data from the
> old ones that used by the old kernel.
> 5. to use different portions of the iova address ranges for the device drivers
> in the crashdump kernel than the iova ranges that were in-use at the time
> of the panic.
> 6. After device driver is loaded, when it issues the first dma_map command,
> free the dmar_domain structure for this device, and generate a new one, so
> that the device can be assigned a new and empty page table.
> 7. When a new context entry table is generated, we also save its address to
> the old root entry table.
>
> For Interrupt Remapping:
> 1. To accept the vt-d hardware in an active state,
> 2. Do not disable and re-enable the interrupt remapping, keep it enabled.
> 3. Use the old interrupt remapping table, do not rewrite the IRTA register.
> 4. When ioapic entry is setup, the interrupt remapping table is changed, and
> the updated data will be stored to the old interrupt remapping table.
>
> Advantages of this approach:
> 1. All manipulation of the IO-device is done by the Linux device-driver
> for that device.
> 2. This approach behaves in a manner very similar to operation without an
> active iommu.
> 3. Any activity between the IO-device and its RMRR areas is handled by the
> device-driver in the same manner as during a non-kdump boot.
> 4. If an IO-device has no driver in the kdump kernel, it is simply left alone.
> This supports the practice of creating a special kdump kernel without
> drivers for any devices that are not required for taking a crashdump.
> 5. Minimal code-changes among the existing mainline intel vt-d code.
>
> Summary of changes in this patch set:
> 1. Added some useful function for root entry table in code intel-iommu.c
> 2. Added new members to struct root_entry and struct irte;
> 3. Functions to load old root entry table to iommu->root_entry from the memory
> of old kernel.
> 4. Functions to malloc new context entry table and page table and copy the data
> from the old ones to the malloced new ones.
> 5. Functions to enable support for DMA remapping in kdump kernel.
> 6. Functions to load old irte data from the old kernel to the kdump kernel.
> 7. Some code changes that support other behaviours that have been listed.
> 8. In the new functions, use physical address as "unsigned long" type, not
> pointers.
>
> Original version by Bill Sumner:
> https://lkml.org/lkml/2014/1/10/518
> https://lkml.org/lkml/2014/4/15/716
> https://lkml.org/lkml/2014/4/24/836
>
> Zhenhua's updates:
> https://lkml.org/lkml/2014/10/21/134
> https://lkml.org/lkml/2014/12/15/121
> https://lkml.org/lkml/2014/12/22/53
>
> Changelog[v7]:
> 1. Use __iommu_flush_cache to flush the data to hardware.
>
> Changelog[v6]:
> 1. Use "unsigned long" as type of physical address.
> 2. Use new function unmap_device_dma to unmap the old dma.
> 3. Some small incorrect bits order for aw shift.
>
> Changelog[v5]:
> 1. Do not disable and re-enable translation and interrupt remapping.
> 2. Use old root entry table.
> 3. Use old interrupt remapping table.
> 4. New functions to copy data from old kernel, and save to old kernel mem.
> 5. New functions to save updated root entry table and irte table.
> 6. Use intel_unmap to unmap the old dma;
> 7. Allocate new pages while driver is being loaded.
>
> Changelog[v4]:
> 1. Cut off the patches that move some defines and functions to new files.
> 2. Reduce the number of patches to five, making them easier to read.
> 3. Changed the name of functions, make them consistent with current context
> get/set functions.
> 4. Add change to function __iommu_attach_domain.
>
> Changelog[v3]:
> 1. Commented-out "#define DEBUG 1" to eliminate debug messages.
> 2. Updated the comments about changes in each version.
> 3. Fixed: one-line added to Copy-Translations patch to initialize the iovad
> struct as recommended by Baoquan He [bhe@redhat.com]
> init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
>
> Changelog[v2]:
> The following series implements a fix for:
> A kdump problem about DMA that has been discussed for a long time. That is,
> when a kernel panics and boots into the kdump kernel, DMA started by the
> panicked kernel is not stopped before the kdump kernel is booted and the
> kdump kernel disables the IOMMU while this DMA continues. This causes the
> IOMMU to stop translating the DMA addresses as IOVAs and begin to treat
> them as physical memory addresses -- which causes the DMA to either:
> (1) generate DMAR errors or
> (2) generate PCI SERR errors or
> (3) transfer data to or from incorrect areas of memory. Often this
> causes the dump to fail.
>
> Changelog[v1]:
> The original version.
>
> Changed in this version:
> 1. Do not disable and re-enable translation and interrupt remapping.
> 2. Use old root entry table.
> 3. Use old interrupt remapping table.
> 4. Use "unsigned long" as physical address.
> 5. Use intel_unmap to unmap the old dma;
>
> Baoquan He <bhe@redhat.com> helps testing this patchset.
>
> iommu/vt-d: Update iommu_attach_domain() and its callers
> iommu/vt-d: Items required for kdump
> iommu/vt-d: Add domain-id functions
> iommu/vt-d: functions to copy data from old mem
> iommu/vt-d: Add functions to load and save old re
> iommu/vt-d: datatypes and functions used for kdump
> iommu/vt-d: enable kdump support in iommu module
> iommu/vt-d: assign new page table for dma_map
> iommu/vt-d: Copy functions for irte
> iommu/vt-d: Use old irte in kdump kernel
>
> Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
> Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
> Tested-by: Baoquan He <bhe@redhat.com>
> ---
> drivers/iommu/intel-iommu.c | 1050 +++++++++++++++++++++++++++++++++--
> drivers/iommu/intel_irq_remapping.c | 104 +++-
> include/linux/intel-iommu.h | 18 +
> 3 files changed, 1130 insertions(+), 42 deletions(-)
>
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel
2015-01-07 4:11 ` [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults " Li, ZhenHua
@ 2015-01-07 5:02 ` Baoquan He
2015-01-07 5:25 ` Li, ZhenHua
0 siblings, 1 reply; 18+ messages in thread
From: Baoquan He @ 2015-01-07 5:02 UTC (permalink / raw)
To: Li, ZhenHua
Cc: dwmw2, indou.takao, joro, vgoyal, dyoung, iommu, linux-kernel,
linux-pci, kexec, alex.williamson, ddutile, ishii.hironobu,
bhelgaas, doug.hatch, jerry.hoemann, tom.vaden, li.zhang6,
lisa.mitchell, billsumnerlinux, rwright
On 01/07/15 at 12:11pm, Li, ZhenHua wrote:
> Many thanks to Takao Indoh and Baoquan He, for your testing on more
> different systems.
>
> The calling of flush functions are added to this version.
>
> The usage of __iommu_flush_cache function :
> 1. Fixes a dump on Takao's system.
> 2. Reduces the count of faults on Baoquan's system.
I am testing the version you sent to me yesterday afternoon. Is that
different from this patchset? I found your patchset may reserve a big
contiguous memory region under 896M; this causes the crashkernel
reservation to fail when I set crashkernel=320M. The reason I increased
the crashkernel reservation to 320M is that 256M is not enough and causes
OOM when that patchset is tested.
I am checking what happened.
Thanks
Baoquan
>
> Regards
> Zhenhua
>
> On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
> >This patchset is an update of Bill Sumner's patchset, implements a fix for:
> >If a kernel boots with intel_iommu=on on a system that supports intel vt-d,
> >when a panic happens, the kdump kernel will boot with these faults:
> >
> > dmar: DRHD: handling fault status reg 102
> > dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff80000
> > DMAR:[fault reason 01] Present bit in root entry is clear
> >
> > dmar: DRHD: handling fault status reg 2
> > dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
> > INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
> >
> >On some system, the interrupt remapping fault will also happen even if the
> >intel_iommu is not set to on, because the interrupt remapping will be enabled
> >when x2apic is needed by the system.
> >
> >The cause of the DMA fault is described in Bill's original version, and the
> >INTR-Remap fault is caused by a similar reason. In short, the initialization
> >of vt-d drivers causes the in-flight DMA and interrupt requests get wrong
> >response.
> >
> >To fix this problem, we modifies the behaviors of the intel vt-d in the
> >crashdump kernel:
> >
> >For DMA Remapping:
> >1. To accept the vt-d hardware in an active state,
> >2. Do not disable and re-enable the translation, keep it enabled.
> >3. Use the old root entry table, do not rewrite the RTA register.
> >4. Malloc and use new context entry table and page table, copy data from the
> > old ones that used by the old kernel.
> >5. to use different portions of the iova address ranges for the device drivers
> > in the crashdump kernel than the iova ranges that were in-use at the time
> > of the panic.
> >6. After device driver is loaded, when it issues the first dma_map command,
> > free the dmar_domain structure for this device, and generate a new one, so
> > that the device can be assigned a new and empty page table.
> >7. When a new context entry table is generated, we also save its address to
> > the old root entry table.
> >
> >For Interrupt Remapping:
> >1. To accept the vt-d hardware in an active state,
> >2. Do not disable and re-enable the interrupt remapping, keep it enabled.
> >3. Use the old interrupt remapping table, do not rewrite the IRTA register.
> >4. When ioapic entry is setup, the interrupt remapping table is changed, and
> > the updated data will be stored to the old interrupt remapping table.
> >
> >Advantages of this approach:
> >1. All manipulation of the IO-device is done by the Linux device-driver
> > for that device.
> >2. This approach behaves in a manner very similar to operation without an
> > active iommu.
> >3. Any activity between the IO-device and its RMRR areas is handled by the
> > device-driver in the same manner as during a non-kdump boot.
> >4. If an IO-device has no driver in the kdump kernel, it is simply left alone.
> > This supports the practice of creating a special kdump kernel without
> > drivers for any devices that are not required for taking a crashdump.
> >5. Minimal code-changes among the existing mainline intel vt-d code.
> >
> >Summary of changes in this patch set:
> >1. Added some useful function for root entry table in code intel-iommu.c
> >2. Added new members to struct root_entry and struct irte;
> >3. Functions to load old root entry table to iommu->root_entry from the memory
> > of old kernel.
> >4. Functions to malloc new context entry table and page table and copy the data
> > from the old ones to the malloced new ones.
> >5. Functions to enable support for DMA remapping in kdump kernel.
> >6. Functions to load old irte data from the old kernel to the kdump kernel.
> >7. Some code changes that support other behaviours that have been listed.
> >8. In the new functions, use physical address as "unsigned long" type, not
> > pointers.
> >
> >Original version by Bill Sumner:
> > https://lkml.org/lkml/2014/1/10/518
> > https://lkml.org/lkml/2014/4/15/716
> > https://lkml.org/lkml/2014/4/24/836
> >
> >Zhenhua's updates:
> > https://lkml.org/lkml/2014/10/21/134
> > https://lkml.org/lkml/2014/12/15/121
> > https://lkml.org/lkml/2014/12/22/53
> >
> >Changelog[v7]:
> > 1. Use __iommu_flush_cache to flush the data to hardware.
> >
> >Changelog[v6]:
> > 1. Use "unsigned long" as type of physical address.
> > 2. Use new function unmap_device_dma to unmap the old dma.
> > 3. Some small incorrect bits order for aw shift.
> >
> >Changelog[v5]:
> > 1. Do not disable and re-enable translation and interrupt remapping.
> > 2. Use old root entry table.
> > 3. Use old interrupt remapping table.
> > 4. New functions to copy data from old kernel, and save to old kernel mem.
> > 5. New functions to save updated root entry table and irte table.
> > 6. Use intel_unmap to unmap the old dma;
> > 7. Allocate new pages while driver is being loaded.
> >
> >Changelog[v4]:
> > 1. Cut off the patches that move some defines and functions to new files.
> > 2. Reduce the numbers of patches to five, make it more easier to read.
> > 3. Changed the name of functions, make them consistent with current context
> > get/set functions.
> > 4. Add change to function __iommu_attach_domain.
> >
> >Changelog[v3]:
> > 1. Commented-out "#define DEBUG 1" to eliminate debug messages.
> > 2. Updated the comments about changes in each version.
> > 3. Fixed: one-line added to Copy-Translations patch to initialize the iovad
> > struct as recommended by Baoquan He [bhe@redhat.com]
> > init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
> >
> >Changelog[v2]:
> > The following series implements a fix for:
> > A kdump problem about DMA that has been discussed for a long time. That is,
> > when a kernel panics and boots into the kdump kernel, DMA started by the
> > panicked kernel is not stopped before the kdump kernel is booted and the
> > kdump kernel disables the IOMMU while this DMA continues. This causes the
> > IOMMU to stop translating the DMA addresses as IOVAs and begin to treat
> > them as physical memory addresses -- which causes the DMA to either:
> > (1) generate DMAR errors or
> > (2) generate PCI SERR errors or
> > (3) transfer data to or from incorrect areas of memory. Often this
> > causes the dump to fail.
> >
> >Changelog[v1]:
> > The original version.
> >
> >Changed in this version:
> >1. Do not disable and re-enable translation and interrupt remapping.
> >2. Use old root entry table.
> >3. Use old interrupt remapping table.
> >4. Use "unsigned long" as physical address.
> >5. Use intel_unmap to unmap the old dma;
> >
> >Baoquan He <bhe@redhat.com> helps testing this patchset.
> >
> > iommu/vt-d: Update iommu_attach_domain() and its callers
> > iommu/vt-d: Items required for kdump
> > iommu/vt-d: Add domain-id functions
> > iommu/vt-d: functions to copy data from old mem
> > iommu/vt-d: Add functions to load and save old re
> > iommu/vt-d: datatypes and functions used for kdump
> > iommu/vt-d: enable kdump support in iommu module
> > iommu/vt-d: assign new page table for dma_map
> > iommu/vt-d: Copy functions for irte
> > iommu/vt-d: Use old irte in kdump kernel
> >
> >Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
> >Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
> >Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
> >Tested-by: Baoquan He <bhe@redhat.com>
> >---
> > drivers/iommu/intel-iommu.c | 1050 +++++++++++++++++++++++++++++++++--
> > drivers/iommu/intel_irq_remapping.c | 104 +++-
> > include/linux/intel-iommu.h | 18 +
> > 3 files changed, 1130 insertions(+), 42 deletions(-)
> >
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel
2015-01-07 5:02 ` Baoquan He
@ 2015-01-07 5:25 ` Li, ZhenHua
2015-01-07 8:28 ` Baoquan He
0 siblings, 1 reply; 18+ messages in thread
From: Li, ZhenHua @ 2015-01-07 5:25 UTC (permalink / raw)
To: Baoquan He
Cc: dwmw2, indou.takao, joro, vgoyal, dyoung, iommu, linux-kernel,
linux-pci, kexec, alex.williamson, ddutile, ishii.hironobu,
bhelgaas, doug.hatch, jerry.hoemann, tom.vaden, li.zhang6,
lisa.mitchell, billsumnerlinux, rwright, Li, ZhenHua
It is the same as the last one I sent to you yesterday.
The contiguous memory needed for data in this patchset:
RE: PAGE_SIZE, 4096 Bytes;
IRTE: 65536 * 16 ; 1M Bytes;
It should use the same amount of memory as the old versions of this
patchset. The changes in the latest version do not need more memory.
Regards
Zhenhua
On 01/07/2015 01:02 PM, Baoquan He wrote:
> On 01/07/15 at 12:11pm, Li, ZhenHua wrote:
>> Many thanks to Takao Indoh and Baoquan He, for your testing on more
>> different systems.
>>
>> The calling of flush functions are added to this version.
>>
>> The usage of __iommu_flush_cache function :
>> 1. Fixes a dump on Takao's system.
>> 2. Reduces the count of faults on Baoquan's system.
>
> I am testing the version you sent to me yesterday afternoon. Is that
> different with this patchset? I found your patchset man reserve a big
> contiguous memory region under 896M, this will cause the crashkernel
> reservation failed when I set crashkernel=320M. The reason I increase
> the crashkerenl reservation to 320M is 256M is not enough and cause OOM
> when that patchset is tested.
>
> I am checking what happened.
>
>
> Thanks
> Baoquan
>
>>
>> Regards
>> Zhenhua
>>
>> On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
>>> This patchset is an update of Bill Sumner's patchset, implements a fix for:
>>> If a kernel boots with intel_iommu=on on a system that supports intel vt-d,
>>> when a panic happens, the kdump kernel will boot with these faults:
>>>
>>> dmar: DRHD: handling fault status reg 102
>>> dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff80000
>>> DMAR:[fault reason 01] Present bit in root entry is clear
>>>
>>> dmar: DRHD: handling fault status reg 2
>>> dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
>>> INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
>>>
>>> On some system, the interrupt remapping fault will also happen even if the
>>> intel_iommu is not set to on, because the interrupt remapping will be enabled
>>> when x2apic is needed by the system.
>>>
>>> The cause of the DMA fault is described in Bill's original version, and the
>>> INTR-Remap fault is caused by a similar reason. In short, the initialization
>>> of vt-d drivers causes the in-flight DMA and interrupt requests get wrong
>>> response.
>>>
>>> To fix this problem, we modifies the behaviors of the intel vt-d in the
>>> crashdump kernel:
>>>
>>> For DMA Remapping:
>>> 1. To accept the vt-d hardware in an active state,
>>> 2. Do not disable and re-enable the translation, keep it enabled.
>>> 3. Use the old root entry table, do not rewrite the RTA register.
>>> 4. Malloc and use new context entry table and page table, copy data from the
>>> old ones that used by the old kernel.
>>> 5. to use different portions of the iova address ranges for the device drivers
>>> in the crashdump kernel than the iova ranges that were in-use at the time
>>> of the panic.
>>> 6. After device driver is loaded, when it issues the first dma_map command,
>>> free the dmar_domain structure for this device, and generate a new one, so
>>> that the device can be assigned a new and empty page table.
>>> 7. When a new context entry table is generated, we also save its address to
>>> the old root entry table.
>>>
>>> For Interrupt Remapping:
>>> 1. To accept the vt-d hardware in an active state,
>>> 2. Do not disable and re-enable the interrupt remapping, keep it enabled.
>>> 3. Use the old interrupt remapping table, do not rewrite the IRTA register.
>>> 4. When ioapic entry is setup, the interrupt remapping table is changed, and
>>> the updated data will be stored to the old interrupt remapping table.
>>>
>>> Advantages of this approach:
>>> 1. All manipulation of the IO-device is done by the Linux device-driver
>>> for that device.
>>> 2. This approach behaves in a manner very similar to operation without an
>>> active iommu.
>>> 3. Any activity between the IO-device and its RMRR areas is handled by the
>>> device-driver in the same manner as during a non-kdump boot.
>>> 4. If an IO-device has no driver in the kdump kernel, it is simply left alone.
>>> This supports the practice of creating a special kdump kernel without
>>> drivers for any devices that are not required for taking a crashdump.
>>> 5. Minimal code-changes among the existing mainline intel vt-d code.
>>>
>>> Summary of changes in this patch set:
>>> 1. Added some useful function for root entry table in code intel-iommu.c
>>> 2. Added new members to struct root_entry and struct irte;
>>> 3. Functions to load old root entry table to iommu->root_entry from the memory
>>> of old kernel.
>>> 4. Functions to malloc new context entry table and page table and copy the data
>>> from the old ones to the malloced new ones.
>>> 5. Functions to enable support for DMA remapping in kdump kernel.
>>> 6. Functions to load old irte data from the old kernel to the kdump kernel.
>>> 7. Some code changes that support other behaviours that have been listed.
>>> 8. In the new functions, use physical address as "unsigned long" type, not
>>> pointers.
>>>
>>> Original version by Bill Sumner:
>>> https://lkml.org/lkml/2014/1/10/518
>>> https://lkml.org/lkml/2014/4/15/716
>>> https://lkml.org/lkml/2014/4/24/836
>>>
>>> Zhenhua's updates:
>>> https://lkml.org/lkml/2014/10/21/134
>>> https://lkml.org/lkml/2014/12/15/121
>>> https://lkml.org/lkml/2014/12/22/53
>>>
>>> Changelog[v7]:
>>> 1. Use __iommu_flush_cache to flush the data to hardware.
>>>
>>> Changelog[v6]:
>>> 1. Use "unsigned long" as type of physical address.
>>> 2. Use new function unmap_device_dma to unmap the old dma.
>>> 3. Some small incorrect bits order for aw shift.
>>>
>>> Changelog[v5]:
>>> 1. Do not disable and re-enable translation and interrupt remapping.
>>> 2. Use old root entry table.
>>> 3. Use old interrupt remapping table.
>>> 4. New functions to copy data from old kernel, and save to old kernel mem.
>>> 5. New functions to save updated root entry table and irte table.
>>> 6. Use intel_unmap to unmap the old dma;
>>> 7. Allocate new pages while driver is being loaded.
>>>
>>> Changelog[v4]:
>>> 1. Cut off the patches that move some defines and functions to new files.
>>> 2. Reduce the numbers of patches to five, make it more easier to read.
>>> 3. Changed the name of functions, make them consistent with current context
>>> get/set functions.
>>> 4. Add change to function __iommu_attach_domain.
>>>
>>> Changelog[v3]:
>>> 1. Commented-out "#define DEBUG 1" to eliminate debug messages.
>>> 2. Updated the comments about changes in each version.
>>> 3. Fixed: one-line added to Copy-Translations patch to initialize the iovad
>>> struct as recommended by Baoquan He [bhe@redhat.com]
>>> init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
>>>
>>> Changelog[v2]:
>>> The following series implements a fix for:
>>> A kdump problem about DMA that has been discussed for a long time. That is,
>>> when a kernel panics and boots into the kdump kernel, DMA started by the
>>> panicked kernel is not stopped before the kdump kernel is booted and the
>>> kdump kernel disables the IOMMU while this DMA continues. This causes the
>>> IOMMU to stop translating the DMA addresses as IOVAs and begin to treat
>>> them as physical memory addresses -- which causes the DMA to either:
>>> (1) generate DMAR errors or
>>> (2) generate PCI SERR errors or
>>> (3) transfer data to or from incorrect areas of memory. Often this
>>> causes the dump to fail.
>>>
>>> Changelog[v1]:
>>> The original version.
>>>
>>> Changed in this version:
>>> 1. Do not disable and re-enable translation and interrupt remapping.
>>> 2. Use old root entry table.
>>> 3. Use old interrupt remapping table.
>>> 4. Use "unsigned long" as physical address.
>>> 5. Use intel_unmap to unmap the old dma;
>>>
>>> Baoquan He <bhe@redhat.com> helps testing this patchset.
>>>
>>> iommu/vt-d: Update iommu_attach_domain() and its callers
>>> iommu/vt-d: Items required for kdump
>>> iommu/vt-d: Add domain-id functions
>>> iommu/vt-d: functions to copy data from old mem
>>> iommu/vt-d: Add functions to load and save old re
>>> iommu/vt-d: datatypes and functions used for kdump
>>> iommu/vt-d: enable kdump support in iommu module
>>> iommu/vt-d: assign new page table for dma_map
>>> iommu/vt-d: Copy functions for irte
>>> iommu/vt-d: Use old irte in kdump kernel
>>>
>>> Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
>>> Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
>>> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
>>> Tested-by: Baoquan He <bhe@redhat.com>
>>> ---
>>> drivers/iommu/intel-iommu.c | 1050 +++++++++++++++++++++++++++++++++--
>>> drivers/iommu/intel_irq_remapping.c | 104 +++-
>>> include/linux/intel-iommu.h | 18 +
>>> 3 files changed, 1130 insertions(+), 42 deletions(-)
>>>
>>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel
2015-01-07 5:25 ` Li, ZhenHua
@ 2015-01-07 8:28 ` Baoquan He
2015-01-07 8:52 ` Li, ZhenHua
0 siblings, 1 reply; 18+ messages in thread
From: Baoquan He @ 2015-01-07 8:28 UTC (permalink / raw)
To: Li, ZhenHua
Cc: dwmw2, indou.takao, joro, vgoyal, dyoung, iommu, linux-kernel,
linux-pci, kexec, alex.williamson, ddutile, ishii.hironobu,
bhelgaas, doug.hatch, jerry.hoemann, tom.vaden, li.zhang6,
lisa.mitchell, billsumnerlinux, rwright
On 01/07/15 at 01:25pm, Li, ZhenHua wrote:
> It is same as the last one I send to you yesterday.
>
> The continuous memory that needed for data in this patchset:
> RE: PAGE_SIZE, 4096 Bytes;
> IRTE: 65536 * 16 ; 1M Bytes;
>
> It should use same memory as the old versions of this patchset. The
> changes for the last version do not need more memory.
Hi Zhenhua,
It was my mistake: I didn't strip the debug info from the modules, so the
initramfs was bloated. Just now I tested the latest version; it
works well and the dump is successful. No dmar faults or intr-remap faults
are seen any more, good job!
Thanks
Baoquan
>
> Regards
> Zhenhua
>
> On 01/07/2015 01:02 PM, Baoquan He wrote:
> >On 01/07/15 at 12:11pm, Li, ZhenHua wrote:
> >>Many thanks to Takao Indoh and Baoquan He, for your testing on more
> >>different systems.
> >>
> >>The calling of flush functions are added to this version.
> >>
> >>The usage of __iommu_flush_cache function :
> >>1. Fixes a dump on Takao's system.
> >>2. Reduces the count of faults on Baoquan's system.
> >
> >I am testing the version you sent to me yesterday afternoon. Is that
> >different with this patchset? I found your patchset man reserve a big
> >contiguous memory region under 896M, this will cause the crashkernel
> >reservation failed when I set crashkernel=320M. The reason I increase
> >the crashkerenl reservation to 320M is 256M is not enough and cause OOM
> >when that patchset is tested.
> >
> >I am checking what happened.
> >
> >
> >Thanks
> >Baoquan
> >
> >>
> >>Regards
> >>Zhenhua
> >>
> >>On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
> >>>This patchset is an update of Bill Sumner's patchset, implements a fix for:
> >>>If a kernel boots with intel_iommu=on on a system that supports intel vt-d,
> >>>when a panic happens, the kdump kernel will boot with these faults:
> >>>
> >>> dmar: DRHD: handling fault status reg 102
> >>> dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff80000
> >>> DMAR:[fault reason 01] Present bit in root entry is clear
> >>>
> >>> dmar: DRHD: handling fault status reg 2
> >>> dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
> >>> INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
> >>>
> >>>On some system, the interrupt remapping fault will also happen even if the
> >>>intel_iommu is not set to on, because the interrupt remapping will be enabled
> >>>when x2apic is needed by the system.
> >>>
> >>>The cause of the DMA fault is described in Bill's original version, and the
> >>>INTR-Remap fault is caused by a similar reason. In short, the initialization
> >>>of vt-d drivers causes the in-flight DMA and interrupt requests get wrong
> >>>response.
> >>>
> >>>To fix this problem, we modifies the behaviors of the intel vt-d in the
> >>>crashdump kernel:
> >>>
> >>>For DMA Remapping:
> >>>1. To accept the vt-d hardware in an active state,
> >>>2. Do not disable and re-enable the translation, keep it enabled.
> >>>3. Use the old root entry table, do not rewrite the RTA register.
> >>>4. Malloc and use new context entry table and page table, copy data from the
> >>> old ones that used by the old kernel.
> >>>5. to use different portions of the iova address ranges for the device drivers
> >>> in the crashdump kernel than the iova ranges that were in-use at the time
> >>> of the panic.
> >>>6. After device driver is loaded, when it issues the first dma_map command,
> >>> free the dmar_domain structure for this device, and generate a new one, so
> >>> that the device can be assigned a new and empty page table.
> >>>7. When a new context entry table is generated, we also save its address to
> >>> the old root entry table.
> >>>
> >>>For Interrupt Remapping:
> >>>1. To accept the vt-d hardware in an active state,
> >>>2. Do not disable and re-enable the interrupt remapping, keep it enabled.
> >>>3. Use the old interrupt remapping table, do not rewrite the IRTA register.
> >>>4. When ioapic entry is setup, the interrupt remapping table is changed, and
> >>> the updated data will be stored to the old interrupt remapping table.
> >>>
> >>>Advantages of this approach:
> >>>1. All manipulation of the IO-device is done by the Linux device-driver
> >>> for that device.
> >>>2. This approach behaves in a manner very similar to operation without an
> >>> active iommu.
> >>>3. Any activity between the IO-device and its RMRR areas is handled by the
> >>> device-driver in the same manner as during a non-kdump boot.
> >>>4. If an IO-device has no driver in the kdump kernel, it is simply left alone.
> >>> This supports the practice of creating a special kdump kernel without
> >>> drivers for any devices that are not required for taking a crashdump.
> >>>5. Minimal code-changes among the existing mainline intel vt-d code.
> >>>
> >>>Summary of changes in this patch set:
> >>>1. Added some useful function for root entry table in code intel-iommu.c
> >>>2. Added new members to struct root_entry and struct irte;
> >>>3. Functions to load old root entry table to iommu->root_entry from the memory
> >>> of old kernel.
> >>>4. Functions to malloc new context entry table and page table and copy the data
> >>> from the old ones to the malloced new ones.
> >>>5. Functions to enable support for DMA remapping in kdump kernel.
> >>>6. Functions to load old irte data from the old kernel to the kdump kernel.
> >>>7. Some code changes that support other behaviours that have been listed.
> >>>8. In the new functions, use physical address as "unsigned long" type, not
> >>> pointers.
> >>>
> >>>Original version by Bill Sumner:
> >>> https://lkml.org/lkml/2014/1/10/518
> >>> https://lkml.org/lkml/2014/4/15/716
> >>> https://lkml.org/lkml/2014/4/24/836
> >>>
> >>>Zhenhua's updates:
> >>> https://lkml.org/lkml/2014/10/21/134
> >>> https://lkml.org/lkml/2014/12/15/121
> >>> https://lkml.org/lkml/2014/12/22/53
> >>>
> >>>Changelog[v7]:
> >>> 1. Use __iommu_flush_cache to flush the data to hardware.
> >>>
> >>>Changelog[v6]:
> >>> 1. Use "unsigned long" as type of physical address.
> >>> 2. Use new function unmap_device_dma to unmap the old dma.
> >>> 3. Some small incorrect bits order for aw shift.
> >>>
> >>>Changelog[v5]:
> >>> 1. Do not disable and re-enable translation and interrupt remapping.
> >>> 2. Use old root entry table.
> >>> 3. Use old interrupt remapping table.
> >>> 4. New functions to copy data from old kernel, and save to old kernel mem.
> >>> 5. New functions to save updated root entry table and irte table.
> >>> 6. Use intel_unmap to unmap the old dma;
> >>> 7. Allocate new pages while driver is being loaded.
> >>>
> >>>Changelog[v4]:
> >>> 1. Cut off the patches that move some defines and functions to new files.
> >>> 2. Reduce the numbers of patches to five, make it more easier to read.
> >>> 3. Changed the name of functions, make them consistent with current context
> >>> get/set functions.
> >>> 4. Add change to function __iommu_attach_domain.
> >>>
> >>>Changelog[v3]:
> >>> 1. Commented-out "#define DEBUG 1" to eliminate debug messages.
> >>> 2. Updated the comments about changes in each version.
> >>> 3. Fixed: one-line added to Copy-Translations patch to initialize the iovad
> >>> struct as recommended by Baoquan He [bhe@redhat.com]
> >>> init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
> >>>
> >>>Changelog[v2]:
> >>> The following series implements a fix for:
> >>> A kdump problem about DMA that has been discussed for a long time. That is,
> >>> when a kernel panics and boots into the kdump kernel, DMA started by the
> >>> panicked kernel is not stopped before the kdump kernel is booted and the
> >>> kdump kernel disables the IOMMU while this DMA continues. This causes the
> >>> IOMMU to stop translating the DMA addresses as IOVAs and begin to treat
> >>> them as physical memory addresses -- which causes the DMA to either:
> >>> (1) generate DMAR errors or
> >>> (2) generate PCI SERR errors or
> >>> (3) transfer data to or from incorrect areas of memory. Often this
> >>> causes the dump to fail.
> >>>
> >>>Changelog[v1]:
> >>> The original version.
> >>>
> >>>Changed in this version:
> >>>1. Do not disable and re-enable translation and interrupt remapping.
> >>>2. Use old root entry table.
> >>>3. Use old interrupt remapping table.
> >>>4. Use "unsigned long" as physical address.
> >>>5. Use intel_unmap to unmap the old dma;
> >>>
> >>>Baoquan He <bhe@redhat.com> helps testing this patchset.
> >>>
> >>> iommu/vt-d: Update iommu_attach_domain() and its callers
> >>> iommu/vt-d: Items required for kdump
> >>> iommu/vt-d: Add domain-id functions
> >>> iommu/vt-d: functions to copy data from old mem
> >>> iommu/vt-d: Add functions to load and save old re
> >>> iommu/vt-d: datatypes and functions used for kdump
> >>> iommu/vt-d: enable kdump support in iommu module
> >>> iommu/vt-d: assign new page table for dma_map
> >>> iommu/vt-d: Copy functions for irte
> >>> iommu/vt-d: Use old irte in kdump kernel
> >>>
> >>>Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
> >>>Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
> >>>Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
> >>>Tested-by: Baoquan He <bhe@redhat.com>
> >>>---
> >>> drivers/iommu/intel-iommu.c | 1050 +++++++++++++++++++++++++++++++++--
> >>> drivers/iommu/intel_irq_remapping.c | 104 +++-
> >>> include/linux/intel-iommu.h | 18 +
> >>> 3 files changed, 1130 insertions(+), 42 deletions(-)
> >>>
> >>
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel
2015-01-07 8:28 ` Baoquan He
@ 2015-01-07 8:52 ` Li, ZhenHua
2015-01-08 1:00 ` Takao Indoh
0 siblings, 1 reply; 18+ messages in thread
From: Li, ZhenHua @ 2015-01-07 8:52 UTC (permalink / raw)
To: Baoquan He, indou.takao
Cc: dwmw2, joro, vgoyal, dyoung, iommu, linux-kernel, linux-pci,
kexec, alex.williamson, ddutile, ishii.hironobu, bhelgaas,
doug.hatch, jerry.hoemann, tom.vaden, li.zhang6, lisa.mitchell,
billsumnerlinux, rwright, Li, ZhenHua
Well, that's quite good news.
Looking forward to Takao's testing on his system.
Regards
Zhenhua
On 01/07/2015 04:28 PM, Baoquan He wrote:
> On 01/07/15 at 01:25pm, Li, ZhenHua wrote:
>> It is the same as the last one I sent to you yesterday.
>>
>> The contiguous memory needed for data in this patchset:
>> RE: PAGE_SIZE, 4096 Bytes;
>> IRTE: 65536 * 16 ; 1M Bytes;
>>
>> It should use same memory as the old versions of this patchset. The
>> changes for the last version do not need more memory.
>
> Hi Zhenhua,
>
> It was my mistake because I didn't strip the debug info of modules, then
> initramfs is bloated very big. Just now I tested the latest version, it
> works well and dump is successful. No dmar fault and intr-remap fault
> seen any more, good job!
>
> Thanks
> Baoquan
>
>
>>
>> Regards
>> Zhenhua
>>
>> On 01/07/2015 01:02 PM, Baoquan He wrote:
>>> On 01/07/15 at 12:11pm, Li, ZhenHua wrote:
>>>> Many thanks to Takao Indoh and Baoquan He, for your testing on more
>>>> different systems.
>>>>
>>>> The calling of flush functions are added to this version.
>>>>
>>>> The usage of __iommu_flush_cache function :
>>>> 1. Fixes a dump on Takao's system.
>>>> 2. Reduces the count of faults on Baoquan's system.
>>>
>>> I am testing the version you sent to me yesterday afternoon. Is that
>>> different from this patchset? I found your patchset may reserve a big
>>> contiguous memory region under 896M, which causes the crashkernel
>>> reservation to fail when I set crashkernel=320M. The reason I increased
>>> the crashkernel reservation to 320M is that 256M is not enough and causes
>>> OOM when that patchset is tested.
>>>
>>> I am checking what happened.
>>>
>>>
>>> Thanks
>>> Baoquan
>>>
>>>>
>>>> Regards
>>>> Zhenhua
>>>>
>>>> On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
>>>>> This patchset is an update of Bill Sumner's patchset, implements a fix for:
>>>>> If a kernel boots with intel_iommu=on on a system that supports intel vt-d,
>>>>> when a panic happens, the kdump kernel will boot with these faults:
>>>>>
>>>>> dmar: DRHD: handling fault status reg 102
>>>>> dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff80000
>>>>> DMAR:[fault reason 01] Present bit in root entry is clear
>>>>>
>>>>> dmar: DRHD: handling fault status reg 2
>>>>> dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
>>>>> INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
>>>>>
>>>>> On some system, the interrupt remapping fault will also happen even if the
>>>>> intel_iommu is not set to on, because the interrupt remapping will be enabled
>>>>> when x2apic is needed by the system.
>>>>>
>>>>> The cause of the DMA fault is described in Bill's original version, and the
>>>>> INTR-Remap fault is caused by a similar reason. In short, the initialization
>>>>> of vt-d drivers causes the in-flight DMA and interrupt requests to get wrong
>>>>> responses.
>>>>>
>>>>> To fix this problem, we modify the behavior of the intel vt-d in the
>>>>> crashdump kernel:
>>>>>
>>>>> For DMA Remapping:
>>>>> 1. To accept the vt-d hardware in an active state,
>>>>> 2. Do not disable and re-enable the translation, keep it enabled.
>>>>> 3. Use the old root entry table, do not rewrite the RTA register.
>>>>> 4. Malloc and use new context entry table and page table, copy data from the
>>>>> old ones that used by the old kernel.
>>>>> 5. to use different portions of the iova address ranges for the device drivers
>>>>> in the crashdump kernel than the iova ranges that were in-use at the time
>>>>> of the panic.
>>>>> 6. After device driver is loaded, when it issues the first dma_map command,
>>>>> free the dmar_domain structure for this device, and generate a new one, so
>>>>> that the device can be assigned a new and empty page table.
>>>>> 7. When a new context entry table is generated, we also save its address to
>>>>> the old root entry table.
>>>>>
>>>>> For Interrupt Remapping:
>>>>> 1. To accept the vt-d hardware in an active state,
>>>>> 2. Do not disable and re-enable the interrupt remapping, keep it enabled.
>>>>> 3. Use the old interrupt remapping table, do not rewrite the IRTA register.
>>>>> 4. When ioapic entry is setup, the interrupt remapping table is changed, and
>>>>> the updated data will be stored to the old interrupt remapping table.
>>>>>
>>>>> Advantages of this approach:
>>>>> 1. All manipulation of the IO-device is done by the Linux device-driver
>>>>> for that device.
>>>>> 2. This approach behaves in a manner very similar to operation without an
>>>>> active iommu.
>>>>> 3. Any activity between the IO-device and its RMRR areas is handled by the
>>>>> device-driver in the same manner as during a non-kdump boot.
>>>>> 4. If an IO-device has no driver in the kdump kernel, it is simply left alone.
>>>>> This supports the practice of creating a special kdump kernel without
>>>>> drivers for any devices that are not required for taking a crashdump.
>>>>> 5. Minimal code-changes among the existing mainline intel vt-d code.
>>>>>
>>>>> Summary of changes in this patch set:
>>>>> 1. Added some useful function for root entry table in code intel-iommu.c
>>>>> 2. Added new members to struct root_entry and struct irte;
>>>>> 3. Functions to load old root entry table to iommu->root_entry from the memory
>>>>> of old kernel.
>>>>> 4. Functions to malloc new context entry table and page table and copy the data
>>>>> from the old ones to the malloced new ones.
>>>>> 5. Functions to enable support for DMA remapping in kdump kernel.
>>>>> 6. Functions to load old irte data from the old kernel to the kdump kernel.
>>>>> 7. Some code changes that support other behaviours that have been listed.
>>>>> 8. In the new functions, use physical address as "unsigned long" type, not
>>>>> pointers.
>>>>>
>>>>> Original version by Bill Sumner:
>>>>> https://lkml.org/lkml/2014/1/10/518
>>>>> https://lkml.org/lkml/2014/4/15/716
>>>>> https://lkml.org/lkml/2014/4/24/836
>>>>>
>>>>> Zhenhua's updates:
>>>>> https://lkml.org/lkml/2014/10/21/134
>>>>> https://lkml.org/lkml/2014/12/15/121
>>>>> https://lkml.org/lkml/2014/12/22/53
>>>>>
>>>>> Changelog[v7]:
>>>>> 1. Use __iommu_flush_cache to flush the data to hardware.
>>>>>
>>>>> Changelog[v6]:
>>>>> 1. Use "unsigned long" as type of physical address.
>>>>> 2. Use new function unmap_device_dma to unmap the old dma.
>>>>> 3. Some small incorrect bits order for aw shift.
>>>>>
>>>>> Changelog[v5]:
>>>>> 1. Do not disable and re-enable translation and interrupt remapping.
>>>>> 2. Use old root entry table.
>>>>> 3. Use old interrupt remapping table.
>>>>> 4. New functions to copy data from old kernel, and save to old kernel mem.
>>>>> 5. New functions to save updated root entry table and irte table.
>>>>> 6. Use intel_unmap to unmap the old dma;
>>>>> 7. Allocate new pages while driver is being loaded.
>>>>>
>>>>> Changelog[v4]:
>>>>> 1. Cut off the patches that move some defines and functions to new files.
>>>>> 2. Reduce the number of patches to five, making it easier to read.
>>>>> 3. Changed the name of functions, make them consistent with current context
>>>>> get/set functions.
>>>>> 4. Add change to function __iommu_attach_domain.
>>>>>
>>>>> Changelog[v3]:
>>>>> 1. Commented-out "#define DEBUG 1" to eliminate debug messages.
>>>>> 2. Updated the comments about changes in each version.
>>>>> 3. Fixed: one-line added to Copy-Translations patch to initialize the iovad
>>>>> struct as recommended by Baoquan He [bhe@redhat.com]
>>>>> init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
>>>>>
>>>>> Changelog[v2]:
>>>>> The following series implements a fix for:
>>>>> A kdump problem about DMA that has been discussed for a long time. That is,
>>>>> when a kernel panics and boots into the kdump kernel, DMA started by the
>>>>> panicked kernel is not stopped before the kdump kernel is booted and the
>>>>> kdump kernel disables the IOMMU while this DMA continues. This causes the
>>>>> IOMMU to stop translating the DMA addresses as IOVAs and begin to treat
>>>>> them as physical memory addresses -- which causes the DMA to either:
>>>>> (1) generate DMAR errors or
>>>>> (2) generate PCI SERR errors or
>>>>> (3) transfer data to or from incorrect areas of memory. Often this
>>>>> causes the dump to fail.
>>>>>
>>>>> Changelog[v1]:
>>>>> The original version.
>>>>>
>>>>> Changed in this version:
>>>>> 1. Do not disable and re-enable translation and interrupt remapping.
>>>>> 2. Use old root entry table.
>>>>> 3. Use old interrupt remapping table.
>>>>> 4. Use "unsigned long" as physical address.
>>>>> 5. Use intel_unmap to unmap the old dma;
>>>>>
>>>>> Baoquan He <bhe@redhat.com> helps testing this patchset.
>>>>>
>>>>> iommu/vt-d: Update iommu_attach_domain() and its callers
>>>>> iommu/vt-d: Items required for kdump
>>>>> iommu/vt-d: Add domain-id functions
>>>>> iommu/vt-d: functions to copy data from old mem
>>>>> iommu/vt-d: Add functions to load and save old re
>>>>> iommu/vt-d: datatypes and functions used for kdump
>>>>> iommu/vt-d: enable kdump support in iommu module
>>>>> iommu/vt-d: assign new page table for dma_map
>>>>> iommu/vt-d: Copy functions for irte
>>>>> iommu/vt-d: Use old irte in kdump kernel
>>>>>
>>>>> Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
>>>>> Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
>>>>> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
>>>>> Tested-by: Baoquan He <bhe@redhat.com>
>>>>> ---
>>>>> drivers/iommu/intel-iommu.c | 1050 +++++++++++++++++++++++++++++++++--
>>>>> drivers/iommu/intel_irq_remapping.c | 104 +++-
>>>>> include/linux/intel-iommu.h | 18 +
>>>>> 3 files changed, 1130 insertions(+), 42 deletions(-)
>>>>>
>>>>
>>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel
2015-01-07 8:52 ` Li, ZhenHua
@ 2015-01-08 1:00 ` Takao Indoh
2015-01-08 3:11 ` Li, Zhen-Hua
0 siblings, 1 reply; 18+ messages in thread
From: Takao Indoh @ 2015-01-08 1:00 UTC (permalink / raw)
To: zhen-hual, bhe
Cc: dwmw2, joro, vgoyal, dyoung, iommu, linux-kernel, linux-pci,
kexec, alex.williamson, ddutile, ishii.hironobu, bhelgaas,
doug.hatch, jerry.hoemann, tom.vaden, li.zhang6, lisa.mitchell,
billsumnerlinux, rwright
[-- Attachment #1: Type: text/plain, Size: 10424 bytes --]
On 2015/01/07 17:52, Li, ZhenHua wrote:
> Well, that's quite good news.
> Looking forward to Takao's testing on his system.
Unfortunately, a DMAR fault still occurs with this patch...
I have attached the console log.
Thanks,
Takao Indoh
>
> Regards
> Zhenhua
> On 01/07/2015 04:28 PM, Baoquan He wrote:
>> On 01/07/15 at 01:25pm, Li, ZhenHua wrote:
>>> It is the same as the last one I sent to you yesterday.
>>>
>>> The contiguous memory needed for data in this patchset:
>>> RE: PAGE_SIZE, 4096 Bytes;
>>> IRTE: 65536 * 16 ; 1M Bytes;
>>>
>>> It should use same memory as the old versions of this patchset. The
>>> changes for the last version do not need more memory.
>>
>> Hi Zhenhua,
>>
>> It was my mistake because I didn't strip the debug info of modules, then
>> initramfs is bloated very big. Just now I tested the latest version, it
>> works well and dump is successful. No dmar fault and intr-remap fault
>> seen any more, good job!
>>
>> Thanks
>> Baoquan
>>
>>
>>>
>>> Regards
>>> Zhenhua
>>>
>>> On 01/07/2015 01:02 PM, Baoquan He wrote:
>>>> On 01/07/15 at 12:11pm, Li, ZhenHua wrote:
>>>>> Many thanks to Takao Indoh and Baoquan He, for your testing on more
>>>>> different systems.
>>>>>
>>>>> The calling of flush functions are added to this version.
>>>>>
>>>>> The usage of __iommu_flush_cache function :
>>>>> 1. Fixes a dump on Takao's system.
>>>>> 2. Reduces the count of faults on Baoquan's system.
>>>>
>>>> I am testing the version you sent to me yesterday afternoon. Is that
>>>> different from this patchset? I found your patchset may reserve a big
>>>> contiguous memory region under 896M, which causes the crashkernel
>>>> reservation to fail when I set crashkernel=320M. The reason I increased
>>>> the crashkernel reservation to 320M is that 256M is not enough and causes
>>>> OOM when that patchset is tested.
>>>>
>>>> I am checking what happened.
>>>>
>>>>
>>>> Thanks
>>>> Baoquan
>>>>
>>>>>
>>>>> Regards
>>>>> Zhenhua
>>>>>
>>>>> On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
>>>>>> This patchset is an update of Bill Sumner's patchset, implements a fix for:
>>>>>> If a kernel boots with intel_iommu=on on a system that supports intel vt-d,
>>>>>> when a panic happens, the kdump kernel will boot with these faults:
>>>>>>
>>>>>> dmar: DRHD: handling fault status reg 102
>>>>>> dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff80000
>>>>>> DMAR:[fault reason 01] Present bit in root entry is clear
>>>>>>
>>>>>> dmar: DRHD: handling fault status reg 2
>>>>>> dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
>>>>>> INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear
>>>>>>
>>>>>> On some system, the interrupt remapping fault will also happen even if the
>>>>>> intel_iommu is not set to on, because the interrupt remapping will be enabled
>>>>>> when x2apic is needed by the system.
>>>>>>
>>>>>> The cause of the DMA fault is described in Bill's original version, and the
>>>>>> INTR-Remap fault is caused by a similar reason. In short, the initialization
>>>>>> of vt-d drivers causes the in-flight DMA and interrupt requests to get wrong
>>>>>> responses.
>>>>>>
>>>>>> To fix this problem, we modify the behavior of the intel vt-d in the
>>>>>> crashdump kernel:
>>>>>>
>>>>>> For DMA Remapping:
>>>>>> 1. To accept the vt-d hardware in an active state,
>>>>>> 2. Do not disable and re-enable the translation, keep it enabled.
>>>>>> 3. Use the old root entry table, do not rewrite the RTA register.
>>>>>> 4. Malloc and use new context entry table and page table, copy data from the
>>>>>> old ones that used by the old kernel.
>>>>>> 5. to use different portions of the iova address ranges for the device drivers
>>>>>> in the crashdump kernel than the iova ranges that were in-use at the time
>>>>>> of the panic.
>>>>>> 6. After device driver is loaded, when it issues the first dma_map command,
>>>>>> free the dmar_domain structure for this device, and generate a new one, so
>>>>>> that the device can be assigned a new and empty page table.
>>>>>> 7. When a new context entry table is generated, we also save its address to
>>>>>> the old root entry table.
>>>>>>
>>>>>> For Interrupt Remapping:
>>>>>> 1. To accept the vt-d hardware in an active state,
>>>>>> 2. Do not disable and re-enable the interrupt remapping, keep it enabled.
>>>>>> 3. Use the old interrupt remapping table, do not rewrite the IRTA register.
>>>>>> 4. When ioapic entry is setup, the interrupt remapping table is changed, and
>>>>>> the updated data will be stored to the old interrupt remapping table.
>>>>>>
>>>>>> Advantages of this approach:
>>>>>> 1. All manipulation of the IO-device is done by the Linux device-driver
>>>>>> for that device.
>>>>>> 2. This approach behaves in a manner very similar to operation without an
>>>>>> active iommu.
>>>>>> 3. Any activity between the IO-device and its RMRR areas is handled by the
>>>>>> device-driver in the same manner as during a non-kdump boot.
>>>>>> 4. If an IO-device has no driver in the kdump kernel, it is simply left alone.
>>>>>> This supports the practice of creating a special kdump kernel without
>>>>>> drivers for any devices that are not required for taking a crashdump.
>>>>>> 5. Minimal code-changes among the existing mainline intel vt-d code.
>>>>>>
>>>>>> Summary of changes in this patch set:
>>>>>> 1. Added some useful function for root entry table in code intel-iommu.c
>>>>>> 2. Added new members to struct root_entry and struct irte;
>>>>>> 3. Functions to load old root entry table to iommu->root_entry from the memory
>>>>>> of old kernel.
>>>>>> 4. Functions to malloc new context entry table and page table and copy the data
>>>>>> from the old ones to the malloced new ones.
>>>>>> 5. Functions to enable support for DMA remapping in kdump kernel.
>>>>>> 6. Functions to load old irte data from the old kernel to the kdump kernel.
>>>>>> 7. Some code changes that support other behaviours that have been listed.
>>>>>> 8. In the new functions, use physical address as "unsigned long" type, not
>>>>>> pointers.
>>>>>>
>>>>>> Original version by Bill Sumner:
>>>>>> https://lkml.org/lkml/2014/1/10/518
>>>>>> https://lkml.org/lkml/2014/4/15/716
>>>>>> https://lkml.org/lkml/2014/4/24/836
>>>>>>
>>>>>> Zhenhua's updates:
>>>>>> https://lkml.org/lkml/2014/10/21/134
>>>>>> https://lkml.org/lkml/2014/12/15/121
>>>>>> https://lkml.org/lkml/2014/12/22/53
>>>>>>
>>>>>> Changelog[v7]:
>>>>>> 1. Use __iommu_flush_cache to flush the data to hardware.
>>>>>>
>>>>>> Changelog[v6]:
>>>>>> 1. Use "unsigned long" as type of physical address.
>>>>>> 2. Use new function unmap_device_dma to unmap the old dma.
>>>>>> 3. Some small incorrect bits order for aw shift.
>>>>>>
>>>>>> Changelog[v5]:
>>>>>> 1. Do not disable and re-enable translation and interrupt remapping.
>>>>>> 2. Use old root entry table.
>>>>>> 3. Use old interrupt remapping table.
>>>>>> 4. New functions to copy data from old kernel, and save to old kernel mem.
>>>>>> 5. New functions to save updated root entry table and irte table.
>>>>>> 6. Use intel_unmap to unmap the old dma;
>>>>>> 7. Allocate new pages while driver is being loaded.
>>>>>>
>>>>>> Changelog[v4]:
>>>>>> 1. Cut off the patches that move some defines and functions to new files.
>>>>>> 2. Reduce the number of patches to five, making it easier to read.
>>>>>> 3. Changed the name of functions, make them consistent with current context
>>>>>> get/set functions.
>>>>>> 4. Add change to function __iommu_attach_domain.
>>>>>>
>>>>>> Changelog[v3]:
>>>>>> 1. Commented-out "#define DEBUG 1" to eliminate debug messages.
>>>>>> 2. Updated the comments about changes in each version.
>>>>>> 3. Fixed: one-line added to Copy-Translations patch to initialize the iovad
>>>>>> struct as recommended by Baoquan He [bhe@redhat.com]
>>>>>> init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
>>>>>>
>>>>>> Changelog[v2]:
>>>>>> The following series implements a fix for:
>>>>>> A kdump problem about DMA that has been discussed for a long time. That is,
>>>>>> when a kernel panics and boots into the kdump kernel, DMA started by the
>>>>>> panicked kernel is not stopped before the kdump kernel is booted and the
>>>>>> kdump kernel disables the IOMMU while this DMA continues. This causes the
>>>>>> IOMMU to stop translating the DMA addresses as IOVAs and begin to treat
>>>>>> them as physical memory addresses -- which causes the DMA to either:
>>>>>> (1) generate DMAR errors or
>>>>>> (2) generate PCI SERR errors or
>>>>>> (3) transfer data to or from incorrect areas of memory. Often this
>>>>>> causes the dump to fail.
>>>>>>
>>>>>> Changelog[v1]:
>>>>>> The original version.
>>>>>>
>>>>>> Changed in this version:
>>>>>> 1. Do not disable and re-enable translation and interrupt remapping.
>>>>>> 2. Use old root entry table.
>>>>>> 3. Use old interrupt remapping table.
>>>>>> 4. Use "unsigned long" as physical address.
>>>>>> 5. Use intel_unmap to unmap the old dma;
>>>>>>
>>>>>> Baoquan He <bhe@redhat.com> helps testing this patchset.
>>>>>>
>>>>>> iommu/vt-d: Update iommu_attach_domain() and its callers
>>>>>> iommu/vt-d: Items required for kdump
>>>>>> iommu/vt-d: Add domain-id functions
>>>>>> iommu/vt-d: functions to copy data from old mem
>>>>>> iommu/vt-d: Add functions to load and save old re
>>>>>> iommu/vt-d: datatypes and functions used for kdump
>>>>>> iommu/vt-d: enable kdump support in iommu module
>>>>>> iommu/vt-d: assign new page table for dma_map
>>>>>> iommu/vt-d: Copy functions for irte
>>>>>> iommu/vt-d: Use old irte in kdump kernel
>>>>>>
>>>>>> Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
>>>>>> Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
>>>>>> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
>>>>>> Tested-by: Baoquan He <bhe@redhat.com>
>>>>>> ---
>>>>>> drivers/iommu/intel-iommu.c | 1050 +++++++++++++++++++++++++++++++++--
>>>>>> drivers/iommu/intel_irq_remapping.c | 104 +++-
>>>>>> include/linux/intel-iommu.h | 18 +
>>>>>> 3 files changed, 1130 insertions(+), 42 deletions(-)
>>>>>>
>>>>>
>>>
>
>
>
[-- Attachment #2: log3.txt --]
[-- Type: text/plain, Size: 51714 bytes --]
SysRq : Trigger a crash
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff8135ed76>] sysrq_handle_crash+0x16/0x20
PGD 239d95067 PUD 239b0a067 PMD 0
Oops: 0002 [#1] SMP
Modules linked in: ebtable_nat ebtables xt_CHECKSUM iptable_mangle bridge autofs4 8021q garp stp llc cpufreq_ondemand ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 vhost_net macvtap macvlan vhost tun kvm_intel kvm uinput iTCO_wdt iTCO_vendor_support microcode serio_raw pcspkr tpm_infineon ipmi_si ipmi_msghandler i2c_i801 lpc_ich mfd_core ioatdma i7core_edac edac_core sg igb dca i2c_algo_bit ptp pps_core acpi_cpufreq ext4 jbd2 mbcache mptsas mptscsih mptbase scsi_transport_sas lpfc scsi_transport_fc megaraid_sas dm_mirror dm_region_hash dm_log dm_mod
CPU: 4 PID: 3000 Comm: bash Not tainted 3.19.0-rc3 #36
Hardware name: FUJITSU-SV PRIMERGY BX920 S2/D3030, BIOS 080015 Rev.3D81.3030 02/10/2012
task: ffff88023931e250 ti: ffff880239b64000 task.ti: ffff880239b64000
RIP: 0010:[<ffffffff8135ed76>] [<ffffffff8135ed76>] sysrq_handle_crash+0x16/0x20
RSP: 0018:ffff880239b67e88 EFLAGS: 00010296
RAX: 000000000000000f RBX: 0000000000000063 RCX: 0000000000000000
RDX: ffff88023fc0ea38 RSI: ffff88023fc0d238 RDI: 0000000000000063
RBP: ffff880239b67e88 R08: 0000000000019cd8 R09: 00000000000004c4
R10: 0000000000000003 R11: 00000000000004c3 R12: ffffffff81ac4c00
R13: 0000000000000008 R14: 0000000000000000 R15: 0000000000000001
FS: 00007f01234f3700(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 00000002356e6000 CR4: 00000000000007e0
Stack:
ffff880239b67eb8 ffffffff8135f6f1 0000000000000002 fffffffffffffffb
00007f0123504000 ffff880239b67f50 ffff880239b67ed8 ffffffff8135f78d
ffff880239b67ef8 ffff88023960ddc0 ffff880239b67ef8 ffffffff81201403
Call Trace:
[<ffffffff8135f6f1>] __handle_sysrq+0x121/0x180
[<ffffffff8135f78d>] write_sysrq_trigger+0x3d/0x40
[<ffffffff81201403>] proc_reg_write+0x43/0x70
[<ffffffff8119dbbe>] vfs_write+0xce/0x180
[<ffffffff8119e326>] SyS_write+0x56/0xd0
[<ffffffff810eca3c>] ? __audit_syscall_entry+0xac/0x110
[<ffffffff815b6152>] system_call_fastpath+0x12/0x17
Code: f1 6b 25 00 31 c0 eb ae 90 90 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 66 66 66 66 90 c7 05 ed 17 a0 00 01 00 00 00 0f ae f8 <c6> 04 25 00 00 00 00 01 c9 c3 55 48 89 e5 66 66 66 66 90 8d 47
RIP [<ffffffff8135ed76>] sysrq_handle_crash+0x16/0x20
RSP <ffff880239b67e88>
CR2: 0000000000000000
Initializing cgroup subsys cpuset
Initializing cgroup subsys cpu
Initializing cgroup subsys cpuacct
Linux version 3.19.0-rc3 (root@indowsXP) (gcc version 4.4.7 20120313 (Red Hat 4.4.7-4) (GCC) ) #36 SMP Wed Jan 7 18:33:24 JST 2015
Command line: ro root=UUID=bfc88f62-f080-492f-a9f5-17f0c7c6215a rd_NO_LUKS console=ttyS0,115200n8 rd_NO_MD KEYBOARDTYPE=pc KEYTABLE=jp106 LANG=ja_JP.UTF-8 rd_NO_LVM rd_NO_DM intel_iommu=on irqpoll nr_cpus=1 reset_devices cgroup_disable=memory mce=off memmap=exactmap memmap=627K@4K memmap=130425K@770675K elfcorehdr=901100K memmap=4K$0K memmap=9K$631K memmap=100K$924K memmap=8K$3137080K memmap=52K#3137088K memmap=204K#3137140K memmap=64K$3137344K memmap=8268K$3137460K memmap=262144K$3670016K memmap=4K$4175872K memmap=2048K$4192256K
e820: BIOS-provided physical RAM map:
BIOS-e820: [mem 0x0000000000000100-0x000000000009dbff] usable
BIOS-e820: [mem 0x000000000009dc00-0x000000000009ffff] reserved
BIOS-e820: [mem 0x00000000000e7000-0x00000000000fffff] reserved
BIOS-e820: [mem 0x0000000000100000-0x00000000bf77ffff] usable
BIOS-e820: [mem 0x00000000bf78e000-0x00000000bf78ffff] reserved
BIOS-e820: [mem 0x00000000bf790000-0x00000000bf79cfff] ACPI data
BIOS-e820: [mem 0x00000000bf79d000-0x00000000bf7cffff] ACPI NVS
BIOS-e820: [mem 0x00000000bf7d0000-0x00000000bf7dffff] reserved
BIOS-e820: [mem 0x00000000bf7ed000-0x00000000bfffffff] reserved
BIOS-e820: [mem 0x00000000e0000000-0x00000000efffffff] reserved
BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved
BIOS-e820: [mem 0x00000000ffe00000-0x00000000ffffffff] reserved
BIOS-e820: [mem 0x0000000100000000-0x000000023fffffff] usable
e820: last_pfn = 0x240000 max_arch_pfn = 0x400000000
NX (Execute Disable) protection: active
e820: user-defined physical RAM map:
user: [mem 0x0000000000000000-0x0000000000000fff] reserved
user: [mem 0x0000000000001000-0x000000000009dbff] usable
user: [mem 0x000000000009dc00-0x000000000009ffff] reserved
user: [mem 0x00000000000e7000-0x00000000000fffff] reserved
user: [mem 0x000000002f09cc00-0x0000000036ffafff] usable
user: [mem 0x00000000bf78e000-0x00000000bf78ffff] reserved
user: [mem 0x00000000bf790000-0x00000000bf7cffff] ACPI data
user: [mem 0x00000000bf7d0000-0x00000000bf7dffff] reserved
user: [mem 0x00000000bf7ed000-0x00000000bfffffff] reserved
user: [mem 0x00000000e0000000-0x00000000efffffff] reserved
user: [mem 0x00000000fee00000-0x00000000fee00fff] reserved
user: [mem 0x00000000ffe00000-0x00000000ffffffff] reserved
SMBIOS 2.5 present.
AGP: No AGP bridge found
e820: last_pfn = 0x36ffb max_arch_pfn = 0x400000000
PAT configuration [0-7]: WB WC UC- UC WB WC UC- UC
total RAM covered: 8184M
Found optimal setting for mtrr clean up
gran_size: 64K chunk_size: 16M num_reg: 5 lose cover RAM: 0G
found SMP MP-table at [mem 0x000ff780-0x000ff78f] mapped at [ffff8800000ff780]
Using GB pages for direct mapping
init_memory_mapping: [mem 0x00000000-0x000fffff]
init_memory_mapping: [mem 0x36800000-0x369fffff]
init_memory_mapping: [mem 0x34000000-0x367fffff]
init_memory_mapping: [mem 0x2f09d000-0x33ffffff]
init_memory_mapping: [mem 0x36a00000-0x36ffafff]
RAMDISK: [mem 0x36ab5000-0x36feefff]
ACPI: Early table checksum verification disabled
ACPI: RSDP 0x00000000000FAA50 000024 (v02 ACPIAM)
ACPI: XSDT 0x00000000BF790100 000094 (v01 AAA 20120210 MSFT 00000097)
ACPI: FACP 0x00000000BF790290 0000F4 (v04 021012 FACP1717 20120210 MSFT 00000097)
ACPI BIOS Warning (bug): 32/64X length mismatch in FADT/Gpe0Block: 128/64 (20141107/tbfadt-618)
ACPI: DSDT 0x00000000BF7906F0 00712B (v02 TU1O TU1O3D81 00003D81 INTL 20051117)
ACPI: FACS 0x00000000BF79D000 000040
ACPI: APIC 0x00000000BF790390 00011E (v02 021012 APIC1717 20120210 MSFT 00000097)
ACPI: SPCR 0x00000000BF7904B0 000050 (v01 021012 SPCR1717 20120210 MSFT 00000097)
ACPI: MCFG 0x00000000BF790500 00003C (v01 021012 OEMMCFG 20120210 MSFT 00000097)
ACPI: SLIT 0x00000000BF790540 000030 (v01 021012 OEMSLIT 20120210 MSFT 00000097)
ACPI: SLIC 0x00000000BF790570 000176 (v01 AAA 20120210 MSFT 00000097)
ACPI: SRAT 0x00000000BF79A6F0 0001D0 (v02 021012 OEMSRAT 00000001 INTL 00000001)
ACPI: HPET 0x00000000BF79A8C0 000038 (v01 021012 OEMHPET 20120210 MSFT 00000097)
ACPI: DMAR 0x00000000BF79D0E0 000140 (v01 AMI OEMDMAR 00000001 MSFT 00000097)
ACPI: SSDT 0x00000000BF79F9A0 000363 (v01 DpgPmm CpuPm 00000012 INTL 20051117)
ACPI: EINJ 0x00000000BF79A900 000130 (v01 AMIER AMI_EINJ 20120210 MSFT 00000097)
ACPI: BERT 0x00000000BF79AA90 000030 (v01 AMIER AMI_BERT 20120210 MSFT 00000097)
ACPI: ERST 0x00000000BF79AAC0 0001B0 (v01 AMIER AMI_ERST 20120210 MSFT 00000097)
ACPI: HEST 0x00000000BF79AC70 0000A8 (v01 AMIER ABC_HEST 20120210 MSFT 00000097)
SRAT: PXM 0 -> APIC 0x00 -> Node 0
SRAT: PXM 0 -> APIC 0x02 -> Node 0
SRAT: PXM 0 -> APIC 0x12 -> Node 0
SRAT: PXM 0 -> APIC 0x14 -> Node 0
SRAT: PXM 0 -> APIC 0x01 -> Node 0
SRAT: PXM 0 -> APIC 0x03 -> Node 0
SRAT: PXM 0 -> APIC 0x13 -> Node 0
SRAT: PXM 0 -> APIC 0x15 -> Node 0
SRAT: PXM 1 -> APIC 0x20 -> Node 1
SRAT: PXM 1 -> APIC 0x22 -> Node 1
SRAT: PXM 1 -> APIC 0x32 -> Node 1
SRAT: PXM 1 -> APIC 0x34 -> Node 1
SRAT: PXM 1 -> APIC 0x21 -> Node 1
SRAT: PXM 1 -> APIC 0x23 -> Node 1
SRAT: PXM 1 -> APIC 0x33 -> Node 1
SRAT: PXM 1 -> APIC 0x35 -> Node 1
SRAT: Node 0 PXM 0 [mem 0x00000000-0x0009ffff]
SRAT: Node 0 PXM 0 [mem 0x00100000-0xbfffffff]
SRAT: Node 0 PXM 0 [mem 0x100000000-0x13fffffff]
SRAT: Node 1 PXM 1 [mem 0x140000000-0x23fffffff]
NUMA: Node 0 [mem 0x00000000-0x0009ffff] + [mem 0x00100000-0x36ffafff] -> [mem 0x00000000-0x36ffafff]
NODE_DATA(0) allocated [mem 0x36a8f000-0x36ab4fff]
Zone ranges:
DMA [mem 0x00001000-0x00ffffff]
DMA32 [mem 0x01000000-0x36ffafff]
Normal empty
Movable zone start for each node
Early memory node ranges
node 0: [mem 0x00001000-0x0009cfff]
node 0: [mem 0x2f09d000-0x36ffafff]
Initmem setup node 0 [mem 0x00001000-0x36ffafff]
ACPI: PM-Timer IO Port: 0x808
ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 almost reached. Keeping one slot for boot cpu. Processor 0/0x0 ignored.
ACPI: LAPIC (acpi_id[0x02] lapic_id[0x02] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 almost reached. Keeping one slot for boot cpu. Processor 1/0x2 ignored.
ACPI: LAPIC (acpi_id[0x03] lapic_id[0x12] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 almost reached. Keeping one slot for boot cpu. Processor 2/0x12 ignored.
ACPI: LAPIC (acpi_id[0x04] lapic_id[0x14] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 almost reached. Keeping one slot for boot cpu. Processor 3/0x14 ignored.
ACPI: LAPIC (acpi_id[0x05] lapic_id[0x20] enabled)
ACPI: LAPIC (acpi_id[0x06] lapic_id[0x22] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 5/0x22 ignored.
ACPI: LAPIC (acpi_id[0x07] lapic_id[0x32] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 6/0x32 ignored.
ACPI: LAPIC (acpi_id[0x08] lapic_id[0x34] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 7/0x34 ignored.
ACPI: LAPIC (acpi_id[0x09] lapic_id[0x01] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 8/0x1 ignored.
ACPI: LAPIC (acpi_id[0x0a] lapic_id[0x03] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 9/0x3 ignored.
ACPI: LAPIC (acpi_id[0x0b] lapic_id[0x13] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 10/0x13 ignored.
ACPI: LAPIC (acpi_id[0x0c] lapic_id[0x15] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 11/0x15 ignored.
ACPI: LAPIC (acpi_id[0x0d] lapic_id[0x21] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 12/0x21 ignored.
ACPI: LAPIC (acpi_id[0x0e] lapic_id[0x23] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 13/0x23 ignored.
ACPI: LAPIC (acpi_id[0x0f] lapic_id[0x33] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 14/0x33 ignored.
ACPI: LAPIC (acpi_id[0x10] lapic_id[0x35] enabled)
ACPI: NR_CPUS/possible_cpus limit of 1 reached. Processor 15/0x35 ignored.
ACPI: LAPIC (acpi_id[0x11] lapic_id[0x90] disabled)
ACPI: LAPIC (acpi_id[0x12] lapic_id[0x91] disabled)
ACPI: LAPIC (acpi_id[0x13] lapic_id[0x92] disabled)
ACPI: LAPIC (acpi_id[0x14] lapic_id[0x93] disabled)
ACPI: LAPIC (acpi_id[0x15] lapic_id[0x94] disabled)
ACPI: LAPIC (acpi_id[0x16] lapic_id[0x95] disabled)
ACPI: LAPIC (acpi_id[0x17] lapic_id[0x96] disabled)
ACPI: LAPIC (acpi_id[0x18] lapic_id[0x97] disabled)
ACPI: LAPIC_NMI (acpi_id[0xff] high dfl lint[0x1])
ACPI: IOAPIC (id[0x06] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 6, version 32, address 0xfec00000, GSI 0-23
ACPI: IOAPIC (id[0x07] address[0xfec8a000] gsi_base[24])
IOAPIC[1]: apic_id 7, version 32, address 0xfec8a000, GSI 24-47
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
Using ACPI (MADT) for SMP configuration information
ACPI: HPET id: 0x8086a301 base: 0xfed00000
smpboot: 24 Processors exceeds NR_CPUS limit of 1
smpboot: Allowing 1 CPUs, 0 hotplug CPUs
PM: Registered nosave memory: [mem 0x00000000-0x00000fff]
PM: Registered nosave memory: [mem 0x0009d000-0x0009dfff]
PM: Registered nosave memory: [mem 0x0009e000-0x0009ffff]
PM: Registered nosave memory: [mem 0x000a0000-0x000e6fff]
PM: Registered nosave memory: [mem 0x000e7000-0x000fffff]
PM: Registered nosave memory: [mem 0x00100000-0x2f09cfff]
e820: [mem 0x36ffb000-0xbf78dfff] available for PCI devices
setup_percpu: NR_CPUS:8192 nr_cpumask_bits:1 nr_cpu_ids:1 nr_node_ids:2
PERCPU: Embedded 29 pages/cpu @ffff880036600000 s80640 r8192 d29952 u2097152
Built 1 zonelists in Node order, mobility grouping on. Total pages: 32291
Policy zone: DMA32
Kernel command line: ro root=UUID=bfc88f62-f080-492f-a9f5-17f0c7c6215a rd_NO_LUKS console=ttyS0,115200n8 rd_NO_MD KEYBOARDTYPE=pc KEYTABLE=jp106 LANG=ja_JP.UTF-8 rd_NO_LVM rd_NO_DM intel_iommu=on irqpoll nr_cpus=1 reset_devices cgroup_disable=memory mce=off memmap=exactmap memmap=627K@4K memmap=130425K@770675K elfcorehdr=901100K memmap=4K$0K memmap=9K$631K memmap=100K$924K memmap=8K$3137080K memmap=52K#3137088K memmap=204K#3137140K memmap=64K$3137344K memmap=8268K$3137460K memmap=262144K$3670016K memmap=4K$4175872K memmap=2048K$4192256K
Intel-IOMMU: enabled
Misrouted IRQ fixup and polling support enabled
This may significantly impact system performance
PID hash table entries: 512 (order: 0, 4096 bytes)
AGP: Checking aperture...
AGP: No AGP bridge found
Memory: 101568K/131048K available (5867K kernel code, 1199K rwdata, 2848K rodata, 1748K init, 2104K bss, 29480K reserved, 0K cma-reserved)
Hierarchical RCU implementation.
RCU restricting CPUs from NR_CPUS=8192 to nr_cpu_ids=1.
RCU: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1
NR_IRQS:524544 nr_irqs:256 16
Spurious LAPIC timer interrupt on cpu 0
Console: colour dummy device 80x25
console [ttyS0] enabled
tsc: Fast TSC calibration using PIT
tsc: Detected 2133.414 MHz processor
Calibrating delay loop (skipped), value calculated using timer frequency.. 4266.82 BogoMIPS (lpj=2133414)
pid_max: default: 32768 minimum: 301
ACPI: Core revision 20141107
ACPI: All ACPI Tables successfully acquired
Security Framework initialized
SELinux: Initializing.
Dentry cache hash table entries: 16384 (order: 5, 131072 bytes)
Inode-cache hash table entries: 8192 (order: 4, 65536 bytes)
Mount-cache hash table entries: 512 (order: 0, 4096 bytes)
Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes)
Initializing cgroup subsys devices
Initializing cgroup subsys freezer
Initializing cgroup subsys net_cls
Initializing cgroup subsys blkio
Initializing cgroup subsys perf_event
CPU: Physical Processor ID: 1
CPU: Processor Core ID: 0
Last level iTLB entries: 4KB 512, 2MB 7, 4MB 7
Last level dTLB entries: 4KB 512, 2MB 32, 4MB 32, 1GB 0
Freeing SMP alternatives memory: 20K (ffffffff81ce2000 - ffffffff81ce7000)
ftrace: allocating 23906 entries in 94 pages
dmar: Host address width 40
dmar: DRHD base: 0x000000fbffe000 flags: 0x1
dmar: IOMMU 0: reg_base_addr fbffe000 ver 1:0 cap c90780106f0462 ecap f0207e
dmar: RMRR base: 0x000000000e7000 end: 0x000000000e9fff
dmar: RMRR base: 0x000000bf7ed000 end: 0x000000bf7fffff
dmar: ATSR flags: 0x0
IOAPIC id 6 under DRHD base 0xfbffe000 IOMMU 0
IOAPIC id 7 under DRHD base 0xfbffe000 IOMMU 0
Enabled IRQ remapping in xapic mode
..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
smpboot: CPU0: Intel(R) Xeon(R) CPU L5630 @ 2.13GHz (fam: 06, model: 2c, stepping: 02)
Performance Events: PEBS fmt1+, 16-deep LBR, Westmere events, Broken BIOS detected, complain to your hardware vendor.
[Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
Intel PMU driver.
perf_event_intel: CPUID marked event: 'bus cycles' unavailable
... version: 3
... bit width: 48
... generic registers: 4
... value mask: 0000ffffffffffff
... max period: 000000007fffffff
... fixed-purpose events: 3
... event mask: 000000070000000f
x86: Booted up 1 node, 1 CPUs
smpboot: Total of 1 processors activated (4266.82 BogoMIPS)
NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
devtmpfs: initialized
NET: Registered protocol family 16
cpuidle: using governor ladder
cpuidle: using governor menu
ACPI FADT declares the system doesn't support PCIe ASPM, so disable it
ACPI: bus type PCI registered
acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
PCI: MMCONFIG for domain 0000 [bus 00-ff] at [mem 0xe0000000-0xefffffff] (base 0xe0000000)
PCI: MMCONFIG at [mem 0xe0000000-0xefffffff] reserved in E820
PCI: Using configuration type 1 for base access
ACPI: Added _OSI(Module Device)
ACPI: Added _OSI(Processor Device)
ACPI: Added _OSI(3.0 _SCP Extensions)
ACPI: Added _OSI(Processor Aggregator Device)
ACPI: Executed 1 blocks of module-level executable AML code
ACPI: Interpreter enabled
ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S2_] (20141107/hwxface-580)
ACPI Exception: AE_NOT_FOUND, While evaluating Sleep State [\_S3_] (20141107/hwxface-580)
ACPI: (supports S0 S1 S4 S5)
ACPI: Using IOAPIC for interrupt routing
HEST: Table parsing has been initialized.
PCI: Using host bridge windows from ACPI; if necessary, use "pci=nocrs" and report a bug
ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI]
acpi PNP0A08:00: _OSC failed (AE_NOT_FOUND); disabling ASPM
acpi PNP0A08:00: ignoring host bridge window [mem 0x000d0000-0x000dffff] (conflicts with Adapter ROM [mem 0x000ce800-0x000d4fff])
PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [bus 00-ff]
pci_bus 0000:00: root bus resource [io 0x0000-0x0cf7]
pci_bus 0000:00: root bus resource [io 0x0d00-0xffff]
pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000bffff]
pci_bus 0000:00: root bus resource [mem 0xc0000000-0xdfffffff]
pci_bus 0000:00: root bus resource [mem 0xf0000000-0xfed8ffff]
pci 0000:00:01.0: System wakeup disabled by ACPI
pci 0000:00:02.0: System wakeup disabled by ACPI
pci 0000:00:03.0: System wakeup disabled by ACPI
pci 0000:00:07.0: System wakeup disabled by ACPI
pci 0000:00:08.0: System wakeup disabled by ACPI
pci 0000:00:09.0: System wakeup disabled by ACPI
pci 0000:00:0a.0: System wakeup disabled by ACPI
pci 0000:00:1a.0: System wakeup disabled by ACPI
pci 0000:00:1a.1: System wakeup disabled by ACPI
pci 0000:00:1a.2: System wakeup disabled by ACPI
pci 0000:00:1a.7: System wakeup disabled by ACPI
pci 0000:00:1c.0: System wakeup disabled by ACPI
pci 0000:00:1d.0: System wakeup disabled by ACPI
pci 0000:00:1d.1: System wakeup disabled by ACPI
pci 0000:00:1d.2: System wakeup disabled by ACPI
pci 0000:00:1d.7: System wakeup disabled by ACPI
pci 0000:00:1e.0: System wakeup disabled by ACPI
pci 0000:01:00.0: System wakeup disabled by ACPI
pci 0000:01:00.1: System wakeup disabled by ACPI
pci 0000:00:01.0: PCI bridge to [bus 01]
pci 0000:02:00.0: System wakeup disabled by ACPI
pci 0000:02:00.1: System wakeup disabled by ACPI
pci 0000:00:02.0: PCI bridge to [bus 02]
pci 0000:00:03.0: PCI bridge to [bus 03]
pci 0000:00:07.0: PCI bridge to [bus 10]
pci 0000:00:08.0: PCI bridge to [bus 20]
pci 0000:00:09.0: PCI bridge to [bus 30]
pci 0000:00:0a.0: PCI bridge to [bus 40]
pci 0000:00:1c.0: PCI bridge to [bus 50]
pci 0000:00:1c.4: PCI bridge to [bus 60]
pci 0000:00:1e.0: PCI bridge to [bus 61] (subtractive decode)
ACPI: PCI Interrupt Link [LNKA] (IRQs 3 4 6 7 *10 11 12 14 15), disabled.
ACPI: PCI Interrupt Link [LNKB] (IRQs *5), disabled.
ACPI: PCI Interrupt Link [LNKC] (IRQs 3 4 6 7 10 *11 12 14 15), disabled.
ACPI: PCI Interrupt Link [LNKD] (IRQs 3 4 6 7 10 11 12 14 *15), disabled.
ACPI: PCI Interrupt Link [LNKE] (IRQs 3 4 6 7 10 11 12 14 15) *0, disabled.
ACPI: PCI Interrupt Link [LNKF] (IRQs 3 4 6 7 10 11 12 *14 15), disabled.
ACPI: PCI Interrupt Link [LNKG] (IRQs 3 4 6 7 10 11 12 14 15) *0, disabled.
ACPI: PCI Interrupt Link [LNKH] (IRQs *3 4 6 7 10 11 12 14 15), disabled.
ACPI: Enabled 2 GPEs in block 00 to 3F
vgaarb: setting as boot device: PCI:0000:60:00.0
vgaarb: device added: PCI:0000:60:00.0,decodes=io+mem,owns=io+mem,locks=none
vgaarb: loaded
vgaarb: bridge control possible 0000:60:00.0
SCSI subsystem initialized
ACPI: bus type USB registered
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
PCI: Using ACPI for IRQ routing
PCI: Discovered peer bus fe
PCI host bridge to bus 0000:fe
pci_bus 0000:fe: root bus resource [io 0x0000-0xffff]
pci_bus 0000:fe: root bus resource [mem 0x00000000-0xffffffffff]
pci_bus 0000:fe: No busn resource found for root bus, will use [bus fe-ff]
PCI: Discovered peer bus ff
PCI host bridge to bus 0000:ff
pci_bus 0000:ff: root bus resource [io 0x0000-0xffff]
pci_bus 0000:ff: root bus resource [mem 0x00000000-0xffffffffff]
pci_bus 0000:ff: No busn resource found for root bus, will use [bus ff-ff]
NetLabel: Initializing
NetLabel: domain hash size = 128
NetLabel: protocols = UNLABELED CIPSOv4
NetLabel: unlabeled traffic allowed by default
hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
hpet0: 4 comparators, 64-bit 14.318180 MHz counter
Switched to clocksource hpet
pnp: PnP ACPI init
system 00:00: [mem 0xfbf00000-0xfbffffff] could not be reserved
system 00:00: [mem 0xfc000000-0xfcffffff] has been reserved
system 00:00: [mem 0xfd000000-0xfdffffff] has been reserved
system 00:00: [mem 0xfe000000-0xfebfffff] has been reserved
system 00:00: [mem 0xfec8a000-0xfec8afff] could not be reserved
system 00:00: [mem 0xfed10000-0xfed10fff] has been reserved
system 00:04: [io 0x0ca0-0x0ca1] has been reserved
system 00:04: [io 0x0ca4-0x0ca7] has been reserved
system 00:04: [io 0x0caa-0x0caf] has been reserved
system 00:04: [io 0x04d0-0x04d1] has been reserved
system 00:04: [io 0x0800-0x087f] has been reserved
system 00:04: [io 0x0500-0x057f] has been reserved
system 00:04: [mem 0xfed1c000-0xfed1ffff] has been reserved
system 00:04: [mem 0xfed20000-0xfed3ffff] has been reserved
system 00:04: [mem 0xfed40000-0xfed8ffff] has been reserved
system 00:05: [mem 0xfec00000-0xfec00fff] could not be reserved
system 00:05: [mem 0xfee00000-0xfee00fff] has been reserved
system 00:07: [mem 0xe0000000-0xefffffff] has been reserved
system 00:08: [mem 0x00000000-0x0009ffff] could not be reserved
system 00:08: [mem 0x000e0000-0x000fffff] could not be reserved
system 00:08: [mem 0x00100000-0xbfffffff] could not be reserved
system 00:08: [mem 0xfed90000-0xffffffff] could not be reserved
pnp: PnP ACPI: found 9 devices
pci 0000:00:01.0: PCI bridge to [bus 01]
pci 0000:00:01.0: bridge window [io 0xa000-0xafff]
pci 0000:00:01.0: bridge window [mem 0xfa800000-0xfa8fffff]
pci 0000:00:02.0: PCI bridge to [bus 02]
pci 0000:00:02.0: bridge window [io 0xb000-0xbfff]
pci 0000:00:02.0: bridge window [mem 0xfa900000-0xfa9fffff]
pci 0000:00:03.0: PCI bridge to [bus 03]
pci 0000:00:03.0: bridge window [io 0xc000-0xcfff]
pci 0000:00:03.0: bridge window [mem 0xfaa00000-0xfaafffff]
pci 0000:00:03.0: bridge window [mem 0xc0000000-0xc01fffff 64bit pref]
pci 0000:00:07.0: PCI bridge to [bus 10]
pci 0000:00:08.0: PCI bridge to [bus 20]
pci 0000:00:09.0: PCI bridge to [bus 30]
pci 0000:00:09.0: bridge window [io 0xd000-0xdfff]
pci 0000:00:09.0: bridge window [mem 0xfab00000-0xfabfffff]
pci 0000:00:0a.0: PCI bridge to [bus 40]
pci 0000:00:1c.0: PCI bridge to [bus 50]
pci 0000:00:1c.0: bridge window [io 0xe000-0xefff]
pci 0000:00:1c.0: bridge window [mem 0xfac00000-0xfaffffff]
pci 0000:00:1c.0: bridge window [mem 0xc0200000-0xc03fffff 64bit pref]
pci 0000:60:00.0: BAR 6: assigned [mem 0xfb800000-0xfb80ffff pref]
pci 0000:00:1c.4: PCI bridge to [bus 60]
pci 0000:00:1c.4: bridge window [io 0x1000-0x1fff]
pci 0000:00:1c.4: bridge window [mem 0xfb000000-0xfbefffff]
pci 0000:00:1c.4: bridge window [mem 0xf9000000-0xf9ffffff 64bit pref]
pci 0000:00:1e.0: PCI bridge to [bus 61]
NET: Registered protocol family 2
TCP established hash table entries: 1024 (order: 1, 8192 bytes)
TCP bind hash table entries: 1024 (order: 2, 16384 bytes)
TCP: Hash tables configured (established 1024 bind 1024)
TCP: reno registered
UDP hash table entries: 256 (order: 1, 8192 bytes)
UDP-Lite hash table entries: 256 (order: 1, 8192 bytes)
NET: Registered protocol family 1
pci 0000:01:00.0: Disabling L0s
pci 0000:01:00.0: can't disable ASPM; OS doesn't have ASPM control
pci 0000:01:00.1: Disabling L0s
pci 0000:01:00.1: can't disable ASPM; OS doesn't have ASPM control
pci 0000:02:00.0: Disabling L0s
pci 0000:02:00.0: can't disable ASPM; OS doesn't have ASPM control
pci 0000:02:00.1: Disabling L0s
pci 0000:02:00.1: can't disable ASPM; OS doesn't have ASPM control
Trying to unpack rootfs image as initramfs...
Freeing initrd memory: 5352K (ffff880036ab5000 - ffff880036fef000)
IOMMU Skip disabling iommu hardware translations
IOMMU Copying translate tables from panicked kernel
IOMMU: root_cache:0xffff880033ed8000 phys:0x0000bac2f000
IOMMU:0 Domain ids from panicked kernel:
DID did:12(0x000c)
DID did:11(0x000b)
DID did:10(0x000a)
DID did:9(0x0009)
DID did:24(0x0018)
DID did:23(0x0017)
DID did:22(0x0016)
DID did:21(0x0015)
DID did:8(0x0008)
DID did:7(0x0007)
DID did:6(0x0006)
DID did:5(0x0005)
DID did:4(0x0004)
DID did:3(0x0003)
DID did:2(0x0002)
DID did:1(0x0001)
DID did:0(0x0000)
DID did:20(0x0014)
DID did:19(0x0013)
DID did:18(0x0012)
DID did:17(0x0011)
DID did:16(0x0010)
DID did:15(0x000f)
DID did:14(0x000e)
DID did:13(0x000d)
----------------------------------------
IOMMU: dmar0 using Queued invalidation
PCI-DMA: Intel(R) Virtualization Technology for Directed I/O
futex hash table entries: 256 (order: 2, 16384 bytes)
audit: initializing netlink subsys (disabled)
audit: type=2000 audit(1420624212.480:1): initialized
HugeTLB registered 2 MB page size, pre-allocated 0 pages
VFS: Disk quotas dquot_6.5.2
VFS: Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
bounce: pool size: 64 pages
Block layer SCSI generic (bsg) driver version 0.4 loaded (major 252)
dmar: DRHD: handling fault status reg 2
dmar: DMAR:[DMA Write] Request device [01:00.0] fault addr ffdf2000
DMAR:[fault reason 05] PTE Write access is not set
io scheduler noop registered
io scheduler deadline registered
io scheduler cfq registered (default)
pci_hotplug: PCI Hot Plug PCI Core version: 0.5
pciehp: PCI Express Hot Plug Controller Driver version: 0.4
input: Power Button as /devices/LNXSYSTM:00/LNXSYBUS:00/PNP0C0C:00/input/input0
ACPI: Power Button [PWRB]
input: Power Button as /devices/LNXSYSTM:00/LNXPWRBN:00/input/input1
ACPI: Power Button [PWRF]
APEI: Can not request [mem 0xbf7b3d3a-0xbf7b3d3b] for APEI ERST registers
[Firmware Warn]: GHES: Poll interval is 0 for generic hardware error source: 1, disabled.
GHES: APEI firmware first mode is enabled by WHEA _OSC.
Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
serial 00:02: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A
Non-volatile memory driver v1.3
Linux agpgart interface v0.103
brd: module loaded
loop: module loaded
libphy: Fixed MDIO Bus: probed
ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
ehci-pci: EHCI PCI platform driver
ehci-pci 0000:00:1a.7: EHCI Host Controller
ehci-pci 0000:00:1a.7: new USB bus registered, assigned bus number 1
ehci-pci 0000:00:1a.7: debug port 1
ehci-pci 0000:00:1a.7: irq 18, io mem 0xfa7de000
ehci-pci 0000:00:1a.7: USB 2.0 started, EHCI 1.00
usb usb1: New USB device found, idVendor=1d6b, idProduct=0002
usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb1: Product: EHCI Host Controller
dmar: DRHD: handling fault status reg 102
dmar: DMAR:[DMA Write] Request device [01:00.0] fault addr ffce8000
DMAR:[fault reason 05] PTE Write access is not set
usb usb1: Manufacturer: Linux 3.19.0-rc3 ehci_hcd
usb usb1: SerialNumber: 0000:00:1a.7
hub 1-0:1.0: USB hub found
hub 1-0:1.0: 6 ports detected
ehci-pci 0000:00:1d.7: EHCI Host Controller
ehci-pci 0000:00:1d.7: new USB bus registered, assigned bus number 2
ehci-pci 0000:00:1d.7: debug port 1
ehci-pci 0000:00:1d.7: irq 23, io mem 0xfa7dc000
ehci-pci 0000:00:1d.7: USB 2.0 started, EHCI 1.00
usb usb2: New USB device found, idVendor=1d6b, idProduct=0002
usb usb2: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb2: Product: EHCI Host Controller
usb usb2: Manufacturer: Linux 3.19.0-rc3 ehci_hcd
usb usb2: SerialNumber: 0000:00:1d.7
hub 2-0:1.0: USB hub found
hub 2-0:1.0: 6 ports detected
ohci_hcd: USB 1.1 'Open' Host Controller (OHCI) Driver
ohci-pci: OHCI PCI platform driver
uhci_hcd: USB Universal Host Controller Interface driver
uhci_hcd 0000:00:1a.0: UHCI Host Controller
uhci_hcd 0000:00:1a.0: new USB bus registered, assigned bus number 3
uhci_hcd 0000:00:1a.0: detected 2 ports
uhci_hcd 0000:00:1a.0: irq 16, io base 0x00009c00
usb usb3: New USB device found, idVendor=1d6b, idProduct=0001
usb usb3: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb3: Product: UHCI Host Controller
usb usb3: Manufacturer: Linux 3.19.0-rc3 uhci_hcd
usb usb3: SerialNumber: 0000:00:1a.0
hub 3-0:1.0: USB hub found
hub 3-0:1.0: 2 ports detected
uhci_hcd 0000:00:1a.1: UHCI Host Controller
uhci_hcd 0000:00:1a.1: new USB bus registered, assigned bus number 4
uhci_hcd 0000:00:1a.1: detected 2 ports
uhci_hcd 0000:00:1a.1: irq 21, io base 0x00009880
usb usb4: New USB device found, idVendor=1d6b, idProduct=0001
usb usb4: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb4: Product: UHCI Host Controller
usb usb4: Manufacturer: Linux 3.19.0-rc3 uhci_hcd
usb usb4: SerialNumber: 0000:00:1a.1
hub 4-0:1.0: USB hub found
hub 4-0:1.0: 2 ports detected
uhci_hcd 0000:00:1a.2: UHCI Host Controller
uhci_hcd 0000:00:1a.2: new USB bus registered, assigned bus number 5
uhci_hcd 0000:00:1a.2: detected 2 ports
uhci_hcd 0000:00:1a.2: irq 19, io base 0x00009800
usb usb5: New USB device found, idVendor=1d6b, idProduct=0001
usb usb5: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb5: Product: UHCI Host Controller
usb usb5: Manufacturer: Linux 3.19.0-rc3 uhci_hcd
usb usb5: SerialNumber: 0000:00:1a.2
hub 5-0:1.0: USB hub found
hub 5-0:1.0: 2 ports detected
uhci_hcd 0000:00:1d.0: UHCI Host Controller
uhci_hcd 0000:00:1d.0: new USB bus registered, assigned bus number 6
uhci_hcd 0000:00:1d.0: detected 2 ports
uhci_hcd 0000:00:1d.0: irq 23, io base 0x00009480
usb usb6: New USB device found, idVendor=1d6b, idProduct=0001
dmar: DRHD: handling fault status reg 202
dmar: DMAR:[DMA Write] Request device [01:00.0] fault addr ffef3000
DMAR:[fault reason 05] PTE Write access is not set
usb usb6: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb6: Product: UHCI Host Controller
usb usb6: Manufacturer: Linux 3.19.0-rc3 uhci_hcd
usb usb6: SerialNumber: 0000:00:1d.0
hub 6-0:1.0: USB hub found
hub 6-0:1.0: 2 ports detected
uhci_hcd 0000:00:1d.1: UHCI Host Controller
usb 1-2: new high-speed USB device number 2 using ehci-pci
uhci_hcd 0000:00:1d.1: new USB bus registered, assigned bus number 7
uhci_hcd 0000:00:1d.1: detected 2 ports
uhci_hcd 0000:00:1d.1: irq 19, io base 0x00009400
usb usb7: New USB device found, idVendor=1d6b, idProduct=0001
usb usb7: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb7: Product: UHCI Host Controller
usb usb7: Manufacturer: Linux 3.19.0-rc3 uhci_hcd
usb usb7: SerialNumber: 0000:00:1d.1
hub 7-0:1.0: USB hub found
hub 7-0:1.0: 2 ports detected
uhci_hcd 0000:00:1d.2: UHCI Host Controller
uhci_hcd 0000:00:1d.2: new USB bus registered, assigned bus number 8
uhci_hcd 0000:00:1d.2: detected 2 ports
uhci_hcd 0000:00:1d.2: irq 18, io base 0x00009080
usb usb8: New USB device found, idVendor=1d6b, idProduct=0001
usb usb8: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb8: Product: UHCI Host Controller
usb usb8: Manufacturer: Linux 3.19.0-rc3 uhci_hcd
usb usb8: SerialNumber: 0000:00:1d.2
hub 8-0:1.0: USB hub found
hub 8-0:1.0: 2 ports detected
i8042: PNP: No PS/2 controller found. Probing ports directly.
dmar: DRHD: handling fault status reg 302
dmar: DMAR:[DMA Write] Request device [01:00.0] fault addr ffce7000
DMAR:[fault reason 05] PTE Write access is not set
i8042: Failed to disable AUX port, but continuing anyway... Is this a SiS?
i8042: If AUX port is really absent please use the 'i8042.noaux' option
dmar: DRHD: handling fault status reg 402
dmar: DMAR:[DMA Write] Request device [01:00.0] fault addr ffce5000
DMAR:[fault reason 05] PTE Write access is not set
sched: RT throttling activated
serio: i8042 KBD port at 0x60,0x64 irq 1
mousedev: PS/2 mouse device common for all mice
rtc_cmos 00:01: RTC can wake from S4
rtc_cmos 00:01: rtc core: registered rtc_cmos as rtc0
rtc_cmos 00:01: alarms up to one month, y3k, 114 bytes nvram, hpet irqs
hidraw: raw HID events driver (C) Jiri Kosina
usb 1-2: New USB device found, idVendor=04b4, idProduct=6560
usb 1-2: New USB device strings: Mfr=0, Product=0, SerialNumber=0
usbcore: registered new interface driver usbhid
usbhid: USB HID core driver
drop_monitor: Initializing network drop monitor service
hub 1-2:1.0: USB hub found
TCP: cubic registered
hub 1-2:1.0: 4 ports detected
Initializing XFRM netlink socket
NET: Registered protocol family 17
mce: Unable to init device /dev/mcelog (rc: -5)
registered taskstats version 1
ima: No TPM chip found, activating TPM-bypass!
rtc_cmos 00:01: setting system clock to 2015-01-07 09:50:20 UTC (1420624220)
Freeing unused kernel memory: 1748K (ffffffff81b2d000 - ffffffff81ce2000)
Write protecting the kernel read-only data: 10240k
Freeing unused kernel memory: 264K (ffff8800305be000 - ffff880030600000)
Freeing unused kernel memory: 1248K (ffff8800308c8000 - ffff880030a00000)
Mounting proc filesystem
Mounting sysfs filesystem
Creating /dev
Creating initial device nodes
setfont: KDFONTOP: Inappropriate ioctl for dedevice-mapper: uevent: version 1.0.3
vice
Free memordevice-mapper: ioctl: 4.29.0-ioctl (2014-10-28) initialised: dm-devel@redhat.com
y/Total memory (free %): 76920 / 110200 ( 69.8004 )
Loading dm-mod.ko module
Loading dm-log.koip_tables: (C) 2000-2006 Netfilter Core Team
module
Loading dm-region-hash.nf_conntrack version 0.5.0 (860 buckets, 3440 max)
ko module
Loadiip6_tables: (C) 2000-2006 Netfilter Core Team
ng dm-mirror.ko module
Loading dm-zero.ko module
Loading dm-buNET: Registered protocol family 10
fio.ko module
Loading dm-snapshot.ko module
Lotun: Universal TUN/TAP device driver, 1.6
ading cpufreq_ontun: (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
demand.ko module
Loading nf_reject_ipv4.ko module
Loading nf_defrag_ipv4.ko moiTCO_vendor_support: vendor-support=0
dule
Loading ip_tables.ko modulinput: PC Speaker as /devices/platform/pcspkr/input/input3
e
Loading nf_conntrack.ko modulipmi message handler version 39.2
e
Loading ip6_tables.ko module\rgenirq: Flags mismatch irq 0. 00000080 (i801_smbus) vs. 00015a00 (timer)
CPU: 0 PID: 145 Comm: insmod Not tainted 3.19.0-rc3 #36
Hardware name: FUJITSU-SV PRIMERGY BX920 S2/D3030, BIOS 080015 Rev.3D81.3030 02/10/2012
ffff8800335d7940 ffff880033503a48 ffffffff815b200a 0000000000015a00
ffff88003603d600 ffff880033503aa8 ffffffff810a8ccc ffff880033503a98
0000000000000246 ffff880033503aa8 ffff880033ba5c00 ffff88003603d600
Call Trace:
[<ffffffff815b200a>] dump_stack+0x48/0x5e
[<ffffffff810a8ccc>] __setup_irq+0x39c/0x4f0
[<ffffffff810a9640>] request_threaded_irq+0x100/0x170
[<ffffffffa0195c30>] ? dmi_check_onboard_devices+0x1a0/0x1a0 [i2c_i801]
[<ffffffffa0195964>] i801_probe+0x4b4/0x5e0 [i2c_i801]
[<ffffffff815b4296>] ? mutex_lock+0x16/0x40
[<ffffffff812d16bc>] local_pci_probe+0x4c/0xb0
[<ffffffff812d17a9>] pci_call_probe+0x89/0xb0
[<ffffffff812d15fe>] ? pci_match_device+0xde/0x110
[<ffffffff812d1a89>] pci_device_probe+0x79/0xa0
[<ffffffff8139af02>] ? driver_sysfs_add+0x82/0xb0
[<ffffffff8139b181>] really_probe+0x81/0x350
[<ffffffff8139b497>] driver_probe_device+0x47/0xa0
[<ffffffff8139b59b>] __driver_attach+0xab/0xb0
[<ffffffff8139b4f0>] ? driver_probe_device+0xa0/0xa0
[<ffffffff8139b4f0>] ? driver_probe_device+0xa0/0xa0
[<ffffffff81399374>] bus_for_each_dev+0x94/0xb0
[<ffffffff8139ae2e>] driver_attach+0x1e/0x20
[<ffffffff8139a890>] bus_add_driver+0x1b0/0x250
[<ffffffff8139bc34>] driver_register+0x64/0xf0
[<ffffffff812d1b7c>] __pci_register_driver+0x4c/0x50
[<ffffffffa019a0a5>] i2c_i801_init+0xa5/0x1000 [i2c_i801]
[<ffffffffa019a000>] ? 0xffffffffa019a000
[<ffffffff81000287>] do_one_initcall+0xb7/0x1d0
[<ffffffff81175352>] ? __vunmap+0xc2/0x110
[<ffffffff810cf600>] do_init_module+0x30/0x1a0
[<ffffffff810d1dee>] load_module+0x4ee/0x620
[<ffffffff810ceeb0>] ? mod_sysfs_teardown+0x150/0x150
[<ffffffff81175bf5>] ? __vmalloc_node+0x35/0x40
[<ffffffff810ce7f0>] ? module_sect_show+0x30/0x30
[<ffffffff810d20a4>] SyS_init_module+0x94/0xc0
[<ffffffff815b6152>] system_call_fastpath+0x12/0x17
Loading ipv6.koi801_smbus 0000:00:1f.3: Failed to allocate irq 0: -16
module
Loadingi801_smbus 0000:00:1f.3: SMBus using polling
macvlan.ko module
Loading vhosEDAC MC: Ver: 3.0.0
t.ko module
Loading tun.ko modudca service started, version 1.12.1
le
Loading kvm.ko module
Loadipps_core: LinuxPPS API ver. 1 registered
ng uinput.ko modpps_core: Software ver. 5.3.6 - Copyright 2005-2007 Rodolfo Giometti <giometti@linux.it>
ule
Loading iTCO_vendor_support.ko module
Loading serio_raw.koFusion MPT base driver 3.04.20
module
LoadingCopyright (c) 1999-2008 LSI Corporation
pcspkr.ko module
Loading ipmi_msghandler.ko module
Loading tpm_infineon.ko momegasas: 06.805.06.01-rc1
dule
Loading i2megasas: 0x1000:0x0079:0x1734:0x1176: c-i801.ko modulebus 3:slot 0:func 0
Loading mfd-comegaraid_sas 0000:03:00.0: Can't allocate Firmware crash dump DMA buffer
re.ko module
Lomegasas: Waiting for FW to come to ready state
ading edac_core.ko module
Loading sg.ko module
Loading dca.ko module
Loading i2c-algo-bit.ko module
Loading pps_core.ko modudmar: DRHD: handling fault status reg 502
dmar: DMAR:[DMA Write] Request device [03:00.0] fault addr ffdfe000
DMAR:[fault reason 05] PTE Write access is not set
le
Loading acpiusb 5-1: new full-speed USB device number 2 using uhci_hcd
-cpufreq.ko modumegasas: FW now in Ready state
le
insmod: can'megaraid_sas 0000:03:00.0: [scsi0]: FW supports<0> MSIX vector,Online CPUs: <1>,Current MSIX <1>
t insert '/lib/modules/3.19.0-rc3/acpi-cpufreq.ko': No such device
Loading jbd2.ko module
Loading mbcache.ko module
Loading mptbase.ko module
Loading scsi_transport_sas.ko module
Loading scsi_transport_fc.ko module
Loading megaraid_sas.ko module
megasas_init_mfi: fw_support_ieee=0
megasas: INIT adapter done
megaraid_sas 0000:03:00.0: Controller type: MR,Memory size is: 512MB
scsi host0: LSI SAS based MegaRAID driver
usb 5-1: New USB device found, idVendor=0000, idProduct=0000
usb 5-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3
usb 5-1: Product: iRMC USB Device
usb 5-1: Manufacturer: Fujitsu
usb 5-1: SerialNumber: 401B794770E1CC
scsi 0:2:0:0: Direct-Access LSI RAID 5/6 SAS 6G 2.13 PQ: 0 ANSI: 5
scsi 0:2:1:0: Direct-Access LSI RAID 5/6 SAS 6G 2.13 PQ: 0 ANSI: 5
scsi 0:2:2:0: Direct-Access LSI RAID 5/6 SAS 6G 2.13 PQ: 0 ANSI: 5
scsi 0:2:3:0: Direct-Access LSI RAID 5/6 SAS 6G 2.13 PQ: 0 ANSI: 5
scsi 0:2:4:0: Direct-Access LSI RAID 5/6 SAS 6G 2.13 PQ: 0 ANSI: 5
scsi 0:2:5:0: Direct-Access LSI RAID 5/6 SAS 6G 2.13 PQ: 0 ANSI: 5
scsi 0:2:6:0: Direct-Access LSI RAID 5/6 SAS 6G 2.13 PQ: 0 ANSI: 5
input: Fujitsu iRMC USB Device as /devices/pci0000:00/0000:00:1a.2/usb5/5-1/5-1:1.0/0003:0000:0000.0001/input/input4
scsi 0:2:7:0: Direct-Access LSI RAID 5/6 SAS 6G 2.13 PQ: 0 ANSI: 5
sd 0:2:0:0: [sda] 285671424 512-byte logical blocks: (146 GB/136 GiB)
sd 0:2:0:0: [sda] 4096-byte physical blocks
sd 0:2:0:0: Attached scsi generic sg0 type 0
sd 0:2:0:0: [sda] Write Protect is off
sd 0:2:1:0: [sdb] 285671424 512-byte logical blocks: (146 GB/136 GiB)
sd 0:2:1:0: [sdb] 4096-byte physical blocks
sd 0:2:1:0: Attached scsi generic sg1 type 0
sd 0:2:0:0: [sda] Write cache: enabled, read cache: disabled, doesn't support DPO or FUA
hid-generic 0003:0000:0000.0001: input,hidraw0: USB HID v1.11 Keyboard [Fujitsu iRMC USB Device] on usb-0000:00:1a.2-1/input0
sd 0:2:1:0: [sdb] Write Protect is off
sd 0:2:2:0: [sdc] 285671424 512-byte logical blocks: (146 GB/136 GiB)
sd 0:2:2:0: [sdc] 4096-byte physical blocks
sd 0:2:2:0: Attached scsi generic sg2 type 0
sd 0:2:1:0: [sdb] Write cache: enabled, read cache: disabled, doesn't support DPO or FUA
sd 0:2:2:0: [sdc] Write Protect is off
sd 0:2:3:0: [sdd] 285671424 512-byte logical blocks: (146 GB/136 GiB)
sd 0:2:3:0: [sdd] 4096-byte physical blocks
sd 0:2:3:0: Attached scsi generic sg3 type 0
sd 0:2:2:0: [sdc] Write cache: enabled, read cache: disabled, doesn't support DPO or FUA
sd 0:2:3:0: [sdd] Write Protect is off
sda: sda1
sd 0:2:4:0: [sde] 285671424 512-byte logical blocks: (146 GB/136 GiB)
sd 0:2:4:0: [sde] 4096-byte physical blocks
sd 0:2:4:0: Attached scsi generic sg4 type 0
sd 0:2:0:0: [sda] Attached SCSI disk
Switched to clocksource tsc
input: Fujitsu iRMC USB Device as /devices/pci0000:00/0000:00:1a.2/usb5/5-1/5-1:1.1/0003:0000:0000.0002/input/input5
sd 0:2:3:0: [sdd] Write cache: enabled, read cache: disabled, doesn't support DPO or FUA
sd 0:2:4:0: [sde] Write Protect is off
sd 0:2:5:0: [sdf] 285671424 512-byte logical blocks: (146 GB/136 GiB)
sd 0:2:5:0: [sdf] 4096-byte physical blocks
sdb: sdb1
sd 0:2:5:0: Attached scsi generic sg5 type 0
sd 0:2:1:0: [sdb] Attached SCSI disk
sd 0:2:4:0: [sde] Write cache: enabled, read cache: disabled, doesn't support DPO or FUA
sdc: sdc1 sdc2
sd 0:2:6:0: [sdg] 285671424 512-byte logical blocks: (146 GB/136 GiB)
sd 0:2:6:0: [sdg] 4096-byte physical blocks
sd 0:2:6:0: Attached scsi generic sg6 type 0
sd 0:2:2:0: [sdc] Attached SCSI disk
sd 0:2:5:0: [sdf] Write Protect is off
hid-generic 0003:0000:0000.0002: input,hidraw1: USB HID v1.11 Mouse [Fujitsu iRMC USB Device] on usb-0000:00:1a.2-1/input1
sd 0:2:7:0: [sdh] 285671424 512-byte logical blocks: (146 GB/136 GiB)
sd 0:2:7:0: [sdh] 4096-byte physical blocks
sdd: sdd1 sdd2
sd 0:2:7:0: Attached scsi generic sg7 type 0
sd 0:2:5:0: [sdf] Write cache: enabled, read cache: disabled, doesn't support DPO or FUA
sd 0:2:3:0: [sdd] Attached SCSI disk
sd 0:2:6:0: [sdg] Write Protect is off
sd 0:2:7:0: [sdh] Write Protect is off
sd 0:2:6:0: [sdg] Write cache: enabled, read cache: disabled, doesn't support DPO or FUA
sd 0:2:7:0: [sdh] Write cache: enabled, read cache: disabled, doesn't support DPO or FUA
sde: sde1 sde2 sde3 sde4 < sde5 sde6 sde7 sde8 >
sdf: sdf1 sdf2
sd 0:2:5:0: [sdf] Attached SCSI disk
sd 0:2:4:0: [sde] Attached SCSI disk
sdg: unknown partition table
sdh: unknown partition table
sd 0:2:6:0: [sdg] Attached SCSI disk
sd 0:2:7:0: [sdh] Attached SCSI disk
Loading ipt_REJECT.ko module
Loading nf_conntrack_ipv4.ko module
Loading iptable_filter.ko modkvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL does not work properly. Using workaround
ule
Loading nf_iTCO_wdt: Intel TCO WatchDog Timer Driver v1.11
reject_ipv6.ko mIPMI System Interface driver.
odule
Loading nipmi_si: probing via ACPI
f_defrag_ipv6.koipmi_si 00:06: [io 0x0ca2-0x0ca3] regsize 1 spacing 1 irq 0
module
Loadingipmi_si: Adding ACPI-specified kcs state machine xt_state.ko mod
ule
Loading ip6ipmi_si: probing via SMBIOS
table_filter.ko ipmi_si: SMBIOS: io 0xca2 regsize 1 spacing 1 irq 0
ipmi_si: Adding SMBIOS-specified kcs state machinemodule
Loading duplicate interface
macvtap.ko modulipmi_si: Trying ACPI-specified kcs state machine at i/o address 0xca2, slave address 0x0, irq 0
e
Loading kvm-intel.ko module
Loading iTCO_wdt.ko module
Loading ipmi_si.ko module
ipmi_si 00:06: Found new BMC (man_id: 0x002880, prod_id: 0x0276, dev_id: 0x32)
ipmi_si 00:06: IPMI kcs interface initialized
Loading lpc_ich.iTCO_wdt: Found a ICH10 TCO device (Version=2, TCOBASE=0x0860)
ko module
iTCO_wdt: initialized. heartbeat=30 sec (nowayout=0)
ACPI Warning: SystemIO range 0x0000000000000828-0x000000000000082f conflicts with OpRegion 0x0000000000000800-0x000000000000084f (\PMRG) (20141107/utaddress-258)
ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
ACPI Warning: SystemIO range 0x0000000000000500-0x000000000000052f conflicts with OpRegion 0x0000000000000500-0x000000000000051b (\_SI_.BLNK) (20141107/utaddress-258)
ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
lpc_ich: Resource conflict(s) found affecting gpio_ich
Loading ioatdma.ioatdma: Intel(R) QuickData Technology Driver 4.00
ko module
Loading i7core_edac.ko module
EDAC MC1: Giving out device to module i7core_edac.c controller i7 core #1: DEV 0000:fe:03.0 (POLLED)
EDAC PCI0: Giving out device to module i7core_edac controller EDAC PCI controller: DEV 0000:fe:03.0 (POLLED)
EDAC MC0: Giving out device to module i7core_edac.c controller i7 core #0: DEV 0000:ff:03.0 (POLLED)
EDAC PCI1: Giving out device to module i7core_edac controller EDAC PCI controller: DEV 0000:ff:03.0 (POLLED)
EDAC i7core: Driver loaded, 2 memory controller(s) found.
Loading ptp.ko mPTP clock support registered
odule
Loading ext4.ko module
Loading mptscsih.ko module
Loading lpfc.ko module
Emulex LightPulse Fibre Channel SCSI driver 10.4.8000.0.
Copyright(c) 2004-2014 Emulex. All rights reserved.
scsi host1: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 30 device 00 irq 40
scsi host2: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 30 device 01 irq 43
Loading ip6t_REJECT.ko module
Loading nf_conntrack_ipv6.ko moduigb: Intel(R) Gigabit Ethernet Network Driver - version 5.2.15-k
le
Loading vhosigb: Copyright (c) 2007-2014 Intel Corporation.
t_net.ko module
Loading igb.ko module
igb 0000:01:00.0: DCA enabled
igb 0000:01:00.0: Intel(R) Gigabit Ethernet Network Connection
igb 0000:01:00.0: eth0: (PCIe:2.5Gb/s:Width x2) c8:0a:a9:9d:fa:52
igb 0000:01:00.0: eth0: PBA No: Unknown
igb 0000:01:00.0: Using MSI-X interrupts. 1 rx queue(s), 1 tx queue(s)
igb 0000:01:00.1: DCA enabled
igb 0000:01:00.1: Intel(R) Gigabit Ethernet Network Connection
igb 0000:01:00.1: eth1: (PCIe:2.5Gb/s:Width x2) c8:0a:a9:9d:fa:53
igb 0000:01:00.1: eth1: PBA No: Unknown
igb 0000:01:00.1: Using MSI-X interrupts. 1 rx queue(s), 1 tx queue(s)
igb 0000:02:00.0: DCA enabled
igb 0000:02:00.0: Intel(R) Gigabit Ethernet Network Connection
igb 0000:02:00.0: eth2: (PCIe:2.5Gb/s:Width x2) c8:0a:a9:9d:fa:54
igb 0000:02:00.0: eth2: PBA No: Unknown
igb 0000:02:00.0: Using MSI-X interrupts. 1 rx queue(s), 1 tx queue(s)
igb 0000:02:00.1: DCA enabled
igb 0000:02:00.1: Intel(R) Gigabit Ethernet Network Connection
igb 0000:02:00.1: eth3: (PCIe:2.5Gb/s:Width x2) c8:0a:a9:9d:fa:55
igb 0000:02:00.1: eth3: PBA No: Unknown
igb 0000:02:00.1: Using MSI-X interrupts. 1 rx queue(s), 1 tx queue(s)
Loading mptsas.kFusion MPT SAS Host driver 3.04.20
o module
mptbase: ioc0: Initiating bringup
ioc0: LSISAS1064E B3: Capabilities={Initiator}
scsi host3: ioc0: LSISAS1064E B3, FwRev=011e0000h, Ports=1, MaxQ=277, IRQ=16
mptsas: ioc0: attaching sata device: fw_channel 0, fw_id 0, phy 0, sas_addr 0x1221000000000000
scsi 3:0:0:0: Direct-Access ATA ST9160511NS FTD4 PQ: 0 ANSI: 5
sd 3:0:0:0: Attached scsi generic sg8 type 0
sd 3:0:0:0: [sdi] 312581808 512-byte logical blocks: (160 GB/149 GiB)
mptsas: ioc0: attaching sata device: fw_channel 0, fw_id 1, phy 1, sas_addr 0x1221000001000000
scsi 3:0:1:0: Direct-Access ATA ST9160511NS FTD4 PQ: 0 ANSI: 5
sd 3:0:1:0: Attached scsi generic sg9 type 0
sd 3:0:1:0: [sdj] 312581808 512-byte logical blocks: (160 GB/149 GiB)
sd 3:0:0:0: [sdi] Write Protect is off
sd 3:0:0:0: [sdi] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
sd 3:0:1:0: [sdj] Write Protect is off
sd 3:0:1:0: [sdj] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
sdi: sdi1 sdi2 sdi3
random: nonblocking pool is initialized
sdj: sdj1 sdj2 sdj3 sdj4 < sdj5 sdj6 >
sd 3:0:0:0: [sdi] Attached SCSI disk
sd 3:0:1:0: [sdj] Attached SCSI disk
Waiting for required block device discovery
Waiting for device with scsi_ids: 360030057014aa330182f86352027e114 ...
Creating block device loop0
Creating block device loop1
Creating block device loop2
Creating block device loop3
Creating block device loop4
Creating block device loop5
Creating block device loop6
Creating block device loop7
Creating block device ram0
Creating block device ram1
Creating block device ram10
Creating block device ram11
Creating block device ram12
Creating block device ram13
Creating block device ram14
Creating block device ram15
Creating block device ram2
Creating block device ram3
Creating block device ram4
Creating block device ram5
Creating block device ram6
Creating block device ram7
Creating block device ram8
Creating block device ram9
Creating block d sda: sda1
evice sda
Creating block device sdb
sdb: sdb1
Creating block d sdc: sdc1 sdc2
evice sdc
Creating block d sdd: sdd1 sdd2
evice sdd
Creating block device sde
sde: sde1 sde2 sde3 sde4 < sde5 sde6 sde7 sde8 >
Creating block d sdf: sdf1 sdf2
evice sdf
Creating block d sdg: unknown partition table
evice sdg
Creating block d sdh: unknown partition table
evice sdh
Creating block device sdi
sdi: sdi1 sdi2 sdi3
Creating block device sdj
sdj: sdj1 sdj2 sdj3 sdj4 < sdj5 sdj6 >
Found device with scsi_ids: 360030057014aa330182f86352027e114
Creating Remain Block Devices
Saving to the local filesystem LABEL=/dump2
e2fsck 1.41.12 (17-May-2010)
/dump2: clean, 35/8929280 files, 929518/35708672 blocks
EXT4-fs (sda1): mounted filesystem with ordered data mode. Opts:
Free memory/Total memory (free %): 61812 / 110200 ( 56.0907 )
Saving vmcore-dmesg.txt
Missing the struct log size export
Saving vmcore-dmesg.txt failed
\rExcluding unnecessary pages : [ 0 %] \rExcluding unnecessary pages : [100 %] \rExcluding unnecessary pages : [ 12 %] \rExcluding unnecessary pages : [100 %] \rCopying data : [ 2 %] \rCopying data : [ 12 %] \rCopying data : [ 19 %] \rCopying data : [ 26 %] \rCopying data : [ 36 %] \rCopying data : [ 47 %] \rCopying data : [ 57 %] \rCopying data : [ 68 %] \rCopying data : [ 75 %] \rCopying data : [ 86 %] \rCopying data : [ 95 %] \rCopying data : [100 %]
^[[0JSaving core complete
kvm: exiting hardware virtualization
sd 3:0:1:0: [sdj] Synchronizing SCSI cache
sd 3:0:0:0: [sdi] Synchronizing SCSI cache
sd 0:2:7:0: [sdh] Synchronizing SCSI cache
sd 0:2:6:0: [sdg] Synchronizing SCSI cache
sd 0:2:5:0: [sdf] Synchronizing SCSI cache
sd 0:2:4:0: [sde] Synchronizing SCSI cache
sd 0:2:3:0: [sdd] Synchronizing SCSI cache
sd 0:2:2:0: [sdc] Synchronizing SCSI cache
sd 0:2:1:0: [sdb] Synchronizing SCSI cache
sd 0:2:0:0: [sda] Synchronizing SCSI cache
reboot: Restarting system
reboot: machine restart
^ permalink raw reply [flat|nested] 18+ messages in thread* RE: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel
2015-01-08 1:00 ` Takao Indoh
@ 2015-01-08 3:11 ` Li, Zhen-Hua
0 siblings, 0 replies; 18+ messages in thread
From: Li, Zhen-Hua @ 2015-01-08 3:11 UTC (permalink / raw)
To: Takao Indoh, bhe@redhat.com
Cc: dwmw2@infradead.org, joro@8bytes.org, vgoyal@redhat.com,
dyoung@redhat.com, iommu@lists.linux-foundation.org,
linux-kernel@vger.kernel.org, linux-pci@vger.kernel.org,
kexec@lists.infradead.org, alex.williamson@redhat.com,
ddutile@redhat.com, ishii.hironobu@jp.fujitsu.com,
bhelgaas@google.com, Hatch, Douglas B (HPS Linux PM),
Hoemann, Jerry, Vaden, Tom (HP Server OS Architecture),
Zhang, Li (Zoe@HPservers-Core-OE-PSC),
Mitchell, Lisa (MCLinux in Fort Collins),
billsumnerlinux@gmail.com, Wright, Randy (HP Servers Linux)
In your log, it seems something went wrong while copying pages.
Your last DMAR fault is:
DMAR:[fault reason 01] Present bit in root entry is clear
But this time, it is:
DMAR:[fault reason 05] PTE Write access is not set
So I think the line I added in this version works.
In function intel_iommu_load_translation_tables, the line is:
__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
I checked the code and found that I missed one flush in function copy_page_table.
What do you think about adding one flush after these lines:
ret = copy_page_table(&dma_pte_next,
(p->val & VTD_PAGE_MASK),
shift-9, page_addr | (u << shift),
iommu, bus, devfn, dve, ppap);
+ __iommu_flush_cache(iommu, phys_to_virt(dma_pte_next),
+ VTD_PAGE_SIZE);
If this does not work, I have no other ideas currently and will need to dig into the code more.
Regards
Zhenhua
-----Original Message-----
From: Takao Indoh [mailto:indou.takao@jp.fujitsu.com]
Sent: Thursday, January 08, 2015 9:00 AM
To: Li, Zhen-Hua; bhe@redhat.com
Cc: dwmw2@infradead.org; joro@8bytes.org; vgoyal@redhat.com; dyoung@redhat.com; iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org; linux-pci@vger.kernel.org; kexec@lists.infradead.org; alex.williamson@redhat.com; ddutile@redhat.com; ishii.hironobu@jp.fujitsu.com; bhelgaas@google.com; Hatch, Douglas B (HPS Linux PM); Hoemann, Jerry; Vaden, Tom (HP Server OS Architecture); Zhang, Li (Zoe@HPservers-Core-OE-PSC); Mitchell, Lisa (MCLinux in Fort Collins); billsumnerlinux@gmail.com; Wright, Randy (HP Servers Linux)
Subject: Re: [PATCH v7 0/10] iommu/vt-d: Fix intel vt-d faults in kdump kernel
On 2015/01/07 17:52, Li, ZhenHua wrote:
> Well, that's quite good news.
> Looking forward to Takao's testing on his system.
Unfortunately DMAR fault still occurs with this patch...
I attach console log.
Thanks,
Takao Indoh
>
> Regards
> Zhenhua
> On 01/07/2015 04:28 PM, Baoquan He wrote:
>> On 01/07/15 at 01:25pm, Li, ZhenHua wrote:
>>> It is the same as the last one I sent to you yesterday.
>>>
>>> The contiguous memory that is needed for data in this patchset:
>>> RE: PAGE_SIZE, 4096 Bytes;
>>> IRTE: 65536 * 16 ; 1M Bytes;
>>>
>>> It should use same memory as the old versions of this patchset. The
>>> changes for the last version do not need more memory.
>>
>> Hi Zhenhua,
>>
>> It was my mistake because I didn't strip the debug info of modules,
>> then initramfs is bloated very big. Just now I tested the latest
>> version, it works well and dump is successful. No dmar fault and
>> intr-remap fault seen any more, good job!
>>
>> Thanks
>> Baoquan
>>
>>
>>>
>>> Regards
>>> Zhenhua
>>>
>>> On 01/07/2015 01:02 PM, Baoquan He wrote:
>>>> On 01/07/15 at 12:11pm, Li, ZhenHua wrote:
>>>>> Many thanks to Takao Indoh and Baoquan He, for your testing on
>>>>> more different systems.
>>>>>
>>>>> Calls to the flush functions are added in this version.
>>>>>
>>>>> The usage of __iommu_flush_cache function :
>>>>> 1. Fixes a dump on Takao's system.
>>>>> 2. Reduces the count of faults on Baoquan's system.
>>>>
>>>> I am testing the version you sent to me yesterday afternoon. Is
>>>> that different from this patchset? I found your patchset may
>>>> reserve a big contiguous memory region under 896M; this will cause
>>>> the crashkernel reservation to fail when I set crashkernel=320M. The
>>>> reason I increased the crashkernel reservation to 320M is that 256M is
>>>> not enough and causes OOM when that patchset is tested.
>>>>
>>>> I am checking what happened.
>>>>
>>>>
>>>> Thanks
>>>> Baoquan
>>>>
>>>>>
>>>>> Regards
>>>>> Zhenhua
>>>>>
>>>>> On 01/07/2015 12:04 PM, Li, Zhen-Hua wrote:
>>>>>> This patchset is an update of Bill Sumner's patchset, implements a fix for:
>>>>>> If a kernel boots with intel_iommu=on on a system that supports
>>>>>> intel vt-d, when a panic happens, the kdump kernel will boot with these faults:
>>>>>>
>>>>>> dmar: DRHD: handling fault status reg 102
>>>>>> dmar: DMAR:[DMA Read] Request device [01:00.0] fault addr fff80000
>>>>>> DMAR:[fault reason 01] Present bit in root entry is clear
>>>>>>
>>>>>> dmar: DRHD: handling fault status reg 2
>>>>>> dmar: INTR-REMAP: Request device [[61:00.0] fault index 42
>>>>>> INTR-REMAP:[fault reason 34] Present field in the IRTE entry
>>>>>> is clear
>>>>>>
>>>>>> On some system, the interrupt remapping fault will also happen
>>>>>> even if the intel_iommu is not set to on, because the interrupt
>>>>>> remapping will be enabled when x2apic is needed by the system.
>>>>>>
>>>>>> The cause of the DMA fault is described in Bill's original
>>>>>> version, and the INTR-Remap fault is caused by a similar reason.
>>>>>> In short, the initialization of vt-d drivers causes the in-flight
>>>>>> DMA and interrupt requests to get a wrong response.
>>>>>>
>>>>>> To fix this problem, we modify the behavior of the intel vt-d
>>>>>> in the crashdump kernel:
>>>>>>
>>>>>> For DMA Remapping:
>>>>>> 1. To accept the vt-d hardware in an active state,
>>>>>> 2. Do not disable and re-enable the translation, keep it enabled.
>>>>>> 3. Use the old root entry table, do not rewrite the RTA register.
>>>>>> 4. Malloc and use new context entry table and page table, copy data from the
>>>>>> old ones that used by the old kernel.
>>>>>> 5. to use different portions of the iova address ranges for the device drivers
>>>>>> in the crashdump kernel than the iova ranges that were in-use at the time
>>>>>> of the panic.
>>>>>> 6. After device driver is loaded, when it issues the first dma_map command,
>>>>>> free the dmar_domain structure for this device, and generate a new one, so
>>>>>> that the device can be assigned a new and empty page table.
>>>>>> 7. When a new context entry table is generated, we also save its address to
>>>>>> the old root entry table.
>>>>>>
>>>>>> For Interrupt Remapping:
>>>>>> 1. To accept the vt-d hardware in an active state,
>>>>>> 2. Do not disable and re-enable the interrupt remapping, keep it enabled.
>>>>>> 3. Use the old interrupt remapping table, do not rewrite the IRTA register.
>>>>>> 4. When ioapic entry is setup, the interrupt remapping table is changed, and
>>>>>> the updated data will be stored to the old interrupt remapping table.
>>>>>>
>>>>>> Advantages of this approach:
>>>>>> 1. All manipulation of the IO-device is done by the Linux device-driver
>>>>>> for that device.
>>>>>> 2. This approach behaves in a manner very similar to operation without an
>>>>>> active iommu.
>>>>>> 3. Any activity between the IO-device and its RMRR areas is handled by the
>>>>>> device-driver in the same manner as during a non-kdump boot.
>>>>>> 4. If an IO-device has no driver in the kdump kernel, it is simply left alone.
>>>>>> This supports the practice of creating a special kdump kernel without
>>>>>> drivers for any devices that are not required for taking a crashdump.
>>>>>> 5. Minimal code-changes among the existing mainline intel vt-d code.
>>>>>>
>>>>>> Summary of changes in this patch set:
>>>>>> 1. Added some useful functions for root entry table in code intel-iommu.c
>>>>>> 2. Added new members to struct root_entry and struct irte;
>>>>>> 3. Functions to load old root entry table to iommu->root_entry from the memory
>>>>>> of old kernel.
>>>>>> 4. Functions to malloc new context entry table and page table and copy the data
>>>>>> from the old ones to the malloced new ones.
>>>>>> 5. Functions to enable support for DMA remapping in kdump kernel.
>>>>>> 6. Functions to load old irte data from the old kernel to the kdump kernel.
>>>>>> 7. Some code changes that support other behaviours that have been listed.
>>>>>> 8. In the new functions, use physical address as "unsigned long" type, not
>>>>>> pointers.
>>>>>>
>>>>>> Original version by Bill Sumner:
>>>>>> https://lkml.org/lkml/2014/1/10/518
>>>>>> https://lkml.org/lkml/2014/4/15/716
>>>>>> https://lkml.org/lkml/2014/4/24/836
>>>>>>
>>>>>> Zhenhua's updates:
>>>>>> https://lkml.org/lkml/2014/10/21/134
>>>>>> https://lkml.org/lkml/2014/12/15/121
>>>>>> https://lkml.org/lkml/2014/12/22/53
>>>>>>
>>>>>> Changelog[v7]:
>>>>>> 1. Use __iommu_flush_cache to flush the data to hardware.
>>>>>>
>>>>>> Changelog[v6]:
>>>>>> 1. Use "unsigned long" as type of physical address.
>>>>>> 2. Use new function unmap_device_dma to unmap the old dma.
>>>>>> 3. Some small incorrect bits order for aw shift.
>>>>>>
>>>>>> Changelog[v5]:
>>>>>> 1. Do not disable and re-enable translation and interrupt remapping.
>>>>>> 2. Use old root entry table.
>>>>>> 3. Use old interrupt remapping table.
>>>>>> 4. New functions to copy data from old kernel, and save to old kernel mem.
>>>>>> 5. New functions to save updated root entry table and irte table.
>>>>>> 6. Use intel_unmap to unmap the old dma;
>>>>>> 7. Allocate new pages while driver is being loaded.
>>>>>>
>>>>>> Changelog[v4]:
>>>>>> 1. Cut off the patches that move some defines and functions to new files.
>>>>>> 2. Reduce the number of patches to five, to make it easier to read.
>>>>>> 3. Changed the name of functions, make them consistent with current context
>>>>>> get/set functions.
>>>>>> 4. Add change to function __iommu_attach_domain.
>>>>>>
>>>>>> Changelog[v3]:
>>>>>> 1. Commented-out "#define DEBUG 1" to eliminate debug messages.
>>>>>> 2. Updated the comments about changes in each version.
>>>>>> 3. Fixed: one-line added to Copy-Translations patch to initialize the iovad
>>>>>> struct as recommended by Baoquan He [bhe@redhat.com]
>>>>>> init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
>>>>>>
>>>>>> Changelog[v2]:
>>>>>> The following series implements a fix for:
>>>>>> A kdump problem about DMA that has been discussed for a long time. That is,
>>>>>> when a kernel panics and boots into the kdump kernel, DMA started by the
>>>>>> panicked kernel is not stopped before the kdump kernel is booted and the
>>>>>> kdump kernel disables the IOMMU while this DMA continues. This causes the
>>>>>> IOMMU to stop translating the DMA addresses as IOVAs and begin to treat
>>>>>> them as physical memory addresses -- which causes the DMA to either:
>>>>>> (1) generate DMAR errors or
>>>>>> (2) generate PCI SERR errors or
>>>>>> (3) transfer data to or from incorrect areas of memory. Often this
>>>>>> causes the dump to fail.
>>>>>>
>>>>>> Changelog[v1]:
>>>>>> The original version.
>>>>>>
>>>>>> Changed in this version:
>>>>>> 1. Do not disable and re-enable translation and interrupt remapping.
>>>>>> 2. Use old root entry table.
>>>>>> 3. Use old interrupt remapping table.
>>>>>> 4. Use "unsigned long" as physical address.
>>>>>> 5. Use intel_unmap to unmap the old dma;
>>>>>>
>>>>>> Baoquan He <bhe@redhat.com> helps testing this patchset.
>>>>>>
>>>>>> iommu/vt-d: Update iommu_attach_domain() and its callers
>>>>>> iommu/vt-d: Items required for kdump
>>>>>> iommu/vt-d: Add domain-id functions
>>>>>> iommu/vt-d: functions to copy data from old mem
>>>>>> iommu/vt-d: Add functions to load and save old re
>>>>>> iommu/vt-d: datatypes and functions used for kdump
>>>>>> iommu/vt-d: enable kdump support in iommu module
>>>>>> iommu/vt-d: assign new page table for dma_map
>>>>>> iommu/vt-d: Copy functions for irte
>>>>>> iommu/vt-d: Use old irte in kdump kernel
>>>>>>
>>>>>> Signed-off-by: Bill Sumner <billsumnerlinux@gmail.com>
>>>>>> Signed-off-by: Li, Zhen-Hua <zhen-hual@hp.com>
>>>>>> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
>>>>>> Tested-by: Baoquan He <bhe@redhat.com>
>>>>>> ---
>>>>>> drivers/iommu/intel-iommu.c | 1050 +++++++++++++++++++++++++++++++++--
>>>>>> drivers/iommu/intel_irq_remapping.c | 104 +++-
>>>>>> include/linux/intel-iommu.h | 18 +
>>>>>> 3 files changed, 1130 insertions(+), 42 deletions(-)
>>>>>>
>>>>>
>>>
>
>
>
^ permalink raw reply [flat|nested] 18+ messages in thread