LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 02/22] powerpc/eeh: Info to trace passed devices
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The address of a passed-through PCI device (domain:bus:slot:func) might
be quite different from the perspective of the host and the guest. We
have to trace the address mapping so that we can emulate EEH RTAS
requests from the guest. The patch introduces additional fields to
eeh_pe and eeh_dev for that purpose.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h | 49 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 7782056..8bfb167 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -48,6 +48,17 @@ struct device_node;
 #define EEH_PE_RST_HOLD_TIME		250
 #define EEH_PE_RST_SETTLE_TIME		1800
 
+#ifdef CONFIG_KVM_EEH
+struct eeh_vfio_pci_addr {
+	struct kvm	*kvm;		/* KVM identifier		*/
+	unsigned int	buid_hi;	/* PHB BUID high		*/
+	unsigned int	buid_lo;	/* PHB BUID low			*/
+	unsigned char	bus;		/* Bus number			*/
+	unsigned char	devfn;		/* Slot and function		*/
+	int		pe_addr;	/* PE configuration address	*/
+};
+#endif /* CONFIG_KVM_EEH */
+
 /*
  * The struct is used to trace PE related EEH functionality.
  * In theory, there will have one instance of the struct to
@@ -72,6 +83,7 @@ struct device_node;
 #define EEH_PE_RESET		(1 << 2)	/* PE reset in progress	*/
 
 #define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
+#define EEH_PE_PASSTHROUGH	(1 << 9)	/* PE owned by guest	*/
 
 struct eeh_pe {
 	int type;			/* PE type: PHB/Bus/Device	*/
@@ -85,6 +97,9 @@ struct eeh_pe {
 	struct timeval tstamp;		/* Time on first-time freeze	*/
 	int false_positives;		/* Times of reported #ff's	*/
 	struct eeh_pe *parent;		/* Parent PE			*/
+#ifdef CONFIG_KVM_EEH
+	struct eeh_vfio_pci_addr gaddr;	/* Associated KVM guest address */
+#endif
 	struct list_head child_list;	/* Link PE to the child list	*/
 	struct list_head edevs;		/* Link list of EEH devices	*/
 	struct list_head child;		/* Child PEs			*/
@@ -93,6 +108,21 @@ struct eeh_pe {
 #define eeh_pe_for_each_dev(pe, edev, tmp) \
 		list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
 
+static inline bool eeh_pe_passed(struct eeh_pe *pe)
+{
+	return pe ? !!(pe->state & EEH_PE_PASSTHROUGH) : false;
+}
+
+static inline void eeh_pe_set_passed(struct eeh_pe *pe, bool passed)
+{
+	if (pe) {
+		if (passed)
+			pe->state |= EEH_PE_PASSTHROUGH;
+		else
+			pe->state &= ~EEH_PE_PASSTHROUGH;
+	}
+}
+
 /*
  * The struct is used to trace EEH state for the associated
  * PCI device node or PCI device. In future, it might
@@ -110,6 +140,7 @@ struct eeh_pe {
 #define EEH_DEV_SYSFS		(1 << 9)	/* Sysfs created	*/
 #define EEH_DEV_REMOVED		(1 << 10)	/* Removed permanently	*/
 #define EEH_DEV_FRESET		(1 << 11)	/* Fundamental reset	*/
+#define EEH_DEV_PASSTHROUGH	(1 << 12)	/* Owned by guest	*/
 
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
@@ -126,6 +157,9 @@ struct eeh_dev {
 	struct device_node *dn;		/* Associated device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
 	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
+#ifdef CONFIG_KVM_EEH
+	struct eeh_vfio_pci_addr gaddr;	/* Address in guest		*/
+#endif
 };
 
 static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
@@ -138,6 +172,21 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 	return edev ? edev->pdev : NULL;
 }
 
+static inline bool eeh_dev_passed(struct eeh_dev *dev)
+{
+	return dev ? !!(dev->mode & EEH_DEV_PASSTHROUGH) : false;
+}
+
+static inline void eeh_dev_set_passed(struct eeh_dev *dev, bool passed)
+{
+	if (dev) {
+		if (passed)
+			dev->mode |= EEH_DEV_PASSTHROUGH;
+		else
+			dev->mode &= ~EEH_DEV_PASSTHROUGH;
+	}
+}
+
 /* Return values from eeh_ops::next_error */
 enum {
 	EEH_NEXT_ERR_NONE = 0,
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 01/22] powerpc: Introduce CONFIG_KVM_EEH
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch introduces the kernel configuration option KVM_EEH, which
depends on PPC_POWERNV, KVM_BOOK3S_64, EEH and VFIO_IOMMU_SPAPR_TCE.
The option enables emulation of the EEH RTAS services that are
required by the EEH module in pSeries-based guests.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/Kconfig | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 141b202..743d2d9 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -189,6 +189,14 @@ config KVM_XICS
 	  Specification) interrupt controller architecture used on
 	  IBM POWER (pSeries) servers.
 
+config KVM_EEH
+	bool "KVM in-kernel EEH RTAS emulation"
+	depends on PPC_POWERNV && KVM_BOOK3S_64 && EEH && VFIO_IOMMU_SPAPR_TCE
+	default y
+	---help---
+	  Enable support for emulating EEH RTAS services used on IBM
+	  POWER (pSeries) servers.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 05/22] powerpc/eeh: Release VFIO dev on VM destruction
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

When the VM is destroyed, the EEH devices and PEs that have been
marked as being owned by the guest should be returned to the host.
The patch introduces kvmppc_eeh_vfio_release() to do it.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h |  6 +++++-
 arch/powerpc/kernel/eeh_pe.c   | 42 ++++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c   |  2 ++
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 3807167..677c719 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -380,6 +380,8 @@ static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
 
 static inline void eeh_remove_device(struct pci_dev *dev) { }
 
+static inline void kvmppc_eeh_vfio_release(struct kvm *kvm) { }
+
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
 #endif /* CONFIG_EEH */
@@ -388,7 +390,9 @@ static inline void eeh_remove_device(struct pci_dev *dev) { }
 #ifdef CONFIG_KVM_EEH
 struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr);
 struct eeh_pe *eeh_vfio_pe_get(struct eeh_vfio_pci_addr *addr);
-
+void kvmppc_eeh_vfio_release(struct kvm *kvm);
+#else
+static inline void kvmppc_eeh_vfio_release(void *kvm) { };
 #endif /* CONFIG_KVM_EEH */
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 1bd7b1f..9e73188 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -331,6 +331,48 @@ struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr)
 
 	return NULL;
 }
+
+static void *__kvmppc_eeh_vfio_release(void *data, void *flag)
+{
+	struct eeh_pe *pe = (struct eeh_pe *)data;
+	struct kvm *kvm = (struct kvm *)flag;
+	struct eeh_dev *edev, *tmp;
+
+	if (!eeh_pe_passed(pe))
+		return NULL;
+
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		if (!eeh_dev_passed(edev))
+			continue;
+
+		if (edev->gaddr.kvm == kvm)
+			eeh_dev_set_passed(edev, false);
+	}
+
+	eeh_pe_set_passed(pe, false);
+
+	return NULL;
+}
+
+/**
+ * kvmppc_eeh_vfio_release - Release VFIO devices for the given VM
+ * @kvm: VM indicator
+ *
+ * The function is expected to be called while the VM is destroyed.
+ * In turn, the PCI devices that have been passed to that VM should
+ * be released and their address mapping maintained will be destroyed.
+ */
+void kvmppc_eeh_vfio_release(struct kvm *kvm)
+{
+	struct eeh_pe *root;
+	void *ret;
+
+	list_for_each_entry(root, &eeh_phb_pe, child) {
+		ret = eeh_pe_traverse(root, __kvmppc_eeh_vfio_release, kvm);
+		if (ret) return;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_eeh_vfio_release);
 #endif /* CONFIG_KVM_EEH */
 
 /**
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8227dba..f07a12d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,6 +49,7 @@
 #include <asm/hvcall.h>
 #include <asm/switch_to.h>
 #include <asm/smp.h>
+#include <asm/eeh.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
@@ -2344,6 +2345,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 		kvm->arch.rma = NULL;
 	}
 
+	kvmppc_eeh_vfio_release(kvm);
 	kvmppc_free_hpt(kvm);
 }
 
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 04/22] powerpc/eeh: Search EEH PE by guest address
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch introduces function eeh_vfio_pe_get() to search the EEH
PE according to its guest address, which is made up of KVM indicator,
PHB ID and PE configuration address. The function will be useful in
backends for EEH RTAS emulation.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h |  1 +
 arch/powerpc/kernel/eeh_pe.c   | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index b12e3e9..3807167 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -387,6 +387,7 @@ static inline void eeh_remove_device(struct pci_dev *dev) { }
 
 #ifdef CONFIG_KVM_EEH
 struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr);
+struct eeh_pe *eeh_vfio_pe_get(struct eeh_vfio_pci_addr *addr);
 
 #endif /* CONFIG_KVM_EEH */
 
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index dba7c82..1bd7b1f 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -249,6 +249,46 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
 }
 
 #ifdef CONFIG_KVM_EEH
+static void *__eeh_vfio_pe_get(void *data, void *flag)
+{
+	struct eeh_pe *pe = (struct eeh_pe *)data;
+	struct eeh_vfio_pci_addr *addr = (struct eeh_vfio_pci_addr *)flag;
+
+	if (!eeh_pe_passed(pe))
+		return NULL;
+
+	/* Comparing the address */
+	if (addr->kvm     == pe->gaddr.kvm &&
+	    addr->buid_hi == pe->gaddr.buid_hi &&
+	    addr->buid_lo == pe->gaddr.buid_lo &&
+	    addr->pe_addr == pe->gaddr.pe_addr)
+		return pe;
+
+	return NULL;
+}
+
+/**
+ * eeh_vfio_pe_get - Search EEH PE based on guest's address
+ * @addr: EEH PE guest address
+ *
+ * Search the EEH PE according to the guest address, which
+ * is made up of VM indicator, PHB BUID, and PE configuration
+ * address.
+ */
+struct eeh_pe *eeh_vfio_pe_get(struct eeh_vfio_pci_addr *addr)
+{
+	struct eeh_pe *root;
+	struct eeh_pe *pe;
+
+	list_for_each_entry(root, &eeh_phb_pe, child) {
+		pe = eeh_pe_traverse(root, __eeh_vfio_pe_get, addr);
+		if (pe)
+			return pe;
+	}
+
+	return NULL;
+}
+
 static void *__eeh_vfio_dev_get(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 06/22] powerpc/eeh: Function for address mapping
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch introduces function kvm_vfio_eeh_dev_map(), which is
expected to be called on IOCTL command issued to the VM device, in
order to build the address mapping for VFIO PCI device.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh_pe.c | 88 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h     | 14 +++++++
 2 files changed, 102 insertions(+)

diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 9e73188..200cd5a 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -332,6 +332,94 @@ struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr)
 	return NULL;
 }
 
+/**
+ * kvm_vfio_eeh_dev_map - Build the address mapping for VFIO device
+ *
+ * @kvm: VM descriptor
+ * @domain: host domain of PCI device
+ * @bdn: host bus/device/function number
+ * @buid: BUID of guest PHB
+ * @gbdn: guest bus/device/function number
+ *
+ * Build the address mapping between host and guest device. It's called
+ * while passing through PCI device from host to guest.
+ */
+int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
+			 int bdn, unsigned long buid, int gbdn)
+{
+	struct pci_bus *bus, *pe_bus;
+	struct pci_dev *dev;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	int bus_no, devfn;
+
+	/* Find the PCI device in host side */
+	bus_no = (bdn >> 8) & 0xff;
+	devfn = bdn & 0xff;
+	bus = pci_find_bus(domain, bus_no);
+	if (!bus) {
+		pr_warn("%s: PCI bus %04x:%02x not found\n",
+			__func__, domain, bus_no);
+		return -ENODEV;
+	}
+
+	dev = pci_get_slot(bus, devfn);
+	if (!dev) {
+		pr_warn("%s: PCI device %04x:%02x:%02x.%01x not found\n",
+			__func__, domain, bus_no,
+			PCI_SLOT(devfn), PCI_FUNC(devfn));
+		return -ENODEV;
+	}
+
+	/*
+	 * Mark the EEH device as passed. We allow dynamic change
+	 * on the address mapping.
+	 */
+	edev = pci_dev_to_eeh_dev(dev);
+	if (!edev) {
+		pr_warn("%s: No EEH dev for PCI device %s\n",
+			__func__, pci_name(dev));
+		return -ENODEV;
+	}
+
+	/*
+	 * The PE configuration address is exactly PCI config address
+	 * of the PE primary bus. That has format 00BBSS00 defined in
+	 * PAPR.
+	 */
+	pe = edev->pe;
+	if (!eeh_pe_passed(pe)) {
+		pe_bus = eeh_pe_bus_get(pe);
+		BUG_ON(!pe_bus);
+
+		pe->gaddr.kvm		= kvm;
+		pe->gaddr.buid_hi	= BUID_HI(buid);
+		pe->gaddr.buid_lo	= BUID_LO(buid);
+		pe->gaddr.pe_addr	= pe_bus->number << 16;
+		eeh_pe_set_passed(pe, true);
+	} else if (pe->gaddr.kvm != kvm ||
+		   pe->gaddr.buid_hi != BUID_HI(buid) ||
+		   pe->gaddr.buid_lo != BUID_LO(buid)) {
+		pr_warn("%s: Mismatched VM or PHB on passing %s\n",
+			__func__, pci_name(dev));
+		return -EINVAL;
+	}
+
+	edev->gaddr.kvm		= kvm;
+	edev->gaddr.buid_hi	= BUID_HI(buid);
+	edev->gaddr.buid_lo	= BUID_LO(buid);
+	edev->gaddr.bus		= (gbdn >> 8) & 0xff;
+	edev->gaddr.devfn	= gbdn & 0xff;
+	eeh_dev_set_passed(edev, true);
+
+	pr_debug("EEH: Host PCI device %s passed to %lx-%02x:%02x.%01x\n",
+		 pci_name(dev), buid, (gbdn >> 8) & 0xff,
+		 PCI_SLOT(gbdn & 0xff), PCI_FUNC(gbdn & 0xff));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vfio_eeh_dev_map);
+
 static void *__kvmppc_eeh_vfio_release(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d21cf9..294ce48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1102,5 +1102,19 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
 {
 }
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
+
+#ifdef CONFIG_KVM_EEH
+typedef int (*kvm_vfio_dev_eeh_map)(struct kvm *kvm, int domain,
+				    int bdn, unsigned long buid, int gbdn);
+extern int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
+				int bdn, unsigned long buid, int gbdn);
+#else
+static inline int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
+				       int bdn, unsigned long buid, int gbdn)
+{
+	return 0;
+}
+#endif /* CONFIG_KVM_EEH */
+
 #endif
 
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 03/22] powerpc/eeh: Search EEH device by guest address
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch introduces function eeh_vfio_dev_get() to search the EEH
device according to its guest address, which is made up of VM indicator,
PHB BUID, bus, slot and function number. The function is useful in the
backends for EEH RTAS emulation.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h |  6 ++++++
 arch/powerpc/kernel/eeh_pe.c   | 45 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8bfb167..b12e3e9 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -384,6 +384,12 @@ static inline void eeh_remove_device(struct pci_dev *dev) { }
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
 #endif /* CONFIG_EEH */
 
+
+#ifdef CONFIG_KVM_EEH
+struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr);
+
+#endif /* CONFIG_KVM_EEH */
+
 #ifdef CONFIG_PPC64
 /*
  * MMIO read/write operations with EEH support.
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index fbd01eb..dba7c82 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -248,6 +248,51 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
 	return pe;
 }
 
+#ifdef CONFIG_KVM_EEH
+static void *__eeh_vfio_dev_get(void *data, void *flag)
+{
+	struct eeh_pe *pe = (struct eeh_pe *)data;
+	struct eeh_vfio_pci_addr *addr = (struct eeh_vfio_pci_addr *)flag;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		if (!eeh_dev_passed(edev))
+			continue;
+
+		/* Comparing the address in the guest */
+		if (addr->kvm     == edev->gaddr.kvm &&
+		    addr->buid_hi == edev->gaddr.buid_hi &&
+		    addr->buid_lo == edev->gaddr.buid_lo &&
+		    addr->bus     == edev->gaddr.bus &&
+		    addr->devfn   == edev->gaddr.devfn)
+			return edev;
+	}
+
+	return NULL;
+}
+
+/**
+ * eeh_vfio_dev_get - Search EEH device based on guest's address
+ * @addr: EEH device guest address
+ *
+ * Search the EEH device according to its guest's address, which
+ * is made up of PHB BUID, and PCI config address.
+ */
+struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr)
+{
+	struct eeh_pe *root;
+	struct eeh_dev *edev;
+
+	list_for_each_entry(root, &eeh_phb_pe, child) {
+		edev = eeh_pe_traverse(root, __eeh_vfio_dev_get, addr);
+		if (edev)
+			return edev;
+	}
+
+	return NULL;
+}
+#endif /* CONFIG_KVM_EEH */
+
 /**
  * eeh_pe_get_parent - Retrieve the parent PE
  * @edev: EEH device
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 08/22] kvm: Address mapping for VFIO device
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The address (domain/bus/slot/function) looks different from the
perspective of the host and the guest. We have to set up the mapping
for EEH and tear it down accordingly. The patch introduces additional
attributes to the KVM VFIO device for address mapping and unmapping.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/Kconfig  |  1 +
 arch/powerpc/kvm/Makefile |  3 +++
 include/uapi/linux/kvm.h  | 10 ++++++++
 virt/kvm/vfio.c           | 60 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 743d2d9..6764fc5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -64,6 +64,7 @@ config KVM_BOOK3S_64
 	select KVM_BOOK3S_64_HANDLER
 	select KVM
 	select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
+	select KVM_VFIO if VFIO
 	---help---
 	  Support running unmodified book3s_64 and book3s_32 guest kernels
 	  in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ce569b6..673038d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -97,6 +97,9 @@ endif
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
+kvm-book3s_64-objs-$(CONFIG_KVM_VFIO) += \
+	$(addprefix ../../../virt/kvm/, vfio.o)
+
 kvm-book3s_64-module-objs += \
 	$(KVM)/kvm_main.o \
 	$(KVM)/eventfd.o \
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a8f4ee5..97b4d1e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -932,9 +932,19 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
+#define  KVM_DEV_VFIO_DEV			2
+#define   KVM_DEV_VFIO_DEV_EEH_MAP			1
+#define   KVM_DEV_VFIO_DEV_EEH_UNMAP			2
 #define KVM_DEV_TYPE_ARM_VGIC_V2	5
 #define KVM_DEV_TYPE_FLIC		6
 
+struct kvm_vfio_pci_addr {
+	__u32 domain;	/* Host PHB domain	*/
+	__u32 bdn;	/* Host bus/dev/func	*/
+	__u64 gbuid;	/* Guet PHB BUID	*/
+	__u32 gbdn;	/* Guest bus/dev/func	*/
+};
+
 /*
  * ioctls for VM fds
  */
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ba1a93f..778015d 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -28,6 +28,10 @@ struct kvm_vfio {
 	struct list_head group_list;
 	struct mutex lock;
 	bool noncoherent;
+#ifdef CONFIG_KVM_EEH
+	kvm_vfio_dev_eeh_map eeh_map;
+	kvm_vfio_dev_eeh_unmap eeh_unmap;
+#endif
 };
 
 static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
@@ -201,12 +205,53 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 	return -ENXIO;
 }
 
+static int kvm_vfio_set_dev(struct kvm_device *dev, long attr, u64 arg)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct kvm_vfio_pci_addr addr;
+	int ret = -ENXIO;
+
+	switch (attr) {
+#ifdef CONFIG_KVM_EEH
+	case KVM_DEV_VFIO_DEV_EEH_MAP:
+		if (copy_from_user(&addr, (void __user *)arg, sizeof(addr))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		if (kv->eeh_map)
+			ret = kv->eeh_map(dev->kvm, addr.domain,
+					  addr.bdn, addr.gbuid, addr.gbdn);
+		else
+			ret = 0;
+
+		break;
+	case KVM_DEV_VFIO_DEV_EEH_UNMAP:
+		if (copy_from_user(&addr, (void __user *)arg, sizeof(addr))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		if (kv->eeh_unmap)
+			ret = kv->eeh_unmap(dev->kvm, addr.domain, addr.bdn);
+		else
+			ret = 0;
+
+		break;
+#endif
+	}
+
+	return ret;
+}
+
 static int kvm_vfio_set_attr(struct kvm_device *dev,
 			     struct kvm_device_attr *attr)
 {
 	switch (attr->group) {
 	case KVM_DEV_VFIO_GROUP:
 		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+	case KVM_DEV_VFIO_DEV:
+		return kvm_vfio_set_dev(dev, attr->attr, attr->addr);
 	}
 
 	return -ENXIO;
@@ -224,6 +269,16 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
 		}
 
 		break;
+	case KVM_DEV_VFIO_DEV:
+		switch (attr->attr) {
+#ifdef CONFIG_KVM_EEH
+		case KVM_DEV_VFIO_DEV_EEH_MAP:
+		case KVM_DEV_VFIO_DEV_EEH_UNMAP:
+			return 0;
+#endif
+		}
+
+		break;
 	}
 
 	return -ENXIO;
@@ -262,7 +317,10 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 
 	INIT_LIST_HEAD(&kv->group_list);
 	mutex_init(&kv->lock);
-
+#ifdef CONFIG_KVM_EEH
+	kv->eeh_map = kvm_vfio_eeh_dev_map;
+	kv->eeh_unmap = kvm_vfio_eeh_dev_unmap;
+#endif
 	dev->private = kv;
 
 	return 0;
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 07/22] powerpc/eeh: Function to tear down address mapping
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch introduces function kvm_vfio_eeh_dev_unmap(), which is
expected to be called on IOCTL command issued to the VM device, in
order to tear down the address mapping for VFIO PCI device.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh_pe.c | 82 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h     |  7 ++++
 2 files changed, 89 insertions(+)

diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 200cd5a..8398efc 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -420,6 +420,88 @@ int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
 }
 EXPORT_SYMBOL_GPL(kvm_vfio_eeh_dev_map);
 
+ /**
+  * kvm_vfio_eeh_dev_unmap - Tear down address mapping for VFIO PCI device
+  *
+  * @kvm: VM descriptor
+  * @domain: host domain
+  * @bdn: host bus/device/function number
+  *
+  * Tear down address mapping for VFIO PCI device.
+  */
+int kvm_vfio_eeh_dev_unmap(struct kvm *kvm, int domain, int bdn)
+{
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+	struct eeh_pe *pe;
+	struct eeh_dev *edev, *tmp;
+	int bus_no, devfn;
+	bool passed;
+
+	/* Find the PCI device in host side */
+	bus_no = (bdn >> 8) & 0xff;
+	devfn = bdn & 0xff;
+	bus = pci_find_bus(domain, bus_no);
+	if (!bus) {
+		pr_warn("%s: PCI bus %04x:%02x not found\n",
+			__func__, domain, bus_no);
+		return -ENODEV;
+	}
+
+	dev = pci_get_slot(bus, devfn);
+	if (!dev) {
+		pr_warn("%s: PCI device %04x:%02x:%02x.%01x not found\n",
+			__func__, domain, bus_no,
+			PCI_SLOT(devfn), PCI_FUNC(devfn));
+		return -ENODEV;
+	}
+
+	/* Mark the EEH device as non-passed */
+	edev = pci_dev_to_eeh_dev(dev);
+	if (!edev) {
+		pr_warn("%s: No EEH dev for PCI device %s\n",
+			__func__, pci_name(dev));
+		return -ENODEV;
+	} else if (!eeh_dev_passed(edev)    ||
+		   !eeh_pe_passed(edev->pe) ||
+		   edev->gaddr.kvm != kvm   ||
+		   edev->pe->gaddr.kvm != kvm) {
+		pr_warn("%s: Non-passsed PCI dev %s or PE\n",
+			__func__, pci_name(dev));
+		return 0;
+	}
+	memset(&edev->gaddr, 0, sizeof(edev->gaddr));
+	eeh_dev_set_passed(edev, false);
+	pr_debug("EEH: Host PCI device %s returned\n",
+		pci_name(dev));
+
+	/*
+	 * Mark the PE as non-passed if all PCI devices
+	 * except P2P bridges are non-passed.
+	 */
+	pe = edev->pe;
+	passed = false;
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		dev = eeh_dev_to_pci_dev(edev);
+		if (dev && dev->subordinate)
+			continue;
+		if (eeh_dev_passed(edev)) {
+			passed = true;
+			break;
+		}
+	}
+
+	if (!passed) {
+		memset(&pe->gaddr, 0, sizeof(pe->gaddr));
+		eeh_pe_set_passed(pe, false);
+		pr_debug("EEH: PHB#%x-PE#%x returned to host\n",
+			pe->phb->global_number, pe->addr);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vfio_eeh_dev_unmap);
+
 static void *__kvmppc_eeh_vfio_release(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 294ce48..520b3d0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1106,14 +1106,21 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
 #ifdef CONFIG_KVM_EEH
 typedef int (*kvm_vfio_dev_eeh_map)(struct kvm *kvm, int domain,
 				    int bdn, unsigned long buid, int gbdn);
+typedef int (*kvm_vfio_dev_eeh_unmap)(struct kvm *kvm, int domain, int bdn);
 extern int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
 				int bdn, unsigned long buid, int gbdn);
+extern int kvm_vfio_eeh_dev_unmap(struct kvm *kvm, int domain, int bdn);
 #else
 static inline int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
 				       int bdn, unsigned long buid, int gbdn)
 {
 	return 0;
 }
+
+static inline int kvm_vfio_eeh_dev_unmap(struct kvm *kvm, int domain, int bdn)
+{
+	return 0;
+}
 #endif /* CONFIG_KVM_EEH */
 
 #endif
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 10/22] powerpc/eeh: Introduce kvmppc_eeh_format_addr()
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The guest will pass 2 kinds of addresses: the traditional bus/device/
function combo, and the guest-sensitive PE address returned from the
host. The patch introduces function kvmppc_eeh_format_addr() to convert
the guest address information from the RTAS call argument (struct
rtas_args) and retrieve the EEH device or PE instance if necessary.
The function will be used by subsequent patches.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 52 +++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c b/arch/powerpc/platforms/powernv/eeh-rtas.c
index fded461..f04b820 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -39,6 +39,58 @@
 #include "powernv.h"
 #include "pci.h"
 
+/*
+ * Guest is passing 2 types of addresses. First one would be
+ * traditional bus/device/function combo and another one is
+ * PE address, which starts from 0x10000
+ */
+static int kvmppc_eeh_format_addr(struct kvm_vcpu *vcpu,
+				  struct rtas_args *args,
+				  struct eeh_vfio_pci_addr *addr,
+				  bool is_legacy,
+				  struct eeh_dev **pedev,
+				  struct eeh_pe **ppe)
+{
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+
+	if (pedev) *pedev = NULL;
+	if (ppe) *ppe = NULL;
+
+	addr->kvm       = vcpu->kvm;
+	addr->buid_hi   = args->args[1];
+	addr->buid_lo   = args->args[2];
+	if (is_legacy) {
+		addr->bus   = (args->args[0] >> 16) & 0xFF;
+		addr->devfn = (args->args[0] >> 8) & 0xFF;
+
+		edev = eeh_vfio_dev_get(addr);
+		if (!edev) {
+			pr_warn("%s: Can't find VFIO device "
+				"(%08x-%08x-%02x-%02x)\n",
+				__func__, addr->buid_hi,
+				addr->buid_lo, addr->bus, addr->devfn);
+			return -EEXIST;
+		}
+
+		if (pedev) *pedev = edev;
+		if (ppe)   *ppe = edev->pe;
+	} else {
+		addr->pe_addr = args->args[0];
+		pe = eeh_vfio_pe_get(addr);
+		if (!pe) {
+			pr_warn("%s: Can't find PE (%08x-%08x-%x)\n",
+				__func__, addr->buid_hi,
+				addr->buid_lo, addr->pe_addr);
+			return -EEXIST;
+		}
+
+		if (ppe) *ppe = pe;
+	}
+
+	return 0;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 09/22] powerpc/powernv: EEH RTAS emulation backend
From: Gavin Shan @ 2014-05-05  1:27 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The implementation of EEH RTAS emulation is split up into 2 layers:
the KVM layer and the powernv platform layer. The KVM layer simply
dispatches RTAS requests from the guest to the powernv platform layer.
After that, the powernv platform layer takes care of the details,
processes the request and returns the result to the KVM layer.

The patch implements the infrastructure of powernv platform layer
for EEH RTAS emulation.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h            | 18 +++++++++
 arch/powerpc/platforms/powernv/Makefile   |  1 +
 arch/powerpc/platforms/powernv/eeh-rtas.c | 64 +++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/eeh-rtas.c

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 677c719..7384dee 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -49,6 +49,24 @@ struct device_node;
 #define EEH_PE_RST_SETTLE_TIME		1800
 
 #ifdef CONFIG_KVM_EEH
+
+/*
+ * Those EEH RTAS operations are going to be emulated.
+ * According to PAPR specification, there're much more
+ * operations. However, the following RTAS operations
+ * are enough for EEH in guest to work properly.
+ */
+enum {
+	eeh_rtas_first			= 0,
+	eeh_rtas_set_option		= 0,
+	eeh_rtas_set_slot_reset		= 1,
+	eeh_rtas_read_slot_reset_state2	= 2,
+	eeh_rtas_get_config_addr_info2	= 3,
+	eeh_rtas_slot_error_detail	= 4,
+	eeh_rtas_configure_pe		= 5,
+	eeh_rtas_last			= 5
+};
+
 struct eeh_vfio_pci_addr {
 	struct kvm	*kvm;		/* KVM identifier		*/
 	unsigned int	buid_hi;	/* PHB BUID high		*/
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 63cebb9..d8ea670 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,5 +6,6 @@ obj-y			+= opal-msglog.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
 obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
+obj-$(CONFIG_KVM_EEH)	+= eeh-rtas.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c b/arch/powerpc/platforms/powernv/eeh-rtas.c
new file mode 100644
index 0000000..fded461
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -0,0 +1,64 @@
+/*
+ * The file intends to implement emulation for EEH related RTAS services,
+ * which is expected to be done inside hypervisor. The specific RTAS
+ * service is identified by its unique token. Currently, the tokens
+ * are assigned by QEMU in a dynamic way and the dedicated hcall (0xf000)
+ * was introduced for the purpose of RTAS emulation either in hypervisor
+ * or QEMU.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/rtas.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/opal.h>
+#include <asm/msi_bitmap.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/tce.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/**
+ * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
+ * @vcpu: KVM virtual CPU
+ * @args: RTAS arguments; the resulting status is stored in rets[0]
+ * @op: identifier of the specific EEH RTAS service
+ *
+ * The function will be called when the hypervisor receives an emulation
+ * request on EEH RTAS from the guest. Accordingly, it will dispatch to
+ * specific functions to handle the request.
+ */
+void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct rtas_args *args, int op)
+{
+	int ret = -3;
+
+	/* Parse the requested service */
+	switch (op) {
+	default:
+		pr_warn("%s: Unsupported EEH RTAS service#%d\n",
+			__func__, op);
+	}
+
+	args->rets[0] = ret;
+}
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 12/22] powerpc/eeh: Emulate RTAS call ibm,set-slot-reset
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The RTAS call "ibm,set-slot-reset" is being used to reset one
particular PE, either fundamental or hot reset. The patch intends
to implement the backend to emulate the RTAS call.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 92 +++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 1a037fd..3e38d13 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -171,6 +171,95 @@ out:
 	return ret;
 }
 
+static int kvmppc_eeh_set_reset(struct kvm_vcpu *vcpu,
+				struct rtas_args *args)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	struct eeh_vfio_pci_addr addr;
+	int opcode;
+	int ret = 0;
+
+	/* Sanity check on parameter */
+	if (args->nargs != 4 || args->nret != 1) {
+		pr_warn("%s: Non-matched arguments (%d, %d) - (4, 1)\n",
+			__func__, args->nargs, args->nret);
+		ret = -3;
+		goto out;
+	}
+
+	/* Sanity check on opcode */
+	opcode = args->args[3];
+	if (opcode != EEH_RESET_DEACTIVATE &&
+	    opcode != EEH_RESET_HOT &&
+	    opcode != EEH_RESET_FUNDAMENTAL) {
+		pr_warn("%s: Unsupported opcode %d\n",
+			__func__, opcode);
+		ret = -3;
+		goto out;
+	}
+
+	/* Figure out the address. We always have PE address */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, false, &edev, &pe)) {
+		ret = -3;
+		goto out;
+	}
+
+	/* Ensure that the EEH stuff has been initialized */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disable on PHB#%d\n",
+			__func__, hose->global_number);
+		ret = -7;
+		goto out;
+	}
+
+	/* Call into the IODA dependent backend to do the reset */
+	if (!phb->eeh_ops ||
+	    !phb->eeh_ops->set_option ||
+	    !phb->eeh_ops->reset) {
+		pr_warn("%s: Unsupported request\n", __func__);
+		ret = -7;
+	} else {
+		/*
+		 * The frozen PE might be caused by the mechanism called
+		 * PAPR error injection, which is supposed to be one-shot
+		 * without a "sticky" bit, as stated by the spec. But that
+		 * isn't the reality, at least on P7IOC. So we have to
+		 * clear that to avoid recursive errors, which would fail
+		 * the recovery.
+		 */
+		if (opcode == EEH_RESET_DEACTIVATE)
+			opal_pci_reset(phb->opal_id,
+				       OPAL_PHB_ERROR,
+				       OPAL_ASSERT_RESET);
+
+		if (phb->eeh_ops->reset(pe, opcode)) {
+			pr_warn("%s: Failure from backend\n",
+				__func__);
+			ret = -1;
+			goto out;
+		}
+
+		/*
+		 * The PE is still in frozen state and we need to clear that.
+		 * It's good to clear frozen state after deassert to avoid
+		 * messy IO access during reset, which might cause a recursive
+		 * frozen PE.
+		 */
+		if (opcode == EEH_RESET_DEACTIVATE) {
+			phb->eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
+			phb->eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
+		}
+	}
+
+out:
+	return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -190,6 +279,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct rtas_args *args, int op)
 	case eeh_rtas_set_option:
 		ret = kvmppc_eeh_set_option(vcpu, args);
 		break;
+	case eeh_rtas_set_slot_reset:
+		ret = kvmppc_eeh_set_reset(vcpu, args);
+		break;
 	default:
 		pr_warn("%s: Unsupported EEH RTAS service#%d\n",
 			__func__, op);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 11/22] powerpc/eeh: Emulate RTAS call ibm,set-eeh-option
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The RTAS call "ibm,set-eeh-option" is being used to enable/disable
EEH functionality on the specified PE, or enable MMIO/DMA for the
frozen PE. The patch emulates the RTAS call.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 83 +++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c b/arch/powerpc/platforms/powernv/eeh-rtas.c
index f04b820..1a037fd 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -91,6 +91,86 @@ static int kvmppc_eeh_format_addr(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int kvmppc_eeh_set_option(struct kvm_vcpu *vcpu,
+				 struct rtas_args *args)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	struct eeh_vfio_pci_addr addr;
+	int opcode;
+	bool is_legacy = false;
+	int ret = 0;
+
+	/* Sanity check on parameter */
+	if (args->nargs != 4 || args->nret != 1) {
+		pr_warn("%s: Non-matched arguments (%d, %d) - (4, 1)\n",
+			__func__, args->nargs, args->nret);
+		ret = -3;
+		goto out;
+	}
+
+	/* Check on opcode */
+	opcode = args->args[3];
+	if (opcode < EEH_OPT_DISABLE || opcode > EEH_OPT_THAW_DMA) {
+		pr_warn("%s: opcode %d out of range (%d, %d)\n",
+			__func__, opcode, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
+		ret = -3;
+		goto out;
+	}
+
+	if (opcode == EEH_OPT_ENABLE)
+		is_legacy = true;
+
+	/* Figure out the address */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, is_legacy, &edev, &pe)) {
+		ret = -7;
+		goto out;
+	}
+
+	/* Ensure that the EEH stuff has been initialized */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disabled on PHB#%d\n",
+			__func__, hose->global_number);
+		ret = -7;
+		goto out;
+	}
+
+	/*
+	 * The EEH functionality has been enabled on all PEs
+	 * by default, so just return success for an enable
+	 * request. A disable request is treated the same way,
+	 * although the guest isn't expected to disable EEH
+	 * at all.
+	 */
+	if (opcode == EEH_OPT_DISABLE ||
+		opcode == EEH_OPT_ENABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * Call into the IODA dependent backend in order
+	 * to enable DMA or MMIO for the indicated PE.
+	 */
+	if (phb->eeh_ops && phb->eeh_ops->set_option) {
+		if (phb->eeh_ops->set_option(pe, opcode)) {
+			pr_warn("%s: Failure from backend\n",
+				__func__);
+			ret = -1;
+		}
+	} else {
+		pr_warn("%s: Unsupported request\n",
+			__func__);
+		ret = -7;
+	}
+out:
+	return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -107,6 +187,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct rtas_args *args, int op)
 
 	/* Parse the requested service */
 	switch (op) {
+	case eeh_rtas_set_option:
+		ret = kvmppc_eeh_set_option(vcpu, args);
+		break;
 	default:
 		pr_warn("%s: Unsupported EEH RTAS service#%d\n",
 			__func__, op);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 15/22] powerpc/eeh: Emulate RTAS call ibm,slot-error-detail
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The RTAS call "ibm,slot-error-detail" is being used to retrieve the
error log (either permanent or temporary) from the underlying firmware.
The patch implements the backend to emulate the RTAS call.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 75 +++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 4a9c2c7..8934564 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -390,6 +390,78 @@ out:
 	return ret;
 }
 
+static int kvmppc_eeh_get_error(struct kvm_vcpu *vcpu,
+				struct rtas_args *args)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	struct eeh_vfio_pci_addr addr;
+	char *log;
+	int guest_log;
+	int len, severity;
+	int ret = 0;
+
+	/* Sanity check on parameter */
+	if (args->nargs != 8 || args->nret != 1) {
+		pr_warn("%s: Non-matched arguments (%d, %d) - (8, 1)\n",
+			__func__, args->nargs, args->nret);
+		ret = 1;
+		goto out;
+	} else if (args->args[7] != 1 && args->args[7] != 2) {
+		pr_warn("%s: Invalid Log type\n", __func__);
+		ret = 1;
+		goto out;
+	}
+
+	/* Figure out the address */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, false, &edev, &pe)) {
+		ret = 1;
+		goto out;
+	}
+
+	/* Make sure that the EEH stuff has been initialized */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disabled on PHB#%d\n",
+			__func__, hose->global_number);
+		ret = 1;
+		goto out;
+	}
+
+	/*
+	 * Retrieve the error log from the PE. We don't have a
+	 * cached error log for one specific PE yet, which needs
+	 * to be figured out later.
+	 */
+	if (phb->eeh_ops && phb->eeh_ops->get_log) {
+		guest_log = args->args[5];
+		len = args->args[6];	/* NOTE(review): guest value, no bound */
+		severity = args->args[7];
+		log = kzalloc(len, GFP_KERNEL);
+		if (!log) {
+			pr_err("%s: Out of memory!\n", __func__);
+			ret = 1;
+			goto out;
+		}
+
+		phb->eeh_ops->get_log(pe, severity, log, len);
+		if (kvm_write_guest(vcpu->kvm, guest_log, log, len)) {
+			pr_warn("%s: Fail pushing log to guest\n",
+				__func__);
+			ret = 1;
+		}
+
+		kfree(log);
+	} else {
+		ret = 1;
+	}
+out:
+	return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -418,6 +490,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct rtas_args *args, int op)
 	case eeh_rtas_get_config_addr_info2:
 		ret = kvmppc_eeh_get_addr2(vcpu, args);
 		break;
+	case eeh_rtas_slot_error_detail:
+		ret = kvmppc_eeh_get_error(vcpu, args);
+		break;
 	default:
 		pr_warn("%s: Unsupported EEH RTAS service#%d\n",
 			__func__, op);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 13/22] powerpc/eeh: Emulate RTAS call ibm, read-slot-reset-state2
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The RTAS call "ibm,read-slot-reset-state2" is being used to retrieve
the various states of the specified PE, e.g. reset state, frozen DMA,
frozen MMIO etc. The patch implements the backend to emulate the
RTAS call.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 77 +++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 3e38d13..031ee8c 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -260,6 +260,80 @@ out:
 	return ret;
 }
 
+static int kvmppc_eeh_get_state2(struct kvm_vcpu *vcpu,
+				 struct rtas_args *args)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	struct eeh_vfio_pci_addr addr;
+	int result, ret = 0;
+
+	/* Sanity check on parameter */
+	if (args->nargs != 3 || (args->nret != 4 && args->nret != 5)) {
+		pr_warn("%s: Non-matched argument (%d, %d) - (3, 4/5)\n",
+			__func__, args->nargs, args->nret);
+		ret = -3;
+		goto out;
+	}
+
+	/* Figure out the address */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, false, &edev, &pe)) {
+		ret = -3;
+		goto out;
+	}
+
+	/* Make sure that the EEH stuff has been initialized */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disabled on PHB#%d\n",
+			__func__, hose->global_number);
+		ret = -3;
+		args->rets[2] = 0;
+		goto out;
+	}
+
+	/*
+	 * Mark EEH supported on the PCI device. Otherwise,
+	 * the PE state is meaningless to the guest.
+	 */
+	args->rets[2] = 1;
+
+	/* Translate the backend's state flags into the RTAS PE state */
+	if (phb->eeh_ops && phb->eeh_ops->get_state) {
+		result = phb->eeh_ops->get_state(pe);
+
+		if (!(result & EEH_STATE_RESET_ACTIVE) &&
+		    (result & EEH_STATE_DMA_ENABLED) &&
+		    (result & EEH_STATE_MMIO_ENABLED))
+			args->rets[1] = 0;	/* MMIO and DMA enabled */
+		else if (result & EEH_STATE_RESET_ACTIVE)
+			args->rets[1] = 1;	/* Reset active */
+		else if (!(result & EEH_STATE_RESET_ACTIVE) &&
+			 !(result & EEH_STATE_DMA_ENABLED) &&
+			 !(result & EEH_STATE_MMIO_ENABLED))
+			args->rets[1] = 2;	/* MMIO and DMA stopped */
+		else if (!(result & EEH_STATE_RESET_ACTIVE) &&
+			!(result & EEH_STATE_DMA_ENABLED) &&
+			(result & EEH_STATE_MMIO_ENABLED))
+			args->rets[1] = 4;	/* MMIO enabled, DMA stopped */
+		else {
+			args->rets[1] = 5;	/* None of the above */
+			args->rets[3] = 1000;
+		}
+
+		ret = 0;
+	} else {
+		pr_warn("%s: Unsupported request\n",
+			__func__);
+		ret = -3;
+	}
+out:
+	return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -282,6 +356,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct rtas_args *args, int op)
 	case eeh_rtas_set_slot_reset:
 		ret = kvmppc_eeh_set_reset(vcpu, args);
 		break;
+	case eeh_rtas_read_slot_reset_state2:
+		ret = kvmppc_eeh_get_state2(vcpu, args);
+		break;
 	default:
 		pr_warn("%s: Unsupported EEH RTAS service#%d\n",
 			__func__, op);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 16/22] powerpc/eeh: Emulate RTAS call ibm,configure-pe
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The RTAS call "ibm,configure-pe" is being used to restore everything
after PE reset. The patch implements the backend to emulate the
RTAS call. In that, we restore BARs for the affected PCI device on
the host side because the guest might not have full access to the
config space.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 49 +++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 8934564..a663cd8 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -462,6 +462,52 @@ out:
 	return ret;
 }
 
+static int kvmppc_eeh_configure_pe(struct kvm_vcpu *vcpu,
+				   struct rtas_args *args)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	struct eeh_vfio_pci_addr addr;
+	int ret = 0;
+
+	/* Sanity check on parameter */
+	if (args->nargs != 3 || args->nret != 1) {
+		pr_warn("%s: Non-matched arguments (%d, %d) - (3, 1)\n",
+			__func__, args->nargs, args->nret);
+		ret = -3;
+		goto out;
+	}
+
+	/* Figure out the address */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, false, &edev, &pe)) {
+		ret = -3;
+		goto out;
+	}
+
+	/* Make sure that the EEH stuff has been initialized */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disabled on PHB#%x\n",
+			__func__, hose->global_number);
+		ret = -3;
+		goto out;
+	}
+
+	/*
+	 * Access to the PCI config space of a VFIO device has some
+	 * limitations. Part of the config space, including the BAR
+	 * registers, is not readable or writable. So the guest is
+	 * expected to have stale values for those registers and we
+	 * have to restore them on the host side.
+	 */
+	eeh_pe_restore_bars(pe);
+out:
+	return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -493,6 +539,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct rtas_args *args, int op)
 	case eeh_rtas_slot_error_detail:
 		ret = kvmppc_eeh_get_error(vcpu, args);
 		break;
+	case eeh_rtas_configure_pe:
+		ret = kvmppc_eeh_configure_pe(vcpu, args);
+		break;
 	default:
 		pr_warn("%s: Unsupported EEH RTAS service#%d\n",
 			__func__, op);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 14/22] powerpc/eeh: Emulate RTAS call ibm, get-config-addr-info2
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The RTAS call "ibm,get-config-addr-info2" is being used by guest
to retrieve the corresponding PE number for the specified PCI device.
The patch implements the backend to support the emulation of the
RTAS call.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 59 +++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 031ee8c..4a9c2c7 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -334,6 +334,62 @@ out:
 	return ret;
 }
 
+static int kvmppc_eeh_get_addr2(struct kvm_vcpu *vcpu,
+				struct rtas_args *args)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	struct eeh_vfio_pci_addr addr;
+	int opcode;
+	int ret = 0;
+
+	/* Sanity check on parameter */
+	if (args->nargs != 4 || args->nret != 2) {
+		pr_warn("%s: Non-matched arguments (%d, %d) - (4, 2)\n",
+			__func__, args->nargs, args->nret);
+		ret = -3;
+		goto out;
+	}
+
+	/* Check on the operation code */
+	opcode = args->args[3];
+	if (opcode != 0 && opcode != 1) {
+		pr_warn("%s: opcode %d out of range (0, 1)\n",
+			__func__, opcode);
+		ret = -3;
+		goto out;
+	}
+
+	/* Figure out the address */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, true, &edev, &pe)) {
+		ret = -3;
+		goto out;
+	}
+
+	/* Ensure that the EEH stuff has been initialized */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disabled on PHB#%d\n",
+			__func__, hose->global_number);
+		ret = -3;
+		goto out;
+	}
+
+	/*
+	 * Fill the result according to opcode. We don't differentiate
+	 * between PCI bus and device sensitive PEs here.
+	 */
+	if (opcode == 0)
+		args->rets[1] = pe->gaddr.pe_addr;
+	else
+		args->rets[1] = 1;
+out:
+	return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -359,6 +415,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct rtas_args *args, int op)
 	case eeh_rtas_read_slot_reset_state2:
 		ret = kvmppc_eeh_get_state2(vcpu, args);
 		break;
+	case eeh_rtas_get_config_addr_info2:
+		ret = kvmppc_eeh_get_addr2(vcpu, args);
+		break;
 	default:
 		pr_warn("%s: Unsupported EEH RTAS service#%d\n",
 			__func__, op);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 18/22] powerpc/eeh: Avoid event on passed PE
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

If we detect frozen state on a PE that has been passed to a guest, we
needn't handle it. Instead, we rely on the guest to detect and recover
it. The patch avoids EEH events on the frozen passed PE so that the
guest has a chance to handle that.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c                 | 8 ++++++++
 arch/powerpc/platforms/powernv/eeh-ioda.c | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 33d683a..a2121e8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -399,6 +399,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	if (ret > 0)
 		return ret;
 
+	/*
+	 * If the PE has been passed to guest, we won't check the
+	 * state. Instead, let the guest handle it if the PE has
+	 * been frozen.
+	 */
+	if (eeh_pe_passed(pe))
+		return 0;
+
 	/* If we already have a pending isolation event for this
 	 * slot, we know it's bad already, we don't need to check.
 	 * Do this checking under a lock; as multiple PCI devices
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 1b5982f..03a3ed2 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -890,7 +890,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 				opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
 					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
 				ret = EEH_NEXT_ERR_NONE;
-			} else if ((*pe)->state & EEH_PE_ISOLATED) {
+			} else if ((*pe)->state & EEH_PE_ISOLATED ||
+				   eeh_pe_passed(*pe)) {
 				ret = EEH_NEXT_ERR_NONE;
 			} else {
 				pr_err("EEH: Frozen PHB#%x-PE#%x (%s) detected\n",
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 17/22] powerpc/kvm: Connect EEH RTAS emulation backend
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch intends to connect the KVM module with the backend for
EEH RTAS emulation. In turn, we can handle the EEH RTAS services
from the guest.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_ppc.h |  7 +++++++
 arch/powerpc/kvm/book3s_rtas.c     | 40 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 4096f16..18b51a1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,6 +29,9 @@
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 #include <linux/bug.h>
+#ifdef CONFIG_KVM_EEH
+#include <asm/rtas.h>
+#endif
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/kvm_book3s.h>
 #else
@@ -166,6 +169,10 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
 extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+#ifdef CONFIG_KVM_EEH
+extern void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu,
+			    struct rtas_args *args, int flag);
+#endif
 extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
 				u32 priority);
 extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 7a05315..17bdb4a 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -16,6 +16,8 @@
 #include <asm/kvm_ppc.h>
 #include <asm/hvcall.h>
 #include <asm/rtas.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
 
 #ifdef CONFIG_KVM_XICS
 static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -103,6 +105,24 @@ out:
 }
 #endif /* CONFIG_KVM_XICS */
 
+#ifdef CONFIG_KVM_EEH
+
+#define KVM_RTAS_EEH_FUNC(name, flag)			\
+static void kvm_rtas_eeh_##name(struct kvm_vcpu *vcpu,	\
+				struct rtas_args *args)	\
+{							\
+	kvmppc_eeh_rtas(vcpu, args, flag);		\
+}
+
+KVM_RTAS_EEH_FUNC(set_option,	eeh_rtas_set_option)
+KVM_RTAS_EEH_FUNC(set_reset,	eeh_rtas_set_slot_reset)
+KVM_RTAS_EEH_FUNC(read_state2,	eeh_rtas_read_slot_reset_state2)
+KVM_RTAS_EEH_FUNC(addr_info2,	eeh_rtas_get_config_addr_info2)
+KVM_RTAS_EEH_FUNC(error_detail,	eeh_rtas_slot_error_detail)
+KVM_RTAS_EEH_FUNC(configure_pe,	eeh_rtas_configure_pe)
+
+#endif /* CONFIG_KVM_EEH */
+
 struct rtas_handler {
 	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
 	char *name;
@@ -115,6 +135,26 @@ static struct rtas_handler rtas_handlers[] = {
 	{ .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
 	{ .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
 #endif
+#ifdef CONFIG_KVM_EEH
+	{ .name = "ibm,set-eeh-option",
+	  .handler = kvm_rtas_eeh_set_option
+	},
+	{ .name = "ibm,set-slot-reset",
+	  .handler = kvm_rtas_eeh_set_reset
+	},
+	{ .name = "ibm,read-slot-reset-state2",
+	  .handler = kvm_rtas_eeh_read_state2
+	},
+	{ .name = "ibm,get-config-addr-info2",
+	  .handler = kvm_rtas_eeh_addr_info2
+	},
+	{ .name = "ibm,slot-error-detail",
+	  .handler = kvm_rtas_eeh_error_detail
+	},
+	{ .name = "ibm,configure-pe",
+	  .handler = kvm_rtas_eeh_configure_pe
+	}
+#endif /* CONFIG_KVM_EEH */
 };
 
 struct rtas_token_definition {
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 20/22] powerpc/kvm: Infrastructure for error injection
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch intends to implement the infrastructure for error injection.
RTAS calls "ibm,{open-errinjct, close-errinjct, errinjct}" are handled
in the host directly. Each VM is allowed to have one opened token at
a time.

There're multiple types of error injection to be supported by the system.
So we maintain an array of handlers with error type as index. The array
supports dynamic registration.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s_errinjct.h |  78 +++++++
 arch/powerpc/kvm/Makefile                  |   3 +
 arch/powerpc/kvm/book3s_errinjct.c         | 329 +++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_rtas.c             |  29 ++-
 4 files changed, 438 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/book3s_errinjct.h
 create mode 100644 arch/powerpc/kvm/book3s_errinjct.c

diff --git a/arch/powerpc/include/asm/book3s_errinjct.h b/arch/powerpc/include/asm/book3s_errinjct.h
new file mode 100644
index 0000000..35712be
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s_errinjct.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __POWERPC_BOOK3S_ERRINJCT_H__
+#define __POWERPC_BOOK3S_ERRINJCT_H__
+
+/* Error injection handler */
+typedef int (*kvm_errinjct_func)(struct kvm_vcpu *vcpu, rtas_arg_t buf);
+
+#ifdef CONFIG_KVM_ERRINJCT
+
+/* RTAS services for error injection */
+enum {
+	kvm_errinjct_open_token,
+	kvm_errinjct_close_token,
+	kvm_errinjct_errinjct
+};
+
+/* Supported types of error injection */
+enum {
+	kvm_errinjct_min = 0,
+	kvm_errinjct_fatal,
+	kvm_errinjct_recover_random_evt,
+	kvm_errinjct_recover_special_evt,
+	kvm_errinjct_corrupted_page,
+	kvm_errinjct_corrupted_slb,
+	kvm_errinjct_translator_failure,
+	kvm_errinjct_ioa_bus_error,
+	kvm_errinjct_ioa_bus_error_64,
+	kvm_errinjct_platform_specific,
+	kvm_errinjct_corrupted_dcache_start,
+	kvm_errinjct_corrupted_dcache_end,
+	kvm_errinjct_corrupted_icache_start,
+	kvm_errinjct_corrupted_icache_end,
+	kvm_errinjct_corrupted_tlb_start,
+	kvm_errinjct_corrupted_tlb_end,
+	kvm_errinjct_upstream_io_error,
+	kvm_errinjct_max
+};
+
+/* Handler for specific type of error injection */
+struct kvm_errinjct_handler {
+	int opcode;
+	kvm_errinjct_func handler;
+};
+
+/* Tokens that have been opened */
+struct kvm_errinjct_token {
+	struct kvm *kvm;
+	int token;
+	struct list_head list;
+};
+
+int kvm_errinjct_register(int opcode, kvm_errinjct_func handler);
+int kvm_errinjct_unregister(int opcode);
+void kvmppc_errinjct_rtas(struct kvm_vcpu *vcpu,
+			  struct rtas_args *args, int flag);
+
+#else
+
+static inline int kvm_errinjct_register(int opcode,
+					kvm_errinjct_func handler)
+{
+	return 0;
+}
+
+static inline int kvm_errinjct_unregister(int opcode)
+{
+	return 0;	/* Stub: nothing to unregister without KVM_ERRINJCT */
+}
+
+#endif /* CONFIG_KVM_ERRINJCT */
+#endif /* __POWERPC_BOOK3S_ERRINJCT_H__ */
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 673038d..f221f66 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -97,6 +97,9 @@ endif
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
+kvm-book3s_64-objs-$(CONFIG_KVM_ERRINJCT) += \
+	book3s_errinjct.o
+
 kvm-book3s_64-objs-$(CONFIG_KVM_VFIO) += \
 	$(addprefix ../../../virt/kvm/, vfio.o)
 
diff --git a/arch/powerpc/kvm/book3s_errinjct.c b/arch/powerpc/kvm/book3s_errinjct.c
new file mode 100644
index 0000000..27a49ab
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_errinjct.c
@@ -0,0 +1,329 @@
+/*
+ * This file implements RTAS errinjct functionality for the book3s
+ * architecture. Because the individual errors that can be injected
+ * into the system are defined by a device tree node, it's reasonable
+ * to provide a mechanism to register the supported errors and their
+ * corresponding handlers.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/book3s_errinjct.h>
+#include <asm/hvcall.h>
+
+static struct kvm_errinjct_handler handlers[kvm_errinjct_max];	/* Indexed by opcode */
+static DEFINE_SPINLOCK(handler_lock);		/* Protects handlers[] */
+static LIST_HEAD(open_token_list);		/* Tokens currently open */
+static DEFINE_SPINLOCK(token_lock);		/* Protects open_token_list */
+static unsigned long *token_bitmap = NULL;	/* Allocated on first use */
+static int token_max = 1024;			/* Upper bound on token numbers */
+
+/**
+ * kvm_errinjct_register - Register error injection handler
+ * @opcode: identifies the error type to be injected
+ * @handler: function that emulates the error type
+ *
+ * Returns 0, or -EINVAL / -ERANGE / -EBUSY when the slot can't be taken.
+ */
+int kvm_errinjct_register(int opcode, kvm_errinjct_func handler)
+{
+	int ret = 0;
+
+	if (!opcode || !handler) {
+		pr_warn("%s: Invalid argument\n", __func__);
+		return -EINVAL;
+	}
+
+	if (opcode <= kvm_errinjct_min || opcode >= kvm_errinjct_max) {
+		pr_warn("%s: Opcode %d out of range (%d, %d)\n",
+			__func__, opcode, kvm_errinjct_min, kvm_errinjct_max);
+		return -ERANGE;
+	}
+
+	/* Claim the slot unless a handler is already attached */
+	spin_lock(&handler_lock);
+	if (handlers[opcode].handler) {
+		ret = -EBUSY;
+	} else {
+		handlers[opcode].opcode  = opcode;
+		handlers[opcode].handler = handler;
+	}
+	spin_unlock(&handler_lock);
+
+	if (ret)
+		pr_warn("%s: Opcode %d had attached handler\n",
+			__func__, opcode);
+	return ret;
+}
+
+/**
+ * kvm_errinjct_unregister - Unregister error injection handler
+ * @opcode: identifies the error type whose handler is dropped
+ *
+ * Clears the table slot for @opcode. Returns 0, or -ERANGE if @opcode
+ * lies outside (kvm_errinjct_min, kvm_errinjct_max).
+ */
+int kvm_errinjct_unregister(int opcode)
+{
+	bool valid = opcode > kvm_errinjct_min && opcode < kvm_errinjct_max;
+
+	/* The range check doesn't look at the table, so it needs no lock */
+	if (!valid) {
+		pr_warn("%s: Opcode %d out of range (%d, %d)\n",
+			__func__, opcode, kvm_errinjct_min, kvm_errinjct_max);
+		return -ERANGE;
+	}
+
+	spin_lock(&handler_lock);
+	handlers[opcode].opcode  = 0;
+	handlers[opcode].handler = NULL;
+	spin_unlock(&handler_lock);
+	return 0;
+}
+
+/* Allocate the lowest free token; returns it, or -ENOMEM / -ERANGE */
+static int kvm_errinjct_token_alloc(void)
+{
+	int token;
+
+	/* The token bitmap isn't initialized yet */
+	if (unlikely(!token_bitmap)) {
+		unsigned long *mem;
+
+		/* One bit per token; the caller holds a spinlock: don't sleep */
+		mem = kcalloc(BITS_TO_LONGS(token_max),
+			      sizeof(unsigned long), GFP_ATOMIC);
+		if (!mem) {
+			pr_err("%s: Out of memory!\n", __func__);
+			return -ENOMEM;
+		}
+
+		/* In case somebody else did it */
+		if (unlikely(token_bitmap))
+			kfree(mem);
+		else
+			token_bitmap = mem;
+	}
+
+	/* test_and_set_bit() retries if the bit was taken after the search */
+	do {
+		token = find_first_zero_bit(token_bitmap, token_max);
+		if (token >= token_max)
+			return -ERANGE;
+	} while (test_and_set_bit(token, token_bitmap));
+
+	return token;
+}
+
+/* Free token to the bitmap; out-of-range values are ignored */
+static void kvm_errinjct_token_free(int token)
+{
+	if (unlikely(!token_bitmap))
+		return;
+	/* A negative token would address memory before the bitmap */
+	if (unlikely(token < 0 || token >= token_max))
+		return;
+	clear_bit(token, token_bitmap);
+}
+
+/* Look up the open token of @kvm; optionally hand it back via @token */
+static bool kvm_errinjct_token_get(struct kvm *kvm,
+				   struct kvm_errinjct_token **token)
+{
+	struct kvm_errinjct_token *entry;
+
+	list_for_each_entry(entry, &open_token_list, list) {
+		if (entry->kvm != kvm)
+			continue;
+		/* Callers that only test for existence pass NULL */
+		if (token)
+			*token = entry;
+		return true;
+	}
+	return false;
+}
+
+/* Emulation handler for opening token; returns the token, or -1 on error */
+static int kvmppc_errinjct_open(struct kvm_vcpu *vcpu,
+				struct rtas_args *args)
+{
+	struct kvm_errinjct_token *t;
+	int token = -1;
+	int ret = 0;
+
+	/* Check the parameters */
+	if (args->nargs != 0 || args->nret != 2) {
+		pr_warn("%s: Breaking rule (#args: 0, #rets: 2)\n",
+			__func__);
+		ret = -1;
+		goto out;
+	}
+
+	/* Check if the guest has opened token */
+	spin_lock(&token_lock);
+	if (kvm_errinjct_token_get(vcpu->kvm, NULL)) {
+		ret = -4;
+		spin_unlock(&token_lock);
+		goto out;
+	}
+
+	/* Allocate token; failures come back as negative error codes */
+	token = kvm_errinjct_token_alloc();
+	if (token < 0) {
+		ret = -1;
+		spin_unlock(&token_lock);
+		goto out;
+	}
+
+	/* Attach open token; token_lock is held, so don't sleep */
+	t = kzalloc(sizeof(*t), GFP_ATOMIC);
+	if (!t) {
+		ret = -2;
+		pr_warn("%s: Out of memory !\n", __func__);
+		kvm_errinjct_token_free(token);
+		spin_unlock(&token_lock);
+		goto out;
+	}
+	t->kvm   = vcpu->kvm;
+	t->token = token;
+	INIT_LIST_HEAD(&t->list);
+	list_add_tail(&t->list, &open_token_list);
+	spin_unlock(&token_lock);
+out:
+	args->rets[1] = ret;
+	return ret == 0 ? token : -1;
+}
+
+/* Emulate ibm,close-errinjct: drop the token this VM holds, if any */
+static int kvmppc_errinjct_close(struct kvm_vcpu *vcpu,
+				 struct rtas_args *args)
+{
+	struct kvm_errinjct_token *held = NULL;
+	bool found;
+
+	/* The call takes one argument and returns one value */
+	if (args->nargs != 1 || args->nret != 1) {
+		pr_warn("%s: Breaking rule (#args: 1, #rets: 1)\n",
+			__func__);
+		return -1;
+	}
+
+	/* Unlink the token and release its bitmap slot under the lock */
+	spin_lock(&token_lock);
+	found = kvm_errinjct_token_get(vcpu->kvm, &held);
+	if (found) {
+		list_del(&held->list);
+		kvm_errinjct_token_free(held->token);
+	}
+	spin_unlock(&token_lock);
+
+	/* -4: this VM had no open token to close */
+	if (!found)
+		return -4;
+
+	/* Already off the list, so it can be freed outside the lock */
+	kfree(held);
+
+	return 0;
+}
+
+/*
+ * Emulation handler for error injection. After checking the arguments,
+ * the request is dispatched to the registered handler, if there is one.
+ */
+static int kvmppc_errinjct(struct kvm_vcpu *vcpu,
+			   struct rtas_args *args)
+{
+	struct kvm_errinjct_token *t;
+	kvm_errinjct_func func;
+	int token, opcode, ret = 0;
+	rtas_arg_t buf;
+
+	/* Check the parameters */
+	if (args->nargs != 3 || args->nret != 1) {
+		pr_warn("%s: Breaking rule (#args: 3, #rets: 1)\n",
+			__func__);
+		ret = -3;
+		goto out;
+	}
+
+	/* Check opcode and buffer (low 10 address bits must be clear) */
+	opcode = args->args[0];
+	token  = args->args[1];
+	buf    = args->args[2];
+	if (opcode < kvm_errinjct_min || opcode >= kvm_errinjct_max ||
+	    (buf & 0x3fful)) {
+		ret = -3;
+		goto out;
+	}
+
+	/* Check if the VM has the opened token */
+	spin_lock(&token_lock);
+	if (!kvm_errinjct_token_get(vcpu->kvm, &t) ||
+	    t->token != token) {
+		ret = -4;
+		spin_unlock(&token_lock);
+		goto out;
+	}
+	spin_unlock(&token_lock);
+
+	/*
+	 * Fetch the handler under the lock but call it unlocked: handlers
+	 * may sleep (e.g. in kvm_read_guest()), which a spinlock forbids.
+	 */
+	spin_lock(&handler_lock);
+	func = handlers[opcode].handler;
+	spin_unlock(&handler_lock);
+	ret = func ? func(vcpu, buf) : -3;
+out:
+	return ret;
+}
+
+/**
+ * kvmppc_errinjct_rtas - Common handler for error injection emulation
+ * @vcpu: KVM virtual CPU
+ * @args: RTAS call arguments
+ * @flag: error injection service indicator
+ *
+ * Entry point for all emulated error injection RTAS calls. @flag picks
+ * the service; its result (-1 if @flag is unknown) lands in args->rets[0].
+ */
+void kvmppc_errinjct_rtas(struct kvm_vcpu *vcpu,
+			  struct rtas_args *args, int flag)
+{
+	int status;
+
+	switch (flag) {
+	case kvm_errinjct_open_token:
+		status = kvmppc_errinjct_open(vcpu, args);
+		break;
+	case kvm_errinjct_close_token:
+		status = kvmppc_errinjct_close(vcpu, args);
+		break;
+	case kvm_errinjct_errinjct:
+		status = kvmppc_errinjct(vcpu, args);
+		break;
+	default:
+		pr_warn("%s: Unsupported option %d\n",
+			__func__, flag);
+		status = -1;
+		break;
+	}
+
+	/* Slot 0 of the return buffer carries the service's result */
+	args->rets[0] = status;
+}
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 17bdb4a..030b006 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -18,6 +18,7 @@
 #include <asm/rtas.h>
 #include <asm/ppc-pci.h>
 #include <asm/eeh.h>
+#include <asm/book3s_errinjct.h>
 
 #ifdef CONFIG_KVM_XICS
 static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -123,6 +124,21 @@ KVM_RTAS_EEH_FUNC(configure_pe,	eeh_rtas_configure_pe)
 
 #endif /* CONFIG_KVM_EEH */
 
+#ifdef CONFIG_KVM_ERRINJCT
+
+#define KVM_RTAS_ERRINJCT_FUNC(name, flag)			\
+static void kvm_rtas_errinjct_##name(struct kvm_vcpu *vcpu,	\
+				     struct rtas_args *args)	\
+{								\
+	kvmppc_errinjct_rtas(vcpu, args, flag);			\
+}
+/* Each use expands to a whole function, so no trailing ';' is wanted */
+KVM_RTAS_ERRINJCT_FUNC(open_token,	kvm_errinjct_open_token)
+KVM_RTAS_ERRINJCT_FUNC(close_token,	kvm_errinjct_close_token)
+KVM_RTAS_ERRINJCT_FUNC(errinjct,	kvm_errinjct_errinjct)
+
+#endif /* CONFIG_KVM_ERRINJCT */
+
 struct rtas_handler {
 	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
 	char *name;
@@ -153,8 +169,19 @@ static struct rtas_handler rtas_handlers[] = {
 	},
 	{ .name = "ibm,configure-pe",
 	  .handler = kvm_rtas_eeh_configure_pe
-	}
+	},
 #endif /* CONFIG_KVM_EEH */
+#ifdef CONFIG_KVM_ERRINJCT
+	{ .name = "ibm,open-errinjct",
+	  .handler = kvm_rtas_errinjct_open_token
+	},
+	{ .name = "ibm,close-errinjct",
+	  .handler = kvm_rtas_errinjct_close_token
+	},
+	{ .name = "ibm,errinjct",
+	  .handler = kvm_rtas_errinjct_errinjct
+	},
+#endif /* CONFIG_KVM_ERRINJCT */
 };
 
 struct rtas_token_definition {
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 21/22] powerpc/powernv: Sync OPAL header file with firmware
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch synchronizes the OPAL header file with firmware so that the
host kernel can make the OPAL call to do error injection.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h                | 65 ++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal-wrappers.S |  1 +
 2 files changed, 66 insertions(+)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 66ad7a7..ca55d9c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -175,6 +175,7 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_SET_PARAM				90
 #define OPAL_DUMP_RESEND			91
 #define OPAL_DUMP_INFO2				94
+#define OPAL_ERR_INJECT				96
 
 #ifndef __ASSEMBLY__
 
@@ -219,6 +220,69 @@ enum OpalPciErrorSeverity {
 	OPAL_EEH_SEV_INF	= 5
 };
 
+enum OpalErrinjctType {
+	OpalErrinjctTypeFirst			= 0,
+	OpalErrinjctTypeFatal			= 1,
+	OpalErrinjctTypeRecoverRandomEvent	= 2,
+	OpalErrinjctTypeRecoverSpecialEvent	= 3,
+	OpalErrinjctTypeCorruptedPage		= 4,
+	OpalErrinjctTypeCorruptedSlb		= 5,
+	OpalErrinjctTypeTranslatorFailure	= 6,
+	OpalErrinjctTypeIoaBusError		= 7,
+	OpalErrinjctTypeIoaBusError64		= 8,
+	OpalErrinjctTypePlatformSpecific	= 9,
+	OpalErrinjctTypeDcacheStart		= 10,
+	OpalErrinjctTypeDcacheEnd		= 11,
+	OpalErrinjctTypeIcacheStart		= 12,
+	OpalErrinjctTypeIcacheEnd		= 13,
+	OpalErrinjctTypeTlbStart		= 14,
+	OpalErrinjctTypeTlbEnd			= 15,
+	OpalErrinjctTypeUpstreamIoError		= 16,
+	OpalErrinjctTypeLast			= 17,
+
+	/* IoaBusError & IoaBusError64 sub-values; numbering restarts at 0 */
+	OpalEjtIoaLoadMemAddr			= 0,
+	OpalEjtIoaLoadMemData			= 1,
+	OpalEjtIoaLoadIoAddr			= 2,
+	OpalEjtIoaLoadIoData			= 3,
+	OpalEjtIoaLoadConfigAddr		= 4,
+	OpalEjtIoaLoadConfigData		= 5,
+	OpalEjtIoaStoreMemAddr			= 6,
+	OpalEjtIoaStoreMemData			= 7,
+	OpalEjtIoaStoreIoAddr			= 8,
+	OpalEjtIoaStoreIoData			= 9,
+	OpalEjtIoaStoreConfigAddr		= 10,
+	OpalEjtIoaStoreConfigData		= 11,
+	OpalEjtIoaDmaReadMemAddr		= 12,
+	OpalEjtIoaDmaReadMemData		= 13,
+	OpalEjtIoaDmaReadMemMaster		= 14,
+	OpalEjtIoaDmaReadMemTarget		= 15,
+	OpalEjtIoaDmaWriteMemAddr		= 16,
+	OpalEjtIoaDmaWriteMemData		= 17,
+	OpalEjtIoaDmaWriteMemMaster		= 18,
+	OpalEjtIoaDmaWriteMemTarget		= 19,
+};
+
+struct OpalErrinjct {
+	int32_t type;			/* An OpalErrinjctType* value */
+	union {
+		struct {		/* Used with OpalErrinjctTypeIoaBusError */
+			uint32_t addr;
+			uint32_t mask;
+			uint64_t phb_id;
+			uint32_t pe;
+			uint32_t function;
+		}ioa;
+		struct {		/* Used with OpalErrinjctTypeIoaBusError64 */
+			uint64_t addr;
+			uint64_t mask;
+			uint64_t phb_id;
+			uint32_t pe;
+			uint32_t function;
+		}ioa64;
+	};
+};
+
 enum OpalShpcAction {
 	OPAL_SHPC_GET_LINK_STATE = 0,
 	OPAL_SHPC_GET_SLOT_STATE = 1
@@ -839,6 +903,7 @@ int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer,
 				   uint64_t diag_buffer_len);
 int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, void *diag_buffer,
 				    uint64_t diag_buffer_len);
+int64_t opal_err_injct(void *data);
 int64_t opal_pci_fence_phb(uint64_t phb_id);
 int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data);
 int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index f531ffe..46265de 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -119,6 +119,7 @@ OPAL_CALL(opal_pci_next_error,			OPAL_PCI_NEXT_ERROR);
 OPAL_CALL(opal_pci_poll,			OPAL_PCI_POLL);
 OPAL_CALL(opal_pci_msi_eoi,			OPAL_PCI_MSI_EOI);
 OPAL_CALL(opal_pci_get_phb_diag_data2,		OPAL_PCI_GET_PHB_DIAG_DATA2);
+OPAL_CALL(opal_err_injct,			OPAL_ERR_INJECT);
 OPAL_CALL(opal_xscom_read,			OPAL_XSCOM_READ);
 OPAL_CALL(opal_xscom_write,			OPAL_XSCOM_WRITE);
 OPAL_CALL(opal_lpc_read,			OPAL_LPC_READ);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 19/22] powerpc: Introduce CONFIG_KVM_ERRINJCT
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch introduces kernel configuration option KVM_ERRINJCT. It
enables emulating error injection RTAS services used on IBM POWER
(pSeries) servers.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/Kconfig | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 6764fc5..914ab05 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -198,6 +198,14 @@ config KVM_EEH
 	  Enable support for emulating EEH RTAS services used on IBM
 	  POWER (pSeries) servers.
 
+config KVM_ERRINJCT
+	bool "KVM in-kernel error injection emulation"
+	depends on KVM_EEH
+	default y
+	---help---
+	  Enable support for emulating error injection services used
+	  on IBM POWER (pSeries) servers.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 22/22] powerpc/powernv: Support PCI error injection
From: Gavin Shan @ 2014-05-05  1:28 UTC (permalink / raw)
  To: linuxppc-dev, kvm, kvm-ppc; +Cc: aik, alex.williamson, qiudayu, Gavin Shan
In-Reply-To: <1399253291-3975-1-git-send-email-gwshan@linux.vnet.ibm.com>

The patch introduces the infrastructure of the error injection backend
for the PowerNV platform. For now, we just implement the logic to inject
PCI errors. We need to support injecting other types of errors in the
future.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s_errinjct.h |  19 +++
 arch/powerpc/platforms/powernv/Makefile    |   1 +
 arch/powerpc/platforms/powernv/errinjct.c  | 215 +++++++++++++++++++++++++++++
 3 files changed, 235 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/errinjct.c

diff --git a/arch/powerpc/include/asm/book3s_errinjct.h b/arch/powerpc/include/asm/book3s_errinjct.h
index 35712be..75443ad 100644
--- a/arch/powerpc/include/asm/book3s_errinjct.h
+++ b/arch/powerpc/include/asm/book3s_errinjct.h
@@ -56,6 +56,25 @@ struct kvm_errinjct_token {
 	struct list_head list;
 };
 
+/* Argument buffer for various operations */
+struct kvm_errinjct_ioa_bus {
+	uint32_t addr;		/* Forwarded to OPAL as ioa.addr	*/
+	uint32_t mask;		/* Forwarded to OPAL as ioa.mask	*/
+	uint32_t cfg_addr;	/* Guest PE address			*/
+	uint32_t buid_hi;	/* PHB BUID high			*/
+	uint32_t buid_lo;	/* PHB BUID low				*/
+	uint32_t op;		/* Operation, sent as ioa.function	*/
+};
+
+struct kvm_errinjct_ioa_bus64 {
+	uint64_t addr;		/* Forwarded to OPAL as ioa64.addr	*/
+	uint64_t mask;		/* Forwarded to OPAL as ioa64.mask	*/
+	uint32_t cfg_addr;	/* Guest PE address			*/
+	uint32_t buid_hi;	/* PHB BUID high			*/
+	uint32_t buid_lo;	/* PHB BUID low				*/
+	uint32_t op;		/* Operation, sent as ioa64.function	*/
+};
+
 int kvm_errinjct_register(int opcode, kvm_errinjct_func handler);
 int kvm_errinjct_unregister(int opcode);
 void kvmppc_errinjct_rtas(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index d8ea670..d096b18 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
 obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
 obj-$(CONFIG_KVM_EEH)	+= eeh-rtas.o
+obj-$(CONFIG_KVM_ERRINJCT)	+= errinjct.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/errinjct.c b/arch/powerpc/platforms/powernv/errinjct.c
new file mode 100644
index 0000000..ccc7853
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/errinjct.c
@@ -0,0 +1,215 @@
+/*
+ * Backend for error injection implemented on PowerNV platform.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+
+#include <asm/uaccess.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/eeh.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/book3s_errinjct.h>
+#include <asm/hvcall.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/* Inject a 32-bit IOA bus error via OPAL; returns 0, or -1 / -3 on failure */
+static int powernv_errinjct_ioa(struct kvm_vcpu *vcpu, rtas_arg_t buf)
+{
+	struct OpalErrinjct ej = { 0 };	/* No stack garbage to firmware */
+	struct kvm_errinjct_ioa_bus args;
+	struct eeh_vfio_pci_addr addr = { .kvm = NULL };
+	struct eeh_pe *pe;
+	struct pnv_phb *phb;
+	long rc;
+	int ret = 0;
+
+	/* Word aligned buffer */
+	if (buf & 0x3) {
+		ret = -3;
+		goto out;
+	}
+
+	/* Copy over argument */
+	ret = kvm_read_guest(vcpu->kvm, buf, &args, sizeof(args));
+	if (ret) {
+		pr_warn("%s: Can't copyover arguments (%d)\n",
+			__func__, ret);
+		ret = -3;
+		goto out;
+	}
+
+	/*
+	 * Sanity check on operation ("op" is unsigned, so only the upper
+	 * bound matters). Operations 20 and 21 aren't supported for now.
+	 */
+	if (args.op > 21) {
+		ret = -3;
+		goto out;
+	} else if (args.op >= 20) {
+		ret = -1;
+		goto out;
+	}
+
+	/*
+	 * Only do error injection on passthrough PE. It's notable
+	 * the "cfg_addr" is guest PE address
+	 */
+	addr.kvm = vcpu->kvm;
+	addr.buid_hi = args.buid_hi;
+	addr.buid_lo = args.buid_lo;
+	addr.pe_addr = args.cfg_addr;
+	pe = eeh_vfio_pe_get(&addr);
+	if (!pe) {
+		pr_warn("%s: Can't find passed PE (%08x-%08x-%08x)\n",
+			__func__, args.buid_hi, args.buid_lo, args.cfg_addr);
+		ret = -3;
+		goto out;
+	}
+
+	/*
+	 * Calling to OPAL API. We need host PE address
+	 * and PHB host BUID.
+	 */
+	phb = pe->phb->private_data;
+	ej.type		= OpalErrinjctTypeIoaBusError;
+	ej.ioa.addr	= args.addr;
+	ej.ioa.mask	= args.mask;
+	ej.ioa.phb_id	= phb->opal_id;
+	ej.ioa.pe	= pe->addr;
+	ej.ioa.function	= args.op;
+	rc = opal_err_injct(&ej);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: OPAL API returns %ld\n", __func__, rc);
+		ret = -1;
+		goto out;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
+/* Inject a 64-bit IOA bus error via OPAL; returns 0, or -1 / -3 on failure */
+static int powernv_errinjct_ioa64(struct kvm_vcpu *vcpu, rtas_arg_t buf)
+{
+	struct OpalErrinjct ej = { 0 };	/* No stack garbage to firmware */
+	struct kvm_errinjct_ioa_bus64 args;
+	struct eeh_vfio_pci_addr addr = { .kvm = NULL };
+	struct eeh_pe *pe;
+	struct pnv_phb *phb;
+	long rc;
+	int ret = 0;
+
+	/* Double word aligned buffer */
+	if (buf & 0x7) {
+		ret = -3;
+		goto out;
+	}
+
+	/* Copy over argument */
+	ret = kvm_read_guest(vcpu->kvm, buf, &args, sizeof(args));
+	if (ret) {
+		pr_warn("%s: Can't copyover arguments (%d)\n",
+			__func__, ret);
+		ret = -3;
+		goto out;
+	}
+
+	/*
+	 * Sanity check on operation ("op" is unsigned, so only the upper
+	 * bound matters). Operations 20 and 21 aren't supported for now.
+	 */
+	if (args.op > 21) {
+		ret = -3;
+		goto out;
+	} else if (args.op >= 20) {
+		ret = -1;
+		goto out;
+	}
+
+	/*
+	 * Only do error injection on passthrough PE. It's notable
+	 * that "cfg_addr" is guest PE address
+	 */
+	addr.kvm = vcpu->kvm;
+	addr.buid_hi = args.buid_hi;
+	addr.buid_lo = args.buid_lo;
+	addr.pe_addr = args.cfg_addr;
+	pe = eeh_vfio_pe_get(&addr);
+	if (!pe) {
+		pr_warn("%s: Can't find passed PE (%08x-%08x-%08x)\n",
+			__func__, args.buid_hi, args.buid_lo, args.cfg_addr);
+		ret = -3;
+		goto out;
+	}
+
+	/*
+	 * Calling to OPAL API. We need host PE address
+	 * and PHB host BUID.
+	 */
+	phb = pe->phb->private_data;
+	ej.type		  = OpalErrinjctTypeIoaBusError64;
+	ej.ioa64.addr	  = args.addr;
+	ej.ioa64.mask	  = args.mask;
+	ej.ioa64.phb_id	  = phb->opal_id;
+	ej.ioa64.pe	  = pe->addr;
+	ej.ioa64.function = args.op;
+	rc = opal_err_injct(&ej);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: OPAL API returns %ld\n", __func__, rc);
+		ret = -1;
+		goto out;
+	}
+
+	/* Success */
+	ret = 0;
+out:
+	return ret;
+}
+
+static struct kvm_errinjct_handler handlers[] = {	/* Registered at init */
+	{ .opcode  = kvm_errinjct_ioa_bus_error,	/* 32-bit addr/mask */
+	  .handler = powernv_errinjct_ioa
+	},
+	{ .opcode  = kvm_errinjct_ioa_bus_error_64,	/* 64-bit addr/mask */
+	  .handler = powernv_errinjct_ioa64
+	}
+};
+
+/* Register every entry of the handlers[] table with KVM */
+static int __init powernv_errinjct_init(void)
+{
+	struct kvm_errinjct_handler *h = handlers;
+	int left, ret = 0;
+
+	/* Register in table order; give up on the first failure */
+	for (left = ARRAY_SIZE(handlers); left > 0 && !ret; left--, h++) {
+		ret = kvm_errinjct_register(h->opcode, h->handler);
+		if (ret)
+			pr_warn("%s: Failure registering handler %d (%d)\n",
+				__func__, h->opcode, ret);
+	}
+
+	return ret;
+}
+
+module_init(powernv_errinjct_init);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH] powerpc/eeh: Fix build error for celleb
From: Gavin Shan @ 2014-05-05  2:09 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev, Gavin Shan

Commit 7f52a526f ("powerpc/eeh: Allow to disable EEH") caused the
following build error with "celleb_defconfig", as caught by Mikey
on linux-next.

arch/powerpc/kernel/eeh.c: In function 'eeh_init_proc':
arch/powerpc/kernel/eeh.c:1173:37: error: 'powerpc_debugfs_root' \
undeclared (first use in this function)
arch/powerpc/kernel/eeh.c:1173:37: note: each undeclared identifier \
is reported only once for each function it appears in

Reported-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 33d683a..9c6b899 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -36,6 +36,7 @@
 #include <linux/of.h>
 
 #include <linux/atomic.h>
+#include <asm/debug.h>
 #include <asm/eeh.h>
 #include <asm/eeh_event.h>
 #include <asm/io.h>
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH V5] KVM: PPC: BOOK3S: PR: Enable Little Endian PR guest
From: Aneesh Kumar K.V @ 2014-05-05  3:09 UTC (permalink / raw)
  To: agraf, benh, paulus; +Cc: linuxppc-dev, kvm, kvm-ppc, Aneesh Kumar K.V

This patch makes sure we inherit the LE bit correctly in the different
cases so that we can run a Little Endian distro in PR mode.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
Changes from V4:
* Don't check for MSR_LE bit while setting LPCR. 

 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/kernel/asm-offsets.c   |  2 +-
 arch/powerpc/kvm/book3s_64_mmu.c    |  2 +-
 arch/powerpc/kvm/book3s_pr.c        | 23 ++++++++++++++++++++++-
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1eaea2dea174..d342f8efc843 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -562,6 +562,7 @@ struct kvm_vcpu_arch {
 #ifdef CONFIG_PPC_BOOK3S
 	ulong fault_dar;
 	u32 fault_dsisr;
+	unsigned long intr_msr;
 #endif
 
 #ifdef CONFIG_BOOKE
@@ -654,7 +655,6 @@ struct kvm_vcpu_arch {
 	spinlock_t tbacct_lock;
 	u64 busy_stolen;
 	u64 busy_preempt;
-	unsigned long intr_msr;
 #endif
 };
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index dba8140ebc20..6a4b77d197f3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -493,7 +493,6 @@ int main(void)
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
-	DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -528,6 +527,7 @@ int main(void)
 	DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
 	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
 	DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
+	DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
 	DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 83da1f868fd5..8231b83c493b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -38,7 +38,7 @@
 
 static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
 {
-	kvmppc_set_msr(vcpu, MSR_SF);
+	kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
 }
 
 static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index f30cdfee800d..01a7156d055c 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -249,7 +249,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 	ulong smsr = vcpu->arch.shared->msr;
 
 	/* Guest MSR values */
-	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
+	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
 	/* Process MSR values */
 	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
 	/* External providers the guest reserved */
@@ -1118,6 +1118,15 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 	case KVM_REG_PPC_HIOR:
 		*val = get_reg_val(id, to_book3s(vcpu)->hior);
 		break;
+	case KVM_REG_PPC_LPCR:
+		/*
+		 * We are only interested in the LPCR_ILE bit
+		 */
+		if (vcpu->arch.intr_msr & MSR_LE)
+			*val = get_reg_val(id, LPCR_ILE);
+		else
+			*val = get_reg_val(id, 0);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -1126,6 +1135,14 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 	return r;
 }
 
+static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+	unsigned long msr = vcpu->arch.intr_msr & ~MSR_LE;
+
+	/* Mirror LPCR_ILE into the MSR_LE bit of intr_msr; ignore the rest */
+	vcpu->arch.intr_msr = (new_lpcr & LPCR_ILE) ? (msr | MSR_LE) : msr;
+}
+
 static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 				 union kvmppc_one_reg *val)
 {
@@ -1136,6 +1153,9 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
 		to_book3s(vcpu)->hior = set_reg_val(id, *val);
 		to_book3s(vcpu)->hior_explicit = true;
 		break;
+	case KVM_REG_PPC_LPCR:
+		kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -1188,6 +1208,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
 	vcpu->arch.pvr = 0x3C0301;
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		vcpu->arch.pvr = mfspr(SPRN_PVR);
+	vcpu->arch.intr_msr = MSR_SF;
 #else
 	/* default to book3s_32 (750) */
 	vcpu->arch.pvr = 0x84202;
-- 
1.9.1

^ permalink raw reply related

* [PATCH] powerpc: Fix comment around arch specific definition of RECLAIM_DISTANCE
From: Preeti U Murthy @ 2014-05-05  5:17 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: anton, kosaki.motohiro

Commit 32e45ff43eaf5c17f changed the default value of
RECLAIM_DISTANCE to 30. However, the comment around the arch
specific definition of RECLAIM_DISTANCE was not updated to
reflect this. Correct the value mentioned in the comment.

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

 arch/powerpc/include/asm/topology.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c920215..356546d 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -12,7 +12,7 @@ struct device_node;
  * Before going off node we want the VM to try and reclaim from the local
  * node. It does this if the remote distance is larger than RECLAIM_DISTANCE.
  * With the default REMOTE_DISTANCE of 20 and the default RECLAIM_DISTANCE of
- * 20, we never reclaim and go off node straight away.
+ * 30, we never reclaim and go off node straight away.
  *
  * To fix this we choose a smaller value of RECLAIM_DISTANCE.
  */

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox