LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH
From: Gavin Shan @ 2012-02-29  1:08 UTC (permalink / raw)
  To: David Laight, linuxppc-dev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B6E82@saturn3.aculab.com>

>  
> > +struct eeh_stats {
> > +	unsigned int no_device;		/* PCI device not found */
> ...
> > +				"no device           =%d\n"
> ...
> 
> Use %u (for all the stats), you really don't want negative
> values printed.

Yes. 

> I've NFI how long wrapping these counters might take!
> If it is feasible (maybe much above 100Hz) then you
> need 64bit counters.
> 

I think it's better to use "u64" here ;-)

> 	David
> 

Thanks,
Gavin
 

^ permalink raw reply

* [PATCH] KVM: PPC: Don't sync timebase when inside KVM
From: Alexander Graf @ 2012-02-29  2:16 UTC (permalink / raw)
  To: kvm-ppc; +Cc: Scott Wood, linuxppc-dev, kvm

When we know that we're running inside of a KVM guest, we don't have to
worry about synchronizing timebases between different CPUs, since the
host already took care of that.

This fixes CPU overcommit scenarios where vCPUs could hang forever trying
to sync each other while not being scheduled.

Reported-by: Stuart Yoder <B08248@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kernel/smp.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 46695fe..670b453 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -49,6 +49,8 @@
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #endif
+#include <linux/kvm_para.h>
+#include <asm/kvm_para.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
@@ -541,7 +543,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
 
 	DBG("Processor %u found.\n", cpu);
 
-	if (smp_ops->give_timebase)
+	if (!kvm_para_available() && smp_ops->give_timebase)
 		smp_ops->give_timebase();
 
 	/* Wait until cpu puts itself in the online map */
@@ -626,7 +628,7 @@ void __devinit start_secondary(void *unused)
 
 	if (smp_ops->setup_cpu)
 		smp_ops->setup_cpu(cpu);
-	if (smp_ops->take_timebase)
+	if (!kvm_para_available() && smp_ops->take_timebase)
 		smp_ops->take_timebase();
 
 	secondary_cpu_time_init();
-- 
1.6.0.2

^ permalink raw reply related

* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH
From: Gavin Shan @ 2012-02-29  2:25 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1330409051-8941-21-git-send-email-shangw@linux.vnet.ibm.com>

With the original EEH implementation, the EEH global statistics
are maintained by individual global variables. That makes the
code a little hard to maintain.

The patch introduces extra struct eeh_stats for the EEH global
statistics so that it can be maintained in collective fashion.

It's the rework on the corresponding v5 patch. According to
the comments from David Laight, the EEH global statistics have
been changed a little bit so that they have the fixed type of
"u64". Also, the format used to print them has been changed to
"%llu" based on David's suggestion.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/eeh.c |   65 ++++++++++++++++++++--------------
 1 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 9b1fd0c..753ec8a 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -102,14 +102,22 @@ static DEFINE_RAW_SPINLOCK(confirm_error_lock);
 #define EEH_PCI_REGS_LOG_LEN 4096
 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 
-/* System monitoring statistics */
-static unsigned long no_device;
-static unsigned long no_dn;
-static unsigned long no_cfg_addr;
-static unsigned long ignored_check;
-static unsigned long total_mmio_ffs;
-static unsigned long false_positives;
-static unsigned long slot_resets;
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+	u64 no_device;		/* PCI device not found		*/
+	u64 no_dn;		/* OF node not found		*/
+	u64 no_cfg_addr;	/* Config address not found	*/
+	u64 ignored_check;	/* EEH check skipped		*/
+	u64 total_mmio_ffs;	/* Total EEH checks		*/
+	u64 false_positives;	/* Unnecessary EEH checks	*/
+	u64 slot_resets;	/* PE reset			*/
+};
+
+static struct eeh_stats eeh_stats;
 
 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
 
@@ -392,13 +400,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	int rc = 0;
 	const char *location;
 
-	total_mmio_ffs++;
+	eeh_stats.total_mmio_ffs++;
 
 	if (!eeh_subsystem_enabled)
 		return 0;
 
 	if (!dn) {
-		no_dn++;
+		eeh_stats.no_dn++;
 		return 0;
 	}
 	dn = eeh_find_device_pe(dn);
@@ -407,14 +415,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	/* Access to IO BARs might get this far and still not want checking. */
 	if (!(edev->mode & EEH_MODE_SUPPORTED) ||
 	    edev->mode & EEH_MODE_NOCHECK) {
-		ignored_check++;
+		eeh_stats.ignored_check++;
 		pr_debug("EEH: Ignored check (%x) for %s %s\n",
 			edev->mode, eeh_pci_name(dev), dn->full_name);
 		return 0;
 	}
 
 	if (!edev->config_addr && !edev->pe_config_addr) {
-		no_cfg_addr++;
+		eeh_stats.no_cfg_addr++;
 		return 0;
 	}
 
@@ -460,13 +468,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	    (ret == EEH_STATE_NOT_SUPPORT) ||
 	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
 	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
-		false_positives++;
+		eeh_stats.false_positives++;
 		edev->false_positives ++;
 		rc = 0;
 		goto dn_unlock;
 	}
 
-	slot_resets++;
+	eeh_stats.slot_resets++;
  
 	/* Avoid repeated reports of this failure, including problems
 	 * with other functions on this device, and functions under
@@ -513,7 +521,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 	addr = eeh_token_to_phys((unsigned long __force) token);
 	dev = pci_addr_cache_get_device(addr);
 	if (!dev) {
-		no_device++;
+		eeh_stats.no_device++;
 		return val;
 	}
 
@@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 {
 	if (0 == eeh_subsystem_enabled) {
 		seq_printf(m, "EEH Subsystem is globally disabled\n");
-		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
+		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
 	} else {
 		seq_printf(m, "EEH Subsystem is enabled\n");
 		seq_printf(m,
-				"no device=%ld\n"
-				"no device node=%ld\n"
-				"no config address=%ld\n"
-				"check not wanted=%ld\n"
-				"eeh_total_mmio_ffs=%ld\n"
-				"eeh_false_positives=%ld\n"
-				"eeh_slot_resets=%ld\n",
-				no_device, no_dn, no_cfg_addr, 
-				ignored_check, total_mmio_ffs, 
-				false_positives,
-				slot_resets);
+				"no device           =%llu\n"
+				"no device node      =%llu\n"
+				"no config address   =%llu\n"
+				"check not wanted    =%llu\n"
+				"eeh_total_mmio_ffs  =%llu\n"
+				"eeh_false_positives =%llu\n"
+				"eeh_slot_resets     =%llu\n",
+				eeh_stats.no_device,
+				eeh_stats.no_dn,
+				eeh_stats.no_cfg_addr,
+				eeh_stats.ignored_check,
+				eeh_stats.total_mmio_ffs,
+				eeh_stats.false_positives,
+				eeh_stats.slot_resets);
 	}
 
 	return 0;
-- 
1.7.5.4

 

^ permalink raw reply related

* RE: [PATCH V3] fsl-sata: add support for interrupt coalsecing feature
From: Liu Qiang-B32616 @ 2012-02-29  2:54 UTC (permalink / raw)
  To: jgarzik@pobox.com
  Cc: linux-ide@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org, Li Yang-R58472
In-Reply-To: <CADRPPNTmRvPVymJmW4vagU+2M4DmqeDSEj=4=R_=YDJekOTccQ@mail.gmail.com>

Hi Jeff,

Do you plan to apply it to upstream, or any suggestions? Thanks.

> -----Original Message-----
> From: linux-ide-owner@vger.kernel.org [mailto:linux-ide-
> owner@vger.kernel.org] On Behalf Of Li Yang
> Sent: Wednesday, February 15, 2012 3:51 PM
> To: Liu Qiang-B32616
> Cc: jgarzik@pobox.com; linux-ide@vger.kernel.org; linux-
> kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH V3] fsl-sata: add support for interrupt coalsecing
> feature
> 
> On Wed, Feb 15, 2012 at 3:40 PM, Qiang Liu <qiang.liu@freescale.com>
> wrote:
> > Adds support for interrupt coalescing feature to reduce interrupt
> events.
> > Provides a mechanism of adjusting coalescing count and timeout tick by
> > sysfs at runtime, so that tradeoff of latency and CPU load can be made
> > depending on different applications.
> >
> > Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> 
> Acked-by: Li Yang <leoli@freescale.com>
> 
> - Leo
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ide" in
> the body of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v5 00/21] EEH reorganization
From: Gavin Shan @ 2012-02-29  3:04 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev
In-Reply-To: <1330409051-8941-1-git-send-email-shangw@linux.vnet.ibm.com>

Hi Ben,

Could you pls take a look on this when you have time?

Thanks,
Gavin

> This series of patches is going to reorganize EEH so that it could support
> multiple platforms in future. The requirements were raised from the aspects.
> 
> 	* The original EEH implementation only support pSeries platform, which
> 	  would be regarded as guest system. Platform powernv is coming and EEH
> 	  needs to be supported on powernv as well.
> 	* Different platforms might be running based on variable firmware.
> 	  Furthermore, the firmware would supply different EEH interfaces to kernel.
> 	  Therefore, we have to do necessary abstraction on current EEH implementation.
> 
> In order to accommodate the requirements, the series of patches have reorganized
> current EEH implementation.
> 
> 	* The original implementation looks not clean enough. Necessary cleanup
> 	  will be done in some of the patches.
> 	* struct eeh_ops has been introduced so that EEH core components and platform
> 	  dependent implementation could be split up. That make it possible for EEH
> 	  to be supported on multiple platforms.
> 	* struct eeh_dev has been introduced to replace struct pci_dn so that EEH module
> 	  works independently as much as possible.
> 	* EEH global statistics will be maintained in a collective fashion.
> 
> v1 -> v2:
> 
> 	* If possible, to add "eeh_" prefix for function names.
> 	* The format of leading function comments won't be changed in order not to
> 	  break kernel document automatic generation (e.g. by "make pdfdocs").
> 	* The name of local variables won't be changed if there're no explicit reasons.
> 	* Represent the PE's state in bitmap fashion.
> 	* Some function names have been adjusted so that they look shorter and
> 	  meaningful.
> 	* Platform operation name has been changed to "pseries".
> 	* Merge those patches for cleanup if possible.
> 	* The line length is kept as appropriately short if possible.
> 	* Fixup on alignment & spacing issues.
> 
> v2 -> v3:
> 	* Split cleanup patch into 2: one for comment cleanup and another one for
> 	  renaming function names.
> 	* Try to use pr_warning/pr_info/pr_debug instead of printk() function call.
> 	* Function names are adjusted a little bit so that they looks more meaningful
> 	  according to comments from Michael/Ben.
> 	* Useful comment has been kept according to Michael's comments.
> 	* struct eeh_ops::set_eeh has been changed to eeh_ops::set_option.
> 	* struct eeh_ops::name has been changed to "char *".
> 	* Remove file name from the source file.
> 	* Copyright (C) format has been changed since "(C)" isn't encouraged to use.
> 	* The header files included in the source file have been sorted alphabetically.
> 	* eeh_platform_init() has been replaced by eeh_pseries_init() to avoid duplicate
> 	  functions when kernel supports multiple platforms.
> 	* "F/W" has been changed to "Firmware".
> 	* The maximal wait time to retrieve PE's state has been covered by macro.
> 	* It also include changes according to the minor comments from Michael.
> 
> v3 -> v4:
> 	* Fix some typo included in the commit messages.
> 	* Reduce code nesting according to Ram's suggestions.
> 	* Additional pr_warning on failure of configuring bridges.
> 
> v4 -> v5:
> 	* OF node and PCI device are tracing the corresponding eeh device.
> 	  That has been changed to "struct eeh_dev *" instead of the original
> 	  "void *".
> 	* The conversion between OF node, PCI device, eeh device is changed
> 	  to inline functions instead of the original macros.
> 	* The "struct eeh_stats" has been moved from eeh.h to eeh.c. Besides,
> 	  the individual members of the struct have been changed to fixed-type
> 	  "unsigned int". 
> 
> 
> The series of patches (v5) has been verified on Firebird-L machine. In order to carry out
> the test, you have to install IBM Power Tools from IBM internal yum source. Following
> command is used to force EEH check on ethernet interface, which could be recovered eventually
> by EEH and device driver successfully. You could keep pinging to the blade before issuing
> the following command to force EEH. You should see the network interface can't be reached for
> a moment and everything will be recovered couple of seconds after the forced EEH error. At the
> same time, you should see EEH error log out of system console. 
> 
> 	* errinjct eeh -v -f 0 -p U78AE.001.WZS00M9-P1-C18-L1-T2 -a 0x0 -m 0x0
> 
> -----
> 
> arch/powerpc/include/asm/device.h            |    3 +
> arch/powerpc/include/asm/eeh.h               |  134 +++-
> arch/powerpc/include/asm/eeh_event.h         |   33 +-
> arch/powerpc/include/asm/ppc-pci.h           |   89 +--
> arch/powerpc/kernel/of_platform.c            |    3 +
> arch/powerpc/kernel/rtas_pci.c               |    3 +
> arch/powerpc/platforms/pseries/Makefile      |    3 +-
> arch/powerpc/platforms/pseries/eeh.c         | 1044 ++++++++++++--------------
> arch/powerpc/platforms/pseries/eeh_cache.c   |   44 +-
> arch/powerpc/platforms/pseries/eeh_dev.c     |  102 +++
> arch/powerpc/platforms/pseries/eeh_driver.c  |  213 +++---
> arch/powerpc/platforms/pseries/eeh_event.c   |   55 +-
> arch/powerpc/platforms/pseries/eeh_pseries.c |  565 ++++++++++++++
> arch/powerpc/platforms/pseries/eeh_sysfs.c   |   25 +-
> arch/powerpc/platforms/pseries/msi.c         |    2 +-
> arch/powerpc/platforms/pseries/pci_dlpar.c   |    3 +
> arch/powerpc/platforms/pseries/setup.c       |    7 +-
> include/linux/of.h                           |   10 +
> include/linux/pci.h                          |    7 +
> 19 files changed, 1477 insertions(+), 868 deletions(-)
> 
> Thanks,
> Gavin

^ permalink raw reply

* [PATCH 2/2] powerpc/44x: Add PCI MSI node for APM821xx SoC and Bluestone board in DTS
From: Mai La @ 2012-02-29  8:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Josh Boyer, Matt Porter,
	Tirumala R Marri, Grant Likely, Michael Neuling, Kumar Gala,
	Anton Blanchard, linuxppc-dev, linux-kernel
  Cc: open-source-review, Mai La

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 1552 bytes --]


Signed-off-by: Mai La <mla@apm.com>
---
 arch/powerpc/boot/dts/bluestone.dts |   24 ++++++++++++++++++++++++
 1 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts
index 2a56a0d..8ea6325 100644
--- a/arch/powerpc/boot/dts/bluestone.dts
+++ b/arch/powerpc/boot/dts/bluestone.dts
@@ -250,5 +250,29 @@
 			};
 		};
 
+		MSI: ppc4xx-msi@C10000000 {
+			compatible = "amcc,ppc4xx-msi", "ppc4xx-msi";
+			reg = < 0xC 0x10000000 0x100
+				0xC 0x10000000 0x100>;
+			sdr-base = <0x36C>;
+			msi-data = <0x00004440>;
+			msi-mask = <0x0000ffe0>;
+			interrupts =<0 1 2 3 4 5 6 7>;
+			interrupt-parent = <&MSI>;
+			#interrupt-cells = <1>;
+			#address-cells = <0>;
+			#size-cells = <0>;
+			msi-available-ranges = <0x0 0x100>;
+			interrupt-map = <
+				0 &UIC3 0x18 1
+				1 &UIC3 0x19 1
+				2 &UIC3 0x1A 1
+				3 &UIC3 0x1B 1
+				4 &UIC3 0x1C 1
+				5 &UIC3 0x1D 1
+				6 &UIC3 0x1E 1
+				7 &UIC3 0x1F 1
+			>;
+		};
 	};
 };
-- 
1.7.3.4

CONFIDENTIALITY NOTICE: This e-mail message, including any attachments, 
is for the sole use of the intended recipient(s) and contains information 
that is confidential and proprietary to AppliedMicro Corporation or its subsidiaries. 
It is to be used solely for the purpose of furthering the parties' business relationship. 
All unauthorized review, use, disclosure or distribution is prohibited. 
If you are not the intended recipient, please contact the sender by reply e-mail 
and destroy all copies of the original message.

^ permalink raw reply related

* [PATCH 1/2] powerpc/44x: Fix PCI MSI support for APM821xx SoC and Bluestone board
From: Mai La @ 2012-02-29  8:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Josh Boyer, Matt Porter,
	Tirumala R Marri, Grant Likely, Michael Neuling, Kumar Gala,
	Anton Blanchard, linuxppc-dev, linux-kernel
  Cc: open-source-review, Mai La

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 4148 bytes --]

This patch consists of:
- Enable PCI MSI as default for Bluestone board 
- Define number of MSI interrupt for Maui APM821xx
- Fix returning ENODEV as finding MSI node
- Fix MSI physical high and low address
- Keep MSI data logically

Signed-off-by: Mai La <mla@apm.com>
---
 arch/powerpc/platforms/44x/Kconfig |    2 ++
 arch/powerpc/sysdev/ppc4xx_msi.c   |   28 ++++++++++++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index fcf6bf2..9f04ce3 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -23,6 +23,8 @@ config BLUESTONE
 	default n
 	select PPC44x_SIMPLE
 	select APM821xx
+	select PCI_MSI
+	select PPC4xx_MSI
 	select IBM_EMAC_RGMII
 	help
 	  This option enables support for the APM APM821xx Evaluation board.
diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c
index 1c2d7af..6103908 100644
--- a/arch/powerpc/sysdev/ppc4xx_msi.c
+++ b/arch/powerpc/sysdev/ppc4xx_msi.c
@@ -31,7 +31,7 @@
 #include <asm/prom.h>
 #include <asm/hw_irq.h>
 #include <asm/ppc-pci.h>
-#include <boot/dcr.h>
+#include <asm/dcr.h>
 #include <asm/dcr-regs.h>
 #include <asm/msi_bitmap.h>
 
@@ -43,7 +43,12 @@
 #define PEIH_FLUSH0	0x30
 #define PEIH_FLUSH1	0x38
 #define PEIH_CNTRST	0x48
+
+#ifdef CONFIG_APM821xx
+#define NR_MSI_IRQS	8
+#else
 #define NR_MSI_IRQS	4
+#endif
 
 struct ppc4xx_msi {
 	u32 msi_addr_lo;
@@ -150,12 +155,11 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
 	if (!sdr_addr)
 		return -1;
 
-	SDR0_WRITE(sdr_addr, (u64)res.start >> 32);	 /*HIGH addr */
-	SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
-
+	mtdcri(SDR0, *sdr_addr, res.start >> 32);	/*HIGH addr */
+	mtdcri(SDR0, *sdr_addr + 1, res.start & 0xFFFFFFFF);/* Low addr */
 
 	msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi");
-	if (msi->msi_dev)
+	if (!msi->msi_dev)
 		return -ENODEV;
 
 	msi->msi_regs = of_iomap(msi->msi_dev, 0);
@@ -167,9 +171,12 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
 		(u32) (msi->msi_regs + PEIH_TERMADH), (u32) (msi->msi_regs));
 
 	msi_virt = dma_alloc_coherent(&dev->dev, 64, &msi_phys, GFP_KERNEL);
-	msi->msi_addr_hi = 0x0;
-	msi->msi_addr_lo = (u32) msi_phys;
-	dev_dbg(&dev->dev, "PCIE-MSI: msi address 0x%x\n", msi->msi_addr_lo);
+	if (!msi_virt)
+		return -ENOMEM;
+	msi->msi_addr_hi = (u32)(msi_phys >> 32);
+	msi->msi_addr_lo = (u32)(msi_phys & 0xffffffff);
+	dev_dbg(&dev->dev, "PCIE-MSI: msi address high 0x%x, low 0x%x\n",
+		msi->msi_addr_hi, msi->msi_addr_lo);
 
 	/* Progam the Interrupt handler Termination addr registers */
 	out_be32(msi->msi_regs + PEIH_TERMADH, msi->msi_addr_hi);
@@ -185,6 +192,8 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
 	out_be32(msi->msi_regs + PEIH_MSIED, *msi_data);
 	out_be32(msi->msi_regs + PEIH_MSIMK, *msi_mask);
 
+	dma_free_coherent(&dev->dev, 64, msi_virt, msi_phys);
+
 	return 0;
 }
 
@@ -215,8 +224,6 @@ static int __devinit ppc4xx_msi_probe(struct platform_device *dev)
 	struct resource res;
 	int err = 0;
 
-	msi = &ppc4xx_msi;/*keep the msi data for further use*/
-
 	dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
 
 	msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
@@ -242,6 +249,7 @@ static int __devinit ppc4xx_msi_probe(struct platform_device *dev)
 		dev_err(&dev->dev, "Error allocating MSI bitmap\n");
 		goto error_out;
 	}
+	ppc4xx_msi = *msi;
 
 	ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs;
 	ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs;
-- 
1.7.3.4

CONFIDENTIALITY NOTICE: This e-mail message, including any attachments, 
is for the sole use of the intended recipient(s) and contains information 
that is confidential and proprietary to AppliedMicro Corporation or its subsidiaries. 
It is to be used solely for the purpose of furthering the parties' business relationship. 
All unauthorized review, use, disclosure or distribution is prohibited. 
If you are not the intended recipient, please contact the sender by reply e-mail 
and destroy all copies of the original message.

^ permalink raw reply related

* Re: [PATCH] sparsemem/bootmem: catch greater than section size allocations
From: Johannes Weiner @ 2012-02-29  9:17 UTC (permalink / raw)
  To: Nishanth Aravamudan
  Cc: Anton Blanchard, Dave Hansen, linux-mm, Paul Mackerras,
	Nishanth Aravamudan, Andrew Morton, Robert Jennings, linuxppc-dev
In-Reply-To: <20120228201151.GC5136@linux.vnet.ibm.com>

On Tue, Feb 28, 2012 at 12:11:51PM -0800, Nishanth Aravamudan wrote:
> On 28.02.2012 [14:53:26 +0100], Johannes Weiner wrote:
> > On Fri, Feb 24, 2012 at 11:33:58AM -0800, Nishanth Aravamudan wrote:
> > > While testing AMS (Active Memory Sharing) / CMO (Cooperative Memory
> > > Overcommit) on powerpc, we tripped the following:
> > > 
> > > kernel BUG at mm/bootmem.c:483!
> > > cpu 0x0: Vector: 700 (Program Check) at [c000000000c03940]
> > >     pc: c000000000a62bd8: .alloc_bootmem_core+0x90/0x39c
> > >     lr: c000000000a64bcc: .sparse_early_usemaps_alloc_node+0x84/0x29c
> > >     sp: c000000000c03bc0
> > >    msr: 8000000000021032
> > >   current = 0xc000000000b0cce0
> > >   paca    = 0xc000000001d80000
> > >     pid   = 0, comm = swapper
> > > kernel BUG at mm/bootmem.c:483!
> > > enter ? for help
> > > [c000000000c03c80] c000000000a64bcc
> > > .sparse_early_usemaps_alloc_node+0x84/0x29c
> > > [c000000000c03d50] c000000000a64f10 .sparse_init+0x12c/0x28c
> > > [c000000000c03e20] c000000000a474f4 .setup_arch+0x20c/0x294
> > > [c000000000c03ee0] c000000000a4079c .start_kernel+0xb4/0x460
> > > [c000000000c03f90] c000000000009670 .start_here_common+0x1c/0x2c
> > > 
> > > This is
> > > 
> > >         BUG_ON(limit && goal + size > limit);
> > > 
> > > and after some debugging, it seems that
> > > 
> > > 	goal = 0x7ffff000000
> > > 	limit = 0x80000000000
> > > 
> > > and sparse_early_usemaps_alloc_node ->
> > > sparse_early_usemaps_alloc_pgdat_section -> alloc_bootmem_section calls
> > > 
> > > 	return alloc_bootmem_section(usemap_size() * count, section_nr);
> > > 
> > > This is on a system with 8TB available via the AMS pool, and as a quirk
> > > of AMS in firmware, all of that memory shows up in node 0. So, we end up
> > > with an allocation that will fail the goal/limit constraints. In theory,
> > > we could "fall-back" to alloc_bootmem_node() in
> > > sparse_early_usemaps_alloc_node(), but since we actually have HOTREMOVE
> > > defined, we'll BUG_ON() instead. A simple solution appears to be to
> > > disable the limit check if the size of the allocation in
> > > alloc_bootmem_section exceeds the section size.
> > 
> > It makes sense to allow the usemaps to spill over to subsequent
> > sections instead of panicking, so FWIW:
> > 
> > Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> > 
> > That being said, it would be good if check_usemap_section_nr() printed
> > the cross-dependencies between pgdats and sections when the usemaps of
> > a node spilled over to other sections than the ones holding the pgdat.
> > 
> > How about this?
> > 
> > ---
> > From: Johannes Weiner <hannes@cmpxchg.org>
> > Subject: sparsemem/bootmem: catch greater than section size allocations fix
> > 
> > If alloc_bootmem_section() no longer guarantees section-locality, we
> > need check_usemap_section_nr() to print possible cross-dependencies
> > between node descriptors and the usemaps allocated through it.
> > 
> > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> > ---
> > 
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index 61d7cde..9e032dc 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -359,6 +359,7 @@ static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
> >  				continue;
> >  			usemap_map[pnum] = usemap;
> >  			usemap += size;
> > +			check_usemap_section_nr(nodeid, usemap_map[pnum]);
> >  		}
> >  		return;
> >  	}
> 
> This makes sense to me -- ok if I fold it into the re-worked patch
> (based upon Mel's comments)?

Sure thing!

> > Furthermore, I wonder if we can remove the sparse-specific stuff from
> > bootmem.c as well, as now even more so than before, calculating the
> > desired area is really none of bootmem's business.
> > 
> > Would something like this be okay?
> > 
> > ---
> > From: Johannes Weiner <hannes@cmpxchg.org>
> > Subject: [patch] mm: remove sparsemem allocation details from the bootmem allocator
> > 
> > alloc_bootmem_section() derives allocation area constraints from the
> > specified sparsemem section.  This is a bit specific for a generic
> > memory allocator like bootmem, though, so move it over to sparsemem.
> > 
> > Since __alloc_bootmem_node() already retries failed allocations with
> > relaxed area constraints, the fallback code in sparsemem.c can be
> > removed and the code becomes a bit more compact overall.
> > 
> > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> 
> I've not tested it, but the intention seems sensible. I think it should
> remain a separate change.

Yes, I agree.  I'll resend it in a bit as stand-alone patch.

^ permalink raw reply

* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH
From: Michael Ellerman @ 2012-02-29 12:56 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linuxppc-dev
In-Reply-To: <1330409051-8941-21-git-send-email-shangw@linux.vnet.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 1405 bytes --]

On Tue, 2012-02-28 at 14:04 +0800, Gavin Shan wrote:
> With the original EEH implementation, the EEH global statistics
> are maintained by individual global variables. That makes the
> code a little hard to maintain.

Hi Gavin,

> @@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
>  {
>  	if (0 == eeh_subsystem_enabled) {
>  		seq_printf(m, "EEH Subsystem is globally disabled\n");
> -		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
> +		seq_printf(m, "eeh_total_mmio_ffs=%d\n", eeh_stats.total_mmio_ffs);
>  	} else {
>  		seq_printf(m, "EEH Subsystem is enabled\n");
>  		seq_printf(m,
> -				"no device=%ld\n"
> -				"no device node=%ld\n"
> -				"no config address=%ld\n"
> -				"check not wanted=%ld\n"
> -				"eeh_total_mmio_ffs=%ld\n"
> -				"eeh_false_positives=%ld\n"
> -				"eeh_slot_resets=%ld\n",
> -				no_device, no_dn, no_cfg_addr, 
> -				ignored_check, total_mmio_ffs, 
> -				false_positives,
> -				slot_resets);
> +				"no device           =%d\n"
> +				"no device node      =%d\n"
> +				"no config address   =%d\n"
> +				"check not wanted    =%d\n"
> +				"eeh_total_mmio_ffs  =%d\n"
> +				"eeh_false_positives =%d\n"
> +				"eeh_slot_resets     =%d\n",

There *might* be tools out there that parse this output, so I'd say
don't change it unless you have to - and I don't think you have to?

cheers

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH 2/2] powerpc/44x: Add more changes for APM821XX EMAC driver
From: Josh Boyer @ 2012-02-29 13:43 UTC (permalink / raw)
  To: Duc Dang; +Cc: netdev, Paul Mackerras, linuxppc-dev, linux-kernel
In-Reply-To: <1329466058-15969-1-git-send-email-dhdang@apm.com>

On Fri, Feb 17, 2012 at 3:07 AM, Duc Dang <dhdang@apm.com> wrote:
> This patch includes:
>
>  Configure EMAC PHY clock source (clock from PHY or internal clock).
>
>  Do not advertise PHY half duplex capability as APM821XX EMAC does not
> support half duplex mode.
>
>  Add changes to support configuring jumbo frame for APM821XX EMAC.
>
> Signed-off-by: Duc Dang <dhdang@apm.com>

This should have been sent to netdev.  CC'ing them now.

Ben and David, I can take this change through the 4xx tree if it looks OK to
both of you.  The pre-requisite DTS patch will go through my tree, so it might
make sense to keep them together.

josh

> ---
>  drivers/net/ethernet/ibm/emac/core.c |   26 +++++++++++++++++++++++++-
>  drivers/net/ethernet/ibm/emac/core.h |   13 +++++++++++--
>  drivers/net/ethernet/ibm/emac/emac.h |    5 ++++-
>  3 files changed, 40 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/=
ibm/emac/core.c
> index ed79b2d..de620f1 100644
> --- a/drivers/net/ethernet/ibm/emac/core.c
> +++ b/drivers/net/ethernet/ibm/emac/core.c
> @@ -434,6 +434,11 @@ static inline u32 emac_iff2rmr(struct net_device *nd=
ev)
>  	else if (!netdev_mc_empty(ndev))
>  		r |=3D EMAC_RMR_MAE;
>
> +	if (emac_has_feature(dev, EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE)) {
> +		r &=3D ~EMAC4_RMR_MJS_MASK;
> +		r |=3D EMAC4_RMR_MJS(ndev->mtu);
> +	}
> +
>  	return r;
>  }
>
> @@ -965,6 +970,7 @@ static int emac_resize_rx_ring(struct emac_instance *=
dev, int new_mtu)
>  	int rx_sync_size =3D emac_rx_sync_size(new_mtu);
>  	int rx_skb_size =3D emac_rx_skb_size(new_mtu);
>  	int i, ret =3D 0;
> +	int mr1_jumbo_bit_change =3D 0;
>
>  	mutex_lock(&dev->link_lock);
>  	emac_netif_stop(dev);
> @@ -1013,7 +1019,14 @@ static int emac_resize_rx_ring(struct emac_instanc=
e *dev, int new_mtu)
>  	}
>   skip:
>  	/* Check if we need to change "Jumbo" bit in MR1 */
> -	if ((new_mtu > ETH_DATA_LEN) ^ (dev->ndev->mtu > ETH_DATA_LEN)) {
> +	if (emac_has_feature(dev, EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE))
> +		mr1_jumbo_bit_change =3D (new_mtu > ETH_DATA_LEN) ||
> +				(dev->ndev->mtu > ETH_DATA_LEN);
> +	else
> +		mr1_jumbo_bit_change =3D (new_mtu > ETH_DATA_LEN) ^
> +				(dev->ndev->mtu > ETH_DATA_LEN);
> +
> +	if (mr1_jumbo_bit_change) {
>  		/* This is to prevent starting RX channel in emac_rx_enable() */
>  		set_bit(MAL_COMMAC_RX_STOPPED, &dev->commac.flags);
>
> @@ -2471,6 +2484,7 @@ static int __devinit emac_init_phy(struct emac_inst=
ance *dev)
>
>  	/* Disable any PHY features not supported by the platform */
>  	dev->phy.def->features &=3D ~dev->phy_feat_exc;
> +	dev->phy.features &=3D ~dev->phy_feat_exc;
>
>  	/* Setup initial link parameters */
>  	if (dev->phy.features & SUPPORTED_Autoneg) {
> @@ -2568,6 +2582,10 @@ static int __devinit emac_init_config(struct emac_instance *dev)
>  		if (of_device_is_compatible(np, "ibm,emac-405ex") ||
>  		    of_device_is_compatible(np, "ibm,emac-405exr"))
>  			dev->features |= EMAC_FTR_440EP_PHY_CLK_FIX;
> +		if (of_device_is_compatible(np, "ibm,emac-apm821xx"))
> +			dev->features |= (EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE
> +					| EMAC_FTR_APM821XX_NO_HALF_DUPLEX
> +					| EMAC_FTR_460EX_PHY_CLK_FIX);
>  	} else if (of_device_is_compatible(np, "ibm,emac4")) {
>  		dev->features |= EMAC_FTR_EMAC4;
>  		if (of_device_is_compatible(np, "ibm,emac-440gx"))
> @@ -2818,6 +2836,12 @@ static int __devinit emac_probe(struct platform_device *ofdev)
>  	dev->stop_timeout = STOP_TIMEOUT_100;
>  	INIT_DELAYED_WORK(&dev->link_work, emac_link_timer);
>
> +	/* Some SoCs like APM821xx does not support Half Duplex mode. */
> +	if (emac_has_feature(dev, EMAC_FTR_APM821XX_NO_HALF_DUPLEX))
> +		dev->phy_feat_exc = (SUPPORTED_1000baseT_Half
> +					| SUPPORTED_100baseT_Half
> +					| SUPPORTED_10baseT_Half);
> +
>  	/* Find PHY if any */
>  	err =3D emac_init_phy(dev);
>  	if (err != 0)
> diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
> index fa3ec57..9dea85a 100644
> --- a/drivers/net/ethernet/ibm/emac/core.h
> +++ b/drivers/net/ethernet/ibm/emac/core.h
> @@ -325,7 +325,14 @@ struct emac_instance {
>   * Set if we need phy clock workaround for 460ex or 460gt
>   */
>  #define EMAC_FTR_460EX_PHY_CLK_FIX	0x00000400
> -
> +/*
> + * APM821xx requires Jumbo frame size set explicitly
> + */
> +#define EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE	0x00000800
> +/*
> + * APM821xx does not support Half Duplex mode
> + */
> +#define EMAC_FTR_APM821XX_NO_HALF_DUPLEX	0x00001000
>
>  /* Right now, we don't quite handle the always/possible masks on the
>   * most optimal way as we don't have a way to say something like
> @@ -353,7 +360,9 @@ enum {
>  	    EMAC_FTR_NO_FLOW_CONTROL_40x |
>  #endif
>  	EMAC_FTR_460EX_PHY_CLK_FIX |
> -	EMAC_FTR_440EP_PHY_CLK_FIX,
> +	EMAC_FTR_440EP_PHY_CLK_FIX |
> +	EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE |
> +	EMAC_FTR_APM821XX_NO_HALF_DUPLEX,
>  };
>
>  static inline int emac_has_feature(struct emac_instance *dev,
> diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h
> index 1568278..36bcd69 100644
> --- a/drivers/net/ethernet/ibm/emac/emac.h
> +++ b/drivers/net/ethernet/ibm/emac/emac.h
> @@ -212,7 +212,10 @@ struct emac_regs {
>  #define EMAC4_RMR_RFAF_64_1024		0x00000006
>  #define EMAC4_RMR_RFAF_128_2048		0x00000007
>  #define EMAC4_RMR_BASE			EMAC4_RMR_RFAF_128_2048
> -
> +#if defined(CONFIG_APM821xx)
> +#define EMAC4_RMR_MJS_MASK              0x0001fff8
> +#define EMAC4_RMR_MJS(s)                (((s) << 3) & EMAC4_RMR_MJS_MASK)
> +#endif
>  /* EMACx_ISR & EMACx_ISER */
>  #define EMAC4_ISR_TXPE			0x20000000
>  #define EMAC4_ISR_RXPE			0x10000000
> --
> 1.7.5.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply

* Re: [PATCH 1/3] powerpc/44x: The bug fixed support for APM821xx SoC and Bluestone board
From: Josh Boyer @ 2012-02-29 13:54 UTC (permalink / raw)
  To: Vinh Nguyen Huu Tuong; +Cc: Paul Mackerras, linuxppc-dev, linux-kernel
In-Reply-To: <1324385014-30725-1-git-send-email-vhtnguyen@apm.com>

On Tue, Dec 20, 2011 at 7:43 AM, Vinh Nguyen Huu Tuong
<vhtnguyen@apm.com> wrote:
> This patch consists of:
> - Fix the pvr mask for checking pvr in cputable.c
> - Fix the cpu name to be consistent with the cpu name described in the dts file
>
> Signed-off-by: Vinh Nguyen Huu Tuong <vhtnguyen@apm.com>
> ---

I was waiting to see if you would submit a new series with patch 3/3 fixed for
the comments I made.  Seems you haven't yet or I missed it entirely.  For now,
I'll take this patch as it's stand-alone.  The DTS and PCI driver patches will
need to be submitted together again.

josh

^ permalink raw reply

* Re: [PATCH 1/2] powerpc/44x: Fix PCI MSI support for APM821xx SoC and Bluestone board
From: Josh Boyer @ 2012-02-29 14:18 UTC (permalink / raw)
  To: Mai La
  Cc: open-source-review, Tirumala R Marri, linux-kernel,
	Paul Mackerras, linuxppc-dev
In-Reply-To: <1330505221-3678-1-git-send-email-mla@apm.com>

On Wed, Feb 29, 2012 at 3:47 AM, Mai La <mla@apm.com> wrote:
> This patch consists of:
> - Enable PCI MSI as default for Bluestone board
> - Define number of MSI interrupt for Maui APM821xx

What is Maui?  Is that the same thing as Bluestone?

> - Fix returning ENODEV as finding MSI node
> - Fix MSI physical high and low address
> - Keep MSI data logically
>
> Signed-off-by: Mai La <mla@apm.com>

Wow.  So there are a lot of bugfixes here.  I'm surprised this ever worked at
all with some of the things you're fixing.  Nice to see.

> ---
> =A0arch/powerpc/platforms/44x/Kconfig | =A0 =A02 ++
> =A0arch/powerpc/sysdev/ppc4xx_msi.c =A0 | =A0 28 ++++++++++++++++++------=
----
> =A02 files changed, 20 insertions(+), 10 deletions(-)
>
> diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/=
44x/Kconfig
> index fcf6bf2..9f04ce3 100644
> --- a/arch/powerpc/platforms/44x/Kconfig
> +++ b/arch/powerpc/platforms/44x/Kconfig
> @@ -23,6 +23,8 @@ config BLUESTONE
> =A0 =A0 =A0 =A0default n
> =A0 =A0 =A0 =A0select PPC44x_SIMPLE
> =A0 =A0 =A0 =A0select APM821xx
> + =A0 =A0 =A0 select PCI_MSI
> + =A0 =A0 =A0 select PPC4xx_MSI
> =A0 =A0 =A0 =A0select IBM_EMAC_RGMII
> =A0 =A0 =A0 =A0help
> =A0 =A0 =A0 =A0 =A0This option enables support for the APM APM821xx Evalu=
ation board.
> diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4x=
x_msi.c
> index 1c2d7af..6103908 100644
> --- a/arch/powerpc/sysdev/ppc4xx_msi.c
> +++ b/arch/powerpc/sysdev/ppc4xx_msi.c
> @@ -31,7 +31,7 @@
> =A0#include <asm/prom.h>
> =A0#include <asm/hw_irq.h>
> =A0#include <asm/ppc-pci.h>
> -#include <boot/dcr.h>
> +#include <asm/dcr.h>
> =A0#include <asm/dcr-regs.h>
> =A0#include <asm/msi_bitmap.h>
>
> @@ -43,7 +43,12 @@
> =A0#define PEIH_FLUSH0 =A0 =A00x30
> =A0#define PEIH_FLUSH1 =A0 =A00x38
> =A0#define PEIH_CNTRST =A0 =A00x48
> +
> +#ifdef CONFIG_APM821xx
> +#define NR_MSI_IRQS =A0 =A08
> +#else
> =A0#define NR_MSI_IRQS =A0 =A04
> +#endif

Hm.  Do you think this is going to change quite a bit depending on which SoC
is being used?  If so, it might be better to introduce a Kconfig variable
that just defines this instead.  Something like:

	config 4xx_MSI_IRQS
	   int "Number of MSI IRQs"
	   depends on 4xx
	   default "8" if APM821xx
	   default "4" if !APM821xx

If there aren't going to be a wide variety of numbers, then the simple ifdef
you have is probably sufficient.

>  struct ppc4xx_msi {
>         u32 msi_addr_lo;
> @@ -150,12 +155,11 @@ static int ppc4xx_setup_pcieh_hw(struct platform_device *dev,
>         if (!sdr_addr)
>                 return -1;
>
> -       SDR0_WRITE(sdr_addr, (u64)res.start >> 32);      /*HIGH addr */
> -       SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */
> -
> +       mtdcri(SDR0, *sdr_addr, res.start >> 32);       /*HIGH addr */
> +       mtdcri(SDR0, *sdr_addr + 1, res.start & 0xFFFFFFFF);/* Low addr */

Don't you still want the (u64) cast on res.start?

> CONFIDENTIALITY NOTICE: This e-mail message, including any attachments,
> is for the sole use of the intended recipient(s) and contains information
> that is confidential and proprietary to AppliedMicro Corporation or its s=
ubsidiaries.
> It is to be used solely for the purpose of furthering the parties' busine=
ss relationship.
> All unauthorized review, use, disclosure or distribution is prohibited.
> If you are not the intended recipient, please contact the sender by reply=
 e-mail
> and destroy all copies of the original message.

Is there a way you can drop this?  Others from APM seem to have figured out
how to do that, so hopefully it won't be a big problem.

josh

^ permalink raw reply

* Re: [PATCH] KVM: PPC: Don't sync timebase when inside KVM
From: Scott Wood @ 2012-02-29 17:50 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev, kvm, kvm-ppc
In-Reply-To: <1330481769-24390-1-git-send-email-agraf@suse.de>

On 02/28/2012 08:16 PM, Alexander Graf wrote:
> When we know that we're running inside of a KVM guest, we don't have to
> worry about synchronizing timebases between different CPUs, since the
> host already took care of that.
> 
> This fixes CPU overcommit scenarios where vCPUs could hang forever trying
> to sync each other while not being scheduled.
> 
> Reported-by: Stuart Yoder <B08248@freescale.com>
> Signed-off-by: Alexander Graf <agraf@suse.de>

This should apply to any hypervisor, not just KVM.  On book3e, Power ISA
says timebase is read-only on virtualized implementations.  My
understanding is that book3s is paravirt-only (guest state is not
considered an implementation of the Power ISA), and it says "Writing the
Time Base is privileged, and can be done only in hypervisor state".

Which platforms are you seeing this on?  If it's on Freescale chips,
U-Boot should be doing the sync and Linux should never do it, even in
the absence of a hypervisor.

-Scott

^ permalink raw reply

* [PATCH v2] bootmem/sparsemem: remove limit constraint in alloc_bootmem_section
From: Nishanth Aravamudan @ 2012-02-29 18:12 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Anton Blanchard, Dave Hansen, linux-mm, Paul Mackerras,
	Johannes Weiner, Andrew Morton, Robert Jennings, linuxppc-dev
In-Reply-To: <20120228154732.GE1199@suse.de>

On 28.02.2012 [15:47:32 +0000], Mel Gorman wrote:
> On Fri, Feb 24, 2012 at 11:33:58AM -0800, Nishanth Aravamudan wrote:
> > While testing AMS (Active Memory Sharing) / CMO (Cooperative Memory
> > Overcommit) on powerpc, we tripped the following:
> > 
> > kernel BUG at mm/bootmem.c:483!
> > cpu 0x0: Vector: 700 (Program Check) at [c000000000c03940]
> >     pc: c000000000a62bd8: .alloc_bootmem_core+0x90/0x39c
> >     lr: c000000000a64bcc: .sparse_early_usemaps_alloc_node+0x84/0x29c
> >     sp: c000000000c03bc0
> >    msr: 8000000000021032
> >   current = 0xc000000000b0cce0
> >   paca    = 0xc000000001d80000
> >     pid   = 0, comm = swapper
> > kernel BUG at mm/bootmem.c:483!
> > enter ? for help
> > [c000000000c03c80] c000000000a64bcc
> > .sparse_early_usemaps_alloc_node+0x84/0x29c
> > [c000000000c03d50] c000000000a64f10 .sparse_init+0x12c/0x28c
> > [c000000000c03e20] c000000000a474f4 .setup_arch+0x20c/0x294
> > [c000000000c03ee0] c000000000a4079c .start_kernel+0xb4/0x460
> > [c000000000c03f90] c000000000009670 .start_here_common+0x1c/0x2c
> > 
> > This is
> > 
> >         BUG_ON(limit && goal + size > limit);
> > 
> > and after some debugging, it seems that
> > 
> > 	goal = 0x7ffff000000
> > 	limit = 0x80000000000
> > 
> > and sparse_early_usemaps_alloc_node ->
> > sparse_early_usemaps_alloc_pgdat_section -> alloc_bootmem_section calls
> > 
> > 	return alloc_bootmem_section(usemap_size() * count, section_nr);
> > 
> > This is on a system with 8TB available via the AMS pool, and as a quirk
> > of AMS in firmware, all of that memory shows up in node 0. So, we end up
> > with an allocation that will fail the goal/limit constraints. In theory,
> > we could "fall-back" to alloc_bootmem_node() in
> > sparse_early_usemaps_alloc_node(), but since we actually have HOTREMOVE
> > defined, we'll BUG_ON() instead. A simple solution appears to be to
> > disable the limit check if the size of the allocation in
> > alloc_bootmem_secition exceeds the section size.
> > 
> > Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
> > Cc: Dave Hansen <haveblue@us.ibm.com>
> > Cc: Anton Blanchard <anton@au1.ibm.com>
> > Cc: Paul Mackerras <paulus@samba.org>
> > Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
> > Cc: Robert Jennings <rcj@linux.vnet.ibm.com>
> > Cc: linux-mm@kvack.org
> > Cc: linuxppc-dev@lists.ozlabs.org
> > ---
> >  include/linux/mmzone.h |    2 ++
> >  mm/bootmem.c           |    5 ++++-
> >  2 files changed, 6 insertions(+), 1 deletions(-)
> > 
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 650ba2f..4176834 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -967,6 +967,8 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
> >   * PA_SECTION_SHIFT		physical address to/from section number
> >   * PFN_SECTION_SHIFT		pfn to/from section number
> >   */
> > +#define BYTES_PER_SECTION	(1UL << SECTION_SIZE_BITS)
> > +
> >  #define SECTIONS_SHIFT		(MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
> >  
> >  #define PA_SECTION_SHIFT	(SECTION_SIZE_BITS)
> > diff --git a/mm/bootmem.c b/mm/bootmem.c
> > index 668e94d..5cbbc76 100644
> > --- a/mm/bootmem.c
> > +++ b/mm/bootmem.c
> > @@ -770,7 +770,10 @@ void * __init alloc_bootmem_section(unsigned long size,
> >  
> >  	pfn = section_nr_to_pfn(section_nr);
> >  	goal = pfn << PAGE_SHIFT;
> > -	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
> > +	if (size > BYTES_PER_SECTION)
> > +		limit = 0;
> > +	else
> > +		limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
> 
> As it's ok to spill the allocation over to an adjacent section, why not
> just make limit==0 unconditionally. That would avoid defining
> BYTES_PER_SECTION.

Something like this?

Andrew, presuming Mel & Johannes give their ack, this should presumably
supersede the patch you pulled into -mm.

Thanks,
Nish

-------

While testing AMS (Active Memory Sharing) / CMO (Cooperative Memory
Overcommit) on powerpc, we tripped the following:

kernel BUG at mm/bootmem.c:483!
cpu 0x0: Vector: 700 (Program Check) at [c000000000c03940]
    pc: c000000000a62bd8: .alloc_bootmem_core+0x90/0x39c
    lr: c000000000a64bcc: .sparse_early_usemaps_alloc_node+0x84/0x29c
    sp: c000000000c03bc0
   msr: 8000000000021032
  current = 0xc000000000b0cce0
  paca    = 0xc000000001d80000
    pid   = 0, comm = swapper
kernel BUG at mm/bootmem.c:483!
enter ? for help
[c000000000c03c80] c000000000a64bcc
.sparse_early_usemaps_alloc_node+0x84/0x29c
[c000000000c03d50] c000000000a64f10 .sparse_init+0x12c/0x28c
[c000000000c03e20] c000000000a474f4 .setup_arch+0x20c/0x294
[c000000000c03ee0] c000000000a4079c .start_kernel+0xb4/0x460
[c000000000c03f90] c000000000009670 .start_here_common+0x1c/0x2c

This is

        BUG_ON(limit && goal + size > limit);

and after some debugging, it seems that

	goal = 0x7ffff000000
	limit = 0x80000000000

and sparse_early_usemaps_alloc_node ->
sparse_early_usemaps_alloc_pgdat_section calls

	return alloc_bootmem_section(usemap_size() * count, section_nr);

This is on a system with 8TB available via the AMS pool, and as a quirk
of AMS in firmware, all of that memory shows up in node 0. So, we end up
with an allocation that will fail the goal/limit constraints. In theory,
we could "fall-back" to alloc_bootmem_node() in
sparse_early_usemaps_alloc_node(), but since we actually have HOTREMOVE
defined, we'll BUG_ON() instead. A simple solution appears to be to
unconditionally remove the limit condition in alloc_bootmem_section,
meaning allocations are allowed to cross section boundaries (necessary
for systems of this size).

Johannes Weiner pointed out that if alloc_bootmem_section() no longer
guarantees section-locality, we need check_usemap_section_nr() to print
possible cross-dependencies between node descriptors and the usemaps
allocated through it. That makes the two loops in
sparse_early_usemaps_alloc_node() identical, so re-factor the code a
bit.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

---
v2: Unconditionally set limit to 0. Fold in Johannes' changes to
sparse_early_usemaps_alloc_node.

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 668e94d..9c9ae09 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -770,7 +770,7 @@ void * __init alloc_bootmem_section(unsigned long size,
 
 	pfn = section_nr_to_pfn(section_nr);
 	goal = pfn << PAGE_SHIFT;
-	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
+	limit = 0;
 	bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
 
 	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
diff --git a/mm/sparse.c b/mm/sparse.c
index 61d7cde..a8bc7d3 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -353,29 +353,21 @@ static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
 
 	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
 								 usemap_count);
-	if (usemap) {
-		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-			if (!present_section_nr(pnum))
-				continue;
-			usemap_map[pnum] = usemap;
-			usemap += size;
+	if (!usemap) {
+		usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
+		if (!usemap) {
+			printk(KERN_WARNING "%s: allocation failed\n", __func__);
+			return;
 		}
-		return;
 	}
 
-	usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
-	if (usemap) {
-		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-			if (!present_section_nr(pnum))
-				continue;
-			usemap_map[pnum] = usemap;
-			usemap += size;
-			check_usemap_section_nr(nodeid, usemap_map[pnum]);
-		}
-		return;
+	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+		if (!present_section_nr(pnum))
+			continue;
+		usemap_map[pnum] = usemap;
+		usemap += size;
+		check_usemap_section_nr(nodeid, usemap_map[pnum]);
 	}
-
-	printk(KERN_WARNING "%s: allocation failed\n", __func__);
 }
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP

-- 
Nishanth Aravamudan <nacc@us.ibm.com>
IBM Linux Technology Center

^ permalink raw reply related

* Re: [PATCH 2/2] powerpc/44x: Add more changes for APM821XX EMAC driver
From: David Miller @ 2012-02-29 18:25 UTC (permalink / raw)
  To: jwboyer; +Cc: dhdang, linux-kernel, paulus, netdev, linuxppc-dev
In-Reply-To: <CA+5PVA5hciQSvfkodX-oP_kUZueiTp=0+t8X_0iHQ+ehU0ecOQ@mail.gmail.com>

From: Josh Boyer <jwboyer@gmail.com>
Date: Wed, 29 Feb 2012 08:43:46 -0500

> On Fri, Feb 17, 2012 at 3:07 AM, Duc Dang <dhdang@apm.com> wrote:
>> This patch includes:
>>
>>  Configure EMAC PHY clock source (clock from PHY or internal clock).
>>
>>  Do not advertise PHY half duplex capability as APM821XX EMAC does not
>> support half duplex mode.
>>
>>  Add changes to support configuring jumbo frame for APM821XX EMAC.
>>
>> Signed-off-by: Duc Dang <dhdang@apm.com>
>
> This should have been sent to netdev.  CC'ing them now.
>
> Ben and David, I can take this change through the 4xx tree if it looks OK to
> both of you.  The pre-requisite DTS patch will go through my tree, so it might
> make sense to keep them together.

Well the patch has coding style problems, for one:

>> +			dev->features |= (EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE
>> +					| EMAC_FTR_APM821XX_NO_HALF_DUPLEX
>> +					| EMAC_FTR_460EX_PHY_CLK_FIX);

Should be:

>> +			dev->features |= (EMAC_APM821XX_REQ_JUMBO_FRAME_SIZE |
>> +					  EMAC_FTR_APM821XX_NO_HALF_DUPLEX |
>> +					  EMAC_FTR_460EX_PHY_CLK_FIX);

And this:

>> +		dev->phy_feat_exc = (SUPPORTED_1000baseT_Half
>> +					| SUPPORTED_100baseT_Half
>> +					| SUPPORTED_10baseT_Half);

Should be:

>> +		dev->phy_feat_exc = (SUPPORTED_1000baseT_Half |
>> +				     SUPPORTED_100baseT_Half |
>> +				     SUPPORTED_10baseT_Half);

^ permalink raw reply

* Re: [PATCH] KVM: PPC: Don't sync timebase when inside KVM
From: Alexander Graf @ 2012-02-29 18:28 UTC (permalink / raw)
  To: Scott Wood
  Cc: <linuxppc-dev@lists.ozlabs.org>,
	<kvm@vger.kernel.org>, <kvm-ppc@vger.kernel.org>
In-Reply-To: <4F4E6574.5050604@freescale.com>



On 29.02.2012, at 18:50, Scott Wood <scottwood@freescale.com> wrote:

> On 02/28/2012 08:16 PM, Alexander Graf wrote:
>> When we know that we're running inside of a KVM guest, we don't have to
>> worry about synchronizing timebases between different CPUs, since the
>> host already took care of that.
>> 
>> This fixes CPU overcommit scenarios where vCPUs could hang forever trying
>> to sync each other while not being scheduled.
>> 
>> Reported-by: Stuart Yoder <B08248@freescale.com>
>> Signed-off-by: Alexander Graf <agraf@suse.de>
> 
> This should apply to any hypervisor, not just KVM.  

Sure, but do you have a generic function to evaluate that? :)

> On book3e, Power ISA
> says timebase is read-only on virtualized implementations.  My
> understanding is that book3s is paravirt-only (guest state is not
> considered an implementation of the Power ISA), and it says "Writing the
> Time Base is privileged, and can be done only in hypervisor state".

For PR non-PAPR KVM, we are non-paravirt, but ignore tb writes iirc.

> 
> Which platforms are you seeing this on?  If it's on Freescale chips,
> U-Boot should be doing the sync and Linux should never do it, even in
> the absence of a hypervisor.

This is on e500mc.

Alex

> 
> -Scott
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2] bootmem/sparsemem: remove limit constraint in alloc_bootmem_section
From: Johannes Weiner @ 2012-02-29 18:45 UTC (permalink / raw)
  To: Nishanth Aravamudan
  Cc: Anton Blanchard, Dave Hansen, linux-mm, Paul Mackerras,
	Mel Gorman, Andrew Morton, Robert Jennings, linuxppc-dev
In-Reply-To: <20120229181233.GF5136@linux.vnet.ibm.com>

On Wed, Feb 29, 2012 at 10:12:33AM -0800, Nishanth Aravamudan wrote:
> On 28.02.2012 [15:47:32 +0000], Mel Gorman wrote:
> > On Fri, Feb 24, 2012 at 11:33:58AM -0800, Nishanth Aravamudan wrote:
> > > While testing AMS (Active Memory Sharing) / CMO (Cooperative Memory
> > > Overcommit) on powerpc, we tripped the following:
> > > 
> > > kernel BUG at mm/bootmem.c:483!
> > > cpu 0x0: Vector: 700 (Program Check) at [c000000000c03940]
> > >     pc: c000000000a62bd8: .alloc_bootmem_core+0x90/0x39c
> > >     lr: c000000000a64bcc: .sparse_early_usemaps_alloc_node+0x84/0x29c
> > >     sp: c000000000c03bc0
> > >    msr: 8000000000021032
> > >   current = 0xc000000000b0cce0
> > >   paca    = 0xc000000001d80000
> > >     pid   = 0, comm = swapper
> > > kernel BUG at mm/bootmem.c:483!
> > > enter ? for help
> > > [c000000000c03c80] c000000000a64bcc
> > > .sparse_early_usemaps_alloc_node+0x84/0x29c
> > > [c000000000c03d50] c000000000a64f10 .sparse_init+0x12c/0x28c
> > > [c000000000c03e20] c000000000a474f4 .setup_arch+0x20c/0x294
> > > [c000000000c03ee0] c000000000a4079c .start_kernel+0xb4/0x460
> > > [c000000000c03f90] c000000000009670 .start_here_common+0x1c/0x2c
> > > 
> > > This is
> > > 
> > >         BUG_ON(limit && goal + size > limit);
> > > 
> > > and after some debugging, it seems that
> > > 
> > > 	goal = 0x7ffff000000
> > > 	limit = 0x80000000000
> > > 
> > > and sparse_early_usemaps_alloc_node ->
> > > sparse_early_usemaps_alloc_pgdat_section -> alloc_bootmem_section calls
> > > 
> > > 	return alloc_bootmem_section(usemap_size() * count, section_nr);
> > > 
> > > This is on a system with 8TB available via the AMS pool, and as a quirk
> > > of AMS in firmware, all of that memory shows up in node 0. So, we end up
> > > with an allocation that will fail the goal/limit constraints. In theory,
> > > we could "fall-back" to alloc_bootmem_node() in
> > > sparse_early_usemaps_alloc_node(), but since we actually have HOTREMOVE
> > > defined, we'll BUG_ON() instead. A simple solution appears to be to
> > > disable the limit check if the size of the allocation in
> > > alloc_bootmem_secition exceeds the section size.
> > > 
> > > Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
> > > Cc: Dave Hansen <haveblue@us.ibm.com>
> > > Cc: Anton Blanchard <anton@au1.ibm.com>
> > > Cc: Paul Mackerras <paulus@samba.org>
> > > Cc: Ben Herrenschmidt <benh@kernel.crashing.org>
> > > Cc: Robert Jennings <rcj@linux.vnet.ibm.com>
> > > Cc: linux-mm@kvack.org
> > > Cc: linuxppc-dev@lists.ozlabs.org
> > > ---
> > >  include/linux/mmzone.h |    2 ++
> > >  mm/bootmem.c           |    5 ++++-
> > >  2 files changed, 6 insertions(+), 1 deletions(-)
> > > 
> > > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > > index 650ba2f..4176834 100644
> > > --- a/include/linux/mmzone.h
> > > +++ b/include/linux/mmzone.h
> > > @@ -967,6 +967,8 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
> > >   * PA_SECTION_SHIFT		physical address to/from section number
> > >   * PFN_SECTION_SHIFT		pfn to/from section number
> > >   */
> > > +#define BYTES_PER_SECTION	(1UL << SECTION_SIZE_BITS)
> > > +
> > >  #define SECTIONS_SHIFT		(MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
> > >  
> > >  #define PA_SECTION_SHIFT	(SECTION_SIZE_BITS)
> > > diff --git a/mm/bootmem.c b/mm/bootmem.c
> > > index 668e94d..5cbbc76 100644
> > > --- a/mm/bootmem.c
> > > +++ b/mm/bootmem.c
> > > @@ -770,7 +770,10 @@ void * __init alloc_bootmem_section(unsigned long size,
> > >  
> > >  	pfn = section_nr_to_pfn(section_nr);
> > >  	goal = pfn << PAGE_SHIFT;
> > > -	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
> > > +	if (size > BYTES_PER_SECTION)
> > > +		limit = 0;
> > > +	else
> > > +		limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
> > 
> > As it's ok to spill the allocation over to an adjacent section, why not
> > just make limit==0 unconditionally. That would avoid defining
> > BYTES_PER_SECTION.
> 
> Something like this?
> 
> Andrew, presuming Mel & Johannes give their ack, this should presumably
> supersede the patch you pulled into -mm.
> 
> Thanks,
> Nish
> 
> -------
> 
> While testing AMS (Active Memory Sharing) / CMO (Cooperative Memory
> Overcommit) on powerpc, we tripped the following:
> 
> kernel BUG at mm/bootmem.c:483!
> cpu 0x0: Vector: 700 (Program Check) at [c000000000c03940]
>     pc: c000000000a62bd8: .alloc_bootmem_core+0x90/0x39c
>     lr: c000000000a64bcc: .sparse_early_usemaps_alloc_node+0x84/0x29c
>     sp: c000000000c03bc0
>    msr: 8000000000021032
>   current = 0xc000000000b0cce0
>   paca    = 0xc000000001d80000
>     pid   = 0, comm = swapper
> kernel BUG at mm/bootmem.c:483!
> enter ? for help
> [c000000000c03c80] c000000000a64bcc
> .sparse_early_usemaps_alloc_node+0x84/0x29c
> [c000000000c03d50] c000000000a64f10 .sparse_init+0x12c/0x28c
> [c000000000c03e20] c000000000a474f4 .setup_arch+0x20c/0x294
> [c000000000c03ee0] c000000000a4079c .start_kernel+0xb4/0x460
> [c000000000c03f90] c000000000009670 .start_here_common+0x1c/0x2c
> 
> This is
> 
>         BUG_ON(limit && goal + size > limit);
> 
> and after some debugging, it seems that
> 
> 	goal = 0x7ffff000000
> 	limit = 0x80000000000
> 
> and sparse_early_usemaps_alloc_node ->
> sparse_early_usemaps_alloc_pgdat_section calls
> 
> 	return alloc_bootmem_section(usemap_size() * count, section_nr);
> 
> This is on a system with 8TB available via the AMS pool, and as a quirk
> of AMS in firmware, all of that memory shows up in node 0. So, we end up
> with an allocation that will fail the goal/limit constraints. In theory,
> we could "fall-back" to alloc_bootmem_node() in
> sparse_early_usemaps_alloc_node(), but since we actually have HOTREMOVE
> defined, we'll BUG_ON() instead. A simple solution appears to be to
> unconditionally remove the limit condition in alloc_bootmem_section,
> meaning allocations are allowed to cross section boundaries (necessary
> for systems of this size).
> 
> Johannes Weiner pointed out that if alloc_bootmem_section() no longer
> guarantees section-locality, we need check_usemap_section_nr() to print
> possible cross-dependencies between node descriptors and the usemaps
> allocated through it. That makes the two loops in
> sparse_early_usemaps_alloc_node() identical, so re-factor the code a
> bit.
> 
> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

^ permalink raw reply

* Re: [PATCH] KVM: PPC: Don't sync timebase when inside KVM
From: Scott Wood @ 2012-02-29 19:06 UTC (permalink / raw)
  To: Alexander Graf
  Cc: <linuxppc-dev@lists.ozlabs.org>,
	<kvm@vger.kernel.org>, <kvm-ppc@vger.kernel.org>
In-Reply-To: <39AA9511-4D56-4087-BC98-4BB32EF048AA@suse.de>

On 02/29/2012 12:28 PM, Alexander Graf wrote:
> 
> 
> On 29.02.2012, at 18:50, Scott Wood <scottwood@freescale.com> wrote:
> 
>> On 02/28/2012 08:16 PM, Alexander Graf wrote:
>>> When we know that we're running inside of a KVM guest, we don't have to
>>> worry about synchronizing timebases between different CPUs, since the
>>> host already took care of that.
>>>
>>> This fixes CPU overcommit scenarios where vCPUs could hang forever trying
>>> to sync each other while not being scheduled.
>>>
>>> Reported-by: Stuart Yoder <B08248@freescale.com>
>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>
>> This should apply to any hypervisor, not just KVM.  
> 
> Sure, but do you have a generic function to evaluate that? :)

The presence of a hypervisor node without testing compatible.  Might not
get them all, but at least it will cover more than just KVM.

>> Which platforms are you seeing this on?  If it's on Freescale chips,
>> U-Boot should be doing the sync and Linux should never do it, even in
>> the absence of a hypervisor.
> 
> This is on e500mc.

On e500mc Linux should never be trying to sync the timebase.  If it is,
let's fix that.

-Scott

^ permalink raw reply

* [PATCH 28/39] PCI, powerpc: Register busn_res for root buses
From: Yinghai Lu @ 2012-02-29 23:07 UTC (permalink / raw)
  To: Jesse Barnes, Benjamin Herrenschmidt, Tony Luck, David Miller,
	x86
  Cc: linux-arch, linux-pci, linuxppc-dev, linux-kernel,
	Dominik Brodowski, Paul Mackerras, Bjorn Helgaas, Yinghai Lu
In-Reply-To: <1330556858-11768-1-git-send-email-yinghai@kernel.org>

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/include/asm/pci-bridge.h |    1 +
 arch/powerpc/kernel/pci-common.c      |   10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 5d48765..11cebf0 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -30,6 +30,7 @@ struct pci_controller {
 	int first_busno;
 	int last_busno;
 	int self_busno;
+	struct resource busn;
 
 	void __iomem *io_base_virt;
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 910b9de..ee8c0c9 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1648,6 +1648,11 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
 	/* Wire up PHB bus resources */
 	pcibios_setup_phb_resources(hose, &resources);
 
+	hose->busn.start = hose->first_busno;
+	hose->busn.end	 = hose->last_busno;
+	hose->busn.flags = IORESOURCE_BUS;
+	pci_add_resource(&resources, &hose->busn);
+
 	/* Create an empty bus for the toplevel */
 	bus = pci_create_root_bus(hose->parent, hose->first_busno,
 				  hose->ops, hose, &resources);
@@ -1670,8 +1675,11 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
 		of_scan_bus(node, bus);
 	}
 
-	if (mode == PCI_PROBE_NORMAL)
+	if (mode == PCI_PROBE_NORMAL) {
+		pci_bus_update_busn_res_end(bus, 255);
 		hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+		pci_bus_update_busn_res_end(bus, bus->subordinate);
+	}
 
 	/* Platform gets a chance to do some global fixups before
 	 * we proceed to resource allocation
-- 
1.7.7

^ permalink raw reply related

* Re: [PATCH v2] bootmem/sparsemem: remove limit constraint in alloc_bootmem_section
From: Andrew Morton @ 2012-02-29 23:28 UTC (permalink / raw)
  To: Nishanth Aravamudan
  Cc: Anton Blanchard, Dave Hansen, stable, linux-mm, Paul Mackerras,
	Mel Gorman, Johannes Weiner, Robert Jennings, linuxppc-dev
In-Reply-To: <20120229181233.GF5136@linux.vnet.ibm.com>

On Wed, 29 Feb 2012 10:12:33 -0800
Nishanth Aravamudan <nacc@linux.vnet.ibm.com> wrote:

> While testing AMS (Active Memory Sharing) / CMO (Cooperative Memory
> Overcommit) on powerpc, we tripped the following:
> 
> kernel BUG at mm/bootmem.c:483!
>
> ...
> 
> This is
> 
>         BUG_ON(limit && goal + size > limit);
> 
> and after some debugging, it seems that
> 
> 	goal = 0x7ffff000000
> 	limit = 0x80000000000
> 
> and sparse_early_usemaps_alloc_node ->
> sparse_early_usemaps_alloc_pgdat_section calls
> 
> 	return alloc_bootmem_section(usemap_size() * count, section_nr);
> 
> This is on a system with 8TB available via the AMS pool, and as a quirk
> of AMS in firmware, all of that memory shows up in node 0. So, we end up
> with an allocation that will fail the goal/limit constraints. In theory,
> we could "fall-back" to alloc_bootmem_node() in
> sparse_early_usemaps_alloc_node(), but since we actually have HOTREMOVE
> defined, we'll BUG_ON() instead. A simple solution appears to be to
> unconditionally remove the limit condition in alloc_bootmem_section,
> meaning allocations are allowed to cross section boundaries (necessary
> for systems of this size).
> 
> Johannes Weiner pointed out that if alloc_bootmem_section() no longer
> guarantees section-locality, we need check_usemap_section_nr() to print
> possible cross-dependencies between node descriptors and the usemaps
> allocated through it. That makes the two loops in
> sparse_early_usemaps_alloc_node() identical, so re-factor the code a
> bit.

The patch is a bit scary now, so I think we should merge it into
3.4-rc1 and then backport it into 3.3.1 if nothing blows up.

Do you think it should be backported into 3.3.x?  Earlier kernels?

Also, this?

--- a/mm/bootmem.c~bootmem-sparsemem-remove-limit-constraint-in-alloc_bootmem_section-fix
+++ a/mm/bootmem.c
@@ -766,14 +766,13 @@ void * __init alloc_bootmem_section(unsi
 				    unsigned long section_nr)
 {
 	bootmem_data_t *bdata;
-	unsigned long pfn, goal, limit;
+	unsigned long pfn, goal;
 
 	pfn = section_nr_to_pfn(section_nr);
 	goal = pfn << PAGE_SHIFT;
-	limit = 0;
 	bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
 
-	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
+	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0);
 }
 #endif
 
_

^ permalink raw reply

* Re: [PATCH v2] bootmem/sparsemem: remove limit constraint in alloc_bootmem_section
From: Nishanth Aravamudan @ 2012-03-01  0:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Anton Blanchard, Dave Hansen, stable, linux-mm, Paul Mackerras,
	Mel Gorman, Johannes Weiner, Robert Jennings, linuxppc-dev
In-Reply-To: <20120229152830.22fc72a2.akpm@linux-foundation.org>

On 29.02.2012 [15:28:30 -0800], Andrew Morton wrote:
> On Wed, 29 Feb 2012 10:12:33 -0800
> Nishanth Aravamudan <nacc@linux.vnet.ibm.com> wrote:
> 
> > While testing AMS (Active Memory Sharing) / CMO (Cooperative Memory
> > Overcommit) on powerpc, we tripped the following:
> > 
> > kernel BUG at mm/bootmem.c:483!
> >
> > ...
> > 
> > This is
> > 
> >         BUG_ON(limit && goal + size > limit);
> > 
> > and after some debugging, it seems that
> > 
> > 	goal = 0x7ffff000000
> > 	limit = 0x80000000000
> > 
> > and sparse_early_usemaps_alloc_node ->
> > sparse_early_usemaps_alloc_pgdat_section calls
> > 
> > 	return alloc_bootmem_section(usemap_size() * count, section_nr);
> > 
> > This is on a system with 8TB available via the AMS pool, and as a quirk
> > of AMS in firmware, all of that memory shows up in node 0. So, we end up
> > with an allocation that will fail the goal/limit constraints. In theory,
> > we could "fall-back" to alloc_bootmem_node() in
> > sparse_early_usemaps_alloc_node(), but since we actually have HOTREMOVE
> > defined, we'll BUG_ON() instead. A simple solution appears to be to
> > unconditionally remove the limit condition in alloc_bootmem_section,
> > meaning allocations are allowed to cross section boundaries (necessary
> > for systems of this size).
> > 
> > Johannes Weiner pointed out that if alloc_bootmem_section() no longer
> > guarantees section-locality, we need check_usemap_section_nr() to print
> > possible cross-dependencies between node descriptors and the usemaps
> > allocated through it. That makes the two loops in
> > sparse_early_usemaps_alloc_node() identical, so re-factor the code a
> > bit.
> 
> The patch is a bit scary now, so I think we should merge it into
> 3.4-rc1 and then backport it into 3.3.1 if nothing blows up.

I think that's fair.

> Do you think it should be backported into 3.3.x?  Earlier kernels?

3.3.x seems reasonable. If I had to guess, I think this could be hit on
any kernels with this functionality -- that is, sparsemem in general?
Not sure how far back it's worth backporting.

> Also, this?

Urgh, yeah, that's way better.

Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>

> --- a/mm/bootmem.c~bootmem-sparsemem-remove-limit-constraint-in-alloc_bootmem_section-fix
> +++ a/mm/bootmem.c
> @@ -766,14 +766,13 @@ void * __init alloc_bootmem_section(unsi
>  				    unsigned long section_nr)
>  {
>  	bootmem_data_t *bdata;
> -	unsigned long pfn, goal, limit;
> +	unsigned long pfn, goal;
> 
>  	pfn = section_nr_to_pfn(section_nr);
>  	goal = pfn << PAGE_SHIFT;
> -	limit = 0;
>  	bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
> 
> -	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
> +	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0);
>  }
>  #endif

Thanks for all the feedback!

-Nish

-- 
Nishanth Aravamudan <nacc@us.ibm.com>
IBM Linux Technology Center

^ permalink raw reply

* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH
From: Gavin Shan @ 2012-03-01  1:14 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev
In-Reply-To: <1330520204.15023.16.camel@concordia>

> > With the original EEH implementation, the EEH global statistics
> > are maintained by individual global variables. That makes the
> > code a little hard to maintain.
> 
> Hi Gavin,
> 
> > @@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
> >  {
> >  	if (0 == eeh_subsystem_enabled) {
> >  		seq_printf(m, "EEH Subsystem is globally disabled\n");
> > -		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
> > +		seq_printf(m, "eeh_total_mmio_ffs=%d\n", eeh_stats.total_mmio_ffs);
> >  	} else {
> >  		seq_printf(m, "EEH Subsystem is enabled\n");
> >  		seq_printf(m,
> > -				"no device=%ld\n"
> > -				"no device node=%ld\n"
> > -				"no config address=%ld\n"
> > -				"check not wanted=%ld\n"
> > -				"eeh_total_mmio_ffs=%ld\n"
> > -				"eeh_false_positives=%ld\n"
> > -				"eeh_slot_resets=%ld\n",
> > -				no_device, no_dn, no_cfg_addr, 
> > -				ignored_check, total_mmio_ffs, 
> > -				false_positives,
> > -				slot_resets);
> > +				"no device           =%d\n"
> > +				"no device node      =%d\n"
> > +				"no config address   =%d\n"
> > +				"check not wanted    =%d\n"
> > +				"eeh_total_mmio_ffs  =%d\n"
> > +				"eeh_false_positives =%d\n"
> > +				"eeh_slot_resets     =%d\n",
> 
> There *might* be tools out there that parse this output, so I'd say
> don't change it unless you have to - and I don't think you have to?
> 

Thanks for catching the point, Michael. I will change it back soon ;-)

Thanks,
Gavin

^ permalink raw reply

* [PATCH v2 1/2] powerpc/e500: make load_up_spe a normal function
From: Olivia Yin @ 2012-03-01  1:20 UTC (permalink / raw)
  To: kvm-ppc, kvm, linuxppc-dev; +Cc: Liu Yu, Olivia Yin

From: Liu Yu <yu.liu@freescale.com>

So that we can call it when improving SPE switch like book3e did for fp switch.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Olivia Yin <hong-hua.yin@freescale.com>
---
v2: 	add Signed-off-by

 arch/powerpc/kernel/head_fsl_booke.S |   23 ++++++-----------------
 1 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index d5d78c4..c96e025 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -539,8 +539,10 @@ interrupt_base:
 	/* SPE Unavailable */
 	START_EXCEPTION(SPEUnavailable)
 	NORMAL_EXCEPTION_PROLOG
-	bne	load_up_spe
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	beq	1f
+	bl	load_up_spe
+	b	fast_exception_return
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	EXC_XFER_EE_LITE(0x2010, KernelSPE)
 #else
 	EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE)
@@ -743,7 +745,7 @@ tlb_write_entry:
 /* Note that the SPE support is closely modeled after the AltiVec
  * support.  Changes to one are likely to be applicable to the
  * other!  */
-load_up_spe:
+_GLOBAL(load_up_spe)
 /*
  * Disable SPE for the task which had SPE previously,
  * and save its SPE registers in its thread_struct.
@@ -791,20 +793,7 @@ load_up_spe:
 	subi	r4,r5,THREAD
 	stw	r4,last_task_used_spe@l(r3)
 #endif /* !CONFIG_SMP */
-	/* restore registers and return */
-2:	REST_4GPRS(3, r11)
-	lwz	r10,_CCR(r11)
-	REST_GPR(1, r11)
-	mtcr	r10
-	lwz	r10,_LINK(r11)
-	mtlr	r10
-	REST_GPR(10, r11)
-	mtspr	SPRN_SRR1,r9
-	mtspr	SPRN_SRR0,r12
-	REST_GPR(9, r11)
-	REST_GPR(12, r11)
-	lwz	r11,GPR11(r11)
-	rfi
+	blr
 
 /*
  * SPE unavailable trap from kernel - print a message, but let
-- 
1.6.4

^ permalink raw reply related

* [PATCH v2 2/2] KVM: booke: Improve SPE switch
From: Olivia Yin @ 2012-03-01  1:20 UTC (permalink / raw)
  To: kvm-ppc, kvm, linuxppc-dev; +Cc: Liu Yu, Olivia Yin

From: Liu Yu <yu.liu@freescale.com>

Like book3s did for fp switch,
instead of switching SPE between host and guest,
the patch switches SPE state between qemu and guest.
In this way, we can simulate a host loadup SPE when loading guest SPE state,
and let the host decide when to give up SPE state.
Therefore it cooperates better with host SPE usage,
and so has some performance benefit on a UP host (lazy SPE).

Moreover, since the patch saves guest SPE state into the linux thread field,
it creates the condition to emulate guest SPE instructions in the host,
so that we can avoid injecting SPE exceptions into the guest.

The patch also turns all asm code into C code,
and adds SPE stat counts.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Olivia Yin <hong-hua.yin@freescale.com>
---
v2: 	
Keep shadow MSR[SPE] consistent with 
thread MSR[SPE] in kvmppc_core_vcpu_load

 arch/powerpc/include/asm/kvm_host.h |   11 +++++-
 arch/powerpc/kernel/asm-offsets.c   |    7 ----
 arch/powerpc/kvm/booke.c            |   63 +++++++++++++++++++++++++++++++----
 arch/powerpc/kvm/booke.h            |    8 +----
 arch/powerpc/kvm/booke_interrupts.S |   37 --------------------
 arch/powerpc/kvm/e500.c             |   13 ++++---
 arch/powerpc/kvm/timing.c           |    5 +++
 arch/powerpc/kvm/timing.h           |   11 ++++++
 8 files changed, 91 insertions(+), 64 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1843d5d..6186d08 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -117,6 +117,11 @@ struct kvm_vcpu_stat {
 	u32 st;
 	u32 st_slow;
 #endif
+#ifdef CONFIG_SPE
+	u32 spe_unavail;
+	u32 spe_fp_data;
+	u32 spe_fp_round;
+#endif
 };
 
 enum kvm_exit_types {
@@ -147,6 +152,11 @@ enum kvm_exit_types {
 	FP_UNAVAIL,
 	DEBUG_EXITS,
 	TIMEINGUEST,
+#ifdef CONFIG_SPE
+	SPE_UNAVAIL,
+	SPE_FP_DATA,
+	SPE_FP_ROUND,
+#endif
 	__NUMBER_OF_KVM_EXIT_TYPES
 };
 
@@ -330,7 +340,6 @@ struct kvm_vcpu_arch {
 #ifdef CONFIG_SPE
 	ulong evr[32];
 	ulong spefscr;
-	ulong host_spefscr;
 	u64 acc;
 #endif
 #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8e0db0b..ff68f71 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -604,13 +604,6 @@ int main(void)
 	DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
 #endif
 
-#if defined(CONFIG_KVM) && defined(CONFIG_SPE)
-	DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0]));
-	DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc));
-	DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr));
-	DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
-#endif
-
 #ifdef CONFIG_KVM_EXIT_TIMING
 	DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
 						arch.timing_exit.tv32.tbu));
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ee9e1ee..f20010b 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -55,6 +55,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "dec",        VCPU_STAT(dec_exits) },
 	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+#ifdef CONFIG_SPE
+	{ "spe_unavail", VCPU_STAT(spe_unavail) },
+	{ "spe_fp_data", VCPU_STAT(spe_fp_data) },
+	{ "spe_fp_round", VCPU_STAT(spe_fp_round) },
+#endif
 	{ NULL }
 };
 
@@ -80,11 +85,11 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 }
 
 #ifdef CONFIG_SPE
-void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
+static void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
 {
 	preempt_disable();
-	enable_kernel_spe();
-	kvmppc_save_guest_spe(vcpu);
+	if (current->thread.regs->msr & MSR_SPE)
+		giveup_spe(current);
 	vcpu->arch.shadow_msr &= ~MSR_SPE;
 	preempt_enable();
 }
@@ -92,8 +97,10 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
 static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
 {
 	preempt_disable();
-	enable_kernel_spe();
-	kvmppc_load_guest_spe(vcpu);
+	if (!(current->thread.regs->msr & MSR_SPE)) {
+		load_up_spe(NULL);
+		current->thread.regs->msr |= MSR_SPE;
+	}
 	vcpu->arch.shadow_msr |= MSR_SPE;
 	preempt_enable();
 }
@@ -104,7 +111,7 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
 		if (!(vcpu->arch.shadow_msr & MSR_SPE))
 			kvmppc_vcpu_enable_spe(vcpu);
 	} else if (vcpu->arch.shadow_msr & MSR_SPE) {
-		kvmppc_vcpu_disable_spe(vcpu);
+		vcpu->arch.shadow_msr &= ~MSR_SPE;
 	}
 }
 #else
@@ -124,7 +131,8 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 	vcpu->arch.shared->msr = new_msr;
 
 	kvmppc_mmu_msr_notify(vcpu, old_msr);
-	kvmppc_vcpu_sync_spe(vcpu);
+ 	if ((old_msr ^ new_msr) & MSR_SPE)
+		kvmppc_vcpu_sync_spe(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -338,6 +346,11 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
+#ifdef CONFIG_SPE
+	ulong evr[32];
+	ulong spefscr;
+	u64 acc;
+#endif
 
 	if (!vcpu->arch.sane) {
 		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -355,7 +368,40 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	}
 
 	kvm_guest_enter();
+#ifdef CONFIG_SPE
+	/* Save userspace SPE state in stack */
+	enable_kernel_spe();
+	memcpy(evr, current->thread.evr, sizeof(current->thread.evr));
+	acc = current->thread.acc;
+
+	/* Restore guest SPE state to thread */
+	memcpy(current->thread.evr, vcpu->arch.evr, sizeof(vcpu->arch.evr));
+	current->thread.acc = vcpu->arch.acc;
+
+	/* Switch SPEFSCR and load guest SPE state if needed */
+	spefscr = mfspr(SPRN_SPEFSCR);
+	kvmppc_vcpu_sync_spe(vcpu);
+	mtspr(SPRN_SPEFSCR, vcpu->arch.spefscr);
+#endif
+
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+
+#ifdef CONFIG_SPE
+	/* Switch SPEFSCR and save guest SPE state if needed */
+	vcpu->arch.spefscr = mfspr(SPRN_SPEFSCR);
+	kvmppc_vcpu_disable_spe(vcpu);
+	mtspr(SPRN_SPEFSCR, spefscr);
+
+	/* Save guest SPE state from thread */
+	memcpy(vcpu->arch.evr, current->thread.evr, sizeof(vcpu->arch.evr));
+	vcpu->arch.acc = current->thread.acc;
+
+	/* Restore userspace SPE state from stack */
+	memcpy(current->thread.evr, evr, sizeof(current->thread.evr));
+	current->thread.spefscr = spefscr;
+	current->thread.acc = acc;
+#endif
+
 	kvm_guest_exit();
 
 out:
@@ -457,17 +503,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		else
 			kvmppc_booke_queue_irqprio(vcpu,
 						   BOOKE_IRQPRIO_SPE_UNAVAIL);
+		kvmppc_account_exit(vcpu, SPE_UNAVAIL);
 		r = RESUME_GUEST;
 		break;
 	}
 
 	case BOOKE_INTERRUPT_SPE_FP_DATA:
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
+		kvmppc_account_exit(vcpu, SPE_FP_DATA);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_SPE_FP_ROUND:
 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
+		kvmppc_account_exit(vcpu, SPE_FP_ROUND);
 		r = RESUME_GUEST;
 		break;
 #else
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 2fe2027..c02b8f9 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -22,6 +22,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_host.h>
+#include <asm/system.h>
 #include <asm/kvm_ppc.h>
 #include "timing.h"
 
@@ -64,11 +65,4 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
 int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
 
-/* low-level asm code to transfer guest state */
-void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
-void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
-
-/* high-level function, manages flags, host state */
-void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
-
 #endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 10d8ef6..c44367d 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -245,15 +245,6 @@ _GLOBAL(kvmppc_resume_host)
 
 heavyweight_exit:
 	/* Not returning to guest. */
-
-#ifdef CONFIG_SPE
-	/* save guest SPEFSCR and load host SPEFSCR */
-	mfspr	r9, SPRN_SPEFSCR
-	stw	r9, VCPU_SPEFSCR(r4)
-	lwz	r9, VCPU_HOST_SPEFSCR(r4)
-	mtspr	SPRN_SPEFSCR, r9
-#endif
-
 	/* We already saved guest volatile register state; now save the
 	 * non-volatiles. */
 	stw	r15, VCPU_GPR(r15)(r4)
@@ -355,14 +346,6 @@ _GLOBAL(__kvmppc_vcpu_run)
 	lwz	r30, VCPU_GPR(r30)(r4)
 	lwz	r31, VCPU_GPR(r31)(r4)
 
-#ifdef CONFIG_SPE
-	/* save host SPEFSCR and load guest SPEFSCR */
-	mfspr	r3, SPRN_SPEFSCR
-	stw	r3, VCPU_HOST_SPEFSCR(r4)
-	lwz	r3, VCPU_SPEFSCR(r4)
-	mtspr	SPRN_SPEFSCR, r3
-#endif
-
 lightweight_exit:
 	stw	r2, HOST_R2(r1)
 
@@ -460,23 +443,3 @@ lightweight_exit:
 	lwz	r4, VCPU_GPR(r4)(r4)
 	rfi
 
-#ifdef CONFIG_SPE
-_GLOBAL(kvmppc_save_guest_spe)
-	cmpi	0,r3,0
-	beqlr-
-	SAVE_32EVRS(0, r4, r3, VCPU_EVR)
-	evxor   evr6, evr6, evr6
-	evmwumiaa evr6, evr6, evr6
-	li	r4,VCPU_ACC
-	evstddx evr6, r4, r3		/* save acc */
-	blr
-
-_GLOBAL(kvmppc_load_guest_spe)
-	cmpi	0,r3,0
-	beqlr-
-	li      r4,VCPU_ACC
-	evlddx  evr6,r4,r3
-	evmra   evr6,evr6		/* load acc */
-	REST_32EVRS(0, r4, r3, VCPU_EVR)
-	blr
-#endif
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index ddcd896..dfc516b 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -37,16 +37,19 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvmppc_e500_tlb_load(vcpu, cpu);
+
+	/*
+	 * Keep shadow MSR[SPE] consistent with thread MSR[SPE].
+	 * If guest SPE state is saved by host, we just disable guest SPE.
+	 */
+	if ((current->flags & PF_VCPU) &&
+			!(current->thread.regs->msr & MSR_SPE))
+		vcpu->arch.shadow_msr &= ~MSR_SPE;
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	kvmppc_e500_tlb_put(vcpu);
-
-#ifdef CONFIG_SPE
-	if (vcpu->arch.shadow_msr & MSR_SPE)
-		kvmppc_vcpu_disable_spe(vcpu);
-#endif
 }
 
 int kvmppc_core_check_processor_compat(void)
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index 07b6110..c9ce332 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -135,6 +135,11 @@ static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
 	[USR_PR_INST] =             "USR_PR_INST",
 	[FP_UNAVAIL] =              "FP_UNAVAIL",
 	[DEBUG_EXITS] =             "DEBUG",
+#ifdef CONFIG_SPE
+	[SPE_UNAVAIL] =              "SPE_UNAVAIL",
+	[SPE_FP_DATA] =              "SPE_FP_DATA",
+	[SPE_FP_ROUND] =             "SPE_FP_ROUND",
+#endif
 	[TIMEINGUEST] =             "TIMEINGUEST"
 };
 
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 8167d42..712ab3a 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -93,6 +93,17 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
 	case SIGNAL_EXITS:
 		vcpu->stat.signal_exits++;
 		break;
+#ifdef CONFIG_SPE
+	case SPE_UNAVAIL:
+		vcpu->stat.spe_unavail++;
+		break;
+	case SPE_FP_DATA:
+		vcpu->stat.spe_fp_data++;
+		break;
+	case SPE_FP_ROUND:
+		vcpu->stat.spe_fp_round++;
+		break;
+#endif
 	}
 }
 
-- 
1.6.4

^ permalink raw reply related

* Re: [PATCH 20/21] Introduce struct eeh_stats for EEH - Reworked
From: Gavin Shan @ 2012-03-01  1:47 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1330409051-8941-21-git-send-email-shangw@linux.vnet.ibm.com>

With the original EEH implementation, the EEH global statistics
are maintained by individual global variables. That makes the
code a little hard to maintain.

The patch introduces extra struct eeh_stats for the EEH global
statistics so that it can be maintained in collective fashion.

It's a rework of the corresponding v5 patch. According to
the comments from David Laight, the EEH global statistics have
been changed a little bit so that they have the fixed type
"u64". Also, the format used to print them has been changed to
"%llu" based on David's suggestion. Also, the output format of
the EEH global statistics is kept intact, following
Michael's suggestion that there might be tools parsing them.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/eeh.c |   65 ++++++++++++++++++++--------------
 1 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 9b1fd0c..1d08cd7 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -102,14 +102,22 @@ static DEFINE_RAW_SPINLOCK(confirm_error_lock);
 #define EEH_PCI_REGS_LOG_LEN 4096
 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 
-/* System monitoring statistics */
-static unsigned long no_device;
-static unsigned long no_dn;
-static unsigned long no_cfg_addr;
-static unsigned long ignored_check;
-static unsigned long total_mmio_ffs;
-static unsigned long false_positives;
-static unsigned long slot_resets;
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+	u64 no_device;		/* PCI device not found		*/
+	u64 no_dn;		/* OF node not found		*/
+	u64 no_cfg_addr;	/* Config address not found	*/
+	u64 ignored_check;	/* EEH check skipped		*/
+	u64 total_mmio_ffs;	/* Total EEH checks		*/
+	u64 false_positives;	/* Unnecessary EEH checks	*/
+	u64 slot_resets;	/* PE reset			*/
+};
+
+static struct eeh_stats eeh_stats;
 
 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
 
@@ -392,13 +400,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	int rc = 0;
 	const char *location;
 
-	total_mmio_ffs++;
+	eeh_stats.total_mmio_ffs++;
 
 	if (!eeh_subsystem_enabled)
 		return 0;
 
 	if (!dn) {
-		no_dn++;
+		eeh_stats.no_dn++;
 		return 0;
 	}
 	dn = eeh_find_device_pe(dn);
@@ -407,14 +415,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	/* Access to IO BARs might get this far and still not want checking. */
 	if (!(edev->mode & EEH_MODE_SUPPORTED) ||
 	    edev->mode & EEH_MODE_NOCHECK) {
-		ignored_check++;
+		eeh_stats.ignored_check++;
 		pr_debug("EEH: Ignored check (%x) for %s %s\n",
 			edev->mode, eeh_pci_name(dev), dn->full_name);
 		return 0;
 	}
 
 	if (!edev->config_addr && !edev->pe_config_addr) {
-		no_cfg_addr++;
+		eeh_stats.no_cfg_addr++;
 		return 0;
 	}
 
@@ -460,13 +468,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	    (ret == EEH_STATE_NOT_SUPPORT) ||
 	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
 	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
-		false_positives++;
+		eeh_stats.false_positives++;
 		edev->false_positives ++;
 		rc = 0;
 		goto dn_unlock;
 	}
 
-	slot_resets++;
+	eeh_stats.slot_resets++;
  
 	/* Avoid repeated reports of this failure, including problems
 	 * with other functions on this device, and functions under
@@ -513,7 +521,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 	addr = eeh_token_to_phys((unsigned long __force) token);
 	dev = pci_addr_cache_get_device(addr);
 	if (!dev) {
-		no_device++;
+		eeh_stats.no_device++;
 		return val;
 	}
 
@@ -1174,21 +1182,24 @@ static int proc_eeh_show(struct seq_file *m, void *v)
 {
 	if (0 == eeh_subsystem_enabled) {
 		seq_printf(m, "EEH Subsystem is globally disabled\n");
-		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
+		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
 	} else {
 		seq_printf(m, "EEH Subsystem is enabled\n");
 		seq_printf(m,
-				"no device=%ld\n"
-				"no device node=%ld\n"
-				"no config address=%ld\n"
-				"check not wanted=%ld\n"
-				"eeh_total_mmio_ffs=%ld\n"
-				"eeh_false_positives=%ld\n"
-				"eeh_slot_resets=%ld\n",
-				no_device, no_dn, no_cfg_addr, 
-				ignored_check, total_mmio_ffs, 
-				false_positives,
-				slot_resets);
+				"no device=%llu\n"
+				"no device node=%llu\n"
+				"no config address=%llu\n"
+				"check not wanted=%llu\n"
+				"eeh_total_mmio_ffs=%llu\n"
+				"eeh_false_positives=%llu\n"
+				"eeh_slot_resets=%llu\n",
+				eeh_stats.no_device,
+				eeh_stats.no_dn,
+				eeh_stats.no_cfg_addr,
+				eeh_stats.ignored_check,
+				eeh_stats.total_mmio_ffs,
+				eeh_stats.false_positives,
+				eeh_stats.slot_resets);
 	}
 
 	return 0;
-- 
1.7.5.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox