LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCHv5 1/2] ppc64: perform proper max_bus_speed detection
From: Kleber Sacilotto de Souza @ 2013-05-03 22:43 UTC (permalink / raw)
  To: linuxppc-dev, dri-devel, Benjamin Herrenschmidt, Bjorn Helgaas,
	David Airlie, Michael Ellerman
  Cc: Brian King, Alex Deucher, Jerome Glisse,
	Thadeu Lima de Souza Cascardo, Kleber Sacilotto de Souza
In-Reply-To: <1367620993-27037-1-git-send-email-klebers@linux.vnet.ibm.com>

On pseries machines the detection for max_bus_speed should be done
through an OpenFirmware property. This patch adds a function to perform
this detection and a hook to perform dynamic adding of the function only
for pseries. This is done by overwriting the weak
pcibios_root_bridge_prepare function which is called by
pci_create_root_bus().

From: Lucas Kannebley Tavares <lucaskt@linux.vnet.ibm.com>
Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/machdep.h       |    3 ++
 arch/powerpc/kernel/pci-common.c         |    8 ++++
 arch/powerpc/platforms/pseries/pci.c     |   53 ++++++++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/pseries.h |    4 ++
 arch/powerpc/platforms/pseries/setup.c   |    2 +
 5 files changed, 70 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 3f3f691..92386fc 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -29,6 +29,7 @@ struct rtc_time;
 struct file;
 struct pci_controller;
 struct kimage;
+struct pci_host_bridge;
 
 struct machdep_calls {
 	char		*name;
@@ -108,6 +109,8 @@ struct machdep_calls {
 	void		(*pcibios_fixup)(void);
 	int		(*pci_probe_mode)(struct pci_bus *);
 	void		(*pci_irq_fixup)(struct pci_dev *dev);
+	int		(*pcibios_root_bridge_prepare)(struct pci_host_bridge
+				*bridge);
 
 	/* To setup PHBs when using automatic OF platform driver for PCI */
 	int		(*pci_setup_phb)(struct pci_controller *host);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f325dc9..d5811d8 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -845,6 +845,14 @@ int pci_proc_domain(struct pci_bus *bus)
 	return 1;
 }
 
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	if (ppc_md.pcibios_root_bridge_prepare)
+		return ppc_md.pcibios_root_bridge_prepare(bridge);
+
+	return 0;
+}
+
 /* This header fixup will do the resource fixup for all devices as they are
  * probed, but not for bridge ranges
  */
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 0b580f4..5f93856 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -108,3 +108,56 @@ static void fixup_winbond_82c105(struct pci_dev* dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
 			 fixup_winbond_82c105);
+
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	struct device_node *dn, *pdn;
+	struct pci_bus *bus;
+	const uint32_t *pcie_link_speed_stats;
+
+	bus = bridge->bus;
+
+	dn = pcibios_get_phb_of_node(bus);
+	if (!dn)
+		return 0;
+
+	for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) {
+		pcie_link_speed_stats = (const uint32_t *) of_get_property(pdn,
+			"ibm,pcie-link-speed-stats", NULL);
+		if (pcie_link_speed_stats)
+			break;
+	}
+
+	of_node_put(pdn);
+
+	if (!pcie_link_speed_stats) {
+		pr_err("no ibm,pcie-link-speed-stats property\n");
+		return 0;
+	}
+
+	switch (pcie_link_speed_stats[0]) {
+	case 0x01:
+		bus->max_bus_speed = PCIE_SPEED_2_5GT;
+		break;
+	case 0x02:
+		bus->max_bus_speed = PCIE_SPEED_5_0GT;
+		break;
+	default:
+		bus->max_bus_speed = PCI_SPEED_UNKNOWN;
+		break;
+	}
+
+	switch (pcie_link_speed_stats[1]) {
+	case 0x01:
+		bus->cur_bus_speed = PCIE_SPEED_2_5GT;
+		break;
+	case 0x02:
+		bus->cur_bus_speed = PCIE_SPEED_5_0GT;
+		break;
+	default:
+		bus->cur_bus_speed = PCI_SPEED_UNKNOWN;
+		break;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 8af71e4..c2a3a25 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -63,4 +63,8 @@ extern int dlpar_detach_node(struct device_node *);
 /* Snooze Delay, pseries_idle */
 DECLARE_PER_CPU(long, smt_snooze_delay);
 
+/* PCI root bridge prepare function override for pseries */
+struct pci_host_bridge;
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
+
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ac932a9..c11c823 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -466,6 +466,8 @@ static void __init pSeries_setup_arch(void)
 	else
 		ppc_md.enable_pmcs = power4_enable_pmcs;
 
+	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
+
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		long rc;
 		if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) {
-- 
1.7.1

^ permalink raw reply related

* [PATCHv5 2/2] radeon: use max_bus_speed to activate gen2 speeds
From: Kleber Sacilotto de Souza @ 2013-05-03 22:43 UTC (permalink / raw)
  To: linuxppc-dev, dri-devel, Benjamin Herrenschmidt, Bjorn Helgaas,
	David Airlie, Michael Ellerman
  Cc: Brian King, Alex Deucher, Jerome Glisse,
	Thadeu Lima de Souza Cascardo, Kleber Sacilotto de Souza
In-Reply-To: <1367620993-27037-1-git-send-email-klebers@linux.vnet.ibm.com>

radeon currently uses a drm function to get the speed capabilities for
the bus, drm_pcie_get_speed_cap_mask. However, this is a non-standard
method of performing this detection and this patch changes it to use
the max_bus_speed attribute.

From: Lucas Kannebley Tavares <lucaskt@linux.vnet.ibm.com>
Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
---
 drivers/gpu/drm/radeon/evergreen.c |   10 +++-------
 drivers/gpu/drm/radeon/r600.c      |    9 ++-------
 drivers/gpu/drm/radeon/rv770.c     |    9 ++-------
 3 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 105bafb..3966696 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -4992,8 +4992,7 @@ void evergreen_fini(struct radeon_device *rdev)
 
 void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
 {
-	u32 link_width_cntl, speed_cntl, mask;
-	int ret;
+	u32 link_width_cntl, speed_cntl;
 
 	if (radeon_pcie_gen2 == 0)
 		return;
@@ -5008,11 +5007,8 @@ void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
 	if (ASIC_IS_X2(rdev))
 		return;
 
-	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
-	if (ret != 0)
-		return;
-
-	if (!(mask & DRM_PCIE_SPEED_50))
+	if ((rdev->pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT) &&
+		(rdev->pdev->bus->max_bus_speed != PCIE_SPEED_8_0GT))
 		return;
 
 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 1a08008..b45e648 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -4631,8 +4631,6 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev)
 {
 	u32 link_width_cntl, lanes, speed_cntl, training_cntl, tmp;
 	u16 link_cntl2;
-	u32 mask;
-	int ret;
 
 	if (radeon_pcie_gen2 == 0)
 		return;
@@ -4651,11 +4649,8 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev)
 	if (rdev->family <= CHIP_R600)
 		return;
 
-	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
-	if (ret != 0)
-		return;
-
-	if (!(mask & DRM_PCIE_SPEED_50))
+	if ((rdev->pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT) &&
+		(rdev->pdev->bus->max_bus_speed != PCIE_SPEED_8_0GT))
 		return;
 
 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 83f612a..a6af4aa 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -2113,8 +2113,6 @@ static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
 {
 	u32 link_width_cntl, lanes, speed_cntl, tmp;
 	u16 link_cntl2;
-	u32 mask;
-	int ret;
 
 	if (radeon_pcie_gen2 == 0)
 		return;
@@ -2129,11 +2127,8 @@ static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
 	if (ASIC_IS_X2(rdev))
 		return;
 
-	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
-	if (ret != 0)
-		return;
-
-	if (!(mask & DRM_PCIE_SPEED_50))
+	if ((rdev->pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT) &&
+		(rdev->pdev->bus->max_bus_speed != PCIE_SPEED_8_0GT))
 		return;
 
 	DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
-- 
1.7.1

^ permalink raw reply related

* RE: [PATCH] KVM: PPC: Book3E 64: Fix IRQs warnings and hangs
From: Caraman Mihai Claudiu-B02008 @ 2013-05-03 22:59 UTC (permalink / raw)
  To: Wood Scott-B07421
  Cc: linuxppc-dev@lists.ozlabs.org, kvm@vger.kernel.org,
	kvm-ppc@vger.kernel.org
In-Reply-To: <1367618808.19391.11@snotra>

> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Saturday, May 04, 2013 1:07 AM
> To: Caraman Mihai Claudiu-B02008
> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH] KVM: PPC: Book3E 64: Fix IRQs warnings and hangs
>
> I replaced the two calls to kvmppc_lazy_ee_enable() with calls to
> hard_irq_disable(), and it seems to be working fine.

Please take a look on 'KVM: PPC64: booke: Hard disable interrupts when
entering guest' RFC thread and see if your solution addresses Ben's
comments.

>
> > > > > Where is the arch_local_irq_restore() instance you're talking
> > about?
> > > >
> > > > ./arch/power/kernel/irq.c
> > >
> > > I meant the caller. :-P
> >
> > ./arch/powerpc/include/asm/hw_irq.h
> >
> >   55static inline unsigned long arch_local_irq_disable(void)
> >   56{
> >   57        unsigned long flags, zero;
> >   58
> >   59        asm volatile(
> >   60                "li %1,0; lbz %0,%2(13); stb %1,%2(13)"
> >   61                : "=r" (flags), "=&r" (zero)
> >   62                : "i" (offsetof(struct paca_struct, soft_enabled))
> >   63                : "memory");
> >   64
> >   65        return flags;
> >   66}
> >   67
> >   68extern void arch_local_irq_restore(unsigned long);
> >   69
> >   70static inline void arch_local_irq_enable(void)
> >   71{
> >   72        arch_local_irq_restore(1);
> >   73}
>
> Sigh.  I meant the real caller, who's calling local_irq_restore().

I'm not sure what you mean, arch_local_irq_restore() is called indirectly
by local_irq_enable() in our case from handle_exit().

-Mike

^ permalink raw reply

* Re: [PATCHv5 0/2] Speed Cap fixes for ppc64
From: Benjamin Herrenschmidt @ 2013-05-03 23:01 UTC (permalink / raw)
  To: Kleber Sacilotto de Souza
  Cc: David Airlie, dri-devel, Brian King, Jerome Glisse,
	Thadeu Lima de Souza Cascardo, Bjorn Helgaas, Alex Deucher,
	linuxppc-dev
In-Reply-To: <1367620993-27037-1-git-send-email-klebers@linux.vnet.ibm.com>

On Fri, 2013-05-03 at 19:43 -0300, Kleber Sacilotto de Souza wrote:

> This patch series does:
>   1. max_bus_speed is used to set the device to gen2 speeds
>   2. on power there's no longer a conflict between the pseries call and other
> architectures, because the overwrite is done via a ppc_md hook
>   3. radeon is using bus->max_bus_speed instead of drm_pcie_get_speed_cap_mask
> for gen2 capability detection
> 
> The first patch consists of some architecture changes, such as adding a hook on
> powerpc for pcibios_root_bridge_prepare, so that pseries will initialize it to a
> function, while all other architectures get a NULL pointer. So that whenever
> pci_create_root_bus is called, we'll get max_bus_speed properly setup from
> OpenFirmware.
> 
> The second patch consists of simple radeon changes not to call
> drm_pcie_get_speed_cap_mask anymore. I assume that on x86 machines,
> the max_bus_speed property will be properly set already.

So I'm ok with the approach now and I might even put the powerpc patch
in for 3.10 since arguably we are fixing a nasty bug (uninitialized
max_bus_speed).

David, what's your feeling about the radeon change ? It would be nice if
that could go in soon for various distro targets :-) On the other hand
I'm not going to be pushy if you are not comfortable with it.

Cheers,
Ben.

^ permalink raw reply

* [PATCH] arch/powerpc: advertise ISA2.07, HTM, DSCR, EBB and ISEL bits in HWCAP2
From: Nishanth Aravamudan @ 2013-05-03 23:19 UTC (permalink / raw)
  To: benh
  Cc: Michael Neuling, Michael R Meissner, sjmunroe, bergner,
	Ryan Arnold, linuxppc-dev

Now that we have AT_HWCAP2 support, start exposing some of the new
POWER8 features via it.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

---
Note: there are, I think, some Freescale processors that also should be
updated to indicate they support ISEL, but I don't know which ones.
Since this is a new feature bit (and vector), it seems like we can fix
that up in a follow-on patch. Also, this is my first patch trying to
manipulate these bits, so please let me know if I'm doing something
wrong (for instance, I don't see any particular order to the bits in
PPC_FEATURE_*)

diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index ed9dd81..78db4e2 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -1,6 +1,7 @@
 #ifndef _UAPI__ASM_POWERPC_CPUTABLE_H
 #define _UAPI__ASM_POWERPC_CPUTABLE_H
 
+/* in AT_HWCAP */
 #define PPC_FEATURE_32			0x80000000
 #define PPC_FEATURE_64			0x40000000
 #define PPC_FEATURE_601_INSTR		0x20000000
@@ -33,4 +34,11 @@
 #define PPC_FEATURE_TRUE_LE		0x00000002
 #define PPC_FEATURE_PPC_LE		0x00000001
 
+/* in AT_HWCAP2 */
+#define PPC_FEATURE2_ARCH_2_07		0x80000000
+#define PPC_FEATURE2_HTM		0x40000000
+#define PPC_FEATURE2_DSCR		0x20000000
+#define PPC_FEATURE2_EBB		0x10000000
+#define PPC_FEATURE2_ISEL		0x08000000
+
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index ae9f433..871c741 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -102,6 +102,9 @@ extern void __restore_cpu_e6500(void);
 				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | PPC_FEATURE2_HTM | \
+				 PPC_FEATURE2_DSCR | PPC_FEATURE2_EBB | \
+				 PPC_FEATURE2_ISEL)
 #define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -443,6 +446,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER8 (architected)",
 		.cpu_features		= CPU_FTRS_POWER8,
 		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
 		.mmu_features		= MMU_FTRS_POWER8,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
@@ -492,6 +496,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER8 (raw)",
 		.cpu_features		= CPU_FTRS_POWER8,
 		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
 		.mmu_features		= MMU_FTRS_POWER8,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,

^ permalink raw reply related

* Re: [PATCH] arch/powerpc: advertise ISA2.07, HTM, DSCR, EBB and ISEL bits in HWCAP2
From: Benjamin Herrenschmidt @ 2013-05-03 23:23 UTC (permalink / raw)
  To: Nishanth Aravamudan
  Cc: Michael Neuling, Michael R Meissner, sjmunroe, bergner,
	Ryan Arnold, linuxppc-dev
In-Reply-To: <20130503231933.GA29436@linux.vnet.ibm.com>

On Fri, 2013-05-03 at 16:19 -0700, Nishanth Aravamudan wrote:
> +/* in AT_HWCAP2 */
> +#define PPC_FEATURE2_ARCH_2_07         0x80000000
> +#define PPC_FEATURE2_HTM               0x40000000
> +#define PPC_FEATURE2_DSCR              0x20000000
> +#define PPC_FEATURE2_EBB               0x10000000
> +#define PPC_FEATURE2_ISEL              0x08000000

Should we "adjust" (ie filter out) some of these based
on CONFIG_ options (such as transactional memory enabled,
EBB supported by the hypervisor, etc...) ?

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH] arch/powerpc: advertise ISA2.07, HTM, DSCR, EBB and ISEL bits in HWCAP2
From: Michael R Meissner @ 2013-05-03 23:26 UTC (permalink / raw)
  To: Nishanth Aravamudan
  Cc: linuxppc-dev, Steve Munroe, Peter Bergner, Ryan Arnold,
	Michael Neuling
In-Reply-To: <20130503231933.GA29436@linux.vnet.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 4193 bytes --]

According to the GCC sources, ISEL is enabled by default for the 8540, 
8548, e500mc, e500mc64, e6500 processors.



From:
Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
To:
benh@kernel.crashing.org, 
Cc:
Steve Munroe/Rochester/IBM@IBMUS, Peter Bergner/Rochester/IBM@IBMUS, 
Michael R Meissner/Cambridge/IBM@IBMUS, Michael Neuling 
<michael.neuling@au1.ibm.com>, linuxppc-dev@lists.ozlabs.org, Ryan 
Arnold/Rochester/IBM@IBMUS
Date:
05/03/2013 07:19 PM
Subject:
[PATCH] arch/powerpc: advertise ISA2.07, HTM, DSCR, EBB and ISEL bits in 
HWCAP2



Now that we have AT_HWCAP2 support, start exposing some of the new
POWER8 features via it.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

---
Note: there are, I think, some Freescale processors that also should be
updated to indicate they support ISEL, but I don't know which ones.
Since this is a new feature bit (and vector), it seems like we can fix
that up in a follow-on patch. Also, this is my first patch trying to
manipulate these bits, so please let me know if I'm doing something
wrong (for instance, I don't see any particular order to the bits in
PPC_FEATURE_*)

diff --git a/arch/powerpc/include/uapi/asm/cputable.h 
b/arch/powerpc/include/uapi/asm/cputable.h
index ed9dd81..78db4e2 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -1,6 +1,7 @@
 #ifndef _UAPI__ASM_POWERPC_CPUTABLE_H
 #define _UAPI__ASM_POWERPC_CPUTABLE_H
 
+/* in AT_HWCAP */
 #define PPC_FEATURE_32 0x80000000
 #define PPC_FEATURE_64 0x40000000
 #define PPC_FEATURE_601_INSTR                           0x20000000
@@ -33,4 +34,11 @@
 #define PPC_FEATURE_TRUE_LE                             0x00000002
 #define PPC_FEATURE_PPC_LE                              0x00000001
 
+/* in AT_HWCAP2 */
+#define PPC_FEATURE2_ARCH_2_07                          0x80000000
+#define PPC_FEATURE2_HTM                                0x40000000
+#define PPC_FEATURE2_DSCR                               0x20000000
+#define PPC_FEATURE2_EBB                                0x10000000
+#define PPC_FEATURE2_ISEL                               0x08000000
+
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/kernel/cputable.c 
b/arch/powerpc/kernel/cputable.c
index ae9f433..871c741 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -102,6 +102,9 @@ extern void __restore_cpu_e6500(void);
 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
 PPC_FEATURE_TRUE_LE | \
 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER8             (PPC_FEATURE2_ARCH_2_07 | 
PPC_FEATURE2_HTM | \
+ PPC_FEATURE2_DSCR | PPC_FEATURE2_EBB | \
+ PPC_FEATURE2_ISEL)
 #define COMMON_USER_PA6T                (COMMON_USER_PPC64 | 
PPC_FEATURE_PA6T |\
 PPC_FEATURE_TRUE_LE | \
 PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -443,6 +446,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
                                 .cpu_name                               = 
"POWER8 (architected)",
                                 .cpu_features                           = 
CPU_FTRS_POWER8,
                                 .cpu_user_features              = 
COMMON_USER_POWER8,
+                                .cpu_user_features2             = 
COMMON_USER2_POWER8,
                                 .mmu_features                           = 
MMU_FTRS_POWER8,
                                 .icache_bsize                           = 
128,
                                 .dcache_bsize                           = 
128,
@@ -492,6 +496,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
                                 .cpu_name                               = 
"POWER8 (raw)",
                                 .cpu_features                           = 
CPU_FTRS_POWER8,
                                 .cpu_user_features              = 
COMMON_USER_POWER8,
+                                .cpu_user_features2             = 
COMMON_USER2_POWER8,
                                 .mmu_features                           = 
MMU_FTRS_POWER8,
                                 .icache_bsize                           = 
128,
                                 .dcache_bsize                           = 
128,



[-- Attachment #2: Type: text/html, Size: 10342 bytes --]

^ permalink raw reply related

* Re: [PATCH] KVM: PPC: Book3E 64: Fix IRQs warnings and hangs
From: Scott Wood @ 2013-05-03 23:30 UTC (permalink / raw)
  To: Caraman Mihai Claudiu-B02008
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org,
	kvm@vger.kernel.org, kvm-ppc@vger.kernel.org
In-Reply-To: <300B73AA675FCE4A93EB4FC1D42459FF3E9D81@039-SN2MPN1-013.039d.mgd.msft.net>

On 05/03/2013 05:59:32 PM, Caraman Mihai Claudiu-B02008 wrote:
> > -----Original Message-----
> > From: Wood Scott-B07421
> > Sent: Saturday, May 04, 2013 1:07 AM
> > To: Caraman Mihai Claudiu-B02008
> > Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> > linuxppc-dev@lists.ozlabs.org
> > Subject: Re: [PATCH] KVM: PPC: Book3E 64: Fix IRQs warnings and hangs
> >
> > I replaced the two calls to kvmppc_lazy_ee_enable() with calls to
> > hard_irq_disable(), and it seems to be working fine.
>
> Please take a look on 'KVM: PPC64: booke: Hard disable interrupts when
> entering guest' RFC thread and see if your solution addresses Ben's
> comments.

My original one didn't (there was a race if an interrupt comes in
between soft-disabling and hard-disabling, it wouldn't be received
until the guest exits for some other reason).

Instead, I turned the local_irq_disable() into hard_irq_disable() plus
trace_hardirqs_off().  This worked without warnings.

-Scott

^ permalink raw reply

* Re: [PATCH] arch/powerpc: advertise ISA2.07, HTM, DSCR, EBB and ISEL bits in HWCAP2
From: Nishanth Aravamudan @ 2013-05-03 23:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Michael Neuling, Michael R Meissner, sjmunroe, bergner,
	Ryan Arnold, linuxppc-dev
In-Reply-To: <1367623431.4389.132.camel@pasglop>

On 04.05.2013 [09:23:51 +1000], Benjamin Herrenschmidt wrote:
> On Fri, 2013-05-03 at 16:19 -0700, Nishanth Aravamudan wrote:
> > +/* in AT_HWCAP2 */
> > +#define PPC_FEATURE2_ARCH_2_07         0x80000000
> > +#define PPC_FEATURE2_HTM               0x40000000
> > +#define PPC_FEATURE2_DSCR              0x20000000
> > +#define PPC_FEATURE2_EBB               0x10000000
> > +#define PPC_FEATURE2_ISEL              0x08000000
> 
> Should we "adjust" (ie filter out) some of these based
> on CONFIG_ options (such as transactional memory enabled,
> EBB supported by the hypervisor, etc...) ?

Err, yeah, that seems reasonable :) However, it seems like glibc uses
these values rather directly so it knows what bits to check for each
feature. Therefore, it seems like it would be better to do the
ifdeffery/checking in the user in cputable.c, but that seems like it
could get quite complicated.

Would it be ok (I guess I'm asking Ryan & co. here) to have an #ifdef in
the definition that may or may not mean the bit is set in the aux
vector, but the bit, if set, would always be the same bit?

-Nish

^ permalink raw reply

* [PATCH] kvm/ppc/booke64: Hard disable interrupts when entering the guest
From: Scott Wood @ 2013-05-03 23:45 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Scott Wood, Mihai Caraman, linuxppc-dev, kvm, kvm-ppc

kvmppc_lazy_ee_enable() was causing interrupts to be soft-enabled
(albeit hard-disabled) in kvmppc_restart_interrupt().  This led to
warnings, and possibly breakage if the interrupt state was later saved
and then restored (leading to interrupts being hard-and-soft enabled
when they should be at least soft-disabled).

Simply removing kvmppc_lazy_ee_enable() leaves interrupts only
soft-disabled when we enter the guest, but they will be hard-disabled
when we exit the guest -- without PACA_IRQ_HARD_DIS ever being set, so
the local_irq_enable() fails to hard-enable.

While we could just set PACA_IRQ_HARD_DIS after an exit to compensate,
instead hard-disable interrupts before entering the guest.  This way,
we won't have to worry about interactions if we take an interrupt
during the guest entry code.  While I don't see any obvious
interactions, it could change in the future (e.g. it would be bad if
the non-hv code were used on 64-bit or if 32-bit guest lazy interrupt
disabling, since the non-hv code changes IVPR among other things).

Signed-off-by: Scott Wood <scottwood@freescale.com>
Cc: Mihai Caraman <mihai.caraman@freescale.com>
---
 arch/powerpc/kvm/booke.c |    9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ecbe908..b216821 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -666,14 +666,14 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		return -EINVAL;
 	}

-	local_irq_disable();
+	hard_irq_disable();
+	trace_hardirqs_off();
 	s = kvmppc_prepare_to_enter(vcpu);
 	if (s <= 0) {
 		local_irq_enable();
 		ret = s;
 		goto out;
 	}
-	kvmppc_lazy_ee_enable();

 	kvm_guest_enter();

@@ -1150,13 +1150,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * aren't already exiting to userspace for some other reason.
 	 */
 	if (!(r & RESUME_HOST)) {
-		local_irq_disable();
+		hard_irq_disable();
+		trace_hardirqs_off();
 		s = kvmppc_prepare_to_enter(vcpu);
 		if (s <= 0) {
 			local_irq_enable();
 			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-		} else {
-			kvmppc_lazy_ee_enable();
 		}
 	}

-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH] kvm/ppc/booke64: Hard disable interrupts when entering the guest
From: Scott Wood @ 2013-05-03 23:53 UTC (permalink / raw)
  To: Scott Wood; +Cc: Mihai Caraman, linuxppc-dev, Alexander Graf, kvm-ppc, kvm
In-Reply-To: <1367624723-22456-1-git-send-email-scottwood@freescale.com>

On 05/03/2013 06:45:23 PM, Scott Wood wrote:
> While we could just set PACA_IRQ_HARD_DIS after an exit to compensate,
> instead hard-disable interrupts before entering the guest.  This way,
> we won't have to worry about interactions if we take an interrupt
> during the guest entry code.  While I don't see any obvious
> interactions, it could change in the future (e.g. it would be bad if
> the non-hv code were used on 64-bit or if 32-bit guest lazy interrupt
> disabling, since the non-hv code changes IVPR among other things).

s/32-bit guest lazy/32-bit gets lazy/

-Scott

^ permalink raw reply

* Re: [PATCH] arch/powerpc: advertise ISA2.07, HTM, DSCR, EBB and ISEL bits in HWCAP2
From: Michael Neuling @ 2013-05-04  0:04 UTC (permalink / raw)
  To: Nishanth Aravamudan
  Cc: Michael R Meissner, Steve Munroe, Peter Bergner, Ryan Arnold,
	linuxppc-dev
In-Reply-To: <20130503231933.GA29436@linux.vnet.ibm.com>

Nishanth Aravamudan <nacc@linux.vnet.ibm.com> wrote:

> Now that we have AT_HWCAP2 support, start exposing some of the new
> POWER8 features via it.
> 
> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

A few points:

We need a TAR bit as well, although this could be covered in 207?

For TM we need to turn it off if CONFIG_PPC_TRANSACTIONAL_MEM is not
set.  Look at PPC_FEATURE_HAS_ALTIVEC_COMP for how we do that and follow
the bouncing ball.  

Please add ISEL on other processors.

Please add DSCR on other processors.

EBB can be reserved, but there is no enablement at this point so don't
turn it on yet.  We'll turn it on when we send the perf API.

Mikey

> ---
> Note: there are, I think, some Freescale processors that also should be
> updated to indicate they support ISEL, but I don't know which ones.
> Since this is a new feature bit (and vector), it seems like we can fix
> that up in a follow-on patch. Also, this is my first patch trying to
> manipulate these bits, so please let me know if I'm doing something
> wrong (for instance, I don't see any particular order to the bits in
> PPC_FEATURE_*)
> 
> diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
> index ed9dd81..78db4e2 100644
> --- a/arch/powerpc/include/uapi/asm/cputable.h
> +++ b/arch/powerpc/include/uapi/asm/cputable.h
> @@ -1,6 +1,7 @@
>  #ifndef _UAPI__ASM_POWERPC_CPUTABLE_H
>  #define _UAPI__ASM_POWERPC_CPUTABLE_H
>  
> +/* in AT_HWCAP */
>  #define PPC_FEATURE_32			0x80000000
>  #define PPC_FEATURE_64			0x40000000
>  #define PPC_FEATURE_601_INSTR		0x20000000
> @@ -33,4 +34,11 @@
>  #define PPC_FEATURE_TRUE_LE		0x00000002
>  #define PPC_FEATURE_PPC_LE		0x00000001
>  
> +/* in AT_HWCAP2 */
> +#define PPC_FEATURE2_ARCH_2_07		0x80000000
> +#define PPC_FEATURE2_HTM		0x40000000
> +#define PPC_FEATURE2_DSCR		0x20000000
> +#define PPC_FEATURE2_EBB		0x10000000
> +#define PPC_FEATURE2_ISEL		0x08000000
> +
>  #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
> index ae9f433..871c741 100644
> --- a/arch/powerpc/kernel/cputable.c
> +++ b/arch/powerpc/kernel/cputable.c
> @@ -102,6 +102,9 @@ extern void __restore_cpu_e6500(void);
>  				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
>  				 PPC_FEATURE_TRUE_LE | \
>  				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
> +#define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | PPC_FEATURE2_HTM | \
> +				 PPC_FEATURE2_DSCR | PPC_FEATURE2_EBB | \
> +				 PPC_FEATURE2_ISEL)
>  #define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
>  				 PPC_FEATURE_TRUE_LE | \
>  				 PPC_FEATURE_HAS_ALTIVEC_COMP)
> @@ -443,6 +446,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
>  		.cpu_name		= "POWER8 (architected)",
>  		.cpu_features		= CPU_FTRS_POWER8,
>  		.cpu_user_features	= COMMON_USER_POWER8,
> +		.cpu_user_features2	= COMMON_USER2_POWER8,
>  		.mmu_features		= MMU_FTRS_POWER8,
>  		.icache_bsize		= 128,
>  		.dcache_bsize		= 128,
> @@ -492,6 +496,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
>  		.cpu_name		= "POWER8 (raw)",
>  		.cpu_features		= CPU_FTRS_POWER8,
>  		.cpu_user_features	= COMMON_USER_POWER8,
> +		.cpu_user_features2	= COMMON_USER2_POWER8,
>  		.mmu_features		= MMU_FTRS_POWER8,
>  		.icache_bsize		= 128,
>  		.dcache_bsize		= 128,
> 

^ permalink raw reply

* [PATCH v2 1/4] powerpc/cputable: reserve bits in HWCAP2 for new features
From: Nishanth Aravamudan @ 2013-05-04  0:47 UTC (permalink / raw)
  To: Michael Neuling
  Cc: Michael R Meissner, Steve Munroe, Peter Bergner, Ryan Arnold,
	linuxppc-dev
In-Reply-To: <2889.1367625894@ale.ozlabs.ibm.com>

Also, make HTM's presence dependent on the .config option.
    
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

---
Changes since v1:
 - Add TAR.
 - Make HTM config dependent.

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index fcc54ad..26807e5 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -224,8 +224,10 @@ extern const char *powerpc_base_platform;
 /* We only set the TM feature if the kernel was compiled with TM supprt */
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 #define CPU_FTR_TM_COMP		CPU_FTR_TM
+#define PPC_FEATURE2_HTM_COMP	PPC_FEATURE2_HTM
 #else
 #define CPU_FTR_TM_COMP		0
+#define PPC_FEATURE2_HTM_COMP	0
 #endif
 
 /* We need to mark all pages as being coherent if we're SMP or we have a
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index ed9dd81..5b76579 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -1,6 +1,7 @@
 #ifndef _UAPI__ASM_POWERPC_CPUTABLE_H
 #define _UAPI__ASM_POWERPC_CPUTABLE_H
 
+/* in AT_HWCAP */
 #define PPC_FEATURE_32			0x80000000
 #define PPC_FEATURE_64			0x40000000
 #define PPC_FEATURE_601_INSTR		0x20000000
@@ -33,4 +34,12 @@
 #define PPC_FEATURE_TRUE_LE		0x00000002
 #define PPC_FEATURE_PPC_LE		0x00000001
 
+/* in AT_HWCAP2 */
+#define PPC_FEATURE2_ARCH_2_07		0x80000000
+#define PPC_FEATURE2_HTM		0x40000000
+#define PPC_FEATURE2_DSCR		0x20000000
+#define PPC_FEATURE2_EBB		0x10000000
+#define PPC_FEATURE2_ISEL		0x08000000
+#define PPC_FEATURE2_TAR		0x04000000
+
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */

^ permalink raw reply related

* [PATCH v2 2/4] powerpc/cputable: advertise DSCR support on P7/P7+
From: Nishanth Aravamudan @ 2013-05-04  0:48 UTC (permalink / raw)
  To: Michael Neuling
  Cc: Michael R Meissner, Steve Munroe, Peter Bergner, Ryan Arnold,
	linuxppc-dev
In-Reply-To: <20130504004756.GA3532@linux.vnet.ibm.com>

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index ae9f433..a792157 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -98,6 +98,7 @@ extern void __restore_cpu_e6500(void);
 				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER7	(PPC_FEATURE2_DSCR)
 #define COMMON_USER_POWER8	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
 				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
 				 PPC_FEATURE_TRUE_LE | \
@@ -428,6 +429,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (architected)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
 		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
@@ -458,6 +460,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
 		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
@@ -475,6 +478,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7+ (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
 		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,

^ permalink raw reply related

* [PATCH v2 3/4] powerpc/cputable: advertise ISEL support on appropriate embedded processors
From: Nishanth Aravamudan @ 2013-05-04  0:49 UTC (permalink / raw)
  To: Michael Neuling
  Cc: Michael R Meissner, Steve Munroe, Peter Bergner, Ryan Arnold,
	linuxppc-dev
In-Reply-To: <20130504004838.GB3532@linux.vnet.ibm.com>

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index a792157..b224cd5 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1999,6 +1999,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_user_features	= COMMON_USER_BOOKE |
 			PPC_FEATURE_HAS_SPE_COMP |
 			PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
 		.mmu_features		= MMU_FTR_TYPE_FSL_E,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
@@ -2018,6 +2019,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 			PPC_FEATURE_HAS_SPE_COMP |
 			PPC_FEATURE_HAS_EFP_SINGLE_COMP |
 			PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
 		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
@@ -2034,6 +2036,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "e500mc",
 		.cpu_features		= CPU_FTRS_E500MC,
 		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
 		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
 			MMU_FTR_USE_TLBILX,
 		.icache_bsize		= 64,
@@ -2073,6 +2076,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_E6500,
 		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
 		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
 			MMU_FTR_USE_TLBILX,
 		.icache_bsize		= 64,

^ permalink raw reply related

* [PATCH v2 4/4] powerpc/cputable: advertise support for ISEL/HTM/DSCR/TAR on POWER8
From: Nishanth Aravamudan @ 2013-05-04  0:49 UTC (permalink / raw)
  To: Michael Neuling
  Cc: Michael R Meissner, Steve Munroe, Peter Bergner, Ryan Arnold,
	linuxppc-dev
In-Reply-To: <20130504004912.GC3532@linux.vnet.ibm.com>

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

---
Changes since v1:
 - Add TAR.
 - Use config dependent symbol for HTM.

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b224cd5..79cde71 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -103,6 +103,9 @@ extern void __restore_cpu_e6500(void);
 				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | \
+				 PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
+				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR)
 #define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
 				 PPC_FEATURE_TRUE_LE | \
 				 PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -445,6 +448,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER8 (architected)",
 		.cpu_features		= CPU_FTRS_POWER8,
 		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
 		.mmu_features		= MMU_FTRS_POWER8,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
@@ -496,6 +500,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER8 (raw)",
 		.cpu_features		= CPU_FTRS_POWER8,
 		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
 		.mmu_features		= MMU_FTRS_POWER8,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,

^ permalink raw reply related

* [PATCH v2] net/eth/ibmveth: Fixup retrieval of MAC address
From: Benjamin Herrenschmidt @ 2013-05-04  3:19 UTC (permalink / raw)
  To: netdev; +Cc: Ben Hutchings, linuxppc-dev, David Miller, David Gibson

Some ancient pHyp versions used to create a 8 bytes local-mac-address
property in the device-tree instead of a 6 bytes one for veth.

The Linux driver code to deal with that is an insane hack which also
happens to break with some choices of MAC addresses in qemu by testing
for a bit in the address rather than just looking at the size of the
property.

Sanitize this by doing the latter instead.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: <stable@vger.kernel.org>
---

v2: Add missing "else" so the 8-bytes case doesn't trip the
    bad length error

 drivers/net/ethernet/ibm/ibmveth.c |   23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index c859771..f46dbef 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1324,7 +1324,7 @@ static const struct net_device_ops ibmveth_netdev_ops = {
 
 static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 {
-	int rc, i;
+	int rc, i, mac_len;
 	struct net_device *netdev;
 	struct ibmveth_adapter *adapter;
 	unsigned char *mac_addr_p;
@@ -1334,11 +1334,19 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		dev->unit_address);
 
 	mac_addr_p = (unsigned char *)vio_get_attribute(dev, VETH_MAC_ADDR,
-							NULL);
+							&mac_len);
 	if (!mac_addr_p) {
 		dev_err(&dev->dev, "Can't find VETH_MAC_ADDR attribute\n");
 		return -EINVAL;
 	}
+	/* Workaround for old/broken pHyp */
+	if (mac_len == 8)
+		mac_addr_p += 2;
+	else if (mac_len != 6) {
+		dev_err(&dev->dev, "VETH_MAC_ADDR attribute wrong len %d\n",
+			mac_len);
+		return -EINVAL;
+	}
 
 	mcastFilterSize_p = (unsigned int *)vio_get_attribute(dev,
 						VETH_MCAST_FILTER_SIZE, NULL);
@@ -1363,17 +1371,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
 	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
 
-	/*
-	 * Some older boxes running PHYP non-natively have an OF that returns
-	 * a 8-byte local-mac-address field (and the first 2 bytes have to be
-	 * ignored) while newer boxes' OF return a 6-byte field. Note that
-	 * IEEE 1275 specifies that local-mac-address must be a 6-byte field.
-	 * The RPA doc specifies that the first byte must be 10b, so we'll
-	 * just look for it to solve this 8 vs. 6 byte field issue
-	 */
-	if ((*mac_addr_p & 0x3) != 0x02)
-		mac_addr_p += 2;
-
 	adapter->mac_addr = 0;
 	memcpy(&adapter->mac_addr, mac_addr_p, 6);
 

^ permalink raw reply related

* [PATCH] powerpc/powernv: Properly handle failure starting CPUs
From: Benjamin Herrenschmidt @ 2013-05-04  3:21 UTC (permalink / raw)
  To: linuxppc-dev

If OPAL returns an error, propagate it upward rather than spinning for
seconds waiting for a CPU that will never show up.

Signed-off-by: Benjamin Herrenschmidt  <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powernv/smp.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 0bdc735..6a3ecca 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -77,9 +77,11 @@ int pnv_smp_kick_cpu(int nr)
 	if (!paca[nr].cpu_start && firmware_has_feature(FW_FEATURE_OPALv2)) {
 		pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
 		rc = opal_start_cpu(pcpu, start_here);
-		if (rc != OPAL_SUCCESS)
+		if (rc != OPAL_SUCCESS) {
 			pr_warn("OPAL Error %ld starting CPU %d\n",
 				rc, nr);
+			return -ENODEV;
+		}
 	}
 	return smp_generic_kick_cpu(nr);
 }

^ permalink raw reply related

* Re: [PATCH -V7 04/10] powerpc: Update find_linux_pte_or_hugepte to handle transparent hugepages
From: David Gibson @ 2013-05-04  6:28 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linuxppc-dev, paulus, linux-mm
In-Reply-To: <87ip2z51rn.fsf@linux.vnet.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 815 bytes --]

On Sat, May 04, 2013 at 12:28:20AM +0530, Aneesh Kumar K.V wrote:
> David Gibson <dwg@au1.ibm.com> writes:
> 
> > On Mon, Apr 29, 2013 at 01:21:45AM +0530, Aneesh Kumar K.V wrote:
> >> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> >
> > What's the difference in meaning between pmd_huge() and pmd_large()?
> >
> 
> #ifndef CONFIG_HUGETLB_PAGE
> #define pmd_huge(x)	0
> #endif
> 
> Also pmd_large do check for THP PTE flag, and _PAGE_PRESENT.

I don't mean what's the code difference.  I mean what is the semantic
difference between pmd_huge() and pmd_large() supposed to be - in
words.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply

* RE: [PATCH] kvm/ppc/booke64: Hard disable interrupts when entering the guest
From: Caraman Mihai Claudiu-B02008 @ 2013-05-04  7:11 UTC (permalink / raw)
  To: Wood Scott-B07421, Alexander Graf
  Cc: linuxppc-dev@lists.ozlabs.org, kvm@vger.kernel.org,
	kvm-ppc@vger.kernel.org
In-Reply-To: <1367624723-22456-1-git-send-email-scottwood@freescale.com>

> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Saturday, May 04, 2013 2:45 AM
> To: Alexander Graf
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; linuxppc-
> dev@lists.ozlabs.org; Wood Scott-B07421; Caraman Mihai Claudiu-B02008
> Subject: [PATCH] kvm/ppc/booke64: Hard disable interrupts when entering
> the guest
>
> kvmppc_lazy_ee_enable() was causing interrupts to be soft-enabled
> (albeit hard-disabled) in kvmppc_restart_interrupt().  This led to
> warnings, and possibly breakage if the interrupt state was later saved
> and then restored (leading to interrupts being hard-and-soft enabled
> when they should be at least soft-disabled).
>
> Simply removing kvmppc_lazy_ee_enable() leaves interrupts only
> soft-disabled when we enter the guest, but they will be hard-disabled
> when we exit the guest -- without PACA_IRQ_HARD_DIS ever being set, so
> the local_irq_enable() fails to hard-enable.

Just to mention one special case: may_hard_irq_enable(), called from do_IRQ()
and timer_interrupt(), clears PACA_IRQ_HARD_DIS, but it either hard-enables or
leaves PACA_IRQ_EE set, which is enough for local_irq_enable() to hard-enable.

>
> While we could just set PACA_IRQ_HARD_DIS after an exit to compensate,
> instead hard-disable interrupts before entering the guest.  This way,
> we won't have to worry about interactions if we take an interrupt
> during the guest entry code.  While I don't see any obvious
> interactions, it could change in the future (e.g. it would be bad if
> the non-hv code were used on 64-bit or if 32-bit guest lazy interrupt
> disabling, since the non-hv code changes IVPR among other things).
>
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> Cc: Mihai Caraman <mihai.caraman@freescale.com>

Please add my signed-off, it builds on the same principle of interrupts
soft-disabled to fix warnings and irq_happened flags to force interrupts
hard-enabled ... and parts of the code ;)

-Mike

^ permalink raw reply

* Re: [PATCH -V7 02/10] powerpc/THP: Implement transparent hugepages for ppc64
From: Aneesh Kumar K.V @ 2013-05-04 19:14 UTC (permalink / raw)
  To: David Gibson; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <20130503045201.GO13041@truffula.fritz.box>

David Gibson <dwg@au1.ibm.com> writes:

> On Mon, Apr 29, 2013 at 01:21:43AM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> We now have pmd entries covering 16MB range and the PMD table double its original size.
>> We use the second half of the PMD table to deposit the pgtable (PTE page).
>> The depoisted PTE page is further used to track the HPTE information. The information
>> include [ secondary group | 3 bit hidx | valid ]. We use one byte per each HPTE entry.
>> With 16MB hugepage and 64K HPTE we need 256 entries and with 4K HPTE we need
>> 4096 entries. Both will fit in a 4K PTE page. On hugepage invalidate we need to walk
>> the PTE page and invalidate all valid HPTEs.
>> 
>> This patch implements necessary arch specific functions for THP support and also
>> hugepage invalidate logic. These PMD related functions are intentionally kept
>> similar to their PTE counter-part.
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/page.h              |  11 +-
>>  arch/powerpc/include/asm/pgtable-ppc64-64k.h |   3 +-
>>  arch/powerpc/include/asm/pgtable-ppc64.h     | 259 +++++++++++++++++++++-
>>  arch/powerpc/include/asm/pgtable.h           |   5 +
>>  arch/powerpc/include/asm/pte-hash64-64k.h    |  17 ++
>>  arch/powerpc/mm/pgtable_64.c                 | 318 +++++++++++++++++++++++++++
>>  arch/powerpc/platforms/Kconfig.cputype       |   1 +
>>  7 files changed, 611 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
>> index 988c812..cbf4be7 100644
>> --- a/arch/powerpc/include/asm/page.h
>> +++ b/arch/powerpc/include/asm/page.h
>> @@ -37,8 +37,17 @@
>>  #define PAGE_SIZE		(ASM_CONST(1) << PAGE_SHIFT)
>>  
>>  #ifndef __ASSEMBLY__
>> -#ifdef CONFIG_HUGETLB_PAGE
>> +/*
>> + * With hugetlbfs enabled we allow the HPAGE_SHIFT to run time
>> + * configurable. But we enable THP only with 16MB hugepage.
>> + * With only THP configured, we force hugepage size to 16MB.
>> + * This should ensure that all subarchs that doesn't support
>> + * THP continue to work fine with HPAGE_SHIFT usage.
>> + */
>> +#if defined(CONFIG_HUGETLB_PAGE)
>>  extern unsigned int HPAGE_SHIFT;
>> +#elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
>> +#define HPAGE_SHIFT PMD_SHIFT
>
> As I said in comments on the first patch series, this messing around
> with HPAGE_SHIFT for THP is missing the point.  On ppc HPAGE_SHIFT is
> nothing more than the _default_ hugepage size for explicit hugepages.
> THP should not be dependent on it in any way.

fixed. 

>
>>  #else
>>  #define HPAGE_SHIFT PAGE_SHIFT
>>  #endif
>> diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
>> index 45142d6..a56b82f 100644
>> --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
>> +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
>> @@ -33,7 +33,8 @@
>>  #define PGDIR_MASK	(~(PGDIR_SIZE-1))
>>  
>>  /* Bits to mask out from a PMD to get to the PTE page */
>> -#define PMD_MASKED_BITS		0x1ff
>> +/* PMDs point to PTE table fragments which are 4K aligned.  */
>> +#define PMD_MASKED_BITS		0xfff
>
> Hrm.  AFAICT this is related to the change in size of PTE tables, and
> hence the page sharing stuff, so this belongs in the patch which
> implements that, rather than the THP support itself.
>

fixed

>>  /* Bits to mask out from a PGD/PUD to get to the PMD page */
>>  #define PUD_MASKED_BITS		0x1ff
>>  
>> diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
>> index ab84332..20133c1 100644
>> --- a/arch/powerpc/include/asm/pgtable-ppc64.h
>> +++ b/arch/powerpc/include/asm/pgtable-ppc64.h
>> @@ -154,7 +154,7 @@
>>  #define	pmd_present(pmd)	(pmd_val(pmd) != 0)
>>  #define	pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
>>  #define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
>> -#define pmd_page(pmd)		virt_to_page(pmd_page_vaddr(pmd))
>> +extern struct page *pmd_page(pmd_t pmd);
>>  
>>  #define pud_set(pudp, pudval)	(pud_val(*(pudp)) = (pudval))
>>  #define pud_none(pud)		(!pud_val(pud))
>> @@ -382,4 +382,261 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
>>  
>>  #endif /* __ASSEMBLY__ */
>>  
>> +#ifndef _PAGE_SPLITTING
>> +/*
>> + * THP pages can't be special. So use the _PAGE_SPECIAL
>> + */
>> +#define _PAGE_SPLITTING _PAGE_SPECIAL
>> +#endif
>> +
>> +#ifndef _PAGE_THP_HUGE
>> +/*
>> + * We need to differentiate between explicit huge page and THP huge
>> + * page, since THP huge page also need to track real subpage details
>> + * We use the _PAGE_COMBO bits here as dummy for platform that doesn't
>> + * support THP.
>> + */
>> +#define _PAGE_THP_HUGE  0x10000000
>
> So if it's _PAGE_COMBO, use _PAGE_COMBO, instead of the actual number.
>

We define the _PAGE_THP_HUGE value in pte-hash64-64k.h. The functions
below that depend on _PAGE_THP_HUGE are in pgtable-ppc64.h. The above
#define takes care of compile errors on subarchs that don't include
pte-hash64-64k.h. We really won't be using these functions at run time,
because we will not find a transparent huge page on those subarchs.



>> +#endif
>> +
>> +/*
>> + * PTE flags to conserve for HPTE identification for THP page.
>> + */
>> +#ifndef _PAGE_THP_HPTEFLAGS
>> +#define _PAGE_THP_HPTEFLAGS	(_PAGE_BUSY | _PAGE_HASHPTE)
>
> You have this definition both here and in pte-hash64-64k.h.  More
> importantly including _PAGE_BUSY seems like an extremely bad idea -
> did you mean _PAGE_THP_HUGE == _PAGE_COMBO?
>

We have the same definition for _PAGE_HPTEFLAGS. But since I moved
_PAGE_THP_HUGE to _PAGE_4K_PFN in the new series, I will be dropping
this.

>> +#endif
>> +
>> +#define HUGE_PAGE_SIZE		(ASM_CONST(1) << 24)
>> +#define HUGE_PAGE_MASK		(~(HUGE_PAGE_SIZE - 1))
>
> These constants should be named so its clear they're THP specific.
> They should also be defined in terms of PMD_SHIFT, instead of
> directly.
>

I was not able to use HPAGE_PMD_SIZE because we have that BUILD_BUG_ON
when THP is not enabled. I will switch them to PMD_SIZE and PMD_MASK ?


>> +/*
>> + * set of bits not changed in pmd_modify.
>> + */
>> +#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_THP_HPTEFLAGS | \
>> +			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_THP_HUGE)
>> +
>> +#ifndef __ASSEMBLY__
>> +extern void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
>> +				     pmd_t *pmdp);
>
> This should maybe be called "hpge_do_hugepage_flush()".  The current
> name suggests it returns a boolean, rather than performing the actual
> flush.
>

done


>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
>> +extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
>> +extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
>> +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		       pmd_t *pmdp, pmd_t pmd);
>> +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
>> +				 pmd_t *pmd);
>> +
>> +static inline int pmd_trans_huge(pmd_t pmd)
>> +{
>> +	/*
>> +	 * leaf pte for huge page, bottom two bits != 00
>> +	 */
>> +	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
>> +}
>> +
>> +static inline int pmd_large(pmd_t pmd)
>> +{
>> +	/*
>> +	 * leaf pte for huge page, bottom two bits != 00
>> +	 */
>> +	if (pmd_trans_huge(pmd))
>> +		return pmd_val(pmd) & _PAGE_PRESENT;
>> +	return 0;
>> +}
>> +
>> +static inline int pmd_trans_splitting(pmd_t pmd)
>> +{
>> +	if (pmd_trans_huge(pmd))
>> +		return pmd_val(pmd) & _PAGE_SPLITTING;
>> +	return 0;
>> +}
>> +
>> +
>> +static inline unsigned long pmd_pfn(pmd_t pmd)
>> +{
>> +	/*
>> +	 * Only called for hugepage pmd
>> +	 */
>> +	return pmd_val(pmd) >> PTE_RPN_SHIFT;
>> +}
>> +
>> +/* We will enable it in the last patch */
>> +#define has_transparent_hugepage() 0
>> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>> +
>> +static inline int pmd_young(pmd_t pmd)
>> +{
>> +	return pmd_val(pmd) & _PAGE_ACCESSED;
>> +}
>
> It would be clearer to define this function as well as various others
> that operate on PMDs as PTEs to just cast the parameter and call the
> corresponding pte_XXX(),

I did what the tile arch does. How about

+#define pmd_pte(pmd)		(pmd)
+#define pte_pmd(pte)		(pte)
+#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
 

>
>> +
>> +static inline pmd_t pmd_mkhuge(pmd_t pmd)
>> +{
>> +	/* Do nothing, mk_pmd() does this part.  */
>> +	return pmd;
>> +}
>> +
>> +#define __HAVE_ARCH_PMD_WRITE
>> +static inline int pmd_write(pmd_t pmd)
>> +{
>> +	return pmd_val(pmd) & _PAGE_RW;
>> +}
>> +
>> +static inline pmd_t pmd_mkold(pmd_t pmd)
>> +{
>> +	pmd_val(pmd) &= ~_PAGE_ACCESSED;
>> +	return pmd;
>> +}
>> +
>> +static inline pmd_t pmd_wrprotect(pmd_t pmd)
>> +{
>> +	pmd_val(pmd) &= ~_PAGE_RW;
>> +	return pmd;
>> +}
>> +
>> +static inline pmd_t pmd_mkdirty(pmd_t pmd)
>> +{
>> +	pmd_val(pmd) |= _PAGE_DIRTY;
>> +	return pmd;
>> +}
>> +
>> +static inline pmd_t pmd_mkyoung(pmd_t pmd)
>> +{
>> +	pmd_val(pmd) |= _PAGE_ACCESSED;
>> +	return pmd;
>> +}
>> +
>> +static inline pmd_t pmd_mkwrite(pmd_t pmd)
>> +{
>> +	pmd_val(pmd) |= _PAGE_RW;
>> +	return pmd;
>> +}
>> +
>> +static inline pmd_t pmd_mknotpresent(pmd_t pmd)
>> +{
>> +	pmd_val(pmd) &= ~_PAGE_PRESENT;
>> +	return pmd;
>> +}
>> +
>> +static inline pmd_t pmd_mksplitting(pmd_t pmd)
>> +{
>> +	pmd_val(pmd) |= _PAGE_SPLITTING;
>> +	return pmd;
>> +}
>> +
>> +/*
>> + * Set the dirty and/or accessed bits atomically in a linux hugepage PMD, this
>> + * function doesn't need to flush the hash entry
>> + */
>> +static inline void __pmdp_set_access_flags(pmd_t *pmdp, pmd_t entry)
>> +{
>> +	unsigned long bits = pmd_val(entry) & (_PAGE_DIRTY |
>> +					       _PAGE_ACCESSED |
>> +					       _PAGE_RW | _PAGE_EXEC);
>> +#ifdef PTE_ATOMIC_UPDATES
>> +	unsigned long old, tmp;
>> +
>> +	__asm__ __volatile__(
>> +	"1:	ldarx	%0,0,%4\n\
>> +		andi.	%1,%0,%6\n\
>> +		bne-	1b \n\
>> +		or	%0,%3,%0\n\
>> +		stdcx.	%0,0,%4\n\
>> +		bne-	1b"
>> +	:"=&r" (old), "=&r" (tmp), "=m" (*pmdp)
>> +	:"r" (bits), "r" (pmdp), "m" (*pmdp), "i" (_PAGE_BUSY)
>> +	:"cc");
>> +#else
>> +	unsigned long old = pmd_val(*pmdp);
>> +	*pmdp = __pmd(old | bits);
>> +#endif
>
> Using parameter casts on the corresponding pte_update() function would
> be even more valuable for these more complex functions with asm.


We may want to retain some of these because of the assert we want to add
for locking. PTE related functions expect ptl to be locked. PMD related
functions expect mm->page_table_lock to be locked.

>
>> +}
>> +
>> +#define __HAVE_ARCH_PMD_SAME
>> +static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
>> +{
>> +	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_THP_HPTEFLAGS) == 0);
>
> Here, specifically, the fact that PAGE_BUSY is in PAGE_THP_HPTEFLAGS
> is likely to be bad.  If the page is busy, it's in the middle of
> update so can't stably be considered the same as anything.
>


pte_same has the above definition. We use _PAGE_BUSY to indicate that
we are using the entry to satisfy a hpte hash insert. That is used to
prevent a parallel update. So why should pmd_same consider
_PAGE_BUSY?


>> +}
>> +
>> +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
>> +extern int pmdp_set_access_flags(struct vm_area_struct *vma,
>> +				 unsigned long address, pmd_t *pmdp,
>> +				 pmd_t entry, int dirty);
>> +
>> +static inline unsigned long pmd_hugepage_update(struct mm_struct *mm,
>> +						unsigned long addr,
>> +						pmd_t *pmdp, unsigned long clr)
>> +{
>> +#ifdef PTE_ATOMIC_UPDATES
>> +	unsigned long old, tmp;
>> +
>> +	__asm__ __volatile__(
>> +	"1:	ldarx	%0,0,%3\n\
>> +		andi.	%1,%0,%6\n\
>> +		bne-	1b \n\
>> +		andc	%1,%0,%4 \n\
>> +		stdcx.	%1,0,%3 \n\
>> +		bne-	1b"
>> +	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
>> +	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY)
>> +	: "cc" );
>> +#else
>> +	unsigned long old = pmd_val(*pmdp);
>> +	*pmdp = __pmd(old & ~clr);
>> +#endif
>> +
>> +#ifdef CONFIG_PPC_STD_MMU_64
>
> THP only works with the standard hash MMU, so this #if seems a bit
> pointless.

done


>
>> +	if (old & _PAGE_HASHPTE)
>> +		hpte_need_hugepage_flush(mm, addr, pmdp);
>> +#endif
>> +	return old;
>> +}
>> +
>> +static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
>> +					      unsigned long addr, pmd_t *pmdp)
>> +{
>> +	unsigned long old;
>> +
>> +	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
>> +		return 0;
>> +	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED);
>> +	return ((old & _PAGE_ACCESSED) != 0);
>> +}
>> +
>> +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
>> +extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
>> +				     unsigned long address, pmd_t *pmdp);
>> +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
>> +extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
>> +				  unsigned long address, pmd_t *pmdp);
>> +
>> +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
>> +extern pmd_t pmdp_get_and_clear(struct mm_struct *mm,
>> +				unsigned long addr, pmd_t *pmdp);
>> +
>> +#define __HAVE_ARCH_PMDP_SET_WRPROTECT
>
> Now that the PTE format is the same at bottom or PMD level, do you
> still need this?

Some of them we can drop. Others we need to, because we want to have
different asserts as i explained above.  For example below wrprotect we
want to call pmd_hugepage_update. 

>
>> +static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
>> +				      pmd_t *pmdp)
>> +{
>> +
>> +	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
>> +		return;
>> +
>> +	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW);
>> +}
>> +
>> +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
>> +extern void pmdp_splitting_flush(struct vm_area_struct *vma,
>> +				 unsigned long address, pmd_t *pmdp);
>> +
>> +#define __HAVE_ARCH_PGTABLE_DEPOSIT
>> +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
>> +				       pgtable_t pgtable);
>> +#define __HAVE_ARCH_PGTABLE_WITHDRAW
>> +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
>> +
>> +#define __HAVE_ARCH_PMDP_INVALIDATE
>> +extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
>> +			    pmd_t *pmdp);
>> +#endif /* __ASSEMBLY__ */
>>  #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
>> diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
>> index 7aeb955..283198e 100644
>> --- a/arch/powerpc/include/asm/pgtable.h
>> +++ b/arch/powerpc/include/asm/pgtable.h
>> @@ -222,5 +222,10 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
>>  		       unsigned long end, int write, struct page **pages, int *nr);
>>  #endif /* __ASSEMBLY__ */
>>  
>> +#ifndef CONFIG_TRANSPARENT_HUGEPAGE
>> +#define pmd_large(pmd)		0
>> +#define has_transparent_hugepage() 0
>> +#endif
>> +
>>  #endif /* __KERNEL__ */
>>  #endif /* _ASM_POWERPC_PGTABLE_H */
>> diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
>> index 3e13e23..6be70be 100644
>> --- a/arch/powerpc/include/asm/pte-hash64-64k.h
>> +++ b/arch/powerpc/include/asm/pte-hash64-64k.h
>> @@ -38,6 +38,23 @@
>>   */
>>  #define PTE_RPN_SHIFT	(30)
>>  
>> +/*
>> + * THP pages can't be special. So use the _PAGE_SPECIAL
>> + */
>> +#define _PAGE_SPLITTING _PAGE_SPECIAL
>> +
>> +/*
>> + * PTE flags to conserve for HPTE identification for THP page.
>> + * We drop _PAGE_COMBO here, because we overload that with _PAGE_TH_HUGE.
>> + */
>> +#define _PAGE_THP_HPTEFLAGS	(_PAGE_BUSY | _PAGE_HASHPTE)
>> +
>> +/*
>> + * We need to differentiate between explicit huge page and THP huge
>> + * page, since THP huge page also need to track real subpage details
>> + */
>> +#define _PAGE_THP_HUGE  _PAGE_COMBO
>
> All 3 of these definitions also appeared elsewhere.

These are the actual values used. The ones in pgtable-ppc64.h take care
of compilation issues on archs that don't support THP.

>
>> +
>>  #ifndef __ASSEMBLY__
>>  
>>  /*
>> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
>> index a854096..54216c1 100644
>> --- a/arch/powerpc/mm/pgtable_64.c
>> +++ b/arch/powerpc/mm/pgtable_64.c
>> @@ -338,6 +338,19 @@ EXPORT_SYMBOL(iounmap);
>>  EXPORT_SYMBOL(__iounmap);
>>  EXPORT_SYMBOL(__iounmap_at);
>>  
>> +/*
>> + * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
>> + * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
>> + */
>> +struct page *pmd_page(pmd_t pmd)
>> +{
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +	if (pmd_trans_huge(pmd))
>> +		return pfn_to_page(pmd_pfn(pmd));
>
> In this case you should be able to define this in terms of pte_pfn().

We now have pmd_pfn done in term of pte_pfn. So will retain pmd_pfn 

>
>> +#endif
>> +	return virt_to_page(pmd_page_vaddr(pmd));
>> +}
>> +
>>  #ifdef CONFIG_PPC_64K_PAGES
>>  static pte_t *get_from_cache(struct mm_struct *mm)
>>  {
>> @@ -455,3 +468,308 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
>>  }
>>  #endif
>>  #endif /* CONFIG_PPC_64K_PAGES */
>> +
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static pmd_t set_hugepage_access_flags_filter(pmd_t pmd,
>> +					      struct vm_area_struct *vma,
>> +					      int dirty)
>> +{
>> +	return pmd;
>> +}
>
> This identity function is only used immediately before.  Why does it
> exist?
>

removed

>> +/*
>> + * This is called when relaxing access to a hugepage. It's also called in the page
>> + * fault path when we don't hit any of the major fault cases, ie, a minor
>> + * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
>> + * handled those two for us, we additionally deal with missing execute
>> + * permission here on some processors
>> + */
>> +int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
>> +			  pmd_t *pmdp, pmd_t entry, int dirty)
>> +{
>> +	int changed;
>> +	entry = set_hugepage_access_flags_filter(entry, vma, dirty);
>> +	changed = !pmd_same(*(pmdp), entry);
>> +	if (changed) {
>> +		__pmdp_set_access_flags(pmdp, entry);
>> +		/*
>> +		 * Since we are not supporting SW TLB systems, we don't
>> +		 * have any thing similar to flush_tlb_page_nohash()
>> +		 */
>> +	}
>> +	return changed;
>> +}
>> +
>> +int pmdp_test_and_clear_young(struct vm_area_struct *vma,
>> +			      unsigned long address, pmd_t *pmdp)
>> +{
>> +	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
>> +}
>> +
>> +/*
>> + * We currently remove entries from the hashtable regardless of whether
>> + * the entry was young or dirty. The generic routines only flush if the
>> + * entry was young or dirty which is not good enough.
>> + *
>> + * We should be more intelligent about this but for the moment we override
>> + * these functions and force a tlb flush unconditionally
>> + */
>> +int pmdp_clear_flush_young(struct vm_area_struct *vma,
>> +				  unsigned long address, pmd_t *pmdp)
>> +{
>> +	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
>> +}
>> +
>> +/*
>> + * We mark the pmd splitting and invalidate all the hpte
>> + * entries for this hugepage.
>> + */
>> +void pmdp_splitting_flush(struct vm_area_struct *vma,
>> +			  unsigned long address, pmd_t *pmdp)
>> +{
>> +	unsigned long old, tmp;
>> +
>> +	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
>> +#ifdef PTE_ATOMIC_UPDATES
>> +
>> +	__asm__ __volatile__(
>> +	"1:	ldarx	%0,0,%3\n\
>> +		andi.	%1,%0,%6\n\
>> +		bne-	1b \n\
>> +		ori	%1,%0,%4 \n\
>> +		stdcx.	%1,0,%3 \n\
>> +		bne-	1b"
>> +	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
>> +	: "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY)
>> +	: "cc" );
>> +#else
>> +	old = pmd_val(*pmdp);
>> +	*pmdp = __pmd(old | _PAGE_SPLITTING);
>> +#endif
>> +	/*
>> +	 * If we didn't had the splitting flag set, go and flush the
>> +	 * HPTE entries and serialize against gup fast.
>> +	 */
>> +	if (!(old & _PAGE_SPLITTING)) {
>> +#ifdef CONFIG_PPC_STD_MMU_64
>> +		/* We need to flush the hpte */
>> +		if (old & _PAGE_HASHPTE)
>> +			hpte_need_hugepage_flush(vma->vm_mm, address, pmdp);
>> +#endif
>> +		/* need tlb flush only to serialize against gup-fast */
>> +		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
>> +	}
>> +}
>> +
>> +/*
>> + * We want to put the pgtable in pmd and use pgtable for tracking
>> + * the base page size hptes
>> + */
>> +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
>> +				pgtable_t pgtable)
>> +{
>> +	unsigned long *pgtable_slot;
>> +	assert_spin_locked(&mm->page_table_lock);
>> +	/*
>> +	 * we store the pgtable in the second half of PMD
>> +	 */
>> +	pgtable_slot = pmdp + PTRS_PER_PMD;
>> +	*pgtable_slot = (unsigned long)pgtable;
>
> Why not just make pgtable_slot have type (pgtable_t *) and avoid the
> case.
>

Done. But we would then have a cast in the above line.


>> +}
>> +
>> +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
>> +{
>> +	pgtable_t pgtable;
>> +	unsigned long *pgtable_slot;
>> +
>> +	assert_spin_locked(&mm->page_table_lock);
>> +	pgtable_slot = pmdp + PTRS_PER_PMD;
>> +	pgtable = (pgtable_t) *pgtable_slot;
>> +	/*
>> +	 * We store HPTE information in the deposited PTE fragment.
>> +	 * zero out the content on withdraw.
>> +	 */
>> +	memset(pgtable, 0, PTE_FRAG_SIZE);
>> +	return pgtable;
>> +}
>> +
>> +/*
>> + * Since we are looking at latest ppc64, we don't need to worry about
>> + * i/d cache coherency on exec fault
>> + */
>> +static pmd_t set_pmd_filter(pmd_t pmd, unsigned long addr)
>> +{
>> +	pmd = __pmd(pmd_val(pmd) & ~_PAGE_THP_HPTEFLAGS);
>> +	return pmd;
>> +}
>> +
>> +/*
>> + * We can make it less convoluted than __set_pte_at, because
>> + * we can ignore lot of hardware here, because this is only for
>> + * MPSS
>> + */
>> +static inline void __set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +				pmd_t *pmdp, pmd_t pmd, int percpu)
>> +{
>> +	/*
>> +	 * There is nothing in hash page table now, so nothing to
>> +	 * invalidate, set_pte_at is used for adding new entry.
>> +	 * For updating we should use update_hugepage_pmd()
>> +	 */
>> +	*pmdp = pmd;
>> +}
>
> Again you should be able to define this in terms of the set_pte_at()
> functions.
>

done 


>> +/*
>> + * set a new huge pmd. We should not be called for updating
>> + * an existing pmd entry. That should go via pmd_hugepage_update.
>> + */
>> +void set_pmd_at(struct mm_struct *mm, unsigned long addr,
>> +		pmd_t *pmdp, pmd_t pmd)
>> +{
>> +	/*
>> +	 * Note: mm->context.id might not yet have been assigned as
>> +	 * this context might not have been activated yet when this
>> +	 * is called.
>
> And the relevance of this comment here is...?
>
>> +	 */
>> +	pmd = set_pmd_filter(pmd, addr);
>> +
>> +	__set_pmd_at(mm, addr, pmdp, pmd, 0);
>> +
>> +}
>> +
>> +void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
>> +		     pmd_t *pmdp)
>> +{
>> +	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT);
>> +	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
>> +}
>> +
>> +/*
>> + * A linux hugepage PMD was changed and the corresponding hash table entries
>> + * neesd to be flushed.
>> + *
>> + * The linux hugepage PMD now include the pmd entries followed by the address
>> + * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
>> + * [ secondary group | 3 bit hidx | valid ]. We use one byte per each HPTE entry.
>> + * With 16MB hugepage and 64K HPTE we need 256 entries and with 4K HPTE we need
>> + * 4096 entries. Both will fit in a 4K pgtable_t.
>> + */
>> +void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
>> +			      pmd_t *pmdp)
>> +{
>> +	int ssize, i;
>> +	unsigned long s_addr;
>> +	unsigned int psize, valid;
>> +	unsigned char *hpte_slot_array;
>> +	unsigned long hidx, vpn, vsid, hash, shift, slot;
>> +
>> +	/*
>> +	 * Flush all the hptes mapping this hugepage
>> +	 */
>> +	s_addr = addr & HUGE_PAGE_MASK;
>> +	/*
>> +	 * The hpte hindex are stored in the pgtable whose address is in the
>> +	 * second half of the PMD
>> +	 */
>> +	hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
>> +
>> +	/* get the base page size */
>> +	psize = get_slice_psize(mm, s_addr);
>> +	shift = mmu_psize_defs[psize].shift;
>> +
>> +	for (i = 0; i < (HUGE_PAGE_SIZE >> shift); i++) {
>> +		/*
>> +		 * 8 bits per each hpte entries
>> +		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
>> +		 */
>> +		valid = hpte_slot_array[i] & 0x1;
>> +		if (!valid)
>> +			continue;
>> +		hidx =  hpte_slot_array[i]  >> 1;
>> +
>> +		/* get the vpn */
>> +		addr = s_addr + (i * (1ul << shift));
>> +		if (!is_kernel_addr(addr)) {
>> +			ssize = user_segment_size(addr);
>> +			vsid = get_vsid(mm->context.id, addr, ssize);
>> +			WARN_ON(vsid == 0);
>> +		} else {
>> +			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
>> +			ssize = mmu_kernel_ssize;
>> +		}
>> +
>> +		vpn = hpt_vpn(addr, vsid, ssize);
>> +		hash = hpt_hash(vpn, shift, ssize);
>> +		if (hidx & _PTEIDX_SECONDARY)
>> +			hash = ~hash;
>> +
>> +		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
>> +		slot += hidx & _PTEIDX_GROUP_IX;
>> +		ppc_md.hpte_invalidate(slot, vpn, psize, ssize, 0);
>> +	}
>> +}
>> +
>> +static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
>> +{
>> +	pmd_val(pmd) |= pgprot_val(pgprot);
>> +	return pmd;
>> +}
>> +
>> +pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
>> +{
>> +	pmd_t pmd;
>> +	/*
>> +	 * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
>> +	 * set. We use this to check THP page at pmd level.
>> +	 * leaf pte for huge page, bottom two bits != 00
>> +	 */
>> +	pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
>> +	pmd_val(pmd) |= _PAGE_THP_HUGE;
>> +	pmd = pmd_set_protbits(pmd, pgprot);
>> +	return pmd;
>> +}
>> +
>> +pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
>> +{
>> +	return pfn_pmd(page_to_pfn(page), pgprot);
>> +}
>> +
>> +pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
>> +{
>> +
>> +	pmd_val(pmd) &= _HPAGE_CHG_MASK;
>> +	pmd = pmd_set_protbits(pmd, newprot);
>> +	return pmd;
>> +}
>> +
>> +/*
>> + * This is called at the end of handling a user page fault, when the
>> + * fault has been handled by updating a HUGE PMD entry in the linux page tables.
>> + * We use it to preload an HPTE into the hash table corresponding to
>> + * the updated linux HUGE PMD entry.
>> + */
>> +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
>> +			  pmd_t *pmd)
>> +{
>> +	return;
>> +}
>> +
>> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>> +
>> +pmd_t pmdp_get_and_clear(struct mm_struct *mm,
>> +			 unsigned long addr, pmd_t *pmdp)
>> +{
>> +	pmd_t old_pmd;
>> +	unsigned long old;
>> +	/*
>> +	 * khugepaged calls this for normal pmd also
>> +	 */
>> +	if (pmd_trans_huge(*pmdp)) {
>> +		old = pmd_hugepage_update(mm, addr, pmdp, ~0UL);
>> +		old_pmd = __pmd(old);
>> +	} else {
>> +		old_pmd = *pmdp;
>> +		pmd_clear(pmdp);
>> +	}
>> +	return old_pmd;
>> +}
>> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
>> index 18e3b76..a526144 100644
>> --- a/arch/powerpc/platforms/Kconfig.cputype
>> +++ b/arch/powerpc/platforms/Kconfig.cputype
>> @@ -71,6 +71,7 @@ config PPC_BOOK3S_64
>>  	select PPC_FPU
>>  	select PPC_HAVE_PMU_SUPPORT
>>  	select SYS_SUPPORTS_HUGETLBFS
>> +	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
>>  
>>  config PPC_BOOK3E_64
>>  	bool "Embedded processors"
>

-aneesh

^ permalink raw reply

* Re: [PATCH -V7 02/10] powerpc/THP: Implement transparent hugepages for ppc64
From: Benjamin Herrenschmidt @ 2013-05-04 21:39 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: linux-mm, paulus, linuxppc-dev, David Gibson
In-Reply-To: <87a9oa4kx0.fsf@linux.vnet.ibm.com>

On Sun, 2013-05-05 at 00:44 +0530, Aneesh Kumar K.V wrote:
> 
> We may want to retain some of these because of the assert we want to add
> for locking. PTE related functions expect ptl to be locked. PMD related
> functions expect mm->page_table_lock to be locked.

In this case, have a single inline common function __something called
by two different wrappers.

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH] arch/powerpc: advertise ISA2.07, HTM, DSCR, EBB and ISEL bits in HWCAP2
From: Segher Boessenkool @ 2013-05-04 21:42 UTC (permalink / raw)
  To: Michael R Meissner
  Cc: Michael Neuling, Nishanth Aravamudan, Steve Munroe, Peter Bergner,
	Ryan Arnold, linuxppc-dev
In-Reply-To: <OFFA9C9591.71B67D7B-ON85257B60.00808C43-85257B60.0080BB77@us.ibm.com>

> According to the GCC sources, ISEL is enabled by default for the  
> 8540, 8548, e500mc, e500mc64, e6500 processors.

And e5500.  POWER7 is said to support it as well.


Segher

^ permalink raw reply

* [PATCH] powerpc/pci: Don't add bogus empty resources to PHBs
From: Benjamin Herrenschmidt @ 2013-05-05  0:22 UTC (permalink / raw)
  To: linuxppc-dev

When converting to use the new pci_add_resource_offset() we didn't
properly account for empty resources (0 flags) and added those bogons
to the PHBs. The result is some annoying messages in the log.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

BTW, does anybody know if we really need those ppc32 workarounds anymore?

They smell like ancient pre-device-tree crap... if a bridge is missing
a window in its "ranges" property it should probably not have it exposed
to the generic code nowadays.
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f325dc9..5775ea9 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1532,12 +1532,16 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
 		res->flags = IORESOURCE_IO;
 #endif /* CONFIG_PPC32 */
 	}
+	if (res->flags) {
+		pr_debug("PCI: PHB IO resource    = %016llx-%016llx [%lx]\n",
+			 (unsigned long long)res->start,
+			 (unsigned long long)res->end,
+			 (unsigned long)res->flags);
+		pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose));
 
-	pr_debug("PCI: PHB IO resource    = %016llx-%016llx [%lx]\n",
-		 (unsigned long long)res->start,
-		 (unsigned long long)res->end,
-		 (unsigned long)res->flags);
-	pci_add_resource_offset(resources, res, pcibios_io_space_offset(hose));
+		pr_debug("PCI: PHB IO  offset     = %08lx\n",
+			 (unsigned long)hose->io_base_virt - _IO_BASE);
+	}
 
 	/* Hookup PHB Memory resources */
 	for (i = 0; i < 3; ++i) {
@@ -1555,19 +1559,17 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
 			res->flags = IORESOURCE_MEM;
 #endif /* CONFIG_PPC32 */
 		}
-
-		pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n", i,
-			 (unsigned long long)res->start,
-			 (unsigned long long)res->end,
-			 (unsigned long)res->flags);
-		pci_add_resource_offset(resources, res, hose->pci_mem_offset);
+		if (res->flags) {
+			pr_debug("PCI: PHB MEM resource %d = %016llx-%016llx [%lx]\n", i,
+				 (unsigned long long)res->start,
+				 (unsigned long long)res->end,
+				 (unsigned long)res->flags);
+			pci_add_resource_offset(resources, res, hose->pci_mem_offset);
+		}
 	}
 
 	pr_debug("PCI: PHB MEM offset     = %016llx\n",
 		 (unsigned long long)hose->pci_mem_offset);
-	pr_debug("PCI: PHB IO  offset     = %08lx\n",
-		 (unsigned long)hose->io_base_virt - _IO_BASE);
-
 }
 
 /*

^ permalink raw reply related

* [PATCH] powerpc/pnv: Fix "compatible" property for P8 PHB
From: Benjamin Herrenschmidt @ 2013-05-05  0:24 UTC (permalink / raw)
  To: linuxppc-dev

The property should be "ibm,power8-pciex", not "ibm,p8-pciex". The latter
was changed in FW because it was inconsistent with the rest of the nodes.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 8c6c9cf..97b08fc 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1089,7 +1089,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 	/* Detect specific models for error handling */
 	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
 		phb->model = PNV_PHB_MODEL_P7IOC;
-	else if (of_device_is_compatible(np, "ibm,p8-pciex"))
+	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
 		phb->model = PNV_PHB_MODEL_PHB3;
 	else
 		phb->model = PNV_PHB_MODEL_UNKNOWN;

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox