LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 5/6] KVM: PPC: Book3S: Load/save FP/VMX/VSX state directly to/from vcpu struct
From: Alexander Graf @ 2013-09-10 18:54 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm-ppc, kvm, linuxppc-dev
In-Reply-To: <20130910102221.GF28145@iris.ozlabs.ibm.com>


On 10.09.2013, at 05:22, Paul Mackerras wrote:

> Now that we have the vcpu floating-point and vector state stored in
> the same type of struct as the main kernel uses, we can load that
> state directly from the vcpu struct instead of having extra copies
> to/from the thread_struct.  Similarly, when the guest state needs to
> be saved, we can have it saved directly to the vcpu struct by
> setting the current->thread.fp_save_area and current->thread.vr_save_area
> pointers.  That also means that we don't need to back up and restore
> userspace's FP/vector state.  This all makes the code simpler and
> faster.
>=20
> Note that it's not necessary to save or modify current->thread.fpexc_mode,
> since nothing in KVM uses or is affected by its value.  Nor is it
> necessary to touch used_vr or used_vsr.
>=20
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> arch/powerpc/kvm/book3s_pr.c | 72 =
++++++++++----------------------------------
> arch/powerpc/kvm/booke.c     | 16 ----------
> arch/powerpc/kvm/booke.h     |  4 ++-
> 3 files changed, 19 insertions(+), 73 deletions(-)
>=20
> diff --git a/arch/powerpc/kvm/book3s_pr.c =
b/arch/powerpc/kvm/book3s_pr.c
> index 90be91c..5eae919 100644
> --- a/arch/powerpc/kvm/book3s_pr.c
> +++ b/arch/powerpc/kvm/book3s_pr.c
> @@ -462,16 +462,16 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, =
ulong msr)
> 		 * both the traditional FP registers and the added VSX
> 		 * registers into thread.fp_state.fpr[].
> 		 */
> -		if (current->thread.regs->msr & MSR_FP)
> +		if (t->regs->msr & MSR_FP)
> 			giveup_fpu(current);

If you make a second version of this call that also gets a state area as
parameter, you don't need the pointer in the thread struct anymore, no?
Or do you? Ah, you want to be able to grab the FPU for in-kernel FPU
using code, so it needs to be seamless.

Fair enough - pointer it is then :).


Alex

^ permalink raw reply

* Re: [PATCH 6/6] KVM: PPC: Book3S HV: Use load/store_fp_state functions in HV guest entry/exit
From: Alexander Graf @ 2013-09-10 18:57 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kvm-ppc, kvm, linuxppc-dev
In-Reply-To: <20130910102259.GG28145@iris.ozlabs.ibm.com>


On 10.09.2013, at 05:22, Paul Mackerras wrote:

> This modifies kvmppc_load_fp and kvmppc_save_fp to use the generic
> FP/VSX and VMX load/store functions instead of open-coding the
> FP/VSX/VMX load/store instructions.  Since kvmppc_load/save_fp don't
> follow C calling conventions, we make them private symbols within
> book3s_hv_rmhandlers.S.
>=20
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> arch/powerpc/kernel/asm-offsets.c       |  2 -
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 82 =
++++++++-------------------------
> 2 files changed, 18 insertions(+), 66 deletions(-)
>=20
> diff --git a/arch/powerpc/kernel/asm-offsets.c =
b/arch/powerpc/kernel/asm-offsets.c
> index 4c1609f..7982870 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -425,10 +425,8 @@ int main(void)
> 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
> 	DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
> 	DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr));
> -	DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fp.fpscr));
> #ifdef CONFIG_ALTIVEC
> 	DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr));
> -	DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vr.vscr));
> #endif
> 	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
> 	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S =
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index f5f2396..b5183ed 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1102,7 +1102,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
>=20
> 	/* save FP state */
> 	mr	r3, r9
> -	bl	.kvmppc_save_fp
> +	bl	kvmppc_save_fp
>=20
> 	/* Increment yield count if they have a VPA */
> 	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
> @@ -1591,7 +1591,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
> 	std	r31, VCPU_GPR(R31)(r3)
>=20
> 	/* save FP state */
> -	bl	.kvmppc_save_fp
> +	bl	kvmppc_save_fp
>=20
> 	/*
> 	 * Take a nap until a decrementer or external interrupt occurs,
> @@ -1767,7 +1767,9 @@ kvm_no_guest:
>  * Save away FP, VMX and VSX registers.
>  * r3 =3D vcpu pointer
>  */
> -_GLOBAL(kvmppc_save_fp)
> +kvmppc_save_fp:
> +	mflr	r30
> +	mr	r31,r3

Please note somewhere that r30 and r31 get clobbered by this function.

> 	mfmsr	r5
> 	ori	r8,r5,MSR_FP
> #ifdef CONFIG_ALTIVEC
> @@ -1782,42 +1784,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
> #endif
> 	mtmsrd	r8
> 	isync
> -#ifdef CONFIG_VSX
> -BEGIN_FTR_SECTION
> -	reg =3D 0
> -	.rept	32
> -	li	r6,reg*16+VCPU_FPRS
> -	STXVD2X(reg,R6,R3)
> -	reg =3D reg + 1
> -	.endr
> -FTR_SECTION_ELSE
> -#endif
> -	reg =3D 0
> -	.rept	32
> -	stfd	reg,reg*8+VCPU_FPRS(r3)
> -	reg =3D reg + 1
> -	.endr
> -#ifdef CONFIG_VSX
> -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
> -#endif
> -	mffs	fr0
> -	stfd	fr0,VCPU_FPSCR(r3)
> -
> +	addi	r3,r3,VCPU_FPRS
> +	bl	.store_fp_state
> #ifdef CONFIG_ALTIVEC
> BEGIN_FTR_SECTION
> -	reg =3D 0
> -	.rept	32
> -	li	r6,reg*16+VCPU_VRS
> -	stvx	reg,r6,r3
> -	reg =3D reg + 1
> -	.endr
> -	mfvscr	vr0
> -	li	r6,VCPU_VSCR
> -	stvx	vr0,r6,r3
> +	addi	r3,r31,VCPU_VRS
> +	bl	.store_vr_state
> END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
> #endif
> 	mfspr	r6,SPRN_VRSAVE
> 	stw	r6,VCPU_VRSAVE(r3)
> +	mtlr	r30
> 	mtmsrd	r5
> 	isync
> 	blr
> @@ -1826,8 +1803,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
>  * Load up FP, VMX and VSX registers
>  * r4 =3D vcpu pointer
>  */
> -	.globl	kvmppc_load_fp
> kvmppc_load_fp:
> +	mflr	r30
> +	mr	r31,r4

here too. It's also worth noting in the header comment that r4 is
preserved (unlike what you'd expect from the C ABI).


Alex

> 	mfmsr	r9
> 	ori	r8,r9,MSR_FP
> #ifdef CONFIG_ALTIVEC
> @@ -1842,40 +1820,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
> #endif
> 	mtmsrd	r8
> 	isync
> -	lfd	fr0,VCPU_FPSCR(r4)
> -	MTFSF_L(fr0)
> -#ifdef CONFIG_VSX
> -BEGIN_FTR_SECTION
> -	reg =3D 0
> -	.rept	32
> -	li	r7,reg*16+VCPU_FPRS
> -	LXVD2X(reg,R7,R4)
> -	reg =3D reg + 1
> -	.endr
> -FTR_SECTION_ELSE
> -#endif
> -	reg =3D 0
> -	.rept	32
> -	lfd	reg,reg*8+VCPU_FPRS(r4)
> -	reg =3D reg + 1
> -	.endr
> -#ifdef CONFIG_VSX
> -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
> -#endif
> -
> +	addi	r3,r4,VCPU_FPRS
> +	bl	.load_fp_state
> #ifdef CONFIG_ALTIVEC
> BEGIN_FTR_SECTION
> -	li	r7,VCPU_VSCR
> -	lvx	vr0,r7,r4
> -	mtvscr	vr0
> -	reg =3D 0
> -	.rept	32
> -	li	r7,reg*16+VCPU_VRS
> -	lvx	reg,r7,r4
> -	reg =3D reg + 1
> -	.endr
> +	addi	r3,r31,VCPU_VRS
> +	bl	.load_vr_state
> END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
> #endif
> 	lwz	r7,VCPU_VRSAVE(r4)
> 	mtspr	SPRN_VRSAVE,r7
> +	mtlr	r30
> +	mr	r4,r31
> 	blr
> --=20
> 1.8.4.rc3
>=20

^ permalink raw reply

* Re: [v3] powerpc/mpc85xx: Update the clock device tree nodes
From: Scott Wood @ 2013-09-10 21:46 UTC (permalink / raw)
  To: Tang Yuantian-B29983
  Cc: Wood Scott-B07421, Mike Turquette, linuxppc-dev@lists.ozlabs.org,
	devicetree@vger.kernel.org
In-Reply-To: <D07C73A334FF604B95B3CBD2A545D07B1503F161@039-SN2MPN1-012.039d.mgd.msft.net>

On Mon, 2013-08-26 at 21:49 -0500, Tang Yuantian-B29983 wrote:
> > > > > +		};
> > > > > +		pll1: pll1@820 {
> > > > > +			#clock-cells = <1>;
> > > > > +			reg = <0x820>;
> > > > > +			compatible = "fsl,core-pll-clock";
> > > > > +			clocks = <&clockgen>;
> > > > > +			clock-output-names = "pll1", "pll1-div2", "pll1-
> > div4";
> > > > > +		};
> > > >
> > > > Please leave a blank line between properties and nodes, and between
> > nodes.
> > > >
> > > OK, will add.
> > >
> > > > What does reg represent?  Where is the binding for this?
> > > >
> > > > The compatible is too vague.
> > > Reg is register offset.
> > 
> > With no size?
> 
> No size is needed.

Yes, it is.  Register blocks have size -- even if it's just a single
register.

> > > It is too late to change since the clock driver is merged for months
> > > although I sent this patch first.
> > 
> > It should not have gone in without an approved binding.  It seems it went
> > in via Mike Turquette (why is a non-ARM-specific tree using linux-arm-
> > kernel as its list, BTW?).  No ack from Ben, Kumar, or me is shown in the
> > commit.
> The Linux common clock framework is not ARM specific. Any other arch can use it.

Sure, it just seemed an odd choice of mailing list for something that
isn't ARM-specific.

> > In any case, you can preserve compatibility with existing trees without
> > using this compatible in new trees.  The driver can check for both
> > compatibles, with a comment indicating that "fsl,core-mux-clock" is
> > deprecated and for compatibility only.
> It is sub-clock node, is it really necessary to think about compatibility?
> I think that's the node clockgen's responsibility.

It describes registers, so yes, you need to consider compatibility.  A
clock provider is not responsible for figuring out how to program
devices that consume its clocks, nor should it make any assumptions
about such devices.
 
> > > Besides, it is not too bad because other arch use the similar name.
> > 
> > I don't follow.  This is a specific Freescale register interface, not a
> > general concept.
> > 
> > In any case, which "similar names" are you referring to?  A search in
> > arch/arm/boot/dts for "mux" with "clk" or "clock" turns up
> > "allwinner,sun4i-apb1-mux-clk" which is much more specific than
> > "fsl,core-mux-clock".
> Ok, I will change the compatible string.
> Do you think "fsl,ppc-core-*" is ok?

No.  How about "fsl,qoriq-chassis1-*" (for e500mc/e5500) and
"fsl,qoriq-chassis2-*" (for e6500)?

-Scott

^ permalink raw reply

* Re: powerpc allmodconfig build broken due to commit 15863ff3b (powerpc: Make chip-id information available to userspace)
From: Benjamin Herrenschmidt @ 2013-09-10 22:02 UTC (permalink / raw)
  To: Asai Thambi S P
  Cc: linux-kernel@vger.kernel.org, Vasant Hegde, Paul Mackerras,
	Shivaprasad G Bhat, linuxppc-dev, Guenter Roeck
In-Reply-To: <522E5FD5.40603@micron.com>

On Mon, 2013-09-09 at 16:55 -0700, Asai Thambi S P wrote:
> On 09/08/2013 5:28 PM, Guenter Roeck wrote:
> > Hi all,
> >
> > powerpc allmodconfig build on the latest upstream kernel results in:
> >
> > ERROR: ".cpu_to_chip_id" [drivers/block/mtip32xx/mtip32xx.ko] undefined!
> >
> > This is due to commit 15863ff3b (powerpc: Make chip-id information 
> > available to userspace).
> > Not surprising, as cpu_to_chip_id() is not exported.
> >
> Apart from the above error, I have a concern on the patch, purely based on the commit message.
> (to be honest, I am not familiar with the ppc architecture)
> 
> Commit message of 15863ff3b has the following text.
> 
> ******************
> So far "/sys/devices/system/cpu/cpuX/topology/physical_package_id"
> was always default (-1) on ppc64 architecture.
> 
> Now, some systems have an ibm,chip-id property in the cpu nodes in
> the device tree. On these systems, we now use this information to
> display physical_package_id
> ******************
> 
> Shouldn't the new definition of "topology_physical_package_id" apply only to those systems supporting ibm,chip-id property?

There should be no negative side effect (apart from the missing
EXPORT_SYMBOL of course). If the property is not found in the
device-tree, the new function returns -1, so it should work fine on all
systems.

Cheers,
Ben.

> 
> > Reverting this commit fixes the problem. Any good idea how to fix it 
> > for real ?
> >
> > Guenter
> > -- 
> > To unsubscribe from this list: send the line "unsubscribe 
> > linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply

* Re: [PATCH] powerpc: Export cpu_to_chip_id() to fix build error
From: Benjamin Herrenschmidt @ 2013-09-10 22:03 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Vasant Hegde, Paul Mackerras, linuxppc-dev, linux-kernel,
	Shivaprasad G Bhat
In-Reply-To: <1378777076-4699-1-git-send-email-linux@roeck-us.net>

On Mon, 2013-09-09 at 18:37 -0700, Guenter Roeck wrote:
> powerpc allmodconfig build fails with:
> 
> ERROR: ".cpu_to_chip_id" [drivers/block/mtip32xx/mtip32xx.ko] undefined!
> 
> The problem was introduced with commit 15863ff3b (powerpc: Make chip-id
> information available to userspace).

Thanks, I'll send that to Linus asap.

Ben.

> Export the missing symbol.
> 
> Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
> Cc: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
>  arch/powerpc/kernel/smp.c |    1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 442d8e2..8e59abc 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -611,6 +611,7 @@ int cpu_to_chip_id(int cpu)
>  	of_node_put(np);
>  	return of_get_ibm_chip_id(np);
>  }
> +EXPORT_SYMBOL(cpu_to_chip_id);
>  
>  /* Helper routines for cpu to core mapping */
>  int cpu_core_index_of_thread(int cpu)

^ permalink raw reply

* Re: [PATCH 1/7] powerpc/mpc85xx: Fix EDAC address capture
From: Scott Wood @ 2013-09-10 22:25 UTC (permalink / raw)
  To: York Sun; +Cc: linuxppc-dev
In-Reply-To: <1378482199-10581-1-git-send-email-yorksun@freescale.com>

On Fri, 2013-09-06 at 08:43 -0700, York Sun wrote:
> Extend err_addr to cover 64 bits for DDR errors.
> 
> Signed-off-by: York Sun <yorksun@freescale.com>
> Reviewed-by: Fleming Andrew-AFLEMING <AFLEMING@freescale.com>
> Tested-by: Fleming Andrew-AFLEMING <AFLEMING@freescale.com>
> ---
>  drivers/edac/mpc85xx_edac.c |   10 +++++++---
>  drivers/edac/mpc85xx_edac.h |    1 +
>  2 files changed, 8 insertions(+), 3 deletions(-)

EDAC patches should go via the linux-edac list and maintainer.

BTW, were those "reviewed-by" and "tested-by" added by Andy manually, or
are they from gerrit?  If the latter, please strip them before sending
upstream.

-Scott

^ permalink raw reply

* Re: [PATCH 3/7] i2c/rtc-ds3232: Fix irq for probing
From: Scott Wood @ 2013-09-10 22:26 UTC (permalink / raw)
  To: York Sun; +Cc: linuxppc-dev
In-Reply-To: <1378482199-10581-3-git-send-email-yorksun@freescale.com>

On Fri, 2013-09-06 at 08:43 -0700, York Sun wrote:
> Driver shouldn't request irq when irq = 0. It is returned from parsing
> device tree. 0 means no interrupt.
> 
> Signed-off-by: York Sun <yorksun@freescale.com>
> Reviewed-by: Zang Tiefei-R61911 <tie-fei.zang@freescale.com>
> Reviewed-by: Fleming Andrew-AFLEMING <AFLEMING@freescale.com>
> Tested-by: Fleming Andrew-AFLEMING <AFLEMING@freescale.com>
> ---
>  drivers/rtc/rtc-ds3232.c |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

This should go via the i2c list and maintainer.  It's not a PPC patch at
all.

Please do not lump unrelated patches into a single patchset.

-Scott

^ permalink raw reply

* Re: [PATCH 7/7] powerpc/b4860emu: Add device tree file for b4860emu
From: Scott Wood @ 2013-09-10 22:31 UTC (permalink / raw)
  To: York Sun; +Cc: linuxppc-dev
In-Reply-To: <1378482199-10581-7-git-send-email-yorksun@freescale.com>

On Fri, 2013-09-06 at 08:43 -0700, York Sun wrote:
> B4860EMU is an emulator target with minimum peripherals. It is based on
> B4860QDS and trimmed down most peripherals due to either not modeled or
> lack of board level connections. The main purpose of this minimum dts is
> to speed up booting on emulator.
> 
> Signed-off-by: York Sun <yorksun@freescale.com>
> Reviewed-by: Wood Scott-B07421 <scottwood@freescale.com>
> Reviewed-by: Fleming Andrew-AFLEMING <AFLEMING@freescale.com>
> Tested-by: Fleming Andrew-AFLEMING <AFLEMING@freescale.com>

I did not give a reviewed-by in this context.  Please strip gerrit tags
before posting.

> +	clockgen: global-utilities@e1000 {
> +		compatible = "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2";

It's "fsl,qoriq-clockgen-2.0" now.

Please be careful with copy-and-paste patches like this -- make sure
that it's up-to-date with what you copied from.

-Scott

^ permalink raw reply

* Re: Power PC Build problem
From: Scott Wood @ 2013-09-10 22:40 UTC (permalink / raw)
  To: Jason Rennie; +Cc: linuxppc-dev@ozlabs.org
In-Reply-To: <5226A3E3.2050308@rftechnology.com.au>

On Wed, 2013-09-04 at 13:07 +1000, Jason Rennie wrote:
> But when I try building it with the latest buildroot (2013.08) and I
> configure the kernel (3.10.10) I get one of two things. If I don't
> include specific settings for physmap compat support (Device Drivers
> -> MTD Support -> Mapping drivers for chip access) then I get a kernel
> panic when it can't mount the root filesystem as follows 
[snip]
> ----------- 
> and when I do include the physmap settings 
> 
> <*> Flash device in physical memory map 
>  [*]   Physmap compat support 
>   (0xFF000000) Physical start address of flash mapping 
>   (0x1000000) Physical length of flash mapping 
>   (2)     Bank width in octets 
> 
> which I took from the earlier build and do seem to be right, it does
> the following. 
> -------------------- 
> f0011a00.serial: ttyCPM0 at MMIO 0xc505ea00 (irq = 40) is a CPM UART 
> f0011a60.serial: ttyCPM1 at MMIO 0xc5060a60 (irq = 43) is a CPM UART 
> brd: module loaded 
> physmap platform flash device: 01000000 at ff000000 
> Machine check in kernel mode. 
> Caused by (from SRR1=49030): Transfer error ack signal 
> Oops: Machine check, sig: 7 [#1] 
> PREEMPT Freescale MPC8272 ADS 
> Modules linked in: 
> CPU: 0 PID: 1 Comm: swapper Not tainted 3.10.10 #12 
> task: c3830000 ti: c3832000 task.ti: c3832000 
> NIP: c01a5da8 LR: c01a6200 CTR: c01a60cc 
> REGS: c3833c00 TRAP: 0200   Not tainted  (3.10.10) 
> MSR: 00049030 <EE,ME,IR,DR>  CR: 22000022  XER: 00000000 
> 
> GPR00: c01a59a0 c3833cb0 c3830000 00000000 c39c0e24 c3833cf8 00000002
> 00000aaa 
> GPR08: c5080000 0000aaaa 00000002 00000002 42000024 00000000 c000406c
> 00000000 
> GPR16: 00000000 00000000 c39c0e24 c02c9ff4 00000001 c02c8d30 c02c9fe8
> c0356e23 
> GPR24: c02c8d78 00000001 00000000 00000001 c03b89d8 00000000 c39c0e24
> c3833cf8 
> NIP [c01a5da8] jedec_reset+0x124/0x448 
> LR [c01a6200] jedec_probe_chip+0x134/0x1050 
> Call Trace: 
> [c3833cb0] [c002d900] call_usermodehelper_exec+0x154/0x160
> (unreliable) 
> [c3833cf0] [c01a59a0] mtd_do_chip_probe+0x78/0x34c 
> [c3833d70] [c01a73a8] physmap_flash_probe+0x1ec/0x30c 
> [c3833db0] [c01996c8] driver_probe_device+0xb8/0x1f4 
> [c3833dd0] [c0197a64] bus_for_each_drv+0x60/0xa8 
> [c3833e00] [c01995d8] device_attach+0x7c/0x94 
> [c3833e20] [c0198ac4] bus_probe_device+0x34/0xac 
> [c3833e40] [c0196f94] device_add+0x3d4/0x570 
> [c3833e80] [c019acd0] platform_device_add+0x164/0x1dc 
> [c3833ea0] [c039a438] physmap_init+0x34/0x50 
> [c3833ec0] [c0003acc] do_one_initcall+0xdc/0x180 
> [c3833ef0] [c0386bcc] kernel_init_freeable+0x11c/0x1c0 
> [c3833f30] [c0004088] kernel_init+0x1c/0xf4 
> [c3833f40] [c000d3a8] ret_from_kernel_thread+0x5c/0x64 
> Instruction dump: 
> 5529063e 7d2839ae 48000024 2f8a0002 40be0010 5529043e 7d283b2e
> 48000010 
> 2f8a0004 409e0324 7d28392e 7c0004ac <81250008> 8105000c 81440018
> 7d6849d6 
> ---[ end trace 06e97f39189bf8ef ]--- 
> 
> Kernel panic - not syncing: Attempted to kill init!
> exitcode=0x00000007 
> 
> Rebooting in 1 seconds.. 
> -------------------- 
> I assume it is crashing when it tries to probe the flash chips? 
> 
> I'm at a loss to work out what I am doing wrong here. Can anybody
> offer any pointers? 

It sounds like the address you're specifying for flash is incorrect.
You shouldn't need to specify it this way; the information is in the
device tree (and for mpc8272ads the device tree says it's at
0xff800000).

Do you have CONFIG_MTD_PHYSMAP_OF enabled?

BTW, I don't see how this is a "build problem".

-Scott

^ permalink raw reply

* Re: powerpc allmodconfig build broken due to commit 15863ff3b (powerpc: Make chip-id information available to userspace)
From: Guenter Roeck @ 2013-09-10 22:50 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Asai Thambi S P, linux-kernel@vger.kernel.org, Vasant Hegde,
	Shivaprasad G Bhat, Paul Mackerras, linuxppc-dev
In-Reply-To: <1378850569.4121.28.camel@pasglop>

On Wed, Sep 11, 2013 at 08:02:49AM +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2013-09-09 at 16:55 -0700, Asai Thambi S P wrote:
> > On 09/08/2013 5:28 PM, Guenter Roeck wrote:
> > > Hi all,
> > >
> > > powerpc allmodconfig build on the latest upstream kernel results in:
> > >
> > > ERROR: ".cpu_to_chip_id" [drivers/block/mtip32xx/mtip32xx.ko] undefined!
> > >
> > > This is due to commit 15863ff3b (powerpc: Make chip-id information 
> > > available to userspace).
> > > Not surprising, as cpu_to_chip_id() is not exported.
> > >
> > Apart from the above error, I have a concern on the patch, purely based on the commit message.
> > (to be honest, I am not familiar with the ppc architecture)
> > 
> > Commit message of 15863ff3b has the following text.
> > 
> > ******************
> > So far "/sys/devices/system/cpu/cpuX/topology/physical_package_id"
> > was always default (-1) on ppc64 architecture.
> > 
> > Now, some systems have an ibm,chip-id property in the cpu nodes in
> > the device tree. On these systems, we now use this information to
> > display physical_package_id
> > ******************
> > 
> > Shouldn't the new definition of "topology_physical_package_id" apply only to those systems supporting ibm,chip-id property?
> 
> There should be no negative side effect (appart from the missing
> EXPORT_SYMBOL of course). If the property is not found in the
> device-tree, the new function returns -1, so it should work fine on all
> systems.
> 
Good. I submitted a patch doing just that yesterday or so.
Hope you'll accept it ;).

Thanks,
Guenter

^ permalink raw reply

* Re: [PATCH] powerpc/85xx: DTS - re-organize the SPI partitions property
From: Scott Wood @ 2013-09-10 23:33 UTC (permalink / raw)
  To: Mingkai Hu; +Cc: linuxppc-dev
In-Reply-To: <1378454743-17637-1-git-send-email-Mingkai.Hu@freescale.com>

On Fri, 2013-09-06 at 16:05 +0800, Mingkai Hu wrote:
> Re-organize the SPI partitions and use the same SPI flash memory
> map for most of the platforms which have 16MB SPI flash mounted.
[snip]
> Based on 'next' branch on git tree:
> git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git
> 
>  arch/powerpc/boot/dts/bsc9131rdb.dtsi  | 35 ++++++++++++-----------------
>  arch/powerpc/boot/dts/c293pcie.dts     | 35 ++++++++++++-----------------
>  arch/powerpc/boot/dts/mpc8536ds.dtsi   | 12 +++++-----
>  arch/powerpc/boot/dts/p1010rdb.dtsi    | 40 ++++++++++++----------------------
>  arch/powerpc/boot/dts/p1020rdb-pc.dtsi | 24 +++++---------------
>  arch/powerpc/boot/dts/p1020rdb-pd.dts  | 34 ++++++++++++-----------------
>  arch/powerpc/boot/dts/p1020rdb.dtsi    | 23 ++++---------------
>  arch/powerpc/boot/dts/p1021mds.dts     | 17 +++++++--------
>  arch/powerpc/boot/dts/p1021rdb-pc.dtsi | 32 +++++++++------------------
>  arch/powerpc/boot/dts/p1022ds.dtsi     | 21 +++++++++---------
>  arch/powerpc/boot/dts/p1023rds.dts     | 10 ++-------
>  arch/powerpc/boot/dts/p1024rdb.dtsi    | 40 ++++++++++++----------------------
>  arch/powerpc/boot/dts/p1025rdb.dtsi    | 23 +++++--------------
>  arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 40 ++++++++++++----------------------
>  arch/powerpc/boot/dts/p2020rdb.dts     | 38 ++++++++++----------------------
>  arch/powerpc/boot/dts/p2041rdb.dts     | 12 +++++-----
>  arch/powerpc/boot/dts/p3041ds.dts      | 12 +++++-----
>  arch/powerpc/boot/dts/p4080ds.dts      | 12 +++++-----
>  arch/powerpc/boot/dts/p5020ds.dts      | 12 +++++-----
>  arch/powerpc/boot/dts/p5040ds.dts      | 13 ++++++-----

What happens to existing users whose flash is laid out the existing way,
when they upgrade to these device trees?

We really should not be putting partition layout info in the device tree
to begin with...

-Scott

^ permalink raw reply

* Re: [RFC PATCH v2 1/1] powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100.
From: Scott Wood @ 2013-09-10 23:47 UTC (permalink / raw)
  To: Stephen Chivers; +Cc: paulus, linuxppc-dev, cproctor
In-Reply-To: <20130905055122.F256EE07B6@canberra.localdomain>

On Thu, 2013-09-05 at 15:51 +1000, Stephen Chivers wrote:
> Add support for the Motorola/Emerson MVME5100 Single Board Computer.
> 
> The MVME5100 is a 6U form factor VME64 computer with:
> 
> 	- A single MPC7410 or MPC750 CPU
> 	- A HAWK Processor Host Bridge (CPU to PCI) and
> 	  MultiProcessor Interrupt Controller (MPIC)
> 	- Up to 500Mb of onboard memory
> 	- A M48T37 Real Time Clock (RTC) and Non-Volatile Memory chip
> 	- Two 16550 compatible UARTS
> 	- Two Intel E100 Fast Ethernets
> 	- Two PCI Mezzanine Card (PMC) Slots
> 	- PPCBug Firmware
> 
> The HAWK PHB/MPIC is compatible with the MPC10x devices.
> 
> There is no onboard disk support. This is usually provided by
> installing a PMC in the first PMC slot.
> 
> This patch revives the board support, it was present in early 2.6
> series kernels. The board support in those days was by Matt Porter of
> MontaVista Software.
> 
> CSC Australia has around 31 of these boards in service. The kernel in use
> for the boards is based on 2.6.31. The boards are operated without disks
> from a file server. 
> 
> This patch is based on linux-3.11-rc7 and has been boot tested.
> 
> V1->V2:
> 	Address comments by Kular Gama and Scott Wood.
> 	Minor adjustment to platforms/embedded6xx/Kconfig to ensure
> 		correct indentation where possible.
> 
> Signed-off-by: Stephen Chivers <schivers@csc.com>
> ---

Some comments below, and please run checkpatch.pl, but the next version
can probably be non-RFC if you're happy with it.

> +		PowerPC,7410 {
> +			device_type = "cpu";
> +			reg = <0x0>;
> +			/* Following required by dtc but not used */
> +			d-cache-line-size = <32>;
> +			i-cache-line-size = <32>;
> +			i-cache-size = <32768>;
> +			d-cache-size = <32768>;
> +			timebase-frequency = <25000000>;
> +			clock-frequency = <500000000>;
> +                        bus-frequency = <100000000>;
> +		};

Whitespace on bus-frequency

> +		mpic: interrupt-controller@f3f80000 {
> +			#interrupt-cells = <2>;
> +			#address-cells = <0>;
> +			device_type = "open-pic";
> +			compatible = "chrp,open-pic";
> +			interrupt-controller;
> +			reg = <0xf3f80000 0x40000>;
> +		};
> +
> +	};

No blank line before }

> +CONFIG_CMDLINE_BOOL=y
> +CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"

I take it there's no way to pass a command line in from whatever loader
this board uses... but you could put it in the dts instead.

Did you ever figure out the problem with the combined defconfig?

> +	help
> +          This option enables support for the Motorola (now Emerson) MVME5100
> +	  board.

Whitespace

> +/* Board register addresses. */
> +#define	BOARD_STATUS_REG	0xfef88080
> +#define	BOARD_MODFAIL_REG	0xfef88090
> +#define	BOARD_MODRST_REG	0xfef880a0
> +#define	BOARD_TBEN_REG		0xfef880c0
> +#define BOARD_SW_READ_REG	0xfef880e0
> +#define	BOARD_GEO_ADDR_REG	0xfef880e8
> +#define	BOARD_EXT_FEATURE1_REG	0xfef880f0
> +#define	BOARD_EXT_FEATURE2_REG	0xfef88100

Use a space rather than a tab after #define.

> +static unsigned int pci_membase;

phys_addr_t

> +static void mvme5100_restart(char *cmd)
> +{
> +	u_char			*restart;

Is all that tabbing before "*restart" really necessary?

> +	restart = ioremap(BOARD_MODRST_REG, 4);
> +	local_irq_disable();
> +	mtmsr(mfmsr() | MSR_IP);
> +
> +	out_8((u_char *) restart, 0x01);

If ioremap() fails you'll panic here.

In any case, you should map things at boot time.

-Scott

^ permalink raw reply

* Re: [PATCH 1/6] powerpc: Put FP/VSX and VR state into structures
From: Paul Mackerras @ 2013-09-10 23:52 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm-ppc, kvm, linuxppc-dev
In-Reply-To: <992F9967-6DA8-4BDF-A6BC-A8F9E8F4FF7D@suse.de>

On Tue, Sep 10, 2013 at 12:07:46PM -0500, Alexander Graf wrote:
> 
> On 10.09.2013, at 05:20, Paul Mackerras wrote:
> > @@ -897,7 +897,7 @@ int fix_alignment(struct pt_regs *regs)
> > 				return -EFAULT;
> > 		}
> > 	} else if (flags & F) {
> > -		data.dd = current->thread.TS_FPR(reg);
> > +		data.ll = current->thread.TS_FPR(reg);
> 
> I don't understand this change. Could you please explain?

It's simply that the type which we use to store the FPR values is now
an unsigned integer type rather than a floating-point type.  If I
didn't make this change, the compiler would try to convert that
unsigned integer value into a floating-point value, which we don't
want.

> > --- a/arch/powerpc/kernel/tm.S
> > +++ b/arch/powerpc/kernel/tm.S
> > @@ -12,16 +12,15 @@
> > #include <asm/reg.h>
> > 
> > #ifdef CONFIG_VSX
> > -/* See fpu.S, this is very similar but to save/restore checkpointed FPRs/VSRs */
> > -#define __SAVE_32FPRS_VSRS_TRANSACT(n,c,base)	\
> > +/* See fpu.S, this is borrowed from there */
> > +#define __SAVE_32FPRS_VSRS(n,c,base)		\
> 
> Should this really be in tm.S with its new name?

Do you mean, could I merge it with __SAVE_32FPVSRS from fpu.S, put it
in ppc_asm.h and avoid having two very similar macros defined in
different places?  Yes I could, and that's a good idea.

Paul.

^ permalink raw reply

* Re: [PATCH 2/6] powerpc: Provide for giveup_fpu/altivec to save state in alternate location
From: Paul Mackerras @ 2013-09-10 23:54 UTC (permalink / raw)
  To: Alexander Graf; +Cc: kvm-ppc, kvm, linuxppc-dev
In-Reply-To: <675235E3-6991-4DB1-90A4-935E826F6413@suse.de>

On Tue, Sep 10, 2013 at 12:12:47PM -0500, Alexander Graf wrote:
> 
> On 10.09.2013, at 05:21, Paul Mackerras wrote:
> 
> > @@ -212,6 +212,7 @@ struct thread_struct {
> > #endif
> > #endif
> > 	struct thread_fp_state	fp_state;
> > +	struct thread_fp_state	*fp_save_area;
> 
> Why do you need these pointers? Couldn't you handle everything you need through preempt notifiers?

As you note in your review of a later patch, no, I need the pointer so
that if in-kernel code wants to use FP or VSX, potentially in the
context of this same process, it knows where to save the FP/VSX state
away to.

Paul.

^ permalink raw reply

* Re: [PATCH][RFC][v2] pci: fsl: rework PCIe driver compatible with Layerscape
From: Scott Wood @ 2013-09-11  0:25 UTC (permalink / raw)
  To: Minghuan Lian; +Cc: linuxppc-dev, Zang Roy-R61911
In-Reply-To: <1377686572-3981-1-git-send-email-Minghuan.Lian@freescale.com>

On Wed, 2013-08-28 at 18:42 +0800, Minghuan Lian wrote:
> The Freescale's Layerscape series processors will use ARM cores.
> The LS1's PCIe controller is the same as the T4240's. So it's better
> if the PCIe controller driver can support PowerPC and ARM
> simultaneously. This patch is for this purpose. It derives
> the common functions from arch/powerpc/sysdev/fsl_pci.c to
> drivers/pci/host/pcie-fsl.c and leaves several platform-dependent
> functions which should be implemented in platform files.
> 
> Signed-off-by: Minghuan Lian <Minghuan.Lian@freescale.com>
> ---
> Based on upstream master 3.11-rc7
> The function has been tested on MPC8315ERDB MPC8572DS P5020DS P3041DS
> and T4240QDS boards 
> 
> Change log:
> v2:
> 1. Use 'pci' instead of 'pcie' in new file name and file contents. 
> 2. Use iowrite32be()/iowrite32() instead of out_be32/le32()
> 3. Fix ppc_md.dma_set_mask setting
> 4. Synchronizes host->first_busno and pci->first_busno.
> 5. Fix PCI IO space settings
> 6. Some small changes according to Scott's comments.

Could you please split this into two patches, where one moves the code,
and the other makes changes to it?

-Scott

^ permalink raw reply

* Re: [RFC PATCH v2 1/1] powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100.
From: Stephen N Chivers @ 2013-09-11  0:29 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc-dev, paulus, Chris Proctor
In-Reply-To: <1378856847.12204.380.camel@snotra.buserror.net>

Scott Wood <scottwood@freescale.com> wrote on 09/11/2013 09:47:27 AM:

> From: Scott Wood <scottwood@freescale.com>
> To: Stephen N Chivers/AUS/CSC@CSC
> Cc: Chris Proctor/AUS/CSC@CSC, <linuxppc-dev@lists.ozlabs.org>, 
> <paulus@samba.org>, <benh@kernel.crashing.org>
> Date: 09/11/2013 09:47 AM
> Subject: Re: [RFC PATCH v2 1/1] powerpc/embedded6xx: Add support for
> Motorola/Emerson MVME5100.
> 
> On Thu, 2013-09-05 at 15:51 +1000, Stephen Chivers wrote:
> > Add support for the Motorola/Emerson MVME5100 Single Board Computer.
> > 
> > The MVME5100 is a 6U form factor VME64 computer with:
> > 
> >    - A single MPC7410 or MPC750 CPU
> >    - A HAWK Processor Host Bridge (CPU to PCI) and
> >      MultiProcessor Interrupt Controller (MPIC)
> >    - Up to 500Mb of onboard memory
> >    - A M48T37 Real Time Clock (RTC) and Non-Volatile Memory chip
> >    - Two 16550 compatible UARTS
> >    - Two Intel E100 Fast Ethernets
> >    - Two PCI Mezzanine Card (PMC) Slots
> >    - PPCBug Firmware
> > 
> > The HAWK PHB/MPIC is compatible with the MPC10x devices.
> > 
> > There is no onboard disk support. This is usually provided by
> > installing a PMC in the first PMC slot.
> > 
> > This patch revives the board support, it was present in early 2.6
> > series kernels. The board support in those days was by Matt Porter of
> > MontaVista Software.
> > 
> > CSC Australia has around 31 of these boards in service. The kernel in use
> > for the boards is based on 2.6.31. The boards are operated without disks
> > from a file server.
> > 
> > This patch is based on linux-3.11-rc7 and has been boot tested.
> > 
> > V1->V2:
> >    Address comments by Kumar Gala and Scott Wood.
> >    Minor adjustment to platforms/embedded6xx/Kconfig to ensure
> >       correct indentation where possible.
> > 
> > Signed-off-by: Stephen Chivers <schivers@csc.com>
> > ---
> 
> Some comments below, and please run checkpatch.pl, but the next version
> can probably be non-RFC if you're happy with it.
Ok.
> 
> > +      PowerPC,7410 {
> > +         device_type = "cpu";
> > +         reg = <0x0>;
> > +         /* Following required by dtc but not used */
> > +         d-cache-line-size = <32>;
> > +         i-cache-line-size = <32>;
> > +         i-cache-size = <32768>;
> > +         d-cache-size = <32768>;
> > +         timebase-frequency = <25000000>;
> > +         clock-frequency = <500000000>;
> > +                        bus-frequency = <100000000>;
> > +      };
> 
> Whitespace on bus-frequency
Ok. Will fix.
> 
> > +      mpic: interrupt-controller@f3f80000 {
> > +         #interrupt-cells = <2>;
> > +         #address-cells = <0>;
> > +         device_type = "open-pic";
> > +         compatible = "chrp,open-pic";
> > +         interrupt-controller;
> > +         reg = <0xf3f80000 0x40000>;
> > +      };
> > +
> > +   };
> 
> No blank line before }
Ok. Will be fixed.
> 
> > +CONFIG_CMDLINE_BOOL=y
> > +CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
> 
> I take it there's no way to pass a command line in from whatever loader
> this board uses... but you could put it in the dts instead.
>
It can be done by reading the NVRAM/RTC (M48T37) and "overriding" the
DTS specification. But I wanted to keep things simple to start
with.

Putting the default command line in the DTS is required to support
a combined "defconfig" (ppc6xx_defconfig) and I know it does work.
So I will do that.

> Did you ever figure out the problem with the combined defconfig?
Not really. But I have been forced to think about that as a new
project will be using some PMCs with 8250 UARTS (PCI) and they are
another way that the console moves from the debug port on the front
panel to somewhere else.

It is very likely that the HAWK UARTS will have to be registered as
platform devices by the board support file itself.

> 
> > +   help
> > +          This option enables support for the Motorola (now Emerson) MVME5100
> > +     board.
> 
> Whitespace
Ok.
> 
> > +/* Board register addresses. */
> > +#define   BOARD_STATUS_REG   0xfef88080
> > +#define   BOARD_MODFAIL_REG   0xfef88090
> > +#define   BOARD_MODRST_REG   0xfef880a0
> > +#define   BOARD_TBEN_REG      0xfef880c0
> > +#define BOARD_SW_READ_REG   0xfef880e0
> > +#define   BOARD_GEO_ADDR_REG   0xfef880e8
> > +#define   BOARD_EXT_FEATURE1_REG   0xfef880f0
> > +#define   BOARD_EXT_FEATURE2_REG   0xfef88100
> 
> Use a space rather than a tab after #define.
Ok.
> 
> > +static unsigned int pci_membase;
> 
> phys_addr_t
Ok.
> 
> > +static void mvme5100_restart(char *cmd)
> > +{
> > +   u_char         *restart;
> 
> Is all that tabbing before "*restart" really necessary?
Will fix.
> 
> > +   restart = ioremap(BOARD_MODRST_REG, 4);
> > +   local_irq_disable();
> > +   mtmsr(mfmsr() | MSR_IP);
> > +
> > +   out_8((u_char *) restart, 0x01);
> 
> If ioremap() fails you'll panic here.
Ok. Will do in mvme5100_setup_arch.
> 
> In any case, you should map things at boot time.
> 
> -Scott
Thanks,
Stephen.
> 
> 
> 

^ permalink raw reply

* Re: [RFC PATCH v2 1/1] powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100.
From: Benjamin Herrenschmidt @ 2013-09-11  1:15 UTC (permalink / raw)
  To: Scott Wood; +Cc: Stephen Chivers, paulus, linuxppc-dev, cproctor
In-Reply-To: <1378856847.12204.380.camel@snotra.buserror.net>

On Tue, 2013-09-10 at 18:47 -0500, Scott Wood wrote:

> No blank line before }
> 
> > +CONFIG_CMDLINE_BOOL=y
> > +CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
> 
> I take it there's no way to pass a command line in from whatever loader
> this board uses... but you could put it in the dts instead.

No, please don't put that in the device-tree. Somebody might want
different settings, this is typically what .config is for.

> Did you ever figure out the problem with the combined defconfig?
> 
> > +	help
> > +          This option enables support for the Motorola (now Emerson) MVME5100
> > +	  board.
> 
> Whitespace
> 
> > +/* Board register addresses. */
> > +#define	BOARD_STATUS_REG	0xfef88080
> > +#define	BOARD_MODFAIL_REG	0xfef88090
> > +#define	BOARD_MODRST_REG	0xfef880a0
> > +#define	BOARD_TBEN_REG		0xfef880c0
> > +#define BOARD_SW_READ_REG	0xfef880e0
> > +#define	BOARD_GEO_ADDR_REG	0xfef880e8
> > +#define	BOARD_EXT_FEATURE1_REG	0xfef880f0
> > +#define	BOARD_EXT_FEATURE2_REG	0xfef88100
> 
> Use a space rather than a tab after #define.
> 
> > +static unsigned int pci_membase;
> 
> phys_addr_t
> 
> > +static void mvme5100_restart(char *cmd)
> > +{
> > +	u_char			*restart;
> 
> Is all that tabbing before "*restart" really necessary?
> 
> > +	restart = ioremap(BOARD_MODRST_REG, 4);
> > +	local_irq_disable();
> > +	mtmsr(mfmsr() | MSR_IP);
> > +
> > +	out_8((u_char *) restart, 0x01);
> 
> If ioremap() fails you'll panic here.
> 
> In any case, you should map things at boot time.
> 
> -Scott
> 
> 

^ permalink raw reply

* RE: [PATCH] powerpc/85xx: DTS - re-organize the SPI partitions property
From: Hu Mingkai-B21284 @ 2013-09-11  2:07 UTC (permalink / raw)
  To: Wood Scott-B07421; +Cc: linuxppc-dev@ozlabs.org
In-Reply-To: <1378856005.12204.372.camel@snotra.buserror.net>

> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Wednesday, September 11, 2013 7:33 AM
> To: Hu Mingkai-B21284
> Cc: linuxppc-dev@ozlabs.org
> Subject: Re: [PATCH] powerpc/85xx: DTS - re-organize the SPI partitions
> property
>
> On Fri, 2013-09-06 at 16:05 +0800, Mingkai Hu wrote:
> > Re-organize the SPI partitions and use the same SPI flash memory map
> > for most of the platforms which have 16MB SPI flash mounted.
> [snip]
> > Based on 'next' branch on git tree:
> > git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git
> >
> >  arch/powerpc/boot/dts/bsc9131rdb.dtsi  | 35 ++++++++++++--------------
> ---
> >  arch/powerpc/boot/dts/c293pcie.dts     | 35 ++++++++++++--------------
> ---
> >  arch/powerpc/boot/dts/mpc8536ds.dtsi   | 12 +++++-----
> >  arch/powerpc/boot/dts/p1010rdb.dtsi    | 40 ++++++++++++--------------
> --------
> >  arch/powerpc/boot/dts/p1020rdb-pc.dtsi | 24 +++++---------------
> > arch/powerpc/boot/dts/p1020rdb-pd.dts  | 34 ++++++++++++---------------
> --
> >  arch/powerpc/boot/dts/p1020rdb.dtsi    | 23 ++++---------------
> >  arch/powerpc/boot/dts/p1021mds.dts     | 17 +++++++--------
> >  arch/powerpc/boot/dts/p1021rdb-pc.dtsi | 32 +++++++++-----------------
> -
> >  arch/powerpc/boot/dts/p1022ds.dtsi     | 21 +++++++++---------
> >  arch/powerpc/boot/dts/p1023rds.dts     | 10 ++-------
> >  arch/powerpc/boot/dts/p1024rdb.dtsi    | 40 ++++++++++++--------------
> --------
> >  arch/powerpc/boot/dts/p1025rdb.dtsi    | 23 +++++--------------
> >  arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 40 ++++++++++++--------------
> --------
> >  arch/powerpc/boot/dts/p2020rdb.dts     | 38 ++++++++++----------------
> ------
> >  arch/powerpc/boot/dts/p2041rdb.dts     | 12 +++++-----
> >  arch/powerpc/boot/dts/p3041ds.dts      | 12 +++++-----
> >  arch/powerpc/boot/dts/p4080ds.dts      | 12 +++++-----
> >  arch/powerpc/boot/dts/p5020ds.dts      | 12 +++++-----
> >  arch/powerpc/boot/dts/p5040ds.dts      | 13 ++++++-----
>
> What happens to exsting users whose flash is laid out the existing way,
> when they upgrade to these device trees?
>

The SPI flash layout should be mapping the new device tree.

If the existing device tree is used to deploy the SPI flash, the following issues
must be run into as the commit message described:

1. Kernel images would be overlapped with U-Boot image.
2. Kernel images would be overlapped with FMAN ucode.
3. Saving environment variables will crash the kernel image.

> We really should not be putting partition layout info in the device tree
> to begin with...
>
OK, I will remove the layout diagram in the commit message.

Thanks,
Mingkai

^ permalink raw reply

* [PATCH V3 0/6] cpuidle/ppc: Enable broadcast support for deep idle states
From: Preeti U Murthy @ 2013-09-11  2:50 UTC (permalink / raw)
  To: benh, paul.gortmaker, paulus, shangw, rjw, galak, fweisbec,
	paulmck, arnd, linux-pm, rostedt, michael, john.stultz, tglx,
	chenhui.zhao, deepthi, r58472, geoff, linux-kernel, srivatsa.bhat,
	schwidefsky, svaidy, linuxppc-dev

On PowerPC, when CPUs enter deep idle states, their local timers get
switched off. An external clock device needs to be programmed to wake them
up at their next timer event.
	On PowerPC, we do not have an external device equivalent to HPET,
which is currently used on architectures like x86 under the same scenario.
Instead we assign the local timer of one of the CPUs to do this job.

This patchset is an attempt to hook onto the existing timer broadcast
framework in the kernel by using the local timer of one of the CPUs to do the
job of the external clock device.

On expiry of this device, the broadcast framework today has the infrastructure
to send ipis to all such CPUs whose local timers have expired. Hence the term
"broadcast" and the ipi sent is called the broadcast ipi.

This patch series is ported on top of 3.11-rc7 + the cpuidle driver backend
for power posted by Deepthi Dharwar recently.
http://comments.gmane.org/gmane.linux.ports.ppc.embedded/63556

Changes in V3:

1. Fix the way in which a broadcast ipi is handled on the idling cpus. Timer
handling on a broadcast ipi is being done now without missing out any timer
stats generation.

2. Fix a bug in the programming of the hrtimer meant to do broadcast. Program
it to trigger at the earlier of a "broadcast period", and the next wakeup
event. By introducing the "broadcast period" as the maximum period after
which the broadcast hrtimer can fire, we ensure that we do not miss
wakeups in corner cases.

3. On hotplug of a broadcast cpu, trigger the hrtimer meant to do broadcast
to fire immediately on the new broadcast cpu. This ensures that we do not miss
a broadcast that is pending in the near future.

4. Change the type of allocation from GFP_KERNEL to GFP_NOWAIT while
initializing bc_hrtimer since we are in an atomic context and cannot sleep.

5. Use the broadcast ipi to wake up the newly nominated broadcast cpu on
hotplug of the old one, instead of smp_call_function_single(). This is because
interrupts are disabled at this point and we should not be using
smp_call_function_single() or its children in this context to send an ipi.

6. Move GENERIC_CLOCKEVENTS_BROADCAST to arch/powerpc/Kconfig.

7. Fix coding style issues.

Changes in V2: https://lkml.org/lkml/2013/8/14/239

1. Dynamically pick a broadcast CPU, instead of having a dedicated one.
2. Remove the constraint of having to disable tickless idle on the broadcast
CPU by queueing a hrtimer dedicated to do broadcast.

V1 posting: https://lkml.org/lkml/2013/7/25/740.

The patchset has been tested for stability in idle and during multi threaded
ebizzy runs.

Many thanks to Ben H, Frederic Weisbecker, Li Yang, Srivatsa S. Bhat and
Vaidyanathan Srinivasan for all their comments and suggestions so far.

---

Preeti U Murthy (4):
      cpuidle/ppc: Split timer_interrupt() into timer handling and interrupt handling routines
      cpuidle/ppc: Add basic infrastructure to support the broadcast framework on ppc
      cpuidle/ppc: Introduce the deep idle state in which the local timers stop
      cpuidle/ppc: Nominate new broadcast cpu on hotplug of the old

Srivatsa S. Bhat (2):
      powerpc: Free up the IPI message slot of ipi call function (PPC_MSG_CALL_FUNC)
      powerpc: Implement broadcast timer interrupt as an IPI message


 arch/powerpc/Kconfig                    |    1 
 arch/powerpc/include/asm/smp.h          |    3 -
 arch/powerpc/include/asm/time.h         |    4 +
 arch/powerpc/kernel/smp.c               |   23 +++-
 arch/powerpc/kernel/time.c              |  143 ++++++++++++++++++++------
 arch/powerpc/platforms/cell/interrupt.c |    2 
 arch/powerpc/platforms/ps3/smp.c        |    2 
 drivers/cpuidle/cpuidle-ibm-power.c     |  172 +++++++++++++++++++++++++++++++
 scripts/kconfig/streamline_config.pl    |    0 
 9 files changed, 307 insertions(+), 43 deletions(-)
 mode change 100644 => 100755 scripts/kconfig/streamline_config.pl

^ permalink raw reply

* [PATCH V3 1/6] powerpc: Free up the IPI message slot of ipi call function (PPC_MSG_CALL_FUNC)
From: Preeti U Murthy @ 2013-09-11  2:51 UTC (permalink / raw)
  To: benh, paul.gortmaker, paulus, shangw, rjw, galak, fweisbec,
	paulmck, arnd, linux-pm, rostedt, michael, john.stultz, tglx,
	chenhui.zhao, deepthi, r58472, geoff, linux-kernel, srivatsa.bhat,
	schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20130911024906.27726.4735.stgit@preeti.in.ibm.com>

From: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

The IPI handlers for both PPC_MSG_CALL_FUNC and PPC_MSG_CALL_FUNC_SINGLE map to
a common implementation - generic_smp_call_function_single_interrupt(). So, we
can consolidate them and save one of the IPI message slots, (which are precious,
since only 4 of those slots are available).

So, implement the functionality of PPC_MSG_CALL_FUNC using
PPC_MSG_CALL_FUNC_SINGLE itself and release its IPI message slot, so that it
can be used for something else in the future, if desired.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---

 arch/powerpc/include/asm/smp.h          |    2 +-
 arch/powerpc/kernel/smp.c               |   12 +++++-------
 arch/powerpc/platforms/cell/interrupt.c |    2 +-
 arch/powerpc/platforms/ps3/smp.c        |    2 +-
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 48cfc85..a632b6e 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -117,7 +117,7 @@ extern int cpu_to_core_id(int cpu);
  *
  * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up
  * in /proc/interrupts will be wrong!!! --Troy */
-#define PPC_MSG_CALL_FUNCTION   0
+#define PPC_MSG_UNUSED		0
 #define PPC_MSG_RESCHEDULE      1
 #define PPC_MSG_CALL_FUNC_SINGLE	2
 #define PPC_MSG_DEBUGGER_BREAK  3
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 38b0ba6..bc41e9f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -111,9 +111,9 @@ int smp_generic_kick_cpu(int nr)
 }
 #endif /* CONFIG_PPC64 */
 
-static irqreturn_t call_function_action(int irq, void *data)
+static irqreturn_t unused_action(int irq, void *data)
 {
-	generic_smp_call_function_interrupt();
+	/* This slot is unused and hence available for use, if needed */
 	return IRQ_HANDLED;
 }
 
@@ -144,14 +144,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data)
 }
 
 static irq_handler_t smp_ipi_action[] = {
-	[PPC_MSG_CALL_FUNCTION] =  call_function_action,
+	[PPC_MSG_UNUSED] =  unused_action, /* Slot available for future use */
 	[PPC_MSG_RESCHEDULE] = reschedule_action,
 	[PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
 	[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
 };
 
 const char *smp_ipi_name[] = {
-	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",
+	[PPC_MSG_UNUSED] =  "ipi unused",
 	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
 	[PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
 	[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
@@ -221,8 +221,6 @@ irqreturn_t smp_ipi_demux(void)
 		all = xchg(&info->messages, 0);
 
 #ifdef __BIG_ENDIAN
-		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
-			generic_smp_call_function_interrupt();
 		if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
 			scheduler_ipi();
 		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
@@ -265,7 +263,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 	unsigned int cpu;
 
 	for_each_cpu(cpu, mask)
-		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
+		do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
 }
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 2d42f3b..28166e4 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -213,7 +213,7 @@ static void iic_request_ipi(int msg)
 
 void iic_request_IPIs(void)
 {
-	iic_request_ipi(PPC_MSG_CALL_FUNCTION);
+	iic_request_ipi(PPC_MSG_UNUSED);
 	iic_request_ipi(PPC_MSG_RESCHEDULE);
 	iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE);
 	iic_request_ipi(PPC_MSG_DEBUGGER_BREAK);
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 4b35166..488f069 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -74,7 +74,7 @@ static int __init ps3_smp_probe(void)
 		* to index needs to be setup.
 		*/
 
-		BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION    != 0);
+		BUILD_BUG_ON(PPC_MSG_UNUSED	      != 0);
 		BUILD_BUG_ON(PPC_MSG_RESCHEDULE       != 1);
 		BUILD_BUG_ON(PPC_MSG_CALL_FUNC_SINGLE != 2);
 		BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK   != 3);

^ permalink raw reply related

* [PATCH V3 2/6] powerpc: Implement broadcast timer interrupt as an IPI message
From: Preeti U Murthy @ 2013-09-11  2:51 UTC (permalink / raw)
  To: benh, paul.gortmaker, paulus, shangw, rjw, galak, fweisbec,
	paulmck, arnd, linux-pm, rostedt, michael, john.stultz, tglx,
	chenhui.zhao, deepthi, r58472, geoff, linux-kernel, srivatsa.bhat,
	schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20130911024906.27726.4735.stgit@preeti.in.ibm.com>

From: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

For scalability and performance reasons, we want the broadcast IPIs
to be handled as efficiently as possible. Fixed IPI messages
are one of the most efficient mechanisms available - they are faster
than the smp_call_function mechanism because the IPI handlers are fixed
and hence they don't involve costly operations such as adding IPI handlers
to the target CPU's function queue, acquiring locks for synchronization etc.

Luckily we have an unused IPI message slot, so use that to implement
broadcast timer interrupts efficiently.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
[Changelog modified by preeti@linux.vnet.ibm.com]
Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---

 arch/powerpc/include/asm/smp.h          |    3 ++-
 arch/powerpc/include/asm/time.h         |    1 +
 arch/powerpc/kernel/smp.c               |   19 +++++++++++++++----
 arch/powerpc/kernel/time.c              |    4 ++++
 arch/powerpc/platforms/cell/interrupt.c |    2 +-
 arch/powerpc/platforms/ps3/smp.c        |    2 +-
 scripts/kconfig/streamline_config.pl    |    0 
 7 files changed, 24 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 scripts/kconfig/streamline_config.pl

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index a632b6e..22f6d63 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -117,7 +117,7 @@ extern int cpu_to_core_id(int cpu);
  *
  * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up
  * in /proc/interrupts will be wrong!!! --Troy */
-#define PPC_MSG_UNUSED		0
+#define PPC_MSG_TIMER		0
 #define PPC_MSG_RESCHEDULE      1
 #define PPC_MSG_CALL_FUNC_SINGLE	2
 #define PPC_MSG_DEBUGGER_BREAK  3
@@ -194,6 +194,7 @@ extern struct smp_ops_t *smp_ops;
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+extern void arch_send_tick_broadcast(const struct cpumask *mask);
 
 /* Definitions relative to the secondary CPU spin loop
  * and entry point. Not all of them exist on both 32 and
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index c1f2676..4e35282 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -28,6 +28,7 @@ extern struct clock_event_device decrementer_clockevent;
 struct rtc_time;
 extern void to_tm(int tim, struct rtc_time * tm);
 extern void GregorianDay(struct rtc_time *tm);
+extern void decrementer_timer_interrupt(void);
 
 extern void generic_calibrate_decr(void);
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index bc41e9f..d3b7014 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -35,6 +35,7 @@
 #include <asm/ptrace.h>
 #include <linux/atomic.h>
 #include <asm/irq.h>
+#include <asm/hw_irq.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/prom.h>
@@ -111,9 +112,9 @@ int smp_generic_kick_cpu(int nr)
 }
 #endif /* CONFIG_PPC64 */
 
-static irqreturn_t unused_action(int irq, void *data)
+static irqreturn_t timer_action(int irq, void *data)
 {
-	/* This slot is unused and hence available for use, if needed */
+	decrementer_timer_interrupt();
 	return IRQ_HANDLED;
 }
 
@@ -144,14 +145,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data)
 }
 
 static irq_handler_t smp_ipi_action[] = {
-	[PPC_MSG_UNUSED] =  unused_action, /* Slot available for future use */
+	[PPC_MSG_TIMER] =  timer_action,
 	[PPC_MSG_RESCHEDULE] = reschedule_action,
 	[PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
 	[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
 };
 
 const char *smp_ipi_name[] = {
-	[PPC_MSG_UNUSED] =  "ipi unused",
+	[PPC_MSG_TIMER] =  "ipi timer",
 	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
 	[PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
 	[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
@@ -221,6 +222,8 @@ irqreturn_t smp_ipi_demux(void)
 		all = xchg(&info->messages, 0);
 
 #ifdef __BIG_ENDIAN
+		if (all & (1 << (24 - 8 * PPC_MSG_TIMER)))
+			decrementer_timer_interrupt();
 		if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
 			scheduler_ipi();
 		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
@@ -266,6 +269,14 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 		do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
 }
 
+void arch_send_tick_broadcast(const struct cpumask *mask)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)
+		do_message_pass(cpu, PPC_MSG_TIMER);
+}
+
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
 void smp_send_debugger_break(void)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 65ab9e9..0dfa0c5 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -813,6 +813,10 @@ static void decrementer_set_mode(enum clock_event_mode mode,
 		decrementer_set_next_event(DECREMENTER_MAX, dev);
 }
 
+void decrementer_timer_interrupt(void)
+{
+}
+
 static void register_decrementer_clockevent(int cpu)
 {
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu);
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 28166e4..1359113 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -213,7 +213,7 @@ static void iic_request_ipi(int msg)
 
 void iic_request_IPIs(void)
 {
-	iic_request_ipi(PPC_MSG_UNUSED);
+	iic_request_ipi(PPC_MSG_TIMER);
 	iic_request_ipi(PPC_MSG_RESCHEDULE);
 	iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE);
 	iic_request_ipi(PPC_MSG_DEBUGGER_BREAK);
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 488f069..5cb742a 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -74,7 +74,7 @@ static int __init ps3_smp_probe(void)
 		* to index needs to be setup.
 		*/
 
-		BUILD_BUG_ON(PPC_MSG_UNUSED	      != 0);
+		BUILD_BUG_ON(PPC_MSG_TIMER	      != 0);
 		BUILD_BUG_ON(PPC_MSG_RESCHEDULE       != 1);
 		BUILD_BUG_ON(PPC_MSG_CALL_FUNC_SINGLE != 2);
 		BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK   != 3);
diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
old mode 100644
new mode 100755

^ permalink raw reply related

* [PATCH V3 3/6] cpuidle/ppc: Split timer_interrupt() into timer handling and interrupt handling routines
From: Preeti U Murthy @ 2013-09-11  2:51 UTC (permalink / raw)
  To: benh, paul.gortmaker, paulus, shangw, rjw, galak, fweisbec,
	paulmck, arnd, linux-pm, rostedt, michael, john.stultz, tglx,
	chenhui.zhao, deepthi, r58472, geoff, linux-kernel, srivatsa.bhat,
	schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20130911024906.27726.4735.stgit@preeti.in.ibm.com>

On PowerPC, when CPUs enter deep idle states, their local timers get
switched off. The local timer is called the decrementer. An external clock
device needs to be programmed to wake them up at their next timer event.
	On PowerPC, we do not have an external device equivalent to HPET,
which is currently used on architectures like x86 under the same scenario.
Instead we assign the local timer of one of the CPUs to do this job.

On expiry of this timer, the broadcast framework today has the infrastructure
to send ipis to all such CPUs whose local timers have expired.

When such an ipi is received, the cpus in deep idle should handle their
expired timers. It should be as though they were woken up from a
timer interrupt itself. Hence this external ipi serves as an emulated timer
interrupt for the cpus in deep idle.

Therefore ideally on ppc, these cpus should call timer_interrupt() which
is the interrupt handler for a decrementer interrupt. But timer_interrupt()
also contains routines which are usually performed in an interrupt handler.
These are not required to be done in this scenario as the external interrupt
handler takes care of them.

Therefore split up timer_interrupt() into routines performed during regular
interrupt handling and __timer_interrupt(), which takes care of running local
timers and collecting time related stats. Now on a broadcast ipi, call
__timer_interrupt().

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---

 arch/powerpc/kernel/time.c |   69 ++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 0dfa0c5..eb48291 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -478,6 +478,42 @@ void arch_irq_work_raise(void)
 
 #endif /* CONFIG_IRQ_WORK */
 
+static void __timer_interrupt(void)
+{
+	struct pt_regs *regs = get_irq_regs();
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+	struct clock_event_device *evt = &__get_cpu_var(decrementers);
+	u64 now;
+
+	__get_cpu_var(irq_stat).timer_irqs++;
+	trace_timer_interrupt_entry(regs);
+
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		irq_work_run();
+	}
+
+	now = get_tb_or_rtc();
+	if (now >= *next_tb) {
+		*next_tb = ~(u64)0;
+		if (evt->event_handler)
+			evt->event_handler(evt);
+	} else {
+		now = *next_tb - now;
+		if (now <= DECREMENTER_MAX)
+			set_dec((int)now);
+	}
+
+#ifdef CONFIG_PPC64
+	/* collect purr register values often, for accurate calculations */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+		cu->current_tb = mfspr(SPRN_PURR);
+	}
+#endif
+	trace_timer_interrupt_exit(regs);
+}
+
 /*
  * timer_interrupt - gets called when the decrementer overflows,
  * with interrupts disabled.
@@ -486,8 +522,6 @@ void timer_interrupt(struct pt_regs * regs)
 {
 	struct pt_regs *old_regs;
 	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
-	struct clock_event_device *evt = &__get_cpu_var(decrementers);
-	u64 now;
 
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions.
@@ -510,8 +544,6 @@ void timer_interrupt(struct pt_regs * regs)
 	 */
 	may_hard_irq_enable();
 
-	__get_cpu_var(irq_stat).timer_irqs++;
-
 #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
 		do_IRQ(regs);
@@ -520,34 +552,7 @@ void timer_interrupt(struct pt_regs * regs)
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 
-	trace_timer_interrupt_entry(regs);
-
-	if (test_irq_work_pending()) {
-		clear_irq_work_pending();
-		irq_work_run();
-	}
-
-	now = get_tb_or_rtc();
-	if (now >= *next_tb) {
-		*next_tb = ~(u64)0;
-		if (evt->event_handler)
-			evt->event_handler(evt);
-	} else {
-		now = *next_tb - now;
-		if (now <= DECREMENTER_MAX)
-			set_dec((int)now);
-	}
-
-#ifdef CONFIG_PPC64
-	/* collect purr register values often, for accurate calculations */
-	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
-		cu->current_tb = mfspr(SPRN_PURR);
-	}
-#endif
-
-	trace_timer_interrupt_exit(regs);
-
+	__timer_interrupt();
 	irq_exit();
 	set_irq_regs(old_regs);
 }

^ permalink raw reply related

* [PATCH V3 4/6] cpuidle/ppc: Add basic infrastructure to support the broadcast framework on ppc
From: Preeti U Murthy @ 2013-09-11  2:51 UTC (permalink / raw)
  To: benh, paul.gortmaker, paulus, shangw, rjw, galak, fweisbec,
	paulmck, arnd, linux-pm, rostedt, michael, john.stultz, tglx,
	chenhui.zhao, deepthi, r58472, geoff, linux-kernel, srivatsa.bhat,
	schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20130911024906.27726.4735.stgit@preeti.in.ibm.com>

The broadcast framework in the kernel expects an external clock device which will
continue functioning in deep idle states also. This ability is specified by
the "non-existence" of the feature C3STOP. This is the device that it relies
upon to wake up cpus in deep idle states whose local timers/clock devices get
switched off in deep idle states.

On ppc we do not have such an external device. Therefore we introduce a
pseudo clock device, which has the features of this external clock device
called the broadcast_clockevent. Having such a device qualifies the cpus to
enter and exit deep idle states from the point of view of the broadcast
framework, because there is an external device to wake them up.
	Specifically the broadcast framework uses this device's event
handler and next_event members in its functioning. On ppc we use this
device as the gateway into the broadcast framework and *not* as a
timer. An explicit timer infrastructure will be developed in the following
patches to keep track of when to wake up cpus in deep idle.

Since this device is a pseudo device, it can be safely assumed to work for
all cpus. Therefore its cpumask is set to cpu_possible_mask. Also due to the
same reason, the set_next_event() routine associated with this device is a
nop.

The broadcast framework relies on a broadcast functionality being made
available in the .broadcast member of the local clock devices on all cpus.
This function is called upon by the broadcast framework on one of the nominated
cpus, to send ipis to all the cpus in deep idle at their expired timer events.
This patch also initializes the .broadcast member of the decrementer whose
job is to send the broadcast ipis.

When cpus inform the broadcast framework that they are entering deep idle,
their local timers are put in shutdown mode. On ppc, this means setting the
decrementers_next_tb and programming the decrementer to DECREMENTER_MAX.
On being woken up by the broadcast ipi, these cpus call __timer_interrupt(),
which runs the local timers only if decrementers_next_tb has expired.
  Therefore on being woken up from the broadcast ipi, set the decrementers_next_tb
to now before calling __timer_interrupt().

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---

 arch/powerpc/Kconfig            |    1 +
 arch/powerpc/include/asm/time.h |    1 +
 arch/powerpc/kernel/time.c      |   69 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index dbd9d3c..550fc04 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -130,6 +130,7 @@ config PPC
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_TIME_VSYSCALL_OLD
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_CLOCKEVENTS_BROADCAST
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 4e35282..264dc96 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -24,6 +24,7 @@ extern unsigned long tb_ticks_per_jiffy;
 extern unsigned long tb_ticks_per_usec;
 extern unsigned long tb_ticks_per_sec;
 extern struct clock_event_device decrementer_clockevent;
+extern struct clock_event_device broadcast_clockevent;
 
 struct rtc_time;
 extern void to_tm(int tim, struct rtc_time * tm);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index eb48291..bda78bb 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -42,6 +42,7 @@
 #include <linux/timex.h>
 #include <linux/kernel_stat.h>
 #include <linux/time.h>
+#include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/profile.h>
 #include <linux/cpu.h>
@@ -97,8 +98,13 @@ static struct clocksource clocksource_timebase = {
 
 static int decrementer_set_next_event(unsigned long evt,
 				      struct clock_event_device *dev);
+static int broadcast_set_next_event(unsigned long evt,
+				      struct clock_event_device *dev);
+static void broadcast_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *dev);
 static void decrementer_set_mode(enum clock_event_mode mode,
 				 struct clock_event_device *dev);
+static void decrementer_timer_broadcast(const struct cpumask *mask);
 
 struct clock_event_device decrementer_clockevent = {
 	.name           = "decrementer",
@@ -106,12 +112,24 @@ struct clock_event_device decrementer_clockevent = {
 	.irq            = 0,
 	.set_next_event = decrementer_set_next_event,
 	.set_mode       = decrementer_set_mode,
-	.features       = CLOCK_EVT_FEAT_ONESHOT,
+	.broadcast	= decrementer_timer_broadcast,
+	.features       = CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_ONESHOT,
 };
 EXPORT_SYMBOL(decrementer_clockevent);
 
+struct clock_event_device broadcast_clockevent = {
+	.name           = "broadcast",
+	.rating         = 200,
+	.irq            = 0,
+	.set_next_event = broadcast_set_next_event,
+	.set_mode       = broadcast_set_mode,
+	.features       = CLOCK_EVT_FEAT_ONESHOT,
+};
+EXPORT_SYMBOL(broadcast_clockevent);
+
 DEFINE_PER_CPU(u64, decrementers_next_tb);
 static DEFINE_PER_CPU(struct clock_event_device, decrementers);
+static struct clock_event_device bc_timer;
 
 #define XSEC_PER_SEC (1024*1024)
 
@@ -811,6 +829,19 @@ static int decrementer_set_next_event(unsigned long evt,
 	return 0;
 }
 
+static int broadcast_set_next_event(unsigned long evt,
+					struct clock_event_device *dev)
+{
+	return 0;
+}
+
+static void broadcast_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *dev)
+{
+	if (mode != CLOCK_EVT_MODE_ONESHOT)
+		broadcast_set_next_event(DECREMENTER_MAX, dev);
+}
+
 static void decrementer_set_mode(enum clock_event_mode mode,
 				 struct clock_event_device *dev)
 {
@@ -820,6 +851,15 @@ static void decrementer_set_mode(enum clock_event_mode mode,
 
 void decrementer_timer_interrupt(void)
 {
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+	*next_tb = get_tb_or_rtc();
+	__timer_interrupt();
+}
+
+static void decrementer_timer_broadcast(const struct cpumask *mask)
+{
+	arch_send_tick_broadcast(mask);
 }
 
 static void register_decrementer_clockevent(int cpu)
@@ -835,6 +875,19 @@ static void register_decrementer_clockevent(int cpu)
 	clockevents_register_device(dec);
 }
 
+static void register_broadcast_clockevent(int cpu)
+{
+	struct clock_event_device *bc_evt = &bc_timer;
+
+	*bc_evt = broadcast_clockevent;
+	bc_evt->cpumask = cpu_possible_mask;
+
+	printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
+		    bc_evt->name, bc_evt->mult, bc_evt->shift, cpu);
+
+	clockevents_register_device(bc_evt);
+}
+
 static void __init init_decrementer_clockevent(void)
 {
 	int cpu = smp_processor_id();
@@ -849,6 +902,19 @@ static void __init init_decrementer_clockevent(void)
 	register_decrementer_clockevent(cpu);
 }
 
+static void __init init_broadcast_clockevent(void)
+{
+	int cpu = smp_processor_id();
+
+	clockevents_calc_mult_shift(&broadcast_clockevent, ppc_tb_freq, 4);
+
+	broadcast_clockevent.max_delta_ns =
+		clockevent_delta2ns(DECREMENTER_MAX, &broadcast_clockevent);
+	broadcast_clockevent.min_delta_ns =
+		clockevent_delta2ns(2, &broadcast_clockevent);
+	register_broadcast_clockevent(cpu);
+}
+
 void secondary_cpu_time_init(void)
 {
 	/* Start the decrementer on CPUs that have manual control
@@ -925,6 +991,7 @@ void __init time_init(void)
 	clocksource_init();
 
 	init_decrementer_clockevent();
+	init_broadcast_clockevent();
 }
 
 

^ permalink raw reply related

* [PATCH V3 5/6] cpuidle/ppc: Introduce the deep idle state in which the local timers stop
From: Preeti U Murthy @ 2013-09-11  2:52 UTC (permalink / raw)
  To: benh, paul.gortmaker, paulus, shangw, rjw, galak, fweisbec,
	paulmck, arnd, linux-pm, rostedt, michael, john.stultz, tglx,
	chenhui.zhao, deepthi, r58472, geoff, linux-kernel, srivatsa.bhat,
	schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20130911024906.27726.4735.stgit@preeti.in.ibm.com>

Now that we have the basic infrastructure setup to make use of the broadcast
framework, introduce the deep idle state in which cpus need to avail the
functionality provided by this infrastructure to wake them up at their
expired timer events. On ppc this deep idle state is called sleep.
	In this patch however, we introduce longnap, which emulates sleep
state, by disabling timer interrupts. This is until such time that sleep support is
made available in the kernel.

Since on ppc, we do not have an external device that can wakeup cpus in deep
idle, the local timer of one of the cpus needs to be nominated to do this job.
This cpu is called the broadcast cpu/bc_cpu. Only if the bc_cpu is nominated
will the remaining cpus be allowed to enter deep idle state after notifying
the broadcast framework about their next timer event. The bc_cpu is not allowed
to enter deep idle state.

The first cpu that enters longnap is made the bc_cpu. It queues a hrtimer onto
itself which expires after a broadcast period. The job of this
hrtimer is to call into the broadcast framework[1] using the pseudo clock device
that we have initialized, in which, the cpus whose wakeup times
have expired are sent an ipi.
	On each expiry of the hrtimer, it is programmed to the earlier of the
next pending timer event of the cpus in deep idle and the broadcast period, so
as to not miss any wakeups.

The broadcast period is nothing but the max duration until which the
bc_cpu need not concern itself with checking for expired timer events on cpus
in deep idle. The broadcast period is set to a jiffy in this patch for debug
purposes. Ideally it needn't be smaller than the target_residency of the deep
idle state.

But having a dedicated bc_cpu would mean overloading just one cpu with the
broadcast work which could hinder its performance apart from leading to thermal
imbalance on the chip. Therefore unassign the bc_cpu when there are no more cpus
in deep idle to be woken up. The bc_cpu is left unassigned until such a time that
a cpu enters longnap to be nominated as the bc_cpu and the above cycle repeats.

Protect the region of nomination, de-nomination and check for existence of broadcast
cpu with a lock to ensure synchronization between them.

[1] tick_handle_oneshot_broadcast() or tick_handle_periodic_broadcast().

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---

 arch/powerpc/include/asm/time.h     |    1 
 arch/powerpc/kernel/time.c          |    2 
 drivers/cpuidle/cpuidle-ibm-power.c |  150 +++++++++++++++++++++++++++++++++++
 3 files changed, 152 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 264dc96..38341fa 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -25,6 +25,7 @@ extern unsigned long tb_ticks_per_usec;
 extern unsigned long tb_ticks_per_sec;
 extern struct clock_event_device decrementer_clockevent;
 extern struct clock_event_device broadcast_clockevent;
+extern struct clock_event_device bc_timer;
 
 struct rtc_time;
 extern void to_tm(int tim, struct rtc_time * tm);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index bda78bb..44a76de 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -129,7 +129,7 @@ EXPORT_SYMBOL(broadcast_clockevent);
 
 DEFINE_PER_CPU(u64, decrementers_next_tb);
 static DEFINE_PER_CPU(struct clock_event_device, decrementers);
-static struct clock_event_device bc_timer;
+struct clock_event_device bc_timer;
 
 #define XSEC_PER_SEC (1024*1024)
 
diff --git a/drivers/cpuidle/cpuidle-ibm-power.c b/drivers/cpuidle/cpuidle-ibm-power.c
index f8905c3..ae47a0a 100644
--- a/drivers/cpuidle/cpuidle-ibm-power.c
+++ b/drivers/cpuidle/cpuidle-ibm-power.c
@@ -12,12 +12,19 @@
 #include <linux/cpuidle.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/clockchips.h>
+#include <linux/tick.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
 
 #include <asm/paca.h>
 #include <asm/reg.h>
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/runlatch.h>
+#include <asm/time.h>
 #include <asm/plpar_wrappers.h>
 
 struct cpuidle_driver power_idle_driver = {
@@ -28,6 +35,26 @@ struct cpuidle_driver power_idle_driver = {
 static int max_idle_state;
 static struct cpuidle_state *cpuidle_state_table;
 
+static int bc_cpu = -1;
+static struct hrtimer *bc_hrtimer;
+static int bc_hrtimer_initialized = 0;
+
+/*
+ * Bits to indicate if a cpu can enter deep idle where local timer gets
+ * switched off.
+ * BROADCAST_CPU_PRESENT : Enter deep idle since bc_cpu is assigned
+ * BROADCAST_CPU_SELF	 : Do not enter deep idle since you are bc_cpu
+ * BROADCAST_CPU_ABSENT	 : Do not enter deep idle since there is no bc_cpu,
+ * 			   hence nominate yourself as bc_cpu
+ * BROADCAST_CPU_ERROR	:  Do not enter deep idle since there is no bc_cpu
+ *			   and the broadcast hrtimer could not be initialized.
+ */
+enum broadcast_cpu_status {
+	BROADCAST_CPU_PRESENT,
+	BROADCAST_CPU_SELF,
+	BROADCAST_CPU_ERROR,
+};
+
 static inline void idle_loop_prolog(unsigned long *in_purr)
 {
 	*in_purr = mfspr(SPRN_PURR);
@@ -44,6 +71,8 @@ static inline void idle_loop_epilog(unsigned long in_purr)
 	get_lppaca()->idle = 0;
 }
 
+static DEFINE_SPINLOCK(longnap_idle_lock);
+
 static int snooze_loop(struct cpuidle_device *dev,
 			struct cpuidle_driver *drv,
 			int index)
@@ -139,6 +168,120 @@ static int nap_loop(struct cpuidle_device *dev,
 	return index;
 }
 
+/* Functions supporting broadcasting in longnap */
+static ktime_t get_next_bc_tick(void)
+{
+	u64 next_bc_ns;
+
+	next_bc_ns = (tb_ticks_per_jiffy / tb_ticks_per_usec) * 1000;
+	return ns_to_ktime(next_bc_ns);
+}
+
+static int restart_broadcast(struct clock_event_device *bc_evt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&longnap_idle_lock, flags);
+	bc_evt->event_handler(bc_evt);
+
+	if (bc_evt->next_event.tv64 == KTIME_MAX)
+		bc_cpu = -1;
+
+	spin_unlock_irqrestore(&longnap_idle_lock, flags);
+	return (bc_cpu != -1);
+}
+
+static enum hrtimer_restart handle_broadcast(struct hrtimer *hrtimer)
+{
+	struct clock_event_device *bc_evt = &bc_timer;
+	ktime_t interval, next_bc_tick;
+
+	u64 now = get_tb_or_rtc();
+	ktime_t now_ktime = ns_to_ktime((now / tb_ticks_per_usec) * 1000);
+
+	if (!restart_broadcast(bc_evt))
+		return HRTIMER_NORESTART;
+
+	interval.tv64 = bc_evt->next_event.tv64 - now_ktime.tv64;
+	next_bc_tick = get_next_bc_tick();
+
+	if (interval.tv64 < next_bc_tick.tv64)
+		hrtimer_forward_now(hrtimer, interval);
+	else
+		hrtimer_forward_now(hrtimer, next_bc_tick);
+
+	return HRTIMER_RESTART;
+}
+
+static enum broadcast_cpu_status can_enter_deep_idle(int cpu)
+{
+	if (bc_cpu != -1 && cpu != bc_cpu) {
+		return BROADCAST_CPU_PRESENT;
+	} else if (bc_cpu != -1 && cpu == bc_cpu) {
+		return BROADCAST_CPU_SELF;
+	} else {
+		if (!bc_hrtimer_initialized) {
+			bc_hrtimer = kmalloc(sizeof(*bc_hrtimer), GFP_NOWAIT);
+			if (!bc_hrtimer)
+				return BROADCAST_CPU_ERROR;
+			hrtimer_init(bc_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+			bc_hrtimer->function = handle_broadcast;
+			hrtimer_start(bc_hrtimer, get_next_bc_tick(),
+				HRTIMER_MODE_REL_PINNED);
+			bc_hrtimer_initialized = 1;
+		} else {
+			hrtimer_start(bc_hrtimer, get_next_bc_tick(), HRTIMER_MODE_REL_PINNED);
+		}
+
+		bc_cpu = cpu;
+		return BROADCAST_CPU_SELF;
+	}
+}
+
+/* Emulate sleep, with long nap.
+ * During sleep, the core does not receive decrementer interrupts.
+ * Emulate sleep using long nap with decrementers interrupts disabled.
+ * This is an initial prototype to test the broadcast framework for ppc.
+ */
+static int longnap_loop(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
+				int index)
+{
+	int cpu = dev->cpu;
+	unsigned long lpcr = mfspr(SPRN_LPCR);
+	unsigned long flags;
+	int bc_cpu_status;
+
+	lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] must be 0 */
+
+	/* exit powersave upon external interrupt, but not decrementer
+	 * interrupt, Emulate sleep.
+	 */
+	lpcr |= LPCR_PECE0;
+
+	spin_lock_irqsave(&longnap_idle_lock, flags);
+	bc_cpu_status = can_enter_deep_idle(cpu);
+
+	if (bc_cpu_status == BROADCAST_CPU_PRESENT) {
+		mtspr(SPRN_LPCR, lpcr);
+		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+		spin_unlock_irqrestore(&longnap_idle_lock, flags);
+		power7_nap();
+		spin_lock_irqsave(&longnap_idle_lock, flags);
+		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+		spin_unlock_irqrestore(&longnap_idle_lock, flags);
+	} else if (bc_cpu_status == BROADCAST_CPU_SELF) {
+		lpcr |= LPCR_PECE1;
+		mtspr(SPRN_LPCR, lpcr);
+		spin_unlock_irqrestore(&longnap_idle_lock, flags);
+		power7_nap();
+	} else {
+		spin_unlock_irqrestore(&longnap_idle_lock, flags);
+	}
+
+	return index;
+}
+
 /*
  * States for dedicated partition case.
  */
@@ -187,6 +330,13 @@ static struct cpuidle_state powernv_states[] = {
 		.exit_latency = 10,
 		.target_residency = 100,
 		.enter = &nap_loop },
+	 { /* LongNap */
+		.name = "LongNap",
+		.desc = "LongNap",
+		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 100,
+		.enter = &longnap_loop },
 };
 
 void update_smt_snooze_delay(int cpu, int residency)

^ permalink raw reply related

* [PATCH V3 6/6] cpuidle/ppc: Nominate new broadcast cpu on hotplug of the old
From: Preeti U Murthy @ 2013-09-11  2:52 UTC (permalink / raw)
  To: benh, paul.gortmaker, paulus, shangw, rjw, galak, fweisbec,
	paulmck, arnd, linux-pm, rostedt, michael, john.stultz, tglx,
	chenhui.zhao, deepthi, r58472, geoff, linux-kernel, srivatsa.bhat,
	schwidefsky, svaidy, linuxppc-dev
In-Reply-To: <20130911024906.27726.4735.stgit@preeti.in.ibm.com>

On hotplug of the broadcast cpu, cancel the hrtimer queued to do
broadcast and nominate a new broadcast cpu to be the first cpu in the
broadcast mask which includes all the cpus that have notified the broadcast
framework about entering deep idle state.

Since the new broadcast cpu is one of the cpus in deep idle, send an ipi to
wake it up to continue the duty of broadcast. The new broadcast cpu needs to
find out if it woke up to resume broadcast. If so it needs to restart the
broadcast hrtimer on itself.

It's possible that the old broadcast cpu was hotplugged out when the broadcast
hrtimer was about to fire on it. Therefore the newly nominated broadcast cpu
should set the broadcast hrtimer on itself to expire immediately so as to not
miss wakeups under such scenarios.

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
---

 arch/powerpc/include/asm/time.h     |    1 +
 arch/powerpc/kernel/time.c          |    1 +
 drivers/cpuidle/cpuidle-ibm-power.c |   22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 38341fa..3bc0205 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -31,6 +31,7 @@ struct rtc_time;
 extern void to_tm(int tim, struct rtc_time * tm);
 extern void GregorianDay(struct rtc_time *tm);
 extern void decrementer_timer_interrupt(void);
+extern void broadcast_irq_entry(void);
 
 extern void generic_calibrate_decr(void);
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 44a76de..0ac2e11 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -853,6 +853,7 @@ void decrementer_timer_interrupt(void)
 {
 	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
 
+	broadcast_irq_entry();
 	*next_tb = get_tb_or_rtc();
 	__timer_interrupt();
 }
diff --git a/drivers/cpuidle/cpuidle-ibm-power.c b/drivers/cpuidle/cpuidle-ibm-power.c
index ae47a0a..580ea04 100644
--- a/drivers/cpuidle/cpuidle-ibm-power.c
+++ b/drivers/cpuidle/cpuidle-ibm-power.c
@@ -282,6 +282,12 @@ static int longnap_loop(struct cpuidle_device *dev,
 	return index;
 }
 
+void broadcast_irq_entry(void)
+{
+	if (smp_processor_id() == bc_cpu)
+		hrtimer_start(bc_hrtimer, ns_to_ktime(0), HRTIMER_MODE_REL_PINNED);
+}
+
 /*
  * States for dedicated partition case.
  */
@@ -360,6 +366,7 @@ static int power_cpuidle_add_cpu_notifier(struct notifier_block *n,
 			unsigned long action, void *hcpu)
 {
 	int hotcpu = (unsigned long)hcpu;
+	unsigned long flags;
 	struct cpuidle_device *dev =
 			per_cpu(cpuidle_devices, hotcpu);
 
@@ -372,6 +379,21 @@ static int power_cpuidle_add_cpu_notifier(struct notifier_block *n,
 			cpuidle_resume_and_unlock();
 			break;
 
+		case CPU_DYING:
+		case CPU_DYING_FROZEN:
+			spin_lock_irqsave(&longnap_idle_lock, flags);
+			if (hotcpu == bc_cpu) {
+				bc_cpu = -1;
+				hrtimer_cancel(bc_hrtimer);
+				if (!cpumask_empty(tick_get_broadcast_oneshot_mask())) {
+					bc_cpu = cpumask_first(
+							tick_get_broadcast_oneshot_mask());
+					arch_send_tick_broadcast(cpumask_of(bc_cpu));
+				}
+			}
+			spin_unlock_irqrestore(&longnap_idle_lock, flags);
+			break;
+
 		case CPU_DEAD:
 		case CPU_DEAD_FROZEN:
 			cpuidle_pause_and_lock();

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox