LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v2 4/4] powerpc: Allow LD_DEAD_CODE_DATA_ELIMINATION to be selected
From: Mathieu Malaterre @ 2018-05-14  6:06 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: Masahiro Yamada, linux-arch, linuxppc-dev, linux-kbuild
In-Reply-To: <20180509130001.24276-5-npiggin@gmail.com>

On Wed, May 9, 2018 at 3:00 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
> This requires further changes to linker script to KEEP some tables
> and wildcard compiler generated sections into the right place. This
> includes ppc32 modifications from Christophe Leroy.
>
> When compiling powernv_defconfig with this option, the resulting
> kernel is almost 400kB smaller (and still boots):
>
>     text      data       bss        dec   filename
> 11827621   4810490   1341080   17979191   vmlinux
> 11752437   4598858   1338776   17690071   vmlinux.dcde
>
> Mathieu's numbers for custom Mac Mini G4 config has almost 200kB

Technically this is an oldconfig from debian original config file
(debian official powerpc kernel package)... tested on a mac mini g4.

> saving. It also had some increase in vmlinux size for as-yet
> unknown reasons.
>
>     text      data       bss        dec   filename
>  7461457   2475122   1428064   11364643   vmlinux
>  7386425   2364370   1425432   11176227   vmlinux.dcde
>
> Tested-by: Christophe Leroy <christophe.leroy@c-s.fr> [8xx]
> Tested-by: Mathieu Malaterre <malat@debian.org> [32-bit powermac]
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/Kconfig              |  1 +
>  arch/powerpc/kernel/vmlinux.lds.S | 22 +++++++++++-----------
>  2 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index c32a181a7cbb..ee6dbe2efc8b 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -205,6 +205,7 @@ config PPC
>         select HAVE_KPROBES
>         select HAVE_KPROBES_ON_FTRACE
>         select HAVE_KRETPROBES
> +       select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
>         select HAVE_LIVEPATCH                   if HAVE_DYNAMIC_FTRACE_WITH_REGS
>         select HAVE_MEMBLOCK
>         select HAVE_MEMBLOCK_NODE_MAP
> diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
> index c8af90ff49f0..52a93cdd04bc 100644
> --- a/arch/powerpc/kernel/vmlinux.lds.S
> +++ b/arch/powerpc/kernel/vmlinux.lds.S
> @@ -89,7 +89,7 @@ SECTIONS
>          */
>         .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
>  #ifdef CONFIG_LD_HEAD_STUB_CATCH
> -               *(.linker_stub_catch);
> +               KEEP(*(.linker_stub_catch));
>                 . = . ;
>  #endif
>
> @@ -98,7 +98,7 @@ SECTIONS
>                 ALIGN_FUNCTION();
>  #endif
>                 /* careful! __ftr_alt_* sections need to be close to .text */
> -               *(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
> +               *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
>                 SCHED_TEXT
>                 CPUIDLE_TEXT
>                 LOCK_TEXT
> @@ -170,10 +170,10 @@ SECTIONS
>         .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
>                 INIT_DATA
>                 __vtop_table_begin = .;
> -               *(.vtop_fixup);
> +               KEEP(*(.vtop_fixup));
>                 __vtop_table_end = .;
>                 __ptov_table_begin = .;
> -               *(.ptov_fixup);
> +               KEEP(*(.ptov_fixup));
>                 __ptov_table_end = .;
>         }
>
> @@ -194,26 +194,26 @@ SECTIONS
>         . = ALIGN(8);
>         __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
>                 __start___ftr_fixup = .;
> -               *(__ftr_fixup)
> +               KEEP(*(__ftr_fixup))
>                 __stop___ftr_fixup = .;
>         }
>         . = ALIGN(8);
>         __mmu_ftr_fixup : AT(ADDR(__mmu_ftr_fixup) - LOAD_OFFSET) {
>                 __start___mmu_ftr_fixup = .;
> -               *(__mmu_ftr_fixup)
> +               KEEP(*(__mmu_ftr_fixup))
>                 __stop___mmu_ftr_fixup = .;
>         }
>         . = ALIGN(8);
>         __lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) {
>                 __start___lwsync_fixup = .;
> -               *(__lwsync_fixup)
> +               KEEP(*(__lwsync_fixup))
>                 __stop___lwsync_fixup = .;
>         }
>  #ifdef CONFIG_PPC64
>         . = ALIGN(8);
>         __fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) {
>                 __start___fw_ftr_fixup = .;
> -               *(__fw_ftr_fixup)
> +               KEEP(*(__fw_ftr_fixup))
>                 __stop___fw_ftr_fixup = .;
>         }
>  #endif
> @@ -226,7 +226,7 @@ SECTIONS
>         . = ALIGN(8);
>         .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
>                 __machine_desc_start = . ;
> -               *(.machine.desc)
> +               KEEP(*(.machine.desc))
>                 __machine_desc_end = . ;
>         }
>  #ifdef CONFIG_RELOCATABLE
> @@ -274,7 +274,7 @@ SECTIONS
>         .data : AT(ADDR(.data) - LOAD_OFFSET) {
>                 DATA_DATA
>                 *(.data.rel*)
> -               *(.sdata)
> +               *(SDATA_MAIN)
>                 *(.sdata2)
>                 *(.got.plt) *(.got)
>                 *(.plt)
> @@ -289,7 +289,7 @@ SECTIONS
>
>         .opd : AT(ADDR(.opd) - LOAD_OFFSET) {
>                 __start_opd = .;
> -               *(.opd)
> +               KEEP(*(.opd))
>                 __end_opd = .;
>         }
>
> --
> 2.17.0
>

^ permalink raw reply

* Re: [PATCH 2/2] KVM: PPC: Book3S HV: lockless tlbie for HPT hcalls
From: Michael Ellerman @ 2018-05-14  4:04 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: Nicholas Piggin, kvm-ppc, linuxppc-dev
In-Reply-To: <20180510053042.GA14286@fergus.ozlabs.ibm.com>

Paul Mackerras <paulus@ozlabs.org> writes:

> On Fri, Apr 06, 2018 at 04:12:32PM +1000, Michael Ellerman wrote:
>> Nicholas Piggin <npiggin@gmail.com> writes:
>> > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> > index 78e6a392330f..0221a0f74f07 100644
>> > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> > @@ -439,6 +439,9 @@ static inline int try_lock_tlbie(unsigned int *lock)
>> >  	unsigned int tmp, old;
>> >  	unsigned int token = LOCK_TOKEN;
>> >  
>> > +	if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
>> > +		return 1;
>> > +
>> >  	asm volatile("1:lwarx	%1,0,%2\n"
>> >  		     "	cmpwi	cr0,%1,0\n"
>> >  		     "	bne	2f\n"
>> > @@ -452,6 +455,12 @@ static inline int try_lock_tlbie(unsigned int *lock)
>> >  	return old == 0;
>> >  }
>> >  
>> > +static inline void unlock_tlbie_after_sync(unsigned int *lock)
>> > +{
>> > +	if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
>> > +		return;
>> > +}
>> 
>> So this is a bit hard to follow:
>> 
>> #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	\
>> 	MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
>> #define MMU_FTRS_POWER		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
>> #define MMU_FTRS_PPC970		MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA			// does NOT
>> #define MMU_FTRS_POWER5		MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
>> #define MMU_FTRS_POWER6		MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA	// includes lockless TLBIE
>> #define MMU_FTRS_POWER7		MMU_FTRS_POWER6						// includes lockless TLBIE
>> #define MMU_FTRS_POWER8		MMU_FTRS_POWER6						// includes lockless TLBIE
>> #define MMU_FTRS_POWER9		MMU_FTRS_POWER6						// includes lockless TLBIE
>> #define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | 			// does NOT
>> 				MMU_FTR_CI_LARGE_PAGE
>> #define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \			// does NOT
>> 				MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
>> 
>> 
>> So it's only 970, Cell and Pasemi that *don't* have lockless TLBIE.
>> 
>> And KVM HV doesn't run on any of those.
>> 
>> So we can just not check for the feature in the KVM HV code.
>> 
>> Am I right?
>
> Yes; that code was written when we still supported HV KVM on 970,
> but we ripped that out some time ago.

OK good, in commit:

c17b98cf6028 ("KVM: PPC: Book3S HV: Remove code for PPC970 processors") (Dec 2014)

So we should be able to do the patch below.

cheers


diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 17498e9a26e4..7756b0c6da75 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -269,7 +269,6 @@ struct kvm_arch {
 	unsigned long host_lpcr;
 	unsigned long sdr1;
 	unsigned long host_sdr1;
-	int tlbie_lock;
 	unsigned long lpcr;
 	unsigned long vrma_slb_v;
 	int mmu_ready;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 78e6a392330f..89d909b3b881 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -434,24 +434,6 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
 		(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
 }
 
-static inline int try_lock_tlbie(unsigned int *lock)
-{
-	unsigned int tmp, old;
-	unsigned int token = LOCK_TOKEN;
-
-	asm volatile("1:lwarx	%1,0,%2\n"
-		     "	cmpwi	cr0,%1,0\n"
-		     "	bne	2f\n"
-		     "  stwcx.	%3,0,%2\n"
-		     "	bne-	1b\n"
-		     "  isync\n"
-		     "2:"
-		     : "=&r" (tmp), "=&r" (old)
-		     : "r" (lock), "r" (token)
-		     : "cc", "memory");
-	return old == 0;
-}
-
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		      long npages, int global, bool need_sync)
 {
@@ -463,8 +445,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 	 * the RS field, this is backwards-compatible with P7 and P8.
 	 */
 	if (global) {
-		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-			cpu_relax();
 		if (need_sync)
 			asm volatile("ptesync" : : : "memory");
 		for (i = 0; i < npages; ++i) {
@@ -483,7 +463,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		}
 
 		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-		kvm->arch.tlbie_lock = 0;
 	} else {
 		if (need_sync)
 			asm volatile("ptesync" : : : "memory");

^ permalink raw reply related

* [PATCH v3 4/4] powerpc/kbuild: move -mprofile-kernel check to Kconfig
From: Nicholas Piggin @ 2018-05-14  3:52 UTC (permalink / raw)
  To: linux-kbuild
  Cc: Nicholas Piggin, linuxppc-dev, Masahiro Yamada,
	Segher Boessenkool, Steven Rostedt
In-Reply-To: <20180514035228.21316-1-npiggin@gmail.com>

This eliminates the workaround that requires disabling
-mprofile-kernel by default in Kconfig.

[ Note: this depends on https://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig-shell-v3 ]

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/Kconfig                            | 16 +---------------
 arch/powerpc/Makefile                           | 14 ++------------
 arch/powerpc/tools/gcc-check-mprofile-kernel.sh |  4 ++--
 3 files changed, 5 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 512fcc177c87..af527f894f9b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -460,23 +460,9 @@ config LD_HEAD_STUB_CATCH
 
 	  If unsure, say "N".
 
-config DISABLE_MPROFILE_KERNEL
-	bool "Disable use of mprofile-kernel for kernel tracing"
-	depends on PPC64 && CPU_LITTLE_ENDIAN
-	default y
-	help
-	  Selecting this options disables use of the mprofile-kernel ABI for
-	  kernel tracing. That will cause options such as live patching
-	  (CONFIG_LIVEPATCH) which depend on CONFIG_DYNAMIC_FTRACE_WITH_REGS to
-	  be disabled also.
-
-	  If you have a toolchain which supports mprofile-kernel, then you can
-	  disable this. Otherwise leave it enabled. If you're not sure, say
-	  "Y".
-
 config MPROFILE_KERNEL
 	depends on PPC64 && CPU_LITTLE_ENDIAN
-	def_bool !DISABLE_MPROFILE_KERNEL
+	def_bool $(success $(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__)
 
 config IOMMU_HELPER
 	def_bool PPC64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 6faf1d6ad9dd..8f7a64fe7370 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -161,18 +161,8 @@ CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
 endif
 
 ifdef CONFIG_MPROFILE_KERNEL
-    ifeq ($(shell $(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__),OK)
-        CC_FLAGS_FTRACE := -pg -mprofile-kernel
-        KBUILD_CPPFLAGS += -DCC_USING_MPROFILE_KERNEL
-    else
-        # If the user asked for mprofile-kernel but the toolchain doesn't
-        # support it, emit a warning and deliberately break the build later
-        # with mprofile-kernel-not-supported. We would prefer to make this an
-        # error right here, but then the user would never be able to run
-        # oldconfig to change their configuration.
-        $(warning Compiler does not support mprofile-kernel, set CONFIG_DISABLE_MPROFILE_KERNEL)
-        CC_FLAGS_FTRACE := -mprofile-kernel-not-supported
-    endif
+	CC_FLAGS_FTRACE := -pg -mprofile-kernel
+	KBUILD_CPPFLAGS += -DCC_USING_MPROFILE_KERNEL
 endif
 
 CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
diff --git a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
index 061f8035bdbe..ec4486a9c4a3 100755
--- a/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
+++ b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh
@@ -10,13 +10,13 @@ set -o pipefail
 # Test whether the compile option -mprofile-kernel exists and generates
 # profiling code (ie. a call to _mcount()).
 echo "int func() { return 0; }" | \
-    $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
+    $* -m64 -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
     grep -q "_mcount"
 
 # Test whether the notrace attribute correctly suppresses calls to _mcount().
 
 echo -e "#include <linux/compiler.h>\nnotrace int func() { return 0; }" | \
-    $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
+    $* -m64 -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \
     grep -q "_mcount" && \
     exit 1
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH v3 3/4] powerpc/kbuild: Use flags variables rather than overriding LD/CC/AS
From: Nicholas Piggin @ 2018-05-14  3:52 UTC (permalink / raw)
  To: linux-kbuild
  Cc: Nicholas Piggin, linuxppc-dev, Masahiro Yamada,
	Segher Boessenkool, Steven Rostedt
In-Reply-To: <20180514035228.21316-1-npiggin@gmail.com>

The powerpc toolchain can compile combinations of 32/64 bit and
big/little endian, so it's convenient to consider, e.g.,

  `CC -m64 -mbig-endian`

to be the C compiler for the purpose of invoking it to build target
artifacts. So overriding the CC variable to include these flags
works for this purpose.

Unfortunately that is not compatible with the way the proposed new
Kconfig macro language will work.

After previous patches in this series, these flags can be carefully
passed in using flags instead.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Since v2:
- removed extra -EB in the recordmcount script (thanks mpe)

 arch/powerpc/Makefile   | 16 +++++++++-------
 scripts/recordmcount.pl |  7 +++++++
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 167b26a0780c..6faf1d6ad9dd 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -75,13 +75,15 @@ endif
 endif
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override LD	+= -EL
+KBUILD_CFLAGS	+= -mlittle-endian
+LDFLAGS		+= -EL
 LDEMULATION	:= lppc
 GNUTARGET	:= powerpcle
 MULTIPLEWORD	:= -mno-multiple
 KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
 else
-override LD	+= -EB
+KBUILD_CFLAGS += $(call cc-option,-mbig-endian)
+LDFLAGS		+= -EB
 LDEMULATION	:= ppc
 GNUTARGET	:= powerpc
 MULTIPLEWORD	:= -mmultiple
@@ -94,19 +96,19 @@ aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
 endif
 
-cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
-cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
 ifneq ($(cc-name),clang)
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
 endif
 
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
 aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
 
 ifeq ($(HAS_BIARCH),y)
-override AS	+= -a$(BITS)
-override LD	+= -m elf$(BITS)$(LDEMULATION)
-override CC	+= -m$(BITS)
+KBUILD_CFLAGS	+= -m$(BITS)
+KBUILD_AFLAGS	+= -m$(BITS) -Wl,-a$(BITS)
+LDFLAGS		+= -m elf$(BITS)$(LDEMULATION)
 KBUILD_ARFLAGS	+= --target=elf$(BITS)-$(GNUTARGET)
 endif
 
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 191eb949d52c..3c67304a7425 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -274,6 +274,13 @@ if ($arch eq "x86_64") {
     if ($bits == 64) {
 	$type = ".quad";
     }
+    if ($endian eq "big") {
+	    $cc .= " -mbig-endian ";
+	    $ld .= " -EB ";
+    } else {
+	    $cc .= " -mlittle-endian ";
+	    $ld .= " -EL ";
+    }
 
 } elsif ($arch eq "arm") {
     $alignment = 2;
-- 
2.17.0

^ permalink raw reply related

* [PATCH v3 2/4] powerpc/kbuild: remove CROSS32 defines from top level powerpc Makefile
From: Nicholas Piggin @ 2018-05-14  3:52 UTC (permalink / raw)
  To: linux-kbuild
  Cc: Nicholas Piggin, linuxppc-dev, Masahiro Yamada,
	Segher Boessenkool
In-Reply-To: <20180514035228.21316-1-npiggin@gmail.com>

Switch VDSO32 build over to use CROSS32_COMPILE directly, and have
it pass in -m32 after the standard c_flags. This allows endianness
overrides to be removed and the endian and bitness flags moved into
standard flags variables.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/Makefile               | 10 ----------
 arch/powerpc/boot/Makefile          | 16 +++++++++++-----
 arch/powerpc/kernel/vdso32/Makefile | 15 +++++++++++----
 3 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 15ca4bafad82..167b26a0780c 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -17,14 +17,8 @@ HAS_BIARCH	:= $(call cc-option-yn, -m32)
 # Set default 32 bits cross compilers for vdso and boot wrapper
 CROSS32_COMPILE ?=
 
-CROSS32CC		:= $(CROSS32_COMPILE)gcc
-CROSS32AR		:= $(CROSS32_COMPILE)ar
-
 ifeq ($(HAS_BIARCH),y)
 ifeq ($(CROSS32_COMPILE),)
-CROSS32CC	:= $(CC) -m32
-KBUILD_ARFLAGS	+= --target=elf32-powerpc
-
 ifdef CONFIG_PPC32
 # These options will be overridden by any -mcpu option that the CPU
 # or platform code sets later on the command line, but they are needed
@@ -33,13 +27,9 @@ ifdef CONFIG_PPC32
 KBUILD_CFLAGS		+= -mcpu=powerpc
 KBUILD_AFLAGS		+= -mcpu=powerpc
 endif
-
 endif
 endif
 
-
-export CROSS32CC CROSS32AR
-
 ifeq ($(CROSS_COMPILE),)
 KBUILD_DEFCONFIG := $(shell uname -m)_defconfig
 else
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 26d5d2a5b8e9..49767e06202c 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -23,19 +23,23 @@ all: $(obj)/zImage
 compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP
 compress-$(CONFIG_KERNEL_XZ)   := CONFIG_KERNEL_XZ
 
+ifdef CROSS32_COMPILE
+    BOOTCC := $(CROSS32_COMPILE)gcc
+    BOOTAR := $(CROSS32_COMPILE)ar
+else
+    BOOTCC := $(CC)
+    BOOTAR := $(AR)
+endif
+
 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -Os -msoft-float -pipe \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
 		 -D$(compress-y)
 
-BOOTCC := $(CC)
 ifdef CONFIG_PPC64_BOOT_WRAPPER
 BOOTCFLAGS	+= -m64
 else
 BOOTCFLAGS	+= -m32
-ifdef CROSS32_COMPILE
-    BOOTCC := $(CROSS32_COMPILE)gcc
-endif
 endif
 
 BOOTCFLAGS	+= -isystem $(shell $(BOOTCC) -print-file-name=include)
@@ -49,6 +53,8 @@ endif
 
 BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 
+BOOTARFLAGS	:= -cr$(KBUILD_ARFLAGS)
+
 ifdef CONFIG_DEBUG_INFO
 BOOTCFLAGS	+= -g
 endif
@@ -202,7 +208,7 @@ quiet_cmd_bootas = BOOTAS  $@
       cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
 
 quiet_cmd_bootar = BOOTAR  $@
-      cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+      cmd_bootar = $(BOOTAR) $(BOOTARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
 
 $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
 	$(call if_changed_dep,bootcc)
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index b8c434d1d459..50112d4473bb 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -8,8 +8,15 @@ obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
 
 # Build rules
 
-ifeq ($(CONFIG_PPC32),y)
-CROSS32CC := $(CC)
+ifdef CROSS32_COMPILE
+    VDSOCC := $(CROSS32_COMPILE)gcc
+else
+    VDSOCC := $(CC)
+endif
+
+CC32FLAGS :=
+ifdef CONFIG_PPC64
+CC32FLAGS += -m32
 endif
 
 targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
@@ -45,9 +52,9 @@ $(obj-vdso32): %.o: %.S FORCE
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+      cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso32as = VDSO32A $@
-      cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
+      cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
 
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
-- 
2.17.0

^ permalink raw reply related

* [PATCH v3 1/4] powerpc/kbuild: set default generic machine type for 32-bit compile
From: Nicholas Piggin @ 2018-05-14  3:52 UTC (permalink / raw)
  To: linux-kbuild
  Cc: Nicholas Piggin, linuxppc-dev, Masahiro Yamada,
	Segher Boessenkool
In-Reply-To: <20180514035228.21316-1-npiggin@gmail.com>

Some 64-bit toolchains use the wrong ISA variant for compiling 32-bit
kernels, even with -m32. Debian's powerpc64le is one such case, and
that is because it is built with --with-cpu=power8.

So when cross compiling a 32-bit kernel with a 64-bit toolchain, set
-mcpu=powerpc initially, which is the generic 32-bit powerpc machine
type and scheduling model. CPU and platform code can override this
with subsequent -mcpu flags if necessary.

This is not done for 32-bit toolchains otherwise it would override
their defaults, which are presumably set appropriately for the
environment (more so than a 64-bit cross compiler).

This fixes a lot of build failures due to incompatible assembly when
compiling 32-bit kernel with the Debian powerpc64le 64-bit toolchain.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

---
Since v2:
- reworded changelog to explain the cause of the problem,
  (thanks Segher).
- moved the flags into the 64-32 cross compile case so as not to
  disturb native 32-bit builds.

 arch/powerpc/Makefile | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 95813df90801..15ca4bafad82 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -24,9 +24,20 @@ ifeq ($(HAS_BIARCH),y)
 ifeq ($(CROSS32_COMPILE),)
 CROSS32CC	:= $(CC) -m32
 KBUILD_ARFLAGS	+= --target=elf32-powerpc
+
+ifdef CONFIG_PPC32
+# These options will be overridden by any -mcpu option that the CPU
+# or platform code sets later on the command line, but they are needed
+# to set a sane 32-bit cpu target for the 64-bit cross compiler which
+# may default to the wrong ISA.
+KBUILD_CFLAGS		+= -mcpu=powerpc
+KBUILD_AFLAGS		+= -mcpu=powerpc
+endif
+
 endif
 endif
 
+
 export CROSS32CC CROSS32AR
 
 ifeq ($(CROSS_COMPILE),)
-- 
2.17.0

^ permalink raw reply related

* [PATCH v3 0/4] powerpc patches for new Kconfig language
From: Nicholas Piggin @ 2018-05-14  3:52 UTC (permalink / raw)
  To: linux-kbuild
  Cc: Nicholas Piggin, linuxppc-dev, Masahiro Yamada,
	Segher Boessenkool

This series of patches improves the powerpc kbuild system. The
motivation was to be compatible with the new Kconfig scripting
language that Yamada-san has proposed here:

https://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git/?h=kconfig-shell-v3

I have tested on top of that tree and powerpc now builds there.

I think patches 1-3 are improvements to the build system even before
kconfig-shell, so they could be merged ahead of it.

Patch 4 actually takes advantage of a new feature of the kconfig-shell
changes to improve powerpc kconfig, so that should rather be merged
with the kbuild tree with an ack from powerpc. Yamada-san are you
planning to merge kconfig-shell for 4.17?

Thanks,
Nick

Nicholas Piggin (4):
  powerpc/kbuild: set default generic machine type for 32-bit compile
  powerpc/kbuild: remove CROSS32 defines from top level powerpc Makefile
  powerpc/kbuild: Use flags variables rather than overriding LD/CC/AS
  powerpc/kbuild: move -mprofile-kernel check to Kconfig

 arch/powerpc/Kconfig                          | 16 +------
 arch/powerpc/Makefile                         | 45 ++++++++-----------
 arch/powerpc/boot/Makefile                    | 16 ++++---
 arch/powerpc/kernel/vdso32/Makefile           | 15 +++++--
 .../tools/gcc-check-mprofile-kernel.sh        |  4 +-
 scripts/recordmcount.pl                       |  7 +++
 6 files changed, 51 insertions(+), 52 deletions(-)

-- 
2.17.0

^ permalink raw reply

* Re: [PATCH] powerpc/xive: Remove (almost) unused macros
From: Michael Ellerman @ 2018-05-14  3:30 UTC (permalink / raw)
  To: Russell Currey, linuxppc-dev
In-Reply-To: <20180511080313.29815-1-ruscur@russell.cc>

Russell Currey <ruscur@russell.cc> writes:
> diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
> index b48454be5b98..3b471c0193ca 100644
> --- a/arch/powerpc/sysdev/xive/native.c
> +++ b/arch/powerpc/sysdev/xive/native.c
> @@ -341,7 +341,7 @@ static void xive_native_update_pending(struct xive_cpu *xc)
>  	 * of the hypervisor interrupt (if any)
>  	 */
>  	cppr = ack & 0xff;
> -	he = GETFIELD(TM_QW3_NSR_HE, (ack >> 8));
> +	he = ((ack >> 8) & TM_QW3_NSR_HE) >> (ffs(TM_QW3_NSR_HE) - 1);

Using the #defines and ffs() here doesn't make the code any more
readable or maintainable.

We can just write:

	he = ack >> 14;

Or if we want to be more explicit about it being the top two bits of the
2nd byte:

	he = (ack >> 8) >> 6;


cheers

^ permalink raw reply

* Re: [PATCH 2/2] powerpc: Enable ASYM_SMT on interleaved big-core systems
From: Michael Neuling @ 2018-05-14  3:22 UTC (permalink / raw)
  To: Gautham R. Shenoy, Michael Ellerman, Benjamin Herrenschmidt,
	Vaidyanathan Srinivasan, Akshay Adiga, Shilpasri G Bhat,
	Balbir Singh, Oliver O'Halloran, Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1526037444-22876-3-git-send-email-ego@linux.vnet.ibm.com>

On Fri, 2018-05-11 at 16:47 +0530, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>
> Each of the SMT4 cores forming a fused-core are more or less
> independent units. Thus when multiple tasks are scheduled to run on
> the fused core, we get the best performance when the tasks are spread
> across the pair of SMT4 cores.
>
> Since the threads in the pair of SMT4 cores of an interleaved big-core
> are numbered {0,2,4,6} and {1,3,5,7} respectively, enable ASYM_SMT on
> such interleaved big-cores that will bias the load-balancing of tasks
> on smaller numbered threads, which will automatically result in
> spreading the tasks uniformly across the associated pair of SMT4
> cores.
>
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/smp.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>=20
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 9ca7148..0153f01 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -1082,7 +1082,7 @@ static int powerpc_smt_flags(void)
>  {
>  	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
> =20
> -	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> +	if (cpu_has_feature(CPU_FTR_ASYM_SMT) || has_interleaved_big_core) {

Shouldn't we just set CPU_FTR_ASYM_SMT and leave this code unchanged?


>  		printk_once(KERN_INFO "Enabling Asymmetric SMT
> scheduling\n");
>  		flags |= SD_ASYM_PACKING;
>  	}

^ permalink raw reply

* Re: [PATCH 1/2] powerpc: Detect the presence of big-core with interleaved threads
From: Michael Neuling @ 2018-05-14  3:21 UTC (permalink / raw)
  To: Gautham R. Shenoy, Michael Ellerman, Benjamin Herrenschmidt,
	Vaidyanathan Srinivasan, Akshay Adiga, Shilpasri G Bhat,
	Balbir Singh, Oliver O'Halloran, Nicholas Piggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1526037444-22876-2-git-send-email-ego@linux.vnet.ibm.com>

Thanks for posting this... A couple of comments below.

On Fri, 2018-05-11 at 16:47 +0530, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>
> A pair of IBM POWER9 SMT4 cores can be fused together to form a
> big-core with 8 SMT threads. This can be discovered via the
> "ibm,thread-groups" CPU property in the device tree which will
> indicate which group of threads that share the L1 cache, translation
> cache and instruction data flow.  If there are multiple such group of
> threads, then the core is a big-core. The thread-ids of the threads of
> the big-core can be obtained by interleaving the thread-ids of the
> thread-groups (component small core).
>
> Eg: Threads in the pair of component SMT4 cores of an interleaved
> big-core are numbered {0,2,4,6} and {1,3,5,7} respectively.
>
> This patch introduces a function to check if a given device tree node
> corresponding to a CPU node represents an interleaved big-core.
>
> This function is invoked during the boot-up to detect the presence of
> interleaved big-cores. The presence of such an interleaved big-core is
> recorded in a global variable for later use.
>
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/cputhreads.h |  8 +++--
>  arch/powerpc/kernel/setup-common.c    | 63 +++++++++++++++++++++++++++++=
++++-
> -
>  2 files changed, 66 insertions(+), 5 deletions(-)
>=20
> diff --git a/arch/powerpc/include/asm/cputhreads.h
> b/arch/powerpc/include/asm/cputhreads.h
> index d71a909..b706f0a 100644
> --- a/arch/powerpc/include/asm/cputhreads.h
> +++ b/arch/powerpc/include/asm/cputhreads.h
> @@ -23,11 +23,13 @@
>  extern int threads_per_core;
>  extern int threads_per_subcore;
>  extern int threads_shift;
> +extern bool has_interleaved_big_core;
>  extern cpumask_t threads_core_mask;
>  #else
> -#define threads_per_core	1
> -#define threads_per_subcore	1
> -#define threads_shift		0
> +#define threads_per_core		1
> +#define threads_per_subcore		1
> +#define threads_shift			0
> +#define has_interleaved_big_core	0
>  #define threads_core_mask	(*get_cpu_mask(0))
>  #endif
> =20
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 0af5c11..884dff2 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -408,10 +408,12 @@ void __init check_for_initrd(void)
>  #ifdef CONFIG_SMP
> =20
>  int threads_per_core, threads_per_subcore, threads_shift;
> +bool has_interleaved_big_core;
>  cpumask_t threads_core_mask;
>  EXPORT_SYMBOL_GPL(threads_per_core);
>  EXPORT_SYMBOL_GPL(threads_per_subcore);
>  EXPORT_SYMBOL_GPL(threads_shift);
> +EXPORT_SYMBOL_GPL(has_interleaved_big_core);
>  EXPORT_SYMBOL_GPL(threads_core_mask);
> =20
>  static void __init cpu_init_thread_core_maps(int tpc)
> @@ -436,8 +438,56 @@ static void __init cpu_init_thread_core_maps(int tpc)
>  	printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
>  }
> =20
> -
>  u32 *cpu_to_phys_id =3D NULL;
> +/*
> + * check_for_interleaved_big_core - Checks if the core represented by
> + *	 dn is a big-core whose threads are interleavings of the
> + *	 threads of the component small cores.
> + *
> + * @dn: device node corresponding to the core.
> + *
> + * Returns true if the core is a interleaved big-core.
> + * Returns false otherwise.
> + */
> +static inline bool check_for_interleaved_big_core(struct device_node *dn=
)
> +{
> +	int len, nr_groups, threads_per_group;
> +	const __be32 *thread_groups;
> +	__be32 *thread_list, *first_cpu_idx;
> +	int cur_cpu, next_cpu, i, j;
> +
> +	thread_groups =3D of_get_property(dn, "ibm,thread-groups", &len);
> +	if (!thread_groups)
> +		return false;

Can you document what this property looks like? Seems to be nr_groups,
threads_per_group, thread_list. Can you explain what each of these means?

If we get configured with an SMT2 big-core (ie. two interleaved SMT1 normal
cores), will this code also work there?

> +
> +	nr_groups =3D be32_to_cpu(*(thread_groups + 1));
> +	if (nr_groups <=3D 1)
> +		return false;
> +
> +	threads_per_group =3D be32_to_cpu(*(thread_groups + 2));
> +	thread_list =3D (__be32 *)thread_groups + 3;
> +
> +	/*
> +	 * In case of an interleaved big-core, the thread-ids of the
> +	 * big-core can be obtained by interleaving the the thread-ids
> +	 * of the component small
> +	 *
> +	 * Eg: On a 8-thread big-core with two SMT4 small cores, the
> +	 * threads of the two component small cores will be
> +	 * {0, 2, 4, 6} and {1, 3, 5, 7}.
> +	 */
> +	for (i =3D 0; i < nr_groups; i++) {
> +		first_cpu_idx =3D thread_list + i * threads_per_group;
> +
> +		for (j =3D 0; j < threads_per_group - 1; j++) {
> +			cur_cpu =3D be32_to_cpu(*(first_cpu_idx + j));
> +			next_cpu =3D be32_to_cpu(*(first_cpu_idx + j + 1));
> +			if (next_cpu !=3D cur_cpu + nr_groups)
> +				return false;
> +		}
> +	}
> +	return true;
> +}
> =20
>  /**
>   * setup_cpu_maps - initialize the following cpu maps:
> @@ -565,7 +615,16 @@ void __init smp_setup_cpu_maps(void)
>  	vdso_data->processorCount =3D num_present_cpus();
>  #endif /* CONFIG_PPC64 */
> =20
> -        /* Initialize CPU <=3D> thread mapping/
> +	dn =3D of_find_node_by_type(NULL, "cpu");
> +	if (dn) {
> +		if (check_for_interleaved_big_core(dn)) {
> +			has_interleaved_big_core =3D true;
> +			pr_info("Detected interleaved big-cores\n");

Is there a runtime way to check this also?  If the dmesg buffer overflows, =
we
lose this.

Mikey

> +		}
> +		of_node_put(dn);
> +	}
> +
> +	/* Initialize CPU <=3D> thread mapping/
>  	 *
>  	 * WARNING: We assume that the number of threads is the same for
>  	 * every CPU in the system. If that is not the case, then some code

^ permalink raw reply

* Re: [PATCH v2 4/4] powerpc: Allow LD_DEAD_CODE_DATA_ELIMINATION to be selected
From: Michael Ellerman @ 2018-05-14  2:05 UTC (permalink / raw)
  To: Nicholas Piggin, Masahiro Yamada
  Cc: linux-arch, linuxppc-dev, Nicholas Piggin, linux-kbuild
In-Reply-To: <20180509130001.24276-5-npiggin@gmail.com>

Nicholas Piggin <npiggin@gmail.com> writes:

> This requires further changes to linker script to KEEP some tables
> and wildcard compiler generated sections into the right place. This
> includes pp32 modifications from Christophe Leroy.
>
> When compiling powernv_defconfig with this option, the resulting
> kernel is almost 400kB smaller (and still boots):
>
>     text      data       bss        dec   filename
> 11827621   4810490   1341080   17979191   vmlinux
> 11752437   4598858   1338776   17690071   vmlinux.dcde
>
> Mathieu's numbers for custom Mac Mini G4 config has almost 200kB
> saving. It also had some increase in vmlinux size for as-yet
> unknown reasons.
>
>     text      data       bss        dec   filename
>  7461457   2475122   1428064   11364643   vmlinux
>  7386425   2364370   1425432   11176227   vmlinux.dcde
>
> Tested-by: Christophe Leroy <christophe.leroy@c-s.fr> [8xx]
> Tested-by: Mathieu Malaterre <malat@debian.org> [32-bit powermac]
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/Kconfig              |  1 +
>  arch/powerpc/kernel/vmlinux.lds.S | 22 +++++++++++-----------
>  2 files changed, 12 insertions(+), 11 deletions(-)

I'm happy for this to go in via the kbuild tree.

As long as it's in linux-next it will get tested by my CI which should
hopefully shake out any bugs.

Acked-by: Michael Ellerman <mpe@ellerman.id.au>

cheers

^ permalink raw reply

* Re: [PATCH v2 0/4] LD_DEAD_CODE_DATA_ELIMINATION fixes and enabling for powerpc
From: Masahiro Yamada @ 2018-05-14  0:08 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linux-arch, Linux Kbuild mailing list, linuxppc-dev
In-Reply-To: <20180509130001.24276-1-npiggin@gmail.com>

2018-05-09 21:59 GMT+09:00 Nicholas Piggin <npiggin@gmail.com>:
> v2 changes only patch 4, updates the changelog with ppc32 numbers and
> Tested-by tags, and also use TEXT_MAIN in the linker script as
> Christophe suggested.


Replaced with v2.
Thanks!


> Thanks,
> Nick
>
> Nicholas Piggin (4):
>   kbuild: Fix asm-generic/vmlinux.lds.h for
>     LD_DEAD_CODE_DATA_ELIMINATION
>   kbuild: LD_DEAD_CODE_DATA_ELIMINATION no
>     -ffunction-sections/-fdata-sections for module build
>   kbuild: Allow LD_DEAD_CODE_DATA_ELIMINATION to be selectable if
>     enabled
>   powerpc: Allow LD_DEAD_CODE_DATA_ELIMINATION to be selected
>
>  Makefile                          |  4 +--
>  arch/Kconfig                      | 15 ----------
>  arch/powerpc/Kconfig              |  1 +
>  arch/powerpc/kernel/vmlinux.lds.S | 22 +++++++-------
>  include/asm-generic/vmlinux.lds.h | 49 ++++++++++++++++++-------------
>  init/Kconfig                      | 27 +++++++++++++++++
>  6 files changed, 70 insertions(+), 48 deletions(-)
>
> --
> 2.17.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Best Regards
Masahiro Yamada

^ permalink raw reply

* [PATCH 3/3] powerpc/64s/radix: optimise pte_update
From: Nicholas Piggin @ 2018-05-13  4:21 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20180513042106.15470-1-npiggin@gmail.com>

Implementing pte_update with pte_xchg (which uses cmpxchg) is
inefficient. A single larx/stcx. works fine, no need for the less
efficient cmpxchg sequence.

Then remove the memory barriers from the operation. There is a
requirement for TLB flushing to load mm_cpumask after the store
that reduces pte permissions, which is moved into the TLB flush
code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 25 +++++++++++-----------
 arch/powerpc/mm/mmu_context.c              |  6 ++++--
 arch/powerpc/mm/tlb-radix.c                | 11 +++++++++-
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 45bf1e1b1d33..cc9437a542cc 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -127,20 +127,21 @@ extern void radix__mark_initmem_nx(void);
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 					       unsigned long set)
 {
-	pte_t pte;
-	unsigned long old_pte, new_pte;
-
-	do {
-		pte = READ_ONCE(*ptep);
-		old_pte = pte_val(pte);
-		new_pte = (old_pte | set) & ~clr;
-
-	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
-
-	return old_pte;
+	__be64 old_be, tmp_be;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n"
+	"	andc	%1,%0,%5	\n"
+	"	or	%1,%1,%4	\n"
+	"	stdcx.	%1,0,%3		\n"
+	"	bne-	1b"
+	: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+	: "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
+	: "cc" );
+
+	return be64_to_cpu(old_be);
 }
 
-
 static inline unsigned long radix__pte_update(struct mm_struct *mm,
 					unsigned long addr,
 					pte_t *ptep, unsigned long clr,
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0ab297c4cfad..f84e14f23e50 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -57,8 +57,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
 		 * copy_mm_to_paca().
 		 *
-		 * On the read side the barrier is in pte_xchg(), which orders
-		 * the store to the PTE vs the load of mm_cpumask.
+		 * On the other side, the barrier is in mm/tlb-radix.c for
+		 * radix which orders earlier stores to clear the PTEs vs
+		 * the load of mm_cpumask. And pte_xchg which does the same
+		 * thing for hash.
 		 *
 		 * This full barrier is needed by membarrier when switching
 		 * between processes after store to rq->curr, before user-space
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 55f93d66c8d2..b419702b1ba6 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -535,6 +535,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	/*
+	 * Order loads of mm_cpumask vs previous stores to clear ptes before
+	 * the invalidate. See barrier in switch_mm_irqs_off
+	 */
+	smp_mb();
 	if (!mm_is_thread_local(mm)) {
 		if (mm_is_singlethreaded(mm)) {
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
@@ -560,6 +565,7 @@ void radix__flush_all_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm)) {
 		_tlbie_pid(pid, RIC_FLUSH_ALL);
 		if (mm_is_singlethreaded(mm))
@@ -587,6 +593,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 	} else {
@@ -655,6 +662,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -820,6 +828,7 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -882,7 +891,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
-
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	} else {
-- 
2.17.0

^ permalink raw reply related

* [PATCH 2/3] powerpc/64s/radix: avoid ptesync after set_pte and ptep_set_access_flags
From: Nicholas Piggin @ 2018-05-13  4:21 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20180513042106.15470-1-npiggin@gmail.com>

The ISA suggests ptesync after setting a pte, to prevent a table walk
initiated by a subsequent access from causing a spurious fault, which
may be an allowance for implementations to have page table walk loads
incoherent with store queues.

However there is no correctness problem in spurious faults -- the
kernel copes with these at any time, and the architecture requires
the pte to be re-loaded, which would eventually find the updated pte.

On POWER9 there does not appear to be a large window where this is a
problem, so as an optimisation, remove the costly ptesync from pte
updates. If implementations benefit from ptesync, it would likely be
better to go in update_mmu_cache, rather than set_pte etc which is
called for things like fork and mprotect.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index fcd92f9b6ec0..45bf1e1b1d33 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -209,7 +209,6 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
 		__radix_pte_update(ptep, 0, new_pte);
 	} else
 		__radix_pte_update(ptep, 0, set);
-	asm volatile("ptesync" : : : "memory");
 }
 
 static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
@@ -226,7 +225,6 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep, pte_t pte, int percpu)
 {
 	*ptep = pte;
-	asm volatile("ptesync" : : : "memory");
 }
 
 static inline int radix__pmd_bad(pmd_t pmd)
-- 
2.17.0

^ permalink raw reply related

* [PATCH 1/3] powerpc/64s/radix: make ptep_get_and_clear_full non-atomic for the full case
From: Nicholas Piggin @ 2018-05-13  4:21 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20180513042106.15470-1-npiggin@gmail.com>

This matches other architectures, when we know there will be no
further accesses to the address (e.g., for teardown), page table
entries can be cleared non-atomically.

The comments about NMMU are bogus, all MMU notifiers (including NMMU)
are released at this point, with their TLBs flushed. An NMMU access
at this point would be a bug.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 705193e7192f..fcd92f9b6ec0 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -176,14 +176,8 @@ static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
 	unsigned long old_pte;
 
 	if (full) {
-		/*
-		 * If we are trying to clear the pte, we can skip
-		 * the DD1 pte update sequence and batch the tlb flush. The
-		 * tlb flush batching is done by mmu gather code. We
-		 * still keep the cmp_xchg update to make sure we get
-		 * correct R/C bit which might be updated via Nest MMU.
-		 */
-		old_pte = __radix_pte_update(ptep, ~0ul, 0);
+		old_pte = pte_val(*ptep);
+		*ptep = __pte(0);
 	} else
 		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH 0/3] powerpc/64s/radix pte manipulation optimisations
From: Nicholas Piggin @ 2018-05-13  4:21 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Here's a few patches which I'm sure will cause a lot of concern, but
I think now is the time to have it out and really start optimising
these things as far as we can. Radix MMU has been stable for quite
some time, and distros have made releases with the more conservative
flushes and barriers and updates etc.

If we decide not to do any of these things, we can document why not
so it becomes easier to revisit.

With these patches, plus the TLB flush reduction patches earlier,
plus a few generic mm patches that I haven't posted yet, fork/exec
benchmark from selftests increases performance by 11%.

A test which mprotects 16GB of memory to readonly, then reads a byte
from each page, then protects read/write and updates a byte from each
page, then repeats, is more than 2x faster. Mostly due to reduced TLB
flushing, barriers, and atomics from these two patch sets.

Nicholas Piggin (3):
  powerpc/64s/radix: make ptep_get_and_clear_full non-atomic for the
    full case
  powerpc/64s/radix: avoid ptesync after set_pte and
    ptep_set_access_flags
  powerpc/64s/radix: optimise pte_update

 arch/powerpc/include/asm/book3s/64/radix.h | 37 +++++++++-------------
 arch/powerpc/mm/mmu_context.c              |  6 ++--
 arch/powerpc/mm/tlb-radix.c                | 11 ++++++-
 3 files changed, 29 insertions(+), 25 deletions(-)

-- 
2.17.0

^ permalink raw reply

* Re: [PATCH] selftests/powerpc: fix exec benchmark
From: Mathieu Malaterre @ 2018-05-12 16:35 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, Segher Boessenkool
In-Reply-To: <CA+7wUszjeXyYYExUmAVMh774HGD-5-CP5GKo5dSS6g32Dg1JcQ@mail.gmail.com>

On Sat, May 12, 2018 at 10:54 AM, Mathieu Malaterre <malat@debian.org> wrote:
> Nick,
>
> On Sat, May 12, 2018 at 5:35 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>> The exec_target binary could segfault calling _exit(2) because r13
>> is not set up properly (and libc looks at that when performing a
>> syscall). Call SYS_exit using syscall(2) which doesn't seem to
>> have this problem.
>>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>  tools/testing/selftests/powerpc/benchmarks/exec_target.c | 7 +++++--
>>  1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
>> index 3c9c144192be..c14b0fc1edde 100644
>> --- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c
>> +++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
>> @@ -6,8 +6,11 @@
>>   * Copyright 2018, Anton Blanchard, IBM Corp.
>>   */
>>
>> -void _exit(int);
>> +#define _GNU_SOURCE
>> +#include <unistd.h>
>> +#include <sys/syscall.h>
>> +
>>  void _start(void)
>>  {
>> -       _exit(0);
>> +       syscall(SYS_exit, 0);
>>  }
>> --
>> 2.17.0
>>
>
> Could you please apply the same patch to :
>
> ./tools/testing/selftests/size/get_size.c
>
> It segfault on ppc32 with default cross compiler from Debian.

Never mind, I did not test it, and this seems not to be sufficient to
make the executable run properly.

^ permalink raw reply

* [PATCH] powerpc: Fix "integer constant is too large" build failure
From: Finn Thain @ 2018-05-12  8:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

My powerpc-linux-gnu-gcc v4.4.5 compiler can't build a 32-bit kernel
any more:

arch/powerpc/lib/sstep.c: In function 'do_popcnt':
arch/powerpc/lib/sstep.c:1068: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c:1069: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c:1069: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c:1070: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c:1079: error: integer constant is too large for 'long' type
arch/powerpc/lib/sstep.c: In function 'do_prty':
arch/powerpc/lib/sstep.c:1117: error: integer constant is too large for 'long' type

This file gets compiled with -std=gnu89 which means a constant can be
given the type 'long' even if it won't fit. Fix the errors with a 'ULL'
suffix on the relevant constants.

Fixes: 2c979c489fee ("powerpc/lib/sstep: Add prty instruction emulation")
Fixes: dcbd19b48d31 ("powerpc/lib/sstep: Add popcnt instruction emulation")
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
This change was compile tested but not regression tested.
---
 arch/powerpc/lib/sstep.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 34d68f1b1b40..49427a3ee104 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1065,9 +1065,10 @@ static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
 {
 	unsigned long long out = v1;
 
-	out -= (out >> 1) & 0x5555555555555555;
-	out = (0x3333333333333333 & out) + (0x3333333333333333 & (out >> 2));
-	out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0f;
+	out -= (out >> 1) & 0x5555555555555555ULL;
+	out = (0x3333333333333333ULL & out) +
+	      (0x3333333333333333ULL & (out >> 2));
+	out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
 
 	if (size == 8) {	/* popcntb */
 		op->val = out;
@@ -1076,7 +1077,7 @@ static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
 	out += out >> 8;
 	out += out >> 16;
 	if (size == 32) {	/* popcntw */
-		op->val = out & 0x0000003f0000003f;
+		op->val = out & 0x0000003f0000003fULL;
 		return;
 	}
 
@@ -1114,7 +1115,7 @@ static nokprobe_inline void do_prty(const struct pt_regs *regs,
 
 	res ^= res >> 16;
 	if (size == 32) {		/* prtyw */
-		op->val = res & 0x0000000100000001;
+		op->val = res & 0x0000000100000001ULL;
 		return;
 	}
 
-- 
2.16.1

^ permalink raw reply related

* Re: [PATCH] selftests/powerpc: fix exec benchmark
From: Mathieu Malaterre @ 2018-05-12  8:54 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, Segher Boessenkool
In-Reply-To: <20180512033525.18762-1-npiggin@gmail.com>

Nick,

On Sat, May 12, 2018 at 5:35 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
> The exec_target binary could segfault calling _exit(2) because r13
> is not set up properly (and libc looks at that when performing a
> syscall). Call SYS_exit using syscall(2) which doesn't seem to
> have this problem.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  tools/testing/selftests/powerpc/benchmarks/exec_target.c | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
> index 3c9c144192be..c14b0fc1edde 100644
> --- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c
> +++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
> @@ -6,8 +6,11 @@
>   * Copyright 2018, Anton Blanchard, IBM Corp.
>   */
>
> -void _exit(int);
> +#define _GNU_SOURCE
> +#include <unistd.h>
> +#include <sys/syscall.h>
> +
>  void _start(void)
>  {
> -       _exit(0);
> +       syscall(SYS_exit, 0);
>  }
> --
> 2.17.0
>

Could you please apply the same patch to :

./tools/testing/selftests/size/get_size.c

It segfaults on ppc32 with the default cross compiler from Debian.

Thanks

^ permalink raw reply

* [PATCH] selftests/powerpc: fix exec benchmark
From: Nicholas Piggin @ 2018-05-12  3:35 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

The exec_target binary could segfault calling _exit(2) because r13
is not set up properly (and libc looks at that when performing a
syscall). Call SYS_exit using syscall(2) which doesn't seem to
have this problem.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 tools/testing/selftests/powerpc/benchmarks/exec_target.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
index 3c9c144192be..c14b0fc1edde 100644
--- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -6,8 +6,11 @@
  * Copyright 2018, Anton Blanchard, IBM Corp.
  */
 
-void _exit(int);
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
 void _start(void)
 {
-	_exit(0);
+	syscall(SYS_exit, 0);
 }
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH] powerpc/perf: Fix memory allocation for core-imc based on num_possible_cpus()
From: Balbir Singh @ 2018-05-12  0:35 UTC (permalink / raw)
  To: Anju T Sudhakar
  Cc: open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	Madhavan Srinivasan, ppaidipe
In-Reply-To: <1526046222-17842-1-git-send-email-anju@linux.vnet.ibm.com>

On Fri, May 11, 2018 at 11:43 PM, Anju T Sudhakar
<anju@linux.vnet.ibm.com> wrote:
> Currently memory is allocated for core-imc based on cpu_present_mask, which has
> bit 'cpu' set iff cpu is populated. We use  (cpu number / threads per core)
> as as array index to access the memory.
> So in a system with guarded cores, since allocation happens based on
> cpu_present_mask, (cpu number / threads per core) bounds the index and leads
> to memory overflow.
>
> The issue is exposed in a guard test.
> The guard test will make some CPU's as un-available to the system during boot
> time as well as at runtime. So when the cpu is unavailable to the system during
> boot time, the memory allocation happens depending on the number of available
> cpus. And when we access the memory using (cpu number / threads per core) as the
> index the system crashes due to memory overflow.
>
> Allocating memory for core-imc based on cpu_possible_mask, which has
> bit 'cpu' set iff cpu is populatable, will fix this issue.
>
> Reported-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com>
> Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
> ---
>  arch/powerpc/perf/imc-pmu.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)

The changelog does not clearly call out the confusion between present
and possible.
Guarded CPUs are possible but not present, so it blows a hole when we assume the
max length of our allocation is driven by our max present cpus, whereas
one of the cpus
might be online and be beyond the max present cpus, due to the hole.

Reviewed-by: Balbir Singh <bsingharora@gmail.com>

Balbir Singh.

^ permalink raw reply

* Re: [PATCH] powerpc/perf: Fix memory allocation for core-imc based on num_possible_cpus()
From: Michael Neuling @ 2018-05-11 23:45 UTC (permalink / raw)
  To: Anju T Sudhakar, linuxppc-dev; +Cc: maddy, ppaidipe
In-Reply-To: <1526046222-17842-1-git-send-email-anju@linux.vnet.ibm.com>

On Fri, 2018-05-11 at 19:13 +0530, Anju T Sudhakar wrote:
> Currently memory is allocated for core-imc based on cpu_present_mask, whi=
ch
> has
> bit 'cpu' set iff cpu is populated. We use  (cpu number / threads per cor=
e)
> as as array index to access the memory.
> So in a system with guarded cores, since allocation happens based on
> cpu_present_mask, (cpu number / threads per core) bounds the index and le=
ads
> to memory overflow.
>=20
> The issue is exposed in a guard test.
> The guard test will make some CPU's as un-available to the system during =
boot
> time as well as at runtime. So when the cpu is unavailable to the system
> during
> boot time, the memory allocation happens depending on the number of avail=
able
> cpus. And when we access the memory using (cpu number / threads per core)=
 as
> the
> index the system crashes due to memory overflow.
>=20
> Allocating memory for core-imc based on cpu_possible_mask, which has
> bit 'cpu' set iff cpu is populatable, will fix this issue.
>=20
> Reported-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com>
> Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>

Thanks, this should be:=20

Cc: <stable@vger.kernel.org> # 4.14

> ---
>  arch/powerpc/perf/imc-pmu.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>=20
> diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
> index d7532e7..75fb23c 100644
> --- a/arch/powerpc/perf/imc-pmu.c
> +++ b/arch/powerpc/perf/imc-pmu.c
> @@ -1146,7 +1146,7 @@ static int init_nest_pmu_ref(void)
> =20
>  static void cleanup_all_core_imc_memory(void)
>  {
> -	int i, nr_cores =3D DIV_ROUND_UP(num_present_cpus(), threads_per_core);
> +	int i, nr_cores =3D DIV_ROUND_UP(num_possible_cpus(),
> threads_per_core);
>  	struct imc_mem_info *ptr =3D core_imc_pmu->mem_info;
>  	int size =3D core_imc_pmu->counter_mem_size;
> =20
> @@ -1264,7 +1264,7 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, st=
ruct
> device_node *parent,
>  		if (!pmu_ptr->pmu.name)
>  			return -ENOMEM;
> =20
> -		nr_cores =3D DIV_ROUND_UP(num_present_cpus(),
> threads_per_core);
> +		nr_cores =3D DIV_ROUND_UP(num_possible_cpus(),
> threads_per_core);
>  		pmu_ptr->mem_info =3D kcalloc(nr_cores, sizeof(struct
> imc_mem_info),
>  								GFP_KERNEL);
> =20

^ permalink raw reply

* Re: make a few block drivers highmem safe
From: Jens Axboe @ 2018-05-11 21:08 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Ed L. Cashin, Geoff Levand, Maxim Levitsky, linux-block,
	linuxppc-dev, linux-mtd, sparclinux
In-Reply-To: <20180509135948.20329-1-hch@lst.de>

On 5/9/18 7:59 AM, Christoph Hellwig wrote:
> Hi all,
> 
> this series converts a few random block drivers to be highmem safe,
> in preparation of eventually getting rid of the block layer bounce
> buffering support.

Applied, thanks.

-- 
Jens Axboe

^ permalink raw reply

* [PATCH 7/7] cxlflash: Isolate external module dependencies
From: Uma Krishnan @ 2018-05-11 19:06 UTC (permalink / raw)
  To: linux-scsi, James Bottomley, Martin K. Petersen, Matthew R. Ochs,
	Manoj N. Kumar
  Cc: linuxppc-dev, Andrew Donnellan, Frederic Barrat,
	Christophe Lombard
In-Reply-To: <1526065440-38806-1-git-send-email-ukrishn@linux.vnet.ibm.com>

Depending on the underlying transport, cxlflash has a dependency on either
the CXL or OCXL drivers, which are enabled via their Kconfig option.
Instead of having a module wide dependency on these config options, it is
better to isolate the object modules that are dependent on the CXL and OCXL
drivers and adjust the module dependencies accordingly.

This commit isolates the object files that are dependent on CXL and/or
OCXL. The cxl/ocxl fops used in the core driver are tucked under an ifdef
to avoid compilation errors.

Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
---
 drivers/scsi/cxlflash/Kconfig     | 2 +-
 drivers/scsi/cxlflash/Makefile    | 4 +++-
 drivers/scsi/cxlflash/lunmgt.c    | 4 +++-
 drivers/scsi/cxlflash/main.c      | 2 --
 drivers/scsi/cxlflash/main.h      | 5 +++++
 drivers/scsi/cxlflash/superpipe.c | 3 ++-
 drivers/scsi/cxlflash/vlun.c      | 3 ++-
 7 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig
index e2a3a1b..f1b17e3 100644
--- a/drivers/scsi/cxlflash/Kconfig
+++ b/drivers/scsi/cxlflash/Kconfig
@@ -4,7 +4,7 @@
 
 config CXLFLASH
 	tristate "Support for IBM CAPI Flash"
-	depends on PCI && SCSI && CXL && OCXL && EEH
+	depends on PCI && SCSI && (CXL || OCXL) && EEH
 	select IRQ_POLL
 	default m
 	help
diff --git a/drivers/scsi/cxlflash/Makefile b/drivers/scsi/cxlflash/Makefile
index 5124c68..283377d 100644
--- a/drivers/scsi/cxlflash/Makefile
+++ b/drivers/scsi/cxlflash/Makefile
@@ -1,2 +1,4 @@
 obj-$(CONFIG_CXLFLASH) += cxlflash.o
-cxlflash-y += main.o superpipe.o lunmgt.o vlun.o cxl_hw.o ocxl_hw.o
+cxlflash-y += main.o superpipe.o lunmgt.o vlun.o
+cxlflash-$(CONFIG_CXL) += cxl_hw.o
+cxlflash-$(CONFIG_OCXL) += ocxl_hw.o
diff --git a/drivers/scsi/cxlflash/lunmgt.c b/drivers/scsi/cxlflash/lunmgt.c
index 4d232e2..edea125 100644
--- a/drivers/scsi/cxlflash/lunmgt.c
+++ b/drivers/scsi/cxlflash/lunmgt.c
@@ -12,9 +12,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <misc/cxl.h>
 #include <asm/unaligned.h>
 
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
 #include <scsi/scsi_host.h>
 #include <uapi/scsi/cxlflash_ioctl.h>
 
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index cd7dcc5..6637116 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -19,8 +19,6 @@
 
 #include <asm/unaligned.h>
 
-#include <misc/cxl.h>
-
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
 #include <uapi/scsi/cxlflash_ioctl.h>
diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h
index ed4908e..2a39778 100644
--- a/drivers/scsi/cxlflash/main.h
+++ b/drivers/scsi/cxlflash/main.h
@@ -107,10 +107,15 @@ cxlflash_assign_ops(struct dev_dependent_vals *ddv)
 {
 	const struct cxlflash_backend_ops *ops = NULL;
 
+#ifdef CONFIG_OCXL
 	if (ddv->flags & CXLFLASH_OCXL_DEV)
 		ops = &cxlflash_ocxl_ops;
+#endif
+
+#ifdef CONFIG_CXL
 	if (!(ddv->flags & CXLFLASH_OCXL_DEV))
 		ops = &cxlflash_cxl_ops;
+#endif
 
 	return ops;
 }
diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c
index 5ba6e62..e489d89 100644
--- a/drivers/scsi/cxlflash/superpipe.c
+++ b/drivers/scsi/cxlflash/superpipe.c
@@ -14,8 +14,9 @@
 
 #include <linux/delay.h>
 #include <linux/file.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
 #include <linux/syscalls.h>
-#include <misc/cxl.h>
 #include <asm/unaligned.h>
 
 #include <scsi/scsi.h>
diff --git a/drivers/scsi/cxlflash/vlun.c b/drivers/scsi/cxlflash/vlun.c
index 5deef57..66e445a 100644
--- a/drivers/scsi/cxlflash/vlun.c
+++ b/drivers/scsi/cxlflash/vlun.c
@@ -12,8 +12,9 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/interrupt.h>
+#include <linux/pci.h>
 #include <linux/syscalls.h>
-#include <misc/cxl.h>
 #include <asm/unaligned.h>
 #include <asm/bitsperlong.h>
 
-- 
2.1.0

^ permalink raw reply related

* [PATCH 6/7] cxlflash: Abstract hardware dependent assignments
From: Uma Krishnan @ 2018-05-11 19:06 UTC (permalink / raw)
  To: linux-scsi, James Bottomley, Martin K. Petersen, Matthew R. Ochs,
	Manoj N. Kumar
  Cc: linuxppc-dev, Andrew Donnellan, Frederic Barrat,
	Christophe Lombard
In-Reply-To: <1526065440-38806-1-git-send-email-ukrishn@linux.vnet.ibm.com>

As a staging cleanup to support transport-specific builds of the cxlflash
module, relocate device-dependent assignments to header files. This will
avoid littering the core driver with conditional compilation logic.

Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
---
 drivers/scsi/cxlflash/main.c |  7 ++-----
 drivers/scsi/cxlflash/main.h | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index c91e912..cd7dcc5 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3708,11 +3708,8 @@ static int cxlflash_probe(struct pci_dev *pdev,
 	cfg->init_state = INIT_STATE_NONE;
 	cfg->dev = pdev;
 	cfg->cxl_fops = cxlflash_cxl_fops;
-
-	if (ddv->flags & CXLFLASH_OCXL_DEV)
-		cfg->ops = &cxlflash_ocxl_ops;
-	else
-		cfg->ops = &cxlflash_cxl_ops;
+	cfg->ops = cxlflash_assign_ops(ddv);
+	WARN_ON_ONCE(!cfg->ops);
 
 	/*
 	 * Promoted LUNs move to the top of the LUN table. The rest stay on
diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h
index 6f1be62..ed4908e 100644
--- a/drivers/scsi/cxlflash/main.h
+++ b/drivers/scsi/cxlflash/main.h
@@ -20,6 +20,8 @@
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
 
+#include "backend.h"
+
 #define CXLFLASH_NAME		"cxlflash"
 #define CXLFLASH_ADAPTER_NAME	"IBM POWER CXL Flash Adapter"
 #define CXLFLASH_MAX_ADAPTERS	32
@@ -100,6 +102,19 @@ struct dev_dependent_vals {
 #define CXLFLASH_OCXL_DEV		0x0000000000000004ULL
 };
 
+static inline const struct cxlflash_backend_ops *
+cxlflash_assign_ops(struct dev_dependent_vals *ddv)
+{
+	const struct cxlflash_backend_ops *ops = NULL;
+
+	if (ddv->flags & CXLFLASH_OCXL_DEV)
+		ops = &cxlflash_ocxl_ops;
+	if (!(ddv->flags & CXLFLASH_OCXL_DEV))
+		ops = &cxlflash_cxl_ops;
+
+	return ops;
+}
+
 struct asyc_intr_info {
 	u64 status;
 	char *desc;
-- 
2.1.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox