LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/7] powerpc/64s/radix: Allow mm_cpumask trimming from external sources
From: Nicholas Piggin @ 2020-12-17 13:47 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201217134731.488135-1-npiggin@gmail.com>

mm_cpumask trimming is currently restricted to be issued by the current
thread of a single-threaded mm. This patch relaxes that and allows the
mask to be trimmed from any context.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/book3s64/radix_tlb.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 5b62e2e7371c..7b199bee4baa 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -657,20 +657,16 @@ static void do_exit_flush_lazy_tlb(void *arg)
 	}
 
 	/*
-	 * This IPI is only initiated from a CPU which is running mm which
-	 * is a single-threaded process, so there will not be another racing
-	 * IPI coming in where we would find our cpumask already clear.
-	 *
-	 * Nothing else clears our bit in the cpumask except CPU offlining,
-	 * in which case we should not be taking IPIs here. However check
-	 * this just in case the logic is wrong somewhere, and don't underflow
-	 * the active_cpus count.
+	 * This IPI may be initiated from any source including those not
+	 * running the mm, so there may be a racing IPI that comes after
+	 * this one which finds the cpumask already clear. Check and avoid
+	 * underflowing the active_cpus count in that case. The race should
+	 * not otherwise be a problem, but the TLB must be flushed because
+	 * that's what the caller expects.
 	 */
 	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
 		atomic_dec(&mm->context.active_cpus);
 		cpumask_clear_cpu(cpu, mm_cpumask(mm));
-	} else {
-		WARN_ON_ONCE(1);
 	}
 
 out_flush:
-- 
2.23.0


^ permalink raw reply related

* [PATCH 5/7] powerpc/64s/radix: occasionally attempt to trim mm_cpumask
From: Nicholas Piggin @ 2020-12-17 13:47 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201217134731.488135-1-npiggin@gmail.com>

A single-threaded process that is flushing its own address space is
so far the only case where the mm_cpumask is attempted to be trimmed.
This patch expands that to flush in other situations, multi-threaded
processes and external sources. For now it's a relatively simple
occasional trim attempt. The main aim is to add the mechanism,
tweaking and tuning can come with more data.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/book3s64/radix_tlb.c | 60 ++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 7b199bee4baa..4dca7cbf07e9 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -630,10 +630,8 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
 	return false;
 }
 
-#ifdef CONFIG_SMP
-static void do_exit_flush_lazy_tlb(void *arg)
+static void exit_lazy_flush_tlb(struct mm_struct *mm)
 {
-	struct mm_struct *mm = arg;
 	unsigned long pid = mm->context.id;
 	int cpu = smp_processor_id();
 
@@ -673,6 +671,13 @@ static void do_exit_flush_lazy_tlb(void *arg)
 	_tlbiel_pid(pid, RIC_FLUSH_ALL);
 }
 
+#ifdef CONFIG_SMP
+static void do_exit_flush_lazy_tlb(void *arg)
+{
+	struct mm_struct *mm = arg;
+	exit_lazy_flush_tlb(mm);
+}
+
 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
 {
 	/*
@@ -685,10 +690,32 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm)
 	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
 				(void *)mm, 1);
 }
+
 #else /* CONFIG_SMP */
 static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
 #endif /* CONFIG_SMP */
 
+static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
+
+/*
+ * Interval between flushes at which we send out IPIs to check whether the
+ * mm_cpumask can be trimmed for the case where it's not a single-threaded
+ * process flushing its own mm. The intent is to reduce the cost of later
+ * flushes. Don't want this to be so low that it adds noticable cost to TLB
+ * flushing, or so high that it doesn't help reduce global TLBIEs.
+ */
+static unsigned long tlb_mm_cpumask_trim_timer = 1073;
+
+static bool tick_and_test_trim_clock(void)
+{
+	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
+			tlb_mm_cpumask_trim_timer) {
+		__this_cpu_write(mm_cpumask_trim_clock, 0);
+		return true;
+	}
+	return false;
+}
+
 enum tlb_flush_type {
 	FLUSH_TYPE_NONE,
 	FLUSH_TYPE_LOCAL,
@@ -702,8 +729,20 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
 
 	if (active_cpus == 0)
 		return FLUSH_TYPE_NONE;
-	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm)))
+	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
+		if (current->mm != mm) {
+			/*
+			 * Asynchronous flush sources may trim down to nothing
+			 * if the process is not running, so occasionally try
+			 * to trim.
+			 */
+			if (tick_and_test_trim_clock()) {
+				exit_lazy_flush_tlb(mm);
+				return FLUSH_TYPE_NONE;
+			}
+		}
 		return FLUSH_TYPE_LOCAL;
+	}
 
 	/* Coprocessors require TLBIE to invalidate nMMU. */
 	if (atomic_read(&mm->context.copros) > 0)
@@ -735,6 +774,19 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
 		return FLUSH_TYPE_LOCAL;
 	}
 
+	/*
+	 * Occasionally try to trim down the cpumask. It's possible this can
+	 * bring the mask to zero, which results in no flush.
+	 */
+	if (tick_and_test_trim_clock()) {
+		exit_flush_lazy_tlbs(mm);
+		if (current->mm == mm)
+			return FLUSH_TYPE_LOCAL;
+		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+			exit_lazy_flush_tlb(mm);
+		return FLUSH_TYPE_NONE;
+	}
+
 	return FLUSH_TYPE_GLOBAL;
 }
 
-- 
2.23.0


^ permalink raw reply related

* [PATCH 6/7] powerpc/64s/radix: serialize_against_pte_lookup IPIs trim mm_cpumask
From: Nicholas Piggin @ 2020-12-17 13:47 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201217134731.488135-1-npiggin@gmail.com>

serialize_against_pte_lookup() performs IPIs to all CPUs in mm_cpumask.
Take this opportunity to try trim the CPU out of mm_cpumask. This can
reduce the cost of future serialize_against_pte_lookup() and/or the
cost of future TLB flushes.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/mm/book3s64/pgtable.c   | 13 ++++++++++---
 arch/powerpc/mm/book3s64/radix_tlb.c | 20 +++++++++++++-------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index e18ae50a275c..ec23faf102b2 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -79,10 +79,17 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
 
-static void do_nothing(void *unused)
-{
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
 
+static void do_serialize(void *arg)
+{
+	/* We've taken the IPI, so try to trim the mask while here */
+	if (radix_enabled()) {
+		struct mm_struct *mm = arg;
+		exit_lazy_flush_tlb(mm, false);
+	}
 }
+
 /*
  * Serialize against find_current_mm_pte which does lock-less
  * lookup in page tables with local interrupts disabled. For huge pages
@@ -96,7 +103,7 @@ static void do_nothing(void *unused)
 void serialize_against_pte_lookup(struct mm_struct *mm)
 {
 	smp_mb();
-	smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1);
+	smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1);
 }
 
 /*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 4dca7cbf07e9..d04c80d6f52c 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -630,7 +630,11 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
 	return false;
 }
 
-static void exit_lazy_flush_tlb(struct mm_struct *mm)
+/*
+ * If always_flush is true, then flush even if this CPU can't be removed
+ * from mm_cpumask.
+ */
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
 {
 	unsigned long pid = mm->context.id;
 	int cpu = smp_processor_id();
@@ -643,7 +647,7 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm)
 	 * done with interrupts off.
 	 */
 	if (current->mm == mm)
-		goto out_flush;
+		goto out;
 
 	if (current->active_mm == mm) {
 		WARN_ON_ONCE(current->mm != NULL);
@@ -665,17 +669,19 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm)
 	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
 		atomic_dec(&mm->context.active_cpus);
 		cpumask_clear_cpu(cpu, mm_cpumask(mm));
+		always_flush = true;
 	}
 
-out_flush:
-	_tlbiel_pid(pid, RIC_FLUSH_ALL);
+out:
+	if (always_flush)
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 }
 
 #ifdef CONFIG_SMP
 static void do_exit_flush_lazy_tlb(void *arg)
 {
 	struct mm_struct *mm = arg;
-	exit_lazy_flush_tlb(mm);
+	exit_lazy_flush_tlb(mm, true);
 }
 
 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
@@ -737,7 +743,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
 			 * to trim.
 			 */
 			if (tick_and_test_trim_clock()) {
-				exit_lazy_flush_tlb(mm);
+				exit_lazy_flush_tlb(mm, true);
 				return FLUSH_TYPE_NONE;
 			}
 		}
@@ -783,7 +789,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
 		if (current->mm == mm)
 			return FLUSH_TYPE_LOCAL;
 		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
-			exit_lazy_flush_tlb(mm);
+			exit_lazy_flush_tlb(mm, true);
 		return FLUSH_TYPE_NONE;
 	}
 
-- 
2.23.0


^ permalink raw reply related

* [PATCH 7/7] powerpc/64s: Implement ptep_clear_flush_young that does not flush TLBs
From: Nicholas Piggin @ 2020-12-17 13:47 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20201217134731.488135-1-npiggin@gmail.com>

Similarly to the x86 commit b13b1d2d8692 ("x86/mm: In the PTE swapout
page reclaim case clear the accessed bit instead of flushing the TLB"),
implement ptep_clear_flush_young that does not actually flush the TLB
in the case the referenced bit is cleared.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 23 +++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index cd3feeac6e87..751f98a40aca 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -388,11 +388,28 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define ptep_test_and_clear_young(__vma, __addr, __ptep)	\
 ({								\
-	int __r;						\
-	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
-	__r;							\
+	__ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
 })
 
+/*
+ * On Book3S CPUs, clearing the accessed bit without a TLB flush
+ * doesn't cause data corruption. [ It could cause incorrect
+ * page aging and the (mistaken) reclaim of hot pages, but the
+ * chance of that should be relatively low. ]
+ *
+ * So as a performance optimization don't flush the TLB when
+ * clearing the accessed bit, it will eventually be flushed by
+ * a context switch or a VM operation anyway. [ In the rare
+ * event of it not getting flushed for a long time the delay
+ * shouldn't really matter because there's no real memory
+ * pressure for swapout to react to. ]
+ */
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young ptep_test_and_clear_young
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+#define pmdp_clear_flush_young pmdp_test_and_clear_young
+
 static inline int __pte_write(pte_t pte)
 {
 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
-- 
2.23.0


^ permalink raw reply related

* Re: powerpc VDSO files being unnecessarily rebuilt
From: Masahiro Yamada @ 2020-12-17 16:40 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev, Linux Kbuild mailing list
In-Reply-To: <CAK7LNASTXyxhLzH7kRyAKCixe6ksJaKPiuPxWnsYZ6NJVCWDhQ@mail.gmail.com>

On Thu, Dec 17, 2020 at 6:23 PM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> On Thu, Dec 17, 2020 at 11:56 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> >
> > Hi all,
> >
> > Since the merge of the C VDSO I see we are repeatedly rebuilding some
> > files in the VDSO, eg:
> >
> >   $ make V=2
> >   make[1]: Entering directory '/home/michael/linux/build~'
> >     GEN     Makefile
> >     CALL    /home/michael/linux/scripts/checksyscalls.sh - due to target missing
> >     CALL    /home/michael/linux/scripts/atomic/check-atomics.sh - due to target missing
> >     CHK     include/generated/compile.h
> >     CC      arch/powerpc/kernel/vdso64/vgettimeofday.o - due to vgettimeofday.o not in $(targets)
> >
> > This then causes multiple other files to be rebuilt.
> >
> > So the obvious fix is to add it to targets:
> >
> > diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
> > index d365810a689a..5386532866ce 100644
> > --- a/arch/powerpc/kernel/vdso64/Makefile
> > +++ b/arch/powerpc/kernel/vdso64/Makefile
> > @@ -5,6 +5,7 @@ ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_
> >  include $(srctree)/lib/vdso/Makefile
> >
> >  obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
> > +targets := $(obj-vdso64) vdso64.so.dbg
> >
> >  ifneq ($(c-gettimeofday-y),)
> >    CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
> > @@ -13,11 +14,11 @@ ifneq ($(c-gettimeofday-y),)
> >    CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING
> >    CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables
> >    CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
> > +  targets += vgettimeofday.o
> >  endif
> >
> >  # Build rules
> >
> > -targets := $(obj-vdso64) vdso64.so.dbg
> >  obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
> >
> >  GCOV_PROFILE := n
> >
> >
> > But then I see it still rebuilt:
> >
> >   CC      arch/powerpc/kernel/vdso64/vgettimeofday.o - due to command line change
> >
> >
> > I'm not changing the command line, and AFAICS the .cmd file is not
> > changing either:
> >
> >   $ make V=2
> >   ...
> >     CC      arch/powerpc/kernel/vdso64/vgettimeofday.o - due to command line change
> >
> >   $ sha256sum build\~/arch/powerpc/kernel/vdso64/vgettimeofday.o
> >   7f635546bc2768c7b929d3de1724d83285f3cd54394fcd7104f8b1301d689d65  build~/arch/powerpc/kernel/vdso64/vgettimeofday.o
> >
> >   $ make V=2
> >   ...
> >     CC      arch/powerpc/kernel/vdso64/vgettimeofday.o - due to command line change
> >
> >   $ sha256sum build\~/arch/powerpc/kernel/vdso64/vgettimeofday.o
> >   7f635546bc2768c7b929d3de1724d83285f3cd54394fcd7104f8b1301d689d65  build~/arch/powerpc/kernel/vdso64/vgettimeofday.o
> >
> >
> > So any hints on what I'm missing here?
> >
> > cheers
>
>
> This is because PPC builds the vdso twice
> with different command arguments.
>
>
> First time:
>
> vdso_prepare: prepare0
>          $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \
>                  $(build)=arch/powerpc/kernel/vdso32
> include/generated/vdso32-offsets.h)
>          $(if $(CONFIG_PPC64),$(Q)$(MAKE) \
>                  $(build)=arch/powerpc/kernel/vdso64
> include/generated/vdso64-offsets.h)
>
>
> Second time:
>    from  arch/powerpc/kernel/Makefile
>
>
>
>
>
> For the first build, -Werror is missing because
> Kbuild directly descends into arch/powerpc/kernel/vdso[32,64]/.
>
>
> For the second build,
>
> arch/powerpc/Kbuild appends the following:
>
> subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
>


This is very dangerous because the vdso offsets are wrong.

I think ppc followed arm64 implementation.

Let me figure out how to fix arm64 vdso.



-- 
Best Regards
Masahiro Yamada

^ permalink raw reply

* Re: [PATCH 6/7] powerpc/64s/radix: serialize_against_pte_lookup IPIs trim mm_cpumask
From: kernel test robot @ 2020-12-17 18:23 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev; +Cc: kbuild-all, Nicholas Piggin
In-Reply-To: <20201217134731.488135-7-npiggin@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 3681 bytes --]

Hi Nicholas,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v5.10 next-20201217]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64s-TLB-flushing-improvements/20201217-220230
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-m031-20201217 (attached as .config)
compiler: powerpc64le-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/5bd02bea3da4e21fa00729e7687fc93a5c653e2b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Nicholas-Piggin/powerpc-64s-TLB-flushing-improvements/20201217-220230
        git checkout 5bd02bea3da4e21fa00729e7687fc93a5c653e2b
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> arch/powerpc/mm/book3s64/radix_tlb.c:646:6: error: no previous prototype for 'exit_lazy_flush_tlb' [-Werror=missing-prototypes]
     646 | void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
         |      ^~~~~~~~~~~~~~~~~~~
   cc1: all warnings being treated as errors


vim +/exit_lazy_flush_tlb +646 arch/powerpc/mm/book3s64/radix_tlb.c

   641	
   642	/*
   643	 * If always_flush is true, then flush even if this CPU can't be removed
   644	 * from mm_cpumask.
   645	 */
 > 646	void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
   647	{
   648		unsigned long pid = mm->context.id;
   649		int cpu = smp_processor_id();
   650	
   651		/*
   652		 * A kthread could have done a mmget_not_zero() after the flushing CPU
   653		 * checked mm_cpumask, and be in the process of kthread_use_mm when
   654		 * interrupted here. In that case, current->mm will be set to mm,
   655		 * because kthread_use_mm() setting ->mm and switching to the mm is
   656		 * done with interrupts off.
   657		 */
   658		if (current->mm == mm)
   659			goto out;
   660	
   661		if (current->active_mm == mm) {
   662			WARN_ON_ONCE(current->mm != NULL);
   663			/* Is a kernel thread and is using mm as the lazy tlb */
   664			mmgrab(&init_mm);
   665			current->active_mm = &init_mm;
   666			switch_mm_irqs_off(mm, &init_mm, current);
   667			mmdrop(mm);
   668		}
   669	
   670		/*
   671		 * This IPI may be initiated from any source including those not
   672		 * running the mm, so there may be a racing IPI that comes after
   673		 * this one which finds the cpumask already clear. Check and avoid
   674		 * underflowing the active_cpus count in that case. The race should
   675		 * not otherwise be a problem, but the TLB must be flushed because
   676		 * that's what the caller expects.
   677		 */
   678		if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
   679			atomic_dec(&mm->context.active_cpus);
   680			cpumask_clear_cpu(cpu, mm_cpumask(mm));
   681			always_flush = true;
   682		}
   683	
   684	out:
   685		if (always_flush)
   686			_tlbiel_pid(pid, RIC_FLUSH_ALL);
   687	}
   688	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 40559 bytes --]

^ permalink raw reply

* Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.11-1 tag
From: pr-tracker-bot @ 2020-12-17 21:44 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: ego, clombard, david, aik, jniethe5, bala24, oohall, morbo, ardb,
	srikar, leobras.c, maddy, aneesh.kumar, miaoqinglang,
	vincent.stehle, tiwai, ganeshgr, u.kleine-koenig, harish, longman,
	nathanl, ravi.bangoria, ajd, amodra, linuxppc-dev, npiggin, oss,
	mathieu.desnoyers, clg, colin.king, ldufour, tangyouling,
	po-hsu.lin, dja, atrajeev, Kees Cook, zhangxiaoxu5, oleg, tyreld,
	linux-kernel, fbarrat, kaixuxia, Linus Torvalds
In-Reply-To: <87r1noy325.fsf@mpe.ellerman.id.au>

The pull request you sent on Fri, 18 Dec 2020 00:28:34 +1100:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.11-1

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/8a5be36b9303ae167468d4f5e1b3c090b9981396

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html

^ permalink raw reply

* [powerpc:next] BUILD SUCCESS c1bea0a840ac75dca19bc6aa05575a33eb9fd058
From: kernel test robot @ 2020-12-17 22:28 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next
branch HEAD: c1bea0a840ac75dca19bc6aa05575a33eb9fd058  powerpc/32s: Fix cleanup_cpu_mmu_context() compile bug

elapsed time: 962m

configs tested: 170
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                       omap2plus_defconfig
sh                                  defconfig
powerpc                 mpc8540_ads_defconfig
arm                        vexpress_defconfig
arm                        cerfcube_defconfig
h8300                     edosk2674_defconfig
arm                          pxa3xx_defconfig
arm                     eseries_pxa_defconfig
mips                         bigsur_defconfig
mips                          ath25_defconfig
arm                          simpad_defconfig
powerpc                      chrp32_defconfig
arm                           corgi_defconfig
powerpc                     tqm5200_defconfig
riscv                            alldefconfig
arm                      footbridge_defconfig
powerpc                     tqm8540_defconfig
c6x                        evmc6457_defconfig
sh                            shmin_defconfig
xtensa                       common_defconfig
arm                           sunxi_defconfig
sh                      rts7751r2d1_defconfig
arm                           h3600_defconfig
m68k                        stmark2_defconfig
sh                        sh7757lcr_defconfig
powerpc                      tqm8xx_defconfig
powerpc                 mpc834x_itx_defconfig
m68k                             alldefconfig
arm                           h5000_defconfig
powerpc                     akebono_defconfig
c6x                        evmc6678_defconfig
microblaze                      mmu_defconfig
powerpc                     skiroot_defconfig
mips                  cavium_octeon_defconfig
arm                       mainstone_defconfig
sh                   sh7724_generic_defconfig
arm                           sama5_defconfig
h8300                    h8300h-sim_defconfig
arm                         orion5x_defconfig
arm                         axm55xx_defconfig
arm                          prima2_defconfig
arm                         s5pv210_defconfig
arm                        oxnas_v6_defconfig
arm                         s3c6400_defconfig
powerpc                     tqm8555_defconfig
arm                        multi_v5_defconfig
powerpc                 mpc8315_rdb_defconfig
xtensa                         virt_defconfig
mips                          malta_defconfig
arm                         lubbock_defconfig
arm                          iop32x_defconfig
arm                         lpc32xx_defconfig
h8300                       h8s-sim_defconfig
arc                 nsimosci_hs_smp_defconfig
mips                      bmips_stb_defconfig
powerpc                   currituck_defconfig
s390                       zfcpdump_defconfig
powerpc                    ge_imp3a_defconfig
mips                      pistachio_defconfig
powerpc64                        alldefconfig
arm                  colibri_pxa300_defconfig
m68k                        m5407c3_defconfig
powerpc                       ppc64_defconfig
arm                         assabet_defconfig
sh                          lboxre2_defconfig
m68k                         apollo_defconfig
powerpc                     pseries_defconfig
m68k                       bvme6000_defconfig
powerpc                       eiger_defconfig
arm                         s3c2410_defconfig
arm                          pxa168_defconfig
mips                      fuloong2e_defconfig
powerpc                      cm5200_defconfig
sh                        apsh4ad0a_defconfig
arm                          ep93xx_defconfig
m68k                           sun3_defconfig
powerpc                      pcm030_defconfig
powerpc                     powernv_defconfig
arm                          tango4_defconfig
arm                            xcep_defconfig
arm                   milbeaut_m10v_defconfig
arm                           spitz_defconfig
xtensa                          iss_defconfig
powerpc                           allnoconfig
arm                         cm_x300_defconfig
sh                             espt_defconfig
c6x                        evmc6472_defconfig
ia64                                defconfig
sparc                            allyesconfig
arm                        spear6xx_defconfig
riscv                    nommu_virt_defconfig
powerpc                     stx_gp3_defconfig
sh                   rts7751r2dplus_defconfig
powerpc                      pasemi_defconfig
mips                        omega2p_defconfig
mips                         mpc30x_defconfig
mips                       bmips_be_defconfig
mips                         cobalt_defconfig
ia64                             allmodconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                               defconfig
i386                               tinyconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
x86_64               randconfig-a003-20201217
x86_64               randconfig-a006-20201217
x86_64               randconfig-a002-20201217
x86_64               randconfig-a005-20201217
x86_64               randconfig-a004-20201217
x86_64               randconfig-a001-20201217
i386                 randconfig-a001-20201217
i386                 randconfig-a004-20201217
i386                 randconfig-a003-20201217
i386                 randconfig-a002-20201217
i386                 randconfig-a006-20201217
i386                 randconfig-a005-20201217
i386                 randconfig-a014-20201217
i386                 randconfig-a013-20201217
i386                 randconfig-a012-20201217
i386                 randconfig-a011-20201217
i386                 randconfig-a015-20201217
i386                 randconfig-a016-20201217
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                      rhel-8.3-kbuiltin
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a016-20201217
x86_64               randconfig-a012-20201217
x86_64               randconfig-a013-20201217
x86_64               randconfig-a015-20201217
x86_64               randconfig-a014-20201217
x86_64               randconfig-a011-20201217

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:merge] BUILD SUCCESS 3ae03872115a0a158508ee5a91337648aede547d
From: kernel test robot @ 2020-12-17 22:28 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  merge
branch HEAD: 3ae03872115a0a158508ee5a91337648aede547d  Automatic merge of 'next' into merge (2020-12-17 14:33)

elapsed time: 964m

configs tested: 129
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                       omap2plus_defconfig
sh                                  defconfig
powerpc                 mpc8540_ads_defconfig
arm                        vexpress_defconfig
arm                        cerfcube_defconfig
c6x                        evmc6457_defconfig
sh                            shmin_defconfig
xtensa                       common_defconfig
arm                           sunxi_defconfig
arm                            lart_defconfig
mips                    maltaup_xpa_defconfig
arm                          badge4_defconfig
mips                            e55_defconfig
sh                           se7751_defconfig
powerpc                 mpc837x_mds_defconfig
sh                      rts7751r2d1_defconfig
arm                           h3600_defconfig
m68k                        stmark2_defconfig
sh                        sh7757lcr_defconfig
mips                  cavium_octeon_defconfig
arm                       mainstone_defconfig
sh                   sh7724_generic_defconfig
arm                           sama5_defconfig
h8300                    h8300h-sim_defconfig
arm                         orion5x_defconfig
xtensa                         virt_defconfig
mips                          malta_defconfig
arm                         lubbock_defconfig
arm                          iop32x_defconfig
arm                         lpc32xx_defconfig
h8300                       h8s-sim_defconfig
arc                 nsimosci_hs_smp_defconfig
mips                      bmips_stb_defconfig
powerpc                   currituck_defconfig
powerpc                     akebono_defconfig
mips                      fuloong2e_defconfig
powerpc                      cm5200_defconfig
sh                        apsh4ad0a_defconfig
arm                          ep93xx_defconfig
m68k                           sun3_defconfig
arm                            xcep_defconfig
arm                   milbeaut_m10v_defconfig
arm                           spitz_defconfig
xtensa                          iss_defconfig
arm                        spear6xx_defconfig
arm                      footbridge_defconfig
powerpc                     stx_gp3_defconfig
sh                   rts7751r2dplus_defconfig
mips                        workpad_defconfig
sparc                       sparc64_defconfig
mips                        nlm_xlr_defconfig
microblaze                      mmu_defconfig
powerpc                      makalu_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                               tinyconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a003-20201217
x86_64               randconfig-a006-20201217
x86_64               randconfig-a002-20201217
x86_64               randconfig-a005-20201217
x86_64               randconfig-a004-20201217
x86_64               randconfig-a001-20201217
i386                 randconfig-a001-20201217
i386                 randconfig-a004-20201217
i386                 randconfig-a003-20201217
i386                 randconfig-a002-20201217
i386                 randconfig-a006-20201217
i386                 randconfig-a005-20201217
i386                 randconfig-a014-20201217
i386                 randconfig-a013-20201217
i386                 randconfig-a012-20201217
i386                 randconfig-a011-20201217
i386                 randconfig-a015-20201217
i386                 randconfig-a016-20201217
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                      rhel-8.3-kbuiltin
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a016-20201217
x86_64               randconfig-a012-20201217
x86_64               randconfig-a013-20201217
x86_64               randconfig-a015-20201217
x86_64               randconfig-a014-20201217
x86_64               randconfig-a011-20201217

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH] powerpc/perf/hv-24x7: Dont create sysfs event files for dummy events
From: Michael Ellerman @ 2020-12-18  0:56 UTC (permalink / raw)
  To: Kajol Jain, linuxppc-dev; +Cc: kjain, suka, maddy, atrajeev
In-Reply-To: <20201217113230.1069882-1-kjain@linux.ibm.com>

Kajol Jain <kjain@linux.ibm.com> writes:
> hv_24x7 performance monitoring unit creates list of supported events
> from the event catalog obtained via HCALL. hv_24x7 catalog could also
> contain invalid or dummy events (with names like FREE_  or CPM_FREE_ so
> on). These events does not have any hardware counters backing them.
> So patch adds a check to string compare the event names to filter
> out them.
>
> Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
> ---
>  arch/powerpc/perf/hv-24x7.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
>
> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
> index 6e7e820508df..c3252d8a7818 100644
> --- a/arch/powerpc/perf/hv-24x7.c
> +++ b/arch/powerpc/perf/hv-24x7.c
> @@ -894,6 +894,11 @@ static int create_events_from_catalog(struct attribute ***events_,
>  
>  		name = event_name(event, &nl);
>  
> +		if (strstr(name, "FREE_")) {
> +			pr_info("invalid event %zu (%.*s)\n", event_idx, nl, name);
> +			junk_events++;
> +			continue;

I don't think we want a print for each event, just one at the end saying
"Dropped %d invalid events" would be preferable I think.


> +		}
>  		if (event->event_group_record_len == 0) {
>  			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
>  					event_idx, nl, name);
> @@ -955,6 +960,9 @@ static int create_events_from_catalog(struct attribute ***events_,
>  			continue;
>  
>  		name  = event_name(event, &nl);
> +		if (strstr(name, "FREE_"))
> +			continue;

Would be nice if the string comparison was in a single place, ie. in a
helper function.

cheers

^ permalink raw reply

* [PATCH] powerpc/mm: Limit allocation of SWIOTLB on server machines
From: Thiago Jung Bauermann @ 2020-12-18  6:21 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Satheesh Rajendran, Ram Pai, linux-kernel, Thiago Jung Bauermann

On server-class POWER machines, we don't need the SWIOTLB unless we're a
secure VM. Nevertheless, if CONFIG_SWIOTLB is enabled we unconditionally
allocate it.

In most cases this is harmless, but on a few machine configurations (e.g.,
POWER9 powernv systems with 4 GB area reserved for crashdump kernel) it can
happen that memblock can't find a 64 MB chunk of memory for the SWIOTLB and
fails with a scary-looking WARN_ONCE:

 ------------[ cut here ]------------
 memblock: bottom-up allocation failed, memory hotremove may be affected
 WARNING: CPU: 0 PID: 0 at mm/memblock.c:332 memblock_find_in_range_node+0x328/0x340
 Modules linked in:
 CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-rc2-orig+ #6
 NIP:  c000000000442f38 LR: c000000000442f34 CTR: c0000000001e0080
 REGS: c000000001def900 TRAP: 0700   Not tainted  (5.10.0-rc2-orig+)
 MSR:  9000000002021033 <SF,HV,VEC,ME,IR,DR,RI,LE>  CR: 28022222  XER: 20040000
 CFAR: c00000000014b7b4 IRQMASK: 1
 GPR00: c000000000442f34 c000000001defba0 c000000001deff00 0000000000000047
 GPR04: 00000000ffff7fff c000000001def828 c000000001def820 0000000000000000
 GPR08: 0000001ffc3e0000 c000000001b75478 c000000001b75478 0000000000000001
 GPR12: 0000000000002000 c000000002030000 0000000000000000 0000000000000000
 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000002030000
 GPR20: 0000000000000000 0000000000010000 0000000000010000 c000000001defc10
 GPR24: c000000001defc08 c000000001c91868 c000000001defc18 c000000001c91890
 GPR28: 0000000000000000 ffffffffffffffff 0000000004000000 00000000ffffffff
 NIP [c000000000442f38] memblock_find_in_range_node+0x328/0x340
 LR [c000000000442f34] memblock_find_in_range_node+0x324/0x340
 Call Trace:
 [c000000001defba0] [c000000000442f34] memblock_find_in_range_node+0x324/0x340 (unreliable)
 [c000000001defc90] [c0000000015ac088] memblock_alloc_range_nid+0xec/0x1b0
 [c000000001defd40] [c0000000015ac1f8] memblock_alloc_internal+0xac/0x110
 [c000000001defda0] [c0000000015ac4d0] memblock_alloc_try_nid+0x94/0xcc
 [c000000001defe30] [c00000000159c3c8] swiotlb_init+0x78/0x104
 [c000000001defea0] [c00000000158378c] mem_init+0x4c/0x98
 [c000000001defec0] [c00000000157457c] start_kernel+0x714/0xac8
 [c000000001deff90] [c00000000000d244] start_here_common+0x1c/0x58
 Instruction dump:
 2c230000 4182ffd4 ea610088 ea810090 4bfffe84 39200001 3d42fff4 3c62ff60
 3863c560 992a8bfc 4bd0881d 60000000 <0fe00000> ea610088 4bfffd94 60000000
 random: get_random_bytes called from __warn+0x128/0x184 with crng_init=0
 ---[ end trace 0000000000000000 ]---
 software IO TLB: Cannot allocate buffer

Unless this is a secure VM the message can actually be ignored, because the
SWIOTLB isn't needed. Therefore, let's avoid the SWIOTLB in those cases.

Fixes: eae9eec476d1 ("powerpc/pseries/svm: Allocate SWIOTLB buffer anywhere in memory")
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
 arch/powerpc/mm/mem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index afab328d0887..3af991844145 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -300,7 +300,8 @@ void __init mem_init(void)
 	memblock_set_bottom_up(true);
 	if (is_secure_guest())
 		svm_swiotlb_init();
-	else
+	/* Server machines don't need SWIOTLB if they're not secure guests. */
+	else if (!machine_is(pseries) && !machine_is(powernv))
 		swiotlb_init(0);
 #endif
 

^ permalink raw reply related

* [PATCH v2] powerpc/32s: Only build hash code when CONFIG_PPC_BOOK3S_604 is selected
From: Christophe Leroy @ 2020-12-18  6:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

It is now possible to only build book3s/32 kernel for
CPUs without hash table.

Opt out hash related code when CONFIG_PPC_BOOK3S_604 is not selected.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v2: Rebased
---
 arch/powerpc/kernel/head_book3s_32.S | 12 ++++++++++++
 arch/powerpc/mm/book3s32/Makefile    |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 349bf3f0c3af..c02024bce544 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -286,6 +286,7 @@ MachineCheck:
 	DO_KVM  0x300
 DataAccess:
 #ifdef CONFIG_VMAP_STACK
+#ifdef CONFIG_PPC_BOOK3S_604
 BEGIN_MMU_FTR_SECTION
 	mtspr	SPRN_SPRG_SCRATCH2,r10
 	mfspr	r10, SPRN_SPRG_THREAD
@@ -302,12 +303,14 @@ BEGIN_MMU_FTR_SECTION
 MMU_FTR_SECTION_ELSE
 	b	1f
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
 1:	EXCEPTION_PROLOG_0 handle_dar_dsisr=1
 	EXCEPTION_PROLOG_1
 	b	handle_page_fault_tramp_1
 #else	/* CONFIG_VMAP_STACK */
 	EXCEPTION_PROLOG handle_dar_dsisr=1
 	get_and_save_dar_dsisr_on_stack	r4, r5, r11
+#ifdef CONFIG_PPC_BOOK3S_604
 BEGIN_MMU_FTR_SECTION
 	andis.	r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
 	bne	handle_page_fault_tramp_2	/* if not, try to put a PTE */
@@ -315,8 +318,11 @@ BEGIN_MMU_FTR_SECTION
 	bl	hash_page
 	b	handle_page_fault_tramp_1
 MMU_FTR_SECTION_ELSE
+#endif
 	b	handle_page_fault_tramp_2
+#ifdef CONFIG_PPC_BOOK3S_604
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
 #endif	/* CONFIG_VMAP_STACK */
 
 /* Instruction access exception. */
@@ -332,12 +338,14 @@ InstructionAccess:
 	mfspr	r11, SPRN_SRR1		/* check whether user or kernel */
 	stw	r11, SRR1(r10)
 	mfcr	r10
+#ifdef CONFIG_PPC_BOOK3S_604
 BEGIN_MMU_FTR_SECTION
 	andis.	r11, r11, SRR1_ISI_NOPT@h	/* no pte found? */
 	bne	hash_page_isi
 .Lhash_page_isi_cont:
 	mfspr	r11, SPRN_SRR1		/* check whether user or kernel */
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
 	andi.	r11, r11, MSR_PR
 
 	EXCEPTION_PROLOG_1
@@ -348,9 +356,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
 	beq	1f			/* if so, try to put a PTE */
 	li	r3,0			/* into the hash table */
 	mr	r4,r12			/* SRR0 is fault address */
+#ifdef CONFIG_PPC_BOOK3S_604
 BEGIN_MMU_FTR_SECTION
 	bl	hash_page
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
 #endif	/* CONFIG_VMAP_STACK */
 1:	mr	r4,r12
 	andis.	r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
@@ -683,6 +693,7 @@ handle_page_fault_tramp_2:
 	EXC_XFER_LITE(0x300, handle_page_fault)
 
 #ifdef CONFIG_VMAP_STACK
+#ifdef CONFIG_PPC_BOOK3S_604
 .macro save_regs_thread		thread
 	stw	r0, THR0(\thread)
 	stw	r3, THR3(\thread)
@@ -754,6 +765,7 @@ fast_hash_page_return:
 	mfspr	r11, SPRN_SPRG_SCRATCH1
 	mfspr	r10, SPRN_SPRG_SCRATCH0
 	rfi
+#endif /* CONFIG_PPC_BOOK3S_604 */
 
 stack_overflow:
 	vmap_stack_overflow_exception
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
index 3f972db17761..446d9de88ce4 100644
--- a/arch/powerpc/mm/book3s32/Makefile
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -6,4 +6,6 @@ ifdef CONFIG_KASAN
 CFLAGS_mmu.o  		+= -DDISABLE_BRANCH_PROFILING
 endif
 
-obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o
+obj-y += mmu.o mmu_context.o
+obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
+obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH] powerpc/perf/hv-24x7: Dont create sysfs event files for dummy events
From: kajoljain @ 2020-12-18  7:51 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: suka, maddy, atrajeev
In-Reply-To: <87o8isx77u.fsf@mpe.ellerman.id.au>



On 12/18/20 6:26 AM, Michael Ellerman wrote:
> Kajol Jain <kjain@linux.ibm.com> writes:
>> hv_24x7 performance monitoring unit creates list of supported events
>> from the event catalog obtained via HCALL. hv_24x7 catalog could also
>> contain invalid or dummy events (with names like FREE_  or CPM_FREE_ so
>> on). These events does not have any hardware counters backing them.
>> So patch adds a check to string compare the event names to filter
>> out them.
>>
>> Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
>> ---
>>  arch/powerpc/perf/hv-24x7.c | 8 ++++++++
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
>> index 6e7e820508df..c3252d8a7818 100644
>> --- a/arch/powerpc/perf/hv-24x7.c
>> +++ b/arch/powerpc/perf/hv-24x7.c
>> @@ -894,6 +894,11 @@ static int create_events_from_catalog(struct attribute ***events_,
>>  
>>  		name = event_name(event, &nl);
>>  
>> +		if (strstr(name, "FREE_")) {
>> +			pr_info("invalid event %zu (%.*s)\n", event_idx, nl, name);
>> +			junk_events++;
>> +			continue;
> 
> I don't think we want a print for each event, just one at the end saying
> "Dropped %d invalid events" would be preferable I think.

Hi Michael,
  Sure I will remove prints for each event. Having one print for number of dropped
events may not be useful. So I will drop that too.

> 
> 
>> +		}
>>  		if (event->event_group_record_len == 0) {
>>  			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
>>  					event_idx, nl, name);
>> @@ -955,6 +960,9 @@ static int create_events_from_catalog(struct attribute ***events_,
>>  			continue;
>>  
>>  		name  = event_name(event, &nl);
>> +		if (strstr(name, "FREE_"))
>> +			continue;
> 
> Would be nice if the string comparison was in a single place, ie. in a
> helper function.

Sure I will make that change.

Thanks,
Kajol Jain
> 
> cheers
> 

^ permalink raw reply

* [PATCH v2] powerpc/perf/hv-24x7: Dont create sysfs event files for dummy events
From: Kajol Jain @ 2020-12-18 10:01 UTC (permalink / raw)
  To: mpe, linuxppc-dev; +Cc: kjain, suka, maddy, atrajeev

hv_24x7 performance monitoring unit creates list of supported events
from the event catalog obtained via HCALL. hv_24x7 catalog could also
contain invalid or dummy events (with names like FREE_* or CPM_FREE_*
and RESERVED*). These events do not have any hardware counters
backing them. So patch adds a check to string compare the event names
to filter out them.

Result in power9 machine:

Before this patch:
.....
  hv_24x7/PM_XLINK2_OUT_ODD_CYC,chip=?/              [Kernel PMU event]
  hv_24x7/PM_XLINK2_OUT_ODD_DATA_COUNT,chip=?/       [Kernel PMU event]
  hv_24x7/PM_XLINK2_OUT_ODD_TOTAL_UTIL,chip=?/       [Kernel PMU event]
  hv_24x7/PM_XTS_ATR_DEMAND_CHECKOUT,chip=?/         [Kernel PMU event]
  hv_24x7/PM_XTS_ATR_DEMAND_CHECKOUT_MISS,chip=?/    [Kernel PMU event]
  hv_24x7/PM_XTS_ATSD_SENT,chip=?/                   [Kernel PMU event]
  hv_24x7/PM_XTS_ATSD_TLBI_RCV,chip=?/               [Kernel PMU event]
  hv_24x7/RESERVED_NEST1,chip=?/                     [Kernel PMU event]
  hv_24x7/RESERVED_NEST10,chip=?/                    [Kernel PMU event]
  hv_24x7/RESERVED_NEST11,chip=?/                    [Kernel PMU event]
  hv_24x7/RESERVED_NEST12,chip=?/                    [Kernel PMU event]
  hv_24x7/RESERVED_NEST13,chip=?/                    [Kernel PMU event]
......

Dmesg:
[    0.000362] printk: console [hvc0] enabled
[    0.815452] hv-24x7: read 1530 catalog entries, created 537 event attrs
(0 failures), 275 descs

After this patch:
......
  hv_24x7/PM_XLINK2_OUT_ODD_AVLBL_CYC,chip=?/        [Kernel PMU event]
  hv_24x7/PM_XLINK2_OUT_ODD_CYC,chip=?/              [Kernel PMU event]
  hv_24x7/PM_XLINK2_OUT_ODD_DATA_COUNT,chip=?/       [Kernel PMU event]
  hv_24x7/PM_XLINK2_OUT_ODD_TOTAL_UTIL,chip=?/       [Kernel PMU event]
  hv_24x7/PM_XTS_ATR_DEMAND_CHECKOUT,chip=?/         [Kernel PMU event]
  hv_24x7/PM_XTS_ATR_DEMAND_CHECKOUT_MISS,chip=?/    [Kernel PMU event]
  hv_24x7/PM_XTS_ATSD_SENT,chip=?/                   [Kernel PMU event]
  hv_24x7/PM_XTS_ATSD_TLBI_RCV,chip=?/               [Kernel PMU event]
  hv_24x7/TOD,chip=?/                                [Kernel PMU event]
......

Demsg:
[    0.000357] printk: console [hvc0] enabled
[    0.808592] hv-24x7: read 1530 catalog entries, created 509 event attrs
(0 failures), 275 descs

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/perf/hv-24x7.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

---
Changelog
v1 -> v2
- Include "RESERVED*" as part of the invalid event check as
  suggested by Madhavan Srinivasan
- Add new helper function "ignore_event" to check invalid/dummy
  events as suggested by Michael Ellerman
- Remove pr_info to print each invalid event as suggested by
  Michael Ellerman
---
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 6e7e820508df..1a6004d88f98 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -764,6 +764,16 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
 	return ev_len;
 }
 
+/*
+ * Return true incase of invalid or dummy events with names like FREE_* or CPM_FREE_*
+ * and RESERVED*
+ */
+static bool ignore_event(const char *name)
+{
+	return (strstr(name, "FREE_") || !strncmp(name, "RESERVED", 8)) ?
+			true : false;
+}
+
 #define MAX_4K (SIZE_MAX / 4096)
 
 static int create_events_from_catalog(struct attribute ***events_,
@@ -894,6 +904,10 @@ static int create_events_from_catalog(struct attribute ***events_,
 
 		name = event_name(event, &nl);
 
+		if (ignore_event(name)) {
+			junk_events++;
+			continue;
+		}
 		if (event->event_group_record_len == 0) {
 			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
 					event_idx, nl, name);
@@ -955,6 +969,9 @@ static int create_events_from_catalog(struct attribute ***events_,
 			continue;
 
 		name  = event_name(event, &nl);
+		if (ignore_event(name))
+			continue;
+
 		nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
 		ct    = event_data_to_attrs(event_idx, events + event_attr_ct,
 					    event, nonce);
-- 
2.27.0


^ permalink raw reply related

* [PATCH 1/3] powerpc/vdso: Block R_PPC_REL24 relocations
From: Michael Ellerman @ 2020-12-18 11:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: skirmisher

Add R_PPC_REL24 relocations to the list of relocations we do NOT
support in the VDSO.

These are generated in some cases and we do not support relocating
them at runtime, so if they appear then the VDSO will not work at
runtime, therefore it's preferable to break the build if we see them.

Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/vdso32/Makefile | 2 +-
 arch/powerpc/kernel/vdso64/Makefile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 59aa2944ecae..6616f4e794d0 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -2,7 +2,7 @@
 
 # List of files in the vdso, has to be asm only for now
 
-ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN
+ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24
 include $(srctree)/lib/vdso/Makefile
 
 obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index d365810a689a..bf363ff37152 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # List of files in the vdso, has to be asm only for now
 
-ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN
+ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24
 include $(srctree)/lib/vdso/Makefile
 
 obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
-- 
2.25.1


^ permalink raw reply related

* [PATCH 2/3] powerpc/vdso: Don't pass 64-bit ABI cflags to 32-bit VDSO
From: Michael Ellerman @ 2020-12-18 11:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: skirmisher
In-Reply-To: <20201218111619.1206391-1-mpe@ellerman.id.au>

When building the 32-bit VDSO, we are building 32-bit code as part of
a 64-bit kernel build. That requires us to tweak the cflags to trick
the compiler into building 32-bit code for us. The main way we do that
is by passing -m32, but there are other options that affect code
generation and ABI selection.

In particular when building vgettimeofday.c, we end up passing
-mcall-aixdesc because it's in KBUILD_CFLAGS, which causes the
compiler to generate function descriptors, and dot symbols, eg:

  $ nm arch/powerpc/kernel/vdso32/vgettimeofday.o
  000005d0 T .__c_kernel_clock_getres
  00000024 D __c_kernel_clock_getres
  ...

We get away with that at the moment because we also use the DOTSYM
macro, and that is also incorrectly prepending a '.' in 32-bit VDSO
code due to a separate bug.

But we shouldn't be generating function descriptors for this file,
there's no 32-bit ABI that includes function descriptors, so the
resulting object file is some frankenstein and it's surprising that it
even links.

So filter out all the ABI-related options we add to CFLAGS for 64-bit
builds, so that they're not used when building 32-bit code. With that
we only see regular text symbols:

  $ nm arch/powerpc/kernel/vdso32/vgettimeofday.o                                                                                                                                     michael@alpine1-p1
  000005d0 T __c_kernel_clock_getres
  00000000 T __c_kernel_clock_gettime
  00000200 T __c_kernel_clock_gettime64
  00000410 T __c_kernel_gettimeofday
  00000650 T __c_kernel_time

Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/vdso32/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 6616f4e794d0..9cb6f524854b 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -27,7 +27,7 @@ endif
 CC32FLAGS :=
 ifdef CONFIG_PPC64
 CC32FLAGS += -m32
-KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS))
 endif
 
 targets := $(obj-vdso32) vdso32.so.dbg
-- 
2.25.1


^ permalink raw reply related

* [PATCH 3/3] powerpc/vdso: Fix DOTSYM for 32-bit LE VDSO
From: Michael Ellerman @ 2020-12-18 11:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: skirmisher
In-Reply-To: <20201218111619.1206391-1-mpe@ellerman.id.au>

Skirmisher reported on IRC that the 32-bit LE VDSO was hanging. This
turned out to be due to a branch to self in eg. __kernel_gettimeofday.
Looking at the disassembly with objdump -dR shows why:

  00000528 <__kernel_gettimeofday>:
   528:   f0 ff 21 94     stwu    r1,-16(r1)
   52c:   a6 02 08 7c     mflr    r0
   530:   f0 ff 21 94     stwu    r1,-16(r1)
   534:   14 00 01 90     stw     r0,20(r1)
   538:   05 00 9f 42     bcl     20,4*cr7+so,53c <__kernel_gettimeofday+0x14>
   53c:   a6 02 a8 7c     mflr    r5
   540:   ff ff a5 3c     addis   r5,r5,-1
   544:   c4 fa a5 38     addi    r5,r5,-1340
   548:   f0 00 a5 38     addi    r5,r5,240
   54c:   01 00 00 48     bl      54c <__kernel_gettimeofday+0x24>
                          54c: R_PPC_REL24        .__c_kernel_gettimeofday

Because we don't process relocations for the VDSO, this branch remains
a branch from 0x54c to 0x54c.

With the preceding patch to prohibit R_PPC_REL24 relocations, we
instead get a build failure:

  0000054c R_PPC_REL24       .__c_kernel_gettimeofday
  00000598 R_PPC_REL24       .__c_kernel_clock_gettime
  000005e4 R_PPC_REL24       .__c_kernel_clock_gettime64
  00000630 R_PPC_REL24       .__c_kernel_clock_getres
  0000067c R_PPC_REL24       .__c_kernel_time
  arch/powerpc/kernel/vdso32/vdso32.so.dbg: dynamic relocations are not supported

The root cause is that we're branching to `.__c_kernel_gettimeofday`.
But this is 32-bit LE code, which doesn't use function descriptors, so
there are no dot symbols.

The reason we're trying to branch to a dot symbol is because we're
using the DOTSYM macro, but the ifdefs we use to define the DOTSYM
macro do not currently work for 32-bit LE.

So like previous commits we need to differentiate if the current
compilation unit is 64-bit, rather than the kernel as a whole. ie.
switch from CONFIG_PPC64 to __powerpc64__.

With that fixed 32-bit LE code gets the empty version of DOTSYM, which
just resolves to the original symbol name, leading to a direct branch
and no relocations:

  000003f8 <__kernel_gettimeofday>:
   3f8:   f0 ff 21 94     stwu    r1,-16(r1)
   3fc:   a6 02 08 7c     mflr    r0
   400:   f0 ff 21 94     stwu    r1,-16(r1)
   404:   14 00 01 90     stw     r0,20(r1)
   408:   05 00 9f 42     bcl     20,4*cr7+so,40c <__kernel_gettimeofday+0x14>
   40c:   a6 02 a8 7c     mflr    r5
   410:   ff ff a5 3c     addis   r5,r5,-1
   414:   f4 fb a5 38     addi    r5,r5,-1036
   418:   f0 00 a5 38     addi    r5,r5,240
   41c:   85 06 00 48     bl      aa0 <__c_kernel_gettimeofday>

Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.")
Reported-by: "Will Springer <skirmisher@protonmail.com>"
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ppc_asm.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index cfa814824285..cc1bca571332 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -180,7 +180,12 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #define VCPU_GPR(n)	__VCPU_GPR(__REG_##n)
 
 #ifdef __KERNEL__
-#ifdef CONFIG_PPC64
+
+/*
+ * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
+ * version below in the else case of the ifdef.
+ */
+#ifdef __powerpc64__
 
 #define STACKFRAMESIZE 256
 #define __STK_REG(i)   (112 + ((i)-14)*8)
-- 
2.25.1


^ permalink raw reply related

* [PATCH] mm: Remove arch_remap() and mm-arch-hooks.h
From: Christophe Leroy @ 2020-12-18 14:07 UTC (permalink / raw)
  To: Arnd Bergmann, Andrew Morton
  Cc: linux-arch, Richard Weinberger, Jeff Dike, linux-um, linux-kernel,
	linux-mm, linuxppc-dev, Anton Ivanov

powerpc was the last provider of arch_remap() and the last
user of mm-arch-hooks.h.

Since commit 526a9c4a7234 ("powerpc/vdso: Provide vdso_remap()"),
arch_remap() hence mm-arch-hooks.h are not used anymore.

Remove them.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/um/include/asm/Kbuild          |  1 -
 include/asm-generic/Kbuild          |  1 -
 include/asm-generic/mm-arch-hooks.h | 16 ----------------
 include/linux/mm-arch-hooks.h       | 22 ----------------------
 mm/mremap.c                         |  3 ---
 5 files changed, 43 deletions(-)
 delete mode 100644 include/asm-generic/mm-arch-hooks.h
 delete mode 100644 include/linux/mm-arch-hooks.h

diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 1c63b260ecc4..314979467db1 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
 generic-y += mmiowb.h
 generic-y += module.lds.h
 generic-y += param.h
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 4365b9aa3e3f..e867eb3058d5 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -34,7 +34,6 @@ mandatory-y += kmap_size.h
 mandatory-y += kprobes.h
 mandatory-y += linkage.h
 mandatory-y += local.h
-mandatory-y += mm-arch-hooks.h
 mandatory-y += mmiowb.h
 mandatory-y += mmu.h
 mandatory-y += mmu_context.h
diff --git a/include/asm-generic/mm-arch-hooks.h b/include/asm-generic/mm-arch-hooks.h
deleted file mode 100644
index 5ff0e5193f85..000000000000
--- a/include/asm-generic/mm-arch-hooks.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Architecture specific mm hooks
- */
-
-#ifndef _ASM_GENERIC_MM_ARCH_HOOKS_H
-#define _ASM_GENERIC_MM_ARCH_HOOKS_H
-
-/*
- * This file should be included through arch/../include/asm/Kbuild for
- * the architecture which doesn't need specific mm hooks.
- *
- * In that case, the generic hooks defined in include/linux/mm-arch-hooks.h
- * are used.
- */
-
-#endif /* _ASM_GENERIC_MM_ARCH_HOOKS_H */
diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h
deleted file mode 100644
index 9c4bedc95504..000000000000
--- a/include/linux/mm-arch-hooks.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Generic mm no-op hooks.
- *
- * Copyright (C) 2015, IBM Corporation
- * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
- */
-#ifndef _LINUX_MM_ARCH_HOOKS_H
-#define _LINUX_MM_ARCH_HOOKS_H
-
-#include <asm/mm-arch-hooks.h>
-
-#ifndef arch_remap
-static inline void arch_remap(struct mm_struct *mm,
-			      unsigned long old_start, unsigned long old_end,
-			      unsigned long new_start, unsigned long new_end)
-{
-}
-#define arch_remap arch_remap
-#endif
-
-#endif /* _LINUX_MM_ARCH_HOOKS_H */
diff --git a/mm/mremap.c b/mm/mremap.c
index c5590afe7165..e43696a91260 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -22,7 +22,6 @@
 #include <linux/syscalls.h>
 #include <linux/mmu_notifier.h>
 #include <linux/uaccess.h>
-#include <linux/mm-arch-hooks.h>
 #include <linux/userfaultfd_k.h>
 
 #include <asm/cacheflush.h>
@@ -560,8 +559,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		new_addr = err;
 	} else {
 		mremap_userfaultfd_prep(new_vma, uf);
-		arch_remap(mm, old_addr, old_addr + old_len,
-			   new_addr, new_addr + new_len);
 	}
 
 	/* Conceal VM_ACCOUNT so old reservation is not undone */
-- 
2.25.0


^ permalink raw reply related

* [PATCH 02/23] arch: alpha: drop misleading warning on spurious IRQ
From: Enrico Weigelt, metux IT consult @ 2020-12-18 14:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: mark.rutland, dalias, linux-ia64, linux-sh, alexander.shishkin,
	linus.walleij, James.Bottomley, paulus, hpa, sparclinux, will,
	gerg, linux-arch, linux-s390, linux-c6x-dev, ysato, jolsa, deller,
	x86, bgolaszewski, tony, geert, catalin.marinas, linux-alpha,
	arnd, msalter, jacquiot.aurelien, linux-gpio, linux-m68k, bp,
	namhyung, tglx, linux-omap, tsbogend, linux-parisc, linux-mips,
	maz, linuxppc-dev, davem
In-Reply-To: <20201218143122.19459-1-info@metux.net>

The warning in ack_bad_irq() is misleading in several ways:
* the term "vector" isn't quite correct
* the printing format isn't consistent across the archs: some print decimal,
  some hex, some hex w/o 0x prefix.
* the printed linux irq isn't meaningful in all cases - we actually would
  want it to print the hw irq.

Since all call sites already print out more detailed and correct information,
we just don't need to duplicate this in each single arch. So just drop it.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
---
 arch/alpha/kernel/irq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index f6d2946edbd2..c1980eea75a6 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -35,7 +35,6 @@ DEFINE_PER_CPU(unsigned long, irq_pmi_count);
 void ack_bad_irq(unsigned int irq)
 {
 	irq_err_count++;
-	printk(KERN_CRIT "Unexpected IRQ trap at vector %u\n", irq);
 }
 
 #ifdef CONFIG_SMP 
-- 
2.11.0


^ permalink raw reply related

* cleanup handling of bad IRQs
From: Enrico Weigelt, metux IT consult @ 2020-12-18 14:30 UTC (permalink / raw)
  To: linux-kernel
  Cc: mark.rutland, dalias, linux-ia64, linux-sh, alexander.shishkin,
	linus.walleij, James.Bottomley, paulus, hpa, sparclinux, will,
	gerg, linux-arch, linux-s390, linux-c6x-dev, ysato, jolsa, deller,
	x86, bgolaszewski, tony, geert, catalin.marinas, linux-alpha,
	arnd, msalter, jacquiot.aurelien, linux-gpio, linux-m68k, bp,
	namhyung, tglx, linux-omap, tsbogend, linux-parisc, linux-mips,
	maz, linuxppc-dev, davem

Hello friends,


here's a patch queue for cleaning up the IRQ handling. Inspired by a
discussion we had on a previous patch of mine:

    "arch: fix 'unexpected IRQ trap at vector' warnings"
    https://www.spinics.net/lists/kernel/msg3763137.html

Turned out that the whole message, as it is right now, doesn't make much
sense at at all - not just incorrect wording, but also not quite useful
information. And the whole ack_bad_irq() thing deserves a cleanup anyways.

So, I've had a closer look and came to these conclusions:

1. The warning message doesn't need to be duplicated in the per architecture
   ack_bad_irq() functions. All, but one callers already do their own warning.
   Thus just adding a pr_warn() call there, printing out more useful data
   like the hardware IRQ number, and dropping all warnings from all the
   ack_bad_irq() functions.

2. Many of the ack_bad_irq()'s count up the spurious interrupts - lots of
   duplications over the various archs. Some of them using atomic_t, some
   just plain ints. Consolidating this by introducing a global counter
   with inline'd accessors and doing the upcounting in the (currently 3)
   call sites of ack_bad_irq(). After that, step by step changing all
   archs to use the new counter.

3. For all but one arch (x86), ack_bad_irq() became a no-op.

   On x86, it's just a call to ack_APIC_irq(), in order to prevent lockups
   when IRQs missed to be ack'ed on the APIC. Could we perhaps do this in
   some better place ? In that case, ack_bad_irq() could easily be removed
   entirely.

have fun,

--mtx



^ permalink raw reply

* [PATCH 05/23] arch: ia64: drop misleading warning on spurious IRQ
From: Enrico Weigelt, metux IT consult @ 2020-12-18 14:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: mark.rutland, dalias, linux-ia64, linux-sh, alexander.shishkin,
	linus.walleij, James.Bottomley, paulus, hpa, sparclinux, will,
	gerg, linux-arch, linux-s390, linux-c6x-dev, ysato, jolsa, deller,
	x86, bgolaszewski, tony, geert, catalin.marinas, linux-alpha,
	arnd, msalter, jacquiot.aurelien, linux-gpio, linux-m68k, bp,
	namhyung, tglx, linux-omap, tsbogend, linux-parisc, linux-mips,
	maz, linuxppc-dev, davem
In-Reply-To: <20201218143122.19459-1-info@metux.net>

The warning in ack_bad_irq() is misleading in several ways:
* the term "vector" isn't quite correct
* the printing format isn't consistent across the archs: some print decimal,
  some hex, some hex w/o "0x" prefix.
* the printed linux irq isn't meaningful in all cases - we actually would
  want it to print the hw irq.

Since all call sites already print out more detailed and correct information,
we just don't need to duplicate this in each single arch. So just drop it.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
---
 arch/ia64/include/asm/hardirq.h | 2 +-
 arch/ia64/kernel/irq.c          | 9 ---------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index ccde7c2ba00f..dddaafaf84e0 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -22,6 +22,6 @@
 
 extern void __iomem *ipi_base_addr;
 
-void ack_bad_irq(unsigned int irq);
+#define ack_bad_irq(irq)
 
 #endif /* _ASM_IA64_HARDIRQ_H */
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index ecef17c7c35b..1365c7cf2095 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -28,15 +28,6 @@
 #include <asm/xtp.h>
 
 /*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
-}
-
-/*
  * Interrupt statistics:
  */
 
-- 
2.11.0


^ permalink raw reply related

* [PATCH 06/23] arch: mips: drop misleading warning on spurious IRQ
From: Enrico Weigelt, metux IT consult @ 2020-12-18 14:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: mark.rutland, dalias, linux-ia64, linux-sh, alexander.shishkin,
	linus.walleij, James.Bottomley, paulus, hpa, sparclinux, will,
	gerg, linux-arch, linux-s390, linux-c6x-dev, ysato, jolsa, deller,
	x86, bgolaszewski, tony, geert, catalin.marinas, linux-alpha,
	arnd, msalter, jacquiot.aurelien, linux-gpio, linux-m68k, bp,
	namhyung, tglx, linux-omap, tsbogend, linux-parisc, linux-mips,
	maz, linuxppc-dev, davem
In-Reply-To: <20201218143122.19459-1-info@metux.net>

The warning in ack_bad_irq() is misleading in several ways:
* the term "vector" isn't quite correct
* the printing format isn't consistent across the archs: some print decimal,
  some hex, some hex w/o 0x prefix.
* the printed linux irq isn't meaningful in all cases - we actually would
  want it to print the hw irq.

Since all call sites already print out more detailed and correct information,
we just don't need to duplicate this in each single arch. So just drop it.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
---
 arch/mips/include/asm/hardirq.h | 3 +--
 arch/mips/kernel/irq.c          | 9 ---------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/arch/mips/include/asm/hardirq.h b/arch/mips/include/asm/hardirq.h
index c977a86c2c65..75444120e6cb 100644
--- a/arch/mips/include/asm/hardirq.h
+++ b/arch/mips/include/asm/hardirq.h
@@ -10,8 +10,7 @@
 #ifndef _ASM_HARDIRQ_H
 #define _ASM_HARDIRQ_H
 
-extern void ack_bad_irq(unsigned int irq);
-#define ack_bad_irq ack_bad_irq
+#define ack_bad_irq(irq)
 
 #include <asm-generic/hardirq.h>
 
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index 85b6c60f285d..c98be305fab6 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -27,15 +27,6 @@
 
 void *irq_stack[NR_CPUS];
 
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk("unexpected IRQ # %d\n", irq);
-}
-
 atomic_t irq_err_count;
 
 int arch_show_interrupts(struct seq_file *p, int prec)
-- 
2.11.0


^ permalink raw reply related

* [PATCH 04/23] arch: c6x: drop misleading warning on spurious IRQ
From: Enrico Weigelt, metux IT consult @ 2020-12-18 14:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: mark.rutland, dalias, linux-ia64, linux-sh, alexander.shishkin,
	linus.walleij, James.Bottomley, paulus, hpa, sparclinux, will,
	gerg, linux-arch, linux-s390, linux-c6x-dev, ysato, jolsa, deller,
	x86, bgolaszewski, tony, geert, catalin.marinas, linux-alpha,
	arnd, msalter, jacquiot.aurelien, linux-gpio, linux-m68k, bp,
	namhyung, tglx, linux-omap, tsbogend, linux-parisc, linux-mips,
	maz, linuxppc-dev, davem
In-Reply-To: <20201218143122.19459-1-info@metux.net>

The warning in ack_bad_irq() is misleading in several ways:
* the term "vector" isn't quite correct
* the printing format isn't consistent across the archs: some print decimal,
  some hex, some hex w/o 0x prefix.
* the printed linux irq isn't meaningful in all cases - we actually would
  want it to print the hw irq.

Since all call sites already print out more detailed and correct information,
we just don't need to duplicate this in each single arch. So just drop it.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
---
 arch/c6x/kernel/irq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/c6x/kernel/irq.c b/arch/c6x/kernel/irq.c
index e4c53d185b62..b9f7cfa2ed21 100644
--- a/arch/c6x/kernel/irq.c
+++ b/arch/c6x/kernel/irq.c
@@ -116,7 +116,6 @@ void __init init_IRQ(void)
 
 void ack_bad_irq(int irq)
 {
-	printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq);
 	irq_err_count++;
 }
 
-- 
2.11.0


^ permalink raw reply related

* [PATCH 11/23] arch: sparc: drop misleading warning on spurious IRQ
From: Enrico Weigelt, metux IT consult @ 2020-12-18 14:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: mark.rutland, dalias, linux-ia64, linux-sh, alexander.shishkin,
	linus.walleij, James.Bottomley, paulus, hpa, sparclinux, will,
	gerg, linux-arch, linux-s390, linux-c6x-dev, ysato, jolsa, deller,
	x86, bgolaszewski, tony, geert, catalin.marinas, linux-alpha,
	arnd, msalter, jacquiot.aurelien, linux-gpio, linux-m68k, bp,
	namhyung, tglx, linux-omap, tsbogend, linux-parisc, linux-mips,
	maz, linuxppc-dev, davem
In-Reply-To: <20201218143122.19459-1-info@metux.net>

The warning in ack_bad_irq() is misleading in several ways:
* the term "vector" isn't quite correct
* the printing format isn't consistent across the archs: some print decimal,
  some hex, some hex w/o 0x prefix.
* the printed linux irq isn't meaningful in all cases - we actually would
  want it to print the hw irq.

Since all call sites already print out more detailed and correct information,
we just don't need to duplicate this in each single arch. So just drop it.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
---
 arch/sparc/include/asm/hardirq_64.h | 2 +-
 arch/sparc/kernel/irq_64.c          | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/sparc/include/asm/hardirq_64.h b/arch/sparc/include/asm/hardirq_64.h
index 75b92bfe04b5..874151f520de 100644
--- a/arch/sparc/include/asm/hardirq_64.h
+++ b/arch/sparc/include/asm/hardirq_64.h
@@ -14,6 +14,6 @@
 #define local_softirq_pending_ref \
 	__cpu_data.__softirq_pending
 
-void ack_bad_irq(unsigned int irq);
+#define ack_bad_irq(irq)
 
 #endif /* !(__SPARC64_HARDIRQ_H) */
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 3ec9f1402aad..ea2a52f7fe53 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -284,11 +284,6 @@ static unsigned int sysino_exists(u32 devhandle, unsigned int devino)
 	return irq;
 }
 
-void ack_bad_irq(unsigned int irq)
-{
-	pr_crit("BAD IRQ ack %d\n", irq);
-}
-
 void irq_install_pre_handler(int irq,
 			     void (*func)(unsigned int, void *, void *),
 			     void *arg1, void *arg2)
-- 
2.11.0


^ permalink raw reply related

* [PATCH 01/23] kernel: irq: irqdescs: warn on spurious IRQ
From: Enrico Weigelt, metux IT consult @ 2020-12-18 14:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: mark.rutland, dalias, linux-ia64, linux-sh, alexander.shishkin,
	linus.walleij, James.Bottomley, paulus, hpa, sparclinux, will,
	gerg, linux-arch, linux-s390, linux-c6x-dev, ysato, jolsa, deller,
	x86, bgolaszewski, tony, geert, catalin.marinas, linux-alpha,
	arnd, msalter, jacquiot.aurelien, linux-gpio, linux-m68k, bp,
	namhyung, tglx, linux-omap, tsbogend, linux-parisc, linux-mips,
	maz, linuxppc-dev, davem
In-Reply-To: <20201218143122.19459-1-info@metux.net>

Add a warning on spurious IRQs to __handle_domain_irq(), also telling the
linux IRQ number (if any), the hw IRQ number and the max nr of IRQs.

That's far more informative than the warnings in (some of) the individual
arch's ack_bad_irq()'s. These aren't really helpful since either the
other callers already had printed more detailed information or (when called
by __handle_domain_irq()) the printout just doesn't tell anything useful.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
---
 kernel/irq/irqdesc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index e810eb9906ea..62a381351775 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -681,6 +681,9 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 	 * than crashing, do something sensible.
 	 */
 	if (unlikely(!irq || irq >= nr_irqs)) {
+		if (printk_ratelimit())
+			pr_warn("spurious IRQ: irq=%d hwirq=%d nr_irqs=%d\n",
+				irq, hwirq, nr_irqs);
 		ack_bad_irq(irq);
 		ret = -EINVAL;
 	} else {
-- 
2.11.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox