LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 3/5] powerpc/fault: Unnest definition of page_fault_is_write() and page_fault_is_bad()
From: Christophe Leroy @ 2020-12-08  8:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
	aneesh.kumar
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <0d37490a067840f53fc5b118869917c0aec9ab87.1607416578.git.christophe.leroy@csgroup.eu>

To make it more readable, separate page_fault_is_write() and page_fault_is_bad()
to avoir several levels of #ifdefs

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/fault.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index f6ae56a0d7a3..3fcd34c28e10 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -363,17 +363,19 @@ static void sanity_check_fault(bool is_write, bool is_user,
  */
 #if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 #define page_fault_is_write(__err)	((__err) & ESR_DST)
-#define page_fault_is_bad(__err)	(0)
 #else
 #define page_fault_is_write(__err)	((__err) & DSISR_ISSTORE)
-#if defined(CONFIG_PPC_8xx)
+#endif
+
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+#define page_fault_is_bad(__err)	(0)
+#elif defined(CONFIG_PPC_8xx)
 #define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
 #elif defined(CONFIG_PPC64)
 #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_64S)
 #else
 #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
 #endif
-#endif
 
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
-- 
2.25.0


^ permalink raw reply related

* [PATCH v3 1/5] powerpc/book3s64/kuap: Improve error reporting with KUAP
From: Christophe Leroy @ 2020-12-08  8:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
	aneesh.kumar
  Cc: linuxppc-dev, linux-kernel

From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

This partially reverts commit eb232b162446 ("powerpc/book3s64/kuap: Improve
error reporting with KUAP") and update the fault handler to print

[   55.022514] Kernel attempted to access user page (7e6725b70000) - exploit attempt? (uid: 0)
[   55.022528] BUG: Unable to handle kernel data access on read at 0x7e6725b70000
[   55.022533] Faulting instruction address: 0xc000000000e8b9bc
[   55.022540] Oops: Kernel access of bad area, sig: 11 [#1]
....

when the kernel access userspace address without unlocking AMR.

bad_kuap_fault() is added as part of commit 5e5be3aed230 ("powerpc/mm: Detect
bad KUAP faults") to catch userspace access incorrectly blocked by AMR. Hence
retain the full stack dump there even with hash translation. Also, add a comment
explaining the difference between hash and radix.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/book3s/32/kup.h     |  4 +--
 arch/powerpc/include/asm/book3s/64/kup.h     | 34 ++++++++++----------
 arch/powerpc/include/asm/kup.h               |  4 +--
 arch/powerpc/include/asm/nohash/32/kup-8xx.h |  4 +--
 arch/powerpc/mm/fault.c                      |  4 +--
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index b18cd931e325..32fd4452e960 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -177,8 +177,8 @@ static inline void restore_user_access(unsigned long flags)
 		allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write, unsigned long error_code)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	unsigned long begin = regs->kuap & 0xf0000000;
 	unsigned long end = regs->kuap << 28;
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index f2e6dd78d5e2..7075c92c320c 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -353,29 +353,29 @@ static inline void set_kuap(unsigned long value)
 	isync();
 }
 
-#define RADIX_KUAP_BLOCK_READ	UL(0x4000000000000000)
-#define RADIX_KUAP_BLOCK_WRITE	UL(0x8000000000000000)
-
 static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write, unsigned long error_code)
+				  bool is_write)
 {
 	if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
 		return false;
-
-	if (radix_enabled()) {
-		/*
-		 * Will be a storage protection fault.
-		 * Only check the details of AMR[0]
-		 */
-		return WARN((regs->kuap & (is_write ? RADIX_KUAP_BLOCK_WRITE : RADIX_KUAP_BLOCK_READ)),
-			    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
-	}
 	/*
-	 * We don't want to WARN here because userspace can setup
-	 * keys such that a kernel access to user address can cause
-	 * fault
+	 * For radix this will be a storage protection fault (DSISR_PROTFAULT).
+	 * For hash this will be a key fault (DSISR_KEYFAULT)
 	 */
-	return !!(error_code & DSISR_KEYFAULT);
+	/*
+	 * We do have exception table entry, but accessing the
+	 * userspace results in fault.  This could be because we
+	 * didn't unlock the AMR or access is denied by userspace
+	 * using a key value that blocks access. We are only interested
+	 * in catching the use case of accessing without unlocking
+	 * the AMR. Hence check for BLOCK_WRITE/READ against AMR.
+	 */
+	if (is_write) {
+		return WARN(((regs->amr & AMR_KUAP_BLOCK_WRITE) == AMR_KUAP_BLOCK_WRITE),
+			    "Bug: Write fault blocked by AMR!");
+	}
+	return WARN(((regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ),
+		    "Bug: Read fault blocked by AMR!");
 }
 
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index f8ec679bd2de..5a9820c54da9 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -62,8 +62,8 @@ void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write, unsigned long error_code)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return false;
 }
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 7bdd9e5b63ed..567cdc557402 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -60,8 +60,8 @@ static inline void restore_user_access(unsigned long flags)
 	mtspr(SPRN_MD_AP, flags);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write, unsigned long error_code)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000),
 		    "Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c91621df0c61..b12595102525 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -210,7 +210,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 		return true;
 	}
 
-	if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
+	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
 	    !search_exception_tables(regs->nip)) {
 		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
 				    address,
@@ -227,7 +227,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 
 	// Read/write fault in a valid region (the exception table search passed
 	// above), but blocked by KUAP is bad, it can never succeed.
-	if (bad_kuap_fault(regs, address, is_write, error_code))
+	if (bad_kuap_fault(regs, address, is_write))
 		return true;
 
 	// What's left? Kernel fault on user in well defined regions (extable
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v2] powerpc/powermac: Fix low_sleep_handler with CONFIG_VMAP_STACK
From: Giuseppe Sacco @ 2020-12-08  8:30 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <e3e0d8042a3ba75cb4a9546c19c408b5b5b28994.1607404931.git.christophe.leroy@csgroup.eu>

Hello Christophe,

Il giorno mar, 08/12/2020 alle 05.24 +0000, Christophe Leroy ha
scritto:
> low_sleep_handler() can't restore the context from standard
> stack because the stack can hardly be accessed with MMU OFF.
> 
> Store everything in a global storage area instead of storing
> a pointer to the stack in that global storage area.
> 
> To avoid a complete churn of the function, still use r1 as
> the pointer to the storage area during restore.
> 
> Reported-by: Giuseppe Sacco <giuseppe@sguazz.it>
> Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK")
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
> This is only build tested. Giuseppe can you test it ? Thanks.
> 
> v2: Changed an erroneous 'addis' to 'addi' ; Using bss instead of
> data section
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  arch/powerpc/platforms/Kconfig.cputype  |   2 +-
>  arch/powerpc/platforms/powermac/sleep.S | 132 +++++++++++-----------
> --
>  2 files changed, 60 insertions(+), 74 deletions(-)
[...]

I just tested the v2 patch against latest kernel. Please note that
yesterday patch was on a kernel named 5.10.0-rc6+, while today's patch
is on a kernel named 5.10.0-rc7+. Even with the latest kernel updates,
the patch apply correctly and machine boots normally, fixing the
problem.

$ uname -a
Linux titanium4 5.10.0-rc7+ #3 Tue Dec 8 09:11:20 CET 2020 ppc GNU/Linux
$ grep VMAP /boot/config-5.10.0-rc7+
CONFIG_HAVE_ARCH_VMAP_STACK=y
CONFIG_VMAP_STACK=y

Thank you again.

Bye,
Giuseppe



^ permalink raw reply

* Re: [PATCH] EDAC/mv64x60: Remove orphan mv64x60 driver
From: Michael Ellerman @ 2020-12-08  9:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: tony.luck, rric, linux-kernel, linuxppc-dev, james.morse, mchehab,
	linux-edac
In-Reply-To: <20201207111727.GC20489@zn.tnic>

Borislav Petkov <bp@alien8.de> writes:
> On Mon, Dec 07, 2020 at 03:02:53PM +1100, Michael Ellerman wrote:
>> The mv64x60 EDAC driver depends on CONFIG_MV64X60. But that symbol is
>> not user-selectable, and the last code that selected it was removed
>> with the C2K board support in 2018, see:
>> 
>>   92c8c16f3457 ("powerpc/embedded6xx: Remove C2K board support")
>> 
>> That means the driver is now dead code, so remove it.
>> 
>> Suggested-by: Borislav Petkov <bp@alien8.de>
>> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
>> ---
>>  drivers/edac/Kconfig        |   7 -
>>  drivers/edac/Makefile       |   1 -
>>  drivers/edac/mv64x60_edac.c | 883 ------------------------------------
>>  drivers/edac/mv64x60_edac.h | 114 -----
>>  4 files changed, 1005 deletions(-)
>>  delete mode 100644 drivers/edac/mv64x60_edac.c
>>  delete mode 100644 drivers/edac/mv64x60_edac.h
>
> Gladly taken and applied, thanks!

Thanks.

cheers

^ permalink raw reply

* linux-next: manual merge of the akpm-current tree with the powerpc tree
From: Stephen Rothwell @ 2020-12-08  9:40 UTC (permalink / raw)
  To: Andrew Morton, Michael Ellerman, PowerPC
  Cc: Ganesh Goudar, Linux Next Mailing List, Francis Laniel,
	Linux Kernel Mailing List, Mahesh Salgaonkar

[-- Attachment #1: Type: text/plain, Size: 2145 bytes --]

Hi all,

Today's linux-next merge of the akpm-current tree got conflicts in:

  drivers/misc/lkdtm/Makefile
  drivers/misc/lkdtm/lkdtm.h
  tools/testing/selftests/lkdtm/tests.txt

between commit:

  3ba150fb2120 ("lkdtm/powerpc: Add SLB multihit test")

from the powerpc tree and commit:

  014a486edd8a ("drivers/misc/lkdtm: add new file in LKDTM to test fortified strscpy")

from the akpm-current tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/misc/lkdtm/Makefile
index 5a92c74eca92,d898f7b22045..000000000000
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@@ -10,7 -10,7 +10,8 @@@ lkdtm-$(CONFIG_LKDTM)		+= rodata_objcop
  lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
  lkdtm-$(CONFIG_LKDTM)		+= stackleak.o
  lkdtm-$(CONFIG_LKDTM)		+= cfi.o
+ lkdtm-$(CONFIG_LKDTM)		+= fortify.o
 +lkdtm-$(CONFIG_PPC_BOOK3S_64)	+= powerpc.o
  
  KASAN_SANITIZE_stackleak.o	:= n
  KCOV_INSTRUMENT_rodata.o	:= n
diff --cc drivers/misc/lkdtm/lkdtm.h
index 79ec05c18dd1,6aa6d6a1a839..000000000000
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@@ -102,7 -104,7 +104,10 @@@ void lkdtm_STACKLEAK_ERASING(void)
  /* cfi.c */
  void lkdtm_CFI_FORWARD_PROTO(void);
  
+ /* fortify.c */
+ void lkdtm_FORTIFIED_STRSCPY(void);
+ 
 +/* powerpc.c */
 +void lkdtm_PPC_SLB_MULTIHIT(void);
 +
  #endif
diff --cc tools/testing/selftests/lkdtm/tests.txt
index 18e4599863c0,92ba4cc41314..000000000000
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@@ -68,4 -68,4 +68,5 @@@ USERCOPY_STACK_BEYON
  USERCOPY_KERNEL
  STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
  CFI_FORWARD_PROTO
+ FORTIFIED_STRSCPY
 +PPC_SLB_MULTIHIT Recovered

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH] powerpc/mce: Remove per cpu variables from MCE handlers
From: Michael Ellerman @ 2020-12-08 10:31 UTC (permalink / raw)
  To: Ganesh Goudar, linuxppc-dev; +Cc: Ganesh Goudar, mahesh, npiggin
In-Reply-To: <20201204102310.76213-1-ganeshgr@linux.ibm.com>

Ganesh Goudar <ganeshgr@linux.ibm.com> writes:
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 9454d29ff4b4..4769954efa7d 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -273,6 +274,17 @@ struct paca_struct {
>  #ifdef CONFIG_MMIOWB
>  	struct mmiowb_state mmiowb_state;
>  #endif
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	int mce_nest_count;
> +	struct machine_check_event mce_event[MAX_MC_EVT];
> +	/* Queue for delayed MCE events. */
> +	int mce_queue_count;
> +	struct machine_check_event mce_event_queue[MAX_MC_EVT];
> +
> +	/* Queue for delayed MCE UE events. */
> +	int mce_ue_count;
> +	struct machine_check_event  mce_ue_event_queue[MAX_MC_EVT];
> +#endif /* CONFIG_PPC_BOOK3S_64 */
>  } ____cacheline_aligned;

How much does this expand the paca by?

pahole can tell you.

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/mce: Remove per cpu variables from MCE handlers
From: Ganesh @ 2020-12-08 10:46 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: mahesh, npiggin
In-Reply-To: <871rg0twpw.fsf@mpe.ellerman.id.au>


On 12/8/20 4:01 PM, Michael Ellerman wrote:
> Ganesh Goudar <ganeshgr@linux.ibm.com> writes:
>> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
>> index 9454d29ff4b4..4769954efa7d 100644
>> --- a/arch/powerpc/include/asm/paca.h
>> +++ b/arch/powerpc/include/asm/paca.h
>> @@ -273,6 +274,17 @@ struct paca_struct {
>>   #ifdef CONFIG_MMIOWB
>>   	struct mmiowb_state mmiowb_state;
>>   #endif
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> +	int mce_nest_count;
>> +	struct machine_check_event mce_event[MAX_MC_EVT];
>> +	/* Queue for delayed MCE events. */
>> +	int mce_queue_count;
>> +	struct machine_check_event mce_event_queue[MAX_MC_EVT];
>> +
>> +	/* Queue for delayed MCE UE events. */
>> +	int mce_ue_count;
>> +	struct machine_check_event  mce_ue_event_queue[MAX_MC_EVT];
>> +#endif /* CONFIG_PPC_BOOK3S_64 */
>>   } ____cacheline_aligned;
> How much does this expand the paca by?

Size of paca is 4480 bytes, these add up another 2160 bytes, so expands it by 48%.


^ permalink raw reply

* Re: linux-next: build warning after merge of the akpm tree
From: Stephen Rothwell @ 2020-12-08 12:01 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Kees Cook, Mathieu Malaterre, Linux Kernel Mailing List,
	Nicholas Piggin, Linux Next Mailing List, PowerPC
In-Reply-To: <20201204210000.660293c6@canb.auug.org.au>

[-- Attachment #1: Type: text/plain, Size: 2154 bytes --]

Hi Stephen,

On Fri, 4 Dec 2020 21:00:00 +1100 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
>
> Hi all,
> 
> After merging the akpm tree, today's linux-next build (powerpc
> allyesconfig) produced warnings like this:
> 
> ld: warning: orphan section `.data..Lubsan_data177' from `arch/powerpc/oprofile/op_model_pa6t.o' being placed in section `.data..Lubsan_data177'
> 
> (lots of these latter ones)

781584 of them today!

> I don't know what produced these, but it is in the akpm-current or
> akpm trees.

Presumably the result of commit

  186c3e18dba3 ("ubsan: enable for all*config builds")

from the akpm-current tree.

arch/powerpc/kernel/vmlinux.lds.S has:

#ifdef CONFIG_PPC32
        .data : AT(ADDR(.data) - LOAD_OFFSET) {
                DATA_DATA
#ifdef CONFIG_UBSAN
                *(.data..Lubsan_data*)
                *(.data..Lubsan_type*)
#endif
                *(.data.rel*)
                *(SDATA_MAIN)

added by commit

  beba24ac5913 ("powerpc/32: Add .data..Lubsan_data*/.data..Lubsan_type* sections explicitly")

in 2018, but no equivalent for 64 bit.

I will try the following patch tomorrow:

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 8 Dec 2020 22:58:24 +1100
Subject: [PATCH] powerpc: Add .data..Lubsan_data*/.data..Lubsan_type* sections explicitly

Similarly to commit

  beba24ac5913 ("powerpc/32: Add .data..Lubsan_data*/.data..Lubsan_type* sections explicitly")

since CONFIG_UBSAN bits can now be enabled for all*config.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/kernel/vmlinux.lds.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 3b4c26e94328..0318ba436f34 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -296,6 +296,10 @@ SECTIONS
 #else
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
 		DATA_DATA
+#ifdef CONFIG_UBSAN
+		*(.data..Lubsan_data*)
+		*(.data..Lubsan_type*)
+#endif
 		*(.data.rel*)
 		*(.toc1)
 		*(.branch_lt)
-- 
2.29.2

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply related

* Re: [PATCH] powerpc/mce: Remove per cpu variables from MCE handlers
From: Mahesh Jagannath Salgaonkar @ 2020-12-08 12:04 UTC (permalink / raw)
  To: Ganesh, Michael Ellerman, linuxppc-dev; +Cc: npiggin
In-Reply-To: <a514db98-6090-467a-74ae-9c7b4337d0c1@linux.ibm.com>

On 12/8/20 4:16 PM, Ganesh wrote:
> 
> On 12/8/20 4:01 PM, Michael Ellerman wrote:
>> Ganesh Goudar <ganeshgr@linux.ibm.com> writes:
>>> diff --git a/arch/powerpc/include/asm/paca.h
>>> b/arch/powerpc/include/asm/paca.h
>>> index 9454d29ff4b4..4769954efa7d 100644
>>> --- a/arch/powerpc/include/asm/paca.h
>>> +++ b/arch/powerpc/include/asm/paca.h
>>> @@ -273,6 +274,17 @@ struct paca_struct {
>>>   #ifdef CONFIG_MMIOWB
>>>       struct mmiowb_state mmiowb_state;
>>>   #endif
>>> +#ifdef CONFIG_PPC_BOOK3S_64
>>> +    int mce_nest_count;
>>> +    struct machine_check_event mce_event[MAX_MC_EVT];
>>> +    /* Queue for delayed MCE events. */
>>> +    int mce_queue_count;
>>> +    struct machine_check_event mce_event_queue[MAX_MC_EVT];
>>> +
>>> +    /* Queue for delayed MCE UE events. */
>>> +    int mce_ue_count;
>>> +    struct machine_check_event  mce_ue_event_queue[MAX_MC_EVT];
>>> +#endif /* CONFIG_PPC_BOOK3S_64 */
>>>   } ____cacheline_aligned;
>> How much does this expand the paca by?
> 
> Size of paca is 4480 bytes, these add up another 2160 bytes, so expands
> it by 48%.
> 

Should we dynamically allocate the array sizes early as similar to that
of paca->mce_faulty_slbs so that we don't bump up paca size ?

Thanks,
-Mahesh.

^ permalink raw reply

* Re: [RFC][PATCH 1/2] libnvdimm: Introduce ND_CMD_GET_STAT to retrieve nvdimm statistics
From: Peter Zijlstra @ 2020-12-08 13:00 UTC (permalink / raw)
  To: Dan Williams
  Cc: Santosh Sivaraj, Ira Weiny, linux-nvdimm,
	Arnaldo Carvalho de Melo, Ingo Molnar, Aneesh Kumar K . V,
	Vaibhav Jain, linuxppc-dev
In-Reply-To: <CAPcyv4h0PAPyYoea2oxqw_mOZ-Ec-o1MwcdSN0gf5UXqZqjafQ@mail.gmail.com>

On Mon, Dec 07, 2020 at 04:54:21PM -0800, Dan Williams wrote:
> [ add perf maintainers ]
> 
> On Sun, Nov 8, 2020 at 1:16 PM Vaibhav Jain <vaibhav@linux.ibm.com> wrote:
> >
> > Implement support for exposing generic nvdimm statistics via newly
> > introduced dimm-command ND_CMD_GET_STAT that can be handled by nvdimm
> > command handler function and provide values for these statistics back
> > to libnvdimm. Following generic nvdimm statistics are defined as an
> > enumeration in 'uapi/ndctl.h':
> >
> > * "media_reads" : Number of media reads that have occurred since reboot.
> > * "media_writes" : Number of media writes that have occurred since reboot.
> > * "read_requests" : Number of read requests that have occurred since reboot.
> > * "write_requests" : Number of write requests that have occurred since reboot.
> 
> Perhaps document these as "since device reset"? As I can imagine some
> devices might have a mechanism to reset the count outside of "reboot"
> which is a bit ambiguous.
> 
> > * "total_media_reads" : Total number of media reads that have occurred.
> > * "total_media_writes" : Total number of media writes that have occurred.
> > * "total_read_requests" : Total number of read requests that have occurred.
> > * "total_write_requests" : Total number of write requests that have occurred.
> >
> > Apart from ND_CMD_GET_STAT ioctl these nvdimm statistics are also
> > exposed via sysfs '<nvdimm-device>/stats' directory for easy user-space
> > access like below:
> >
> > /sys/class/nd/ndctl0/device/nmem0/stats # tail -n +1 *
> > ==> media_reads <==
> > 252197707602
> > ==> media_writes <==
> > 20684685172
> > ==> read_requests <==
> > 658810924962
> > ==> write_requests <==
> > 404464081574
> 
> Hmm, I haven't looked but how hard would it be to plumb these to be
> perf counter-events. So someone could combine these with other perf
> counters?
> 
> > In case a specific nvdimm-statistic is not supported than nvdimm
> > command handler function can simply return an error (e.g -ENOENT) for
> > request to read that nvdimm-statistic.
> 
> Makes sense, but I expect the perf route also has a way to enumerate
> which statistics / counters are supported. I'm not opposed to also
> having them in sysfs, but I think perf support should be a first class
> citizen.

arch/x86/events/msr.c might be a good starting point for a software pmu
delivering pure counters.

^ permalink raw reply

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Aneesh Kumar K.V @ 2020-12-08 13:00 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <731bdee26a5a5c81cd815ed624a6fb3bdef8b4db.1607416578.git.christophe.leroy@csgroup.eu>

On 12/8/20 2:07 PM, Christophe Leroy wrote:
> search_exception_tables() is an heavy operation, we have to avoid it.
> When KUAP is selected, we'll know the fault has been blocked by KUAP.
> Otherwise, it behaves just as if the address was already in the TLBs
> and no fault was generated.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> v3: rebased
> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
> ---
>   arch/powerpc/mm/fault.c | 23 +++++++----------------
>   1 file changed, 7 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 3fcd34c28e10..1770b41e4730 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>   		return true;
>   	}
>   
> -	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
> -	    !search_exception_tables(regs->nip)) {
> -		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
> -				    address,
> -				    from_kuid(&init_user_ns, current_uid()));
> -	}
> -
>   	// Kernel fault on kernel address is bad
>   	if (address >= TASK_SIZE)
>   		return true;
>   
> -	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
> -	if (!search_exception_tables(regs->nip))
> -		return true;
> -
> -	// Read/write fault in a valid region (the exception table search passed
> -	// above), but blocked by KUAP is bad, it can never succeed.
> -	if (bad_kuap_fault(regs, address, is_write))
> +	// Read/write fault blocked by KUAP is bad, it can never succeed.
> +	if (bad_kuap_fault(regs, address, is_write)) {
> +		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
> +				    is_write ? "write" : "read", address,
> +				    from_kuid(&init_user_ns, current_uid()));
>   		return true;
> +	}


Should we update bad_kuap_fault to check for !is_kernel_addr() and 
error_code & (DSISIR_PROT_FAULT | DSISIR_KEYFAULT). I am wondering 
whether we can take another fault w.r.t kernel address/user address and 
end up reporting that as KUAP fault?

>   
> -	// What's left? Kernel fault on user in well defined regions (extable
> -	// matched), and allowed by KUAP in the faulting context.
> +	// What's left? Kernel fault on user and allowed by KUAP in the faulting context.
>   	return false;
>   }
>   
> 


-aneesh

^ permalink raw reply

* Re: [PATCH] powerpc/book3s_hv_uvmem: Check for failed page migration
From: Ram Pai @ 2020-12-08 14:20 UTC (permalink / raw)
  To: Alistair Popple; +Cc: linuxppc-dev, bharata
In-Reply-To: <20201203050812.5234-1-alistair@popple.id.au>

On Thu, Dec 03, 2020 at 04:08:12PM +1100, Alistair Popple wrote:
> migrate_vma_pages() may still clear MIGRATE_PFN_MIGRATE on pages which
> are not able to be migrated. Drivers may safely copy data prior to
> calling migrate_vma_pages() however a remote mapping must not be
> established until after migrate_vma_pages() has returned as the
> migration could still fail.
> 
> UV_PAGE_IN_in both copies and maps the data page, therefore it should
> only be called after checking the results of migrate_vma_pages().
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
>  arch/powerpc/kvm/book3s_hv_uvmem.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index 84e5a2dc8be5..08aa6a90c525 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -762,7 +762,10 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
>  		goto out_finalize;
>  	}
> 
> -	if (pagein) {
> +	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> +	migrate_vma_pages(&mig);
> +
> +	if ((*mig.src & MIGRATE_PFN_MIGRATE) && pagein) {
>  		pfn = *mig.src >> MIGRATE_PFN_SHIFT;
>  		spage = migrate_pfn_to_page(*mig.src);
>  		if (spage) {
> @@ -773,8 +776,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
>  		}
>  	}
> 
> -	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> -	migrate_vma_pages(&mig);
>  out_finalize:
>  	migrate_vma_finalize(&mig);
>  	return ret;

Though this patch did not solve the specific problem, I am running into,
my tests did not expose any regression.

Tested-by: Ram Pai <linuxram@us.ibm.com>


Reviewed-by: Ram Pai <linuxram@us.ibm.com>

^ permalink raw reply

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Christophe Leroy @ 2020-12-08 14:26 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <0e25f03d-9f59-b963-312c-c3ae1d7953a2@linux.ibm.com>



Le 08/12/2020 à 14:00, Aneesh Kumar K.V a écrit :
> On 12/8/20 2:07 PM, Christophe Leroy wrote:
>> search_exception_tables() is an heavy operation, we have to avoid it.
>> When KUAP is selected, we'll know the fault has been blocked by KUAP.
>> Otherwise, it behaves just as if the address was already in the TLBs
>> and no fault was generated.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> v3: rebased
>> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
>> ---
>>   arch/powerpc/mm/fault.c | 23 +++++++----------------
>>   1 file changed, 7 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index 3fcd34c28e10..1770b41e4730 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>>           return true;
>>       }
>> -    if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
>> -        !search_exception_tables(regs->nip)) {
>> -        pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: 
>> %d)\n",
>> -                    address,
>> -                    from_kuid(&init_user_ns, current_uid()));
>> -    }
>> -
>>       // Kernel fault on kernel address is bad
>>       if (address >= TASK_SIZE)
>>           return true;
>> -    // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
>> -    if (!search_exception_tables(regs->nip))
>> -        return true;
>> -
>> -    // Read/write fault in a valid region (the exception table search passed
>> -    // above), but blocked by KUAP is bad, it can never succeed.
>> -    if (bad_kuap_fault(regs, address, is_write))
>> +    // Read/write fault blocked by KUAP is bad, it can never succeed.
>> +    if (bad_kuap_fault(regs, address, is_write)) {
>> +        pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
>> +                    is_write ? "write" : "read", address,
>> +                    from_kuid(&init_user_ns, current_uid()));
>>           return true;
>> +    }
> 
> 
> Should we update bad_kuap_fault to check for !is_kernel_addr() and error_code & (DSISIR_PROT_FAULT | 
> DSISIR_KEYFAULT). I am wondering whether we can take another fault w.r.t kernel address/user address 
> and end up reporting that as KUAP fault?

Just before this we do:

	if (address >= TASK_SIZE)
		return true;

About the error code, I don't know. Can we take a fault that is not a DSISR_PROT_FAULT |
  DSISR_KEYFAULT and that is not a KUAP fault ?

Previously (before this patch), the error code was taken into account for the call to 
search_exception_tables(), but has never been for the bad_kuap_fault().

> 
>> -    // What's left? Kernel fault on user in well defined regions (extable
>> -    // matched), and allowed by KUAP in the faulting context.
>> +    // What's left? Kernel fault on user and allowed by KUAP in the faulting context.
>>       return false;
>>   }
>>
> 
> 
> -aneesh

Christophe

^ permalink raw reply

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Aneesh Kumar K.V @ 2020-12-08 14:31 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <d66f9706-9e36-5b92-5a87-90ebd05587e9@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:

> Le 08/12/2020 à 14:00, Aneesh Kumar K.V a écrit :
>> On 12/8/20 2:07 PM, Christophe Leroy wrote:
>>> search_exception_tables() is an heavy operation, we have to avoid it.
>>> When KUAP is selected, we'll know the fault has been blocked by KUAP.
>>> Otherwise, it behaves just as if the address was already in the TLBs
>>> and no fault was generated.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>> v3: rebased
>>> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
>>> ---
>>>   arch/powerpc/mm/fault.c | 23 +++++++----------------
>>>   1 file changed, 7 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>>> index 3fcd34c28e10..1770b41e4730 100644
>>> --- a/arch/powerpc/mm/fault.c
>>> +++ b/arch/powerpc/mm/fault.c
>>> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>>>           return true;
>>>       }
>>> -    if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
>>> -        !search_exception_tables(regs->nip)) {
>>> -        pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: 
>>> %d)\n",
>>> -                    address,
>>> -                    from_kuid(&init_user_ns, current_uid()));
>>> -    }
>>> -
>>>       // Kernel fault on kernel address is bad
>>>       if (address >= TASK_SIZE)
>>>           return true;
>>> -    // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
>>> -    if (!search_exception_tables(regs->nip))
>>> -        return true;
>>> -
>>> -    // Read/write fault in a valid region (the exception table search passed
>>> -    // above), but blocked by KUAP is bad, it can never succeed.
>>> -    if (bad_kuap_fault(regs, address, is_write))
>>> +    // Read/write fault blocked by KUAP is bad, it can never succeed.
>>> +    if (bad_kuap_fault(regs, address, is_write)) {
>>> +        pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
>>> +                    is_write ? "write" : "read", address,
>>> +                    from_kuid(&init_user_ns, current_uid()));
>>>           return true;
>>> +    }
>> 
>> 
>> Should we update bad_kuap_fault to check for !is_kernel_addr() and error_code & (DSISIR_PROT_FAULT | 
>> DSISIR_KEYFAULT). I am wondering whether we can take another fault w.r.t kernel address/user address 
>> and end up reporting that as KUAP fault?
>
> Just before this we do:
>
> 	if (address >= TASK_SIZE)
> 		return true;
>
> About the error code, I don't know. Can we take a fault that is not a DSISR_PROT_FAULT |
>   DSISR_KEYFAULT and that is not a KUAP fault ?
>
> Previously (before this patch), the error code was taken into account for the call to 
> search_exception_tables(), but has never been for the bad_kuap_fault().
>

a KUAP fault on radix will result in PROTFAULT and on hash it will
generate KEYFAULT. ie, something like below

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 32fd4452e960..b18cd931e325 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -177,8 +177,8 @@ static inline void restore_user_access(unsigned long flags)
 		allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
 }
 
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
+				  bool is_write, unsigned long error_code)
 {
 	unsigned long begin = regs->kuap & 0xf0000000;
 	unsigned long end = regs->kuap << 28;
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 91af50e9b09a..16bb6aee9fcd 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -392,14 +392,24 @@ static inline void set_kuap(unsigned long value)
 }
 
 static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write)
+				  bool is_write, unsigned long error_code)
 {
+	unsigned long fault;
+
 	if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
 		return false;
+
+	if (radix_enabled())
+		fault = DSISR_PROTFAULT;
+	else
+		fault = DSISR_KEYFAULT;
+
 	/*
 	 * For radix this will be a storage protection fault (DSISR_PROTFAULT).
 	 * For hash this will be a key fault (DSISR_KEYFAULT)
 	 */
+	if (!(error_code & fault))
+		return false;
 	/*
 	 * We do have exception table entry, but accessing the
 	 * userspace results in fault.  This could be because we
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 49fe4b4a9434..bdffa2664bf0 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -62,8 +62,8 @@ void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
 
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
+				  bool is_write, unsigned long error_code)
 {
 	return false;
 }
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 567cdc557402..7bdd9e5b63ed 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -60,8 +60,8 @@ static inline void restore_user_access(unsigned long flags)
 	mtspr(SPRN_MD_AP, flags);
 }
 
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
+				  bool is_write, unsigned long error_code)
 {
 	return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000),
 		    "Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b12595102525..03c3414bdc79 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -218,7 +218,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 	}
 
 	// Kernel fault on kernel address is bad
-	if (address >= TASK_SIZE)
+	if (is_kernel_addr(address))
 		return true;
 
 	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
@@ -227,7 +227,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 
 	// Read/write fault in a valid region (the exception table search passed
 	// above), but blocked by KUAP is bad, it can never succeed.
-	if (bad_kuap_fault(regs, address, is_write))
+	if (bad_kuap_fault(regs, address, is_write, error_code))
 		return true;
 
 	// What's left? Kernel fault on user in well defined regions (extable


^ permalink raw reply related

* Re: [PATCH] arch: fix 'unexpected IRQ trap at vector' warnings
From: Helge Deller @ 2020-12-08 14:42 UTC (permalink / raw)
  To: Michael Ellerman, Enrico Weigelt, metux IT consult, linux-kernel
  Cc: linux-s390, hpa, linux-parisc, jdike, x86, linux-um,
	James.Bottomley, mingo, paulus, richard, bp, tglx, linuxppc-dev,
	anton.ivanov
In-Reply-To: <877dptt5av.fsf@mpe.ellerman.id.au>

On 12/8/20 3:11 AM, Michael Ellerman wrote:
> "Enrico Weigelt, metux IT consult" <info@metux.net> writes:
>> All archs, except Alpha, print out the irq number in hex, but the message
>> looks like it was a decimal number, which is quite confusing. Fixing this
>> by adding "0x" prefix.
>
> Arguably decimal would be better, /proc/interrupts and /proc/irq/ both
> use decimal.

I agree.

> The whole message is very dated IMO, these days the number it prints is
> (possibly) virtualised via IRQ domains, ie. it's not necessarily a
> "vector" if that even makes sense on all arches). Arguably "trap" is the
> wrong term on some arches too.
>
> So it would be better reworded entirely IMO, and also switched to
> decimal to match other sources of information on interrupts.
>
> Perhaps:
> 	"Unexpected Linux IRQ %d."

Yes.

and while cleaning it up, introducing a default weak implementation of ack_bad_irq()
which adds and increases irq_err_count for all platforms would be a nice cleanup.

Helge

> If anyone else is having deja vu like me, yes this has come up before:
>   https://lore.kernel.org/lkml/20150712220211.7166.42035.stgit@bhelgaas-glaptop2.roam.corp.google.com/
>
> cheers
>
>
>
>> diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h
>> index cecc13214ef1..2749f19271d9 100644
>> --- a/arch/arm/include/asm/hw_irq.h
>> +++ b/arch/arm/include/asm/hw_irq.h
>> @@ -9,7 +9,7 @@ static inline void ack_bad_irq(int irq)
>>  {
>>  	extern unsigned long irq_err_count;
>>  	irq_err_count++;
>> -	pr_crit("unexpected IRQ trap at vector %02x\n", irq);
>> +	pr_crit("unexpected IRQ trap at vector 0x%02x\n", irq);
>>  }
>>
>>  #define ARCH_IRQ_INIT_FLAGS	(IRQ_NOREQUEST | IRQ_NOPROBE)
>> diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h
>> index 7f7039516e53..c3348af88d3f 100644
>> --- a/arch/parisc/include/asm/hardirq.h
>> +++ b/arch/parisc/include/asm/hardirq.h
>> @@ -35,6 +35,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>>  #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
>>  #define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
>>  #define __inc_irq_stat(member)	__this_cpu_inc(irq_stat.member)
>> -#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector %02x\n", irq)
>> +#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector 0x%02x\n", irq)
>>
>>  #endif /* _PARISC_HARDIRQ_H */
>> diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
>> index f133b5930ae1..ec8cf3cf6e49 100644
>> --- a/arch/powerpc/include/asm/hardirq.h
>> +++ b/arch/powerpc/include/asm/hardirq.h
>> @@ -29,7 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>>
>>  static inline void ack_bad_irq(unsigned int irq)
>>  {
>> -	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
>> +	printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>>  }
>>
>>  extern u64 arch_irq_stat_cpu(unsigned int cpu);
>> diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
>> index dfbc3c6c0674..aaaec5cdd4fe 100644
>> --- a/arch/s390/include/asm/hardirq.h
>> +++ b/arch/s390/include/asm/hardirq.h
>> @@ -23,7 +23,7 @@
>>
>>  static inline void ack_bad_irq(unsigned int irq)
>>  {
>> -	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
>> +	printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>>  }
>>
>>  #endif /* __ASM_HARDIRQ_H */
>> diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
>> index b426796d26fd..2a2e6eae034b 100644
>> --- a/arch/um/include/asm/hardirq.h
>> +++ b/arch/um/include/asm/hardirq.h
>> @@ -15,7 +15,7 @@ typedef struct {
>>  #ifndef ack_bad_irq
>>  static inline void ack_bad_irq(unsigned int irq)
>>  {
>> -	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
>> +	printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>>  }
>>  #endif
>>
>> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
>> index c5dd50369e2f..957c716f2df7 100644
>> --- a/arch/x86/kernel/irq.c
>> +++ b/arch/x86/kernel/irq.c
>> @@ -37,7 +37,7 @@ atomic_t irq_err_count;
>>  void ack_bad_irq(unsigned int irq)
>>  {
>>  	if (printk_ratelimit())
>> -		pr_err("unexpected IRQ trap at vector %02x\n", irq);
>> +		pr_err("unexpected IRQ trap at vector 0x%02x\n", irq);
>>
>>  	/*
>>  	 * Currently unexpected vectors happen only on SMP and APIC.
>> --
>> 2.11.0


^ permalink raw reply

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Aneesh Kumar K.V @ 2020-12-08 14:52 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <731bdee26a5a5c81cd815ed624a6fb3bdef8b4db.1607416578.git.christophe.leroy@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:

> search_exception_tables() is an heavy operation, we have to avoid it.
> When KUAP is selected, we'll know the fault has been blocked by KUAP.
> Otherwise, it behaves just as if the address was already in the TLBs
> and no fault was generated.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> v3: rebased
> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
> ---
>  arch/powerpc/mm/fault.c | 23 +++++++----------------
>  1 file changed, 7 insertions(+), 16 deletions(-)
>
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 3fcd34c28e10..1770b41e4730 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>  		return true;
>  	}
>  
> -	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
> -	    !search_exception_tables(regs->nip)) {
> -		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
> -				    address,
> -				    from_kuid(&init_user_ns, current_uid()));
> -	}
> -
>  	// Kernel fault on kernel address is bad
>  	if (address >= TASK_SIZE)
>  		return true;
>  
> -	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
> -	if (!search_exception_tables(regs->nip))
> -		return true;
> -
> -	// Read/write fault in a valid region (the exception table search passed
> -	// above), but blocked by KUAP is bad, it can never succeed.
> -	if (bad_kuap_fault(regs, address, is_write))
> +	// Read/write fault blocked by KUAP is bad, it can never succeed.
> +	if (bad_kuap_fault(regs, address, is_write)) {
> +		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
> +				    is_write ? "write" : "read", address,
> +				    from_kuid(&init_user_ns, current_uid()));
>  		return true;
> +	}


With this I am wondering whether the WARN() in bad_kuap_fault() is
needed. A direct access of userspace address will trigger this, whereas
previously we used bad_kuap_fault() only to identify incorrect restore
of AMR register (ie, to identify kernel bugs). Hence a WARN() there was
useful. We loose that differentiation now?


>  
> -	// What's left? Kernel fault on user in well defined regions (extable
> -	// matched), and allowed by KUAP in the faulting context.
> +	// What's left? Kernel fault on user and allowed by KUAP in the faulting context.
>  	return false;
>  }
>  
> -- 
> 2.25.0

^ permalink raw reply

* [PATCH] drivers: usb: gadget: prefer pr_*() functions over raw printk()
From: Enrico Weigelt, metux IT consult @ 2020-12-08 14:44 UTC (permalink / raw)
  To: linux-kernel; +Cc: balbi, linux-usb, linuxppc-dev, laurent.pinchart, leoyang.li

Reduce a bit logging boilerplate by using the preferred pr_*()
macros instead of raw printk().

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
---
 drivers/usb/gadget/function/uvc.h       |  2 +-
 drivers/usb/gadget/udc/atmel_usba_udc.c |  2 +-
 drivers/usb/gadget/udc/fsl_udc_core.c   |  4 +--
 drivers/usb/gadget/udc/fsl_usb2_udc.h   |  4 +--
 drivers/usb/gadget/udc/fusb300_udc.c    | 64 ++++++++++++++++-----------------
 drivers/usb/gadget/udc/goku_udc.c       |  2 +-
 drivers/usb/gadget/udc/r8a66597-udc.h   |  2 +-
 7 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index 23ee25383c1f..d546eb7c348c 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -49,7 +49,7 @@ extern unsigned int uvc_gadget_trace_param;
 #define uvc_trace(flag, msg...) \
 	do { \
 		if (uvc_gadget_trace_param & flag) \
-			printk(KERN_DEBUG "uvcvideo: " msg); \
+			pr_debug("uvcvideo: " msg); \
 	} while (0)
 
 #define uvcg_dbg(f, fmt, args...) \
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index 2b893bceea45..4834fafb3f70 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -1573,7 +1573,7 @@ static void usba_control_irq(struct usba_udc *udc, struct usba_ep *ep)
 		 * generate or receive a reply right away. */
 		usba_ep_writel(ep, CLR_STA, USBA_RX_SETUP);
 
-		/* printk(KERN_DEBUG "setup: %d: %02x.%02x\n",
+		/* pr_debug("setup: %d: %02x.%02x\n",
 			ep->state, crq.crq.bRequestType,
 			crq.crq.bRequest); */
 
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index ad6ff9c4188e..cab4def04f9f 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -1474,7 +1474,7 @@ __acquires(udc->lock)
 			mdelay(10);
 			tmp = fsl_readl(&dr_regs->portsc1) | (ptc << 16);
 			fsl_writel(tmp, &dr_regs->portsc1);
-			printk(KERN_INFO "udc: switch to test mode %d.\n", ptc);
+			pr_info("udc: switch to test mode %d.\n", ptc);
 		}
 
 		return;
@@ -1952,7 +1952,7 @@ static int fsl_udc_start(struct usb_gadget *g,
 	if (!IS_ERR_OR_NULL(udc_controller->transceiver)) {
 		/* Suspend the controller until OTG enable it */
 		udc_controller->stopped = 1;
-		printk(KERN_INFO "Suspend udc for OTG auto detect\n");
+		pr_info("Suspend udc for OTG auto detect\n");
 
 		/* connect to bus through transceiver */
 		if (!IS_ERR_OR_NULL(udc_controller->transceiver)) {
diff --git a/drivers/usb/gadget/udc/fsl_usb2_udc.h b/drivers/usb/gadget/udc/fsl_usb2_udc.h
index 4ba651ae9048..b180bf14dd0c 100644
--- a/drivers/usb/gadget/udc/fsl_usb2_udc.h
+++ b/drivers/usb/gadget/udc/fsl_usb2_udc.h
@@ -509,7 +509,7 @@ struct fsl_udc {
 /*-------------------------------------------------------------------------*/
 
 #ifdef DEBUG
-#define DBG(fmt, args...) 	printk(KERN_DEBUG "[%s]  " fmt "\n", \
+#define DBG(fmt, args...) 	pr_debug("[%s]  " fmt "\n", \
 				__func__, ## args)
 #else
 #define DBG(fmt, args...)	do{}while(0)
@@ -535,7 +535,7 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length)
 			p += 3;
 		}
 		*p = 0;
-		printk(KERN_DEBUG "%6x: %s\n", start, line);
+		pr_debug("%6x: %s\n", start, line);
 		buf += num;
 		start += num;
 		length -= num;
diff --git a/drivers/usb/gadget/udc/fusb300_udc.c b/drivers/usb/gadget/udc/fusb300_udc.c
index 9af8b415f303..c4e7e4b8e46f 100644
--- a/drivers/usb/gadget/udc/fusb300_udc.c
+++ b/drivers/usb/gadget/udc/fusb300_udc.c
@@ -352,24 +352,24 @@ static void fusb300_wrcxf(struct fusb300_ep *ep,
 		for (i = length >> 2; i > 0; i--) {
 			data = *tmp | *(tmp + 1) << 8 | *(tmp + 2) << 16 |
 				*(tmp + 3) << 24;
-			printk(KERN_DEBUG "    0x%x\n", data);
+			pr_debug("    0x%x\n", data);
 			iowrite32(data, fusb300->reg + FUSB300_OFFSET_CXPORT);
 			tmp = tmp + 4;
 		}
 		switch (length % 4) {
 		case 1:
 			data = *tmp;
-			printk(KERN_DEBUG "    0x%x\n", data);
+			pr_debug("    0x%x\n", data);
 			iowrite32(data, fusb300->reg + FUSB300_OFFSET_CXPORT);
 			break;
 		case 2:
 			data = *tmp | *(tmp + 1) << 8;
-			printk(KERN_DEBUG "    0x%x\n", data);
+			pr_debug("    0x%x\n", data);
 			iowrite32(data, fusb300->reg + FUSB300_OFFSET_CXPORT);
 			break;
 		case 3:
 			data = *tmp | *(tmp + 1) << 8 | *(tmp + 2) << 16;
-			printk(KERN_DEBUG "    0x%x\n", data);
+			pr_debug("    0x%x\n", data);
 			iowrite32(data, fusb300->reg + FUSB300_OFFSET_CXPORT);
 			break;
 		default:
@@ -390,7 +390,7 @@ static void fusb300_clear_epnstall(struct fusb300 *fusb300, u8 ep)
 	u32 reg = ioread32(fusb300->reg + FUSB300_OFFSET_EPSET0(ep));
 
 	if (reg & FUSB300_EPSET0_STL) {
-		printk(KERN_DEBUG "EP%d stall... Clear!!\n", ep);
+		pr_debug("EP%d stall... Clear!!\n", ep);
 		reg |= FUSB300_EPSET0_STL_CLR;
 		iowrite32(reg, fusb300->reg + FUSB300_OFFSET_EPSET0(ep));
 	}
@@ -402,7 +402,7 @@ static void ep0_queue(struct fusb300_ep *ep, struct fusb300_request *req)
 		if (req->req.length) {
 			fusb300_wrcxf(ep, req);
 		} else
-			printk(KERN_DEBUG "%s : req->req.length = 0x%x\n",
+			pr_debug("%s : req->req.length = 0x%x\n",
 				__func__, req->req.length);
 		if ((req->req.length == req->req.actual) ||
 		    (req->req.actual < ep->ep.maxpacket))
@@ -565,7 +565,7 @@ static void fusb300_rdcxf(struct fusb300 *fusb300,
 
 	for (i = (length >> 2); i > 0; i--) {
 		data = ioread32(fusb300->reg + FUSB300_OFFSET_CXPORT);
-		printk(KERN_DEBUG "    0x%x\n", data);
+		pr_debug("    0x%x\n", data);
 		*tmp = data & 0xFF;
 		*(tmp + 1) = (data >> 8) & 0xFF;
 		*(tmp + 2) = (data >> 16) & 0xFF;
@@ -576,18 +576,18 @@ static void fusb300_rdcxf(struct fusb300 *fusb300,
 	switch (length % 4) {
 	case 1:
 		data = ioread32(fusb300->reg + FUSB300_OFFSET_CXPORT);
-		printk(KERN_DEBUG "    0x%x\n", data);
+		pr_debug("    0x%x\n", data);
 		*tmp = data & 0xFF;
 		break;
 	case 2:
 		data = ioread32(fusb300->reg + FUSB300_OFFSET_CXPORT);
-		printk(KERN_DEBUG "    0x%x\n", data);
+		pr_debug("    0x%x\n", data);
 		*tmp = data & 0xFF;
 		*(tmp + 1) = (data >> 8) & 0xFF;
 		break;
 	case 3:
 		data = ioread32(fusb300->reg + FUSB300_OFFSET_CXPORT);
-		printk(KERN_DEBUG "    0x%x\n", data);
+		pr_debug("    0x%x\n", data);
 		*tmp = data & 0xFF;
 		*(tmp + 1) = (data >> 8) & 0xFF;
 		*(tmp + 2) = (data >> 16) & 0xFF;
@@ -610,7 +610,7 @@ static void fusb300_rdfifo(struct fusb300_ep *ep,
 	req->req.actual += length;
 
 	if (req->req.actual > req->req.length)
-		printk(KERN_DEBUG "req->req.actual > req->req.length\n");
+		pr_debug("req->req.actual > req->req.length\n");
 
 	for (i = (length >> 2); i > 0; i--) {
 		data = ioread32(fusb300->reg +
@@ -649,7 +649,7 @@ static void fusb300_rdfifo(struct fusb300_ep *ep,
 		reg = ioread32(fusb300->reg + FUSB300_OFFSET_IGR1);
 		reg &= FUSB300_IGR1_SYNF0_EMPTY_INT;
 		if (i)
-			printk(KERN_INFO "sync fifo is not empty!\n");
+			pr_info("sync fifo is not empty!\n");
 		i++;
 	} while (!reg);
 }
@@ -677,7 +677,7 @@ static u8 fusb300_get_cxstall(struct fusb300 *fusb300)
 static void request_error(struct fusb300 *fusb300)
 {
 	fusb300_set_cxstall(fusb300);
-	printk(KERN_DEBUG "request error!!\n");
+	pr_debug("request error!!\n");
 }
 
 static void get_status(struct fusb300 *fusb300, struct usb_ctrlrequest *ctrl)
@@ -999,7 +999,7 @@ static void check_device_mode(struct fusb300 *fusb300)
 		fusb300->gadget.speed = USB_SPEED_UNKNOWN;
 		break;
 	}
-	printk(KERN_INFO "dev_mode = %d\n", (reg & FUSB300_GCR_DEVEN_MSK));
+	pr_info("dev_mode = %d\n", (reg & FUSB300_GCR_DEVEN_MSK));
 }
 
 
@@ -1076,14 +1076,14 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	if (int_grp1 & FUSB300_IGR1_WARM_RST_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_WARM_RST_INT);
-		printk(KERN_INFO"fusb300_warmreset\n");
+		pr_info("fusb300_warmreset\n");
 		fusb300_reset();
 	}
 
 	if (int_grp1 & FUSB300_IGR1_HOT_RST_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_HOT_RST_INT);
-		printk(KERN_INFO"fusb300_hotreset\n");
+		pr_info("fusb300_hotreset\n");
 		fusb300_reset();
 	}
 
@@ -1097,13 +1097,13 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	if (int_grp1 & FUSB300_IGR1_CX_COMABT_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_CX_COMABT_INT);
-		printk(KERN_INFO"fusb300_ep0abt\n");
+		pr_info("fusb300_ep0abt\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_VBUS_CHG_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_VBUS_CHG_INT);
-		printk(KERN_INFO"fusb300_vbus_change\n");
+		pr_info("fusb300_vbus_change\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U3_EXIT_FAIL_INT) {
@@ -1134,25 +1134,25 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	if (int_grp1 & FUSB300_IGR1_U3_EXIT_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U3_EXIT_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U3_EXIT_INT\n");
+		pr_info("FUSB300_IGR1_U3_EXIT_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U2_EXIT_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U2_EXIT_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U2_EXIT_INT\n");
+		pr_info("FUSB300_IGR1_U2_EXIT_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U1_EXIT_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U1_EXIT_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U1_EXIT_INT\n");
+		pr_info("FUSB300_IGR1_U1_EXIT_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U3_ENTRY_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U3_ENTRY_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U3_ENTRY_INT\n");
+		pr_info("FUSB300_IGR1_U3_ENTRY_INT\n");
 		fusb300_enable_bit(fusb300, FUSB300_OFFSET_SSCR1,
 				   FUSB300_SSCR1_GO_U3_DONE);
 	}
@@ -1160,31 +1160,31 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	if (int_grp1 & FUSB300_IGR1_U2_ENTRY_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U2_ENTRY_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U2_ENTRY_INT\n");
+		pr_info("FUSB300_IGR1_U2_ENTRY_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U1_ENTRY_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U1_ENTRY_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U1_ENTRY_INT\n");
+		pr_info("FUSB300_IGR1_U1_ENTRY_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_RESM_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_RESM_INT);
-		printk(KERN_INFO "fusb300_resume\n");
+		pr_info("fusb300_resume\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_SUSP_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_SUSP_INT);
-		printk(KERN_INFO "fusb300_suspend\n");
+		pr_info("fusb300_suspend\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_HS_LPM_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_HS_LPM_INT);
-		printk(KERN_INFO "fusb300_HS_LPM_INT\n");
+		pr_info("fusb300_HS_LPM_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_DEV_MODE_CHG_INT) {
@@ -1195,11 +1195,11 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 
 	if (int_grp1 & FUSB300_IGR1_CX_COMFAIL_INT) {
 		fusb300_set_cxstall(fusb300);
-		printk(KERN_INFO "fusb300_ep0fail\n");
+		pr_info("fusb300_ep0fail\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_CX_SETUP_INT) {
-		printk(KERN_INFO "fusb300_ep0setup\n");
+		pr_info("fusb300_ep0setup\n");
 		if (setup_packet(fusb300, &ctrl)) {
 			spin_unlock(&fusb300->lock);
 			if (fusb300->driver->setup(&fusb300->gadget, &ctrl) < 0)
@@ -1209,16 +1209,16 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	}
 
 	if (int_grp1 & FUSB300_IGR1_CX_CMDEND_INT)
-		printk(KERN_INFO "fusb300_cmdend\n");
+		pr_info("fusb300_cmdend\n");
 
 
 	if (int_grp1 & FUSB300_IGR1_CX_OUT_INT) {
-		printk(KERN_INFO "fusb300_cxout\n");
+		pr_info("fusb300_cxout\n");
 		fusb300_ep0out(fusb300);
 	}
 
 	if (int_grp1 & FUSB300_IGR1_CX_IN_INT) {
-		printk(KERN_INFO "fusb300_cxin\n");
+		pr_info("fusb300_cxin\n");
 		fusb300_ep0in(fusb300);
 	}
 
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c
index 3e1267d38774..4f225552861a 100644
--- a/drivers/usb/gadget/udc/goku_udc.c
+++ b/drivers/usb/gadget/udc/goku_udc.c
@@ -1748,7 +1748,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	int			retval;
 
 	if (!pdev->irq) {
-		printk(KERN_ERR "Check PCI %s IRQ setup!\n", pci_name(pdev));
+		pr_err("Check PCI %s IRQ setup!\n", pci_name(pdev));
 		retval = -ENODEV;
 		goto err;
 	}
diff --git a/drivers/usb/gadget/udc/r8a66597-udc.h b/drivers/usb/gadget/udc/r8a66597-udc.h
index 9a115caba661..fa4d62c32ea1 100644
--- a/drivers/usb/gadget/udc/r8a66597-udc.h
+++ b/drivers/usb/gadget/udc/r8a66597-udc.h
@@ -247,7 +247,7 @@ static inline u16 get_xtal_from_pdata(struct r8a66597_platdata *pdata)
 		clock = XTAL48;
 		break;
 	default:
-		printk(KERN_ERR "r8a66597: platdata clock is wrong.\n");
+		pr_err("r8a66597: platdata clock is wrong.\n");
 		break;
 	}
 
-- 
2.11.0


^ permalink raw reply related

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Christophe Leroy @ 2020-12-08 15:07 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <87lfe8qrik.fsf@linux.ibm.com>



Le 08/12/2020 à 15:52, Aneesh Kumar K.V a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> 
>> search_exception_tables() is an heavy operation, we have to avoid it.
>> When KUAP is selected, we'll know the fault has been blocked by KUAP.
>> Otherwise, it behaves just as if the address was already in the TLBs
>> and no fault was generated.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> v3: rebased
>> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
>> ---
>>   arch/powerpc/mm/fault.c | 23 +++++++----------------
>>   1 file changed, 7 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index 3fcd34c28e10..1770b41e4730 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>>   		return true;
>>   	}
>>   
>> -	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
>> -	    !search_exception_tables(regs->nip)) {
>> -		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
>> -				    address,
>> -				    from_kuid(&init_user_ns, current_uid()));
>> -	}
>> -
>>   	// Kernel fault on kernel address is bad
>>   	if (address >= TASK_SIZE)
>>   		return true;
>>   
>> -	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
>> -	if (!search_exception_tables(regs->nip))
>> -		return true;
>> -
>> -	// Read/write fault in a valid region (the exception table search passed
>> -	// above), but blocked by KUAP is bad, it can never succeed.
>> -	if (bad_kuap_fault(regs, address, is_write))
>> +	// Read/write fault blocked by KUAP is bad, it can never succeed.
>> +	if (bad_kuap_fault(regs, address, is_write)) {
>> +		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
>> +				    is_write ? "write" : "read", address,
>> +				    from_kuid(&init_user_ns, current_uid()));
>>   		return true;
>> +	}
> 
> 
> With this I am wondering whether the WARN() in bad_kuap_fault() is
> needed. A direct access of userspace address will trigger this, whereas
> previously we used bad_kuap_fault() only to identify incorrect restore
> of AMR register (ie, to identify kernel bugs). Hence a WARN() there was
> useful. We loose that differentiation now?

Yes, I wanted to remove the WARN(), see 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/cc9129bdda1dbc2f0a09cf45fece7d0b0e690784.1605541983.git.christophe.leroy@csgroup.eu/
but I understood from Michael that maybe it was not a good idea, so I left it aside for now when 
rebasing to v3.

Yes previously we were able to differentiate between a direct access of userspace and a valid access 
triggering a KUAP fault, but at the cost of the heavy search_exception_tables().
The issue was reported by Nick through https://github.com/linuxppc/issues/issues/317

Should be perform the search_exception_tables() once we have hit the KUAP fault and WARN() only in 
that case ?

I was wondering also if we should keep the WARN() only when CONFIG_PPC_KUAP_DEBUG is set ?

> 
> 
>>   
>> -	// What's left? Kernel fault on user in well defined regions (extable
>> -	// matched), and allowed by KUAP in the faulting context.
>> +	// What's left? Kernel fault on user and allowed by KUAP in the faulting context.
>>   	return false;
>>   }
>>   
>> -- 
>> 2.25.0

^ permalink raw reply

* [PATCH 02/13] powerpc/xive: Rename XIVE_IRQ_NO_EOI to show its a flag
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20201208151124.1329942-1-clg@kaod.org>

This is a simple cleanup to identify easily all flags of the XIVE
interrupt structure. The interrupts flagged with XIVE_IRQ_FLAG_NO_EOI
are the escalations used to wake up vCPUs in KVM. They are handled
very differently from the rest.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/xive.h   | 2 +-
 arch/powerpc/kvm/book3s_xive.c    | 4 ++--
 arch/powerpc/sysdev/xive/common.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 309b4d65b74f..d332dd9a18de 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -66,7 +66,7 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_H_INT_ESB	0x20
 
 /* Special flag set by KVM for excalation interrupts */
-#define XIVE_IRQ_NO_EOI		0x80
+#define XIVE_IRQ_FLAG_NO_EOI	0x80
 
 #define XIVE_INVALID_CHIP_ID	-1
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 18a6b75a3bfd..fae1c2e8da29 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -219,7 +219,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
 	/* In single escalation mode, we grab the ESB MMIO of the
 	 * interrupt and mask it. Also populate the VCPU v/raddr
 	 * of the ESB page for use by asm entry/exit code. Finally
-	 * set the XIVE_IRQ_NO_EOI flag which will prevent the
+	 * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the
 	 * core code from performing an EOI on the escalation
 	 * interrupt, thus leaving it effectively masked after
 	 * it fires once.
@@ -231,7 +231,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
 		xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
 		vcpu->arch.xive_esc_raddr = xd->eoi_page;
 		vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
-		xd->flags |= XIVE_IRQ_NO_EOI;
+		xd->flags |= XIVE_IRQ_FLAG_NO_EOI;
 	}
 
 	return 0;
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index a80440af491a..65af34ac1fa2 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -416,7 +416,7 @@ static void xive_irq_eoi(struct irq_data *d)
 	 * been passed-through to a KVM guest
 	 */
 	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
-	    !(xd->flags & XIVE_IRQ_NO_EOI))
+	    !(xd->flags & XIVE_IRQ_FLAG_NO_EOI))
 		xive_do_source_eoi(irqd_to_hwirq(d), xd);
 	else
 		xd->stale_p = true;
-- 
2.26.2


^ permalink raw reply related

* [PATCH 04/13] powerpc/xive: Use cpu_to_node() instead of ibm, chip-id property
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20201208151124.1329942-1-clg@kaod.org>

The 'chip_id' field of the XIVE CPU structure is used to choose a
target for a source located on the same chip when possible. This field
is assigned on the PowerNV platform using the "ibm,chip-id" property
on pSeries under KVM when NUMA nodes are defined but it is undefined
under PowerVM. The XIVE source structure has a similar field
'src_chip' which is only assigned on the PowerNV platform.

cpu_to_node() returns a compatible value on all platforms, 0 being the
default node. It will also give us the opportunity to set the affinity
of a source on pSeries when we can localize them.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/sysdev/xive/common.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index ee375daf8114..605238ca65e4 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1342,16 +1342,11 @@ static int xive_prepare_cpu(unsigned int cpu)

 	xc = per_cpu(xive_cpu, cpu);
 	if (!xc) {
-		struct device_node *np;
-
 		xc = kzalloc_node(sizeof(struct xive_cpu),
 				  GFP_KERNEL, cpu_to_node(cpu));
 		if (!xc)
 			return -ENOMEM;
-		np = of_get_cpu_node(cpu, NULL);
-		if (np)
-			xc->chip_id = of_get_ibm_chip_id(np);
-		of_node_put(np);
+		xc->chip_id = cpu_to_node(cpu);
 		xc->hw_ipi = XIVE_BAD_IRQ;

 		per_cpu(xive_cpu, cpu) = xc;
-- 
2.26.2

^ permalink raw reply related

* [PATCH 08/13] powerpc: Increase NR_IRQS range to support more KVM guests
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20201208151124.1329942-1-clg@kaod.org>

PowerNV systems can handle up to 4K guests and 1M interrupt numbers
per chip. Increase the range of allowed interrupts to support a larger
number of guests.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5181872f9452..c250fbd430d1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -66,7 +66,7 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK
 
 config NR_IRQS
 	int "Number of virtual interrupt numbers"
-	range 32 32768
+	range 32 1048576
 	default "512"
 	help
 	  This defines the number of virtual interrupt numbers the kernel
-- 
2.26.2


^ permalink raw reply related

* [PATCH 00/13] powerpc/xive: misc cleanups
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater

Hello,

The most important change is the removal of support of OPAL flags
required for P9 DD1. It provides a good cleanup of some complex
routines.

The series also includes a change on how the pages donated to the XIVE
IC are allocated in Linux. The flags are changed to make sure that
these pages can not be reclaimed.

Thanks,

C.

Cédric Le Goater (13):
  KVM: PPC: Book3S HV: XIVE: Show detailed configuration in debug output
  powerpc/xive: Rename XIVE_IRQ_NO_EOI to show its a flag
  powerpc/xive: Introduce XIVE_IPI_HW_IRQ
  powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property
  powerpc/xive: Fix allocation of pages donated to the XIVE controller
  powerpc/xive: Add a name to the IRQ domain
  powerpc/xive: Add a debug_show handler to the XIVE irq_domain
  powerpc: Increase NR_IRQS range to support more KVM guests
  powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_SHIFT_BUG
  powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_MASK_FW
  powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_EOI_FW
  powerpc/xive: Simplify xive_do_source_eoi()
  powerpc/xive: Improve error reporting of OPAL calls

 arch/powerpc/include/asm/opal-api.h      |   6 +-
 arch/powerpc/include/asm/xive.h          |   8 +-
 arch/powerpc/kvm/book3s_xive.h           |   2 +
 arch/powerpc/sysdev/xive/xive-internal.h |   9 +-
 arch/powerpc/kvm/book3s_xive.c           | 134 +++++++-------
 arch/powerpc/kvm/book3s_xive_native.c    |  24 ++-
 arch/powerpc/kvm/book3s_xive_template.c  |   5 -
 arch/powerpc/sysdev/xive/common.c        | 219 +++++++++++------------
 arch/powerpc/sysdev/xive/native.c        |  48 ++---
 arch/powerpc/sysdev/xive/spapr.c         |   8 +-
 arch/powerpc/Kconfig                     |   2 +-
 11 files changed, 230 insertions(+), 235 deletions(-)

-- 
2.26.2


^ permalink raw reply

* [PATCH 09/13] powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_SHIFT_BUG
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20201208151124.1329942-1-clg@kaod.org>

This flag was used to support the PHB4 LSIs on P9 DD1 and we have
stopped supporting this CPU when DD2 came out. See skiboot commit:

  https://github.com/open-power/skiboot/commit/0b0d15e3c170

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/opal-api.h     | 2 +-
 arch/powerpc/include/asm/xive.h         | 2 +-
 arch/powerpc/kvm/book3s_xive_native.c   | 3 ---
 arch/powerpc/kvm/book3s_xive_template.c | 3 ---
 arch/powerpc/sysdev/xive/common.c       | 8 --------
 arch/powerpc/sysdev/xive/native.c       | 2 --
 6 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 1dffa3cb16ba..48ee604ca39a 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1091,7 +1091,7 @@ enum {
 	OPAL_XIVE_IRQ_TRIGGER_PAGE	= 0x00000001,
 	OPAL_XIVE_IRQ_STORE_EOI		= 0x00000002,
 	OPAL_XIVE_IRQ_LSI		= 0x00000004,
-	OPAL_XIVE_IRQ_SHIFT_BUG		= 0x00000008,
+	OPAL_XIVE_IRQ_SHIFT_BUG		= 0x00000008, /* P9 DD1.0 workaround */
 	OPAL_XIVE_IRQ_MASK_VIA_FW	= 0x00000010,
 	OPAL_XIVE_IRQ_EOI_VIA_FW	= 0x00000020,
 };
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index d332dd9a18de..ff805885a028 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -60,7 +60,7 @@ struct xive_irq_data {
 };
 #define XIVE_IRQ_FLAG_STORE_EOI	0x01
 #define XIVE_IRQ_FLAG_LSI	0x02
-#define XIVE_IRQ_FLAG_SHIFT_BUG	0x04
+#define XIVE_IRQ_FLAG_SHIFT_BUG	0x04 /* P9 DD1.0 workaround */
 #define XIVE_IRQ_FLAG_MASK_FW	0x08
 #define XIVE_IRQ_FLAG_EOI_FW	0x10
 #define XIVE_IRQ_FLAG_H_INT_ESB	0x20
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 9b395381179d..170d1d04e1d1 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -37,9 +37,6 @@ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
 	 * ordering.
 	 */
 
-	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
-		offset |= offset << 4;
-
 	val = in_be64(xd->eoi_mmio + offset);
 	return (u8)val;
 }
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 4ad3c0279458..ece36e024a8f 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -61,9 +61,6 @@ static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
 	if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
 		offset |= XIVE_ESB_LD_ST_MO;
 
-	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
-		offset |= offset << 4;
-
 	val =__x_readq(__x_eoi_page(xd) + offset);
 #ifdef __LITTLE_ENDIAN__
 	val >>= 64-8;
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 411cba12d73b..a9259470bf9f 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -200,10 +200,6 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
 	if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
 		offset |= XIVE_ESB_LD_ST_MO;
 
-	/* Handle HW errata */
-	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
-		offset |= offset << 4;
-
 	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
 		val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
 	else
@@ -214,10 +210,6 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
 
 static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
 {
-	/* Handle HW errata */
-	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
-		offset |= offset << 4;
-
 	if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
 		xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
 	else
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 5f1e5aed8ab4..0310783241b5 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -64,8 +64,6 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 		data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
 	if (opal_flags & OPAL_XIVE_IRQ_LSI)
 		data->flags |= XIVE_IRQ_FLAG_LSI;
-	if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG)
-		data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG;
 	if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW)
 		data->flags |= XIVE_IRQ_FLAG_MASK_FW;
 	if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW)
-- 
2.26.2


^ permalink raw reply related

* [PATCH 12/13] powerpc/xive: Simplify xive_do_source_eoi()
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20201208151124.1329942-1-clg@kaod.org>

Previous patches removed the need of the first argument which was a
hack for Firwmware EOI. Remove it and flatten the routine which has
became simpler.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/sysdev/xive/common.c | 72 ++++++++++++++-----------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index fe6229dd3241..fb438203d5ee 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -348,39 +348,40 @@ static void xive_do_queue_eoi(struct xive_cpu *xc)
  * EOI an interrupt at the source. There are several methods
  * to do this depending on the HW version and source type
  */
-static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
+static void xive_do_source_eoi(struct xive_irq_data *xd)
 {
+	u8 eoi_val;
+
 	xd->stale_p = false;
+
 	/* If the XIVE supports the new "store EOI facility, use it */
-	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) {
 		xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
-	else {
-		u8 eoi_val;
+		return;
+	}
 
-		/*
-		 * Otherwise for EOI, we use the special MMIO that does
-		 * a clear of both P and Q and returns the old Q,
-		 * except for LSIs where we use the "EOI cycle" special
-		 * load.
-		 *
-		 * This allows us to then do a re-trigger if Q was set
-		 * rather than synthesizing an interrupt in software
-		 *
-		 * For LSIs the HW EOI cycle is used rather than PQ bits,
-		 * as they are automatically re-triggred in HW when still
-		 * pending.
-		 */
-		if (xd->flags & XIVE_IRQ_FLAG_LSI)
-			xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
-		else {
-			eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
-			DBG_VERBOSE("eoi_val=%x\n", eoi_val);
-
-			/* Re-trigger if needed */
-			if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
-				out_be64(xd->trig_mmio, 0);
-		}
+	/*
+	 * For LSIs, we use the "EOI cycle" special load rather than
+	 * PQ bits, as they are automatically re-triggered in HW when
+	 * still pending.
+	 */
+	if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+		xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
+		return;
 	}
+
+	/*
+	 * Otherwise, we use the special MMIO that does a clear of
+	 * both P and Q and returns the old Q. This allows us to then
+	 * do a re-trigger if Q was set rather than synthesizing an
+	 * interrupt in software
+	 */
+	eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+	DBG_VERBOSE("eoi_val=%x\n", eoi_val);
+
+	/* Re-trigger if needed */
+	if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
+		out_be64(xd->trig_mmio, 0);
 }
 
 /* irq_chip eoi callback, called with irq descriptor lock held */
@@ -398,7 +399,7 @@ static void xive_irq_eoi(struct irq_data *d)
 	 */
 	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
 	    !(xd->flags & XIVE_IRQ_FLAG_NO_EOI))
-		xive_do_source_eoi(irqd_to_hwirq(d), xd);
+		xive_do_source_eoi(xd);
 	else
 		xd->stale_p = true;
 
@@ -788,14 +789,7 @@ static int xive_irq_retrigger(struct irq_data *d)
 	 * 11, then perform an EOI.
 	 */
 	xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
-
-	/*
-	 * Note: We pass "0" to the hw_irq argument in order to
-	 * avoid calling into the backend EOI code which we don't
-	 * want to do in the case of a re-trigger. Backends typically
-	 * only do EOI for LSIs anyway.
-	 */
-	xive_do_source_eoi(0, xd);
+	xive_do_source_eoi(xd);
 
 	return 1;
 }
@@ -910,7 +904,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
 		 * while masked, the generic code will re-mask it anyway.
 		 */
 		if (!xd->saved_p)
-			xive_do_source_eoi(hw_irq, xd);
+			xive_do_source_eoi(xd);
 
 	}
 	return 0;
@@ -1054,7 +1048,7 @@ static void xive_ipi_eoi(struct irq_data *d)
 	DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
 		    d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
 
-	xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data);
+	xive_do_source_eoi(&xc->ipi_data);
 	xive_do_queue_eoi(xc);
 }
 
@@ -1443,7 +1437,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
 		 * still asserted. Otherwise do an MSI retrigger.
 		 */
 		if (xd->flags & XIVE_IRQ_FLAG_LSI)
-			xive_do_source_eoi(irqd_to_hwirq(d), xd);
+			xive_do_source_eoi(xd);
 		else
 			xive_irq_retrigger(d);
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH 11/13] powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_EOI_FW
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20201208151124.1329942-1-clg@kaod.org>

This flag was used to support the P9 DD1 and we have stopped
supporting this CPU when DD2 came out. See skiboot commit:

  https://github.com/open-power/skiboot/commit/0b0d15e3c170

Also, remove eoi handler which is now unused.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/opal-api.h      |  2 +-
 arch/powerpc/include/asm/xive.h          |  2 +-
 arch/powerpc/sysdev/xive/xive-internal.h |  1 -
 arch/powerpc/kvm/book3s_xive_template.c  |  2 --
 arch/powerpc/sysdev/xive/common.c        | 13 +------------
 arch/powerpc/sysdev/xive/native.c        | 12 ------------
 arch/powerpc/sysdev/xive/spapr.c         |  6 ------
 7 files changed, 3 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 0455b679c050..0b63ba7d5917 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1093,7 +1093,7 @@ enum {
 	OPAL_XIVE_IRQ_LSI		= 0x00000004,
 	OPAL_XIVE_IRQ_SHIFT_BUG		= 0x00000008, /* P9 DD1.0 workaround */
 	OPAL_XIVE_IRQ_MASK_VIA_FW	= 0x00000010, /* P9 DD1.0 workaround */
-	OPAL_XIVE_IRQ_EOI_VIA_FW	= 0x00000020,
+	OPAL_XIVE_IRQ_EOI_VIA_FW	= 0x00000020, /* P9 DD1.0 workaround */
 };
 
 /* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index d62368d0ba91..f6150d7a757a 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -62,7 +62,7 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_LSI	0x02
 #define XIVE_IRQ_FLAG_SHIFT_BUG	0x04 /* P9 DD1.0 workaround */
 #define XIVE_IRQ_FLAG_MASK_FW	0x08 /* P9 DD1.0 workaround */
-#define XIVE_IRQ_FLAG_EOI_FW	0x10
+#define XIVE_IRQ_FLAG_EOI_FW	0x10 /* P9 DD1.0 workaround */
 #define XIVE_IRQ_FLAG_H_INT_ESB	0x20
 
 /* Special flag set by KVM for excalation interrupts */
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index 066d6fe3dc1d..3b7dd2cba9db 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -52,7 +52,6 @@ struct xive_ops {
 	void	(*shutdown)(void);
 
 	void	(*update_pending)(struct xive_cpu *xc);
-	void	(*eoi)(u32 hw_irq);
 	void	(*sync_source)(u32 hw_irq);
 	u64	(*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write);
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index ece36e024a8f..b0015e05d99a 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -74,8 +74,6 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
 		__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
-	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW)
-		opal_int_eoi(hw_irq);
 	else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
 		/*
 		 * For LSIs the HW EOI cycle is used rather than PQ bits,
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index a71412fefb65..fe6229dd3241 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -354,18 +354,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 	/* If the XIVE supports the new "store EOI facility, use it */
 	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
 		xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
-	else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
-		/*
-		 * The FW told us to call it. This happens for some
-		 * interrupt sources that need additional HW whacking
-		 * beyond the ESB manipulation. For example LPC interrupts
-		 * on P9 DD1.0 needed a latch to be clared in the LPC bridge
-		 * itself. The Firmware will take care of it.
-		 */
-		if (WARN_ON_ONCE(!xive_ops->eoi))
-			return;
-		xive_ops->eoi(hw_irq);
-	} else {
+	else {
 		u8 eoi_val;
 
 		/*
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index deb97ad25d62..4902d05ebbd1 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -64,8 +64,6 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 		data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
 	if (opal_flags & OPAL_XIVE_IRQ_LSI)
 		data->flags |= XIVE_IRQ_FLAG_LSI;
-	if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW)
-		data->flags |= XIVE_IRQ_FLAG_EOI_FW;
 	data->eoi_page = be64_to_cpu(eoi_page);
 	data->trig_page = be64_to_cpu(trig_page);
 	data->esb_shift = be32_to_cpu(esb_shift);
@@ -380,15 +378,6 @@ static void xive_native_update_pending(struct xive_cpu *xc)
 	}
 }
 
-static void xive_native_eoi(u32 hw_irq)
-{
-	/*
-	 * Not normally used except if specific interrupts need
-	 * a workaround on EOI.
-	 */
-	opal_int_eoi(hw_irq);
-}
-
 static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
 {
 	s64 rc;
@@ -471,7 +460,6 @@ static const struct xive_ops xive_native_ops = {
 	.match			= xive_native_match,
 	.shutdown		= xive_native_shutdown,
 	.update_pending		= xive_native_update_pending,
-	.eoi			= xive_native_eoi,
 	.setup_cpu		= xive_native_setup_cpu,
 	.teardown_cpu		= xive_native_teardown_cpu,
 	.sync_source		= xive_native_sync_source,
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index 6610e5149d5a..01ccc0786ada 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -628,11 +628,6 @@ static void xive_spapr_update_pending(struct xive_cpu *xc)
 	}
 }
 
-static void xive_spapr_eoi(u32 hw_irq)
-{
-	/* Not used */;
-}
-
 static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
 {
 	/* Only some debug on the TIMA settings */
@@ -677,7 +672,6 @@ static const struct xive_ops xive_spapr_ops = {
 	.match			= xive_spapr_match,
 	.shutdown		= xive_spapr_shutdown,
 	.update_pending		= xive_spapr_update_pending,
-	.eoi			= xive_spapr_eoi,
 	.setup_cpu		= xive_spapr_setup_cpu,
 	.teardown_cpu		= xive_spapr_teardown_cpu,
 	.sync_source		= xive_spapr_sync_source,
-- 
2.26.2


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox