* Re: [PATCH v13 15/24] selftests/vm: powerpc implementation for generic abstraction
From: Dave Hansen @ 2018-06-20 15:06 UTC (permalink / raw)
To: Ram Pai, shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, mhocko,
bauerman, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-16-git-send-email-linuxram@us.ibm.com>
> +static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
> +{
> +#ifdef si_pkey
> + return &si->si_pkey;
> +#else
> + return (u32 *)(((u8 *)si) + si_pkey_offset);
> +#endif
> }
FWIW, this isn't ppc-specific.
> diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
> index f43a319..88dfa40 100644
> --- a/tools/testing/selftests/vm/protection_keys.c
> +++ b/tools/testing/selftests/vm/protection_keys.c
> @@ -197,17 +197,18 @@ void dump_mem(void *dumpme, int len_bytes)
>
> int pkey_faults;
> int last_si_pkey = -1;
> +void pkey_access_allow(int pkey);
> void signal_handler(int signum, siginfo_t *si, void *vucontext)
> {
> ucontext_t *uctxt = vucontext;
> int trapno;
> unsigned long ip;
> char *fpregs;
> +#if defined(__i386__) || defined(__x86_64__) /* arch */
> pkey_reg_t *pkey_reg_ptr;
> - u64 siginfo_pkey;
> +#endif /* defined(__i386__) || defined(__x86_64__) */
> + u32 siginfo_pkey;
> u32 *si_pkey_ptr;
> - int pkey_reg_offset;
> - fpregset_t fpregset;
>
> dprint_in_signal = 1;
> dprintf1(">>>>===============SIGSEGV============================\n");
> @@ -217,12 +218,14 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
>
> trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
> ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
> - fpregset = uctxt->uc_mcontext.fpregs;
> - fpregs = (void *)fpregset;
> + fpregs = (char *) uctxt->uc_mcontext.fpregs;
>
> dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
> __func__, trapno, ip, si_code_str(si->si_code),
> si->si_code);
> +
> +#if defined(__i386__) || defined(__x86_64__) /* arch */
> +
> #ifdef __i386__
> /*
> * 32-bit has some extra padding so that userspace can tell whether
> @@ -230,20 +233,28 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
> * state. We just assume that it is here.
> */
> fpregs += 0x70;
> -#endif
> - pkey_reg_offset = pkey_reg_xstate_offset();
> - pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
> +#endif /* __i386__ */
>
> - dprintf1("siginfo: %p\n", si);
> - dprintf1(" fpregs: %p\n", fpregs);
> + pkey_reg_ptr = (void *)(&fpregs[pkey_reg_xstate_offset()]);
> /*
> - * If we got a PKEY fault, we *HAVE* to have at least one bit set in
> + * If we got a key fault, we *HAVE* to have at least one bit set in
> * here.
> */
> dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
> if (DEBUG_LEVEL > 4)
> dump_mem(pkey_reg_ptr - 128, 256);
> pkey_assert(*pkey_reg_ptr);
> +#endif /* defined(__i386__) || defined(__x86_64__) */
The series up to this point has been looking pretty nice and broken out
and easy to read. It goes off the rails a bit here. Adding #ifdefs and..
> + dprintf1("siginfo: %p\n", si);
> + dprintf1(" fpregs: %p\n", fpregs);
> +
> + si_pkey_ptr = siginfo_get_pkey_ptr(si);
> + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
> + dump_mem(si_pkey_ptr - 8, 24);
> + siginfo_pkey = *si_pkey_ptr;
> + pkey_assert(siginfo_pkey < NR_PKEYS);
> + last_si_pkey = siginfo_pkey;
>
> if ((si->si_code == SEGV_MAPERR) ||
> (si->si_code == SEGV_ACCERR) ||
> @@ -252,22 +263,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
> exit(4);
> }
>
> - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
> - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
> - dump_mem((u8 *)si_pkey_ptr - 8, 24);
> - siginfo_pkey = *si_pkey_ptr;
> - pkey_assert(siginfo_pkey < NR_PKEYS);
> - last_si_pkey = siginfo_pkey;
Moving random code around with no explanation.
> - dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr);
> /*
> * need __read_pkey_reg() version so we do not do shadow_pkey_reg
> * checking
> */
> dprintf1("signal pkey_reg from pkey_reg: "PKEY_REG_FMT"\n",
> __read_pkey_reg());
> - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
> - *(u64 *)pkey_reg_ptr = 0x00000000;
> +#if defined(__i386__) || defined(__x86_64__) /* arch */
> + dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr);
> + *(u64 *)pkey_reg_ptr &= clear_pkey_flags(siginfo_pkey,
> + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE);
> +#elif __powerpc64__
> + pkey_access_allow(siginfo_pkey);
> +#endif
> + shadow_pkey_reg &= clear_pkey_flags(siginfo_pkey,
> + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE);
> dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction "
> "to continue\n");
> pkey_faults++;
> @@ -1331,9 +1341,8 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
> madvise(p1, PAGE_SIZE, MADV_DONTNEED);
> lots_o_noops_around_write(&scratch);
> do_not_expect_pkey_fault("executing on PROT_EXEC memory");
> - ptr_contents = read_ptr(p1);
> - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
> - expected_pkey_fault(pkey);
> +
> + expect_fault_on_read_execonly_key(p1, pkey);
> }
While none of this is a deal-breaker (as I said, I feel like the
selftests/ rules are a bit more lax) this does kinda break the illusion
of a nice, broken out series.
Could you address this a bit in the changelog at least, please?
^ permalink raw reply
* Re: [PATCH v13 16/24] selftests/vm: clear the bits in shadow reg when a pkey is freed.
From: Dave Hansen @ 2018-06-20 15:07 UTC (permalink / raw)
To: Ram Pai, shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, mhocko,
bauerman, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-17-git-send-email-linuxram@us.ibm.com>
On 06/13/2018 05:45 PM, Ram Pai wrote:
> --- a/tools/testing/selftests/vm/protection_keys.c
> +++ b/tools/testing/selftests/vm/protection_keys.c
> @@ -577,7 +577,8 @@ int sys_pkey_free(unsigned long pkey)
> int ret = syscall(SYS_pkey_free, pkey);
>
> if (!ret)
> - shadow_pkey_reg &= clear_pkey_flags(pkey, PKEY_DISABLE_ACCESS);
> + shadow_pkey_reg &= clear_pkey_flags(pkey,
> + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE);
> dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
> return ret;
> }
Why did you introduce this code earlier and then modify it now?
BTW, my original aversion to this code still stands.
^ permalink raw reply
* Re: [PATCH v2 0/6] powerpc/pkeys: fixes to pkeys
From: Florian Weimer @ 2018-06-20 15:08 UTC (permalink / raw)
To: Michael Ellerman, Ram Pai
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, Ulrich.Weigand, luto, msuchanek
In-Reply-To: <874lhzx84e.fsf@concordia.ellerman.id.au>
On 06/19/2018 02:40 PM, Michael Ellerman wrote:
>> I tested the whole series with the new selftests, with the printamr.c
>> program I posted earlier, and the glibc test for pkey_alloc &c. The
>> latter required some test fixes, but now passes as well. As far as I
>> can tell, everything looks good now.
>>
>> Tested-By: Florian Weimer<fweimer@redhat.com>
> Thanks. I'll add that to each patch I guess, if you're happy with that?
Sure, but I only tested the whole series as a whole.
Thanks,
Florian
^ permalink raw reply
* Re: [PATCH v13 17/24] selftests/vm: powerpc implementation to check support for pkey
From: Dave Hansen @ 2018-06-20 15:09 UTC (permalink / raw)
To: Ram Pai, shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, mhocko,
bauerman, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-18-git-send-email-linuxram@us.ibm.com>
> - if (cpu_has_pku()) {
> - dprintf1("SKIP: %s: no CPU support\n", __func__);
> + if (is_pkey_supported()) {
> + dprintf1("SKIP: %s: no CPU/kernel support\n", __func__);
> return;
> }
I actually kinda wanted a specific message for when the *CPU* doesn't
support the feature.
^ permalink raw reply
* Re: [PATCH v13 18/24] selftests/vm: fix an assertion in test_pkey_alloc_exhaust()
From: Dave Hansen @ 2018-06-20 15:11 UTC (permalink / raw)
To: Ram Pai, shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, mhocko,
bauerman, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-19-git-send-email-linuxram@us.ibm.com>
On 06/13/2018 05:45 PM, Ram Pai wrote:
> /*
> - * There are 16 pkeys supported in hardware. Three are
> - * allocated by the time we get here:
> - * 1. The default key (0)
> - * 2. One possibly consumed by an execute-only mapping.
> - * 3. One allocated by the test code and passed in via
> - * 'pkey' to this function.
> - * Ensure that we can allocate at least another 13 (16-3).
> + * There are NR_PKEYS pkeys supported in hardware. arch_reserved_keys()
> + * are reserved. One of which is the default key(0). One can be taken
> + * up by an execute-only mapping.
> + * Ensure that we can allocate at least the remaining.
> */
> - pkey_assert(i >= NR_PKEYS-3);
> + pkey_assert(i >= (NR_PKEYS-arch_reserved_keys()-1));
We recently had a bug here. I fixed it and left myself a really nice
comment so I and others wouldn't screw it up in the future.
Does this kill my nice, new comment?
^ permalink raw reply
* Re: [PATCH v13 19/24] selftests/vm: associate key on a mapped page and detect access violation
From: Dave Hansen @ 2018-06-20 15:16 UTC (permalink / raw)
To: Ram Pai, shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, mhocko,
bauerman, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-20-git-send-email-linuxram@us.ibm.com>
On 06/13/2018 05:45 PM, Ram Pai wrote:
> +void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
> + u16 pkey)
> +{
> + int ptr_contents;
> +
> + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n",
> + pkey, ptr);
> + ptr_contents = read_ptr(ptr);
> + dprintf1("reading ptr before disabling the read : %d\n",
> + ptr_contents);
> + read_pkey_reg();
> + pkey_access_deny(pkey);
> + ptr_contents = read_ptr(ptr);
> + dprintf1("*ptr: %d\n", ptr_contents);
> + expected_pkey_fault(pkey);
> +}
Looks fine to me. I'm a bit surprised we didn't do this already, which
is a good thing for this patch.
FWIW, if you took patches like this and put them first, you could
probably get it merged now. Yes, I know it would mean redoing some of
the later code move and rename ones.
^ permalink raw reply
* Re: [PATCH v13 22/24] selftests/vm: testcases must restore pkey-permissions
From: Dave Hansen @ 2018-06-20 15:20 UTC (permalink / raw)
To: Ram Pai, shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, mhocko,
bauerman, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-23-git-send-email-linuxram@us.ibm.com>
On 06/13/2018 05:45 PM, Ram Pai wrote:
> Generally the signal handler restores the state of the pkey register
> before returning. However there are times when the read/write operation
> can legitamely fail without invoking the signal handler. Eg: A
> sys_read() operaton to a write-protected page should be disallowed. In
> such a case the state of the pkey register is not restored to its
> original state. The test case is responsible for restoring the key
> register state to its original value.
Seems fragile. Can't we just do this in common code? We could just
loop through and restore the default permissions. That seems much more
resistant to a bad test case.
^ permalink raw reply
* Re: [PATCH v13 24/24] selftests/vm: test correct behavior of pkey-0
From: Dave Hansen @ 2018-06-20 15:22 UTC (permalink / raw)
To: Ram Pai, shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, mhocko,
bauerman, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-25-git-send-email-linuxram@us.ibm.com>
On 06/13/2018 05:45 PM, Ram Pai wrote:
> Ensure pkey-0 is allocated on start. Ensure pkey-0 can be attached
> dynamically in various modes, without failures. Ensure pkey-0 can be
> freed and allocated.
I like this. Looks very useful.
Acked-by: Dave Hansen <dave.hansen@intel.com>
^ permalink raw reply
* [PATCH v3] powerpc/32: Remove left over function prototypes
From: Mathieu Malaterre @ 2018-06-20 19:00 UTC (permalink / raw)
To: Michael Ellerman
Cc: Mathieu Malaterre, Benjamin Herrenschmidt, Paul Mackerras,
Nicholas Piggin, linuxppc-dev, linux-kernel
In-Reply-To: <20180408194821.19682-1-malat@debian.org>
In commit 4aea909eeba3 ("powerpc: Add missing prototypes in setup_32.c")
prototypes for
- ppc_setup_l2cr
- ppc_setup_l3cr
- ppc_init
were added but at the same time in commit d15a261d876d ("powerpc/32: Make
some functions static") those same functions were made static. Fix
conflicting changes by removing the prototypes and leave the function as
static. Fix the following warnings, treated as errors with W=1:
arch/powerpc/kernel/setup_32.c:127:19: error: static declaration of ‘ppc_setup_l2cr’ follows non-static declaration
arch/powerpc/kernel/setup_32.c:140:19: error: static declaration of ‘ppc_setup_l3cr’ follows non-static declaration
arch/powerpc/kernel/setup_32.c:186:19: error: static declaration of ‘ppc_init’ follows non-static declaration
Signed-off-by: Mathieu Malaterre <malat@debian.org>
---
v3: correct subject line to be less confusing
v2: Previous version contained the reverted patch, correct that.
arch/powerpc/kernel/setup.h | 3 ---
1 file changed, 3 deletions(-)
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 35ca309848d7..829ed66f0a40 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -19,9 +19,6 @@ void irqstack_early_init(void);
void setup_power_save(void);
unsigned long __init early_init(unsigned long dt_ptr);
void __init machine_init(u64 dt_ptr);
-int __init ppc_setup_l2cr(char *str);
-int __init ppc_setup_l3cr(char *str);
-int __init ppc_init(void);
#else
static inline void setup_power_save(void) { };
#endif
--
2.11.0
^ permalink raw reply related
* [PATCH] powerpc: Wire up io_pgetevents
From: Breno Leitao @ 2018-06-20 19:35 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Breno Leitao, Christoph Hellwig
Wire up io_pgetevents system call on powerpc.
io_pgetevents is a new syscall to read asynchronous I/O events from the
completion queue.
Tested with libaio branch aio-poll[1] and the io_pgetevents test (#22) passed
on both ppc64 LE and BE modes.
[1] https://pagure.io/libaio/branch/aio-poll
CC: Christoph Hellwig <hch@lst.de>
Signed-off-by: Breno Leitao <leitao@debian.org>
---
arch/powerpc/include/asm/systbl.h | 1 +
arch/powerpc/include/asm/unistd.h | 2 +-
arch/powerpc/include/uapi/asm/unistd.h | 1 +
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index cfcf6a874cfa..01b5171ea189 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -393,3 +393,4 @@ SYSCALL(pkey_alloc)
SYSCALL(pkey_free)
SYSCALL(pkey_mprotect)
SYSCALL(rseq)
+COMPAT_SYS(io_pgetevents)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 1e9708632dce..c19379f0a32e 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 388
+#define NR_syscalls 389
#define __NR__exit __NR_exit
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index ac5ba55066dd..985534d0b448 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -399,5 +399,6 @@
#define __NR_pkey_free 385
#define __NR_pkey_mprotect 386
#define __NR_rseq 387
+#define __NR_io_pgetevents 388
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
--
2.16.3
^ permalink raw reply related
* [PATCH] selftests/powerpc: Fix strncpy usage
From: Breno Leitao @ 2018-06-20 22:51 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Breno Leitao, Anshuman Khandual
There is a buffer overflow in dscr_inherit_test.c test. In main(), strncpy()'s
third argument is the lengh of the source, not the size of the destination
buffer, which makes strncpy() behaves like strcpy(), causing a buffer overflow
if argv[0] is bigger than LEN_MAX (100).
This patch simply limit the string copy to sizeof(prog) less 1 (space for \0).
CC: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
---
tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
index 08a8b95e3bc1..638e0dc717d5 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -104,6 +104,6 @@ int main(int argc, char *argv[])
exit(1);
}
- strncpy(prog, argv[0], strlen(argv[0]));
+ strncpy(prog, argv[0], sizeof(prog) - 1);
return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test");
}
--
2.17.1
^ permalink raw reply related
* Re: [PATCH v2 1/3] kbuild: add macro for controlling warnings to linux/compiler.h
From: Masahiro Yamada @ 2018-06-20 23:17 UTC (permalink / raw)
To: Paul Burton
Cc: Linux Kbuild mailing list, Mauro Carvalho Chehab, Linux-MIPS,
Arnd Bergmann, Ingo Molnar, Matthew Wilcox, Thomas Gleixner,
Douglas Anderson, Josh Poimboeuf, Andrew Morton,
Matthias Kaehlcke, He Zhe, Benjamin Herrenschmidt, Michal Marek,
Khem Raj, Christophe Leroy, Al Viro, Stafford Horne,
Gideon Israel Dsouza, Kees Cook, Michael Ellerman, Heiko Carstens,
Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
In-Reply-To: <20180619201458.4559-2-paul.burton@mips.com>
Hi.
2018-06-20 5:14 GMT+09:00 Paul Burton <paul.burton@mips.com>:
> From: Arnd Bergmann <arnd@arndb.de>
>
> I have occasionally run into a situation where it would make sense to
> control a compiler warning from a source file rather than doing so from
> a Makefile using the $(cc-disable-warning, ...) or $(cc-option, ...)
> helpers.
>
> The approach here is similar to what glibc uses, using __diag() and
> related macros to encapsulate a _Pragma("GCC diagnostic ...") statement
> that gets turned into the respective "#pragma GCC diagnostic ..." by
> the preprocessor when the macro gets expanded.
>
> Like glibc, I also have an argument to pass the affected compiler
> version, but decided to actually evaluate that one. For now, this
> supports GCC_4_6, GCC_4_7, GCC_4_8, GCC_4_9, GCC_5, GCC_6, GCC_7,
> GCC_8 and GCC_9. Adding support for CLANG_5 and other interesting
> versions is straightforward here. GNU compilers starting with gcc-4.2
> could support it in principle, but "#pragma GCC diagnostic push"
> was only added in gcc-4.6, so it seems simpler to not deal with those
> at all. The same versions show a large number of warnings already,
> so it seems easier to just leave it at that and not do a more
> fine-grained control for them.
>
> The use cases I found so far include:
>
> - turning off the gcc-8 -Wattribute-alias warning inside of the
> SYSCALL_DEFINEx() macro without having to do it globally.
>
> - Reducing the build time for a simple re-make after a change,
> once we move the warnings from ./Makefile and
> ./scripts/Makefile.extrawarn into linux/compiler.h
>
> - More control over the warnings based on other configurations,
> using preprocessor syntax instead of Makefile syntax. This should make
> it easier for the average developer to understand and change things.
>
> - Adding an easy way to turn the W=1 option on unconditionally
> for a subdirectory or a specific file. This has been requested
> by several developers in the past that want to have their subsystems
> W=1 clean.
>
> - Integrating clang better into the build systems. Clang supports
> more warnings than GCC, and we probably want to classify them
> as default, W=1, W=2 etc, but there are cases in which the
> warnings should be classified differently due to excessive false
> positives from one or the other compiler.
>
> - Adding a way to turn the default warnings into errors (e.g. using
> a new "make E=0" tag) while not also turning the W=1 warnings into
> errors.
>
> This patch for now just adds the minimal infrastructure in order to
> do the first of the list above. As the #pragma GCC diagnostic
> takes precedence over command line options, the next step would be
> to convert a lot of the individual Makefiles that set nonstandard
> options to use __diag() instead.
>
> [paul.burton@mips.com:
> - Rebase atop current master.
> - Add __diag_GCC, or more generally __diag_<compiler>, abstraction to
> avoid code outside of linux/compiler-gcc.h needing to duplicate
> knowledge about different GCC versions.
> - Add a comment argument to __diag_{ignore,warn,error} which isn't
> used in the expansion of the macros but serves to push people to
> document the reason for using them - per feedback from Kees Cook.
> - Translate severity to GCC-specific pragmas in linux/compiler-gcc.h
> rather than using GCC-specific in linux/compiler_types.h.
> - Drop all but GCC 8 macros, since we only need to define macros for
> versions that we need to introduce pragmas for, and as of this
> series that's just GCC 8.
> - Capitalize comments in linux/compiler-gcc.h to match the style of
> the rest of the file.
> - Line up macro definitions with tabs in linux/compiler-gcc.h.]
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: Paul Burton <paul.burton@mips.com>
> Tested-by: Christophe Leroy <christophe.leroy@c-s.fr>
> Tested-by: Stafford Horne <shorne@gmail.com>
> Cc: Michal Marek <michal.lkml@markovi.net>
> Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
> Cc: Douglas Anderson <dianders@chromium.org>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
> Cc: Matthew Wilcox <matthew@wil.cx>
> Cc: Matthias Kaehlcke <mka@chromium.org>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Josh Poimboeuf <jpoimboe@redhat.com>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Gideon Israel Dsouza <gidisrael@gmail.com>
> Cc: Christophe Leroy <christophe.leroy@c-s.fr>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Stafford Horne <shorne@gmail.com>
> Cc: Khem Raj <raj.khem@gmail.com>
> Cc: He Zhe <zhe.he@windriver.com>
> Cc: linux-kbuild@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-mips@linux-mips.org
> Cc: linuxppc-dev@lists.ozlabs.org
>
> ---
>
> Changes in v2:
> - Add version argument to fallback __diag_GCC definition.
> - Translate severity from generic ignore,warn,error to GCC-specific
> pragma content ignored,warning,error in linux/compiler-gcc.h in order
> to keep linux/compiler_types.h generic per feedback from Masahiro
> Yamada.
> - Drop all but GCC 8 macros, since we only need to define macros for
> versions that we need to introduce pragmas for, and as of this series
> that's just GCC 8.
> - Capitalize comments in linux/compiler-gcc.h to match the style of the
> rest of the file.
> - Line up macro definitions with tabs in linux/compiler-gcc.h.
>
> include/linux/compiler-gcc.h | 27 +++++++++++++++++++++++++++
> include/linux/compiler_types.h | 18 ++++++++++++++++++
> 2 files changed, 45 insertions(+)
>
> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> index f1a7492a5cc8..5067a90af9c3 100644
> --- a/include/linux/compiler-gcc.h
> +++ b/include/linux/compiler-gcc.h
> @@ -347,3 +347,30 @@
> #if GCC_VERSION >= 50100
> #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
> #endif
> +
> +/*
> + * Turn individual warnings and errors on and off locally, depending
> + * on version.
> + */
> +#define __diag_GCC(version, severity, s) \
> + __diag_GCC_ ## version(__diag_GCC_ ## severity s)
> +
> +/* Severity used in pragma directives */
> +#define __diag_GCC_ignore ignored
> +#define __diag_GCC_warn warning
> +#define __diag_GCC_error error
> +
> +/* Compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */
> +#if GCC_VERSION >= 40600
> +#define __diag_str1(s) #s
> +#define __diag_str(s) __diag_str1(s)
> +#define __diag(s) _Pragma(__diag_str(GCC diagnostic s))
> +#else
> +#define __diag(s)
You can omit the #else here
because it is covered by compiler_types.h
> +#endif
> +
> +#if GCC_VERSION >= 80000
> +#define __diag_GCC_8(s) __diag(s)
> +#else
> +#define __diag_GCC_8(s)
> +#endif
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 6b79a9bba9a7..a8ba6b04152c 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -271,4 +271,22 @@ struct ftrace_likely_data {
> # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
> #endif
>
> +#ifndef __diag
> +#define __diag(string)
> +#endif
> +
> +#ifndef __diag_GCC
> +#define __diag_GCC(version, severity, string)
> +#endif
> +
> +#define __diag_push() __diag(push)
> +#define __diag_pop() __diag(pop)
> +
> +#define __diag_ignore(compiler, version, option, comment) \
> + __diag_ ## compiler(version, ignore, option)
> +#define __diag_warn(compiler, version, option, comment) \
> + __diag_ ## compiler(version, warn, option)
> +#define __diag_error(compiler, version, option, comment) \
> + __diag_ ## compiler(version, error, option)
> +
> #endif /* __LINUX_COMPILER_TYPES_H */
> --
> 2.17.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Best Regards
Masahiro Yamada
^ permalink raw reply
* Re: [PATCH 1/3] kbuild: add macro for controlling warnings to linux/compiler.h
From: Masahiro Yamada @ 2018-06-20 23:21 UTC (permalink / raw)
To: Paul Burton
Cc: Arnd Bergmann, Linux Kbuild mailing list, Mauro Carvalho Chehab,
Linux-MIPS, Ingo Molnar, Matthew Wilcox, Thomas Gleixner,
Douglas Anderson, Josh Poimboeuf, Andrew Morton,
Matthias Kaehlcke, He Zhe, Benjamin Herrenschmidt, Michal Marek,
Khem Raj, Christophe Leroy, Al Viro, Stafford Horne,
Gideon Israel Dsouza, Kees Cook, Michael Ellerman, Heiko Carstens,
Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
In-Reply-To: <20180619190225.7eguhiw3ixaiwpgl@pburton-laptop>
2018-06-20 4:02 GMT+09:00 Paul Burton <paul.burton@mips.com>:
> Hi Masahiro,
>
> On Wed, Jun 20, 2018 at 02:34:35AM +0900, Masahiro Yamada wrote:
>> 2018-06-16 9:53 GMT+09:00 Paul Burton <paul.burton@mips.com>:
>> > diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
>> > index f1a7492a5cc8..aba64a2912d8 100644
>> > --- a/include/linux/compiler-gcc.h
>> > +++ b/include/linux/compiler-gcc.h
>> > @@ -347,3 +347,69 @@
>> > #if GCC_VERSION >= 50100
>> > #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
>> > #endif
>> > +
>> > +/*
>> > + * turn individual warnings and errors on and off locally, depending
>> > + * on version.
>> > + */
>> > +#define __diag_GCC(version, s) __diag_GCC_ ## version(s)
>> > +
>> > +#if GCC_VERSION >= 40600
>> > +#define __diag_str1(s) #s
>> > +#define __diag_str(s) __diag_str1(s)
>> > +#define __diag(s) _Pragma(__diag_str(GCC diagnostic s))
>> > +
>> > +/* compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */
>> > +#define __diag_GCC_4_6(s) __diag(s)
>> > +#else
>> > +#define __diag(s)
>> > +#define __diag_GCC_4_6(s)
>> > +#endif
>> > +
>> > +#if GCC_VERSION >= 40700
>> > +#define __diag_GCC_4_7(s) __diag(s)
>> > +#else
>> > +#define __diag_GCC_4_7(s)
>> > +#endif
>> > +
>> > +#if GCC_VERSION >= 40800
>> > +#define __diag_GCC_4_8(s) __diag(s)
>> > +#else
>> > +#define __diag_GCC_4_8(s)
>> > +#endif
>> > +
>> > +#if GCC_VERSION >= 40900
>> > +#define __diag_GCC_4_9(s) __diag(s)
>> > +#else
>> > +#define __diag_GCC_4_9(s)
>> > +#endif
>> > +
>> > +#if GCC_VERSION >= 50000
>> > +#define __diag_GCC_5(s) __diag(s)
>> > +#else
>> > +#define __diag_GCC_5(s)
>> > +#endif
>> > +
>> > +#if GCC_VERSION >= 60000
>> > +#define __diag_GCC_6(s) __diag(s)
>> > +#else
>> > +#define __diag_GCC_6(s)
>> > +#endif
>> > +
>> > +#if GCC_VERSION >= 70000
>> > +#define __diag_GCC_7(s) __diag(s)
>> > +#else
>> > +#define __diag_GCC_7(s)
>> > +#endif
>> > +
>> > +#if GCC_VERSION >= 80000
>> > +#define __diag_GCC_8(s) __diag(s)
>> > +#else
>> > +#define __diag_GCC_8(s)
>> > +#endif
>> > +
>> > +#if GCC_VERSION >= 90000
>> > +#define __diag_GCC_9(s) __diag(s)
>> > +#else
>> > +#define __diag_GCC_9(s)
>> > +#endif
>>
>>
>> Hmm, we would have to add this for every release.
>
> Well, strictly speaking only ones that we need to modify diags for - ie.
> in this series we could get away with only adding the GCC 8 macro if we
> wanted.
>
>> > diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
>> > index 6b79a9bba9a7..313a2ad884e0 100644
>> > --- a/include/linux/compiler_types.h
>> > +++ b/include/linux/compiler_types.h
>> > @@ -271,4 +271,22 @@ struct ftrace_likely_data {
>> > # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
>> > #endif
>> >
>> > +#ifndef __diag
>> > +#define __diag(string)
>> > +#endif
>> > +
>> > +#ifndef __diag_GCC
>> > +#define __diag_GCC(string)
>> > +#endif
>>
>> __diag_GCC() takes two arguments,
>> so it should be:
>>
>> #ifndef __diag_GCC
>> #define __diag_GCC(version, s)
>> #endif
>>
>>
>> Otherwise, this would cause warning like this:
>>
>>
>> arch/arm64/kernel/sys.c:40:1: error: macro "__diag_GCC" passed 2
>> arguments, but takes just 1
>> SYSCALL_DEFINE1(arm64_personality, unsigned int, personality)
>> ^~~~~~~~~~
>
> Yes, good catch.
>
>> > +#define __diag_push() __diag(push)
>> > +#define __diag_pop() __diag(pop)
>> > +
>> > +#define __diag_ignore(compiler, version, option, comment) \
>> > + __diag_ ## compiler(version, ignored option)
>> > +#define __diag_warn(compiler, version, option, comment) \
>> > + __diag_ ## compiler(version, warning option)
>> > +#define __diag_error(compiler, version, option, comment) \
>> > + __diag_ ## compiler(version, error option)
>> > +
>>
>> To me, it looks like this is putting GCC/Clang specific things
>> in the common file, <linux/compiler_types.h> .
>>
>> All compilers must use "ignored", "warning", "error",
>> not allowed to use "ignore".
>
> We could move that to linux/compiler-gcc.h pretty easily.
>
>> I also wonder if we could avoid proliferating __diag_GCC_*.
>
> My thought is that it's unlikely we'll ever support enough different
> compilers for it to become problematic to list the ones we modify
> warnings for in linux/compiler_types.h.
>
>> I attached a bit different implementation below.
>>
>> I used -Wno-pragmas to avoid unknown option warnings.
>
> That doesn't seem very clean to me because it will hide typos or other
> mistakes. One advantage of Arnd's patch is that by specifying the
> compiler & version we only attempt to use pragmas that are appropriate
> so we don't need to ignore unknown ones.
>
>> Usage is
>>
>> __diag_push();
>> __diag_ignore(-Wattribute-alias,
>> "Type aliasing is used to sanitize syscall arguments");
>> ...
>> __diag_pop();
>>
>> Comments, ideas are appreciated.
>
> By removing the compiler & version arguments you're enforcing that if we
> ignore a warning for one compiler we must ignore it for all, regardless
> of whether it's problematic. This essentially presumes that warnings
> with the same name will behave the same across compilers, which feels
> worse to me than having users of this explicitly state which compilers
> need the pragmas.
Fair enough.
V2 is good except one nit.
(I left a comment in it)
I can fix it up locally
if it is tedious to re-spin, though.
> Thanks,
> Paul
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Best Regards
Masahiro Yamada
^ permalink raw reply
* Re: [PATCH 1/3] kbuild: add macro for controlling warnings to linux/compiler.h
From: Paul Burton @ 2018-06-21 0:02 UTC (permalink / raw)
To: Masahiro Yamada
Cc: Arnd Bergmann, Linux Kbuild mailing list, Mauro Carvalho Chehab,
Linux-MIPS, Ingo Molnar, Matthew Wilcox, Thomas Gleixner,
Douglas Anderson, Josh Poimboeuf, Andrew Morton,
Matthias Kaehlcke, He Zhe, Benjamin Herrenschmidt, Michal Marek,
Khem Raj, Christophe Leroy, Al Viro, Stafford Horne,
Gideon Israel Dsouza, Kees Cook, Michael Ellerman, Heiko Carstens,
Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
In-Reply-To: <CAK7LNAQRu0p8_8DqEdmUUPfc4gC60SSj90vTpz3iKaiE596-TA@mail.gmail.com>
Hi Masahiro,
On Thu, Jun 21, 2018 at 08:21:01AM +0900, Masahiro Yamada wrote:
> V2 is good except one nit.
> (I left a comment in it)
Thanks, and yes I agree with your comment that the GCC<4.6 __diag()
definition can be removed.
> I can fix it up locally if it is tedious to re-spin, though.
If you wouldn't mind that'd be great :)
Thanks,
Paul
^ permalink raw reply
* Re: [PATCH] arch: powerpc: pci-common: fix wrong return value check on phd_id
From: Michael Ellerman @ 2018-06-21 0:28 UTC (permalink / raw)
To: Daniel Walker, Guilherme G. Piccoli, benh
Cc: Andrew Morton, xe-kernel, linux-kernel, Paul Mackerras,
linuxppc-dev, Mauro Rodrigues, linux-pci
In-Reply-To: <6d321d8a-432d-d41d-cc08-144764d644b2@cisco.com>
Hi Daniel,
Sorry we broke things for you.
Daniel Walker <danielwa@cisco.com> writes:
> On 06/19/2018 09:26 AM, Guilherme G. Piccoli wrote:
>>> [...]
>>> What your doing is changing the phb_id to some transformation of "reg" for
>>> all platforms except which have "ibm,opal-phbid". This is what we observe.
>>> This is a radical altering of the prior phb_id selection before your patch.
...
>
> I didn't look into changing the behavior on our side because it didn't
> look like the intent of the patch was to make a global change. I can
> take a look at changing this behavior on our side , given that this was
> intended by your changes.
You're right the change log and the patch are a bit out of sync, that
was probably my fault.
> However, they're may be other platforms or drivers which depend on these
> numbers getting set a certain way, so there may be other userspace
> dependencies on these values.
That's true, though I think yours is the first report we've had of
problems.
The old behaviour relied on device tree ordering in nearly all cases, so
you basically get whatever order your firmware happened to flatten the
device tree in.
That tends to be consistent on a single system or with a single firmware
version, but it's not stable in general. If your firmware changes, or
you kexec then the ordering can change.
So I'd definitely prefer we didn't go back to that behaviour, because
it's basically "random order".
If there's anything you can do on your end to cope with the new ordering
that would be great.
cheers
^ permalink raw reply
* Re: [PATCH v2 4/6] powerpc/pkeys: Preallocate execute-only key
From: Michael Ellerman @ 2018-06-21 0:28 UTC (permalink / raw)
To: Ram Pai
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, Ulrich.Weigand, fweimer, luto, msuchanek
In-Reply-To: <20180619163813.GG5294@ram.oc3035372033.ibm.com>
Ram Pai <linuxram@us.ibm.com> writes:
> On Tue, Jun 19, 2018 at 10:40:13PM +1000, Michael Ellerman wrote:
>> Ram Pai <linuxram@us.ibm.com> writes:
>>=20
>> > execute-only key is allocated dynamically. This is a problem. When a
>> > thread implicitly creates a execute-only key, and resets UAMOR for that
>> > key, the UAMOR value does not percolate to all the other threads. Any
>> > other thread may ignorantly change the permissions on the key. This c=
an
>> > cause the key to be not execute-only for that thread.
>> >
>> > Preallocate the execute-only key and ensure that no thread can change
>> > the permission of the key, by resetting the corresponding bit in UAMOR.
>>=20
>> OK this is a non-ABI changing bug fix AFAICS.
>>=20
>> I'll add:
>>=20
>> Fixes: 5586cf61e108 ("powerpc: introduce execute-only pkey")
>> Cc: stable@vger.kernel.org # v4.16+
>>=20
>> > diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
>> > index b681bec..1f2389f 100644
>> > --- a/arch/powerpc/mm/pkeys.c
>> > +++ b/arch/powerpc/mm/pkeys.c
>> > @@ -25,6 +25,7 @@
>> > #define IAMR_EX_BIT 0x1UL
>> > #define PKEY_REG_BITS (sizeof(u64)*8)
>> > #define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKE=
Y))
>> > +#define EXECUTE_ONLY_KEY 2
>>=20
>> Do we ensure we have at least 3 keys anywhere?
>
> No. Good to add. Can this be different patch?
Yes please.
> However we do not have any systems with less than 16keys AFAICT.
It's controllable by firmware so we have between 0 and =E2=88=9E keys :)
But yeah you're right it's unlikely to be a bug anyone hits in practice.
cheers
^ permalink raw reply
* [PATCH v04 0/9] powerpc/hotplug: Update affinity for migrated CPUs
From: Michael Bringmann @ 2018-06-21 0:28 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
Thomas Falcon
The migration of LPARs across Power systems affects many attributes
including that of the associativity of CPUs. The patches in this
set execute when a system is coming up fresh upon a migration target.
They are intended to,
* Recognize changes to the associativity of CPUs recorded in internal
data structures when compared to the latest copies in the device tree.
* Generate calls to other code layers to reset the data structures
related to associativity of the CPUs.
* Re-register the 'changed' entities into the target system.
Re-registration of CPUs mostly entails acting as if they have been
newly hot-added into the target system.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Michael Bringmann (9):
hotplug/cpu: Conditionally acquire/release DRC index
hotplug/cpu: Add operation queuing function
hotplug/cpu: Provide CPU readd operation
mobility/numa: Ensure numa update does not overlap
numa: Disable/enable arch_update_cpu_topology
pmt/numa: Disable arch_update_cpu_topology during CPU readd
powerpc/rtas: Allow disabling rtas_event_scan
hotplug/rtas: No rtas_event_scan during property update
hotplug/pmt: Update topology after PMT
---
Changes in patch:
-- Restructure and rearrange content of patches to co-locate
similar or related modifications
-- Rename pseries_update_drconf_cpu to pseries_update_processor
-- Simplify code to update CPU nodes during mobility checks.
Remove functions to generate extra HP_ELOG messages in favor
of direct function calls to dlpar_cpu_readd_by_index.
-- Revise code order in dlpar_cpu_readd_by_index() to present
more appropriate error codes from underlying layers of the
implementation.
-- Add hotplug device lock around all property updates
-- Add call to rebuild_sched_domains in case of changes
-- Various code cleanups and compaction
-- Rebase to 4.17-rc5 kernel
^ permalink raw reply
* Re: [RFC PATCH 17/23] watchdog/hardlockup/hpet: Convert the timer's interrupt to NMI
From: Ricardo Neri @ 2018-06-21 0:25 UTC (permalink / raw)
To: Randy Dunlap
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, Andi Kleen,
Ashok Raj, Borislav Petkov, Tony Luck, Ravi V. Shankar, x86,
sparclinux, linuxppc-dev, linux-kernel, Jacob Pan,
Rafael J. Wysocki, Don Zickus, Nicholas Piggin, Michael Ellerman,
Frederic Weisbecker, Alexei Starovoitov, Babu Moger,
Mathieu Desnoyers, Masami Hiramatsu, Peter Zijlstra,
Andrew Morton, Philippe Ombredanne, Colin Ian King,
Byungchul Park, Paul E. McKenney, Luis R. Rodriguez, Waiman Long,
Josh Poimboeuf, Davidlohr Bueso, Christoffer Dall, Marc Zyngier,
Kai-Heng Feng, Konrad Rzeszutek Wilk, David Rientjes, iommu
In-Reply-To: <c040d4e7-f33b-9c93-6f5a-6bf960943e36@infradead.org>
On Tue, Jun 19, 2018 at 05:25:09PM -0700, Randy Dunlap wrote:
> On 06/19/2018 05:15 PM, Ricardo Neri wrote:
> > On Sat, Jun 16, 2018 at 03:24:49PM +0200, Thomas Gleixner wrote:
> >> On Fri, 15 Jun 2018, Ricardo Neri wrote:
> >>> On Fri, Jun 15, 2018 at 11:19:09AM +0200, Thomas Gleixner wrote:
> >>>> On Thu, 14 Jun 2018, Ricardo Neri wrote:
> >>>>> Alternatively, there could be a counter that skips reading the HPET status
> >>>>> register (and the detection of hardlockups) for every X NMIs. This would
> >>>>> reduce the overall frequency of HPET register reads.
> >>>>
> >>>> Great plan. So if the watchdog is the only NMI (because perf is off) then
> >>>> you delay the watchdog detection by that count.
> >>>
> >>> OK. This was a bad idea. Then, is it acceptable to have an read to an HPET
> >>> register per NMI just to check in the status register if the HPET timer
> >>> caused the NMI?
> >>
> >> The status register is useless in case of MSI. MSI is edge triggered ....
> >>
> >> The only register which gives you proper information is the counter
> >> register itself. That adds an massive overhead to each NMI, because the
> >> counter register access is synchronized to the HPET clock with hardware
> >> magic. Plus on larger systems, the HPET access is cross node and even
> >> slower.
> >
> > It starts to sound that the HPET is too slow to drive the hardlockup detector.
> >
> > Would it be possible to envision a variant of this implementation? In this
> > variant, the HPET only targets a single CPU. The actual hardlockup detector
> > is implemented by this single CPU sending interprocessor interrupts to the
> > rest of the CPUs.
> >
> > In this manner only one CPU has to deal with the slowness of the HPET; the
> > rest of the CPUs don't have to read or write any HPET registers. A sysfs
> > entry could be added to configure which CPU will have to deal with the HPET
> > timer. However, profiling could not be done accurately on such CPU.
>
> Please forgive my simple question:
>
> What happens when this one CPU is the one that locks up?
I think that in this particular case this one CPU would check for hardlockups
on itself when it receives the NMI from the HPET timer. It would also issue
NMIs to the other monitored processors.
Thanks and BR,
Ricardo
^ permalink raw reply
* [PATCH v04 1/9] hotplug/cpu: Conditionally acquire/release DRC index
From: Michael Bringmann @ 2018-06-21 0:29 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
Thomas Falcon
In-Reply-To: <0425b353-54b0-6ccd-fbb6-3d26d9448bb5@linux.vnet.ibm.com>
powerpc/cpu: Modify dlpar_cpu_add and dlpar_cpu_remove to allow the
skipping of DRC index acquire or release operations during the CPU
add or remove operations. This is intended to support subsequent
changes to provide a 'CPU readd' operation.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
-- Move new validity check added to pseries_smp_notifier
to another patch
---
arch/powerpc/platforms/pseries/hotplug-cpu.c | 68 +++++++++++++++-----------
1 file changed, 39 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6ef77ca..3632db2 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -432,7 +432,7 @@ static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
return found;
}
-static ssize_t dlpar_cpu_add(u32 drc_index)
+static ssize_t dlpar_cpu_add(u32 drc_index, bool acquire_drc)
{
struct device_node *dn, *parent;
int rc, saved_rc;
@@ -457,19 +457,22 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return -EINVAL;
}
- rc = dlpar_acquire_drc(drc_index);
- if (rc) {
- pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
- rc, drc_index);
- of_node_put(parent);
- return -EINVAL;
+ if (acquire_drc) {
+ rc = dlpar_acquire_drc(drc_index);
+ if (rc) {
+ pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
+ rc, drc_index);
+ of_node_put(parent);
+ return -EINVAL;
+ }
}
dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
if (!dn) {
pr_warn("Failed call to configure-connector, drc index: %x\n",
drc_index);
- dlpar_release_drc(drc_index);
+ if (acquire_drc)
+ dlpar_release_drc(drc_index);
of_node_put(parent);
return -EINVAL;
}
@@ -484,8 +487,9 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
dn->name, rc, drc_index);
- rc = dlpar_release_drc(drc_index);
- if (!rc)
+ if (acquire_drc)
+ rc = dlpar_release_drc(drc_index);
+ if (!rc || acquire_drc)
dlpar_free_cc_nodes(dn);
return saved_rc;
@@ -498,7 +502,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
dn->name, rc, drc_index);
rc = dlpar_detach_node(dn);
- if (!rc)
+ if (!rc && acquire_drc)
dlpar_release_drc(drc_index);
return saved_rc;
@@ -566,7 +570,8 @@ static int dlpar_offline_cpu(struct device_node *dn)
}
-static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index,
+ bool release_drc)
{
int rc;
@@ -579,12 +584,14 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
return -EINVAL;
}
- rc = dlpar_release_drc(drc_index);
- if (rc) {
- pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
- drc_index, dn->name, rc);
- dlpar_online_cpu(dn);
- return rc;
+ if (release_drc) {
+ rc = dlpar_release_drc(drc_index);
+ if (rc) {
+ pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
+ drc_index, dn->name, rc);
+ dlpar_online_cpu(dn);
+ return rc;
+ }
}
rc = dlpar_detach_node(dn);
@@ -593,7 +600,10 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);
- rc = dlpar_acquire_drc(drc_index);
+ if (release_drc)
+ rc = dlpar_acquire_drc(drc_index);
+ else
+ rc = 0;
if (!rc)
dlpar_online_cpu(dn);
@@ -622,7 +632,7 @@ static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
return dn;
}
-static int dlpar_cpu_remove_by_index(u32 drc_index)
+static int dlpar_cpu_remove_by_index(u32 drc_index, bool release_drc)
{
struct device_node *dn;
int rc;
@@ -634,7 +644,7 @@ static int dlpar_cpu_remove_by_index(u32 drc_index)
return -ENODEV;
}
- rc = dlpar_cpu_remove(dn, drc_index);
+ rc = dlpar_cpu_remove(dn, drc_index, release_drc);
of_node_put(dn);
return rc;
}
@@ -699,7 +709,7 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
}
for (i = 0; i < cpus_to_remove; i++) {
- rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
+ rc = dlpar_cpu_remove_by_index(cpu_drcs[i], true);
if (rc)
break;
@@ -710,7 +720,7 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
pr_warn("CPU hot-remove failed, adding back removed CPUs\n");
for (i = 0; i < cpus_removed; i++)
- dlpar_cpu_add(cpu_drcs[i]);
+ dlpar_cpu_add(cpu_drcs[i], true);
rc = -EINVAL;
} else {
@@ -780,7 +790,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
}
for (i = 0; i < cpus_to_add; i++) {
- rc = dlpar_cpu_add(cpu_drcs[i]);
+ rc = dlpar_cpu_add(cpu_drcs[i], true);
if (rc)
break;
@@ -791,7 +801,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
pr_warn("CPU hot-add failed, removing any added CPUs\n");
for (i = 0; i < cpus_added; i++)
- dlpar_cpu_remove_by_index(cpu_drcs[i]);
+ dlpar_cpu_remove_by_index(cpu_drcs[i], true);
rc = -EINVAL;
} else {
@@ -817,7 +827,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
rc = dlpar_cpu_remove_by_count(count);
else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
- rc = dlpar_cpu_remove_by_index(drc_index);
+ rc = dlpar_cpu_remove_by_index(drc_index, true);
else
rc = -EINVAL;
break;
@@ -825,7 +835,7 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
rc = dlpar_cpu_add_by_count(count);
else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
- rc = dlpar_cpu_add(drc_index);
+ rc = dlpar_cpu_add(drc_index, true);
else
rc = -EINVAL;
break;
@@ -850,7 +860,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
if (rc)
return -EINVAL;
- rc = dlpar_cpu_add(drc_index);
+ rc = dlpar_cpu_add(drc_index, true);
return rc ? rc : count;
}
@@ -871,7 +881,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
return -EINVAL;
}
- rc = dlpar_cpu_remove(dn, drc_index);
+ rc = dlpar_cpu_remove(dn, drc_index, true);
of_node_put(dn);
return rc ? rc : count;
^ permalink raw reply related
* [PATCH v04 2/9] hotplug/cpu: Add operation queuing function
From: Michael Bringmann @ 2018-06-21 0:29 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
Thomas Falcon
In-Reply-To: <0425b353-54b0-6ccd-fbb6-3d26d9448bb5@linux.vnet.ibm.com>
migration/dlpar: This patch adds function dlpar_queue_action()
which will queued up information about a CPU/Memory 'readd'
operation according to resource type, action code, and DRC index.
At a subsequent point, the list of operations can be run/played
in series. Examples of such oprations include 'readd' of CPU
and Memory blocks identified as having changed their associativity
during an LPAR migration event.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
-- Correct drc_index before adding to pseries_hp_errorlog struct
-- Correct text of notice
-- Revise queuing model to save up all of the DLPAR actions for
later execution.
-- Restore list init statement missing from patch
-- Move call to apply queued operations into 'mobility.c'
-- Compress some code
-- Rename some of queueing function APIs
---
arch/powerpc/include/asm/rtas.h | 2 +
arch/powerpc/platforms/pseries/dlpar.c | 59 +++++++++++++++++++++++++++++
arch/powerpc/platforms/pseries/mobility.c | 4 ++
arch/powerpc/platforms/pseries/pseries.h | 2 +
4 files changed, 67 insertions(+)
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index ec9dd79..4f45152 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -310,12 +310,14 @@ struct pseries_hp_errorlog {
struct { __be32 count, index; } ic;
char drc_name[1];
} _drc_u;
+ struct list_head list;
};
#define PSERIES_HP_ELOG_RESOURCE_CPU 1
#define PSERIES_HP_ELOG_RESOURCE_MEM 2
#define PSERIES_HP_ELOG_RESOURCE_SLOT 3
#define PSERIES_HP_ELOG_RESOURCE_PHB 4
+#define PSERIES_HP_ELOG_RESOURCE_PMT 5
#define PSERIES_HP_ELOG_ACTION_ADD 1
#define PSERIES_HP_ELOG_ACTION_REMOVE 2
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c0..4b43fec 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -25,6 +25,7 @@
#include <asm/prom.h>
#include <asm/machdep.h>
#include <linux/uaccess.h>
+#include <linux/delay.h>
#include <asm/rtas.h>
static struct workqueue_struct *pseries_hp_wq;
@@ -329,6 +330,8 @@ int dlpar_release_drc(u32 drc_index)
return 0;
}
+static int dlpar_pmt(struct pseries_hp_errorlog *work);
+
static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
{
int rc;
@@ -357,6 +360,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
case PSERIES_HP_ELOG_RESOURCE_CPU:
rc = dlpar_cpu(hp_elog);
break;
+ case PSERIES_HP_ELOG_RESOURCE_PMT:
+ rc = dlpar_pmt(hp_elog);
+ break;
default:
pr_warn_ratelimited("Invalid resource (%d) specified\n",
hp_elog->resource);
@@ -407,6 +413,59 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
}
}
+LIST_HEAD(dlpar_delayed_list);
+
+int dlpar_queue_action(int resource, int action, u32 drc_index)
+{
+ struct pseries_hp_errorlog *hp_errlog;
+
+ hp_errlog = kmalloc(sizeof(struct pseries_hp_errorlog), GFP_KERNEL);
+ if (!hp_errlog)
+ return -ENOMEM;
+
+ hp_errlog->resource = resource;
+ hp_errlog->action = action;
+ hp_errlog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+ hp_errlog->_drc_u.drc_index = cpu_to_be32(drc_index);
+
+ list_add_tail(&hp_errlog->list, &dlpar_delayed_list);
+
+ return 0;
+}
+
+static int dlpar_pmt(struct pseries_hp_errorlog *work)
+{
+ struct list_head *pos, *q;
+
+ ssleep(15);
+
+ list_for_each_safe(pos, q, &dlpar_delayed_list) {
+ struct pseries_hp_errorlog *tmp;
+
+ tmp = list_entry(pos, struct pseries_hp_errorlog, list);
+ handle_dlpar_errorlog(tmp);
+
+ list_del(pos);
+ kfree(tmp);
+ }
+
+ return 0;
+}
+
+int dlpar_queued_actions_run(void)
+{
+ struct pseries_hp_errorlog hp_errlog;
+
+ if (!list_empty(&dlpar_delayed_list)) {
+ hp_errlog.resource = PSERIES_HP_ELOG_RESOURCE_PMT;
+ hp_errlog.action = 0;
+ hp_errlog.id_type = 0;
+
+ dlpar_pmt(&hp_errlog);
+ }
+ return 0;
+}
+
static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
{
char *arg;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 8a8033a..a96c13b 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -375,6 +375,10 @@ static ssize_t migration_store(struct class *class,
return rc;
post_mobility_fixup();
+
+ /* Apply any necessary changes identified during fixup */
+ dlpar_schedule_delayed_queue();
+
return count;
}
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 60db2ee..72ca996 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -61,6 +61,8 @@ extern struct device_node *dlpar_configure_connector(__be32,
void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
struct completion *hotplug_done, int *rc);
+int dlpar_queue_action(int resource, int action, u32 drc_index);
+int dlpar_queued_actions_run(void);
#ifdef CONFIG_MEMORY_HOTPLUG
int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
#else
^ permalink raw reply related
* [PATCH v04 3/9] hotplug/cpu: Provide CPU readd operation
From: Michael Bringmann @ 2018-06-21 0:29 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
Thomas Falcon
In-Reply-To: <0425b353-54b0-6ccd-fbb6-3d26d9448bb5@linux.vnet.ibm.com>
powerpc/dlpar: Provide hotplug CPU 'readd by index' operation to
support LPAR Post Migration state updates. When such changes are
invoked by the PowerPC 'mobility' code, they will be queued up so
that modifications to CPU properties will take place after the new
property value is written to the device-tree.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in patch:
-- Add CPU validity check to pseries_smp_notifier
-- Improve check on 'ibm,associativity' property
-- Add check for cpu type to new update property entry
---
arch/powerpc/platforms/pseries/hotplug-cpu.c | 58 ++++++++++++++++++++++++++
arch/powerpc/platforms/pseries/mobility.c | 2 -
2 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 3632db2..8f28160 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -305,6 +305,36 @@ static int pseries_add_processor(struct device_node *np)
return err;
}
+static int pseries_update_processor(struct of_reconfig_data *pr)
+{
+ int old_entries, new_entries, rc = 0;
+ __be32 *old_assoc, *new_assoc;
+
+ /* We only handle changes due to 'ibm,associativity' property
+ */
+ old_assoc = pr->old_prop->value;
+ old_entries = be32_to_cpu(*old_assoc++);
+
+ new_assoc = pr->prop->value;
+ new_entries = be32_to_cpu(*new_assoc++);
+
+ if (old_entries == new_entries) {
+ int sz = old_entries * sizeof(int);
+
+ if (memcmp(old_assoc, new_assoc, sz))
+ rc = dlpar_queue_action(
+ PSERIES_HP_ELOG_RESOURCE_CPU,
+ PSERIES_HP_ELOG_ACTION_READD,
+ pr->dn->phandle);
+ } else {
+ rc = dlpar_queue_action(PSERIES_HP_ELOG_RESOURCE_CPU,
+ PSERIES_HP_ELOG_ACTION_READD,
+ pr->dn->phandle);
+ }
+
+ return rc;
+}
+
/*
* Update the present map for a cpu node which is going away, and set
* the hard id in the paca(s) to -1 to be consistent with boot time
@@ -649,6 +679,26 @@ static int dlpar_cpu_remove_by_index(u32 drc_index, bool release_drc)
return rc;
}
+static int dlpar_cpu_readd_by_index(u32 drc_index)
+{
+ int rc = 0;
+
+ pr_info("Attempting to re-add CPU, drc index %x\n", drc_index);
+
+ rc = dlpar_cpu_remove_by_index(drc_index, false);
+ if (!rc)
+ rc = dlpar_cpu_add(drc_index, false);
+
+ if (rc)
+ pr_info("Failed to update cpu at drc_index %lx\n",
+ (unsigned long int)drc_index);
+ else
+ pr_info("CPU at drc_index %lx was updated\n",
+ (unsigned long int)drc_index);
+
+ return rc;
+}
+
static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
{
struct device_node *dn;
@@ -839,6 +889,9 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
else
rc = -EINVAL;
break;
+ case PSERIES_HP_ELOG_ACTION_READD:
+ rc = dlpar_cpu_readd_by_index(drc_index);
+ break;
default:
pr_err("Invalid action (%d) specified\n", hp_elog->action);
rc = -EINVAL;
@@ -902,6 +955,11 @@ static int pseries_smp_notifier(struct notifier_block *nb,
case OF_RECONFIG_DETACH_NODE:
pseries_remove_processor(rd->dn);
break;
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ if (!strcmp(rd->dn->type, "cpu") &&
+ !strcmp(rd->prop->name, "ibm,associativity"))
+ pseries_update_processor(rd);
+ break;
}
return notifier_from_errno(err);
}
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index a96c13b..e57ab6b 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -377,7 +377,7 @@ static ssize_t migration_store(struct class *class,
post_mobility_fixup();
/* Apply any necessary changes identified during fixup */
- dlpar_schedule_delayed_queue();
+ dlpar_queued_actions_run();
return count;
}
^ permalink raw reply related
* [PATCH v04 4/9] mobility/numa: Ensure numa update does not overlap
From: Michael Bringmann @ 2018-06-21 0:29 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
Thomas Falcon
In-Reply-To: <0425b353-54b0-6ccd-fbb6-3d26d9448bb5@linux.vnet.ibm.com>
mobility/numa: Ensure that numa_update_cpu_topology() can not be
entered multiple times concurrently. It may be accessed through
many different paths / concurrent work functions, and the lock
ordering may be difficult to ensure otherwise.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
arch/powerpc/mm/numa.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 8802e7d..d4543b3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1079,6 +1079,7 @@ struct topology_update_data {
static int topology_timer_secs = 1;
static int topology_inited;
static int topology_update_needed;
+static struct mutex topology_update_lock;
/*
* Change polling interval for associativity changes.
@@ -1320,6 +1321,11 @@ int numa_update_cpu_topology(bool cpus_locked)
if (!updates)
return 0;
+ if (!mutex_trylock(&topology_update_lock)) {
+ kfree(updates);
+ return 0;
+ }
+
cpumask_clear(&updated_cpus);
for_each_cpu(cpu, &cpu_associativity_changes_mask) {
@@ -1424,6 +1430,7 @@ int numa_update_cpu_topology(bool cpus_locked)
out:
kfree(updates);
topology_update_needed = 0;
+ mutex_unlock(&topology_update_lock);
return changed;
}
@@ -1598,6 +1605,8 @@ static ssize_t topology_write(struct file *file, const char __user *buf,
static int topology_update_init(void)
{
+ mutex_init(&topology_update_lock);
+
/* Do not poll for changes if disabled at boot */
if (topology_updates_enabled)
start_topology_update();
^ permalink raw reply related
* [PATCH v04 5/9] numa: Disable/enable arch_update_cpu_topology
From: Michael Bringmann @ 2018-06-21 0:29 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
Thomas Falcon
In-Reply-To: <0425b353-54b0-6ccd-fbb6-3d26d9448bb5@linux.vnet.ibm.com>
numa: Provide mechanism to disable/enable operation of
arch_update_cpu_topology/numa_update_cpu_topology. This is
a simple tool to eliminate some avenues for thread deadlock
observed during system execution.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/topology.h | 10 ++++++++++
arch/powerpc/mm/numa.c | 14 ++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 16b0778..d9ceba6 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -43,6 +43,8 @@ static inline int pcibus_to_node(struct pci_bus *bus)
extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
+extern void arch_update_cpu_topology_suspend(void);
+extern void arch_update_cpu_topology_resume(void);
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
{
@@ -82,6 +84,14 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
return 0;
}
+static inline void arch_update_cpu_topology_suspend(void)
+{
+}
+
+static inline void arch_update_cpu_topology_resume(void)
+{
+}
+
static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
#endif /* CONFIG_NUMA */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index d4543b3..13c0068 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1079,6 +1079,7 @@ struct topology_update_data {
static int topology_timer_secs = 1;
static int topology_inited;
static int topology_update_needed;
+static int topology_update_enabled = 1;
static struct mutex topology_update_lock;
/*
@@ -1313,6 +1314,9 @@ int numa_update_cpu_topology(bool cpus_locked)
return 0;
}
+ if (!topology_update_enabled)
+ return 0;
+
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
return 0;
@@ -1439,6 +1443,16 @@ int arch_update_cpu_topology(void)
return numa_update_cpu_topology(true);
}
+void arch_update_cpu_topology_suspend(void)
+{
+ topology_update_enabled = 0;
+}
+
+void arch_update_cpu_topology_resume(void)
+{
+ topology_update_enabled = 1;
+}
+
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
^ permalink raw reply related
* [PATCH v04 6/9] pmt/numa: Disable arch_update_cpu_topology during CPU readd
From: Michael Bringmann @ 2018-06-21 0:30 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
Thomas Falcon
In-Reply-To: <0425b353-54b0-6ccd-fbb6-3d26d9448bb5@linux.vnet.ibm.com>
pmt/numa: Disable arch_update_cpu_topology during post migration
CPU readd updates when evaluating device-tree changes after LPM
to avoid thread deadlocks trying to update node assignments.
System timing between all of the threads and timers restarted in
a migrated system overlapped frequently allowing tasks to start
acquiring resources (get_online_cpus) needed by rebuild_sched_domains.
Defer the operation of that function until after the CPU readd has
completed.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/hotplug-cpu.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 8f28160..6267b53 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -26,6 +26,7 @@
#include <linux/sched.h> /* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
+#include <linux/cpuset.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <asm/prom.h>
@@ -685,9 +686,15 @@ static int dlpar_cpu_readd_by_index(u32 drc_index)
pr_info("Attempting to re-add CPU, drc index %x\n", drc_index);
+ arch_update_cpu_topology_suspend();
rc = dlpar_cpu_remove_by_index(drc_index, false);
- if (!rc)
+ arch_update_cpu_topology_resume();
+
+ if (!rc) {
+ arch_update_cpu_topology_suspend();
rc = dlpar_cpu_add(drc_index, false);
+ arch_update_cpu_topology_resume();
+ }
if (rc)
pr_info("Failed to update cpu at drc_index %lx\n",
^ permalink raw reply related
* [PATCH v04 7/9] powerpc/rtas: Allow disabling rtas_event_scan
From: Michael Bringmann @ 2018-06-21 0:30 UTC (permalink / raw)
To: linuxppc-dev
Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
Thomas Falcon
In-Reply-To: <0425b353-54b0-6ccd-fbb6-3d26d9448bb5@linux.vnet.ibm.com>
powerpc/rtas: Provide mechanism by which the rtas_event_scan can
be disabled/re-enabled by other portions of the powerpc code.
Among other things, this simplifies the usage of locking mechanisms
for shared kernel resources.
Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/rtas.h | 4 ++++
arch/powerpc/kernel/rtasd.c | 14 ++++++++++++++
2 files changed, 18 insertions(+)
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 4f45152..a94e3ff 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -386,8 +386,12 @@ extern int early_init_dt_scan_rtas(unsigned long node,
#ifdef CONFIG_PPC_RTAS_DAEMON
extern void rtas_cancel_event_scan(void);
+extern void rtas_event_scan_disable(void);
+extern void rtas_event_scan_enable(void);
#else
static inline void rtas_cancel_event_scan(void) { }
+static inline void rtas_event_scan_disable(void) { }
+static inline void rtas_event_scan_enable(void) { }
#endif
/* Error types logged. */
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index f915db9..72f3696 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -455,11 +455,25 @@ static void do_event_scan(void)
*/
static unsigned long event_scan_delay = 1*HZ;
static int first_pass = 1;
+static int res_enable = 1;
+
+void rtas_event_scan_disable(void)
+{
+ res_enable = 0;
+}
+
+void rtas_event_scan_enable(void)
+{
+ res_enable = 1;
+}
static void rtas_event_scan(struct work_struct *w)
{
unsigned int cpu;
+ if (!res_enable)
+ return;
+
do_event_scan();
get_online_cpus();
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox