LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v3 24/41] powerpc/book3s64/pkeys: Store/restore userspace AMR correctly on entry and exit from kernel
From: kernel test robot @ 2020-06-10 17:36 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linuxppc-dev, mpe
  Cc: Aneesh Kumar K.V, kbuild-all, bauerman, linuxram
In-Reply-To: <20200610095204.608183-25-aneesh.kumar@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 10620 bytes --]

Hi "Aneesh,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20200610]
[cannot apply to scottwood/next mpe/next v5.7]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/Kernel-userspace-access-execution-prevention-with-hash-translation/20200610-191943
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-r036-20200607 (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

In file included from arch/powerpc/include/asm/processor.h:9,
from arch/powerpc/include/asm/thread_info.h:40,
from include/linux/thread_info.h:38,
from include/asm-generic/preempt.h:5,
from ./arch/powerpc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/compat.h:10,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h: In function 'kuap_restore_user_amr':
>> arch/powerpc/include/asm/book3s/64/kup.h:142:22: error: 'struct pt_regs' has no member named 'kuap'
142 |  mtspr(SPRN_AMR, regs->kuap);
|                      ^~
arch/powerpc/include/asm/reg.h:1386:33: note: in definition of macro 'mtspr'
1386 |          : "r" ((unsigned long)(v))          |                                 ^
In file included from include/linux/kernel.h:11,
from include/linux/list.h:9,
from include/linux/preempt.h:11,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/compat.h:10,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h: In function 'kuap_restore_kernel_amr':
arch/powerpc/include/asm/book3s/64/kup.h:155:20: error: 'struct pt_regs' has no member named 'kuap'
155 |   if (unlikely(regs->kuap != amr)) {
|                    ^~
include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
78 | # define unlikely(x) __builtin_expect(!!(x), 0)
|                                          ^
In file included from arch/powerpc/include/asm/processor.h:9,
from arch/powerpc/include/asm/thread_info.h:40,
from include/linux/thread_info.h:38,
from include/asm-generic/preempt.h:5,
from ./arch/powerpc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/compat.h:10,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:157:24: error: 'struct pt_regs' has no member named 'kuap'
157 |    mtspr(SPRN_AMR, regs->kuap);
|                        ^~
arch/powerpc/include/asm/reg.h:1386:33: note: in definition of macro 'mtspr'
1386 |          : "r" ((unsigned long)(v))          |                                 ^
In file included from arch/powerpc/include/asm/bug.h:109,
from include/linux/bug.h:5,
from include/linux/thread_info.h:12,
from include/asm-generic/preempt.h:5,
from ./arch/powerpc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/compat.h:10,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h: In function 'bad_kuap_fault':
arch/powerpc/include/asm/book3s/64/kup.h:251:12: error: 'struct pt_regs' has no member named 'kuap'
251 |       (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
|            ^~
include/asm-generic/bug.h:122:25: note: in definition of macro 'WARN'
122 |  int __ret_warn_on = !!(condition);             |                         ^~~~~~~~~
In file included from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/kup.h: At top level:
>> arch/powerpc/include/asm/kup.h:56:20: error: redefinition of 'allow_user_access'
56 | static inline void allow_user_access(void __user *to, const void __user *from,
|                    ^~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/kup.h:18,
from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:212:29: note: previous definition of 'allow_user_access' was here
212 | static __always_inline void allow_user_access(void __user *to, const void __user *from,
|                             ^~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
>> arch/powerpc/include/asm/kup.h:58:20: error: redefinition of 'prevent_user_access'
58 | static inline void prevent_user_access(void __user *to, const void __user *from,
|                    ^~~~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/kup.h:18,
from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:227:20: note: previous definition of 'prevent_user_access' was here
227 | static inline void prevent_user_access(void __user *to, const void __user *from,
|                    ^~~~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
>> arch/powerpc/include/asm/kup.h:60:29: error: redefinition of 'prevent_user_access_return'
60 | static inline unsigned long prevent_user_access_return(void) { return 0UL; }
|                             ^~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/kup.h:18,
from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:233:29: note: previous definition of 'prevent_user_access_return' was here
233 | static inline unsigned long prevent_user_access_return(void)
|                             ^~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
>> arch/powerpc/include/asm/kup.h:61:20: error: redefinition of 'restore_user_access'
61 | static inline void restore_user_access(unsigned long flags) { }
|                    ^~~~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/kup.h:18,
from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:242:20: note: previous definition of 'restore_user_access' was here
242 | static inline void restore_user_access(unsigned long flags)
|                    ^~~~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
>> arch/powerpc/include/asm/kup.h:63:1: error: redefinition of 'bad_kuap_fault'
63 | bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
| ^~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/kup.h:18,
from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:248:1: note: previous definition of 'bad_kuap_fault' was here
248 | bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
| ^~~~~~~~~~~~~~
make[2]: *** [scripts/Makefile.build:100: arch/powerpc/kernel/asm-offsets.s] Error 1
make[2]: Target '__build' not remade because of errors.
make[1]: *** [Makefile:1141: prepare0] Error 2
make[1]: Target 'prepare' not remade because of errors.
make: *** [Makefile:180: sub-make] Error 2

vim +142 arch/powerpc/include/asm/book3s/64/kup.h

   135	
   136	static inline void kuap_restore_user_amr(struct pt_regs *regs)
   137	{
   138		if (!mmu_has_feature(MMU_FTR_PKEY))
   139			return;
   140	
   141		isync();
 > 142		mtspr(SPRN_AMR, regs->kuap);
   143		/*
   144		 * No isync required here because we are about to rfi
   145		 * back to previous context before any user accesses
   146		 * would be made, which is a CSI.
   147		 */
   148	}
   149	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 33846 bytes --]

^ permalink raw reply

* Re: [PATCH v3 25/41] powerpc/book3s64/kuep: Store/restore userspace IAMR correctly on entry and exit from kernel
From: kernel test robot @ 2020-06-10 18:47 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linuxppc-dev, mpe
  Cc: Aneesh Kumar K.V, kbuild-all, bauerman, linuxram
In-Reply-To: <20200610095204.608183-26-aneesh.kumar@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 8059 bytes --]

Hi "Aneesh,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20200610]
[cannot apply to scottwood/next mpe/next v5.7]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/Kernel-userspace-access-execution-prevention-with-hash-translation/20200610-191943
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-r036-20200607 (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

In file included from arch/powerpc/include/asm/processor.h:9,
from arch/powerpc/include/asm/thread_info.h:40,
from include/linux/thread_info.h:38,
from include/asm-generic/preempt.h:5,
from ./arch/powerpc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/compat.h:10,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h: In function 'kuap_restore_user_amr':
arch/powerpc/include/asm/book3s/64/kup.h:181:22: error: 'struct pt_regs' has no member named 'kuap'
181 |  mtspr(SPRN_AMR, regs->kuap);
|                      ^~
arch/powerpc/include/asm/reg.h:1386:33: note: in definition of macro 'mtspr'
1386 |          : "r" ((unsigned long)(v))          |                                 ^
>> arch/powerpc/include/asm/book3s/64/kup.h:182:23: error: 'struct pt_regs' has no member named 'kuep'
182 |  mtspr(SPRN_IAMR, regs->kuep);
|                       ^~
arch/powerpc/include/asm/reg.h:1386:33: note: in definition of macro 'mtspr'
1386 |          : "r" ((unsigned long)(v))          |                                 ^
In file included from include/linux/kernel.h:11,
from include/linux/list.h:9,
from include/linux/preempt.h:11,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/compat.h:10,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h: In function 'kuap_restore_kernel_amr':
arch/powerpc/include/asm/book3s/64/kup.h:194:20: error: 'struct pt_regs' has no member named 'kuap'
194 |   if (unlikely(regs->kuap != amr)) {
|                    ^~
include/linux/compiler.h:78:42: note: in definition of macro 'unlikely'
78 | # define unlikely(x) __builtin_expect(!!(x), 0)
|                                          ^
In file included from arch/powerpc/include/asm/processor.h:9,
from arch/powerpc/include/asm/thread_info.h:40,
from include/linux/thread_info.h:38,
from include/asm-generic/preempt.h:5,
from ./arch/powerpc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/compat.h:10,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:196:24: error: 'struct pt_regs' has no member named 'kuap'
196 |    mtspr(SPRN_AMR, regs->kuap);
|                        ^~
arch/powerpc/include/asm/reg.h:1386:33: note: in definition of macro 'mtspr'
1386 |          : "r" ((unsigned long)(v))          |                                 ^
In file included from arch/powerpc/include/asm/bug.h:109,
from include/linux/bug.h:5,
from include/linux/thread_info.h:12,
from include/asm-generic/preempt.h:5,
from ./arch/powerpc/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/spinlock.h:51,
from include/linux/seqlock.h:36,
from include/linux/time.h:6,
from include/linux/compat.h:10,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h: In function 'bad_kuap_fault':
arch/powerpc/include/asm/book3s/64/kup.h:293:12: error: 'struct pt_regs' has no member named 'kuap'
293 |       (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
|            ^~
include/asm-generic/bug.h:122:25: note: in definition of macro 'WARN'
122 |  int __ret_warn_on = !!(condition);             |                         ^~~~~~~~~
In file included from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/kup.h: At top level:
arch/powerpc/include/asm/kup.h:56:20: error: redefinition of 'allow_user_access'
56 | static inline void allow_user_access(void __user *to, const void __user *from,
|                    ^~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/kup.h:18,
from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:254:29: note: previous definition of 'allow_user_access' was here
254 | static __always_inline void allow_user_access(void __user *to, const void __user *from,
|                             ^~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/kup.h:58:20: error: redefinition of 'prevent_user_access'
58 | static inline void prevent_user_access(void __user *to, const void __user *from,
|                    ^~~~~~~~~~~~~~~~~~~
In file included from arch/powerpc/include/asm/kup.h:18,
from arch/powerpc/include/asm/uaccess.h:9,
from include/linux/uaccess.h:11,
from include/linux/crypto.h:21,
from include/crypto/hash.h:11,
from include/linux/uio.h:10,
from include/linux/socket.h:8,
from include/linux/compat.h:15,
from arch/powerpc/kernel/asm-offsets.c:14:
arch/powerpc/include/asm/book3s/64/kup.h:269:20: note: previous definition of 'prevent_user_access' was here

vim +182 arch/powerpc/include/asm/book3s/64/kup.h

   174	
   175	static inline void kuap_restore_user_amr(struct pt_regs *regs)
   176	{
   177		if (!mmu_has_feature(MMU_FTR_PKEY))
   178			return;
   179	
   180		isync();
   181		mtspr(SPRN_AMR, regs->kuap);
 > 182		mtspr(SPRN_IAMR, regs->kuep);
   183		/*
   184		 * No isync required here because we are about to rfi
   185		 * back to previous context before any user accesses
   186		 * would be made, which is a CSI.
   187		 */
   188	}
   189	static inline void kuap_restore_kernel_amr(struct pt_regs *regs,
   190						   unsigned long amr)
   191	{
   192		if (mmu_has_feature(MMU_FTR_KUAP) || mmu_has_feature(MMU_FTR_PKEY)) {
   193	
   194			if (unlikely(regs->kuap != amr)) {
   195				isync();
   196				mtspr(SPRN_AMR, regs->kuap);
   197				/*
   198				 * No isync required here because we are about to rfi
   199				 * back to previous context before any user accesses
   200				 * would be made, which is a CSI.
   201				 */
   202			}
   203		}
   204		/*
   205		 * No need to restore IAMR when returning to kernel space.
   206		 */
   207	}
   208	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 33846 bytes --]

^ permalink raw reply

* Re: [PATCH 2/3] powerpc/pci: unmap legacy INTx interrupts of passthrough IO adapters
From: Cédric Le Goater @ 2020-06-10 18:10 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev
In-Reply-To: <CAOSf1CHZ4+vEHotKzPDu2czVDBBM_oerxcCRS5QOFxsMbSknKQ@mail.gmail.com>

On 5/27/20 2:57 AM, Oliver O'Halloran wrote:
> On Wed, Apr 29, 2020 at 5:51 PM Cédric Le Goater <clg@kaod.org> wrote:
>>
>> When a passthrough IO adapter is removed from a pseries machine using
>> hash MMU and the XIVE interrupt mode, the POWER hypervisor, pHyp,
>> expects the guest OS to have cleared all page table entries related to
>> the adapter. If some are still present, the RTAS call which isolates
>> the PCI slot returns error 9001 "valid outstanding translations" and
>> the removal of the IO adapter fails.
>>
>> INTx interrupt numbers need special care because Linux maps the
>> interrupts automatically in the Linux interrupt number space if they
>> are presented in the device tree node describing the IO adapter. These
>> interrupts are not un-mapped automatically and in case of an hot-plug
>> adapter, the PCI hot-plug layer needs to handle the cleanup to make
>> sure that all the page table entries of the XIVE ESB pages are
>> cleared.
>>
>> Cc: "Oliver O'Halloran" <oohall@gmail.com>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  arch/powerpc/kernel/pci-hotplug.c | 2 ++
>>  1 file changed, 2 insertions(+)
>>
>> diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
>> index bf83f76563a3..9e9c6befd7ea 100644
>> --- a/arch/powerpc/kernel/pci-hotplug.c
>> +++ b/arch/powerpc/kernel/pci-hotplug.c
>> @@ -57,6 +57,8 @@ void pcibios_release_device(struct pci_dev *dev)
>>         struct pci_controller *phb = pci_bus_to_host(dev->bus);
>>         struct pci_dn *pdn = pci_get_pdn(dev);
>>
>> +       irq_dispose_mapping(dev->irq);
> 
> What does the original mapping? Powerpc arch code or the PCI core?
> Tearing down the mapping in pcibios_release_device() seems a bit fishy
> to me since the PCI core has already torn down the device state at
> that point. If the release is delayed it's possible that another
> pci_dev has mapped the IRQ before we get here, but maybe that's ok.

How's that below ? INTx mappings are cleared only when the PHB is removed.
It applies to all platforms but we could limit the removal to PHB hotplug
on pseries. 

C.


From 10794159567552355f87e86e24002641c54e7ab5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Wed, 10 Jun 2020 19:55:24 +0200
Subject: [PATCH] powerpc/pci: unmap legacy INTx interrupts of passthrough IO
 adapters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a passthrough IO adapter is removed from a pseries machine using
hash MMU and the XIVE interrupt mode, the POWER hypervisor, pHyp,
expects the guest OS to have cleared all page table entries related to
the adapter. If some are still present, the RTAS call which isolates
the PCI slot returns error 9001 "valid outstanding translations" and
the removal of the IO adapter fails.

INTx interrupt numbers need special care because Linux maps the
interrupts automatically in the Linux interrupt number space. These
interrupts are not un-mapped automatically and in case of an hot-plug
adapter, the PCI hot-plug layer needs to handle the cleanup to make
sure that all the page table entries of the ESB pages are cleared.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/pci-bridge.h |  4 +++
 arch/powerpc/kernel/pci-common.c      | 47 +++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index b92e81b256e5..9960dd249079 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -48,6 +48,8 @@ struct pci_controller_ops {
 
 /*
  * Structure of a PCI controller (host bridge)
+ *
+ * @intx: legacy INTx mappings
  */
 struct pci_controller {
 	struct pci_bus *bus;
@@ -127,6 +129,8 @@ struct pci_controller {
 
 	void *private_data;
 	struct npu *npu;
+
+	unsigned int intx[PCI_NUM_INTX];
 };
 
 /* These are used for config access before all the PCI probing
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index be108616a721..795a9b49e0d6 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -353,6 +353,48 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
 	return NULL;
 }
 
+static void pci_intx_register(struct pci_dev *pdev, int virq)
+{
+	struct pci_controller *phb = pci_bus_to_host(pdev->bus);
+	int i;
+
+	for (i = 0; i < PCI_NUM_INTX; i++) {
+		/*
+		 * Look for an empty or an equivalent slot, IRQs can be
+		 * shared
+		 */
+		if (phb->intx[i] == virq || !phb->intx[i]) {
+			phb->intx[i] = virq;
+			break;
+		}
+	}
+
+	if (i == PCI_NUM_INTX)
+		pr_err("PCI:%s INTx all mapped\n", pci_name(pdev));
+}
+
+/*
+ * Clearing the mapped INTx interrupts will also clear the underlying
+ * mappings of the ESB pages of the interrupts when under XIVE. It is
+ * a requirement of PowerVM to clear all memory mappings before
+ * removing a PHB.
+ */
+static void pci_intx_dispose(struct pci_controller *phb)
+{
+	int i;
+
+	for (i = 0; i < PCI_NUM_INTX; i++)
+		irq_dispose_mapping(phb->intx[i]);
+}
+
+void pcibios_remove_bus(struct pci_bus *bus)
+{
+	pr_debug("PCI: Clearing PHB %04x:%02x...\n",
+		 pci_domain_nr(bus), bus->number);
+	pci_intx_dispose(pci_bus_to_host(bus));
+}
+EXPORT_SYMBOL_GPL(pcibios_remove_bus);
+
 /*
  * Reads the interrupt pin to determine if interrupt is use by card.
  * If the interrupt is used, then gets the interrupt line from the
@@ -401,6 +443,11 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 
 	pci_dev->irq = virq;
 
+	/*
+	 * Record all INTx mappings to clear them if the PHB is
+	 * dynamically removed.
+	 */
+	pci_intx_register(pci_dev, virq);
 	return 0;
 }
 
-- 
2.25.4


^ permalink raw reply related

* [PATCH 1/3] powerpc/dt_cpu_ftrs: Remove unused macro ISA_V2_07B
From: Murilo Opsfelder Araujo @ 2020-06-10 21:51 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel
  Cc: Murilo Opsfelder Araujo, Murilo Opsfelder Araujo, Paul Mackerras,
	Aneesh Kumar K . V, Alistair Popple, Jordan Niethe, Daniel Axtens
In-Reply-To: <20200610215114.167544-1-muriloo@linux.ibm.com>

Macro ISA_V2_07B is defined but not used anywhere else in the code.

Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
---
 arch/powerpc/kernel/dt_cpu_ftrs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 3a409517c031..1b7da8b5ce0d 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -24,7 +24,6 @@
 
 
 /* Device-tree visible constants follow */
-#define ISA_V2_07B      2070
 #define ISA_V3_0B       3000
 #define ISA_V3_1        3100
 
-- 
2.25.4


^ permalink raw reply related

* [PATCH 0/3] powerpc/dt_cpu_ftrs: Make use of ISA_V3_* macros
From: Murilo Opsfelder Araujo @ 2020-06-10 21:51 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel
  Cc: Murilo Opsfelder Araujo, Murilo Opsfelder Araujo, Paul Mackerras,
	Aneesh Kumar K . V, Alistair Popple, Jordan Niethe, Daniel Axtens

The first patch removes unused macro ISA_V2_07B.  The second and third
patches make use of macros ISA_V3_0B and ISA_V3_1, respectively,
instead their corresponding literals.

Murilo Opsfelder Araujo (3):
  powerpc/dt_cpu_ftrs: Remove unused macro ISA_V2_07B
  powerpc/dt_cpu_ftrs: Make use of macro ISA_V3_0B
  powerpc/dt_cpu_ftrs: Make use of macro ISA_V3_1

 arch/powerpc/kernel/dt_cpu_ftrs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

--
2.25.4

^ permalink raw reply

* [PATCH 2/3] powerpc/dt_cpu_ftrs: Make use of macro ISA_V3_0B
From: Murilo Opsfelder Araujo @ 2020-06-10 21:51 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel
  Cc: Murilo Opsfelder Araujo, Murilo Opsfelder Araujo, Paul Mackerras,
	Aneesh Kumar K . V, Alistair Popple, Jordan Niethe, Daniel Axtens
In-Reply-To: <20200610215114.167544-1-muriloo@linux.ibm.com>

Macro ISA_V3_0B was defined but never used.  Use it instead of
literal.

Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
---
 arch/powerpc/kernel/dt_cpu_ftrs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 1b7da8b5ce0d..9d6e833da2bd 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -673,7 +673,7 @@ static void __init cpufeatures_setup_start(u32 isa)
 {
 	pr_info("setup for ISA %d\n", isa);
 
-	if (isa >= 3000) {
+	if (isa >= ISA_V3_0B) {
 		cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
 		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
 	}
-- 
2.25.4


^ permalink raw reply related

* [PATCH 3/3] powerpc/dt_cpu_ftrs: Make use of macro ISA_V3_1
From: Murilo Opsfelder Araujo @ 2020-06-10 21:51 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel
  Cc: Murilo Opsfelder Araujo, Murilo Opsfelder Araujo, Paul Mackerras,
	Aneesh Kumar K . V, Alistair Popple, Jordan Niethe, Daniel Axtens
In-Reply-To: <20200610215114.167544-1-muriloo@linux.ibm.com>

Macro ISA_V3_1 was defined but never used.  Use it instead of literal.

Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
---
 arch/powerpc/kernel/dt_cpu_ftrs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 9d6e833da2bd..a0edeb391e3e 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -678,7 +678,7 @@ static void __init cpufeatures_setup_start(u32 isa)
 		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
 	}
 
-	if (isa >= 3100) {
+	if (isa >= ISA_V3_1) {
 		cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31;
 		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1;
 	}
-- 
2.25.4


^ permalink raw reply related

* Re: [PATCH v5 2/4] riscv: Introduce CONFIG_RELOCATABLE
From: Jerome Forissier @ 2020-06-10 14:10 UTC (permalink / raw)
  To: Alexandre Ghiti, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Anup Patel, Atish Patra, Zong Li, linux-kernel, linuxppc-dev,
	linux-riscv
  Cc: Anup Patel
In-Reply-To: <20200607075949.665-3-alex@ghiti.fr>



On 6/7/20 9:59 AM, Alexandre Ghiti wrote:
[...]

> +config RELOCATABLE
> +	bool
> +	depends on MMU
> +	help
> +          This builds a kernel as a Position Independent Executable (PIE),
> +          which retains all relocation metadata required to relocate the
> +          kernel binary at runtime to a different virtual address than the
> +          address it was linked at.
> +          Since RISCV uses the RELA relocation format, this requires a
> +          relocation pass at runtime even if the kernel is loaded at the
> +          same address it was linked at.

Is this true? I thought that the GNU linker would write the "proper"
values by default, contrary to the LLVM linker (ld.lld) which would need
a special flag: --apply-dynamic-relocs (by default the relocated places
are set to zero). At least, it is my experience with Aarch64 on a
different project. So, sorry if I'm talking nonsense here -- I have not
looked at the details.

-- 
Jerome

^ permalink raw reply

* RE: Re: [RESEND PATCH v5 2/5] arm64/crash_core: Export TCR_EL1.T1SZ in vmcoreinfo
From: Bharat Gooty @ 2020-06-10 16:47 UTC (permalink / raw)
  To: Scott Branden, Bhupesh Sharma, Amit Kachhap
  Cc: Mark Rutland, Kazuhito Hagio, Ard Biesheuvel, Catalin Marinas,
	x86, Linux Doc Mailing List, Linux Kernel Mailing List, Ray Jui,
	linuxppc-dev, James Morse, Dave Anderson, bhupesh linux,
	Will Deacon, kexec mailing list, linux-arm-kernel, Steve Capper
In-Reply-To: <d401b003-af3e-c525-ba00-0de48486b7a0@broadcom.com>

Hello Bhupesh,
V6 patch set on Linux 5.7, did not help.
I have applied makedump file
http://lists.infradead.org/pipermail/kexec/2019-November/023963.html changes
also (makedump-1.6.6). Tried to apply it on makedumpfile 1.6.7.  Patch set_2
failed. Would like to know, if you have V5 patch set for makedump file
changes. With makedump 1.6.6, able to collect the vmore file.
I used latest crash utility
(https://www.redhat.com/archives/crash-utility/2019-November/msg00014.html
changes are present)
When I used crash utility, following is the error:

Thanks,
-Bharat


-----Original Message-----
From: Scott Branden [mailto:scott.branden@broadcom.com]
Sent: Thursday, April 30, 2020 4:34 AM
To: Bhupesh Sharma; Amit Kachhap
Cc: Mark Rutland; x86@kernel.org; Will Deacon; Linux Doc Mailing List;
Catalin Marinas; Ard Biesheuvel; kexec mailing list; Linux Kernel Mailing
List; Kazuhito Hagio; James Morse; Dave Anderson; bhupesh linux;
linuxppc-dev@lists.ozlabs.org; linux-arm-kernel; Steve Capper; Ray Jui;
Bharat Gooty
Subject: Re: Re: [RESEND PATCH v5 2/5] arm64/crash_core: Export TCR_EL1.T1SZ
in vmcoreinfo

Hi Bhupesh,

On 2020-02-23 10:25 p.m., Bhupesh Sharma wrote:
> Hi Amit,
>
> On Fri, Feb 21, 2020 at 2:36 PM Amit Kachhap <amit.kachhap@arm.com> wrote:
>> Hi Bhupesh,
>>
>> On 1/13/20 5:44 PM, Bhupesh Sharma wrote:
>>> Hi James,
>>>
>>> On 01/11/2020 12:30 AM, Dave Anderson wrote:
>>>> ----- Original Message -----
>>>>> Hi Bhupesh,
>>>>>
>>>>> On 25/12/2019 19:01, Bhupesh Sharma wrote:
>>>>>> On 12/12/2019 04:02 PM, James Morse wrote:
>>>>>>> On 29/11/2019 19:59, Bhupesh Sharma wrote:
>>>>>>>> vabits_actual variable on arm64 indicates the actual VA space size,
>>>>>>>> and allows a single binary to support both 48-bit and 52-bit VA
>>>>>>>> spaces.
>>>>>>>>
>>>>>>>> If the ARMv8.2-LVA optional feature is present, and we are running
>>>>>>>> with a 64KB page size; then it is possible to use 52-bits of
>>>>>>>> address
>>>>>>>> space for both userspace and kernel addresses. However, any kernel
>>>>>>>> binary that supports 52-bit must also be able to fall back to
>>>>>>>> 48-bit
>>>>>>>> at early boot time if the hardware feature is not present.
>>>>>>>>
>>>>>>>> Since TCR_EL1.T1SZ indicates the size offset of the memory region
>>>>>>>> addressed by TTBR1_EL1 (and hence can be used for determining the
>>>>>>>> vabits_actual value) it makes more sense to export the same in
>>>>>>>> vmcoreinfo rather than vabits_actual variable, as the name of the
>>>>>>>> variable can change in future kernel versions, but the
>>>>>>>> architectural
>>>>>>>> constructs like TCR_EL1.T1SZ can be used better to indicate
>>>>>>>> intended
>>>>>>>> specific fields to user-space.
>>>>>>>>
>>>>>>>> User-space utilities like makedumpfile and crash-utility, need to
>>>>>>>> read/write this value from/to vmcoreinfo
>>>>>>> (write?)
>>>>>> Yes, also write so that the vmcoreinfo from an (crashing) arm64
>>>>>> system can
>>>>>> be used for
>>>>>> analysis of the root-cause of panic/crash on say an x86_64 host using
>>>>>> utilities like
>>>>>> crash-utility/gdb.
>>>>> I read this as as "User-space [...] needs to write to vmcoreinfo".
>>> That's correct. But for writing to vmcore dump in the kdump kernel, we
>>> need to read the symbols from the vmcoreinfo in the primary kernel.
>>>
>>>>>>>> for determining if a virtual address lies in the linear map range.
>>>>>>> I think this is a fragile example. The debugger shouldn't need to
>>>>>>> know
>>>>>>> this.
>>>>>> Well that the current user-space utility design, so I am not sure we
>>>>>> can
>>>>>> tweak that too much.
>>>>>>
>>>>>>>> The user-space computation for determining whether an address lies
>>>>>>>> in
>>>>>>>> the linear map range is the same as we have in kernel-space:
>>>>>>>>
>>>>>>>>      #define __is_lm_address(addr)    (!(((u64)addr) &
>>>>>>>> BIT(vabits_actual -
>>>>>>>>      1)))
>>>>>>> This was changed with 14c127c957c1 ("arm64: mm: Flip kernel VA
>>>>>>> space"). If
>>>>>>> user-space
>>>>>>> tools rely on 'knowing' the kernel memory layout, they must have to
>>>>>>> constantly be fixed
>>>>>>> and updated. This is a poor argument for adding this to something
>>>>>>> that
>>>>>>> ends up as ABI.
>>>>>> See above. The user-space has to rely on some ABI/guaranteed
>>>>>> hardware-symbols which can be
>>>>>> used for 'determining' the kernel memory layout.
>>>>> I disagree. Everything and anything in the kernel will change. The
>>>>> ABI rules apply to
>>>>> stuff exposed via syscalls and kernel filesystems. It does not apply
>>>>> to kernel internals,
>>>>> like the memory layout we used yesterday. 14c127c957c1 is a case in
>>>>> point.
>>>>>
>>>>> A debugger trying to rely on this sort of thing would have to play
>>>>> catchup whenever it
>>>>> changes.
>>>> Exactly.  That's the whole point.
>>>>
>>>> The crash utility and makedumpfile are not in the same league as other
>>>> user-space tools.
>>>> They have always had to "play catchup" precisely because they depend
>>>> upon kernel internals,
>>>> which constantly change.
>>> I agree with you and DaveA here. Software user-space debuggers are
>>> dependent on kernel internals (which can change from time-to-time) and
>>> will have to play catch-up (which has been the case since the very
>>> start).
>>>
>>> Unfortunately we don't have any clear ABI for software debugging tools -
>>> may be something to look for in future.
>>>
>>> A case in point is gdb/kgdb, which still needs to run with KASLR
>>> turned-off (nokaslr) for debugging, as it confuses gdb which resolve
>>> kernel symbol address from symbol table of vmlinux. But we can
>>> work-around the same in makedumpfile/crash by reading the 'kaslr_offset'
>>> value. And I have several users telling me now they cannot use gdb on
>>> KASLR enabled kernel to debug panics, but can makedumpfile + crash
>>> combination to achieve the same.
>>>
>>> So, we should be looking to fix these utilities which are broken since
>>> the 52-bit changes for arm64. Accordingly, I will try to send the v6
>>> soon while incorporating the comments posted on the v5.
>> Any update on the next v6 version. Since this patch series is fixing the
>> current broken kdump so need this series to add some more fields in
>> vmcoreinfo for Pointer Authentication work.
> Sorry for the delay. I was caught up in some other urgent arm64
> user-space issues.
> I am preparing the v6 now and hopefully will be able to post it out
> for review later today.

Did v6 get sent out?

>
> Thanks,
> Bhupesh
>
>
Regards,
Scott

^ permalink raw reply

* RE: Re: [RESEND PATCH v5 2/5] arm64/crash_core: Export TCR_EL1.T1SZ in vmcoreinfo
From: Bharat Gooty @ 2020-06-10 16:49 UTC (permalink / raw)
  To: Scott Branden, Bhupesh Sharma, Amit Kachhap
  Cc: Mark Rutland, Kazuhito Hagio, Bharat Gooty, Ard Biesheuvel,
	Catalin Marinas, x86, Linux Doc Mailing List,
	Linux Kernel Mailing List, Ray Jui, linuxppc-dev, James Morse,
	Dave Anderson, bhupesh linux, Will Deacon, kexec mailing list,
	linux-arm-kernel, Steve Capper
In-Reply-To: <d401b003-af3e-c525-ba00-0de48486b7a0@broadcom.com>

Sorry, error message was not posted. Following is the error message

crash: cannot determine VA_BITS_ACTUAL

-----Original Message-----
From: Bharat Gooty [mailto:bharat.gooty@broadcom.com]
Sent: Wednesday, June 10, 2020 10:18 PM
To: Scott Branden; 'Bhupesh Sharma'; 'Amit Kachhap'
Cc: 'Mark Rutland'; 'x86@kernel.org'; 'Will Deacon'; 'Linux Doc Mailing
List'; 'Catalin Marinas'; 'Ard Biesheuvel'; 'kexec mailing list'; 'Linux
Kernel Mailing List'; 'Kazuhito Hagio'; 'James Morse'; 'Dave Anderson';
'bhupesh linux'; 'linuxppc-dev@lists.ozlabs.org'; 'linux-arm-kernel'; 'Steve
Capper'; Ray Jui
Subject: RE: Re: [RESEND PATCH v5 2/5] arm64/crash_core: Export TCR_EL1.T1SZ
in vmcoreinfo

Hello Bhupesh,
V6 patch set on Linux 5.7, did not help.
I have applied makedump file
http://lists.infradead.org/pipermail/kexec/2019-November/023963.html changes
also (makedump-1.6.6). Tried to apply it on makedumpfile 1.6.7.  Patch set_2
failed. Would like to know, if you have V5 patch set for makedump file
changes. With makedump 1.6.6, able to collect the vmore file.
I used latest crash utility
(https://www.redhat.com/archives/crash-utility/2019-November/msg00014.html
changes are present)
When I used crash utility, following is the error:

Thanks,
-Bharat


-----Original Message-----
From: Scott Branden [mailto:scott.branden@broadcom.com]
Sent: Thursday, April 30, 2020 4:34 AM
To: Bhupesh Sharma; Amit Kachhap
Cc: Mark Rutland; x86@kernel.org; Will Deacon; Linux Doc Mailing List;
Catalin Marinas; Ard Biesheuvel; kexec mailing list; Linux Kernel Mailing
List; Kazuhito Hagio; James Morse; Dave Anderson; bhupesh linux;
linuxppc-dev@lists.ozlabs.org; linux-arm-kernel; Steve Capper; Ray Jui;
Bharat Gooty
Subject: Re: Re: [RESEND PATCH v5 2/5] arm64/crash_core: Export TCR_EL1.T1SZ
in vmcoreinfo

Hi Bhupesh,

On 2020-02-23 10:25 p.m., Bhupesh Sharma wrote:
> Hi Amit,
>
> On Fri, Feb 21, 2020 at 2:36 PM Amit Kachhap <amit.kachhap@arm.com> wrote:
>> Hi Bhupesh,
>>
>> On 1/13/20 5:44 PM, Bhupesh Sharma wrote:
>>> Hi James,
>>>
>>> On 01/11/2020 12:30 AM, Dave Anderson wrote:
>>>> ----- Original Message -----
>>>>> Hi Bhupesh,
>>>>>
>>>>> On 25/12/2019 19:01, Bhupesh Sharma wrote:
>>>>>> On 12/12/2019 04:02 PM, James Morse wrote:
>>>>>>> On 29/11/2019 19:59, Bhupesh Sharma wrote:
>>>>>>>> vabits_actual variable on arm64 indicates the actual VA space size,
>>>>>>>> and allows a single binary to support both 48-bit and 52-bit VA
>>>>>>>> spaces.
>>>>>>>>
>>>>>>>> If the ARMv8.2-LVA optional feature is present, and we are running
>>>>>>>> with a 64KB page size; then it is possible to use 52-bits of
>>>>>>>> address
>>>>>>>> space for both userspace and kernel addresses. However, any kernel
>>>>>>>> binary that supports 52-bit must also be able to fall back to
>>>>>>>> 48-bit
>>>>>>>> at early boot time if the hardware feature is not present.
>>>>>>>>
>>>>>>>> Since TCR_EL1.T1SZ indicates the size offset of the memory region
>>>>>>>> addressed by TTBR1_EL1 (and hence can be used for determining the
>>>>>>>> vabits_actual value) it makes more sense to export the same in
>>>>>>>> vmcoreinfo rather than vabits_actual variable, as the name of the
>>>>>>>> variable can change in future kernel versions, but the
>>>>>>>> architectural
>>>>>>>> constructs like TCR_EL1.T1SZ can be used better to indicate
>>>>>>>> intended
>>>>>>>> specific fields to user-space.
>>>>>>>>
>>>>>>>> User-space utilities like makedumpfile and crash-utility, need to
>>>>>>>> read/write this value from/to vmcoreinfo
>>>>>>> (write?)
>>>>>> Yes, also write so that the vmcoreinfo from an (crashing) arm64
>>>>>> system can
>>>>>> be used for
>>>>>> analysis of the root-cause of panic/crash on say an x86_64 host using
>>>>>> utilities like
>>>>>> crash-utility/gdb.
>>>>> I read this as as "User-space [...] needs to write to vmcoreinfo".
>>> That's correct. But for writing to vmcore dump in the kdump kernel, we
>>> need to read the symbols from the vmcoreinfo in the primary kernel.
>>>
>>>>>>>> for determining if a virtual address lies in the linear map range.
>>>>>>> I think this is a fragile example. The debugger shouldn't need to
>>>>>>> know
>>>>>>> this.
>>>>>> Well that the current user-space utility design, so I am not sure we
>>>>>> can
>>>>>> tweak that too much.
>>>>>>
>>>>>>>> The user-space computation for determining whether an address lies
>>>>>>>> in
>>>>>>>> the linear map range is the same as we have in kernel-space:
>>>>>>>>
>>>>>>>>      #define __is_lm_address(addr)    (!(((u64)addr) &
>>>>>>>> BIT(vabits_actual -
>>>>>>>>      1)))
>>>>>>> This was changed with 14c127c957c1 ("arm64: mm: Flip kernel VA
>>>>>>> space"). If
>>>>>>> user-space
>>>>>>> tools rely on 'knowing' the kernel memory layout, they must have to
>>>>>>> constantly be fixed
>>>>>>> and updated. This is a poor argument for adding this to something
>>>>>>> that
>>>>>>> ends up as ABI.
>>>>>> See above. The user-space has to rely on some ABI/guaranteed
>>>>>> hardware-symbols which can be
>>>>>> used for 'determining' the kernel memory layout.
>>>>> I disagree. Everything and anything in the kernel will change. The
>>>>> ABI rules apply to
>>>>> stuff exposed via syscalls and kernel filesystems. It does not apply
>>>>> to kernel internals,
>>>>> like the memory layout we used yesterday. 14c127c957c1 is a case in
>>>>> point.
>>>>>
>>>>> A debugger trying to rely on this sort of thing would have to play
>>>>> catchup whenever it
>>>>> changes.
>>>> Exactly.  That's the whole point.
>>>>
>>>> The crash utility and makedumpfile are not in the same league as other
>>>> user-space tools.
>>>> They have always had to "play catchup" precisely because they depend
>>>> upon kernel internals,
>>>> which constantly change.
>>> I agree with you and DaveA here. Software user-space debuggers are
>>> dependent on kernel internals (which can change from time-to-time) and
>>> will have to play catch-up (which has been the case since the very
>>> start).
>>>
>>> Unfortunately we don't have any clear ABI for software debugging tools -
>>> may be something to look for in future.
>>>
>>> A case in point is gdb/kgdb, which still needs to run with KASLR
>>> turned-off (nokaslr) for debugging, as it confuses gdb which resolve
>>> kernel symbol address from symbol table of vmlinux. But we can
>>> work-around the same in makedumpfile/crash by reading the 'kaslr_offset'
>>> value. And I have several users telling me now they cannot use gdb on
>>> KASLR enabled kernel to debug panics, but can makedumpfile + crash
>>> combination to achieve the same.
>>>
>>> So, we should be looking to fix these utilities which are broken since
>>> the 52-bit changes for arm64. Accordingly, I will try to send the v6
>>> soon while incorporating the comments posted on the v5.
>> Any update on the next v6 version. Since this patch series is fixing the
>> current broken kdump so need this series to add some more fields in
>> vmcoreinfo for Pointer Authentication work.
> Sorry for the delay. I was caught up in some other urgent arm64
> user-space issues.
> I am preparing the v6 now and hopefully will be able to post it out
> for review later today.

Did v6 get sent out?

>
> Thanks,
> Bhupesh
>
>
Regards,
Scott

^ permalink raw reply

* Re: [PATCH v3 24/41] powerpc/book3s64/pkeys: Store/restore userspace AMR correctly on entry and exit from kernel
From: kernel test robot @ 2020-06-10 22:29 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linuxppc-dev, mpe
  Cc: clang-built-linux, kbuild-all, bauerman, linuxram,
	Aneesh Kumar K.V
In-Reply-To: <20200610095204.608183-25-aneesh.kumar@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 13507 bytes --]

Hi "Aneesh,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20200610]
[cannot apply to v5.7]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/Kernel-userspace-access-execution-prevention-with-hash-translation/20200610-191943
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-r006-20200608 (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project bc2b70982be8f5250cd0082a7190f8b417bd4dfe)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc cross compiling tool for clang build
        # apt-get install binutils-powerpc-linux-gnu
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
>> arch/powerpc/include/asm/book3s/64/kup.h:142:24: error: no member named 'kuap' in 'struct pt_regs'
mtspr(SPRN_AMR, regs->kuap);
~~~~  ^
arch/powerpc/include/asm/reg.h:1386:33: note: expanded from macro 'mtspr'
: "r" ((unsigned long)(v))                                                                ^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
arch/powerpc/include/asm/book3s/64/kup.h:155:22: error: no member named 'kuap' in 'struct pt_regs'
if (unlikely(regs->kuap != amr)) {
~~~~  ^
include/linux/compiler.h:78:42: note: expanded from macro 'unlikely'
# define unlikely(x)    __builtin_expect(!!(x), 0)
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
arch/powerpc/include/asm/book3s/64/kup.h:157:26: error: no member named 'kuap' in 'struct pt_regs'
mtspr(SPRN_AMR, regs->kuap);
~~~~  ^
arch/powerpc/include/asm/reg.h:1386:33: note: expanded from macro 'mtspr'
: "r" ((unsigned long)(v))                                                                ^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
arch/powerpc/include/asm/book3s/64/kup.h:251:14: error: no member named 'kuap' in 'struct pt_regs'
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
~~~~  ^
include/asm-generic/bug.h:122:25: note: expanded from macro 'WARN'
int __ret_warn_on = !!(condition);                                                                ^~~~~~~~~
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:56:20: error: redefinition of 'allow_user_access'
static inline void allow_user_access(void __user *to, const void __user *from,
^
arch/powerpc/include/asm/book3s/64/kup.h:212:29: note: previous definition is here
static __always_inline void allow_user_access(void __user *to, const void __user *from,
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:58:20: error: redefinition of 'prevent_user_access'
static inline void prevent_user_access(void __user *to, const void __user *from,
^
arch/powerpc/include/asm/book3s/64/kup.h:227:20: note: previous definition is here
static inline void prevent_user_access(void __user *to, const void __user *from,
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:60:29: error: redefinition of 'prevent_user_access_return'
static inline unsigned long prevent_user_access_return(void) { return 0UL; }
^
arch/powerpc/include/asm/book3s/64/kup.h:233:29: note: previous definition is here
static inline unsigned long prevent_user_access_return(void)
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:61:20: error: redefinition of 'restore_user_access'
static inline void restore_user_access(unsigned long flags) { }
^
arch/powerpc/include/asm/book3s/64/kup.h:242:20: note: previous definition is here
static inline void restore_user_access(unsigned long flags)
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:63:1: error: redefinition of 'bad_kuap_fault'
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
^
arch/powerpc/include/asm/book3s/64/kup.h:248:1: note: previous definition is here
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:34:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:6:
include/linux/signal.h:87:11: warning: array index 3 is past the end of the array (which contains 1 element) [-Warray-bounds]
return (set->sig[3] | set->sig[2] |
^        ~
arch/powerpc/include/uapi/asm/signal.h:18:2: note: array 'sig' declared here
unsigned long sig[_NSIG_WORDS];
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:34:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:6:
include/linux/signal.h:87:25: warning: array index 2 is past the end of the array (which contains 1 element) [-Warray-bounds]
return (set->sig[3] | set->sig[2] |
^        ~
arch/powerpc/include/uapi/asm/signal.h:18:2: note: array 'sig' declared here
unsigned long sig[_NSIG_WORDS];
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:34:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:6:
include/linux/signal.h:88:4: warning: array index 1 is past the end of the array (which contains 1 element) [-Warray-bounds]
set->sig[1] | set->sig[0]) == 0;
^        ~
arch/powerpc/include/uapi/asm/signal.h:18:2: note: array 'sig' declared here
unsigned long sig[_NSIG_WORDS];
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:34:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:6:
include/linux/signal.h:90:11: warning: array index 1 is past the end of the array (which contains 1 element) [-Warray-bounds]
return (set->sig[1] | set->sig[0]) == 0;
^        ~
arch/powerpc/include/uapi/asm/signal.h:18:2: note: array 'sig' declared here
unsigned long sig[_NSIG_WORDS];
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:34:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:6:
include/linux/signal.h:103:11: warning: array index 3 is past the end of the array (which contains 1 element) [-Warray-bounds]
return  (set1->sig[3] == set2->sig[3]) &&
^         ~
arch/powerpc/include/uapi/asm/signal.h:18:2: note: array 'sig' declared here
unsigned long sig[_NSIG_WORDS];
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:34:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:6:
include/linux/signal.h:103:27: warning: array index 3 is past the end of the array (which contains 1 element) [-Warray-bounds]
return  (set1->sig[3] == set2->sig[3]) &&
^         ~
arch/powerpc/include/uapi/asm/signal.h:18:2: note: array 'sig' declared here
unsigned long sig[_NSIG_WORDS];
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:34:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:6:
include/linux/signal.h:104:5: warning: array index 2 is past the end of the array (which contains 1 element) [-Warray-bounds]
(set1->sig[2] == set2->sig[2]) &&
^         ~
arch/powerpc/include/uapi/asm/signal.h:18:2: note: array 'sig' declared here
unsigned long sig[_NSIG_WORDS];
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:17:
In file included from include/linux/fs.h:34:
In file included from include/linux/percpu-rwsem.h:7:
In file included from include/linux/rcuwait.h:6:
In file included from include/linux/sched/signal.h:6:
include/linux/signal.h:104:21: warning: array index 2 is past the end of the array (which contains 1 element) [-Warray-bounds]
(set1->sig[2] == set2->sig[2]) &&
^         ~
arch/powerpc/include/uapi/asm/signal.h:18:2: note: array 'sig' declared here
unsigned long sig[_NSIG_WORDS];

vim +142 arch/powerpc/include/asm/book3s/64/kup.h

   135	
   136	static inline void kuap_restore_user_amr(struct pt_regs *regs)
   137	{
   138		if (!mmu_has_feature(MMU_FTR_PKEY))
   139			return;
   140	
   141		isync();
 > 142		mtspr(SPRN_AMR, regs->kuap);
   143		/*
   144		 * No isync required here because we are about to rfi
   145		 * back to previous context before any user accesses
   146		 * would be made, which is a CSI.
   147		 */
   148	}
   149	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 37467 bytes --]

^ permalink raw reply

* Re: [PATCH v3 25/41] powerpc/book3s64/kuep: Store/restore userspace IAMR correctly on entry and exit from kernel
From: kernel test robot @ 2020-06-11  0:03 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linuxppc-dev, mpe
  Cc: clang-built-linux, kbuild-all, bauerman, linuxram,
	Aneesh Kumar K.V
In-Reply-To: <20200610095204.608183-26-aneesh.kumar@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 9130 bytes --]

Hi "Aneesh,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20200610]
[cannot apply to v5.7]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/Kernel-userspace-access-execution-prevention-with-hash-translation/20200610-191943
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-r006-20200608 (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project bc2b70982be8f5250cd0082a7190f8b417bd4dfe)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc cross compiling tool for clang build
        # apt-get install binutils-powerpc-linux-gnu
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
arch/powerpc/include/asm/book3s/64/kup.h:181:24: error: no member named 'kuap' in 'struct pt_regs'
mtspr(SPRN_AMR, regs->kuap);
~~~~  ^
arch/powerpc/include/asm/reg.h:1386:33: note: expanded from macro 'mtspr'
: "r" ((unsigned long)(v))                                                                ^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
>> arch/powerpc/include/asm/book3s/64/kup.h:182:25: error: no member named 'kuep' in 'struct pt_regs'
mtspr(SPRN_IAMR, regs->kuep);
~~~~  ^
arch/powerpc/include/asm/reg.h:1386:33: note: expanded from macro 'mtspr'
: "r" ((unsigned long)(v))                                                                ^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
arch/powerpc/include/asm/book3s/64/kup.h:194:22: error: no member named 'kuap' in 'struct pt_regs'
if (unlikely(regs->kuap != amr)) {
~~~~  ^
include/linux/compiler.h:78:42: note: expanded from macro 'unlikely'
# define unlikely(x)    __builtin_expect(!!(x), 0)
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
arch/powerpc/include/asm/book3s/64/kup.h:196:26: error: no member named 'kuap' in 'struct pt_regs'
mtspr(SPRN_AMR, regs->kuap);
~~~~  ^
arch/powerpc/include/asm/reg.h:1386:33: note: expanded from macro 'mtspr'
: "r" ((unsigned long)(v))                                                                ^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
In file included from arch/powerpc/include/asm/kup.h:18:
arch/powerpc/include/asm/book3s/64/kup.h:293:14: error: no member named 'kuap' in 'struct pt_regs'
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
~~~~  ^
include/asm-generic/bug.h:122:25: note: expanded from macro 'WARN'
int __ret_warn_on = !!(condition);                                                                ^~~~~~~~~
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
arch/powerpc/include/asm/kup.h:56:20: error: redefinition of 'allow_user_access'
static inline void allow_user_access(void __user *to, const void __user *from,
^
arch/powerpc/include/asm/book3s/64/kup.h:254:29: note: previous definition is here
static __always_inline void allow_user_access(void __user *to, const void __user *from,
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
arch/powerpc/include/asm/kup.h:58:20: error: redefinition of 'prevent_user_access'
static inline void prevent_user_access(void __user *to, const void __user *from,
^
arch/powerpc/include/asm/book3s/64/kup.h:269:20: note: previous definition is here
static inline void prevent_user_access(void __user *to, const void __user *from,
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:
arch/powerpc/include/asm/kup.h:60:29: error: redefinition of 'prevent_user_access_return'
static inline unsigned long prevent_user_access_return(void) { return 0UL; }
^
arch/powerpc/include/asm/book3s/64/kup.h:275:29: note: previous definition is here
static inline unsigned long prevent_user_access_return(void)
^
In file included from arch/powerpc/kernel/asm-offsets.c:14:
In file included from include/linux/compat.h:15:
In file included from include/linux/socket.h:8:
In file included from include/linux/uio.h:10:
In file included from include/crypto/hash.h:11:
In file included from include/linux/crypto.h:21:
In file included from include/linux/uaccess.h:11:
In file included from arch/powerpc/include/asm/uaccess.h:9:

vim +182 arch/powerpc/include/asm/book3s/64/kup.h

   174	
   175	static inline void kuap_restore_user_amr(struct pt_regs *regs)
   176	{
   177		if (!mmu_has_feature(MMU_FTR_PKEY))
   178			return;
   179	
   180		isync();
   181		mtspr(SPRN_AMR, regs->kuap);
 > 182		mtspr(SPRN_IAMR, regs->kuep);
   183		/*
   184		 * No isync required here because we are about to rfi
   185		 * back to previous context before any user accesses
   186		 * would be made, which is a CSI.
   187		 */
   188	}
   189	static inline void kuap_restore_kernel_amr(struct pt_regs *regs,
   190						   unsigned long amr)
   191	{
   192		if (mmu_has_feature(MMU_FTR_KUAP) || mmu_has_feature(MMU_FTR_PKEY)) {
   193	
   194			if (unlikely(regs->kuap != amr)) {
   195				isync();
   196				mtspr(SPRN_AMR, regs->kuap);
   197				/*
   198				 * No isync required here because we are about to rfi
   199				 * back to previous context before any user accesses
   200				 * would be made, which is a CSI.
   201				 */
   202			}
   203		}
   204		/*
   205		 * No need to restore IAMR when returning to kernel space.
   206		 */
   207	}
   208	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 37467 bytes --]

^ permalink raw reply

* Re: [RFC PATCH 2/2] powerpc/64s: system call support for scv/rfscv instructions
From: Nicholas Piggin @ 2020-06-11  2:34 UTC (permalink / raw)
  To: linuxppc-dev, Matheus Castanho
In-Reply-To: <cc609f82-1d11-b1f9-2594-153936d7fe48@linux.ibm.com>

Excerpts from Matheus Castanho's message of May 14, 2020 6:55 am:
> Hi Nicholas,
> 
> Small comment below:
> 
> On 4/30/20 1:02 AM, Nicholas Piggin wrote:
>> Add support for the scv instruction on POWER9 and later CPUs.
>> 
>> For now this implements the zeroth scv vector 'scv 0', as identical
>> to 'sc' system calls, with the exception that lr is not preserved, and
>> it is 64-bit only. There may yet be changes made to this ABI, so it's
>> for testing only.
>> 
>> rfscv is implemented to return from scv type system calls. It can not
>> be used to return from sc system calls because those are defined to
>> preserve lr.
>> 
>> In a comparison of getpid syscall, the test program had scv taking
>> about 3 more cycles in user mode (92 vs 89 for sc), due to lr handling.
>> getpid syscall throughput on POWER9 is improved by 33%, mostly due to
>> reducing mtmsr and mtspr.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>  Documentation/powerpc/syscall64-abi.rst   |  42 ++++--
> 
> [...]
> 
>> +Return value
>> +------------
>> +- For the sc instruction, both a return value and a return error code are
>> +  returned. cr0.SO is the return error code, and r3 is the return value or
>> +  error code. When cr0.SO is clear, the syscall succeeded and r3 is the return
>> +  value. When cr0.SO is set, the syscall failed and r3 is the error code that
>> +  generally corresponds to errno.
>> +
>> +- For the scv 0 instruction, there is a return value indicates failure if it
>> +  is >= -MAX_ERRNO (-4095) as an unsigned comparison, in which case it is the
>> +  negated return error code. Otherwise it is the successful return value.
> 
> I believe this last paragraph is a bit confusing (didn't quite get the
> unsigned comparison with negative values). So instead of cr0.SO to
> indicate failure, scv returns the negated error code, and positive in
> case of success?

Yes, it will be like other major architectures and return values from
-4095..-1 indicate an error with error value equal to -return value.

I will try to make it a bit clearer.

Thanks,
Nick

^ permalink raw reply

* [PATCH kernel] KVM: PPC: Fix nested guest RC bits update
From: Alexey Kardashevskiy @ 2020-06-11  3:05 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Alexey Kardashevskiy, Aneesh Kumar K.V, kvm-ppc, David Gibson

Before commit 6cdf30375f82 ("powerpc/kvm/book3s: Use kvm helpers
to walk shadow or secondary table") we called __find_linux_pte() with
a page table pointer from a kvm_nested_guest struct but
now we rely on kvmhv_find_nested() which takes an L1 LPID and returns
a kvm_nested_guest pointer, however we pass a L0 LPID there and
the L2 guest hangs.

This fixes the LPID passed to kvmppc_hv_handle_set_rc().

Fixes: 6cdf30375f82 ("powerpc/kvm/book3s: Use kvm helpers to walk shadow or secondary table")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/kvm/book3s_hv_nested.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 99011f1b772a..f36f0a2993c0 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1234,7 +1234,7 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
 
 	/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
 	ret = kvmppc_hv_handle_set_rc(kvm, true, writing,
-				      n_gpa, gp->shadow_lpid);
+				      n_gpa, gp->l1_lpid);
 	if (!ret)
 		ret = -EINVAL;
 	else
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH 2/5] powerpc/lib: Initialize a temporary mm for code patching
From: Christopher M. Riedl @ 2020-06-11  3:29 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev, kernel-hardening
In-Reply-To: <4ffced42-ee3a-841f-2d3f-34daec11b05b@csgroup.eu>

On Wed Jun 3, 2020 at 9:01 AM, Christophe Leroy wrote:
>
> 
>
> 
> Le 03/06/2020 à 07:19, Christopher M. Riedl a écrit :
> > When code patching a STRICT_KERNEL_RWX kernel the page containing the
> > address to be patched is temporarily mapped with permissive memory
> > protections. Currently, a per-cpu vmalloc patch area is used for this
> > purpose. While the patch area is per-cpu, the temporary page mapping is
> > inserted into the kernel page tables for the duration of the patching.
> > The mapping is exposed to CPUs other than the patching CPU - this is
> > undesirable from a hardening perspective.
> > 
> > Use the `poking_init` init hook to prepare a temporary mm and patching
> > address. Initialize the temporary mm by copying the init mm. Choose a
> > randomized patching address inside the temporary mm userspace address
> > portion. The next patch uses the temporary mm and patching address for
> > code patching.
> > 
> > Based on x86 implementation:
> > 
> > commit 4fc19708b165
> > ("x86/alternatives: Initialize temporary mm for patching")
> > 
> > Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
> > ---
> >   arch/powerpc/lib/code-patching.c | 33 ++++++++++++++++++++++++++++++++
> >   1 file changed, 33 insertions(+)
> > 
> > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> > index 5ecf0d635a8d..599114f63b44 100644
> > --- a/arch/powerpc/lib/code-patching.c
> > +++ b/arch/powerpc/lib/code-patching.c
> > @@ -11,6 +11,8 @@
> >   #include <linux/cpuhotplug.h>
> >   #include <linux/slab.h>
> >   #include <linux/uaccess.h>
> > +#include <linux/sched/task.h>
> > +#include <linux/random.h>
> >   
> >   #include <asm/pgtable.h>
> >   #include <asm/tlbflush.h>
> > @@ -45,6 +47,37 @@ int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
> >   }
> >   
> >   #ifdef CONFIG_STRICT_KERNEL_RWX
> > +
> > +static struct mm_struct *patching_mm __ro_after_init;
> > +static unsigned long patching_addr __ro_after_init;
> > +
> > +void __init poking_init(void)
> > +{
> > +	spinlock_t *ptl; /* for protecting pte table */
> > +	pte_t *ptep;
> > +
> > +	/*
> > +	 * Some parts of the kernel (static keys for example) depend on
> > +	 * successful code patching. Code patching under STRICT_KERNEL_RWX
> > +	 * requires this setup - otherwise we cannot patch at all. We use
> > +	 * BUG_ON() here and later since an early failure is preferred to
> > +	 * buggy behavior and/or strange crashes later.
> > +	 */
> > +	patching_mm = copy_init_mm();
> > +	BUG_ON(!patching_mm);
> > +
> > +	/*
> > +	 * In hash we cannot go above DEFAULT_MAP_WINDOW easily.
> > +	 * XXX: Do we want additional bits of entropy for radix?
> > +	 */
> > +	patching_addr = (get_random_long() & PAGE_MASK) %
> > +		(DEFAULT_MAP_WINDOW - PAGE_SIZE);
> > +
> > +	ptep = get_locked_pte(patching_mm, patching_addr, &ptl);
> > +	BUG_ON(!ptep);
> > +	pte_unmap_unlock(ptep, ptl);
>
> 
> Is this needed ? What's the point in getting the pte to unmap it
> immediatly without doing anything with it ?
>

We pre-allocate the PTE here since later the allocation may fail
(GFP_KERNEL) badly when interrupts are disabled during patching.

> 
> Christophe
>
> 
> > +}
> > +
> >   static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
> >   
> >   static int text_area_cpu_up(unsigned int cpu)
> > 
>
> 
>
> 


^ permalink raw reply

* Re: [PATCH 3/5] powerpc/lib: Use a temporary mm for code patching
From: Christopher M. Riedl @ 2020-06-11  3:31 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev, kernel-hardening
In-Reply-To: <7c72a249-dff1-ca22-393b-dabe35665375@csgroup.eu>

On Wed Jun 3, 2020 at 9:12 AM, Christophe Leroy wrote:
>
> 
>
> 
> Le 03/06/2020 à 07:19, Christopher M. Riedl a écrit :
> > Currently, code patching a STRICT_KERNEL_RWX exposes the temporary
> > mappings to other CPUs. These mappings should be kept local to the CPU
> > doing the patching. Use the pre-initialized temporary mm and patching
> > address for this purpose. Also add a check after patching to ensure the
> > patch succeeded.
> > 
> > Use the KUAP functions on non-BOOKS3_64 platforms since the temporary
> > mapping for patching uses a userspace address (to keep the mapping
> > local). On BOOKS3_64 platforms hash does not implement KUAP and on radix
> > the use of PAGE_KERNEL sets EAA[0] for the PTE which means the AMR
> > (KUAP) protection is ignored (see PowerISA v3.0b, Fig, 35).
> > 
> > Based on x86 implementation:
> > 
> > commit b3fd8e83ada0
> > ("x86/alternatives: Use temporary mm for text poking")
> > 
> > Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
> > ---
> >   arch/powerpc/lib/code-patching.c | 148 ++++++++++++-------------------
> >   1 file changed, 55 insertions(+), 93 deletions(-)
> > 
> > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> > index 599114f63b44..df0765845204 100644
> > --- a/arch/powerpc/lib/code-patching.c
> > +++ b/arch/powerpc/lib/code-patching.c
> > @@ -20,6 +20,7 @@
> >   #include <asm/code-patching.h>
> >   #include <asm/setup.h>
> >   #include <asm/inst.h>
> > +#include <asm/mmu_context.h>
> >   
> >   static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr,
> >   			       struct ppc_inst *patch_addr)
> > @@ -78,101 +79,58 @@ void __init poking_init(void)
> >   	pte_unmap_unlock(ptep, ptl);
> >   }
> >   
> > -static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
> > -
> > -static int text_area_cpu_up(unsigned int cpu)
> > -{
> > -	struct vm_struct *area;
> > -
> > -	area = get_vm_area(PAGE_SIZE, VM_ALLOC);
> > -	if (!area) {
> > -		WARN_ONCE(1, "Failed to create text area for cpu %d\n",
> > -			cpu);
> > -		return -1;
> > -	}
> > -	this_cpu_write(text_poke_area, area);
> > -
> > -	return 0;
> > -}
> > -
> > -static int text_area_cpu_down(unsigned int cpu)
> > -{
> > -	free_vm_area(this_cpu_read(text_poke_area));
> > -	return 0;
> > -}
> > -
> > -/*
> > - * Run as a late init call. This allows all the boot time patching to be done
> > - * simply by patching the code, and then we're called here prior to
> > - * mark_rodata_ro(), which happens after all init calls are run. Although
> > - * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge
> > - * it as being preferable to a kernel that will crash later when someone tries
> > - * to use patch_instruction().
> > - */
> > -static int __init setup_text_poke_area(void)
> > -{
> > -	BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
> > -		"powerpc/text_poke:online", text_area_cpu_up,
> > -		text_area_cpu_down));
> > -
> > -	return 0;
> > -}
> > -late_initcall(setup_text_poke_area);
> > +struct patch_mapping {
> > +	spinlock_t *ptl; /* for protecting pte table */
> > +	pte_t *ptep;
> > +	struct temp_mm temp_mm;
> > +};
> >   
> >   /*
> >    * This can be called for kernel text or a module.
> >    */
> > -static int map_patch_area(void *addr, unsigned long text_poke_addr)
> > +static int map_patch(const void *addr, struct patch_mapping *patch_mapping)
> >   {
> > -	unsigned long pfn;
> > -	int err;
> > +	struct page *page;
> > +	pte_t pte;
> > +	pgprot_t pgprot;
> >   
> >   	if (is_vmalloc_addr(addr))
> > -		pfn = vmalloc_to_pfn(addr);
> > +		page = vmalloc_to_page(addr);
> >   	else
> > -		pfn = __pa_symbol(addr) >> PAGE_SHIFT;
> > +		page = virt_to_page(addr);
> >   
> > -	err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
> > +	if (radix_enabled())
> > +		pgprot = PAGE_KERNEL;
> > +	else
> > +		pgprot = PAGE_SHARED;
> >   
> > -	pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
> > -	if (err)
> > +	patch_mapping->ptep = get_locked_pte(patching_mm, patching_addr,
> > +					     &patch_mapping->ptl);
> > +	if (unlikely(!patch_mapping->ptep)) {
> > +		pr_warn("map patch: failed to allocate pte for patching\n");
> >   		return -1;
> > +	}
> > +
> > +	pte = mk_pte(page, pgprot);
> > +	if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
> > +		pte = pte_mkdirty(pte);
>
> 
> Are you should you don't need the DIRTY bit for BOOK3S/64 non radix ?
>
> 
> I think the DIRTY bit is needed always, and adding it when it is already
> there is harmless, so it should be done inconditionnnaly.
>

I tested this and it doesn't seem to make a differnce so I can make this
common in the next spin.

> 
> > +	set_pte_at(patching_mm, patching_addr, patch_mapping->ptep, pte);
> > +
> > +	init_temp_mm(&patch_mapping->temp_mm, patching_mm);
> > +	use_temporary_mm(&patch_mapping->temp_mm);
> >   
> >   	return 0;
> >   }
> >   
> > -static inline int unmap_patch_area(unsigned long addr)
> > +static void unmap_patch(struct patch_mapping *patch_mapping)
> >   {
> > -	pte_t *ptep;
> > -	pmd_t *pmdp;
> > -	pud_t *pudp;
> > -	pgd_t *pgdp;
> > -
> > -	pgdp = pgd_offset_k(addr);
> > -	if (unlikely(!pgdp))
> > -		return -EINVAL;
> > -
> > -	pudp = pud_offset(pgdp, addr);
> > -	if (unlikely(!pudp))
> > -		return -EINVAL;
> > -
> > -	pmdp = pmd_offset(pudp, addr);
> > -	if (unlikely(!pmdp))
> > -		return -EINVAL;
> > -
> > -	ptep = pte_offset_kernel(pmdp, addr);
> > -	if (unlikely(!ptep))
> > -		return -EINVAL;
> > +	/* In hash, pte_clear flushes the tlb */
> > +	pte_clear(patching_mm, patching_addr, patch_mapping->ptep);
> > +	unuse_temporary_mm(&patch_mapping->temp_mm);
> >   
> > -	pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr);
> > -
> > -	/*
> > -	 * In hash, pte_clear flushes the tlb, in radix, we have to
> > -	 */
> > -	pte_clear(&init_mm, addr, ptep);
> > -	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
> > -
> > -	return 0;
> > +	/* In radix, we have to explicitly flush the tlb (no-op in hash) */
> > +	local_flush_tlb_mm(patching_mm);
> > +	pte_unmap_unlock(patch_mapping->ptep, patch_mapping->ptl);
> >   }
> >   
> >   static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
> > @@ -180,32 +138,36 @@ static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
> >   	int err;
> >   	struct ppc_inst *patch_addr = NULL;
> >   	unsigned long flags;
> > -	unsigned long text_poke_addr;
> > -	unsigned long kaddr = (unsigned long)addr;
> > +	struct patch_mapping patch_mapping;
> >   
> >   	/*
> > -	 * During early early boot patch_instruction is called
> > -	 * when text_poke_area is not ready, but we still need
> > -	 * to allow patching. We just do the plain old patching
> > +	 * The patching_mm is initialized before calling mark_rodata_ro. Prior
> > +	 * to this, patch_instruction is called when we don't have (and don't
> > +	 * need) the patching_mm so just do plain old patching.
> >   	 */
> > -	if (!this_cpu_read(text_poke_area))
> > +	if (!patching_mm)
> >   		return raw_patch_instruction(addr, instr);
> >   
> >   	local_irq_save(flags);
> >   
> > -	text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr;
> > -	if (map_patch_area(addr, text_poke_addr)) {
> > -		err = -1;
> > +	err = map_patch(addr, &patch_mapping);
> > +	if (err)
> >   		goto out;
> > -	}
> >   
> > -	patch_addr = (struct ppc_inst *)(text_poke_addr + (kaddr & ~PAGE_MASK));
> > +	patch_addr = (struct ppc_inst *)(patching_addr | offset_in_page(addr));
> >   
> > -	__patch_instruction(addr, instr, patch_addr);
> > +	if (!radix_enabled())
> > +		allow_write_to_user(patch_addr, sizeof(instr));
>
> 
> Can't use sizeof(instr), you have to use ppc_inst_size()
>

Good catch, will fix this in the next spin (the other one below too).

> 
> > +	err = __patch_instruction(addr, instr, patch_addr);
> > +	if (!radix_enabled())
> > +		prevent_write_to_user(patch_addr, sizeof(instr));
>
> 
> Same
>
> 
> >   
> > -	err = unmap_patch_area(text_poke_addr);
> > -	if (err)
> > -		pr_warn("failed to unmap %lx\n", text_poke_addr);
> > +	unmap_patch(&patch_mapping);
> > +	/*
> > +	 * Something is wrong if what we just wrote doesn't match what we
> > +	 * think we just wrote.
> > +	 */
> > +	WARN_ON(!ppc_inst_equal(ppc_inst_read(addr), instr));
> >   
> >   out:
> >   	local_irq_restore(flags);
> > 
>
> 
> Christophe
>
> 
>
> 


^ permalink raw reply

* Re: [PATCH 1/5] powerpc/mm: Introduce temporary mm
From: Christopher M. Riedl @ 2020-06-11  3:34 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev, kernel-hardening
In-Reply-To: <ff05b833-720e-e1e2-f43b-8285d520a563@csgroup.eu>

On Wed Jun 3, 2020 at 8:58 AM, Christophe Leroy wrote:
>
> 
>
> 
> Le 03/06/2020 à 07:19, Christopher M. Riedl a écrit :
> > x86 supports the notion of a temporary mm which restricts access to
> > temporary PTEs to a single CPU. A temporary mm is useful for situations
> > where a CPU needs to perform sensitive operations (such as patching a
> > STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
> > said mappings to other CPUs. A side benefit is that other CPU TLBs do
> > not need to be flushed when the temporary mm is torn down.
> > 
> > Mappings in the temporary mm can be set in the userspace portion of the
> > address-space.
> > 
> > Interrupts must be disabled while the temporary mm is in use. HW
> > breakpoints, which may have been set by userspace as watchpoints on
> > addresses now within the temporary mm, are saved and disabled when
> > loading the temporary mm. The HW breakpoints are restored when unloading
> > the temporary mm. All HW breakpoints are indiscriminately disabled while
> > the temporary mm is in use.
> > 
> > Based on x86 implementation:
> > 
> > commit cefa929c034e
> > ("x86/mm: Introduce temporary mm structs")
> > 
> > Signed-off-by: Christopher M. Riedl <cmr@informatik.wtf>
> > ---
> >   arch/powerpc/include/asm/debug.h       |  1 +
> >   arch/powerpc/include/asm/mmu_context.h | 64 ++++++++++++++++++++++++++
> >   arch/powerpc/kernel/process.c          |  5 ++
> >   3 files changed, 70 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
> > index ec57daf87f40..827350c9bcf3 100644
> > --- a/arch/powerpc/include/asm/debug.h
> > +++ b/arch/powerpc/include/asm/debug.h
> > @@ -46,6 +46,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
> >   #endif
> >   
> >   void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
> > +void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk);
> >   bool ppc_breakpoint_available(void);
> >   #ifdef CONFIG_PPC_ADV_DEBUG_REGS
> >   extern void do_send_trap(struct pt_regs *regs, unsigned long address,
> > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > index 1a474f6b1992..9269c7c7b04e 100644
> > --- a/arch/powerpc/include/asm/mmu_context.h
> > +++ b/arch/powerpc/include/asm/mmu_context.h
> > @@ -10,6 +10,7 @@
> >   #include <asm/mmu.h>	
> >   #include <asm/cputable.h>
> >   #include <asm/cputhreads.h>
> > +#include <asm/debug.h>
> >   
> >   /*
> >    * Most if the context management is out of line
> > @@ -300,5 +301,68 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
> >   	return 0;
> >   }
> >   
> > +struct temp_mm {
> > +	struct mm_struct *temp;
> > +	struct mm_struct *prev;
> > +	bool is_kernel_thread;
> > +	struct arch_hw_breakpoint brk[HBP_NUM_MAX];
> > +};
> > +
> > +static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
> > +{
> > +	temp_mm->temp = mm;
> > +	temp_mm->prev = NULL;
> > +	temp_mm->is_kernel_thread = false;
> > +	memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
> > +}
> > +
> > +static inline void use_temporary_mm(struct temp_mm *temp_mm)
> > +{
> > +	lockdep_assert_irqs_disabled();
> > +
> > +	temp_mm->is_kernel_thread = current->mm == NULL;
> > +	if (temp_mm->is_kernel_thread)
> > +		temp_mm->prev = current->active_mm;
> > +	else
> > +		temp_mm->prev = current->mm;
>
> 
> Is that necessary to make different for kernel threads ? When I look at
> x86 implementation, they don't do such a thing.
>

Yup, in do_slb_fault we error out if the current->mm is NULL resulting
in spectacular fails during patching w/ hash mmu.

> 
> > +
> > +	/*
> > +	 * Hash requires a non-NULL current->mm to allocate a userspace address
> > +	 * when handling a page fault. Does not appear to hurt in Radix either.
> > +	 */
> > +	current->mm = temp_mm->temp;
> > +	switch_mm_irqs_off(NULL, temp_mm->temp, current);
> > +
> > +	if (ppc_breakpoint_available()) {
> > +		struct arch_hw_breakpoint null_brk = {0};
> > +		int i = 0;
> > +
> > +		for (; i < nr_wp_slots(); ++i) {
> > +			__get_breakpoint(i, &temp_mm->brk[i]);
> > +			if (temp_mm->brk[i].type != 0)
> > +				__set_breakpoint(i, &null_brk);
> > +		}
> > +	}
> > +}
> > +
> > +static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
> > +{
> > +	lockdep_assert_irqs_disabled();
> > +
> > +	if (temp_mm->is_kernel_thread)
> > +		current->mm = NULL;
> > +	else
> > +		current->mm = temp_mm->prev;
> > +	switch_mm_irqs_off(NULL, temp_mm->prev, current);
> > +
> > +	if (ppc_breakpoint_available()) {
> > +		int i = 0;
> > +
> > +		for (; i < nr_wp_slots(); ++i)
> > +			if (temp_mm->brk[i].type != 0)
> > +				__set_breakpoint(i, &temp_mm->brk[i]);
> > +	}
> > +}
> > +
> >   #endif /* __KERNEL__ */
> >   #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
> > diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> > index 048d64c4e115..3973144f6980 100644
> > --- a/arch/powerpc/kernel/process.c
> > +++ b/arch/powerpc/kernel/process.c
> > @@ -825,6 +825,11 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
> >   	return 0;
> >   }
> >   
> > +void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk)
> > +{
> > +	memcpy(brk, this_cpu_ptr(&current_brk[nr]), sizeof(*brk));
> > +}
> > +
> >   void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
> >   {
> >   	memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
> > 
>
> 
> Christophe
>
> 
>
> 


^ permalink raw reply

* [PATCH v2] powerpc: Remove inaccessible CMDLINE default
From: Chris Packham @ 2020-06-11  3:41 UTC (permalink / raw)
  To: mpe, benh, paulus, christophe.leroy
  Cc: Chris Packham, linuxppc-dev, linux-kernel

Since commit cbe46bd4f510 ("powerpc: remove CONFIG_CMDLINE #ifdef mess")
CONFIG_CMDLINE has always had a value regardless of CONFIG_CMDLINE_BOOL.

For example:

 $ make ARCH=powerpc defconfig
 $ cat .config
 # CONFIG_CMDLINE_BOOL is not set
 CONFIG_CMDLINE=""

When enabling CONFIG_CMDLINE_BOOL this value is kept making the 'default
"..." if CONFIG_CMDLINE_BOOL' ineffective.

 $ ./scripts/config --enable CONFIG_CMDLINE_BOOL
 $ cat .config
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE=""

Remove CONFIG_CMDLINE_BOOL and the inaccessible default.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
It took me a while to get round to sending a v2, for a refresher v1 can be found here:

http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20190802050232.22978-1-chris.packham@alliedtelesis.co.nz/

Changes in v2:
- Rebase on top of Linus's tree
- Fix some typos in commit message
- Add review from Christophe
- Remove CONFIG_CMDLINE_BOOL

 arch/powerpc/Kconfig | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..51abc59c3334 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -859,12 +859,8 @@ config PPC_DENORMALISATION
 	  Add support for handling denormalisation of single precision
 	  values.  Useful for bare metal only.  If unsure say Y here.
 
-config CMDLINE_BOOL
-	bool "Default bootloader kernel arguments"
-
 config CMDLINE
-	string "Initial kernel command string" if CMDLINE_BOOL
-	default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL
+	string "Initial kernel command string"
 	default ""
 	help
 	  On some platforms, there is currently no way for the boot loader to
-- 
2.27.0


^ permalink raw reply related

* [PATCH v2] All arch: remove system call sys_sysctl
From: Xiaoming Ni @ 2020-06-11  3:54 UTC (permalink / raw)
  To: acme, ak, akpm, alexander.shishkin, arnd, axboe, bauerman, benh,
	borntraeger, bp, brgerst, catalin.marinas, christian, chris,
	cyphar, dalias, davem, deller, dhowells, dvyukov, ebiederm, elver,
	fenghua.yu, flameeyes, geert, gor, haolee.swjtu, heiko.carstens,
	hpa, ink, James.Bottomley, jcmvbkbc, jiri, jolsa, jongk, keescook,
	krzk, linux-alpha, linux-api, linux-arm-kernel, linux, linux,
	linux-fsdevel, linux-ia64, linux-kernel, linux-m68k, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, linux-sh, linux-xtensa,
	luto, mark.rutland, martin.petersen, mattst88, mcgrof, minchan,
	mingo, monstr, mpe, mszeredi, namhyung, naveen.n.rao, nixiaoming,
	npiggin, oleg, olof, paulburton, paulmck, paulus, peterz,
	ravi.bangoria, rdunlap, rth, samitolvanen, sargun, sfr, shawnguo,
	sparclinux, sudeep.holla, surenb, svens, tglx, tony.luck,
	tsbogend, vbabka, viro, will, x86, yamada.masahiro, ysato,
	yzaikin, zhouyanjie
  Cc: young.liuyang, alex.huangjianhui

Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
sys_sysctl is actually unavailable: any input can only return an error.

We have been warning about people using the sysctl system call for years
and believe there are no more users.  Even if there are users of this
interface if they have not complained or fixed their code by now they
probably are not going to, so there is no point in warning them any
longer.

So completely remove sys_sysctl on all architectures.

Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>

changes in v2:
  According to Kees Cook's suggestion, completely remove sys_sysctl on all arch
  According to Eric W. Biederman's suggestion, update the commit log

V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaoming@huawei.com/
  Delete the code of sys_sysctl and return -ENOSYS directly at the function entry
---
 arch/alpha/kernel/syscalls/syscall.tbl             |   2 +-
 arch/arm/configs/am200epdkit_defconfig             |   1 -
 arch/arm/tools/syscall.tbl                         |   2 +-
 arch/arm64/include/asm/unistd32.h                  |   4 +-
 arch/ia64/kernel/syscalls/syscall.tbl              |   2 +-
 arch/m68k/kernel/syscalls/syscall.tbl              |   2 +-
 arch/microblaze/kernel/syscalls/syscall.tbl        |   2 +-
 arch/mips/configs/cu1000-neo_defconfig             |   1 -
 arch/mips/kernel/syscalls/syscall_n32.tbl          |   2 +-
 arch/mips/kernel/syscalls/syscall_n64.tbl          |   2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl          |   2 +-
 arch/parisc/kernel/syscalls/syscall.tbl            |   2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl           |   2 +-
 arch/s390/kernel/syscalls/syscall.tbl              |   2 +-
 arch/sh/configs/dreamcast_defconfig                |   1 -
 arch/sh/configs/espt_defconfig                     |   1 -
 arch/sh/configs/hp6xx_defconfig                    |   1 -
 arch/sh/configs/landisk_defconfig                  |   1 -
 arch/sh/configs/lboxre2_defconfig                  |   1 -
 arch/sh/configs/microdev_defconfig                 |   1 -
 arch/sh/configs/migor_defconfig                    |   1 -
 arch/sh/configs/r7780mp_defconfig                  |   1 -
 arch/sh/configs/r7785rp_defconfig                  |   1 -
 arch/sh/configs/rts7751r2d1_defconfig              |   1 -
 arch/sh/configs/rts7751r2dplus_defconfig           |   1 -
 arch/sh/configs/se7206_defconfig                   |   1 -
 arch/sh/configs/se7343_defconfig                   |   1 -
 arch/sh/configs/se7619_defconfig                   |   1 -
 arch/sh/configs/se7705_defconfig                   |   1 -
 arch/sh/configs/se7750_defconfig                   |   1 -
 arch/sh/configs/se7751_defconfig                   |   1 -
 arch/sh/configs/secureedge5410_defconfig           |   1 -
 arch/sh/configs/sh03_defconfig                     |   1 -
 arch/sh/configs/sh7710voipgw_defconfig             |   1 -
 arch/sh/configs/sh7757lcr_defconfig                |   1 -
 arch/sh/configs/sh7763rdp_defconfig                |   1 -
 arch/sh/configs/shmin_defconfig                    |   1 -
 arch/sh/configs/titan_defconfig                    |   1 -
 arch/sh/include/uapi/asm/unistd_64.h               |   2 +-
 arch/sh/kernel/syscalls/syscall.tbl                |   2 +-
 arch/sh/kernel/syscalls_64.S                       |   2 +-
 arch/sparc/kernel/syscalls/syscall.tbl             |   2 +-
 arch/x86/entry/syscalls/syscall_32.tbl             |   2 +-
 arch/x86/entry/syscalls/syscall_64.tbl             |   2 +-
 arch/xtensa/kernel/syscalls/syscall.tbl            |   2 +-
 include/linux/compat.h                             |   1 -
 include/linux/syscalls.h                           |   2 -
 include/linux/sysctl.h                             |   6 +-
 include/uapi/linux/sysctl.h                        |  15 --
 kernel/Makefile                                    |   2 +-
 kernel/sys_ni.c                                    |   1 -
 kernel/sysctl_binary.c                             | 171 ---------------------
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |   2 +-
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |   2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |   2 +-
 55 files changed, 26 insertions(+), 244 deletions(-)
 delete mode 100644 kernel/sysctl_binary.c

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index b249824..0da7f1c 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -249,7 +249,7 @@
 316	common	mlockall			sys_mlockall
 317	common	munlockall			sys_munlockall
 318	common	sysinfo				sys_sysinfo
-319	common	_sysctl				sys_sysctl
+319	common	_sysctl				sys_ni_syscall
 # 320 was sys_idle
 321	common	oldumount			sys_oldumount
 322	common	swapon				sys_swapon
diff --git a/arch/arm/configs/am200epdkit_defconfig b/arch/arm/configs/am200epdkit_defconfig
index f56ac39..4e49d6c 100644
--- a/arch/arm/configs/am200epdkit_defconfig
+++ b/arch/arm/configs/am200epdkit_defconfig
@@ -3,7 +3,6 @@ CONFIG_LOCALVERSION="gum"
 CONFIG_SYSVIPC=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SHMEM is not set
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 7b3832d..f36fda6 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -162,7 +162,7 @@
 146	common	writev			sys_writev
 147	common	getsid			sys_getsid
 148	common	fdatasync		sys_fdatasync
-149	common	_sysctl			sys_sysctl
+149	common	_sysctl			sys_ni_syscall
 150	common	mlock			sys_mlock
 151	common	munlock			sys_munlock
 152	common	mlockall		sys_mlockall
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index f8dafe9..ca41bb7 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -308,8 +308,8 @@
 __SYSCALL(__NR_getsid, sys_getsid)
 #define __NR_fdatasync 148
 __SYSCALL(__NR_fdatasync, sys_fdatasync)
-#define __NR__sysctl 149
-__SYSCALL(__NR__sysctl, compat_sys_sysctl)
+			/* 149 was sys_sysctl */
+__SYSCALL(149, sys_ni_syscall)
 #define __NR_mlock 150
 __SYSCALL(__NR_mlock, sys_mlock)
 #define __NR_munlock 151
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 6636a1a..75b880b 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -135,7 +135,7 @@
 123	common	writev				sys_writev
 124	common	pread64				sys_pread64
 125	common	pwrite64			sys_pwrite64
-126	common	_sysctl				sys_sysctl
+126	common	_sysctl				sys_ni_syscall
 127	common	mmap				sys_mmap
 128	common	munmap				sys_munmap
 129	common	mlock				sys_mlock
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 8cd84a7..91b21ad 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
 146	common	writev				sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
-149	common	_sysctl				sys_sysctl
+149	common	_sysctl				sys_ni_syscall
 150	common	mlock				sys_mlock
 151	common	munlock				sys_munlock
 152	common	mlockall			sys_mlockall
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index f581a02..dc5f9fb 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
 146	common	writev				sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
-149	common	_sysctl				sys_sysctl
+149	common	_sysctl				sys_ni_syscall
 150	common	mlock				sys_mlock
 151	common	munlock				sys_munlock
 152	common	mlockall			sys_mlockall
diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig
index 9b05a8f..244654c 100644
--- a/arch/mips/configs/cu1000-neo_defconfig
+++ b/arch/mips/configs/cu1000-neo_defconfig
@@ -17,7 +17,6 @@ CONFIG_CGROUP_CPUACCT=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index c85bdc3..2653b28 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -159,7 +159,7 @@
 149	n32	munlockall			sys_munlockall
 150	n32	vhangup				sys_vhangup
 151	n32	pivot_root			sys_pivot_root
-152	n32	_sysctl				compat_sys_sysctl
+152	n32	_sysctl				sys_ni_syscall
 153	n32	prctl				sys_prctl
 154	n32	adjtimex			sys_adjtimex_time32
 155	n32	setrlimit			compat_sys_setrlimit
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 9e08c40..a4fd3bf 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -159,7 +159,7 @@
 149	n64	munlockall			sys_munlockall
 150	n64	vhangup				sys_vhangup
 151	n64	pivot_root			sys_pivot_root
-152	n64	_sysctl				sys_sysctl
+152	n64	_sysctl				sys_ni_syscall
 153	n64	prctl				sys_prctl
 154	n64	adjtimex			sys_adjtimex
 155	n64	setrlimit			sys_setrlimit
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index a2b591d..a30cfd4 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -164,7 +164,7 @@
 150	o32	unused150			sys_ni_syscall
 151	o32	getsid				sys_getsid
 152	o32	fdatasync			sys_fdatasync
-153	o32	_sysctl				sys_sysctl			compat_sys_sysctl
+153	o32	_sysctl				sys_ni_syscall
 154	o32	mlock				sys_mlock
 155	o32	munlock				sys_munlock
 156	o32	mlockall			sys_mlockall
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 98e7442..a47bc19 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -163,7 +163,7 @@
 146	common	writev			sys_writev			compat_sys_writev
 147	common	getsid			sys_getsid
 148	common	fdatasync		sys_fdatasync
-149	common	_sysctl			sys_sysctl			compat_sys_sysctl
+149	common	_sysctl			sys_ni_syscall
 150	common	mlock			sys_mlock
 151	common	munlock			sys_munlock
 152	common	mlockall		sys_mlockall
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 72fb9dd..a60163f 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -197,7 +197,7 @@
 146	common	writev				sys_writev			compat_sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
-149	nospu	_sysctl				sys_sysctl			compat_sys_sysctl
+149	nospu	_sysctl				sys_ni_syscall
 150	common	mlock				sys_mlock
 151	common	munlock				sys_munlock
 152	common	mlockall			sys_mlockall
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index b731fcb..f17aaf6 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -138,7 +138,7 @@
 146  common	writev			sys_writev			compat_sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
-149  common	_sysctl			sys_sysctl			compat_sys_sysctl
+149  common	_sysctl			sys_ni_syscall
 150  common	mlock			sys_mlock			sys_mlock
 151  common	munlock			sys_munlock			sys_munlock
 152  common	mlockall		sys_mlockall			sys_mlockall
diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig
index ae067e0..6a82c7b 100644
--- a/arch/sh/configs/dreamcast_defconfig
+++ b/arch/sh/configs/dreamcast_defconfig
@@ -1,7 +1,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
 CONFIG_MODULES=y
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index a5b865a..9a988c3 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_NAMESPACES=y
 CONFIG_UTS_NS=y
 CONFIG_IPC_NS=y
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index a92db66..70e6605 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -3,7 +3,6 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_CPU_SUBTYPE_SH7709=y
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index 567af75..ba6ec04 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -1,6 +1,5 @@
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS_EXTRA_PASS=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig
index 10f6d37..05e4ac6 100644
--- a/arch/sh/configs/lboxre2_defconfig
+++ b/arch/sh/configs/lboxre2_defconfig
@@ -1,6 +1,5 @@
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS_EXTRA_PASS=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig
index ed84d13..c65667d 100644
--- a/arch/sh/configs/microdev_defconfig
+++ b/arch/sh/configs/microdev_defconfig
@@ -2,7 +2,6 @@ CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_CPU_SUBTYPE_SH4_202=y
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index 494a1675..dec9316 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -4,7 +4,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index 0a18f80..ff8f8d4 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 CONFIG_SLAB=y
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index 7226ac5..d9afce5 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -7,7 +7,6 @@ CONFIG_RCU_TRACE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig
index 6a3cfe0..fc9c221 100644
--- a/arch/sh/configs/rts7751r2d1_defconfig
+++ b/arch/sh/configs/rts7751r2d1_defconfig
@@ -1,7 +1,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig
index 2b3d7d2..ff3fd678 100644
--- a/arch/sh/configs/rts7751r2dplus_defconfig
+++ b/arch/sh/configs/rts7751r2dplus_defconfig
@@ -1,7 +1,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index a93402b..19f0dae 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -18,7 +18,6 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_ELF_CORE is not set
 # CONFIG_COMPAT_BRK is not set
diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig
index 06d067c..553c7aa 100644
--- a/arch/sh/configs/se7343_defconfig
+++ b/arch/sh/configs/se7343_defconfig
@@ -2,7 +2,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SHMEM is not set
diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig
index f54722d..baf1c84 100644
--- a/arch/sh/configs/se7619_defconfig
+++ b/arch/sh/configs/se7619_defconfig
@@ -1,7 +1,6 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
 # CONFIG_HOTPLUG is not set
 # CONFIG_ELF_CORE is not set
diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig
index ddfc698..805966f 100644
--- a/arch/sh/configs/se7705_defconfig
+++ b/arch/sh/configs/se7705_defconfig
@@ -2,7 +2,6 @@
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_SLAB=y
diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig
index b23f675..3f1c137 100644
--- a/arch/sh/configs/se7750_defconfig
+++ b/arch/sh/configs/se7750_defconfig
@@ -5,7 +5,6 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 1623436..4a02406 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -3,7 +3,6 @@ CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig
index 360592d..8422599 100644
--- a/arch/sh/configs/secureedge5410_defconfig
+++ b/arch/sh/configs/secureedge5410_defconfig
@@ -1,7 +1,6 @@
 # CONFIG_SWAP is not set
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_SLAB=y
 # CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 87db9a8..f0073ed 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -3,7 +3,6 @@ CONFIG_POSIX_MQUEUE=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=m
diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig
index c86f284..12a1395 100644
--- a/arch/sh/configs/sh7710voipgw_defconfig
+++ b/arch/sh/configs/sh7710voipgw_defconfig
@@ -2,7 +2,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SHMEM is not set
diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig
index 9f2aed0..ca327d1 100644
--- a/arch/sh/configs/sh7757lcr_defconfig
+++ b/arch/sh/configs/sh7757lcr_defconfig
@@ -8,7 +8,6 @@ CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_KALLSYMS_ALL=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index d0a0aa7..26c5fd0 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -5,7 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_NAMESPACES=y
 CONFIG_UTS_NS=y
 CONFIG_IPC_NS=y
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig
index d589cfd..5504ca4 100644
--- a/arch/sh/configs/shmin_defconfig
+++ b/arch/sh/configs/shmin_defconfig
@@ -1,7 +1,6 @@
 # CONFIG_SWAP is not set
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_UID16 is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_KALLSYMS is not set
 # CONFIG_HOTPLUG is not set
 # CONFIG_BUG is not set
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index 4ec961a..ba887f1 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -6,7 +6,6 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=16
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SYSCTL_SYSCALL is not set
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
diff --git a/arch/sh/include/uapi/asm/unistd_64.h b/arch/sh/include/uapi/asm/unistd_64.h
index 75da548..04fe2ef 100644
--- a/arch/sh/include/uapi/asm/unistd_64.h
+++ b/arch/sh/include/uapi/asm/unistd_64.h
@@ -164,7 +164,7 @@
 #define __NR_writev		146
 #define __NR_getsid		147
 #define __NR_fdatasync		148
-#define __NR__sysctl		149
+				/* 149 was sys_sysctl */
 #define __NR_mlock		150
 #define __NR_munlock		151
 #define __NR_mlockall		152
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index e7a4804..7456845 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -156,7 +156,7 @@
 146	common	writev				sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
-149	common	_sysctl				sys_sysctl
+149	common	_sysctl				sys_ni_syscall
 150	common	mlock				sys_mlock
 151	common	munlock				sys_munlock
 152	common	mlockall			sys_mlockall
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index 1bcb86f..e4c1d54 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -166,7 +166,7 @@ sys_call_table:
 	.long sys_writev
 	.long sys_getsid
 	.long sys_fdatasync
-	.long sys_sysctl
+	.long sys_ni_syscall	/* 149: for sys_sysctl */
 	.long sys_mlock			/* 150 */
 	.long sys_munlock
 	.long sys_mlockall
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index d6126ee..74adaeca 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -300,7 +300,7 @@
 249	64	nanosleep		sys_nanosleep
 250	32	mremap			sys_mremap
 250	64	mremap			sys_64_mremap
-251	common	_sysctl			sys_sysctl			compat_sys_sysctl
+251	common	_sysctl			sys_ni_syscall
 252	common	getsid			sys_getsid
 253	common	fdatasync		sys_fdatasync
 254	32	nfsservctl		sys_ni_syscall			sys_nis_syscall
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 686d59d..ef76360 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -160,7 +160,7 @@
 146	i386	writev			sys_writev			compat_sys_writev
 147	i386	getsid			sys_getsid
 148	i386	fdatasync		sys_fdatasync
-149	i386	_sysctl			sys_sysctl			compat_sys_sysctl
+149	i386	_sysctl			sys_ni_syscall
 150	i386	mlock			sys_mlock
 151	i386	munlock			sys_munlock
 152	i386	mlockall		sys_mlockall
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index b345b35..6a3b0b3 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -164,7 +164,7 @@
 153	common	vhangup			sys_vhangup
 154	common	modify_ldt		sys_modify_ldt
 155	common	pivot_root		sys_pivot_root
-156	64	_sysctl			sys_sysctl
+156	64	_sysctl			sys_ni_syscall
 157	common	prctl			sys_prctl
 158	common	arch_prctl		sys_arch_prctl
 159	common	adjtimex		sys_adjtimex
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 96cb070..34cbbf5 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -222,7 +222,7 @@
 204	common	quotactl			sys_quotactl
 # 205 was old nfsservctl
 205	common	nfsservctl			sys_ni_syscall
-206	common	_sysctl				sys_sysctl
+206	common	_sysctl				sys_ni_syscall
 207	common	bdflush				sys_bdflush
 208	common	uname				sys_newuname
 209	common	sysinfo				sys_sysinfo
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 86b61e8..c55d245 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -859,7 +859,6 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32);
 asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len,
 				unsigned flags);
-asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args);
 
 /* obsolete: fs/readdir.c */
 asmlinkage long compat_sys_old_readdir(unsigned int fd,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 63ffa6d..915233a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -48,7 +48,6 @@
 struct statfs64;
 struct statx;
 struct fsinfo_params;
-struct __sysctl_args;
 struct sysinfo;
 struct timespec;
 struct __kernel_old_timeval;
@@ -1125,7 +1124,6 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
 asmlinkage long sys_bdflush(int func, long data);
 asmlinkage long sys_oldumount(char __user *name);
 asmlinkage long sys_uselib(const char __user *library);
-asmlinkage long sys_sysctl(struct __sysctl_args __user *args);
 asmlinkage long sys_sysfs(int option,
 				unsigned long arg1, unsigned long arg2);
 asmlinkage long sys_fork(void);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 8112c15..299f9cb 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -90,15 +90,13 @@ int proc_do_static_key(struct ctl_table *table, int write, void *buffer,
  * sysctl names can be mirrored automatically under /proc/sys.  The
  * procname supplied controls /proc naming.
  *
- * The table's mode will be honoured both for sys_sysctl(2) and
- * proc-fs access.
+ * The table's mode will be honoured for proc-fs access.
  *
  * Leaf nodes in the sysctl tree will be represented by a single file
  * under /proc; non-leaf nodes will be represented by directories.  A
  * null procname disables /proc mirroring at this node.
  *
- * sysctl(2) can automatically manage read and write requests through
- * the sysctl table.  The data and maxlen fields of the ctl_table
+ * The data and maxlen fields of the ctl_table
  * struct enable minimal validation of the values being written to be
  * performed, and the mode field allows minimal authentication.
  * 
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 27c1ed2..84b44c3 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -27,21 +27,6 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 
-#define CTL_MAXNAME 10		/* how many path components do we allow in a
-				   call to sysctl?   In other words, what is
-				   the largest acceptable value for the nlen
-				   member of a struct __sysctl_args to have? */
-
-struct __sysctl_args {
-	int __user *name;
-	int nlen;
-	void __user *oldval;
-	size_t __user *oldlenp;
-	void __user *newval;
-	size_t newlen;
-	unsigned long __unused[4];
-};
-
 /* Define sysctl names first */
 
 /* Top-level names: */
diff --git a/kernel/Makefile b/kernel/Makefile
index 0bd4ed7..a3f7c08 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,7 +5,7 @@
 
 obj-y     = fork.o exec_domain.o panic.o \
 	    cpu.o exit.o softirq.o resource.o \
-	    sysctl.o sysctl_binary.o capability.o ptrace.o user.o \
+	    sysctl.o capability.o ptrace.o user.o \
 	    signal.o sys.o umh.o workqueue.o pid.o task_work.o \
 	    extable.o params.o \
 	    kthread.o sys_ni.o nsproxy.o \
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index fad48ac..c935c18 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -373,7 +373,6 @@ asmlinkage long sys_ni_syscall(void)
 COND_SYSCALL_COMPAT(socketcall);
 
 /* compat syscalls for arm64, x86, ... */
-COND_SYSCALL_COMPAT(sysctl);
 COND_SYSCALL_COMPAT(fanotify_mark);
 
 /* x86 */
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
deleted file mode 100644
index 7d550cc..00000000
--- a/kernel/sysctl_binary.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/stat.h>
-#include <linux/sysctl.h>
-#include "../fs/xfs/xfs_sysctl.h"
-#include <linux/sunrpc/debug.h>
-#include <linux/string.h>
-#include <linux/syscalls.h>
-#include <linux/namei.h>
-#include <linux/mount.h>
-#include <linux/fs.h>
-#include <linux/nsproxy.h>
-#include <linux/pid_namespace.h>
-#include <linux/file.h>
-#include <linux/ctype.h>
-#include <linux/netdevice.h>
-#include <linux/kernel.h>
-#include <linux/uuid.h>
-#include <linux/slab.h>
-#include <linux/compat.h>
-
-static ssize_t binary_sysctl(const int *name, int nlen,
-	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
-{
-	return -ENOSYS;
-}
-
-static void deprecated_sysctl_warning(const int *name, int nlen)
-{
-	int i;
-
-	/*
-	 * CTL_KERN/KERN_VERSION is used by older glibc and cannot
-	 * ever go away.
-	 */
-	if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
-		return;
-
-	if (printk_ratelimit()) {
-		printk(KERN_INFO
-			"warning: process `%s' used the deprecated sysctl "
-			"system call with ", current->comm);
-		for (i = 0; i < nlen; i++)
-			printk(KERN_CONT "%d.", name[i]);
-		printk(KERN_CONT "\n");
-	}
-	return;
-}
-
-#define WARN_ONCE_HASH_BITS 8
-#define WARN_ONCE_HASH_SIZE (1<<WARN_ONCE_HASH_BITS)
-
-static DECLARE_BITMAP(warn_once_bitmap, WARN_ONCE_HASH_SIZE);
-
-#define FNV32_OFFSET 2166136261U
-#define FNV32_PRIME 0x01000193
-
-/*
- * Print each legacy sysctl (approximately) only once.
- * To avoid making the tables non-const use a external
- * hash-table instead.
- * Worst case hash collision: 6, but very rarely.
- * NOTE! We don't use the SMP-safe bit tests. We simply
- * don't care enough.
- */
-static void warn_on_bintable(const int *name, int nlen)
-{
-	int i;
-	u32 hash = FNV32_OFFSET;
-
-	for (i = 0; i < nlen; i++)
-		hash = (hash ^ name[i]) * FNV32_PRIME;
-	hash %= WARN_ONCE_HASH_SIZE;
-	if (__test_and_set_bit(hash, warn_once_bitmap))
-		return;
-	deprecated_sysctl_warning(name, nlen);
-}
-
-static ssize_t do_sysctl(int __user *args_name, int nlen,
-	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
-{
-	int name[CTL_MAXNAME];
-	int i;
-
-	/* Check args->nlen. */
-	if (nlen < 0 || nlen > CTL_MAXNAME)
-		return -ENOTDIR;
-	/* Read in the sysctl name for simplicity */
-	for (i = 0; i < nlen; i++)
-		if (get_user(name[i], args_name + i))
-			return -EFAULT;
-
-	warn_on_bintable(name, nlen);
-
-	return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen);
-}
-
-SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
-{
-	struct __sysctl_args tmp;
-	size_t oldlen = 0;
-	ssize_t result;
-
-	if (copy_from_user(&tmp, args, sizeof(tmp)))
-		return -EFAULT;
-
-	if (tmp.oldval && !tmp.oldlenp)
-		return -EFAULT;
-
-	if (tmp.oldlenp && get_user(oldlen, tmp.oldlenp))
-		return -EFAULT;
-
-	result = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, oldlen,
-			   tmp.newval, tmp.newlen);
-
-	if (result >= 0) {
-		oldlen = result;
-		result = 0;
-	}
-
-	if (tmp.oldlenp && put_user(oldlen, tmp.oldlenp))
-		return -EFAULT;
-
-	return result;
-}
-
-
-#ifdef CONFIG_COMPAT
-
-struct compat_sysctl_args {
-	compat_uptr_t	name;
-	int		nlen;
-	compat_uptr_t	oldval;
-	compat_uptr_t	oldlenp;
-	compat_uptr_t	newval;
-	compat_size_t	newlen;
-	compat_ulong_t	__unused[4];
-};
-
-COMPAT_SYSCALL_DEFINE1(sysctl, struct compat_sysctl_args __user *, args)
-{
-	struct compat_sysctl_args tmp;
-	compat_size_t __user *compat_oldlenp;
-	size_t oldlen = 0;
-	ssize_t result;
-
-	if (copy_from_user(&tmp, args, sizeof(tmp)))
-		return -EFAULT;
-
-	if (tmp.oldval && !tmp.oldlenp)
-		return -EFAULT;
-
-	compat_oldlenp = compat_ptr(tmp.oldlenp);
-	if (compat_oldlenp && get_user(oldlen, compat_oldlenp))
-		return -EFAULT;
-
-	result = do_sysctl(compat_ptr(tmp.name), tmp.nlen,
-			   compat_ptr(tmp.oldval), oldlen,
-			   compat_ptr(tmp.newval), tmp.newlen);
-
-	if (result >= 0) {
-		oldlen = result;
-		result = 0;
-	}
-
-	if (compat_oldlenp && put_user(oldlen, compat_oldlenp))
-		return -EFAULT;
-
-	return result;
-}
-
-#endif /* CONFIG_COMPAT */
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 35b61bf..6d29d9a 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -193,7 +193,7 @@
 146	common	writev				sys_writev			compat_sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
-149	nospu	_sysctl				sys_sysctl			compat_sys_sysctl
+149	nospu	_sysctl				sys_ni_syscall
 150	common	mlock				sys_mlock
 151	common	munlock				sys_munlock
 152	common	mlockall			sys_mlockall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b38d484..0193f9b 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -138,7 +138,7 @@
 146  common	writev			sys_writev			compat_sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
-149  common	_sysctl			sys_sysctl			compat_sys_sysctl
+149  common	_sysctl			sys_ni_syscall
 150  common	mlock			sys_mlock			compat_sys_mlock
 151  common	munlock			sys_munlock			compat_sys_munlock
 152  common	mlockall		sys_mlockall			sys_mlockall
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 37b844f..4e50062 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -164,7 +164,7 @@
 153	common	vhangup			sys_vhangup
 154	common	modify_ldt		sys_modify_ldt
 155	common	pivot_root		sys_pivot_root
-156	64	_sysctl			sys_sysctl
+156	64	_sysctl			sys_ni_syscall
 157	common	prctl			sys_prctl
 158	common	arch_prctl		sys_arch_prctl
 159	common	adjtimex		sys_adjtimex
-- 
1.8.5.6


^ permalink raw reply related

* Re: [PATCH v2] All arch: remove system call sys_sysctl
From: Stephen Rothwell @ 2020-06-11  4:12 UTC (permalink / raw)
  To: Xiaoming Ni
  Cc: linux-sh, catalin.marinas, paulus, ak, paulburton, geert,
	mattst88, brgerst, acme, cyphar, viro, luto, tglx, surenb, rth,
	young.liuyang, linux-parisc, rdunlap, linux-kernel, mcgrof,
	linux-fsdevel, akpm, mark.rutland, linux-ia64, linux-xtensa,
	jongk, linux, James.Bottomley, jcmvbkbc, linux-s390, ysato,
	deller, yzaikin, mszeredi, gor, linux-alpha, linux-m68k,
	linux-arm-kernel, chris, tony.luck, linux-api, zhouyanjie,
	minchan, ebiederm, sargun, alexander.shishkin, heiko.carstens,
	alex.huangjianhui, will, krzk, borntraeger, vbabka, samitolvanen,
	flameeyes, ravi.bangoria, elver, keescook, arnd, bp, christian,
	tsbogend, jiri, martin.petersen, yamada.masahiro, oleg,
	sudeep.holla, olof, shawnguo, davem, bauerman, dalias, fenghua.yu,
	peterz, dhowells, hpa, sparclinux, jolsa, svens, x86, linux,
	mingo, naveen.n.rao, paulmck, npiggin, namhyung, dvyukov, axboe,
	monstr, haolee.swjtu, linux-mips, ink, linuxppc-dev
In-Reply-To: <1591847640-124894-1-git-send-email-nixiaoming@huawei.com>

[-- Attachment #1: Type: text/plain, Size: 1962 bytes --]

Hi Xiaoming,

On Thu, 11 Jun 2020 11:54:00 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
>
>  arch/sh/configs/dreamcast_defconfig                |   1 -
>  arch/sh/configs/espt_defconfig                     |   1 -
>  arch/sh/configs/hp6xx_defconfig                    |   1 -
>  arch/sh/configs/landisk_defconfig                  |   1 -
>  arch/sh/configs/lboxre2_defconfig                  |   1 -
>  arch/sh/configs/microdev_defconfig                 |   1 -
>  arch/sh/configs/migor_defconfig                    |   1 -
>  arch/sh/configs/r7780mp_defconfig                  |   1 -
>  arch/sh/configs/r7785rp_defconfig                  |   1 -
>  arch/sh/configs/rts7751r2d1_defconfig              |   1 -
>  arch/sh/configs/rts7751r2dplus_defconfig           |   1 -
>  arch/sh/configs/se7206_defconfig                   |   1 -
>  arch/sh/configs/se7343_defconfig                   |   1 -
>  arch/sh/configs/se7619_defconfig                   |   1 -
>  arch/sh/configs/se7705_defconfig                   |   1 -
>  arch/sh/configs/se7750_defconfig                   |   1 -
>  arch/sh/configs/se7751_defconfig                   |   1 -
>  arch/sh/configs/secureedge5410_defconfig           |   1 -
>  arch/sh/configs/sh03_defconfig                     |   1 -
>  arch/sh/configs/sh7710voipgw_defconfig             |   1 -
>  arch/sh/configs/sh7757lcr_defconfig                |   1 -
>  arch/sh/configs/sh7763rdp_defconfig                |   1 -
>  arch/sh/configs/shmin_defconfig                    |   1 -
>  arch/sh/configs/titan_defconfig                    |   1 -
>  arch/sh/include/uapi/asm/unistd_64.h               |   2 +-
>  arch/sh/kernel/syscalls/syscall.tbl                |   2 +-
>  arch/sh/kernel/syscalls_64.S                       |   2 +-

You might want to rebase this onto v5.8-rc1 when it is released this
weekend as the 64bit sh code (sh5) has been removed.

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH v2] powerpc: Remove inaccessible CMDLINE default
From: Christophe Leroy @ 2020-06-11  5:46 UTC (permalink / raw)
  To: Chris Packham, mpe, benh, paulus, christophe.leroy
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20200611034140.9133-1-chris.packham@alliedtelesis.co.nz>



Le 11/06/2020 à 05:41, Chris Packham a écrit :
> Since commit cbe46bd4f510 ("powerpc: remove CONFIG_CMDLINE #ifdef mess")
> CONFIG_CMDLINE has always had a value regardless of CONFIG_CMDLINE_BOOL.
> 
> For example:
> 
>   $ make ARCH=powerpc defconfig
>   $ cat .config
>   # CONFIG_CMDLINE_BOOL is not set
>   CONFIG_CMDLINE=""
> 
> When enabling CONFIG_CMDLINE_BOOL this value is kept making the 'default
> "..." if CONFIG_CMDLINE_BOOL' ineffective.
> 
>   $ ./scripts/config --enable CONFIG_CMDLINE_BOOL
>   $ cat .config
>   CONFIG_CMDLINE_BOOL=y
>   CONFIG_CMDLINE=""
> 
> Remove CONFIG_CMDLINE_BOOL and the inaccessible default.

You also have to remove all CONFIG_CMDLINE_BOOL from the defconfigs

Christophe

> 
> Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
> Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> It took me a while to get round to sending a v2, for a refresher v1 can be found here:
> 
> http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20190802050232.22978-1-chris.packham@alliedtelesis.co.nz/
> 
> Changes in v2:
> - Rebase on top of Linus's tree
> - Fix some typos in commit message
> - Add review from Christophe
> - Remove CONFIG_CMDLINE_BOOL
> 
>   arch/powerpc/Kconfig | 6 +-----
>   1 file changed, 1 insertion(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 9fa23eb320ff..51abc59c3334 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -859,12 +859,8 @@ config PPC_DENORMALISATION
>   	  Add support for handling denormalisation of single precision
>   	  values.  Useful for bare metal only.  If unsure say Y here.
>   
> -config CMDLINE_BOOL
> -	bool "Default bootloader kernel arguments"
> -
>   config CMDLINE
> -	string "Initial kernel command string" if CMDLINE_BOOL
> -	default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL
> +	string "Initial kernel command string"
>   	default ""
>   	help
>   	  On some platforms, there is currently no way for the boot loader to
> 

^ permalink raw reply

* [Bug 205183] PPC64: Signal delivery fails with SIGSEGV if between about 1KB and 4KB bytes of stack remain
From: bugzilla-daemon @ 2020-06-11  6:43 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-205183-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=205183

--- Comment #4 from Daniel Black (daniel@linux.ibm.com) ---
Still broken.

danielgb@talos2:~$ gcc -g -Wall -O stacktest.c
danielgb@talos2:~$  ./a.out 1240000 &
[1] 494618
danielgb@talos2:~$ cat /proc/$(pidof a.out)/maps | grep stack
7fffcde80000-7fffcdfb0000 rw-p 00000000 00:00 0                         
[stack]
danielgb@talos2:~$ kill -USR1 %1
danielgb@talos2:~$ signal delivered, stack base 0x7fffcdfb0000 top
0x7fffcde81427 (1240025 used)

[1]+  Done                    ./a.out 1240000
danielgb@talos2:~$ ./a.out 1241000 &
[1] 494677
danielgb@talos2:~$ kill -USR1 %1
danielgb@talos2:~$ 
[1]+  Segmentation fault      ./a.out 1241000
danielgb@talos2:~$ 
danielgb@talos2:~$ dmesg | grep a.out
[10617.616145] a.out[494587]: bad frame in setup_rt_frame: 00007fffdea30010 nip
000000011a0a09fc lr 00007fffa1c404c8
[10865.752876] a.out[494677]: bad frame in setup_rt_frame: 00007fffcc420030 nip
0000000135a70a3c lr 00007fff952604c8
danielgb@talos2:~$ uname -a
Linux talos2 5.7.0-rc5-77151-gfea086b627a0 #1 SMP Mon May 11 16:00:00 AEST 2020
ppc64le ppc64le ppc64le GNU/Linux

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* Re: [PATCH v2] All arch: remove system call sys_sysctl
From: Will Deacon @ 2020-06-11  7:07 UTC (permalink / raw)
  To: Xiaoming Ni
  Cc: linux-sh, catalin.marinas, paulus, ak, paulburton, geert,
	mattst88, brgerst, acme, cyphar, viro, luto, tglx, surenb, rth,
	young.liuyang, linux-parisc, rdunlap, linux-kernel, mcgrof,
	linux-fsdevel, akpm, mark.rutland, linux-ia64, linux-xtensa,
	jongk, linux, James.Bottomley, jcmvbkbc, linux-s390, ysato,
	deller, yzaikin, mszeredi, gor, linux-alpha, linux-m68k,
	linux-arm-kernel, chris, tony.luck, linux-api, zhouyanjie,
	minchan, ebiederm, sargun, alexander.shishkin, heiko.carstens,
	alex.huangjianhui, krzk, borntraeger, vbabka, samitolvanen,
	flameeyes, ravi.bangoria, elver, keescook, arnd, bp, christian,
	tsbogend, jiri, martin.petersen, yamada.masahiro, oleg,
	sudeep.holla, olof, shawnguo, davem, bauerman, dalias, fenghua.yu,
	peterz, dhowells, hpa, sparclinux, jolsa, svens, x86, linux,
	mingo, naveen.n.rao, paulmck, sfr, npiggin, namhyung, dvyukov,
	axboe, monstr, haolee.swjtu, linux-mips, ink, linuxppc-dev
In-Reply-To: <1591847640-124894-1-git-send-email-nixiaoming@huawei.com>

On Thu, Jun 11, 2020 at 11:54:00AM +0800, Xiaoming Ni wrote:
> Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
> sys_sysctl is actually unavailable: any input can only return an error.
> 
> We have been warning about people using the sysctl system call for years
> and believe there are no more users.  Even if there are users of this
> interface if they have not complained or fixed their code by now they
> probably are not going to, so there is no point in warning them any
> longer.
> 
> So completely remove sys_sysctl on all architectures.
> 
> Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
> 
> changes in v2:
>   According to Kees Cook's suggestion, completely remove sys_sysctl on all arch
>   According to Eric W. Biederman's suggestion, update the commit log
> 
> V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaoming@huawei.com/
>   Delete the code of sys_sysctl and return -ENOSYS directly at the function entry
> ---
>  arch/alpha/kernel/syscalls/syscall.tbl             |   2 +-
>  arch/arm/configs/am200epdkit_defconfig             |   1 -
>  arch/arm/tools/syscall.tbl                         |   2 +-
>  arch/arm64/include/asm/unistd32.h                  |   4 +-

For the arm/arm64 parts:

Acked-by: Will Deacon <will@kernel.org>

Will

^ permalink raw reply

* Linux powerpc new system call instruction and ABI
From: Nicholas Piggin @ 2020-06-11  8:12 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: libc-dev, musl, Nicholas Piggin, linux-api

Thanks to everyone who has given feedback on the proposed new system
call instruction and ABI, I think it has reached agreement and the
implementation can be merged into Linux.

I have a hacked glibc implementation (that doesn't do all the right
HWCAP detection and misses a few things) that I've tested several things
including some kernel selftests (involving signals and syscalls) with.

System Call Vectored (scv) ABI
==============================

The scv instruction is introduced with POWER9 / ISA3, it comes with an
rfscv counter-part. The benefit of these instructions is performance
(trading slower SRR0/1 with faster LR/CTR registers, and entering the
kernel with MSR[EE] and MSR[RI] left enabled, which can reduce MSR 
updates. The scv instruction has 128 levels (not enough to cover the Linux
system call space).

Assignment and advertisement
----------------------------
The proposal is to assign scv levels conservatively, and advertise them
with HWCAP feature bits as we add support for more.

Linux has not enabled FSCR[SCV] yet, so executing the scv instruction will
cause the kernel to log a "SCV facility unavilable" message, and deliver a
SIGILL with ILL_ILLOPC to the process. Linux has defined a HWCAP2 bit
PPC_FEATURE2_SCV for SCV support, but does not set it.

This change allocates the zero level ('scv 0'), advertised with
PPC_FEATURE2_SCV, which will be used to provide normal Linux system
calls (equivalent to 'sc').

Attempting to execute scv with other levels will cause a SIGILL to be
delivered the same as before, but will not log a "SCV facility unavailable"
message (because the processor facility is enabled).

Calling convention
------------------
The proposal is for scv 0 to provide the standard Linux system call ABI 
with the following differences from sc convention[1]:

- lr is to be volatile across scv calls. This is necessary because the 
  scv instruction clobbers lr. From previous discussion, this should be 
  possible to deal with in GCC clobbers and CFI.

- cr1 and cr5-cr7 are volatile. This matches the C ABI and would allow the
  kernel system call exit to avoid restoring the volatile cr registers
  (although we probably still would anyway to avoid information leaks).

- Error handling: The consensus among kernel, glibc, and musl is to move to
  using negative return values in r3 rather than CR0[SO]=1 to indicate error,
  which matches most other architectures, and is closer to a function call.

Notes
-----
- r0,r4-r8 are documented as volatile in the ABI, but the kernel patch as
  submitted currently preserves them. This is to leave room for deciding
  which way to go with these. Some small benefit was found by preserving
  them[1] but I'm not convinced it's worth deviating from the C function
  call ABI just for this. Release code should follow the ABI.

Previous discussions:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208691.html
https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/209268.html

[1] https://github.com/torvalds/linux/blob/master/Documentation/powerpc/syscall64-abi.rst
[2] https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/209263.html

The following patches to add scv support to Linux are posted to

 https://lists.ozlabs.org/pipermail/linuxppc-dev/

Nicholas Piggin (2):
  powerpc/64s/exception: treat NIA below __end_interrupts as soft-masked
  powerpc/64s: system call support for scv/rfscv instructions

Thanks,
Nick

-- 
2.23.0


^ permalink raw reply

* [PATCH 1/2] powerpc/64s/exception: treat NIA below __end_interrupts as soft-masked
From: Nicholas Piggin @ 2020-06-11  8:12 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: libc-dev, musl, Nicholas Piggin, linux-api
In-Reply-To: <20200611081203.995112-1-npiggin@gmail.com>

The scv instruction causes an interrupt which can enter the kernel with
MSR[EE]=1, thus allowing interrupts to hit at any time. These must not
be taken as normal interrupts, because they come from MSR[PR]=0 context,
and yet the kernel stack is not yet set up and r13 is not set to the
PACA).

Treat this as a soft-masked interrupt regardless of the soft masked
state. This does not affect behaviour yet, because currently all
interrupts are taken with MSR[EE]=0.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e70ebb5c318c..388e34665b4a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -508,8 +508,24 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 
 .macro __GEN_COMMON_BODY name
 	.if IMASK
+		.if ! ISTACK
+		.error "No support for masked interrupt to use custom stack"
+		.endif
+
+		/* If coming from user, skip soft-mask tests. */
+		andi.	r10,r12,MSR_PR
+		bne	2f
+
+		/* Kernel code running below __end_interrupts is implicitly
+		 * soft-masked */
+		LOAD_HANDLER(r10, __end_interrupts)
+		cmpld	r11,r10
+		li	r10,IMASK
+		blt-	1f
+
+		/* Test the soft mask state against our interrupt's bit */
 		lbz	r10,PACAIRQSOFTMASK(r13)
-		andi.	r10,r10,IMASK
+1:		andi.	r10,r10,IMASK
 		/* Associate vector numbers with bits in paca->irq_happened */
 		.if IVEC == 0x500 || IVEC == 0xea0
 		li	r10,PACA_IRQ_EE
@@ -540,7 +556,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 
 	.if ISTACK
 	andi.	r10,r12,MSR_PR		/* See if coming from user	*/
-	mr	r10,r1			/* Save r1			*/
+2:	mr	r10,r1			/* Save r1			*/
 	subi	r1,r1,INT_FRAME_SIZE	/* alloc frame on kernel stack	*/
 	beq-	100f
 	ld	r1,PACAKSAVE(r13)	/* kernel stack to use		*/
@@ -2838,7 +2854,8 @@ masked_interrupt:
 	ld	r10,PACA_EXGEN+EX_R10(r13)
 	ld	r11,PACA_EXGEN+EX_R11(r13)
 	ld	r12,PACA_EXGEN+EX_R12(r13)
-	/* returns to kernel where r13 must be set up, so don't restore it */
+	ld	r13,PACA_EXGEN+EX_R13(r13)
+	/* May return to masked low address where r13 is not set up */
 	.if \hsrr
 	HRFI_TO_KERNEL
 	.else
@@ -2997,6 +3014,10 @@ EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
 
 USE_FIXED_SECTION(virt_trampolines)
 	/*
+	 * All code below __end_interrupts is treated as soft-masked. If
+	 * any code runs here with MSR[EE]=1, it must then cope with pending
+	 * soft interrupt being raised (i.e., by ensuring it is replayed).
+	 *
 	 * The __end_interrupts marker must be past the out-of-line (OOL)
 	 * handlers, so that they are copied to real address 0x100 when running
 	 * a relocatable kernel. This ensures they can be reached from the short
-- 
2.23.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox