* [PATCH v3 7/8] powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches
From: Christopher M. Riedl @ 2021-01-09 3:25 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20210109032557.13831-1-cmr@codefail.de>
From: Daniel Axtens <dja@axtens.net>
Add uaccess blocks and use the 'unsafe' versions of functions doing user
access where possible to reduce the number of times uaccess has to be
opened/closed.
There is no 'unsafe' version of copy_siginfo_to_user, so move it
slightly to allow for a "longer" uaccess block.
Signed-off-by: Daniel Axtens <dja@axtens.net>
Co-developed-by: Christopher M. Riedl <cmr@codefail.de>
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
arch/powerpc/kernel/signal_64.c | 54 +++++++++++++++++++++------------
1 file changed, 34 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 0c7f633b832b..7e62d62ef482 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -848,44 +848,51 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsigned long msr __maybe_unused = regs->msr;
frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
- if (!access_ok(frame, sizeof(*frame)))
- goto badframe;
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- if (err)
+ /* This only applies when calling unsafe_setup_sigcontext() and must be
+ * called before opening the uaccess window.
+ */
+ if (!MSR_TM_ACTIVE(msr))
+ prepare_setup_sigcontext(tsk, 1);
+
+ if (!user_write_access_begin(frame, sizeof(*frame)))
goto badframe;
+ unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+ unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
+ unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
if (MSR_TM_ACTIVE(msr)) {
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
- err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+ unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+ user_write_access_end();
+
err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
&frame->uc_transact.uc_mcontext,
tsk, ksig->sig, NULL,
(unsigned long)ksig->ka.sa.sa_handler,
msr);
+
+ if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
+ goto badframe;
+
#endif
} else {
- err |= __put_user(0, &frame->uc.uc_link);
- prepare_setup_sigcontext(tsk, 1);
- if (!user_write_access_begin(frame, sizeof(struct rt_sigframe)))
- return -EFAULT;
- err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
- ksig->sig, NULL,
- (unsigned long)ksig->ka.sa.sa_handler, 1);
- user_write_access_end();
+ unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+ unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+ NULL, (unsigned long)ksig->ka.sa.sa_handler,
+ 1, badframe_block);
}
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto badframe;
+
+ unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+ user_write_access_end();
/* Make sure signal handler doesn't get spurious FP exceptions */
tsk->thread.fp_state.fpscr = 0;
@@ -900,6 +907,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
regs->nip = (unsigned long) &frame->tramp[0];
}
+
+ /* Save the siginfo outside of the unsafe block. */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ goto badframe;
+
/* Allocate a dummy caller frame for the signal handler. */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
@@ -939,6 +951,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
return 0;
+badframe_block:
+ user_write_access_end();
badframe:
signal_fault(current, regs, "handle_rt_signal64", frame);
--
2.26.1
^ permalink raw reply related
* Re: [patch V3 13/37] mips/mm/highmem: Switch to generic kmap atomic
From: Thomas Bogendoerfer @ 2021-01-09 0:33 UTC (permalink / raw)
To: Paul Cercueil
Cc: juri.lelli, linux-aio, airlied, nouveau, bigeasy, dri-devel,
linux-mips, bsegall, jcmvbkbc, ray.huang, paulus, kraxel,
sparclinux, deanbo422, hch, vincent.guittot, paulmck, x86, linux,
linux-csky, mingo, peterz, linux-graphics-maintainer, bskeggs,
airlied, linux-snps-arc, linux-mm, mgorman, linux-xtensa, arnd,
intel-gfx, sroland, josef, rostedt, torvalds, green.hu,
rodrigo.vivi, dsterba, tglx, virtualization, dietmar.eggemann,
linux-arm-kernel, chris, monstr, nickhu, clm, linuxppc-dev,
linux-kernel, christian.koenig, bcrl, spice-devel, vgupta,
linux-fsdevel, akpm, bristot, davem, linux-btrfs, viro
In-Reply-To: <20210108235805.GA17543@alpha.franken.de>
On Sat, Jan 09, 2021 at 12:58:05AM +0100, Thomas Bogendoerfer wrote:
> On Fri, Jan 08, 2021 at 08:20:43PM +0000, Paul Cercueil wrote:
> > Hi Thomas,
> >
> > 5.11 does not boot anymore on Ingenic SoCs, I bisected it to this commit.
> >
> > Any idea what could be happening?
>
> not yet, kernel crash log of a Malta QEMU is below.
update:
This dirty hack lets the Malta QEMU boot again:
diff --git a/mm/highmem.c b/mm/highmem.c
index c3a9ea7875ef..190cdda1149d 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -515,7 +515,7 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte - idx)));
pteval = pfn_pte(pfn, prot);
- set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval);
+ set_pte(kmap_pte - idx, pteval);
arch_kmap_local_post_map(vaddr, pteval);
current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
preempt_enable();
set_pte_at() tries to update the cache and could do a kmap_atomic() there.
Not sure if this is allowed at this point.
Thomas.
--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]
^ permalink raw reply related
* Re: [patch V3 13/37] mips/mm/highmem: Switch to generic kmap atomic
From: Thomas Bogendoerfer @ 2021-01-08 23:58 UTC (permalink / raw)
To: Paul Cercueil
Cc: juri.lelli, linux-aio, airlied, nouveau, bigeasy, dri-devel,
linux-mips, bsegall, jcmvbkbc, ray.huang, paulus, kraxel,
sparclinux, deanbo422, hch, vincent.guittot, paulmck, x86, linux,
linux-csky, mingo, peterz, linux-graphics-maintainer, bskeggs,
airlied, linux-snps-arc, linux-mm, mgorman, linux-xtensa, arnd,
intel-gfx, sroland, josef, rostedt, torvalds, green.hu,
rodrigo.vivi, dsterba, tglx, virtualization, dietmar.eggemann,
linux-arm-kernel, chris, monstr, nickhu, clm, linuxppc-dev,
linux-kernel, christian.koenig, bcrl, spice-devel, vgupta,
linux-fsdevel, akpm, bristot, davem, linux-btrfs, viro
In-Reply-To: <JUTMMQ.NNFWKIUV7UUJ1@crapouillou.net>
On Fri, Jan 08, 2021 at 08:20:43PM +0000, Paul Cercueil wrote:
> Hi Thomas,
>
> 5.11 does not boot anymore on Ingenic SoCs, I bisected it to this commit.
>
> Any idea what could be happening?
not yet, kernel crash log of a Malta QEMU is below.
Thomas.
Kernel bug detected[#1]:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc1-00017-gccb21774863a #2
$ 0 : 00000000 00000001 00000000 00000010
$ 4 : 00000001 000005cf 9e00059f 00000000
$ 8 : 00118173 809e6db8 9e00059f 00000000
$12 : 82023c00 00000001 810da04c 0212422f
$16 : 810da000 00027800 000005cf 80b4bf9c
$20 : 809e968c 82602400 810da000 0000000b
$24 : 021558f9 00000000
$28 : 820e0000 820e3928 80b10000 802710d0
Hi : 0000346c
Lo : 000002dd
epc : 80271114 __kmap_local_pfn_prot+0x78/0x1c0
ra : 802710d0 __kmap_local_pfn_prot+0x34/0x1c0
Status: 1000a403 KERNEL EXL IE
Cause : 00800034 (ExcCode 0d)
PrId : 0001a800 (MIPS P5600)
Modules linked in:
Process swapper/0 (pid: 1, threadinfo=(ptrval), task=(ptrval), tls=00000000)
Stack : 7fffffff 820c2408 820e3990 ffffff04 ffff0a00 80518224 000081a4 810da000
00000001 000005cf fff64000 8011c77c 820e3b26 ffffff04 ffff0a00 80518440
80b30000 80b4bf64 9e0005cf 000005cf fff64000 80271188 00000000 820e3a60
80b10000 80194478 0000005e 80954406 809e0000 810da000 00000001 000005cf
fff68000 8011c77c 8088fd44 809f6074 000000f4 00000000 00000000 80b4bf68
...
Call Trace:
[<80271114>] __kmap_local_pfn_prot+0x78/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<8011c77c>] __update_cache+0x16c/0x174
[<80271188>] __kmap_local_pfn_prot+0xec/0x1c0
[<802c49a0>] copy_string_kernel+0x168/0x264
[<802c5d18>] kernel_execve+0xd0/0x164
[<801006cc>] try_to_run_init_process+0x18/0x5c
[<80859e0c>] kernel_init+0xd0/0x120
[<801037f8>] ret_from_kernel_thread+0x14/0x1c
Code: 8c630564 28640010 38840001 <00040336> 8f82000c 2463ffff 00021100 00431021 2403ffbf
--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]
^ permalink raw reply
* Re: [PATCH] powerpc/mm: Limit allocation of SWIOTLB on server machines
From: Thiago Jung Bauermann @ 2021-01-09 0:27 UTC (permalink / raw)
To: Ram Pai; +Cc: Satheesh Rajendran, linuxppc-dev, linux-kernel
In-Reply-To: <20201224031409.GB4102@ram-ibm-com.ibm.com>
Ram Pai <linuxram@us.ibm.com> writes:
> On Wed, Dec 23, 2020 at 09:06:01PM -0300, Thiago Jung Bauermann wrote:
>>
>> Hi Ram,
>>
>> Thanks for reviewing this patch.
>>
>> Ram Pai <linuxram@us.ibm.com> writes:
>>
>> > On Fri, Dec 18, 2020 at 03:21:03AM -0300, Thiago Jung Bauermann wrote:
>> >> On server-class POWER machines, we don't need the SWIOTLB unless we're a
>> >> secure VM. Nevertheless, if CONFIG_SWIOTLB is enabled we unconditionally
>> >> allocate it.
>> >>
>> >> In most cases this is harmless, but on a few machine configurations (e.g.,
>> >> POWER9 powernv systems with 4 GB area reserved for crashdump kernel) it can
>> >> happen that memblock can't find a 64 MB chunk of memory for the SWIOTLB and
>> >> fails with a scary-looking WARN_ONCE:
>> >>
>> >> ------------[ cut here ]------------
>> >> memblock: bottom-up allocation failed, memory hotremove may be affected
>> >> WARNING: CPU: 0 PID: 0 at mm/memblock.c:332 memblock_find_in_range_node+0x328/0x340
>> >> Modules linked in:
>> >> CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-rc2-orig+ #6
>> >> NIP: c000000000442f38 LR: c000000000442f34 CTR: c0000000001e0080
>> >> REGS: c000000001def900 TRAP: 0700 Not tainted (5.10.0-rc2-orig+)
>> >> MSR: 9000000002021033 <SF,HV,VEC,ME,IR,DR,RI,LE> CR: 28022222 XER: 20040000
>> >> CFAR: c00000000014b7b4 IRQMASK: 1
>> >> GPR00: c000000000442f34 c000000001defba0 c000000001deff00 0000000000000047
>> >> GPR04: 00000000ffff7fff c000000001def828 c000000001def820 0000000000000000
>> >> GPR08: 0000001ffc3e0000 c000000001b75478 c000000001b75478 0000000000000001
>> >> GPR12: 0000000000002000 c000000002030000 0000000000000000 0000000000000000
>> >> GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000002030000
>> >> GPR20: 0000000000000000 0000000000010000 0000000000010000 c000000001defc10
>> >> GPR24: c000000001defc08 c000000001c91868 c000000001defc18 c000000001c91890
>> >> GPR28: 0000000000000000 ffffffffffffffff 0000000004000000 00000000ffffffff
>> >> NIP [c000000000442f38] memblock_find_in_range_node+0x328/0x340
>> >> LR [c000000000442f34] memblock_find_in_range_node+0x324/0x340
>> >> Call Trace:
>> >> [c000000001defba0] [c000000000442f34] memblock_find_in_range_node+0x324/0x340 (unreliable)
>> >> [c000000001defc90] [c0000000015ac088] memblock_alloc_range_nid+0xec/0x1b0
>> >> [c000000001defd40] [c0000000015ac1f8] memblock_alloc_internal+0xac/0x110
>> >> [c000000001defda0] [c0000000015ac4d0] memblock_alloc_try_nid+0x94/0xcc
>> >> [c000000001defe30] [c00000000159c3c8] swiotlb_init+0x78/0x104
>> >> [c000000001defea0] [c00000000158378c] mem_init+0x4c/0x98
>> >> [c000000001defec0] [c00000000157457c] start_kernel+0x714/0xac8
>> >> [c000000001deff90] [c00000000000d244] start_here_common+0x1c/0x58
>> >> Instruction dump:
>> >> 2c230000 4182ffd4 ea610088 ea810090 4bfffe84 39200001 3d42fff4 3c62ff60
>> >> 3863c560 992a8bfc 4bd0881d 60000000 <0fe00000> ea610088 4bfffd94 60000000
>> >> random: get_random_bytes called from __warn+0x128/0x184 with crng_init=0
>> >> ---[ end trace 0000000000000000 ]---
>> >> software IO TLB: Cannot allocate buffer
>> >>
>> >> Unless this is a secure VM the message can actually be ignored, because the
>> >> SWIOTLB isn't needed. Therefore, let's avoid the SWIOTLB in those cases.
>> >
>> > The above warn_on is conveying a genuine warning. Should it be silenced?
>>
>> Not sure I understand your point. This patch doesn't silence the
>> warning, it avoids the problem it is warning about.
>
> Sorry, I should have explained it better. My point is...
>
> If CONFIG_SWIOTLB is enabled, it means that the kernel is
> promising the bounce buffering capability. I know, currently we
> do not have any kernel subsystems that use bounce buffers on
> non-secure-pseries-kernel or powernv-kernel. But that does not
> mean, there won't be any. In case there is such a third-party
> module needing bounce buffering, it won't be able to operate,
> because of the proposed change in your patch.
>
> Is that a good thing or a bad thing, I do not know. I will let
> the experts opine.
Ping? Does anyone else have an opinion on this? The other option I can
think of is changing the crashkernel code to not reserve so much memory
below 4 GB. Other people are considering this option, but it's not
planned for the near future.
Also, there's a patch currently in linux-next which removes the scary
warning because of unrelated reasons:
https://lore.kernel.org/lkml/20201217201214.3414100-2-guro@fb.com
So assuming that the patch above goes in and keeping the assumption that
the swiotlb won't be needed in the powernv machines where I've seen the
warning happen, we can just leave things as they are now.
--
Thiago Jung Bauermann
IBM Linux Technology Center
^ permalink raw reply
* Re: [patch V3 13/37] mips/mm/highmem: Switch to generic kmap atomic
From: Paul Cercueil @ 2021-01-08 20:20 UTC (permalink / raw)
To: tglx
Cc: juri.lelli, linux-aio, airlied, nouveau, bigeasy, dri-devel,
linux-mips, bsegall, jcmvbkbc, ray.huang, paulus, kraxel,
sparclinux, deanbo422, hch, vincent.guittot, paulmck, x86, linux,
linux-csky, mingo, peterz, linux-graphics-maintainer, bskeggs,
airlied, linux-snps-arc, linux-mm, mgorman, linux-xtensa, arnd,
intel-gfx, sroland, josef, rostedt, torvalds, green.hu,
rodrigo.vivi, dsterba, virtualization, dietmar.eggemann,
linux-arm-kernel, chris, monstr, tsbogend, nickhu, clm,
linuxppc-dev, linux-kernel, christian.koenig, bcrl, spice-devel,
vgupta, linux-fsdevel, akpm, bristot, davem, linux-btrfs, viro
Hi Thomas,
5.11 does not boot anymore on Ingenic SoCs, I bisected it to this
commit.
Any idea what could be happening?
Cheers,
-Paul
^ permalink raw reply
* [powerpc:next-test] BUILD SUCCESS 3e069ffc01566d93fa14c80f3c705c1610b9c402
From: kernel test robot @ 2021-01-08 20:58 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next-test
branch HEAD: 3e069ffc01566d93fa14c80f3c705c1610b9c402 powerpc/pseries/eeh: Make pseries_send_allow_unfreeze() static
elapsed time: 1134m
configs tested: 157
configs skipped: 2
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm allyesconfig
arm allmodconfig
arm64 defconfig
powerpc xes_mpc85xx_defconfig
powerpc mpc832x_rdb_defconfig
arm imx_v6_v7_defconfig
x86_64 defconfig
nios2 allyesconfig
powerpc mvme5100_defconfig
mips ath25_defconfig
sh shx3_defconfig
m68k multi_defconfig
sh lboxre2_defconfig
openrisc defconfig
powerpc arches_defconfig
mips qi_lb60_defconfig
powerpc sbc8548_defconfig
openrisc simple_smp_defconfig
sh se7780_defconfig
arc haps_hs_smp_defconfig
um x86_64_defconfig
sparc64 alldefconfig
arm oxnas_v6_defconfig
sh se7721_defconfig
powerpc mgcoge_defconfig
mips malta_kvm_defconfig
m68k mvme16x_defconfig
powerpc wii_defconfig
riscv rv32_defconfig
m68k alldefconfig
openrisc or1klitex_defconfig
parisc generic-32bit_defconfig
powerpc warp_defconfig
arm imx_v4_v5_defconfig
sh ap325rxa_defconfig
m68k amcore_defconfig
ia64 alldefconfig
sh sh7785lcr_defconfig
arm h3600_defconfig
m68k atari_defconfig
sparc sparc64_defconfig
c6x evmc6472_defconfig
powerpc sam440ep_defconfig
mips tb0219_defconfig
mips bmips_stb_defconfig
powerpc ppc6xx_defconfig
mips decstation_64_defconfig
nds32 alldefconfig
sh microdev_defconfig
powerpc mpc7448_hpc2_defconfig
mips workpad_defconfig
mips omega2p_defconfig
sh titan_defconfig
powerpc ppc64e_defconfig
sh dreamcast_defconfig
powerpc allnoconfig
arm pxa3xx_defconfig
riscv nommu_virt_defconfig
sh se7619_defconfig
mips ip22_defconfig
ia64 zx1_defconfig
powerpc ppa8548_defconfig
openrisc or1ksim_defconfig
powerpc mpc885_ads_defconfig
mips malta_kvm_guest_defconfig
i386 allyesconfig
powerpc ppc64_defconfig
powerpc tqm8560_defconfig
mips cobalt_defconfig
nios2 3c120_defconfig
mips bigsur_defconfig
powerpc storcenter_defconfig
m68k m5272c3_defconfig
sh ecovec24_defconfig
powerpc pseries_defconfig
arm lpc32xx_defconfig
powerpc kilauea_defconfig
m68k allyesconfig
arm eseries_pxa_defconfig
sh r7780mp_defconfig
powerpc acadia_defconfig
arc axs103_defconfig
arc axs101_defconfig
mips gcw0_defconfig
mips pic32mzda_defconfig
mips pistachio_defconfig
arm vexpress_defconfig
mips gpr_defconfig
sh hp6xx_defconfig
arm integrator_defconfig
sh rts7751r2dplus_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
nios2 defconfig
arc allyesconfig
nds32 allnoconfig
c6x allyesconfig
nds32 defconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
s390 allyesconfig
parisc allyesconfig
s390 defconfig
sparc allyesconfig
sparc defconfig
i386 tinyconfig
i386 defconfig
mips allyesconfig
mips allmodconfig
powerpc allyesconfig
powerpc allmodconfig
x86_64 randconfig-a004-20210108
x86_64 randconfig-a006-20210108
x86_64 randconfig-a001-20210108
x86_64 randconfig-a002-20210108
x86_64 randconfig-a003-20210108
x86_64 randconfig-a005-20210108
i386 randconfig-a005-20210108
i386 randconfig-a002-20210108
i386 randconfig-a001-20210108
i386 randconfig-a003-20210108
i386 randconfig-a006-20210108
i386 randconfig-a004-20210108
i386 randconfig-a016-20210108
i386 randconfig-a011-20210108
i386 randconfig-a014-20210108
i386 randconfig-a015-20210108
i386 randconfig-a013-20210108
i386 randconfig-a012-20210108
riscv nommu_k210_defconfig
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64 rhel-8.3
x86_64 rhel-8.3-kbuiltin
x86_64 kexec
clang tested configs:
x86_64 randconfig-a013-20210108
x86_64 randconfig-a011-20210108
x86_64 randconfig-a012-20210108
x86_64 randconfig-a016-20210108
x86_64 randconfig-a014-20210108
x86_64 randconfig-a015-20210108
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [powerpc:fixes-test] BUILD SUCCESS 3ce47d95b7346dcafd9bed3556a8d072cb2b8571
From: kernel test robot @ 2021-01-08 17:01 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: 3ce47d95b7346dcafd9bed3556a8d072cb2b8571 powerpc: Handle .text.{hot,unlikely}.* in linker script
elapsed time: 900m
configs tested: 55
configs skipped: 109
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arc haps_hs_smp_defconfig
um x86_64_defconfig
sparc64 alldefconfig
arm oxnas_v6_defconfig
sh se7721_defconfig
arm h3600_defconfig
m68k atari_defconfig
sparc sparc64_defconfig
c6x evmc6472_defconfig
mips ath25_defconfig
nios2 3c120_defconfig
mips bigsur_defconfig
powerpc storcenter_defconfig
powerpc mpc7448_hpc2_defconfig
m68k m5272c3_defconfig
sh ecovec24_defconfig
arm eseries_pxa_defconfig
sh r7780mp_defconfig
powerpc acadia_defconfig
arc axs103_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
mips allyesconfig
mips allmodconfig
powerpc allyesconfig
powerpc allmodconfig
powerpc allnoconfig
x86_64 randconfig-a004-20210108
x86_64 randconfig-a006-20210108
x86_64 randconfig-a001-20210108
x86_64 randconfig-a002-20210108
x86_64 randconfig-a003-20210108
x86_64 randconfig-a005-20210108
i386 randconfig-a005-20210108
i386 randconfig-a002-20210108
i386 randconfig-a001-20210108
i386 randconfig-a003-20210108
i386 randconfig-a006-20210108
i386 randconfig-a004-20210108
i386 randconfig-a016-20210108
i386 randconfig-a011-20210108
i386 randconfig-a014-20210108
i386 randconfig-a015-20210108
i386 randconfig-a013-20210108
i386 randconfig-a012-20210108
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [powerpc:merge] BUILD SUCCESS 39a77db53cca8200b8bc99fc0993e127b59f08fb
From: kernel test robot @ 2021-01-08 17:01 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge
branch HEAD: 39a77db53cca8200b8bc99fc0993e127b59f08fb Automatic merge of 'fixes' into merge (2021-01-07 17:43)
elapsed time: 899m
configs tested: 131
configs skipped: 2
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm64 defconfig
arm allyesconfig
arm allmodconfig
nios2 allyesconfig
powerpc mvme5100_defconfig
mips ath25_defconfig
sh shx3_defconfig
m68k multi_defconfig
sh lboxre2_defconfig
openrisc defconfig
powerpc arches_defconfig
mips qi_lb60_defconfig
powerpc sbc8548_defconfig
openrisc simple_smp_defconfig
sh se7780_defconfig
arc haps_hs_smp_defconfig
um x86_64_defconfig
sparc64 alldefconfig
arm oxnas_v6_defconfig
sh se7721_defconfig
powerpc mgcoge_defconfig
mips malta_kvm_defconfig
m68k mvme16x_defconfig
powerpc wii_defconfig
riscv rv32_defconfig
m68k alldefconfig
openrisc or1klitex_defconfig
parisc generic-32bit_defconfig
powerpc warp_defconfig
powerpc motionpro_defconfig
sh microdev_defconfig
arm vexpress_defconfig
mips ath79_defconfig
sh rsk7201_defconfig
arm imx_v4_v5_defconfig
sh ap325rxa_defconfig
m68k amcore_defconfig
ia64 alldefconfig
sh sh7785lcr_defconfig
arm h3600_defconfig
m68k atari_defconfig
sparc sparc64_defconfig
c6x evmc6472_defconfig
powerpc sam440ep_defconfig
ia64 tiger_defconfig
powerpc ppc64_defconfig
powerpc tqm8555_defconfig
arm shannon_defconfig
powerpc ksi8560_defconfig
mips malta_kvm_guest_defconfig
powerpc pcm030_defconfig
mips gpr_defconfig
arm spear13xx_defconfig
nios2 3c120_defconfig
mips bigsur_defconfig
powerpc storcenter_defconfig
powerpc mpc7448_hpc2_defconfig
m68k m5272c3_defconfig
sh ecovec24_defconfig
arm eseries_pxa_defconfig
sh r7780mp_defconfig
powerpc acadia_defconfig
arc axs103_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
m68k allyesconfig
nios2 defconfig
arc allyesconfig
nds32 allnoconfig
c6x allyesconfig
nds32 defconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
s390 allyesconfig
parisc allyesconfig
s390 defconfig
i386 allyesconfig
sparc allyesconfig
sparc defconfig
i386 tinyconfig
i386 defconfig
mips allyesconfig
mips allmodconfig
powerpc allyesconfig
powerpc allmodconfig
powerpc allnoconfig
x86_64 randconfig-a004-20210108
x86_64 randconfig-a006-20210108
x86_64 randconfig-a001-20210108
x86_64 randconfig-a002-20210108
x86_64 randconfig-a003-20210108
x86_64 randconfig-a005-20210108
i386 randconfig-a005-20210108
i386 randconfig-a002-20210108
i386 randconfig-a001-20210108
i386 randconfig-a003-20210108
i386 randconfig-a006-20210108
i386 randconfig-a004-20210108
i386 randconfig-a016-20210108
i386 randconfig-a011-20210108
i386 randconfig-a014-20210108
i386 randconfig-a015-20210108
i386 randconfig-a013-20210108
i386 randconfig-a012-20210108
riscv nommu_k210_defconfig
riscv allyesconfig
riscv nommu_virt_defconfig
riscv allnoconfig
riscv defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64 defconfig
x86_64 rhel-8.3
x86_64 rhel-8.3-kbuiltin
x86_64 kexec
clang tested configs:
x86_64 randconfig-a013-20210108
x86_64 randconfig-a011-20210108
x86_64 randconfig-a012-20210108
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* RE: [PATCH 05/11] iov_iter: merge the compat case into rw_copy_check_uvector
From: David Laight @ 2021-01-08 11:49 UTC (permalink / raw)
To: 'Christoph Hellwig', Alexander Viro
Cc: Jens Axboe, linux-s390@vger.kernel.org,
linux-arch@vger.kernel.org, linux-parisc@vger.kernel.org,
Arnd Bergmann, linux-aio@kvack.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mips@vger.kernel.org,
David Howells, linux-fsdevel@vger.kernel.org,
linux-security-module@vger.kernel.org, keyrings@vger.kernel.org,
linux-block@vger.kernel.org, linux-mm@kvack.org,
linux-scsi@vger.kernel.org, sparclinux@vger.kernel.org,
Andrew Morton, linuxppc-dev@lists.ozlabs.org,
io-uring@vger.kernel.org, linux-arm-kernel@lists.infradead.org
In-Reply-To: <20200921143434.707844-6-hch@lst.de>
From: Christoph Hellwig <hch@lst.de>
> Sent: 21 September 2020 15:34
>
> Stop duplicating the iovec verify code, and instead add a
> __import_iovec helper that does the whole verify and import, but takes
> a bool compat to decide on the native or compat layout. This also
> ends up massively simplifying the calling conventions.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> lib/iov_iter.c | 195 ++++++++++++++++++-------------------------------
> 1 file changed, 70 insertions(+), 125 deletions(-)
>
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index a64867501a7483..8bfa47b63d39aa 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -10,6 +10,7 @@
> #include <net/checksum.h>
> #include <linux/scatterlist.h>
> #include <linux/instrumented.h>
> +#include <linux/compat.h>
>
> #define PIPE_PARANOIA /* for now */
>
> @@ -1650,43 +1651,76 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
> }
> EXPORT_SYMBOL(dup_iter);
>
> -static ssize_t rw_copy_check_uvector(int type,
> - const struct iovec __user *uvector, unsigned long nr_segs,
> - unsigned long fast_segs, struct iovec *fast_pointer,
> - struct iovec **ret_pointer)
> +static int compat_copy_iovecs_from_user(struct iovec *iov,
> + const struct iovec __user *uvector, unsigned long nr_segs)
> +{
> + const struct compat_iovec __user *uiov =
> + (const struct compat_iovec __user *)uvector;
> + unsigned long i;
> + int ret = -EFAULT;
> +
> + if (!user_access_begin(uvector, nr_segs * sizeof(*uvector)))
> + return -EFAULT;
A little bit late, but the above isn't quite right.
It should be sizeof(*uiov) - the length is double what it should be.
Not that access_ok() can fail for compat addresses
and the extra length won't matter for architectures that
need the address/length to open an address hole into userspace.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply
* Re: [PATCH v2 0/5] ibmvfc: MQ preparatory locking work
From: Martin K. Petersen @ 2021-01-08 3:38 UTC (permalink / raw)
To: Tyrel Datwyler
Cc: martin.petersen, linux-scsi, linux-kernel, james.bottomley,
brking, linuxppc-dev
In-Reply-To: <20210106201835.1053593-1-tyreld@linux.ibm.com>
Tyrel,
> The ibmvfc driver in its current form relies heavily on the
> host_lock. This patchset introduces a generic queue with its own queue
> lock and sent/free event list locks. This generic queue allows the
> driver to decouple the primary queue and future subordinate queues
> from the host lock reducing lock contention while also relaxing
> locking for submissions and completions to simply the list lock of the
> queue in question.
Applied to 5.12/scsi-staging, thanks!
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply
* Re: [PATCH] ibmvfc: fix missing cast of ibmvfc_event pointer to u64 handle
From: Martin K. Petersen @ 2021-01-08 4:19 UTC (permalink / raw)
To: Tyrel Datwyler, james.bottomley
Cc: kernel test robot, linux-scsi, linux-kernel, Martin K . Petersen,
brking, linuxppc-dev
In-Reply-To: <20210106203721.1054693-1-tyreld@linux.ibm.com>
On Wed, 6 Jan 2021 14:37:21 -0600, Tyrel Datwyler wrote:
> Commit 2aa0102c6688 ("scsi: ibmvfc: Use correlation token to tag
> commands") sets the vfcFrame correlation token to the pointer handle of
> the associated ibmvfc_event. However, that commit failed to cast the
> pointer to an appropriate type which in this case is a u64. As such
> sparse warnings are generated for both correlation token assignments.
>
> ibmvfc.c:2375:36: sparse: incorrect type in argument 1 (different base types)
> ibmvfc.c:2375:36: sparse: expected unsigned long long [usertype] val
> ibmvfc.c:2375:36: sparse: got struct ibmvfc_event *[assigned] evt
>
> [...]
Applied to 5.11/scsi-fixes, thanks!
[1/1] ibmvfc: fix missing cast of ibmvfc_event pointer to u64 handle
https://git.kernel.org/mkp/scsi/c/901d01c8e50c
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply
* Re: [RFC PATCH v3 5/6] dt-bindings: of: Add restricted DMA pool
From: Konrad Rzeszutek Wilk @ 2021-01-07 18:00 UTC (permalink / raw)
To: Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
rafael.j.wysocki, Christoph Hellwig, Bartosz Golaszewski,
xen-devel, Thierry Reding, linux-devicetree, will, dan.j.williams,
linuxppc-dev, Rob Herring, boris.ostrovsky, Andy Shevchenko,
jgross, Nicolas Boichat, Greg KH, rdunlap, lkml, Tomasz Figa,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <CALiNf2_dV13jbHqLt-r1eK+dtOcAKBGcWQCVMQn+eL6MuOrETQ@mail.gmail.com>
On Fri, Jan 08, 2021 at 01:39:43AM +0800, Claire Chang wrote:
> On Thu, Jan 7, 2021 at 2:58 AM Konrad Rzeszutek Wilk
> <konrad.wilk@oracle.com> wrote:
> >
> > On Wed, Jan 06, 2021 at 11:41:23AM +0800, Claire Chang wrote:
> > > Introduce the new compatible string, restricted-dma-pool, for restricted
> > > DMA. One can specify the address and length of the restricted DMA memory
> > > region by restricted-dma-pool in the device tree.
> > >
> > > Signed-off-by: Claire Chang <tientzu@chromium.org>
> > > ---
> > > .../reserved-memory/reserved-memory.txt | 24 +++++++++++++++++++
> > > 1 file changed, 24 insertions(+)
> > >
> > > diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > > index e8d3096d922c..44975e2a1fd2 100644
> > > --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > > +++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > > @@ -51,6 +51,20 @@ compatible (optional) - standard definition
> > > used as a shared pool of DMA buffers for a set of devices. It can
> > > be used by an operating system to instantiate the necessary pool
> > > management subsystem if necessary.
> > > + - restricted-dma-pool: This indicates a region of memory meant to be
> > > + used as a pool of restricted DMA buffers for a set of devices. The
> > > + memory region would be the only region accessible to those devices.
> > > + When using this, the no-map and reusable properties must not be set,
> > > + so the operating system can create a virtual mapping that will be used
> > > + for synchronization. The main purpose for restricted DMA is to
> > > + mitigate the lack of DMA access control on systems without an IOMMU,
> > > + which could result in the DMA accessing the system memory at
> > > + unexpected times and/or unexpected addresses, possibly leading to data
> > > + leakage or corruption. The feature on its own provides a basic level
> > > + of protection against the DMA overwriting buffer contents at
> > > + unexpected times. However, to protect against general data leakage and
> > > + system memory corruption, the system needs to provide way to restrict
> > > + the DMA to a predefined memory region.
> >
> > Heya!
> >
> > I think I am missing something obvious here so please bear with my
> > questions:
> >
> > - This code adds the means of having the SWIOTLB pool tied to a specific
> > memory correct?
>
> It doesn't affect the existing SWIOTLB. It just utilizes the existing SWIOTLB
> code to create another DMA pool tied to a specific memory region for a given set
> of devices. It bounces the streaming DMA (map/unmap) in and out of that region
> and does the memory allocation (dma_direct_alloc) from the same region.
Right, so why can't it follow the same mechanism that Xen SWIOTLB does - which
had exactly the same problem (needed special handling on the pool) - and use
similar code?
>
> >
> >
> > - Nothing stops the physical device from bypassing the SWIOTLB buffer.
> > That is if an errant device screwed up the length or DMA address, the
> > SWIOTLB would gladly do what the device told it do?
>
> So the system needs to provide a way to lock down the memory access, e.g. MPU.
OK! Would it be prudent to have this in the description above perhaps?
>
> >
> > - This has to be combined with SWIOTLB-force-ish to always use the
> > bounce buffer, otherwise you could still do DMA without using
> > SWIOTLB (by not hitting the criteria for needing to use SWIOTLB)?
>
> Since restricted DMA is for the devices that are not behind an IOMMU, I change
> the criteria
> `if (unlikely(swiotlb_force == SWIOTLB_FORCE))`
> to
> `if (unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dma_io_tlb_mem)`
> in dma_direct_map_page().
>
> Also, even if SWIOTLB=force, the restricted DMA pool is preferred if available
> (get_io_tlb_mem in https://lore.kernel.org/patchwork/patch/1360995/).
>
> Thanks!
^ permalink raw reply
* Re: [RFC PATCH v3 0/6] Restricted DMA
From: Claire Chang @ 2021-01-07 17:38 UTC (permalink / raw)
To: Florian Fainelli
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
rafael.j.wysocki, Christoph Hellwig, Bartosz Golaszewski,
xen-devel, Thierry Reding, linux-devicetree, will, konrad.wilk,
dan.j.williams, linuxppc-dev, Rob Herring, boris.ostrovsky,
Andy Shevchenko, jgross, Nicolas Boichat, Greg KH, rdunlap, lkml,
Tomasz Figa,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
Jim Quinlan, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <d7043239-12cf-3636-4726-2e3b90917dc6@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 7552 bytes --]
On Thu, Jan 7, 2021 at 2:48 AM Florian Fainelli <f.fainelli@gmail.com>
wrote:
>
> Hi,
>
> First of all let me say that I am glad that someone is working on a
> upstream solution for this issue, would appreciate if you could CC and
> Jim Quinlan on subsequent submissions.
Sure!
>
>
> On 1/5/21 7:41 PM, Claire Chang wrote:
> > This series implements mitigations for lack of DMA access control on
> > systems without an IOMMU, which could result in the DMA accessing the
> > system memory at unexpected times and/or unexpected addresses, possibly
> > leading to data leakage or corruption.
> >
> > For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus
is
> > not behind an IOMMU. As PCI-e, by design, gives the device full access
to
> > system memory, a vulnerability in the Wi-Fi firmware could easily
escalate
> > to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
> > full chain of exploits; [2], [3]).
> >
> > To mitigate the security concerns, we introduce restricted DMA.
Restricted
> > DMA utilizes the existing swiotlb to bounce streaming DMA in and out of
a
> > specially allocated region and does memory allocation from the same
region.
> > The feature on its own provides a basic level of protection against the
DMA
> > overwriting buffer contents at unexpected times. However, to protect
> > against general data leakage and system memory corruption, the system
needs
> > to provide a way to restrict the DMA to a predefined memory region
(this is
> > usually done at firmware level, e.g. in ATF on some ARM platforms).
>
> Can you explain how ATF gets involved and to what extent it does help,
> besides enforcing a secure region from the ARM CPU's perspective? Does
> the PCIe root complex not have an IOMMU but can somehow be denied access
> to a region that is marked NS=0 in the ARM CPU's MMU? If so, that is
> still some sort of basic protection that the HW enforces, right?
We need the ATF support for memory MPU (memory protection unit).
Restricted DMA (with reserved-memory in dts) makes sure the predefined memory
region is for PCIe DMA only, but we still need the MPU to lock down PCIe access
to that specific region.
>
> On Broadcom STB SoCs we have had something similar for a while however
> and while we don't have an IOMMU for the PCIe bridge, we do have a
> basic protection mechanism whereby we can configure a region in DRAM to
> be PCIe read/write and CPU read/write which then gets used as the PCIe
> inbound region for the PCIe EP. By default the PCIe bridge is not
> allowed access to DRAM so we must call into a security agent to allow
> the PCIe bridge to access the designated DRAM region.
>
> We have done this using a private CMA area region assigned via Device
> Tree, assigned with a and requiring the PCIe EP driver to use
> dma_alloc_from_contiguous() in order to allocate from this device
> private CMA area. The only drawback with that approach is that it
> requires knowing how much memory you need up front for buffers and DMA
> descriptors that the PCIe EP will need to process. The problem is that
> it requires driver modifications and that does not scale over the number
> of PCIe EP drivers, some we absolutely do not control, but there is no
> need to bounce buffer. Your approach scales better across PCIe EP
> drivers however it does require bounce buffering which could be a
> performance hit.
Only the streaming DMA (map/unmap) needs bounce buffering.
I also added alloc/free support in this series
(https://lore.kernel.org/patchwork/patch/1360995/), so dma_direct_alloc()
will
try to allocate memory from the predefined memory region.
As for the performance hit, it should be similar to the default swiotlb.
Here are my experiment results. Both SoCs lack IOMMU for PCIe.
PCIe wifi vht80 throughput -
MTK SoC tcp_tx tcp_rx udp_tx udp_rx
w/o Restricted DMA 244.1 134.66 312.56 350.79
w/ Restricted DMA 246.95 136.59 363.21 351.99
Rockchip SoC tcp_tx tcp_rx udp_tx udp_rx
w/o Restricted DMA 237.87 133.86 288.28 361.88
w/ Restricted DMA 256.01 130.95 292.28 353.19
The CPU usage doesn't increase too much either.
Although I didn't measure the CPU usage very precisely, it's ~3% with a
single
big core (Cortex-A72) and ~5% with a single small core (Cortex-A53).
Thanks!
>
> Thanks!
On Thu, Jan 7, 2021 at 2:48 AM Florian Fainelli <f.fainelli@gmail.com>
wrote:
> Hi,
>
> First of all let me say that I am glad that someone is working on a
> upstream solution for this issue, would appreciate if you could CC and
> Jim Quinlan on subsequent submissions.
>
> On 1/5/21 7:41 PM, Claire Chang wrote:
> > This series implements mitigations for lack of DMA access control on
> > systems without an IOMMU, which could result in the DMA accessing the
> > system memory at unexpected times and/or unexpected addresses, possibly
> > leading to data leakage or corruption.
> >
> > For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
> > not behind an IOMMU. As PCI-e, by design, gives the device full access to
> > system memory, a vulnerability in the Wi-Fi firmware could easily
> escalate
> > to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
> > full chain of exploits; [2], [3]).
> >
> > To mitigate the security concerns, we introduce restricted DMA.
> Restricted
> > DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
> > specially allocated region and does memory allocation from the same
> region.
> > The feature on its own provides a basic level of protection against the
> DMA
> > overwriting buffer contents at unexpected times. However, to protect
> > against general data leakage and system memory corruption, the system
> needs
> > to provide a way to restrict the DMA to a predefined memory region (this
> is
> > usually done at firmware level, e.g. in ATF on some ARM platforms).
>
> Can you explain how ATF gets involved and to what extent it does help,
> besides enforcing a secure region from the ARM CPU's perspective? Does
> the PCIe root complex not have an IOMMU but can somehow be denied access
> to a region that is marked NS=0 in the ARM CPU's MMU? If so, that is
> still some sort of basic protection that the HW enforces, right?
>
> On Broadcom STB SoCs we have had something similar for a while however
> and while we don't have an IOMMU for the PCIe bridge, we do have a
> basic protection mechanism whereby we can configure a region in DRAM to
> be PCIe read/write and CPU read/write which then gets used as the PCIe
> inbound region for the PCIe EP. By default the PCIe bridge is not
> allowed access to DRAM so we must call into a security agent to allow
> the PCIe bridge to access the designated DRAM region.
>
> We have done this using a private CMA area region assigned via Device
> Tree, assigned with a and requiring the PCIe EP driver to use
> dma_alloc_from_contiguous() in order to allocate from this device
> private CMA area. The only drawback with that approach is that it
> requires knowing how much memory you need up front for buffers and DMA
> descriptors that the PCIe EP will need to process. The problem is that
> it requires driver modifications and that does not scale over the number
> of PCIe EP drivers, some we absolutely do not control, but there is no
> need to bounce buffer. Your approach scales better across PCIe EP
> drivers however it does require bounce buffering which could be a
> performance hit.
>
> Thanks!
> --
> Florian
>
[-- Attachment #2: Type: text/html, Size: 8961 bytes --]
^ permalink raw reply
* Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Konrad Rzeszutek Wilk @ 2021-01-07 21:19 UTC (permalink / raw)
To: Florian Fainelli
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
xypron.glpk, Joerg Roedel, rafael.j.wysocki, Christoph Hellwig,
Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
will, dan.j.williams, linuxppc-dev, Rob Herring, Claire Chang,
boris.ostrovsky, Andy Shevchenko, jgross, Nicolas Boichat,
Greg KH, rdunlap, lkml, Tomasz Figa, list@263.net:IOMMU DRIVERS,
Robin Murphy, bauerman
In-Reply-To: <aa5af7d1-779e-f0f6-e6ba-8040e603523f@gmail.com>
On Thu, Jan 07, 2021 at 10:09:14AM -0800, Florian Fainelli wrote:
> On 1/7/21 9:57 AM, Konrad Rzeszutek Wilk wrote:
> > On Fri, Jan 08, 2021 at 01:39:18AM +0800, Claire Chang wrote:
> >> Hi Greg and Konrad,
> >>
> >> This change is intended to be non-arch specific. Any arch that lacks DMA access
> >> control and has devices not behind an IOMMU can make use of it. Could you share
> >> why you think this should be arch specific?
> >
> > The idea behind non-arch specific code is it to be generic. The devicetree
> > is specific to PowerPC, Sparc, and ARM, and not to x86 - hence it should
> > be in arch specific code.
>
> In premise the same code could be used with an ACPI enabled system with
> an appropriate service to identify the restricted DMA regions and unlock
> them.
Which this patchset is not.
>
> More than 1 architecture requiring this function (ARM and ARM64 are the
> two I can think of needing this immediately) sort of calls for making
> the code architecture agnostic since past 2, you need something that scales.
I believe the use-case is for ARM64 at this moment.
>
> There is already code today under kernel/dma/contiguous.c that is only
> activated on a CONFIG_OF=y && CONFIG_OF_RESERVED_MEM=y system, this is
> no different.
> --
> Florian
^ permalink raw reply
* Re: [RFC PATCH v3 5/6] dt-bindings: of: Add restricted DMA pool
From: Florian Fainelli @ 2021-01-07 18:14 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk, Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
Joerg Roedel, rafael.j.wysocki, Christoph Hellwig,
Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
will, dan.j.williams, linuxppc-dev, Rob Herring, boris.ostrovsky,
Andy Shevchenko, jgross, Nicolas Boichat, Greg KH, rdunlap, lkml,
Tomasz Figa, iommu, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210107180032.GB16519@char.us.oracle.com>
On 1/7/21 10:00 AM, Konrad Rzeszutek Wilk wrote:
>>>
>>>
>>> - Nothing stops the physical device from bypassing the SWIOTLB buffer.
>>> That is if an errant device screwed up the length or DMA address, the
>>> SWIOTLB would gladly do what the device told it do?
>>
>> So the system needs to provide a way to lock down the memory access, e.g. MPU.
>
> OK! Would it be prudent to have this in the description above perhaps?
Yes this is something that must be documented as a requirement for the
restricted DMA pool users, otherwise attempting to do restricted DMA
pool is no different than say, using a device private CMA region.
Without the enforcement, this is just a best effort.
--
Florian
^ permalink raw reply
* Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Florian Fainelli @ 2021-01-07 18:09 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk, Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
Joerg Roedel, rafael.j.wysocki, Christoph Hellwig,
Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
will, dan.j.williams, linuxppc-dev, Rob Herring, boris.ostrovsky,
Andy Shevchenko, jgross, Nicolas Boichat, Greg KH, rdunlap, lkml,
Tomasz Figa, iommu, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210107175740.GA16519@char.us.oracle.com>
On 1/7/21 9:57 AM, Konrad Rzeszutek Wilk wrote:
> On Fri, Jan 08, 2021 at 01:39:18AM +0800, Claire Chang wrote:
>> Hi Greg and Konrad,
>>
>> This change is intended to be non-arch specific. Any arch that lacks DMA access
>> control and has devices not behind an IOMMU can make use of it. Could you share
>> why you think this should be arch specific?
>
> The idea behind non-arch specific code is it to be generic. The devicetree
> is specific to PowerPC, Sparc, and ARM, and not to x86 - hence it should
> be in arch specific code.
In principle the same code could be used with an ACPI enabled system with
an appropriate service to identify the restricted DMA regions and unlock
them.
More than 1 architecture requiring this function (ARM and ARM64 are the
two I can think of needing this immediately) sort of calls for making
the code architecture agnostic since past 2, you need something that scales.
There is already code today under kernel/dma/contiguous.c that is only
activated on a CONFIG_OF=y && CONFIG_OF_RESERVED_MEM=y system, this is
no different.
--
Florian
^ permalink raw reply
* Re: [RFC PATCH v3 0/6] Restricted DMA
From: Florian Fainelli @ 2021-01-07 17:59 UTC (permalink / raw)
To: Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
Joerg Roedel, rafael.j.wysocki, Christoph Hellwig,
Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
will, Konrad Rzeszutek Wilk, dan.j.williams, linuxppc-dev,
Rob Herring, boris.ostrovsky, Andy Shevchenko, jgross,
Nicolas Boichat, Greg KH, rdunlap, lkml, Tomasz Figa, iommu,
Jim Quinlan, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <CALiNf28sU1VtGB7LeTXExkMwQiCeg8N5arqyEjw0CPZP72R4dg@mail.gmail.com>
On 1/7/21 9:42 AM, Claire Chang wrote:
>> Can you explain how ATF gets involved and to what extent it does help,
>> besides enforcing a secure region from the ARM CPU's perspective? Does
>> the PCIe root complex not have an IOMMU but can somehow be denied access
>> to a region that is marked NS=0 in the ARM CPU's MMU? If so, that is
>> still some sort of basic protection that the HW enforces, right?
>
> We need the ATF support for memory MPU (memory protection unit).
> Restricted DMA (with reserved-memory in dts) makes sure the predefined memory
> region is for PCIe DMA only, but we still need MPU to locks down PCIe access to
> that specific regions.
OK so you do have a protection unit of some sort to enforce which region
in DRAM the PCIE bridge is allowed to access, that makes sense,
otherwise the restricted DMA region would only be a hint but nothing you
can really enforce. This is almost entirely analogous to our systems then.
There may be some value in standardizing on an ARM SMCCC call then since
you already support two different SoC vendors.
>
>>
>> On Broadcom STB SoCs we have had something similar for a while however
>> and while we don't have an IOMMU for the PCIe bridge, we do have a
>> basic protection mechanism whereby we can configure a region in DRAM to
>> be PCIe read/write and CPU read/write which then gets used as the PCIe
>> inbound region for the PCIe EP. By default the PCIe bridge is not
>> allowed access to DRAM so we must call into a security agent to allow
>> the PCIe bridge to access the designated DRAM region.
>>
>> We have done this using a private CMA area region assigned via Device
>> Tree, assigned with a and requiring the PCIe EP driver to use
>> dma_alloc_from_contiguous() in order to allocate from this device
>> private CMA area. The only drawback with that approach is that it
>> requires knowing how much memory you need up front for buffers and DMA
>> descriptors that the PCIe EP will need to process. The problem is that
>> it requires driver modifications and that does not scale over the number
>> of PCIe EP drivers, some we absolutely do not control, but there is no
>> need to bounce buffer. Your approach scales better across PCIe EP
>> drivers however it does require bounce buffering which could be a
>> performance hit.
>
> Only the streaming DMA (map/unmap) needs bounce buffering.
True, and typically only on transmit since you don't really control
where the sk_buff are allocated from, right? On RX since you need to
hand buffer addresses to the WLAN chip prior to DMA, you can allocate
them from a pool that already falls within the restricted DMA region, right?
> I also added alloc/free support in this series
> (https://lore.kernel.org/patchwork/patch/1360995/), so dma_direct_alloc() will
> try to allocate memory from the predefined memory region.
>
> As for the performance hit, it should be similar to the default swiotlb.
> Here are my experiment results. Both SoCs lack IOMMU for PCIe.
>
> PCIe wifi vht80 throughput -
>
> MTK SoC tcp_tx tcp_rx udp_tx udp_rx
> w/o Restricted DMA 244.1 134.66 312.56 350.79
> w/ Restricted DMA 246.95 136.59 363.21 351.99
>
> Rockchip SoC tcp_tx tcp_rx udp_tx udp_rx
> w/o Restricted DMA 237.87 133.86 288.28 361.88
> w/ Restricted DMA 256.01 130.95 292.28 353.19
How come you get better throughput with restricted DMA? Is it because
doing DMA to/from a contiguous region allows for better grouping of
transactions from the DRAM controller's perspective somehow?
>
> The CPU usage doesn't increase too much either.
> Although I didn't measure the CPU usage very precisely, it's ~3% with a single
> big core (Cortex-A72) and ~5% with a single small core (Cortex-A53).
>
> Thanks!
>
>>
>> Thanks!
>> --
>> Florian
--
Florian
^ permalink raw reply
* Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Konrad Rzeszutek Wilk @ 2021-01-07 17:57 UTC (permalink / raw)
To: Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
rafael.j.wysocki, Christoph Hellwig, Bartosz Golaszewski,
xen-devel, Thierry Reding, linux-devicetree, will, dan.j.williams,
linuxppc-dev, Rob Herring, boris.ostrovsky, Andy Shevchenko,
jgross, Nicolas Boichat, Greg KH, rdunlap, lkml, Tomasz Figa,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <CALiNf2-HDf6tFcvVgCttr-ta=88ZMH=OvB5XoryTPc6MNvwV+Q@mail.gmail.com>
On Fri, Jan 08, 2021 at 01:39:18AM +0800, Claire Chang wrote:
> Hi Greg and Konrad,
>
> This change is intended to be non-arch specific. Any arch that lacks DMA access
> control and has devices not behind an IOMMU can make use of it. Could you share
> why you think this should be arch specific?
The idea behind non-arch specific code is for it to be generic. The devicetree
is specific to PowerPC, Sparc, and ARM, and not to x86 - hence it should
be in arch specific code.
>
> Thanks!
^ permalink raw reply
* Re: [RFC PATCH v3 0/6] Restricted DMA
From: Claire Chang @ 2021-01-07 17:42 UTC (permalink / raw)
To: Florian Fainelli
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
rafael.j.wysocki, Christoph Hellwig, Bartosz Golaszewski,
xen-devel, Thierry Reding, linux-devicetree, will,
Konrad Rzeszutek Wilk, dan.j.williams, linuxppc-dev, Rob Herring,
boris.ostrovsky, Andy Shevchenko, jgross, Nicolas Boichat,
Greg KH, rdunlap, lkml, Tomasz Figa,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
Jim Quinlan, xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <d7043239-12cf-3636-4726-2e3b90917dc6@gmail.com>
On Thu, Jan 7, 2021 at 2:48 AM Florian Fainelli <f.fainelli@gmail.com> wrote:
>
> Hi,
>
> First of all let me say that I am glad that someone is working on a
> upstream solution for this issue, would appreciate if you could CC and
> Jim Quinlan on subsequent submissions.
Sure!
>
> On 1/5/21 7:41 PM, Claire Chang wrote:
> > This series implements mitigations for lack of DMA access control on
> > systems without an IOMMU, which could result in the DMA accessing the
> > system memory at unexpected times and/or unexpected addresses, possibly
> > leading to data leakage or corruption.
> >
> > For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
> > not behind an IOMMU. As PCI-e, by design, gives the device full access to
> > system memory, a vulnerability in the Wi-Fi firmware could easily escalate
> > to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
> > full chain of exploits; [2], [3]).
> >
> > To mitigate the security concerns, we introduce restricted DMA. Restricted
> > DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
> > specially allocated region and does memory allocation from the same region.
> > The feature on its own provides a basic level of protection against the DMA
> > overwriting buffer contents at unexpected times. However, to protect
> > against general data leakage and system memory corruption, the system needs
> > to provide a way to restrict the DMA to a predefined memory region (this is
> > usually done at firmware level, e.g. in ATF on some ARM platforms).
>
> Can you explain how ATF gets involved and to what extent it does help,
> besides enforcing a secure region from the ARM CPU's perspective? Does
> the PCIe root complex not have an IOMMU but can somehow be denied access
> to a region that is marked NS=0 in the ARM CPU's MMU? If so, that is
> still some sort of basic protection that the HW enforces, right?
We need the ATF support for memory MPU (memory protection unit).
Restricted DMA (with reserved-memory in dts) makes sure the predefined memory
region is for PCIe DMA only, but we still need the MPU to lock down PCIe access
to that specific region.
>
> On Broadcom STB SoCs we have had something similar for a while however
> and while we don't have an IOMMU for the PCIe bridge, we do have a
> basic protection mechanism whereby we can configure a region in DRAM to
> be PCIe read/write and CPU read/write which then gets used as the PCIe
> inbound region for the PCIe EP. By default the PCIe bridge is not
> allowed access to DRAM so we must call into a security agent to allow
> the PCIe bridge to access the designated DRAM region.
>
> We have done this using a private CMA area region assigned via Device
> Tree, assigned with a and requiring the PCIe EP driver to use
> dma_alloc_from_contiguous() in order to allocate from this device
> private CMA area. The only drawback with that approach is that it
> requires knowing how much memory you need up front for buffers and DMA
> descriptors that the PCIe EP will need to process. The problem is that
> it requires driver modifications and that does not scale over the number
> of PCIe EP drivers, some we absolutely do not control, but there is no
> need to bounce buffer. Your approach scales better across PCIe EP
> drivers however it does require bounce buffering which could be a
> performance hit.
Only the streaming DMA (map/unmap) needs bounce buffering.
I also added alloc/free support in this series
(https://lore.kernel.org/patchwork/patch/1360995/), so dma_direct_alloc() will
try to allocate memory from the predefined memory region.
As for the performance hit, it should be similar to the default swiotlb.
Here are my experiment results. Both SoCs lack IOMMU for PCIe.
PCIe wifi vht80 throughput -
MTK SoC tcp_tx tcp_rx udp_tx udp_rx
w/o Restricted DMA 244.1 134.66 312.56 350.79
w/ Restricted DMA 246.95 136.59 363.21 351.99
Rockchip SoC tcp_tx tcp_rx udp_tx udp_rx
w/o Restricted DMA 237.87 133.86 288.28 361.88
w/ Restricted DMA 256.01 130.95 292.28 353.19
The CPU usage doesn't increase too much either.
Although I didn't measure the CPU usage very precisely, it's ~3% with a single
big core (Cortex-A72) and ~5% with a single small core (Cortex-A53).
Thanks!
>
> Thanks!
> --
> Florian
^ permalink raw reply
* Re: [RFC PATCH v3 5/6] dt-bindings: of: Add restricted DMA pool
From: Claire Chang @ 2021-01-07 17:39 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
rafael.j.wysocki, Christoph Hellwig, Bartosz Golaszewski,
xen-devel, Thierry Reding, linux-devicetree, will, dan.j.williams,
linuxppc-dev, Rob Herring, boris.ostrovsky, Andy Shevchenko,
jgross, Nicolas Boichat, Greg KH, rdunlap, lkml, Tomasz Figa,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210106185757.GB109735@localhost.localdomain>
On Thu, Jan 7, 2021 at 2:58 AM Konrad Rzeszutek Wilk
<konrad.wilk@oracle.com> wrote:
>
> On Wed, Jan 06, 2021 at 11:41:23AM +0800, Claire Chang wrote:
> > Introduce the new compatible string, restricted-dma-pool, for restricted
> > DMA. One can specify the address and length of the restricted DMA memory
> > region by restricted-dma-pool in the device tree.
> >
> > Signed-off-by: Claire Chang <tientzu@chromium.org>
> > ---
> > .../reserved-memory/reserved-memory.txt | 24 +++++++++++++++++++
> > 1 file changed, 24 insertions(+)
> >
> > diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > index e8d3096d922c..44975e2a1fd2 100644
> > --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > +++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> > @@ -51,6 +51,20 @@ compatible (optional) - standard definition
> > used as a shared pool of DMA buffers for a set of devices. It can
> > be used by an operating system to instantiate the necessary pool
> > management subsystem if necessary.
> > + - restricted-dma-pool: This indicates a region of memory meant to be
> > + used as a pool of restricted DMA buffers for a set of devices. The
> > + memory region would be the only region accessible to those devices.
> > + When using this, the no-map and reusable properties must not be set,
> > + so the operating system can create a virtual mapping that will be used
> > + for synchronization. The main purpose for restricted DMA is to
> > + mitigate the lack of DMA access control on systems without an IOMMU,
> > + which could result in the DMA accessing the system memory at
> > + unexpected times and/or unexpected addresses, possibly leading to data
> > + leakage or corruption. The feature on its own provides a basic level
> > + of protection against the DMA overwriting buffer contents at
> > + unexpected times. However, to protect against general data leakage and
> > + system memory corruption, the system needs to provide way to restrict
> > + the DMA to a predefined memory region.
>
> Heya!
>
> I think I am missing something obvious here so please bear with my
> questions:
>
> - This code adds the means of having the SWIOTLB pool tied to a specific
> memory correct?
It doesn't affect the existing SWIOTLB. It just utilizes the existing SWIOTLB
code to create another DMA pool tied to a specific memory region for a given set
of devices. It bounces the streaming DMA (map/unmap) in and out of that region
and does the memory allocation (dma_direct_alloc) from the same region.
>
>
> - Nothing stops the physical device from bypassing the SWIOTLB buffer.
> That is if an errant device screwed up the length or DMA address, the
> SWIOTLB would gladly do what the device told it do?
So the system needs to provide a way to lock down the memory access, e.g. MPU.
>
> - This has to be combined with SWIOTLB-force-ish to always use the
> bounce buffer, otherwise you could still do DMA without using
> SWIOTLB (by not hitting the criteria for needing to use SWIOTLB)?
Since restricted DMA is for the devices that are not behind an IOMMU, I change
the criteria
`if (unlikely(swiotlb_force == SWIOTLB_FORCE))`
to
`if (unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dma_io_tlb_mem)`
in dma_direct_map_page().
Also, even if SWIOTLB=force, the restricted DMA pool is preferred if available
(get_io_tlb_mem in https://lore.kernel.org/patchwork/patch/1360995/).
Thanks!
^ permalink raw reply
* Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Claire Chang @ 2021-01-07 17:39 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk
Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
rafael.j.wysocki, Christoph Hellwig, Bartosz Golaszewski,
xen-devel, Thierry Reding, linux-devicetree, will, dan.j.williams,
linuxppc-dev, Rob Herring, boris.ostrovsky, Andy Shevchenko,
jgross, Nicolas Boichat, Greg KH, rdunlap, lkml, Tomasz Figa,
list@263.net:IOMMU DRIVERS <iommu@lists.linux-foundation.org>, Joerg Roedel <joro@8bytes.org>, ,
xypron.glpk, Robin Murphy, bauerman
In-Reply-To: <20210106185241.GA109735@localhost.localdomain>
Hi Greg and Konrad,
This change is intended to be non-arch specific. Any arch that lacks DMA access
control and has devices not behind an IOMMU can make use of it. Could you share
why you think this should be arch specific?
Thanks!
^ permalink raw reply
* [PATCH v2] powerpc/mce: Remove per cpu variables from MCE handlers
From: Ganesh Goudar @ 2021-01-07 11:00 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: Ganesh Goudar, mahesh, npiggin
Access to per-cpu variables requires translation to be enabled on
pseries machines running in hash MMU mode. Since part of the MCE handler
runs in real mode and part of the MCE handling code is shared between the
ppc architectures pseries and powernv, it becomes difficult to manage
these variables differently on different architectures. So keep
these variables in the paca instead of having them as per-cpu variables
to avoid complications.
The maximum recursive depth of MCE is 4. Considering the maximum depth
allowed, reduce the size of the event arrays (MAX_MC_EVT) from 100 to 10.
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
v2: Dynamically allocate memory for machine check event info
---
arch/powerpc/include/asm/mce.h | 21 +++++++-
arch/powerpc/include/asm/paca.h | 4 ++
arch/powerpc/kernel/mce.c | 86 ++++++++++++++++++------------
arch/powerpc/kernel/setup-common.c | 2 +-
4 files changed, 78 insertions(+), 35 deletions(-)
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index e6c27ae843dc..8d6e3a7a9f37 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -204,7 +204,18 @@ struct mce_error_info {
bool ignore_event;
};
-#define MAX_MC_EVT 100
+#define MAX_MC_EVT 10
+
+struct mce_info {
+ int mce_nest_count;
+ struct machine_check_event mce_event[MAX_MC_EVT];
+ /* Queue for delayed MCE events. */
+ int mce_queue_count;
+ struct machine_check_event mce_event_queue[MAX_MC_EVT];
+ /* Queue for delayed MCE UE events. */
+ int mce_ue_count;
+ struct machine_check_event mce_ue_event_queue[MAX_MC_EVT];
+};
/* Release flags for get_mce_event() */
#define MCE_EVENT_RELEASE true
@@ -233,5 +244,13 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs);
long __machine_check_early_realmode_p8(struct pt_regs *regs);
long __machine_check_early_realmode_p9(struct pt_regs *regs);
long __machine_check_early_realmode_p10(struct pt_regs *regs);
+#define get_mce_info() local_paca->mce_info
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void mce_init(void);
+#else
+static inline void mce_init(void) { };
#endif /* CONFIG_PPC_BOOK3S_64 */
+
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 9454d29ff4b4..38e0c55e845d 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -29,6 +29,7 @@
#include <asm/hmi.h>
#include <asm/cpuidle.h>
#include <asm/atomic.h>
+#include <asm/mce.h>
#include <asm-generic/mmiowb_types.h>
@@ -273,6 +274,9 @@ struct paca_struct {
#ifdef CONFIG_MMIOWB
struct mmiowb_state mmiowb_state;
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct mce_info *mce_info;
+#endif /* CONFIG_PPC_BOOK3S_64 */
} ____cacheline_aligned;
extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9f3e133b57b7..14142ddbedf2 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -17,23 +17,12 @@
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>
+#include <linux/memblock.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
-static DEFINE_PER_CPU(int, mce_nest_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
-
-/* Queue for delayed MCE events. */
-static DEFINE_PER_CPU(int, mce_queue_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
-
-/* Queue for delayed MCE UE events. */
-static DEFINE_PER_CPU(int, mce_ue_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
- mce_ue_event_queue);
-
static void machine_check_process_queued_event(struct irq_work *work);
static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
@@ -103,8 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
- int index = __this_cpu_inc_return(mce_nest_count) - 1;
- struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
+ int index = get_mce_info()->mce_nest_count++;
+ struct machine_check_event *mce = &get_mce_info()->mce_event[index];
/*
* Return if we don't have enough space to log mce event.
@@ -191,7 +180,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
*/
int get_mce_event(struct machine_check_event *mce, bool release)
{
- int index = __this_cpu_read(mce_nest_count) - 1;
+ int index = get_mce_info()->mce_nest_count - 1;
struct machine_check_event *mc_evt;
int ret = 0;
@@ -201,7 +190,7 @@ int get_mce_event(struct machine_check_event *mce, bool release)
/* Check if we have MCE info to process. */
if (index < MAX_MC_EVT) {
- mc_evt = this_cpu_ptr(&mce_event[index]);
+ mc_evt = &get_mce_info()->mce_event[index];
/* Copy the event structure and release the original */
if (mce)
*mce = *mc_evt;
@@ -211,7 +200,7 @@ int get_mce_event(struct machine_check_event *mce, bool release)
}
/* Decrement the count to free the slot. */
if (release)
- __this_cpu_dec(mce_nest_count);
+ get_mce_info()->mce_nest_count--;
return ret;
}
@@ -233,13 +222,13 @@ static void machine_check_ue_event(struct machine_check_event *evt)
{
int index;
- index = __this_cpu_inc_return(mce_ue_count) - 1;
+ index = get_mce_info()->mce_ue_count++;
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
- __this_cpu_dec(mce_ue_count);
+ get_mce_info()->mce_ue_count--;
return;
}
- memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
+ memcpy(&get_mce_info()->mce_ue_event_queue[index], evt, sizeof(*evt));
/* Queue work to process this event later. */
irq_work_queue(&mce_ue_event_irq_work);
@@ -256,13 +245,13 @@ void machine_check_queue_event(void)
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;
- index = __this_cpu_inc_return(mce_queue_count) - 1;
+ index = get_mce_info()->mce_queue_count++;
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
- __this_cpu_dec(mce_queue_count);
+ get_mce_info()->mce_queue_count--;
return;
}
- memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
+ memcpy(&get_mce_info()->mce_event_queue[index], &evt, sizeof(evt));
/* Queue irq work to process this event later. */
irq_work_queue(&mce_event_process_work);
@@ -289,9 +278,9 @@ static void machine_process_ue_event(struct work_struct *work)
int index;
struct machine_check_event *evt;
- while (__this_cpu_read(mce_ue_count) > 0) {
- index = __this_cpu_read(mce_ue_count) - 1;
- evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+ while (get_mce_info()->mce_ue_count > 0) {
+ index = get_mce_info()->mce_ue_count - 1;
+ evt = &get_mce_info()->mce_ue_event_queue[index];
blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
/*
@@ -304,7 +293,7 @@ static void machine_process_ue_event(struct work_struct *work)
*/
if (evt->error_type == MCE_ERROR_TYPE_UE) {
if (evt->u.ue_error.ignore_event) {
- __this_cpu_dec(mce_ue_count);
+ get_mce_info()->mce_ue_count--;
continue;
}
@@ -320,7 +309,7 @@ static void machine_process_ue_event(struct work_struct *work)
"was generated\n");
}
#endif
- __this_cpu_dec(mce_ue_count);
+ get_mce_info()->mce_ue_count--;
}
}
/*
@@ -338,17 +327,17 @@ static void machine_check_process_queued_event(struct irq_work *work)
* For now just print it to console.
* TODO: log this error event to FSP or nvram.
*/
- while (__this_cpu_read(mce_queue_count) > 0) {
- index = __this_cpu_read(mce_queue_count) - 1;
- evt = this_cpu_ptr(&mce_event_queue[index]);
+ while (get_mce_info()->mce_queue_count > 0) {
+ index = get_mce_info()->mce_queue_count - 1;
+ evt = &get_mce_info()->mce_event_queue[index];
if (evt->error_type == MCE_ERROR_TYPE_UE &&
evt->u.ue_error.ignore_event) {
- __this_cpu_dec(mce_queue_count);
+ get_mce_info()->mce_queue_count--;
continue;
}
machine_check_print_event_info(evt, false, false);
- __this_cpu_dec(mce_queue_count);
+ get_mce_info()->mce_queue_count--;
}
}
@@ -741,3 +730,34 @@ long hmi_exception_realmode(struct pt_regs *regs)
return 1;
}
+
+void __init mce_init(void)
+{
+ int i, size;
+ struct mce_info *mce_info;
+
+ if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) {
+ size = sizeof(struct mce_info) * num_possible_cpus();
+ mce_info = memblock_alloc_try_nid(size, sizeof(struct mce_info),
+ MEMBLOCK_LOW_LIMIT,
+ ppc64_rma_size,
+ NUMA_NO_NODE);
+ if (!mce_info)
+ goto err;
+ for_each_possible_cpu(i)
+ paca_ptrs[i]->mce_info = mce_info + i;
+ return;
+ }
+ for_each_possible_cpu(i) {
+ mce_info =
+ memblock_alloc_node(sizeof(struct mce_info),
+ __alignof__(*paca_ptrs[i]->mce_info),
+ cpu_to_node(i));
+ if (!mce_info)
+ goto err;
+ paca_ptrs[i]->mce_info = mce_info;
+ }
+ return;
+err:
+ panic("Failed allocate memory MCE event data\n");
+}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 71f38e9248be..17dc451f0e45 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -916,7 +916,6 @@ void __init setup_arch(char **cmdline_p)
/* On BookE, setup per-core TLB data structures. */
setup_tlb_core_data();
#endif
-
/* Print various info about the machine that has been gathered so far. */
print_system_info();
@@ -938,6 +937,7 @@ void __init setup_arch(char **cmdline_p)
exc_lvl_early_init();
emergency_stack_init();
+ mce_init();
smp_release_cpus();
initmem_init();
--
2.26.2
^ permalink raw reply related
* Re: [PATCH v3 1/2] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
From: Bharata B Rao @ 2021-01-07 4:08 UTC (permalink / raw)
To: Fabiano Rosas; +Cc: aneesh.kumar, npiggin, kvm-ppc, linuxppc-dev, david
In-Reply-To: <87k0sp95g0.fsf@linux.ibm.com>
On Wed, Jan 06, 2021 at 05:27:27PM -0300, Fabiano Rosas wrote:
> Bharata B Rao <bharata@linux.ibm.com> writes:
> > +
> > +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> > + unsigned long type, unsigned long pg_sizes,
> > + unsigned long start, unsigned long end)
> > +{
> > + struct kvm_nested_guest *gp;
> > + long ret;
> > + unsigned long psize, ap;
> > +
> > + /*
> > + * If L2 lpid isn't valid, we need to return H_PARAMETER.
> > + *
> > + * However, nested KVM issues a L2 lpid flush call when creating
> > + * partition table entries for L2. This happens even before the
> > + * corresponding shadow lpid is created in HV which happens in
> > + * H_ENTER_NESTED call. Since we can't differentiate this case from
> > + * the invalid case, we ignore such flush requests and return success.
> > + */
>
> So for a nested lpid the H_TLB_INVALIDATE in:
>
> kvmppc_core_init_vm_hv -> kvmppc_setup_partition_table ->
> kvmhv_set_ptbl_entry -> kvmhv_flush_lpid
>
> has always been a noop? It seems that we could just skip
> kvmhv_flush_lpid in L1 during init_vm then.
Maybe, but I suppose that flush is required and could be fixed
eventually.
Regards,
Bharata.
^ permalink raw reply
* [PATCH] powerpc/pseries/dlpar: handle ibm, configure-connector delay status
From: Nathan Lynch @ 2021-01-07 2:59 UTC (permalink / raw)
To: linuxppc-dev; +Cc: tyreld, brking
dlpar_configure_connector() has two problems in its handling of
ibm,configure-connector's return status:
1. When the status is -2 (busy, call again), we call
ibm,configure-connector again immediately without checking whether
to schedule, which can result in monopolizing the CPU.
2. Extended delay status (9900..9905) goes completely unhandled,
causing the configuration to unnecessarily terminate.
Fix both of these issues by using rtas_busy_delay().
Fixes: ab519a011caa ("powerpc/pseries: Kernel DLPAR Infrastructure")
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
arch/powerpc/platforms/pseries/dlpar.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 16e86ba8aa20..f6b7749d6ada 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -127,7 +127,6 @@ void dlpar_free_cc_nodes(struct device_node *dn)
#define NEXT_PROPERTY 3
#define PREV_PARENT 4
#define MORE_MEMORY 5
-#define CALL_AGAIN -2
#define ERR_CFG_USE -9003
struct device_node *dlpar_configure_connector(__be32 drc_index,
@@ -168,6 +167,9 @@ struct device_node *dlpar_configure_connector(__be32 drc_index,
spin_unlock(&rtas_data_buf_lock);
+ if (rtas_busy_delay(rc))
+ continue;
+
switch (rc) {
case COMPLETE:
break;
@@ -216,9 +218,6 @@ struct device_node *dlpar_configure_connector(__be32 drc_index,
last_dn = last_dn->parent;
break;
- case CALL_AGAIN:
- break;
-
case MORE_MEMORY:
case ERR_CFG_USE:
default:
--
2.29.2
^ permalink raw reply related
* [PATCH] ibmvfc: fix missing cast of ibmvfc_event pointer to u64 handle
From: Tyrel Datwyler @ 2021-01-06 20:37 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
linuxppc-dev, kernel test robot
Commit 2aa0102c6688 ("scsi: ibmvfc: Use correlation token to tag
commands") sets the vfcFrame correlation token to the pointer handle of
the associated ibmvfc_event. However, that commit failed to cast the
pointer to an appropriate type which in this case is a u64. As such
sparse warnings are generated for both correlation token assignments.
ibmvfc.c:2375:36: sparse: incorrect type in argument 1 (different base types)
ibmvfc.c:2375:36: sparse: expected unsigned long long [usertype] val
ibmvfc.c:2375:36: sparse: got struct ibmvfc_event *[assigned] evt
Add the appropriate u64 casts when assigning an ibmvfc_event as a
correlation token.
Fixes: 2aa0102c6688 ("scsi: ibmvfc: Use correlation token to tag commands")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 42e4d35e0d35..7312f31df878 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1744,7 +1744,7 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
iu->pri_task_attr = IBMVFC_SIMPLE_TASK;
}
- vfc_cmd->correlation = cpu_to_be64(evt);
+ vfc_cmd->correlation = cpu_to_be64((u64)evt);
if (likely(!(rc = ibmvfc_map_sg_data(cmnd, evt, vfc_cmd, vhost->dev))))
return ibmvfc_send_event(evt, vhost, 0);
@@ -2418,7 +2418,7 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev)
tmf->flags = cpu_to_be16((IBMVFC_NO_MEM_DESC | IBMVFC_TMF));
evt->sync_iu = &rsp_iu;
- tmf->correlation = cpu_to_be64(evt);
+ tmf->correlation = cpu_to_be64((u64)evt);
init_completion(&evt->comp);
rsp_rc = ibmvfc_send_event(evt, vhost, default_timeout);
--
2.27.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox