* [PATCH v5] arm64: fpsimd: improve stacking logic in non-interruptible context
From: Ard Biesheuvel @ 2016-12-12 17:56 UTC (permalink / raw)
To: linux-arm-kernel
Currently, we allow kernel mode NEON in softirq or hardirq context by
stacking and unstacking a slice of the NEON register file for each call
to kernel_neon_begin() and kernel_neon_end(), respectively.
Given that
a) a CPU typically spends most of its time in userland, during which time
no kernel mode NEON in process context is in progress,
b) a CPU spends most of its time in the kernel doing other things than
kernel mode NEON when it gets interrupted to perform kernel mode NEON
in softirq context
the stacking and subsequent unstacking is only necessary if we are
interrupting a thread while it is performing kernel mode NEON in process
context, which means that in all other cases, we can simply preserve the
userland FPSIMD state once, and only restore it upon return to userland,
even if we are being invoked from softirq or hardirq context.
So instead of checking whether we are running in interrupt context, keep
track of the level of nested kernel mode NEON calls in progress, and only
perform the eager stack/unstack if the level exceeds 1.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/kernel/fpsimd.c | 64 +++++++++++++++++++++++++++++++++-------------
1 file changed, 46 insertions(+), 18 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 394c61db5566..c19363775436 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -220,45 +220,73 @@ void fpsimd_flush_task_state(struct task_struct *t)
#ifdef CONFIG_KERNEL_MODE_NEON
-static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
-static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
+/*
+ * Although unlikely, it is possible for three kernel mode NEON contexts to
+ * be live at the same time: process context, softirq context and hardirq
+ * context. So while the userland context is stashed in the thread's fpsimd
+ * state structure, we need two additional levels of storage.
+ */
+static DEFINE_PER_CPU(struct fpsimd_partial_state, nested_fpsimdstate[2]);
+static DEFINE_PER_CPU(int, kernel_neon_nesting_level);
/*
* Kernel-side NEON support functions
*/
void kernel_neon_begin_partial(u32 num_regs)
{
- if (in_interrupt()) {
- struct fpsimd_partial_state *s = this_cpu_ptr(
- in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+ struct fpsimd_partial_state *s;
+ int level;
- BUG_ON(num_regs > 32);
- fpsimd_save_partial_state(s, roundup(num_regs, 2));
- } else {
+ preempt_disable();
+
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
/*
* Save the userland FPSIMD state if we have one and if we
* haven't done so already. Clear fpsimd_last_state to indicate
* that there is no longer userland FPSIMD state in the
* registers.
*/
- preempt_disable();
- if (current->mm &&
- !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(¤t->thread.fpsimd_state);
+ if (current->mm) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_save_state(¤t->thread.fpsimd_state);
+ local_irq_restore(flags);
+ } else {
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+ }
this_cpu_write(fpsimd_last_state, NULL);
}
+
+ level = this_cpu_inc_return(kernel_neon_nesting_level);
+ BUG_ON(level > 3);
+
+ if (level > 1) {
+ s = this_cpu_ptr(nested_fpsimdstate);
+
+ WARN_ON_ONCE(num_regs > 32);
+ num_regs = min(roundup(num_regs, 2), 32U);
+
+ fpsimd_save_partial_state(&s[level - 2], num_regs);
+ }
}
EXPORT_SYMBOL(kernel_neon_begin_partial);
void kernel_neon_end(void)
{
- if (in_interrupt()) {
- struct fpsimd_partial_state *s = this_cpu_ptr(
- in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
- fpsimd_load_partial_state(s);
- } else {
- preempt_enable();
+ struct fpsimd_partial_state *s;
+ int level;
+
+ level = this_cpu_read(kernel_neon_nesting_level);
+ BUG_ON(level < 1);
+
+ if (level > 1) {
+ s = this_cpu_ptr(nested_fpsimdstate);
+ fpsimd_load_partial_state(&s[level - 2]);
}
+ this_cpu_dec(kernel_neon_nesting_level);
+ preempt_enable();
}
EXPORT_SYMBOL(kernel_neon_end);
--
2.7.4
^ permalink raw reply related
* [PATCH v4] arm64: fpsimd: improve stacking logic in non-interruptible context
From: Ard Biesheuvel @ 2016-12-12 17:55 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161212103512.GE1574@e103592.cambridge.arm.com>
On 12 December 2016 at 10:35, Dave Martin <Dave.Martin@arm.com> wrote:
> On Fri, Dec 09, 2016 at 08:57:20PM +0000, Ard Biesheuvel wrote:
>> On 9 December 2016 at 19:29, Dave Martin <Dave.Martin@arm.com> wrote:
>> > On Fri, Dec 09, 2016 at 06:21:55PM +0000, Catalin Marinas wrote:
>> >> On Fri, Dec 09, 2016 at 04:46:32PM +0000, Ard Biesheuvel wrote:
>> >> > void kernel_neon_begin_partial(u32 num_regs)
>> >> > {
>> >> > - if (in_interrupt()) {
>> >> > - struct fpsimd_partial_state *s = this_cpu_ptr(
>> >> > - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
>> >> > + struct fpsimd_partial_state *s;
>> >> > + int level;
>> >> > +
>> >> > + preempt_disable();
>> >> > +
>> >> > + level = this_cpu_inc_return(kernel_neon_nesting_level);
>> >> > + BUG_ON(level > 3);
>> >> > +
>> >> > + if (level > 1) {
>> >> > + s = this_cpu_ptr(nested_fpsimdstate);
>> >> >
>> >> > - BUG_ON(num_regs > 32);
>> >> > - fpsimd_save_partial_state(s, roundup(num_regs, 2));
>> >> > + WARN_ON_ONCE(num_regs > 32);
>> >> > + num_regs = min(roundup(num_regs, 2), 32U);
>> >> > +
>> >> > + fpsimd_save_partial_state(&s[level - 2], num_regs);
>> >> > } else {
>> >> > /*
>> >> > * Save the userland FPSIMD state if we have one and if we
>> >> > @@ -241,7 +256,6 @@ void kernel_neon_begin_partial(u32 num_regs)
>> >> > * that there is no longer userland FPSIMD state in the
>> >> > * registers.
>> >> > */
>> >> > - preempt_disable();
>> >> > if (current->mm &&
>> >> > !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
>> >> > fpsimd_save_state(¤t->thread.fpsimd_state);
>> >>
>> >> I wonder whether we could actually do this saving and flag/level setting
>> >> in reverse to simplify the races. Something like your previous patch but
>> >> only set TIF_FOREIGN_FPSTATE after saving:
>> >>
>> >> level = this_cpu_read(kernel_neon_nesting_level);
>> >> if (level > 0) {
>> >> ...
>> >> fpsimd_save_partial_state();
>> >> } else {
>> >> if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> >> fpsimd_save_state();
>> >> set_thread_flag(TIF_FOREIGN_FPSTATE);
>> >> }
>> >> this_cpu_inc(kernel_neon_nesting_level);
>> >>
>> >> There is a risk of extra saving if we get an interrupt after
>> >> test_thread_flag() and before set_thread_flag() but I don't think this
>> >> would corrupt any state, just writing things twice.
>> >
>> > I would worry that we can save two states over the same buffer and then
>> > restore an uninitialised buffer in this case unless we are careful.
>> > Because the level-dependent code is now misbracketed by the inc/dec,
>> > a preempting call races with the outer call and use the same value.
>> >
>> > I guess we could do
>> >
>> > if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
>> > fpsimd_save_state();
>> > clear_thread_flag(TIF_FOREIGN_FPSTATE);
>> >
>> > at the start unconditionally, before the _inc_return().
>> >
>> > The task state may then get saved in the middle of being saved, but
>> > as you say it shouldn't have changed in the meantime.
>>
>> It /will/ have changed in the meantime: when the interrupted context
>> is resumed, it will happily proceed with saving the state where it
>> left off, but now the register file contains whatever was left after
>> the interrupt handler is done with the NEON.
>
> Hmmm, true. The NEON regs will have been restored by kernel_neon_end()
> in the inner context, but the extra SVE bits won't have been.
>
Even worse: both the interrupter and the interruptee think they are
preserving the userland context, so once the interrupter is done, it
will not restore the context as it found it. The interruptee will then
proceed and write whatever is left in those registers into the saved
state.
>>
>> > The nested
>> > save code may then do a partial save of the same state on top of that
>> > which could get restored at the inner kernel_neon_end() call.
>> >
>>
>> I'm afraid the only way to deal with this correctly is to treat the
>> whole sequence as a critical section, which means execute it with
>> interrupts disabled.
>
> Or we make the KERNEL_MODE_NEON code SVE-aware, which is where I started
> off. In that case, we do SVE (partial) save/restore whenever
> kernel_mode_neon() is called with live SVE state. The change here is
> that would we consider that there is always live SVE state until the
> fpsimd_save_state() actually finishes at the outer level. We may want
> to delay setting of TIF_FOREIGN_FPSTATE for that purpose.
>
> This means you do take an additional latency hit if you want to use NEON
> in an interrupting context and there happens to be live SVE state. It's
> a consequence of the architecture though -- I don't think there's any
> way to get around it. We can still scale the cost by implementing
> sve_save_partial_state() or something equivalent.
>
> You original inc()+save() ... restore()+dec() seems sound enough if
> viewed this way. Unless I'm missing something?
>
I think having a small critical section is not so bad. Let me send out
a v5 so we can discuss ...
^ permalink raw reply
* [PATCH] watchdog: bcm2835_wdt: set WDOG_HW_RUNNING bit when appropriate
From: Eric Anholt @ 2016-12-12 17:46 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481536123-9279-1-git-send-email-rasmus.villemoes@prevas.dk>
Rasmus Villemoes <rasmus.villemoes@prevas.dk> writes:
> A bootloader may start the watchdog device before handing control to
> the kernel - in that case, we should tell the kernel about it so the
> watchdog framework can keep it alive until userspace opens
> /dev/watchdog0.
I don't believe our current bootloaders (the closed firmware or u-boot)
set up the watchdog, but this seems reasonable since they might want to
later.
Acked-by: Eric Anholt <eric@anholt.net>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20161212/673b1918/attachment.sig>
^ permalink raw reply
* [PATCH] crypto: arm64/aes: reimplement bit-sliced ARM/NEON implementation for arm64
From: Ard Biesheuvel @ 2016-12-12 17:45 UTC (permalink / raw)
To: linux-arm-kernel
This is a reimplementation of the NEON version of the bit-sliced AES
algorithm. This code is heavily based on Andy Polyakov's OpenSSL version
for ARM, which is also available in the kernel. This is an alternative for
the existing NEON implementation for arm64 authored by me, which suffers
from poor performance due to its reliance on the pathologically slow four
register variant of the tbl/tbx NEON instruction.
This version is about ~30% (*) faster than the generic C code, but only in
cases where the input can be 8x interleaved (this is a fundamental property
of bit slicing). For this reason, only the chaining modes ECB, XTS and CTR
are implemented. (The significance of ECB is that it could potentially be
used by other chaining modes)
* Measured on Cortex-A57. Note that this is still an order of magnitude
slower than the implementations that use the dedicated AES instructions
introduced in ARMv8, but those are part of an optional extension, and so
it is good to have a fallback.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/Kconfig | 6 +
arch/arm64/crypto/Makefile | 3 +
arch/arm64/crypto/aes-neonbs-core.S | 905 ++++++++++++++++++++++++++++++++++++
arch/arm64/crypto/aes-neonbs-glue.c | 300 ++++++++++++
4 files changed, 1214 insertions(+)
create mode 100644 arch/arm64/crypto/aes-neonbs-core.S
create mode 100644 arch/arm64/crypto/aes-neonbs-glue.c
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 450a85df041a..cd0e7a6146b7 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -72,4 +72,10 @@ config CRYPTO_CRC32_ARM64
depends on ARM64
select CRYPTO_HASH
+config CRYPTO_AES_NEON_BS
+ tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+
endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index aa8888d7b744..11d20714ec48 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -41,6 +41,9 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o
+obj-$(CONFIG_CRYPTO_AES_NEON_BS) += aes-neon-bs.o
+aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
+
AFLAGS_aes-ce.o := -DINTERLEAVE=4
AFLAGS_aes-neon.o := -DINTERLEAVE=4
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
new file mode 100644
index 000000000000..d027c276cc75
--- /dev/null
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -0,0 +1,905 @@
+/*
+ * Bit sliced AES using NEON instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * The algorithm implemented here is described in detail by the paper
+ * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
+ * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
+ *
+ * This implementation is based primarily on the OpenSSL implementation
+ * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+ rounds .req x11
+ bskey .req x12
+
+ .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
+ eor \b2, \b2, \b1
+ eor \b5, \b5, \b6
+ eor \b3, \b3, \b0
+ eor \b6, \b6, \b2
+ eor \b5, \b5, \b0
+ eor \b6, \b6, \b3
+ eor \b3, \b3, \b7
+ eor \b7, \b7, \b5
+ eor \b3, \b3, \b4
+ eor \b4, \b4, \b5
+ eor \b2, \b2, \b7
+ eor \b3, \b3, \b1
+ eor \b1, \b1, \b5
+ .endm
+
+ .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
+ eor \b0, \b0, \b6
+ eor \b1, \b1, \b4
+ eor \b4, \b4, \b6
+ eor \b2, \b2, \b0
+ eor \b6, \b6, \b1
+ eor \b1, \b1, \b5
+ eor \b5, \b5, \b3
+ eor \b3, \b3, \b7
+ eor \b7, \b7, \b5
+ eor \b2, \b2, \b5
+ eor \b4, \b4, \b7
+ .endm
+
+ .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
+ eor \b1, \b1, \b7
+ eor \b4, \b4, \b7
+ eor \b7, \b7, \b5
+ eor \b1, \b1, \b3
+ eor \b2, \b2, \b5
+ eor \b3, \b3, \b7
+ eor \b6, \b6, \b1
+ eor \b2, \b2, \b0
+ eor \b5, \b5, \b3
+ eor \b4, \b4, \b6
+ eor \b0, \b0, \b6
+ eor \b1, \b1, \b4
+ .endm
+
+ .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
+ eor \b1, \b1, \b5
+ eor \b2, \b2, \b7
+ eor \b3, \b3, \b1
+ eor \b4, \b4, \b5
+ eor \b7, \b7, \b5
+ eor \b3, \b3, \b4
+ eor \b5, \b5, \b0
+ eor \b3, \b3, \b7
+ eor \b6, \b6, \b2
+ eor \b2, \b2, \b1
+ eor \b6, \b6, \b3
+ eor \b3, \b3, \b0
+ eor \b5, \b5, \b6
+ .endm
+
+ .macro mul_gf4, x0, x1, y0, y1, t0, t1
+ eor \t0, \y0, \y1
+ and \t0, \t0, \x0
+ eor \x0, \x0, \x1
+ and \t1, \x1, \y0
+ and \x0, \x0, \y1
+ eor \x1, \t1, \t0
+ eor \x0, \x0, \t1
+ .endm
+
+ .macro mul_gf4_n, x0, x1, y0, y1, t0
+ eor \t0, \y0, \y1
+ and \t0, \t0, \x0
+ eor \x0, \x0, \x1
+ and \x1, \x1, \y0
+ and \x0, \x0, \y1
+ eor \x1, \x1, \x0
+ eor \x0, \x0, \t0
+ .endm
+
+ .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
+ eor \t0, \y0, \y1
+ eor \t1, \y2, \y3
+ and \t0, \t0, \x0
+ and \t1, \t1, \x2
+ eor \x0, \x0, \x1
+ eor \x2, \x2, \x3
+ and \x1, \x1, \y0
+ and \x3, \x3, \y2
+ and \x0, \x0, \y1
+ and \x2, \x2, \y3
+ eor \x1, \x1, \x0
+ eor \x2, \x2, \x3
+ eor \x0, \x0, \t0
+ eor \x3, \x3, \t1
+ .endm
+
+ .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, y1, y2, y3, t0, t1, t2, t3
+ eor \t0, \x0, \x2
+ eor \t1, \x1, \x3
+ mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3
+ eor \y0, \y0, \y2
+ eor \y1, \y1, \y3
+ mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
+ eor \x0, \x0, \t0
+ eor \x2, \x2, \t0
+ eor \x1, \x1, \t1
+ eor \x3, \x3, \t1
+ eor \t0, \x4, \x6
+ eor \t1, \x5, \x7
+ mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
+ eor \y0, \y0, \y2
+ eor \y1, \y1, \y3
+ mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3
+ eor \x4, \x4, \t0
+ eor \x6, \x6, \t0
+ eor \x5, \x5, \t1
+ eor \x7, \x7, \t1
+ .endm
+
+ .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
+ t0, t1, t2, t3, s0, s1, s2, s3
+ eor \t3, \x4, \x6
+ eor \t2, \x5, \x7
+ eor \t1, \x1, \x3
+ eor \s1, \x7, \x6
+ mov \t0, \t2
+ eor \s0, \x0, \x2
+ orr \t2, \t2, \t1
+ eor \s3, \t3, \t0
+ and \s2, \t3, \s0
+ orr \t3, \t3, \s0
+ eor \s0, \s0, \t1
+ and \t0, \t0, \t1
+ eor \t1, \x3, \x2
+ and \s3, \s3, \s0
+ and \s1, \s1, \t1
+ eor \t1, \x4, \x5
+ eor \s0, \x1, \x0
+ eor \t3, \t3, \s1
+ eor \t2, \t2, \s1
+ and \s1, \t1, \s0
+ orr \t1, \t1, \s0
+ eor \t3, \t3, \s3
+ eor \t0, \t0, \s1
+ eor \t2, \t2, \s2
+ eor \t1, \t1, \s3
+ eor \t0, \t0, \s2
+ and \s0, \x7, \x3
+ eor \t1, \t1, \s2
+ and \s1, \x6, \x2
+ and \s2, \x5, \x1
+ orr \s3, \x4, \x0
+ eor \t3, \t3, \s0
+ eor \t1, \t1, \s2
+ eor \t0, \t0, \s3
+ eor \t2, \t2, \s1
+ and \s2, \t3, \t1
+ mov \s0, \t0
+ eor \s1, \t2, \s2
+ eor \s3, \t0, \s2
+ eor \s2, \t0, \s2
+ bsl \s1, \t1, \t0
+ bsl \s3, \t3, \t2
+ eor \t3, \t3, \t2
+ bsl \s0, \s1, \s2
+ bsl \t0, \s2, \s1
+ and \s2, \s0, \s3
+ eor \t1, \t1, \t0
+ eor \s2, \s2, \t3
+ mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
+ \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
+ .endm
+
+ .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
+ t0, t1, t2, t3, s0, s1, s2, s3
+ in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
+ \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
+ inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
+ \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
+ \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
+ \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
+ out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
+ \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
+ .endm
+
+ .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
+ t0, t1, t2, t3, s0, s1, s2, s3
+ inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
+ \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
+ inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
+ \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
+ \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
+ \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
+ inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
+ \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
+ .endm
+
+ .macro enc_next_rk
+ ldp q16, q17, [bskey], #32
+ ldp q18, q19, [bskey], #32
+ ldp q20, q21, [bskey], #32
+ ldp q22, q23, [bskey], #32
+ .endm
+
+ .macro dec_next_rk
+ ldp q16, q17, [bskey, #-128]!
+ ldp q18, q19, [bskey, #32]
+ ldp q20, q21, [bskey, #64]
+ ldp q22, q23, [bskey, #96]
+ .endm
+
+ .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
+ eor \x0\().16b, \x0\().16b, v16.16b
+ eor \x1\().16b, \x1\().16b, v17.16b
+ eor \x2\().16b, \x2\().16b, v18.16b
+ eor \x3\().16b, \x3\().16b, v19.16b
+ eor \x4\().16b, \x4\().16b, v20.16b
+ eor \x5\().16b, \x5\().16b, v21.16b
+ eor \x6\().16b, \x6\().16b, v22.16b
+ eor \x7\().16b, \x7\().16b, v23.16b
+ .endm
+
+ .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
+ tbl \x0\().16b, {\x0\().16b}, \mask\().16b
+ tbl \x1\().16b, {\x1\().16b}, \mask\().16b
+ tbl \x2\().16b, {\x2\().16b}, \mask\().16b
+ tbl \x3\().16b, {\x3\().16b}, \mask\().16b
+ tbl \x4\().16b, {\x4\().16b}, \mask\().16b
+ tbl \x5\().16b, {\x5\().16b}, \mask\().16b
+ tbl \x6\().16b, {\x6\().16b}, \mask\().16b
+ tbl \x7\().16b, {\x7\().16b}, \mask\().16b
+ .endm
+
+ .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
+ t0, t1, t2, t3, t4, t5, t6, t7, inv
+ ext \t0\().16b, \x0\().16b, \x0\().16b, #12
+ ext \t1\().16b, \x1\().16b, \x1\().16b, #12
+ eor \x0\().16b, \x0\().16b, \t0\().16b
+ ext \t2\().16b, \x2\().16b, \x2\().16b, #12
+ eor \x1\().16b, \x1\().16b, \t1\().16b
+ ext \t3\().16b, \x3\().16b, \x3\().16b, #12
+ eor \x2\().16b, \x2\().16b, \t2\().16b
+ ext \t4\().16b, \x4\().16b, \x4\().16b, #12
+ eor \x3\().16b, \x3\().16b, \t3\().16b
+ ext \t5\().16b, \x5\().16b, \x5\().16b, #12
+ eor \x4\().16b, \x4\().16b, \t4\().16b
+ ext \t6\().16b, \x6\().16b, \x6\().16b, #12
+ eor \x5\().16b, \x5\().16b, \t5\().16b
+ ext \t7\().16b, \x7\().16b, \x7\().16b, #12
+ eor \x6\().16b, \x6\().16b, \t6\().16b
+ eor \t1\().16b, \t1\().16b, \x0\().16b
+ eor \x7\().16b, \x7\().16b, \t7\().16b
+ ext \x0\().16b, \x0\().16b, \x0\().16b, #8
+ eor \t2\().16b, \t2\().16b, \x1\().16b
+ eor \t0\().16b, \t0\().16b, \x7\().16b
+ eor \t1\().16b, \t1\().16b, \x7\().16b
+ ext \x1\().16b, \x1\().16b, \x1\().16b, #8
+ eor \t5\().16b, \t5\().16b, \x4\().16b
+ eor \x0\().16b, \x0\().16b, \t0\().16b
+ eor \t6\().16b, \t6\().16b, \x5\().16b
+ eor \x1\().16b, \x1\().16b, \t1\().16b
+ ext \t0\().16b, \x4\().16b, \x4\().16b, #8
+ eor \t4\().16b, \t4\().16b, \x3\().16b
+ ext \t1\().16b, \x5\().16b, \x5\().16b, #8
+ eor \t7\().16b, \t7\().16b, \x6\().16b
+ ext \x4\().16b, \x3\().16b, \x3\().16b, #8
+ eor \t3\().16b, \t3\().16b, \x2\().16b
+ ext \x5\().16b, \x7\().16b, \x7\().16b, #8
+ eor \t4\().16b, \t4\().16b, \x7\().16b
+ ext \x3\().16b, \x6\().16b, \x6\().16b, #8
+ eor \t3\().16b, \t3\().16b, \x7\().16b
+ ext \x6\().16b, \x2\().16b, \x2\().16b, #8
+ eor \x7\().16b, \t1\().16b, \t5\().16b
+ .ifb \inv
+ eor \x2\().16b, \t0\().16b, \t4\().16b
+ eor \x4\().16b, \x4\().16b, \t3\().16b
+ eor \x5\().16b, \x5\().16b, \t7\().16b
+ eor \x3\().16b, \x3\().16b, \t6\().16b
+ eor \x6\().16b, \x6\().16b, \t2\().16b
+ .else
+ eor \t3\().16b, \t3\().16b, \x4\().16b
+ eor \x5\().16b, \x5\().16b, \t7\().16b
+ eor \x2\().16b, \x3\().16b, \t6\().16b
+ eor \x3\().16b, \t0\().16b, \t4\().16b
+ eor \x4\().16b, \x6\().16b, \t2\().16b
+ mov \x6\().16b, \t3\().16b
+ .endif
+ .endm
+
+ .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
+ t0, t1, t2, t3, t4, t5, t6, t7
+ ext \t0\().16b, \x0\().16b, \x0\().16b, #8
+ ext \t6\().16b, \x6\().16b, \x6\().16b, #8
+ ext \t7\().16b, \x7\().16b, \x7\().16b, #8
+ eor \t0\().16b, \t0\().16b, \x0\().16b
+ ext \t1\().16b, \x1\().16b, \x1\().16b, #8
+ eor \t6\().16b, \t6\().16b, \x6\().16b
+ ext \t2\().16b, \x2\().16b, \x2\().16b, #8
+ eor \t7\().16b, \t7\().16b, \x7\().16b
+ ext \t3\().16b, \x3\().16b, \x3\().16b, #8
+ eor \t1\().16b, \t1\().16b, \x1\().16b
+ ext \t4\().16b, \x4\().16b, \x4\().16b, #8
+ eor \t2\().16b, \t2\().16b, \x2\().16b
+ ext \t5\().16b, \x5\().16b, \x5\().16b, #8
+ eor \t3\().16b, \t3\().16b, \x3\().16b
+ eor \t4\().16b, \t4\().16b, \x4\().16b
+ eor \t5\().16b, \t5\().16b, \x5\().16b
+ eor \x0\().16b, \x0\().16b, \t6\().16b
+ eor \x1\().16b, \x1\().16b, \t6\().16b
+ eor \x2\().16b, \x2\().16b, \t0\().16b
+ eor \x4\().16b, \x4\().16b, \t2\().16b
+ eor \x3\().16b, \x3\().16b, \t1\().16b
+ eor \x1\().16b, \x1\().16b, \t7\().16b
+ eor \x2\().16b, \x2\().16b, \t7\().16b
+ eor \x4\().16b, \x4\().16b, \t6\().16b
+ eor \x5\().16b, \x5\().16b, \t3\().16b
+ eor \x3\().16b, \x3\().16b, \t6\().16b
+ eor \x6\().16b, \x6\().16b, \t4\().16b
+ eor \x4\().16b, \x4\().16b, \t7\().16b
+ eor \x5\().16b, \x5\().16b, \t7\().16b
+ eor \x7\().16b, \x7\().16b, \t5\().16b
+ mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
+ \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
+ .endm
+
+ .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
+ ushr \t0\().2d, \b0\().2d, #\n
+ ushr \t1\().2d, \b1\().2d, #\n
+ eor \t0\().16b, \t0\().16b, \a0\().16b
+ eor \t1\().16b, \t1\().16b, \a1\().16b
+ and \t0\().16b, \t0\().16b, \mask\().16b
+ and \t1\().16b, \t1\().16b, \mask\().16b
+ eor \a0\().16b, \a0\().16b, \t0\().16b
+ shl \t0\().2d, \t0\().2d, #\n
+ eor \a1\().16b, \a1\().16b, \t1\().16b
+ shl \t1\().2d, \t1\().2d, #\n
+ eor \b0\().16b, \b0\().16b, \t0\().16b
+ eor \b1\().16b, \b1\().16b, \t1\().16b
+ .endm
+
+ .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
+ movi \t0\().16b, #0x55
+ movi \t1\().16b, #0x33
+ swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
+ swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
+ movi \t0\().16b, #0x0f
+ swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
+ swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
+ swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
+ swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
+ .endm
+
+
+ .align 6
+M0: .octa 0x0004080c0105090d02060a0e03070b0f
+
+M0SR: .octa 0x0004080c05090d010a0e02060f03070b
+SR: .octa 0x0f0e0d0c0a09080b0504070600030201
+SRM0: .octa 0x01060b0c0207080d0304090e00050a0f
+
+M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03
+ISR: .octa 0x0f0e0d0c080b0a090504070602010003
+ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f
+
+ /*
+ * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
+ */
+ENTRY(aesbs_convert_key)
+ ld1 {v7.4s}, [x1], #16 // load round 0 key
+ ld1 {v17.4s}, [x1], #16 // load round 1 key
+
+ movi v8.16b, #0x01 // bit masks
+ movi v9.16b, #0x02
+ movi v10.16b, #0x04
+ movi v11.16b, #0x08
+ movi v12.16b, #0x10
+ movi v13.16b, #0x20
+ movi v14.16b, #0x40
+ movi v15.16b, #0x80
+ ldr q16, M0
+
+ sub x2, x2, #1
+ str q7, [x0], #16 // save round 0 key
+
+.Lkey_loop:
+ tbl v7.16b ,{v17.16b}, v16.16b
+ ld1 {v17.4s}, [x1], #16 // load next round key
+
+ cmtst v0.16b, v7.16b, v8.16b
+ cmtst v1.16b, v7.16b, v9.16b
+ cmtst v2.16b, v7.16b, v10.16b
+ cmtst v3.16b, v7.16b, v11.16b
+ cmtst v4.16b, v7.16b, v12.16b
+ cmtst v5.16b, v7.16b, v13.16b
+ cmtst v6.16b, v7.16b, v14.16b
+ cmtst v7.16b, v7.16b, v15.16b
+ not v0.16b, v0.16b
+ not v1.16b, v1.16b
+ not v5.16b, v5.16b
+ not v6.16b, v6.16b
+
+ subs x2, x2, #1
+ stp q2, q3, [x0, #32]
+ stp q4, q5, [x0, #64]
+ stp q6, q7, [x0, #96]
+ stp q0, q1, [x0], #128
+ b.ne .Lkey_loop
+
+ movi v7.16b, #0x63 // compose .L63
+ eor v17.16b, v17.16b, v7.16b
+ str q17, [x0]
+ ret
+ENDPROC(aesbs_convert_key)
+
+ .align 4
+aesbs_encrypt8:
+ ldr q9, [bskey], #16 // round 0 key
+ ldr q8, M0SR
+ ldr q24, SR
+
+ eor v10.16b, v0.16b, v9.16b // xor with round0 key
+ eor v11.16b, v1.16b, v9.16b
+ tbl v0.16b, {v10.16b}, v8.16b
+ eor v12.16b, v2.16b, v9.16b
+ tbl v1.16b, {v11.16b}, v8.16b
+ eor v13.16b, v3.16b, v9.16b
+ tbl v2.16b, {v12.16b}, v8.16b
+ eor v14.16b, v4.16b, v9.16b
+ tbl v3.16b, {v13.16b}, v8.16b
+ eor v15.16b, v5.16b, v9.16b
+ tbl v4.16b, {v14.16b}, v8.16b
+ eor v10.16b, v6.16b, v9.16b
+ tbl v5.16b, {v15.16b}, v8.16b
+ eor v11.16b, v7.16b, v9.16b
+ tbl v6.16b, {v10.16b}, v8.16b
+ tbl v7.16b, {v11.16b}, v8.16b
+
+ bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
+
+ sub rounds, rounds, #1
+ b .Lenc_sbox
+
+.Lenc_loop:
+ shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
+.Lenc_sbox:
+ sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
+ v13, v14, v15
+ subs rounds, rounds, #1
+ b.cc .Lenc_done
+
+ enc_next_rk
+
+ mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
+ v13, v14, v15
+
+ add_round_key v0, v1, v2, v3, v4, v5, v6, v7
+
+ b.ne .Lenc_loop
+ ldr q24, SRM0
+ b .Lenc_loop
+
+.Lenc_done:
+ ldr q12, [bskey] // last round key
+
+ bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11
+
+ eor v0.16b, v0.16b, v12.16b
+ eor v1.16b, v1.16b, v12.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v6.16b, v6.16b, v12.16b
+ eor v3.16b, v3.16b, v12.16b
+ eor v7.16b, v7.16b, v12.16b
+ eor v2.16b, v2.16b, v12.16b
+ eor v5.16b, v5.16b, v12.16b
+ ret
+ENDPROC(aesbs_encrypt8)
+
+ .align 4
+aesbs_decrypt8:
+ lsl x9, rounds, #7
+ add bskey, bskey, x9
+
+ ldr q9, [bskey, #-112]! // round 0 key
+ ldr q8, M0ISR
+ ldr q24, ISR
+
+ eor v10.16b, v0.16b, v9.16b // xor with round0 key
+ eor v11.16b, v1.16b, v9.16b
+ tbl v0.16b, {v10.16b}, v8.16b
+ eor v12.16b, v2.16b, v9.16b
+ tbl v1.16b, {v11.16b}, v8.16b
+ eor v13.16b, v3.16b, v9.16b
+ tbl v2.16b, {v12.16b}, v8.16b
+ eor v14.16b, v4.16b, v9.16b
+ tbl v3.16b, {v13.16b}, v8.16b
+ eor v15.16b, v5.16b, v9.16b
+ tbl v4.16b, {v14.16b}, v8.16b
+ eor v10.16b, v6.16b, v9.16b
+ tbl v5.16b, {v15.16b}, v8.16b
+ eor v11.16b, v7.16b, v9.16b
+ tbl v6.16b, {v10.16b}, v8.16b
+ tbl v7.16b, {v11.16b}, v8.16b
+
+ bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
+
+ sub rounds, rounds, #1
+ b .Ldec_sbox
+
+.Ldec_loop:
+ shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
+.Ldec_sbox:
+ inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
+ v13, v14, v15
+ subs rounds, rounds, #1
+ b.cc .Ldec_done
+
+ dec_next_rk
+
+ add_round_key v0, v1, v6, v4, v2, v7, v3, v5
+
+ inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
+ v13, v14, v15
+
+ b.ne .Ldec_loop
+ ldr q24, ISRM0
+ b .Ldec_loop
+.Ldec_done:
+ ldr q12, [bskey, #-16] // last round key
+
+ bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11
+
+ eor v0.16b, v0.16b, v12.16b
+ eor v1.16b, v1.16b, v12.16b
+ eor v6.16b, v6.16b, v12.16b
+ eor v4.16b, v4.16b, v12.16b
+ eor v2.16b, v2.16b, v12.16b
+ eor v7.16b, v7.16b, v12.16b
+ eor v3.16b, v3.16b, v12.16b
+ eor v5.16b, v5.16b, v12.16b
+ ret
+ENDPROC(aesbs_decrypt8)
+
+ /*
+ * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks)
+ * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks)
+ */
+ .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+99: mov x5, #1
+ lsl x5, x5, x4
+ subs w4, w4, #8
+ csel x4, x4, xzr, pl
+ csel x5, x5, xzr, mi
+
+ ld1 {v0.16b}, [x1], #16
+ tbnz x5, #1, 0f
+ ld1 {v1.16b}, [x1], #16
+ tbnz x5, #2, 0f
+ ld1 {v2.16b}, [x1], #16
+ tbnz x5, #3, 0f
+ ld1 {v3.16b}, [x1], #16
+ tbnz x5, #4, 0f
+ ld1 {v4.16b}, [x1], #16
+ tbnz x5, #5, 0f
+ ld1 {v5.16b}, [x1], #16
+ tbnz x5, #6, 0f
+ ld1 {v6.16b}, [x1], #16
+ tbnz x5, #7, 0f
+ ld1 {v7.16b}, [x1], #16
+
+0: mov bskey, x2
+ mov rounds, x3
+ bl \do8
+
+ st1 {\o0\().16b}, [x0], #16
+ tbnz x5, #1, 1f
+ st1 {\o1\().16b}, [x0], #16
+ tbnz x5, #2, 1f
+ st1 {\o2\().16b}, [x0], #16
+ tbnz x5, #3, 1f
+ st1 {\o3\().16b}, [x0], #16
+ tbnz x5, #4, 1f
+ st1 {\o4\().16b}, [x0], #16
+ tbnz x5, #5, 1f
+ st1 {\o5\().16b}, [x0], #16
+ tbnz x5, #6, 1f
+ st1 {\o6\().16b}, [x0], #16
+ tbnz x5, #7, 1f
+ st1 {\o7\().16b}, [x0], #16
+
+ cbnz x4, 99b
+
+1: ldp x29, x30, [sp], #16
+ ret
+ .endm
+
+ .align 4
+ENTRY(aesbs_ecb_encrypt)
+ __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
+ENDPROC(aesbs_ecb_encrypt)
+
+ .align 4
+ENTRY(aesbs_ecb_decrypt)
+ __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
+ENDPROC(aesbs_ecb_decrypt)
+
+ .macro next_tweak, out, in, const, tmp
+ sshr \tmp\().2d, \in\().2d, #63
+ and \tmp\().16b, \tmp\().16b, \const\().16b
+ add \out\().2d, \in\().2d, \in\().2d
+ ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+ eor \out\().16b, \out\().16b, \tmp\().16b
+ .endm
+
+ .align 4
+.Lxts_mul_x:
+CPU_LE( .quad 1, 0x87 )
+CPU_BE( .quad 0x87, 1 )
+
+ /*
+ * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ */
+__xts_crypt8:
+ mov x6, #1
+ lsl x6, x6, x4
+ subs w4, w4, #8
+ csel x4, x4, xzr, pl
+ csel x6, x6, xzr, mi
+
+ ld1 {v0.16b}, [x1], #16
+ next_tweak v26, v25, v30, v31
+ eor v0.16b, v0.16b, v25.16b
+ tbnz x6, #1, 0f
+
+ ld1 {v1.16b}, [x1], #16
+ next_tweak v27, v26, v30, v31
+ eor v1.16b, v1.16b, v26.16b
+ tbnz x6, #2, 0f
+
+ ld1 {v2.16b}, [x1], #16
+ next_tweak v28, v27, v30, v31
+ eor v2.16b, v2.16b, v27.16b
+ tbnz x6, #3, 0f
+
+ ld1 {v3.16b}, [x1], #16
+ next_tweak v29, v28, v30, v31
+ eor v3.16b, v3.16b, v28.16b
+ tbnz x6, #4, 0f
+
+ ld1 {v4.16b}, [x1], #16
+ str q29, [sp, #16]
+ eor v4.16b, v4.16b, v29.16b
+ next_tweak v29, v29, v30, v31
+ tbnz x6, #5, 0f
+
+ ld1 {v5.16b}, [x1], #16
+ str q29, [sp, #32]
+ eor v5.16b, v5.16b, v29.16b
+ next_tweak v29, v29, v30, v31
+ tbnz x6, #6, 0f
+
+ ld1 {v6.16b}, [x1], #16
+ str q29, [sp, #48]
+ eor v6.16b, v6.16b, v29.16b
+ next_tweak v29, v29, v30, v31
+ tbnz x6, #7, 0f
+
+ ld1 {v7.16b}, [x1], #16
+ str q29, [sp, #64]
+ eor v7.16b, v7.16b, v29.16b
+ next_tweak v29, v29, v30, v31
+
+0: mov bskey, x2
+ mov rounds, x3
+ br x7
+ENDPROC(__xts_crypt8)
+
+ .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
+ stp x29, x30, [sp, #-80]!
+ mov x29, sp
+
+ ldr q30, .Lxts_mul_x
+ ld1 {v25.16b}, [x5]
+
+99: adr x7, \do8
+ bl __xts_crypt8
+
+ ldp q16, q17, [sp, #16]
+ ldp q18, q19, [sp, #48]
+
+ eor \o0\().16b, \o0\().16b, v25.16b
+ eor \o1\().16b, \o1\().16b, v26.16b
+ eor \o2\().16b, \o2\().16b, v27.16b
+ eor \o3\().16b, \o3\().16b, v28.16b
+
+ st1 {\o0\().16b}, [x0], #16
+ mov v25.16b, v26.16b
+ tbnz x6, #1, 1f
+ st1 {\o1\().16b}, [x0], #16
+ mov v25.16b, v27.16b
+ tbnz x6, #2, 1f
+ st1 {\o2\().16b}, [x0], #16
+ mov v25.16b, v28.16b
+ tbnz x6, #3, 1f
+ st1 {\o3\().16b}, [x0], #16
+ mov v25.16b, v29.16b
+ tbnz x6, #4, 1f
+
+ eor \o4\().16b, \o4\().16b, v16.16b
+ eor \o5\().16b, \o5\().16b, v17.16b
+ eor \o6\().16b, \o6\().16b, v18.16b
+ eor \o7\().16b, \o7\().16b, v19.16b
+
+ st1 {\o4\().16b}, [x0], #16
+ tbnz x6, #5, 1f
+ st1 {\o5\().16b}, [x0], #16
+ tbnz x6, #6, 1f
+ st1 {\o6\().16b}, [x0], #16
+ tbnz x6, #7, 1f
+ st1 {\o7\().16b}, [x0], #16
+
+ cbnz x4, 99b
+
+1: st1 {v25.16b}, [x5]
+ ldp x29, x30, [sp], #80
+ ret
+ .endm
+
+ENTRY(aesbs_xts_encrypt)
+ __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
+ENDPROC(aesbs_xts_encrypt)
+
+ENTRY(aesbs_xts_decrypt)
+ __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
+ENDPROC(aesbs_xts_decrypt)
+
+ .macro next_ctr, v
+ mov \v\().d[1], x8
+ mov \v\().d[0], x7
+ adds x8, x8, #1
+ adc x7, x7, xzr
+ rev64 \v\().16b, \v\().16b
+ .endm
+
+ /*
+ * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ * int rounds, int blocks, u8 iv[], bool final)
+ */
+ENTRY(aesbs_ctr_encrypt)
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ add x4, x4, x6 // do one extra block if final
+
+ ldp x7, x8, [x5]
+ ld1 {v0.16b}, [x5]
+CPU_LE( rev x7, x7 )
+CPU_LE( rev x8, x8 )
+ adds x8, x8, #1
+ adc x7, x7, xzr
+
+99: mov x9, #1
+ lsl x9, x9, x4
+ subs w4, w4, #8
+ csel x4, x4, xzr, pl
+ csel x9, x9, xzr, le
+
+ tbnz x9, #1, 0f
+
+ next_ctr v1
+ tbnz x9, #2, 0f
+
+ next_ctr v2
+ tbnz x9, #3, 0f
+
+ next_ctr v3
+ tbnz x9, #4, 0f
+
+ next_ctr v4
+ tbnz x9, #5, 0f
+
+ next_ctr v5
+ tbnz x9, #6, 0f
+
+ next_ctr v6
+ tbnz x9, #7, 0f
+
+ next_ctr v7
+
+0: mov bskey, x2
+ mov rounds, x3
+ bl aesbs_encrypt8
+
+ lsr x9, x9, x6 // disregard the final block
+ tbnz x9, #0, 0f
+
+ ld1 {v8.16b}, [x1], #16
+ eor v0.16b, v0.16b, v8.16b
+ st1 {v0.16b}, [x0], #16
+ tbnz x9, #1, 1f
+
+ ld1 {v9.16b}, [x1], #16
+ eor v1.16b, v1.16b, v9.16b
+ st1 {v1.16b}, [x0], #16
+ tbnz x9, #2, 2f
+
+ ld1 {v10.16b}, [x1], #16
+ eor v4.16b, v4.16b, v10.16b
+ st1 {v4.16b}, [x0], #16
+ tbnz x9, #3, 3f
+
+ ld1 {v11.16b}, [x1], #16
+ eor v6.16b, v6.16b, v11.16b
+ st1 {v6.16b}, [x0], #16
+ tbnz x9, #4, 4f
+
+ ld1 {v12.16b}, [x1], #16
+ eor v3.16b, v3.16b, v12.16b
+ st1 {v3.16b}, [x0], #16
+ tbnz x9, #5, 5f
+
+ ld1 {v13.16b}, [x1], #16
+ eor v7.16b, v7.16b, v13.16b
+ st1 {v7.16b}, [x0], #16
+ tbnz x9, #6, 6f
+
+ ld1 {v14.16b}, [x1], #16
+ eor v2.16b, v2.16b, v14.16b
+ st1 {v2.16b}, [x0], #16
+ tbnz x9, #7, 7f
+
+ ld1 {v15.16b}, [x1], #16
+ eor v5.16b, v5.16b, v15.16b
+ st1 {v5.16b}, [x0], #16
+
+ next_ctr v0
+ cbnz x4, 99b
+
+0: st1 {v0.16b}, [x5]
+8: ldp x29, x30, [sp], #16
+ ret
+
+ /*
+ * If we are handling the tail of the input (x6 == 1), return the
+ * final keystream block back to the caller via the IV buffer.
+ */
+1: cbz x6, 8b
+ st1 {v1.16b}, [x5]
+ b 8b
+2: cbz x6, 8b
+ st1 {v4.16b}, [x5]
+ b 8b
+3: cbz x6, 8b
+ st1 {v6.16b}, [x5]
+ b 8b
+4: cbz x6, 8b
+ st1 {v3.16b}, [x5]
+ b 8b
+5: cbz x6, 8b
+ st1 {v7.16b}, [x5]
+ b 8b
+6: cbz x6, 8b
+ st1 {v2.16b}, [x5]
+ b 8b
+7: cbz x6, 8b
+ st1 {v5.16b}, [x5]
+ b 8b
+ENDPROC(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
new file mode 100644
index 000000000000..57982172563c
--- /dev/null
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -0,0 +1,300 @@
+/*
+ * Bit sliced AES using NEON instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/xts.h>
+#include <linux/module.h>
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks);
+asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks);
+
+asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]);
+asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]);
+
+asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], bool final);
+
+asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
+
+struct aesbs_key {
+ u8 key[13 * (8 * AES_BLOCK_SIZE) + 32];
+};
+
+struct aesbs_ctx {
+ struct aesbs_key bskey;
+ int rounds;
+};
+
+struct aesbs_xts_ctx {
+ struct aesbs_key bskey;
+ struct crypto_cipher *tweak_tfm;
+ int rounds;
+};
+
+static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_aes_ctx rk;
+ int err;
+
+ err = crypto_aes_expand_key(&rk, in_key, key_len);
+ if (err)
+ return err;
+
+ ctx->rounds = 6 + key_len / 4;
+
+ kernel_neon_begin();
+ aesbs_convert_key(ctx->bskey.key, rk.key_enc, ctx->rounds);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int xts_init(struct crypto_skcipher *tfm)
+{
+ struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ctx->tweak_tfm))
+ return PTR_ERR(ctx->tweak_tfm);
+
+ return 0;
+}
+
+static void xts_exit(struct crypto_skcipher *tfm)
+{
+ struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_cipher(ctx->tweak_tfm);
+}
+
+static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_aes_ctx rk;
+ int err;
+
+ err = xts_verify_key(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len / 2,
+ key_len / 2);
+ if (err)
+ return err;
+
+ err = crypto_aes_expand_key(&rk, in_key, key_len / 2);
+ if (err)
+ return err;
+
+ ctx->rounds = 6 + key_len / 8;
+
+ kernel_neon_begin();
+ aesbs_convert_key(ctx->bskey.key, rk.key_enc, ctx->rounds);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int __ecb_crypt(struct skcipher_request *req,
+ void (*fn)(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks))
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ kernel_neon_begin();
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ if (walk.nbytes < walk.total)
+ blocks = round_down(blocks,
+ walk.chunksize / AES_BLOCK_SIZE);
+
+ fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->bskey.key,
+ ctx->rounds, blocks);
+ err = skcipher_walk_done(&walk,
+ walk.nbytes - blocks * AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int ecb_encrypt(struct skcipher_request *req)
+{
+ return __ecb_crypt(req, aesbs_ecb_encrypt);
+}
+
+static int ecb_decrypt(struct skcipher_request *req)
+{
+ return __ecb_crypt(req, aesbs_ecb_decrypt);
+}
+
+static int __xts_crypt(struct skcipher_request *req,
+ void (*fn)(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]))
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);
+
+ kernel_neon_begin();
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ if (walk.nbytes < walk.total)
+ blocks = round_down(blocks,
+ walk.chunksize / AES_BLOCK_SIZE);
+
+ fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->bskey.key,
+ ctx->rounds, blocks, walk.iv);
+ err = skcipher_walk_done(&walk,
+ walk.nbytes - blocks * AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_encrypt(struct skcipher_request *req)
+{
+ return __xts_crypt(req, aesbs_xts_encrypt);
+}
+
+static int xts_decrypt(struct skcipher_request *req)
+{
+ return __xts_crypt(req, aesbs_xts_decrypt);
+}
+
+static int ctr_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, true);
+
+ kernel_neon_begin();
+ while (walk.nbytes > 0) {
+ unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ bool final = (walk.total % AES_BLOCK_SIZE) != 0;
+
+ if (walk.nbytes < walk.total) {
+ blocks = round_down(blocks,
+ walk.chunksize / AES_BLOCK_SIZE);
+ final = false;
+ }
+
+ aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->bskey.key, ctx->rounds, blocks, walk.iv,
+ final);
+
+ if (final) {
+ u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+
+ if (dst != src)
+ memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
+ crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE);
+
+ err = skcipher_walk_done(&walk, 0);
+ break;
+ }
+ err = skcipher_walk_done(&walk,
+ walk.nbytes - blocks * AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct skcipher_alg aes_algs[] = { {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-neonbs",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct aesbs_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .chunksize = 8 * AES_BLOCK_SIZE,
+ .setkey = aesbs_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+}, {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-aes-neonbs",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .chunksize = 8 * AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_xts_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ .init = xts_init,
+ .exit = xts_exit,
+}, {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-neonbs",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct aesbs_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .chunksize = 8 * AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_setkey,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+} };
+
+static int __init aes_init(void)
+{
+ return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void aes_exit(void)
+{
+ crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+module_init(aes_init);
+module_exit(aes_exit);
--
2.7.4
^ permalink raw reply related
* [PATCH] ARM: dts: vexpress: Support GICC_DIR operations
From: Marc Zyngier @ 2016-12-12 17:35 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161210201351.25894-1-christoffer.dall@linaro.org>
[+Sudeep]
On 10/12/16 20:13, Christoffer Dall wrote:
> The GICv2 CPU interface registers span across 8K, not 4K as indicated in
> the DT. Only the GICC_DIR register is located after the initial 4K
> boundary, leaving a functional system but without support for separately
> EOI'ing and deactivating interrupts.
>
> After this change the system support split priority drop and interrupt
> deactivation.
>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
> arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
> index 0205c97..2e0cf39 100644
> --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
> +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
> @@ -126,7 +126,7 @@
> #address-cells = <0>;
> interrupt-controller;
> reg = <0 0x2c001000 0 0x1000>,
> - <0 0x2c002000 0 0x1000>,
> + <0 0x2c002000 0 0x2000>,
> <0 0x2c004000 0 0x2000>,
> <0 0x2c006000 0 0x2000>;
> interrupts = <1 9 0xf04>;
>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply
* [PATCH 4/4] dt-bindings: input: Specify the interrupt number of TPS65217 power button
From: Rob Herring @ 2016-12-12 17:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161209062833.5768-5-woogyom.kim@gmail.com>
On Fri, Dec 09, 2016 at 03:28:33PM +0900, Milo Kim wrote:
> Specify the power button interrupt number which is from the datasheet.
>
> Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
> ---
> Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
Acked-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* [PATCH 3/4] dt-bindings: power/supply: Update TPS65217 properties
From: Rob Herring @ 2016-12-12 17:26 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161209062833.5768-4-woogyom.kim@gmail.com>
On Fri, Dec 09, 2016 at 03:28:32PM +0900, Milo Kim wrote:
> Add interrupt specifiers for USB and AC charger input. Interrupt numbers
> are from the datasheet.
> Fix wrong property for compatible string.
>
> Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
> ---
> .../devicetree/bindings/power/supply/tps65217_charger.txt | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
Acked-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* [PATCH 2/4] dt-bindings: mfd: Remove TPS65217 interrupts
From: Rob Herring @ 2016-12-12 17:25 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161209062833.5768-3-woogyom.kim@gmail.com>
On Fri, Dec 09, 2016 at 03:28:31PM +0900, Milo Kim wrote:
> Interrupt numbers are from the datasheet, so no need to keep them in
> the ABI. Use the number in the DT file.
I don't see the purpose of ripping this out. The headers have always
been for convienence, not whether the values come from the datasheet or
not.
> Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
> ---
> arch/arm/boot/dts/am335x-bone-common.dtsi | 8 +++-----
> include/dt-bindings/mfd/tps65217.h | 26 --------------------------
> 2 files changed, 3 insertions(+), 31 deletions(-)
> delete mode 100644 include/dt-bindings/mfd/tps65217.h
^ permalink raw reply
* [PATCH] clk: bcm: Fix 'maybe-uninitialized' warning in bcm2835_clock_choose_div_and_prate()
From: Eric Anholt @ 2016-12-12 17:24 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481529653-28133-1-git-send-email-boris.brezillon@free-electrons.com>
Boris Brezillon <boris.brezillon@free-electrons.com> writes:
> best_rate is reported as potentially uninitialized by gcc.
>
> Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
> Fixes: 155e8b3b0ee3 ("clk: bcm: Support rate change propagation on bcm2835 clocks")
> Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reviewed-by: Eric Anholt <eric@anholt.net>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20161212/65cadde3/attachment.sig>
^ permalink raw reply
* [PATCH] dt-bindings: Document the hi3660 reset bindings
From: Rob Herring @ 2016-12-12 17:20 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481249504-7942-1-git-send-email-zhangfei.gao@linaro.org>
On Fri, Dec 09, 2016 at 10:11:44AM +0800, Zhangfei Gao wrote:
> Add DT bindings documentation for hi3660 SoC reset controller.
>
> Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> ---
> .../bindings/reset/hisilicon,hi3660-reset.txt | 43 ++++++++++++++++++++++
> 1 file changed, 43 insertions(+)
> create mode 100644 Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.txt
Acked-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* [PATCH 1/2] dt-bindings: zx296718-clk: add compatible for audio clock controller
From: Rob Herring @ 2016-12-12 17:10 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481189157-8995-1-git-send-email-shawnguo@kernel.org>
On Thu, Dec 08, 2016 at 05:25:56PM +0800, Shawn Guo wrote:
> From: Shawn Guo <shawn.guo@linaro.org>
>
> It adds the compatible string for zx296718 audio clock controller.
>
> Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
> ---
> Documentation/devicetree/bindings/clock/zx296718-clk.txt | 3 +++
> 1 file changed, 3 insertions(+)
Acked-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* [PATCH 5/5] Documentation: fsl-quadspi: Add fsl, ls1012a-qspi compatible string
From: Rob Herring @ 2016-12-12 17:09 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481188984-43683-6-git-send-email-yao.yuan@freescale.com>
On Thu, Dec 08, 2016 at 05:23:04PM +0800, Yuan Yao wrote:
> From: Yuan Yao <yao.yuan@nxp.com>
Same problem in this subject too.
>
> new compatible string: "fsl,ls1012a-qspi".
>
> Signed-off-by: Yuan Yao <yao.yuan@nxp.com>
> ---
> Documentation/devicetree/bindings/mtd/fsl-quadspi.txt | 1 +
> 1 file changed, 1 insertion(+)
Acked-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* [PATCH 3/5] Documentation: dt: mtd: add chip support for "jedec, spi-nor"
From: Rob Herring @ 2016-12-12 17:09 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481188984-43683-4-git-send-email-yao.yuan@freescale.com>
On Thu, Dec 08, 2016 at 05:23:02PM +0800, Yuan Yao wrote:
> From: Yuan Yao <yao.yuan@nxp.com>
The compatible string is wrong in the subject.
>
> "sst25wf040b" and "en25s64" are also chip compatible with SPI NOR flash.
>
> Signed-off-by: Yuan Yao <yao.yuan@nxp.com>
> ---
> Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt | 2 ++
> 1 file changed, 2 insertions(+)
Otherwise,
Acked-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* [PATCH 2/5] Documentation: fsl: dspi: Add fsl, ls1012a-dspi compatible string
From: Rob Herring @ 2016-12-12 17:08 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481188984-43683-3-git-send-email-yao.yuan@freescale.com>
On Thu, Dec 08, 2016 at 05:23:01PM +0800, Yuan Yao wrote:
> From: Yuan Yao <yao.yuan@nxp.com>
>
> new compatible string: "fsl,ls1012a-dspi".
>
> Signed-off-by: Yuan Yao <yao.yuan@nxp.com>
> ---
> Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt | 1 +
> 1 file changed, 1 insertion(+)
Acked-by: Rob Herring <robh@kernel.org>
^ permalink raw reply
* [PATCH] efi/libstub: arm*: Pass latest memory map to the kernel
From: Jeffrey Hugo @ 2016-12-12 17:00 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <584E6F50.3020901@arm.com>
On 12/12/2016 2:35 AM, James Morse wrote:
> Hi Ard,
>
> On 09/12/16 18:24, Ard Biesheuvel wrote:
>> As reported by James, the current libstub code involving the annotated
>> memory map only works somewhat correctly by accident, due to the fact
>> that a pool allocation happens to be reused immediately, retaining its
>> former contents.
>>
>> Instead of juggling memory maps, which makes the code more complex than
>> it needs to be, simply put a placholder value into the FDT, and only
>> write the actual value after ExitBootServices() has been called.
>
>> diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
>> index a6a93116a8f0..5d39dff77f17 100644
>> --- a/drivers/firmware/efi/libstub/fdt.c
>> +++ b/drivers/firmware/efi/libstub/fdt.c
>> @@ -101,7 +101,7 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
>> if (status)
>> goto fdt_set_fail;
>>
>> - fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map);
>> + fdt_val64 = U64_MAX; /* placeholder */
>> status = fdt_setprop(fdt, node, "linux,uefi-mmap-start",
>> &fdt_val64, sizeof(fdt_val64));
>> if (status)
>> @@ -148,6 +148,24 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
>> return EFI_LOAD_ERROR;
>> }
>>
>> +static efi_status_t update_fdt_memmap(void *fdt, u64 memmap)
>> +{
>> + int node = fdt_path_offset(fdt, "/chosen");
>> + efi_status_t status;
>> +
>> + if (node < 0)
>> + return EFI_LOAD_ERROR;
>> +
>> + memmap = cpu_to_fdt64(memmap);
>> + status = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-start",
>> + &memmap, sizeof(memmap));
>> +
>> + if (status)
>> + return EFI_LOAD_ERROR;
>> +
>> + return EFI_SUCCESS;
>> +}
>
> v4.9.0 with this patch doesn't boot on my Seattle (with known buggy UEFI FW)
> [0]. It looks like the memory map is truncated (and missing a runtime region,
> compare with [1]). Should 'linux,uefi-mmap-size' be updated too? (Otherwise its
> the size when we retrieved the runtime mapping, but before we allocated the FDT)
>
Overall this fails for me as well. It appears to work, until I trigger
the race condition I fixed, then OOM killer gets triggered the instant
rootfs starts to initialize. Since I see James has a number of
comments, I did not investigate further to determine why the patch is
not working on my system.
--
Jeffrey Hugo
Qualcomm Datacenter Technologies as an affiliate of Qualcomm
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply
* [PATCH 1/1] arm/module: maximum utilization of module area.
From: Ard Biesheuvel @ 2016-12-12 16:57 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <alpine.LFD.2.20.1612121026250.1657@knanqh.ubzr>
On 12 December 2016 at 15:28, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Mon, 12 Dec 2016, Vaneet Narang wrote:
>
>> Hi,
>>
>> >A PC24 relocation has a range of +/-32MB. This means that where-ever
>> >the module is placed, it must be capable of reaching any function
>> >within the kernel text, which may itself be quite large (eg, 8MB, or
>> >possibly larger). The module area exists to allow modules to be
>> >located in an area where PC24 relocations are able to reach all of the
>> >kernel text on sensibly configured kernels, thereby allowing for
>> >optimal performance.
>> >
>> >If you wish to load large modules, then enable ARM_MODULE_PLTS, which
>> >will use the less efficient PLT method (which is basically an indirect
>> >function call) for relocations that PC24 can't handle, and will allow
>> >the module to be loaded into the vmalloc area.
>> >
>> >Growing the module area so that smaller modules also get penalised by
>> >the PLT indirection is not sane.
>>
>> This is exactly what i am saying. These changes are useful to accomdate
>> 22MB modules without enabling ARM_MODULE_PLTS.
>
> I think you need to figure out why you need such a huge module in the
> first place. That is very uncommon indeed.
>
Also, note that the module PLT code was recently optimized, to remove
some pathological behavior which severely affected load times of large
modules.
Can you quantify the performance hit you are taking when using module
PLTs? And the actual increase in memory footprint?
^ permalink raw reply
* [PATCH v5 2/4] drm: bridge: add support for TI ths8135
From: Laurent Pinchart @ 2016-12-12 16:49 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161212164547.jw5ejbums6nwwtw2@rob-hp-laptop>
Hello,
On Monday 12 Dec 2016 10:45:47 Rob Herring wrote:
> On Wed, Dec 07, 2016 at 11:42:43AM +0100, Bartosz Golaszewski wrote:
> > THS8135 is a configurable video DAC. Add DT bindings for this chip and
> > use the dumb-vga-dac driver for now as no configuration is required to
> > make it work.
> >
> > Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
> > ---
> >
> > .../bindings/display/bridge/ti,ths8135.txt | 52 +++++++++++++++++
> > drivers/gpu/drm/bridge/dumb-vga-dac.c | 1 +
> > 2 files changed, 53 insertions(+)
> > create mode 100644
> > Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> Acked-by: Rob Herring <robh@kernel.org>
>
> But one nit below:
> > diff --git
> > a/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> > b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt new
> > file mode 100644
> > index 0000000..23cd8ee
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> > @@ -0,0 +1,52 @@
> > +THS8135 Video DAC
> > +-----------------
> > +
> > +This is the binding for Texas Instruments THS8135 Video DAC bridge.
> > +
> > +Required properties:
> > +
> > +- compatible: Must be "ti,ths8135"
> > +
> > +Required nodes:
> > +
> > +This device has two video ports. Their connections are modelled using the
> > OF +graph bindings specified in
> > Documentation/devicetree/bindings/graph.txt. +
> > +- Video port 0 for RGB input
> > +- Video port 1 for VGA output
> > +
> > +Example
> > +-------
> > +
> > +vga-bridge {
> > + compatible = "ti,ths8135";
> > + #address-cells = <1>;
> > + #size-cells = <0>;
> > +
> > + ports {
> > + #address-cells = <1>;
> > + #size-cells = <0>;
> > +
> > + port at 0 {
> > + #address-cells = <1>;
> > + #size-cells = <0>;
> > + reg = <0>;
> > +
> > + vga_bridge_in: endpoint at 0 {
> > + reg = <0>;
>
> You don't need reg here.
In which case the endpoint node should be named endpoint, not endpoint at 0. You
could then also remove the #address-cells and #size-cells properties from the
port at 0 node. Same for port at 1 below.
> > + remote-endpoint = <&lcdc_out_vga>;
> > + };
> > + };
> > +
> > + port at 1 {
> > + #address-cells = <1>;
> > + #size-cells = <0>;
> > + reg = <1>;
> > +
> > + vga_bridge_out: endpoint at 0 {
> > + reg = <0>;
> > + remote-endpoint = <&vga_con_in>;
> > + };
> > + };
> > + };
> > +};
> > diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c
> > b/drivers/gpu/drm/bridge/dumb-vga-dac.c index afec232..498fa75 100644
> > --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
> > +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> > @@ -204,6 +204,7 @@ static int dumb_vga_remove(struct platform_device
> > *pdev)
> >
> > static const struct of_device_id dumb_vga_match[] = {
> > { .compatible = "dumb-vga-dac" },
> > + { .compatible = "ti,ths8135" },
> > {},
> > };
> > MODULE_DEVICE_TABLE(of, dumb_vga_match);
--
Regards,
Laurent Pinchart
^ permalink raw reply
* [PATCH] ARM: at91/dt: sama5d2: add ssc0 definition
From: Alexandre Belloni @ 2016-12-12 16:47 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <CAJP5LRNHDXQtZDRcQcNuS76uvUndKVjKqWC2KXTM+DOMrro5kg@mail.gmail.com>
On 05/12/2016 at 12:28:34 +0200, Alex Gershgorin wrote :
>
> From 3a5a8e78ccccab2858c5944000884ab3c49eba5a Mon Sep 17 00:00:00 2001
> From: Alex <Alex.Gershgorin@qcore.com>
> Date: Sun, 4 Dec 2016 16:03:56 +0200
> Subject: [PATCH] ARM: at91/dt: sama5d2: add ssc0 definition
>
> The sama5d2 SoC has Synchronous Serial Controller which provides
> synchronous communication link with external devices.
> It's generally used in audio and telecom applications such as
> I2S, Short Frame Sync, Long Frame Sync.
>
> Signed-off-by: Alex Gershgorin <alex.gershgorin@qcore.com>
> ---
> arch/arm/boot/dts/sama5d2.dtsi | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
Applied, thanks.
--
Alexandre Belloni, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
^ permalink raw reply
* [PATCH v5 2/4] drm: bridge: add support for TI ths8135
From: Rob Herring @ 2016-12-12 16:45 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481107365-24839-3-git-send-email-bgolaszewski@baylibre.com>
On Wed, Dec 07, 2016 at 11:42:43AM +0100, Bartosz Golaszewski wrote:
> THS8135 is a configurable video DAC. Add DT bindings for this chip and
> use the dumb-vga-dac driver for now as no configuration is required to
> make it work.
>
> Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
> ---
> .../bindings/display/bridge/ti,ths8135.txt | 52 ++++++++++++++++++++++
> drivers/gpu/drm/bridge/dumb-vga-dac.c | 1 +
> 2 files changed, 53 insertions(+)
> create mode 100644 Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
Acked-by: Rob Herring <robh@kernel.org>
But one nit below:
>
> diff --git a/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> new file mode 100644
> index 0000000..23cd8ee
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> @@ -0,0 +1,52 @@
> +THS8135 Video DAC
> +-----------------
> +
> +This is the binding for Texas Instruments THS8135 Video DAC bridge.
> +
> +Required properties:
> +
> +- compatible: Must be "ti,ths8135"
> +
> +Required nodes:
> +
> +This device has two video ports. Their connections are modelled using the OF
> +graph bindings specified in Documentation/devicetree/bindings/graph.txt.
> +
> +- Video port 0 for RGB input
> +- Video port 1 for VGA output
> +
> +Example
> +-------
> +
> +vga-bridge {
> + compatible = "ti,ths8135";
> + #address-cells = <1>;
> + #size-cells = <0>;
> +
> + ports {
> + #address-cells = <1>;
> + #size-cells = <0>;
> +
> + port at 0 {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + reg = <0>;
> +
> + vga_bridge_in: endpoint at 0 {
> + reg = <0>;
You don't need reg here.
> + remote-endpoint = <&lcdc_out_vga>;
> + };
> + };
> +
> + port at 1 {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + reg = <1>;
> +
> + vga_bridge_out: endpoint at 0 {
> + reg = <0>;
> + remote-endpoint = <&vga_con_in>;
> + };
> + };
> + };
> +};
> diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> index afec232..498fa75 100644
> --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
> +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> @@ -204,6 +204,7 @@ static int dumb_vga_remove(struct platform_device *pdev)
>
> static const struct of_device_id dumb_vga_match[] = {
> { .compatible = "dumb-vga-dac" },
> + { .compatible = "ti,ths8135" },
> {},
> };
> MODULE_DEVICE_TABLE(of, dumb_vga_match);
> --
> 2.9.3
>
^ permalink raw reply
* [PATCH 2/2] FPGA: Add TS-7300 FPGA manager
From: Alan Tull @ 2016-12-12 16:44 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <39bc0569-81e1-8c35-0280-4ba1824b2710@gmail.com>
On Mon, 12 Dec 2016, Florian Fainelli wrote:
> On 12/12/2016 08:01 AM, Alan Tull wrote:
> > On Sun, 11 Dec 2016, Florian Fainelli wrote:
> >
> >> Add support for loading bitstreams on the Altera Cyclone II FPGA
> >> populated on the TS-7300 board. This is done through the configuration
> >> and data registers offered through a memory interface between the EP93xx
> >> SoC and the FPGA.
> >>
> >> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> >
> > Hi Florain,
> >
> > Thanks for submitting!
> >
> > How specific is this to the tx7300 board?
> >
> > I'm unclear about the programming method here. Are these registers
> > exposed by the EP93xx? Is it possible that another cpu could access
> > these two registers to configure the cyclone ii? Is this passive
> > serial?
>
> So here is my understanding, from glancing at the TS-7300 board manual:
>
> - there is an on-board CPLD which does a variety of services and I/O for
> the EP9302 SoC, one of these services is the configuration of the
> on-board FPGA
>
> - the programming interface here is some kind of abstraction around a
> Cyclone II FPGA, and is by no means standard, nor directly exposed to
> the CPU
>
> - unless you go through the CPLD, there is no other way that you could
> configure the FPGA
>
> Does that help answer your questions?
Yes it does. Maybe a brief comment explaining that similar to what
you just said.
> >> +static int ts73xx_fpga_write_init(struct fpga_manager *mgr, u32 flags,
> >> + const char *buf, size_t count)
> >> +{
> >> + struct ts73xx_fpga_priv *priv = mgr->priv;
> >> +
> >> + /* Reset the FPGA */
> >> + writeb(0, priv->io_base + TS73XX_FPGA_CONFIG_REG);
> >> + udelay(30);
> >> + writeb(0x2, priv->io_base + TS73XX_FPGA_CONFIG_REG);
> >> + udelay(80);
> >
> > Could these udelay values be macros?
>
> The bit definitions could be defined, but the delays, why would that be
> useful?
If it is helpful for someone reading the code to know what the delays
are, if some future generation of the board/cpld uses this same
driver. So when this driver is broken for the next generation
board/cpld, people trying to fix this know what the delay is there for
and can have a better chance at adjusting the right delay.
>
> >
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static inline int ts73xx_fpga_can_write(struct ts73xx_fpga_priv *priv)
> >> +{
> >> + unsigned int timeout = 1000;
> >
> > Another macro?
>
> The delay is just an arbitrary good timeout.
> --
> Florian
>
^ permalink raw reply
* [PATCH v3 11/12] arm64: dts: marvell: add sdhci support for Armada 7K/8K
From: Gregory CLEMENT @ 2016-12-12 16:37 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161209200729.GO14217@n2100.armlinux.org.uk>
Hi Russell King,
On ven., d?c. 09 2016, Russell King - ARM Linux <linux@armlinux.org.uk> wrote:
> On Fri, Dec 09, 2016 at 11:30:07AM +0100, Gregory CLEMENT wrote:
>> Also enable it on the Armada 7040 DB board
>>
>> Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
>
> Hi,
>
> Can this also be added to the cp110 on the 7k/8k SoCs as well, or does
> it rely on other unmerged support?
I did not add this one because until now I had not the hardware setup
available to test it.
But at least I can add the ressources in the device tree and now I can
test it partially.
Gregory
>
> Thanks.
>
> --
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.
--
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply
* [PATCH v2 2/2] ASoC: atmel: tse850: rely on the ssc to register as a cpu dai by itself
From: Rob Herring @ 2016-12-12 16:34 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481052157-23400-3-git-send-email-peda@axentia.se>
On Tue, Dec 06, 2016 at 08:22:37PM +0100, Peter Rosin wrote:
> This breaks devicetree compatibility, but in this case that is ok. All
> affected units are either on my desk, or running an even older version
> of the driver that is not compatible with the upstreamed version anyway
> (and when these other units are eventually updated, they will get a
> fresh dtb as well, so that is not a significant problem either).
Perfect.
> All of that is of course assuming that noone else has managed to build
> something that can use this driver, but that seems extremely improbable.
>
> Signed-off-by: Peter Rosin <peda@axentia.se>
> ---
> .../bindings/sound/axentia,tse850-pcm5142.txt | 11 ++++++++---
Acked-by: Rob Herring <robh@kernel.org>
> sound/soc/atmel/tse850-pcm5142.c | 23 +++-------------------
> 2 files changed, 11 insertions(+), 23 deletions(-)
^ permalink raw reply
* [PATCH 2/2] FPGA: Add TS-7300 FPGA manager
From: Florian Fainelli @ 2016-12-12 16:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <alpine.DEB.2.10.1612120951520.3310@atull-730U3E-740U3E>
On 12/12/2016 08:01 AM, Alan Tull wrote:
> On Sun, 11 Dec 2016, Florian Fainelli wrote:
>
>> Add support for loading bitstreams on the Altera Cyclone II FPGA
>> populated on the TS-7300 board. This is done through the configuration
>> and data registers offered through a memory interface between the EP93xx
>> SoC and the FPGA.
>>
>> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
>
> Hi Florain,
>
> Thanks for submitting!
>
> How specific is this to the tx7300 board?
>
> I'm unclear about the programming method here. Are these registers
> exposed by the EP93xx? Is it possible that another cpu could access
> these two registers to configure the cyclone ii? Is this passive
> serial?
So here is my understanding, from glancing at the TS-7300 board manual:
- there is an on-board CPLD which does a variety of services and I/O for
the EP9302 SoC, one of these services is the configuration of the
on-board FPGA
- the programming interface here is some kind of abstraction around a
Cyclone II FPGA, and is by no means standard, nor directly exposed to
the CPU
- unless you go through the CPLD, there is no other way that you could
configure the FPGA
Does that help answer your questions?
>
> Please cc linux-fpga at vger.kernel.org for the next version.
>
> Other comments below...
OK, I will fix those,
>
>> ---
>> drivers/fpga/Kconfig | 7 ++
>> drivers/fpga/Makefile | 1 +
>> drivers/fpga/ts73xx-fpga.c | 165 +++++++++++++++++++++++++++++++++++++++++++++
>> 3 files changed, 173 insertions(+)
>> create mode 100644 drivers/fpga/ts73xx-fpga.c
>>
>> diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
>> index cd84934774cc..109625707ef0 100644
>> --- a/drivers/fpga/Kconfig
>> +++ b/drivers/fpga/Kconfig
>> @@ -26,6 +26,13 @@ config FPGA_MGR_ZYNQ_FPGA
>> help
>> FPGA manager driver support for Xilinx Zynq FPGAs.
>>
>> +config FPGA_MGR_TS73XX
>> + tristate "Technologic Systems TS-73xx SBC FPGA Manager"
>> + depends on ARCH_EP93XX && MACH_TS72XX
>> + help
>> + FPGA manager driver support for the Altera Cyclone II FPGA
>> + present on the TS-73xx SBC boards.
>> +
>> endif # FPGA
>>
>> endmenu
>> diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
>> index 8d83fc6b1613..5d51265cc1b4 100644
>> --- a/drivers/fpga/Makefile
>> +++ b/drivers/fpga/Makefile
>> @@ -8,3 +8,4 @@ obj-$(CONFIG_FPGA) += fpga-mgr.o
>> # FPGA Manager Drivers
>> obj-$(CONFIG_FPGA_MGR_SOCFPGA) += socfpga.o
>> obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA) += zynq-fpga.o
>> +obj-$(CONFIG_FPGA_MGR_TS73XX) += ts73xx-fpga.o
>> diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c
>> new file mode 100644
>> index 000000000000..2b3d5d668dfc
>> --- /dev/null
>> +++ b/drivers/fpga/ts73xx-fpga.c
>> @@ -0,0 +1,165 @@
>> +/*
>> + * Technologic Systems TS-73xx SBC FPGA loader
>> + *
>> + * Copyright (C) 2016 Florian Fainelli <f.fainelli@gmail.com>
>> + *
>> + * FPGA Manager Driver for the on-board Altera Cyclone II FPGA found on
>> + * TS-7300, heavily based on load_fpga.c in their vendor tree.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; version 2 of the License.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include <linux/delay.h>
>> +#include <linux/io.h>
>> +#include <linux/module.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/string.h>
>> +#include <linux/bitrev.h>
>> +#include <linux/fpga/fpga-mgr.h>
>> +
>> +#define TS73XX_FPGA_DATA_REG 0
>> +#define TS73XX_FPGA_CONFIG_REG 1
>> +
>> +struct ts73xx_fpga_priv {
>> + void __iomem *io_base;
>> + struct device *dev;
>> +};
>> +
>> +static enum fpga_mgr_states ts73xx_fpga_state(struct fpga_manager *mgr)
>> +{
>> + return FPGA_MGR_STATE_UNKNOWN;
>> +}
>> +
>> +static int ts73xx_fpga_write_init(struct fpga_manager *mgr, u32 flags,
>> + const char *buf, size_t count)
>> +{
>> + struct ts73xx_fpga_priv *priv = mgr->priv;
>> +
>> + /* Reset the FPGA */
>> + writeb(0, priv->io_base + TS73XX_FPGA_CONFIG_REG);
>> + udelay(30);
>> + writeb(0x2, priv->io_base + TS73XX_FPGA_CONFIG_REG);
>> + udelay(80);
>
> Could these udelay values be macros?
The bit definitions could be defined, but the delays, why would that be
useful?
>
>> +
>> + return 0;
>> +}
>> +
>> +static inline int ts73xx_fpga_can_write(struct ts73xx_fpga_priv *priv)
>> +{
>> + unsigned int timeout = 1000;
>
> Another macro?
The delay is just an arbitrary good timeout.
--
Florian
^ permalink raw reply
* [PATCH v6 5/5] ARM: configs: stm32: Add I2C support for STM32 defconfig
From: M'boumba Cedric Madianga @ 2016-12-12 16:15 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481559342-6106-1-git-send-email-cedric.madianga@gmail.com>
Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
---
arch/arm/configs/stm32_defconfig | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index e7b56d4..9494eaf 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -52,6 +52,9 @@ CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_STM32=y
CONFIG_SERIAL_STM32_CONSOLE=y
# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_STM32F4=y
# CONFIG_HWMON is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_NEW_LEDS=y
--
1.9.1
^ permalink raw reply related
* [PATCH v6 4/5] ARM: dts: stm32: Add I2C1 support for STM32429 eval board
From: M'boumba Cedric Madianga @ 2016-12-12 16:15 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481559342-6106-1-git-send-email-cedric.madianga@gmail.com>
Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
---
arch/arm/boot/dts/stm32429i-eval.dts | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/arm/boot/dts/stm32429i-eval.dts b/arch/arm/boot/dts/stm32429i-eval.dts
index afb90bc..74e0045 100644
--- a/arch/arm/boot/dts/stm32429i-eval.dts
+++ b/arch/arm/boot/dts/stm32429i-eval.dts
@@ -141,3 +141,9 @@
pinctrl-names = "default";
status = "okay";
};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pins_b>;
+ pinctrl-names = "default";
+ status = "okay";
+};
--
1.9.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox