Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] watchdog: bcm2835_wdt: set WDOG_HW_RUNNING bit when appropriate
From: Eric Anholt @ 2016-12-12 17:46 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481536123-9279-1-git-send-email-rasmus.villemoes@prevas.dk>

Rasmus Villemoes <rasmus.villemoes@prevas.dk> writes:

> A bootloader may start the watchdog device before handing control to
> the kernel - in that case, we should tell the kernel about it so the
> watchdog framework can keep it alive until userspace opens
> /dev/watchdog0.

I don't believe our current bootloaders (the closed firmware or u-boot)
set up the watchdog, but this seems reasonable since they might want to
later.

Acked-by: Eric Anholt <eric@anholt.net>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20161212/673b1918/attachment.sig>

^ permalink raw reply

* [PATCH] crypto: arm64/aes: reimplement bit-sliced ARM/NEON implementation for arm64
From: Ard Biesheuvel @ 2016-12-12 17:45 UTC (permalink / raw)
  To: linux-arm-kernel

This is a reimplementation of the NEON version of the bit-sliced AES
algorithm. This code is heavily based on Andy Polyakov's OpenSSL version
for ARM, which is also available in the kernel. This is an alternative for
the existing NEON implementation for arm64 authored by me, which suffers
from poor performance due to its reliance on the pathologically slow four
register variant of the tbl/tbx NEON instruction.

This version is about 30% (*) faster than the generic C code, but only in
cases where the input can be 8x interleaved (this is a fundamental property
of bit slicing). For this reason, only the chaining modes ECB, XTS and CTR
are implemented. (The significance of ECB is that it could potentially be
used by other chaining modes.)

(*) Measured on Cortex-A57. Note that this is still an order of magnitude
  slower than the implementations that use the dedicated AES instructions
  introduced in ARMv8, but those are part of an optional extension, and so
  it is good to have a fallback.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/Kconfig           |   6 +
 arch/arm64/crypto/Makefile          |   3 +
 arch/arm64/crypto/aes-neonbs-core.S | 905 ++++++++++++++++++++++++++++++++++++
 arch/arm64/crypto/aes-neonbs-glue.c | 300 ++++++++++++
 4 files changed, 1214 insertions(+)
 create mode 100644 arch/arm64/crypto/aes-neonbs-core.S
 create mode 100644 arch/arm64/crypto/aes-neonbs-glue.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 450a85df041a..cd0e7a6146b7 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -72,4 +72,10 @@ config CRYPTO_CRC32_ARM64
 	depends on ARM64
 	select CRYPTO_HASH
 
+config CRYPTO_AES_NEON_BS
+	tristate "AES in ECB/CTR/XTS modes using bit-sliced NEON algorithm"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES
+
 endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index aa8888d7b744..11d20714ec48 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -41,6 +41,9 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
 sha512-arm64-y := sha512-glue.o sha512-core.o
 
+obj-$(CONFIG_CRYPTO_AES_NEON_BS) += aes-neon-bs.o
+aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
+
 AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
 AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
 
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
new file mode 100644
index 000000000000..d027c276cc75
--- /dev/null
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -0,0 +1,905 @@
+/*
+ * Bit sliced AES using NEON instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * The algorithm implemented here is described in detail by the paper
+ * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
+ * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
+ *
+ * This implementation is based primarily on the OpenSSL implementation
+ * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+
+	rounds		.req	x11
+	bskey		.req	x12
+
+	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
+	eor		\b2, \b2, \b1
+	eor		\b5, \b5, \b6
+	eor		\b3, \b3, \b0
+	eor		\b6, \b6, \b2
+	eor		\b5, \b5, \b0
+	eor		\b6, \b6, \b3
+	eor		\b3, \b3, \b7
+	eor		\b7, \b7, \b5
+	eor		\b3, \b3, \b4
+	eor		\b4, \b4, \b5
+	eor		\b2, \b2, \b7
+	eor		\b3, \b3, \b1
+	eor		\b1, \b1, \b5
+	.endm
+
+	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
+	eor		\b0, \b0, \b6
+	eor		\b1, \b1, \b4
+	eor		\b4, \b4, \b6
+	eor		\b2, \b2, \b0
+	eor		\b6, \b6, \b1
+	eor		\b1, \b1, \b5
+	eor		\b5, \b5, \b3
+	eor		\b3, \b3, \b7
+	eor		\b7, \b7, \b5
+	eor		\b2, \b2, \b5
+	eor		\b4, \b4, \b7
+	.endm
+
+	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
+	eor		\b1, \b1, \b7
+	eor		\b4, \b4, \b7
+	eor		\b7, \b7, \b5
+	eor		\b1, \b1, \b3
+	eor		\b2, \b2, \b5
+	eor		\b3, \b3, \b7
+	eor		\b6, \b6, \b1
+	eor		\b2, \b2, \b0
+	eor		\b5, \b5, \b3
+	eor		\b4, \b4, \b6
+	eor		\b0, \b0, \b6
+	eor		\b1, \b1, \b4
+	.endm
+
+	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
+	eor		\b1, \b1, \b5
+	eor		\b2, \b2, \b7
+	eor		\b3, \b3, \b1
+	eor		\b4, \b4, \b5
+	eor		\b7, \b7, \b5
+	eor		\b3, \b3, \b4
+	eor 		\b5, \b5, \b0
+	eor		\b3, \b3, \b7
+	eor		\b6, \b6, \b2
+	eor		\b2, \b2, \b1
+	eor		\b6, \b6, \b3
+	eor		\b3, \b3, \b0
+	eor		\b5, \b5, \b6
+	.endm
+
+	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
+	eor 		\t0, \y0, \y1
+	and		\t0, \t0, \x0
+	eor		\x0, \x0, \x1
+	and		\t1, \x1, \y0
+	and		\x0, \x0, \y1
+	eor		\x1, \t1, \t0
+	eor		\x0, \x0, \t1
+	.endm
+
+	.macro		mul_gf4_n, x0, x1, y0, y1, t0
+	eor		\t0, \y0, \y1
+	and		\t0, \t0, \x0
+	eor		\x0, \x0, \x1
+	and		\x1, \x1, \y0
+	and		\x0, \x0, \y1
+	eor		\x1, \x1, \x0
+	eor		\x0, \x0, \t0
+	.endm
+
+	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
+	eor		\t0, \y0, \y1
+	eor 		\t1, \y2, \y3
+	and		\t0, \t0, \x0
+	and		\t1, \t1, \x2
+	eor		\x0, \x0, \x1
+	eor		\x2, \x2, \x3
+	and		\x1, \x1, \y0
+	and		\x3, \x3, \y2
+	and		\x0, \x0, \y1
+	and		\x2, \x2, \y3
+	eor		\x1, \x1, \x0
+	eor		\x2, \x2, \x3
+	eor		\x0, \x0, \t0
+	eor		\x3, \x3, \t1
+	.endm
+
+	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
+				    y0, y1, y2, y3, t0, t1, t2, t3
+	eor		\t0, \x0, \x2
+	eor		\t1, \x1, \x3
+	mul_gf4  	\x0, \x1, \y0, \y1, \t2, \t3
+	eor		\y0, \y0, \y2
+	eor		\y1, \y1, \y3
+	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
+	eor		\x0, \x0, \t0
+	eor		\x2, \x2, \t0
+	eor		\x1, \x1, \t1
+	eor		\x3, \x3, \t1
+	eor		\t0, \x4, \x6
+	eor		\t1, \x5, \x7
+	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
+	eor		\y0, \y0, \y2
+	eor		\y1, \y1, \y3
+	mul_gf4  	\x4, \x5, \y0, \y1, \t2, \t3
+	eor		\x4, \x4, \t0
+	eor		\x6, \x6, \t0
+	eor		\x5, \x5, \t1
+	eor		\x7, \x7, \t1
+	.endm
+
+	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
+				   t0, t1, t2, t3, s0, s1, s2, s3
+	eor		\t3, \x4, \x6
+	eor		\t2, \x5, \x7
+	eor		\t1, \x1, \x3
+	eor		\s1, \x7, \x6
+	mov		\t0, \t2
+	eor		\s0, \x0, \x2
+	orr		\t2, \t2, \t1
+	eor		\s3, \t3, \t0
+	and		\s2, \t3, \s0
+	orr		\t3, \t3, \s0
+	eor		\s0, \s0, \t1
+	and		\t0, \t0, \t1
+	eor		\t1, \x3, \x2
+	and		\s3, \s3, \s0
+	and		\s1, \s1, \t1
+	eor		\t1, \x4, \x5
+	eor		\s0, \x1, \x0
+	eor		\t3, \t3, \s1
+	eor		\t2, \t2, \s1
+	and		\s1, \t1, \s0
+	orr		\t1, \t1, \s0
+	eor		\t3, \t3, \s3
+	eor		\t0, \t0, \s1
+	eor		\t2, \t2, \s2
+	eor		\t1, \t1, \s3
+	eor		\t0, \t0, \s2
+	and		\s0, \x7, \x3
+	eor		\t1, \t1, \s2
+	and		\s1, \x6, \x2
+	and		\s2, \x5, \x1
+	orr		\s3, \x4, \x0
+	eor		\t3, \t3, \s0
+	eor		\t1, \t1, \s2
+	eor		\t0, \t0, \s3
+	eor		\t2, \t2, \s1
+	and		\s2, \t3, \t1
+	mov		\s0, \t0
+	eor		\s1, \t2, \s2
+	eor		\s3, \t0, \s2
+	eor		\s2, \t0, \s2
+	bsl		\s1, \t1, \t0
+	bsl		\s3, \t3, \t2
+	eor		\t3, \t3, \t2
+	bsl		\s0, \s1, \s2
+	bsl		\t0, \s2, \s1
+	and		\s2, \s0, \s3
+	eor		\t1, \t1, \t0
+	eor		\s2, \s2, \t3
+	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
+			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
+	.endm
+
+	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
+			      t0, t1, t2, t3, s0, s1, s2, s3
+	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
+			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
+	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
+			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
+			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
+			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
+	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
+			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
+	.endm
+
+	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
+				  t0, t1, t2, t3, s0, s1, s2, s3
+	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
+			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
+	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
+			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
+			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
+			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
+	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
+			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
+	.endm
+
+	.macro		enc_next_rk
+	ldp		q16, q17, [bskey], #32		// load 8 x 16 bytes at bskey into v16-v23 ...
+	ldp		q18, q19, [bskey], #32		// ... post-incrementing bskey by 128 in total
+	ldp		q20, q21, [bskey], #32
+	ldp		q22, q23, [bskey], #32
+	.endm
+
+	.macro		dec_next_rk
+	ldp		q16, q17, [bskey, #-128]!	// step bskey back by 128 bytes first (writeback) ...
+	ldp		q18, q19, [bskey, #32]		// ... then load the 8 x 16 bytes found there into v16-v23
+	ldp		q20, q21, [bskey, #64]
+	ldp		q22, q23, [bskey, #96]
+	.endm
+
+	.macro		add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
+	eor		\x0\().16b, \x0\().16b, v16.16b	// XOR argument N with v(16 + N), as loaded by enc_next_rk/dec_next_rk
+	eor		\x1\().16b, \x1\().16b, v17.16b
+	eor		\x2\().16b, \x2\().16b, v18.16b
+	eor		\x3\().16b, \x3\().16b, v19.16b
+	eor		\x4\().16b, \x4\().16b, v20.16b
+	eor		\x5\().16b, \x5\().16b, v21.16b
+	eor		\x6\().16b, \x6\().16b, v22.16b
+	eor		\x7\().16b, \x7\().16b, v23.16b
+	.endm
+
+	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
+	tbl		\x0\().16b, {\x0\().16b}, \mask\().16b	// permute the bytes of each register in place, using mask as the tbl index vector
+	tbl		\x1\().16b, {\x1\().16b}, \mask\().16b
+	tbl		\x2\().16b, {\x2\().16b}, \mask\().16b
+	tbl		\x3\().16b, {\x3\().16b}, \mask\().16b
+	tbl		\x4\().16b, {\x4\().16b}, \mask\().16b
+	tbl		\x5\().16b, {\x5\().16b}, \mask\().16b
+	tbl		\x6\().16b, {\x6\().16b}, \mask\().16b
+	tbl		\x7\().16b, {\x7\().16b}, \mask\().16b
+	.endm
+
+	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
+				  t0, t1, t2, t3, t4, t5, t6, t7, inv
+	ext		\t0\().16b, \x0\().16b, \x0\().16b, #12
+	ext		\t1\().16b, \x1\().16b, \x1\().16b, #12
+	eor		\x0\().16b, \x0\().16b, \t0\().16b
+	ext		\t2\().16b, \x2\().16b, \x2\().16b, #12
+	eor		\x1\().16b, \x1\().16b, \t1\().16b
+	ext		\t3\().16b, \x3\().16b, \x3\().16b, #12
+	eor		\x2\().16b, \x2\().16b, \t2\().16b
+	ext		\t4\().16b, \x4\().16b, \x4\().16b, #12
+	eor		\x3\().16b, \x3\().16b, \t3\().16b
+	ext		\t5\().16b, \x5\().16b, \x5\().16b, #12
+	eor		\x4\().16b, \x4\().16b, \t4\().16b
+	ext		\t6\().16b, \x6\().16b, \x6\().16b, #12
+	eor		\x5\().16b, \x5\().16b, \t5\().16b
+	ext		\t7\().16b, \x7\().16b, \x7\().16b, #12
+	eor		\x6\().16b, \x6\().16b, \t6\().16b
+	eor		\t1\().16b, \t1\().16b, \x0\().16b
+	eor		\x7\().16b, \x7\().16b, \t7\().16b
+	ext		\x0\().16b, \x0\().16b, \x0\().16b, #8
+	eor		\t2\().16b, \t2\().16b, \x1\().16b
+	eor		\t0\().16b, \t0\().16b, \x7\().16b
+	eor		\t1\().16b, \t1\().16b, \x7\().16b
+	ext		\x1\().16b, \x1\().16b, \x1\().16b, #8
+	eor		\t5\().16b, \t5\().16b, \x4\().16b
+	eor		\x0\().16b, \x0\().16b, \t0\().16b
+	eor		\t6\().16b, \t6\().16b, \x5\().16b
+	eor		\x1\().16b, \x1\().16b, \t1\().16b
+	ext		\t0\().16b, \x4\().16b, \x4\().16b, #8
+	eor		\t4\().16b, \t4\().16b, \x3\().16b
+	ext		\t1\().16b, \x5\().16b, \x5\().16b, #8
+	eor		\t7\().16b, \t7\().16b, \x6\().16b
+	ext		\x4\().16b, \x3\().16b, \x3\().16b, #8
+	eor		\t3\().16b, \t3\().16b, \x2\().16b
+	ext		\x5\().16b, \x7\().16b, \x7\().16b, #8
+	eor		\t4\().16b, \t4\().16b, \x7\().16b
+	ext		\x3\().16b, \x6\().16b, \x6\().16b, #8
+	eor		\t3\().16b, \t3\().16b, \x7\().16b
+	ext		\x6\().16b, \x2\().16b, \x2\().16b, #8
+	eor		\x7\().16b, \t1\().16b, \t5\().16b
+	.ifb		\inv
+	eor		\x2\().16b, \t0\().16b, \t4\().16b
+	eor		\x4\().16b, \x4\().16b, \t3\().16b
+	eor		\x5\().16b, \x5\().16b, \t7\().16b
+	eor		\x3\().16b, \x3\().16b, \t6\().16b
+	eor		\x6\().16b, \x6\().16b, \t2\().16b
+	.else
+	eor		\t3\().16b, \t3\().16b, \x4\().16b
+	eor		\x5\().16b, \x5\().16b, \t7\().16b
+	eor		\x2\().16b, \x3\().16b, \t6\().16b
+	eor		\x3\().16b, \t0\().16b, \t4\().16b
+	eor		\x4\().16b, \x6\().16b, \t2\().16b
+	mov		\x6\().16b, \t3\().16b
+	.endif
+	.endm
+
+	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
+				      t0, t1, t2, t3, t4, t5, t6, t7
+	ext		\t0\().16b, \x0\().16b, \x0\().16b, #8
+	ext		\t6\().16b, \x6\().16b, \x6\().16b, #8
+	ext		\t7\().16b, \x7\().16b, \x7\().16b, #8
+	eor		\t0\().16b, \t0\().16b, \x0\().16b
+	ext		\t1\().16b, \x1\().16b, \x1\().16b, #8
+	eor		\t6\().16b, \t6\().16b, \x6\().16b
+	ext		\t2\().16b, \x2\().16b, \x2\().16b, #8
+	eor		\t7\().16b, \t7\().16b, \x7\().16b
+	ext		\t3\().16b, \x3\().16b, \x3\().16b, #8
+	eor		\t1\().16b, \t1\().16b, \x1\().16b
+	ext		\t4\().16b, \x4\().16b, \x4\().16b, #8
+	eor		\t2\().16b, \t2\().16b, \x2\().16b
+	ext		\t5\().16b, \x5\().16b, \x5\().16b, #8
+	eor		\t3\().16b, \t3\().16b, \x3\().16b
+	eor		\t4\().16b, \t4\().16b, \x4\().16b
+	eor		\t5\().16b, \t5\().16b, \x5\().16b
+	eor		\x0\().16b, \x0\().16b, \t6\().16b
+	eor		\x1\().16b, \x1\().16b, \t6\().16b
+	eor		\x2\().16b, \x2\().16b, \t0\().16b
+	eor		\x4\().16b, \x4\().16b, \t2\().16b
+	eor		\x3\().16b, \x3\().16b, \t1\().16b
+	eor		\x1\().16b, \x1\().16b, \t7\().16b
+	eor		\x2\().16b, \x2\().16b, \t7\().16b
+	eor		\x4\().16b, \x4\().16b, \t6\().16b
+	eor		\x5\().16b, \x5\().16b, \t3\().16b
+	eor		\x3\().16b, \x3\().16b, \t6\().16b
+	eor		\x6\().16b, \x6\().16b, \t4\().16b
+	eor		\x4\().16b, \x4\().16b, \t7\().16b
+	eor		\x5\().16b, \x5\().16b, \t7\().16b
+	eor		\x7\().16b, \x7\().16b, \t5\().16b
+	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
+			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
+	.endm
+
+	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
+	ushr		\t0\().2d, \b0\().2d, #\n		// t = b >> n (two independent a/b pairs are interleaved)
+	ushr		\t1\().2d, \b1\().2d, #\n
+	eor		\t0\().16b, \t0\().16b, \a0\().16b	// t ^= a
+	eor		\t1\().16b, \t1\().16b, \a1\().16b
+	and		\t0\().16b, \t0\().16b, \mask\().16b	// t &= mask: masked bits where a and b >> n differ
+	and		\t1\().16b, \t1\().16b, \mask\().16b
+	eor		\a0\().16b, \a0\().16b, \t0\().16b	// a ^= t
+	shl		\t0\().2d, \t0\().2d, #\n		// t <<= n
+	eor		\a1\().16b, \a1\().16b, \t1\().16b
+	shl		\t1\().2d, \t1\().2d, #\n
+	eor		\b0\().16b, \b0\().16b, \t0\().16b	// b ^= t: the masked bits of a and b >> n are now exchanged
+	eor		\b1\().16b, \b1\().16b, \t1\().16b
+	.endm
+
+	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
+	movi		\t0\().16b, #0x55			// mask for the n = 1 swaps
+	movi		\t1\().16b, #0x33			// mask for the n = 2 swaps
+	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3	// note: parameters are declared x7..x0, so x0 is the LAST register argument
+	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
+	movi		\t0\().16b, #0x0f			// 0x55 is no longer needed: reuse t0 as the n = 4 mask
+	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
+	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
+	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
+	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
+	.endm
+
+
+	.align		6
+M0:	.octa		0x0004080c0105090d02060a0e03070b0f
+
+M0SR:	.octa		0x0004080c05090d010a0e02060f03070b
+SR:	.octa		0x0f0e0d0c0a09080b0504070600030201
+SRM0:	.octa		0x01060b0c0207080d0304090e00050a0f
+
+M0ISR:	.octa		0x0004080c0d0105090a0e0206070b0f03
+ISR:	.octa		0x0f0e0d0c080b0a090504070602010003
+ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f
+
+	/*
+	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
+	 */
+ENTRY(aesbs_convert_key)
+	ld1		{v7.4s}, [x1], #16		// load round 0 key
+	ld1		{v17.4s}, [x1], #16		// load round 1 key
+
+	movi		v8.16b,  #0x01			// bit masks
+	movi		v9.16b,  #0x02
+	movi		v10.16b, #0x04
+	movi		v11.16b, #0x08
+	movi		v12.16b, #0x10
+	movi		v13.16b, #0x20
+	movi		v14.16b, #0x40
+	movi		v15.16b, #0x80
+	ldr		q16, M0				// byte permutation applied to every sliced key by the tbl below
+
+	sub		x2, x2, #1			// the loop below emits rounds - 1 sliced keys
+	str		q7, [x0], #16		// save round 0 key
+
+.Lkey_loop:
+	tbl		v7.16b ,{v17.16b}, v16.16b	// permute the key loaded on the previous pass
+	ld1		{v17.4s}, [x1], #16		// load next round key
+
+	cmtst		v0.16b, v7.16b, v8.16b		// plane N: 0xff in every byte of the key that has bit N set
+	cmtst		v1.16b, v7.16b, v9.16b
+	cmtst		v2.16b, v7.16b, v10.16b
+	cmtst		v3.16b, v7.16b, v11.16b
+	cmtst		v4.16b, v7.16b, v12.16b
+	cmtst		v5.16b, v7.16b, v13.16b
+	cmtst		v6.16b, v7.16b, v14.16b
+	cmtst		v7.16b, v7.16b, v15.16b		// v7 is overwritten last, after the other planes were extracted
+	not		v0.16b, v0.16b			// invert planes 0, 1, 5 and 6, i.e. the set bits of 0x63
+	not		v1.16b, v1.16b
+	not		v5.16b, v5.16b
+	not		v6.16b, v6.16b
+
+	subs		x2, x2, #1
+	stp		q2, q3, [x0, #32]		// the eight planes fill one 128-byte slot ...
+	stp		q4, q5, [x0, #64]
+	stp		q6, q7, [x0, #96]
+	stp		q0, q1, [x0], #128		// ... and only this store advances x0
+	b.ne		.Lkey_loop			// flags come from the subs above (stp does not touch them)
+
+	movi		v7.16b, #0x63			// compose .L63
+	eor		v17.16b, v17.16b, v7.16b
+	str		q17, [x0]			// last round key is stored un-sliced, XORed with 0x63 in every byte
+	ret
+ENDPROC(aesbs_convert_key)
+
+	.align		4
+aesbs_encrypt8:
+	ldr		q9, [bskey], #16		// round 0 key
+	ldr		q8, M0SR			// tbl index vector applied to the inputs below
+	ldr		q24, SR				// mask used by shift_rows in the loop
+
+	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
+	eor		v11.16b, v1.16b, v9.16b
+	tbl		v0.16b, {v10.16b}, v8.16b
+	eor		v12.16b, v2.16b, v9.16b
+	tbl		v1.16b, {v11.16b}, v8.16b
+	eor		v13.16b, v3.16b, v9.16b
+	tbl		v2.16b, {v12.16b}, v8.16b
+	eor		v14.16b, v4.16b, v9.16b
+	tbl		v3.16b, {v13.16b}, v8.16b
+	eor		v15.16b, v5.16b, v9.16b
+	tbl		v4.16b, {v14.16b}, v8.16b
+	eor		v10.16b, v6.16b, v9.16b
+	tbl		v5.16b, {v15.16b}, v8.16b
+	eor		v11.16b, v7.16b, v9.16b
+	tbl		v6.16b, {v10.16b}, v8.16b
+	tbl		v7.16b, {v11.16b}, v8.16b
+
+	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
+
+	sub		rounds, rounds, #1
+	b		.Lenc_sbox			// first pass skips shift_rows (presumably already covered by the M0SR permutation)
+
+.Lenc_loop:
+	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
+.Lenc_sbox:
+	sbox		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
+								v13, v14, v15
+	subs		rounds, rounds, #1
+	b.cc		.Lenc_done			// borrow, i.e. the counter was already 0: that was the last S-box pass
+
+	enc_next_rk
+
+	mix_cols	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
+								v13, v14, v15
+
+	add_round_key	v0, v1, v2, v3, v4, v5, v6, v7
+
+	b.ne		.Lenc_loop			// still tests the subs above: none of the NEON ops in between set flags
+	ldr		q24, SRM0			// counter just hit 0: the next (final) pass uses SRM0 instead of SR
+	b		.Lenc_loop
+
+.Lenc_done:
+	ldr		q12, [bskey]			// last round key
+
+	bitslice	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11
+
+	eor		v0.16b, v0.16b, v12.16b		// results are left in v0, v1, v4, v6, v3, v7, v2, v5 (the order the callers store)
+	eor		v1.16b, v1.16b, v12.16b
+	eor		v4.16b, v4.16b, v12.16b
+	eor		v6.16b, v6.16b, v12.16b
+	eor		v3.16b, v3.16b, v12.16b
+	eor		v7.16b, v7.16b, v12.16b
+	eor		v2.16b, v2.16b, v12.16b
+	eor		v5.16b, v5.16b, v12.16b
+	ret
+ENDPROC(aesbs_encrypt8)
+
+	.align		4
+aesbs_decrypt8:
+	lsl		x9, rounds, #7			// x9 = rounds * 128
+	add		bskey, bskey, x9
+
+	ldr		q9, [bskey, #-112]!		// round 0 key (for decryption): 16 bytes at offset rounds * 128 - 112, with writeback
+	ldr		q8, M0ISR			// tbl index vector applied to the inputs below
+	ldr		q24, ISR			// mask used by shift_rows in the loop
+
+	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
+	eor		v11.16b, v1.16b, v9.16b
+	tbl		v0.16b, {v10.16b}, v8.16b
+	eor		v12.16b, v2.16b, v9.16b
+	tbl		v1.16b, {v11.16b}, v8.16b
+	eor		v13.16b, v3.16b, v9.16b
+	tbl		v2.16b, {v12.16b}, v8.16b
+	eor		v14.16b, v4.16b, v9.16b
+	tbl		v3.16b, {v13.16b}, v8.16b
+	eor		v15.16b, v5.16b, v9.16b
+	tbl		v4.16b, {v14.16b}, v8.16b
+	eor		v10.16b, v6.16b, v9.16b
+	tbl		v5.16b, {v15.16b}, v8.16b
+	eor		v11.16b, v7.16b, v9.16b
+	tbl		v6.16b, {v10.16b}, v8.16b
+	tbl		v7.16b, {v11.16b}, v8.16b
+
+	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
+
+	sub		rounds, rounds, #1
+	b		.Ldec_sbox			// first pass skips shift_rows (presumably already covered by the M0ISR permutation)
+
+.Ldec_loop:
+	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
+.Ldec_sbox:
+	inv_sbox	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
+								v13, v14, v15
+	subs		rounds, rounds, #1
+	b.cc		.Ldec_done			// borrow, i.e. the counter was already 0: that was the last S-box pass
+
+	dec_next_rk
+
+	add_round_key	v0, v1, v6, v4, v2, v7, v3, v5
+
+	inv_mix_cols	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
+								v13, v14, v15
+
+	b.ne		.Ldec_loop			// still tests the subs above: none of the NEON ops in between set flags
+	ldr		q24, ISRM0			// counter just hit 0: the next (final) pass uses ISRM0 instead of ISR
+	b		.Ldec_loop
+.Ldec_done:
+	ldr		q12, [bskey, #-16]		// last round key
+
+	bitslice	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11
+
+	eor		v0.16b, v0.16b, v12.16b		// results are left in v0, v1, v6, v4, v2, v7, v3, v5 (the order the callers store)
+	eor		v1.16b, v1.16b, v12.16b
+	eor		v6.16b, v6.16b, v12.16b
+	eor		v4.16b, v4.16b, v12.16b
+	eor		v2.16b, v2.16b, v12.16b
+	eor		v7.16b, v7.16b, v12.16b
+	eor		v3.16b, v3.16b, v12.16b
+	eor		v5.16b, v5.16b, v12.16b
+	ret
+ENDPROC(aesbs_decrypt8)
+
+	/*
+	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		     int blocks)
+	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		     int blocks)
+	 */
+	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
+	stp		x29, x30, [sp, #-16]!		// save the frame record: the bl below overwrites x30
+	mov		x29, sp
+
+99:	mov		x5, #1
+	lsl		x5, x5, x4			// x5 = 1 << blocks
+	subs		w4, w4, #8			// blocks -= 8
+	csel		x4, x4, xzr, pl			// remaining count, clamped at 0
+	csel		x5, x5, xzr, mi			// keep the mask only if fewer than 8 blocks were left, else 0
+
+	ld1		{v0.16b}, [x1], #16		// NOTE(review): blocks == 0 sets bit 0, which is never tested, so 8 blocks would be processed
+	tbnz		x5, #1, 0f			// bit n set <=> exactly n blocks were left: stop loading
+	ld1		{v1.16b}, [x1], #16
+	tbnz		x5, #2, 0f
+	ld1		{v2.16b}, [x1], #16
+	tbnz		x5, #3, 0f
+	ld1		{v3.16b}, [x1], #16
+	tbnz		x5, #4, 0f
+	ld1		{v4.16b}, [x1], #16
+	tbnz		x5, #5, 0f
+	ld1		{v5.16b}, [x1], #16
+	tbnz		x5, #6, 0f
+	ld1		{v6.16b}, [x1], #16
+	tbnz		x5, #7, 0f
+	ld1		{v7.16b}, [x1], #16
+
+0:	mov		bskey, x2
+	mov		rounds, x3
+	bl		\do8				// always runs all 8 lanes; unused lanes are simply not stored
+
+	st1		{\o0\().16b}, [x0], #16
+	tbnz		x5, #1, 1f			// the same mask limits how many results are stored
+	st1		{\o1\().16b}, [x0], #16
+	tbnz		x5, #2, 1f
+	st1		{\o2\().16b}, [x0], #16
+	tbnz		x5, #3, 1f
+	st1		{\o3\().16b}, [x0], #16
+	tbnz		x5, #4, 1f
+	st1		{\o4\().16b}, [x0], #16
+	tbnz		x5, #5, 1f
+	st1		{\o5\().16b}, [x0], #16
+	tbnz		x5, #6, 1f
+	st1		{\o6\().16b}, [x0], #16
+	tbnz		x5, #7, 1f
+	st1		{\o7\().16b}, [x0], #16
+
+	cbnz		x4, 99b				// more blocks left: process the next batch
+
+1:	ldp		x29, x30, [sp], #16
+	ret
+	.endm
+
+	.align		4
+ENTRY(aesbs_ecb_encrypt)
+	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
+ENDPROC(aesbs_ecb_encrypt)
+
+	.align		4
+ENTRY(aesbs_ecb_decrypt)
+	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
+ENDPROC(aesbs_ecb_decrypt)
+
+	.macro		next_tweak, out, in, const, tmp
+	sshr		\tmp\().2d,  \in\().2d,   #63		// all-ones in each 64-bit lane whose top bit is set
+	and		\tmp\().16b, \tmp\().16b, \const\().16b	// keep that lane's constant where the top bit was set
+	add		\out\().2d,  \in\().2d,   \in\().2d	// out = in << 1, separately in each 64-bit lane
+	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8	// swap the two lanes so each correction lands in the other half
+	eor		\out\().16b, \out\().16b, \tmp\().16b	// fold the corrections into the shifted value
+	.endm
+
+	.align		4
+.Lxts_mul_x:
+CPU_LE(	.quad		1, 0x87		)
+CPU_BE(	.quad		0x87, 1		)
+
+	/*
+	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		     int blocks, u8 iv[])
+	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		     int blocks, u8 iv[])
+	 */
+__xts_crypt8:
+	mov		x6, #1
+	lsl		x6, x6, x4			// x6 = 1 << blocks
+	subs		w4, w4, #8			// blocks -= 8
+	csel		x4, x4, xzr, pl			// remaining count, clamped at 0
+	csel		x6, x6, xzr, mi			// keep the mask only if fewer than 8 blocks were left, else 0
+
+	ld1		{v0.16b}, [x1], #16
+	next_tweak	v26, v25, v30, v31		// v26 = tweak for the following block, derived from v25
+	eor		v0.16b, v0.16b, v25.16b		// XOR the input block with its tweak
+	tbnz		x6, #1, 0f			// bit n set <=> exactly n blocks were left: stop loading
+
+	ld1		{v1.16b}, [x1], #16
+	next_tweak	v27, v26, v30, v31
+	eor		v1.16b, v1.16b, v26.16b
+	tbnz		x6, #2, 0f
+
+	ld1		{v2.16b}, [x1], #16
+	next_tweak	v28, v27, v30, v31
+	eor		v2.16b, v2.16b, v27.16b
+	tbnz		x6, #3, 0f
+
+	ld1		{v3.16b}, [x1], #16
+	next_tweak	v29, v28, v30, v31
+	eor		v3.16b, v3.16b, v28.16b
+	tbnz		x6, #4, 0f
+
+	ld1		{v4.16b}, [x1], #16
+	str		q29, [sp, #16]			// tweaks for blocks 4-7 are spilled into the stack area reserved by __xts_crypt
+	eor		v4.16b, v4.16b, v29.16b
+	next_tweak	v29, v29, v30, v31		// from here on v29 is advanced in place
+	tbnz		x6, #5, 0f
+
+	ld1		{v5.16b}, [x1], #16
+	str		q29, [sp, #32]
+	eor		v5.16b, v5.16b, v29.16b
+	next_tweak	v29, v29, v30, v31
+	tbnz		x6, #6, 0f
+
+	ld1		{v6.16b}, [x1], #16
+	str		q29, [sp, #48]
+	eor		v6.16b, v6.16b, v29.16b
+	next_tweak	v29, v29, v30, v31
+	tbnz		x6, #7, 0f
+
+	ld1		{v7.16b}, [x1], #16
+	str		q29, [sp, #64]
+	eor		v7.16b, v7.16b, v29.16b
+	next_tweak	v29, v29, v30, v31		// v29 ends up holding the tweak that follows the last loaded block
+
+0:	mov		bskey, x2
+	mov		rounds, x3
+	br		x7				// tail-call the routine whose address __xts_crypt put in x7; x30 is untouched, so it returns to our caller
+ENDPROC(__xts_crypt8)
+
+	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
+	stp		x29, x30, [sp, #-80]!		// 16-byte frame record plus 64 bytes for four spilled tweaks
+	mov		x29, sp
+
+	ldr		q30, .Lxts_mul_x		// constant consumed by next_tweak
+	ld1		{v25.16b}, [x5]			// v25 = current tweak, read from iv[]
+
+99:	adr		x7, \do8			// __xts_crypt8 branches here once the inputs are tweaked
+	bl		__xts_crypt8
+
+	ldp		q16, q17, [sp, #16]		// reload the tweaks spilled for blocks 4-7
+	ldp		q18, q19, [sp, #48]
+
+	eor		\o0\().16b, \o0\().16b, v25.16b	// XOR each result with the same tweak that was applied to its input
+	eor		\o1\().16b, \o1\().16b, v26.16b
+	eor		\o2\().16b, \o2\().16b, v27.16b
+	eor		\o3\().16b, \o3\().16b, v28.16b
+
+	st1		{\o0\().16b}, [x0], #16
+	mov		v25.16b, v26.16b		// v25 always tracks the tweak for the next unprocessed block
+	tbnz		x6, #1, 1f			// x6 is the block-count mask computed by __xts_crypt8
+	st1		{\o1\().16b}, [x0], #16
+	mov		v25.16b, v27.16b
+	tbnz		x6, #2, 1f
+	st1		{\o2\().16b}, [x0], #16
+	mov		v25.16b, v28.16b
+	tbnz		x6, #3, 1f
+	st1		{\o3\().16b}, [x0], #16
+	mov		v25.16b, v29.16b		// v29 already is the tweak following the last block loaded
+	tbnz		x6, #4, 1f
+
+	eor		\o4\().16b, \o4\().16b, v16.16b
+	eor		\o5\().16b, \o5\().16b, v17.16b
+	eor		\o6\().16b, \o6\().16b, v18.16b
+	eor		\o7\().16b, \o7\().16b, v19.16b
+
+	st1		{\o4\().16b}, [x0], #16
+	tbnz		x6, #5, 1f
+	st1		{\o5\().16b}, [x0], #16
+	tbnz		x6, #6, 1f
+	st1		{\o6\().16b}, [x0], #16
+	tbnz		x6, #7, 1f
+	st1		{\o7\().16b}, [x0], #16
+
+	cbnz		x4, 99b				// more blocks left: process the next batch
+
+1:	st1		{v25.16b}, [x5]			// hand the next tweak back through iv[]
+	ldp		x29, x30, [sp], #80
+	ret
+	.endm
+
+ENTRY(aesbs_xts_encrypt)
+	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
+ENDPROC(aesbs_xts_encrypt)
+
+ENTRY(aesbs_xts_decrypt)
+	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
+ENDPROC(aesbs_xts_decrypt)
+
+	.macro		next_ctr, v
+	mov		\v\().d[1], x8			// x7:x8 is a 128-bit counter: low half goes to lane 1 ...
+	mov		\v\().d[0], x7			// ... high half to lane 0
+	adds		x8, x8, #1			// then increment the counter for the next use,
+	adc		x7, x7, xzr			// carrying from the low into the high half
+	rev64		\v\().16b, \v\().16b		// byte-reverse each 64-bit lane of the block just built
+	.endm
+
+	/*
+	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+	 *		     int rounds, int blocks, u8 iv[], bool final)
+	 */
+ENTRY(aesbs_ctr_encrypt)
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
+
+	add		x4, x4, x6		// do one extra block if final
+
+	ldp		x7, x8, [x5]			// x7:x8 = the two 64-bit halves of iv[]
+	ld1		{v0.16b}, [x5]			// the first counter block is iv[] as is
+CPU_LE(	rev		x7, x7		)
+CPU_LE(	rev		x8, x8		)
+	adds		x8, x8, #1			// x7:x8 = iv + 1, the counter for the second block
+	adc		x7, x7, xzr
+
+99:	mov		x9, #1
+	lsl		x9, x9, x4			// x9 = 1 << (blocks + final)
+	subs		w4, w4, #8
+	csel		x4, x4, xzr, pl			// remaining count, clamped at 0
+	csel		x9, x9, xzr, le			// le, not mi: the mask is kept for a count of exactly 8 as well
+
+	tbnz		x9, #1, 0f			// bit n set <=> n counter blocks needed: stop generating
+
+	next_ctr	v1
+	tbnz		x9, #2, 0f
+
+	next_ctr	v2
+	tbnz		x9, #3, 0f
+
+	next_ctr	v3
+	tbnz		x9, #4, 0f
+
+	next_ctr	v4
+	tbnz		x9, #5, 0f
+
+	next_ctr	v5
+	tbnz		x9, #6, 0f
+
+	next_ctr	v6
+	tbnz		x9, #7, 0f
+
+	next_ctr	v7
+
+0:	mov		bskey, x2
+	mov		rounds, x3
+	bl		aesbs_encrypt8			// turn the counter blocks into keystream
+
+	lsr		x9, x9, x6		// disregard the final block
+	tbnz		x9, #0, 0f			// only the extra block was generated: v0 is stored to iv[] at 0: below
+
+	ld1		{v8.16b}, [x1], #16
+	eor		v0.16b, v0.16b, v8.16b		// output = keystream XOR input
+	st1		{v0.16b}, [x0], #16
+	tbnz		x9, #1, 1f			// N full blocks done: label N deals with the keystream block after them
+
+	ld1		{v9.16b}, [x1], #16
+	eor		v1.16b, v1.16b, v9.16b
+	st1		{v1.16b}, [x0], #16
+	tbnz		x9, #2, 2f
+
+	ld1		{v10.16b}, [x1], #16
+	eor		v4.16b, v4.16b, v10.16b
+	st1		{v4.16b}, [x0], #16
+	tbnz		x9, #3, 3f
+
+	ld1		{v11.16b}, [x1], #16
+	eor		v6.16b, v6.16b, v11.16b
+	st1		{v6.16b}, [x0], #16
+	tbnz		x9, #4, 4f
+
+	ld1		{v12.16b}, [x1], #16
+	eor		v3.16b, v3.16b, v12.16b
+	st1		{v3.16b}, [x0], #16
+	tbnz		x9, #5, 5f
+
+	ld1		{v13.16b}, [x1], #16
+	eor		v7.16b, v7.16b, v13.16b
+	st1		{v7.16b}, [x0], #16
+	tbnz		x9, #6, 6f
+
+	ld1		{v14.16b}, [x1], #16
+	eor		v2.16b, v2.16b, v14.16b
+	st1		{v2.16b}, [x0], #16
+	tbnz		x9, #7, 7f
+
+	ld1		{v15.16b}, [x1], #16
+	eor		v5.16b, v5.16b, v15.16b
+	st1		{v5.16b}, [x0], #16
+
+	next_ctr	v0				// first counter block of the next batch
+	cbnz		x4, 99b
+
+0:	st1		{v0.16b}, [x5]			// v0 = next counter block, or the keystream block on the tbnz #0 path
+8:	ldp		x29, x30, [sp], #16
+	ret
+
+	/*
+	 * If we are handling the tail of the input (x6 == 1), return the
+	 * final keystream block back to the caller via the IV buffer.
+	 */
+1:	cbz		x6, 8b				// NOTE(review): a non-final partial batch returns here without updating iv[]
+	st1		{v1.16b}, [x5]
+	b		8b
+2:	cbz		x6, 8b
+	st1		{v4.16b}, [x5]
+	b		8b
+3:	cbz		x6, 8b
+	st1		{v6.16b}, [x5]
+	b		8b
+4:	cbz		x6, 8b
+	st1		{v3.16b}, [x5]
+	b		8b
+5:	cbz		x6, 8b
+	st1		{v7.16b}, [x5]
+	b		8b
+6:	cbz		x6, 8b
+	st1		{v2.16b}, [x5]
+	b		8b
+7:	cbz		x6, 8b
+	st1		{v5.16b}, [x5]
+	b		8b
+ENDPROC(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
new file mode 100644
index 000000000000..57982172563c
--- /dev/null
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -0,0 +1,300 @@
+/*
+ * Bit sliced AES using NEON instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/xts.h>
+#include <linux/module.h>
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks);
+asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks);
+
+asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 iv[]);
+asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 iv[]);
+
+asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 iv[], bool final);
+
+asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
+
+struct aesbs_key {
+	u8			key[13 * (8 * AES_BLOCK_SIZE) + 32];
+};
+
+struct aesbs_ctx {
+	struct aesbs_key	bskey;
+	int			rounds;
+};
+
+struct aesbs_xts_ctx {
+	struct aesbs_key	bskey;
+	struct crypto_cipher	*tweak_tfm;
+	int			rounds;
+};
+
+static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			unsigned int key_len)
+{
+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_aes_ctx rk;
+	int err;
+
+	err = crypto_aes_expand_key(&rk, in_key, key_len);
+	if (err)
+		return err;
+
+	ctx->rounds = 6 + key_len / 4;
+
+	kernel_neon_begin();
+	aesbs_convert_key(ctx->bskey.key, rk.key_enc, ctx->rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int xts_init(struct crypto_skcipher *tfm)
+{
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(ctx->tweak_tfm))
+		return PTR_ERR(ctx->tweak_tfm);
+
+	return 0;
+}
+
+static void xts_exit(struct crypto_skcipher *tfm)
+{
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_cipher(ctx->tweak_tfm);
+}
+
+static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_aes_ctx rk;
+	int err;
+
+	err = xts_verify_key(tfm, in_key, key_len);
+	if (err)
+		return err;
+
+	err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len / 2,
+				   key_len / 2);
+	if (err)
+		return err;
+
+	err = crypto_aes_expand_key(&rk, in_key, key_len / 2);
+	if (err)
+		return err;
+
+	ctx->rounds = 6 + key_len / 8;
+
+	kernel_neon_begin();
+	aesbs_convert_key(ctx->bskey.key, rk.key_enc, ctx->rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int __ecb_crypt(struct skcipher_request *req,
+		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks))
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	kernel_neon_begin();
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+		if (walk.nbytes < walk.total)
+			blocks = round_down(blocks,
+					    walk.chunksize / AES_BLOCK_SIZE);
+
+		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->bskey.key,
+		   ctx->rounds, blocks);
+		err = skcipher_walk_done(&walk,
+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static int ecb_encrypt(struct skcipher_request *req)
+{
+	return __ecb_crypt(req, aesbs_ecb_encrypt);
+}
+
+static int ecb_decrypt(struct skcipher_request *req)
+{
+	return __ecb_crypt(req, aesbs_ecb_decrypt);
+}
+
+static int __xts_crypt(struct skcipher_request *req,
+		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
+				  int rounds, int blocks, u8 iv[]))
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);
+
+	kernel_neon_begin();
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+		if (walk.nbytes < walk.total)
+			blocks = round_down(blocks,
+					    walk.chunksize / AES_BLOCK_SIZE);
+
+		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->bskey.key,
+		   ctx->rounds, blocks, walk.iv);
+		err = skcipher_walk_done(&walk,
+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static int xts_encrypt(struct skcipher_request *req)
+{
+	return __xts_crypt(req, aesbs_xts_encrypt);
+}
+
+static int xts_decrypt(struct skcipher_request *req)
+{
+	return __xts_crypt(req, aesbs_xts_decrypt);
+}
+
+static int ctr_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	kernel_neon_begin();
+	while (walk.nbytes > 0) {
+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+		bool final = (walk.total % AES_BLOCK_SIZE) != 0;
+
+		if (walk.nbytes < walk.total) {
+			blocks = round_down(blocks,
+					    walk.chunksize / AES_BLOCK_SIZE);
+			final = false;
+		}
+
+		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				  ctx->bskey.key, ctx->rounds, blocks, walk.iv,
+				  final);
+
+		if (final) {
+			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+
+			if (dst != src)
+				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
+			crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE);
+
+			err = skcipher_walk_done(&walk, 0);
+			break;
+		}
+		err = skcipher_walk_done(&walk,
+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+	}
+	kernel_neon_end();
+
+	return err;
+}
+
+static struct skcipher_alg aes_algs[] = { {
+	.base.cra_name		= "ecb(aes)",
+	.base.cra_driver_name	= "ecb-aes-neonbs",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.chunksize		= 8 * AES_BLOCK_SIZE,
+	.setkey			= aesbs_setkey,
+	.encrypt		= ecb_encrypt,
+	.decrypt		= ecb_decrypt,
+}, {
+	.base.cra_name		= "xts(aes)",
+	.base.cra_driver_name	= "xts-aes-neonbs",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct aesbs_xts_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize		= 2 * AES_MAX_KEY_SIZE,
+	.chunksize		= 8 * AES_BLOCK_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= aesbs_xts_setkey,
+	.encrypt		= xts_encrypt,
+	.decrypt		= xts_decrypt,
+	.init			= xts_init,
+	.exit			= xts_exit,
+}, {
+	.base.cra_name		= "ctr(aes)",
+	.base.cra_driver_name	= "ctr-aes-neonbs",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.chunksize		= 8 * AES_BLOCK_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= aesbs_setkey,
+	.encrypt		= ctr_encrypt,
+	.decrypt		= ctr_encrypt,
+} };
+
+static int __init aes_init(void)
+{
+	return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void aes_exit(void)
+{
+	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+module_init(aes_init);
+module_exit(aes_exit);
-- 
2.7.4

^ permalink raw reply related

* [PATCH] ARM: dts: vexpress: Support GICC_DIR operations
From: Marc Zyngier @ 2016-12-12 17:35 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161210201351.25894-1-christoffer.dall@linaro.org>

[+Sudeep]

On 10/12/16 20:13, Christoffer Dall wrote:
> The GICv2 CPU interface registers span across 8K, not 4K as indicated in
> the DT.  Only the GICC_DIR register is located after the initial 4K
> boundary, leaving a functional system but without support for separately
> EOI'ing and deactivating interrupts.
> 
> After this change the system supports split priority drop and interrupt
> deactivation.
> 
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
>  arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
> index 0205c97..2e0cf39 100644
> --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
> +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
> @@ -126,7 +126,7 @@
>  		#address-cells = <0>;
>  		interrupt-controller;
>  		reg = <0 0x2c001000 0 0x1000>,
> -		      <0 0x2c002000 0 0x1000>,
> +		      <0 0x2c002000 0 0x2000>,
>  		      <0 0x2c004000 0 0x2000>,
>  		      <0 0x2c006000 0 0x2000>;
>  		interrupts = <1 9 0xf04>;
> 

Acked-by: Marc Zyngier <marc.zyngier@arm.com>

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply

* [PATCH 4/4] dt-bindings: input: Specify the interrupt number of TPS65217 power button
From: Rob Herring @ 2016-12-12 17:27 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161209062833.5768-5-woogyom.kim@gmail.com>

On Fri, Dec 09, 2016 at 03:28:33PM +0900, Milo Kim wrote:
> Specify the power button interrupt number which is from the datasheet.
> 
> Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
> ---
>  Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* [PATCH 3/4] dt-bindings: power/supply: Update TPS65217 properties
From: Rob Herring @ 2016-12-12 17:26 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161209062833.5768-4-woogyom.kim@gmail.com>

On Fri, Dec 09, 2016 at 03:28:32PM +0900, Milo Kim wrote:
> Add interrupt specifiers for USB and AC charger input. Interrupt numbers
> are from the datasheet.
> Fix wrong property for compatible string.
> 
> Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
> ---
>  .../devicetree/bindings/power/supply/tps65217_charger.txt          | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* [PATCH 2/4] dt-bindings: mfd: Remove TPS65217 interrupts
From: Rob Herring @ 2016-12-12 17:25 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161209062833.5768-3-woogyom.kim@gmail.com>

On Fri, Dec 09, 2016 at 03:28:31PM +0900, Milo Kim wrote:
> Interrupt numbers are from the datasheet, so no need to keep them in
> the ABI. Use the number in the DT file.

I don't see the purpose of ripping this out. The headers have always
been for convenience, regardless of whether the values come from the
datasheet or not.

> Signed-off-by: Milo Kim <woogyom.kim@gmail.com>
> ---
>  arch/arm/boot/dts/am335x-bone-common.dtsi |  8 +++-----
>  include/dt-bindings/mfd/tps65217.h        | 26 --------------------------
>  2 files changed, 3 insertions(+), 31 deletions(-)
>  delete mode 100644 include/dt-bindings/mfd/tps65217.h

^ permalink raw reply

* [PATCH] clk: bcm: Fix 'maybe-uninitialized' warning in bcm2835_clock_choose_div_and_prate()
From: Eric Anholt @ 2016-12-12 17:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481529653-28133-1-git-send-email-boris.brezillon@free-electrons.com>

Boris Brezillon <boris.brezillon@free-electrons.com> writes:

> best_rate is reported as potentially uninitialized by gcc.
>
> Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
> Fixes: 155e8b3b0ee3 ("clk: bcm: Support rate change propagation on bcm2835 clocks")
> Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>

Reviewed-by: Eric Anholt <eric@anholt.net>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20161212/65cadde3/attachment.sig>

^ permalink raw reply

* [PATCH] dt-bindings: Document the hi3660 reset bindings
From: Rob Herring @ 2016-12-12 17:20 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481249504-7942-1-git-send-email-zhangfei.gao@linaro.org>

On Fri, Dec 09, 2016 at 10:11:44AM +0800, Zhangfei Gao wrote:
> Add DT bindings documentation for hi3660 SoC reset controller.
> 
> Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> ---
>  .../bindings/reset/hisilicon,hi3660-reset.txt      | 43 ++++++++++++++++++++++
>  1 file changed, 43 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.txt

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* [PATCH 1/2] dt-bindings: zx296718-clk: add compatible for audio clock controller
From: Rob Herring @ 2016-12-12 17:10 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481189157-8995-1-git-send-email-shawnguo@kernel.org>

On Thu, Dec 08, 2016 at 05:25:56PM +0800, Shawn Guo wrote:
> From: Shawn Guo <shawn.guo@linaro.org>
> 
> It adds the compatible string for zx296718 audio clock controller.
> 
> Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
> ---
>  Documentation/devicetree/bindings/clock/zx296718-clk.txt | 3 +++
>  1 file changed, 3 insertions(+)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* [PATCH 5/5] Documentation: fsl-quadspi: Add fsl, ls1012a-qspi compatible string
From: Rob Herring @ 2016-12-12 17:09 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481188984-43683-6-git-send-email-yao.yuan@freescale.com>

On Thu, Dec 08, 2016 at 05:23:04PM +0800, Yuan Yao wrote:
> From: Yuan Yao <yao.yuan@nxp.com>

Same problem in this subject too.

> 
> new compatible string: "fsl,ls1012a-qspi".
> 
> Signed-off-by: Yuan Yao <yao.yuan@nxp.com>
> ---
>  Documentation/devicetree/bindings/mtd/fsl-quadspi.txt | 1 +
>  1 file changed, 1 insertion(+)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* [PATCH 3/5] Documentation: dt: mtd: add chip support for "jedec, spi-nor"
From: Rob Herring @ 2016-12-12 17:09 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481188984-43683-4-git-send-email-yao.yuan@freescale.com>

On Thu, Dec 08, 2016 at 05:23:02PM +0800, Yuan Yao wrote:
> From: Yuan Yao <yao.yuan@nxp.com>

The compatible string is wrong in the subject.

> 
> "sst25wf040b" and "en25s64" are also chip compatible with SPI NOR flash.
> 
> Signed-off-by: Yuan Yao <yao.yuan@nxp.com>
> ---
>  Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt | 2 ++
>  1 file changed, 2 insertions(+)

Otherwise,

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* [PATCH 2/5] Documentation: fsl: dspi: Add fsl, ls1012a-dspi compatible string
From: Rob Herring @ 2016-12-12 17:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481188984-43683-3-git-send-email-yao.yuan@freescale.com>

On Thu, Dec 08, 2016 at 05:23:01PM +0800, Yuan Yao wrote:
> From: Yuan Yao <yao.yuan@nxp.com>
> 
> new compatible string: "fsl,ls1012a-dspi".
> 
> Signed-off-by: Yuan Yao <yao.yuan@nxp.com>
> ---
>  Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt | 1 +
>  1 file changed, 1 insertion(+)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* [PATCH] efi/libstub: arm*: Pass latest memory map to the kernel
From: Jeffrey Hugo @ 2016-12-12 17:00 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <584E6F50.3020901@arm.com>

On 12/12/2016 2:35 AM, James Morse wrote:
> Hi Ard,
>
> On 09/12/16 18:24, Ard Biesheuvel wrote:
>> As reported by James, the current libstub code involving the annotated
>> memory map only works somewhat correctly by accident, due to the fact
>> that a pool allocation happens to be reused immediately, retaining its
>> former contents.
>>
>> Instead of juggling memory maps, which makes the code more complex than
>> it needs to be, simply put a placeholder value into the FDT, and only
>> write the actual value after ExitBootServices() has been called.
>
>> diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
>> index a6a93116a8f0..5d39dff77f17 100644
>> --- a/drivers/firmware/efi/libstub/fdt.c
>> +++ b/drivers/firmware/efi/libstub/fdt.c
>> @@ -101,7 +101,7 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
>>  	if (status)
>>  		goto fdt_set_fail;
>>
>> -	fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map);
>> +	fdt_val64 = U64_MAX; /* placeholder */
>>  	status = fdt_setprop(fdt, node, "linux,uefi-mmap-start",
>>  			     &fdt_val64,  sizeof(fdt_val64));
>>  	if (status)
>> @@ -148,6 +148,24 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
>>  	return EFI_LOAD_ERROR;
>>  }
>>
>> +static efi_status_t update_fdt_memmap(void *fdt, u64 memmap)
>> +{
>> +	int node = fdt_path_offset(fdt, "/chosen");
>> +	efi_status_t status;
>> +
>> +	if (node < 0)
>> +		return EFI_LOAD_ERROR;
>> +
>> +	memmap = cpu_to_fdt64(memmap);
>> +	status = fdt_setprop_inplace(fdt, node, "linux,uefi-mmap-start",
>> +				     &memmap, sizeof(memmap));
>> +
>> +	if (status)
>> +		return EFI_LOAD_ERROR;
>> +
>> +	return EFI_SUCCESS;
>> +}
>
> v4.9.0 with this patch doesn't boot on my Seattle (with known buggy UEFI FW)
> [0]. It looks like the memory map is truncated (and missing a runtime region,
> compare with [1]). Should 'linux,uefi-mmap-size' be updated too? (Otherwise it's
> the size when we retrieved the runtime mapping, but before we allocated the FDT)
>

Overall this fails for me as well.  It appears to work, until I trigger 
the race condition I fixed, then OOM killer gets triggered the instant 
rootfs starts to initialize.  Since I see James has a number of 
comments, I did not investigate further to determine why the patch is 
not working on my system.

-- 
Jeffrey Hugo
Qualcomm Datacenter Technologies as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply

* [PATCH 1/1] arm/module: maximum utilization of module area.
From: Ard Biesheuvel @ 2016-12-12 16:57 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <alpine.LFD.2.20.1612121026250.1657@knanqh.ubzr>

On 12 December 2016 at 15:28, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Mon, 12 Dec 2016, Vaneet Narang wrote:
>
>> Hi,
>>
>> >A PC24 relocation has a range of +/-32MB.  This means that where-ever
>> >the module is placed, it must be capable of reaching any function
>> >within the kernel text, which may itself be quite large (eg, 8MB, or
>> >possibly larger).  The module area exists to allow modules to be
>> >located in an area where PC24 relocations are able to reach all of the
>> >kernel text on sensibly configured kernels, thereby allowing for
>> >optimal performance.
>> >
>> >If you wish to load large modules, then enable ARM_MODULE_PLTS, which
>> >will use the less efficient PLT method (which is basically an indirect
>> >function call) for relocations that PC24 can't handle, and will allow
>> >the module to be loaded into the vmalloc area.
>> >
>> >Growing the module area so that smaller modules also get penalised by
>> >the PLT indirection is not sane.
>>
>> This is exactly what I am saying. These changes are useful to accommodate
>> 22MB modules without enabling ARM_MODULE_PLTS.
>
> I think you need to figure out why you need such a huge module in the
> first place.  That is very uncommon indeed.
>

Also, note that the module PLT code was recently optimized, to remove
some pathological behavior which severely affected load times of large
modules.

Can you quantify the performance hit you are taking when using module
PLTs? And the actual increase in memory footprint?

^ permalink raw reply

* [PATCH v5 2/4] drm: bridge: add support for TI ths8135
From: Laurent Pinchart @ 2016-12-12 16:49 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161212164547.jw5ejbums6nwwtw2@rob-hp-laptop>

Hello,

On Monday 12 Dec 2016 10:45:47 Rob Herring wrote:
> On Wed, Dec 07, 2016 at 11:42:43AM +0100, Bartosz Golaszewski wrote:
> > THS8135 is a configurable video DAC. Add DT bindings for this chip and
> > use the dumb-vga-dac driver for now as no configuration is required to
> > make it work.
> > 
> > Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
> > ---
> > 
> >  .../bindings/display/bridge/ti,ths8135.txt         | 52 +++++++++++++++++
> >  drivers/gpu/drm/bridge/dumb-vga-dac.c              |  1 +
> >  2 files changed, 53 insertions(+)
> >  create mode 100644
> >  Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> Acked-by: Rob Herring <robh@kernel.org>
> 
> But one nit below:
> > diff --git
> > a/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> > b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt new
> > file mode 100644
> > index 0000000..23cd8ee
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> > @@ -0,0 +1,52 @@
> > +THS8135 Video DAC
> > +-----------------
> > +
> > +This is the binding for Texas Instruments THS8135 Video DAC bridge.
> > +
> > +Required properties:
> > +
> > +- compatible: Must be "ti,ths8135"
> > +
> > +Required nodes:
> > +
> > > +This device has two video ports. Their connections are modelled using the OF
> > > +graph bindings specified in Documentation/devicetree/bindings/graph.txt.
> > > +
> > +- Video port 0 for RGB input
> > +- Video port 1 for VGA output
> > +
> > +Example
> > +-------
> > +
> > +vga-bridge {
> > +	compatible = "ti,ths8135";
> > +	#address-cells = <1>;
> > +	#size-cells = <0>;
> > +
> > +	ports {
> > +		#address-cells = <1>;
> > +		#size-cells = <0>;
> > +
> > > +		port@0 {
> > +			#address-cells = <1>;
> > +			#size-cells = <0>;
> > +			reg = <0>;
> > +
> > > +			vga_bridge_in: endpoint@0 {
> > +				reg = <0>;
> 
> You don't need reg here.

In which case the endpoint node should be named endpoint, not endpoint@0. You
could then also remove the #address-cells and #size-cells properties from the
port@0 node. Same for port@1 below.

> > +				remote-endpoint = <&lcdc_out_vga>;
> > +			};
> > +		};
> > +
> > > +		port@1 {
> > +			#address-cells = <1>;
> > +			#size-cells = <0>;
> > +			reg = <1>;
> > +
> > > +			vga_bridge_out: endpoint@0 {
> > +				reg = <0>;
> > +				remote-endpoint = <&vga_con_in>;
> > +			};
> > +		};
> > +	};
> > +};
> > diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c
> > b/drivers/gpu/drm/bridge/dumb-vga-dac.c index afec232..498fa75 100644
> > --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
> > +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> > @@ -204,6 +204,7 @@ static int dumb_vga_remove(struct platform_device
> > *pdev)
> >  
> >  static const struct of_device_id dumb_vga_match[] = {
> >  	{ .compatible = "dumb-vga-dac" },
> > +	{ .compatible = "ti,ths8135" },
> >  	{},
> >  };
> >  MODULE_DEVICE_TABLE(of, dumb_vga_match);

-- 
Regards,

Laurent Pinchart

^ permalink raw reply

* [PATCH] ARM: at91/dt: sama5d2: add ssc0 definition
From: Alexandre Belloni @ 2016-12-12 16:47 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAJP5LRNHDXQtZDRcQcNuS76uvUndKVjKqWC2KXTM+DOMrro5kg@mail.gmail.com>

On 05/12/2016 at 12:28:34 +0200, Alex Gershgorin wrote :
> 

> From 3a5a8e78ccccab2858c5944000884ab3c49eba5a Mon Sep 17 00:00:00 2001
> From: Alex <Alex.Gershgorin@qcore.com>
> Date: Sun, 4 Dec 2016 16:03:56 +0200
> Subject: [PATCH] ARM: at91/dt: sama5d2: add ssc0 definition
> 
> The sama5d2 SoC has Synchronous Serial Controller which provides
> synchronous communication link with external devices.
> It's generally used in audio and telecom applications such as
> I2S, Short Frame Sync, Long Frame Sync.
> 
> Signed-off-by: Alex Gershgorin <alex.gershgorin@qcore.com>
> ---
>  arch/arm/boot/dts/sama5d2.dtsi | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
Applied, thanks.

-- 
Alexandre Belloni, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply

* [PATCH v5 2/4] drm: bridge: add support for TI ths8135
From: Rob Herring @ 2016-12-12 16:45 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481107365-24839-3-git-send-email-bgolaszewski@baylibre.com>

On Wed, Dec 07, 2016 at 11:42:43AM +0100, Bartosz Golaszewski wrote:
> THS8135 is a configurable video DAC. Add DT bindings for this chip and
> use the dumb-vga-dac driver for now as no configuration is required to
> make it work.
> 
> Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
> ---
>  .../bindings/display/bridge/ti,ths8135.txt         | 52 ++++++++++++++++++++++
>  drivers/gpu/drm/bridge/dumb-vga-dac.c              |  1 +
>  2 files changed, 53 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt

Acked-by: Rob Herring <robh@kernel.org>

But one nit below:

> 
> diff --git a/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> new file mode 100644
> index 0000000..23cd8ee
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt
> @@ -0,0 +1,52 @@
> +THS8135 Video DAC
> +-----------------
> +
> +This is the binding for Texas Instruments THS8135 Video DAC bridge.
> +
> +Required properties:
> +
> +- compatible: Must be "ti,ths8135"
> +
> +Required nodes:
> +
> +This device has two video ports. Their connections are modelled using the OF
> +graph bindings specified in Documentation/devicetree/bindings/graph.txt.
> +
> +- Video port 0 for RGB input
> +- Video port 1 for VGA output
> +
> +Example
> +-------
> +
> +vga-bridge {
> +	compatible = "ti,ths8135";
> +	#address-cells = <1>;
> +	#size-cells = <0>;
> +
> +	ports {
> +		#address-cells = <1>;
> +		#size-cells = <0>;
> +
> +		port@0 {
> +			#address-cells = <1>;
> +			#size-cells = <0>;
> +			reg = <0>;
> +
> +			vga_bridge_in: endpoint@0 {
> +				reg = <0>;

You don't need reg here.

> +				remote-endpoint = <&lcdc_out_vga>;
> +			};
> +		};
> +
> +		port@1 {
> +			#address-cells = <1>;
> +			#size-cells = <0>;
> +			reg = <1>;
> +
> +			vga_bridge_out: endpoint@0 {
> +				reg = <0>;
> +				remote-endpoint = <&vga_con_in>;
> +			};
> +		};
> +	};
> +};
> diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> index afec232..498fa75 100644
> --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
> +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
> @@ -204,6 +204,7 @@ static int dumb_vga_remove(struct platform_device *pdev)
>  
>  static const struct of_device_id dumb_vga_match[] = {
>  	{ .compatible = "dumb-vga-dac" },
> +	{ .compatible = "ti,ths8135" },
>  	{},
>  };
>  MODULE_DEVICE_TABLE(of, dumb_vga_match);
> -- 
> 2.9.3
> 

^ permalink raw reply

* [PATCH 2/2] FPGA: Add TS-7300 FPGA manager
From: Alan Tull @ 2016-12-12 16:44 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <39bc0569-81e1-8c35-0280-4ba1824b2710@gmail.com>

On Mon, 12 Dec 2016, Florian Fainelli wrote:

> On 12/12/2016 08:01 AM, Alan Tull wrote:
> > On Sun, 11 Dec 2016, Florian Fainelli wrote:
> > 
> >> Add support for loading bitstreams on the Altera Cyclone II FPGA
> >> populated on the TS-7300 board. This is done through the configuration
> >> and data registers offered through a memory interface between the EP93xx
> >> SoC and the FPGA.
> >>
> >> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> > 
> > Hi Florian,
> > 
> > Thanks for submitting!
> > 
> > How specific is this to the TS-7300 board?
> > 
> > I'm unclear about the programming method here.  Are these registers
> > exposed by the EP93xx?  Is it possible that another cpu could access
> > these two registers to configure the cyclone ii?  Is this passive
> > serial?
> 
> So here is my understanding, from glancing at the TS-7300 board manual:
> 
> - there is an on-board CPLD which does a variety of services and I/O for
> the EP9302 SoC, one of these services is the configuration of the
> on-board FPGA
> 
> - the programming interface here is some kind of abstraction around a
> Cyclone II FPGA, and is by no means standard, nor directly exposed to
> the CPU
> 
> - unless you go through the CPLD, there is no other way that you could
> configure the FPGA
> 
> Does that help answer your questions?

Yes it does.  Maybe add a brief comment explaining that, similar to what
you just said.

> >> +static int ts73xx_fpga_write_init(struct fpga_manager *mgr, u32 flags,
> >> +				  const char *buf, size_t count)
> >> +{
> >> +	struct ts73xx_fpga_priv *priv = mgr->priv;
> >> +
> >> +	/* Reset the FPGA */
> >> +	writeb(0, priv->io_base + TS73XX_FPGA_CONFIG_REG);
> >> +	udelay(30);
> >> +	writeb(0x2, priv->io_base + TS73XX_FPGA_CONFIG_REG);
> >> +	udelay(80);
> > 
> > Could these udelay values be macros?
> 
> The bit definitions could be defined, but the delays, why would that be
> useful?

It is helpful for someone reading the code to know what the delays
are for, e.g. if some future generation of the board/CPLD uses this
same driver.  Then, when this driver is broken for the next generation
board/CPLD, people trying to fix it know what each delay is there for
and have a better chance of adjusting the right delay.

> 
> > 
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static inline int ts73xx_fpga_can_write(struct ts73xx_fpga_priv *priv)
> >> +{
> >> +	unsigned int timeout = 1000;
> > 
> > Another macro?
> 
> The delay is just an arbitrary good timeout.
> -- 
> Florian
> 

^ permalink raw reply

* [PATCH v3 11/12] arm64: dts: marvell: add sdhci support for Armada 7K/8K
From: Gregory CLEMENT @ 2016-12-12 16:37 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161209200729.GO14217@n2100.armlinux.org.uk>

Hi Russell King,
 
 On ven., déc. 09 2016, Russell King - ARM Linux <linux@armlinux.org.uk> wrote:

> On Fri, Dec 09, 2016 at 11:30:07AM +0100, Gregory CLEMENT wrote:
>> Also enable it on the Armada 7040 DB board
>> 
>> Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
>
> Hi,
>
> Can this also be added to the cp110 on the 7k/8k SoCs as well, or does
> it rely on other unmerged support?

I did not add this one because until now I had not the hardware setup
available to test it.

But at least I can add the resources in the device tree and now I can
test it partially.

Gregory

>
> Thanks.
>
> -- 
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.

-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

^ permalink raw reply

* [PATCH v2 2/2] ASoC: atmel: tse850: rely on the ssc to register as a cpu dai by itself
From: Rob Herring @ 2016-12-12 16:34 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481052157-23400-3-git-send-email-peda@axentia.se>

On Tue, Dec 06, 2016 at 08:22:37PM +0100, Peter Rosin wrote:
> This breaks devicetree compatibility, but in this case that is ok. All
> affected units are either on my desk, or running an even older version
> of the driver that is not compatible with the upstreamed version anyway
> (and when these other units are eventually updated, they will get a
> fresh dtb as well, so that is not a significant problem either).

Perfect.

> All of that is of course assuming that no one else has managed to build
> something that can use this driver, but that seems extremely improbable.
> 
> Signed-off-by: Peter Rosin <peda@axentia.se>
> ---
>  .../bindings/sound/axentia,tse850-pcm5142.txt      | 11 ++++++++---

Acked-by: Rob Herring <robh@kernel.org>

>  sound/soc/atmel/tse850-pcm5142.c                   | 23 +++-------------------
>  2 files changed, 11 insertions(+), 23 deletions(-)

^ permalink raw reply

* [PATCH 2/2] FPGA: Add TS-7300 FPGA manager
From: Florian Fainelli @ 2016-12-12 16:27 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <alpine.DEB.2.10.1612120951520.3310@atull-730U3E-740U3E>

On 12/12/2016 08:01 AM, Alan Tull wrote:
> On Sun, 11 Dec 2016, Florian Fainelli wrote:
> 
>> Add support for loading bitstreams on the Altera Cyclone II FPGA
>> populated on the TS-7300 board. This is done through the configuration
>> and data registers offered through a memory interface between the EP93xx
>> SoC and the FPGA.
>>
>> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> 
> Hi Florian,
> 
> Thanks for submitting!
> 
> How specific is this to the TS-7300 board?
> 
> I'm unclear about the programming method here.  Are these registers
> exposed by the EP93xx?  Is it possible that another cpu could access
> these two registers to configure the cyclone ii?  Is this passive
> serial?

So here is my understanding, from glancing at the TS-7300 board manual:

- there is an on-board CPLD which does a variety of services and I/O for
the EP9302 SoC, one of these services is the configuration of the
on-board FPGA

- the programming interface here is some kind of abstraction around a
Cyclone II FPGA, and is by no means standard, nor directly exposed to
the CPU

- unless you go through the CPLD, there is no other way that you could
configure the FPGA

Does that help answer your questions?

> 
> Please cc linux-fpga at vger.kernel.org for the next version.
> 
> Other comments below...

OK, I will fix those,

> 
>> ---
>>  drivers/fpga/Kconfig       |   7 ++
>>  drivers/fpga/Makefile      |   1 +
>>  drivers/fpga/ts73xx-fpga.c | 165 +++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 173 insertions(+)
>>  create mode 100644 drivers/fpga/ts73xx-fpga.c
>>
>> diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
>> index cd84934774cc..109625707ef0 100644
>> --- a/drivers/fpga/Kconfig
>> +++ b/drivers/fpga/Kconfig
>> @@ -26,6 +26,13 @@ config FPGA_MGR_ZYNQ_FPGA
>>  	help
>>  	  FPGA manager driver support for Xilinx Zynq FPGAs.
>>  
>> +config FPGA_MGR_TS73XX
>> +	tristate "Technologic Systems TS-73xx SBC FPGA Manager"
>> +	depends on ARCH_EP93XX && MACH_TS72XX
>> +	help
>> +	  FPGA manager driver support for the Altera Cyclone II FPGA
>> +	  present on the TS-73xx SBC boards.
>> +
>>  endif # FPGA
>>  
>>  endmenu
>> diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
>> index 8d83fc6b1613..5d51265cc1b4 100644
>> --- a/drivers/fpga/Makefile
>> +++ b/drivers/fpga/Makefile
>> @@ -8,3 +8,4 @@ obj-$(CONFIG_FPGA)			+= fpga-mgr.o
>>  # FPGA Manager Drivers
>>  obj-$(CONFIG_FPGA_MGR_SOCFPGA)		+= socfpga.o
>>  obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA)	+= zynq-fpga.o
>> +obj-$(CONFIG_FPGA_MGR_TS73XX)		+= ts73xx-fpga.o
>> diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c
>> new file mode 100644
>> index 000000000000..2b3d5d668dfc
>> --- /dev/null
>> +++ b/drivers/fpga/ts73xx-fpga.c
>> @@ -0,0 +1,165 @@
>> +/*
>> + * Technologic Systems TS-73xx SBC FPGA loader
>> + *
>> + * Copyright (C) 2016 Florian Fainelli <f.fainelli@gmail.com>
>> + *
>> + * FPGA Manager Driver for the on-board Altera Cyclone II FPGA found on
>> + * TS-7300, heavily based on load_fpga.c in their vendor tree.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; version 2 of the License.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include <linux/delay.h>
>> +#include <linux/io.h>
>> +#include <linux/module.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/string.h>
>> +#include <linux/bitrev.h>
>> +#include <linux/fpga/fpga-mgr.h>
>> +
>> +#define TS73XX_FPGA_DATA_REG	0
>> +#define TS73XX_FPGA_CONFIG_REG	1
>> +
>> +struct ts73xx_fpga_priv {
>> +	void __iomem	*io_base;
>> +	struct device	*dev;
>> +};
>> +
>> +static enum fpga_mgr_states ts73xx_fpga_state(struct fpga_manager *mgr)
>> +{
>> +	return FPGA_MGR_STATE_UNKNOWN;
>> +}
>> +
>> +static int ts73xx_fpga_write_init(struct fpga_manager *mgr, u32 flags,
>> +				  const char *buf, size_t count)
>> +{
>> +	struct ts73xx_fpga_priv *priv = mgr->priv;
>> +
>> +	/* Reset the FPGA */
>> +	writeb(0, priv->io_base + TS73XX_FPGA_CONFIG_REG);
>> +	udelay(30);
>> +	writeb(0x2, priv->io_base + TS73XX_FPGA_CONFIG_REG);
>> +	udelay(80);
> 
> Could these udelay values be macros?

The bit definitions could be defined, but the delays, why would that be
useful?

> 
>> +
>> +	return 0;
>> +}
>> +
>> +static inline int ts73xx_fpga_can_write(struct ts73xx_fpga_priv *priv)
>> +{
>> +	unsigned int timeout = 1000;
> 
> Another macro?

The delay is just an arbitrary good timeout.
-- 
Florian

^ permalink raw reply

* [PATCH v6 5/5] ARM: configs: stm32: Add I2C support for STM32 defconfig
From: M'boumba Cedric Madianga @ 2016-12-12 16:15 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481559342-6106-1-git-send-email-cedric.madianga@gmail.com>

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
---
 arch/arm/configs/stm32_defconfig | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index e7b56d4..9494eaf 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -52,6 +52,9 @@ CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_SERIAL_STM32=y
 CONFIG_SERIAL_STM32_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_STM32F4=y
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_NEW_LEDS=y
-- 
1.9.1

^ permalink raw reply related

* [PATCH v6 4/5] ARM: dts: stm32: Add I2C1 support for STM32429 eval board
From: M'boumba Cedric Madianga @ 2016-12-12 16:15 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481559342-6106-1-git-send-email-cedric.madianga@gmail.com>

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
---
 arch/arm/boot/dts/stm32429i-eval.dts | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/boot/dts/stm32429i-eval.dts b/arch/arm/boot/dts/stm32429i-eval.dts
index afb90bc..74e0045 100644
--- a/arch/arm/boot/dts/stm32429i-eval.dts
+++ b/arch/arm/boot/dts/stm32429i-eval.dts
@@ -141,3 +141,9 @@
 	pinctrl-names = "default";
 	status = "okay";
 };
+
+&i2c1 {
+	pinctrl-0 = <&i2c1_pins_b>;
+	pinctrl-names = "default";
+	status = "okay";
+};
-- 
1.9.1

^ permalink raw reply related

* [PATCH v6 3/5] ARM: dts: stm32: Add I2C1 support for STM32F429 SoC
From: M'boumba Cedric Madianga @ 2016-12-12 16:15 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481559342-6106-1-git-send-email-cedric.madianga@gmail.com>

Signed-off-by: Patrice Chotard <patrice.chotard@st.com>
Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
---
 arch/arm/boot/dts/stm32f429.dtsi | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi
index 7de52ee..cbdece7 100644
--- a/arch/arm/boot/dts/stm32f429.dtsi
+++ b/arch/arm/boot/dts/stm32f429.dtsi
@@ -48,6 +48,7 @@
 #include "skeleton.dtsi"
 #include "armv7-m.dtsi"
 #include <dt-bindings/pinctrl/stm32f429-pinfunc.h>
+#include <dt-bindings/mfd/stm32f4-rcc.h>
 
 / {
 	clocks {
@@ -337,6 +338,16 @@
 					slew-rate = <2>;
 				};
 			};
+
+			i2c1_pins_b: i2c1@0 {
+				pins1 {
+					pinmux = <STM32F429_PB9_FUNC_I2C1_SDA>;
+					drive-open-drain;
+				};
+				pins2 {
+					pinmux = <STM32F429_PB6_FUNC_I2C1_SCL>;
+				};
+			};
 		};
 
 		rcc: rcc@40023810 {
@@ -409,6 +420,18 @@
 			interrupts = <80>;
 			clocks = <&rcc 0 38>;
 		};
+
+		i2c1: i2c@40005400 {
+			compatible = "st,stm32f4-i2c";
+			reg = <0x40005400 0x400>;
+			interrupts = <31>,
+				     <32>;
+			resets = <&rcc STM32F4_APB1_RESET(I2C1)>;
+			clocks = <&rcc 0 STM32F4_APB1_CLOCK(I2C1)>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
 	};
 };
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v6 2/5] i2c: Add STM32F4 I2C driver
From: M'boumba Cedric Madianga @ 2016-12-12 16:15 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1481559342-6106-1-git-send-email-cedric.madianga@gmail.com>

This patch adds support for the STM32F4 I2C controller.

Signed-off-by: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
---
 drivers/i2c/busses/Kconfig       |  10 +
 drivers/i2c/busses/Makefile      |   1 +
 drivers/i2c/busses/i2c-stm32f4.c | 849 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 860 insertions(+)
 create mode 100644 drivers/i2c/busses/i2c-stm32f4.c

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 0cdc844..2719208 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -886,6 +886,16 @@ config I2C_ST
 	  This driver can also be built as module. If so, the module
 	  will be called i2c-st.
 
+config I2C_STM32F4
+	tristate "STMicroelectronics STM32F4 I2C support"
+	depends on ARCH_STM32 || COMPILE_TEST
+	help
+	  Enable this option to add support for STM32 I2C controller embedded
+	  in STM32F4 SoCs.
+
+	  This driver can also be built as module. If so, the module
+	  will be called i2c-stm32f4.
+
 config I2C_STU300
 	tristate "ST Microelectronics DDC I2C interface"
 	depends on MACH_U300
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 1c1bac8..a2c6ff5 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_I2C_SH_MOBILE)	+= i2c-sh_mobile.o
 obj-$(CONFIG_I2C_SIMTEC)	+= i2c-simtec.o
 obj-$(CONFIG_I2C_SIRF)		+= i2c-sirf.o
 obj-$(CONFIG_I2C_ST)		+= i2c-st.o
+obj-$(CONFIG_I2C_STM32F4)	+= i2c-stm32f4.o
 obj-$(CONFIG_I2C_STU300)	+= i2c-stu300.o
 obj-$(CONFIG_I2C_SUN6I_P2WI)	+= i2c-sun6i-p2wi.o
 obj-$(CONFIG_I2C_TEGRA)		+= i2c-tegra.o
diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c
new file mode 100644
index 0000000..89ad579
--- /dev/null
+++ b/drivers/i2c/busses/i2c-stm32f4.c
@@ -0,0 +1,849 @@
+/*
+ * Driver for STMicroelectronics STM32 I2C controller
+ *
+ * Copyright (C) M'boumba Cedric Madianga 2015
+ * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *
+ * This driver is based on i2c-st.c
+ *
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+/* STM32F4 I2C offset registers */
+#define STM32F4_I2C_CR1			0x00
+#define STM32F4_I2C_CR2			0x04
+#define STM32F4_I2C_DR			0x10
+#define STM32F4_I2C_SR1			0x14
+#define STM32F4_I2C_SR2			0x18
+#define STM32F4_I2C_CCR			0x1C
+#define STM32F4_I2C_TRISE		0x20
+#define STM32F4_I2C_FLTR		0x24
+
+/* STM32F4 I2C control 1*/
+#define STM32F4_I2C_CR1_SWRST		BIT(15)
+#define STM32F4_I2C_CR1_POS		BIT(11)
+#define STM32F4_I2C_CR1_ACK		BIT(10)
+#define STM32F4_I2C_CR1_STOP		BIT(9)
+#define STM32F4_I2C_CR1_START		BIT(8)
+#define STM32F4_I2C_CR1_PE		BIT(0)
+
+/* STM32F4 I2C control 2 */
+#define STM32F4_I2C_CR2_FREQ_MASK	GENMASK(5, 0)
+#define STM32F4_I2C_CR2_FREQ(n)		((n & STM32F4_I2C_CR2_FREQ_MASK))
+#define STM32F4_I2C_CR2_ITBUFEN		BIT(10)
+#define STM32F4_I2C_CR2_ITEVTEN		BIT(9)
+#define STM32F4_I2C_CR2_ITERREN		BIT(8)
+#define STM32F4_I2C_CR2_IRQ_MASK	(STM32F4_I2C_CR2_ITBUFEN \
+					| STM32F4_I2C_CR2_ITEVTEN \
+					| STM32F4_I2C_CR2_ITERREN)
+
+/* STM32F4 I2C Status 1 */
+#define STM32F4_I2C_SR1_AF		BIT(10)
+#define STM32F4_I2C_SR1_ARLO		BIT(9)
+#define STM32F4_I2C_SR1_BERR		BIT(8)
+#define STM32F4_I2C_SR1_TXE		BIT(7)
+#define STM32F4_I2C_SR1_RXNE		BIT(6)
+#define STM32F4_I2C_SR1_BTF		BIT(2)
+#define STM32F4_I2C_SR1_ADDR		BIT(1)
+#define STM32F4_I2C_SR1_SB		BIT(0)
+#define STM32F4_I2C_SR1_ITEVTEN_MASK	(STM32F4_I2C_SR1_BTF \
+					| STM32F4_I2C_SR1_ADDR \
+					| STM32F4_I2C_SR1_SB)
+#define STM32F4_I2C_SR1_ITBUFEN_MASK	(STM32F4_I2C_SR1_TXE \
+					| STM32F4_I2C_SR1_RXNE)
+#define STM32F4_I2C_SR1_ITERREN_MASK	(STM32F4_I2C_SR1_AF \
+					| STM32F4_I2C_SR1_ARLO \
+					| STM32F4_I2C_SR1_BERR)
+
+/* STM32F4 I2C Status 2 */
+#define STM32F4_I2C_SR2_BUSY		BIT(1)
+
+/* STM32F4 I2C Control Clock */
+#define STM32F4_I2C_CCR_CCR_MASK	GENMASK(11, 0)
+#define STM32F4_I2C_CCR_CCR(n)		((n & STM32F4_I2C_CCR_CCR_MASK))
+#define STM32F4_I2C_CCR_FS		BIT(15)
+#define STM32F4_I2C_CCR_DUTY		BIT(14)
+
+/* STM32F4 I2C Trise */
+#define STM32F4_I2C_TRISE_VALUE_MASK	GENMASK(5, 0)
+#define STM32F4_I2C_TRISE_VALUE(n)	((n & STM32F4_I2C_TRISE_VALUE_MASK))
+
+/* STM32F4 I2C Filter */
+#define STM32F4_I2C_FLTR_DNF_MASK	GENMASK(3, 0)
+#define STM32F4_I2C_FLTR_DNF(n)		((n & STM32F4_I2C_FLTR_DNF_MASK))
+#define STM32F4_I2C_FLTR_ANOFF		BIT(4)
+
+#define STM32F4_I2C_MIN_FREQ		2U
+#define STM32F4_I2C_MAX_FREQ		42U
+#define FAST_MODE_MAX_RISE_TIME		1000
+#define STD_MODE_MAX_RISE_TIME		300
+#define MHZ_TO_HZ			1000000
+
+enum stm32f4_i2c_speed {
+	STM32F4_I2C_SPEED_STANDARD, /* 100 kHz */
+	STM32F4_I2C_SPEED_FAST, /* 400 kHz */
+	STM32F4_I2C_SPEED_END,
+};
+
+/**
+ * struct stm32f4_i2c_timings - per-Mode tuning parameters
+ * @duty: Fast mode duty cycle
+ * @mul_ccr: Value to be multiplied to CCR to reach 100Khz/400Khz SCL frequency
+ * @min_ccr: Minimum clock ctrl reg value to reach 100Khz/400Khz SCL frequency
+ */
+struct stm32f4_i2c_timings {
+	u32 rate;
+	u32 duty;
+	u32 mul_ccr;
+	u32 min_ccr;
+};
+
+/**
+ * struct stm32f4_i2c_msg - client specific data
+ * @addr: 8-bit slave addr, including r/w bit
+ * @count: number of bytes to be transferred
+ * @buf: data buffer
+ * @result: result of the transfer
+ * @stop: last I2C msg to be sent, i.e. STOP to be generated
+ */
+struct stm32f4_i2c_msg {
+	u8	addr;
+	u32	count;
+	u8	*buf;
+	int	result;
+	bool	stop;
+};
+
+/**
+ * struct stm32f4_i2c_dev - private data of the controller
+ * @adap: I2C adapter for this controller
+ * @dev: device for this controller
+ * @base: virtual memory area
+ * @complete: completion of I2C message
+ * @irq_event: interrupt event line for the controller
+ * @irq_error: interrupt error line for the controller
+ * @clk: hw i2c clock
+ * @speed: I2C bus speed mode of the controller (Standard or Fast only)
+ * @msg: I2C transfer information
+ */
+struct stm32f4_i2c_dev {
+	struct i2c_adapter		adap;
+	struct device			*dev;
+	void __iomem			*base;
+	struct completion		complete;
+	int				irq_event;
+	int				irq_error;
+	struct clk			*clk;
+	int				speed;
+	struct stm32f4_i2c_msg		msg;
+};
+
+static struct stm32f4_i2c_timings i2c_timings[] = {
+	[STM32F4_I2C_SPEED_STANDARD] = {
+		.mul_ccr		= 1,
+		.min_ccr		= 4,
+		.duty			= 0,
+	},
+	[STM32F4_I2C_SPEED_FAST] = {
+		.mul_ccr		= 16,
+		.min_ccr		= 1,
+		.duty			= 1,
+	},
+};
+
+static inline void stm32f4_i2c_set_bits(void __iomem *reg, u32 mask)
+{
+	writel_relaxed(readl_relaxed(reg) | mask, reg);
+}
+
+static inline void stm32f4_i2c_clr_bits(void __iomem *reg, u32 mask)
+{
+	writel_relaxed(readl_relaxed(reg) & ~mask, reg);
+}
+
+static void stm32f4_i2c_soft_reset(struct stm32f4_i2c_dev *i2c_dev)
+{
+	void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR1;
+
+	stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_SWRST);
+	stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR1_SWRST);
+}
+
+static void stm32f4_i2c_disable_it(struct stm32f4_i2c_dev *i2c_dev)
+{
+	void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR2;
+
+	stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR2_IRQ_MASK);
+}
+
+static void stm32f4_i2c_set_periph_clk_freq(struct stm32f4_i2c_dev *i2c_dev)
+{
+	u32 clk_rate, cr2, freq;
+
+	cr2 = readl_relaxed(i2c_dev->base + STM32F4_I2C_CR2);
+	cr2 &= ~STM32F4_I2C_CR2_FREQ_MASK;
+	clk_rate = clk_get_rate(i2c_dev->clk);
+	freq = clk_rate / MHZ_TO_HZ;
+	freq = clamp(freq, STM32F4_I2C_MIN_FREQ, STM32F4_I2C_MAX_FREQ);
+	cr2 |= STM32F4_I2C_CR2_FREQ(freq);
+	writel_relaxed(cr2, i2c_dev->base + STM32F4_I2C_CR2);
+}
+
+static void stm32f4_i2c_set_rise_time(struct stm32f4_i2c_dev *i2c_dev)
+{
+	u32 trise, freq, cr2, val;
+
+	cr2 = readl_relaxed(i2c_dev->base + STM32F4_I2C_CR2);
+	freq = cr2 & STM32F4_I2C_CR2_FREQ_MASK;
+
+	trise = readl_relaxed(i2c_dev->base + STM32F4_I2C_TRISE);
+	trise &= ~STM32F4_I2C_TRISE_VALUE_MASK;
+
+	/* Maximum rise time computation */
+	if (i2c_dev->speed == STM32F4_I2C_SPEED_STANDARD) {
+		trise |= STM32F4_I2C_TRISE_VALUE((freq + 1));
+	} else {
+		val = freq * FAST_MODE_MAX_RISE_TIME / STD_MODE_MAX_RISE_TIME;
+		trise |= STM32F4_I2C_TRISE_VALUE((val + 1));
+	}
+
+	writel_relaxed(trise, i2c_dev->base + STM32F4_I2C_TRISE);
+}
+
+static void stm32f4_i2c_set_speed_mode(struct stm32f4_i2c_dev *i2c_dev)
+{
+	struct stm32f4_i2c_timings *t = &i2c_timings[i2c_dev->speed];
+	u32 ccr, clk_rate;
+	int val;
+
+	ccr = readl_relaxed(i2c_dev->base + STM32F4_I2C_CCR);
+	ccr &= ~(STM32F4_I2C_CCR_FS | STM32F4_I2C_CCR_DUTY |
+		 STM32F4_I2C_CCR_CCR_MASK);
+
+	clk_rate = clk_get_rate(i2c_dev->clk);
+	val = clk_rate / MHZ_TO_HZ * t->mul_ccr;
+	if (val < t->min_ccr)
+		val = t->min_ccr;
+	ccr |= STM32F4_I2C_CCR_CCR(val);
+
+	if (t->duty)
+		ccr |= STM32F4_I2C_CCR_FS | STM32F4_I2C_CCR_DUTY;
+
+	writel_relaxed(ccr, i2c_dev->base + STM32F4_I2C_CCR);
+}
+
+static void stm32f4_i2c_set_filter(struct stm32f4_i2c_dev *i2c_dev)
+{
+	u32 filter;
+
+	/* Enable analog noise filter and disable digital noise filter */
+	filter = readl_relaxed(i2c_dev->base + STM32F4_I2C_FLTR);
+	filter &= ~(STM32F4_I2C_FLTR_ANOFF | STM32F4_I2C_FLTR_DNF_MASK);
+	writel_relaxed(filter, i2c_dev->base + STM32F4_I2C_FLTR);
+}
+
+/**
+ * stm32f4_i2c_hw_config() - Prepare I2C block
+ * @i2c_dev: Controller's private data
+ */
+static void stm32f4_i2c_hw_config(struct stm32f4_i2c_dev *i2c_dev)
+{
+	void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR1;
+
+	/* Disable I2C */
+	stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR1_PE);
+
+	stm32f4_i2c_set_periph_clk_freq(i2c_dev);
+
+	stm32f4_i2c_set_rise_time(i2c_dev);
+
+	stm32f4_i2c_set_speed_mode(i2c_dev);
+
+	stm32f4_i2c_set_filter(i2c_dev);
+
+	/* Enable I2C */
+	stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_PE);
+}
+
+static int stm32f4_i2c_wait_free_bus(struct stm32f4_i2c_dev *i2c_dev)
+{
+	u32 status;
+	int ret;
+
+	ret = readl_relaxed_poll_timeout(i2c_dev->base + STM32F4_I2C_SR2,
+					 status,
+					 !(status & STM32F4_I2C_SR2_BUSY),
+					 10, 1000);
+	if (ret) {
+		dev_err(i2c_dev->dev, "bus not free\n");
+		ret = -EBUSY;
+	}
+
+	return ret;
+}
+
+/**
+ * stm32f4_i2c_write_byte() - Write a byte in the data register
+ * @i2c_dev: Controller's private data
+ * @byte: Data to write in the register
+ */
+static void stm32f4_i2c_write_byte(struct stm32f4_i2c_dev *i2c_dev, u8 byte)
+{
+	writel_relaxed(byte, i2c_dev->base + STM32F4_I2C_DR);
+}
+
+/**
+ * stm32f4_i2c_write_msg() - Fill the data register in write mode
+ * @i2c_dev: Controller's private data
+ *
+ * This function fills the data register with I2C transfer buffer
+ */
+static void stm32f4_i2c_write_msg(struct stm32f4_i2c_dev *i2c_dev)
+{
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+
+	stm32f4_i2c_write_byte(i2c_dev, *msg->buf++);
+	msg->count--;
+}
+
+static void stm32f4_i2c_read_msg(struct stm32f4_i2c_dev *i2c_dev)
+{
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+	u32 rbuf;
+
+	rbuf = readl_relaxed(i2c_dev->base + STM32F4_I2C_DR);
+	*msg->buf++ = (u8)rbuf & 0xff;
+	msg->count--;
+}
+
+static void stm32f4_i2c_terminate_xfer(struct stm32f4_i2c_dev *i2c_dev)
+{
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+	void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR2;
+
+	stm32f4_i2c_disable_it(i2c_dev);
+
+	reg = i2c_dev->base + STM32F4_I2C_CR1;
+	if (msg->stop)
+		stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_STOP);
+	else
+		stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_START);
+
+	complete(&i2c_dev->complete);
+}
+
+/**
+ * stm32f4_i2c_handle_write() - Handle FIFO empty interrupt in case of write
+ * @i2c_dev: Controller's private data
+ */
+static void stm32f4_i2c_handle_write(struct stm32f4_i2c_dev *i2c_dev)
+{
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+	void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR2;
+
+	if (msg->count) {
+		stm32f4_i2c_write_msg(i2c_dev);
+		if (!msg->count) {
+			/* Disable BUF interrupt */
+			stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR2_ITBUFEN);
+		}
+	} else {
+		stm32f4_i2c_terminate_xfer(i2c_dev);
+	}
+}
+
+/**
+ * stm32f4_i2c_handle_read() - Handle FIFO empty interrupt in case of read
+ * @i2c_dev: Controller's private data
+ */
+static void stm32f4_i2c_handle_read(struct stm32f4_i2c_dev *i2c_dev)
+{
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+	void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR2;
+
+	switch (msg->count) {
+	case 1:
+		stm32f4_i2c_disable_it(i2c_dev);
+		stm32f4_i2c_read_msg(i2c_dev);
+		complete(&i2c_dev->complete);
+		break;
+	case 2:
+	case 3:
+		stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR2_ITBUFEN);
+		break;
+	default:
+		stm32f4_i2c_read_msg(i2c_dev);
+	}
+}
+
+/**
+ * stm32f4_i2c_handle_rx_btf() - Handle byte transfer finished interrupt
+ * in case of read
+ * @i2c_dev: Controller's private data
+ */
+static void stm32f4_i2c_handle_rx_btf(struct stm32f4_i2c_dev *i2c_dev)
+{
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+	void __iomem *reg;
+	u32 mask;
+	int i;
+
+	switch (msg->count) {
+	case 2:
+		reg = i2c_dev->base + STM32F4_I2C_CR1;
+		/* Generate STOP or REPSTART */
+		if (msg->stop)
+			stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_STOP);
+		else
+			stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_START);
+
+		/* Read two last data bytes */
+		for (i = 2; i > 0; i--)
+			stm32f4_i2c_read_msg(i2c_dev);
+
+		/* Disable EVT and ERR interrupt */
+		reg = i2c_dev->base + STM32F4_I2C_CR2;
+		mask = STM32F4_I2C_CR2_ITEVTEN | STM32F4_I2C_CR2_ITERREN;
+		stm32f4_i2c_clr_bits(reg, mask);
+
+		complete(&i2c_dev->complete);
+		break;
+	case 3:
+		/* Enable NACK and read data */
+		reg = i2c_dev->base + STM32F4_I2C_CR1;
+		stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR1_ACK);
+		stm32f4_i2c_read_msg(i2c_dev);
+		break;
+	default:
+		stm32f4_i2c_read_msg(i2c_dev);
+	}
+}
+
+/**
+ * stm32f4_i2c_handle_rx_addr() - Handle address matched interrupt in case of
+ * master receiver
+ * @i2c_dev: Controller's private data
+ */
+static void stm32f4_i2c_handle_rx_addr(struct stm32f4_i2c_dev *i2c_dev)
+{
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+	void __iomem *reg;
+
+	switch (msg->count) {
+	case 0:
+		stm32f4_i2c_terminate_xfer(i2c_dev);
+		/* Clear ADDR flag */
+		readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+		break;
+	case 1:
+		/*
+		 * Single byte reception:
+		 * Enable NACK, clear ADDR flag and generate STOP or RepSTART
+		 */
+		reg = i2c_dev->base + STM32F4_I2C_CR1;
+		stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR1_ACK);
+		if (msg->stop)
+			stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_STOP);
+		else
+			stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_START);
+		break;
+	case 2:
+		/*
+		 * 2-byte reception:
+		 * Enable NACK and PEC Position Ack and clear ADDR flag
+		 */
+		reg = i2c_dev->base + STM32F4_I2C_CR1;
+		stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR1_ACK);
+		stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_POS);
+		readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+		break;
+
+	default:
+		/* N-byte reception: Enable ACK and clear ADDR flag */
+		reg = i2c_dev->base + STM32F4_I2C_CR1;
+		stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_ACK);
+		readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+		break;
+	}
+}
+
+/**
+ * stm32f4_i2c_isr_event() - Interrupt routine for I2C bus event
+ * @irq: interrupt number
+ * @data: Controller's private data
+ */
+static irqreturn_t stm32f4_i2c_isr_event(int irq, void *data)
+{
+	struct stm32f4_i2c_dev *i2c_dev = data;
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+	void __iomem *reg;
+	u32 real_status, possible_status, ien;
+	int flag;
+
+	ien = readl_relaxed(i2c_dev->base + STM32F4_I2C_CR2);
+	ien &= STM32F4_I2C_CR2_IRQ_MASK;
+	possible_status = 0;
+
+	/* Check possible status combinations */
+	if (ien & STM32F4_I2C_CR2_ITEVTEN) {
+		possible_status = STM32F4_I2C_SR1_ITEVTEN_MASK;
+		if (ien & STM32F4_I2C_CR2_ITBUFEN)
+			possible_status |= STM32F4_I2C_SR1_ITBUFEN_MASK;
+	}
+
+	real_status = readl_relaxed(i2c_dev->base + STM32F4_I2C_SR1);
+
+	if (!(real_status & possible_status)) {
+		dev_dbg(i2c_dev->dev,
+			"spurious evt it (status=0x%08x, ien=0x%08x)\n",
+			real_status, ien);
+		return IRQ_NONE;
+	}
+
+	/* Use __fls() to handle the highest pending status bit first */
+	flag = __fls(real_status & possible_status);
+
+	switch (1 << flag) {
+	case STM32F4_I2C_SR1_SB:
+		stm32f4_i2c_write_byte(i2c_dev, msg->addr);
+		break;
+
+	case STM32F4_I2C_SR1_ADDR:
+		if (msg->addr & I2C_M_RD)
+			stm32f4_i2c_handle_rx_addr(i2c_dev);
+		else
+			readl_relaxed(i2c_dev->base + STM32F4_I2C_SR2);
+
+		/* Enable ITBUF interrupts */
+		reg = i2c_dev->base + STM32F4_I2C_CR2;
+		stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR2_ITBUFEN);
+		break;
+
+	case STM32F4_I2C_SR1_BTF:
+		if (msg->addr & I2C_M_RD)
+			stm32f4_i2c_handle_rx_btf(i2c_dev);
+		else
+			stm32f4_i2c_handle_write(i2c_dev);
+		break;
+
+	case STM32F4_I2C_SR1_TXE:
+		stm32f4_i2c_handle_write(i2c_dev);
+		break;
+
+	case STM32F4_I2C_SR1_RXNE:
+		stm32f4_i2c_handle_read(i2c_dev);
+		break;
+
+	default:
+		dev_err(i2c_dev->dev,
+			"evt it unhandled: status=0x%08x)\n", real_status);
+		return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * stm32f4_i2c_isr_error() - Interrupt routine for I2C bus error
+ * @irq: interrupt number
+ * @data: Controller's private data
+ */
+static irqreturn_t stm32f4_i2c_isr_error(int irq, void *data)
+{
+	struct stm32f4_i2c_dev *i2c_dev = data;
+	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
+	void __iomem *reg;
+	u32 real_status, possible_status, ien;
+	int flag;
+
+	ien = readl_relaxed(i2c_dev->base + STM32F4_I2C_CR2);
+	ien &= STM32F4_I2C_CR2_IRQ_MASK;
+	possible_status = 0;
+
+	/* Check possible status combinations */
+	if (ien & STM32F4_I2C_CR2_ITERREN)
+		possible_status = STM32F4_I2C_SR1_ITERREN_MASK;
+
+	real_status = readl_relaxed(i2c_dev->base + STM32F4_I2C_SR1);
+
+	if (!(real_status & possible_status)) {
+		dev_dbg(i2c_dev->dev,
+			"spurious err it (status=0x%08x, ien=0x%08x)\n",
+			real_status, ien);
+		return IRQ_NONE;
+	}
+
+	/* Use __fls() to check error bits first */
+	flag = __fls(real_status & possible_status);
+
+	switch (1 << flag) {
+	case STM32F4_I2C_SR1_BERR:
+		reg = i2c_dev->base + STM32F4_I2C_SR1;
+		stm32f4_i2c_clr_bits(reg, STM32F4_I2C_SR1_BERR);
+		msg->result = -EIO;
+		break;
+
+	case STM32F4_I2C_SR1_ARLO:
+		reg = i2c_dev->base + STM32F4_I2C_SR1;
+		stm32f4_i2c_clr_bits(reg, STM32F4_I2C_SR1_ARLO);
+		msg->result = -EAGAIN;
+		break;
+
+	case STM32F4_I2C_SR1_AF:
+		reg = i2c_dev->base + STM32F4_I2C_CR1;
+		stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_STOP);
+		msg->result = -EIO;
+		break;
+
+	default:
+		dev_err(i2c_dev->dev,
+			"err it unhandled: status=0x%08x)\n", real_status);
+		return IRQ_NONE;
+	}
+
+	stm32f4_i2c_soft_reset(i2c_dev);
+	stm32f4_i2c_disable_it(i2c_dev);
+	complete(&i2c_dev->complete);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * stm32f4_i2c_xfer_msg() - Transfer a single I2C message
+ * @i2c_dev: Controller's private data
+ * @msg: I2C message to transfer
+ * @is_first: first message of the sequence
+ * @is_last: last message of the sequence
+ */
+static int stm32f4_i2c_xfer_msg(struct stm32f4_i2c_dev *i2c_dev,
+				struct i2c_msg *msg, bool is_first,
+				bool is_last)
+{
+	struct stm32f4_i2c_msg *f4_msg = &i2c_dev->msg;
+	void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR1;
+	unsigned long timeout;
+	u32 mask;
+	int ret;
+
+	f4_msg->addr = i2c_8bit_addr_from_msg(msg);
+	f4_msg->buf = msg->buf;
+	f4_msg->count = msg->len;
+	f4_msg->result = 0;
+	f4_msg->stop = is_last;
+
+	reinit_completion(&i2c_dev->complete);
+
+	/* Enable ITEVT and ITERR interrupts */
+	mask = STM32F4_I2C_CR2_ITEVTEN | STM32F4_I2C_CR2_ITERREN;
+	stm32f4_i2c_set_bits(i2c_dev->base + STM32F4_I2C_CR2, mask);
+
+	if (is_first) {
+		ret = stm32f4_i2c_wait_free_bus(i2c_dev);
+		if (ret)
+			return ret;
+
+		/* START generation */
+		stm32f4_i2c_set_bits(reg, STM32F4_I2C_CR1_START);
+	}
+
+	timeout = wait_for_completion_timeout(&i2c_dev->complete,
+					      i2c_dev->adap.timeout);
+	ret = f4_msg->result;
+
+	/* Disable PEC position Ack */
+	stm32f4_i2c_clr_bits(reg, STM32F4_I2C_CR1_POS);
+
+	if (!timeout)
+		ret = -ETIMEDOUT;
+
+	return ret;
+}
+
+/**
+ * stm32f4_i2c_xfer() - Transfer combined I2C message
+ * @i2c_adap: Adapter pointer to the controller
+ * @msgs: Pointer to data to be written.
+ * @num: Number of messages to be executed
+ */
+static int stm32f4_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg msgs[],
+			    int num)
+{
+	struct stm32f4_i2c_dev *i2c_dev = i2c_get_adapdata(i2c_adap);
+	int ret, i;
+
+	ret = clk_enable(i2c_dev->clk);
+	if (ret) {
+		dev_err(i2c_dev->dev, "Failed to enable clock\n");
+		return ret;
+	}
+
+	stm32f4_i2c_hw_config(i2c_dev);
+
+	for (i = 0; i < num && !ret; i++)
+		ret = stm32f4_i2c_xfer_msg(i2c_dev, &msgs[i], i == 0,
+					   i == num - 1);
+
+	clk_disable(i2c_dev->clk);
+
+	return (ret < 0) ? ret : i;
+}
+
+static u32 stm32f4_i2c_func(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static struct i2c_algorithm stm32f4_i2c_algo = {
+	.master_xfer = stm32f4_i2c_xfer,
+	.functionality = stm32f4_i2c_func,
+};
+
+static int stm32f4_i2c_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct stm32f4_i2c_dev *i2c_dev;
+	struct resource *res;
+	u32 clk_rate;
+	struct i2c_adapter *adap;
+	struct reset_control *rst;
+	int ret;
+
+	i2c_dev = devm_kzalloc(&pdev->dev, sizeof(*i2c_dev), GFP_KERNEL);
+	if (!i2c_dev)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	i2c_dev->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(i2c_dev->base))
+		return PTR_ERR(i2c_dev->base);
+
+	i2c_dev->irq_event = irq_of_parse_and_map(np, 0);
+	if (!i2c_dev->irq_event) {
+		dev_err(&pdev->dev, "IRQ missing or invalid\n");
+		return -EINVAL;
+	}
+
+	i2c_dev->irq_error = irq_of_parse_and_map(np, 1);
+	if (!i2c_dev->irq_error) {
+		dev_err(&pdev->dev, "IRQ missing or invalid\n");
+		return -EINVAL;
+	}
+
+	i2c_dev->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(i2c_dev->clk)) {
+		dev_err(&pdev->dev, "Error: Missing controller clock\n");
+		return PTR_ERR(i2c_dev->clk);
+	}
+	ret = clk_prepare(i2c_dev->clk);
+	if (ret) {
+		dev_err(i2c_dev->dev, "Failed to prepare clock\n");
+		return ret;
+	}
+
+	rst = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(rst)) {
+		dev_err(&pdev->dev, "Error: Missing controller reset\n");
+		ret = PTR_ERR(rst);
+		goto clk_free;
+	}
+	reset_control_assert(rst);
+	udelay(2);
+	reset_control_deassert(rst);
+
+	i2c_dev->speed = STM32F4_I2C_SPEED_STANDARD;
+	ret = of_property_read_u32(np, "clock-frequency", &clk_rate);
+	if ((!ret) && (clk_rate == 400000))
+		i2c_dev->speed = STM32F4_I2C_SPEED_FAST;
+
+	i2c_dev->dev = &pdev->dev;
+
+	ret = devm_request_threaded_irq(&pdev->dev, i2c_dev->irq_event,
+					NULL, stm32f4_i2c_isr_event,
+					IRQF_ONESHOT, pdev->name, i2c_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to request irq %i\n",
+			i2c_dev->irq_error);
+		goto clk_free;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, i2c_dev->irq_error,
+					NULL, stm32f4_i2c_isr_error,
+					IRQF_ONESHOT, pdev->name, i2c_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to request irq %i\n",
+			i2c_dev->irq_error);
+		goto clk_free;
+	}
+
+	adap = &i2c_dev->adap;
+	i2c_set_adapdata(adap, i2c_dev);
+	snprintf(adap->name, sizeof(adap->name), "STM32 I2C(%pa)", &res->start);
+	adap->owner = THIS_MODULE;
+	adap->timeout = 2 * HZ;
+	adap->retries = 0;
+	adap->algo = &stm32f4_i2c_algo;
+	adap->dev.parent = &pdev->dev;
+	adap->dev.of_node = pdev->dev.of_node;
+
+	init_completion(&i2c_dev->complete);
+
+	ret = i2c_add_adapter(adap);
+	if (ret)
+		goto clk_free;
+
+	platform_set_drvdata(pdev, i2c_dev);
+
+	dev_info(i2c_dev->dev, "STM32F4 I2C driver initialized\n");
+
+	return 0;
+
+clk_free:
+	clk_unprepare(i2c_dev->clk);
+	return ret;
+}
+
+static int stm32f4_i2c_remove(struct platform_device *pdev)
+{
+	struct stm32f4_i2c_dev *i2c_dev = platform_get_drvdata(pdev);
+
+	i2c_del_adapter(&i2c_dev->adap);
+
+	clk_unprepare(i2c_dev->clk);
+
+	return 0;
+}
+
+static const struct of_device_id stm32f4_i2c_match[] = {
+	{ .compatible = "st,stm32f4-i2c", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, stm32f4_i2c_match);
+
+static struct platform_driver stm32f4_i2c_driver = {
+	.driver = {
+		.name = "stm32f4-i2c",
+		.of_match_table = stm32f4_i2c_match,
+	},
+	.probe = stm32f4_i2c_probe,
+	.remove = stm32f4_i2c_remove,
+};
+
+module_platform_driver(stm32f4_i2c_driver);
+
+MODULE_AUTHOR("M'boumba Cedric Madianga <cedric.madianga@gmail.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32F4 I2C driver");
+MODULE_LICENSE("GPL v2");
-- 
1.9.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox