* [PATCH V1] pinctrl:pxa:pinctrl-pxa2xx:- No need of devm functions
From: Arvind Yadav @ 2016-12-08 14:35 UTC (permalink / raw)
To: linux-arm-kernel
In functions pxa2xx_build_functions, the memory allocated for
'functions' is live within the function only. After the
allocation it is immediately freed with devm_kfree. There is
no need to allocate memory for 'functions' with devm function
so replace devm_kcalloc with kcalloc and devm_kfree with kfree.
Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
---
drivers/pinctrl/pxa/pinctrl-pxa2xx.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
index 866aa3c..47b8e3a 100644
--- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
+++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
@@ -277,7 +277,7 @@ static int pxa2xx_build_functions(struct pxa_pinctrl *pctl)
* alternate function, 6 * npins is an absolute high limit of the number
* of functions.
*/
- functions = devm_kcalloc(pctl->dev, pctl->npins * 6,
+ functions = kcalloc(pctl->npins * 6,
sizeof(*functions), GFP_KERNEL);
if (!functions)
return -ENOMEM;
@@ -289,10 +289,12 @@ static int pxa2xx_build_functions(struct pxa_pinctrl *pctl)
pctl->functions = devm_kmemdup(pctl->dev, functions,
pctl->nfuncs * sizeof(*functions),
GFP_KERNEL);
- if (!pctl->functions)
+ if (!pctl->functions) {
+ kfree(functions);
return -ENOMEM;
+ }
- devm_kfree(pctl->dev, functions);
+ kfree(functions);
return 0;
}
--
2.7.4
^ permalink raw reply related
* [PATCH 2/2] crypto: arm/chacha20 - implement NEON version based on SSE3 code
From: Ard Biesheuvel @ 2016-12-08 14:28 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481207339-17332-1-git-send-email-ard.biesheuvel@linaro.org>
This is a straight port to ARM/NEON of the x86 SSE3 implementation
of the ChaCha20 stream cipher.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/Kconfig | 6 +
arch/arm/crypto/Makefile | 2 +
arch/arm/crypto/chacha20-neon-core.S | 524 ++++++++++++++++++++
arch/arm/crypto/chacha20-neon-glue.c | 136 +++++
4 files changed, 668 insertions(+)
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 13f1b4c289d4..2f3339f015d3 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -130,4 +130,10 @@ config CRYPTO_CRC32_ARM_CE
depends on KERNEL_MODE_NEON && CRC32
select CRYPTO_HASH
+config CRYPTO_CHACHA20_NEON
+ tristate "NEON accelerated ChaCha20 symmetric cipher"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_CHACHA20
+
endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b578a1820ab1..8d74e55eacd4 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -40,6 +41,7 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
+chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
new file mode 100644
index 000000000000..9f315041f521
--- /dev/null
+++ b/arch/arm/crypto/chacha20-neon-core.S
@@ -0,0 +1,524 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SNEON3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .fpu neon
+ .align 5
+
+ENTRY(chacha20_block_xor_neon)
+ // r0: Input state matrix, s
+ // r1: 1 data block output, o
+ // r2: 1 data block input, i
+
+ //
+ // This function encrypts one ChaCha20 block by loading the state matrix
+ // in four NEON registers. It performs matrix operation on four words in
+ // parallel, but requireds shuffling to rearrange the words after each
+ // round.
+ //
+
+ // x0..3 = s0..3
+ add ip, r0, #0x20
+ vld1.32 {q0-q1}, [r0]
+ vld1.32 {q2-q3}, [ip]
+
+ vmov q8, q0
+ vmov q9, q1
+ vmov q10, q2
+ vmov q11, q3
+
+ mov r3, #10
+
+.Ldoubleround:
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vadd.i32 q0, q0, q1
+ veor q4, q3, q0
+ vshl.u32 q3, q4, #16
+ vsri.u32 q3, q4, #16
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vadd.i32 q2, q2, q3
+ veor q4, q1, q2
+ vshl.u32 q1, q4, #12
+ vsri.u32 q1, q4, #20
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vadd.i32 q0, q0, q1
+ veor q4, q3, q0
+ vshl.u32 q3, q4, #8
+ vsri.u32 q3, q4, #24
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vadd.i32 q2, q2, q3
+ veor q4, q1, q2
+ vshl.u32 q1, q4, #7
+ vsri.u32 q1, q4, #25
+
+ // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+ vext.8 q1, q1, q1, #4
+ // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vext.8 q2, q2, q2, #8
+ // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+ vext.8 q3, q3, q3, #12
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vadd.i32 q0, q0, q1
+ veor q4, q3, q0
+ vshl.u32 q3, q4, #16
+ vsri.u32 q3, q4, #16
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vadd.i32 q2, q2, q3
+ veor q4, q1, q2
+ vshl.u32 q1, q4, #12
+ vsri.u32 q1, q4, #20
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vadd.i32 q0, q0, q1
+ veor q4, q3, q0
+ vshl.u32 q3, q4, #8
+ vsri.u32 q3, q4, #24
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vadd.i32 q2, q2, q3
+ veor q4, q1, q2
+ vshl.u32 q1, q4, #7
+ vsri.u32 q1, q4, #25
+
+ // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+ vext.8 q1, q1, q1, #12
+ // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vext.8 q2, q2, q2, #8
+ // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+ vext.8 q3, q3, q3, #4
+
+ subs r3, r3, #1
+ bne .Ldoubleround
+
+ add ip, r2, #0x20
+ vld1.8 {q4-q5}, [r2]
+ vld1.8 {q6-q7}, [ip]
+
+ // o0 = i0 ^ (x0 + s0)
+ vadd.i32 q0, q0, q8
+ veor q0, q0, q4
+
+ // o1 = i1 ^ (x1 + s1)
+ vadd.i32 q1, q1, q9
+ veor q1, q1, q5
+
+ // o2 = i2 ^ (x2 + s2)
+ vadd.i32 q2, q2, q10
+ veor q2, q2, q6
+
+ // o3 = i3 ^ (x3 + s3)
+ vadd.i32 q3, q3, q11
+ veor q3, q3, q7
+
+ add ip, r1, #0x20
+ vst1.8 {q0-q1}, [r1]
+ vst1.8 {q2-q3}, [ip]
+
+ bx lr
+ENDPROC(chacha20_block_xor_neon)
+
+ .align 5
+ENTRY(chacha20_4block_xor_neon)
+ push {r4-r6, lr}
+ mov ip, sp // preserve the stack pointer
+ sub r3, sp, #0x20 // allocate a 32 byte buffer
+ bic r3, r3, #0x1f // aligned to 32 bytes
+ mov sp, r3
+
+ // r0: Input state matrix, s
+ // r1: 4 data blocks output, o
+ // r2: 4 data blocks input, i
+
+ //
+ // This function encrypts four consecutive ChaCha20 blocks by loading
+ // the state matrix in NEON registers four times. The algorithm performs
+ // each operation on the corresponding word of each state matrix, hence
+ // requires no word shuffling. For final XORing step we transpose the
+ // matrix by interleaving 32- and then 64-bit words, which allows us to
+ // do XOR in NEON registers.
+ //
+
+ // x0..15[0-3] = s0..3[0..3]
+ add r3, r0, #0x20
+ vld1.32 {q0-q1}, [r0]
+ vld1.32 {q2-q3}, [r3]
+
+ adr r3, CTRINC
+ vdup.32 q15, d7[1]
+ vdup.32 q14, d7[0]
+ vld1.32 {q11}, [r3, :128]
+ vdup.32 q13, d6[1]
+ vdup.32 q12, d6[0]
+ vadd.i32 q12, q12, q11 // x12 += counter values 0-3
+ vdup.32 q11, d5[1]
+ vdup.32 q10, d5[0]
+ vdup.32 q9, d4[1]
+ vdup.32 q8, d4[0]
+ vdup.32 q7, d3[1]
+ vdup.32 q6, d3[0]
+ vdup.32 q5, d2[1]
+ vdup.32 q4, d2[0]
+ vdup.32 q3, d1[1]
+ vdup.32 q2, d1[0]
+ vdup.32 q1, d0[1]
+ vdup.32 q0, d0[0]
+
+ mov r3, #10
+
+.Ldoubleround4:
+ // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+ // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+ // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+ // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+ vadd.i32 q0, q0, q4
+ vadd.i32 q1, q1, q5
+ vadd.i32 q2, q2, q6
+ vadd.i32 q3, q3, q7
+
+ veor q12, q12, q0
+ veor q13, q13, q1
+ veor q14, q14, q2
+ veor q15, q15, q3
+
+ vrev32.16 q12, q12
+ vrev32.16 q13, q13
+ vrev32.16 q14, q14
+ vrev32.16 q15, q15
+
+ // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+ // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+ // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+ // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+ vadd.i32 q8, q8, q12
+ vadd.i32 q9, q9, q13
+ vadd.i32 q10, q10, q14
+ vadd.i32 q11, q11, q15
+
+ vst1.32 {q8-q9}, [sp, :256]
+
+ veor q8, q4, q8
+ veor q9, q5, q9
+ vshl.u32 q4, q8, #12
+ vshl.u32 q5, q9, #12
+ vsri.u32 q4, q8, #20
+ vsri.u32 q5, q9, #20
+
+ veor q8, q6, q10
+ veor q9, q7, q11
+ vshl.u32 q6, q8, #12
+ vshl.u32 q7, q9, #12
+ vsri.u32 q6, q8, #20
+ vsri.u32 q7, q9, #20
+
+ // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+ // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+ // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+ // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+ vadd.i32 q0, q0, q4
+ vadd.i32 q1, q1, q5
+ vadd.i32 q2, q2, q6
+ vadd.i32 q3, q3, q7
+
+ veor q8, q12, q0
+ veor q9, q13, q1
+ vshl.u32 q12, q8, #8
+ vshl.u32 q13, q9, #8
+ vsri.u32 q12, q8, #24
+ vsri.u32 q13, q9, #24
+
+ veor q8, q14, q2
+ veor q9, q15, q3
+ vshl.u32 q14, q8, #8
+ vshl.u32 q15, q9, #8
+ vsri.u32 q14, q8, #24
+ vsri.u32 q15, q9, #24
+
+ vld1.32 {q8-q9}, [sp, :256]
+
+ // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+ // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+ // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+ // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+ vadd.i32 q8, q8, q12
+ vadd.i32 q9, q9, q13
+ vadd.i32 q10, q10, q14
+ vadd.i32 q11, q11, q15
+
+ vst1.32 {q8-q9}, [sp, :256]
+
+ veor q8, q4, q8
+ veor q9, q5, q9
+ vshl.u32 q4, q8, #7
+ vshl.u32 q5, q9, #7
+ vsri.u32 q4, q8, #25
+ vsri.u32 q5, q9, #25
+
+ veor q8, q6, q10
+ veor q9, q7, q11
+ vshl.u32 q6, q8, #7
+ vshl.u32 q7, q9, #7
+ vsri.u32 q6, q8, #25
+ vsri.u32 q7, q9, #25
+
+ vld1.32 {q8-q9}, [sp, :256]
+
+ // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+ // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+ // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+ // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+ vadd.i32 q0, q0, q5
+ vadd.i32 q1, q1, q6
+ vadd.i32 q2, q2, q7
+ vadd.i32 q3, q3, q4
+
+ veor q15, q15, q0
+ veor q12, q12, q1
+ veor q13, q13, q2
+ veor q14, q14, q3
+
+ vrev32.16 q15, q15
+ vrev32.16 q12, q12
+ vrev32.16 q13, q13
+ vrev32.16 q14, q14
+
+ // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+ // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+ // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+ // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+ vadd.i32 q10, q10, q15
+ vadd.i32 q11, q11, q12
+ vadd.i32 q8, q8, q13
+ vadd.i32 q9, q9, q14
+
+ vst1.32 {q8-q9}, [sp, :256]
+
+ veor q8, q7, q8
+ veor q9, q4, q9
+ vshl.u32 q7, q8, #12
+ vshl.u32 q4, q9, #12
+ vsri.u32 q7, q8, #20
+ vsri.u32 q4, q9, #20
+
+ veor q8, q5, q10
+ veor q9, q6, q11
+ vshl.u32 q5, q8, #12
+ vshl.u32 q6, q9, #12
+ vsri.u32 q5, q8, #20
+ vsri.u32 q6, q9, #20
+
+ // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+ // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+ // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+ // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+ vadd.i32 q0, q0, q5
+ vadd.i32 q1, q1, q6
+ vadd.i32 q2, q2, q7
+ vadd.i32 q3, q3, q4
+
+ veor q8, q15, q0
+ veor q9, q12, q1
+ vshl.u32 q15, q8, #8
+ vshl.u32 q12, q9, #8
+ vsri.u32 q15, q8, #24
+ vsri.u32 q12, q9, #24
+
+ veor q8, q13, q2
+ veor q9, q14, q3
+ vshl.u32 q13, q8, #8
+ vshl.u32 q14, q9, #8
+ vsri.u32 q13, q8, #24
+ vsri.u32 q14, q9, #24
+
+ vld1.32 {q8-q9}, [sp, :256]
+
+ // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+ // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+ // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+ // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+ vadd.i32 q10, q10, q15
+ vadd.i32 q11, q11, q12
+ vadd.i32 q8, q8, q13
+ vadd.i32 q9, q9, q14
+
+ vst1.32 {q8-q9}, [sp, :256]
+
+ veor q8, q7, q8
+ veor q9, q4, q9
+ vshl.u32 q7, q8, #7
+ vshl.u32 q4, q9, #7
+ vsri.u32 q7, q8, #25
+ vsri.u32 q4, q9, #25
+
+ veor q8, q5, q10
+ veor q9, q6, q11
+ vshl.u32 q5, q8, #7
+ vshl.u32 q6, q9, #7
+ vsri.u32 q5, q8, #25
+ vsri.u32 q6, q9, #25
+
+ subs r3, r3, #1
+ beq 0f
+
+ vld1.32 {q8-q9}, [sp, :256]
+ b .Ldoubleround4
+
+ // x0[0-3] += s0[0]
+ // x1[0-3] += s0[1]
+ // x2[0-3] += s0[2]
+ // x3[0-3] += s0[3]
+0: ldmia r0!, {r3-r6}
+ vdup.32 q8, r3
+ vdup.32 q9, r4
+ vadd.i32 q0, q0, q8
+ vadd.i32 q1, q1, q9
+ vdup.32 q8, r5
+ vdup.32 q9, r6
+ vadd.i32 q2, q2, q8
+ vadd.i32 q3, q3, q9
+
+ // x4[0-3] += s1[0]
+ // x5[0-3] += s1[1]
+ // x6[0-3] += s1[2]
+ // x7[0-3] += s1[3]
+ ldmia r0!, {r3-r6}
+ vdup.32 q8, r3
+ vdup.32 q9, r4
+ vadd.i32 q4, q4, q8
+ vadd.i32 q5, q5, q9
+ vdup.32 q8, r5
+ vdup.32 q9, r6
+ vadd.i32 q6, q6, q8
+ vadd.i32 q7, q7, q9
+
+ // interleave 32-bit words in state n, n+1
+ vzip.32 q0, q1
+ vzip.32 q2, q3
+ vzip.32 q4, q5
+ vzip.32 q6, q7
+
+ // interleave 64-bit words in state n, n+2
+ vswp d1, d4
+ vswp d3, d6
+ vswp d9, d12
+ vswp d11, d14
+
+ // xor with corresponding input, write to output
+ vld1.8 {q8-q9}, [r2]!
+ veor q8, q8, q0
+ veor q9, q9, q4
+ vst1.8 {q8-q9}, [r1]!
+
+ vld1.32 {q8-q9}, [sp, :256]
+
+ // x8[0-3] += s2[0]
+ // x9[0-3] += s2[1]
+ // x10[0-3] += s2[2]
+ // x11[0-3] += s2[3]
+ ldmia r0!, {r3-r6}
+ vdup.32 q0, r3
+ vdup.32 q4, r4
+ vadd.i32 q8, q8, q0
+ vadd.i32 q9, q9, q4
+ vdup.32 q0, r5
+ vdup.32 q4, r6
+ vadd.i32 q10, q10, q0
+ vadd.i32 q11, q11, q4
+
+ // x12[0-3] += s3[0]
+ // x13[0-3] += s3[1]
+ // x14[0-3] += s3[2]
+ // x15[0-3] += s3[3]
+ ldmia r0!, {r3-r6}
+ vdup.32 q0, r3
+ vdup.32 q4, r4
+ adr r3, CTRINC
+ vadd.i32 q12, q12, q0
+ vld1.32 {q0}, [r3, :128]
+ vadd.i32 q13, q13, q4
+ vadd.i32 q12, q12, q0 // x12 += counter values 0-3
+
+ vdup.32 q0, r5
+ vdup.32 q4, r6
+ vadd.i32 q14, q14, q0
+ vadd.i32 q15, q15, q4
+
+ // interleave 32-bit words in state n, n+1
+ vzip.32 q8, q9
+ vzip.32 q10, q11
+ vzip.32 q12, q13
+ vzip.32 q14, q15
+
+ // interleave 64-bit words in state n, n+2
+ vswp d17, d20
+ vswp d19, d22
+ vswp d25, d28
+ vswp d27, d30
+
+ vmov q4, q1
+
+ vld1.8 {q0-q1}, [r2]!
+ veor q0, q0, q8
+ veor q1, q1, q12
+ vst1.8 {q0-q1}, [r1]!
+
+ vld1.8 {q0-q1}, [r2]!
+ veor q0, q0, q2
+ veor q1, q1, q6
+ vst1.8 {q0-q1}, [r1]!
+
+ vld1.8 {q0-q1}, [r2]!
+ veor q0, q0, q10
+ veor q1, q1, q14
+ vst1.8 {q0-q1}, [r1]!
+
+ vld1.8 {q0-q1}, [r2]!
+ veor q0, q0, q4
+ veor q1, q1, q5
+ vst1.8 {q0-q1}, [r1]!
+
+ vld1.8 {q0-q1}, [r2]!
+ veor q0, q0, q9
+ veor q1, q1, q13
+ vst1.8 {q0-q1}, [r1]!
+
+ vld1.8 {q0-q1}, [r2]!
+ veor q0, q0, q3
+ veor q1, q1, q7
+ vst1.8 {q0-q1}, [r1]!
+
+ vld1.8 {q0-q1}, [r2]
+ veor q0, q0, q11
+ veor q1, q1, q15
+ vst1.8 {q0-q1}, [r1]
+
+ mov sp, ip
+ pop {r4-r6, pc}
+ENDPROC(chacha20_4block_xor_neon)
+
+ .align 4
+CTRINC: .word 0, 1, 2, 3
+
diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c
new file mode 100644
index 000000000000..554f7f6069da
--- /dev/null
+++ b/arch/arm/crypto/chacha20-neon-glue.c
@@ -0,0 +1,136 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha20.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
+asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
+
+static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes)
+{
+ u8 buf[CHACHA20_BLOCK_SIZE];
+
+ while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+ chacha20_4block_xor_neon(state, dst, src);
+ bytes -= CHACHA20_BLOCK_SIZE * 4;
+ src += CHACHA20_BLOCK_SIZE * 4;
+ dst += CHACHA20_BLOCK_SIZE * 4;
+ state[12] += 4;
+ }
+ while (bytes >= CHACHA20_BLOCK_SIZE) {
+ chacha20_block_xor_neon(state, dst, src);
+ bytes -= CHACHA20_BLOCK_SIZE;
+ src += CHACHA20_BLOCK_SIZE;
+ dst += CHACHA20_BLOCK_SIZE;
+ state[12]++;
+ }
+ if (bytes) {
+ memcpy(buf, src, bytes);
+ chacha20_block_xor_neon(state, buf, buf);
+ memcpy(dst, buf, bytes);
+ }
+}
+
+static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
+ return crypto_chacha20_crypt(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
+
+ crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
+
+ kernel_neon_begin();
+
+ while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+ chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+ rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % CHACHA20_BLOCK_SIZE);
+ }
+
+ if (walk.nbytes) {
+ chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct crypto_alg alg = {
+ .cra_name = "chacha20",
+ .cra_driver_name = "chacha20-neon",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_ctxsize = sizeof(struct chacha20_ctx),
+ .cra_alignmask = sizeof(u32) - 1,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CHACHA20_KEY_SIZE,
+ .max_keysize = CHACHA20_KEY_SIZE,
+ .ivsize = CHACHA20_IV_SIZE,
+ .geniv = "seqiv",
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = chacha20_simd,
+ .decrypt = chacha20_simd,
+ },
+ },
+};
+
+static int __init chacha20_simd_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_NEON))
+ return -ENODEV;
+
+ return crypto_register_alg(&alg);
+}
+
+static void __exit chacha20_simd_mod_fini(void)
+{
+ crypto_unregister_alg(&alg);
+}
+
+module_init(chacha20_simd_mod_init);
+module_exit(chacha20_simd_mod_fini);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
--
2.7.4
^ permalink raw reply related
* [PATCH 1/2] crypto: arm64/chacha20 - implement NEON version based on SSE3 code
From: Ard Biesheuvel @ 2016-12-08 14:28 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481207339-17332-1-git-send-email-ard.biesheuvel@linaro.org>
This is a straight port to arm64/NEON of the x86 SSE3 implementation
of the ChaCha20 stream cipher.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/Kconfig | 6 +
arch/arm64/crypto/Makefile | 3 +
arch/arm64/crypto/chacha20-neon-core.S | 480 ++++++++++++++++++++
arch/arm64/crypto/chacha20-neon-glue.c | 131 ++++++
4 files changed, 620 insertions(+)
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 450a85df041a..0bf0f531f539 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -72,4 +72,10 @@ config CRYPTO_CRC32_ARM64
depends on ARM64
select CRYPTO_HASH
+config CRYPTO_CHACHA20_NEON
+ tristate "NEON accelerated ChaCha20 symmetric cipher"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_CHACHA20
+
endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index aa8888d7b744..9d2826c5fccf 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -41,6 +41,9 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+
AFLAGS_aes-ce.o := -DINTERLEAVE=4
AFLAGS_aes-neon.o := -DINTERLEAVE=4
diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S
new file mode 100644
index 000000000000..e2cd65580807
--- /dev/null
+++ b/arch/arm64/crypto/chacha20-neon-core.S
@@ -0,0 +1,480 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .align 6
+
+ENTRY(chacha20_block_xor_neon)
+ // x0: Input state matrix, s
+ // x1: 1 data block output, o
+ // x2: 1 data block input, i
+
+ //
+ // This function encrypts one ChaCha20 block by loading the state matrix
+ // in four NEON registers. It performs matrix operation on four words in
+ // parallel, but requires shuffling to rearrange the words after each
+ // round.
+ //
+
+ // x0..3 = s0..3
+ ld1 {v0.4s-v3.4s}, [x0]
+ ld1 {v8.4s-v11.4s}, [x0]
+
+ mov x3, #10
+
+.Ldoubleround:
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ add v0.4s, v0.4s, v1.4s
+ eor v3.16b, v3.16b, v0.16b
+ rev32 v3.8h, v3.8h
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ add v2.4s, v2.4s, v3.4s
+ eor v4.16b, v1.16b, v2.16b
+ shl v1.4s, v4.4s, #12
+ sri v1.4s, v4.4s, #20
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ add v0.4s, v0.4s, v1.4s
+ eor v4.16b, v3.16b, v0.16b
+ shl v3.4s, v4.4s, #8
+ sri v3.4s, v4.4s, #24
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ add v2.4s, v2.4s, v3.4s
+ eor v4.16b, v1.16b, v2.16b
+ shl v1.4s, v4.4s, #7
+ sri v1.4s, v4.4s, #25
+
+ // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+ ext v1.16b, v1.16b, v1.16b, #4
+ // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ ext v2.16b, v2.16b, v2.16b, #8
+ // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+ ext v3.16b, v3.16b, v3.16b, #12
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ add v0.4s, v0.4s, v1.4s
+ eor v3.16b, v3.16b, v0.16b
+ rev32 v3.8h, v3.8h
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ add v2.4s, v2.4s, v3.4s
+ eor v4.16b, v1.16b, v2.16b
+ shl v1.4s, v4.4s, #12
+ sri v1.4s, v4.4s, #20
+
+ // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ add v0.4s, v0.4s, v1.4s
+ eor v4.16b, v3.16b, v0.16b
+ shl v3.4s, v4.4s, #8
+ sri v3.4s, v4.4s, #24
+
+ // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ add v2.4s, v2.4s, v3.4s
+ eor v4.16b, v1.16b, v2.16b
+ shl v1.4s, v4.4s, #7
+ sri v1.4s, v4.4s, #25
+
+ // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+ ext v1.16b, v1.16b, v1.16b, #12
+ // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ ext v2.16b, v2.16b, v2.16b, #8
+ // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+ ext v3.16b, v3.16b, v3.16b, #4
+
+ subs x3, x3, #1
+ b.ne .Ldoubleround
+
+ ld1 {v4.16b-v7.16b}, [x2]
+
+ // o0 = i0 ^ (x0 + s0)
+ add v0.4s, v0.4s, v8.4s
+ eor v0.16b, v0.16b, v4.16b
+
+ // o1 = i1 ^ (x1 + s1)
+ add v1.4s, v1.4s, v9.4s
+ eor v1.16b, v1.16b, v5.16b
+
+ // o2 = i2 ^ (x2 + s2)
+ add v2.4s, v2.4s, v10.4s
+ eor v2.16b, v2.16b, v6.16b
+
+ // o3 = i3 ^ (x3 + s3)
+ add v3.4s, v3.4s, v11.4s
+ eor v3.16b, v3.16b, v7.16b
+
+ st1 {v0.16b-v3.16b}, [x1]
+
+ ret
+ENDPROC(chacha20_block_xor_neon)
+
+ .align 6
+ENTRY(chacha20_4block_xor_neon)
+ // x0: Input state matrix, s
+ // x1: 4 data blocks output, o
+ // x2: 4 data blocks input, i
+
+ //
+ // This function encrypts four consecutive ChaCha20 blocks by loading
+ // the state matrix in NEON registers four times. The algorithm performs
+ // each operation on the corresponding word of each state matrix, hence
+ // requires no word shuffling. For final XORing step we transpose the
+ // matrix by interleaving 32- and then 64-bit words, which allows us to
+ // do XOR in NEON registers.
+ //
+ adr x3, CTRINC
+ ld1 {v16.4s}, [x3]
+
+ // x0..15[0-3] = s0..3[0..3]
+ mov x4, x0
+ ld4r { v0.4s- v3.4s}, [x4], #16
+ ld4r { v4.4s- v7.4s}, [x4], #16
+ ld4r { v8.4s-v11.4s}, [x4], #16
+ ld4r {v12.4s-v15.4s}, [x4]
+
+ // x12 += counter values 0-3
+ add v12.4s, v12.4s, v16.4s
+
+ mov x3, #10
+
+.Ldoubleround4:
+ // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+ // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+ // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+ // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+ add v0.4s, v0.4s, v4.4s
+ add v1.4s, v1.4s, v5.4s
+ add v2.4s, v2.4s, v6.4s
+ add v3.4s, v3.4s, v7.4s
+
+ eor v12.16b, v12.16b, v0.16b
+ eor v13.16b, v13.16b, v1.16b
+ eor v14.16b, v14.16b, v2.16b
+ eor v15.16b, v15.16b, v3.16b
+
+ rev32 v12.8h, v12.8h
+ rev32 v13.8h, v13.8h
+ rev32 v14.8h, v14.8h
+ rev32 v15.8h, v15.8h
+
+ // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+ // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+ // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+ // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+ add v8.4s, v8.4s, v12.4s
+ add v9.4s, v9.4s, v13.4s
+ add v10.4s, v10.4s, v14.4s
+ add v11.4s, v11.4s, v15.4s
+
+ eor v17.16b, v4.16b, v8.16b
+ eor v18.16b, v5.16b, v9.16b
+ eor v19.16b, v6.16b, v10.16b
+ eor v20.16b, v7.16b, v11.16b
+
+ shl v4.4s, v17.4s, #12
+ shl v5.4s, v18.4s, #12
+ shl v6.4s, v19.4s, #12
+ shl v7.4s, v20.4s, #12
+
+ sri v4.4s, v17.4s, #20
+ sri v5.4s, v18.4s, #20
+ sri v6.4s, v19.4s, #20
+ sri v7.4s, v20.4s, #20
+
+ // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+ // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+ // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+ // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+ add v0.4s, v0.4s, v4.4s
+ add v1.4s, v1.4s, v5.4s
+ add v2.4s, v2.4s, v6.4s
+ add v3.4s, v3.4s, v7.4s
+
+ eor v17.16b, v12.16b, v0.16b
+ eor v18.16b, v13.16b, v1.16b
+ eor v19.16b, v14.16b, v2.16b
+ eor v20.16b, v15.16b, v3.16b
+
+ shl v12.4s, v17.4s, #8
+ shl v13.4s, v18.4s, #8
+ shl v14.4s, v19.4s, #8
+ shl v15.4s, v20.4s, #8
+
+ sri v12.4s, v17.4s, #24
+ sri v13.4s, v18.4s, #24
+ sri v14.4s, v19.4s, #24
+ sri v15.4s, v20.4s, #24
+
+ // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+ // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+ // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+ // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+ add v8.4s, v8.4s, v12.4s
+ add v9.4s, v9.4s, v13.4s
+ add v10.4s, v10.4s, v14.4s
+ add v11.4s, v11.4s, v15.4s
+
+ eor v17.16b, v4.16b, v8.16b
+ eor v18.16b, v5.16b, v9.16b
+ eor v19.16b, v6.16b, v10.16b
+ eor v20.16b, v7.16b, v11.16b
+
+ shl v4.4s, v17.4s, #7
+ shl v5.4s, v18.4s, #7
+ shl v6.4s, v19.4s, #7
+ shl v7.4s, v20.4s, #7
+
+ sri v4.4s, v17.4s, #25
+ sri v5.4s, v18.4s, #25
+ sri v6.4s, v19.4s, #25
+ sri v7.4s, v20.4s, #25
+
+ // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+ // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+ // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+ // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+ add v0.4s, v0.4s, v5.4s
+ add v1.4s, v1.4s, v6.4s
+ add v2.4s, v2.4s, v7.4s
+ add v3.4s, v3.4s, v4.4s
+
+ eor v15.16b, v15.16b, v0.16b
+ eor v12.16b, v12.16b, v1.16b
+ eor v13.16b, v13.16b, v2.16b
+ eor v14.16b, v14.16b, v3.16b
+
+ rev32 v15.8h, v15.8h
+ rev32 v12.8h, v12.8h
+ rev32 v13.8h, v13.8h
+ rev32 v14.8h, v14.8h
+
+ // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+ // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+ // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+ // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+ add v10.4s, v10.4s, v15.4s
+ add v11.4s, v11.4s, v12.4s
+ add v8.4s, v8.4s, v13.4s
+ add v9.4s, v9.4s, v14.4s
+
+ eor v17.16b, v5.16b, v10.16b
+ eor v18.16b, v6.16b, v11.16b
+ eor v19.16b, v7.16b, v8.16b
+ eor v20.16b, v4.16b, v9.16b
+
+ shl v5.4s, v17.4s, #12
+ shl v6.4s, v18.4s, #12
+ shl v7.4s, v19.4s, #12
+ shl v4.4s, v20.4s, #12
+
+ sri v5.4s, v17.4s, #20
+ sri v6.4s, v18.4s, #20
+ sri v7.4s, v19.4s, #20
+ sri v4.4s, v20.4s, #20
+
+ // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+ // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+ // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+ // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+ add v0.4s, v0.4s, v5.4s
+ add v1.4s, v1.4s, v6.4s
+ add v2.4s, v2.4s, v7.4s
+ add v3.4s, v3.4s, v4.4s
+
+ eor v17.16b, v15.16b, v0.16b
+ eor v18.16b, v12.16b, v1.16b
+ eor v19.16b, v13.16b, v2.16b
+ eor v20.16b, v14.16b, v3.16b
+
+ shl v15.4s, v17.4s, #8
+ shl v12.4s, v18.4s, #8
+ shl v13.4s, v19.4s, #8
+ shl v14.4s, v20.4s, #8
+
+ sri v15.4s, v17.4s, #24
+ sri v12.4s, v18.4s, #24
+ sri v13.4s, v19.4s, #24
+ sri v14.4s, v20.4s, #24
+
+ // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+ // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+ // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+ // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+ add v10.4s, v10.4s, v15.4s
+ add v11.4s, v11.4s, v12.4s
+ add v8.4s, v8.4s, v13.4s
+ add v9.4s, v9.4s, v14.4s
+
+ eor v17.16b, v5.16b, v10.16b
+ eor v18.16b, v6.16b, v11.16b
+ eor v19.16b, v7.16b, v8.16b
+ eor v20.16b, v4.16b, v9.16b
+
+ shl v5.4s, v17.4s, #7
+ shl v6.4s, v18.4s, #7
+ shl v7.4s, v19.4s, #7
+ shl v4.4s, v20.4s, #7
+
+ sri v5.4s, v17.4s, #25
+ sri v6.4s, v18.4s, #25
+ sri v7.4s, v19.4s, #25
+ sri v4.4s, v20.4s, #25
+
+ subs x3, x3, #1
+ b.ne .Ldoubleround4
+
+ // x0[0-3] += s0[0]
+ // x1[0-3] += s0[1]
+ // x2[0-3] += s0[2]
+ // x3[0-3] += s0[3]
+ ld4r {v17.4s-v20.4s}, [x0], #16
+ add v0.4s, v0.4s, v17.4s
+ add v1.4s, v1.4s, v18.4s
+ add v2.4s, v2.4s, v19.4s
+ add v3.4s, v3.4s, v20.4s
+
+ // x4[0-3] += s1[0]
+ // x5[0-3] += s1[1]
+ // x6[0-3] += s1[2]
+ // x7[0-3] += s1[3]
+ ld4r {v21.4s-v24.4s}, [x0], #16
+ add v4.4s, v4.4s, v21.4s
+ add v5.4s, v5.4s, v22.4s
+ add v6.4s, v6.4s, v23.4s
+ add v7.4s, v7.4s, v24.4s
+
+ // x8[0-3] += s2[0]
+ // x9[0-3] += s2[1]
+ // x10[0-3] += s2[2]
+ // x11[0-3] += s2[3]
+ ld4r {v17.4s-v20.4s}, [x0], #16
+ add v8.4s, v8.4s, v17.4s
+ add v9.4s, v9.4s, v18.4s
+ add v10.4s, v10.4s, v19.4s
+ add v11.4s, v11.4s, v20.4s
+
+ // x12[0-3] += s3[0]
+ // x13[0-3] += s3[1]
+ // x14[0-3] += s3[2]
+ // x15[0-3] += s3[3]
+ ld4r {v21.4s-v24.4s}, [x0]
+ add v12.4s, v12.4s, v21.4s
+ add v13.4s, v13.4s, v22.4s
+ add v14.4s, v14.4s, v23.4s
+ add v15.4s, v15.4s, v24.4s
+
+ // x12 += counter values 0-3
+ add v12.4s, v12.4s, v16.4s
+
+ ld1 {v16.16b-v19.16b}, [x2], #64
+ ld1 {v20.16b-v23.16b}, [x2], #64
+
+ // interleave 32-bit words in state n, n+1
+ zip1 v24.4s, v0.4s, v1.4s
+ zip1 v25.4s, v2.4s, v3.4s
+ zip1 v26.4s, v4.4s, v5.4s
+ zip1 v27.4s, v6.4s, v7.4s
+ zip1 v28.4s, v8.4s, v9.4s
+ zip1 v29.4s, v10.4s, v11.4s
+ zip1 v30.4s, v12.4s, v13.4s
+ zip1 v31.4s, v14.4s, v15.4s
+
+ zip2 v1.4s, v0.4s, v1.4s
+ zip2 v3.4s, v2.4s, v3.4s
+ zip2 v5.4s, v4.4s, v5.4s
+ zip2 v7.4s, v6.4s, v7.4s
+ zip2 v9.4s, v8.4s, v9.4s
+ zip2 v11.4s, v10.4s, v11.4s
+ zip2 v13.4s, v12.4s, v13.4s
+ zip2 v15.4s, v14.4s, v15.4s
+
+ mov v0.16b, v24.16b
+ mov v2.16b, v25.16b
+ mov v4.16b, v26.16b
+ mov v6.16b, v27.16b
+ mov v8.16b, v28.16b
+ mov v10.16b, v29.16b
+ mov v12.16b, v30.16b
+ mov v14.16b, v31.16b
+
+ // interleave 64-bit words in state n, n+2
+ zip1 v24.2d, v0.2d, v2.2d
+ zip1 v25.2d, v1.2d, v3.2d
+ zip1 v26.2d, v4.2d, v6.2d
+ zip1 v27.2d, v5.2d, v7.2d
+ zip1 v28.2d, v8.2d, v10.2d
+ zip1 v29.2d, v9.2d, v11.2d
+ zip1 v30.2d, v12.2d, v14.2d
+ zip1 v31.2d, v13.2d, v15.2d
+
+ zip2 v2.2d, v0.2d, v2.2d
+ zip2 v3.2d, v1.2d, v3.2d
+ zip2 v6.2d, v4.2d, v6.2d
+ zip2 v7.2d, v5.2d, v7.2d
+ zip2 v10.2d, v8.2d, v10.2d
+ zip2 v11.2d, v9.2d, v11.2d
+ zip2 v14.2d, v12.2d, v14.2d
+ zip2 v15.2d, v13.2d, v15.2d
+
+ mov v0.16b, v24.16b
+ mov v1.16b, v25.16b
+ mov v4.16b, v26.16b
+ mov v5.16b, v27.16b
+
+ mov v8.16b, v28.16b
+ mov v9.16b, v29.16b
+ mov v12.16b, v30.16b
+ mov v13.16b, v31.16b
+
+ ld1 {v24.16b-v27.16b}, [x2], #64
+ ld1 {v28.16b-v31.16b}, [x2]
+
+ // xor with corresponding input, write to output
+ eor v16.16b, v16.16b, v0.16b
+ eor v17.16b, v17.16b, v4.16b
+ eor v18.16b, v18.16b, v8.16b
+ eor v19.16b, v19.16b, v12.16b
+ st1 {v16.16b-v19.16b}, [x1], #64
+
+ eor v20.16b, v20.16b, v2.16b
+ eor v21.16b, v21.16b, v6.16b
+ eor v22.16b, v22.16b, v10.16b
+ eor v23.16b, v23.16b, v14.16b
+ st1 {v20.16b-v23.16b}, [x1], #64
+
+ eor v24.16b, v24.16b, v1.16b
+ eor v25.16b, v25.16b, v5.16b
+ eor v26.16b, v26.16b, v9.16b
+ eor v27.16b, v27.16b, v13.16b
+ st1 {v24.16b-v27.16b}, [x1], #64
+
+ eor v28.16b, v28.16b, v3.16b
+ eor v29.16b, v29.16b, v7.16b
+ eor v30.16b, v30.16b, v11.16b
+ eor v31.16b, v31.16b, v15.16b
+ st1 {v28.16b-v31.16b}, [x1]
+
+ ret
+ENDPROC(chacha20_4block_xor_neon)
+
+CTRINC: .word 0, 1, 2, 3
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
new file mode 100644
index 000000000000..705b42b06d00
--- /dev/null
+++ b/arch/arm64/crypto/chacha20-neon-glue.c
@@ -0,0 +1,131 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha20.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/neon.h>
+
+asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
+asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
+
+static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes)
+{
+ u8 buf[CHACHA20_BLOCK_SIZE];
+
+ while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+ chacha20_4block_xor_neon(state, dst, src);
+ bytes -= CHACHA20_BLOCK_SIZE * 4;
+ src += CHACHA20_BLOCK_SIZE * 4;
+ dst += CHACHA20_BLOCK_SIZE * 4;
+ state[12] += 4;
+ }
+ while (bytes >= CHACHA20_BLOCK_SIZE) {
+ chacha20_block_xor_neon(state, dst, src);
+ bytes -= CHACHA20_BLOCK_SIZE;
+ src += CHACHA20_BLOCK_SIZE;
+ dst += CHACHA20_BLOCK_SIZE;
+ state[12]++;
+ }
+ if (bytes) {
+ memcpy(buf, src, bytes);
+ chacha20_block_xor_neon(state, buf, buf);
+ memcpy(dst, buf, bytes);
+ }
+}
+
+static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ if (nbytes <= CHACHA20_BLOCK_SIZE)
+ return crypto_chacha20_crypt(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
+
+ crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
+
+ kernel_neon_begin();
+
+ while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+ chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+ rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % CHACHA20_BLOCK_SIZE);
+ }
+
+ if (walk.nbytes) {
+ chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct crypto_alg alg = {
+ .cra_name = "chacha20",
+ .cra_driver_name = "chacha20-neon",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_ctxsize = sizeof(struct chacha20_ctx),
+ .cra_alignmask = sizeof(u32) - 1,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CHACHA20_KEY_SIZE,
+ .max_keysize = CHACHA20_KEY_SIZE,
+ .ivsize = CHACHA20_IV_SIZE,
+ .geniv = "seqiv",
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = chacha20_simd,
+ .decrypt = chacha20_simd,
+ },
+ },
+};
+
+static int __init chacha20_simd_mod_init(void)
+{
+ return crypto_register_alg(&alg);
+}
+
+static void __exit chacha20_simd_mod_fini(void)
+{
+ crypto_unregister_alg(&alg);
+}
+
+module_init(chacha20_simd_mod_init);
+module_exit(chacha20_simd_mod_fini);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
--
2.7.4
^ permalink raw reply related
* [PATCH 0/2] crypto: arm64/ARM: NEON accelerated ChaCha20
From: Ard Biesheuvel @ 2016-12-08 14:28 UTC (permalink / raw)
To: linux-arm-kernel
Another port of existing x86 SSE code to NEON, again both for arm64 and ARM.
ChaCha20 is a stream cipher described in RFC 7539, and is intended to be
an efficient software implementable 'standby cipher', in case AES cannot
be used.
This NEON implementation is almost 2x as fast as the generic C code
(measured on Cortex-A57 using the arm64 version)
I'm aware that blkciphers are deprecated in favor of skciphers, but this
code (like the x86 version) uses the init and setkey routines of the generic
version, so it is probably better to port all implementations at once.
Ard Biesheuvel (2):
crypto: arm64/chacha20 - implement NEON version based on SSE3 code
crypto: arm/chacha20 - implement NEON version based on SSE3 code
arch/arm/crypto/Kconfig | 6 +
arch/arm/crypto/Makefile | 2 +
arch/arm/crypto/chacha20-neon-core.S | 524 ++++++++++++++++++++
arch/arm/crypto/chacha20-neon-glue.c | 136 +++++
arch/arm64/crypto/Kconfig | 6 +
arch/arm64/crypto/Makefile | 3 +
arch/arm64/crypto/chacha20-neon-core.S | 480 ++++++++++++++++++
arch/arm64/crypto/chacha20-neon-glue.c | 131 +++++
8 files changed, 1288 insertions(+)
create mode 100644 arch/arm/crypto/chacha20-neon-core.S
create mode 100644 arch/arm/crypto/chacha20-neon-glue.c
create mode 100644 arch/arm64/crypto/chacha20-neon-core.S
create mode 100644 arch/arm64/crypto/chacha20-neon-glue.c
--
2.7.4
^ permalink raw reply
* [PATCH v2.1 1/4] ARM: dts: davinci: da850: VPIF: add node and muxing
From: Laurent Pinchart @ 2016-12-08 13:51 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161208001418.4469-1-khilman@baylibre.com>
Hi Kevin,
Thank you for the patch.
On Wednesday 07 Dec 2016 16:14:18 Kevin Hilman wrote:
> Add VPIF node an pins to da850 and enable on boards. VPIF has two input
> channels described using the standard DT ports and enpoints.
>
> Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> ---
> v2 -> v2.1: moved ports from SoC .dtsi to board .dts files.
>
> arch/arm/boot/dts/da850-evm.dts | 20 ++++++++++++++++++++
> arch/arm/boot/dts/da850-lcdk.dts | 13 +++++++++++++
> arch/arm/boot/dts/da850.dtsi | 27 ++++++++++++++++++++++++++-
> 3 files changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/boot/dts/da850-evm.dts
> b/arch/arm/boot/dts/da850-evm.dts index 41de15fe15a2..cea36ee6fd07 100644
> --- a/arch/arm/boot/dts/da850-evm.dts
> +++ b/arch/arm/boot/dts/da850-evm.dts
> @@ -289,3 +289,23 @@
> };
> };
> };
> +
> +&vpif {
> + pinctrl-names = "default";
> + pinctrl-0 = <&vpif_capture_pins>;
> + status = "okay";
> +
> + /* VPIF capture port */
> + port {
> + vpif_ch0: endpoint at 0 {
> + reg = <0>;
> + bus-width = <8>;
> + };
> +
> + vpif_ch1: endpoint at 1 {
> + reg = <1>;
> + bus-width = <8>;
> + data-shift = <8>;
> + };
> + };
> +};
> diff --git a/arch/arm/boot/dts/da850-lcdk.dts
> b/arch/arm/boot/dts/da850-lcdk.dts index 7b8ab21fed6c..5fc21528e0ba 100644
> --- a/arch/arm/boot/dts/da850-lcdk.dts
> +++ b/arch/arm/boot/dts/da850-lcdk.dts
> @@ -219,3 +219,16 @@
> };
> };
> };
> +
> +&vpif {
> + pinctrl-names = "default";
> + pinctrl-0 = <&vpif_capture_pins>;
> + status = "okay";
> +
> + /* VPIF capture port */
> + port {
> + vpif_ch0: endpoint {
> + bus-width = <8>;
> + };
> + };
> +};
> diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi
> index f79e1b91c680..5f0b40510b2b 100644
> --- a/arch/arm/boot/dts/da850.dtsi
> +++ b/arch/arm/boot/dts/da850.dtsi
> @@ -186,7 +186,18 @@
> 0xc 0x88888888 0xffffffff
>
> >;
>
> };
> -
> + vpif_capture_pins: vpif_capture_pins {
> + pinctrl-single,bits = <
> + /* VP_DIN[2..7], VP_CLKIN1, VP_CLKIN0
*/
> + 0x38 0x11111111 0xffffffff
> + /* VP_DIN[10..15,0..1] */
> + 0x3c 0x11111111 0xffffffff
> + /* VP_DIN[8..9] */
> + 0x40 0x00000011 0x000000ff
> + /* VP_CLKIN3, VP_CLKIN2 */
> + 0x4c 0x00010100 0x000f0f00
> + >;
> + };
> };
> edma0: edma at 0 {
> compatible = "ti,edma3-tpcc";
> @@ -399,7 +410,21 @@
> <&edma0 0 1>;
> dma-names = "tx", "rx";
> };
> +
> + vpif: video at 217000 {
> + compatible = "ti,da850-vpif";
> + reg = <0x217000 0x1000>;
> + interrupts = <92>;
> + status = "disabled";
> +
> + /* VPIF capture port */
> + port {
> + #address-cells = <1>;
> + #size-cells = <0>;
> + };
> + };
> };
> +
> aemif: aemif at 68000000 {
> compatible = "ti,da850-aemif";
> #address-cells = <2>;
--
Regards,
Laurent Pinchart
^ permalink raw reply
* Tearing down DMA transfer setup after DMA client has finished
From: Måns Rullgård @ 2016-12-08 13:39 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <68bbe77a-c991-4e64-c189-fafbcda8e7ae@free.fr>
Mason <slash.tmp@free.fr> writes:
> On 08/12/2016 13:44, M?ns Rullg?rd wrote:
>
>> Mason <slash.tmp@free.fr> writes:
>>
>>> On 08/12/2016 13:20, M?ns Rullg?rd wrote:
>>>
>>>> The only problem we have is that nobody envisioned hardware where the
>>>> dma engine indicates completion slightly too soon. I suspect there's a
>>>> fifo or such somewhere, and the interrupt is triggered when the last
>>>> byte has been placed in the fifo rather than when it has been removed
>>>> which would have been more correct.
>>>
>>> As I (tried to) explain here:
>>> https://marc.info/?l=dmaengine&m=148007808418242&w=2
>>>
>>> A *read* MBUS agent raises its IRQ when it is safe for the memory
>>> to be overwritten (i.e. every byte has been pushed into the pipe).
>>>
>>> A *write* MBUS agent raises its IRQ when it is safe for another
>>> agent to read any one of the transferred bytes.
>>>
>>> The issue comes from the fact that, for a memory-to-device transfer,
>>> the system will receive the read agent's IRQ, but most devices
>>> (NFC, SATA) don't have an IRQ line to signal that their part of the
>>> operation is complete.
>>
>> SATA does, actually. Nevertheless, it's an unusual design.
>
> Thanks, I was mistaken about the SATA controller.
>
> On tango3 (and also tango4, I assume)
>
> IRQ 41 = Serial ATA #0
> IRQ 42 = Serial ATA DMA #0
> IRQ 54 = Serial ATA #1
> IRQ 55 = Serial ATA DMA #1
>
> But in the end, whether there is a device interrupt (SATA)
> or not (NFC), for a memory-to-device transfer, the DMA
> driver will get the read agent notification (which should
> be ignored) and the client driver should either spin until
> idle (NFC) or wait for its completion IRQ (SATA).
>
> Correct?
Yes, and when the client device is finished, the driver needs to signal
this to the dma driver so it can reuse the channel. It's this last
piece that's missing.
--
M?ns Rullg?rd
^ permalink raw reply
* [RFC v3 00/10] KVM PCIe/MSI passthrough on ARM/ARM64 and IOVA reserved regions
From: Auger Eric @ 2016-12-08 13:36 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <cd16fc5c-8649-0bfa-d67d-8f257aa38bd6@arm.com>
Hi Robin,
On 08/12/2016 14:14, Robin Murphy wrote:
> On 08/12/16 09:36, Auger Eric wrote:
>> Hi,
>>
>> On 15/11/2016 14:09, Eric Auger wrote:
>>> Following LPC discussions, we now report reserved regions through
>>> iommu-group sysfs reserved_regions attribute file.
>>
>>
>> While I am respinning this series into v4, here is a tentative summary
>> of technical topics for which no consensus was reached at this point.
>>
>> 1) Shall we report the usable IOVA range instead of reserved IOVA
>> ranges. Not discussed at/after LPC.
>> x I currently report reserved regions. Alex expressed the need to
>> report the full usable IOVA range instead (x86 min-max range
>> minus MSI APIC window). I think this is meaningful for ARM
>> too where arm-smmu might not support the full 64b range.
>> x Any objection we report the usable IOVA regions instead?
>
> The issue with that is that we can't actually report "the usable
> regions" at the moment, as that involves pulling together disjoint
> properties of arbitrary hardware unrelated to the IOMMU. We'd be
> reporting "the not-definitely-unusable regions, which may have some
> unusable holes in them still". That seems like an ABI nightmare - I'd
> still much rather say "here are some, but not necessarily all, regions
> you definitely can't use", because saying "here are some regions which
> you might be able to use most of, probably" is what we're already doing
> today, via a single implicit region from 0 to ULONG_MAX ;)
>
> The address space limits are definitely useful to know, but I think it
> would be better to expose them separately to avoid the ambiguity. At
> worst, I guess it would be reasonable to express the limits via an
> "out-of-range" reserved region type for 0 to $base and $top to
> ULONG-MAX. To *safely* expose usable regions, we'd have to start out
> with a very conservative assumption (e.g. only IOVAs matching physical
> RAM), and only expand them once we're sure we can detect every possible
> bit of problematic hardware in the system - that's just too limiting to
> be useful. And if we expose something knowingly inaccurate, we risk
> having another "bogoMIPS in /proc/cpuinfo" ABI burden on our hands, and
> nobody wants that...
Makes sense to me. "out-of-range reserved region type for 0 to $base and
$top to ULONG-MAX" can be an alternative to fulfill the requirement.
>
>> 2) Shall the kernel check collision with MSI window* when userspace
>> calls VFIO_IOMMU_MAP_DMA?
>> Joerg/Will No; Alex yes
>> *for IOVA regions consumed downstream to the IOMMU: everyone says NO
>
> If we're starting off by having the SMMU drivers expose it as a fake
> fixed region, I don't think we need to worry about this yet. We all seem
> to agree that as long as we communicate the fixed regions to userspace,
> it's then userspace's job to work around them. Let's come back to this
> one once we actually get to the point of dynamically sizing and
> allocating 'real' MSI remapping region(s).
>
> Ultimately, the kernel *will* police collisions either way, because an
> underlying iommu_map() is going to fail if overlapping IOVAs are ever
> actually used, so it's really just a question of whether to have a more
> user-friendly failure mode.
That's true on ARM but not on x86 where the APIC MSI region is not
mapped I think.
>
>> 3) RMRR reporting in the iommu group sysfs? Joerg: yes; Don: no
>> My current series does not expose them in iommu group sysfs.
>> I understand we can expose the RMRR regions in the iomm group sysfs
>> without necessarily supporting RMRR requiring device assignment.
>> We can also add this support later.
>
> As you say, reporting them doesn't necessitate allowing device
> assignment, and it's information which can already be easily grovelled
> out of dmesg (for intel-iommu at least) - there doesn't seem to be any
> need to hide them, but the x86 folks can have the final word on that.
agreed
Thanks
Eric
>
> Robin.
>
>> Thanks
>>
>> Eric
>>
>>
>>>
>>> Reserved regions are populated through the IOMMU get_resv_region callback
>>> (former get_dm_regions), now implemented by amd-iommu, intel-iommu and
>>> arm-smmu.
>>>
>>> The intel-iommu reports the [FEE0_0000h - FEF0_000h] MSI window as an
>>> IOMMU_RESV_NOMAP reserved region.
>>>
>>> arm-smmu reports the MSI window (arbitrarily located at 0x8000000 and
>>> 1MB large) and the PCI host bridge windows.
>>>
>>> The series integrates a not officially posted patch from Robin:
>>> "iommu/dma: Allow MSI-only cookies".
>>>
>>> This series currently does not address IRQ safety assessment.
>>>
>>> Best Regards
>>>
>>> Eric
>>>
>>> Git: complete series available at
>>> https://github.com/eauger/linux/tree/v4.9-rc5-reserved-rfc-v3
>>>
>>> History:
>>> RFC v2 -> v3:
>>> - switch to an iommu-group sysfs API
>>> - use new dummy allocator provided by Robin
>>> - dummy allocator initialized by vfio-iommu-type1 after enumerating
>>> the reserved regions
>>> - at the moment ARM MSI base address/size is left unchanged compared
>>> to v2
>>> - we currently report reserved regions and not usable IOVA regions as
>>> requested by Alex
>>>
>>> RFC v1 -> v2:
>>> - fix intel_add_reserved_regions
>>> - add mutex lock/unlock in vfio_iommu_type1
>>>
>>>
>>> Eric Auger (10):
>>> iommu/dma: Allow MSI-only cookies
>>> iommu: Rename iommu_dm_regions into iommu_resv_regions
>>> iommu: Add new reserved IOMMU attributes
>>> iommu: iommu_alloc_resv_region
>>> iommu: Do not map reserved regions
>>> iommu: iommu_get_group_resv_regions
>>> iommu: Implement reserved_regions iommu-group sysfs file
>>> iommu/vt-d: Implement reserved region get/put callbacks
>>> iommu/arm-smmu: Implement reserved region get/put callbacks
>>> vfio/type1: Get MSI cookie
>>>
>>> drivers/iommu/amd_iommu.c | 20 +++---
>>> drivers/iommu/arm-smmu.c | 52 +++++++++++++++
>>> drivers/iommu/dma-iommu.c | 116 ++++++++++++++++++++++++++-------
>>> drivers/iommu/intel-iommu.c | 50 ++++++++++----
>>> drivers/iommu/iommu.c | 141 ++++++++++++++++++++++++++++++++++++----
>>> drivers/vfio/vfio_iommu_type1.c | 26 ++++++++
>>> include/linux/dma-iommu.h | 7 ++
>>> include/linux/iommu.h | 49 ++++++++++----
>>> 8 files changed, 391 insertions(+), 70 deletions(-)
>>>
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply
* [PATCH] arm64: dts: h3ulcb: Provide sd0_uhs node
From: Simon Horman @ 2016-12-08 13:30 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1480583246-6707-1-git-send-email-horms+renesas@verge.net.au>
On Thu, Dec 01, 2016 at 10:07:26AM +0100, Simon Horman wrote:
> Provide separaate sd0 and sd0_uhs nodes rather than duplicate sd0 nodes.
>
> Cc: Vladimir Barinov <vladimir.barinov@cogentembedded.com>
> Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
> Fixes: 93373c309a70 ("arm64: dts: h3ulcb: rename SDHI0 pins")
> Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
I have queued this up as a fix for v4.10.
> arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts b/arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts
> index 6ffb0517421a..dbea2c3d8f0c 100644
> --- a/arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts
> +++ b/arch/arm64/boot/dts/renesas/r8a7795-h3ulcb.dts
> @@ -169,7 +169,7 @@
> power-source = <3300>;
> };
>
> - sdhi0_pins_uhs: sd0 {
> + sdhi0_pins_uhs: sd0_uhs {
> groups = "sdhi0_data4", "sdhi0_ctrl";
> function = "sdhi0";
> power-source = <1800>;
> --
> 2.7.0.rc3.207.g0ac5344
>
^ permalink raw reply
* Tearing down DMA transfer setup after DMA client has finished
From: Mason @ 2016-12-08 13:29 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <yw1xfulyzj2b.fsf@unicorn.mansr.com>
On 08/12/2016 13:44, M?ns Rullg?rd wrote:
> Mason <slash.tmp@free.fr> writes:
>
>> On 08/12/2016 13:20, M?ns Rullg?rd wrote:
>>
>>> The only problem we have is that nobody envisioned hardware where the
>>> dma engine indicates completion slightly too soon. I suspect there's a
>>> fifo or such somewhere, and the interrupt is triggered when the last
>>> byte has been placed in the fifo rather than when it has been removed
>>> which would have been more correct.
>>
>> As I (tried to) explain here:
>> https://marc.info/?l=dmaengine&m=148007808418242&w=2
>>
>> A *read* MBUS agent raises its IRQ when it is safe for the memory
>> to be overwritten (i.e. every byte has been pushed into the pipe).
>>
>> A *write* MBUS agent raises its IRQ when it is safe for another
>> agent to read any one of the transferred bytes.
>>
>> The issue comes from the fact that, for a memory-to-device transfer,
>> the system will receive the read agent's IRQ, but most devices
>> (NFC, SATA) don't have an IRQ line to signal that their part of the
>> operation is complete.
>
> SATA does, actually. Nevertheless, it's an unusual design.
Thanks, I was mistaken about the SATA controller.
On tango3 (and also tango4, I assume)
IRQ 41 = Serial ATA #0
IRQ 42 = Serial ATA DMA #0
IRQ 54 = Serial ATA #1
IRQ 55 = Serial ATA DMA #1
But in the end, whether there is a device interrupt (SATA)
or not (NFC), for a memory-to-device transfer, the DMA
driver will get the read agent notification (which should
be ignored) and the client driver should either spin until
idle (NFC) or wait for its completion IRQ (SATA).
Correct?
Regards.
^ permalink raw reply
* [PATCH] arm64: Work around Falkor erratum 1009
From: Catalin Marinas @ 2016-12-08 13:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161208114511.GC9768@leverpostej>
On Thu, Dec 08, 2016 at 11:45:12AM +0000, Mark Rutland wrote:
> On Wed, Dec 07, 2016 at 03:04:31PM -0500, Christopher Covington wrote:
> > + asm volatile(ALTERNATIVE(
> > + "nop \n"
> > + "nop \n",
> > + "tlbi vmalle1is \n"
> > + "dsb ish \n",
>
> As a general note, perhaps we want a C compatible NOP_ALTERNATIVE() so
> that the nop case can be implicitly generated for sequences like this.
It's also worth checking what cpus_have_const_cap() would generate for
the default (no workaround required) case.
--
Catalin
^ permalink raw reply
* [PATCH renesas/devel 1/2] ARM: shmobile: defconfig: Enable r8a774[35] SoCs
From: Simon Horman @ 2016-12-08 13:26 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <CAMuHMdUyCcT3k22b-N8J8aasE=YDYfQYfU5GqFKEtsdFnUnHCw@mail.gmail.com>
On Tue, Dec 06, 2016 at 03:00:53PM +0100, Geert Uytterhoeven wrote:
> On Tue, Dec 6, 2016 at 2:32 PM, Simon Horman <horms+renesas@verge.net.au> wrote:
> > Enable recently added r8a7743 (RZ/G1M) and r8a7745 (RZ/G1E) SoCs.
> >
> > Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
>
> Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Thanks, I have queued up this and the following patch for v4.11.
^ permalink raw reply
* [RFC v3 00/10] KVM PCIe/MSI passthrough on ARM/ARM64 and IOVA reserved regions
From: Robin Murphy @ 2016-12-08 13:14 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <c5d4efa7-699d-4aa3-44cc-4ce03d0ce185@redhat.com>
On 08/12/16 09:36, Auger Eric wrote:
> Hi,
>
> On 15/11/2016 14:09, Eric Auger wrote:
>> Following LPC discussions, we now report reserved regions through
>> iommu-group sysfs reserved_regions attribute file.
>
>
> While I am respinning this series into v4, here is a tentative summary
> of technical topics for which no consensus was reached at this point.
>
> 1) Shall we report the usable IOVA range instead of reserved IOVA
> ranges. Not discussed at/after LPC.
> x I currently report reserved regions. Alex expressed the need to
> report the full usable IOVA range instead (x86 min-max range
> minus MSI APIC window). I think this is meaningful for ARM
> too where arm-smmu might not support the full 64b range.
> x Any objection we report the usable IOVA regions instead?
The issue with that is that we can't actually report "the usable
regions" at the moment, as that involves pulling together disjoint
properties of arbitrary hardware unrelated to the IOMMU. We'd be
reporting "the not-definitely-unusable regions, which may have some
unusable holes in them still". That seems like an ABI nightmare - I'd
still much rather say "here are some, but not necessarily all, regions
you definitely can't use", because saying "here are some regions which
you might be able to use most of, probably" is what we're already doing
today, via a single implicit region from 0 to ULONG_MAX ;)
The address space limits are definitely useful to know, but I think it
would be better to expose them separately to avoid the ambiguity. At
worst, I guess it would be reasonable to express the limits via an
"out-of-range" reserved region type for 0 to $base and $top to
ULONG-MAX. To *safely* expose usable regions, we'd have to start out
with a very conservative assumption (e.g. only IOVAs matching physical
RAM), and only expand them once we're sure we can detect every possible
bit of problematic hardware in the system - that's just too limiting to
be useful. And if we expose something knowingly inaccurate, we risk
having another "bogoMIPS in /proc/cpuinfo" ABI burden on our hands, and
nobody wants that...
> 2) Shall the kernel check collision with MSI window* when userspace
> calls VFIO_IOMMU_MAP_DMA?
> Joerg/Will No; Alex yes
> *for IOVA regions consumed downstream to the IOMMU: everyone says NO
If we're starting off by having the SMMU drivers expose it as a fake
fixed region, I don't think we need to worry about this yet. We all seem
to agree that as long as we communicate the fixed regions to userspace,
it's then userspace's job to work around them. Let's come back to this
one once we actually get to the point of dynamically sizing and
allocating 'real' MSI remapping region(s).
Ultimately, the kernel *will* police collisions either way, because an
underlying iommu_map() is going to fail if overlapping IOVAs are ever
actually used, so it's really just a question of whether to have a more
user-friendly failure mode.
> 3) RMRR reporting in the iommu group sysfs? Joerg: yes; Don: no
> My current series does not expose them in iommu group sysfs.
> I understand we can expose the RMRR regions in the iomm group sysfs
> without necessarily supporting RMRR requiring device assignment.
> We can also add this support later.
As you say, reporting them doesn't necessitate allowing device
assignment, and it's information which can already be easily grovelled
out of dmesg (for intel-iommu at least) - there doesn't seem to be any
need to hide them, but the x86 folks can have the final word on that.
Robin.
> Thanks
>
> Eric
>
>
>>
>> Reserved regions are populated through the IOMMU get_resv_region callback
>> (former get_dm_regions), now implemented by amd-iommu, intel-iommu and
>> arm-smmu.
>>
>> The intel-iommu reports the [FEE0_0000h - FEF0_000h] MSI window as an
>> IOMMU_RESV_NOMAP reserved region.
>>
>> arm-smmu reports the MSI window (arbitrarily located at 0x8000000 and
>> 1MB large) and the PCI host bridge windows.
>>
>> The series integrates a not officially posted patch from Robin:
>> "iommu/dma: Allow MSI-only cookies".
>>
>> This series currently does not address IRQ safety assessment.
>>
>> Best Regards
>>
>> Eric
>>
>> Git: complete series available at
>> https://github.com/eauger/linux/tree/v4.9-rc5-reserved-rfc-v3
>>
>> History:
>> RFC v2 -> v3:
>> - switch to an iommu-group sysfs API
>> - use new dummy allocator provided by Robin
>> - dummy allocator initialized by vfio-iommu-type1 after enumerating
>> the reserved regions
>> - at the moment ARM MSI base address/size is left unchanged compared
>> to v2
>> - we currently report reserved regions and not usable IOVA regions as
>> requested by Alex
>>
>> RFC v1 -> v2:
>> - fix intel_add_reserved_regions
>> - add mutex lock/unlock in vfio_iommu_type1
>>
>>
>> Eric Auger (10):
>> iommu/dma: Allow MSI-only cookies
>> iommu: Rename iommu_dm_regions into iommu_resv_regions
>> iommu: Add new reserved IOMMU attributes
>> iommu: iommu_alloc_resv_region
>> iommu: Do not map reserved regions
>> iommu: iommu_get_group_resv_regions
>> iommu: Implement reserved_regions iommu-group sysfs file
>> iommu/vt-d: Implement reserved region get/put callbacks
>> iommu/arm-smmu: Implement reserved region get/put callbacks
>> vfio/type1: Get MSI cookie
>>
>> drivers/iommu/amd_iommu.c | 20 +++---
>> drivers/iommu/arm-smmu.c | 52 +++++++++++++++
>> drivers/iommu/dma-iommu.c | 116 ++++++++++++++++++++++++++-------
>> drivers/iommu/intel-iommu.c | 50 ++++++++++----
>> drivers/iommu/iommu.c | 141 ++++++++++++++++++++++++++++++++++++----
>> drivers/vfio/vfio_iommu_type1.c | 26 ++++++++
>> include/linux/dma-iommu.h | 7 ++
>> include/linux/iommu.h | 49 ++++++++++----
>> 8 files changed, 391 insertions(+), 70 deletions(-)
>>
^ permalink raw reply
* [PATCH 16/18] arm64: ptrace: handle ptrace_request differently for aarch32 and ilp32
From: Catalin Marinas @ 2016-12-08 13:12 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <3703608.GKr1zzErMk@wuerfel>
On Wed, Dec 07, 2016 at 09:40:13PM +0100, Arnd Bergmann wrote:
> On Wednesday, December 7, 2016 4:59:13 PM CET Catalin Marinas wrote:
> > On Tue, Dec 06, 2016 at 11:55:08AM +0530, Yury Norov wrote:
> > > On Mon, Dec 05, 2016 at 04:34:23PM +0000, Catalin Marinas wrote:
> > > > On Fri, Oct 21, 2016 at 11:33:15PM +0300, Yury Norov wrote:
> > > > > New aarch32 ptrace syscall handler is introduced to avoid run-time
> > > > > detection of the task type.
> > > >
> > > > What's wrong with the run-time detection? If it's just to avoid a
> > > > negligible overhead, I would rather keep the code simpler by avoiding
> > > > duplicating the generic compat_sys_ptrace().
> > >
> > > Nothing wrong. This is how Arnd asked me to do. You already asked this
> > > question: http://lkml.iu.edu/hypermail/linux/kernel/1604.3/00930.html
> >
> > Hmm, I completely forgot about this ;). There is still an advantage to
> > doing run-time checking if we avoid touching core code (less acks to
> > gather and less code duplication).
> >
> > Let's see what Arnd says but the initial patch looked simpler.
>
> I don't currently have either version of the patch in my inbox
> (the archive is on a different machine), but in general I'd still
> think it's best to avoid the runtime check for aarch64-ilp32
> altogether. I'd have to look at the overall kernel source to
> see if it's worth avoiding one or two instances though, or
> if there are an overwhelming number of other checks that we
> can't avoid at all.
Just in case you haven't found them already, current version:
https://marc.info/?l=linux-arm-kernel&m=147708276818318&w=2
Original version:
https://patchwork.kernel.org/patch/7980521/
The old one looks more readable and given that ptrace is not really a
fast path, I'm not two worried about run-time checks
> Regarding ptrace, I notice that arch/tile doesn't even use
> the compat entry point for its ilp32 user space on 64-bit
> kernels, it just calls the regular 64-bit one. Would that
> help here?
I don't know whether it would work, we have incompatible siginfo_t on
AArch64/ILP32.
--
Catalin
^ permalink raw reply
* [PATCH v9 05/11] arm/arm64: vgic: Introduce VENG0 and VENG1 fields to vmcr struct
From: Auger Eric @ 2016-12-08 12:50 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20161208122115.GG4816@cbox>
Hi Christoffer,
On 08/12/2016 13:21, Christoffer Dall wrote:
> On Thu, Dec 08, 2016 at 12:52:39PM +0100, Auger Eric wrote:
>> Hi Vijay,
>>
>> On 28/11/2016 15:28, Christoffer Dall wrote:
>>> On Wed, Nov 23, 2016 at 06:31:52PM +0530, vijay.kilari at gmail.com wrote:
>>>> From: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>>>
>>>> ICC_VMCR_EL2 supports virtual access to ICC_IGRPEN1_EL1.Enable
>>>> and ICC_IGRPEN0_EL1.Enable fields. Add grpen0 and grpen1 member
>>>> variables to struct vmcr to support read and write of these fields.
>>>>
>>>> Also refactor vgic_set_vmcr and vgic_get_vmcr() code.
>>>> Drop ICH_VMCR_CTLR_SHIFT and ICH_VMCR_CTLR_MASK macros and instead
>>>> use ICH_VMCR_EOI* and ICH_VMCR_CBPR* macros
>>>> .
>>>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@cavium.com>
>>>> ---
>>>> include/linux/irqchip/arm-gic-v3.h | 2 --
>>>> virt/kvm/arm/vgic/vgic-mmio-v2.c | 16 ----------------
>>>> virt/kvm/arm/vgic/vgic-mmio.c | 16 ++++++++++++++++
>>>> virt/kvm/arm/vgic/vgic-v3.c | 22 ++++++++++++++++++++--
>>>> virt/kvm/arm/vgic/vgic.h | 5 +++++
>>>> 5 files changed, 41 insertions(+), 20 deletions(-)
>>>>
>>>> diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
>>>> index b4f8287..406fc3e 100644
>>>> --- a/include/linux/irqchip/arm-gic-v3.h
>>>> +++ b/include/linux/irqchip/arm-gic-v3.h
>>>> @@ -404,8 +404,6 @@
>>>> #define ICH_HCR_EN (1 << 0)
>>>> #define ICH_HCR_UIE (1 << 1)
>>>>
>>>> -#define ICH_VMCR_CTLR_SHIFT 0
>>>> -#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT)
>>>> #define ICH_VMCR_CBPR_SHIFT 4
>>>> #define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT)
>>>> #define ICH_VMCR_EOIM_SHIFT 9
>>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>> index 2cb04b7..ad353b5 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>> @@ -212,22 +212,6 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
>>>> }
>>>> }
>>>>
>>>> -static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
>>>> -{
>>>> - if (kvm_vgic_global_state.type == VGIC_V2)
>>>> - vgic_v2_set_vmcr(vcpu, vmcr);
>>>> - else
>>>> - vgic_v3_set_vmcr(vcpu, vmcr);
>>>> -}
>>>> -
>>>> -static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
>>>> -{
>>>> - if (kvm_vgic_global_state.type == VGIC_V2)
>>>> - vgic_v2_get_vmcr(vcpu, vmcr);
>>>> - else
>>>> - vgic_v3_get_vmcr(vcpu, vmcr);
>>>> -}
>>>> -
>>>> #define GICC_ARCH_VERSION_V2 0x2
>>>>
>>>> /* These are for userland accesses only, there is no guest-facing emulation. */
>>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
>>>> index 0d1bc98..f81e0e5 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-mmio.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
>>>> @@ -416,6 +416,22 @@ int vgic_validate_mmio_region_addr(struct kvm_device *dev,
>>>> return -ENXIO;
>>>> }
>>>>
>>>> +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
>>>> +{
>>>> + if (kvm_vgic_global_state.type == VGIC_V2)
>>>> + vgic_v2_set_vmcr(vcpu, vmcr);
>>>> + else
>>>> + vgic_v3_set_vmcr(vcpu, vmcr);
>>>> +}
>>>> +
>>>> +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
>>>> +{
>>>> + if (kvm_vgic_global_state.type == VGIC_V2)
>>>> + vgic_v2_get_vmcr(vcpu, vmcr);
>>>> + else
>>>> + vgic_v3_get_vmcr(vcpu, vmcr);
>>>> +}
>>>> +
>>>> /*
>>>> * kvm_mmio_read_buf() returns a value in a format where it can be converted
>>>> * to a byte array and be directly observed as the guest wanted it to appear
>>>> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
>>>> index 9f0dae3..a3ff04b 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-v3.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-v3.c
>>>> @@ -175,10 +175,19 @@ void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
>>>> {
>>>> u32 vmcr;
>>>>
>>>> - vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
>>>> + /*
>>>> + * Ignore the FIQen bit, because GIC emulation always implies
>>>> + * SRE=1 which means the vFIQEn bit is also RES1.
>>>> + */
>>>> + vmcr = (vmcrp->ctlr & ICC_CTLR_EL1_EOImode_MASK) >>
>>>> + ICC_CTLR_EL1_EOImode_SHIFT;
>>>> + vmcr = (vmcr << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
>
>> I am not able to understand why we use ICC_CTLR _*macros here? Please
>> could you explain it to me? Besides if we want to ignore the FIQen bit
>> can't we change the ICH_VMCR_CTLR_MASK value?
>>
> This first statement is setting the vmcr to the ctlr's bit[1], but
> placed in bit[0], and then the next statement is moving that bit value
> to the corresponding place in the vmcr. I think this is correct
> (although a little opaque).
Yes the question was more about the semantic of the vmcrp->ctlr field. I
thought it was supposed to store ICH_VMCR_EL2 ctrl bits as they are and
not with a different layout.
>
> There's also a newer series on the list, just so you know.
Argh OK I missed it. I will check the diffs in the AArch64 related patches.
Thanks
Eric
>
> Thanks,
> -Christoffer
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
^ permalink raw reply
* Tearing down DMA transfer setup after DMA client has finished
From: Måns Rullgård @ 2016-12-08 12:44 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <a865e7d0-4247-657e-25db-9093731005d3@free.fr>
Mason <slash.tmp@free.fr> writes:
> On 08/12/2016 13:20, M?ns Rullg?rd wrote:
>
>> The only problem we have is that nobody envisioned hardware where the
>> dma engine indicates completion slightly too soon. I suspect there's a
>> fifo or such somewhere, and the interrupt is triggered when the last
>> byte has been placed in the fifo rather than when it has been removed
>> which would have been more correct.
>
> As I (tried to) explain here:
> https://marc.info/?l=dmaengine&m=148007808418242&w=2
>
> A *read* MBUS agent raises its IRQ when it is safe for the memory
> to be overwritten (i.e. every byte has been pushed into the pipe).
>
> A *write* MBUS agent raises its IRQ when it is safe for another
> agent to read any one of the transferred bytes.
>
> The issue comes from the fact that, for a memory-to-device transfer,
> the system will receive the read agent's IRQ, but most devices
> (NFC, SATA) don't have an IRQ line to signal that their part of the
> operation is complete.
SATA does, actually. Nevertheless, it's an unusual design.
--
M?ns Rullg?rd
^ permalink raw reply
* [PATCH 5/8] efi: Get the secure boot status [ver #5]
From: Lukas Wunner @ 2016-12-08 12:42 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <6009.1481184981@warthog.procyon.org.uk>
On Thu, Dec 08, 2016 at 08:16:21AM +0000, David Howells wrote:
> +/*
> + * Determine whether we're in secure boot mode.
> + */
> +enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
> +{
> + u8 secboot, setupmode;
> + unsigned long size;
> + efi_status_t status;
> +
> + size = sizeof(secboot);
> + status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid,
> + NULL, &size, &secboot);
> + if (status != EFI_SUCCESS)
> + goto out_efi_err;
> +
> + size = sizeof(setupmode);
> + status = get_efi_var(efi_SetupMode_name, &efi_variable_guid,
> + NULL, &size, &setupmode);
> + if (status != EFI_SUCCESS)
> + goto out_efi_err;
> +
> + if (secboot == 0 || setupmode == 1)
> + return efi_secureboot_mode_disabled;
> +
> + pr_efi(sys_table_arg, "UEFI Secure Boot is enabled.\n");
> + return efi_secureboot_mode_enabled;
> +
> +out_efi_err:
> + pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n");
> + if (status == EFI_NOT_FOUND)
> + return efi_secureboot_mode_disabled;
> + return efi_secureboot_mode_unknown;
> +}
In the out_efi_err path, the if-statement needs to come before the
pr_efi_err() call. Otherwise it would be a change of behaviour for
ARM to what we have now.
Also, minor nit, I'd expect Matt to ask for a newline between the
if-statement and the following statements, so:
out_efi_err:
if (status == EFI_NOT_FOUND)
return efi_secureboot_mode_disabled;
pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n");
return efi_secureboot_mode_unknown;
The error message doesn't say what the consequence is of the
failure to determine the status, but IIUC this differs between
x86 and ARM, is that correct? (If I remember the discussion
correctly, x86 defaults to disabled, ARM to enabled.)
Thanks,
Lukas
^ permalink raw reply
* Tearing down DMA transfer setup after DMA client has finished
From: Mason @ 2016-12-08 12:41 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <yw1xshpyzk6p.fsf@unicorn.mansr.com>
On 08/12/2016 13:20, M?ns Rullg?rd wrote:
> The only problem we have is that nobody envisioned hardware where the
> dma engine indicates completion slightly too soon. I suspect there's a
> fifo or such somewhere, and the interrupt is triggered when the last
> byte has been placed in the fifo rather than when it has been removed
> which would have been more correct.
As I (tried to) explain here:
https://marc.info/?l=dmaengine&m=148007808418242&w=2
A *read* MBUS agent raises its IRQ when it is safe for the memory
to be overwritten (i.e. every byte has been pushed into the pipe).
A *write* MBUS agent raises its IRQ when it is safe for another
agent to read any one of the transferred bytes.
The issue comes from the fact that, for a memory-to-device transfer,
the system will receive the read agent's IRQ, but most devices
(NFC, SATA) don't have an IRQ line to signal that their part of the
operation is complete.
As I explained, if one sets up a memory-to-memory DMA copy, then
the system will actually receive *two* IRQs.
Regards.
^ permalink raw reply
* [PATCH v6 1/2] ARM: davinci: Export two clocks function
From: Sekhar Nori @ 2016-12-08 12:39 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <4e313609-0acc-4e06-85e6-ba36b7916b33@ti.com>
On Thursday 08 December 2016 06:02 PM, Sekhar Nori wrote:
> On Wednesday 07 December 2016 10:39 PM, Alexandre Bailon wrote:
>> Rename and export __clk_enable() and __clk_disable() in order
>> to use them from usb-da8xx.c. This file implements the usb20 phy clock
>> that must be able to enable or disable usb20 clock.
>> To prevent a recurssive call to clk_enable() that would cause a recursive
>> locking issue, we must use __clk_enable() and __clk_disable().
>> Rename these methods in davinci_clk_enable() and davinci_clk_disable(),
>> and export them.
>>
>> Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
>> Suggested-by: David Lechner <david@lechnology.com>
>> ---
>> arch/arm/mach-davinci/clock.c | 14 ++++++++------
>> arch/arm/mach-davinci/clock.h | 2 ++
>> 2 files changed, 10 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c
>> index df42c93..0f967c3 100644
>> --- a/arch/arm/mach-davinci/clock.c
>> +++ b/arch/arm/mach-davinci/clock.c
>> @@ -31,10 +31,10 @@ static LIST_HEAD(clocks);
>> static DEFINE_MUTEX(clocks_mutex);
>> static DEFINE_SPINLOCK(clockfw_lock);
>>
>> -static void __clk_enable(struct clk *clk)
>> +void davinci_clk_enable(struct clk *clk)
>> {
>> if (clk->parent)
>> - __clk_enable(clk->parent);
>> + davinci_clk_enable(clk->parent);
>> if (clk->usecount++ == 0) {
>> if (clk->flags & CLK_PSC)
>> davinci_psc_config(clk->domain, clk->gpsc, clk->lpsc,
>> @@ -43,8 +43,9 @@ static void __clk_enable(struct clk *clk)
>> clk->clk_enable(clk);
>> }
>> }
>> +EXPORT_SYMBOL(davinci_clk_enable);
>
> We don't want to export these as we dont want drivers to use this API.
> This is to be used within mach-davinci only.
Also, subject line is pretty vague. What about:
ARM: davinci: provide lock-less versions of clk_{enable|disable}
In the description too, please talk about on the main difference between
davinci_clk_{enable|disable}() (lockless) vs clk_enable() (locked). Use
USB case only as an example. Using them in usb-da8xx.c might be the
current motivation but this addresses similar needs in future too.
Thanks,
Sekhar
^ permalink raw reply
* [PATCH v6 1/2] ARM: davinci: Export two clocks function
From: Sekhar Nori @ 2016-12-08 12:32 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1481130567-27829-1-git-send-email-abailon@baylibre.com>
On Wednesday 07 December 2016 10:39 PM, Alexandre Bailon wrote:
> Rename and export __clk_enable() and __clk_disable() in order
> to use them from usb-da8xx.c. This file implements the usb20 phy clock
> that must be able to enable or disable usb20 clock.
> To prevent a recurssive call to clk_enable() that would cause a recursive
> locking issue, we must use __clk_enable() and __clk_disable().
> Rename these methods in davinci_clk_enable() and davinci_clk_disable(),
> and export them.
>
> Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
> Suggested-by: David Lechner <david@lechnology.com>
> ---
> arch/arm/mach-davinci/clock.c | 14 ++++++++------
> arch/arm/mach-davinci/clock.h | 2 ++
> 2 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c
> index df42c93..0f967c3 100644
> --- a/arch/arm/mach-davinci/clock.c
> +++ b/arch/arm/mach-davinci/clock.c
> @@ -31,10 +31,10 @@ static LIST_HEAD(clocks);
> static DEFINE_MUTEX(clocks_mutex);
> static DEFINE_SPINLOCK(clockfw_lock);
>
> -static void __clk_enable(struct clk *clk)
> +void davinci_clk_enable(struct clk *clk)
> {
> if (clk->parent)
> - __clk_enable(clk->parent);
> + davinci_clk_enable(clk->parent);
> if (clk->usecount++ == 0) {
> if (clk->flags & CLK_PSC)
> davinci_psc_config(clk->domain, clk->gpsc, clk->lpsc,
> @@ -43,8 +43,9 @@ static void __clk_enable(struct clk *clk)
> clk->clk_enable(clk);
> }
> }
> +EXPORT_SYMBOL(davinci_clk_enable);
We don't want to export these as we dont want drivers to use this API.
This is to be used within mach-davinci only.
Thanks,
Sekhar
^ permalink raw reply
* Tearing down DMA transfer setup after DMA client has finished
From: Geert Uytterhoeven @ 2016-12-08 12:31 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <yw1xshpyzk6p.fsf@unicorn.mansr.com>
On Thu, Dec 8, 2016 at 1:20 PM, M?ns Rullg?rd <mans@mansr.com> wrote:
> Geert Uytterhoeven <geert@linux-m68k.org> writes:
>> On Thu, Dec 8, 2016 at 12:44 PM, M?ns Rullg?rd <mans@mansr.com> wrote:
>>> Vinod Koul <vinod.koul@intel.com> writes:
>>>> On Wed, Dec 07, 2016 at 04:45:58PM +0000, M?ns Rullg?rd wrote:
>>>>> Vinod Koul <vinod.koul@intel.com> writes:
>>>>> > On Tue, Dec 06, 2016 at 01:14:20PM +0000, M?ns Rullg?rd wrote:
>>>>> >> That's not going to work very well. Device drivers typically request
>>>>> >> dma channels in their probe functions or when the device is opened.
>>>>> >> This means that reserving one of the few channels there will inevitably
>>>>> >> make some other device fail to operate.
>>>>> >
>>>>> > No that doesnt make sense at all, you should get a channel only when you
>>>>> > want to use it and not in probe!
>>>>>
>>>>> Tell that to just about every single driver ever written.
>>>>
>>>> Not really, few do yes which is wrong but not _all_ do that.
>>>
>>> Every driver I ever looked at does. Name one you consider "correct."
>>
>> I'm far from claiming that drivers/tty/serial/sh-sci.c is perfect, but
>> it does request DMA channels at open time, not at probe time.
>
> In the part quoted above, I said most drivers request dma channels in
> their probe or open functions. For the purposes of this discussion,
> that distinction is irrelevant. In either case, the channel is held
> indefinitely. If this wasn't the correct way to use the dmaengine,
> there would be no need for the virt-dma helpers which are specifically
> designed for cases the one currently at hand.
Sorry, I mainly read Vinod's "not in probe", and missed your "or when the
device is opened".
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert at linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* [PATCH 8/8] efi: Add EFI_SECURE_BOOT bit [ver #6]
From: David Howells @ 2016-12-08 12:31 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <148120020832.5854.5448601415491330495.stgit@warthog.procyon.org.uk>
From: Josh Boyer <jwboyer@fedoraproject.org>
UEFI machines can be booted in Secure Boot mode. Add a EFI_SECURE_BOOT bit
that can be passed to efi_enabled() to find out whether secure boot is
enabled.
This will be used by the SysRq+x handler, registered by the x86 arch, to find
out whether secure boot mode is enabled so that it can be disabled.
Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: David Howells <dhowells@redhat.com>
---
arch/x86/kernel/setup.c | 15 +++++++++++++++
include/linux/efi.h | 1 +
2 files changed, 16 insertions(+)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9c337b0e8ba7..d8972ec6257d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1152,6 +1152,21 @@ void __init setup_arch(char **cmdline_p)
/* Allocate bigger log buffer */
setup_log_buf(1);
+ if (IS_ENABLED(CONFIG_EFI)) {
+ switch (boot_params.secure_boot) {
+ case efi_secureboot_mode_disabled:
+ pr_info("Secure boot disabled\n");
+ break;
+ case efi_secureboot_mode_enabled:
+ set_bit(EFI_SECURE_BOOT, &efi.flags);
+ pr_info("Secure boot enabled\n");
+ break;
+ default:
+ pr_info("Secure boot could not be determined\n");
+ break;
+ }
+ }
+
reserve_initrd();
acpi_table_upgrade();
diff --git a/include/linux/efi.h b/include/linux/efi.h
index c894ed5bfa1c..e1893f5002c3 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1070,6 +1070,7 @@ extern int __init efi_setup_pcdp_console(char *);
#define EFI_ARCH_1 7 /* First arch-specific bit */
#define EFI_DBG 8 /* Print additional debug info at runtime */
#define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */
+#define EFI_SECURE_BOOT 10 /* Are we in Secure Boot mode? */
#ifdef CONFIG_EFI
/*
^ permalink raw reply related
* [PATCH 7/8] efi: Handle secure boot from UEFI-2.6 [ver #6]
From: David Howells @ 2016-12-08 12:31 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <148120020832.5854.5448601415491330495.stgit@warthog.procyon.org.uk>
UEFI-2.6 adds a new variable, DeployedMode. If it exists, this must be 1
if we're to engage lockdown mode.
Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
drivers/firmware/efi/libstub/secureboot.c | 16 +++++++++++++++-
include/linux/efi.h | 4 ++++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index 39c91e091f6a..d653f76b9725 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -22,6 +22,9 @@ static const efi_char16_t const efi_SecureBoot_name[] = {
static const efi_char16_t const efi_SetupMode_name[] = {
'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0
};
+static const efi_char16_t const efi_DeployedMode_name[] = {
+ 'D', 'e', 'p', 'l', 'o', 'y', 'e', 'd', 'M', 'o', 'd', 'e', 0
+};
/* SHIM variables */
static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
@@ -40,7 +43,7 @@ static efi_char16_t const shim_MokSBState_name[] = {
enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
{
u32 attr;
- u8 secboot, setupmode, moksbstate;
+ u8 secboot, setupmode, deployedmode, moksbstate;
unsigned long size;
efi_status_t status;
@@ -59,6 +62,17 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
if (secboot == 0 || setupmode == 1)
return efi_secureboot_mode_disabled;
+ /* UEFI-2.6 requires DeployedMode to be 1. */
+ if (sys_table_arg->hdr.revision >= EFI_2_60_SYSTEM_TABLE_REVISION) {
+ size = sizeof(deployedmode);
+ status = get_efi_var(efi_DeployedMode_name, &efi_variable_guid,
+ NULL, &size, &deployedmode);
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+ if (deployedmode == 0)
+ return efi_secureboot_mode_disabled;
+ }
+
/* See if a user has put shim into insecure mode. If so, and if the
* variable doesn't have the runtime attribute set, we might as well
* honor that.
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 92e23f03045e..c894ed5bfa1c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -645,6 +645,10 @@ typedef struct {
#define EFI_SYSTEM_TABLE_SIGNATURE ((u64)0x5453595320494249ULL)
+#define EFI_2_60_SYSTEM_TABLE_REVISION ((2 << 16) | (60))
+#define EFI_2_50_SYSTEM_TABLE_REVISION ((2 << 16) | (50))
+#define EFI_2_40_SYSTEM_TABLE_REVISION ((2 << 16) | (40))
+#define EFI_2_31_SYSTEM_TABLE_REVISION ((2 << 16) | (31))
#define EFI_2_30_SYSTEM_TABLE_REVISION ((2 << 16) | (30))
#define EFI_2_20_SYSTEM_TABLE_REVISION ((2 << 16) | (20))
#define EFI_2_10_SYSTEM_TABLE_REVISION ((2 << 16) | (10))
^ permalink raw reply related
* [PATCH 6/8] efi: Disable secure boot if shim is in insecure mode [ver #6]
From: David Howells @ 2016-12-08 12:30 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <148120020832.5854.5448601415491330495.stgit@warthog.procyon.org.uk>
From: Josh Boyer <jwboyer@fedoraproject.org>
A user can manually tell the shim boot loader to disable validation of
images it loads. When a user does this, it creates a UEFI variable called
MokSBState that does not have the runtime attribute set. Given that the
user explicitly disabled validation, we can honor that and not enable
secure boot mode if that variable is set.
Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: David Howells <dhowells@redhat.com>
---
drivers/firmware/efi/libstub/secureboot.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index 62d6904da800..39c91e091f6a 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -23,6 +23,12 @@ static const efi_char16_t const efi_SetupMode_name[] = {
'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0
};
+/* SHIM variables */
+static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
+static efi_char16_t const shim_MokSBState_name[] = {
+ 'M', 'o', 'k', 'S', 'B', 'S', 't', 'a', 't', 'e', 0
+};
+
#define get_efi_var(name, vendor, ...) \
efi_call_runtime(get_variable, \
(efi_char16_t *)(name), (efi_guid_t *)(vendor), \
@@ -33,7 +39,8 @@ static const efi_char16_t const efi_SetupMode_name[] = {
*/
enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
{
- u8 secboot, setupmode;
+ u32 attr;
+ u8 secboot, setupmode, moksbstate;
unsigned long size;
efi_status_t status;
@@ -52,6 +59,21 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
if (secboot == 0 || setupmode == 1)
return efi_secureboot_mode_disabled;
+ /* See if a user has put shim into insecure mode. If so, and if the
+ * variable doesn't have the runtime attribute set, we might as well
+ * honor that.
+ */
+ size = sizeof(moksbstate);
+ status = get_efi_var(shim_MokSBState_name, &shim_guid,
+ &attr, &size, &moksbstate);
+
+ /* If it fails, we don't care why. Default to secure */
+ if (status != EFI_SUCCESS)
+ goto secure_boot_enabled;
+ if (!(attr & EFI_VARIABLE_RUNTIME_ACCESS) && moksbstate == 1)
+ return efi_secureboot_mode_disabled;
+
+secure_boot_enabled:
pr_efi(sys_table_arg, "UEFI Secure Boot is enabled.\n");
return efi_secureboot_mode_enabled;
^ permalink raw reply related
* [PATCH 5/8] efi: Get the secure boot status [ver #6]
From: David Howells @ 2016-12-08 12:30 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <148120020832.5854.5448601415491330495.stgit@warthog.procyon.org.uk>
Get the firmware's secure-boot status in the kernel boot wrapper and stash
it somewhere that the main kernel image can find.
The efi_get_secureboot() function is extracted from the arm stub and (a)
generalised so that it can be called from x86 and (b) made to use
efi_call_runtime() so that it can be run in mixed-mode.
Suggested-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: David Howells <dhowells@redhat.com>
---
Documentation/x86/zero-page.txt | 2 +
arch/x86/boot/compressed/eboot.c | 2 +
arch/x86/boot/compressed/head_32.S | 1
arch/x86/boot/compressed/head_64.S | 1
arch/x86/include/asm/bootparam_utils.h | 5 +-
arch/x86/include/uapi/asm/bootparam.h | 3 +
arch/x86/kernel/asm-offsets.c | 1
drivers/firmware/efi/libstub/Makefile | 2 -
drivers/firmware/efi/libstub/arm-stub.c | 63 +++--------------------------
drivers/firmware/efi/libstub/secureboot.c | 63 +++++++++++++++++++++++++++++
include/linux/efi.h | 8 ++++
11 files changed, 90 insertions(+), 61 deletions(-)
create mode 100644 drivers/firmware/efi/libstub/secureboot.c
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
index 95a4d34af3fd..b8527c6b7646 100644
--- a/Documentation/x86/zero-page.txt
+++ b/Documentation/x86/zero-page.txt
@@ -31,6 +31,8 @@ Offset Proto Name Meaning
1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below)
1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer
(below)
+1EB/001 ALL kbd_status Numlock is enabled
+1EC/001 ALL secure_boot Secure boot is enabled in the firmware
1EF/001 ALL sentinel Used to detect broken bootloaders
290/040 ALL edd_mbr_sig_buffer EDD MBR signatures
2D0/A00 ALL e820_map E820 memory map table
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c8c32ebcdfdb..5b151c262ac2 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1158,6 +1158,8 @@ struct boot_params *efi_main(struct efi_config *c,
else
setup_boot_services32(efi_early);
+ boot_params->secure_boot = efi_get_secureboot(sys_table);
+
setup_graphics(boot_params);
setup_efi_pci(boot_params);
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index d85b9625e836..c635f7e32f5c 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -61,6 +61,7 @@
__HEAD
ENTRY(startup_32)
+ movb $0, BP_secure_boot(%esi)
#ifdef CONFIG_EFI_STUB
jmp preferred_addr
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index beab8322f72a..ccd2c7461b7f 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -244,6 +244,7 @@ ENTRY(startup_64)
* that maps our entire kernel(text+data+bss+brk), zero page
* and command line.
*/
+ movb $0, BP_secure_boot(%rsi)
#ifdef CONFIG_EFI_STUB
/*
* The entry point for the PE/COFF executable is efi_pe_entry, so
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 4a8cb8d7cbd5..7e16d53ff6a3 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -38,9 +38,10 @@ static void sanitize_boot_params(struct boot_params *boot_params)
memset(&boot_params->ext_ramdisk_image, 0,
(char *)&boot_params->efi_info -
(char *)&boot_params->ext_ramdisk_image);
- memset(&boot_params->kbd_status, 0,
+ boot_params->kbd_status = 0;
+ memset(&boot_params->_pad5, 0,
(char *)&boot_params->hdr -
- (char *)&boot_params->kbd_status);
+ (char *)&boot_params->_pad5);
memset(&boot_params->_pad7[0], 0,
(char *)&boot_params->edd_mbr_sig_buffer[0] -
(char *)&boot_params->_pad7[0]);
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index b10bf319ed20..5138dacf8bb8 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -135,7 +135,8 @@ struct boot_params {
__u8 eddbuf_entries; /* 0x1e9 */
__u8 edd_mbr_sig_buf_entries; /* 0x1ea */
__u8 kbd_status; /* 0x1eb */
- __u8 _pad5[3]; /* 0x1ec */
+ __u8 secure_boot; /* 0x1ec */
+ __u8 _pad5[2]; /* 0x1ed */
/*
* The sentinel is set to a nonzero value (0xff) in header.S.
*
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index c62e015b126c..de827d6ac8c2 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -81,6 +81,7 @@ void common(void) {
BLANK();
OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_secure_boot, boot_params, secure_boot);
OFFSET(BP_loadflags, boot_params, hdr.loadflags);
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version);
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 6621b13c370f..9af966863612 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -28,7 +28,7 @@ OBJECT_FILES_NON_STANDARD := y
# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
KCOV_INSTRUMENT := n
-lib-y := efi-stub-helper.o gop.o
+lib-y := efi-stub-helper.o gop.o secureboot.o
# include the stub's generic dependencies from lib/ when building for ARM/arm64
arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index b4f7d78f9e8b..9984d0442442 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -20,52 +20,6 @@
bool __nokaslr;
-static int efi_get_secureboot(efi_system_table_t *sys_table_arg)
-{
- static efi_char16_t const sb_var_name[] = {
- 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 };
- static efi_char16_t const sm_var_name[] = {
- 'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0 };
-
- efi_guid_t var_guid = EFI_GLOBAL_VARIABLE_GUID;
- efi_get_variable_t *f_getvar = sys_table_arg->runtime->get_variable;
- u8 val;
- unsigned long size = sizeof(val);
- efi_status_t status;
-
- status = f_getvar((efi_char16_t *)sb_var_name, (efi_guid_t *)&var_guid,
- NULL, &size, &val);
-
- if (status != EFI_SUCCESS)
- goto out_efi_err;
-
- if (val == 0)
- return 0;
-
- status = f_getvar((efi_char16_t *)sm_var_name, (efi_guid_t *)&var_guid,
- NULL, &size, &val);
-
- if (status != EFI_SUCCESS)
- goto out_efi_err;
-
- if (val == 1)
- return 0;
-
- return 1;
-
-out_efi_err:
- switch (status) {
- case EFI_NOT_FOUND:
- return 0;
- case EFI_DEVICE_ERROR:
- return -EIO;
- case EFI_SECURITY_VIOLATION:
- return -EACCES;
- default:
- return -EINVAL;
- }
-}
-
efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
void *__image, void **__fh)
{
@@ -226,7 +180,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID;
unsigned long reserve_addr = 0;
unsigned long reserve_size = 0;
- int secure_boot = 0;
+ enum efi_secureboot_mode secure_boot;
struct screen_info *si;
/* Check if we were booted by the EFI firmware */
@@ -296,19 +250,14 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n");
secure_boot = efi_get_secureboot(sys_table);
- if (secure_boot > 0)
- pr_efi(sys_table, "UEFI Secure Boot is enabled.\n");
-
- if (secure_boot < 0) {
- pr_efi_err(sys_table,
- "could not determine UEFI Secure Boot status.\n");
- }
/*
- * Unauthenticated device tree data is a security hazard, so
- * ignore 'dtb=' unless UEFI Secure Boot is disabled.
+ * Unauthenticated device tree data is a security hazard, so ignore
+ * 'dtb=' unless UEFI Secure Boot is disabled. We assume that secure
+ * boot is enabled if we can't determine its state.
*/
- if (secure_boot != 0 && strstr(cmdline_ptr, "dtb=")) {
+ if (secure_boot != efi_secureboot_mode_disabled &&
+ strstr(cmdline_ptr, "dtb=")) {
pr_efi(sys_table, "Ignoring DTB from command line.\n");
} else {
status = handle_cmdline_files(sys_table, image, cmdline_ptr,
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
new file mode 100644
index 000000000000..62d6904da800
--- /dev/null
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -0,0 +1,63 @@
+/*
+ * Secure boot handling.
+ *
+ * Copyright (C) 2013,2014 Linaro Limited
+ * Roy Franz <roy.franz at linaro.org
+ * Copyright (C) 2013 Red Hat, Inc.
+ * Mark Salter <msalter@redhat.com>
+ *
+ * This file is part of the Linux kernel, and is made available under the
+ * terms of the GNU General Public License version 2.
+ *
+ */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+
+/* BIOS variables */
+static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
+static const efi_char16_t const efi_SecureBoot_name[] = {
+ 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0
+};
+static const efi_char16_t const efi_SetupMode_name[] = {
+ 'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0
+};
+
+#define get_efi_var(name, vendor, ...) \
+ efi_call_runtime(get_variable, \
+ (efi_char16_t *)(name), (efi_guid_t *)(vendor), \
+ __VA_ARGS__);
+
+/*
+ * Determine whether we're in secure boot mode.
+ */
+enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
+{
+ u8 secboot, setupmode;
+ unsigned long size;
+ efi_status_t status;
+
+ size = sizeof(secboot);
+ status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid,
+ NULL, &size, &secboot);
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+
+ size = sizeof(setupmode);
+ status = get_efi_var(efi_SetupMode_name, &efi_variable_guid,
+ NULL, &size, &setupmode);
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+
+ if (secboot == 0 || setupmode == 1)
+ return efi_secureboot_mode_disabled;
+
+ pr_efi(sys_table_arg, "UEFI Secure Boot is enabled.\n");
+ return efi_secureboot_mode_enabled;
+
+out_efi_err:
+ pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n");
+ if (status == EFI_NOT_FOUND)
+ return efi_secureboot_mode_disabled;
+ return efi_secureboot_mode_unknown;
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index c7904556d7a8..92e23f03045e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1477,6 +1477,14 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
bool efi_runtime_disabled(void);
extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
+enum efi_secureboot_mode {
+ efi_secureboot_mode_unset,
+ efi_secureboot_mode_unknown,
+ efi_secureboot_mode_disabled,
+ efi_secureboot_mode_enabled,
+};
+enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table);
+
/*
* Arch code can implement the following three template macros, avoiding
* reptition for the void/non-void return cases of {__,}efi_call_virt():
^ permalink raw reply related
* [PATCH 4/8] efi: Add SHIM and image security database GUID definitions [ver #6]
From: David Howells @ 2016-12-08 12:30 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <148120020832.5854.5448601415491330495.stgit@warthog.procyon.org.uk>
Add the definitions for shim and image security database, both of which
are used widely in various Linux distros.
Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
include/linux/efi.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 93a82de167eb..c7904556d7a8 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -610,6 +610,9 @@ void efi_native_runtime_setup(void);
#define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
#define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0)
+#define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f)
+#define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23)
+
/*
* This GUID is used to pass to the kernel proper the struct screen_info
* structure that was populated by the stub based on the GOP protocol instance
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox