Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v8 05/28] zinc: import Andy Polyakov's ChaCha20 x86_64 implementation
From: Jason A. Donenfeld @ 2018-10-18 14:56 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-crypto, davem, gregkh
  Cc: Jason A. Donenfeld, Andy Polyakov, Thomas Gleixner, Ingo Molnar,
	x86, Samuel Neves, Jean-Philippe Aumasson, Andy Lutomirski,
	Andrew Morton, Linus Torvalds, kernel-hardening
In-Reply-To: <20181018145712.7538-1-Jason@zx2c4.com>

These x86_64 vectorized implementations come from Andy Polyakov's
implementation, and are included here in raw form without modification,
so that subsequent commits that fix these up for the kernel can see how
it has changed.

While this is CRYPTOGAMS code, the originating code for this happens to
be the same as OpenSSL's commit cded951378069a478391843f5f8653c1eb5128da

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Based-on-code-from: Andy Polyakov <appro@openssl.org>
Cc: Andy Polyakov <appro@openssl.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: x86@kernel.org
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: kernel-hardening@lists.openwall.com
Cc: linux-crypto@vger.kernel.org
---
 .../chacha20/chacha20-x86_64-cryptogams.S     | 3433 +++++++++++++++++
 1 file changed, 3433 insertions(+)
 create mode 100644 lib/zinc/chacha20/chacha20-x86_64-cryptogams.S

diff --git a/lib/zinc/chacha20/chacha20-x86_64-cryptogams.S b/lib/zinc/chacha20/chacha20-x86_64-cryptogams.S
new file mode 100644
index 000000000000..2bfc76f7e01f
--- /dev/null
+++ b/lib/zinc/chacha20/chacha20-x86_64-cryptogams.S
@@ -0,0 +1,3433 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
+ */
+
+.text	
+
+
+
+.align	64
+.Lzero:
+.long	0,0,0,0
+.Lone:
+.long	1,0,0,0
+.Linc:
+.long	0,1,2,3
+.Lfour:
+.long	4,4,4,4
+.Lincy:
+.long	0,2,4,6,1,3,5,7
+.Leight:
+.long	8,8,8,8,8,8,8,8
+.Lrot16:
+.byte	0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd
+.Lrot24:
+.byte	0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
+.Ltwoy:
+.long	2,0,0,0, 2,0,0,0
+.align	64
+.Lzeroz:
+.long	0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
+.Lfourz:
+.long	4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
+.Lincz:
+.long	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+.Lsixteen:
+.long	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
+.Lsigma:
+.byte	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
+.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.globl	ChaCha20_ctr32
+.type	ChaCha20_ctr32,@function
+.align	64
+ChaCha20_ctr32:
+.cfi_startproc	
+	cmpq	$0,%rdx
+	je	.Lno_data
+	movq	OPENSSL_ia32cap_P+4(%rip),%r10
+	btq	$48,%r10
+	jc	.LChaCha20_avx512
+	testq	%r10,%r10
+	js	.LChaCha20_avx512vl
+	testl	$512,%r10d
+	jnz	.LChaCha20_ssse3
+
+	pushq	%rbx
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbx,-16
+	pushq	%rbp
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbp,-24
+	pushq	%r12
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r12,-32
+	pushq	%r13
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r13,-40
+	pushq	%r14
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r14,-48
+	pushq	%r15
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r15,-56
+	subq	$64+24,%rsp
+.cfi_adjust_cfa_offset	64+24
+.Lctr32_body:
+
+
+	movdqu	(%rcx),%xmm1
+	movdqu	16(%rcx),%xmm2
+	movdqu	(%r8),%xmm3
+	movdqa	.Lone(%rip),%xmm4
+
+
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	movq	%rdx,%rbp
+	jmp	.Loop_outer
+
+.align	32
+.Loop_outer:
+	movl	$0x61707865,%eax
+	movl	$0x3320646e,%ebx
+	movl	$0x79622d32,%ecx
+	movl	$0x6b206574,%edx
+	movl	16(%rsp),%r8d
+	movl	20(%rsp),%r9d
+	movl	24(%rsp),%r10d
+	movl	28(%rsp),%r11d
+	movd	%xmm3,%r12d
+	movl	52(%rsp),%r13d
+	movl	56(%rsp),%r14d
+	movl	60(%rsp),%r15d
+
+	movq	%rbp,64+0(%rsp)
+	movl	$10,%ebp
+	movq	%rsi,64+8(%rsp)
+.byte	102,72,15,126,214
+	movq	%rdi,64+16(%rsp)
+	movq	%rsi,%rdi
+	shrq	$32,%rdi
+	jmp	.Loop
+
+.align	32
+.Loop:
+	addl	%r8d,%eax
+	xorl	%eax,%r12d
+	roll	$16,%r12d
+	addl	%r9d,%ebx
+	xorl	%ebx,%r13d
+	roll	$16,%r13d
+	addl	%r12d,%esi
+	xorl	%esi,%r8d
+	roll	$12,%r8d
+	addl	%r13d,%edi
+	xorl	%edi,%r9d
+	roll	$12,%r9d
+	addl	%r8d,%eax
+	xorl	%eax,%r12d
+	roll	$8,%r12d
+	addl	%r9d,%ebx
+	xorl	%ebx,%r13d
+	roll	$8,%r13d
+	addl	%r12d,%esi
+	xorl	%esi,%r8d
+	roll	$7,%r8d
+	addl	%r13d,%edi
+	xorl	%edi,%r9d
+	roll	$7,%r9d
+	movl	%esi,32(%rsp)
+	movl	%edi,36(%rsp)
+	movl	40(%rsp),%esi
+	movl	44(%rsp),%edi
+	addl	%r10d,%ecx
+	xorl	%ecx,%r14d
+	roll	$16,%r14d
+	addl	%r11d,%edx
+	xorl	%edx,%r15d
+	roll	$16,%r15d
+	addl	%r14d,%esi
+	xorl	%esi,%r10d
+	roll	$12,%r10d
+	addl	%r15d,%edi
+	xorl	%edi,%r11d
+	roll	$12,%r11d
+	addl	%r10d,%ecx
+	xorl	%ecx,%r14d
+	roll	$8,%r14d
+	addl	%r11d,%edx
+	xorl	%edx,%r15d
+	roll	$8,%r15d
+	addl	%r14d,%esi
+	xorl	%esi,%r10d
+	roll	$7,%r10d
+	addl	%r15d,%edi
+	xorl	%edi,%r11d
+	roll	$7,%r11d
+	addl	%r9d,%eax
+	xorl	%eax,%r15d
+	roll	$16,%r15d
+	addl	%r10d,%ebx
+	xorl	%ebx,%r12d
+	roll	$16,%r12d
+	addl	%r15d,%esi
+	xorl	%esi,%r9d
+	roll	$12,%r9d
+	addl	%r12d,%edi
+	xorl	%edi,%r10d
+	roll	$12,%r10d
+	addl	%r9d,%eax
+	xorl	%eax,%r15d
+	roll	$8,%r15d
+	addl	%r10d,%ebx
+	xorl	%ebx,%r12d
+	roll	$8,%r12d
+	addl	%r15d,%esi
+	xorl	%esi,%r9d
+	roll	$7,%r9d
+	addl	%r12d,%edi
+	xorl	%edi,%r10d
+	roll	$7,%r10d
+	movl	%esi,40(%rsp)
+	movl	%edi,44(%rsp)
+	movl	32(%rsp),%esi
+	movl	36(%rsp),%edi
+	addl	%r11d,%ecx
+	xorl	%ecx,%r13d
+	roll	$16,%r13d
+	addl	%r8d,%edx
+	xorl	%edx,%r14d
+	roll	$16,%r14d
+	addl	%r13d,%esi
+	xorl	%esi,%r11d
+	roll	$12,%r11d
+	addl	%r14d,%edi
+	xorl	%edi,%r8d
+	roll	$12,%r8d
+	addl	%r11d,%ecx
+	xorl	%ecx,%r13d
+	roll	$8,%r13d
+	addl	%r8d,%edx
+	xorl	%edx,%r14d
+	roll	$8,%r14d
+	addl	%r13d,%esi
+	xorl	%esi,%r11d
+	roll	$7,%r11d
+	addl	%r14d,%edi
+	xorl	%edi,%r8d
+	roll	$7,%r8d
+	decl	%ebp
+	jnz	.Loop
+	movl	%edi,36(%rsp)
+	movl	%esi,32(%rsp)
+	movq	64(%rsp),%rbp
+	movdqa	%xmm2,%xmm1
+	movq	64+8(%rsp),%rsi
+	paddd	%xmm4,%xmm3
+	movq	64+16(%rsp),%rdi
+
+	addl	$0x61707865,%eax
+	addl	$0x3320646e,%ebx
+	addl	$0x79622d32,%ecx
+	addl	$0x6b206574,%edx
+	addl	16(%rsp),%r8d
+	addl	20(%rsp),%r9d
+	addl	24(%rsp),%r10d
+	addl	28(%rsp),%r11d
+	addl	48(%rsp),%r12d
+	addl	52(%rsp),%r13d
+	addl	56(%rsp),%r14d
+	addl	60(%rsp),%r15d
+	paddd	32(%rsp),%xmm1
+
+	cmpq	$64,%rbp
+	jb	.Ltail
+
+	xorl	0(%rsi),%eax
+	xorl	4(%rsi),%ebx
+	xorl	8(%rsi),%ecx
+	xorl	12(%rsi),%edx
+	xorl	16(%rsi),%r8d
+	xorl	20(%rsi),%r9d
+	xorl	24(%rsi),%r10d
+	xorl	28(%rsi),%r11d
+	movdqu	32(%rsi),%xmm0
+	xorl	48(%rsi),%r12d
+	xorl	52(%rsi),%r13d
+	xorl	56(%rsi),%r14d
+	xorl	60(%rsi),%r15d
+	leaq	64(%rsi),%rsi
+	pxor	%xmm1,%xmm0
+
+	movdqa	%xmm2,32(%rsp)
+	movd	%xmm3,48(%rsp)
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	movdqu	%xmm0,32(%rdi)
+	movl	%r12d,48(%rdi)
+	movl	%r13d,52(%rdi)
+	movl	%r14d,56(%rdi)
+	movl	%r15d,60(%rdi)
+	leaq	64(%rdi),%rdi
+
+	subq	$64,%rbp
+	jnz	.Loop_outer
+
+	jmp	.Ldone
+
+.align	16
+.Ltail:
+	movl	%eax,0(%rsp)
+	movl	%ebx,4(%rsp)
+	xorq	%rbx,%rbx
+	movl	%ecx,8(%rsp)
+	movl	%edx,12(%rsp)
+	movl	%r8d,16(%rsp)
+	movl	%r9d,20(%rsp)
+	movl	%r10d,24(%rsp)
+	movl	%r11d,28(%rsp)
+	movdqa	%xmm1,32(%rsp)
+	movl	%r12d,48(%rsp)
+	movl	%r13d,52(%rsp)
+	movl	%r14d,56(%rsp)
+	movl	%r15d,60(%rsp)
+
+.Loop_tail:
+	movzbl	(%rsi,%rbx,1),%eax
+	movzbl	(%rsp,%rbx,1),%edx
+	leaq	1(%rbx),%rbx
+	xorl	%edx,%eax
+	movb	%al,-1(%rdi,%rbx,1)
+	decq	%rbp
+	jnz	.Loop_tail
+
+.Ldone:
+	leaq	64+24+48(%rsp),%rsi
+.cfi_def_cfa	%rsi,8
+	movq	-48(%rsi),%r15
+.cfi_restore	%r15
+	movq	-40(%rsi),%r14
+.cfi_restore	%r14
+	movq	-32(%rsi),%r13
+.cfi_restore	%r13
+	movq	-24(%rsi),%r12
+.cfi_restore	%r12
+	movq	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	movq	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	leaq	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
+.Lno_data:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_ctr32,.-ChaCha20_ctr32
+.type	ChaCha20_ssse3,@function
+.align	32
+ChaCha20_ssse3:
+.cfi_startproc	
+.LChaCha20_ssse3:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	testl	$2048,%r10d
+	jnz	.LChaCha20_4xop
+	cmpq	$128,%rdx
+	je	.LChaCha20_128
+	ja	.LChaCha20_4x
+
+.Ldo_sse3_after_all:
+	subq	$64+8,%rsp
+	movdqa	.Lsigma(%rip),%xmm0
+	movdqu	(%rcx),%xmm1
+	movdqu	16(%rcx),%xmm2
+	movdqu	(%r8),%xmm3
+	movdqa	.Lrot16(%rip),%xmm6
+	movdqa	.Lrot24(%rip),%xmm7
+
+	movdqa	%xmm0,0(%rsp)
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	movq	$10,%r8
+	jmp	.Loop_ssse3
+
+.align	32
+.Loop_outer_ssse3:
+	movdqa	.Lone(%rip),%xmm3
+	movdqa	0(%rsp),%xmm0
+	movdqa	16(%rsp),%xmm1
+	movdqa	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+	movq	$10,%r8
+	movdqa	%xmm3,48(%rsp)
+	jmp	.Loop_ssse3
+
+.align	32
+.Loop_ssse3:
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,222
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,223
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2
+	pshufd	$57,%xmm1,%xmm1
+	pshufd	$147,%xmm3,%xmm3
+	nop
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,222
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$20,%xmm1
+	pslld	$12,%xmm4
+	por	%xmm4,%xmm1
+	paddd	%xmm1,%xmm0
+	pxor	%xmm0,%xmm3
+.byte	102,15,56,0,223
+	paddd	%xmm3,%xmm2
+	pxor	%xmm2,%xmm1
+	movdqa	%xmm1,%xmm4
+	psrld	$25,%xmm1
+	pslld	$7,%xmm4
+	por	%xmm4,%xmm1
+	pshufd	$78,%xmm2,%xmm2
+	pshufd	$147,%xmm1,%xmm1
+	pshufd	$57,%xmm3,%xmm3
+	decq	%r8
+	jnz	.Loop_ssse3
+	paddd	0(%rsp),%xmm0
+	paddd	16(%rsp),%xmm1
+	paddd	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+
+	cmpq	$64,%rdx
+	jb	.Ltail_ssse3
+
+	movdqu	0(%rsi),%xmm4
+	movdqu	16(%rsi),%xmm5
+	pxor	%xmm4,%xmm0
+	movdqu	32(%rsi),%xmm4
+	pxor	%xmm5,%xmm1
+	movdqu	48(%rsi),%xmm5
+	leaq	64(%rsi),%rsi
+	pxor	%xmm4,%xmm2
+	pxor	%xmm5,%xmm3
+
+	movdqu	%xmm0,0(%rdi)
+	movdqu	%xmm1,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	subq	$64,%rdx
+	jnz	.Loop_outer_ssse3
+
+	jmp	.Ldone_ssse3
+
+.align	16
+.Ltail_ssse3:
+	movdqa	%xmm0,0(%rsp)
+	movdqa	%xmm1,16(%rsp)
+	movdqa	%xmm2,32(%rsp)
+	movdqa	%xmm3,48(%rsp)
+	xorq	%r8,%r8
+
+.Loop_tail_ssse3:
+	movzbl	(%rsi,%r8,1),%eax
+	movzbl	(%rsp,%r8,1),%ecx
+	leaq	1(%r8),%r8
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r8,1)
+	decq	%rdx
+	jnz	.Loop_tail_ssse3
+
+.Ldone_ssse3:
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.Lssse3_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_ssse3,.-ChaCha20_ssse3
+.type	ChaCha20_128,@function
+.align	32
+ChaCha20_128:
+.cfi_startproc	
+.LChaCha20_128:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	subq	$64+8,%rsp
+	movdqa	.Lsigma(%rip),%xmm8
+	movdqu	(%rcx),%xmm9
+	movdqu	16(%rcx),%xmm2
+	movdqu	(%r8),%xmm3
+	movdqa	.Lone(%rip),%xmm1
+	movdqa	.Lrot16(%rip),%xmm6
+	movdqa	.Lrot24(%rip),%xmm7
+
+	movdqa	%xmm8,%xmm10
+	movdqa	%xmm8,0(%rsp)
+	movdqa	%xmm9,%xmm11
+	movdqa	%xmm9,16(%rsp)
+	movdqa	%xmm2,%xmm0
+	movdqa	%xmm2,32(%rsp)
+	paddd	%xmm3,%xmm1
+	movdqa	%xmm3,48(%rsp)
+	movq	$10,%r8
+	jmp	.Loop_128
+
+.align	32
+.Loop_128:
+	paddd	%xmm9,%xmm8
+	pxor	%xmm8,%xmm3
+	paddd	%xmm11,%xmm10
+	pxor	%xmm10,%xmm1
+.byte	102,15,56,0,222
+.byte	102,15,56,0,206
+	paddd	%xmm3,%xmm2
+	paddd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm9
+	pxor	%xmm0,%xmm11
+	movdqa	%xmm9,%xmm4
+	psrld	$20,%xmm9
+	movdqa	%xmm11,%xmm5
+	pslld	$12,%xmm4
+	psrld	$20,%xmm11
+	por	%xmm4,%xmm9
+	pslld	$12,%xmm5
+	por	%xmm5,%xmm11
+	paddd	%xmm9,%xmm8
+	pxor	%xmm8,%xmm3
+	paddd	%xmm11,%xmm10
+	pxor	%xmm10,%xmm1
+.byte	102,15,56,0,223
+.byte	102,15,56,0,207
+	paddd	%xmm3,%xmm2
+	paddd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm9
+	pxor	%xmm0,%xmm11
+	movdqa	%xmm9,%xmm4
+	psrld	$25,%xmm9
+	movdqa	%xmm11,%xmm5
+	pslld	$7,%xmm4
+	psrld	$25,%xmm11
+	por	%xmm4,%xmm9
+	pslld	$7,%xmm5
+	por	%xmm5,%xmm11
+	pshufd	$78,%xmm2,%xmm2
+	pshufd	$57,%xmm9,%xmm9
+	pshufd	$147,%xmm3,%xmm3
+	pshufd	$78,%xmm0,%xmm0
+	pshufd	$57,%xmm11,%xmm11
+	pshufd	$147,%xmm1,%xmm1
+	paddd	%xmm9,%xmm8
+	pxor	%xmm8,%xmm3
+	paddd	%xmm11,%xmm10
+	pxor	%xmm10,%xmm1
+.byte	102,15,56,0,222
+.byte	102,15,56,0,206
+	paddd	%xmm3,%xmm2
+	paddd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm9
+	pxor	%xmm0,%xmm11
+	movdqa	%xmm9,%xmm4
+	psrld	$20,%xmm9
+	movdqa	%xmm11,%xmm5
+	pslld	$12,%xmm4
+	psrld	$20,%xmm11
+	por	%xmm4,%xmm9
+	pslld	$12,%xmm5
+	por	%xmm5,%xmm11
+	paddd	%xmm9,%xmm8
+	pxor	%xmm8,%xmm3
+	paddd	%xmm11,%xmm10
+	pxor	%xmm10,%xmm1
+.byte	102,15,56,0,223
+.byte	102,15,56,0,207
+	paddd	%xmm3,%xmm2
+	paddd	%xmm1,%xmm0
+	pxor	%xmm2,%xmm9
+	pxor	%xmm0,%xmm11
+	movdqa	%xmm9,%xmm4
+	psrld	$25,%xmm9
+	movdqa	%xmm11,%xmm5
+	pslld	$7,%xmm4
+	psrld	$25,%xmm11
+	por	%xmm4,%xmm9
+	pslld	$7,%xmm5
+	por	%xmm5,%xmm11
+	pshufd	$78,%xmm2,%xmm2
+	pshufd	$147,%xmm9,%xmm9
+	pshufd	$57,%xmm3,%xmm3
+	pshufd	$78,%xmm0,%xmm0
+	pshufd	$147,%xmm11,%xmm11
+	pshufd	$57,%xmm1,%xmm1
+	decq	%r8
+	jnz	.Loop_128
+	paddd	0(%rsp),%xmm8
+	paddd	16(%rsp),%xmm9
+	paddd	32(%rsp),%xmm2
+	paddd	48(%rsp),%xmm3
+	paddd	.Lone(%rip),%xmm1
+	paddd	0(%rsp),%xmm10
+	paddd	16(%rsp),%xmm11
+	paddd	32(%rsp),%xmm0
+	paddd	48(%rsp),%xmm1
+
+	movdqu	0(%rsi),%xmm4
+	movdqu	16(%rsi),%xmm5
+	pxor	%xmm4,%xmm8
+	movdqu	32(%rsi),%xmm4
+	pxor	%xmm5,%xmm9
+	movdqu	48(%rsi),%xmm5
+	pxor	%xmm4,%xmm2
+	movdqu	64(%rsi),%xmm4
+	pxor	%xmm5,%xmm3
+	movdqu	80(%rsi),%xmm5
+	pxor	%xmm4,%xmm10
+	movdqu	96(%rsi),%xmm4
+	pxor	%xmm5,%xmm11
+	movdqu	112(%rsi),%xmm5
+	pxor	%xmm4,%xmm0
+	pxor	%xmm5,%xmm1
+
+	movdqu	%xmm8,0(%rdi)
+	movdqu	%xmm9,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm3,48(%rdi)
+	movdqu	%xmm10,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm0,96(%rdi)
+	movdqu	%xmm1,112(%rdi)
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.L128_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_128,.-ChaCha20_128
+.type	ChaCha20_4x,@function
+.align	32
+ChaCha20_4x:
+.cfi_startproc	
+.LChaCha20_4x:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	movq	%r10,%r11
+	shrq	$32,%r10
+	testq	$32,%r10
+	jnz	.LChaCha20_8x
+	cmpq	$192,%rdx
+	ja	.Lproceed4x
+
+	andq	$71303168,%r11
+	cmpq	$4194304,%r11
+	je	.Ldo_sse3_after_all
+
+.Lproceed4x:
+	subq	$0x140+8,%rsp
+	movdqa	.Lsigma(%rip),%xmm11
+	movdqu	(%rcx),%xmm15
+	movdqu	16(%rcx),%xmm7
+	movdqu	(%r8),%xmm3
+	leaq	256(%rsp),%rcx
+	leaq	.Lrot16(%rip),%r10
+	leaq	.Lrot24(%rip),%r11
+
+	pshufd	$0x00,%xmm11,%xmm8
+	pshufd	$0x55,%xmm11,%xmm9
+	movdqa	%xmm8,64(%rsp)
+	pshufd	$0xaa,%xmm11,%xmm10
+	movdqa	%xmm9,80(%rsp)
+	pshufd	$0xff,%xmm11,%xmm11
+	movdqa	%xmm10,96(%rsp)
+	movdqa	%xmm11,112(%rsp)
+
+	pshufd	$0x00,%xmm15,%xmm12
+	pshufd	$0x55,%xmm15,%xmm13
+	movdqa	%xmm12,128-256(%rcx)
+	pshufd	$0xaa,%xmm15,%xmm14
+	movdqa	%xmm13,144-256(%rcx)
+	pshufd	$0xff,%xmm15,%xmm15
+	movdqa	%xmm14,160-256(%rcx)
+	movdqa	%xmm15,176-256(%rcx)
+
+	pshufd	$0x00,%xmm7,%xmm4
+	pshufd	$0x55,%xmm7,%xmm5
+	movdqa	%xmm4,192-256(%rcx)
+	pshufd	$0xaa,%xmm7,%xmm6
+	movdqa	%xmm5,208-256(%rcx)
+	pshufd	$0xff,%xmm7,%xmm7
+	movdqa	%xmm6,224-256(%rcx)
+	movdqa	%xmm7,240-256(%rcx)
+
+	pshufd	$0x00,%xmm3,%xmm0
+	pshufd	$0x55,%xmm3,%xmm1
+	paddd	.Linc(%rip),%xmm0
+	pshufd	$0xaa,%xmm3,%xmm2
+	movdqa	%xmm1,272-256(%rcx)
+	pshufd	$0xff,%xmm3,%xmm3
+	movdqa	%xmm2,288-256(%rcx)
+	movdqa	%xmm3,304-256(%rcx)
+
+	jmp	.Loop_enter4x
+
+.align	32
+.Loop_outer4x:
+	movdqa	64(%rsp),%xmm8
+	movdqa	80(%rsp),%xmm9
+	movdqa	96(%rsp),%xmm10
+	movdqa	112(%rsp),%xmm11
+	movdqa	128-256(%rcx),%xmm12
+	movdqa	144-256(%rcx),%xmm13
+	movdqa	160-256(%rcx),%xmm14
+	movdqa	176-256(%rcx),%xmm15
+	movdqa	192-256(%rcx),%xmm4
+	movdqa	208-256(%rcx),%xmm5
+	movdqa	224-256(%rcx),%xmm6
+	movdqa	240-256(%rcx),%xmm7
+	movdqa	256-256(%rcx),%xmm0
+	movdqa	272-256(%rcx),%xmm1
+	movdqa	288-256(%rcx),%xmm2
+	movdqa	304-256(%rcx),%xmm3
+	paddd	.Lfour(%rip),%xmm0
+
+.Loop_enter4x:
+	movdqa	%xmm6,32(%rsp)
+	movdqa	%xmm7,48(%rsp)
+	movdqa	(%r10),%xmm7
+	movl	$10,%eax
+	movdqa	%xmm0,256-256(%rcx)
+	jmp	.Loop4x
+
+.align	32
+.Loop4x:
+	paddd	%xmm12,%xmm8
+	paddd	%xmm13,%xmm9
+	pxor	%xmm8,%xmm0
+	pxor	%xmm9,%xmm1
+.byte	102,15,56,0,199
+.byte	102,15,56,0,207
+	paddd	%xmm0,%xmm4
+	paddd	%xmm1,%xmm5
+	pxor	%xmm4,%xmm12
+	pxor	%xmm5,%xmm13
+	movdqa	%xmm12,%xmm6
+	pslld	$12,%xmm12
+	psrld	$20,%xmm6
+	movdqa	%xmm13,%xmm7
+	pslld	$12,%xmm13
+	por	%xmm6,%xmm12
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm13
+	paddd	%xmm12,%xmm8
+	paddd	%xmm13,%xmm9
+	pxor	%xmm8,%xmm0
+	pxor	%xmm9,%xmm1
+.byte	102,15,56,0,198
+.byte	102,15,56,0,206
+	paddd	%xmm0,%xmm4
+	paddd	%xmm1,%xmm5
+	pxor	%xmm4,%xmm12
+	pxor	%xmm5,%xmm13
+	movdqa	%xmm12,%xmm7
+	pslld	$7,%xmm12
+	psrld	$25,%xmm7
+	movdqa	%xmm13,%xmm6
+	pslld	$7,%xmm13
+	por	%xmm7,%xmm12
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm13
+	movdqa	%xmm4,0(%rsp)
+	movdqa	%xmm5,16(%rsp)
+	movdqa	32(%rsp),%xmm4
+	movdqa	48(%rsp),%xmm5
+	paddd	%xmm14,%xmm10
+	paddd	%xmm15,%xmm11
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,0,215
+.byte	102,15,56,0,223
+	paddd	%xmm2,%xmm4
+	paddd	%xmm3,%xmm5
+	pxor	%xmm4,%xmm14
+	pxor	%xmm5,%xmm15
+	movdqa	%xmm14,%xmm6
+	pslld	$12,%xmm14
+	psrld	$20,%xmm6
+	movdqa	%xmm15,%xmm7
+	pslld	$12,%xmm15
+	por	%xmm6,%xmm14
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm15
+	paddd	%xmm14,%xmm10
+	paddd	%xmm15,%xmm11
+	pxor	%xmm10,%xmm2
+	pxor	%xmm11,%xmm3
+.byte	102,15,56,0,214
+.byte	102,15,56,0,222
+	paddd	%xmm2,%xmm4
+	paddd	%xmm3,%xmm5
+	pxor	%xmm4,%xmm14
+	pxor	%xmm5,%xmm15
+	movdqa	%xmm14,%xmm7
+	pslld	$7,%xmm14
+	psrld	$25,%xmm7
+	movdqa	%xmm15,%xmm6
+	pslld	$7,%xmm15
+	por	%xmm7,%xmm14
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm15
+	paddd	%xmm13,%xmm8
+	paddd	%xmm14,%xmm9
+	pxor	%xmm8,%xmm3
+	pxor	%xmm9,%xmm0
+.byte	102,15,56,0,223
+.byte	102,15,56,0,199
+	paddd	%xmm3,%xmm4
+	paddd	%xmm0,%xmm5
+	pxor	%xmm4,%xmm13
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm13,%xmm6
+	pslld	$12,%xmm13
+	psrld	$20,%xmm6
+	movdqa	%xmm14,%xmm7
+	pslld	$12,%xmm14
+	por	%xmm6,%xmm13
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm14
+	paddd	%xmm13,%xmm8
+	paddd	%xmm14,%xmm9
+	pxor	%xmm8,%xmm3
+	pxor	%xmm9,%xmm0
+.byte	102,15,56,0,222
+.byte	102,15,56,0,198
+	paddd	%xmm3,%xmm4
+	paddd	%xmm0,%xmm5
+	pxor	%xmm4,%xmm13
+	pxor	%xmm5,%xmm14
+	movdqa	%xmm13,%xmm7
+	pslld	$7,%xmm13
+	psrld	$25,%xmm7
+	movdqa	%xmm14,%xmm6
+	pslld	$7,%xmm14
+	por	%xmm7,%xmm13
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm14
+	movdqa	%xmm4,32(%rsp)
+	movdqa	%xmm5,48(%rsp)
+	movdqa	0(%rsp),%xmm4
+	movdqa	16(%rsp),%xmm5
+	paddd	%xmm15,%xmm10
+	paddd	%xmm12,%xmm11
+	pxor	%xmm10,%xmm1
+	pxor	%xmm11,%xmm2
+.byte	102,15,56,0,207
+.byte	102,15,56,0,215
+	paddd	%xmm1,%xmm4
+	paddd	%xmm2,%xmm5
+	pxor	%xmm4,%xmm15
+	pxor	%xmm5,%xmm12
+	movdqa	%xmm15,%xmm6
+	pslld	$12,%xmm15
+	psrld	$20,%xmm6
+	movdqa	%xmm12,%xmm7
+	pslld	$12,%xmm12
+	por	%xmm6,%xmm15
+	psrld	$20,%xmm7
+	movdqa	(%r11),%xmm6
+	por	%xmm7,%xmm12
+	paddd	%xmm15,%xmm10
+	paddd	%xmm12,%xmm11
+	pxor	%xmm10,%xmm1
+	pxor	%xmm11,%xmm2
+.byte	102,15,56,0,206
+.byte	102,15,56,0,214
+	paddd	%xmm1,%xmm4
+	paddd	%xmm2,%xmm5
+	pxor	%xmm4,%xmm15
+	pxor	%xmm5,%xmm12
+	movdqa	%xmm15,%xmm7
+	pslld	$7,%xmm15
+	psrld	$25,%xmm7
+	movdqa	%xmm12,%xmm6
+	pslld	$7,%xmm12
+	por	%xmm7,%xmm15
+	psrld	$25,%xmm6
+	movdqa	(%r10),%xmm7
+	por	%xmm6,%xmm12
+	decl	%eax
+	jnz	.Loop4x
+
+	paddd	64(%rsp),%xmm8
+	paddd	80(%rsp),%xmm9
+	paddd	96(%rsp),%xmm10
+	paddd	112(%rsp),%xmm11
+
+	movdqa	%xmm8,%xmm6
+	punpckldq	%xmm9,%xmm8
+	movdqa	%xmm10,%xmm7
+	punpckldq	%xmm11,%xmm10
+	punpckhdq	%xmm9,%xmm6
+	punpckhdq	%xmm11,%xmm7
+	movdqa	%xmm8,%xmm9
+	punpcklqdq	%xmm10,%xmm8
+	movdqa	%xmm6,%xmm11
+	punpcklqdq	%xmm7,%xmm6
+	punpckhqdq	%xmm10,%xmm9
+	punpckhqdq	%xmm7,%xmm11
+	paddd	128-256(%rcx),%xmm12
+	paddd	144-256(%rcx),%xmm13
+	paddd	160-256(%rcx),%xmm14
+	paddd	176-256(%rcx),%xmm15
+
+	movdqa	%xmm8,0(%rsp)
+	movdqa	%xmm9,16(%rsp)
+	movdqa	32(%rsp),%xmm8
+	movdqa	48(%rsp),%xmm9
+
+	movdqa	%xmm12,%xmm10
+	punpckldq	%xmm13,%xmm12
+	movdqa	%xmm14,%xmm7
+	punpckldq	%xmm15,%xmm14
+	punpckhdq	%xmm13,%xmm10
+	punpckhdq	%xmm15,%xmm7
+	movdqa	%xmm12,%xmm13
+	punpcklqdq	%xmm14,%xmm12
+	movdqa	%xmm10,%xmm15
+	punpcklqdq	%xmm7,%xmm10
+	punpckhqdq	%xmm14,%xmm13
+	punpckhqdq	%xmm7,%xmm15
+	paddd	192-256(%rcx),%xmm4
+	paddd	208-256(%rcx),%xmm5
+	paddd	224-256(%rcx),%xmm8
+	paddd	240-256(%rcx),%xmm9
+
+	movdqa	%xmm6,32(%rsp)
+	movdqa	%xmm11,48(%rsp)
+
+	movdqa	%xmm4,%xmm14
+	punpckldq	%xmm5,%xmm4
+	movdqa	%xmm8,%xmm7
+	punpckldq	%xmm9,%xmm8
+	punpckhdq	%xmm5,%xmm14
+	punpckhdq	%xmm9,%xmm7
+	movdqa	%xmm4,%xmm5
+	punpcklqdq	%xmm8,%xmm4
+	movdqa	%xmm14,%xmm9
+	punpcklqdq	%xmm7,%xmm14
+	punpckhqdq	%xmm8,%xmm5
+	punpckhqdq	%xmm7,%xmm9
+	paddd	256-256(%rcx),%xmm0
+	paddd	272-256(%rcx),%xmm1
+	paddd	288-256(%rcx),%xmm2
+	paddd	304-256(%rcx),%xmm3
+
+	movdqa	%xmm0,%xmm8
+	punpckldq	%xmm1,%xmm0
+	movdqa	%xmm2,%xmm7
+	punpckldq	%xmm3,%xmm2
+	punpckhdq	%xmm1,%xmm8
+	punpckhdq	%xmm3,%xmm7
+	movdqa	%xmm0,%xmm1
+	punpcklqdq	%xmm2,%xmm0
+	movdqa	%xmm8,%xmm3
+	punpcklqdq	%xmm7,%xmm8
+	punpckhqdq	%xmm2,%xmm1
+	punpckhqdq	%xmm7,%xmm3
+	cmpq	$256,%rdx
+	jb	.Ltail4x
+
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+
+	movdqu	%xmm6,64(%rdi)
+	movdqu	0(%rsi),%xmm6
+	movdqu	%xmm11,80(%rdi)
+	movdqu	16(%rsi),%xmm11
+	movdqu	%xmm2,96(%rdi)
+	movdqu	32(%rsi),%xmm2
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+	movdqu	48(%rsi),%xmm7
+	pxor	32(%rsp),%xmm6
+	pxor	%xmm10,%xmm11
+	pxor	%xmm14,%xmm2
+	pxor	%xmm8,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	48(%rsp),%xmm6
+	pxor	%xmm15,%xmm11
+	pxor	%xmm9,%xmm2
+	pxor	%xmm3,%xmm7
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm2,96(%rdi)
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$256,%rdx
+	jnz	.Loop_outer4x
+
+	jmp	.Ldone4x
+
+.Ltail4x:
+	cmpq	$192,%rdx
+	jae	.L192_or_more4x
+	cmpq	$128,%rdx
+	jae	.L128_or_more4x
+	cmpq	$64,%rdx
+	jae	.L64_or_more4x
+
+
+	xorq	%r10,%r10
+
+	movdqa	%xmm12,16(%rsp)
+	movdqa	%xmm4,32(%rsp)
+	movdqa	%xmm0,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L64_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+	movdqu	%xmm6,0(%rdi)
+	movdqu	%xmm11,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm7,48(%rdi)
+	je	.Ldone4x
+
+	movdqa	16(%rsp),%xmm6
+	leaq	64(%rsi),%rsi
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm13,16(%rsp)
+	leaq	64(%rdi),%rdi
+	movdqa	%xmm5,32(%rsp)
+	subq	$64,%rdx
+	movdqa	%xmm1,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L128_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+	movdqu	%xmm6,64(%rdi)
+	movdqu	%xmm11,80(%rdi)
+	movdqu	%xmm2,96(%rdi)
+	movdqu	%xmm7,112(%rdi)
+	je	.Ldone4x
+
+	movdqa	32(%rsp),%xmm6
+	leaq	128(%rsi),%rsi
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm10,16(%rsp)
+	leaq	128(%rdi),%rdi
+	movdqa	%xmm14,32(%rsp)
+	subq	$128,%rdx
+	movdqa	%xmm8,48(%rsp)
+	jmp	.Loop_tail4x
+
+.align	32
+.L192_or_more4x:
+	movdqu	0(%rsi),%xmm6
+	movdqu	16(%rsi),%xmm11
+	movdqu	32(%rsi),%xmm2
+	movdqu	48(%rsi),%xmm7
+	pxor	0(%rsp),%xmm6
+	pxor	%xmm12,%xmm11
+	pxor	%xmm4,%xmm2
+	pxor	%xmm0,%xmm7
+
+	movdqu	%xmm6,0(%rdi)
+	movdqu	64(%rsi),%xmm6
+	movdqu	%xmm11,16(%rdi)
+	movdqu	80(%rsi),%xmm11
+	movdqu	%xmm2,32(%rdi)
+	movdqu	96(%rsi),%xmm2
+	movdqu	%xmm7,48(%rdi)
+	movdqu	112(%rsi),%xmm7
+	leaq	128(%rsi),%rsi
+	pxor	16(%rsp),%xmm6
+	pxor	%xmm13,%xmm11
+	pxor	%xmm5,%xmm2
+	pxor	%xmm1,%xmm7
+
+	movdqu	%xmm6,64(%rdi)
+	movdqu	0(%rsi),%xmm6
+	movdqu	%xmm11,80(%rdi)
+	movdqu	16(%rsi),%xmm11
+	movdqu	%xmm2,96(%rdi)
+	movdqu	32(%rsi),%xmm2
+	movdqu	%xmm7,112(%rdi)
+	leaq	128(%rdi),%rdi
+	movdqu	48(%rsi),%xmm7
+	pxor	32(%rsp),%xmm6
+	pxor	%xmm10,%xmm11
+	pxor	%xmm14,%xmm2
+	pxor	%xmm8,%xmm7
+	movdqu	%xmm6,0(%rdi)
+	movdqu	%xmm11,16(%rdi)
+	movdqu	%xmm2,32(%rdi)
+	movdqu	%xmm7,48(%rdi)
+	je	.Ldone4x
+
+	movdqa	48(%rsp),%xmm6
+	leaq	64(%rsi),%rsi
+	xorq	%r10,%r10
+	movdqa	%xmm6,0(%rsp)
+	movdqa	%xmm15,16(%rsp)
+	leaq	64(%rdi),%rdi
+	movdqa	%xmm9,32(%rsp)
+	subq	$192,%rdx
+	movdqa	%xmm3,48(%rsp)
+
+.Loop_tail4x:
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)
+	decq	%rdx
+	jnz	.Loop_tail4x
+
+.Ldone4x:
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.L4x_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_4x,.-ChaCha20_4x
+.type	ChaCha20_4xop,@function
+.align	32
+ChaCha20_4xop:
+.cfi_startproc	
+.LChaCha20_4xop:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	subq	$0x140+8,%rsp
+	vzeroupper
+
+	vmovdqa	.Lsigma(%rip),%xmm11
+	vmovdqu	(%rcx),%xmm3
+	vmovdqu	16(%rcx),%xmm15
+	vmovdqu	(%r8),%xmm7
+	leaq	256(%rsp),%rcx
+
+	vpshufd	$0x00,%xmm11,%xmm8
+	vpshufd	$0x55,%xmm11,%xmm9
+	vmovdqa	%xmm8,64(%rsp)
+	vpshufd	$0xaa,%xmm11,%xmm10
+	vmovdqa	%xmm9,80(%rsp)
+	vpshufd	$0xff,%xmm11,%xmm11
+	vmovdqa	%xmm10,96(%rsp)
+	vmovdqa	%xmm11,112(%rsp)
+
+	vpshufd	$0x00,%xmm3,%xmm0
+	vpshufd	$0x55,%xmm3,%xmm1
+	vmovdqa	%xmm0,128-256(%rcx)
+	vpshufd	$0xaa,%xmm3,%xmm2
+	vmovdqa	%xmm1,144-256(%rcx)
+	vpshufd	$0xff,%xmm3,%xmm3
+	vmovdqa	%xmm2,160-256(%rcx)
+	vmovdqa	%xmm3,176-256(%rcx)
+
+	vpshufd	$0x00,%xmm15,%xmm12
+	vpshufd	$0x55,%xmm15,%xmm13
+	vmovdqa	%xmm12,192-256(%rcx)
+	vpshufd	$0xaa,%xmm15,%xmm14
+	vmovdqa	%xmm13,208-256(%rcx)
+	vpshufd	$0xff,%xmm15,%xmm15
+	vmovdqa	%xmm14,224-256(%rcx)
+	vmovdqa	%xmm15,240-256(%rcx)
+
+	vpshufd	$0x00,%xmm7,%xmm4
+	vpshufd	$0x55,%xmm7,%xmm5
+	vpaddd	.Linc(%rip),%xmm4,%xmm4
+	vpshufd	$0xaa,%xmm7,%xmm6
+	vmovdqa	%xmm5,272-256(%rcx)
+	vpshufd	$0xff,%xmm7,%xmm7
+	vmovdqa	%xmm6,288-256(%rcx)
+	vmovdqa	%xmm7,304-256(%rcx)
+
+	jmp	.Loop_enter4xop
+
+.align	32
+.Loop_outer4xop:
+	vmovdqa	64(%rsp),%xmm8
+	vmovdqa	80(%rsp),%xmm9
+	vmovdqa	96(%rsp),%xmm10
+	vmovdqa	112(%rsp),%xmm11
+	vmovdqa	128-256(%rcx),%xmm0
+	vmovdqa	144-256(%rcx),%xmm1
+	vmovdqa	160-256(%rcx),%xmm2
+	vmovdqa	176-256(%rcx),%xmm3
+	vmovdqa	192-256(%rcx),%xmm12
+	vmovdqa	208-256(%rcx),%xmm13
+	vmovdqa	224-256(%rcx),%xmm14
+	vmovdqa	240-256(%rcx),%xmm15
+	vmovdqa	256-256(%rcx),%xmm4
+	vmovdqa	272-256(%rcx),%xmm5
+	vmovdqa	288-256(%rcx),%xmm6
+	vmovdqa	304-256(%rcx),%xmm7
+	vpaddd	.Lfour(%rip),%xmm4,%xmm4
+
+.Loop_enter4xop:
+	movl	$10,%eax
+	vmovdqa	%xmm4,256-256(%rcx)
+	jmp	.Loop4xop
+
+.align	32
+.Loop4xop:
+	vpaddd	%xmm0,%xmm8,%xmm8
+	vpaddd	%xmm1,%xmm9,%xmm9
+	vpaddd	%xmm2,%xmm10,%xmm10
+	vpaddd	%xmm3,%xmm11,%xmm11
+	vpxor	%xmm4,%xmm8,%xmm4
+	vpxor	%xmm5,%xmm9,%xmm5
+	vpxor	%xmm6,%xmm10,%xmm6
+	vpxor	%xmm7,%xmm11,%xmm7
+.byte	143,232,120,194,228,16
+.byte	143,232,120,194,237,16
+.byte	143,232,120,194,246,16
+.byte	143,232,120,194,255,16
+	vpaddd	%xmm4,%xmm12,%xmm12
+	vpaddd	%xmm5,%xmm13,%xmm13
+	vpaddd	%xmm6,%xmm14,%xmm14
+	vpaddd	%xmm7,%xmm15,%xmm15
+	vpxor	%xmm0,%xmm12,%xmm0
+	vpxor	%xmm1,%xmm13,%xmm1
+	vpxor	%xmm14,%xmm2,%xmm2
+	vpxor	%xmm15,%xmm3,%xmm3
+.byte	143,232,120,194,192,12
+.byte	143,232,120,194,201,12
+.byte	143,232,120,194,210,12
+.byte	143,232,120,194,219,12
+	vpaddd	%xmm8,%xmm0,%xmm8
+	vpaddd	%xmm9,%xmm1,%xmm9
+	vpaddd	%xmm2,%xmm10,%xmm10
+	vpaddd	%xmm3,%xmm11,%xmm11
+	vpxor	%xmm4,%xmm8,%xmm4
+	vpxor	%xmm5,%xmm9,%xmm5
+	vpxor	%xmm6,%xmm10,%xmm6
+	vpxor	%xmm7,%xmm11,%xmm7
+.byte	143,232,120,194,228,8
+.byte	143,232,120,194,237,8
+.byte	143,232,120,194,246,8
+.byte	143,232,120,194,255,8
+	vpaddd	%xmm4,%xmm12,%xmm12
+	vpaddd	%xmm5,%xmm13,%xmm13
+	vpaddd	%xmm6,%xmm14,%xmm14
+	vpaddd	%xmm7,%xmm15,%xmm15
+	vpxor	%xmm0,%xmm12,%xmm0
+	vpxor	%xmm1,%xmm13,%xmm1
+	vpxor	%xmm14,%xmm2,%xmm2
+	vpxor	%xmm15,%xmm3,%xmm3
+.byte	143,232,120,194,192,7
+.byte	143,232,120,194,201,7
+.byte	143,232,120,194,210,7
+.byte	143,232,120,194,219,7
+	vpaddd	%xmm1,%xmm8,%xmm8
+	vpaddd	%xmm2,%xmm9,%xmm9
+	vpaddd	%xmm3,%xmm10,%xmm10
+	vpaddd	%xmm0,%xmm11,%xmm11
+	vpxor	%xmm7,%xmm8,%xmm7
+	vpxor	%xmm4,%xmm9,%xmm4
+	vpxor	%xmm5,%xmm10,%xmm5
+	vpxor	%xmm6,%xmm11,%xmm6
+.byte	143,232,120,194,255,16
+.byte	143,232,120,194,228,16
+.byte	143,232,120,194,237,16
+.byte	143,232,120,194,246,16
+	vpaddd	%xmm7,%xmm14,%xmm14
+	vpaddd	%xmm4,%xmm15,%xmm15
+	vpaddd	%xmm5,%xmm12,%xmm12
+	vpaddd	%xmm6,%xmm13,%xmm13
+	vpxor	%xmm1,%xmm14,%xmm1
+	vpxor	%xmm2,%xmm15,%xmm2
+	vpxor	%xmm12,%xmm3,%xmm3
+	vpxor	%xmm13,%xmm0,%xmm0
+.byte	143,232,120,194,201,12
+.byte	143,232,120,194,210,12
+.byte	143,232,120,194,219,12
+.byte	143,232,120,194,192,12
+	vpaddd	%xmm8,%xmm1,%xmm8
+	vpaddd	%xmm9,%xmm2,%xmm9
+	vpaddd	%xmm3,%xmm10,%xmm10
+	vpaddd	%xmm0,%xmm11,%xmm11
+	vpxor	%xmm7,%xmm8,%xmm7
+	vpxor	%xmm4,%xmm9,%xmm4
+	vpxor	%xmm5,%xmm10,%xmm5
+	vpxor	%xmm6,%xmm11,%xmm6
+.byte	143,232,120,194,255,8
+.byte	143,232,120,194,228,8
+.byte	143,232,120,194,237,8
+.byte	143,232,120,194,246,8
+	vpaddd	%xmm7,%xmm14,%xmm14
+	vpaddd	%xmm4,%xmm15,%xmm15
+	vpaddd	%xmm5,%xmm12,%xmm12
+	vpaddd	%xmm6,%xmm13,%xmm13
+	vpxor	%xmm1,%xmm14,%xmm1
+	vpxor	%xmm2,%xmm15,%xmm2
+	vpxor	%xmm12,%xmm3,%xmm3
+	vpxor	%xmm13,%xmm0,%xmm0
+.byte	143,232,120,194,201,7
+.byte	143,232,120,194,210,7
+.byte	143,232,120,194,219,7
+.byte	143,232,120,194,192,7
+	decl	%eax
+	jnz	.Loop4xop
+
+	vpaddd	64(%rsp),%xmm8,%xmm8
+	vpaddd	80(%rsp),%xmm9,%xmm9
+	vpaddd	96(%rsp),%xmm10,%xmm10
+	vpaddd	112(%rsp),%xmm11,%xmm11
+
+	vmovdqa	%xmm14,32(%rsp)
+	vmovdqa	%xmm15,48(%rsp)
+
+	vpunpckldq	%xmm9,%xmm8,%xmm14
+	vpunpckldq	%xmm11,%xmm10,%xmm15
+	vpunpckhdq	%xmm9,%xmm8,%xmm8
+	vpunpckhdq	%xmm11,%xmm10,%xmm10
+	vpunpcklqdq	%xmm15,%xmm14,%xmm9
+	vpunpckhqdq	%xmm15,%xmm14,%xmm14
+	vpunpcklqdq	%xmm10,%xmm8,%xmm11
+	vpunpckhqdq	%xmm10,%xmm8,%xmm8
+	vpaddd	128-256(%rcx),%xmm0,%xmm0
+	vpaddd	144-256(%rcx),%xmm1,%xmm1
+	vpaddd	160-256(%rcx),%xmm2,%xmm2
+	vpaddd	176-256(%rcx),%xmm3,%xmm3
+
+	vmovdqa	%xmm9,0(%rsp)
+	vmovdqa	%xmm14,16(%rsp)
+	vmovdqa	32(%rsp),%xmm9
+	vmovdqa	48(%rsp),%xmm14
+
+	vpunpckldq	%xmm1,%xmm0,%xmm10
+	vpunpckldq	%xmm3,%xmm2,%xmm15
+	vpunpckhdq	%xmm1,%xmm0,%xmm0
+	vpunpckhdq	%xmm3,%xmm2,%xmm2
+	vpunpcklqdq	%xmm15,%xmm10,%xmm1
+	vpunpckhqdq	%xmm15,%xmm10,%xmm10
+	vpunpcklqdq	%xmm2,%xmm0,%xmm3
+	vpunpckhqdq	%xmm2,%xmm0,%xmm0
+	vpaddd	192-256(%rcx),%xmm12,%xmm12
+	vpaddd	208-256(%rcx),%xmm13,%xmm13
+	vpaddd	224-256(%rcx),%xmm9,%xmm9
+	vpaddd	240-256(%rcx),%xmm14,%xmm14
+
+	vpunpckldq	%xmm13,%xmm12,%xmm2
+	vpunpckldq	%xmm14,%xmm9,%xmm15
+	vpunpckhdq	%xmm13,%xmm12,%xmm12
+	vpunpckhdq	%xmm14,%xmm9,%xmm9
+	vpunpcklqdq	%xmm15,%xmm2,%xmm13
+	vpunpckhqdq	%xmm15,%xmm2,%xmm2
+	vpunpcklqdq	%xmm9,%xmm12,%xmm14
+	vpunpckhqdq	%xmm9,%xmm12,%xmm12
+	vpaddd	256-256(%rcx),%xmm4,%xmm4
+	vpaddd	272-256(%rcx),%xmm5,%xmm5
+	vpaddd	288-256(%rcx),%xmm6,%xmm6
+	vpaddd	304-256(%rcx),%xmm7,%xmm7
+
+	vpunpckldq	%xmm5,%xmm4,%xmm9
+	vpunpckldq	%xmm7,%xmm6,%xmm15
+	vpunpckhdq	%xmm5,%xmm4,%xmm4
+	vpunpckhdq	%xmm7,%xmm6,%xmm6
+	vpunpcklqdq	%xmm15,%xmm9,%xmm5
+	vpunpckhqdq	%xmm15,%xmm9,%xmm9
+	vpunpcklqdq	%xmm6,%xmm4,%xmm7
+	vpunpckhqdq	%xmm6,%xmm4,%xmm4
+	vmovdqa	0(%rsp),%xmm6
+	vmovdqa	16(%rsp),%xmm15
+
+	cmpq	$256,%rdx
+	jb	.Ltail4xop
+
+	vpxor	0(%rsi),%xmm6,%xmm6
+	vpxor	16(%rsi),%xmm1,%xmm1
+	vpxor	32(%rsi),%xmm13,%xmm13
+	vpxor	48(%rsi),%xmm5,%xmm5
+	vpxor	64(%rsi),%xmm15,%xmm15
+	vpxor	80(%rsi),%xmm10,%xmm10
+	vpxor	96(%rsi),%xmm2,%xmm2
+	vpxor	112(%rsi),%xmm9,%xmm9
+	leaq	128(%rsi),%rsi
+	vpxor	0(%rsi),%xmm11,%xmm11
+	vpxor	16(%rsi),%xmm3,%xmm3
+	vpxor	32(%rsi),%xmm14,%xmm14
+	vpxor	48(%rsi),%xmm7,%xmm7
+	vpxor	64(%rsi),%xmm8,%xmm8
+	vpxor	80(%rsi),%xmm0,%xmm0
+	vpxor	96(%rsi),%xmm12,%xmm12
+	vpxor	112(%rsi),%xmm4,%xmm4
+	leaq	128(%rsi),%rsi
+
+	vmovdqu	%xmm6,0(%rdi)
+	vmovdqu	%xmm1,16(%rdi)
+	vmovdqu	%xmm13,32(%rdi)
+	vmovdqu	%xmm5,48(%rdi)
+	vmovdqu	%xmm15,64(%rdi)
+	vmovdqu	%xmm10,80(%rdi)
+	vmovdqu	%xmm2,96(%rdi)
+	vmovdqu	%xmm9,112(%rdi)
+	leaq	128(%rdi),%rdi
+	vmovdqu	%xmm11,0(%rdi)
+	vmovdqu	%xmm3,16(%rdi)
+	vmovdqu	%xmm14,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	vmovdqu	%xmm8,64(%rdi)
+	vmovdqu	%xmm0,80(%rdi)
+	vmovdqu	%xmm12,96(%rdi)
+	vmovdqu	%xmm4,112(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$256,%rdx
+	jnz	.Loop_outer4xop
+
+	jmp	.Ldone4xop
+
+.align	32
+.Ltail4xop:
+	cmpq	$192,%rdx
+	jae	.L192_or_more4xop
+	cmpq	$128,%rdx
+	jae	.L128_or_more4xop
+	cmpq	$64,%rdx
+	jae	.L64_or_more4xop
+
+	xorq	%r10,%r10
+	vmovdqa	%xmm6,0(%rsp)
+	vmovdqa	%xmm1,16(%rsp)
+	vmovdqa	%xmm13,32(%rsp)
+	vmovdqa	%xmm5,48(%rsp)
+	jmp	.Loop_tail4xop
+
+.align	32
+.L64_or_more4xop:
+	vpxor	0(%rsi),%xmm6,%xmm6
+	vpxor	16(%rsi),%xmm1,%xmm1
+	vpxor	32(%rsi),%xmm13,%xmm13
+	vpxor	48(%rsi),%xmm5,%xmm5
+	vmovdqu	%xmm6,0(%rdi)
+	vmovdqu	%xmm1,16(%rdi)
+	vmovdqu	%xmm13,32(%rdi)
+	vmovdqu	%xmm5,48(%rdi)
+	je	.Ldone4xop
+
+	leaq	64(%rsi),%rsi
+	vmovdqa	%xmm15,0(%rsp)
+	xorq	%r10,%r10
+	vmovdqa	%xmm10,16(%rsp)
+	leaq	64(%rdi),%rdi
+	vmovdqa	%xmm2,32(%rsp)
+	subq	$64,%rdx
+	vmovdqa	%xmm9,48(%rsp)
+	jmp	.Loop_tail4xop
+
+.align	32
+.L128_or_more4xop:
+	vpxor	0(%rsi),%xmm6,%xmm6
+	vpxor	16(%rsi),%xmm1,%xmm1
+	vpxor	32(%rsi),%xmm13,%xmm13
+	vpxor	48(%rsi),%xmm5,%xmm5
+	vpxor	64(%rsi),%xmm15,%xmm15
+	vpxor	80(%rsi),%xmm10,%xmm10
+	vpxor	96(%rsi),%xmm2,%xmm2
+	vpxor	112(%rsi),%xmm9,%xmm9
+
+	vmovdqu	%xmm6,0(%rdi)
+	vmovdqu	%xmm1,16(%rdi)
+	vmovdqu	%xmm13,32(%rdi)
+	vmovdqu	%xmm5,48(%rdi)
+	vmovdqu	%xmm15,64(%rdi)
+	vmovdqu	%xmm10,80(%rdi)
+	vmovdqu	%xmm2,96(%rdi)
+	vmovdqu	%xmm9,112(%rdi)
+	je	.Ldone4xop
+
+	leaq	128(%rsi),%rsi
+	vmovdqa	%xmm11,0(%rsp)
+	xorq	%r10,%r10
+	vmovdqa	%xmm3,16(%rsp)
+	leaq	128(%rdi),%rdi
+	vmovdqa	%xmm14,32(%rsp)
+	subq	$128,%rdx
+	vmovdqa	%xmm7,48(%rsp)
+	jmp	.Loop_tail4xop
+
+.align	32
+.L192_or_more4xop:
+	vpxor	0(%rsi),%xmm6,%xmm6
+	vpxor	16(%rsi),%xmm1,%xmm1
+	vpxor	32(%rsi),%xmm13,%xmm13
+	vpxor	48(%rsi),%xmm5,%xmm5
+	vpxor	64(%rsi),%xmm15,%xmm15
+	vpxor	80(%rsi),%xmm10,%xmm10
+	vpxor	96(%rsi),%xmm2,%xmm2
+	vpxor	112(%rsi),%xmm9,%xmm9
+	leaq	128(%rsi),%rsi
+	vpxor	0(%rsi),%xmm11,%xmm11
+	vpxor	16(%rsi),%xmm3,%xmm3
+	vpxor	32(%rsi),%xmm14,%xmm14
+	vpxor	48(%rsi),%xmm7,%xmm7
+
+	vmovdqu	%xmm6,0(%rdi)
+	vmovdqu	%xmm1,16(%rdi)
+	vmovdqu	%xmm13,32(%rdi)
+	vmovdqu	%xmm5,48(%rdi)
+	vmovdqu	%xmm15,64(%rdi)
+	vmovdqu	%xmm10,80(%rdi)
+	vmovdqu	%xmm2,96(%rdi)
+	vmovdqu	%xmm9,112(%rdi)
+	leaq	128(%rdi),%rdi
+	vmovdqu	%xmm11,0(%rdi)
+	vmovdqu	%xmm3,16(%rdi)
+	vmovdqu	%xmm14,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	je	.Ldone4xop
+
+	leaq	64(%rsi),%rsi
+	vmovdqa	%xmm8,0(%rsp)
+	xorq	%r10,%r10
+	vmovdqa	%xmm0,16(%rsp)
+	leaq	64(%rdi),%rdi
+	vmovdqa	%xmm12,32(%rsp)
+	subq	$192,%rdx
+	vmovdqa	%xmm4,48(%rsp)
+
+.Loop_tail4xop:
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)
+	decq	%rdx
+	jnz	.Loop_tail4xop
+
+.Ldone4xop:
+	vzeroupper
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.L4xop_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_4xop,.-ChaCha20_4xop
+.type	ChaCha20_8x,@function
+.align	32
+ChaCha20_8x:
+.cfi_startproc	
+.LChaCha20_8x:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	subq	$0x280+8,%rsp
+	andq	$-32,%rsp
+	vzeroupper
+
+
+
+
+
+
+
+
+
+
+	vbroadcasti128	.Lsigma(%rip),%ymm11
+	vbroadcasti128	(%rcx),%ymm3
+	vbroadcasti128	16(%rcx),%ymm15
+	vbroadcasti128	(%r8),%ymm7
+	leaq	256(%rsp),%rcx
+	leaq	512(%rsp),%rax
+	leaq	.Lrot16(%rip),%r10
+	leaq	.Lrot24(%rip),%r11
+
+	vpshufd	$0x00,%ymm11,%ymm8
+	vpshufd	$0x55,%ymm11,%ymm9
+	vmovdqa	%ymm8,128-256(%rcx)
+	vpshufd	$0xaa,%ymm11,%ymm10
+	vmovdqa	%ymm9,160-256(%rcx)
+	vpshufd	$0xff,%ymm11,%ymm11
+	vmovdqa	%ymm10,192-256(%rcx)
+	vmovdqa	%ymm11,224-256(%rcx)
+
+	vpshufd	$0x00,%ymm3,%ymm0
+	vpshufd	$0x55,%ymm3,%ymm1
+	vmovdqa	%ymm0,256-256(%rcx)
+	vpshufd	$0xaa,%ymm3,%ymm2
+	vmovdqa	%ymm1,288-256(%rcx)
+	vpshufd	$0xff,%ymm3,%ymm3
+	vmovdqa	%ymm2,320-256(%rcx)
+	vmovdqa	%ymm3,352-256(%rcx)
+
+	vpshufd	$0x00,%ymm15,%ymm12
+	vpshufd	$0x55,%ymm15,%ymm13
+	vmovdqa	%ymm12,384-512(%rax)
+	vpshufd	$0xaa,%ymm15,%ymm14
+	vmovdqa	%ymm13,416-512(%rax)
+	vpshufd	$0xff,%ymm15,%ymm15
+	vmovdqa	%ymm14,448-512(%rax)
+	vmovdqa	%ymm15,480-512(%rax)
+
+	vpshufd	$0x00,%ymm7,%ymm4
+	vpshufd	$0x55,%ymm7,%ymm5
+	vpaddd	.Lincy(%rip),%ymm4,%ymm4
+	vpshufd	$0xaa,%ymm7,%ymm6
+	vmovdqa	%ymm5,544-512(%rax)
+	vpshufd	$0xff,%ymm7,%ymm7
+	vmovdqa	%ymm6,576-512(%rax)
+	vmovdqa	%ymm7,608-512(%rax)
+
+	jmp	.Loop_enter8x
+
+.align	32
+.Loop_outer8x:
+	vmovdqa	128-256(%rcx),%ymm8
+	vmovdqa	160-256(%rcx),%ymm9
+	vmovdqa	192-256(%rcx),%ymm10
+	vmovdqa	224-256(%rcx),%ymm11
+	vmovdqa	256-256(%rcx),%ymm0
+	vmovdqa	288-256(%rcx),%ymm1
+	vmovdqa	320-256(%rcx),%ymm2
+	vmovdqa	352-256(%rcx),%ymm3
+	vmovdqa	384-512(%rax),%ymm12
+	vmovdqa	416-512(%rax),%ymm13
+	vmovdqa	448-512(%rax),%ymm14
+	vmovdqa	480-512(%rax),%ymm15
+	vmovdqa	512-512(%rax),%ymm4
+	vmovdqa	544-512(%rax),%ymm5
+	vmovdqa	576-512(%rax),%ymm6
+	vmovdqa	608-512(%rax),%ymm7
+	vpaddd	.Leight(%rip),%ymm4,%ymm4
+
+.Loop_enter8x:
+	vmovdqa	%ymm14,64(%rsp)
+	vmovdqa	%ymm15,96(%rsp)
+	vbroadcasti128	(%r10),%ymm15
+	vmovdqa	%ymm4,512-512(%rax)
+	movl	$10,%eax
+	jmp	.Loop8x
+
+.align	32
+.Loop8x:
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpxor	%ymm4,%ymm8,%ymm4
+	vpshufb	%ymm15,%ymm4,%ymm4
+	vpaddd	%ymm1,%ymm9,%ymm9
+	vpxor	%ymm5,%ymm9,%ymm5
+	vpshufb	%ymm15,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm12,%ymm12
+	vpxor	%ymm0,%ymm12,%ymm0
+	vpslld	$12,%ymm0,%ymm14
+	vpsrld	$20,%ymm0,%ymm0
+	vpor	%ymm0,%ymm14,%ymm0
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm5,%ymm13,%ymm13
+	vpxor	%ymm1,%ymm13,%ymm1
+	vpslld	$12,%ymm1,%ymm15
+	vpsrld	$20,%ymm1,%ymm1
+	vpor	%ymm1,%ymm15,%ymm1
+	vpaddd	%ymm0,%ymm8,%ymm8
+	vpxor	%ymm4,%ymm8,%ymm4
+	vpshufb	%ymm14,%ymm4,%ymm4
+	vpaddd	%ymm1,%ymm9,%ymm9
+	vpxor	%ymm5,%ymm9,%ymm5
+	vpshufb	%ymm14,%ymm5,%ymm5
+	vpaddd	%ymm4,%ymm12,%ymm12
+	vpxor	%ymm0,%ymm12,%ymm0
+	vpslld	$7,%ymm0,%ymm15
+	vpsrld	$25,%ymm0,%ymm0
+	vpor	%ymm0,%ymm15,%ymm0
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm5,%ymm13,%ymm13
+	vpxor	%ymm1,%ymm13,%ymm1
+	vpslld	$7,%ymm1,%ymm14
+	vpsrld	$25,%ymm1,%ymm1
+	vpor	%ymm1,%ymm14,%ymm1
+	vmovdqa	%ymm12,0(%rsp)
+	vmovdqa	%ymm13,32(%rsp)
+	vmovdqa	64(%rsp),%ymm12
+	vmovdqa	96(%rsp),%ymm13
+	vpaddd	%ymm2,%ymm10,%ymm10
+	vpxor	%ymm6,%ymm10,%ymm6
+	vpshufb	%ymm15,%ymm6,%ymm6
+	vpaddd	%ymm3,%ymm11,%ymm11
+	vpxor	%ymm7,%ymm11,%ymm7
+	vpshufb	%ymm15,%ymm7,%ymm7
+	vpaddd	%ymm6,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm12,%ymm2
+	vpslld	$12,%ymm2,%ymm14
+	vpsrld	$20,%ymm2,%ymm2
+	vpor	%ymm2,%ymm14,%ymm2
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm7,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm13,%ymm3
+	vpslld	$12,%ymm3,%ymm15
+	vpsrld	$20,%ymm3,%ymm3
+	vpor	%ymm3,%ymm15,%ymm3
+	vpaddd	%ymm2,%ymm10,%ymm10
+	vpxor	%ymm6,%ymm10,%ymm6
+	vpshufb	%ymm14,%ymm6,%ymm6
+	vpaddd	%ymm3,%ymm11,%ymm11
+	vpxor	%ymm7,%ymm11,%ymm7
+	vpshufb	%ymm14,%ymm7,%ymm7
+	vpaddd	%ymm6,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm12,%ymm2
+	vpslld	$7,%ymm2,%ymm15
+	vpsrld	$25,%ymm2,%ymm2
+	vpor	%ymm2,%ymm15,%ymm2
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm7,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm13,%ymm3
+	vpslld	$7,%ymm3,%ymm14
+	vpsrld	$25,%ymm3,%ymm3
+	vpor	%ymm3,%ymm14,%ymm3
+	vpaddd	%ymm1,%ymm8,%ymm8
+	vpxor	%ymm7,%ymm8,%ymm7
+	vpshufb	%ymm15,%ymm7,%ymm7
+	vpaddd	%ymm2,%ymm9,%ymm9
+	vpxor	%ymm4,%ymm9,%ymm4
+	vpshufb	%ymm15,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm12,%ymm1
+	vpslld	$12,%ymm1,%ymm14
+	vpsrld	$20,%ymm1,%ymm1
+	vpor	%ymm1,%ymm14,%ymm1
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm4,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm13,%ymm2
+	vpslld	$12,%ymm2,%ymm15
+	vpsrld	$20,%ymm2,%ymm2
+	vpor	%ymm2,%ymm15,%ymm2
+	vpaddd	%ymm1,%ymm8,%ymm8
+	vpxor	%ymm7,%ymm8,%ymm7
+	vpshufb	%ymm14,%ymm7,%ymm7
+	vpaddd	%ymm2,%ymm9,%ymm9
+	vpxor	%ymm4,%ymm9,%ymm4
+	vpshufb	%ymm14,%ymm4,%ymm4
+	vpaddd	%ymm7,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm12,%ymm1
+	vpslld	$7,%ymm1,%ymm15
+	vpsrld	$25,%ymm1,%ymm1
+	vpor	%ymm1,%ymm15,%ymm1
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm4,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm13,%ymm2
+	vpslld	$7,%ymm2,%ymm14
+	vpsrld	$25,%ymm2,%ymm2
+	vpor	%ymm2,%ymm14,%ymm2
+	vmovdqa	%ymm12,64(%rsp)
+	vmovdqa	%ymm13,96(%rsp)
+	vmovdqa	0(%rsp),%ymm12
+	vmovdqa	32(%rsp),%ymm13
+	vpaddd	%ymm3,%ymm10,%ymm10
+	vpxor	%ymm5,%ymm10,%ymm5
+	vpshufb	%ymm15,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm11,%ymm11
+	vpxor	%ymm6,%ymm11,%ymm6
+	vpshufb	%ymm15,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm12,%ymm12
+	vpxor	%ymm3,%ymm12,%ymm3
+	vpslld	$12,%ymm3,%ymm14
+	vpsrld	$20,%ymm3,%ymm3
+	vpor	%ymm3,%ymm14,%ymm3
+	vbroadcasti128	(%r11),%ymm14
+	vpaddd	%ymm6,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm13,%ymm0
+	vpslld	$12,%ymm0,%ymm15
+	vpsrld	$20,%ymm0,%ymm0
+	vpor	%ymm0,%ymm15,%ymm0
+	vpaddd	%ymm3,%ymm10,%ymm10
+	vpxor	%ymm5,%ymm10,%ymm5
+	vpshufb	%ymm14,%ymm5,%ymm5
+	vpaddd	%ymm0,%ymm11,%ymm11
+	vpxor	%ymm6,%ymm11,%ymm6
+	vpshufb	%ymm14,%ymm6,%ymm6
+	vpaddd	%ymm5,%ymm12,%ymm12
+	vpxor	%ymm3,%ymm12,%ymm3
+	vpslld	$7,%ymm3,%ymm15
+	vpsrld	$25,%ymm3,%ymm3
+	vpor	%ymm3,%ymm15,%ymm3
+	vbroadcasti128	(%r10),%ymm15
+	vpaddd	%ymm6,%ymm13,%ymm13
+	vpxor	%ymm0,%ymm13,%ymm0
+	vpslld	$7,%ymm0,%ymm14
+	vpsrld	$25,%ymm0,%ymm0
+	vpor	%ymm0,%ymm14,%ymm0
+	decl	%eax
+	jnz	.Loop8x
+
+	leaq	512(%rsp),%rax
+	vpaddd	128-256(%rcx),%ymm8,%ymm8
+	vpaddd	160-256(%rcx),%ymm9,%ymm9
+	vpaddd	192-256(%rcx),%ymm10,%ymm10
+	vpaddd	224-256(%rcx),%ymm11,%ymm11
+
+	vpunpckldq	%ymm9,%ymm8,%ymm14
+	vpunpckldq	%ymm11,%ymm10,%ymm15
+	vpunpckhdq	%ymm9,%ymm8,%ymm8
+	vpunpckhdq	%ymm11,%ymm10,%ymm10
+	vpunpcklqdq	%ymm15,%ymm14,%ymm9
+	vpunpckhqdq	%ymm15,%ymm14,%ymm14
+	vpunpcklqdq	%ymm10,%ymm8,%ymm11
+	vpunpckhqdq	%ymm10,%ymm8,%ymm8
+	vpaddd	256-256(%rcx),%ymm0,%ymm0
+	vpaddd	288-256(%rcx),%ymm1,%ymm1
+	vpaddd	320-256(%rcx),%ymm2,%ymm2
+	vpaddd	352-256(%rcx),%ymm3,%ymm3
+
+	vpunpckldq	%ymm1,%ymm0,%ymm10
+	vpunpckldq	%ymm3,%ymm2,%ymm15
+	vpunpckhdq	%ymm1,%ymm0,%ymm0
+	vpunpckhdq	%ymm3,%ymm2,%ymm2
+	vpunpcklqdq	%ymm15,%ymm10,%ymm1
+	vpunpckhqdq	%ymm15,%ymm10,%ymm10
+	vpunpcklqdq	%ymm2,%ymm0,%ymm3
+	vpunpckhqdq	%ymm2,%ymm0,%ymm0
+	vperm2i128	$0x20,%ymm1,%ymm9,%ymm15
+	vperm2i128	$0x31,%ymm1,%ymm9,%ymm1
+	vperm2i128	$0x20,%ymm10,%ymm14,%ymm9
+	vperm2i128	$0x31,%ymm10,%ymm14,%ymm10
+	vperm2i128	$0x20,%ymm3,%ymm11,%ymm14
+	vperm2i128	$0x31,%ymm3,%ymm11,%ymm3
+	vperm2i128	$0x20,%ymm0,%ymm8,%ymm11
+	vperm2i128	$0x31,%ymm0,%ymm8,%ymm0
+	vmovdqa	%ymm15,0(%rsp)
+	vmovdqa	%ymm9,32(%rsp)
+	vmovdqa	64(%rsp),%ymm15
+	vmovdqa	96(%rsp),%ymm9
+
+	vpaddd	384-512(%rax),%ymm12,%ymm12
+	vpaddd	416-512(%rax),%ymm13,%ymm13
+	vpaddd	448-512(%rax),%ymm15,%ymm15
+	vpaddd	480-512(%rax),%ymm9,%ymm9
+
+	vpunpckldq	%ymm13,%ymm12,%ymm2
+	vpunpckldq	%ymm9,%ymm15,%ymm8
+	vpunpckhdq	%ymm13,%ymm12,%ymm12
+	vpunpckhdq	%ymm9,%ymm15,%ymm15
+	vpunpcklqdq	%ymm8,%ymm2,%ymm13
+	vpunpckhqdq	%ymm8,%ymm2,%ymm2
+	vpunpcklqdq	%ymm15,%ymm12,%ymm9
+	vpunpckhqdq	%ymm15,%ymm12,%ymm12
+	vpaddd	512-512(%rax),%ymm4,%ymm4
+	vpaddd	544-512(%rax),%ymm5,%ymm5
+	vpaddd	576-512(%rax),%ymm6,%ymm6
+	vpaddd	608-512(%rax),%ymm7,%ymm7
+
+	vpunpckldq	%ymm5,%ymm4,%ymm15
+	vpunpckldq	%ymm7,%ymm6,%ymm8
+	vpunpckhdq	%ymm5,%ymm4,%ymm4
+	vpunpckhdq	%ymm7,%ymm6,%ymm6
+	vpunpcklqdq	%ymm8,%ymm15,%ymm5
+	vpunpckhqdq	%ymm8,%ymm15,%ymm15
+	vpunpcklqdq	%ymm6,%ymm4,%ymm7
+	vpunpckhqdq	%ymm6,%ymm4,%ymm4
+	vperm2i128	$0x20,%ymm5,%ymm13,%ymm8
+	vperm2i128	$0x31,%ymm5,%ymm13,%ymm5
+	vperm2i128	$0x20,%ymm15,%ymm2,%ymm13
+	vperm2i128	$0x31,%ymm15,%ymm2,%ymm15
+	vperm2i128	$0x20,%ymm7,%ymm9,%ymm2
+	vperm2i128	$0x31,%ymm7,%ymm9,%ymm7
+	vperm2i128	$0x20,%ymm4,%ymm12,%ymm9
+	vperm2i128	$0x31,%ymm4,%ymm12,%ymm4
+	vmovdqa	0(%rsp),%ymm6
+	vmovdqa	32(%rsp),%ymm12
+
+	cmpq	$512,%rdx
+	jb	.Ltail8x
+
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm12,%ymm12
+	vpxor	32(%rsi),%ymm13,%ymm13
+	vpxor	64(%rsi),%ymm10,%ymm10
+	vpxor	96(%rsi),%ymm15,%ymm15
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm12,0(%rdi)
+	vmovdqu	%ymm13,32(%rdi)
+	vmovdqu	%ymm10,64(%rdi)
+	vmovdqu	%ymm15,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm14,%ymm14
+	vpxor	32(%rsi),%ymm2,%ymm2
+	vpxor	64(%rsi),%ymm3,%ymm3
+	vpxor	96(%rsi),%ymm7,%ymm7
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm14,0(%rdi)
+	vmovdqu	%ymm2,32(%rdi)
+	vmovdqu	%ymm3,64(%rdi)
+	vmovdqu	%ymm7,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	vpxor	0(%rsi),%ymm11,%ymm11
+	vpxor	32(%rsi),%ymm9,%ymm9
+	vpxor	64(%rsi),%ymm0,%ymm0
+	vpxor	96(%rsi),%ymm4,%ymm4
+	leaq	128(%rsi),%rsi
+	vmovdqu	%ymm11,0(%rdi)
+	vmovdqu	%ymm9,32(%rdi)
+	vmovdqu	%ymm0,64(%rdi)
+	vmovdqu	%ymm4,96(%rdi)
+	leaq	128(%rdi),%rdi
+
+	subq	$512,%rdx
+	jnz	.Loop_outer8x
+
+	jmp	.Ldone8x
+
+.Ltail8x:
+	cmpq	$448,%rdx
+	jae	.L448_or_more8x
+	cmpq	$384,%rdx
+	jae	.L384_or_more8x
+	cmpq	$320,%rdx
+	jae	.L320_or_more8x
+	cmpq	$256,%rdx
+	jae	.L256_or_more8x
+	cmpq	$192,%rdx
+	jae	.L192_or_more8x
+	cmpq	$128,%rdx
+	jae	.L128_or_more8x
+	cmpq	$64,%rdx
+	jae	.L64_or_more8x
+
+	xorq	%r10,%r10
+	vmovdqa	%ymm6,0(%rsp)
+	vmovdqa	%ymm8,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L64_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	je	.Ldone8x
+
+	leaq	64(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm1,0(%rsp)
+	leaq	64(%rdi),%rdi
+	subq	$64,%rdx
+	vmovdqa	%ymm5,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L128_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	je	.Ldone8x
+
+	leaq	128(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm12,0(%rsp)
+	leaq	128(%rdi),%rdi
+	subq	$128,%rdx
+	vmovdqa	%ymm13,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L192_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	je	.Ldone8x
+
+	leaq	192(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm10,0(%rsp)
+	leaq	192(%rdi),%rdi
+	subq	$192,%rdx
+	vmovdqa	%ymm15,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L256_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	je	.Ldone8x
+
+	leaq	256(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm14,0(%rsp)
+	leaq	256(%rdi),%rdi
+	subq	$256,%rdx
+	vmovdqa	%ymm2,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L320_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	je	.Ldone8x
+
+	leaq	320(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm3,0(%rsp)
+	leaq	320(%rdi),%rdi
+	subq	$320,%rdx
+	vmovdqa	%ymm7,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L384_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vpxor	320(%rsi),%ymm3,%ymm3
+	vpxor	352(%rsi),%ymm7,%ymm7
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	vmovdqu	%ymm3,320(%rdi)
+	vmovdqu	%ymm7,352(%rdi)
+	je	.Ldone8x
+
+	leaq	384(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm11,0(%rsp)
+	leaq	384(%rdi),%rdi
+	subq	$384,%rdx
+	vmovdqa	%ymm9,32(%rsp)
+	jmp	.Loop_tail8x
+
+.align	32
+.L448_or_more8x:
+	vpxor	0(%rsi),%ymm6,%ymm6
+	vpxor	32(%rsi),%ymm8,%ymm8
+	vpxor	64(%rsi),%ymm1,%ymm1
+	vpxor	96(%rsi),%ymm5,%ymm5
+	vpxor	128(%rsi),%ymm12,%ymm12
+	vpxor	160(%rsi),%ymm13,%ymm13
+	vpxor	192(%rsi),%ymm10,%ymm10
+	vpxor	224(%rsi),%ymm15,%ymm15
+	vpxor	256(%rsi),%ymm14,%ymm14
+	vpxor	288(%rsi),%ymm2,%ymm2
+	vpxor	320(%rsi),%ymm3,%ymm3
+	vpxor	352(%rsi),%ymm7,%ymm7
+	vpxor	384(%rsi),%ymm11,%ymm11
+	vpxor	416(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm6,0(%rdi)
+	vmovdqu	%ymm8,32(%rdi)
+	vmovdqu	%ymm1,64(%rdi)
+	vmovdqu	%ymm5,96(%rdi)
+	vmovdqu	%ymm12,128(%rdi)
+	vmovdqu	%ymm13,160(%rdi)
+	vmovdqu	%ymm10,192(%rdi)
+	vmovdqu	%ymm15,224(%rdi)
+	vmovdqu	%ymm14,256(%rdi)
+	vmovdqu	%ymm2,288(%rdi)
+	vmovdqu	%ymm3,320(%rdi)
+	vmovdqu	%ymm7,352(%rdi)
+	vmovdqu	%ymm11,384(%rdi)
+	vmovdqu	%ymm9,416(%rdi)
+	je	.Ldone8x
+
+	leaq	448(%rsi),%rsi
+	xorq	%r10,%r10
+	vmovdqa	%ymm0,0(%rsp)
+	leaq	448(%rdi),%rdi
+	subq	$448,%rdx
+	vmovdqa	%ymm4,32(%rsp)
+
+.Loop_tail8x:
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)
+	decq	%rdx
+	jnz	.Loop_tail8x
+
+.Ldone8x:
+	vzeroall
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.L8x_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_8x,.-ChaCha20_8x
+.type	ChaCha20_avx512,@function
+.align	32
+ChaCha20_avx512:
+.cfi_startproc	
+.LChaCha20_avx512:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	cmpq	$512,%rdx
+	ja	.LChaCha20_16x
+
+	subq	$64+8,%rsp
+	vbroadcasti32x4	.Lsigma(%rip),%zmm0
+	vbroadcasti32x4	(%rcx),%zmm1
+	vbroadcasti32x4	16(%rcx),%zmm2
+	vbroadcasti32x4	(%r8),%zmm3
+
+	vmovdqa32	%zmm0,%zmm16
+	vmovdqa32	%zmm1,%zmm17
+	vmovdqa32	%zmm2,%zmm18
+	vpaddd	.Lzeroz(%rip),%zmm3,%zmm3
+	vmovdqa32	.Lfourz(%rip),%zmm20
+	movq	$10,%r8
+	vmovdqa32	%zmm3,%zmm19
+	jmp	.Loop_avx512
+
+.align	16
+.Loop_outer_avx512:
+	vmovdqa32	%zmm16,%zmm0
+	vmovdqa32	%zmm17,%zmm1
+	vmovdqa32	%zmm18,%zmm2
+	vpaddd	%zmm20,%zmm19,%zmm3
+	movq	$10,%r8
+	vmovdqa32	%zmm3,%zmm19
+	jmp	.Loop_avx512
+
+.align	32
+.Loop_avx512:
+	vpaddd	%zmm1,%zmm0,%zmm0
+	vpxord	%zmm0,%zmm3,%zmm3
+	vprold	$16,%zmm3,%zmm3
+	vpaddd	%zmm3,%zmm2,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vprold	$12,%zmm1,%zmm1
+	vpaddd	%zmm1,%zmm0,%zmm0
+	vpxord	%zmm0,%zmm3,%zmm3
+	vprold	$8,%zmm3,%zmm3
+	vpaddd	%zmm3,%zmm2,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vprold	$7,%zmm1,%zmm1
+	vpshufd	$78,%zmm2,%zmm2
+	vpshufd	$57,%zmm1,%zmm1
+	vpshufd	$147,%zmm3,%zmm3
+	vpaddd	%zmm1,%zmm0,%zmm0
+	vpxord	%zmm0,%zmm3,%zmm3
+	vprold	$16,%zmm3,%zmm3
+	vpaddd	%zmm3,%zmm2,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vprold	$12,%zmm1,%zmm1
+	vpaddd	%zmm1,%zmm0,%zmm0
+	vpxord	%zmm0,%zmm3,%zmm3
+	vprold	$8,%zmm3,%zmm3
+	vpaddd	%zmm3,%zmm2,%zmm2
+	vpxord	%zmm2,%zmm1,%zmm1
+	vprold	$7,%zmm1,%zmm1
+	vpshufd	$78,%zmm2,%zmm2
+	vpshufd	$147,%zmm1,%zmm1
+	vpshufd	$57,%zmm3,%zmm3
+	decq	%r8
+	jnz	.Loop_avx512
+	vpaddd	%zmm16,%zmm0,%zmm0
+	vpaddd	%zmm17,%zmm1,%zmm1
+	vpaddd	%zmm18,%zmm2,%zmm2
+	vpaddd	%zmm19,%zmm3,%zmm3
+
+	subq	$64,%rdx
+	jb	.Ltail64_avx512
+
+	vpxor	0(%rsi),%xmm0,%xmm4
+	vpxor	16(%rsi),%xmm1,%xmm5
+	vpxor	32(%rsi),%xmm2,%xmm6
+	vpxor	48(%rsi),%xmm3,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jz	.Ldone_avx512
+
+	vextracti32x4	$1,%zmm0,%xmm4
+	vextracti32x4	$1,%zmm1,%xmm5
+	vextracti32x4	$1,%zmm2,%xmm6
+	vextracti32x4	$1,%zmm3,%xmm7
+
+	subq	$64,%rdx
+	jb	.Ltail_avx512
+
+	vpxor	0(%rsi),%xmm4,%xmm4
+	vpxor	16(%rsi),%xmm5,%xmm5
+	vpxor	32(%rsi),%xmm6,%xmm6
+	vpxor	48(%rsi),%xmm7,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jz	.Ldone_avx512
+
+	vextracti32x4	$2,%zmm0,%xmm4
+	vextracti32x4	$2,%zmm1,%xmm5
+	vextracti32x4	$2,%zmm2,%xmm6
+	vextracti32x4	$2,%zmm3,%xmm7
+
+	subq	$64,%rdx
+	jb	.Ltail_avx512
+
+	vpxor	0(%rsi),%xmm4,%xmm4
+	vpxor	16(%rsi),%xmm5,%xmm5
+	vpxor	32(%rsi),%xmm6,%xmm6
+	vpxor	48(%rsi),%xmm7,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jz	.Ldone_avx512
+
+	vextracti32x4	$3,%zmm0,%xmm4
+	vextracti32x4	$3,%zmm1,%xmm5
+	vextracti32x4	$3,%zmm2,%xmm6
+	vextracti32x4	$3,%zmm3,%xmm7
+
+	subq	$64,%rdx
+	jb	.Ltail_avx512
+
+	vpxor	0(%rsi),%xmm4,%xmm4
+	vpxor	16(%rsi),%xmm5,%xmm5
+	vpxor	32(%rsi),%xmm6,%xmm6
+	vpxor	48(%rsi),%xmm7,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jnz	.Loop_outer_avx512
+
+	jmp	.Ldone_avx512
+
+.align	16
+.Ltail64_avx512:
+	vmovdqa	%xmm0,0(%rsp)
+	vmovdqa	%xmm1,16(%rsp)
+	vmovdqa	%xmm2,32(%rsp)
+	vmovdqa	%xmm3,48(%rsp)
+	addq	$64,%rdx
+	jmp	.Loop_tail_avx512
+
+.align	16
+.Ltail_avx512:
+	vmovdqa	%xmm4,0(%rsp)
+	vmovdqa	%xmm5,16(%rsp)
+	vmovdqa	%xmm6,32(%rsp)
+	vmovdqa	%xmm7,48(%rsp)
+	addq	$64,%rdx
+
+.Loop_tail_avx512:
+	movzbl	(%rsi,%r8,1),%eax
+	movzbl	(%rsp,%r8,1),%ecx
+	leaq	1(%r8),%r8
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r8,1)
+	decq	%rdx
+	jnz	.Loop_tail_avx512
+
+	vmovdqu32	%zmm16,0(%rsp)
+
+.Ldone_avx512:
+	vzeroall
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.Lavx512_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_avx512,.-ChaCha20_avx512
+.type	ChaCha20_avx512vl,@function
+.align	32
+ChaCha20_avx512vl:
+.cfi_startproc	
+.LChaCha20_avx512vl:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	cmpq	$128,%rdx
+	ja	.LChaCha20_8xvl
+
+	subq	$64+8,%rsp
+	vbroadcasti128	.Lsigma(%rip),%ymm0
+	vbroadcasti128	(%rcx),%ymm1
+	vbroadcasti128	16(%rcx),%ymm2
+	vbroadcasti128	(%r8),%ymm3
+
+	vmovdqa32	%ymm0,%ymm16
+	vmovdqa32	%ymm1,%ymm17
+	vmovdqa32	%ymm2,%ymm18
+	vpaddd	.Lzeroz(%rip),%ymm3,%ymm3
+	vmovdqa32	.Ltwoy(%rip),%ymm20
+	movq	$10,%r8
+	vmovdqa32	%ymm3,%ymm19
+	jmp	.Loop_avx512vl
+
+.align	16
+.Loop_outer_avx512vl:
+	vmovdqa32	%ymm18,%ymm2
+	vpaddd	%ymm20,%ymm19,%ymm3
+	movq	$10,%r8
+	vmovdqa32	%ymm3,%ymm19
+	jmp	.Loop_avx512vl
+
+.align	32
+.Loop_avx512vl:
+	vpaddd	%ymm1,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm3,%ymm3
+	vprold	$16,%ymm3,%ymm3
+	vpaddd	%ymm3,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm1,%ymm1
+	vprold	$12,%ymm1,%ymm1
+	vpaddd	%ymm1,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm3,%ymm3
+	vprold	$8,%ymm3,%ymm3
+	vpaddd	%ymm3,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm1,%ymm1
+	vprold	$7,%ymm1,%ymm1
+	vpshufd	$78,%ymm2,%ymm2
+	vpshufd	$57,%ymm1,%ymm1
+	vpshufd	$147,%ymm3,%ymm3
+	vpaddd	%ymm1,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm3,%ymm3
+	vprold	$16,%ymm3,%ymm3
+	vpaddd	%ymm3,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm1,%ymm1
+	vprold	$12,%ymm1,%ymm1
+	vpaddd	%ymm1,%ymm0,%ymm0
+	vpxor	%ymm0,%ymm3,%ymm3
+	vprold	$8,%ymm3,%ymm3
+	vpaddd	%ymm3,%ymm2,%ymm2
+	vpxor	%ymm2,%ymm1,%ymm1
+	vprold	$7,%ymm1,%ymm1
+	vpshufd	$78,%ymm2,%ymm2
+	vpshufd	$147,%ymm1,%ymm1
+	vpshufd	$57,%ymm3,%ymm3
+	decq	%r8
+	jnz	.Loop_avx512vl
+	vpaddd	%ymm16,%ymm0,%ymm0
+	vpaddd	%ymm17,%ymm1,%ymm1
+	vpaddd	%ymm18,%ymm2,%ymm2
+	vpaddd	%ymm19,%ymm3,%ymm3
+
+	subq	$64,%rdx
+	jb	.Ltail64_avx512vl
+
+	vpxor	0(%rsi),%xmm0,%xmm4
+	vpxor	16(%rsi),%xmm1,%xmm5
+	vpxor	32(%rsi),%xmm2,%xmm6
+	vpxor	48(%rsi),%xmm3,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	jz	.Ldone_avx512vl
+
+	vextracti128	$1,%ymm0,%xmm4
+	vextracti128	$1,%ymm1,%xmm5
+	vextracti128	$1,%ymm2,%xmm6
+	vextracti128	$1,%ymm3,%xmm7
+
+	subq	$64,%rdx
+	jb	.Ltail_avx512vl
+
+	vpxor	0(%rsi),%xmm4,%xmm4
+	vpxor	16(%rsi),%xmm5,%xmm5
+	vpxor	32(%rsi),%xmm6,%xmm6
+	vpxor	48(%rsi),%xmm7,%xmm7
+	leaq	64(%rsi),%rsi
+
+	vmovdqu	%xmm4,0(%rdi)
+	vmovdqu	%xmm5,16(%rdi)
+	vmovdqu	%xmm6,32(%rdi)
+	vmovdqu	%xmm7,48(%rdi)
+	leaq	64(%rdi),%rdi
+
+	vmovdqa32	%ymm16,%ymm0
+	vmovdqa32	%ymm17,%ymm1
+	jnz	.Loop_outer_avx512vl
+
+	jmp	.Ldone_avx512vl
+
+.align	16
+.Ltail64_avx512vl:
+	vmovdqa	%xmm0,0(%rsp)
+	vmovdqa	%xmm1,16(%rsp)
+	vmovdqa	%xmm2,32(%rsp)
+	vmovdqa	%xmm3,48(%rsp)
+	addq	$64,%rdx
+	jmp	.Loop_tail_avx512vl
+
+.align	16
+.Ltail_avx512vl:
+	vmovdqa	%xmm4,0(%rsp)
+	vmovdqa	%xmm5,16(%rsp)
+	vmovdqa	%xmm6,32(%rsp)
+	vmovdqa	%xmm7,48(%rsp)
+	addq	$64,%rdx
+
+.Loop_tail_avx512vl:
+	movzbl	(%rsi,%r8,1),%eax
+	movzbl	(%rsp,%r8,1),%ecx
+	leaq	1(%r8),%r8
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r8,1)
+	decq	%rdx
+	jnz	.Loop_tail_avx512vl
+
+	vmovdqu32	%ymm16,0(%rsp)
+	vmovdqu32	%ymm16,32(%rsp)
+
+.Ldone_avx512vl:
+	vzeroall
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.Lavx512vl_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_avx512vl,.-ChaCha20_avx512vl
+.type	ChaCha20_16x,@function
+.align	32
+ChaCha20_16x:
+.cfi_startproc	
+.LChaCha20_16x:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	subq	$64+8,%rsp
+	andq	$-64,%rsp
+	vzeroupper
+
+	leaq	.Lsigma(%rip),%r10
+	vbroadcasti32x4	(%r10),%zmm3
+	vbroadcasti32x4	(%rcx),%zmm7
+	vbroadcasti32x4	16(%rcx),%zmm11
+	vbroadcasti32x4	(%r8),%zmm15
+
+	vpshufd	$0x00,%zmm3,%zmm0
+	vpshufd	$0x55,%zmm3,%zmm1
+	vpshufd	$0xaa,%zmm3,%zmm2
+	vpshufd	$0xff,%zmm3,%zmm3
+	vmovdqa64	%zmm0,%zmm16
+	vmovdqa64	%zmm1,%zmm17
+	vmovdqa64	%zmm2,%zmm18
+	vmovdqa64	%zmm3,%zmm19
+
+	vpshufd	$0x00,%zmm7,%zmm4
+	vpshufd	$0x55,%zmm7,%zmm5
+	vpshufd	$0xaa,%zmm7,%zmm6
+	vpshufd	$0xff,%zmm7,%zmm7
+	vmovdqa64	%zmm4,%zmm20
+	vmovdqa64	%zmm5,%zmm21
+	vmovdqa64	%zmm6,%zmm22
+	vmovdqa64	%zmm7,%zmm23
+
+	vpshufd	$0x00,%zmm11,%zmm8
+	vpshufd	$0x55,%zmm11,%zmm9
+	vpshufd	$0xaa,%zmm11,%zmm10
+	vpshufd	$0xff,%zmm11,%zmm11
+	vmovdqa64	%zmm8,%zmm24
+	vmovdqa64	%zmm9,%zmm25
+	vmovdqa64	%zmm10,%zmm26
+	vmovdqa64	%zmm11,%zmm27
+
+	vpshufd	$0x00,%zmm15,%zmm12
+	vpshufd	$0x55,%zmm15,%zmm13
+	vpshufd	$0xaa,%zmm15,%zmm14
+	vpshufd	$0xff,%zmm15,%zmm15
+	vpaddd	.Lincz(%rip),%zmm12,%zmm12
+	vmovdqa64	%zmm12,%zmm28
+	vmovdqa64	%zmm13,%zmm29
+	vmovdqa64	%zmm14,%zmm30
+	vmovdqa64	%zmm15,%zmm31
+
+	movl	$10,%eax
+	jmp	.Loop16x
+
+.align	32
+.Loop_outer16x:
+	vpbroadcastd	0(%r10),%zmm0
+	vpbroadcastd	4(%r10),%zmm1
+	vpbroadcastd	8(%r10),%zmm2
+	vpbroadcastd	12(%r10),%zmm3
+	vpaddd	.Lsixteen(%rip),%zmm28,%zmm28
+	vmovdqa64	%zmm20,%zmm4
+	vmovdqa64	%zmm21,%zmm5
+	vmovdqa64	%zmm22,%zmm6
+	vmovdqa64	%zmm23,%zmm7
+	vmovdqa64	%zmm24,%zmm8
+	vmovdqa64	%zmm25,%zmm9
+	vmovdqa64	%zmm26,%zmm10
+	vmovdqa64	%zmm27,%zmm11
+	vmovdqa64	%zmm28,%zmm12
+	vmovdqa64	%zmm29,%zmm13
+	vmovdqa64	%zmm30,%zmm14
+	vmovdqa64	%zmm31,%zmm15
+
+	vmovdqa64	%zmm0,%zmm16
+	vmovdqa64	%zmm1,%zmm17
+	vmovdqa64	%zmm2,%zmm18
+	vmovdqa64	%zmm3,%zmm19
+
+	movl	$10,%eax
+	jmp	.Loop16x
+
+.align	32
+.Loop16x:
+	vpaddd	%zmm4,%zmm0,%zmm0
+	vpaddd	%zmm5,%zmm1,%zmm1
+	vpaddd	%zmm6,%zmm2,%zmm2
+	vpaddd	%zmm7,%zmm3,%zmm3
+	vpxord	%zmm0,%zmm12,%zmm12
+	vpxord	%zmm1,%zmm13,%zmm13
+	vpxord	%zmm2,%zmm14,%zmm14
+	vpxord	%zmm3,%zmm15,%zmm15
+	vprold	$16,%zmm12,%zmm12
+	vprold	$16,%zmm13,%zmm13
+	vprold	$16,%zmm14,%zmm14
+	vprold	$16,%zmm15,%zmm15
+	vpaddd	%zmm12,%zmm8,%zmm8
+	vpaddd	%zmm13,%zmm9,%zmm9
+	vpaddd	%zmm14,%zmm10,%zmm10
+	vpaddd	%zmm15,%zmm11,%zmm11
+	vpxord	%zmm8,%zmm4,%zmm4
+	vpxord	%zmm9,%zmm5,%zmm5
+	vpxord	%zmm10,%zmm6,%zmm6
+	vpxord	%zmm11,%zmm7,%zmm7
+	vprold	$12,%zmm4,%zmm4
+	vprold	$12,%zmm5,%zmm5
+	vprold	$12,%zmm6,%zmm6
+	vprold	$12,%zmm7,%zmm7
+	vpaddd	%zmm4,%zmm0,%zmm0
+	vpaddd	%zmm5,%zmm1,%zmm1
+	vpaddd	%zmm6,%zmm2,%zmm2
+	vpaddd	%zmm7,%zmm3,%zmm3
+	vpxord	%zmm0,%zmm12,%zmm12
+	vpxord	%zmm1,%zmm13,%zmm13
+	vpxord	%zmm2,%zmm14,%zmm14
+	vpxord	%zmm3,%zmm15,%zmm15
+	vprold	$8,%zmm12,%zmm12
+	vprold	$8,%zmm13,%zmm13
+	vprold	$8,%zmm14,%zmm14
+	vprold	$8,%zmm15,%zmm15
+	vpaddd	%zmm12,%zmm8,%zmm8
+	vpaddd	%zmm13,%zmm9,%zmm9
+	vpaddd	%zmm14,%zmm10,%zmm10
+	vpaddd	%zmm15,%zmm11,%zmm11
+	vpxord	%zmm8,%zmm4,%zmm4
+	vpxord	%zmm9,%zmm5,%zmm5
+	vpxord	%zmm10,%zmm6,%zmm6
+	vpxord	%zmm11,%zmm7,%zmm7
+	vprold	$7,%zmm4,%zmm4
+	vprold	$7,%zmm5,%zmm5
+	vprold	$7,%zmm6,%zmm6
+	vprold	$7,%zmm7,%zmm7
+	vpaddd	%zmm5,%zmm0,%zmm0
+	vpaddd	%zmm6,%zmm1,%zmm1
+	vpaddd	%zmm7,%zmm2,%zmm2
+	vpaddd	%zmm4,%zmm3,%zmm3
+	vpxord	%zmm0,%zmm15,%zmm15
+	vpxord	%zmm1,%zmm12,%zmm12
+	vpxord	%zmm2,%zmm13,%zmm13
+	vpxord	%zmm3,%zmm14,%zmm14
+	vprold	$16,%zmm15,%zmm15
+	vprold	$16,%zmm12,%zmm12
+	vprold	$16,%zmm13,%zmm13
+	vprold	$16,%zmm14,%zmm14
+	vpaddd	%zmm15,%zmm10,%zmm10
+	vpaddd	%zmm12,%zmm11,%zmm11
+	vpaddd	%zmm13,%zmm8,%zmm8
+	vpaddd	%zmm14,%zmm9,%zmm9
+	vpxord	%zmm10,%zmm5,%zmm5
+	vpxord	%zmm11,%zmm6,%zmm6
+	vpxord	%zmm8,%zmm7,%zmm7
+	vpxord	%zmm9,%zmm4,%zmm4
+	vprold	$12,%zmm5,%zmm5
+	vprold	$12,%zmm6,%zmm6
+	vprold	$12,%zmm7,%zmm7
+	vprold	$12,%zmm4,%zmm4
+	vpaddd	%zmm5,%zmm0,%zmm0
+	vpaddd	%zmm6,%zmm1,%zmm1
+	vpaddd	%zmm7,%zmm2,%zmm2
+	vpaddd	%zmm4,%zmm3,%zmm3
+	vpxord	%zmm0,%zmm15,%zmm15
+	vpxord	%zmm1,%zmm12,%zmm12
+	vpxord	%zmm2,%zmm13,%zmm13
+	vpxord	%zmm3,%zmm14,%zmm14
+	vprold	$8,%zmm15,%zmm15
+	vprold	$8,%zmm12,%zmm12
+	vprold	$8,%zmm13,%zmm13
+	vprold	$8,%zmm14,%zmm14
+	vpaddd	%zmm15,%zmm10,%zmm10
+	vpaddd	%zmm12,%zmm11,%zmm11
+	vpaddd	%zmm13,%zmm8,%zmm8
+	vpaddd	%zmm14,%zmm9,%zmm9
+	vpxord	%zmm10,%zmm5,%zmm5
+	vpxord	%zmm11,%zmm6,%zmm6
+	vpxord	%zmm8,%zmm7,%zmm7
+	vpxord	%zmm9,%zmm4,%zmm4
+	vprold	$7,%zmm5,%zmm5
+	vprold	$7,%zmm6,%zmm6
+	vprold	$7,%zmm7,%zmm7
+	vprold	$7,%zmm4,%zmm4
+	decl	%eax
+	jnz	.Loop16x
+
+	vpaddd	%zmm16,%zmm0,%zmm0
+	vpaddd	%zmm17,%zmm1,%zmm1
+	vpaddd	%zmm18,%zmm2,%zmm2
+	vpaddd	%zmm19,%zmm3,%zmm3
+
+	vpunpckldq	%zmm1,%zmm0,%zmm18
+	vpunpckldq	%zmm3,%zmm2,%zmm19
+	vpunpckhdq	%zmm1,%zmm0,%zmm0
+	vpunpckhdq	%zmm3,%zmm2,%zmm2
+	vpunpcklqdq	%zmm19,%zmm18,%zmm1
+	vpunpckhqdq	%zmm19,%zmm18,%zmm18
+	vpunpcklqdq	%zmm2,%zmm0,%zmm3
+	vpunpckhqdq	%zmm2,%zmm0,%zmm0
+	vpaddd	%zmm20,%zmm4,%zmm4
+	vpaddd	%zmm21,%zmm5,%zmm5
+	vpaddd	%zmm22,%zmm6,%zmm6
+	vpaddd	%zmm23,%zmm7,%zmm7
+
+	vpunpckldq	%zmm5,%zmm4,%zmm2
+	vpunpckldq	%zmm7,%zmm6,%zmm19
+	vpunpckhdq	%zmm5,%zmm4,%zmm4
+	vpunpckhdq	%zmm7,%zmm6,%zmm6
+	vpunpcklqdq	%zmm19,%zmm2,%zmm5
+	vpunpckhqdq	%zmm19,%zmm2,%zmm2
+	vpunpcklqdq	%zmm6,%zmm4,%zmm7
+	vpunpckhqdq	%zmm6,%zmm4,%zmm4
+	vshufi32x4	$0x44,%zmm5,%zmm1,%zmm19
+	vshufi32x4	$0xee,%zmm5,%zmm1,%zmm5
+	vshufi32x4	$0x44,%zmm2,%zmm18,%zmm1
+	vshufi32x4	$0xee,%zmm2,%zmm18,%zmm2
+	vshufi32x4	$0x44,%zmm7,%zmm3,%zmm18
+	vshufi32x4	$0xee,%zmm7,%zmm3,%zmm7
+	vshufi32x4	$0x44,%zmm4,%zmm0,%zmm3
+	vshufi32x4	$0xee,%zmm4,%zmm0,%zmm4
+	vpaddd	%zmm24,%zmm8,%zmm8
+	vpaddd	%zmm25,%zmm9,%zmm9
+	vpaddd	%zmm26,%zmm10,%zmm10
+	vpaddd	%zmm27,%zmm11,%zmm11
+
+	vpunpckldq	%zmm9,%zmm8,%zmm6
+	vpunpckldq	%zmm11,%zmm10,%zmm0
+	vpunpckhdq	%zmm9,%zmm8,%zmm8
+	vpunpckhdq	%zmm11,%zmm10,%zmm10
+	vpunpcklqdq	%zmm0,%zmm6,%zmm9
+	vpunpckhqdq	%zmm0,%zmm6,%zmm6
+	vpunpcklqdq	%zmm10,%zmm8,%zmm11
+	vpunpckhqdq	%zmm10,%zmm8,%zmm8
+	vpaddd	%zmm28,%zmm12,%zmm12
+	vpaddd	%zmm29,%zmm13,%zmm13
+	vpaddd	%zmm30,%zmm14,%zmm14
+	vpaddd	%zmm31,%zmm15,%zmm15
+
+	vpunpckldq	%zmm13,%zmm12,%zmm10
+	vpunpckldq	%zmm15,%zmm14,%zmm0
+	vpunpckhdq	%zmm13,%zmm12,%zmm12
+	vpunpckhdq	%zmm15,%zmm14,%zmm14
+	vpunpcklqdq	%zmm0,%zmm10,%zmm13
+	vpunpckhqdq	%zmm0,%zmm10,%zmm10
+	vpunpcklqdq	%zmm14,%zmm12,%zmm15
+	vpunpckhqdq	%zmm14,%zmm12,%zmm12
+	vshufi32x4	$0x44,%zmm13,%zmm9,%zmm0
+	vshufi32x4	$0xee,%zmm13,%zmm9,%zmm13
+	vshufi32x4	$0x44,%zmm10,%zmm6,%zmm9
+	vshufi32x4	$0xee,%zmm10,%zmm6,%zmm10
+	vshufi32x4	$0x44,%zmm15,%zmm11,%zmm6
+	vshufi32x4	$0xee,%zmm15,%zmm11,%zmm15
+	vshufi32x4	$0x44,%zmm12,%zmm8,%zmm11
+	vshufi32x4	$0xee,%zmm12,%zmm8,%zmm12
+	vshufi32x4	$0x88,%zmm0,%zmm19,%zmm16
+	vshufi32x4	$0xdd,%zmm0,%zmm19,%zmm19
+	vshufi32x4	$0x88,%zmm13,%zmm5,%zmm0
+	vshufi32x4	$0xdd,%zmm13,%zmm5,%zmm13
+	vshufi32x4	$0x88,%zmm9,%zmm1,%zmm17
+	vshufi32x4	$0xdd,%zmm9,%zmm1,%zmm1
+	vshufi32x4	$0x88,%zmm10,%zmm2,%zmm9
+	vshufi32x4	$0xdd,%zmm10,%zmm2,%zmm10
+	vshufi32x4	$0x88,%zmm6,%zmm18,%zmm14
+	vshufi32x4	$0xdd,%zmm6,%zmm18,%zmm18
+	vshufi32x4	$0x88,%zmm15,%zmm7,%zmm6
+	vshufi32x4	$0xdd,%zmm15,%zmm7,%zmm15
+	vshufi32x4	$0x88,%zmm11,%zmm3,%zmm8
+	vshufi32x4	$0xdd,%zmm11,%zmm3,%zmm3
+	vshufi32x4	$0x88,%zmm12,%zmm4,%zmm11
+	vshufi32x4	$0xdd,%zmm12,%zmm4,%zmm12
+	cmpq	$1024,%rdx
+	jb	.Ltail16x
+
+	vpxord	0(%rsi),%zmm16,%zmm16
+	vpxord	64(%rsi),%zmm17,%zmm17
+	vpxord	128(%rsi),%zmm14,%zmm14
+	vpxord	192(%rsi),%zmm8,%zmm8
+	vmovdqu32	%zmm16,0(%rdi)
+	vmovdqu32	%zmm17,64(%rdi)
+	vmovdqu32	%zmm14,128(%rdi)
+	vmovdqu32	%zmm8,192(%rdi)
+
+	vpxord	256(%rsi),%zmm19,%zmm19
+	vpxord	320(%rsi),%zmm1,%zmm1
+	vpxord	384(%rsi),%zmm18,%zmm18
+	vpxord	448(%rsi),%zmm3,%zmm3
+	vmovdqu32	%zmm19,256(%rdi)
+	vmovdqu32	%zmm1,320(%rdi)
+	vmovdqu32	%zmm18,384(%rdi)
+	vmovdqu32	%zmm3,448(%rdi)
+
+	vpxord	512(%rsi),%zmm0,%zmm0
+	vpxord	576(%rsi),%zmm9,%zmm9
+	vpxord	640(%rsi),%zmm6,%zmm6
+	vpxord	704(%rsi),%zmm11,%zmm11
+	vmovdqu32	%zmm0,512(%rdi)
+	vmovdqu32	%zmm9,576(%rdi)
+	vmovdqu32	%zmm6,640(%rdi)
+	vmovdqu32	%zmm11,704(%rdi)
+
+	vpxord	768(%rsi),%zmm13,%zmm13
+	vpxord	832(%rsi),%zmm10,%zmm10
+	vpxord	896(%rsi),%zmm15,%zmm15
+	vpxord	960(%rsi),%zmm12,%zmm12
+	leaq	1024(%rsi),%rsi
+	vmovdqu32	%zmm13,768(%rdi)
+	vmovdqu32	%zmm10,832(%rdi)
+	vmovdqu32	%zmm15,896(%rdi)
+	vmovdqu32	%zmm12,960(%rdi)
+	leaq	1024(%rdi),%rdi
+
+	subq	$1024,%rdx
+	jnz	.Loop_outer16x
+
+	jmp	.Ldone16x
+
+.align	32
+.Ltail16x:
+	xorq	%r10,%r10
+	subq	%rsi,%rdi
+	cmpq	$64,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm16,%zmm16
+	vmovdqu32	%zmm16,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm17,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$128,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm17,%zmm17
+	vmovdqu32	%zmm17,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm14,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$192,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm14,%zmm14
+	vmovdqu32	%zmm14,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm8,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$256,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm8,%zmm8
+	vmovdqu32	%zmm8,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm19,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$320,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm19,%zmm19
+	vmovdqu32	%zmm19,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm1,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$384,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm1,%zmm1
+	vmovdqu32	%zmm1,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm18,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$448,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm18,%zmm18
+	vmovdqu32	%zmm18,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm3,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$512,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm3,%zmm3
+	vmovdqu32	%zmm3,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm0,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$576,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm0,%zmm0
+	vmovdqu32	%zmm0,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm9,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$640,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm9,%zmm9
+	vmovdqu32	%zmm9,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm6,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$704,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm6,%zmm6
+	vmovdqu32	%zmm6,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm11,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$768,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm11,%zmm11
+	vmovdqu32	%zmm11,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm13,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$832,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm13,%zmm13
+	vmovdqu32	%zmm13,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm10,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$896,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm10,%zmm10
+	vmovdqu32	%zmm10,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm15,%zmm16
+	leaq	64(%rsi),%rsi
+
+	cmpq	$960,%rdx
+	jb	.Less_than_64_16x
+	vpxord	(%rsi),%zmm15,%zmm15
+	vmovdqu32	%zmm15,(%rdi,%rsi,1)
+	je	.Ldone16x
+	vmovdqa32	%zmm12,%zmm16
+	leaq	64(%rsi),%rsi
+
+.Less_than_64_16x:
+	vmovdqa32	%zmm16,0(%rsp)
+	leaq	(%rdi,%rsi,1),%rdi
+	andq	$63,%rdx
+
+.Loop_tail16x:
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)
+	decq	%rdx
+	jnz	.Loop_tail16x
+
+	vpxord	%zmm16,%zmm16,%zmm16
+	vmovdqa32	%zmm16,0(%rsp)
+
+.Ldone16x:
+	vzeroall
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.L16x_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_16x,.-ChaCha20_16x
+.type	ChaCha20_8xvl,@function
+.align	32
+ChaCha20_8xvl:
+.cfi_startproc	
+.LChaCha20_8xvl:
+	movq	%rsp,%r9
+.cfi_def_cfa_register	%r9
+	subq	$64+8,%rsp
+	andq	$-64,%rsp
+	vzeroupper
+
+	leaq	.Lsigma(%rip),%r10
+	vbroadcasti128	(%r10),%ymm3
+	vbroadcasti128	(%rcx),%ymm7
+	vbroadcasti128	16(%rcx),%ymm11
+	vbroadcasti128	(%r8),%ymm15
+
+	vpshufd	$0x00,%ymm3,%ymm0
+	vpshufd	$0x55,%ymm3,%ymm1
+	vpshufd	$0xaa,%ymm3,%ymm2
+	vpshufd	$0xff,%ymm3,%ymm3
+	vmovdqa64	%ymm0,%ymm16
+	vmovdqa64	%ymm1,%ymm17
+	vmovdqa64	%ymm2,%ymm18
+	vmovdqa64	%ymm3,%ymm19
+
+	vpshufd	$0x00,%ymm7,%ymm4
+	vpshufd	$0x55,%ymm7,%ymm5
+	vpshufd	$0xaa,%ymm7,%ymm6
+	vpshufd	$0xff,%ymm7,%ymm7
+	vmovdqa64	%ymm4,%ymm20
+	vmovdqa64	%ymm5,%ymm21
+	vmovdqa64	%ymm6,%ymm22
+	vmovdqa64	%ymm7,%ymm23
+
+	vpshufd	$0x00,%ymm11,%ymm8
+	vpshufd	$0x55,%ymm11,%ymm9
+	vpshufd	$0xaa,%ymm11,%ymm10
+	vpshufd	$0xff,%ymm11,%ymm11
+	vmovdqa64	%ymm8,%ymm24
+	vmovdqa64	%ymm9,%ymm25
+	vmovdqa64	%ymm10,%ymm26
+	vmovdqa64	%ymm11,%ymm27
+
+	vpshufd	$0x00,%ymm15,%ymm12
+	vpshufd	$0x55,%ymm15,%ymm13
+	vpshufd	$0xaa,%ymm15,%ymm14
+	vpshufd	$0xff,%ymm15,%ymm15
+	vpaddd	.Lincy(%rip),%ymm12,%ymm12
+	vmovdqa64	%ymm12,%ymm28
+	vmovdqa64	%ymm13,%ymm29
+	vmovdqa64	%ymm14,%ymm30
+	vmovdqa64	%ymm15,%ymm31
+
+	movl	$10,%eax
+	jmp	.Loop8xvl
+
+.align	32
+.Loop_outer8xvl:
+
+
+	vpbroadcastd	8(%r10),%ymm2
+	vpbroadcastd	12(%r10),%ymm3
+	vpaddd	.Leight(%rip),%ymm28,%ymm28
+	vmovdqa64	%ymm20,%ymm4
+	vmovdqa64	%ymm21,%ymm5
+	vmovdqa64	%ymm22,%ymm6
+	vmovdqa64	%ymm23,%ymm7
+	vmovdqa64	%ymm24,%ymm8
+	vmovdqa64	%ymm25,%ymm9
+	vmovdqa64	%ymm26,%ymm10
+	vmovdqa64	%ymm27,%ymm11
+	vmovdqa64	%ymm28,%ymm12
+	vmovdqa64	%ymm29,%ymm13
+	vmovdqa64	%ymm30,%ymm14
+	vmovdqa64	%ymm31,%ymm15
+
+	vmovdqa64	%ymm0,%ymm16
+	vmovdqa64	%ymm1,%ymm17
+	vmovdqa64	%ymm2,%ymm18
+	vmovdqa64	%ymm3,%ymm19
+
+	movl	$10,%eax
+	jmp	.Loop8xvl
+
+.align	32
+.Loop8xvl:
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm3,%ymm15,%ymm15
+	vprold	$16,%ymm12,%ymm12
+	vprold	$16,%ymm13,%ymm13
+	vprold	$16,%ymm14,%ymm14
+	vprold	$16,%ymm15,%ymm15
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm11,%ymm7,%ymm7
+	vprold	$12,%ymm4,%ymm4
+	vprold	$12,%ymm5,%ymm5
+	vprold	$12,%ymm6,%ymm6
+	vprold	$12,%ymm7,%ymm7
+	vpaddd	%ymm4,%ymm0,%ymm0
+	vpaddd	%ymm5,%ymm1,%ymm1
+	vpaddd	%ymm6,%ymm2,%ymm2
+	vpaddd	%ymm7,%ymm3,%ymm3
+	vpxor	%ymm0,%ymm12,%ymm12
+	vpxor	%ymm1,%ymm13,%ymm13
+	vpxor	%ymm2,%ymm14,%ymm14
+	vpxor	%ymm3,%ymm15,%ymm15
+	vprold	$8,%ymm12,%ymm12
+	vprold	$8,%ymm13,%ymm13
+	vprold	$8,%ymm14,%ymm14
+	vprold	$8,%ymm15,%ymm15
+	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm15,%ymm11,%ymm11
+	vpxor	%ymm8,%ymm4,%ymm4
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm11,%ymm7,%ymm7
+	vprold	$7,%ymm4,%ymm4
+	vprold	$7,%ymm5,%ymm5
+	vprold	$7,%ymm6,%ymm6
+	vprold	$7,%ymm7,%ymm7
+	vpaddd	%ymm5,%ymm0,%ymm0
+	vpaddd	%ymm6,%ymm1,%ymm1
+	vpaddd	%ymm7,%ymm2,%ymm2
+	vpaddd	%ymm4,%ymm3,%ymm3
+	vpxor	%ymm0,%ymm15,%ymm15
+	vpxor	%ymm1,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm14,%ymm14
+	vprold	$16,%ymm15,%ymm15
+	vprold	$16,%ymm12,%ymm12
+	vprold	$16,%ymm13,%ymm13
+	vprold	$16,%ymm14,%ymm14
+	vpaddd	%ymm15,%ymm10,%ymm10
+	vpaddd	%ymm12,%ymm11,%ymm11
+	vpaddd	%ymm13,%ymm8,%ymm8
+	vpaddd	%ymm14,%ymm9,%ymm9
+	vpxor	%ymm10,%ymm5,%ymm5
+	vpxor	%ymm11,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpxor	%ymm9,%ymm4,%ymm4
+	vprold	$12,%ymm5,%ymm5
+	vprold	$12,%ymm6,%ymm6
+	vprold	$12,%ymm7,%ymm7
+	vprold	$12,%ymm4,%ymm4
+	vpaddd	%ymm5,%ymm0,%ymm0
+	vpaddd	%ymm6,%ymm1,%ymm1
+	vpaddd	%ymm7,%ymm2,%ymm2
+	vpaddd	%ymm4,%ymm3,%ymm3
+	vpxor	%ymm0,%ymm15,%ymm15
+	vpxor	%ymm1,%ymm12,%ymm12
+	vpxor	%ymm2,%ymm13,%ymm13
+	vpxor	%ymm3,%ymm14,%ymm14
+	vprold	$8,%ymm15,%ymm15
+	vprold	$8,%ymm12,%ymm12
+	vprold	$8,%ymm13,%ymm13
+	vprold	$8,%ymm14,%ymm14
+	vpaddd	%ymm15,%ymm10,%ymm10
+	vpaddd	%ymm12,%ymm11,%ymm11
+	vpaddd	%ymm13,%ymm8,%ymm8
+	vpaddd	%ymm14,%ymm9,%ymm9
+	vpxor	%ymm10,%ymm5,%ymm5
+	vpxor	%ymm11,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm7,%ymm7
+	vpxor	%ymm9,%ymm4,%ymm4
+	vprold	$7,%ymm5,%ymm5
+	vprold	$7,%ymm6,%ymm6
+	vprold	$7,%ymm7,%ymm7
+	vprold	$7,%ymm4,%ymm4
+	decl	%eax
+	jnz	.Loop8xvl
+
+	vpaddd	%ymm16,%ymm0,%ymm0
+	vpaddd	%ymm17,%ymm1,%ymm1
+	vpaddd	%ymm18,%ymm2,%ymm2
+	vpaddd	%ymm19,%ymm3,%ymm3
+
+	vpunpckldq	%ymm1,%ymm0,%ymm18
+	vpunpckldq	%ymm3,%ymm2,%ymm19
+	vpunpckhdq	%ymm1,%ymm0,%ymm0
+	vpunpckhdq	%ymm3,%ymm2,%ymm2
+	vpunpcklqdq	%ymm19,%ymm18,%ymm1
+	vpunpckhqdq	%ymm19,%ymm18,%ymm18
+	vpunpcklqdq	%ymm2,%ymm0,%ymm3
+	vpunpckhqdq	%ymm2,%ymm0,%ymm0
+	vpaddd	%ymm20,%ymm4,%ymm4
+	vpaddd	%ymm21,%ymm5,%ymm5
+	vpaddd	%ymm22,%ymm6,%ymm6
+	vpaddd	%ymm23,%ymm7,%ymm7
+
+	vpunpckldq	%ymm5,%ymm4,%ymm2
+	vpunpckldq	%ymm7,%ymm6,%ymm19
+	vpunpckhdq	%ymm5,%ymm4,%ymm4
+	vpunpckhdq	%ymm7,%ymm6,%ymm6
+	vpunpcklqdq	%ymm19,%ymm2,%ymm5
+	vpunpckhqdq	%ymm19,%ymm2,%ymm2
+	vpunpcklqdq	%ymm6,%ymm4,%ymm7
+	vpunpckhqdq	%ymm6,%ymm4,%ymm4
+	vshufi32x4	$0,%ymm5,%ymm1,%ymm19
+	vshufi32x4	$3,%ymm5,%ymm1,%ymm5
+	vshufi32x4	$0,%ymm2,%ymm18,%ymm1
+	vshufi32x4	$3,%ymm2,%ymm18,%ymm2
+	vshufi32x4	$0,%ymm7,%ymm3,%ymm18
+	vshufi32x4	$3,%ymm7,%ymm3,%ymm7
+	vshufi32x4	$0,%ymm4,%ymm0,%ymm3
+	vshufi32x4	$3,%ymm4,%ymm0,%ymm4
+	vpaddd	%ymm24,%ymm8,%ymm8
+	vpaddd	%ymm25,%ymm9,%ymm9
+	vpaddd	%ymm26,%ymm10,%ymm10
+	vpaddd	%ymm27,%ymm11,%ymm11
+
+	vpunpckldq	%ymm9,%ymm8,%ymm6
+	vpunpckldq	%ymm11,%ymm10,%ymm0
+	vpunpckhdq	%ymm9,%ymm8,%ymm8
+	vpunpckhdq	%ymm11,%ymm10,%ymm10
+	vpunpcklqdq	%ymm0,%ymm6,%ymm9
+	vpunpckhqdq	%ymm0,%ymm6,%ymm6
+	vpunpcklqdq	%ymm10,%ymm8,%ymm11
+	vpunpckhqdq	%ymm10,%ymm8,%ymm8
+	vpaddd	%ymm28,%ymm12,%ymm12
+	vpaddd	%ymm29,%ymm13,%ymm13
+	vpaddd	%ymm30,%ymm14,%ymm14
+	vpaddd	%ymm31,%ymm15,%ymm15
+
+	vpunpckldq	%ymm13,%ymm12,%ymm10
+	vpunpckldq	%ymm15,%ymm14,%ymm0
+	vpunpckhdq	%ymm13,%ymm12,%ymm12
+	vpunpckhdq	%ymm15,%ymm14,%ymm14
+	vpunpcklqdq	%ymm0,%ymm10,%ymm13
+	vpunpckhqdq	%ymm0,%ymm10,%ymm10
+	vpunpcklqdq	%ymm14,%ymm12,%ymm15
+	vpunpckhqdq	%ymm14,%ymm12,%ymm12
+	vperm2i128	$0x20,%ymm13,%ymm9,%ymm0
+	vperm2i128	$0x31,%ymm13,%ymm9,%ymm13
+	vperm2i128	$0x20,%ymm10,%ymm6,%ymm9
+	vperm2i128	$0x31,%ymm10,%ymm6,%ymm10
+	vperm2i128	$0x20,%ymm15,%ymm11,%ymm6
+	vperm2i128	$0x31,%ymm15,%ymm11,%ymm15
+	vperm2i128	$0x20,%ymm12,%ymm8,%ymm11
+	vperm2i128	$0x31,%ymm12,%ymm8,%ymm12
+	cmpq	$512,%rdx
+	jb	.Ltail8xvl
+
+	movl	$0x80,%eax
+	vpxord	0(%rsi),%ymm19,%ymm19
+	vpxor	32(%rsi),%ymm0,%ymm0
+	vpxor	64(%rsi),%ymm5,%ymm5
+	vpxor	96(%rsi),%ymm13,%ymm13
+	leaq	(%rsi,%rax,1),%rsi
+	vmovdqu32	%ymm19,0(%rdi)
+	vmovdqu	%ymm0,32(%rdi)
+	vmovdqu	%ymm5,64(%rdi)
+	vmovdqu	%ymm13,96(%rdi)
+	leaq	(%rdi,%rax,1),%rdi
+
+	vpxor	0(%rsi),%ymm1,%ymm1
+	vpxor	32(%rsi),%ymm9,%ymm9
+	vpxor	64(%rsi),%ymm2,%ymm2
+	vpxor	96(%rsi),%ymm10,%ymm10
+	leaq	(%rsi,%rax,1),%rsi
+	vmovdqu	%ymm1,0(%rdi)
+	vmovdqu	%ymm9,32(%rdi)
+	vmovdqu	%ymm2,64(%rdi)
+	vmovdqu	%ymm10,96(%rdi)
+	leaq	(%rdi,%rax,1),%rdi
+
+	vpxord	0(%rsi),%ymm18,%ymm18
+	vpxor	32(%rsi),%ymm6,%ymm6
+	vpxor	64(%rsi),%ymm7,%ymm7
+	vpxor	96(%rsi),%ymm15,%ymm15
+	leaq	(%rsi,%rax,1),%rsi
+	vmovdqu32	%ymm18,0(%rdi)
+	vmovdqu	%ymm6,32(%rdi)
+	vmovdqu	%ymm7,64(%rdi)
+	vmovdqu	%ymm15,96(%rdi)
+	leaq	(%rdi,%rax,1),%rdi
+
+	vpxor	0(%rsi),%ymm3,%ymm3
+	vpxor	32(%rsi),%ymm11,%ymm11
+	vpxor	64(%rsi),%ymm4,%ymm4
+	vpxor	96(%rsi),%ymm12,%ymm12
+	leaq	(%rsi,%rax,1),%rsi
+	vmovdqu	%ymm3,0(%rdi)
+	vmovdqu	%ymm11,32(%rdi)
+	vmovdqu	%ymm4,64(%rdi)
+	vmovdqu	%ymm12,96(%rdi)
+	leaq	(%rdi,%rax,1),%rdi
+
+	vpbroadcastd	0(%r10),%ymm0
+	vpbroadcastd	4(%r10),%ymm1
+
+	subq	$512,%rdx
+	jnz	.Loop_outer8xvl
+
+	jmp	.Ldone8xvl
+
+.align	32
+.Ltail8xvl:
+	vmovdqa64	%ymm19,%ymm8
+	xorq	%r10,%r10
+	subq	%rsi,%rdi
+	cmpq	$64,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm8,%ymm8
+	vpxor	32(%rsi),%ymm0,%ymm0
+	vmovdqu	%ymm8,0(%rdi,%rsi,1)
+	vmovdqu	%ymm0,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm5,%ymm8
+	vmovdqa	%ymm13,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$128,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm5,%ymm5
+	vpxor	32(%rsi),%ymm13,%ymm13
+	vmovdqu	%ymm5,0(%rdi,%rsi,1)
+	vmovdqu	%ymm13,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm1,%ymm8
+	vmovdqa	%ymm9,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$192,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm1,%ymm1
+	vpxor	32(%rsi),%ymm9,%ymm9
+	vmovdqu	%ymm1,0(%rdi,%rsi,1)
+	vmovdqu	%ymm9,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm2,%ymm8
+	vmovdqa	%ymm10,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$256,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm2,%ymm2
+	vpxor	32(%rsi),%ymm10,%ymm10
+	vmovdqu	%ymm2,0(%rdi,%rsi,1)
+	vmovdqu	%ymm10,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa32	%ymm18,%ymm8
+	vmovdqa	%ymm6,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$320,%rdx
+	jb	.Less_than_64_8xvl
+	vpxord	0(%rsi),%ymm18,%ymm18
+	vpxor	32(%rsi),%ymm6,%ymm6
+	vmovdqu32	%ymm18,0(%rdi,%rsi,1)
+	vmovdqu	%ymm6,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm7,%ymm8
+	vmovdqa	%ymm15,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$384,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm7,%ymm7
+	vpxor	32(%rsi),%ymm15,%ymm15
+	vmovdqu	%ymm7,0(%rdi,%rsi,1)
+	vmovdqu	%ymm15,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm3,%ymm8
+	vmovdqa	%ymm11,%ymm0
+	leaq	64(%rsi),%rsi
+
+	cmpq	$448,%rdx
+	jb	.Less_than_64_8xvl
+	vpxor	0(%rsi),%ymm3,%ymm3
+	vpxor	32(%rsi),%ymm11,%ymm11
+	vmovdqu	%ymm3,0(%rdi,%rsi,1)
+	vmovdqu	%ymm11,32(%rdi,%rsi,1)
+	je	.Ldone8xvl
+	vmovdqa	%ymm4,%ymm8
+	vmovdqa	%ymm12,%ymm0
+	leaq	64(%rsi),%rsi
+
+.Less_than_64_8xvl:
+	vmovdqa	%ymm8,0(%rsp)
+	vmovdqa	%ymm0,32(%rsp)
+	leaq	(%rdi,%rsi,1),%rdi
+	andq	$63,%rdx
+
+.Loop_tail8xvl:
+	movzbl	(%rsi,%r10,1),%eax
+	movzbl	(%rsp,%r10,1),%ecx
+	leaq	1(%r10),%r10
+	xorl	%ecx,%eax
+	movb	%al,-1(%rdi,%r10,1)
+	decq	%rdx
+	jnz	.Loop_tail8xvl
+
+	vpxor	%ymm8,%ymm8,%ymm8
+	vmovdqa	%ymm8,0(%rsp)
+	vmovdqa	%ymm8,32(%rsp)
+
+.Ldone8xvl:
+	vzeroall
+	leaq	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
+.L8xvl_epilogue:
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	ChaCha20_8xvl,.-ChaCha20_8xvl
-- 
2.19.1

^ permalink raw reply related

* [PATCH net-next v8 04/28] zinc: ChaCha20 generic C implementation and selftest
From: Jason A. Donenfeld @ 2018-10-18 14:56 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-crypto, davem, gregkh
  Cc: Jason A. Donenfeld, Samuel Neves, Jean-Philippe Aumasson,
	Andy Lutomirski, Andrew Morton, Linus Torvalds, kernel-hardening
In-Reply-To: <20181018145712.7538-1-Jason@zx2c4.com>

This implements the ChaCha20 permutation as a single C statement, by way
of the comma operator, which the compiler is able to simplify
terrifically.

Information: https://cr.yp.to/chacha.html

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: kernel-hardening@lists.openwall.com
Cc: linux-crypto@vger.kernel.org
---
 include/zinc/chacha20.h      |   70 +
 lib/zinc/Kconfig             |    4 +
 lib/zinc/Makefile            |    3 +
 lib/zinc/chacha20/chacha20.c |  179 +++
 lib/zinc/selftest/chacha20.c | 2698 ++++++++++++++++++++++++++++++++++
 lib/zinc/selftest/run.h      |   48 +
 6 files changed, 3002 insertions(+)
 create mode 100644 include/zinc/chacha20.h
 create mode 100644 lib/zinc/chacha20/chacha20.c
 create mode 100644 lib/zinc/selftest/chacha20.c
 create mode 100644 lib/zinc/selftest/run.h

diff --git a/include/zinc/chacha20.h b/include/zinc/chacha20.h
new file mode 100644
index 000000000000..0b98bd6946ae
--- /dev/null
+++ b/include/zinc/chacha20.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _ZINC_CHACHA20_H
+#define _ZINC_CHACHA20_H
+
+#include <asm/unaligned.h>
+#include <linux/simd.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+enum {
+	CHACHA20_NONCE_SIZE = 16,
+	CHACHA20_KEY_SIZE = 32,
+	CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(u32),
+	CHACHA20_BLOCK_SIZE = 64,
+	CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(u32),
+	HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
+	HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
+};
+
+enum { /* expand 32-byte k */
+	CHACHA20_CONSTANT_EXPA = 0x61707865U,
+	CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
+	CHACHA20_CONSTANT_2_BY = 0x79622d32U,
+	CHACHA20_CONSTANT_TE_K = 0x6b206574U
+};
+
+struct chacha20_ctx {
+	union {
+		u32 state[16];
+		struct {
+			u32 constant[4];
+			u32 key[8];
+			u32 counter[4];
+		};
+	};
+};
+
+static inline void chacha20_init(struct chacha20_ctx *ctx,
+				 const u8 key[CHACHA20_KEY_SIZE],
+				 const u64 nonce)
+{
+	ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
+	ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
+	ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
+	ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
+	ctx->key[0] = get_unaligned_le32(key + 0);
+	ctx->key[1] = get_unaligned_le32(key + 4);
+	ctx->key[2] = get_unaligned_le32(key + 8);
+	ctx->key[3] = get_unaligned_le32(key + 12);
+	ctx->key[4] = get_unaligned_le32(key + 16);
+	ctx->key[5] = get_unaligned_le32(key + 20);
+	ctx->key[6] = get_unaligned_le32(key + 24);
+	ctx->key[7] = get_unaligned_le32(key + 28);
+	ctx->counter[0] = 0;
+	ctx->counter[1] = 0;
+	ctx->counter[2] = nonce & U32_MAX;
+	ctx->counter[3] = nonce >> 32;
+}
+void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
+	      simd_context_t *simd_context);
+
+void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
+	       const u8 nonce[HCHACHA20_NONCE_SIZE],
+	       const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context);
+
+#endif /* _ZINC_CHACHA20_H */
diff --git a/lib/zinc/Kconfig b/lib/zinc/Kconfig
index 90e066ea93a0..d271be37cecb 100644
--- a/lib/zinc/Kconfig
+++ b/lib/zinc/Kconfig
@@ -1,3 +1,7 @@
+config ZINC_CHACHA20
+	tristate
+	select CRYPTO_ALGAPI
+
 config ZINC_SELFTEST
 	bool "Zinc cryptography library self-tests"
 	help
diff --git a/lib/zinc/Makefile b/lib/zinc/Makefile
index a61c80d676cb..3d80144d55a6 100644
--- a/lib/zinc/Makefile
+++ b/lib/zinc/Makefile
@@ -1,3 +1,6 @@
 ccflags-y := -O2
 ccflags-y += -D'pr_fmt(fmt)="zinc: " fmt'
 ccflags-$(CONFIG_ZINC_DEBUG) += -DDEBUG
+
+zinc_chacha20-y := chacha20/chacha20.o
+obj-$(CONFIG_ZINC_CHACHA20) += zinc_chacha20.o
diff --git a/lib/zinc/chacha20/chacha20.c b/lib/zinc/chacha20/chacha20.c
new file mode 100644
index 000000000000..03209c15d1ca
--- /dev/null
+++ b/lib/zinc/chacha20/chacha20.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Implementation of the ChaCha20 stream cipher.
+ *
+ * Information: https://cr.yp.to/chacha.html
+ */
+
+#include <zinc/chacha20.h>
+#include "../selftest/run.h"
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <crypto/algapi.h> // For crypto_xor_cpy.
+
+static bool *const chacha20_nobs[] __initconst = { };
+static void __init chacha20_fpu_init(void)
+{
+}
+static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
+				 const u8 *src, size_t len,
+				 simd_context_t *simd_context)
+{
+	return false;
+}
+static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
+				  const u8 nonce[HCHACHA20_NONCE_SIZE],
+				  const u8 key[HCHACHA20_KEY_SIZE],
+				  simd_context_t *simd_context)
+{
+	return false;
+}
+
+#define QUARTER_ROUND(x, a, b, c, d) ( \
+	x[a] += x[b], \
+	x[d] = rol32((x[d] ^ x[a]), 16), \
+	x[c] += x[d], \
+	x[b] = rol32((x[b] ^ x[c]), 12), \
+	x[a] += x[b], \
+	x[d] = rol32((x[d] ^ x[a]), 8), \
+	x[c] += x[d], \
+	x[b] = rol32((x[b] ^ x[c]), 7) \
+)
+
+#define C(i, j) (i * 4 + j)
+
+#define DOUBLE_ROUND(x) ( \
+	/* Column Round */ \
+	QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
+	QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
+	QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
+	QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
+	/* Diagonal Round */ \
+	QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
+	QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
+	QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
+	QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
+)
+
+#define TWENTY_ROUNDS(x) ( \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x), \
+	DOUBLE_ROUND(x) \
+)
+
+static void chacha20_block_generic(struct chacha20_ctx *ctx, __le32 *stream)
+{
+	u32 x[CHACHA20_BLOCK_WORDS];
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(x); ++i)
+		x[i] = ctx->state[i];
+
+	TWENTY_ROUNDS(x);
+
+	for (i = 0; i < ARRAY_SIZE(x); ++i)
+		stream[i] = cpu_to_le32(x[i] + ctx->state[i]);
+
+	ctx->counter[0] += 1;
+}
+
+static void chacha20_generic(struct chacha20_ctx *ctx, u8 *out, const u8 *in,
+			     u32 len)
+{
+	__le32 buf[CHACHA20_BLOCK_WORDS];
+
+	while (len >= CHACHA20_BLOCK_SIZE) {
+		chacha20_block_generic(ctx, buf);
+		crypto_xor_cpy(out, in, (u8 *)buf, CHACHA20_BLOCK_SIZE);
+		len -= CHACHA20_BLOCK_SIZE;
+		out += CHACHA20_BLOCK_SIZE;
+		in += CHACHA20_BLOCK_SIZE;
+	}
+	if (len) {
+		chacha20_block_generic(ctx, buf);
+		crypto_xor_cpy(out, in, (u8 *)buf, len);
+	}
+}
+
+void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
+	      simd_context_t *simd_context)
+{
+	if (!chacha20_arch(ctx, dst, src, len, simd_context))
+		chacha20_generic(ctx, dst, src, len);
+}
+EXPORT_SYMBOL(chacha20);
+
+static void hchacha20_generic(u32 derived_key[CHACHA20_KEY_WORDS],
+			      const u8 nonce[HCHACHA20_NONCE_SIZE],
+			      const u8 key[HCHACHA20_KEY_SIZE])
+{
+	u32 x[] = { CHACHA20_CONSTANT_EXPA,
+		    CHACHA20_CONSTANT_ND_3,
+		    CHACHA20_CONSTANT_2_BY,
+		    CHACHA20_CONSTANT_TE_K,
+		    get_unaligned_le32(key +  0),
+		    get_unaligned_le32(key +  4),
+		    get_unaligned_le32(key +  8),
+		    get_unaligned_le32(key + 12),
+		    get_unaligned_le32(key + 16),
+		    get_unaligned_le32(key + 20),
+		    get_unaligned_le32(key + 24),
+		    get_unaligned_le32(key + 28),
+		    get_unaligned_le32(nonce +  0),
+		    get_unaligned_le32(nonce +  4),
+		    get_unaligned_le32(nonce +  8),
+		    get_unaligned_le32(nonce + 12)
+	};
+
+	TWENTY_ROUNDS(x);
+
+	memcpy(derived_key + 0, x +  0, sizeof(u32) * 4);
+	memcpy(derived_key + 4, x + 12, sizeof(u32) * 4);
+}
+
+/* Derived key should be 32-bit aligned */
+void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
+	       const u8 nonce[HCHACHA20_NONCE_SIZE],
+	       const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context)
+{
+	if (!hchacha20_arch(derived_key, nonce, key, simd_context))
+		hchacha20_generic(derived_key, nonce, key);
+}
+EXPORT_SYMBOL(hchacha20);
+
+#include "../selftest/chacha20.c"
+
+static bool nosimd __initdata = false;
+
+static int __init mod_init(void)
+{
+	if (!nosimd)
+		chacha20_fpu_init();
+	if (!selftest_run("chacha20", chacha20_selftest, chacha20_nobs,
+			  ARRAY_SIZE(chacha20_nobs)))
+		return -ENOTRECOVERABLE;
+	return 0;
+}
+
+static void __exit mod_exit(void)
+{
+}
+
+module_param(nosimd, bool, 0);
+module_init(mod_init);
+module_exit(mod_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ChaCha20 stream cipher");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/lib/zinc/selftest/chacha20.c b/lib/zinc/selftest/chacha20.c
new file mode 100644
index 000000000000..b8c9c709071d
--- /dev/null
+++ b/lib/zinc/selftest/chacha20.c
@@ -0,0 +1,2698 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+struct chacha20_testvec {
+	const u8 *input, *output, *key;
+	u64 nonce;
+	size_t ilen;
+};
+
+struct hchacha20_testvec {
+	u8 key[HCHACHA20_KEY_SIZE];
+	u8 nonce[HCHACHA20_NONCE_SIZE];
+	u8 output[CHACHA20_KEY_SIZE];
+};
+
+/* These test vectors are generated by reference implementations and are
+ * designed to check chacha20 implementation block handling, as well as from
+ * the draft-arciszewski-xchacha-01 document.
+ */
+
+static const u8 input01[] __initconst = { };
+static const u8 output01[] __initconst = { };
+static const u8 key01[] __initconst = {
+	0x09, 0xf4, 0xe8, 0x57, 0x10, 0xf2, 0x12, 0xc3,
+	0xc6, 0x91, 0xc4, 0x09, 0x97, 0x46, 0xef, 0xfe,
+	0x02, 0x00, 0xe4, 0x5c, 0x82, 0xed, 0x16, 0xf3,
+	0x32, 0xbe, 0xec, 0x7a, 0xe6, 0x68, 0x12, 0x26
+};
+enum { nonce01 = 0x3834e2afca3c66d3ULL };
+
+static const u8 input02[] __initconst = {
+	0x9d
+};
+static const u8 output02[] __initconst = {
+	0x94
+};
+static const u8 key02[] __initconst = {
+	0x8c, 0x01, 0xac, 0xaf, 0x62, 0x63, 0x56, 0x7a,
+	0xad, 0x23, 0x4c, 0x58, 0x29, 0x29, 0xbe, 0xab,
+	0xe9, 0xf8, 0xdf, 0x6c, 0x8c, 0x74, 0x4d, 0x7d,
+	0x13, 0x94, 0x10, 0x02, 0x3d, 0x8e, 0x9f, 0x94
+};
+enum { nonce02 = 0x5d1b3bfdedd9f73aULL };
+
+static const u8 input03[] __initconst = {
+	0x04, 0x16
+};
+static const u8 output03[] __initconst = {
+	0x92, 0x07
+};
+static const u8 key03[] __initconst = {
+	0x22, 0x0c, 0x79, 0x2c, 0x38, 0x51, 0xbe, 0x99,
+	0xa9, 0x59, 0x24, 0x50, 0xef, 0x87, 0x38, 0xa6,
+	0xa0, 0x97, 0x20, 0xcb, 0xb4, 0x0c, 0x94, 0x67,
+	0x1f, 0x98, 0xdc, 0xc4, 0x83, 0xbc, 0x35, 0x4d
+};
+enum { nonce03 = 0x7a3353ad720a3e2eULL };
+
+static const u8 input04[] __initconst = {
+	0xc7, 0xcc, 0xd0
+};
+static const u8 output04[] __initconst = {
+	0xd8, 0x41, 0x80
+};
+static const u8 key04[] __initconst = {
+	0x81, 0x5e, 0x12, 0x01, 0xc4, 0x36, 0x15, 0x03,
+	0x11, 0xa0, 0xe9, 0x86, 0xbb, 0x5a, 0xdc, 0x45,
+	0x7d, 0x5e, 0x98, 0xf8, 0x06, 0x76, 0x1c, 0xec,
+	0xc0, 0xf7, 0xca, 0x4e, 0x99, 0xd9, 0x42, 0x38
+};
+enum { nonce04 = 0x6816e2fc66176da2ULL };
+
+static const u8 input05[] __initconst = {
+	0x48, 0xf1, 0x31, 0x5f
+};
+static const u8 output05[] __initconst = {
+	0x48, 0xf7, 0x13, 0x67
+};
+static const u8 key05[] __initconst = {
+	0x3f, 0xd6, 0xb6, 0x5e, 0x2f, 0xda, 0x82, 0x39,
+	0x97, 0x06, 0xd3, 0x62, 0x4f, 0xbd, 0xcb, 0x9b,
+	0x1d, 0xe6, 0x4a, 0x76, 0xab, 0xdd, 0x14, 0x50,
+	0x59, 0x21, 0xe3, 0xb2, 0xc7, 0x95, 0xbc, 0x45
+};
+enum { nonce05 = 0xc41a7490e228cc42ULL };
+
+static const u8 input06[] __initconst = {
+	0xae, 0xa2, 0x85, 0x1d, 0xc8
+};
+static const u8 output06[] __initconst = {
+	0xfa, 0xff, 0x45, 0x6b, 0x6f
+};
+static const u8 key06[] __initconst = {
+	0x04, 0x8d, 0xea, 0x67, 0x20, 0x78, 0xfb, 0x8f,
+	0x49, 0x80, 0x35, 0xb5, 0x7b, 0xe4, 0x31, 0x74,
+	0x57, 0x43, 0x3a, 0x64, 0x64, 0xb9, 0xe6, 0x23,
+	0x4d, 0xfe, 0xb8, 0x7b, 0x71, 0x4d, 0x9d, 0x21
+};
+enum { nonce06 = 0x251366db50b10903ULL };
+
+static const u8 input07[] __initconst = {
+	0x1a, 0x32, 0x85, 0xb6, 0xe8, 0x52
+};
+static const u8 output07[] __initconst = {
+	0xd3, 0x5f, 0xf0, 0x07, 0x69, 0xec
+};
+static const u8 key07[] __initconst = {
+	0xbf, 0x2d, 0x42, 0x99, 0x97, 0x76, 0x04, 0xad,
+	0xd3, 0x8f, 0x6e, 0x6a, 0x34, 0x85, 0xaf, 0x81,
+	0xef, 0x36, 0x33, 0xd5, 0x43, 0xa2, 0xaa, 0x08,
+	0x0f, 0x77, 0x42, 0x83, 0x58, 0xc5, 0x42, 0x2a
+};
+enum { nonce07 = 0xe0796da17dba9b58ULL };
+
+static const u8 input08[] __initconst = {
+	0x40, 0xae, 0xcd, 0xe4, 0x3d, 0x22, 0xe0
+};
+static const u8 output08[] __initconst = {
+	0xfd, 0x8a, 0x9f, 0x3d, 0x05, 0xc9, 0xd3
+};
+static const u8 key08[] __initconst = {
+	0xdc, 0x3f, 0x41, 0xe3, 0x23, 0x2a, 0x8d, 0xf6,
+	0x41, 0x2a, 0xa7, 0x66, 0x05, 0x68, 0xe4, 0x7b,
+	0xc4, 0x58, 0xd6, 0xcc, 0xdf, 0x0d, 0xc6, 0x25,
+	0x1b, 0x61, 0x32, 0x12, 0x4e, 0xf1, 0xe6, 0x29
+};
+enum { nonce08 = 0xb1d2536d9e159832ULL };
+
+static const u8 input09[] __initconst = {
+	0xba, 0x1d, 0x14, 0x16, 0x9f, 0x83, 0x67, 0x24
+};
+static const u8 output09[] __initconst = {
+	0x7c, 0xe3, 0x78, 0x1d, 0xa2, 0xe7, 0xe9, 0x39
+};
+static const u8 key09[] __initconst = {
+	0x17, 0x55, 0x90, 0x52, 0xa4, 0xce, 0x12, 0xae,
+	0xd4, 0xfd, 0xd4, 0xfb, 0xd5, 0x18, 0x59, 0x50,
+	0x4e, 0x51, 0x99, 0x32, 0x09, 0x31, 0xfc, 0xf7,
+	0x27, 0x10, 0x8e, 0xa2, 0x4b, 0xa5, 0xf5, 0x62
+};
+enum { nonce09 = 0x495fc269536d003ULL };
+
+static const u8 input10[] __initconst = {
+	0x09, 0xfd, 0x3c, 0x0b, 0x3d, 0x0e, 0xf3, 0x9d,
+	0x27
+};
+static const u8 output10[] __initconst = {
+	0xdc, 0xe4, 0x33, 0x60, 0x0c, 0x07, 0xcb, 0x51,
+	0x6b
+};
+static const u8 key10[] __initconst = {
+	0x4e, 0x00, 0x72, 0x37, 0x0f, 0x52, 0x4d, 0x6f,
+	0x37, 0x50, 0x3c, 0xb3, 0x51, 0x81, 0x49, 0x16,
+	0x7e, 0xfd, 0xb1, 0x51, 0x72, 0x2e, 0xe4, 0x16,
+	0x68, 0x5c, 0x5b, 0x8a, 0xc3, 0x90, 0x70, 0x04
+};
+enum { nonce10 = 0x1ad9d1114d88cbbdULL };
+
+static const u8 input11[] __initconst = {
+	0x70, 0x18, 0x52, 0x85, 0xba, 0x66, 0xff, 0x2c,
+	0x9a, 0x46
+};
+static const u8 output11[] __initconst = {
+	0xf5, 0x2a, 0x7a, 0xfd, 0x31, 0x7c, 0x91, 0x41,
+	0xb1, 0xcf
+};
+static const u8 key11[] __initconst = {
+	0x48, 0xb4, 0xd0, 0x7c, 0x88, 0xd1, 0x96, 0x0d,
+	0x80, 0x33, 0xb4, 0xd5, 0x31, 0x9a, 0x88, 0xca,
+	0x14, 0xdc, 0xf0, 0xa8, 0xf3, 0xac, 0xb8, 0x47,
+	0x75, 0x86, 0x7c, 0x88, 0x50, 0x11, 0x43, 0x40
+};
+enum { nonce11 = 0x47c35dd1f4f8aa4fULL };
+
+static const u8 input12[] __initconst = {
+	0x9e, 0x8e, 0x3d, 0x2a, 0x05, 0xfd, 0xe4, 0x90,
+	0x24, 0x1c, 0xd3
+};
+static const u8 output12[] __initconst = {
+	0x97, 0x72, 0x40, 0x9f, 0xc0, 0x6b, 0x05, 0x33,
+	0x42, 0x7e, 0x28
+};
+static const u8 key12[] __initconst = {
+	0xee, 0xff, 0x33, 0x33, 0xe0, 0x28, 0xdf, 0xa2,
+	0xb6, 0x5e, 0x25, 0x09, 0x52, 0xde, 0xa5, 0x9c,
+	0x8f, 0x95, 0xa9, 0x03, 0x77, 0x0f, 0xbe, 0xa1,
+	0xd0, 0x7d, 0x73, 0x2f, 0xf8, 0x7e, 0x51, 0x44
+};
+enum { nonce12 = 0xc22d044dc6ea4af3ULL };
+
+static const u8 input13[] __initconst = {
+	0x9c, 0x16, 0xa2, 0x22, 0x4d, 0xbe, 0x04, 0x9a,
+	0xb3, 0xb5, 0xc6, 0x58
+};
+static const u8 output13[] __initconst = {
+	0xf0, 0x81, 0xdb, 0x6d, 0xa3, 0xe9, 0xb2, 0xc6,
+	0x32, 0x50, 0x16, 0x9f
+};
+static const u8 key13[] __initconst = {
+	0x96, 0xb3, 0x01, 0xd2, 0x7a, 0x8c, 0x94, 0x09,
+	0x4f, 0x58, 0xbe, 0x80, 0xcc, 0xa9, 0x7e, 0x2d,
+	0xad, 0x58, 0x3b, 0x63, 0xb8, 0x5c, 0x17, 0xce,
+	0xbf, 0x43, 0x33, 0x7a, 0x7b, 0x82, 0x28, 0x2f
+};
+enum { nonce13 = 0x2a5d05d88cd7b0daULL };
+
+static const u8 input14[] __initconst = {
+	0x57, 0x4f, 0xaa, 0x30, 0xe6, 0x23, 0x50, 0x86,
+	0x91, 0xa5, 0x60, 0x96, 0x2b
+};
+static const u8 output14[] __initconst = {
+	0x6c, 0x1f, 0x3b, 0x42, 0xb6, 0x2f, 0xf0, 0xbd,
+	0x76, 0x60, 0xc7, 0x7e, 0x8d
+};
+static const u8 key14[] __initconst = {
+	0x22, 0x85, 0xaf, 0x8f, 0xa3, 0x53, 0xa0, 0xc4,
+	0xb5, 0x75, 0xc0, 0xba, 0x30, 0x92, 0xc3, 0x32,
+	0x20, 0x5a, 0x8f, 0x7e, 0x93, 0xda, 0x65, 0x18,
+	0xd1, 0xf6, 0x9a, 0x9b, 0x8f, 0x85, 0x30, 0xe6
+};
+enum { nonce14 = 0xf9946c166aa4475fULL };
+
+static const u8 input15[] __initconst = {
+	0x89, 0x81, 0xc7, 0xe2, 0x00, 0xac, 0x52, 0x70,
+	0xa4, 0x79, 0xab, 0xeb, 0x74, 0xf7
+};
+static const u8 output15[] __initconst = {
+	0xb4, 0xd0, 0xa9, 0x9d, 0x15, 0x5f, 0x48, 0xd6,
+	0x00, 0x7e, 0x4c, 0x77, 0x5a, 0x46
+};
+static const u8 key15[] __initconst = {
+	0x0a, 0x66, 0x36, 0xca, 0x5d, 0x82, 0x23, 0xb6,
+	0xe4, 0x9b, 0xad, 0x5e, 0xd0, 0x7f, 0xf6, 0x7a,
+	0x7b, 0x03, 0xa7, 0x4c, 0xfd, 0xec, 0xd5, 0xa1,
+	0xfc, 0x25, 0x54, 0xda, 0x5a, 0x5c, 0xf0, 0x2c
+};
+enum { nonce15 = 0x9ab2b87a35e772c8ULL };
+
+static const u8 input16[] __initconst = {
+	0x5f, 0x09, 0xc0, 0x8b, 0x1e, 0xde, 0xca, 0xd9,
+	0xb7, 0x5c, 0x23, 0xc9, 0x55, 0x1e, 0xcf
+};
+static const u8 output16[] __initconst = {
+	0x76, 0x9b, 0x53, 0xf3, 0x66, 0x88, 0x28, 0x60,
+	0x98, 0x80, 0x2c, 0xa8, 0x80, 0xa6, 0x48
+};
+static const u8 key16[] __initconst = {
+	0x80, 0xb5, 0x51, 0xdf, 0x17, 0x5b, 0xb0, 0xef,
+	0x8b, 0x5b, 0x2e, 0x3e, 0xc5, 0xe3, 0xa5, 0x86,
+	0xac, 0x0d, 0x8e, 0x32, 0x90, 0x9d, 0x82, 0x27,
+	0xf1, 0x23, 0x26, 0xc3, 0xea, 0x55, 0xb6, 0x63
+};
+enum { nonce16 = 0xa82e9d39e4d02ef5ULL };
+
+static const u8 input17[] __initconst = {
+	0x87, 0x0b, 0x36, 0x71, 0x7c, 0xb9, 0x0b, 0x80,
+	0x4d, 0x77, 0x5c, 0x4f, 0xf5, 0x51, 0x0e, 0x1a
+};
+static const u8 output17[] __initconst = {
+	0xf1, 0x12, 0x4a, 0x8a, 0xd9, 0xd0, 0x08, 0x67,
+	0x66, 0xd7, 0x34, 0xea, 0x32, 0x3b, 0x54, 0x0e
+};
+static const u8 key17[] __initconst = {
+	0xfb, 0x71, 0x5f, 0x3f, 0x7a, 0xc0, 0x9a, 0xc8,
+	0xc8, 0xcf, 0xe8, 0xbc, 0xfb, 0x09, 0xbf, 0x89,
+	0x6a, 0xef, 0xd5, 0xe5, 0x36, 0x87, 0x14, 0x76,
+	0x00, 0xb9, 0x32, 0x28, 0xb2, 0x00, 0x42, 0x53
+};
+enum { nonce17 = 0x229b87e73d557b96ULL };
+
+static const u8 input18[] __initconst = {
+	0x38, 0x42, 0xb5, 0x37, 0xb4, 0x3d, 0xfe, 0x59,
+	0x38, 0x68, 0x88, 0xfa, 0x89, 0x8a, 0x5f, 0x90,
+	0x3c
+};
+static const u8 output18[] __initconst = {
+	0xac, 0xad, 0x14, 0xe8, 0x7e, 0xd7, 0xce, 0x96,
+	0x3d, 0xb3, 0x78, 0x85, 0x22, 0x5a, 0xcb, 0x39,
+	0xd4
+};
+static const u8 key18[] __initconst = {
+	0xe1, 0xc1, 0xa8, 0xe0, 0x91, 0xe7, 0x38, 0x66,
+	0x80, 0x17, 0x12, 0x3c, 0x5e, 0x2d, 0xbb, 0xea,
+	0xeb, 0x6c, 0x8b, 0xc8, 0x1b, 0x6f, 0x7c, 0xea,
+	0x50, 0x57, 0x23, 0x1e, 0x65, 0x6f, 0x6d, 0x81
+};
+enum { nonce18 = 0xfaf5fcf8f30e57a9ULL };
+
+static const u8 input19[] __initconst = {
+	0x1c, 0x4a, 0x30, 0x26, 0xef, 0x9a, 0x32, 0xa7,
+	0x8f, 0xe5, 0xc0, 0x0f, 0x30, 0x3a, 0xbf, 0x38,
+	0x54, 0xba
+};
+static const u8 output19[] __initconst = {
+	0x57, 0x67, 0x54, 0x4f, 0x31, 0xd6, 0xef, 0x35,
+	0x0b, 0xd9, 0x52, 0xa7, 0x46, 0x7d, 0x12, 0x17,
+	0x1e, 0xe3
+};
+static const u8 key19[] __initconst = {
+	0x5a, 0x79, 0xc1, 0xea, 0x33, 0xb3, 0xc7, 0x21,
+	0xec, 0xf8, 0xcb, 0xd2, 0x58, 0x96, 0x23, 0xd6,
+	0x4d, 0xed, 0x2f, 0xdf, 0x8a, 0x79, 0xe6, 0x8b,
+	0x38, 0xa3, 0xc3, 0x7a, 0x33, 0xda, 0x02, 0xc7
+};
+enum { nonce19 = 0x2b23b61840429604ULL };
+
+static const u8 input20[] __initconst = {
+	0xab, 0xe9, 0x32, 0xbb, 0x35, 0x17, 0xe0, 0x60,
+	0x80, 0xb1, 0x27, 0xdc, 0xe6, 0x62, 0x9e, 0x0c,
+	0x77, 0xf4, 0x50
+};
+static const u8 output20[] __initconst = {
+	0x54, 0x6d, 0xaa, 0xfc, 0x08, 0xfb, 0x71, 0xa8,
+	0xd6, 0x1d, 0x7d, 0xf3, 0x45, 0x10, 0xb5, 0x4c,
+	0xcc, 0x4b, 0x45
+};
+static const u8 key20[] __initconst = {
+	0xa3, 0xfd, 0x3d, 0xa9, 0xeb, 0xea, 0x2c, 0x69,
+	0xcf, 0x59, 0x38, 0x13, 0x5b, 0xa7, 0x53, 0x8f,
+	0x5e, 0xa2, 0x33, 0x86, 0x4c, 0x75, 0x26, 0xaf,
+	0x35, 0x12, 0x09, 0x71, 0x81, 0xea, 0x88, 0x66
+};
+enum { nonce20 = 0x7459667a8fadff58ULL };
+
+static const u8 input21[] __initconst = {
+	0xa6, 0x82, 0x21, 0x23, 0xad, 0x27, 0x3f, 0xc6,
+	0xd7, 0x16, 0x0d, 0x6d, 0x24, 0x15, 0x54, 0xc5,
+	0x96, 0x72, 0x59, 0x8a
+};
+static const u8 output21[] __initconst = {
+	0x5f, 0x34, 0x32, 0xea, 0x06, 0xd4, 0x9e, 0x01,
+	0xdc, 0x32, 0x32, 0x40, 0x66, 0x73, 0x6d, 0x4a,
+	0x6b, 0x12, 0x20, 0xe8
+};
+static const u8 key21[] __initconst = {
+	0x96, 0xfd, 0x13, 0x23, 0xa9, 0x89, 0x04, 0xe6,
+	0x31, 0xa5, 0x2c, 0xc1, 0x40, 0xd5, 0x69, 0x5c,
+	0x32, 0x79, 0x56, 0xe0, 0x29, 0x93, 0x8f, 0xe8,
+	0x5f, 0x65, 0x53, 0x7f, 0xc1, 0xe9, 0xaf, 0xaf
+};
+enum { nonce21 = 0xba8defee9d8e13b5ULL };
+
+static const u8 input22[] __initconst = {
+	0xb8, 0x32, 0x1a, 0x81, 0xd8, 0x38, 0x89, 0x5a,
+	0xb0, 0x05, 0xbe, 0xf4, 0xd2, 0x08, 0xc6, 0xee,
+	0x79, 0x7b, 0x3a, 0x76, 0x59
+};
+static const u8 output22[] __initconst = {
+	0xb7, 0xba, 0xae, 0x80, 0xe4, 0x9f, 0x79, 0x84,
+	0x5a, 0x48, 0x50, 0x6d, 0xcb, 0xd0, 0x06, 0x0c,
+	0x15, 0x63, 0xa7, 0x5e, 0xbd
+};
+static const u8 key22[] __initconst = {
+	0x0f, 0x35, 0x3d, 0xeb, 0x5f, 0x0a, 0x82, 0x0d,
+	0x24, 0x59, 0x71, 0xd8, 0xe6, 0x2d, 0x5f, 0xe1,
+	0x7e, 0x0c, 0xae, 0xf6, 0xdc, 0x2c, 0xc5, 0x4a,
+	0x38, 0x88, 0xf2, 0xde, 0xd9, 0x5f, 0x76, 0x7c
+};
+enum { nonce22 = 0xe77f1760e9f5e192ULL };
+
+static const u8 input23[] __initconst = {
+	0x4b, 0x1e, 0x79, 0x99, 0xcf, 0xef, 0x64, 0x4b,
+	0xb0, 0x66, 0xae, 0x99, 0x2e, 0x68, 0x97, 0xf5,
+	0x5d, 0x9b, 0x3f, 0x7a, 0xa9, 0xd9
+};
+static const u8 output23[] __initconst = {
+	0x5f, 0xa4, 0x08, 0x39, 0xca, 0xfa, 0x2b, 0x83,
+	0x5d, 0x95, 0x70, 0x7c, 0x2e, 0xd4, 0xae, 0xfa,
+	0x45, 0x4a, 0x77, 0x7f, 0xa7, 0x65
+};
+static const u8 key23[] __initconst = {
+	0x4a, 0x06, 0x83, 0x64, 0xaa, 0xe3, 0x38, 0x32,
+	0x28, 0x5d, 0xa4, 0xb2, 0x5a, 0xee, 0xcf, 0x8e,
+	0x19, 0x67, 0xf1, 0x09, 0xe8, 0xc9, 0xf6, 0x40,
+	0x02, 0x6d, 0x0b, 0xde, 0xfa, 0x81, 0x03, 0xb1
+};
+enum { nonce23 = 0x9b3f349158709849ULL };
+
+static const u8 input24[] __initconst = {
+	0xc6, 0xfc, 0x47, 0x5e, 0xd8, 0xed, 0xa9, 0xe5,
+	0x4f, 0x82, 0x79, 0x35, 0xee, 0x3e, 0x7e, 0x3e,
+	0x35, 0x70, 0x6e, 0xfa, 0x6d, 0x08, 0xe8
+};
+static const u8 output24[] __initconst = {
+	0x3b, 0xc5, 0xf8, 0xc2, 0xbf, 0x2b, 0x90, 0x33,
+	0xa6, 0xae, 0xf5, 0x5a, 0x65, 0xb3, 0x3d, 0xe1,
+	0xcd, 0x5f, 0x55, 0xfa, 0xe7, 0xa5, 0x4a
+};
+static const u8 key24[] __initconst = {
+	0x00, 0x24, 0xc3, 0x65, 0x5f, 0xe6, 0x31, 0xbb,
+	0x6d, 0xfc, 0x20, 0x7b, 0x1b, 0xa8, 0x96, 0x26,
+	0x55, 0x21, 0x62, 0x25, 0x7e, 0xba, 0x23, 0x97,
+	0xc9, 0xb8, 0x53, 0xa8, 0xef, 0xab, 0xad, 0x61
+};
+enum { nonce24 = 0x13ee0b8f526177c3ULL };
+
+static const u8 input25[] __initconst = {
+	0x33, 0x07, 0x16, 0xb1, 0x34, 0x33, 0x67, 0x04,
+	0x9b, 0x0a, 0xce, 0x1b, 0xe9, 0xde, 0x1a, 0xec,
+	0xd0, 0x55, 0xfb, 0xc6, 0x33, 0xaf, 0x2d, 0xe3
+};
+static const u8 output25[] __initconst = {
+	0x05, 0x93, 0x10, 0xd1, 0x58, 0x6f, 0x68, 0x62,
+	0x45, 0xdb, 0x91, 0xae, 0x70, 0xcf, 0xd4, 0x5f,
+	0xee, 0xdf, 0xd5, 0xba, 0x9e, 0xde, 0x68, 0xe6
+};
+static const u8 key25[] __initconst = {
+	0x83, 0xa9, 0x4f, 0x5d, 0x74, 0xd5, 0x91, 0xb3,
+	0xc9, 0x97, 0x19, 0x15, 0xdb, 0x0d, 0x0b, 0x4a,
+	0x3d, 0x55, 0xcf, 0xab, 0xb2, 0x05, 0x21, 0x35,
+	0x45, 0x50, 0xeb, 0xf8, 0xf5, 0xbf, 0x36, 0x35
+};
+enum { nonce25 = 0x7c6f459e49ebfebcULL };
+
+static const u8 input26[] __initconst = {
+	0xc2, 0xd4, 0x7a, 0xa3, 0x92, 0xe1, 0xac, 0x46,
+	0x1a, 0x15, 0x38, 0xc9, 0xb5, 0xfd, 0xdf, 0x84,
+	0x38, 0xbc, 0x6b, 0x1d, 0xb0, 0x83, 0x43, 0x04,
+	0x39
+};
+static const u8 output26[] __initconst = {
+	0x7f, 0xde, 0xd6, 0x87, 0xcc, 0x34, 0xf4, 0x12,
+	0xae, 0x55, 0xa5, 0x89, 0x95, 0x29, 0xfc, 0x18,
+	0xd8, 0xc7, 0x7c, 0xd3, 0xcb, 0x85, 0x95, 0x21,
+	0xd2
+};
+static const u8 key26[] __initconst = {
+	0xe4, 0xd0, 0x54, 0x1d, 0x7d, 0x47, 0xa8, 0xc1,
+	0x08, 0xca, 0xe2, 0x42, 0x52, 0x95, 0x16, 0x43,
+	0xa3, 0x01, 0x23, 0x03, 0xcc, 0x3b, 0x81, 0x78,
+	0x23, 0xcc, 0xa7, 0x36, 0xd7, 0xa0, 0x97, 0x8d
+};
+enum { nonce26 = 0x524401012231683ULL };
+
+static const u8 input27[] __initconst = {
+	0x0d, 0xb0, 0xcf, 0xec, 0xfc, 0x38, 0x9d, 0x9d,
+	0x89, 0x00, 0x96, 0xf2, 0x79, 0x8a, 0xa1, 0x8d,
+	0x32, 0x5e, 0xc6, 0x12, 0x22, 0xec, 0xf6, 0x52,
+	0xc1, 0x0b
+};
+static const u8 output27[] __initconst = {
+	0xef, 0xe1, 0xf2, 0x67, 0x8e, 0x2c, 0x00, 0x9f,
+	0x1d, 0x4c, 0x66, 0x1f, 0x94, 0x58, 0xdc, 0xbb,
+	0xb9, 0x11, 0x8f, 0x74, 0xfd, 0x0e, 0x14, 0x01,
+	0xa8, 0x21
+};
+static const u8 key27[] __initconst = {
+	0x78, 0x71, 0xa4, 0xe6, 0xb2, 0x95, 0x44, 0x12,
+	0x81, 0xaa, 0x7e, 0x94, 0xa7, 0x8d, 0x44, 0xea,
+	0xc4, 0xbc, 0x01, 0xb7, 0x9e, 0xf7, 0x82, 0x9e,
+	0x3b, 0x23, 0x9f, 0x31, 0xdd, 0xb8, 0x0d, 0x18
+};
+enum { nonce27 = 0xd58fe0e58fb254d6ULL };
+
+static const u8 input28[] __initconst = {
+	0xaa, 0xb7, 0xaa, 0xd9, 0xa8, 0x91, 0xd7, 0x8a,
+	0x97, 0x9b, 0xdb, 0x7c, 0x47, 0x2b, 0xdb, 0xd2,
+	0xda, 0x77, 0xb1, 0xfa, 0x2d, 0x12, 0xe3, 0xe9,
+	0xc4, 0x7f, 0x54
+};
+static const u8 output28[] __initconst = {
+	0x87, 0x84, 0xa9, 0xa6, 0xad, 0x8f, 0xe6, 0x0f,
+	0x69, 0xf8, 0x21, 0xc3, 0x54, 0x95, 0x0f, 0xb0,
+	0x4e, 0xc7, 0x02, 0xe4, 0x04, 0xb0, 0x6c, 0x42,
+	0x8c, 0x63, 0xe3
+};
+static const u8 key28[] __initconst = {
+	0x12, 0x23, 0x37, 0x95, 0x04, 0xb4, 0x21, 0xe8,
+	0xbc, 0x65, 0x46, 0x7a, 0xf4, 0x01, 0x05, 0x3f,
+	0xb1, 0x34, 0x73, 0xd2, 0x49, 0xbf, 0x6f, 0x20,
+	0xbd, 0x23, 0x58, 0x5f, 0xd1, 0x73, 0x57, 0xa6
+};
+enum { nonce28 = 0x3a04d51491eb4e07ULL };
+
+static const u8 input29[] __initconst = {
+	0x55, 0xd0, 0xd4, 0x4b, 0x17, 0xc8, 0xc4, 0x2b,
+	0xc0, 0x28, 0xbd, 0x9d, 0x65, 0x4d, 0xaf, 0x77,
+	0x72, 0x7c, 0x36, 0x68, 0xa7, 0xb6, 0x87, 0x4d,
+	0xb9, 0x27, 0x25, 0x6c
+};
+static const u8 output29[] __initconst = {
+	0x0e, 0xac, 0x4c, 0xf5, 0x12, 0xb5, 0x56, 0xa5,
+	0x00, 0x9a, 0xd6, 0xe5, 0x1a, 0x59, 0x2c, 0xf6,
+	0x42, 0x22, 0xcf, 0x23, 0x98, 0x34, 0x29, 0xac,
+	0x6e, 0xe3, 0x37, 0x6d
+};
+static const u8 key29[] __initconst = {
+	0xda, 0x9d, 0x05, 0x0c, 0x0c, 0xba, 0x75, 0xb9,
+	0x9e, 0xb1, 0x8d, 0xd9, 0x73, 0x26, 0x2c, 0xa9,
+	0x3a, 0xb5, 0xcb, 0x19, 0x49, 0xa7, 0x4f, 0xf7,
+	0x64, 0x35, 0x23, 0x20, 0x2a, 0x45, 0x78, 0xc7
+};
+enum { nonce29 = 0xc25ac9982431cbfULL };
+
+static const u8 input30[] __initconst = {
+	0x4e, 0xd6, 0x85, 0xbb, 0xe7, 0x99, 0xfa, 0x04,
+	0x33, 0x24, 0xfd, 0x75, 0x18, 0xe3, 0xd3, 0x25,
+	0xcd, 0xca, 0xae, 0x00, 0xbe, 0x52, 0x56, 0x4a,
+	0x31, 0xe9, 0x4f, 0xae, 0x8a
+};
+static const u8 output30[] __initconst = {
+	0x30, 0x36, 0x32, 0xa2, 0x3c, 0xb6, 0xf9, 0xf9,
+	0x76, 0x70, 0xad, 0xa6, 0x10, 0x41, 0x00, 0x4a,
+	0xfa, 0xce, 0x1b, 0x86, 0x05, 0xdb, 0x77, 0x96,
+	0xb3, 0xb7, 0x8f, 0x61, 0x24
+};
+static const u8 key30[] __initconst = {
+	0x49, 0x35, 0x4c, 0x15, 0x98, 0xfb, 0xc6, 0x57,
+	0x62, 0x6d, 0x06, 0xc3, 0xd4, 0x79, 0x20, 0x96,
+	0x05, 0x2a, 0x31, 0x63, 0xc0, 0x44, 0x42, 0x09,
+	0x13, 0x13, 0xff, 0x1b, 0xc8, 0x63, 0x1f, 0x0b
+};
+enum { nonce30 = 0x4967f9c08e41568bULL };
+
+static const u8 input31[] __initconst = {
+	0x91, 0x04, 0x20, 0x47, 0x59, 0xee, 0xa6, 0x0f,
+	0x04, 0x75, 0xc8, 0x18, 0x95, 0x44, 0x01, 0x28,
+	0x20, 0x6f, 0x73, 0x68, 0x66, 0xb5, 0x03, 0xb3,
+	0x58, 0x27, 0x6e, 0x7a, 0x76, 0xb8
+};
+static const u8 output31[] __initconst = {
+	0xe8, 0x03, 0x78, 0x9d, 0x13, 0x15, 0x98, 0xef,
+	0x64, 0x68, 0x12, 0x41, 0xb0, 0x29, 0x94, 0x0c,
+	0x83, 0x35, 0x46, 0xa9, 0x74, 0xe1, 0x75, 0xf0,
+	0xb6, 0x96, 0xc3, 0x6f, 0xd7, 0x70
+};
+static const u8 key31[] __initconst = {
+	0xef, 0xcd, 0x5a, 0x4a, 0xf4, 0x7e, 0x6a, 0x3a,
+	0x11, 0x88, 0x72, 0x94, 0xb8, 0xae, 0x84, 0xc3,
+	0x66, 0xe0, 0xde, 0x4b, 0x00, 0xa5, 0xd6, 0x2d,
+	0x50, 0xb7, 0x28, 0xff, 0x76, 0x57, 0x18, 0x1f
+};
+enum { nonce31 = 0xcb6f428fa4192e19ULL };
+
+static const u8 input32[] __initconst = {
+	0x90, 0x06, 0x50, 0x4b, 0x98, 0x14, 0x30, 0xf1,
+	0xb8, 0xd7, 0xf0, 0xa4, 0x3e, 0x4e, 0xd8, 0x00,
+	0xea, 0xdb, 0x4f, 0x93, 0x05, 0xef, 0x02, 0x71,
+	0x1a, 0xcd, 0xa3, 0xb1, 0xae, 0xd3, 0x18
+};
+static const u8 output32[] __initconst = {
+	0xcb, 0x4a, 0x37, 0x3f, 0xea, 0x40, 0xab, 0x86,
+	0xfe, 0xcc, 0x07, 0xd5, 0xdc, 0xb2, 0x25, 0xb6,
+	0xfd, 0x2a, 0x72, 0xbc, 0x5e, 0xd4, 0x75, 0xff,
+	0x71, 0xfc, 0xce, 0x1e, 0x6f, 0x22, 0xc1
+};
+static const u8 key32[] __initconst = {
+	0xfc, 0x6d, 0xc3, 0x80, 0xce, 0xa4, 0x31, 0xa1,
+	0xcc, 0xfa, 0x9d, 0x10, 0x0b, 0xc9, 0x11, 0x77,
+	0x34, 0xdb, 0xad, 0x1b, 0xc4, 0xfc, 0xeb, 0x79,
+	0x91, 0xda, 0x59, 0x3b, 0x0d, 0xb1, 0x19, 0x3b
+};
+enum { nonce32 = 0x88551bf050059467ULL };
+
+static const u8 input33[] __initconst = {
+	0x88, 0x94, 0x71, 0x92, 0xe8, 0xd7, 0xf9, 0xbd,
+	0x55, 0xe3, 0x22, 0xdb, 0x99, 0x51, 0xfb, 0x50,
+	0xbf, 0x82, 0xb5, 0x70, 0x8b, 0x2b, 0x6a, 0x03,
+	0x37, 0xa0, 0xc6, 0x19, 0x5d, 0xc9, 0xbc, 0xcc
+};
+static const u8 output33[] __initconst = {
+	0xb6, 0x17, 0x51, 0xc8, 0xea, 0x8a, 0x14, 0xdc,
+	0x23, 0x1b, 0xd4, 0xed, 0xbf, 0x50, 0xb9, 0x38,
+	0x00, 0xc2, 0x3f, 0x78, 0x3d, 0xbf, 0xa0, 0x84,
+	0xef, 0x45, 0xb2, 0x7d, 0x48, 0x7b, 0x62, 0xa7
+};
+static const u8 key33[] __initconst = {
+	0xb9, 0x8f, 0x6a, 0xad, 0xb4, 0x6f, 0xb5, 0xdc,
+	0x48, 0xfa, 0x43, 0x57, 0x62, 0x97, 0xef, 0x89,
+	0x4c, 0x5a, 0x7b, 0x67, 0xb8, 0x9d, 0xf0, 0x42,
+	0x2b, 0x8f, 0xf3, 0x18, 0x05, 0x2e, 0x48, 0xd0
+};
+enum { nonce33 = 0x31f16488fe8447f5ULL };
+
+static const u8 input34[] __initconst = {
+	0xda, 0x2b, 0x3d, 0x63, 0x9e, 0x4f, 0xc2, 0xb8,
+	0x7f, 0xc2, 0x1a, 0x8b, 0x0d, 0x95, 0x65, 0x55,
+	0x52, 0xba, 0x51, 0x51, 0xc0, 0x61, 0x9f, 0x0a,
+	0x5d, 0xb0, 0x59, 0x8c, 0x64, 0x6a, 0xab, 0xf5,
+	0x57
+};
+static const u8 output34[] __initconst = {
+	0x5c, 0xf6, 0x62, 0x24, 0x8c, 0x45, 0xa3, 0x26,
+	0xd0, 0xe4, 0x88, 0x1c, 0xed, 0xc4, 0x26, 0x58,
+	0xb5, 0x5d, 0x92, 0xc4, 0x17, 0x44, 0x1c, 0xb8,
+	0x2c, 0xf3, 0x55, 0x7e, 0xd6, 0xe5, 0xb3, 0x65,
+	0xa8
+};
+static const u8 key34[] __initconst = {
+	0xde, 0xd1, 0x27, 0xb7, 0x7c, 0xfa, 0xa6, 0x78,
+	0x39, 0x80, 0xdf, 0xb7, 0x46, 0xac, 0x71, 0x26,
+	0xd0, 0x2a, 0x56, 0x79, 0x12, 0xeb, 0x26, 0x37,
+	0x01, 0x0d, 0x30, 0xe0, 0xe3, 0x66, 0xb2, 0xf4
+};
+enum { nonce34 = 0x92d0d9b252c24149ULL };
+
+static const u8 input35[] __initconst = {
+	0x3a, 0x15, 0x5b, 0x75, 0x6e, 0xd0, 0x52, 0x20,
+	0x6c, 0x82, 0xfa, 0xce, 0x5b, 0xea, 0xf5, 0x43,
+	0xc1, 0x81, 0x7c, 0xb2, 0xac, 0x16, 0x3f, 0xd3,
+	0x5a, 0xaf, 0x55, 0x98, 0xf4, 0xc6, 0xba, 0x71,
+	0x25, 0x8b
+};
+static const u8 output35[] __initconst = {
+	0xb3, 0xaf, 0xac, 0x6d, 0x4d, 0xc7, 0x68, 0x56,
+	0x50, 0x5b, 0x69, 0x2a, 0xe5, 0x90, 0xf9, 0x5f,
+	0x99, 0x88, 0xff, 0x0c, 0xa6, 0xb1, 0x83, 0xd6,
+	0x80, 0xa6, 0x1b, 0xde, 0x94, 0xa4, 0x2c, 0xc3,
+	0x74, 0xfa
+};
+static const u8 key35[] __initconst = {
+	0xd8, 0x24, 0xe2, 0x06, 0xd7, 0x7a, 0xce, 0x81,
+	0x52, 0x72, 0x02, 0x69, 0x89, 0xc4, 0xe9, 0x53,
+	0x3b, 0x08, 0x5f, 0x98, 0x1e, 0x1b, 0x99, 0x6e,
+	0x28, 0x17, 0x6d, 0xba, 0xc0, 0x96, 0xf9, 0x3c
+};
+enum { nonce35 = 0x7baf968c4c8e3a37ULL };
+
+static const u8 input36[] __initconst = {
+	0x31, 0x5d, 0x4f, 0xe3, 0xac, 0xad, 0x17, 0xa6,
+	0xb5, 0x01, 0xe2, 0xc6, 0xd4, 0x7e, 0xc4, 0x80,
+	0xc0, 0x59, 0x72, 0xbb, 0x4b, 0x74, 0x6a, 0x41,
+	0x0f, 0x9c, 0xf6, 0xca, 0x20, 0xb3, 0x73, 0x07,
+	0x6b, 0x02, 0x2a
+};
+static const u8 output36[] __initconst = {
+	0xf9, 0x09, 0x92, 0x94, 0x7e, 0x31, 0xf7, 0x53,
+	0xe8, 0x8a, 0x5b, 0x20, 0xef, 0x9b, 0x45, 0x81,
+	0xba, 0x5e, 0x45, 0x63, 0xc1, 0xc7, 0x9e, 0x06,
+	0x0e, 0xd9, 0x62, 0x8e, 0x96, 0xf9, 0xfa, 0x43,
+	0x4d, 0xd4, 0x28
+};
+static const u8 key36[] __initconst = {
+	0x13, 0x30, 0x4c, 0x06, 0xae, 0x18, 0xde, 0x03,
+	0x1d, 0x02, 0x40, 0xf5, 0xbb, 0x19, 0xe3, 0x88,
+	0x41, 0xb1, 0x29, 0x15, 0x97, 0xc2, 0x69, 0x3f,
+	0x32, 0x2a, 0x0c, 0x8b, 0xcf, 0x83, 0x8b, 0x6c
+};
+enum { nonce36 = 0x226d251d475075a0ULL };
+
+static const u8 input37[] __initconst = {
+	0x10, 0x18, 0xbe, 0xfd, 0x66, 0xc9, 0x77, 0xcc,
+	0x43, 0xe5, 0x46, 0x0b, 0x08, 0x8b, 0xae, 0x11,
+	0x86, 0x15, 0xc2, 0xf6, 0x45, 0xd4, 0x5f, 0xd6,
+	0xb6, 0x5f, 0x9f, 0x3e, 0x97, 0xb7, 0xd4, 0xad,
+	0x0b, 0xe8, 0x31, 0x94
+};
+static const u8 output37[] __initconst = {
+	0x03, 0x2c, 0x1c, 0xee, 0xc6, 0xdd, 0xed, 0x38,
+	0x80, 0x6d, 0x84, 0x16, 0xc3, 0xc2, 0x04, 0x63,
+	0xcd, 0xa7, 0x6e, 0x36, 0x8b, 0xed, 0x78, 0x63,
+	0x95, 0xfc, 0x69, 0x7a, 0x3f, 0x8d, 0x75, 0x6b,
+	0x6c, 0x26, 0x56, 0x4d
+};
+static const u8 key37[] __initconst = {
+	0xac, 0x84, 0x4d, 0xa9, 0x29, 0x49, 0x3c, 0x39,
+	0x7f, 0xd9, 0xa6, 0x01, 0xf3, 0x7e, 0xfa, 0x4a,
+	0x14, 0x80, 0x22, 0x74, 0xf0, 0x29, 0x30, 0x2d,
+	0x07, 0x21, 0xda, 0xc0, 0x4d, 0x70, 0x56, 0xa2
+};
+enum { nonce37 = 0x167823ce3b64925aULL };
+
+static const u8 input38[] __initconst = {
+	0x30, 0x8f, 0xfa, 0x24, 0x29, 0xb1, 0xfb, 0xce,
+	0x31, 0x62, 0xdc, 0xd0, 0x46, 0xab, 0xe1, 0x31,
+	0xd9, 0xae, 0x60, 0x0d, 0xca, 0x0a, 0x49, 0x12,
+	0x3d, 0x92, 0xe9, 0x91, 0x67, 0x12, 0x62, 0x18,
+	0x89, 0xe2, 0xf9, 0x1c, 0xcc
+};
+static const u8 output38[] __initconst = {
+	0x56, 0x9c, 0xc8, 0x7a, 0xc5, 0x98, 0xa3, 0x0f,
+	0xba, 0xd5, 0x3e, 0xe1, 0xc9, 0x33, 0x64, 0x33,
+	0xf0, 0xd5, 0xf7, 0x43, 0x66, 0x0e, 0x08, 0x9a,
+	0x6e, 0x09, 0xe4, 0x01, 0x0d, 0x1e, 0x2f, 0x4b,
+	0xed, 0x9c, 0x08, 0x8c, 0x03
+};
+static const u8 key38[] __initconst = {
+	0x77, 0x52, 0x2a, 0x23, 0xf1, 0xc5, 0x96, 0x2b,
+	0x89, 0x4f, 0x3e, 0xf3, 0xff, 0x0e, 0x94, 0xce,
+	0xf1, 0xbd, 0x53, 0xf5, 0x77, 0xd6, 0x9e, 0x47,
+	0x49, 0x3d, 0x16, 0x64, 0xff, 0x95, 0x42, 0x42
+};
+enum { nonce38 = 0xff629d7b82cef357ULL };
+
+static const u8 input39[] __initconst = {
+	0x38, 0x26, 0x27, 0xd0, 0xc2, 0xf5, 0x34, 0xba,
+	0xda, 0x0f, 0x1c, 0x1c, 0x9a, 0x70, 0xe5, 0x8a,
+	0x78, 0x2d, 0x8f, 0x9a, 0xbf, 0x89, 0x6a, 0xfd,
+	0xd4, 0x9c, 0x33, 0xf1, 0xb6, 0x89, 0x16, 0xe3,
+	0x6a, 0x00, 0xfa, 0x3a, 0x0f, 0x26
+};
+static const u8 output39[] __initconst = {
+	0x0f, 0xaf, 0x91, 0x6d, 0x9c, 0x99, 0xa4, 0xf7,
+	0x3b, 0x9d, 0x9a, 0x98, 0xca, 0xbb, 0x50, 0x48,
+	0xee, 0xcb, 0x5d, 0xa1, 0x37, 0x2d, 0x36, 0x09,
+	0x2a, 0xe2, 0x1c, 0x3d, 0x98, 0x40, 0x1c, 0x16,
+	0x56, 0xa7, 0x98, 0xe9, 0x7d, 0x2b
+};
+static const u8 key39[] __initconst = {
+	0x6e, 0x83, 0x15, 0x4d, 0xf8, 0x78, 0xa8, 0x0e,
+	0x71, 0x37, 0xd4, 0x6e, 0x28, 0x5c, 0x06, 0xa1,
+	0x2d, 0x6c, 0x72, 0x7a, 0xfd, 0xf8, 0x65, 0x1a,
+	0xb8, 0xe6, 0x29, 0x7b, 0xe5, 0xb3, 0x23, 0x79
+};
+enum { nonce39 = 0xa4d8c491cf093e9dULL };
+
+static const u8 input40[] __initconst = {
+	0x8f, 0x32, 0x7c, 0x40, 0x37, 0x95, 0x08, 0x00,
+	0x00, 0xfe, 0x2f, 0x95, 0x20, 0x12, 0x40, 0x18,
+	0x5e, 0x7e, 0x5e, 0x99, 0xee, 0x8d, 0x91, 0x7d,
+	0x50, 0x7d, 0x21, 0x45, 0x27, 0xe1, 0x7f, 0xd4,
+	0x73, 0x10, 0xe1, 0x33, 0xbc, 0xf8, 0xdd
+};
+static const u8 output40[] __initconst = {
+	0x78, 0x7c, 0xdc, 0x55, 0x2b, 0xd9, 0x2b, 0x3a,
+	0xdd, 0x56, 0x11, 0x52, 0xd3, 0x2e, 0xe0, 0x0d,
+	0x23, 0x20, 0x8a, 0xf1, 0x4f, 0xee, 0xf1, 0x68,
+	0xf6, 0xdc, 0x53, 0xcf, 0x17, 0xd4, 0xf0, 0x6c,
+	0xdc, 0x80, 0x5f, 0x1c, 0xa4, 0x91, 0x05
+};
+static const u8 key40[] __initconst = {
+	0x0d, 0x86, 0xbf, 0x8a, 0xba, 0x9e, 0x39, 0x91,
+	0xa8, 0xe7, 0x22, 0xf0, 0x0c, 0x43, 0x18, 0xe4,
+	0x1f, 0xb0, 0xaf, 0x8a, 0x34, 0x31, 0xf4, 0x41,
+	0xf0, 0x89, 0x85, 0xca, 0x5d, 0x05, 0x3b, 0x94
+};
+enum { nonce40 = 0xae7acc4f5986439eULL };
+
+static const u8 input41[] __initconst = {
+	0x20, 0x5f, 0xc1, 0x83, 0x36, 0x02, 0x76, 0x96,
+	0xf0, 0xbf, 0x8e, 0x0e, 0x1a, 0xd1, 0xc7, 0x88,
+	0x18, 0xc7, 0x09, 0xc4, 0x15, 0xd9, 0x4f, 0x5e,
+	0x1f, 0xb3, 0xb4, 0x6d, 0xcb, 0xa0, 0xd6, 0x8a,
+	0x3b, 0x40, 0x8e, 0x80, 0xf1, 0xe8, 0x8f, 0x5f
+};
+static const u8 output41[] __initconst = {
+	0x0b, 0xd1, 0x49, 0x9a, 0x9d, 0xe8, 0x97, 0xb8,
+	0xd1, 0xeb, 0x90, 0x62, 0x37, 0xd2, 0x99, 0x15,
+	0x67, 0x6d, 0x27, 0x93, 0xce, 0x37, 0x65, 0xa2,
+	0x94, 0x88, 0xd6, 0x17, 0xbc, 0x1c, 0x6e, 0xa2,
+	0xcc, 0xfb, 0x81, 0x0e, 0x30, 0x60, 0x5a, 0x6f
+};
+static const u8 key41[] __initconst = {
+	0x36, 0x27, 0x57, 0x01, 0x21, 0x68, 0x97, 0xc7,
+	0x00, 0x67, 0x7b, 0xe9, 0x0f, 0x55, 0x49, 0xbb,
+	0x92, 0x18, 0x98, 0xf5, 0x5e, 0xbc, 0xe7, 0x5a,
+	0x9d, 0x3d, 0xc7, 0xbd, 0x59, 0xec, 0x82, 0x8e
+};
+enum { nonce41 = 0x5da05e4c8dfab464ULL };
+
+static const u8 input42[] __initconst = {
+	0xca, 0x30, 0xcd, 0x63, 0xf0, 0x2d, 0xf1, 0x03,
+	0x4d, 0x0d, 0xf2, 0xf7, 0x6f, 0xae, 0xd6, 0x34,
+	0xea, 0xf6, 0x13, 0xcf, 0x1c, 0xa0, 0xd0, 0xe8,
+	0xa4, 0x78, 0x80, 0x3b, 0x1e, 0xa5, 0x32, 0x4c,
+	0x73, 0x12, 0xd4, 0x6a, 0x94, 0xbc, 0xba, 0x80,
+	0x5e
+};
+static const u8 output42[] __initconst = {
+	0xec, 0x3f, 0x18, 0x31, 0xc0, 0x7b, 0xb5, 0xe2,
+	0xad, 0xf3, 0xec, 0xa0, 0x16, 0x9d, 0xef, 0xce,
+	0x05, 0x65, 0x59, 0x9d, 0x5a, 0xca, 0x3e, 0x13,
+	0xb9, 0x5d, 0x5d, 0xb5, 0xeb, 0xae, 0xc0, 0x87,
+	0xbb, 0xfd, 0xe7, 0xe4, 0x89, 0x5b, 0xd2, 0x6c,
+	0x56
+};
+static const u8 key42[] __initconst = {
+	0x7c, 0x6b, 0x7e, 0x77, 0xcc, 0x8c, 0x1b, 0x03,
+	0x8b, 0x2a, 0xb3, 0x7c, 0x5a, 0x73, 0xcc, 0xac,
+	0xdd, 0x53, 0x54, 0x0c, 0x85, 0xed, 0xcd, 0x47,
+	0x24, 0xc1, 0xb8, 0x9b, 0x2e, 0x41, 0x92, 0x36
+};
+enum { nonce42 = 0xe4d7348b09682c9cULL };
+
+static const u8 input43[] __initconst = {
+	0x52, 0xf2, 0x4b, 0x7c, 0xe5, 0x58, 0xe8, 0xd2,
+	0xb7, 0xf3, 0xa1, 0x29, 0x68, 0xa2, 0x50, 0x50,
+	0xae, 0x9c, 0x1b, 0xe2, 0x67, 0x77, 0xe2, 0xdb,
+	0x85, 0x55, 0x7e, 0x84, 0x8a, 0x12, 0x3c, 0xb6,
+	0x2e, 0xed, 0xd3, 0xec, 0x47, 0x68, 0xfa, 0x52,
+	0x46, 0x9d
+};
+static const u8 output43[] __initconst = {
+	0x1b, 0xf0, 0x05, 0xe4, 0x1c, 0xd8, 0x74, 0x9a,
+	0xf0, 0xee, 0x00, 0x54, 0xce, 0x02, 0x83, 0x15,
+	0xfb, 0x23, 0x35, 0x78, 0xc3, 0xda, 0x98, 0xd8,
+	0x9d, 0x1b, 0xb2, 0x51, 0x82, 0xb0, 0xff, 0xbe,
+	0x05, 0xa9, 0xa4, 0x04, 0xba, 0xea, 0x4b, 0x73,
+	0x47, 0x6e
+};
+static const u8 key43[] __initconst = {
+	0xeb, 0xec, 0x0e, 0xa1, 0x65, 0xe2, 0x99, 0x46,
+	0xd8, 0x54, 0x8c, 0x4a, 0x93, 0xdf, 0x6d, 0xbf,
+	0x93, 0x34, 0x94, 0x57, 0xc9, 0x12, 0x9d, 0x68,
+	0x05, 0xc5, 0x05, 0xad, 0x5a, 0xc9, 0x2a, 0x3b
+};
+enum { nonce43 = 0xe14f6a902b7827fULL };
+
+static const u8 input44[] __initconst = {
+	0x3e, 0x22, 0x3e, 0x8e, 0xcd, 0x18, 0xe2, 0xa3,
+	0x8d, 0x8b, 0x38, 0xc3, 0x02, 0xa3, 0x31, 0x48,
+	0xc6, 0x0e, 0xec, 0x99, 0x51, 0x11, 0x6d, 0x8b,
+	0x32, 0x35, 0x3b, 0x08, 0x58, 0x76, 0x25, 0x30,
+	0xe2, 0xfc, 0xa2, 0x46, 0x7d, 0x6e, 0x34, 0x87,
+	0xac, 0x42, 0xbf
+};
+static const u8 output44[] __initconst = {
+	0x08, 0x92, 0x58, 0x02, 0x1a, 0xf4, 0x1f, 0x3d,
+	0x38, 0x7b, 0x6b, 0xf6, 0x84, 0x07, 0xa3, 0x19,
+	0x17, 0x2a, 0xed, 0x57, 0x1c, 0xf9, 0x55, 0x37,
+	0x4e, 0xf4, 0x68, 0x68, 0x82, 0x02, 0x4f, 0xca,
+	0x21, 0x00, 0xc6, 0x66, 0x79, 0x53, 0x19, 0xef,
+	0x7f, 0xdd, 0x74
+};
+static const u8 key44[] __initconst = {
+	0x73, 0xb6, 0x3e, 0xf4, 0x57, 0x52, 0xa6, 0x43,
+	0x51, 0xd8, 0x25, 0x00, 0xdb, 0xb4, 0x52, 0x69,
+	0xd6, 0x27, 0x49, 0xeb, 0x9b, 0xf1, 0x7b, 0xa0,
+	0xd6, 0x7c, 0x9c, 0xd8, 0x95, 0x03, 0x69, 0x26
+};
+enum { nonce44 = 0xf5e6dc4f35ce24e5ULL };
+
+static const u8 input45[] __initconst = {
+	0x55, 0x76, 0xc0, 0xf1, 0x74, 0x03, 0x7a, 0x6d,
+	0x14, 0xd8, 0x36, 0x2c, 0x9f, 0x9a, 0x59, 0x7a,
+	0x2a, 0xf5, 0x77, 0x84, 0x70, 0x7c, 0x1d, 0x04,
+	0x90, 0x45, 0xa4, 0xc1, 0x5e, 0xdd, 0x2e, 0x07,
+	0x18, 0x34, 0xa6, 0x85, 0x56, 0x4f, 0x09, 0xaf,
+	0x2f, 0x83, 0xe1, 0xc6
+};
+static const u8 output45[] __initconst = {
+	0x22, 0x46, 0xe4, 0x0b, 0x3a, 0x55, 0xcc, 0x9b,
+	0xf0, 0xc0, 0x53, 0xcd, 0x95, 0xc7, 0x57, 0x6c,
+	0x77, 0x46, 0x41, 0x72, 0x07, 0xbf, 0xa8, 0xe5,
+	0x68, 0x69, 0xd8, 0x1e, 0x45, 0xc1, 0xa2, 0x50,
+	0xa5, 0xd1, 0x62, 0xc9, 0x5a, 0x7d, 0x08, 0x14,
+	0xae, 0x44, 0x16, 0xb9
+};
+static const u8 key45[] __initconst = {
+	0x41, 0xf3, 0x88, 0xb2, 0x51, 0x25, 0x47, 0x02,
+	0x39, 0xe8, 0x15, 0x3a, 0x22, 0x78, 0x86, 0x0b,
+	0xf9, 0x1e, 0x8d, 0x98, 0xb2, 0x22, 0x82, 0xac,
+	0x42, 0x94, 0xde, 0x64, 0xf0, 0xfd, 0xb3, 0x6c
+};
+enum { nonce45 = 0xf51a582daf4aa01aULL };
+
+static const u8 input46[] __initconst = {
+	0xf6, 0xff, 0x20, 0xf9, 0x26, 0x7e, 0x0f, 0xa8,
+	0x6a, 0x45, 0x5a, 0x91, 0x73, 0xc4, 0x4c, 0x63,
+	0xe5, 0x61, 0x59, 0xca, 0xec, 0xc0, 0x20, 0x35,
+	0xbc, 0x9f, 0x58, 0x9c, 0x5e, 0xa1, 0x17, 0x46,
+	0xcc, 0xab, 0x6e, 0xd0, 0x4f, 0x24, 0xeb, 0x05,
+	0x4d, 0x40, 0x41, 0xe0, 0x9d
+};
+static const u8 output46[] __initconst = {
+	0x31, 0x6e, 0x63, 0x3f, 0x9c, 0xe6, 0xb1, 0xb7,
+	0xef, 0x47, 0x46, 0xd7, 0xb1, 0x53, 0x42, 0x2f,
+	0x2c, 0xc8, 0x01, 0xae, 0x8b, 0xec, 0x42, 0x2c,
+	0x6b, 0x2c, 0x9c, 0xb2, 0xf0, 0x29, 0x06, 0xa5,
+	0xcd, 0x7e, 0xc7, 0x3a, 0x38, 0x98, 0x8a, 0xde,
+	0x03, 0x29, 0x14, 0x8f, 0xf9
+};
+static const u8 key46[] __initconst = {
+	0xac, 0xa6, 0x44, 0x4a, 0x0d, 0x42, 0x10, 0xbc,
+	0xd3, 0xc9, 0x8e, 0x9e, 0x71, 0xa3, 0x1c, 0x14,
+	0x9d, 0x65, 0x0d, 0x49, 0x4d, 0x8c, 0xec, 0x46,
+	0xe1, 0x41, 0xcd, 0xf5, 0xfc, 0x82, 0x75, 0x34
+};
+enum { nonce46 = 0x25f85182df84dec5ULL };
+
+static const u8 input47[] __initconst = {
+	0xa1, 0xd2, 0xf2, 0x52, 0x2f, 0x79, 0x50, 0xb2,
+	0x42, 0x29, 0x5b, 0x44, 0x20, 0xf9, 0xbd, 0x85,
+	0xb7, 0x65, 0x77, 0x86, 0xce, 0x3e, 0x1c, 0xe4,
+	0x70, 0x80, 0xdd, 0x72, 0x07, 0x48, 0x0f, 0x84,
+	0x0d, 0xfd, 0x97, 0xc0, 0xb7, 0x48, 0x9b, 0xb4,
+	0xec, 0xff, 0x73, 0x14, 0x99, 0xe4
+};
+static const u8 output47[] __initconst = {
+	0xe5, 0x3c, 0x78, 0x66, 0x31, 0x1e, 0xd6, 0xc4,
+	0x9e, 0x71, 0xb3, 0xd7, 0xd5, 0xad, 0x84, 0xf2,
+	0x78, 0x61, 0x77, 0xf8, 0x31, 0xf0, 0x13, 0xad,
+	0x66, 0xf5, 0x31, 0x7d, 0xeb, 0xdf, 0xaf, 0xcb,
+	0xac, 0x28, 0x6c, 0xc2, 0x9e, 0xe7, 0x78, 0xa2,
+	0xa2, 0x58, 0xce, 0x84, 0x76, 0x70
+};
+static const u8 key47[] __initconst = {
+	0x05, 0x7f, 0xc0, 0x7f, 0x37, 0x20, 0x71, 0x02,
+	0x3a, 0xe7, 0x20, 0x5a, 0x0a, 0x8f, 0x79, 0x5a,
+	0xfe, 0xbb, 0x43, 0x4d, 0x2f, 0xcb, 0xf6, 0x9e,
+	0xa2, 0x97, 0x00, 0xad, 0x0d, 0x51, 0x7e, 0x17
+};
+enum { nonce47 = 0xae707c60f54de32bULL };
+
+static const u8 input48[] __initconst = {
+	0x80, 0x93, 0x77, 0x2e, 0x8d, 0xe8, 0xe6, 0xc1,
+	0x27, 0xe6, 0xf2, 0x89, 0x5b, 0x33, 0x62, 0x18,
+	0x80, 0x6e, 0x17, 0x22, 0x8e, 0x83, 0x31, 0x40,
+	0x8f, 0xc9, 0x5c, 0x52, 0x6c, 0x0e, 0xa5, 0xe9,
+	0x6c, 0x7f, 0xd4, 0x6a, 0x27, 0x56, 0x99, 0xce,
+	0x8d, 0x37, 0x59, 0xaf, 0xc0, 0x0e, 0xe1
+};
+static const u8 output48[] __initconst = {
+	0x02, 0xa4, 0x2e, 0x33, 0xb7, 0x7c, 0x2b, 0x9a,
+	0x18, 0x5a, 0xba, 0x53, 0x38, 0xaf, 0x00, 0xeb,
+	0xd8, 0x3d, 0x02, 0x77, 0x43, 0x45, 0x03, 0x91,
+	0xe2, 0x5e, 0x4e, 0xeb, 0x50, 0xd5, 0x5b, 0xe0,
+	0xf3, 0x33, 0xa7, 0xa2, 0xac, 0x07, 0x6f, 0xeb,
+	0x3f, 0x6c, 0xcd, 0xf2, 0x6c, 0x61, 0x64
+};
+static const u8 key48[] __initconst = {
+	0xf3, 0x79, 0xe7, 0xf8, 0x0e, 0x02, 0x05, 0x6b,
+	0x83, 0x1a, 0xe7, 0x86, 0x6b, 0xe6, 0x8f, 0x3f,
+	0xd3, 0xa3, 0xe4, 0x6e, 0x29, 0x06, 0xad, 0xbc,
+	0xe8, 0x33, 0x56, 0x39, 0xdf, 0xb0, 0xe2, 0xfe
+};
+enum { nonce48 = 0xd849b938c6569da0ULL };
+
+static const u8 input49[] __initconst = {
+	0x89, 0x3b, 0x88, 0x9e, 0x7b, 0x38, 0x16, 0x9f,
+	0xa1, 0x28, 0xf6, 0xf5, 0x23, 0x74, 0x28, 0xb0,
+	0xdf, 0x6c, 0x9e, 0x8a, 0x71, 0xaf, 0xed, 0x7a,
+	0x39, 0x21, 0x57, 0x7d, 0x31, 0x6c, 0xee, 0x0d,
+	0x11, 0x8d, 0x41, 0x9a, 0x5f, 0xb7, 0x27, 0x40,
+	0x08, 0xad, 0xc6, 0xe0, 0x00, 0x43, 0x9e, 0xae
+};
+static const u8 output49[] __initconst = {
+	0x4d, 0xfd, 0xdb, 0x4c, 0x77, 0xc1, 0x05, 0x07,
+	0x4d, 0x6d, 0x32, 0xcb, 0x2e, 0x0e, 0xff, 0x65,
+	0xc9, 0x27, 0xeb, 0xa9, 0x46, 0x5b, 0xab, 0x06,
+	0xe6, 0xb6, 0x5a, 0x1e, 0x00, 0xfb, 0xcf, 0xe4,
+	0xb9, 0x71, 0x40, 0x10, 0xef, 0x12, 0x39, 0xf0,
+	0xea, 0x40, 0xb8, 0x9a, 0xa2, 0x85, 0x38, 0x48
+};
+static const u8 key49[] __initconst = {
+	0xe7, 0x10, 0x40, 0xd9, 0x66, 0xc0, 0xa8, 0x6d,
+	0xa3, 0xcc, 0x8b, 0xdd, 0x93, 0xf2, 0x6e, 0xe0,
+	0x90, 0x7f, 0xd0, 0xf4, 0x37, 0x0c, 0x8b, 0x9b,
+	0x4c, 0x4d, 0xe6, 0xf2, 0x1f, 0xe9, 0x95, 0x24
+};
+enum { nonce49 = 0xf269817bdae01bc0ULL };
+
+static const u8 input50[] __initconst = {
+	0xda, 0x5b, 0x60, 0xcd, 0xed, 0x58, 0x8e, 0x7f,
+	0xae, 0xdd, 0xc8, 0x2e, 0x16, 0x90, 0xea, 0x4b,
+	0x0c, 0x74, 0x14, 0x35, 0xeb, 0xee, 0x2c, 0xff,
+	0x46, 0x99, 0x97, 0x6e, 0xae, 0xa7, 0x8e, 0x6e,
+	0x38, 0xfe, 0x63, 0xe7, 0x51, 0xd9, 0xaa, 0xce,
+	0x7b, 0x1e, 0x7e, 0x5d, 0xc0, 0xe8, 0x10, 0x06,
+	0x14
+};
+static const u8 output50[] __initconst = {
+	0xe4, 0xe5, 0x86, 0x1b, 0x66, 0x19, 0xac, 0x49,
+	0x1c, 0xbd, 0xee, 0x03, 0xaf, 0x11, 0xfc, 0x1f,
+	0x6a, 0xd2, 0x50, 0x5c, 0xea, 0x2c, 0xa5, 0x75,
+	0xfd, 0xb7, 0x0e, 0x80, 0x8f, 0xed, 0x3f, 0x31,
+	0x47, 0xac, 0x67, 0x43, 0xb8, 0x2e, 0xb4, 0x81,
+	0x6d, 0xe4, 0x1e, 0xb7, 0x8b, 0x0c, 0x53, 0xa9,
+	0x26
+};
+static const u8 key50[] __initconst = {
+	0xd7, 0xb2, 0x04, 0x76, 0x30, 0xcc, 0x38, 0x45,
+	0xef, 0xdb, 0xc5, 0x86, 0x08, 0x61, 0xf0, 0xee,
+	0x6d, 0xd8, 0x22, 0x04, 0x8c, 0xfb, 0xcb, 0x37,
+	0xa6, 0xfb, 0x95, 0x22, 0xe1, 0x87, 0xb7, 0x6f
+};
+enum { nonce50 = 0x3b44d09c45607d38ULL };
+
+static const u8 input51[] __initconst = {
+	0xa9, 0x41, 0x02, 0x4b, 0xd7, 0xd5, 0xd1, 0xf1,
+	0x21, 0x55, 0xb2, 0x75, 0x6d, 0x77, 0x1b, 0x86,
+	0xa9, 0xc8, 0x90, 0xfd, 0xed, 0x4a, 0x7b, 0x6c,
+	0xb2, 0x5f, 0x9b, 0x5f, 0x16, 0xa1, 0x54, 0xdb,
+	0xd6, 0x3f, 0x6a, 0x7f, 0x2e, 0x51, 0x9d, 0x49,
+	0x5b, 0xa5, 0x0e, 0xf9, 0xfb, 0x2a, 0x38, 0xff,
+	0x20, 0x8c
+};
+static const u8 output51[] __initconst = {
+	0x18, 0xf7, 0x88, 0xc1, 0x72, 0xfd, 0x90, 0x4b,
+	0xa9, 0x2d, 0xdb, 0x47, 0xb0, 0xa5, 0xc4, 0x37,
+	0x01, 0x95, 0xc4, 0xb1, 0xab, 0xc5, 0x5b, 0xcd,
+	0xe1, 0x97, 0x78, 0x13, 0xde, 0x6a, 0xff, 0x36,
+	0xce, 0xa4, 0x67, 0xc5, 0x4a, 0x45, 0x2b, 0xd9,
+	0xff, 0x8f, 0x06, 0x7c, 0x63, 0xbb, 0x83, 0x17,
+	0xb4, 0x6b
+};
+static const u8 key51[] __initconst = {
+	0x82, 0x1a, 0x79, 0xab, 0x9a, 0xb5, 0x49, 0x6a,
+	0x30, 0x6b, 0x99, 0x19, 0x11, 0xc7, 0xa2, 0xf4,
+	0xca, 0x55, 0xb9, 0xdd, 0xe7, 0x2f, 0xe7, 0xc1,
+	0xdd, 0x27, 0xad, 0x80, 0xf2, 0x56, 0xad, 0xf3
+};
+enum { nonce51 = 0xe93aff94ca71a4a6ULL };
+
+static const u8 input52[] __initconst = {
+	0x89, 0xdd, 0xf3, 0xfa, 0xb6, 0xc1, 0xaa, 0x9a,
+	0xc8, 0xad, 0x6b, 0x00, 0xa1, 0x65, 0xea, 0x14,
+	0x55, 0x54, 0x31, 0x8f, 0xf0, 0x03, 0x84, 0x51,
+	0x17, 0x1e, 0x0a, 0x93, 0x6e, 0x79, 0x96, 0xa3,
+	0x2a, 0x85, 0x9c, 0x89, 0xf8, 0xd1, 0xe2, 0x15,
+	0x95, 0x05, 0xf4, 0x43, 0x4d, 0x6b, 0xf0, 0x71,
+	0x3b, 0x3e, 0xba
+};
+static const u8 output52[] __initconst = {
+	0x0c, 0x42, 0x6a, 0xb3, 0x66, 0x63, 0x5d, 0x2c,
+	0x9f, 0x3d, 0xa6, 0x6e, 0xc7, 0x5f, 0x79, 0x2f,
+	0x50, 0xe3, 0xd6, 0x07, 0x56, 0xa4, 0x2b, 0x2d,
+	0x8d, 0x10, 0xc0, 0x6c, 0xa2, 0xfc, 0x97, 0xec,
+	0x3f, 0x5c, 0x8d, 0x59, 0xbe, 0x84, 0xf1, 0x3e,
+	0x38, 0x47, 0x4f, 0x75, 0x25, 0x66, 0x88, 0x14,
+	0x03, 0xdd, 0xde
+};
+static const u8 key52[] __initconst = {
+	0x4f, 0xb0, 0x27, 0xb6, 0xdd, 0x24, 0x0c, 0xdb,
+	0x6b, 0x71, 0x2e, 0xac, 0xfc, 0x3f, 0xa6, 0x48,
+	0x5d, 0xd5, 0xff, 0x53, 0xb5, 0x62, 0xf1, 0xe0,
+	0x93, 0xfe, 0x39, 0x4c, 0x9f, 0x03, 0x11, 0xa7
+};
+enum { nonce52 = 0xed8becec3bdf6f25ULL };
+
+static const u8 input53[] __initconst = {
+	0x68, 0xd1, 0xc7, 0x74, 0x44, 0x1c, 0x84, 0xde,
+	0x27, 0x27, 0x35, 0xf0, 0x18, 0x0b, 0x57, 0xaa,
+	0xd0, 0x1a, 0xd3, 0x3b, 0x5e, 0x5c, 0x62, 0x93,
+	0xd7, 0x6b, 0x84, 0x3b, 0x71, 0x83, 0x77, 0x01,
+	0x3e, 0x59, 0x45, 0xf4, 0x77, 0x6c, 0x6b, 0xcb,
+	0x88, 0x45, 0x09, 0x1d, 0xc6, 0x45, 0x6e, 0xdc,
+	0x6e, 0x51, 0xb8, 0x28
+};
+static const u8 output53[] __initconst = {
+	0xc5, 0x90, 0x96, 0x78, 0x02, 0xf5, 0xc4, 0x3c,
+	0xde, 0xd4, 0xd4, 0xc6, 0xa7, 0xad, 0x12, 0x47,
+	0x45, 0xce, 0xcd, 0x8c, 0x35, 0xcc, 0xa6, 0x9e,
+	0x5a, 0xc6, 0x60, 0xbb, 0xe3, 0xed, 0xec, 0x68,
+	0x3f, 0x64, 0xf7, 0x06, 0x63, 0x9c, 0x8c, 0xc8,
+	0x05, 0x3a, 0xad, 0x32, 0x79, 0x8b, 0x45, 0x96,
+	0x93, 0x73, 0x4c, 0xe0
+};
+static const u8 key53[] __initconst = {
+	0x42, 0x4b, 0x20, 0x81, 0x49, 0x50, 0xe9, 0xc2,
+	0x43, 0x69, 0x36, 0xe7, 0x68, 0xae, 0xd5, 0x7e,
+	0x42, 0x1a, 0x1b, 0xb4, 0x06, 0x4d, 0xa7, 0x17,
+	0xb5, 0x31, 0xd6, 0x0c, 0xb0, 0x5c, 0x41, 0x0b
+};
+enum { nonce53 = 0xf44ce1931fbda3d7ULL };
+
+static const u8 input54[] __initconst = {
+	0x7b, 0xf6, 0x8b, 0xae, 0xc0, 0xcb, 0x10, 0x8e,
+	0xe8, 0xd8, 0x2e, 0x3b, 0x14, 0xba, 0xb4, 0xd2,
+	0x58, 0x6b, 0x2c, 0xec, 0xc1, 0x81, 0x71, 0xb4,
+	0xc6, 0xea, 0x08, 0xc5, 0xc9, 0x78, 0xdb, 0xa2,
+	0xfa, 0x44, 0x50, 0x9b, 0xc8, 0x53, 0x8d, 0x45,
+	0x42, 0xe7, 0x09, 0xc4, 0x29, 0xd8, 0x75, 0x02,
+	0xbb, 0xb2, 0x78, 0xcf, 0xe7
+};
+static const u8 output54[] __initconst = {
+	0xaf, 0x2c, 0x83, 0x26, 0x6e, 0x7f, 0xa6, 0xe9,
+	0x03, 0x75, 0xfe, 0xfe, 0x87, 0x58, 0xcf, 0xb5,
+	0xbc, 0x3c, 0x9d, 0xa1, 0x6e, 0x13, 0xf1, 0x0f,
+	0x9e, 0xbc, 0xe0, 0x54, 0x24, 0x32, 0xce, 0x95,
+	0xe6, 0xa5, 0x59, 0x3d, 0x24, 0x1d, 0x8f, 0xb1,
+	0x74, 0x6c, 0x56, 0xe7, 0x96, 0xc1, 0x91, 0xc8,
+	0x2d, 0x0e, 0xb7, 0x51, 0x10
+};
+static const u8 key54[] __initconst = {
+	0x00, 0x68, 0x74, 0xdc, 0x30, 0x9e, 0xe3, 0x52,
+	0xa9, 0xae, 0xb6, 0x7c, 0xa1, 0xdc, 0x12, 0x2d,
+	0x98, 0x32, 0x7a, 0x77, 0xe1, 0xdd, 0xa3, 0x76,
+	0x72, 0x34, 0x83, 0xd8, 0xb7, 0x69, 0xba, 0x77
+};
+enum { nonce54 = 0xbea57d79b798b63aULL };
+
+static const u8 input55[] __initconst = {
+	0xb5, 0xf4, 0x2f, 0xc1, 0x5e, 0x10, 0xa7, 0x4e,
+	0x74, 0x3d, 0xa3, 0x96, 0xc0, 0x4d, 0x7b, 0x92,
+	0x8f, 0xdb, 0x2d, 0x15, 0x52, 0x6a, 0x95, 0x5e,
+	0x40, 0x81, 0x4f, 0x70, 0x73, 0xea, 0x84, 0x65,
+	0x3d, 0x9a, 0x4e, 0x03, 0x95, 0xf8, 0x5d, 0x2f,
+	0x07, 0x02, 0x13, 0x13, 0xdd, 0x82, 0xe6, 0x3b,
+	0xe1, 0x5f, 0xb3, 0x37, 0x9b, 0x88
+};
+static const u8 output55[] __initconst = {
+	0xc1, 0x88, 0xbd, 0x92, 0x77, 0xad, 0x7c, 0x5f,
+	0xaf, 0xa8, 0x57, 0x0e, 0x40, 0x0a, 0xdc, 0x70,
+	0xfb, 0xc6, 0x71, 0xfd, 0xc4, 0x74, 0x60, 0xcc,
+	0xa0, 0x89, 0x8e, 0x99, 0xf0, 0x06, 0xa6, 0x7c,
+	0x97, 0x42, 0x21, 0x81, 0x6a, 0x07, 0xe7, 0xb3,
+	0xf7, 0xa5, 0x03, 0x71, 0x50, 0x05, 0x63, 0x17,
+	0xa9, 0x46, 0x0b, 0xff, 0x30, 0x78
+};
+static const u8 key55[] __initconst = {
+	0x19, 0x8f, 0xe7, 0xd7, 0x6b, 0x7f, 0x6f, 0x69,
+	0x86, 0x91, 0x0f, 0xa7, 0x4a, 0x69, 0x8e, 0x34,
+	0xf3, 0xdb, 0xde, 0xaf, 0xf2, 0x66, 0x1d, 0x64,
+	0x97, 0x0c, 0xcf, 0xfa, 0x33, 0x84, 0xfd, 0x0c
+};
+enum { nonce55 = 0x80aa3d3e2c51ef06ULL };
+
+static const u8 input56[] __initconst = {
+	0x6b, 0xe9, 0x73, 0x42, 0x27, 0x5e, 0x12, 0xcd,
+	0xaa, 0x45, 0x12, 0x8b, 0xb3, 0xe6, 0x54, 0x33,
+	0x31, 0x7d, 0xe2, 0x25, 0xc6, 0x86, 0x47, 0x67,
+	0x86, 0x83, 0xe4, 0x46, 0xb5, 0x8f, 0x2c, 0xbb,
+	0xe4, 0xb8, 0x9f, 0xa2, 0xa4, 0xe8, 0x75, 0x96,
+	0x92, 0x51, 0x51, 0xac, 0x8e, 0x2e, 0x6f, 0xfc,
+	0xbd, 0x0d, 0xa3, 0x9f, 0x16, 0x55, 0x3e
+};
+static const u8 output56[] __initconst = {
+	0x42, 0x99, 0x73, 0x6c, 0xd9, 0x4b, 0x16, 0xe5,
+	0x18, 0x63, 0x1a, 0xd9, 0x0e, 0xf1, 0x15, 0x2e,
+	0x0f, 0x4b, 0xe4, 0x5f, 0xa0, 0x4d, 0xde, 0x9f,
+	0xa7, 0x18, 0xc1, 0x0c, 0x0b, 0xae, 0x55, 0xe4,
+	0x89, 0x18, 0xa4, 0x78, 0x9d, 0x25, 0x0d, 0xd5,
+	0x94, 0x0f, 0xf9, 0x78, 0xa3, 0xa6, 0xe9, 0x9e,
+	0x2c, 0x73, 0xf0, 0xf7, 0x35, 0xf3, 0x2b
+};
+static const u8 key56[] __initconst = {
+	0x7d, 0x12, 0xad, 0x51, 0xd5, 0x6f, 0x8f, 0x96,
+	0xc0, 0x5d, 0x9a, 0xd1, 0x7e, 0x20, 0x98, 0x0e,
+	0x3c, 0x0a, 0x67, 0x6b, 0x1b, 0x88, 0x69, 0xd4,
+	0x07, 0x8c, 0xaf, 0x0f, 0x3a, 0x28, 0xe4, 0x5d
+};
+enum { nonce56 = 0x70f4c372fb8b5984ULL };
+
+static const u8 input57[] __initconst = {
+	0x28, 0xa3, 0x06, 0xe8, 0xe7, 0x08, 0xb9, 0xef,
+	0x0d, 0x63, 0x15, 0x99, 0xb2, 0x78, 0x7e, 0xaf,
+	0x30, 0x50, 0xcf, 0xea, 0xc9, 0x91, 0x41, 0x2f,
+	0x3b, 0x38, 0x70, 0xc4, 0x87, 0xb0, 0x3a, 0xee,
+	0x4a, 0xea, 0xe3, 0x83, 0x68, 0x8b, 0xcf, 0xda,
+	0x04, 0xa5, 0xbd, 0xb2, 0xde, 0x3c, 0x55, 0x13,
+	0xfe, 0x96, 0xad, 0xc1, 0x61, 0x1b, 0x98, 0xde
+};
+static const u8 output57[] __initconst = {
+	0xf4, 0x44, 0xe9, 0xd2, 0x6d, 0xc2, 0x5a, 0xe9,
+	0xfd, 0x7e, 0x41, 0x54, 0x3f, 0xf4, 0x12, 0xd8,
+	0x55, 0x0d, 0x12, 0x9b, 0xd5, 0x2e, 0x95, 0xe5,
+	0x77, 0x42, 0x3f, 0x2c, 0xfb, 0x28, 0x9d, 0x72,
+	0x6d, 0x89, 0x82, 0x27, 0x64, 0x6f, 0x0d, 0x57,
+	0xa1, 0x25, 0xa3, 0x6b, 0x88, 0x9a, 0xac, 0x0c,
+	0x76, 0x19, 0x90, 0xe2, 0x50, 0x5a, 0xf8, 0x12
+};
+static const u8 key57[] __initconst = {
+	0x08, 0x26, 0xb8, 0xac, 0xf3, 0xa5, 0xc6, 0xa3,
+	0x7f, 0x09, 0x87, 0xf5, 0x6c, 0x5a, 0x85, 0x6c,
+	0x3d, 0xbd, 0xde, 0xd5, 0x87, 0xa3, 0x98, 0x7a,
+	0xaa, 0x40, 0x3e, 0xf7, 0xff, 0x44, 0x5d, 0xee
+};
+enum { nonce57 = 0xc03a6130bf06b089ULL };
+
+static const u8 input58[] __initconst = {
+	0x82, 0xa5, 0x38, 0x6f, 0xaa, 0xb4, 0xaf, 0xb2,
+	0x42, 0x01, 0xa8, 0x39, 0x3f, 0x15, 0x51, 0xa8,
+	0x11, 0x1b, 0x93, 0xca, 0x9c, 0xa0, 0x57, 0x68,
+	0x8f, 0xdb, 0x68, 0x53, 0x51, 0x6d, 0x13, 0x22,
+	0x12, 0x9b, 0xbd, 0x33, 0xa8, 0x52, 0x40, 0x57,
+	0x80, 0x9b, 0x98, 0xef, 0x56, 0x70, 0x11, 0xfa,
+	0x36, 0x69, 0x7d, 0x15, 0x48, 0xf9, 0x3b, 0xeb,
+	0x42
+};
+static const u8 output58[] __initconst = {
+	0xff, 0x3a, 0x74, 0xc3, 0x3e, 0x44, 0x64, 0x4d,
+	0x0e, 0x5f, 0x9d, 0xa8, 0xdb, 0xbe, 0x12, 0xef,
+	0xba, 0x56, 0x65, 0x50, 0x76, 0xaf, 0xa4, 0x4e,
+	0x01, 0xc1, 0xd3, 0x31, 0x14, 0xe2, 0xbe, 0x7b,
+	0xa5, 0x67, 0xb4, 0xe3, 0x68, 0x40, 0x9c, 0xb0,
+	0xb1, 0x78, 0xef, 0x49, 0x03, 0x0f, 0x2d, 0x56,
+	0xb4, 0x37, 0xdb, 0xbc, 0x2d, 0x68, 0x1c, 0x3c,
+	0xf1
+};
+static const u8 key58[] __initconst = {
+	0x7e, 0xf1, 0x7c, 0x20, 0x65, 0xed, 0xcd, 0xd7,
+	0x57, 0xe8, 0xdb, 0x90, 0x87, 0xdb, 0x5f, 0x63,
+	0x3d, 0xdd, 0xb8, 0x2b, 0x75, 0x8e, 0x04, 0xb5,
+	0xf4, 0x12, 0x79, 0xa9, 0x4d, 0x42, 0x16, 0x7f
+};
+enum { nonce58 = 0x92838183f80d2f7fULL };
+
+static const u8 input59[] __initconst = {
+	0x37, 0xf1, 0x9d, 0xdd, 0xd7, 0x08, 0x9f, 0x13,
+	0xc5, 0x21, 0x82, 0x75, 0x08, 0x9e, 0x25, 0x16,
+	0xb1, 0xd1, 0x71, 0x42, 0x28, 0x63, 0xac, 0x47,
+	0x71, 0x54, 0xb1, 0xfc, 0x39, 0xf0, 0x61, 0x4f,
+	0x7c, 0x6d, 0x4f, 0xc8, 0x33, 0xef, 0x7e, 0xc8,
+	0xc0, 0x97, 0xfc, 0x1a, 0x61, 0xb4, 0x87, 0x6f,
+	0xdd, 0x5a, 0x15, 0x7b, 0x1b, 0x95, 0x50, 0x94,
+	0x1d, 0xba
+};
+static const u8 output59[] __initconst = {
+	0x73, 0x67, 0xc5, 0x07, 0xbb, 0x57, 0x79, 0xd5,
+	0xc9, 0x04, 0xdd, 0x88, 0xf3, 0x86, 0xe5, 0x70,
+	0x49, 0x31, 0xe0, 0xcc, 0x3b, 0x1d, 0xdf, 0xb0,
+	0xaf, 0xf4, 0x2d, 0xe0, 0x06, 0x10, 0x91, 0x8d,
+	0x1c, 0xcf, 0x31, 0x0b, 0xf6, 0x73, 0xda, 0x1c,
+	0xf0, 0x17, 0x52, 0x9e, 0x20, 0x2e, 0x9f, 0x8c,
+	0xb3, 0x59, 0xce, 0xd4, 0xd3, 0xc1, 0x81, 0xe9,
+	0x11, 0x36
+};
+static const u8 key59[] __initconst = {
+	0xbd, 0x07, 0xd0, 0x53, 0x2c, 0xb3, 0xcc, 0x3f,
+	0xc4, 0x95, 0xfd, 0xe7, 0x81, 0xb3, 0x29, 0x99,
+	0x05, 0x45, 0xd6, 0x95, 0x25, 0x0b, 0x72, 0xd3,
+	0xcd, 0xbb, 0x73, 0xf8, 0xfa, 0xc0, 0x9b, 0x7a
+};
+enum { nonce59 = 0x4a0db819b0d519e2ULL };
+
+static const u8 input60[] __initconst = {
+	0x58, 0x4e, 0xdf, 0x94, 0x3c, 0x76, 0x0a, 0x79,
+	0x47, 0xf1, 0xbe, 0x88, 0xd3, 0xba, 0x94, 0xd8,
+	0xe2, 0x8f, 0xe3, 0x2f, 0x2f, 0x74, 0x82, 0x55,
+	0xc3, 0xda, 0xe2, 0x4e, 0x2c, 0x8c, 0x45, 0x1d,
+	0x72, 0x8f, 0x54, 0x41, 0xb5, 0xb7, 0x69, 0xe4,
+	0xdc, 0xd2, 0x36, 0x21, 0x5c, 0x28, 0x52, 0xf7,
+	0x98, 0x8e, 0x72, 0xa7, 0x6d, 0x57, 0xed, 0xdc,
+	0x3c, 0xe6, 0x6a
+};
+static const u8 output60[] __initconst = {
+	0xda, 0xaf, 0xb5, 0xe3, 0x30, 0x65, 0x5c, 0xb1,
+	0x48, 0x08, 0x43, 0x7b, 0x9e, 0xd2, 0x6a, 0x62,
+	0x56, 0x7c, 0xad, 0xd9, 0xe5, 0xf6, 0x09, 0x71,
+	0xcd, 0xe6, 0x05, 0x6b, 0x3f, 0x44, 0x3a, 0x5c,
+	0xf6, 0xf8, 0xd7, 0xce, 0x7d, 0xd1, 0xe0, 0x4f,
+	0x88, 0x15, 0x04, 0xd8, 0x20, 0xf0, 0x3e, 0xef,
+	0xae, 0xa6, 0x27, 0xa3, 0x0e, 0xfc, 0x18, 0x90,
+	0x33, 0xcd, 0xd3
+};
+static const u8 key60[] __initconst = {
+	0xbf, 0xfd, 0x25, 0xb5, 0xb2, 0xfc, 0x78, 0x0c,
+	0x8e, 0xb9, 0x57, 0x2f, 0x26, 0x4a, 0x7e, 0x71,
+	0xcc, 0xf2, 0xe0, 0xfd, 0x24, 0x11, 0x20, 0x23,
+	0x57, 0x00, 0xff, 0x80, 0x11, 0x0c, 0x1e, 0xff
+};
+enum { nonce60 = 0xf18df56fdb7954adULL };
+
+static const u8 input61[] __initconst = {
+	0xb0, 0xf3, 0x06, 0xbc, 0x22, 0xae, 0x49, 0x40,
+	0xae, 0xff, 0x1b, 0x31, 0xa7, 0x98, 0xab, 0x1d,
+	0xe7, 0x40, 0x23, 0x18, 0x4f, 0xab, 0x8e, 0x93,
+	0x82, 0xf4, 0x56, 0x61, 0xfd, 0x2b, 0xcf, 0xa7,
+	0xc4, 0xb4, 0x0a, 0xf4, 0xcb, 0xc7, 0x8c, 0x40,
+	0x57, 0xac, 0x0b, 0x3e, 0x2a, 0x0a, 0x67, 0x83,
+	0x50, 0xbf, 0xec, 0xb0, 0xc7, 0xf1, 0x32, 0x26,
+	0x98, 0x80, 0x33, 0xb4
+};
+static const u8 output61[] __initconst = {
+	0x9d, 0x23, 0x0e, 0xff, 0xcc, 0x7c, 0xd5, 0xcf,
+	0x1a, 0xb8, 0x59, 0x1e, 0x92, 0xfd, 0x7f, 0xca,
+	0xca, 0x3c, 0x18, 0x81, 0xde, 0xfa, 0x59, 0xc8,
+	0x6f, 0x9c, 0x24, 0x3f, 0x3a, 0xe6, 0x0b, 0xb4,
+	0x34, 0x48, 0x69, 0xfc, 0xb6, 0xea, 0xb2, 0xde,
+	0x9f, 0xfd, 0x92, 0x36, 0x18, 0x98, 0x99, 0xaa,
+	0x65, 0xe2, 0xea, 0xf4, 0xb1, 0x47, 0x8e, 0xb0,
+	0xe7, 0xd4, 0x7a, 0x2c
+};
+static const u8 key61[] __initconst = {
+	0xd7, 0xfd, 0x9b, 0xbd, 0x8f, 0x65, 0x0d, 0x00,
+	0xca, 0xa1, 0x6c, 0x85, 0x85, 0xa4, 0x6d, 0xf1,
+	0xb1, 0x68, 0x0c, 0x8b, 0x5d, 0x37, 0x72, 0xd0,
+	0xd8, 0xd2, 0x25, 0xab, 0x9f, 0x7b, 0x7d, 0x95
+};
+enum { nonce61 = 0xd82caf72a9c4864fULL };
+
+static const u8 input62[] __initconst = {
+	0x10, 0x77, 0xf3, 0x2f, 0xc2, 0x50, 0xd6, 0x0c,
+	0xba, 0xa8, 0x8d, 0xce, 0x0d, 0x58, 0x9e, 0x87,
+	0xb1, 0x59, 0x66, 0x0a, 0x4a, 0xb3, 0xd8, 0xca,
+	0x0a, 0x6b, 0xf8, 0xc6, 0x2b, 0x3f, 0x8e, 0x09,
+	0xe0, 0x0a, 0x15, 0x85, 0xfe, 0xaa, 0xc6, 0xbd,
+	0x30, 0xef, 0xe4, 0x10, 0x78, 0x03, 0xc1, 0xc7,
+	0x8a, 0xd9, 0xde, 0x0b, 0x51, 0x07, 0xc4, 0x7b,
+	0xe2, 0x2e, 0x36, 0x3a, 0xc2
+};
+static const u8 output62[] __initconst = {
+	0xa0, 0x0c, 0xfc, 0xc1, 0xf6, 0xaf, 0xc2, 0xb8,
+	0x5c, 0xef, 0x6e, 0xf3, 0xce, 0x15, 0x48, 0x05,
+	0xb5, 0x78, 0x49, 0x51, 0x1f, 0x9d, 0xf4, 0xbf,
+	0x2f, 0x53, 0xa2, 0xd1, 0x15, 0x20, 0x82, 0x6b,
+	0xd2, 0x22, 0x6c, 0x4e, 0x14, 0x87, 0xe3, 0xd7,
+	0x49, 0x45, 0x84, 0xdb, 0x5f, 0x68, 0x60, 0xc4,
+	0xb3, 0xe6, 0x3f, 0xd1, 0xfc, 0xa5, 0x73, 0xf3,
+	0xfc, 0xbb, 0xbe, 0xc8, 0x9d
+};
+static const u8 key62[] __initconst = {
+	0x6e, 0xc9, 0xaf, 0xce, 0x35, 0xb9, 0x86, 0xd1,
+	0xce, 0x5f, 0xd9, 0xbb, 0xd5, 0x1f, 0x7c, 0xcd,
+	0xfe, 0x19, 0xaa, 0x3d, 0xea, 0x64, 0xc1, 0x28,
+	0x40, 0xba, 0xa1, 0x28, 0xcd, 0x40, 0xb6, 0xf2
+};
+enum { nonce62 = 0xa1c0c265f900cde8ULL };
+
+static const u8 input63[] __initconst = {
+	0x7a, 0x70, 0x21, 0x2c, 0xef, 0xa6, 0x36, 0xd4,
+	0xe0, 0xab, 0x8c, 0x25, 0x73, 0x34, 0xc8, 0x94,
+	0x6c, 0x81, 0xcb, 0x19, 0x8d, 0x5a, 0x49, 0xaa,
+	0x6f, 0xba, 0x83, 0x72, 0x02, 0x5e, 0xf5, 0x89,
+	0xce, 0x79, 0x7e, 0x13, 0x3d, 0x5b, 0x98, 0x60,
+	0x5d, 0xd9, 0xfb, 0x15, 0x93, 0x4c, 0xf3, 0x51,
+	0x49, 0x55, 0xd1, 0x58, 0xdd, 0x7e, 0x6d, 0xfe,
+	0xdd, 0x84, 0x23, 0x05, 0xba, 0xe9
+};
+static const u8 output63[] __initconst = {
+	0x20, 0xb3, 0x5c, 0x03, 0x03, 0x78, 0x17, 0xfc,
+	0x3b, 0x35, 0x30, 0x9a, 0x00, 0x18, 0xf5, 0xc5,
+	0x06, 0x53, 0xf5, 0x04, 0x24, 0x9d, 0xd1, 0xb2,
+	0xac, 0x5a, 0xb6, 0x2a, 0xa5, 0xda, 0x50, 0x00,
+	0xec, 0xff, 0xa0, 0x7a, 0x14, 0x7b, 0xe4, 0x6b,
+	0x63, 0xe8, 0x66, 0x86, 0x34, 0xfd, 0x74, 0x44,
+	0xa2, 0x50, 0x97, 0x0d, 0xdc, 0xc3, 0x84, 0xf8,
+	0x71, 0x02, 0x31, 0x95, 0xed, 0x54
+};
+static const u8 key63[] __initconst = {
+	0x7d, 0x64, 0xb4, 0x12, 0x81, 0xe4, 0xe6, 0x8f,
+	0xcc, 0xe7, 0xd1, 0x1f, 0x70, 0x20, 0xfd, 0xb8,
+	0x3a, 0x7d, 0xa6, 0x53, 0x65, 0x30, 0x5d, 0xe3,
+	0x1a, 0x44, 0xbe, 0x62, 0xed, 0x90, 0xc4, 0xd1
+};
+enum { nonce63 = 0xe8e849596c942276ULL };
+
+static const u8 input64[] __initconst = {
+	0x84, 0xf8, 0xda, 0x87, 0x23, 0x39, 0x60, 0xcf,
+	0xc5, 0x50, 0x7e, 0xc5, 0x47, 0x29, 0x7c, 0x05,
+	0xc2, 0xb4, 0xf4, 0xb2, 0xec, 0x5d, 0x48, 0x36,
+	0xbf, 0xfc, 0x06, 0x8c, 0xf2, 0x0e, 0x88, 0xe7,
+	0xc9, 0xc5, 0xa4, 0xa2, 0x83, 0x20, 0xa1, 0x6f,
+	0x37, 0xe5, 0x2d, 0xa1, 0x72, 0xa1, 0x19, 0xef,
+	0x05, 0x42, 0x08, 0xf2, 0x57, 0x47, 0x31, 0x1e,
+	0x17, 0x76, 0x13, 0xd3, 0xcc, 0x75, 0x2c
+};
+static const u8 output64[] __initconst = {
+	0xcb, 0xec, 0x90, 0x88, 0xeb, 0x31, 0x69, 0x20,
+	0xa6, 0xdc, 0xff, 0x76, 0x98, 0xb0, 0x24, 0x49,
+	0x7b, 0x20, 0xd9, 0xd1, 0x1b, 0xe3, 0x61, 0xdc,
+	0xcf, 0x51, 0xf6, 0x70, 0x72, 0x33, 0x28, 0x94,
+	0xac, 0x73, 0x18, 0xcf, 0x93, 0xfd, 0xca, 0x08,
+	0x0d, 0xa2, 0xb9, 0x57, 0x1e, 0x51, 0xb6, 0x07,
+	0x5c, 0xc1, 0x13, 0x64, 0x1d, 0x18, 0x6f, 0xe6,
+	0x0b, 0xb7, 0x14, 0x03, 0x43, 0xb6, 0xaf
+};
+static const u8 key64[] __initconst = {
+	0xbf, 0x82, 0x65, 0xe4, 0x50, 0xf9, 0x5e, 0xea,
+	0x28, 0x91, 0xd1, 0xd2, 0x17, 0x7c, 0x13, 0x7e,
+	0xf5, 0xd5, 0x6b, 0x06, 0x1c, 0x20, 0xc2, 0x82,
+	0xa1, 0x7a, 0xa2, 0x14, 0xa1, 0xb0, 0x54, 0x58
+};
+enum { nonce64 = 0xe57c5095aa5723c9ULL };
+
+static const u8 input65[] __initconst = {
+	0x1c, 0xfb, 0xd3, 0x3f, 0x85, 0xd7, 0xba, 0x7b,
+	0xae, 0xb1, 0xa5, 0xd2, 0xe5, 0x40, 0xce, 0x4d,
+	0x3e, 0xab, 0x17, 0x9d, 0x7d, 0x9f, 0x03, 0x98,
+	0x3f, 0x9f, 0xc8, 0xdd, 0x36, 0x17, 0x43, 0x5c,
+	0x34, 0xd1, 0x23, 0xe0, 0x77, 0xbf, 0x35, 0x5d,
+	0x8f, 0xb1, 0xcb, 0x82, 0xbb, 0x39, 0x69, 0xd8,
+	0x90, 0x45, 0x37, 0xfd, 0x98, 0x25, 0xf7, 0x5b,
+	0xce, 0x06, 0x43, 0xba, 0x61, 0xa8, 0x47, 0xb9
+};
+static const u8 output65[] __initconst = {
+	0x73, 0xa5, 0x68, 0xab, 0x8b, 0xa5, 0xc3, 0x7e,
+	0x74, 0xf8, 0x9d, 0xf5, 0x93, 0x6e, 0xf2, 0x71,
+	0x6d, 0xde, 0x82, 0xc5, 0x40, 0xa0, 0x46, 0xb3,
+	0x9a, 0x78, 0xa8, 0xf7, 0xdf, 0xb1, 0xc3, 0xdd,
+	0x8d, 0x90, 0x00, 0x68, 0x21, 0x48, 0xe8, 0xba,
+	0x56, 0x9f, 0x8f, 0xe7, 0xa4, 0x4d, 0x36, 0x55,
+	0xd0, 0x34, 0x99, 0xa6, 0x1c, 0x4c, 0xc1, 0xe2,
+	0x65, 0x98, 0x14, 0x8e, 0x6a, 0x05, 0xb1, 0x2b
+};
+static const u8 key65[] __initconst = {
+	0xbd, 0x5c, 0x8a, 0xb0, 0x11, 0x29, 0xf3, 0x00,
+	0x7a, 0x78, 0x32, 0x63, 0x34, 0x00, 0xe6, 0x7d,
+	0x30, 0x54, 0xde, 0x37, 0xda, 0xc2, 0xc4, 0x3d,
+	0x92, 0x6b, 0x4c, 0xc2, 0x92, 0xe9, 0x9e, 0x2a
+};
+enum { nonce65 = 0xf654a3031de746f2ULL };
+
+static const u8 input66[] __initconst = {
+	0x4b, 0x27, 0x30, 0x8f, 0x28, 0xd8, 0x60, 0x46,
+	0x39, 0x06, 0x49, 0xea, 0x1b, 0x71, 0x26, 0xe0,
+	0x99, 0x2b, 0xd4, 0x8f, 0x64, 0x64, 0xcd, 0xac,
+	0x1d, 0x78, 0x88, 0x90, 0xe1, 0x5c, 0x24, 0x4b,
+	0xdc, 0x2d, 0xb7, 0xee, 0x3a, 0xe6, 0x86, 0x2c,
+	0x21, 0xe4, 0x2b, 0xfc, 0xe8, 0x19, 0xca, 0x65,
+	0xe7, 0xdd, 0x6f, 0x52, 0xb3, 0x11, 0xe1, 0xe2,
+	0xbf, 0xe8, 0x70, 0xe3, 0x0d, 0x45, 0xb8, 0xa5,
+	0x20, 0xb7, 0xb5, 0xaf, 0xff, 0x08, 0xcf, 0x23,
+	0x65, 0xdf, 0x8d, 0xc3, 0x31, 0xf3, 0x1e, 0x6a,
+	0x58, 0x8d, 0xcc, 0x45, 0x16, 0x86, 0x1f, 0x31,
+	0x5c, 0x27, 0xcd, 0xc8, 0x6b, 0x19, 0x1e, 0xec,
+	0x44, 0x75, 0x63, 0x97, 0xfd, 0x79, 0xf6, 0x62,
+	0xc5, 0xba, 0x17, 0xc7, 0xab, 0x8f, 0xbb, 0xed,
+	0x85, 0x2a, 0x98, 0x79, 0x21, 0xec, 0x6e, 0x4d,
+	0xdc, 0xfa, 0x72, 0x52, 0xba, 0xc8, 0x4c
+};
+static const u8 output66[] __initconst = {
+	0x76, 0x5b, 0x2c, 0xa7, 0x62, 0xb9, 0x08, 0x4a,
+	0xc6, 0x4a, 0x92, 0xc3, 0xbb, 0x10, 0xb3, 0xee,
+	0xff, 0xb9, 0x07, 0xc7, 0x27, 0xcb, 0x1e, 0xcf,
+	0x58, 0x6f, 0xa1, 0x64, 0xe8, 0xf1, 0x4e, 0xe1,
+	0xef, 0x18, 0x96, 0xab, 0x97, 0x28, 0xd1, 0x7c,
+	0x71, 0x6c, 0xd1, 0xe2, 0xfa, 0xd9, 0x75, 0xcb,
+	0xeb, 0xea, 0x0c, 0x86, 0x82, 0xd8, 0xf4, 0xcc,
+	0xea, 0xa3, 0x00, 0xfa, 0x82, 0xd2, 0xcd, 0xcb,
+	0xdb, 0x63, 0x28, 0xe2, 0x82, 0xe9, 0x01, 0xed,
+	0x31, 0xe6, 0x71, 0x45, 0x08, 0x89, 0x8a, 0x23,
+	0xa8, 0xb5, 0xc2, 0xe2, 0x9f, 0xe9, 0xb8, 0x9a,
+	0xc4, 0x79, 0x6d, 0x71, 0x52, 0x61, 0x74, 0x6c,
+	0x1b, 0xd7, 0x65, 0x6d, 0x03, 0xc4, 0x1a, 0xc0,
+	0x50, 0xba, 0xd6, 0xc9, 0x43, 0x50, 0xbe, 0x09,
+	0x09, 0x8a, 0xdb, 0xaa, 0x76, 0x4e, 0x3b, 0x61,
+	0x3c, 0x7c, 0x44, 0xe7, 0xdb, 0x10, 0xa7
+};
+static const u8 key66[] __initconst = {
+	0x88, 0xdf, 0xca, 0x68, 0xaf, 0x4f, 0xb3, 0xfd,
+	0x6e, 0xa7, 0x95, 0x35, 0x8a, 0xe8, 0x37, 0xe8,
+	0xc8, 0x55, 0xa2, 0x2a, 0x6d, 0x77, 0xf8, 0x93,
+	0x7a, 0x41, 0xf3, 0x7b, 0x95, 0xdf, 0x89, 0xf5
+};
+enum { nonce66 = 0x1024b4fdd415cf82ULL };
+
+static const u8 input67[] __initconst = {
+	0xd4, 0x2e, 0xfa, 0x92, 0xe9, 0x29, 0x68, 0xb7,
+	0x54, 0x2c, 0xf7, 0xa4, 0x2d, 0xb7, 0x50, 0xb5,
+	0xc5, 0xb2, 0x9d, 0x17, 0x5e, 0x0a, 0xca, 0x37,
+	0xbf, 0x60, 0xae, 0xd2, 0x98, 0xe9, 0xfa, 0x59,
+	0x67, 0x62, 0xe6, 0x43, 0x0c, 0x77, 0x80, 0x82,
+	0x33, 0x61, 0xa3, 0xff, 0xc1, 0xa0, 0x8f, 0x56,
+	0xbc, 0xec, 0x65, 0x43, 0x88, 0xa5, 0xff, 0x51,
+	0x64, 0x30, 0xee, 0x34, 0xb7, 0x5c, 0x28, 0x68,
+	0xc3, 0x52, 0xd2, 0xac, 0x78, 0x2a, 0xa6, 0x10,
+	0xb8, 0xb2, 0x4c, 0x80, 0x4f, 0x99, 0xb2, 0x36,
+	0x94, 0x8f, 0x66, 0xcb, 0xa1, 0x91, 0xed, 0x06,
+	0x42, 0x6d, 0xc1, 0xae, 0x55, 0x93, 0xdd, 0x93,
+	0x9e, 0x88, 0x34, 0x7f, 0x98, 0xeb, 0xbe, 0x61,
+	0xf9, 0xa9, 0x0f, 0xd9, 0xc4, 0x87, 0xd5, 0xef,
+	0xcc, 0x71, 0x8c, 0x0e, 0xce, 0xad, 0x02, 0xcf,
+	0xa2, 0x61, 0xdf, 0xb1, 0xfe, 0x3b, 0xdc, 0xc0,
+	0x58, 0xb5, 0x71, 0xa1, 0x83, 0xc9, 0xb4, 0xaf,
+	0x9d, 0x54, 0x12, 0xcd, 0xea, 0x06, 0xd6, 0x4e,
+	0xe5, 0x27, 0x0c, 0xc3, 0xbb, 0xa8, 0x0a, 0x81,
+	0x75, 0xc3, 0xc9, 0xd4, 0x35, 0x3e, 0x53, 0x9f,
+	0xaa, 0x20, 0xc0, 0x68, 0x39, 0x2c, 0x96, 0x39,
+	0x53, 0x81, 0xda, 0x07, 0x0f, 0x44, 0xa5, 0x47,
+	0x0e, 0xb3, 0x87, 0x0d, 0x1b, 0xc1, 0xe5, 0x41,
+	0x35, 0x12, 0x58, 0x96, 0x69, 0x8a, 0x1a, 0xa3,
+	0x9d, 0x3d, 0xd4, 0xb1, 0x8e, 0x1f, 0x96, 0x87,
+	0xda, 0xd3, 0x19, 0xe2, 0xb1, 0x3a, 0x19, 0x74,
+	0xa0, 0x00, 0x9f, 0x4d, 0xbc, 0xcb, 0x0c, 0xe9,
+	0xec, 0x10, 0xdf, 0x2a, 0x88, 0xdc, 0x30, 0x51,
+	0x46, 0x56, 0x53, 0x98, 0x6a, 0x26, 0x14, 0x05,
+	0x54, 0x81, 0x55, 0x0b, 0x3c, 0x85, 0xdd, 0x33,
+	0x81, 0x11, 0x29, 0x82, 0x46, 0x35, 0xe1, 0xdb,
+	0x59, 0x7b
+};
+static const u8 output67[] __initconst = {
+	0x64, 0x6c, 0xda, 0x7f, 0xd4, 0xa9, 0x2a, 0x5e,
+	0x22, 0xae, 0x8d, 0x67, 0xdb, 0xee, 0xfd, 0xd0,
+	0x44, 0x80, 0x17, 0xb2, 0xe3, 0x87, 0xad, 0x57,
+	0x15, 0xcb, 0x88, 0x64, 0xc0, 0xf1, 0x49, 0x3d,
+	0xfa, 0xbe, 0xa8, 0x9f, 0x12, 0xc3, 0x57, 0x56,
+	0x70, 0xa5, 0xc5, 0x6b, 0xf1, 0xab, 0xd5, 0xde,
+	0x77, 0x92, 0x6a, 0x56, 0x03, 0xf5, 0x21, 0x0d,
+	0xb6, 0xc4, 0xcc, 0x62, 0x44, 0x3f, 0xb1, 0xc1,
+	0x61, 0x41, 0x90, 0xb2, 0xd5, 0xb8, 0xf3, 0x57,
+	0xfb, 0xc2, 0x6b, 0x25, 0x58, 0xc8, 0x45, 0x20,
+	0x72, 0x29, 0x6f, 0x9d, 0xb5, 0x81, 0x4d, 0x2b,
+	0xb2, 0x89, 0x9e, 0x91, 0x53, 0x97, 0x1c, 0xd9,
+	0x3d, 0x79, 0xdc, 0x14, 0xae, 0x01, 0x73, 0x75,
+	0xf0, 0xca, 0xd5, 0xab, 0x62, 0x5c, 0x7a, 0x7d,
+	0x3f, 0xfe, 0x22, 0x7d, 0xee, 0xe2, 0xcb, 0x76,
+	0x55, 0xec, 0x06, 0xdd, 0x41, 0x47, 0x18, 0x62,
+	0x1d, 0x57, 0xd0, 0xd6, 0xb6, 0x0f, 0x4b, 0xfc,
+	0x79, 0x19, 0xf4, 0xd6, 0x37, 0x86, 0x18, 0x1f,
+	0x98, 0x0d, 0x9e, 0x15, 0x2d, 0xb6, 0x9a, 0x8a,
+	0x8c, 0x80, 0x22, 0x2f, 0x82, 0xc4, 0xc7, 0x36,
+	0xfa, 0xfa, 0x07, 0xbd, 0xc2, 0x2a, 0xe2, 0xea,
+	0x93, 0xc8, 0xb2, 0x90, 0x33, 0xf2, 0xee, 0x4b,
+	0x1b, 0xf4, 0x37, 0x92, 0x13, 0xbb, 0xe2, 0xce,
+	0xe3, 0x03, 0xcf, 0x07, 0x94, 0xab, 0x9a, 0xc9,
+	0xff, 0x83, 0x69, 0x3a, 0xda, 0x2c, 0xd0, 0x47,
+	0x3d, 0x6c, 0x1a, 0x60, 0x68, 0x47, 0xb9, 0x36,
+	0x52, 0xdd, 0x16, 0xef, 0x6c, 0xbf, 0x54, 0x11,
+	0x72, 0x62, 0xce, 0x8c, 0x9d, 0x90, 0xa0, 0x25,
+	0x06, 0x92, 0x3e, 0x12, 0x7e, 0x1a, 0x1d, 0xe5,
+	0xa2, 0x71, 0xce, 0x1c, 0x4c, 0x6a, 0x7c, 0xdc,
+	0x3d, 0xe3, 0x6e, 0x48, 0x9d, 0xb3, 0x64, 0x7d,
+	0x78, 0x40
+};
+static const u8 key67[] __initconst = {
+	0xa9, 0x20, 0x75, 0x89, 0x7e, 0x37, 0x85, 0x48,
+	0xa3, 0xfb, 0x7b, 0xe8, 0x30, 0xa7, 0xe3, 0x6e,
+	0xa6, 0xc1, 0x71, 0x17, 0xc1, 0x6c, 0x9b, 0xc2,
+	0xde, 0xf0, 0xa7, 0x19, 0xec, 0xce, 0xc6, 0x53
+};
+enum { nonce67 = 0x4adc4d1f968c8a10ULL };
+
+static const u8 input68[] __initconst = {
+	0x99, 0xae, 0x72, 0xfb, 0x16, 0xe1, 0xf1, 0x59,
+	0x43, 0x15, 0x4e, 0x33, 0xa0, 0x95, 0xe7, 0x6c,
+	0x74, 0x24, 0x31, 0xca, 0x3b, 0x2e, 0xeb, 0xd7,
+	0x11, 0xd8, 0xe0, 0x56, 0x92, 0x91, 0x61, 0x57,
+	0xe2, 0x82, 0x9f, 0x8f, 0x37, 0xf5, 0x3d, 0x24,
+	0x92, 0x9d, 0x87, 0x00, 0x8d, 0x89, 0xe0, 0x25,
+	0x8b, 0xe4, 0x20, 0x5b, 0x8a, 0x26, 0x2c, 0x61,
+	0x78, 0xb0, 0xa6, 0x3e, 0x82, 0x18, 0xcf, 0xdc,
+	0x2d, 0x24, 0xdd, 0x81, 0x42, 0xc4, 0x95, 0xf0,
+	0x48, 0x60, 0x71, 0xe3, 0xe3, 0xac, 0xec, 0xbe,
+	0x98, 0x6b, 0x0c, 0xb5, 0x6a, 0xa9, 0xc8, 0x79,
+	0x23, 0x2e, 0x38, 0x0b, 0x72, 0x88, 0x8c, 0xe7,
+	0x71, 0x8b, 0x36, 0xe3, 0x58, 0x3d, 0x9c, 0xa0,
+	0xa2, 0xea, 0xcf, 0x0c, 0x6a, 0x6c, 0x64, 0xdf,
+	0x97, 0x21, 0x8f, 0x93, 0xfb, 0xba, 0xf3, 0x5a,
+	0xd7, 0x8f, 0xa6, 0x37, 0x15, 0x50, 0x43, 0x02,
+	0x46, 0x7f, 0x93, 0x46, 0x86, 0x31, 0xe2, 0xaa,
+	0x24, 0xa8, 0x26, 0xae, 0xe6, 0xc0, 0x05, 0x73,
+	0x0b, 0x4f, 0x7e, 0xed, 0x65, 0xeb, 0x56, 0x1e,
+	0xb6, 0xb3, 0x0b, 0xc3, 0x0e, 0x31, 0x95, 0xa9,
+	0x18, 0x4d, 0xaf, 0x38, 0xd7, 0xec, 0xc6, 0x44,
+	0x72, 0x77, 0x4e, 0x25, 0x4b, 0x25, 0xdd, 0x1e,
+	0x8c, 0xa2, 0xdf, 0xf6, 0x2a, 0x97, 0x1a, 0x88,
+	0x2c, 0x8a, 0x5d, 0xfe, 0xe8, 0xfb, 0x35, 0xe8,
+	0x0f, 0x2b, 0x7a, 0x18, 0x69, 0x43, 0x31, 0x1d,
+	0x38, 0x6a, 0x62, 0x95, 0x0f, 0x20, 0x4b, 0xbb,
+	0x97, 0x3c, 0xe0, 0x64, 0x2f, 0x52, 0xc9, 0x2d,
+	0x4d, 0x9d, 0x54, 0x04, 0x3d, 0xc9, 0xea, 0xeb,
+	0xd0, 0x86, 0x52, 0xff, 0x42, 0xe1, 0x0d, 0x7a,
+	0xad, 0x88, 0xf9, 0x9b, 0x1e, 0x5e, 0x12, 0x27,
+	0x95, 0x3e, 0x0c, 0x2c, 0x13, 0x00, 0x6f, 0x8e,
+	0x93, 0x69, 0x0e, 0x01, 0x8c, 0xc1, 0xfd, 0xb3
+};
+static const u8 output68[] __initconst = {
+	0x26, 0x3e, 0xf2, 0xb1, 0xf5, 0xef, 0x81, 0xa4,
+	0xb7, 0x42, 0xd4, 0x26, 0x18, 0x4b, 0xdd, 0x6a,
+	0x47, 0x15, 0xcb, 0x0e, 0x57, 0xdb, 0xa7, 0x29,
+	0x7e, 0x7b, 0x3f, 0x47, 0x89, 0x57, 0xab, 0xea,
+	0x14, 0x7b, 0xcf, 0x37, 0xdb, 0x1c, 0xe1, 0x11,
+	0x77, 0xae, 0x2e, 0x4c, 0xd2, 0x08, 0x3f, 0xa6,
+	0x62, 0x86, 0xa6, 0xb2, 0x07, 0xd5, 0x3f, 0x9b,
+	0xdc, 0xc8, 0x50, 0x4b, 0x7b, 0xb9, 0x06, 0xe6,
+	0xeb, 0xac, 0x98, 0x8c, 0x36, 0x0c, 0x1e, 0xb2,
+	0xc8, 0xfb, 0x24, 0x60, 0x2c, 0x08, 0x17, 0x26,
+	0x5b, 0xc8, 0xc2, 0xdf, 0x9c, 0x73, 0x67, 0x4a,
+	0xdb, 0xcf, 0xd5, 0x2c, 0x2b, 0xca, 0x24, 0xcc,
+	0xdb, 0xc9, 0xa8, 0xf2, 0x5d, 0x67, 0xdf, 0x5c,
+	0x62, 0x0b, 0x58, 0xc0, 0x83, 0xde, 0x8b, 0xf6,
+	0x15, 0x0a, 0xd6, 0x32, 0xd8, 0xf5, 0xf2, 0x5f,
+	0x33, 0xce, 0x7e, 0xab, 0x76, 0xcd, 0x14, 0x91,
+	0xd8, 0x41, 0x90, 0x93, 0xa1, 0xaf, 0xf3, 0x45,
+	0x6c, 0x1b, 0x25, 0xbd, 0x48, 0x51, 0x6d, 0x15,
+	0x47, 0xe6, 0x23, 0x50, 0x32, 0x69, 0x1e, 0xb5,
+	0x94, 0xd3, 0x97, 0xba, 0xd7, 0x37, 0x4a, 0xba,
+	0xb9, 0xcd, 0xfb, 0x96, 0x9a, 0x90, 0xe0, 0x37,
+	0xf8, 0xdf, 0x91, 0x6c, 0x62, 0x13, 0x19, 0x21,
+	0x4b, 0xa9, 0xf1, 0x12, 0x66, 0xe2, 0x74, 0xd7,
+	0x81, 0xa0, 0x74, 0x8d, 0x7e, 0x7e, 0xc9, 0xb1,
+	0x69, 0x8f, 0xed, 0xb3, 0xf6, 0x97, 0xcd, 0x72,
+	0x78, 0x93, 0xd3, 0x54, 0x6b, 0x43, 0xac, 0x29,
+	0xb4, 0xbc, 0x7d, 0xa4, 0x26, 0x4b, 0x7b, 0xab,
+	0xd6, 0x67, 0x22, 0xff, 0x03, 0x92, 0xb6, 0xd4,
+	0x96, 0x94, 0x5a, 0xe5, 0x02, 0x35, 0x77, 0xfa,
+	0x3f, 0x54, 0x1d, 0xdd, 0x35, 0x39, 0xfe, 0x03,
+	0xdd, 0x8e, 0x3c, 0x8c, 0xc2, 0x69, 0x2a, 0xb1,
+	0xb7, 0xb3, 0xa1, 0x89, 0x84, 0xea, 0x16, 0xe2
+};
+static const u8 key68[] __initconst = {
+	0xd2, 0x49, 0x7f, 0xd7, 0x49, 0x66, 0x0d, 0xb3,
+	0x5a, 0x7e, 0x3c, 0xfc, 0x37, 0x83, 0x0e, 0xf7,
+	0x96, 0xd8, 0xd6, 0x33, 0x79, 0x2b, 0x84, 0x53,
+	0x06, 0xbc, 0x6c, 0x0a, 0x55, 0x84, 0xfe, 0xab
+};
+enum { nonce68 = 0x6a6df7ff0a20de06ULL };
+
+static const u8 input69[] __initconst = {
+	0xf9, 0x18, 0x4c, 0xd2, 0x3f, 0xf7, 0x22, 0xd9,
+	0x58, 0xb6, 0x3b, 0x38, 0x69, 0x79, 0xf4, 0x71,
+	0x5f, 0x38, 0x52, 0x1f, 0x17, 0x6f, 0x6f, 0xd9,
+	0x09, 0x2b, 0xfb, 0x67, 0xdc, 0xc9, 0xe8, 0x4a,
+	0x70, 0x9f, 0x2e, 0x3c, 0x06, 0xe5, 0x12, 0x20,
+	0x25, 0x29, 0xd0, 0xdc, 0x81, 0xc5, 0xc6, 0x0f,
+	0xd2, 0xa8, 0x81, 0x15, 0x98, 0xb2, 0x71, 0x5a,
+	0x9a, 0xe9, 0xfb, 0xaf, 0x0e, 0x5f, 0x8a, 0xf3,
+	0x16, 0x4a, 0x47, 0xf2, 0x5c, 0xbf, 0xda, 0x52,
+	0x9a, 0xa6, 0x36, 0xfd, 0xc6, 0xf7, 0x66, 0x00,
+	0xcc, 0x6c, 0xd4, 0xb3, 0x07, 0x6d, 0xeb, 0xfe,
+	0x92, 0x71, 0x25, 0xd0, 0xcf, 0x9c, 0xe8, 0x65,
+	0x45, 0x10, 0xcf, 0x62, 0x74, 0x7d, 0xf2, 0x1b,
+	0x57, 0xa0, 0xf1, 0x6b, 0xa4, 0xd5, 0xfa, 0x12,
+	0x27, 0x5a, 0xf7, 0x99, 0xfc, 0xca, 0xf3, 0xb8,
+	0x2c, 0x8b, 0xba, 0x28, 0x74, 0xde, 0x8f, 0x78,
+	0xa2, 0x8c, 0xaf, 0x89, 0x4b, 0x05, 0xe2, 0xf3,
+	0xf8, 0xd2, 0xef, 0xac, 0xa4, 0xc4, 0xe2, 0xe2,
+	0x36, 0xbb, 0x5e, 0xae, 0xe6, 0x87, 0x3d, 0x88,
+	0x9f, 0xb8, 0x11, 0xbb, 0xcf, 0x57, 0xce, 0xd0,
+	0xba, 0x62, 0xf4, 0xf8, 0x9b, 0x95, 0x04, 0xc9,
+	0xcf, 0x01, 0xe9, 0xf1, 0xc8, 0xc6, 0x22, 0xa4,
+	0xf2, 0x8b, 0x2f, 0x24, 0x0a, 0xf5, 0x6e, 0xb7,
+	0xd4, 0x2c, 0xb6, 0xf7, 0x5c, 0x97, 0x61, 0x0b,
+	0xd9, 0xb5, 0x06, 0xcd, 0xed, 0x3e, 0x1f, 0xc5,
+	0xb2, 0x6c, 0xa3, 0xea, 0xb8, 0xad, 0xa6, 0x42,
+	0x88, 0x7a, 0x52, 0xd5, 0x64, 0xba, 0xb5, 0x20,
+	0x10, 0xa0, 0x0f, 0x0d, 0xea, 0xef, 0x5a, 0x9b,
+	0x27, 0xb8, 0xca, 0x20, 0x19, 0x6d, 0xa8, 0xc4,
+	0x46, 0x04, 0xb3, 0xe8, 0xf8, 0x66, 0x1b, 0x0a,
+	0xce, 0x76, 0x5d, 0x59, 0x58, 0x05, 0xee, 0x3e,
+	0x3c, 0x86, 0x5b, 0x49, 0x1c, 0x72, 0x18, 0x01,
+	0x62, 0x92, 0x0f, 0x3e, 0xd1, 0x57, 0x5e, 0x20,
+	0x7b, 0xfb, 0x4d, 0x3c, 0xc5, 0x35, 0x43, 0x2f,
+	0xb0, 0xc5, 0x7c, 0xe4, 0xa2, 0x84, 0x13, 0x77
+};
+static const u8 output69[] __initconst = {
+	0xbb, 0x4a, 0x7f, 0x7c, 0xd5, 0x2f, 0x89, 0x06,
+	0xec, 0x20, 0xf1, 0x9a, 0x11, 0x09, 0x14, 0x2e,
+	0x17, 0x50, 0xf9, 0xd5, 0xf5, 0x48, 0x7c, 0x7a,
+	0x55, 0xc0, 0x57, 0x03, 0xe3, 0xc4, 0xb2, 0xb7,
+	0x18, 0x47, 0x95, 0xde, 0xaf, 0x80, 0x06, 0x3c,
+	0x5a, 0xf2, 0xc3, 0x53, 0xe3, 0x29, 0x92, 0xf8,
+	0xff, 0x64, 0x85, 0xb9, 0xf7, 0xd3, 0x80, 0xd2,
+	0x0c, 0x5d, 0x7b, 0x57, 0x0c, 0x51, 0x79, 0x86,
+	0xf3, 0x20, 0xd2, 0xb8, 0x6e, 0x0c, 0x5a, 0xce,
+	0xeb, 0x88, 0x02, 0x8b, 0x82, 0x1b, 0x7f, 0xf5,
+	0xde, 0x7f, 0x48, 0x48, 0xdf, 0xa0, 0x55, 0xc6,
+	0x0c, 0x22, 0xa1, 0x80, 0x8d, 0x3b, 0xcb, 0x40,
+	0x2d, 0x3d, 0x0b, 0xf2, 0xe0, 0x22, 0x13, 0x99,
+	0xe1, 0xa7, 0x27, 0x68, 0x31, 0xe1, 0x24, 0x5d,
+	0xd2, 0xee, 0x16, 0xc1, 0xd7, 0xa8, 0x14, 0x19,
+	0x23, 0x72, 0x67, 0x27, 0xdc, 0x5e, 0xb9, 0xc7,
+	0xd8, 0xe3, 0x55, 0x50, 0x40, 0x98, 0x7b, 0xe7,
+	0x34, 0x1c, 0x3b, 0x18, 0x14, 0xd8, 0x62, 0xc1,
+	0x93, 0x84, 0xf3, 0x5b, 0xdd, 0x9e, 0x1f, 0x3b,
+	0x0b, 0xbc, 0x4e, 0x5b, 0x79, 0xa3, 0xca, 0x74,
+	0x2a, 0x98, 0xe8, 0x04, 0x39, 0xef, 0xc6, 0x76,
+	0x6d, 0xee, 0x9f, 0x67, 0x5b, 0x59, 0x3a, 0xe5,
+	0xf2, 0x3b, 0xca, 0x89, 0xe8, 0x9b, 0x03, 0x3d,
+	0x11, 0xd2, 0x4a, 0x70, 0xaf, 0x88, 0xb0, 0x94,
+	0x96, 0x26, 0xab, 0x3c, 0xc1, 0xb8, 0xe4, 0xe7,
+	0x14, 0x61, 0x64, 0x3a, 0x61, 0x08, 0x0f, 0xa9,
+	0xce, 0x64, 0xb2, 0x40, 0xf8, 0x20, 0x3a, 0xa9,
+	0x31, 0xbd, 0x7e, 0x16, 0xca, 0xf5, 0x62, 0x0f,
+	0x91, 0x9f, 0x8e, 0x1d, 0xa4, 0x77, 0xf3, 0x87,
+	0x61, 0xe8, 0x14, 0xde, 0x18, 0x68, 0x4e, 0x9d,
+	0x73, 0xcd, 0x8a, 0xe4, 0x80, 0x84, 0x23, 0xaa,
+	0x9d, 0x64, 0x1c, 0x80, 0x41, 0xca, 0x82, 0x40,
+	0x94, 0x55, 0xe3, 0x28, 0xa1, 0x97, 0x71, 0xba,
+	0xf2, 0x2c, 0x39, 0x62, 0x29, 0x56, 0xd0, 0xff,
+	0xb2, 0x82, 0x20, 0x59, 0x1f, 0xc3, 0x64, 0x57
+};
+static const u8 key69[] __initconst = {
+	0x19, 0x09, 0xe9, 0x7c, 0xd9, 0x02, 0x4a, 0x0c,
+	0x52, 0x25, 0xad, 0x5c, 0x2e, 0x8d, 0x86, 0x10,
+	0x85, 0x2b, 0xba, 0xa4, 0x44, 0x5b, 0x39, 0x3e,
+	0x18, 0xaa, 0xce, 0x0e, 0xe2, 0x69, 0x3c, 0xcf
+};
+enum { nonce69 = 0xdb925a1948f0f060ULL };
+
+static const u8 input70[] __initconst = {
+	0x10, 0xe7, 0x83, 0xcf, 0x42, 0x9f, 0xf2, 0x41,
+	0xc7, 0xe4, 0xdb, 0xf9, 0xa3, 0x02, 0x1d, 0x8d,
+	0x50, 0x81, 0x2c, 0x6b, 0x92, 0xe0, 0x4e, 0xea,
+	0x26, 0x83, 0x2a, 0xd0, 0x31, 0xf1, 0x23, 0xf3,
+	0x0e, 0x88, 0x14, 0x31, 0xf9, 0x01, 0x63, 0x59,
+	0x21, 0xd1, 0x8b, 0xdd, 0x06, 0xd0, 0xc6, 0xab,
+	0x91, 0x71, 0x82, 0x4d, 0xd4, 0x62, 0x37, 0x17,
+	0xf9, 0x50, 0xf9, 0xb5, 0x74, 0xce, 0x39, 0x80,
+	0x80, 0x78, 0xf8, 0xdc, 0x1c, 0xdb, 0x7c, 0x3d,
+	0xd4, 0x86, 0x31, 0x00, 0x75, 0x7b, 0xd1, 0x42,
+	0x9f, 0x1b, 0x97, 0x88, 0x0e, 0x14, 0x0e, 0x1e,
+	0x7d, 0x7b, 0xc4, 0xd2, 0xf3, 0xc1, 0x6d, 0x17,
+	0x5d, 0xc4, 0x75, 0x54, 0x0f, 0x38, 0x65, 0x89,
+	0xd8, 0x7d, 0xab, 0xc9, 0xa7, 0x0a, 0x21, 0x0b,
+	0x37, 0x12, 0x05, 0x07, 0xb5, 0x68, 0x32, 0x32,
+	0xb9, 0xf8, 0x97, 0x17, 0x03, 0xed, 0x51, 0x8f,
+	0x3d, 0x5a, 0xd0, 0x12, 0x01, 0x6e, 0x2e, 0x91,
+	0x1c, 0xbe, 0x6b, 0xa3, 0xcc, 0x75, 0x62, 0x06,
+	0x8e, 0x65, 0xbb, 0xe2, 0x29, 0x71, 0x4b, 0x89,
+	0x6a, 0x9d, 0x85, 0x8c, 0x8c, 0xdf, 0x94, 0x95,
+	0x23, 0x66, 0xf8, 0x92, 0xee, 0x56, 0xeb, 0xb3,
+	0xeb, 0xd2, 0x4a, 0x3b, 0x77, 0x8a, 0x6e, 0xf6,
+	0xca, 0xd2, 0x34, 0x00, 0xde, 0xbe, 0x1d, 0x7a,
+	0x73, 0xef, 0x2b, 0x80, 0x56, 0x16, 0x29, 0xbf,
+	0x6e, 0x33, 0xed, 0x0d, 0xe2, 0x02, 0x60, 0x74,
+	0xe9, 0x0a, 0xbc, 0xd1, 0xc5, 0xe8, 0x53, 0x02,
+	0x79, 0x0f, 0x25, 0x0c, 0xef, 0xab, 0xd3, 0xbc,
+	0xb7, 0xfc, 0xf3, 0xb0, 0x34, 0xd1, 0x07, 0xd2,
+	0x5a, 0x31, 0x1f, 0xec, 0x1f, 0x87, 0xed, 0xdd,
+	0x6a, 0xc1, 0xe8, 0xb3, 0x25, 0x4c, 0xc6, 0x9b,
+	0x91, 0x73, 0xec, 0x06, 0x73, 0x9e, 0x57, 0x65,
+	0x32, 0x75, 0x11, 0x74, 0x6e, 0xa4, 0x7d, 0x0d,
+	0x74, 0x9f, 0x51, 0x10, 0x10, 0x47, 0xc9, 0x71,
+	0x6e, 0x97, 0xae, 0x44, 0x41, 0xef, 0x98, 0x78,
+	0xf4, 0xc5, 0xbd, 0x5e, 0x00, 0xe5, 0xfd, 0xe2,
+	0xbe, 0x8c, 0xc2, 0xae, 0xc2, 0xee, 0x59, 0xf6,
+	0xcb, 0x20, 0x54, 0x84, 0xc3, 0x31, 0x7e, 0x67,
+	0x71, 0xb6, 0x76, 0xbe, 0x81, 0x8f, 0x82, 0xad,
+	0x01, 0x8f, 0xc4, 0x00, 0x04, 0x3d, 0x8d, 0x34,
+	0xaa, 0xea, 0xc0, 0xea, 0x91, 0x42, 0xb6, 0xb8,
+	0x43, 0xf3, 0x17, 0xb2, 0x73, 0x64, 0x82, 0x97,
+	0xd5, 0xc9, 0x07, 0x77, 0xb1, 0x26, 0xe2, 0x00,
+	0x6a, 0xae, 0x70, 0x0b, 0xbe, 0xe6, 0xb8, 0x42,
+	0x81, 0x55, 0xf7, 0xb8, 0x96, 0x41, 0x9d, 0xd4,
+	0x2c, 0x27, 0x00, 0xcc, 0x91, 0x28, 0x22, 0xa4,
+	0x7b, 0x42, 0x51, 0x9e, 0xd6, 0xec, 0xf3, 0x6b,
+	0x00, 0xff, 0x5c, 0xa2, 0xac, 0x47, 0x33, 0x2d,
+	0xf8, 0x11, 0x65, 0x5f, 0x4d, 0x79, 0x8b, 0x4f,
+	0xad, 0xf0, 0x9d, 0xcd, 0xb9, 0x7b, 0x08, 0xf7,
+	0x32, 0x51, 0xfa, 0x39, 0xaa, 0x78, 0x05, 0xb1,
+	0xf3, 0x5d, 0xe8, 0x7c, 0x8e, 0x4f, 0xa2, 0xe0,
+	0x98, 0x0c, 0xb2, 0xa7, 0xf0, 0x35, 0x8e, 0x70,
+	0x7c, 0x82, 0xf3, 0x1b, 0x26, 0x28, 0x12, 0xe5,
+	0x23, 0x57, 0xe4, 0xb4, 0x9b, 0x00, 0x39, 0x97,
+	0xef, 0x7c, 0x46, 0x9b, 0x34, 0x6b, 0xe7, 0x0e,
+	0xa3, 0x2a, 0x18, 0x11, 0x64, 0xc6, 0x7c, 0x8b,
+	0x06, 0x02, 0xf5, 0x69, 0x76, 0xf9, 0xaa, 0x09,
+	0x5f, 0x68, 0xf8, 0x4a, 0x79, 0x58, 0xec, 0x37,
+	0xcf, 0x3a, 0xcc, 0x97, 0x70, 0x1d, 0x3e, 0x52,
+	0x18, 0x0a, 0xad, 0x28, 0x5b, 0x3b, 0xe9, 0x03,
+	0x84, 0xe9, 0x68, 0x50, 0xce, 0xc4, 0xbc, 0x3e,
+	0x21, 0xad, 0x63, 0xfe, 0xc6, 0xfd, 0x6e, 0x69,
+	0x84, 0xa9, 0x30, 0xb1, 0x7a, 0xc4, 0x31, 0x10,
+	0xc1, 0x1f, 0x6e, 0xeb, 0xa5, 0xa6, 0x01
+};
+static const u8 output70[] __initconst = {
+	0x0f, 0x93, 0x2a, 0x20, 0xb3, 0x87, 0x2d, 0xce,
+	0xd1, 0x3b, 0x30, 0xfd, 0x06, 0x6d, 0x0a, 0xaa,
+	0x3e, 0xc4, 0x29, 0x02, 0x8a, 0xde, 0xa6, 0x4b,
+	0x45, 0x1b, 0x4f, 0x25, 0x59, 0xd5, 0x56, 0x6a,
+	0x3b, 0x37, 0xbd, 0x3e, 0x47, 0x12, 0x2c, 0x4e,
+	0x60, 0x5f, 0x05, 0x75, 0x61, 0x23, 0x05, 0x74,
+	0xcb, 0xfc, 0x5a, 0xb3, 0xac, 0x5c, 0x3d, 0xab,
+	0x52, 0x5f, 0x05, 0xbc, 0x57, 0xc0, 0x7e, 0xcf,
+	0x34, 0x5d, 0x7f, 0x41, 0xa3, 0x17, 0x78, 0xd5,
+	0x9f, 0xec, 0x0f, 0x1e, 0xf9, 0xfe, 0xa3, 0xbd,
+	0x28, 0xb0, 0xba, 0x4d, 0x84, 0xdb, 0xae, 0x8f,
+	0x1d, 0x98, 0xb7, 0xdc, 0xf9, 0xad, 0x55, 0x9c,
+	0x89, 0xfe, 0x9b, 0x9c, 0xa9, 0x89, 0xf6, 0x97,
+	0x9c, 0x3f, 0x09, 0x3e, 0xc6, 0x02, 0xc2, 0x55,
+	0x58, 0x09, 0x54, 0x66, 0xe4, 0x36, 0x81, 0x35,
+	0xca, 0x88, 0x17, 0x89, 0x80, 0x24, 0x2b, 0x21,
+	0x89, 0xee, 0x45, 0x5a, 0xe7, 0x1f, 0xd5, 0xa5,
+	0x16, 0xa4, 0xda, 0x70, 0x7e, 0xe9, 0x4f, 0x24,
+	0x61, 0x97, 0xab, 0xa0, 0xe0, 0xe7, 0xb8, 0x5c,
+	0x0f, 0x25, 0x17, 0x37, 0x75, 0x12, 0xb5, 0x40,
+	0xde, 0x1c, 0x0d, 0x8a, 0x77, 0x62, 0x3c, 0x86,
+	0xd9, 0x70, 0x2e, 0x96, 0x30, 0xd2, 0x55, 0xb3,
+	0x6b, 0xc3, 0xf2, 0x9c, 0x47, 0xf3, 0x3a, 0x24,
+	0x52, 0xc6, 0x38, 0xd8, 0x22, 0xb3, 0x0c, 0xfd,
+	0x2f, 0xa3, 0x3c, 0xb5, 0xe8, 0x26, 0xe1, 0xa3,
+	0xad, 0xb0, 0x82, 0x17, 0xc1, 0x53, 0xb8, 0x34,
+	0x48, 0xee, 0x39, 0xae, 0x51, 0x43, 0xec, 0x82,
+	0xce, 0x87, 0xc6, 0x76, 0xb9, 0x76, 0xd3, 0x53,
+	0xfe, 0x49, 0x24, 0x7d, 0x02, 0x42, 0x2b, 0x72,
+	0xfb, 0xcb, 0xd8, 0x96, 0x02, 0xc6, 0x9a, 0x20,
+	0xf3, 0x5a, 0x67, 0xe8, 0x13, 0xf8, 0xb2, 0xcb,
+	0xa2, 0xec, 0x18, 0x20, 0x4a, 0xb0, 0x73, 0x53,
+	0x21, 0xb0, 0x77, 0x53, 0xd8, 0x76, 0xa1, 0x30,
+	0x17, 0x72, 0x2e, 0x33, 0x5f, 0x33, 0x6b, 0x28,
+	0xfb, 0xb0, 0xf4, 0xec, 0x8e, 0xed, 0x20, 0x7d,
+	0x57, 0x8c, 0x74, 0x28, 0x64, 0x8b, 0xeb, 0x59,
+	0x38, 0x3f, 0xe7, 0x83, 0x2e, 0xe5, 0x64, 0x4d,
+	0x5c, 0x1f, 0xe1, 0x3b, 0xd9, 0x84, 0xdb, 0xc9,
+	0xec, 0xd8, 0xc1, 0x7c, 0x1f, 0x1b, 0x68, 0x35,
+	0xc6, 0x34, 0x10, 0xef, 0x19, 0xc9, 0x0a, 0xd6,
+	0x43, 0x7f, 0xa6, 0xcb, 0x9d, 0xf4, 0xf0, 0x16,
+	0xb1, 0xb1, 0x96, 0x64, 0xec, 0x8d, 0x22, 0x4c,
+	0x4b, 0xe8, 0x1a, 0xba, 0x6f, 0xb7, 0xfc, 0xa5,
+	0x69, 0x3e, 0xad, 0x78, 0x79, 0x19, 0xb5, 0x04,
+	0x69, 0xe5, 0x3f, 0xff, 0x60, 0x8c, 0xda, 0x0b,
+	0x7b, 0xf7, 0xe7, 0xe6, 0x29, 0x3a, 0x85, 0xba,
+	0xb5, 0xb0, 0x35, 0xbd, 0x38, 0xce, 0x34, 0x5e,
+	0xf2, 0xdc, 0xd1, 0x8f, 0xc3, 0x03, 0x24, 0xa2,
+	0x03, 0xf7, 0x4e, 0x49, 0x5b, 0xcf, 0x6d, 0xb0,
+	0xeb, 0xe3, 0x30, 0x28, 0xd5, 0x5b, 0x82, 0x5f,
+	0xe4, 0x7c, 0x1e, 0xec, 0xd2, 0x39, 0xf9, 0x6f,
+	0x2e, 0xb3, 0xcd, 0x01, 0xb1, 0x67, 0xaa, 0xea,
+	0xaa, 0xb3, 0x63, 0xaf, 0xd9, 0xb2, 0x1f, 0xba,
+	0x05, 0x20, 0xeb, 0x19, 0x32, 0xf0, 0x6c, 0x3f,
+	0x40, 0xcc, 0x93, 0xb3, 0xd8, 0x25, 0xa6, 0xe4,
+	0xce, 0xd7, 0x7e, 0x48, 0x99, 0x65, 0x7f, 0x86,
+	0xc5, 0xd4, 0x79, 0x6b, 0xab, 0x43, 0xb8, 0x6b,
+	0xf1, 0x2f, 0xea, 0x4c, 0x5e, 0xf0, 0x3b, 0xb4,
+	0xb8, 0xb0, 0x94, 0x0c, 0x6b, 0xe7, 0x22, 0x93,
+	0xaa, 0x01, 0xcb, 0xf1, 0x11, 0x60, 0xf6, 0x69,
+	0xcf, 0x14, 0xde, 0xfb, 0x90, 0x05, 0x27, 0x0c,
+	0x1a, 0x9e, 0xf0, 0xb4, 0xc6, 0xa1, 0xe8, 0xdd,
+	0xd0, 0x4c, 0x25, 0x4f, 0x9c, 0xb7, 0xb1, 0xb0,
+	0x21, 0xdb, 0x87, 0x09, 0x03, 0xf2, 0xb3
+};
+static const u8 key70[] __initconst = {
+	0x3b, 0x5b, 0x59, 0x36, 0x44, 0xd1, 0xba, 0x71,
+	0x55, 0x87, 0x4d, 0x62, 0x3d, 0xc2, 0xfc, 0xaa,
+	0x3f, 0x4e, 0x1a, 0xe4, 0xca, 0x09, 0xfc, 0x6a,
+	0xb2, 0xd6, 0x5d, 0x79, 0xf9, 0x1a, 0x91, 0xa7
+};
+enum { nonce70 = 0x3fd6786dd147a85ULL };
+
+static const u8 input71[] __initconst = {
+	0x18, 0x78, 0xd6, 0x79, 0xe4, 0x9a, 0x6c, 0x73,
+	0x17, 0xd4, 0x05, 0x0f, 0x1e, 0x9f, 0xd9, 0x2b,
+	0x86, 0x48, 0x7d, 0xf4, 0xd9, 0x1c, 0x76, 0xfc,
+	0x8e, 0x22, 0x34, 0xe1, 0x48, 0x4a, 0x8d, 0x79,
+	0xb7, 0xbb, 0x88, 0xab, 0x90, 0xde, 0xc5, 0xb4,
+	0xb4, 0xe7, 0x85, 0x49, 0xda, 0x57, 0xeb, 0xc9,
+	0xcd, 0x21, 0xfc, 0x45, 0x6e, 0x32, 0x67, 0xf2,
+	0x4f, 0xa6, 0x54, 0xe5, 0x20, 0xed, 0xcf, 0xc6,
+	0x62, 0x25, 0x8e, 0x00, 0xf8, 0x6b, 0xa2, 0x80,
+	0xac, 0x88, 0xa6, 0x59, 0x27, 0x83, 0x95, 0x11,
+	0x3f, 0x70, 0x5e, 0x3f, 0x11, 0xfb, 0x26, 0xbf,
+	0xe1, 0x48, 0x75, 0xf9, 0x86, 0xbf, 0xa6, 0x5d,
+	0x15, 0x61, 0x66, 0xbf, 0x78, 0x8f, 0x6b, 0x9b,
+	0xda, 0x98, 0xb7, 0x19, 0xe2, 0xf2, 0xa3, 0x9c,
+	0x7c, 0x6a, 0x9a, 0xd8, 0x3d, 0x4c, 0x2c, 0xe1,
+	0x09, 0xb4, 0x28, 0x82, 0x4e, 0xab, 0x0c, 0x75,
+	0x63, 0xeb, 0xbc, 0xd0, 0x71, 0xa2, 0x73, 0x85,
+	0xed, 0x53, 0x7a, 0x3f, 0x68, 0x9f, 0xd0, 0xa9,
+	0x00, 0x5a, 0x9e, 0x80, 0x55, 0x00, 0xe6, 0xae,
+	0x0c, 0x03, 0x40, 0xed, 0xfc, 0x68, 0x4a, 0xb7,
+	0x1e, 0x09, 0x65, 0x30, 0x5a, 0x3d, 0x97, 0x4d,
+	0x5e, 0x51, 0x8e, 0xda, 0xc3, 0x55, 0x8c, 0xfb,
+	0xcf, 0x83, 0x05, 0x35, 0x0d, 0x08, 0x1b, 0xf3,
+	0x3a, 0x57, 0x96, 0xac, 0x58, 0x8b, 0xfa, 0x00,
+	0x49, 0x15, 0x78, 0xd2, 0x4b, 0xed, 0xb8, 0x59,
+	0x78, 0x9b, 0x7f, 0xaa, 0xfc, 0xe7, 0x46, 0xdc,
+	0x7b, 0x34, 0xd0, 0x34, 0xe5, 0x10, 0xff, 0x4d,
+	0x5a, 0x4d, 0x60, 0xa7, 0x16, 0x54, 0xc4, 0xfd,
+	0xca, 0x5d, 0x68, 0xc7, 0x4a, 0x01, 0x8d, 0x7f,
+	0x74, 0x5d, 0xff, 0xb8, 0x37, 0x15, 0x62, 0xfa,
+	0x44, 0x45, 0xcf, 0x77, 0x3b, 0x1d, 0xb2, 0xd2,
+	0x0d, 0x42, 0x00, 0x39, 0x68, 0x1f, 0xcc, 0x89,
+	0x73, 0x5d, 0xa9, 0x2e, 0xfd, 0x58, 0x62, 0xca,
+	0x35, 0x8e, 0x70, 0x70, 0xaa, 0x6e, 0x14, 0xe9,
+	0xa4, 0xe2, 0x10, 0x66, 0x71, 0xdc, 0x4c, 0xfc,
+	0xa9, 0xdc, 0x8f, 0x57, 0x4d, 0xc5, 0xac, 0xd7,
+	0xa9, 0xf3, 0xf3, 0xa1, 0xff, 0x62, 0xa0, 0x8f,
+	0xe4, 0x96, 0x3e, 0xcb, 0x9f, 0x76, 0x42, 0x39,
+	0x1f, 0x24, 0xfd, 0xfd, 0x79, 0xe8, 0x27, 0xdf,
+	0xa8, 0xf6, 0x33, 0x8b, 0x31, 0x59, 0x69, 0xcf,
+	0x6a, 0xef, 0x89, 0x4d, 0xa7, 0xf6, 0x7e, 0x97,
+	0x14, 0xbd, 0xda, 0xdd, 0xb4, 0x84, 0x04, 0x24,
+	0xe0, 0x17, 0xe1, 0x0f, 0x1f, 0x8a, 0x6a, 0x71,
+	0x74, 0x41, 0xdc, 0x59, 0x5c, 0x8f, 0x01, 0x25,
+	0x92, 0xf0, 0x2e, 0x15, 0x62, 0x71, 0x9a, 0x9f,
+	0x87, 0xdf, 0x62, 0x49, 0x7f, 0x86, 0x62, 0xfc,
+	0x20, 0x84, 0xd7, 0xe3, 0x3a, 0xd9, 0x37, 0x85,
+	0xb7, 0x84, 0x5a, 0xf9, 0xed, 0x21, 0x32, 0x94,
+	0x3e, 0x04, 0xe7, 0x8c, 0x46, 0x76, 0x21, 0x67,
+	0xf6, 0x95, 0x64, 0x92, 0xb7, 0x15, 0xf6, 0xe3,
+	0x41, 0x27, 0x9d, 0xd7, 0xe3, 0x79, 0x75, 0x92,
+	0xd0, 0xc1, 0xf3, 0x40, 0x92, 0x08, 0xde, 0x90,
+	0x22, 0x82, 0xb2, 0x69, 0xae, 0x1a, 0x35, 0x11,
+	0x89, 0xc8, 0x06, 0x82, 0x95, 0x23, 0x44, 0x08,
+	0x22, 0xf2, 0x71, 0x73, 0x1b, 0x88, 0x11, 0xcf,
+	0x1c, 0x7e, 0x8a, 0x2e, 0xdc, 0x79, 0x57, 0xce,
+	0x1f, 0xe7, 0x6c, 0x07, 0xd8, 0x06, 0xbe, 0xec,
+	0xa3, 0xcf, 0xf9, 0x68, 0xa5, 0xb8, 0xf0, 0xe3,
+	0x3f, 0x01, 0x92, 0xda, 0xf1, 0xa0, 0x2d, 0x7b,
+	0xab, 0x57, 0x58, 0x2a, 0xaf, 0xab, 0xbd, 0xf2,
+	0xe5, 0xaf, 0x7e, 0x1f, 0x46, 0x24, 0x9e, 0x20,
+	0x22, 0x0f, 0x84, 0x4c, 0xb7, 0xd8, 0x03, 0xe8,
+	0x09, 0x73, 0x6c, 0xc6, 0x9b, 0x90, 0xe0, 0xdb,
+	0xf2, 0x71, 0xba, 0xad, 0xb3, 0xec, 0xda, 0x7a
+};
+static const u8 output71[] __initconst = {
+	0x28, 0xc5, 0x9b, 0x92, 0xf9, 0x21, 0x4f, 0xbb,
+	0xef, 0x3b, 0xf0, 0xf5, 0x3a, 0x6d, 0x7f, 0xd6,
+	0x6a, 0x8d, 0xa1, 0x01, 0x5c, 0x62, 0x20, 0x8b,
+	0x5b, 0x39, 0xd5, 0xd3, 0xc2, 0xf6, 0x9d, 0x5e,
+	0xcc, 0xe1, 0xa2, 0x61, 0x16, 0xe2, 0xce, 0xe9,
+	0x86, 0xd0, 0xfc, 0xce, 0x9a, 0x28, 0x27, 0xc4,
+	0x0c, 0xb9, 0xaa, 0x8d, 0x48, 0xdb, 0xbf, 0x82,
+	0x7d, 0xd0, 0x35, 0xc4, 0x06, 0x34, 0xb4, 0x19,
+	0x51, 0x73, 0xf4, 0x7a, 0xf4, 0xfd, 0xe9, 0x1d,
+	0xdc, 0x0f, 0x7e, 0xf7, 0x96, 0x03, 0xe3, 0xb1,
+	0x2e, 0x22, 0x59, 0xb7, 0x6d, 0x1c, 0x97, 0x8c,
+	0xd7, 0x31, 0x08, 0x26, 0x4c, 0x6d, 0xc6, 0x14,
+	0xa5, 0xeb, 0x45, 0x6a, 0x88, 0xa3, 0xa2, 0x36,
+	0xc4, 0x35, 0xb1, 0x5a, 0xa0, 0xad, 0xf7, 0x06,
+	0x9b, 0x5d, 0xc1, 0x15, 0xc1, 0xce, 0x0a, 0xb0,
+	0x57, 0x2e, 0x3f, 0x6f, 0x0d, 0x10, 0xd9, 0x11,
+	0x2c, 0x9c, 0xad, 0x2d, 0xa5, 0x81, 0xfb, 0x4e,
+	0x8f, 0xd5, 0x32, 0x4e, 0xaf, 0x5c, 0xc1, 0x86,
+	0xde, 0x56, 0x5a, 0x33, 0x29, 0xf7, 0x67, 0xc6,
+	0x37, 0x6f, 0xb2, 0x37, 0x4e, 0xd4, 0x69, 0x79,
+	0xaf, 0xd5, 0x17, 0x79, 0xe0, 0xba, 0x62, 0xa3,
+	0x68, 0xa4, 0x87, 0x93, 0x8d, 0x7e, 0x8f, 0xa3,
+	0x9c, 0xef, 0xda, 0xe3, 0xa5, 0x1f, 0xcd, 0x30,
+	0xa6, 0x55, 0xac, 0x4c, 0x69, 0x74, 0x02, 0xc7,
+	0x5d, 0x95, 0x81, 0x4a, 0x68, 0x11, 0xd3, 0xa9,
+	0x98, 0xb1, 0x0b, 0x0d, 0xae, 0x40, 0x86, 0x65,
+	0xbf, 0xcc, 0x2d, 0xef, 0x57, 0xca, 0x1f, 0xe4,
+	0x34, 0x4e, 0xa6, 0x5e, 0x82, 0x6e, 0x61, 0xad,
+	0x0b, 0x3c, 0xf8, 0xeb, 0x01, 0x43, 0x7f, 0x87,
+	0xa2, 0xa7, 0x6a, 0xe9, 0x62, 0x23, 0x24, 0x61,
+	0xf1, 0xf7, 0x36, 0xdb, 0x10, 0xe5, 0x57, 0x72,
+	0x3a, 0xc2, 0xae, 0xcc, 0x75, 0xc7, 0x80, 0x05,
+	0x0a, 0x5c, 0x4c, 0x95, 0xda, 0x02, 0x01, 0x14,
+	0x06, 0x6b, 0x5c, 0x65, 0xc2, 0xb8, 0x4a, 0xd6,
+	0xd3, 0xb4, 0xd8, 0x12, 0x52, 0xb5, 0x60, 0xd3,
+	0x8e, 0x5f, 0x5c, 0x76, 0x33, 0x7a, 0x05, 0xe5,
+	0xcb, 0xef, 0x4f, 0x89, 0xf1, 0xba, 0x32, 0x6f,
+	0x33, 0xcd, 0x15, 0x8d, 0xa3, 0x0c, 0x3f, 0x63,
+	0x11, 0xe7, 0x0e, 0xe0, 0x00, 0x01, 0xe9, 0xe8,
+	0x8e, 0x36, 0x34, 0x8d, 0x96, 0xb5, 0x03, 0xcf,
+	0x55, 0x62, 0x49, 0x7a, 0x34, 0x44, 0xa5, 0xee,
+	0x8c, 0x46, 0x06, 0x22, 0xab, 0x1d, 0x53, 0x9c,
+	0xa1, 0xf9, 0x67, 0x18, 0x57, 0x89, 0xf9, 0xc2,
+	0xd1, 0x7e, 0xbe, 0x36, 0x40, 0xcb, 0xe9, 0x04,
+	0xde, 0xb1, 0x3b, 0x29, 0x52, 0xc5, 0x9a, 0xb5,
+	0xa2, 0x7c, 0x7b, 0xfe, 0xe5, 0x92, 0x73, 0xea,
+	0xea, 0x7b, 0xba, 0x0a, 0x8c, 0x88, 0x15, 0xe6,
+	0x53, 0xbf, 0x1c, 0x33, 0xf4, 0x9b, 0x9a, 0x5e,
+	0x8d, 0xae, 0x60, 0xdc, 0xcb, 0x5d, 0xfa, 0xbe,
+	0x06, 0xc3, 0x3f, 0x06, 0xe7, 0x00, 0x40, 0x7b,
+	0xaa, 0x94, 0xfa, 0x6d, 0x1f, 0xe4, 0xc5, 0xa9,
+	0x1b, 0x5f, 0x36, 0xea, 0x5a, 0xdd, 0xa5, 0x48,
+	0x6a, 0x55, 0xd2, 0x47, 0x28, 0xbf, 0x96, 0xf1,
+	0x9f, 0xb6, 0x11, 0x4b, 0xd3, 0x44, 0x7d, 0x48,
+	0x41, 0x61, 0xdb, 0x12, 0xd4, 0xc2, 0x59, 0x82,
+	0x4c, 0x47, 0x5c, 0x04, 0xf6, 0x7b, 0xd3, 0x92,
+	0x2e, 0xe8, 0x40, 0xef, 0x15, 0x32, 0x97, 0xdc,
+	0x35, 0x4c, 0x6e, 0xa4, 0x97, 0xe9, 0x24, 0xde,
+	0x63, 0x8b, 0xb1, 0x6b, 0x48, 0xbb, 0x46, 0x1f,
+	0x84, 0xd6, 0x17, 0xb0, 0x5a, 0x4a, 0x4e, 0xd5,
+	0x31, 0xd7, 0xcf, 0xa0, 0x39, 0xc6, 0x2e, 0xfc,
+	0xa6, 0xa3, 0xd3, 0x0f, 0xa4, 0x28, 0xac, 0xb2,
+	0xf4, 0x48, 0x8d, 0x50, 0xa5, 0x1c, 0x44, 0x5d,
+	0x6e, 0x38, 0xb7, 0x2b, 0x8a, 0x45, 0xa7, 0x3d
+};
+static const u8 key71[] __initconst = {
+	0x8b, 0x68, 0xc4, 0xb7, 0x0d, 0x81, 0xef, 0x52,
+	0x1e, 0x05, 0x96, 0x72, 0x62, 0x89, 0x27, 0x83,
+	0xd0, 0xc7, 0x33, 0x6d, 0xf2, 0xcc, 0x69, 0xf9,
+	0x23, 0xae, 0x99, 0xb1, 0xd1, 0x05, 0x4e, 0x54
+};
+enum { nonce71 = 0x983f03656d64b5f6ULL };
+
+static const u8 input72[] __initconst = {
+	0x6b, 0x09, 0xc9, 0x57, 0x3d, 0x79, 0x04, 0x8c,
+	0x65, 0xad, 0x4a, 0x0f, 0xa1, 0x31, 0x3a, 0xdd,
+	0x14, 0x8e, 0xe8, 0xfe, 0xbf, 0x42, 0x87, 0x98,
+	0x2e, 0x8d, 0x83, 0xa3, 0xf8, 0x55, 0x3d, 0x84,
+	0x1e, 0x0e, 0x05, 0x4a, 0x38, 0x9e, 0xe7, 0xfe,
+	0xd0, 0x4d, 0x79, 0x74, 0x3a, 0x0b, 0x9b, 0xe1,
+	0xfd, 0x51, 0x84, 0x4e, 0xb2, 0x25, 0xe4, 0x64,
+	0x4c, 0xda, 0xcf, 0x46, 0xec, 0xba, 0x12, 0xeb,
+	0x5a, 0x33, 0x09, 0x6e, 0x78, 0x77, 0x8f, 0x30,
+	0xb1, 0x7d, 0x3f, 0x60, 0x8c, 0xf2, 0x1d, 0x8e,
+	0xb4, 0x70, 0xa2, 0x90, 0x7c, 0x79, 0x1a, 0x2c,
+	0xf6, 0x28, 0x79, 0x7c, 0x53, 0xc5, 0xfa, 0xcc,
+	0x65, 0x9b, 0xe1, 0x51, 0xd1, 0x7f, 0x1d, 0xc4,
+	0xdb, 0xd4, 0xd9, 0x04, 0x61, 0x7d, 0xbe, 0x12,
+	0xfc, 0xcd, 0xaf, 0xe4, 0x0f, 0x9c, 0x20, 0xb5,
+	0x22, 0x40, 0x18, 0xda, 0xe4, 0xda, 0x8c, 0x2d,
+	0x84, 0xe3, 0x5f, 0x53, 0x17, 0xed, 0x78, 0xdc,
+	0x2f, 0xe8, 0x31, 0xc7, 0xe6, 0x39, 0x71, 0x40,
+	0xb4, 0x0f, 0xc9, 0xa9, 0x7e, 0x78, 0x87, 0xc1,
+	0x05, 0x78, 0xbb, 0x01, 0xf2, 0x8f, 0x33, 0xb0,
+	0x6e, 0x84, 0xcd, 0x36, 0x33, 0x5c, 0x5b, 0x8e,
+	0xf1, 0xac, 0x30, 0xfe, 0x33, 0xec, 0x08, 0xf3,
+	0x7e, 0xf2, 0xf0, 0x4c, 0xf2, 0xad, 0xd8, 0xc1,
+	0xd4, 0x4e, 0x87, 0x06, 0xd4, 0x75, 0xe7, 0xe3,
+	0x09, 0xd3, 0x4d, 0xe3, 0x21, 0x32, 0xba, 0xb4,
+	0x68, 0x68, 0xcb, 0x4c, 0xa3, 0x1e, 0xb3, 0x87,
+	0x7b, 0xd3, 0x0c, 0x63, 0x37, 0x71, 0x79, 0xfb,
+	0x58, 0x36, 0x57, 0x0f, 0x34, 0x1d, 0xc1, 0x42,
+	0x02, 0x17, 0xe7, 0xed, 0xe8, 0xe7, 0x76, 0xcb,
+	0x42, 0xc4, 0x4b, 0xe2, 0xb2, 0x5e, 0x42, 0xd5,
+	0xec, 0x9d, 0xc1, 0x32, 0x71, 0xe4, 0xeb, 0x10,
+	0x68, 0x1a, 0x6e, 0x99, 0x8e, 0x73, 0x12, 0x1f,
+	0x97, 0x0c, 0x9e, 0xcd, 0x02, 0x3e, 0x4c, 0xa0,
+	0xf2, 0x8d, 0xe5, 0x44, 0xca, 0x6d, 0xfe, 0x07,
+	0xe3, 0xe8, 0x9b, 0x76, 0xc1, 0x6d, 0xb7, 0x6e,
+	0x0d, 0x14, 0x00, 0x6f, 0x8a, 0xfd, 0x43, 0xc6,
+	0x43, 0xa5, 0x9c, 0x02, 0x47, 0x10, 0xd4, 0xb4,
+	0x9b, 0x55, 0x67, 0xc8, 0x7f, 0xc1, 0x8a, 0x1f,
+	0x1e, 0xd1, 0xbc, 0x99, 0x5d, 0x50, 0x4f, 0x89,
+	0xf1, 0xe6, 0x5d, 0x91, 0x40, 0xdc, 0x20, 0x67,
+	0x56, 0xc2, 0xef, 0xbd, 0x2c, 0xa2, 0x99, 0x38,
+	0xe0, 0x45, 0xec, 0x44, 0x05, 0x52, 0x65, 0x11,
+	0xfc, 0x3b, 0x19, 0xcb, 0x71, 0xc2, 0x8e, 0x0e,
+	0x03, 0x2a, 0x03, 0x3b, 0x63, 0x06, 0x31, 0x9a,
+	0xac, 0x53, 0x04, 0x14, 0xd4, 0x80, 0x9d, 0x6b,
+	0x42, 0x7e, 0x7e, 0x4e, 0xdc, 0xc7, 0x01, 0x49,
+	0x9f, 0xf5, 0x19, 0x86, 0x13, 0x28, 0x2b, 0xa6,
+	0xa6, 0xbe, 0xa1, 0x7e, 0x71, 0x05, 0x00, 0xff,
+	0x59, 0x2d, 0xb6, 0x63, 0xf0, 0x1e, 0x2e, 0x69,
+	0x9b, 0x85, 0xf1, 0x1e, 0x8a, 0x64, 0x39, 0xab,
+	0x00, 0x12, 0xe4, 0x33, 0x4b, 0xb5, 0xd8, 0xb3,
+	0x6b, 0x5b, 0x8b, 0x5c, 0xd7, 0x6f, 0x23, 0xcf,
+	0x3f, 0x2e, 0x5e, 0x47, 0xb9, 0xb8, 0x1f, 0xf0,
+	0x1d, 0xda, 0xe7, 0x4f, 0x6e, 0xab, 0xc3, 0x36,
+	0xb4, 0x74, 0x6b, 0xeb, 0xc7, 0x5d, 0x91, 0xe5,
+	0xda, 0xf2, 0xc2, 0x11, 0x17, 0x48, 0xf8, 0x9c,
+	0xc9, 0x8b, 0xc1, 0xa2, 0xf4, 0xcd, 0x16, 0xf8,
+	0x27, 0xd9, 0x6c, 0x6f, 0xb5, 0x8f, 0x77, 0xca,
+	0x1b, 0xd8, 0xef, 0x84, 0x68, 0x71, 0x53, 0xc1,
+	0x43, 0x0f, 0x9f, 0x98, 0xae, 0x7e, 0x31, 0xd2,
+	0x98, 0xfb, 0x20, 0xa2, 0xad, 0x00, 0x10, 0x83,
+	0x00, 0x8b, 0xeb, 0x56, 0xd2, 0xc4, 0xcc, 0x7f,
+	0x2f, 0x4e, 0xfa, 0x88, 0x13, 0xa4, 0x2c, 0xde,
+	0x6b, 0x77, 0x86, 0x10, 0x6a, 0xab, 0x43, 0x0a,
+	0x02
+};
+static const u8 output72[] __initconst = {
+	0x42, 0x89, 0xa4, 0x80, 0xd2, 0xcb, 0x5f, 0x7f,
+	0x2a, 0x1a, 0x23, 0x00, 0xa5, 0x6a, 0x95, 0xa3,
+	0x9a, 0x41, 0xa1, 0xd0, 0x2d, 0x1e, 0xd6, 0x13,
+	0x34, 0x40, 0x4e, 0x7f, 0x1a, 0xbe, 0xa0, 0x3d,
+	0x33, 0x9c, 0x56, 0x2e, 0x89, 0x25, 0x45, 0xf9,
+	0xf0, 0xba, 0x9c, 0x6d, 0xd1, 0xd1, 0xde, 0x51,
+	0x47, 0x63, 0xc9, 0xbd, 0xfa, 0xa2, 0x9e, 0xad,
+	0x6a, 0x7b, 0x21, 0x1a, 0x6c, 0x3e, 0xff, 0x46,
+	0xbe, 0xf3, 0x35, 0x7a, 0x6e, 0xb3, 0xb9, 0xf7,
+	0xda, 0x5e, 0xf0, 0x14, 0xb5, 0x70, 0xa4, 0x2b,
+	0xdb, 0xbb, 0xc7, 0x31, 0x4b, 0x69, 0x5a, 0x83,
+	0x70, 0xd9, 0x58, 0xd4, 0x33, 0x84, 0x23, 0xf0,
+	0xae, 0xbb, 0x6d, 0x26, 0x7c, 0xc8, 0x30, 0xf7,
+	0x24, 0xad, 0xbd, 0xe4, 0x2c, 0x38, 0x38, 0xac,
+	0xe1, 0x4a, 0x9b, 0xac, 0x33, 0x0e, 0x4a, 0xf4,
+	0x93, 0xed, 0x07, 0x82, 0x81, 0x4f, 0x8f, 0xb1,
+	0xdd, 0x73, 0xd5, 0x50, 0x6d, 0x44, 0x1e, 0xbe,
+	0xa7, 0xcd, 0x17, 0x57, 0xd5, 0x3b, 0x62, 0x36,
+	0xcf, 0x7d, 0xc8, 0xd8, 0xd1, 0x78, 0xd7, 0x85,
+	0x46, 0x76, 0x5d, 0xcc, 0xfe, 0xe8, 0x94, 0xc5,
+	0xad, 0xbc, 0x5e, 0xbc, 0x8d, 0x1d, 0xdf, 0x03,
+	0xc9, 0x6b, 0x1b, 0x81, 0xd1, 0xb6, 0x5a, 0x24,
+	0xe3, 0xdc, 0x3f, 0x20, 0xc9, 0x07, 0x73, 0x4c,
+	0x43, 0x13, 0x87, 0x58, 0x34, 0x0d, 0x14, 0x63,
+	0x0f, 0x6f, 0xad, 0x8d, 0xac, 0x7c, 0x67, 0x68,
+	0xa3, 0x9d, 0x7f, 0x00, 0xdf, 0x28, 0xee, 0x67,
+	0xf4, 0x5c, 0x26, 0xcb, 0xef, 0x56, 0x71, 0xc8,
+	0xc6, 0x67, 0x5f, 0x38, 0xbb, 0xa0, 0xb1, 0x5c,
+	0x1f, 0xb3, 0x08, 0xd9, 0x38, 0xcf, 0x74, 0x54,
+	0xc6, 0xa4, 0xc4, 0xc0, 0x9f, 0xb3, 0xd0, 0xda,
+	0x62, 0x67, 0x8b, 0x81, 0x33, 0xf0, 0xa9, 0x73,
+	0xa4, 0xd1, 0x46, 0x88, 0x8d, 0x85, 0x12, 0x40,
+	0xba, 0x1a, 0xcd, 0x82, 0xd8, 0x8d, 0xc4, 0x52,
+	0xe7, 0x01, 0x94, 0x2e, 0x0e, 0xd0, 0xaf, 0xe7,
+	0x2d, 0x3f, 0x3c, 0xaa, 0xf4, 0xf5, 0xa7, 0x01,
+	0x4c, 0x14, 0xe2, 0xc2, 0x96, 0x76, 0xbe, 0x05,
+	0xaa, 0x19, 0xb1, 0xbd, 0x95, 0xbb, 0x5a, 0xf9,
+	0xa5, 0xa7, 0xe6, 0x16, 0x38, 0x34, 0xf7, 0x9d,
+	0x19, 0x66, 0x16, 0x8e, 0x7f, 0x2b, 0x5a, 0xfb,
+	0xb5, 0x29, 0x79, 0xbf, 0x52, 0xae, 0x30, 0x95,
+	0x3f, 0x31, 0x33, 0x28, 0xde, 0xc5, 0x0d, 0x55,
+	0x89, 0xec, 0x21, 0x11, 0x0f, 0x8b, 0xfe, 0x63,
+	0x3a, 0xf1, 0x95, 0x5c, 0xcd, 0x50, 0xe4, 0x5d,
+	0x8f, 0xa7, 0xc8, 0xca, 0x93, 0xa0, 0x67, 0x82,
+	0x63, 0x5c, 0xd0, 0xed, 0xe7, 0x08, 0xc5, 0x60,
+	0xf8, 0xb4, 0x47, 0xf0, 0x1a, 0x65, 0x4e, 0xa3,
+	0x51, 0x68, 0xc7, 0x14, 0xa1, 0xd9, 0x39, 0x72,
+	0xa8, 0x6f, 0x7c, 0x7e, 0xf6, 0x03, 0x0b, 0x25,
+	0x9b, 0xf2, 0xca, 0x49, 0xae, 0x5b, 0xf8, 0x0f,
+	0x71, 0x51, 0x01, 0xa6, 0x23, 0xa9, 0xdf, 0xd0,
+	0x7a, 0x39, 0x19, 0xf5, 0xc5, 0x26, 0x44, 0x7b,
+	0x0a, 0x4a, 0x41, 0xbf, 0xf2, 0x8e, 0x83, 0x50,
+	0x91, 0x96, 0x72, 0x02, 0xf6, 0x80, 0xbf, 0x95,
+	0x41, 0xac, 0xda, 0xb0, 0xba, 0xe3, 0x76, 0xb1,
+	0x9d, 0xff, 0x1f, 0x33, 0x02, 0x85, 0xfc, 0x2a,
+	0x29, 0xe6, 0xe3, 0x9d, 0xd0, 0xef, 0xc2, 0xd6,
+	0x9c, 0x4a, 0x62, 0xac, 0xcb, 0xea, 0x8b, 0xc3,
+	0x08, 0x6e, 0x49, 0x09, 0x26, 0x19, 0xc1, 0x30,
+	0xcc, 0x27, 0xaa, 0xc6, 0x45, 0x88, 0xbd, 0xae,
+	0xd6, 0x79, 0xff, 0x4e, 0xfc, 0x66, 0x4d, 0x02,
+	0xa5, 0xee, 0x8e, 0xa5, 0xb6, 0x15, 0x72, 0x24,
+	0xb1, 0xbf, 0xbf, 0x64, 0xcf, 0xcc, 0x93, 0xe9,
+	0xb6, 0xfd, 0xb4, 0xb6, 0x21, 0xb5, 0x48, 0x08,
+	0x0f, 0x11, 0x65, 0xe1, 0x47, 0xee, 0x93, 0x29,
+	0xad
+};
+static const u8 key72[] __initconst = {
+	0xb9, 0xa2, 0xfc, 0x59, 0x06, 0x3f, 0x77, 0xa5,
+	0x66, 0xd0, 0x2b, 0x22, 0x74, 0x22, 0x4c, 0x1e,
+	0x6a, 0x39, 0xdf, 0xe1, 0x0d, 0x4c, 0x64, 0x99,
+	0x54, 0x8a, 0xba, 0x1d, 0x2c, 0x21, 0x5f, 0xc3
+};
+enum { nonce72 = 0x3d069308fa3db04bULL };
+
+static const u8 input73[] __initconst = {
+	0xe4, 0xdd, 0x36, 0xd4, 0xf5, 0x70, 0x51, 0x73,
+	0x97, 0x1d, 0x45, 0x05, 0x92, 0xe7, 0xeb, 0xb7,
+	0x09, 0x82, 0x6e, 0x25, 0x6c, 0x50, 0xf5, 0x40,
+	0x19, 0xba, 0xbc, 0xf4, 0x39, 0x14, 0xc5, 0x15,
+	0x83, 0x40, 0xbd, 0x26, 0xe0, 0xff, 0x3b, 0x22,
+	0x7c, 0x7c, 0xd7, 0x0b, 0xe9, 0x25, 0x0c, 0x3d,
+	0x92, 0x38, 0xbe, 0xe4, 0x22, 0x75, 0x65, 0xf1,
+	0x03, 0x85, 0x34, 0x09, 0xb8, 0x77, 0xfb, 0x48,
+	0xb1, 0x2e, 0x21, 0x67, 0x9b, 0x9d, 0xad, 0x18,
+	0x82, 0x0d, 0x6b, 0xc3, 0xcf, 0x00, 0x61, 0x6e,
+	0xda, 0xdc, 0xa7, 0x0b, 0x5c, 0x02, 0x1d, 0xa6,
+	0x4e, 0x0d, 0x7f, 0x37, 0x01, 0x5a, 0x37, 0xf3,
+	0x2b, 0xbf, 0xba, 0xe2, 0x1c, 0xb3, 0xa3, 0xbc,
+	0x1c, 0x93, 0x1a, 0xb1, 0x71, 0xaf, 0xe2, 0xdd,
+	0x17, 0xee, 0x53, 0xfa, 0xfb, 0x02, 0x40, 0x3e,
+	0x03, 0xca, 0xe7, 0xc3, 0x51, 0x81, 0xcc, 0x8c,
+	0xca, 0xcf, 0x4e, 0xc5, 0x78, 0x99, 0xfd, 0xbf,
+	0xea, 0xab, 0x38, 0x81, 0xfc, 0xd1, 0x9e, 0x41,
+	0x0b, 0x84, 0x25, 0xf1, 0x6b, 0x3c, 0xf5, 0x40,
+	0x0d, 0xc4, 0x3e, 0xb3, 0x6a, 0xec, 0x6e, 0x75,
+	0xdc, 0x9b, 0xdf, 0x08, 0x21, 0x16, 0xfb, 0x7a,
+	0x8e, 0x19, 0x13, 0x02, 0xa7, 0xfc, 0x58, 0x21,
+	0xc3, 0xb3, 0x59, 0x5a, 0x9c, 0xef, 0x38, 0xbd,
+	0x87, 0x55, 0xd7, 0x0d, 0x1f, 0x84, 0xdc, 0x98,
+	0x22, 0xca, 0x87, 0x96, 0x71, 0x6d, 0x68, 0x00,
+	0xcb, 0x4f, 0x2f, 0xc4, 0x64, 0x0c, 0xc1, 0x53,
+	0x0c, 0x90, 0xe7, 0x3c, 0x88, 0xca, 0xc5, 0x85,
+	0xa3, 0x2a, 0x96, 0x7c, 0x82, 0x6d, 0x45, 0xf5,
+	0xb7, 0x8d, 0x17, 0x69, 0xd6, 0xcd, 0x3c, 0xd3,
+	0xe7, 0x1c, 0xce, 0x93, 0x50, 0xd4, 0x59, 0xa2,
+	0xd8, 0x8b, 0x72, 0x60, 0x5b, 0x25, 0x14, 0xcd,
+	0x5a, 0xe8, 0x8c, 0xdb, 0x23, 0x8d, 0x2b, 0x59,
+	0x12, 0x13, 0x10, 0x47, 0xa4, 0xc8, 0x3c, 0xc1,
+	0x81, 0x89, 0x6c, 0x98, 0xec, 0x8f, 0x7b, 0x32,
+	0xf2, 0x87, 0xd9, 0xa2, 0x0d, 0xc2, 0x08, 0xf9,
+	0xd5, 0xf3, 0x91, 0xe7, 0xb3, 0x87, 0xa7, 0x0b,
+	0x64, 0x8f, 0xb9, 0x55, 0x1c, 0x81, 0x96, 0x6c,
+	0xa1, 0xc9, 0x6e, 0x3b, 0xcd, 0x17, 0x1b, 0xfc,
+	0xa6, 0x05, 0xba, 0x4a, 0x7d, 0x03, 0x3c, 0x59,
+	0xc8, 0xee, 0x50, 0xb2, 0x5b, 0xe1, 0x4d, 0x6a,
+	0x1f, 0x09, 0xdc, 0xa2, 0x51, 0xd1, 0x93, 0x3a,
+	0x5f, 0x72, 0x1d, 0x26, 0x14, 0x62, 0xa2, 0x41,
+	0x3d, 0x08, 0x70, 0x7b, 0x27, 0x3d, 0xbc, 0xdf,
+	0x15, 0xfa, 0xb9, 0x5f, 0xb5, 0x38, 0x84, 0x0b,
+	0x58, 0x3d, 0xee, 0x3f, 0x32, 0x65, 0x6d, 0xd7,
+	0xce, 0x97, 0x3c, 0x8d, 0xfb, 0x63, 0xb9, 0xb0,
+	0xa8, 0x4a, 0x72, 0x99, 0x97, 0x58, 0xc8, 0xa7,
+	0xf9, 0x4c, 0xae, 0xc1, 0x63, 0xb9, 0x57, 0x18,
+	0x8a, 0xfa, 0xab, 0xe9, 0xf3, 0x67, 0xe6, 0xfd,
+	0xd2, 0x9d, 0x5c, 0xa9, 0x8e, 0x11, 0x0a, 0xf4,
+	0x4b, 0xf1, 0xec, 0x1a, 0xaf, 0x50, 0x5d, 0x16,
+	0x13, 0x69, 0x2e, 0xbd, 0x0d, 0xe6, 0xf0, 0xb2,
+	0xed, 0xb4, 0x4c, 0x59, 0x77, 0x37, 0x00, 0x0b,
+	0xc7, 0xa7, 0x9e, 0x37, 0xf3, 0x60, 0x70, 0xef,
+	0xf3, 0xc1, 0x74, 0x52, 0x87, 0xc6, 0xa1, 0x81,
+	0xbd, 0x0a, 0x2c, 0x5d, 0x2c, 0x0c, 0x6a, 0x81,
+	0xa1, 0xfe, 0x26, 0x78, 0x6c, 0x03, 0x06, 0x07,
+	0x34, 0xaa, 0xd1, 0x1b, 0x40, 0x03, 0x39, 0x56,
+	0xcf, 0x2a, 0x92, 0xc1, 0x4e, 0xdf, 0x29, 0x24,
+	0x83, 0x22, 0x7a, 0xea, 0x67, 0x1e, 0xe7, 0x54,
+	0x64, 0xd3, 0xbd, 0x3a, 0x5d, 0xae, 0xca, 0xf0,
+	0x9c, 0xd6, 0x5a, 0x9a, 0x62, 0xc8, 0xc7, 0x83,
+	0xf9, 0x89, 0xde, 0x2d, 0x53, 0x64, 0x61, 0xf7,
+	0xa3, 0xa7, 0x31, 0x38, 0xc6, 0x22, 0x9c, 0xb4,
+	0x87, 0xe0
+};
+static const u8 output73[] __initconst = {
+	0x34, 0xed, 0x05, 0xb0, 0x14, 0xbc, 0x8c, 0xcc,
+	0x95, 0xbd, 0x99, 0x0f, 0xb1, 0x98, 0x17, 0x10,
+	0xae, 0xe0, 0x08, 0x53, 0xa3, 0x69, 0xd2, 0xed,
+	0x66, 0xdb, 0x2a, 0x34, 0x8d, 0x0c, 0x6e, 0xce,
+	0x63, 0x69, 0xc9, 0xe4, 0x57, 0xc3, 0x0c, 0x8b,
+	0xa6, 0x2c, 0xa7, 0xd2, 0x08, 0xff, 0x4f, 0xec,
+	0x61, 0x8c, 0xee, 0x0d, 0xfa, 0x6b, 0xe0, 0xe8,
+	0x71, 0xbc, 0x41, 0x46, 0xd7, 0x33, 0x1d, 0xc0,
+	0xfd, 0xad, 0xca, 0x8b, 0x34, 0x56, 0xa4, 0x86,
+	0x71, 0x62, 0xae, 0x5e, 0x3d, 0x2b, 0x66, 0x3e,
+	0xae, 0xd8, 0xc0, 0xe1, 0x21, 0x3b, 0xca, 0xd2,
+	0x6b, 0xa2, 0xb8, 0xc7, 0x98, 0x4a, 0xf3, 0xcf,
+	0xb8, 0x62, 0xd8, 0x33, 0xe6, 0x80, 0xdb, 0x2f,
+	0x0a, 0xaf, 0x90, 0x3c, 0xe1, 0xec, 0xe9, 0x21,
+	0x29, 0x42, 0x9e, 0xa5, 0x50, 0xe9, 0x93, 0xd3,
+	0x53, 0x1f, 0xac, 0x2a, 0x24, 0x07, 0xb8, 0xed,
+	0xed, 0x38, 0x2c, 0xc4, 0xa1, 0x2b, 0x31, 0x5d,
+	0x9c, 0x24, 0x7b, 0xbf, 0xd9, 0xbb, 0x4e, 0x87,
+	0x8f, 0x32, 0x30, 0xf1, 0x11, 0x29, 0x54, 0x94,
+	0x00, 0x95, 0x1d, 0x1d, 0x24, 0xc0, 0xd4, 0x34,
+	0x49, 0x1d, 0xd5, 0xe3, 0xa6, 0xde, 0x8b, 0xbf,
+	0x5a, 0x9f, 0x58, 0x5a, 0x9b, 0x70, 0xe5, 0x9b,
+	0xb3, 0xdb, 0xe8, 0xb8, 0xca, 0x1b, 0x43, 0xe3,
+	0xc6, 0x6f, 0x0a, 0xd6, 0x32, 0x11, 0xd4, 0x04,
+	0xef, 0xa3, 0xe4, 0x3f, 0x12, 0xd8, 0xc1, 0x73,
+	0x51, 0x87, 0x03, 0xbd, 0xba, 0x60, 0x79, 0xee,
+	0x08, 0xcc, 0xf7, 0xc0, 0xaa, 0x4c, 0x33, 0xc4,
+	0xc7, 0x09, 0xf5, 0x91, 0xcb, 0x74, 0x57, 0x08,
+	0x1b, 0x90, 0xa9, 0x1b, 0x60, 0x02, 0xd2, 0x3f,
+	0x7a, 0xbb, 0xfd, 0x78, 0xf0, 0x15, 0xf9, 0x29,
+	0x82, 0x8f, 0xc4, 0xb2, 0x88, 0x1f, 0xbc, 0xcc,
+	0x53, 0x27, 0x8b, 0x07, 0x5f, 0xfc, 0x91, 0x29,
+	0x82, 0x80, 0x59, 0x0a, 0x3c, 0xea, 0xc4, 0x7e,
+	0xad, 0xd2, 0x70, 0x46, 0xbd, 0x9e, 0x3b, 0x1c,
+	0x8a, 0x62, 0xea, 0x69, 0xbd, 0xf6, 0x96, 0x15,
+	0xb5, 0x57, 0xe8, 0x63, 0x5f, 0x65, 0x46, 0x84,
+	0x58, 0x50, 0x87, 0x4b, 0x0e, 0x5b, 0x52, 0x90,
+	0xb0, 0xae, 0x37, 0x0f, 0xdd, 0x7e, 0xa2, 0xa0,
+	0x8b, 0x78, 0xc8, 0x5a, 0x1f, 0x53, 0xdb, 0xc5,
+	0xbf, 0x73, 0x20, 0xa9, 0x44, 0xfb, 0x1e, 0xc7,
+	0x97, 0xb2, 0x3a, 0x5a, 0x17, 0xe6, 0x8b, 0x9b,
+	0xe8, 0xf8, 0x2a, 0x01, 0x27, 0xa3, 0x71, 0x28,
+	0xe3, 0x19, 0xc6, 0xaf, 0xf5, 0x3a, 0x26, 0xc0,
+	0x5c, 0x69, 0x30, 0x78, 0x75, 0x27, 0xf2, 0x0c,
+	0x22, 0x71, 0x65, 0xc6, 0x8e, 0x7b, 0x47, 0xe3,
+	0x31, 0xaf, 0x7b, 0xc6, 0xc2, 0x55, 0x68, 0x81,
+	0xaa, 0x1b, 0x21, 0x65, 0xfb, 0x18, 0x35, 0x45,
+	0x36, 0x9a, 0x44, 0xba, 0x5c, 0xff, 0x06, 0xde,
+	0x3a, 0xc8, 0x44, 0x0b, 0xaa, 0x8e, 0x34, 0xe2,
+	0x84, 0xac, 0x18, 0xfe, 0x9b, 0xe1, 0x4f, 0xaa,
+	0xb6, 0x90, 0x0b, 0x1c, 0x2c, 0xd9, 0x9a, 0x10,
+	0x18, 0xf9, 0x49, 0x41, 0x42, 0x1b, 0xb5, 0xe1,
+	0x26, 0xac, 0x2d, 0x38, 0x00, 0x00, 0xe4, 0xb4,
+	0x50, 0x6f, 0x14, 0x18, 0xd6, 0x3d, 0x00, 0x59,
+	0x3c, 0x45, 0xf3, 0x42, 0x13, 0x44, 0xb8, 0x57,
+	0xd4, 0x43, 0x5c, 0x8a, 0x2a, 0xb4, 0xfc, 0x0a,
+	0x25, 0x5a, 0xdc, 0x8f, 0x11, 0x0b, 0x11, 0x44,
+	0xc7, 0x0e, 0x54, 0x8b, 0x22, 0x01, 0x7e, 0x67,
+	0x2e, 0x15, 0x3a, 0xb9, 0xee, 0x84, 0x10, 0xd4,
+	0x80, 0x57, 0xd7, 0x75, 0xcf, 0x8b, 0xcb, 0x03,
+	0xc9, 0x92, 0x2b, 0x69, 0xd8, 0x5a, 0x9b, 0x06,
+	0x85, 0x47, 0xaa, 0x4c, 0x28, 0xde, 0x49, 0x58,
+	0xe6, 0x11, 0x1e, 0x5e, 0x64, 0x8e, 0x3b, 0xe0,
+	0x40, 0x2e, 0xac, 0x96, 0x97, 0x15, 0x37, 0x1e,
+	0x30, 0xdd
+};
+static const u8 key73[] __initconst = {
+	0x96, 0x06, 0x1e, 0xc1, 0x6d, 0xba, 0x49, 0x5b,
+	0x65, 0x80, 0x79, 0xdd, 0xf3, 0x67, 0xa8, 0x6e,
+	0x2d, 0x9c, 0x54, 0x46, 0xd8, 0x4a, 0xeb, 0x7e,
+	0x23, 0x86, 0x51, 0xd8, 0x49, 0x49, 0x56, 0xe0
+};
+enum { nonce73 = 0xbefb83cb67e11ffdULL };
+
+static const u8 input74[] __initconst = {
+	0x47, 0x22, 0x70, 0xe5, 0x2f, 0x41, 0x18, 0x45,
+	0x07, 0xd3, 0x6d, 0x32, 0x0d, 0x43, 0x92, 0x2b,
+	0x9b, 0x65, 0x73, 0x13, 0x1a, 0x4f, 0x49, 0x8f,
+	0xff, 0xf8, 0xcc, 0xae, 0x15, 0xab, 0x9d, 0x7d,
+	0xee, 0x22, 0x5d, 0x8b, 0xde, 0x81, 0x5b, 0x81,
+	0x83, 0x49, 0x35, 0x9b, 0xb4, 0xbc, 0x4e, 0x01,
+	0xc2, 0x29, 0xa7, 0xf1, 0xca, 0x3a, 0xce, 0x3f,
+	0xf5, 0x31, 0x93, 0xa8, 0xe2, 0xc9, 0x7d, 0x03,
+	0x26, 0xa4, 0xbc, 0xa8, 0x9c, 0xb9, 0x68, 0xf3,
+	0xb3, 0x91, 0xe8, 0xe6, 0xc7, 0x2b, 0x1a, 0xce,
+	0xd2, 0x41, 0x53, 0xbd, 0xa3, 0x2c, 0x54, 0x94,
+	0x21, 0xa1, 0x40, 0xae, 0xc9, 0x0c, 0x11, 0x92,
+	0xfd, 0x91, 0xa9, 0x40, 0xca, 0xde, 0x21, 0x4e,
+	0x1e, 0x3d, 0xcc, 0x2c, 0x87, 0x11, 0xef, 0x46,
+	0xed, 0x52, 0x03, 0x11, 0x19, 0x43, 0x25, 0xc7,
+	0x0d, 0xc3, 0x37, 0x5f, 0xd3, 0x6f, 0x0c, 0x6a,
+	0x45, 0x30, 0x88, 0xec, 0xf0, 0x21, 0xef, 0x1d,
+	0x7b, 0x38, 0x63, 0x4b, 0x49, 0x0c, 0x72, 0xf6,
+	0x4c, 0x40, 0xc3, 0xcc, 0x03, 0xa7, 0xae, 0xa8,
+	0x8c, 0x37, 0x03, 0x1c, 0x11, 0xae, 0x0d, 0x1b,
+	0x62, 0x97, 0x27, 0xfc, 0x56, 0x4b, 0xb7, 0xfd,
+	0xbc, 0xfb, 0x0e, 0xfc, 0x61, 0xad, 0xc6, 0xb5,
+	0x9c, 0x8c, 0xc6, 0x38, 0x27, 0x91, 0x29, 0x3d,
+	0x29, 0xc8, 0x37, 0xc9, 0x96, 0x69, 0xe3, 0xdc,
+	0x3e, 0x61, 0x35, 0x9b, 0x99, 0x4f, 0xb9, 0x4e,
+	0x5a, 0x29, 0x1c, 0x2e, 0xcf, 0x16, 0xcb, 0x69,
+	0x87, 0xe4, 0x1a, 0xc4, 0x6e, 0x78, 0x43, 0x00,
+	0x03, 0xb2, 0x8b, 0x03, 0xd0, 0xb4, 0xf1, 0xd2,
+	0x7d, 0x2d, 0x7e, 0xfc, 0x19, 0x66, 0x5b, 0xa3,
+	0x60, 0x3f, 0x9d, 0xbd, 0xfa, 0x3e, 0xca, 0x7b,
+	0x26, 0x08, 0x19, 0x16, 0x93, 0x5d, 0x83, 0xfd,
+	0xf9, 0x21, 0xc6, 0x31, 0x34, 0x6f, 0x0c, 0xaa,
+	0x28, 0xf9, 0x18, 0xa2, 0xc4, 0x78, 0x3b, 0x56,
+	0xc0, 0x88, 0x16, 0xba, 0x22, 0x2c, 0x07, 0x2f,
+	0x70, 0xd0, 0xb0, 0x46, 0x35, 0xc7, 0x14, 0xdc,
+	0xbb, 0x56, 0x23, 0x1e, 0x36, 0x36, 0x2d, 0x73,
+	0x78, 0xc7, 0xce, 0xf3, 0x58, 0xf7, 0x58, 0xb5,
+	0x51, 0xff, 0x33, 0x86, 0x0e, 0x3b, 0x39, 0xfb,
+	0x1a, 0xfd, 0xf8, 0x8b, 0x09, 0x33, 0x1b, 0x83,
+	0xf2, 0xe6, 0x38, 0x37, 0xef, 0x47, 0x84, 0xd9,
+	0x82, 0x77, 0x2b, 0x82, 0xcc, 0xf9, 0xee, 0x94,
+	0x71, 0x78, 0x81, 0xc8, 0x4d, 0x91, 0xd7, 0x35,
+	0x29, 0x31, 0x30, 0x5c, 0x4a, 0x23, 0x23, 0xb1,
+	0x38, 0x6b, 0xac, 0x22, 0x3f, 0x80, 0xc7, 0xe0,
+	0x7d, 0xfa, 0x76, 0x47, 0xd4, 0x6f, 0x93, 0xa0,
+	0xa0, 0x93, 0x5d, 0x68, 0xf7, 0x43, 0x25, 0x8f,
+	0x1b, 0xc7, 0x87, 0xea, 0x59, 0x0c, 0xa2, 0xfa,
+	0xdb, 0x2f, 0x72, 0x43, 0xcf, 0x90, 0xf1, 0xd6,
+	0x58, 0xf3, 0x17, 0x6a, 0xdf, 0xb3, 0x4e, 0x0e,
+	0x38, 0x24, 0x48, 0x1f, 0xb7, 0x01, 0xec, 0x81,
+	0xb1, 0x87, 0x5b, 0xec, 0x9c, 0x11, 0x1a, 0xff,
+	0xa5, 0xca, 0x5a, 0x63, 0x31, 0xb2, 0xe4, 0xc6,
+	0x3c, 0x1d, 0xaf, 0x27, 0xb2, 0xd4, 0x19, 0xa2,
+	0xcc, 0x04, 0x92, 0x42, 0xd2, 0xc1, 0x8c, 0x3b,
+	0xce, 0xf5, 0x74, 0xc1, 0x81, 0xf8, 0x20, 0x23,
+	0x6f, 0x20, 0x6d, 0x78, 0x36, 0x72, 0x2c, 0x52,
+	0xdf, 0x5e, 0xe8, 0x75, 0xce, 0x1c, 0x49, 0x9d,
+	0x93, 0x6f, 0x65, 0xeb, 0xb1, 0xbd, 0x8e, 0x5e,
+	0xe5, 0x89, 0xc4, 0x8a, 0x81, 0x3d, 0x9a, 0xa7,
+	0x11, 0x82, 0x8e, 0x38, 0x5b, 0x5b, 0xca, 0x7d,
+	0x4b, 0x72, 0xc2, 0x9c, 0x30, 0x5e, 0x7f, 0xc0,
+	0x6f, 0x91, 0xd5, 0x67, 0x8c, 0x3e, 0xae, 0xda,
+	0x2b, 0x3c, 0x53, 0xcc, 0x50, 0x97, 0x36, 0x0b,
+	0x79, 0xd6, 0x73, 0x6e, 0x7d, 0x42, 0x56, 0xe1,
+	0xaa, 0xfc, 0xb3, 0xa7, 0xc8, 0x01, 0xaa, 0xc1,
+	0xfc, 0x5c, 0x72, 0x8e, 0x63, 0xa8, 0x46, 0x18,
+	0xee, 0x11, 0xe7, 0x30, 0x09, 0x83, 0x6c, 0xd9,
+	0xf4, 0x7a, 0x7b, 0xb5, 0x1f, 0x6d, 0xc7, 0xbc,
+	0xcb, 0x55, 0xea, 0x40, 0x58, 0x7a, 0x00, 0x00,
+	0x90, 0x60, 0xc5, 0x64, 0x69, 0x05, 0x99, 0xd2,
+	0x49, 0x62, 0x4f, 0xcb, 0x97, 0xdf, 0xdd, 0x6b,
+	0x60, 0x75, 0xe2, 0xe0, 0x6f, 0x76, 0xd0, 0x37,
+	0x67, 0x0a, 0xcf, 0xff, 0xc8, 0x61, 0x84, 0x14,
+	0x80, 0x7c, 0x1d, 0x31, 0x8d, 0x90, 0xde, 0x0b,
+	0x1c, 0x74, 0x9f, 0x82, 0x96, 0x80, 0xda, 0xaf,
+	0x8d, 0x99, 0x86, 0x9f, 0x24, 0x99, 0x28, 0x3e,
+	0xe0, 0xa3, 0xc3, 0x90, 0x2d, 0x14, 0x65, 0x1e,
+	0x3b, 0xb9, 0xba, 0x13, 0xa5, 0x77, 0x73, 0x63,
+	0x9a, 0x06, 0x3d, 0xa9, 0x28, 0x9b, 0xba, 0x25,
+	0x61, 0xc9, 0xcd, 0xcf, 0x7a, 0x4d, 0x96, 0x09,
+	0xcb, 0xca, 0x03, 0x9c, 0x54, 0x34, 0x31, 0x85,
+	0xa0, 0x3d, 0xe5, 0xbc, 0xa5, 0x5f, 0x1b, 0xd3,
+	0x10, 0x63, 0x74, 0x9d, 0x01, 0x92, 0x88, 0xf0,
+	0x27, 0x9c, 0x28, 0xd9, 0xfd, 0xe2, 0x4e, 0x01,
+	0x8d, 0x61, 0x79, 0x60, 0x61, 0x5b, 0x76, 0xab,
+	0x06, 0xd3, 0x44, 0x87, 0x43, 0x52, 0xcd, 0x06,
+	0x68, 0x1e, 0x2d, 0xc5, 0xb0, 0x07, 0x25, 0xdf,
+	0x0a, 0x50, 0xd7, 0xd9, 0x08, 0x53, 0x65, 0xf1,
+	0x0c, 0x2c, 0xde, 0x3f, 0x9d, 0x03, 0x1f, 0xe1,
+	0x49, 0x43, 0x3c, 0x83, 0x81, 0x37, 0xf8, 0xa2,
+	0x0b, 0xf9, 0x61, 0x1c, 0xc1, 0xdb, 0x79, 0xbc,
+	0x64, 0xce, 0x06, 0x4e, 0x87, 0x89, 0x62, 0x73,
+	0x51, 0xbc, 0xa4, 0x32, 0xd4, 0x18, 0x62, 0xab,
+	0x65, 0x7e, 0xad, 0x1e, 0x91, 0xa3, 0xfa, 0x2d,
+	0x58, 0x9e, 0x2a, 0xe9, 0x74, 0x44, 0x64, 0x11,
+	0xe6, 0xb6, 0xb3, 0x00, 0x7e, 0xa3, 0x16, 0xef,
+	0x72
+};
+static const u8 output74[] __initconst = {
+	0xf5, 0xca, 0x45, 0x65, 0x50, 0x35, 0x47, 0x67,
+	0x6f, 0x4f, 0x67, 0xff, 0x34, 0xd9, 0xc3, 0x37,
+	0x2a, 0x26, 0xb0, 0x4f, 0x08, 0x1e, 0x45, 0x13,
+	0xc7, 0x2c, 0x14, 0x75, 0x33, 0xd8, 0x8e, 0x1e,
+	0x1b, 0x11, 0x0d, 0x97, 0x04, 0x33, 0x8a, 0xe4,
+	0xd8, 0x8d, 0x0e, 0x12, 0x8d, 0xdb, 0x6e, 0x02,
+	0xfa, 0xe5, 0xbd, 0x3a, 0xb5, 0x28, 0x07, 0x7d,
+	0x20, 0xf0, 0x12, 0x64, 0x83, 0x2f, 0x59, 0x79,
+	0x17, 0x88, 0x3c, 0x2d, 0x08, 0x2f, 0x55, 0xda,
+	0xcc, 0x02, 0x3a, 0x82, 0xcd, 0x03, 0x94, 0xdf,
+	0xdf, 0xab, 0x8a, 0x13, 0xf5, 0xe6, 0x74, 0xdf,
+	0x7b, 0xe2, 0xab, 0x34, 0xbc, 0x00, 0x85, 0xbf,
+	0x5a, 0x48, 0xc8, 0xff, 0x8d, 0x6c, 0x27, 0x48,
+	0x19, 0x2d, 0x08, 0xfa, 0x82, 0x62, 0x39, 0x55,
+	0x32, 0x11, 0xa8, 0xd7, 0xb9, 0x08, 0x2c, 0xd6,
+	0x7a, 0xd9, 0x83, 0x9f, 0x9b, 0xfb, 0xec, 0x3a,
+	0xd1, 0x08, 0xc7, 0xad, 0xdc, 0x98, 0x4c, 0xbc,
+	0x98, 0xeb, 0x36, 0xb0, 0x39, 0xf4, 0x3a, 0xd6,
+	0x53, 0x02, 0xa0, 0xa9, 0x73, 0xa1, 0xca, 0xef,
+	0xd8, 0xd2, 0xec, 0x0e, 0xf8, 0xf5, 0xac, 0x8d,
+	0x34, 0x41, 0x06, 0xa8, 0xc6, 0xc3, 0x31, 0xbc,
+	0xe5, 0xcc, 0x7e, 0x72, 0x63, 0x59, 0x3e, 0x63,
+	0xc2, 0x8d, 0x2b, 0xd5, 0xb9, 0xfd, 0x1e, 0x31,
+	0x69, 0x32, 0x05, 0xd6, 0xde, 0xc9, 0xe6, 0x4c,
+	0xac, 0x68, 0xf7, 0x1f, 0x9d, 0xcd, 0x0e, 0xa2,
+	0x15, 0x3d, 0xd6, 0x47, 0x99, 0xab, 0x08, 0x5f,
+	0x28, 0xc3, 0x4c, 0xc2, 0xd5, 0xdd, 0x10, 0xb7,
+	0xbd, 0xdb, 0x9b, 0xcf, 0x85, 0x27, 0x29, 0x76,
+	0x98, 0xeb, 0xad, 0x31, 0x64, 0xe7, 0xfb, 0x61,
+	0xe0, 0xd8, 0x1a, 0xa6, 0xe2, 0xe7, 0x43, 0x42,
+	0x77, 0xc9, 0x82, 0x00, 0xac, 0x85, 0xe0, 0xa2,
+	0xd4, 0x62, 0xe3, 0xb7, 0x17, 0x6e, 0xb2, 0x9e,
+	0x21, 0x58, 0x73, 0xa9, 0x53, 0x2d, 0x3c, 0xe1,
+	0xdd, 0xd6, 0x6e, 0x92, 0xf2, 0x1d, 0xc2, 0x22,
+	0x5f, 0x9a, 0x7e, 0xd0, 0x52, 0xbf, 0x54, 0x19,
+	0xd7, 0x80, 0x63, 0x3e, 0xd0, 0x08, 0x2d, 0x37,
+	0x0c, 0x15, 0xf7, 0xde, 0xab, 0x2b, 0xe3, 0x16,
+	0x21, 0x3a, 0xee, 0xa5, 0xdc, 0xdf, 0xde, 0xa3,
+	0x69, 0xcb, 0xfd, 0x92, 0x89, 0x75, 0xcf, 0xc9,
+	0x8a, 0xa4, 0xc8, 0xdd, 0xcc, 0x21, 0xe6, 0xfe,
+	0x9e, 0x43, 0x76, 0xb2, 0x45, 0x22, 0xb9, 0xb5,
+	0xac, 0x7e, 0x3d, 0x26, 0xb0, 0x53, 0xc8, 0xab,
+	0xfd, 0xea, 0x2c, 0xd1, 0x44, 0xc5, 0x60, 0x1b,
+	0x8a, 0x99, 0x0d, 0xa5, 0x0e, 0x67, 0x6e, 0x3a,
+	0x96, 0x55, 0xec, 0xe8, 0xcc, 0xbe, 0x49, 0xd9,
+	0xf2, 0x72, 0x9f, 0x30, 0x21, 0x97, 0x57, 0x19,
+	0xbe, 0x5e, 0x33, 0x0c, 0xee, 0xc0, 0x72, 0x0d,
+	0x2e, 0xd1, 0xe1, 0x52, 0xc2, 0xea, 0x41, 0xbb,
+	0xe1, 0x6d, 0xd4, 0x17, 0xa9, 0x8d, 0x89, 0xa9,
+	0xd6, 0x4b, 0xc6, 0x4c, 0xf2, 0x88, 0x97, 0x54,
+	0x3f, 0x4f, 0x57, 0xb7, 0x37, 0xf0, 0x2c, 0x11,
+	0x15, 0x56, 0xdb, 0x28, 0xb5, 0x16, 0x84, 0x66,
+	0xce, 0x45, 0x3f, 0x61, 0x75, 0xb6, 0xbe, 0x00,
+	0xd1, 0xe4, 0xf5, 0x27, 0x54, 0x7f, 0xc2, 0xf1,
+	0xb3, 0x32, 0x9a, 0xe8, 0x07, 0x02, 0xf3, 0xdb,
+	0xa9, 0xd1, 0xc2, 0xdf, 0xee, 0xad, 0xe5, 0x8a,
+	0x3c, 0xfa, 0x67, 0xec, 0x6b, 0xa4, 0x08, 0xfe,
+	0xba, 0x5a, 0x58, 0x0b, 0x78, 0x11, 0x91, 0x76,
+	0xe3, 0x1a, 0x28, 0x54, 0x5e, 0xbd, 0x71, 0x1b,
+	0x8b, 0xdc, 0x6c, 0xf4, 0x6f, 0xd7, 0xf4, 0xf3,
+	0xe1, 0x03, 0xa4, 0x3c, 0x8d, 0x91, 0x2e, 0xba,
+	0x5f, 0x7f, 0x8c, 0xaf, 0x69, 0x89, 0x29, 0x0a,
+	0x5b, 0x25, 0x13, 0xc4, 0x2e, 0x16, 0xc2, 0x15,
+	0x07, 0x5d, 0x58, 0x33, 0x7c, 0xe0, 0xf0, 0x55,
+	0x5f, 0xbf, 0x5e, 0xf0, 0x71, 0x48, 0x8f, 0xf7,
+	0x48, 0xb3, 0xf7, 0x0d, 0xa1, 0xd0, 0x63, 0xb1,
+	0xad, 0xae, 0xb5, 0xb0, 0x5f, 0x71, 0xaf, 0x24,
+	0x8b, 0xb9, 0x1c, 0x44, 0xd2, 0x1a, 0x53, 0xd1,
+	0xd5, 0xb4, 0xa9, 0xff, 0x88, 0x73, 0xb5, 0xaa,
+	0x15, 0x32, 0x5f, 0x59, 0x9d, 0x2e, 0xb5, 0xcb,
+	0xde, 0x21, 0x2e, 0xe9, 0x35, 0xed, 0xfd, 0x0f,
+	0xb6, 0xbb, 0xe6, 0x4b, 0x16, 0xf1, 0x45, 0x1e,
+	0xb4, 0x84, 0xe9, 0x58, 0x1c, 0x0c, 0x95, 0xc0,
+	0xcf, 0x49, 0x8b, 0x59, 0xa1, 0x78, 0xe6, 0x80,
+	0x12, 0x49, 0x7a, 0xd4, 0x66, 0x62, 0xdf, 0x9c,
+	0x18, 0xc8, 0x8c, 0xda, 0xc1, 0xa6, 0xbc, 0x65,
+	0x28, 0xd2, 0xa4, 0xe8, 0xf1, 0x35, 0xdb, 0x5a,
+	0x75, 0x1f, 0x73, 0x60, 0xec, 0xa8, 0xda, 0x5a,
+	0x43, 0x15, 0x83, 0x9b, 0xe7, 0xb1, 0xa6, 0x81,
+	0xbb, 0xef, 0xf3, 0x8f, 0x0f, 0xd3, 0x79, 0xa2,
+	0xe5, 0xaa, 0x42, 0xef, 0xa0, 0x13, 0x4e, 0x91,
+	0x2d, 0xcb, 0x61, 0x7a, 0x9a, 0x33, 0x14, 0x50,
+	0x77, 0x4a, 0xd0, 0x91, 0x48, 0xe0, 0x0c, 0xe0,
+	0x11, 0xcb, 0xdf, 0xb0, 0xce, 0x06, 0xd2, 0x79,
+	0x4d, 0x69, 0xb9, 0xc9, 0x36, 0x74, 0x8f, 0x81,
+	0x72, 0x73, 0xf3, 0x17, 0xb7, 0x13, 0xcb, 0x5b,
+	0xd2, 0x5c, 0x33, 0x61, 0xb7, 0x61, 0x79, 0xb0,
+	0xc0, 0x4d, 0xa1, 0xc7, 0x5d, 0x98, 0xc9, 0xe1,
+	0x98, 0xbd, 0x78, 0x5a, 0x2c, 0x64, 0x53, 0xaf,
+	0xaf, 0x66, 0x51, 0x47, 0xe4, 0x48, 0x66, 0x8b,
+	0x07, 0x52, 0xa3, 0x03, 0x93, 0x28, 0xad, 0xcc,
+	0xa3, 0x86, 0xad, 0x63, 0x04, 0x35, 0x6c, 0x49,
+	0xd5, 0x28, 0x0e, 0x00, 0x47, 0xf4, 0xd4, 0x32,
+	0x27, 0x19, 0xb3, 0x29, 0xe7, 0xbc, 0xbb, 0xce,
+	0x3e, 0x3e, 0xd5, 0x67, 0x20, 0xe4, 0x0b, 0x75,
+	0x95, 0x24, 0xe0, 0x6c, 0xb6, 0x29, 0x0c, 0x14,
+	0xfd
+};
+static const u8 key74[] __initconst = {
+	0xf0, 0x41, 0x5b, 0x00, 0x56, 0xc4, 0xac, 0xf6,
+	0xa2, 0x4c, 0x33, 0x41, 0x16, 0x09, 0x1b, 0x8e,
+	0x4d, 0xe8, 0x8c, 0xd9, 0x48, 0xab, 0x3e, 0x60,
+	0xcb, 0x49, 0x3e, 0xaf, 0x2b, 0x8b, 0xc8, 0xf0
+};
+enum { nonce74 = 0xcbdb0ffd0e923384ULL };
+
+static const struct chacha20_testvec chacha20_testvecs[] __initconst = {
+	{ input01, output01, key01, nonce01, sizeof(input01) },
+	{ input02, output02, key02, nonce02, sizeof(input02) },
+	{ input03, output03, key03, nonce03, sizeof(input03) },
+	{ input04, output04, key04, nonce04, sizeof(input04) },
+	{ input05, output05, key05, nonce05, sizeof(input05) },
+	{ input06, output06, key06, nonce06, sizeof(input06) },
+	{ input07, output07, key07, nonce07, sizeof(input07) },
+	{ input08, output08, key08, nonce08, sizeof(input08) },
+	{ input09, output09, key09, nonce09, sizeof(input09) },
+	{ input10, output10, key10, nonce10, sizeof(input10) },
+	{ input11, output11, key11, nonce11, sizeof(input11) },
+	{ input12, output12, key12, nonce12, sizeof(input12) },
+	{ input13, output13, key13, nonce13, sizeof(input13) },
+	{ input14, output14, key14, nonce14, sizeof(input14) },
+	{ input15, output15, key15, nonce15, sizeof(input15) },
+	{ input16, output16, key16, nonce16, sizeof(input16) },
+	{ input17, output17, key17, nonce17, sizeof(input17) },
+	{ input18, output18, key18, nonce18, sizeof(input18) },
+	{ input19, output19, key19, nonce19, sizeof(input19) },
+	{ input20, output20, key20, nonce20, sizeof(input20) },
+	{ input21, output21, key21, nonce21, sizeof(input21) },
+	{ input22, output22, key22, nonce22, sizeof(input22) },
+	{ input23, output23, key23, nonce23, sizeof(input23) },
+	{ input24, output24, key24, nonce24, sizeof(input24) },
+	{ input25, output25, key25, nonce25, sizeof(input25) },
+	{ input26, output26, key26, nonce26, sizeof(input26) },
+	{ input27, output27, key27, nonce27, sizeof(input27) },
+	{ input28, output28, key28, nonce28, sizeof(input28) },
+	{ input29, output29, key29, nonce29, sizeof(input29) },
+	{ input30, output30, key30, nonce30, sizeof(input30) },
+	{ input31, output31, key31, nonce31, sizeof(input31) },
+	{ input32, output32, key32, nonce32, sizeof(input32) },
+	{ input33, output33, key33, nonce33, sizeof(input33) },
+	{ input34, output34, key34, nonce34, sizeof(input34) },
+	{ input35, output35, key35, nonce35, sizeof(input35) },
+	{ input36, output36, key36, nonce36, sizeof(input36) },
+	{ input37, output37, key37, nonce37, sizeof(input37) },
+	{ input38, output38, key38, nonce38, sizeof(input38) },
+	{ input39, output39, key39, nonce39, sizeof(input39) },
+	{ input40, output40, key40, nonce40, sizeof(input40) },
+	{ input41, output41, key41, nonce41, sizeof(input41) },
+	{ input42, output42, key42, nonce42, sizeof(input42) },
+	{ input43, output43, key43, nonce43, sizeof(input43) },
+	{ input44, output44, key44, nonce44, sizeof(input44) },
+	{ input45, output45, key45, nonce45, sizeof(input45) },
+	{ input46, output46, key46, nonce46, sizeof(input46) },
+	{ input47, output47, key47, nonce47, sizeof(input47) },
+	{ input48, output48, key48, nonce48, sizeof(input48) },
+	{ input49, output49, key49, nonce49, sizeof(input49) },
+	{ input50, output50, key50, nonce50, sizeof(input50) },
+	{ input51, output51, key51, nonce51, sizeof(input51) },
+	{ input52, output52, key52, nonce52, sizeof(input52) },
+	{ input53, output53, key53, nonce53, sizeof(input53) },
+	{ input54, output54, key54, nonce54, sizeof(input54) },
+	{ input55, output55, key55, nonce55, sizeof(input55) },
+	{ input56, output56, key56, nonce56, sizeof(input56) },
+	{ input57, output57, key57, nonce57, sizeof(input57) },
+	{ input58, output58, key58, nonce58, sizeof(input58) },
+	{ input59, output59, key59, nonce59, sizeof(input59) },
+	{ input60, output60, key60, nonce60, sizeof(input60) },
+	{ input61, output61, key61, nonce61, sizeof(input61) },
+	{ input62, output62, key62, nonce62, sizeof(input62) },
+	{ input63, output63, key63, nonce63, sizeof(input63) },
+	{ input64, output64, key64, nonce64, sizeof(input64) },
+	{ input65, output65, key65, nonce65, sizeof(input65) },
+	{ input66, output66, key66, nonce66, sizeof(input66) },
+	{ input67, output67, key67, nonce67, sizeof(input67) },
+	{ input68, output68, key68, nonce68, sizeof(input68) },
+	{ input69, output69, key69, nonce69, sizeof(input69) },
+	{ input70, output70, key70, nonce70, sizeof(input70) },
+	{ input71, output71, key71, nonce71, sizeof(input71) },
+	{ input72, output72, key72, nonce72, sizeof(input72) },
+	{ input73, output73, key73, nonce73, sizeof(input73) },
+	{ input74, output74, key74, nonce74, sizeof(input74) }
+};
+
+static const struct hchacha20_testvec hchacha20_testvecs[] __initconst = {{
+	.key	= { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+		    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+		    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+		    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
+	.nonce	= { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a,
+		    0x00, 0x00, 0x00, 0x00, 0x31, 0x41, 0x59, 0x27 },
+	.output	= { 0x82, 0x41, 0x3b, 0x42, 0x27, 0xb2, 0x7b, 0xfe,
+		    0xd3, 0x0e, 0x42, 0x50, 0x8a, 0x87, 0x7d, 0x73,
+		    0xa0, 0xf9, 0xe4, 0xd5, 0x8a, 0x74, 0xa8, 0x53,
+		    0xc1, 0x2e, 0xc4, 0x13, 0x26, 0xd3, 0xec, 0xdc }
+}};
+
+static bool __init chacha20_selftest(void)
+{
+	enum {
+		MAXIMUM_TEST_BUFFER_LEN = 1UL << 10,
+		OUTRAGEOUSLY_HUGE_BUFFER_LEN = PAGE_SIZE * 35 + 17 /* 143k */
+	};
+	size_t i, j, k;
+	u32 derived_key[CHACHA20_KEY_WORDS];
+	u8 *offset_input = NULL, *computed_output = NULL, *massive_input = NULL;
+	u8 offset_key[CHACHA20_KEY_SIZE + 1]
+			__aligned(__alignof__(unsigned long));
+	struct chacha20_ctx state;
+	bool success = true;
+	simd_context_t simd_context;
+
+	offset_input = kmalloc(MAXIMUM_TEST_BUFFER_LEN + 1, GFP_KERNEL);
+	computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN + 1, GFP_KERNEL);
+	massive_input = vzalloc(OUTRAGEOUSLY_HUGE_BUFFER_LEN);
+	if (!computed_output || !offset_input || !massive_input) {
+		pr_err("chacha20 self-test malloc: FAIL\n");
+		success = false;
+		goto out;
+	}
+
+	simd_get(&simd_context);
+	for (i = 0; i < ARRAY_SIZE(chacha20_testvecs); ++i) {
+		/* Boring case */
+		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1);
+		memset(&state, 0, sizeof(state));
+		chacha20_init(&state, chacha20_testvecs[i].key,
+			      chacha20_testvecs[i].nonce);
+		chacha20(&state, computed_output, chacha20_testvecs[i].input,
+			 chacha20_testvecs[i].ilen, &simd_context);
+		if (memcmp(computed_output, chacha20_testvecs[i].output,
+			   chacha20_testvecs[i].ilen)) {
+			pr_err("chacha20 self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+		for (k = chacha20_testvecs[i].ilen;
+		     k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) {
+			if (computed_output[k]) {
+				pr_err("chacha20 self-test %zu (zero check): FAIL\n",
+				       i + 1);
+				success = false;
+				break;
+			}
+		}
+
+		/* Unaligned case */
+		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1);
+		memset(&state, 0, sizeof(state));
+		memcpy(offset_input + 1, chacha20_testvecs[i].input,
+		       chacha20_testvecs[i].ilen);
+		memcpy(offset_key + 1, chacha20_testvecs[i].key,
+		       CHACHA20_KEY_SIZE);
+		chacha20_init(&state, offset_key + 1, chacha20_testvecs[i].nonce);
+		chacha20(&state, computed_output + 1, offset_input + 1,
+			 chacha20_testvecs[i].ilen, &simd_context);
+		if (memcmp(computed_output + 1, chacha20_testvecs[i].output,
+			   chacha20_testvecs[i].ilen)) {
+			pr_err("chacha20 self-test %zu (unaligned): FAIL\n",
+			       i + 1);
+			success = false;
+		}
+		if (computed_output[0]) {
+			pr_err("chacha20 self-test %zu (unaligned, zero check): FAIL\n",
+			       i + 1);
+			success = false;
+		}
+		for (k = chacha20_testvecs[i].ilen + 1;
+		     k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) {
+			if (computed_output[k]) {
+				pr_err("chacha20 self-test %zu (unaligned, zero check): FAIL\n",
+				       i + 1);
+				success = false;
+				break;
+			}
+		}
+
+		/* Chunked case */
+		if (chacha20_testvecs[i].ilen <= CHACHA20_BLOCK_SIZE)
+			goto next_test;
+		memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1);
+		memset(&state, 0, sizeof(state));
+		chacha20_init(&state, chacha20_testvecs[i].key,
+			      chacha20_testvecs[i].nonce);
+		chacha20(&state, computed_output, chacha20_testvecs[i].input,
+			 CHACHA20_BLOCK_SIZE, &simd_context);
+		chacha20(&state, computed_output + CHACHA20_BLOCK_SIZE,
+			 chacha20_testvecs[i].input + CHACHA20_BLOCK_SIZE,
+			 chacha20_testvecs[i].ilen - CHACHA20_BLOCK_SIZE,
+			 &simd_context);
+		if (memcmp(computed_output, chacha20_testvecs[i].output,
+			   chacha20_testvecs[i].ilen)) {
+			pr_err("chacha20 self-test %zu (chunked): FAIL\n",
+			       i + 1);
+			success = false;
+		}
+		for (k = chacha20_testvecs[i].ilen;
+		     k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) {
+			if (computed_output[k]) {
+				pr_err("chacha20 self-test %zu (chunked, zero check): FAIL\n",
+				       i + 1);
+				success = false;
+				break;
+			}
+		}
+
+next_test:
+		/* Sliding unaligned case */
+		if (chacha20_testvecs[i].ilen > CHACHA20_BLOCK_SIZE + 1 ||
+		    !chacha20_testvecs[i].ilen)
+			continue;
+		for (j = 1; j < CHACHA20_BLOCK_SIZE; ++j) {
+			memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN + 1);
+			memset(&state, 0, sizeof(state));
+			memcpy(offset_input + j, chacha20_testvecs[i].input,
+			       chacha20_testvecs[i].ilen);
+			chacha20_init(&state, chacha20_testvecs[i].key,
+				      chacha20_testvecs[i].nonce);
+			chacha20(&state, computed_output + j, offset_input + j,
+				 chacha20_testvecs[i].ilen, &simd_context);
+			if (memcmp(computed_output + j,
+				   chacha20_testvecs[i].output,
+				   chacha20_testvecs[i].ilen)) {
+				pr_err("chacha20 self-test %zu (unaligned, slide %zu): FAIL\n",
+				       i + 1, j);
+				success = false;
+			}
+			for (k = j; k < j; ++k) {
+				if (computed_output[k]) {
+					pr_err("chacha20 self-test %zu (unaligned, slide %zu, zero check): FAIL\n",
+					       i + 1, j);
+					success = false;
+					break;
+				}
+			}
+			for (k = chacha20_testvecs[i].ilen + j;
+			     k < MAXIMUM_TEST_BUFFER_LEN + 1; ++k) {
+				if (computed_output[k]) {
+					pr_err("chacha20 self-test %zu (unaligned, slide %zu, zero check): FAIL\n",
+					       i + 1, j);
+					success = false;
+					break;
+				}
+			}
+		}
+	}
+	for (i = 0; i < ARRAY_SIZE(hchacha20_testvecs); ++i) {
+		memset(&derived_key, 0, sizeof(derived_key));
+		hchacha20(derived_key, hchacha20_testvecs[i].nonce,
+			  hchacha20_testvecs[i].key, &simd_context);
+		cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
+		if (memcmp(derived_key, hchacha20_testvecs[i].output,
+			   CHACHA20_KEY_SIZE)) {
+			pr_err("hchacha20 self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+	}
+	memset(&state, 0, sizeof(state));
+	chacha20_init(&state, chacha20_testvecs[0].key,
+		      chacha20_testvecs[0].nonce);
+	chacha20(&state, massive_input, massive_input,
+		 OUTRAGEOUSLY_HUGE_BUFFER_LEN, &simd_context);
+	chacha20_init(&state, chacha20_testvecs[0].key,
+		      chacha20_testvecs[0].nonce);
+	chacha20(&state, massive_input, massive_input,
+		 OUTRAGEOUSLY_HUGE_BUFFER_LEN, DONT_USE_SIMD);
+	for (k = 0; k < OUTRAGEOUSLY_HUGE_BUFFER_LEN; ++k) {
+		if (massive_input[k]) {
+			pr_err("chacha20 self-test massive: FAIL\n");
+			success = false;
+			break;
+		}
+	}
+
+	simd_put(&simd_context);
+
+out:
+	kfree(offset_input);
+	kfree(computed_output);
+	vfree(massive_input);
+	return success;
+}
diff --git a/lib/zinc/selftest/run.h b/lib/zinc/selftest/run.h
new file mode 100644
index 000000000000..74f607a8e0e2
--- /dev/null
+++ b/lib/zinc/selftest/run.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _ZINC_SELFTEST_RUN_H
+#define _ZINC_SELFTEST_RUN_H
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/bug.h>
+
+static inline bool selftest_run(const char *name, bool (*selftest)(void),
+				bool *const nobs[], unsigned int nobs_len)
+{
+	unsigned long set = 0, subset = 0, largest_subset = 0;
+	unsigned int i;
+
+	BUILD_BUG_ON(!__builtin_constant_p(nobs_len) ||
+		     nobs_len >= BITS_PER_LONG);
+
+	if (!IS_ENABLED(CONFIG_ZINC_SELFTEST))
+		return true;
+
+	for (i = 0; i < nobs_len; ++i)
+		set |= ((unsigned long)*nobs[i]) << i;
+
+	do {
+		for (i = 0; i < nobs_len; ++i)
+			*nobs[i] = BIT(i) & subset;
+		if (selftest())
+			largest_subset = max(subset, largest_subset);
+		else
+			pr_err("%s self-test combination 0x%lx: FAIL\n", name,
+			       subset);
+		subset = (subset - set) & set;
+	} while (subset);
+
+	for (i = 0; i < nobs_len; ++i)
+		*nobs[i] = BIT(i) & largest_subset;
+
+	if (largest_subset == set)
+		pr_info("%s self-tests: pass\n", name);
+
+	return !WARN_ON(largest_subset != set);
+}
+
+#endif
-- 
2.19.1

^ permalink raw reply related

* [PATCH net-next v8 01/28] ARM: makefile: use ARMv3M mode for RiscPC
From: Jason A. Donenfeld @ 2018-10-18 14:56 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-crypto, davem, gregkh; +Cc: Jason A. Donenfeld
In-Reply-To: <20181018145712.7538-1-Jason@zx2c4.com>

The purpose of CONFIG_CPU_32v3 is to avoid ldrh/strh on the RiscPC,
which is pretty much an ARMv4 device, except its bus will choke on the
half-words. The way to make the C compiler not output ldrh/strh is with
-march=armv3, which doesn't support them in the ISA. However, this
prevents certain cryptography code from working that uses instructions
like umull. Fortunately there's also -march=armv3m that does support
those, making it possible to continue assembling optimized cryptography
routines for our beloved RiscPC.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---

Notes:
    This commit has been submitted to the proper ARM tree and is working its
    way upstream. It's included in this series here so that kbuild 0-day bot
    doesn't get too nervous about RiscPC, but is already entering the tree
    through arm-next.

 arch/arm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index d1516f85f25d..7fd4bcaf0721 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -74,7 +74,7 @@ endif
 arch-$(CONFIG_CPU_32v5)		=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
 arch-$(CONFIG_CPU_32v4T)	=-D__LINUX_ARM_ARCH__=4 -march=armv4t
 arch-$(CONFIG_CPU_32v4)		=-D__LINUX_ARM_ARCH__=4 -march=armv4
-arch-$(CONFIG_CPU_32v3)		=-D__LINUX_ARM_ARCH__=3 -march=armv3
+arch-$(CONFIG_CPU_32v3)		=-D__LINUX_ARM_ARCH__=3 -march=armv3m
 
 # Evaluate arch cc-option calls now
 arch-y := $(arch-y)
-- 
2.19.1

^ permalink raw reply related

* [PATCH net-next v8 00/28] WireGuard: Secure Network Tunnel
From: Jason A. Donenfeld @ 2018-10-18 14:56 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-crypto, davem, gregkh; +Cc: Jason A. Donenfeld

Changes v7->v8, along with who suggested it.
--------------------------------------------
- Implementations that fail the selftests are now disabled, after a warning
  is printed. This way users don't make wrong calculations, even in the face
  of a rather grave bug.
- [Sultan Alsawaf] When assigning to a boolean, prefer "BIT(i) & a" to "(a >>
  i) & 1".
- [Andrew Lunn] Avoid control statements inside macros.
- [Jiri Pirko] Prefix functions used in callbacks with wg_.
- [Jiri Pirko] Rename struct wireguard_peer and struct wireguard_device to
  struct wg_peer and struct wg_device.
- [Eugene Syromiatnikov] Do not use nla type field as an index, and actually
  don't use an index at all, because it has no meaning or relevance at all.
- [Joe Perches] Do not place a space between for_each iterators and
  parentheses.
- Enumerable style cleanups and nits.
- [Arnd Bergmann] Swap endianness in allowedips early on in case optimizer is
  able to look a bit further in but not too far, resulting in a warning from
  -Wmaybe-uninitialized.
- [Jiri Pirko] Use textual error labels instead of numerical ones.
- [Jiri Pirko] Better module description string.
- [Eric Biggers] In poly1305 port to crypto API, account for short inputs in
  final function, in which case -ENOKEY should be returned.

-----------------------------------------------------------

This patchset is available on git.kernel.org in this branch, where it may be
pulled directly for inclusion into net-next:

  * https://git.kernel.org/pub/scm/linux/kernel/git/zx2c4/linux.git/log/?h=jd/wireguard

-----------------------------------------------------------

WireGuard is a secure network tunnel written especially for Linux, which
has faced around three years of serious development, deployment, and
scrutiny. It delivers excellent performance and is extremely easy to
use and configure. It has been designed with the primary goal of being
both easy to audit by virtue of being small and highly secure from a
cryptography and systems security perspective. WireGuard is used by some
massive companies pushing enormous amounts of traffic, and likely
already today you've consumed bytes that at some point transited through
a WireGuard tunnel. Even as an out-of-tree module, WireGuard has been
integrated into various userspace tools, Linux distributions, mobile
phones, and data centers. There are ports in several languages to
several operating systems, and even commercial hardware and services
sold integrating WireGuard. It is time, therefore, for WireGuard to be
properly integrated into Linux.

Ample information, including documentation, installation instructions,
and project details, is available at:

  * https://www.wireguard.com/
  * https://www.wireguard.com/papers/wireguard.pdf

As it is currently an out-of-tree module, it lives in its own git repo
and has its own mailing list, and every commit for the module is tested
against every stable kernel since 3.10 on a variety of architectures
using an extensive test suite:

  * https://git.zx2c4.com/WireGuard
    https://git.kernel.org/pub/scm/linux/kernel/git/zx2c4/WireGuard.git/
  * https://lists.zx2c4.com/mailman/listinfo/wireguard
  * https://www.wireguard.com/build-status/

The project has been broadly discussed at conferences, and was presented
to the Netdev developers in Seoul last November, where a paper was
released detailing some interesting aspects of the project. Dave asked
me after the talk if I would consider sending in a v1 "sooner rather
than later", hence this patchset. Zinc was presented at Kernel Recipes
in September, and a video is available online. Both Zinc and WireGuard
will be presented at the conference in Vancouver in November.

  * https://www.wireguard.com/presentations/
  * https://www.wireguard.com/papers/wireguard-netdev22.pdf
  * Zinc talk: https://www.youtube.com/watch?v=bFhdln8aJ_U
  * Netdev talk: https://www.youtube.com/watch?v=54orFwtQ1XY

The cryptography in the protocol itself has been formally verified by
several independent academic teams with positive results, and I know of
two additional efforts on their way to further corroborate those
findings. The version 1 protocol is "complete", and so the purpose of
this review is to assess the implementation of the protocol. However, it
still may be of interest to know that the thing you're reviewing uses a
protocol with various nice security properties:

  * https://www.wireguard.com/formal-verification/

This patchset is divided into four segments. The first introduces a very
simple helper for working with the FPU state for the purposes of amortizing
SIMD operations. The second segment is a small collection of cryptographic
primitives, split up into several commits by primitive and by hardware. The
third shows usage of Zinc within the existing crypto API and as a replacement
to the existing crypto API. The last is WireGuard itself, presented as an
unintrusive and self-contained virtual network driver.

It is intended that this entire patch series enter the kernel through
DaveM's net-next tree. Subsequently, WireGuard patches will go through
DaveM's net-next tree, while Zinc patches will go through Greg KH's tree in
cases when an entire development cycle has no relationships with existing code
in crypto/; however, if there are any relationships with code in crypto/, then
pull requests will be sent to Herbert instead in case there are merge
conflicts.

Enjoy,
Jason

^ permalink raw reply

* Re: [PATCH v3 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB
From: Song Liu @ 2018-10-18  6:53 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: netdev@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	Kernel Team
In-Reply-To: <20181018062557.zyeoiker7jigcv6q@ast-mbp.dhcp.thefacebook.com>



> On Oct 17, 2018, at 11:25 PM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> 
> On Wed, Oct 17, 2018 at 10:39:49PM -0700, Song Liu wrote:
>> Tests are added to make sure CGROUP_SKB cannot access:
>>  tc_classid, data_meta, flow_keys
>> 
>> and can read and write:
>>  mark, prority, and cb[0-4]
>> 
>> and can read other fields.
>> 
>> To make selftest with skb->sk work, a dummy sk is added in
>> bpf_prog_test_run_skb().
>> 
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> net/bpf/test_run.c                          |   4 +
>> tools/testing/selftests/bpf/test_verifier.c | 170 ++++++++++++++++++++
>> 2 files changed, 174 insertions(+)
>> 
>> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
>> index 0c423b8cd75c..c7210e2f1ae9 100644
>> --- a/net/bpf/test_run.c
>> +++ b/net/bpf/test_run.c
>> @@ -10,6 +10,7 @@
>> #include <linux/etherdevice.h>
>> #include <linux/filter.h>
>> #include <linux/sched/signal.h>
>> +#include <net/sock.h>
>> 
>> static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
>> 		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
>> @@ -115,6 +116,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
>> 	u32 retval, duration;
>> 	int hh_len = ETH_HLEN;
>> 	struct sk_buff *skb;
>> +	struct sock sk;
>> 	void *data;
>> 	int ret;
>> 
>> @@ -142,6 +144,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
>> 		kfree(data);
>> 		return -ENOMEM;
>> 	}
>> +	sock_init_data(NULL, &sk);
>> +	skb->sk = &sk;
> 
> I was about to apply it, but it crashes as:
> [   16.830822] BUG: unable to handle kernel paging request at 000000014427b974
> [   16.831363] PGD 8000000135ecf067 P4D 8000000135ecf067 PUD 0
> [   16.831792] Oops: 0000 [#1] SMP PTI
> [   16.832061] CPU: 1 PID: 1965 Comm: test_verifier Not tainted 4.19.0-rc7-02550-ga76dee97ff12 #1153
> [   16.832712] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
> [   16.833358] RIP: 0010:cmp_map_id+0x10/0x50
> [   16.835036] RSP: 0018:ffffc9000080faf8 EFLAGS: 00010246
> [   16.835429] RAX: 00000000ffffffff RBX: 0000000036069ee8 RCX: 0000000000000000
> [   16.835958] RDX: 000000014427b970 RSI: 000000014427b970 RDI: ffffc9000080fb44
> [   16.836496] RBP: 000000000000000c R08: ffffffff810f7330 R09: 0000000036069ee8
> [   16.837026] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
> [   16.837554] R13: ffffffff810f7330 R14: 000000014427b970 R15: 000000001b034f74
> [   16.838083] FS:  00007fae50663700(0000) GS:ffff88013ba80000(0000) knlGS:0000000000000000
> [   16.838677] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   16.839105] CR2: 000000014427b974 CR3: 0000000135934005 CR4: 00000000003606e0
> [   16.839632] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [   16.840157] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [   16.840682] Call Trace:
> [   16.840897]  bsearch+0x50/0x90
> [   16.841144]  map_id_range_down+0x81/0xa0
> [   16.841438]  make_kuid+0xf/0x10
> [   16.841677]  sock_init_data+0x24f/0x260
> [   16.841979]  bpf_prog_test_run_skb+0x9e/0x270
> 
> I suspect sock_net_set(sk, &init_net) is necessary before sock_init_data() call.

I am not able to repro this, even with CONFIG_KASAN and CONFIG_PAGE_POISONING. 

Let me try a better approach on this.

Thanks,
Song

^ permalink raw reply

* [PATCH] net: socket: fix a missing-check bug
From: Wenwen Wang @ 2018-10-18 14:36 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: Kangjie Lu, David S. Miller, open list:NETWORKING [GENERAL],
	open list

In ethtool_ioctl(), the ioctl command 'ethcmd' is checked through a switch
statement to see whether it is necessary to pre-process the ethtool
structure, because, as mentioned in the comment, the structure
ethtool_rxnfc is defined with padding. If yes, a user-space buffer 'rxnfc'
is allocated through compat_alloc_user_space(). One thing to note here is
that, if 'ethcmd' is ETHTOOL_GRXCLSRLALL, the size of the buffer 'rxnfc' is
partially determined by 'rule_cnt', which is actually acquired from the
user-space buffer 'compat_rxnfc', i.e., 'compat_rxnfc->rule_cnt', through
get_user(). After 'rxnfc' is allocated, the data in the original user-space
buffer 'compat_rxnfc' is then copied to 'rxnfc' through copy_in_user(),
including the 'rule_cnt' field. However, after this copy, no check is
re-enforced on 'rxnfc->rule_cnt'. So it is possible that a malicious user
race to change the value in the 'compat_rxnfc->rule_cnt' between these two
copies. Through this way, the attacker can bypass the previous check on
'rule_cnt' and inject malicious data. This can cause undefined behavior of
the kernel and introduce potential security risk.

This patch avoids the above issue via copying the value acquired by
get_user() to 'rxnfc->rule_cn', if 'ethcmd' is ETHTOOL_GRXCLSRLALL.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
---
 net/socket.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 01f3f8f..390a8ec 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2875,9 +2875,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 		    copy_in_user(&rxnfc->fs.ring_cookie,
 				 &compat_rxnfc->fs.ring_cookie,
 				 (void __user *)(&rxnfc->fs.location + 1) -
-				 (void __user *)&rxnfc->fs.ring_cookie) ||
-		    copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
-				 sizeof(rxnfc->rule_cnt)))
+				 (void __user *)&rxnfc->fs.ring_cookie))
+			return -EFAULT;
+		if (ethcmd == ETHTOOL_GRXCLSRLALL) {
+			if (put_user(rule_cnt, &rxnfc->rule_cnt))
+				return -EFAULT;
+		} else if (copy_in_user(&rxnfc->rule_cnt,
+					&compat_rxnfc->rule_cnt,
+					sizeof(rxnfc->rule_cnt)))
 			return -EFAULT;
 	}
 
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH v3 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB
From: Alexei Starovoitov @ 2018-10-18  6:25 UTC (permalink / raw)
  To: Song Liu; +Cc: netdev, ast, daniel, kernel-team
In-Reply-To: <20181018053949.4064426-3-songliubraving@fb.com>

On Wed, Oct 17, 2018 at 10:39:49PM -0700, Song Liu wrote:
> Tests are added to make sure CGROUP_SKB cannot access:
>   tc_classid, data_meta, flow_keys
> 
> and can read and write:
>   mark, prority, and cb[0-4]
> 
> and can read other fields.
> 
> To make selftest with skb->sk work, a dummy sk is added in
> bpf_prog_test_run_skb().
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  net/bpf/test_run.c                          |   4 +
>  tools/testing/selftests/bpf/test_verifier.c | 170 ++++++++++++++++++++
>  2 files changed, 174 insertions(+)
> 
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index 0c423b8cd75c..c7210e2f1ae9 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -10,6 +10,7 @@
>  #include <linux/etherdevice.h>
>  #include <linux/filter.h>
>  #include <linux/sched/signal.h>
> +#include <net/sock.h>
>  
>  static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
>  		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
> @@ -115,6 +116,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
>  	u32 retval, duration;
>  	int hh_len = ETH_HLEN;
>  	struct sk_buff *skb;
> +	struct sock sk;
>  	void *data;
>  	int ret;
>  
> @@ -142,6 +144,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
>  		kfree(data);
>  		return -ENOMEM;
>  	}
> +	sock_init_data(NULL, &sk);
> +	skb->sk = &sk;

I was about to apply it, but it crashes as:
[   16.830822] BUG: unable to handle kernel paging request at 000000014427b974
[   16.831363] PGD 8000000135ecf067 P4D 8000000135ecf067 PUD 0
[   16.831792] Oops: 0000 [#1] SMP PTI
[   16.832061] CPU: 1 PID: 1965 Comm: test_verifier Not tainted 4.19.0-rc7-02550-ga76dee97ff12 #1153
[   16.832712] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
[   16.833358] RIP: 0010:cmp_map_id+0x10/0x50
[   16.835036] RSP: 0018:ffffc9000080faf8 EFLAGS: 00010246
[   16.835429] RAX: 00000000ffffffff RBX: 0000000036069ee8 RCX: 0000000000000000
[   16.835958] RDX: 000000014427b970 RSI: 000000014427b970 RDI: ffffc9000080fb44
[   16.836496] RBP: 000000000000000c R08: ffffffff810f7330 R09: 0000000036069ee8
[   16.837026] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
[   16.837554] R13: ffffffff810f7330 R14: 000000014427b970 R15: 000000001b034f74
[   16.838083] FS:  00007fae50663700(0000) GS:ffff88013ba80000(0000) knlGS:0000000000000000
[   16.838677] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   16.839105] CR2: 000000014427b974 CR3: 0000000135934005 CR4: 00000000003606e0
[   16.839632] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   16.840157] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   16.840682] Call Trace:
[   16.840897]  bsearch+0x50/0x90
[   16.841144]  map_id_range_down+0x81/0xa0
[   16.841438]  make_kuid+0xf/0x10
[   16.841677]  sock_init_data+0x24f/0x260
[   16.841979]  bpf_prog_test_run_skb+0x9e/0x270

I suspect sock_net_set(sk, &init_net) is necessary before sock_init_data() call.

^ permalink raw reply

* Re: [PATCH net] r8169: fix NAPI handling under high load
From: David Miller @ 2018-10-18  6:24 UTC (permalink / raw)
  To: hkallweit1; +Cc: romieu, nic_swsd, netdev
In-Reply-To: <d54112bb-2f02-bd59-5b5b-e211f070b20a@gmail.com>

From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 18 Oct 2018 07:58:52 +0200

> On 18.10.2018 07:21, David Miller wrote:
>> From: Francois Romieu <romieu@fr.zoreil.com>
>> Date: Thu, 18 Oct 2018 01:30:45 +0200
>> 
>>> Heiner Kallweit <hkallweit1@gmail.com> :
>>> [...]
>>>> This issue has been there more or less forever (at least it exists in
>>>> 3.16 already), so I can't provide a "Fixes" tag. 
>>>
>>> Hardly forever. It fixes da78dbff2e05630921c551dbbc70a4b7981a8fff.
>> 
>> I don't see exactly how that can be true.
>> 
>> That commit didn't change the parts of the NAPI poll processing which
>> are relevant here, mainly the guarding of the RX and TX work using
>> the status bits which are cleared.
>> 
> 
> AFAICS Francois is right and patch da78dbff2e05 ("r8169: remove work
> from irq handler") introduced the guarding of RX and TX work.
> I just checked back to 3.16 as oldest LTS kernel version.

Aha, now I see it.

>> Maybe I'm missing something?  If so, indeed it would be nice to add
>> a proper Fixes: tag here.
>> 
> Shall I submit a v2 including the Fixes line?

Yes, please do!

^ permalink raw reply

* Re: [PATCH bpf] bpf: fix doc of bpf_skb_adjust_room() in uapi
From: Nicolas Dichtel @ 2018-10-18  6:21 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: ast, daniel, davem, netdev, Quentin Monnet
In-Reply-To: <20181018044900.fvsz27v76ptgelfj@ast-mbp.dhcp.thefacebook.com>

Le 18/10/2018 à 06:49, Alexei Starovoitov a écrit :
> On Wed, Oct 17, 2018 at 04:24:48PM +0200, Nicolas Dichtel wrote:
>> len_diff is signed.
>>
>> Fixes: fa15601ab31e ("bpf: add documentation for eBPF helpers (33-41)")
>> CC: Quentin Monnet <quentin.monnet@netronome.com>
>> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
>> ---
>>  include/uapi/linux/bpf.h       | 2 +-
>>  tools/include/uapi/linux/bpf.h | 2 +-
>>  2 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 66917a4eba27..c4ffe91d5598 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -1430,7 +1430,7 @@ union bpf_attr {
>>   * 	Return
>>   * 		0 on success, or a negative error in case of failure.
>>   *
>> - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
>> + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
> 
> Thanks. Applied to bpf-next, since we're very late into release cycle.
> 
Yep, I was also wondering if it was not too late for the bpf tree ;-)


Thank you,
Nicolas

^ permalink raw reply

* Re: [PATCH net] r8169: fix NAPI handling under high load
From: Jonathan Woithe @ 2018-10-18  6:15 UTC (permalink / raw)
  To: Heiner Kallweit
  Cc: Francois Romieu, Holger Hoffstätte, David Miller,
	Realtek linux nic maintainers, netdev@vger.kernel.org
In-Reply-To: <8beda4fa-5d04-49e6-eb3e-5656897a301f@gmail.com>

On Thu, Oct 18, 2018 at 08:03:32AM +0200, Heiner Kallweit wrote:
> On 18.10.2018 07:58, Jonathan Woithe wrote:
> > On Thu, Oct 18, 2018 at 01:30:51AM +0200, Francois Romieu wrote:
> >> Holger Hoffstätte <holger@applied-asynchrony.com> :
> >> [...]
> >> The bug will induce delayed rx processing when a spike of "load" is
> >> followed by an idle period.
> > 
> > If this is the case, I wonder whether this bug might also be the cause of
> > the long reception delays we've observed at times when a period of high
> > network load is followed by almost nothing[1].  That thread[2] details the
> > investigations subsequently done.  A git bisect showed that commit
> > da78dbff2e05630921c551dbbc70a4b7981a8fff was the origin of the misbehaviour
> > we were observing.
> > 
> > We still see the problem when we test with recent kernels.  It would be
> > great if the underlying problem has now been identified.
> > 
> > I can possibly scrape some hardware together to test any proposed fix under
> > our workload if there was interest.
> > 
> Proposed fix is here:
> https://patchwork.ozlabs.org/patch/985014/
> Would be good if you could test it. Thanks!

I should be able to do so tomorrow.  Which kernel would you like me to apply
the patch to?

Regards
  jonathan

^ permalink raw reply

* Re: [PATCH net] r8169: fix NAPI handling under high load
From: Jonathan Woithe @ 2018-10-18  5:58 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Holger Hoffstätte, Heiner Kallweit, David Miller,
	Realtek linux nic maintainers, netdev@vger.kernel.org
In-Reply-To: <20181017233051.GB8478@electric-eye.fr.zoreil.com>

On Thu, Oct 18, 2018 at 01:30:51AM +0200, Francois Romieu wrote:
> Holger Hoffstätte <holger@applied-asynchrony.com> :
> [...]
> > I continued to use the BQL patch in my private tree after it was reverted
> > and also had occasional timeouts, but *only* after I started playing
> > with ethtool to change offload settings. Without offloads or the BQL patch
> > everything has been rock-solid since then.
> > The other weird problem was that timeouts would occur on an otherwise
> > *completely idle* system. Since that occasionally borked my NFS server
> > over night I ultimately removed BQL as well. Rock-solid since then.
> 
> The bug will induce delayed rx processing when a spike of "load" is
> followed by an idle period.

If this is the case, I wonder whether this bug might also be the cause of
the long reception delays we've observed at times when a period of high
network load is followed by almost nothing[1].  That thread[2] details the
investigations subsequently done.  A git bisect showed that commit
da78dbff2e05630921c551dbbc70a4b7981a8fff was the origin of the misbehaviour
we were observing.

We still see the problem when we test with recent kernels.  It would be
great if the underlying problem has now been identified.

I can possibly scrape some hardware together to test any proposed fix under
our workload if there was interest.

Regards
  jonathan

[1] https://marc.info/?l=linux-netdev&m=136281333207734&w=2
[2] https://marc.info/?t=136281339500002&r=1&w=2

^ permalink raw reply

* Re: [PATCH net] r8169: fix NAPI handling under high load
From: Heiner Kallweit @ 2018-10-18  6:03 UTC (permalink / raw)
  To: Jonathan Woithe
  Cc: Francois Romieu, Holger Hoffstätte, David Miller,
	Realtek linux nic maintainers, netdev@vger.kernel.org
In-Reply-To: <20181018055835.GE2487@marvin.atrad.com.au>

On 18.10.2018 07:58, Jonathan Woithe wrote:
> On Thu, Oct 18, 2018 at 01:30:51AM +0200, Francois Romieu wrote:
>> Holger Hoffstätte <holger@applied-asynchrony.com> :
>> [...]
>>> I continued to use the BQL patch in my private tree after it was reverted
>>> and also had occasional timeouts, but *only* after I started playing
>>> with ethtool to change offload settings. Without offloads or the BQL patch
>>> everything has been rock-solid since then.
>>> The other weird problem was that timeouts would occur on an otherwise
>>> *completely idle* system. Since that occasionally borked my NFS server
>>> over night I ultimately removed BQL as well. Rock-solid since then.
>>
>> The bug will induce delayed rx processing when a spike of "load" is
>> followed by an idle period.
> 
> If this is the case, I wonder whether this bug might also be the cause of
> the long reception delays we've observed at times when a period of high
> network load is followed by almost nothing[1].  That thread[2] details the
> investigations subsequently done.  A git bisect showed that commit
> da78dbff2e05630921c551dbbc70a4b7981a8fff was the origin of the misbehaviour
> we were observing.
> 
> We still see the problem when we test with recent kernels.  It would be
> great if the underlying problem has now been identified.
> 
> I can possibly scrape some hardware together to test any proposed fix under
> our workload if there was interest.
> 
Proposed fix is here:
https://patchwork.ozlabs.org/patch/985014/
Would be good if you could test it. Thanks!

Heiner

> Regards
>   jonathan
> 
> [1] https://marc.info/?l=linux-netdev&m=136281333207734&w=2
> [2] https://marc.info/?t=136281339500002&r=1&w=2
> 

^ permalink raw reply

* Re: [PATCH net] r8169: fix NAPI handling under high load
From: Heiner Kallweit @ 2018-10-18  5:58 UTC (permalink / raw)
  To: David Miller, romieu; +Cc: nic_swsd, netdev
In-Reply-To: <20181017.222149.1241280289340067644.davem@davemloft.net>

On 18.10.2018 07:21, David Miller wrote:
> From: Francois Romieu <romieu@fr.zoreil.com>
> Date: Thu, 18 Oct 2018 01:30:45 +0200
> 
>> Heiner Kallweit <hkallweit1@gmail.com> :
>> [...]
>>> This issue has been there more or less forever (at least it exists in
>>> 3.16 already), so I can't provide a "Fixes" tag. 
>>
>> Hardly forever. It fixes da78dbff2e05630921c551dbbc70a4b7981a8fff.
> 
> I don't see exactly how that can be true.
> 
> That commit didn't change the parts of the NAPI poll processing which
> are relevant here, mainly the guarding of the RX and TX work using
> the status bits which are cleared.
> 

AFAICS Francois is right and patch da78dbff2e05 ("r8169: remove work
from irq handler") introduced the guarding of RX and TX work.
I just checked back to 3.16 as oldest LTS kernel version.

> Maybe I'm missing something?  If so, indeed it would be nice to add
> a proper Fixes: tag here.
> 
Shall I submit a v2 including the Fixes line?

> Thanks!
> 

^ permalink raw reply

* Re: bond: take rcu lock in netpoll_send_skb_on_dev
From: Cong Wang @ 2018-10-18  5:46 UTC (permalink / raw)
  To: eranbe
  Cc: Dave Jones, Linux Kernel Network Developers, Tariq Toukan,
	Saeed Mahameed
In-Reply-To: <b80a8984-2100-a7c7-008d-2768c09ef0f8@mellanox.com>

On Mon, Oct 15, 2018 at 4:36 AM Eran Ben Elisha <eranbe@mellanox.com> wrote:
> Hi,
>
> This suggested fix introduced a regression while using netconsole module
> with mlx5_core module loaded.

It is already reported here:
https://marc.info/?l=linux-kernel&m=153917359528669&w=2


>
> During irq handling, we hit a warning that this rcu_read_lock_bh cannot
> be taken inside an IRQ.

Yes, I mentioned the same even before this patch was sent out:
https://marc.info/?l=linux-netdev&m=153816136624679&w=2

Thanks.

^ permalink raw reply

* Re: [net-next PATCH] net: sched: cls_flower: Classify packets using port ranges
From: Cong Wang @ 2018-10-18  5:41 UTC (permalink / raw)
  To: David Miller
  Cc: amritha.nambiar, Linux Kernel Network Developers, Jakub Kicinski,
	sridhar.samudrala, Jamal Hadi Salim, Jiri Pirko
In-Reply-To: <20181017.214233.695288699109520404.davem@davemloft.net>

On Wed, Oct 17, 2018 at 9:42 PM David Miller <davem@davemloft.net> wrote:
>
> From: Amritha Nambiar <amritha.nambiar@intel.com>
> Date: Fri, 12 Oct 2018 06:53:30 -0700
>
> > Added support in tc flower for filtering based on port ranges.
> > This is a rework of the RFC patch at:
> > https://patchwork.ozlabs.org/patch/969595/
>
> You never addressed Cong's feedback asking you to explain why this
> can't be simply built using existing generic filtering facilities that
> exist already.
>
> I appreciate that you addressed Jiri's feedback, but Cong's feedback is
> just as, if not more, important.
>

My objection is against introducing a new filter just for port range, now
it is built on top of flower filter, so it is much better now.

u32 filter can do the nearly same, but requires a power-of-two, so it is
not completely duplicated.

Therefore, I think the idea of building it on top of flower is fine. But I don't
read into any code, only the description.

Thanks!

^ permalink raw reply

* [PATCH v3 bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB
From: Song Liu @ 2018-10-18  5:39 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, kernel-team, Song Liu
In-Reply-To: <20181018053949.4064426-1-songliubraving@fb.com>

BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_pointers() and
bpf_restore_data_pointers() are introduced. There are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/filter.h | 24 ++++++++++++++++++++++++
 kernel/bpf/cgroup.c    |  6 ++++++
 net/core/filter.c      | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..96b3ee7f14c9 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,30 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
 	cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that save orginal
+ * data in cb->data and cb->meta_data for restore.
+ */
+static inline void bpf_compute_and_save_data_pointers(
+	struct sk_buff *skb, void *saved_pointers[2])
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	saved_pointers[0] = cb->data_meta;
+	saved_pointers[1] = cb->data_end;
+	cb->data_meta = skb->data - skb_metadata_len(skb);
+	cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data saved by bpf_compute_data_pointers(). */
+static inline void bpf_restore_data_pointers(
+	struct sk_buff *skb, void *saved_pointers[2])
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	cb->data_meta = saved_pointers[0];
+	cb->data_end = saved_pointers[1];;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
 	/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..5f5180104ddc 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -554,6 +554,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	unsigned int offset = skb->data - skb_network_header(skb);
 	struct sock *save_sk;
 	struct cgroup *cgrp;
+	void *saved_pointers[2];
 	int ret;
 
 	if (!sk || !sk_fullsock(sk))
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	save_sk = skb->sk;
 	skb->sk = sk;
 	__skb_push(skb, offset);
+
+	/* compute pointers for the bpf prog */
+	bpf_compute_and_save_data_pointers(skb, saved_pointers);
+
 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 				 bpf_prog_run_save_cb);
+	bpf_restore_data_pointers(skb, saved_pointers);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
 	return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
 	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
+		return false;
+	}
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case bpf_ctx_range(struct __sk_buff, mark):
+		case bpf_ctx_range(struct __sk_buff, priority):
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
 				enum bpf_access_type type,
 				const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
 	.get_func_proto		= cg_skb_func_proto,
-	.is_valid_access	= sk_filter_is_valid_access,
+	.is_valid_access	= cg_skb_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 };
 
-- 
2.17.1

^ permalink raw reply related

* [PATCH v3 bpf-next 0/2] bpf: add cg_skb_is_valid_access
From: Song Liu @ 2018-10-18  5:39 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, kernel-team, Song Liu

Changes v2 -> v3:
1. Added helper function bpf_compute_and_save_data_pointers() and
   bpf_restore_data_pointers().

Changes v1 -> v2:
1. Updated the list of read-only fields, and read-write fields.
2. Added dummy sk to bpf_prog_test_run_skb().

This set enables BPF program of type BPF_PROG_TYPE_CGROUP_SKB to access
some __skb_buff data directly.

Song Liu (2):
  bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB
  bpf: add tests for direct packet access from CGROUP_SKB

 include/linux/filter.h                      |  24 +++
 kernel/bpf/cgroup.c                         |   6 +
 net/bpf/test_run.c                          |   4 +
 net/core/filter.c                           |  36 ++++-
 tools/testing/selftests/bpf/test_verifier.c | 170 ++++++++++++++++++++
 5 files changed, 239 insertions(+), 1 deletion(-)

^ permalink raw reply

* [PATCH v3 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB
From: Song Liu @ 2018-10-18  5:39 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, kernel-team, Song Liu
In-Reply-To: <20181018053949.4064426-1-songliubraving@fb.com>

Tests are added to make sure CGROUP_SKB cannot access:
  tc_classid, data_meta, flow_keys

and can read and write:
  mark, prority, and cb[0-4]

and can read other fields.

To make selftest with skb->sk work, a dummy sk is added in
bpf_prog_test_run_skb().

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 net/bpf/test_run.c                          |   4 +
 tools/testing/selftests/bpf/test_verifier.c | 170 ++++++++++++++++++++
 2 files changed, 174 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0c423b8cd75c..c7210e2f1ae9 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,7 @@
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
 #include <linux/sched/signal.h>
+#include <net/sock.h>
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
 		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -115,6 +116,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	u32 retval, duration;
 	int hh_len = ETH_HLEN;
 	struct sk_buff *skb;
+	struct sock sk;
 	void *data;
 	int ret;
 
@@ -142,6 +144,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 		kfree(data);
 		return -ENOMEM;
 	}
+	sock_init_data(NULL, &sk);
+	skb->sk = &sk;
 
 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 	__skb_put(skb, size);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..5bfba7e8afd7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4862,6 +4862,176 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
+	{
+		"direct packet read test#1 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+			    	    offsetof(struct __sk_buff, pkt_type)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, queue_mapping)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, protocol)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_present)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#2 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_tci)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+			    	    offsetof(struct __sk_buff, vlan_proto)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, ingress_ifindex)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#3 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+			    	    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"direct packet read test#4 for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, family)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip4)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip4)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_port)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_port)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of tc_classid for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of data_meta for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_meta)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid access of flow_keys for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, flow_keys)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid write access to napi_id for CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
 	{
 		"valid cgroup storage access",
 		.insns = {
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH net] net: ipmr: fix unresolved entry dumps
From: David Miller @ 2018-10-18  5:36 UTC (permalink / raw)
  To: nikolay; +Cc: netdev
In-Reply-To: <20181017193434.11383-1-nikolay@cumulusnetworks.com>

From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Wed, 17 Oct 2018 22:34:34 +0300

> If the skb space ends in an unresolved entry while dumping we'll miss
> some unresolved entries. The reason is due to zeroing the entry counter
> between dumping resolved and unresolved mfc entries. We should just
> keep counting until the whole table is dumped and zero when we move to
> the next as we have a separate table counter.
> 
> Reported-by: Colin Ian King <colin.king@canonical.com>
> Fixes: 8fb472c09b9d ("ipmr: improve hash scalability")
> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH net] net/sched: properly init chain in case of multiple control actions
From: Cong Wang @ 2018-10-18  5:35 UTC (permalink / raw)
  To: Davide Caratti
  Cc: Jiri Pirko, Jamal Hadi Salim, David Miller,
	Linux Kernel Network Developers
In-Reply-To: <ddbde7ffec27783a65a63e89962bf9db7565f0e0.camel@redhat.com>

On Tue, Oct 16, 2018 at 10:38 AM Davide Caratti <dcaratti@redhat.com> wrote:
>
> On Mon, 2018-10-15 at 11:31 -0700, Cong Wang wrote:
> > On Sat, Oct 13, 2018 at 8:23 AM Davide Caratti <dcaratti@redhat.com> wrote:
> > >
> > > On Fri, 2018-10-12 at 13:57 -0700, Cong Wang wrote:
> > > > Why not just validate the fallback action in each action init()?
> > > > For example, checking tcfg_paction in tcf_gact_init().
> > > >
> > > > I don't see the need of making it generic.
> ...
> > > A (legal?) trick  is to let tcf_action store the fallback action when it
> > > contains a 'goto chain' command, I just posted a proposal for gact. If you
> > > think it's ok, I will test and post the same for act_police.
> >
> > Do we really need to support TC_ACT_GOTO_CHAIN for
> > gact->tcfg_paction etc.? I mean, is it useful in practice or is it just for
> > completeness?
> >
> > IF we don't need to support it, we can just make it invalid without needing
> > to initialize it in ->init() at all.
> >
> > If we do, however, we really need to move it into each ->init(), because
> > we have to lock each action if we are modifying an existing one. With
> > your patch, tcf_action_goto_chain_init() is still called without the per-action
> > lock.
> >
> > What's more, if we support two different actions in gact, that is, tcfg_paction
> > and tcf_action, how could you still only have one a->goto_chain pointer?
> > There should be two pointers for each of them. :)
>
> whatever fixes the NULL dereference is OK for me.
> I thought that the proposal made with
>
> https://www.mail-archive.com/netdev@vger.kernel.org/msg251933.html
>
> (i.e., letting init() copy tcfg_paction to tcf_action in case it contained
> 'goto chain x') was smart enough to preserve the current behavior, and
> also let 'goto chain' work in case it was configured  *only* for the
> fallback action.
> When the action is modified, the change to tcfg_paction is done with the
> same spinlock as tcf_action, so I didn't notice anything worse than the
> current locking layout.
>
> (well, after some more thinking I looked again at that patch and yes, it
> lacked the most important thing:)

Hmm, as I said, I am not sure if the logic is correct, if we have two different
goto actions, we must have two pointers.

I will re-think about it tomorrow. (I am at a conference so don't have much
time on reviewing this.)

Thanks.

^ permalink raw reply

* Re: [PATCH net] sctp: fix the data size calculation in sctp_data_size
From: David Miller @ 2018-10-18  5:33 UTC (permalink / raw)
  To: lucien.xin; +Cc: netdev, linux-sctp, marcelo.leitner, nhorman
In-Reply-To: <c407b682a424f0441d9b9a29ef6296c8e9824b64.1539781887.git.lucien.xin@gmail.com>

From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 17 Oct 2018 21:11:27 +0800

> sctp data size should be calculated by subtracting data chunk header's
> length from chunk_hdr->length, not just data header.
> 
> Fixes: 668c9beb9020 ("sctp: implement assign_number for sctp_stream_interleave")
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH V1 net-next] net: ena: enable Low Latency Queues
From: David Miller @ 2018-10-18  5:31 UTC (permalink / raw)
  To: akiyano
  Cc: netdev, dwmw, zorik, matua, saeedb, msw, aliguori, nafea, gtzalik,
	netanel, alisaidi
In-Reply-To: <1539779603-17895-1-git-send-email-akiyano@amazon.com>

From: <akiyano@amazon.com>
Date: Wed, 17 Oct 2018 15:33:23 +0300

> From: Arthur Kiyanovski <akiyano@amazon.com>
> 
> Use the new API to enable usage of LLQ.
> 
> Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>

Applied.

^ permalink raw reply

* Re: [PATCH V2 net-next] net: ena: Fix Kconfig dependency on X86
From: David Miller @ 2018-10-18  5:28 UTC (permalink / raw)
  To: netanel
  Cc: netdev, akiyano, alisaidi, dwmw, zorik, matua, saeedb, msw,
	aliguori, nafea, gtzalik
In-Reply-To: <20181017100421.56045-1-netanel@amazon.com>

From: <netanel@amazon.com>
Date: Wed, 17 Oct 2018 10:04:21 +0000

> From: Netanel Belgazal <netanel@amazon.com>
> 
> The Kconfig limitation of X86 is to too wide.
> The ENA driver only requires a little endian dependency.
> 
> Change the dependency to be on little endian CPU.
> 
> Signed-off-by: Netanel Belgazal <netanel@amazon.com>

Applied.

^ permalink raw reply

* Re: [PATCH net] udp6: fix encap return code for resubmitting
From: David Miller @ 2018-10-18  5:27 UTC (permalink / raw)
  To: pabeni; +Cc: netdev
In-Reply-To: <1e792b80ae514e944f868c904c30e797e02b418c.1539769397.git.pabeni@redhat.com>

From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 17 Oct 2018 11:44:04 +0200

> The commit eb63f2964dbe ("udp6: add missing checks on edumux packet
> processing") used the same return code convention of the ipv4 counterpart,
> but ipv6 uses the opposite one: positive values means resubmit.
> 
> This change addresses the issue, using positive return value for
> resubmitting. Also update the related comment, which was broken, too.
> 
> Fixes: eb63f2964dbe ("udp6: add missing checks on edumux packet processing")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> Note: I could not find any in kernel udp6 encap using the above
> feature, that would explain why nobody complained so far...

Applied.

^ permalink raw reply

* Re: [PATCH net] mlxsw: core: Fix use-after-free when flashing firmware during init
From: David Miller @ 2018-10-18  5:27 UTC (permalink / raw)
  To: idosch; +Cc: netdev, jiri, petrm, alexpe, mlxsw
In-Reply-To: <20181017080525.21856-1-idosch@mellanox.com>

From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Oct 2018 08:05:45 +0000

> When the switch driver (e.g., mlxsw_spectrum) determines it needs to
> flash a new firmware version it resets the ASIC after the flashing
> process. The bus driver (e.g., mlxsw_pci) then registers itself again
> with mlxsw_core which means (among other things) that the device
> registers itself again with the hwmon subsystem again.
> 
> Since the device was registered with the hwmon subsystem using
> devm_hwmon_device_register_with_groups(), then the old hwmon device
> (registered before the flashing) was never unregistered and was
> referencing stale data, resulting in a use-after free.
> 
> Fix by removing reliance on device managed APIs in mlxsw_hwmon_init().
> 
> Fixes: c86d62cc410c ("mlxsw: spectrum: Reset FW after flash")
> Signed-off-by: Ido Schimmel <idosch@mellanox.com>
> Reported-by: Alexander Petrovskiy <alexpe@mellanox.com>
> Tested-by: Alexander Petrovskiy <alexpe@mellanox.com>
> Reviewed-by: Petr Machata <petrm@mellanox.com>

Applied.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox