* [RFC PATCH 3/4] udp: implement GRO plain UDP sockets.
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert
In-Reply-To: <cover.1536939423.git.pabeni@redhat.com>
This is the RX counter part of commit bec1f6f69736 ("udp: generate gso
with UDP_SEGMENT"). When UDP_SEGMENT is enabled, such socket is also
eligible for GRO in the rx path: UDP segments directed to such socket
are assembled into a larger GSO_UDP_L4 packet.
The core UDP GRO support is enabled/updated on setsockopt(UDP_SEGMENT) and
disabled, if needed at socket destruction time.
Initial benchmark numbers:
Before:
udp rx: 1079 MB/s 769065 calls/s
After:
udp rx: 1466 MB/s 24877 calls/s
This change introduces a side effect in respect to UDP tunnels:
after an UDP tunnel creation, now the kernel performs a lookup per ingress UDP
packet, before such lookup happended only if the ingress packet carried a valid
internal header csum.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
include/linux/udp.h | 2 +-
net/ipv4/udp.c | 1 +
net/ipv4/udp_offload.c | 107 +++++++++++++++++++++++++++++++++--------
net/ipv6/udp_offload.c | 6 +--
4 files changed, 90 insertions(+), 26 deletions(-)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 56a321a55ba1..27dea956ef6e 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -128,7 +128,7 @@ static inline bool udp_get_gro_in_use(struct sock *sk)
static inline bool udp_want_gro(struct sock *sk)
{
- return udp_sk(sk)->gro_receive;
+ return udp_sk(sk)->gro_receive || udp_sk(sk)->gso_size;
}
#define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5ac794230013..871ee55afd96 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2450,6 +2450,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
if (val < 0 || val > USHRT_MAX)
return -EINVAL;
up->gso_size = val;
+ udp_update_gro_in_use(sk, udp_want_gro(sk));
break;
/*
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 08b225adf763..4ff150bb84de 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -347,6 +347,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
return segs;
}
+#define UDO_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+ struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_hdr(skb);
+ struct sk_buff *pp = NULL;
+ struct udphdr *uh2;
+ struct sk_buff *p;
+
+ /* requires non zero csum, for simmetry with GSO */
+ if (!uh->check) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+
+ /* pull encapsulating udp header */
+ skb_gro_pull(skb, sizeof(struct udphdr));
+ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ uh2 = udp_hdr(p);
+
+ /* Match ports only, as csum is always non zero */
+ if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ /* Terminate the flow on len mismatch or if it grow "too much".
+ * Under small packet flood GRO count could elsewhere grow a lot
+ * leading to execessive truesize values
+ */
+ if (!skb_gro_receive(p, skb) &&
+ NAPI_GRO_CB(p)->count > UDO_GRO_CNT_MAX)
+ pp = p;
+ else if (uh->len != uh2->len)
+ pp = p;
+
+ return pp;
+ }
+
+ /* mismatch, but we never need to flush */
+ return NULL;
+}
+
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct udphdr *uh, udp_lookup_t lookup)
{
@@ -357,23 +405,29 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
int flush = 1;
struct sock *sk;
+ rcu_read_lock();
+ sk = (*lookup)(skb, uh->source, uh->dest);
+ if (!sk)
+ goto out_unlock;
+
+ if (udp_sk(sk)->gso_size) {
+ pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+ rcu_read_unlock();
+ return pp;
+ }
+
if (NAPI_GRO_CB(skb)->encap_mark ||
(skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid))
- goto out;
+ goto out_unlock;
/* mark that this skb passed once through the tunnel gro layer */
NAPI_GRO_CB(skb)->encap_mark = 1;
- rcu_read_lock();
- sk = (*lookup)(skb, uh->source, uh->dest);
-
- if (sk && udp_sk(sk)->gro_receive)
- goto unflush;
- goto out_unlock;
+ if (!udp_sk(sk)->gro_receive)
+ goto out_unlock;
-unflush:
flush = 0;
list_for_each_entry(p, head, list) {
@@ -398,7 +452,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
out_unlock:
rcu_read_unlock();
-out:
skb_gro_flush_final(skb, pp, flush);
return pp;
}
@@ -431,6 +484,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
return NULL;
}
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+ return 0;
+}
+
int udp_gro_complete(struct sk_buff *skb, int nhoff,
udp_lookup_t lookup)
{
@@ -441,16 +507,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
uh->len = newlen;
- /* Set encapsulation before calling into inner gro_complete() functions
- * to make them set up the inner offsets.
- */
- skb->encapsulation = 1;
-
rcu_read_lock();
sk = (*lookup)(skb, uh->source, uh->dest);
- if (sk && udp_sk(sk)->gro_complete)
+ if (sk && udp_sk(sk)->gso_size) {
+ err = udp_gro_complete_segment(skb);
+ } else if (sk && udp_sk(sk)->gro_complete) {
+ skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+ : SKB_GSO_UDP_TUNNEL;
+
+ /* Set encapsulation before calling into inner gro_complete()
+ * functions to make them set up the inner offsets.
+ */
+ skb->encapsulation = 1;
err = udp_sk(sk)->gro_complete(sk, skb,
nhoff + sizeof(struct udphdr));
+ }
rcu_read_unlock();
if (skb->remcsum_offload)
@@ -465,13 +536,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
- if (uh->check) {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ if (uh->check)
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
iph->daddr, 0);
- } else {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- }
return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 1df968a3e788..f2731b7b4c75 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
- if (uh->check) {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ if (uh->check)
uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
&ipv6h->daddr, 0);
- } else {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- }
return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
--
2.17.1
^ permalink raw reply related
* [RFC PATCH 4/4] selftests: add GRO support, fix port option processing
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert
In-Reply-To: <cover.1536939423.git.pabeni@redhat.com>
Not a full test-case yet, but allows triggering the UDP GSO code
path.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
tools/testing/selftests/net/udpgso_bench_rx.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index 727cf67a3f75..f8bb7ea6bd25 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -31,9 +31,14 @@
#include <sys/wait.h>
#include <unistd.h>
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
static int cfg_port = 8000;
static bool cfg_tcp;
static bool cfg_verify;
+static bool cfg_gro_segment;
static bool interrupted;
static unsigned long packets, bytes;
@@ -199,10 +204,13 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "ptv")) != -1) {
+ while ((c = getopt(argc, argv, "p:Stv")) != -1) {
switch (c) {
case 'p':
- cfg_port = htons(strtoul(optarg, NULL, 0));
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ cfg_gro_segment = true;
break;
case 't':
cfg_tcp = true;
@@ -227,6 +235,12 @@ static void do_recv(void)
fd = do_socket(cfg_tcp);
+ if (cfg_gro_segment) {
+ int val = 1;
+ if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT, &val, sizeof(val)))
+ error(1, errno, "setsockopt UDP_SEGMENT");
+ }
+
treport = gettimeofday_ms() + 1000;
do {
do_poll(fd);
--
2.17.1
^ permalink raw reply related
* Re: [PATCH net] net/sched: act_sample: fix NULL dereference in the data path
From: David Miller @ 2018-09-14 15:47 UTC (permalink / raw)
To: dcaratti; +Cc: mcroce, jhs, xiyou.wangcong, jiri, netdev
In-Reply-To: <2fcef6665503c5e3b17676daf45c4166cf130cdb.1536919144.git.dcaratti@redhat.com>
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 14 Sep 2018 12:03:18 +0200
> Matteo reported the following splat, testing the datapath of TC 'sample':
...
> tcf_sample_act() tried to update its per-cpu stats, but tcf_sample_init()
> forgot to allocate them, because tcf_idr_create() was called with a wrong
> value of 'cpustats'. Setting it to true proved to fix the reported crash.
>
> Reported-by: Matteo Croce <mcroce@redhat.com>
> Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
> Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action")
> Tested-by: Matteo Croce <mcroce@redhat.com>
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Applied and queued up for -stable, thanks.
^ permalink raw reply
* Re: [PATCH net-next] cxgb4: update supported DCB version
From: David Miller @ 2018-09-14 15:52 UTC (permalink / raw)
To: ganeshgr; +Cc: netdev, nirranjan, indranil, dt, varun
In-Reply-To: <1536926755-29242-1-git-send-email-ganeshgr@chelsio.com>
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 14 Sep 2018 17:35:55 +0530
> - In CXGB4_DCB_STATE_FW_INCOMPLETE state check if the dcb
> version is changed and update the dcb supported version.
>
> - Also, fill the priority code point value for priority
> based flow control.
>
> Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Applied, thank you.
^ permalink raw reply
* Re: [PATCH] net/mlx4_core: print firmware version during driver loading
From: David Miller @ 2018-09-14 21:09 UTC (permalink / raw)
To: qing.huang; +Cc: leon, netdev, linux-rdma, linux-kernel, tariqt
In-Reply-To: <c580ad9d-b63d-743b-2278-1c4cf3553186@oracle.com>
From: Qing Huang <qing.huang@oracle.com>
Date: Fri, 14 Sep 2018 10:15:48 -0700
> IMHO, this information is very useful and only takes up very little
> log file space. :-)
If it's critical then the log is the wrong place for it as the log
is lossy.
The proper place to obtain this information is via the fw_version
field of the ethtool_drvinfo struct. This can be obtained at any time
and is reliable. And if it isn't reliable or correct, we must fix
that.
^ permalink raw reply
* Re: [PATCH] net/mlx4_core: print firmware version during driver loading
From: David Miller @ 2018-09-14 21:14 UTC (permalink / raw)
To: qing.huang; +Cc: andrew, leon, netdev, linux-rdma, linux-kernel, tariqt
In-Reply-To: <c332f10a-5ad0-53fd-5111-1964443b3092@oracle.com>
From: Qing Huang <qing.huang@oracle.com>
Date: Fri, 14 Sep 2018 11:33:40 -0700
>
>
> On 9/14/2018 11:17 AM, Andrew Lunn wrote:
>> On Fri, Sep 14, 2018 at 10:15:48AM -0700, Qing Huang wrote:
>>> The FW version is actually a very crucial piece of information and
>>> only
>>> printed once here
>>> when the driver is loaded. People tend to get confused when switching
>>> multiple FW files
>>> back and forth without running separate utility tools, especially at
>>> customer sites.
>>> IMHO, this information is very useful and only takes up very little
>>> log file
>>> space. :-)
>> Why not use ethtool -i ?
>>
>> $ sudo ethtool -i eth0
>> driver: r8169
>> version: 2.3LK-NAPI
>> firmware-version: rtl8168g-2_0.0.1 02/06/13
>>
>> Andrew
> Sure. You can also use ibstat or ibv_devinfo tool if they are
> installed. But it's not very
> convenient in some cases.
>
> E.g.
> A customer upgrades FW on HCAs and encounters issues. During triage,
> it's much easier
> to study customer uploaded log files when remotely testing different
> FW files.
Not a valid argument. You can print the ethtool output from initramfs
if necessary for triage.
I still stand by the fact that ethtool is the only fully reliable way
to obtain this information, the kernel log is not.
^ permalink raw reply
* Re: [PATCH] net/mlx4_core: print firmware version during driver loading
From: David Miller @ 2018-09-14 21:14 UTC (permalink / raw)
To: andrew; +Cc: qing.huang, leon, netdev, linux-rdma, linux-kernel, tariqt
In-Reply-To: <20180914181718.GD3811@lunn.ch>
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 14 Sep 2018 20:17:18 +0200
> On Fri, Sep 14, 2018 at 10:15:48AM -0700, Qing Huang wrote:
>> The FW version is actually a very crucial piece of information and only
>> printed once here
>> when the driver is loaded. People tend to get confused when switching
>> multiple FW files
>> back and forth without running separate utility tools, especially at
>> customer sites.
>> IMHO, this information is very useful and only takes up very little log file
>> space. :-)
>
> Why not use ethtool -i ?
>
> $ sudo ethtool -i eth0
> driver: r8169
> version: 2.3LK-NAPI
> firmware-version: rtl8168g-2_0.0.1 02/06/13
+1
^ permalink raw reply
* Re: Project Financing
From: Gabriel Walker @ 2018-09-14 16:13 UTC (permalink / raw)
Thank you for your time,
We are looking for clients in your country with good business or project that requires financing to execute.
Do get back to me if you are interested in this or you know anybody who has good business ideas but lack the necessary capital to fund his projects so we can establish working relationship.
Sincerely,
John Hanan, MBA, CFA
General Investment Consultant
^ permalink raw reply
* Re: [PATCH net] pppoe: fix reception of frames with no mac header
From: Alexander Potapenko @ 2018-09-14 16:21 UTC (permalink / raw)
To: g.nault; +Cc: Networking, mostrows, Eric Dumazet
In-Reply-To: <20180914143459.GC1507@alphalink.fr>
On Fri, Sep 14, 2018 at 4:35 PM Guillaume Nault <g.nault@alphalink.fr> wrote:
>
> On Fri, Sep 14, 2018 at 04:28:05PM +0200, Guillaume Nault wrote:
> > pppoe_rcv() needs to look back at the Ethernet header in order to
> > lookup the PPPoE session. Therefore we need to ensure that the mac
> > header is big enough to contain an Ethernet header. Otherwise
> > eth_hdr(skb)->h_source might access invalid data.
> >
> Forgot to Cc Alexander :/
> Sorry...
> BTW, thanks for your first analysis.
Thank you!
--
Alexander Potapenko
Software Engineer
Google Germany GmbH
Erika-Mann-Straße, 33
80636 München
Geschäftsführer: Paul Manicle, Halimah DeLaine Prado
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg
^ permalink raw reply
* [PATCH net-next v4 12/20] zinc: BLAKE2s generic C implementation and selftest
From: Jason A. Donenfeld @ 2018-09-14 16:22 UTC (permalink / raw)
To: linux-kernel, netdev, linux-crypto, davem, gregkh
Cc: Jason A. Donenfeld, Samuel Neves, Andy Lutomirski,
Jean-Philippe Aumasson
In-Reply-To: <20180914162240.7925-1-Jason@zx2c4.com>
The C implementation was originally based on Samuel Neves' public
domain reference implementation but has since been heavily modified
for the kernel. We're able to do compile-time optimizations by moving
some scaffolding around the final function into the header file.
Information: https://blake2.net/
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
---
include/zinc/blake2s.h | 101 ++
lib/zinc/Kconfig | 4 +
lib/zinc/Makefile | 4 +
lib/zinc/blake2s/blake2s.c | 274 +++++
lib/zinc/main.c | 5 +
lib/zinc/selftest/blake2s.h | 2095 +++++++++++++++++++++++++++++++++++
6 files changed, 2483 insertions(+)
create mode 100644 include/zinc/blake2s.h
create mode 100644 lib/zinc/blake2s/blake2s.c
create mode 100644 lib/zinc/selftest/blake2s.h
diff --git a/include/zinc/blake2s.h b/include/zinc/blake2s.h
new file mode 100644
index 000000000000..5e32d576e86a
--- /dev/null
+++ b/include/zinc/blake2s.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _ZINC_BLAKE2S_H
+#define _ZINC_BLAKE2S_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <crypto/algapi.h>
+
+enum blake2s_lengths {
+ BLAKE2S_BLOCKBYTES = 64,
+ BLAKE2S_OUTBYTES = 32,
+ BLAKE2S_KEYBYTES = 32
+};
+
+struct blake2s_state {
+ u32 h[8];
+ u32 t[2];
+ u32 f[2];
+ u8 buf[BLAKE2S_BLOCKBYTES];
+ size_t buflen;
+ u8 last_node;
+};
+
+void blake2s_init(struct blake2s_state *state, const size_t outlen);
+void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
+ const void *key, const size_t keylen);
+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
+void __blake2s_final(struct blake2s_state *state);
+static inline void blake2s_final(struct blake2s_state *state, u8 *out,
+ const size_t outlen)
+{
+ int i;
+
+#ifdef DEBUG
+ BUG_ON(!out || !outlen || outlen > BLAKE2S_OUTBYTES);
+#endif
+ __blake2s_final(state);
+
+ if (__builtin_constant_p(outlen) && !(outlen % sizeof(u32))) {
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+ IS_ALIGNED((unsigned long)out, __alignof__(u32))) {
+ __le32 *outwords = (__le32 *)out;
+
+ for (i = 0; i < outlen / sizeof(u32); ++i)
+ outwords[i] = cpu_to_le32(state->h[i]);
+ } else {
+ __le32 buffer[BLAKE2S_OUTBYTES];
+
+ for (i = 0; i < outlen / sizeof(u32); ++i)
+ buffer[i] = cpu_to_le32(state->h[i]);
+ memcpy(out, buffer, outlen);
+ memzero_explicit(buffer, sizeof(buffer));
+ }
+ } else {
+ u8 buffer[BLAKE2S_OUTBYTES] __aligned(__alignof__(u32));
+ __le32 *outwords = (__le32 *)buffer;
+
+ for (i = 0; i < 8; ++i)
+ outwords[i] = cpu_to_le32(state->h[i]);
+ memcpy(out, buffer, outlen);
+ memzero_explicit(buffer, sizeof(buffer));
+ }
+
+ memzero_explicit(state, sizeof(*state));
+}
+
+static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
+ const size_t outlen, const size_t inlen,
+ const size_t keylen)
+{
+ struct blake2s_state state;
+
+#ifdef DEBUG
+ BUG_ON((!in && inlen > 0) || !out || !outlen ||
+ outlen > BLAKE2S_OUTBYTES || keylen > BLAKE2S_KEYBYTES ||
+ (!key && keylen));
+#endif
+
+ if (keylen)
+ blake2s_init_key(&state, outlen, key, keylen);
+ else
+ blake2s_init(&state, outlen);
+
+ blake2s_update(&state, in, inlen);
+ blake2s_final(&state, out, outlen);
+}
+
+void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
+ const size_t inlen, const size_t keylen);
+
+void blake2s_fpu_init(void);
+
+#ifdef DEBUG
+bool blake2s_selftest(void);
+#endif
+
+#endif /* _ZINC_BLAKE2S_H */
diff --git a/lib/zinc/Kconfig b/lib/zinc/Kconfig
index 98d095ed2418..4db30012c2d6 100644
--- a/lib/zinc/Kconfig
+++ b/lib/zinc/Kconfig
@@ -17,6 +17,10 @@ config ZINC_CHACHA20POLY1305
select ZINC_POLY1305
select CRYPTO_BLKCIPHER
+config ZINC_BLAKE2S
+ bool
+ select ZINC
+
config ZINC_DEBUG
bool "Zinc cryptography library debugging and self-tests"
depends on ZINC
diff --git a/lib/zinc/Makefile b/lib/zinc/Makefile
index 1664871aaf71..45817ec5539c 100644
--- a/lib/zinc/Makefile
+++ b/lib/zinc/Makefile
@@ -51,6 +51,10 @@ ifeq ($(CONFIG_ZINC_CHACHA20POLY1305),y)
zinc-y += chacha20poly1305.o
endif
+ifeq ($(CONFIG_ZINC_BLAKE2S),y)
+zinc-y += blake2s/blake2s.o
+endif
+
zinc-y += main.o
obj-$(CONFIG_ZINC) := zinc.o
diff --git a/lib/zinc/blake2s/blake2s.c b/lib/zinc/blake2s/blake2s.c
new file mode 100644
index 000000000000..13765d327589
--- /dev/null
+++ b/lib/zinc/blake2s/blake2s.c
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2012 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is an implementation of the BLAKE2s hash and PRF functions.
+ *
+ * Information: https://blake2.net/
+ *
+ */
+
+#include <zinc/blake2s.h>
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <asm/unaligned.h>
+
+typedef union {
+ struct {
+ u8 digest_length;
+ u8 key_length;
+ u8 fanout;
+ u8 depth;
+ u32 leaf_length;
+ u32 node_offset;
+ u16 xof_length;
+ u8 node_depth;
+ u8 inner_length;
+ u8 salt[8];
+ u8 personal[8];
+ };
+ __le32 words[8];
+} __packed blake2s_param;
+
+static const u32 blake2s_iv[8] = {
+ 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+ 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
+};
+
+static const u8 blake2s_sigma[10][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+};
+
+static inline void blake2s_set_lastblock(struct blake2s_state *state)
+{
+ if (state->last_node)
+ state->f[1] = -1;
+ state->f[0] = -1;
+}
+
+static inline void blake2s_increment_counter(struct blake2s_state *state,
+ const u32 inc)
+{
+ state->t[0] += inc;
+ state->t[1] += (state->t[0] < inc);
+}
+
+static inline void blake2s_init_param(struct blake2s_state *state,
+ const blake2s_param *param)
+{
+ int i;
+
+ memset(state, 0, sizeof(*state));
+ for (i = 0; i < 8; ++i)
+ state->h[i] = blake2s_iv[i] ^ le32_to_cpu(param->words[i]);
+}
+
+void blake2s_init(struct blake2s_state *state, const size_t outlen)
+{
+ blake2s_param param __aligned(__alignof__(u32)) = {
+ .digest_length = outlen,
+ .fanout = 1,
+ .depth = 1
+ };
+
+#ifdef DEBUG
+ BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES);
+#endif
+ blake2s_init_param(state, ¶m);
+}
+EXPORT_SYMBOL(blake2s_init);
+
+void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
+ const void *key, const size_t keylen)
+{
+ blake2s_param param = { .digest_length = outlen,
+ .key_length = keylen,
+ .fanout = 1,
+ .depth = 1 };
+ u8 block[BLAKE2S_BLOCKBYTES] = { 0 };
+
+#ifdef DEBUG
+ BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES || !key || !keylen ||
+ keylen > BLAKE2S_KEYBYTES);
+#endif
+ blake2s_init_param(state, ¶m);
+ memcpy(block, key, keylen);
+ blake2s_update(state, block, BLAKE2S_BLOCKBYTES);
+ memzero_explicit(block, BLAKE2S_BLOCKBYTES);
+}
+EXPORT_SYMBOL(blake2s_init_key);
+
+#ifndef HAVE_BLAKE2S_ARCH_IMPLEMENTATION
+void __init blake2s_fpu_init(void)
+{
+}
+static inline bool blake2s_arch(struct blake2s_state *state, const u8 *block,
+ const size_t nblocks, const u32 inc)
+{
+ return false;
+}
+#endif
+
+static inline void blake2s_compress(struct blake2s_state *state,
+ const u8 *block, size_t nblocks,
+ const u32 inc)
+{
+ u32 m[16];
+ u32 v[16];
+ int i;
+
+#ifdef DEBUG
+ BUG_ON(nblocks > 1 && inc != BLAKE2S_BLOCKBYTES);
+#endif
+
+ if (blake2s_arch(state, block, nblocks, inc))
+ return;
+
+ while (nblocks > 0) {
+ blake2s_increment_counter(state, inc);
+
+#ifdef __LITTLE_ENDIAN
+ memcpy(m, block, BLAKE2S_BLOCKBYTES);
+#else
+ for (i = 0; i < 16; ++i)
+ m[i] = get_unaligned_le32(block + i * sizeof(m[i]));
+#endif
+ memcpy(v, state->h, 32);
+ v[ 8] = blake2s_iv[0];
+ v[ 9] = blake2s_iv[1];
+ v[10] = blake2s_iv[2];
+ v[11] = blake2s_iv[3];
+ v[12] = blake2s_iv[4] ^ state->t[0];
+ v[13] = blake2s_iv[5] ^ state->t[1];
+ v[14] = blake2s_iv[6] ^ state->f[0];
+ v[15] = blake2s_iv[7] ^ state->f[1];
+
+#define G(r, i, a, b, c, d) do { \
+ a += b + m[blake2s_sigma[r][2 * i + 0]]; \
+ d = ror32(d ^ a, 16); \
+ c += d; \
+ b = ror32(b ^ c, 12); \
+ a += b + m[blake2s_sigma[r][2 * i + 1]]; \
+ d = ror32(d ^ a, 8); \
+ c += d; \
+ b = ror32(b ^ c, 7); \
+} while (0)
+
+#define ROUND(r) do { \
+ G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
+ G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
+ G(r, 2, v[2], v[ 6], v[10], v[14]); \
+ G(r, 3, v[3], v[ 7], v[11], v[15]); \
+ G(r, 4, v[0], v[ 5], v[10], v[15]); \
+ G(r, 5, v[1], v[ 6], v[11], v[12]); \
+ G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
+ G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
+} while (0)
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+
+#undef G
+#undef ROUND
+
+ for (i = 0; i < 8; ++i)
+ state->h[i] ^= v[i] ^ v[i + 8];
+
+ block += BLAKE2S_BLOCKBYTES;
+ --nblocks;
+ }
+}
+
+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
+{
+ const size_t fill = BLAKE2S_BLOCKBYTES - state->buflen;
+
+ if (unlikely(!inlen))
+ return;
+ if (inlen > fill) {
+ memcpy(state->buf + state->buflen, in, fill);
+ blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCKBYTES);
+ state->buflen = 0;
+ in += fill;
+ inlen -= fill;
+ }
+ if (inlen > BLAKE2S_BLOCKBYTES) {
+ const size_t nblocks =
+ (inlen + BLAKE2S_BLOCKBYTES - 1) / BLAKE2S_BLOCKBYTES;
+ /* Hash one less (full) block than strictly possible */
+ blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCKBYTES);
+ in += BLAKE2S_BLOCKBYTES * (nblocks - 1);
+ inlen -= BLAKE2S_BLOCKBYTES * (nblocks - 1);
+ }
+ memcpy(state->buf + state->buflen, in, inlen);
+ state->buflen += inlen;
+}
+EXPORT_SYMBOL(blake2s_update);
+
+void __blake2s_final(struct blake2s_state *state)
+{
+ blake2s_set_lastblock(state);
+ memset(state->buf + state->buflen, 0,
+ BLAKE2S_BLOCKBYTES - state->buflen); /* Padding */
+ blake2s_compress(state, state->buf, 1, state->buflen);
+}
+EXPORT_SYMBOL(__blake2s_final);
+
+void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
+ const size_t inlen, const size_t keylen)
+{
+ struct blake2s_state state;
+ u8 x_key[BLAKE2S_BLOCKBYTES] __aligned(__alignof__(u32)) = { 0 };
+ u8 i_hash[BLAKE2S_OUTBYTES] __aligned(__alignof__(u32));
+ int i;
+
+ if (keylen > BLAKE2S_BLOCKBYTES) {
+ blake2s_init(&state, BLAKE2S_OUTBYTES);
+ blake2s_update(&state, key, keylen);
+ blake2s_final(&state, x_key, BLAKE2S_OUTBYTES);
+ } else
+ memcpy(x_key, key, keylen);
+
+ for (i = 0; i < BLAKE2S_BLOCKBYTES; ++i)
+ x_key[i] ^= 0x36;
+
+ blake2s_init(&state, BLAKE2S_OUTBYTES);
+ blake2s_update(&state, x_key, BLAKE2S_BLOCKBYTES);
+ blake2s_update(&state, in, inlen);
+ blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES);
+
+ for (i = 0; i < BLAKE2S_BLOCKBYTES; ++i)
+ x_key[i] ^= 0x5c ^ 0x36;
+
+ blake2s_init(&state, BLAKE2S_OUTBYTES);
+ blake2s_update(&state, x_key, BLAKE2S_BLOCKBYTES);
+ blake2s_update(&state, i_hash, BLAKE2S_OUTBYTES);
+ blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES);
+
+ memcpy(out, i_hash, outlen);
+ memzero_explicit(x_key, BLAKE2S_BLOCKBYTES);
+ memzero_explicit(i_hash, BLAKE2S_OUTBYTES);
+}
+EXPORT_SYMBOL(blake2s_hmac);
+
+#include "../selftest/blake2s.h"
diff --git a/lib/zinc/main.c b/lib/zinc/main.c
index a15b27fd0e4c..b1d650d82edb 100644
--- a/lib/zinc/main.c
+++ b/lib/zinc/main.c
@@ -6,6 +6,7 @@
#include <zinc/chacha20poly1305.h>
#include <zinc/chacha20.h>
#include <zinc/poly1305.h>
+#include <zinc/blake2s.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -30,6 +31,10 @@ static int __init mod_init(void)
#endif
#ifdef CONFIG_ZINC_CHACHA20POLY1305
selftest(chacha20poly1305);
+#endif
+#ifdef CONFIG_ZINC_BLAKE2S
+ blake2s_fpu_init();
+ selftest(blake2s);
#endif
return 0;
}
diff --git a/lib/zinc/selftest/blake2s.h b/lib/zinc/selftest/blake2s.h
new file mode 100644
index 000000000000..ddd6a862b344
--- /dev/null
+++ b/lib/zinc/selftest/blake2s.h
@@ -0,0 +1,2095 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+static const u8 blake2s_testvecs[][BLAKE2S_OUTBYTES] __initconst = {
+ { 0x69, 0x21, 0x7a, 0x30, 0x79, 0x90, 0x80, 0x94,
+ 0xe1, 0x11, 0x21, 0xd0, 0x42, 0x35, 0x4a, 0x7c,
+ 0x1f, 0x55, 0xb6, 0x48, 0x2c, 0xa1, 0xa5, 0x1e,
+ 0x1b, 0x25, 0x0d, 0xfd, 0x1e, 0xd0, 0xee, 0xf9 },
+ { 0xe3, 0x4d, 0x74, 0xdb, 0xaf, 0x4f, 0xf4, 0xc6,
+ 0xab, 0xd8, 0x71, 0xcc, 0x22, 0x04, 0x51, 0xd2,
+ 0xea, 0x26, 0x48, 0x84, 0x6c, 0x77, 0x57, 0xfb,
+ 0xaa, 0xc8, 0x2f, 0xe5, 0x1a, 0xd6, 0x4b, 0xea },
+ { 0xdd, 0xad, 0x9a, 0xb1, 0x5d, 0xac, 0x45, 0x49,
+ 0xba, 0x42, 0xf4, 0x9d, 0x26, 0x24, 0x96, 0xbe,
+ 0xf6, 0xc0, 0xba, 0xe1, 0xdd, 0x34, 0x2a, 0x88,
+ 0x08, 0xf8, 0xea, 0x26, 0x7c, 0x6e, 0x21, 0x0c },
+ { 0xe8, 0xf9, 0x1c, 0x6e, 0xf2, 0x32, 0xa0, 0x41,
+ 0x45, 0x2a, 0xb0, 0xe1, 0x49, 0x07, 0x0c, 0xdd,
+ 0x7d, 0xd1, 0x76, 0x9e, 0x75, 0xb3, 0xa5, 0x92,
+ 0x1b, 0xe3, 0x78, 0x76, 0xc4, 0x5c, 0x99, 0x00 },
+ { 0x0c, 0xc7, 0x0e, 0x00, 0x34, 0x8b, 0x86, 0xba,
+ 0x29, 0x44, 0xd0, 0xc3, 0x20, 0x38, 0xb2, 0x5c,
+ 0x55, 0x58, 0x4f, 0x90, 0xdf, 0x23, 0x04, 0xf5,
+ 0x5f, 0xa3, 0x32, 0xaf, 0x5f, 0xb0, 0x1e, 0x20 },
+ { 0xec, 0x19, 0x64, 0x19, 0x10, 0x87, 0xa4, 0xfe,
+ 0x9d, 0xf1, 0xc7, 0x95, 0x34, 0x2a, 0x02, 0xff,
+ 0xc1, 0x91, 0xa5, 0xb2, 0x51, 0x76, 0x48, 0x56,
+ 0xae, 0x5b, 0x8b, 0x57, 0x69, 0xf0, 0xc6, 0xcd },
+ { 0xe1, 0xfa, 0x51, 0x61, 0x8d, 0x7d, 0xf4, 0xeb,
+ 0x70, 0xcf, 0x0d, 0x5a, 0x9e, 0x90, 0x6f, 0x80,
+ 0x6e, 0x9d, 0x19, 0xf7, 0xf4, 0xf0, 0x1e, 0x3b,
+ 0x62, 0x12, 0x88, 0xe4, 0x12, 0x04, 0x05, 0xd6 },
+ { 0x59, 0x80, 0x01, 0xfa, 0xfb, 0xe8, 0xf9, 0x4e,
+ 0xc6, 0x6d, 0xc8, 0x27, 0xd0, 0x12, 0xcf, 0xcb,
+ 0xba, 0x22, 0x28, 0x56, 0x9f, 0x44, 0x8e, 0x89,
+ 0xea, 0x22, 0x08, 0xc8, 0xbf, 0x76, 0x92, 0x93 },
+ { 0xc7, 0xe8, 0x87, 0xb5, 0x46, 0x62, 0x36, 0x35,
+ 0xe9, 0x3e, 0x04, 0x95, 0x59, 0x8f, 0x17, 0x26,
+ 0x82, 0x19, 0x96, 0xc2, 0x37, 0x77, 0x05, 0xb9,
+ 0x3a, 0x1f, 0x63, 0x6f, 0x87, 0x2b, 0xfa, 0x2d },
+ { 0xc3, 0x15, 0xa4, 0x37, 0xdd, 0x28, 0x06, 0x2a,
+ 0x77, 0x0d, 0x48, 0x19, 0x67, 0x13, 0x6b, 0x1b,
+ 0x5e, 0xb8, 0x8b, 0x21, 0xee, 0x53, 0xd0, 0x32,
+ 0x9c, 0x58, 0x97, 0x12, 0x6e, 0x9d, 0xb0, 0x2c },
+ { 0xbb, 0x47, 0x3d, 0xed, 0xdc, 0x05, 0x5f, 0xea,
+ 0x62, 0x28, 0xf2, 0x07, 0xda, 0x57, 0x53, 0x47,
+ 0xbb, 0x00, 0x40, 0x4c, 0xd3, 0x49, 0xd3, 0x8c,
+ 0x18, 0x02, 0x63, 0x07, 0xa2, 0x24, 0xcb, 0xff },
+ { 0x68, 0x7e, 0x18, 0x73, 0xa8, 0x27, 0x75, 0x91,
+ 0xbb, 0x33, 0xd9, 0xad, 0xf9, 0xa1, 0x39, 0x12,
+ 0xef, 0xef, 0xe5, 0x57, 0xca, 0xfc, 0x39, 0xa7,
+ 0x95, 0x26, 0x23, 0xe4, 0x72, 0x55, 0xf1, 0x6d },
+ { 0x1a, 0xc7, 0xba, 0x75, 0x4d, 0x6e, 0x2f, 0x94,
+ 0xe0, 0xe8, 0x6c, 0x46, 0xbf, 0xb2, 0x62, 0xab,
+ 0xbb, 0x74, 0xf4, 0x50, 0xef, 0x45, 0x6d, 0x6b,
+ 0x4d, 0x97, 0xaa, 0x80, 0xce, 0x6d, 0xa7, 0x67 },
+ { 0x01, 0x2c, 0x97, 0x80, 0x96, 0x14, 0x81, 0x6b,
+ 0x5d, 0x94, 0x94, 0x47, 0x7d, 0x4b, 0x68, 0x7d,
+ 0x15, 0xb9, 0x6e, 0xb6, 0x9c, 0x0e, 0x80, 0x74,
+ 0xa8, 0x51, 0x6f, 0x31, 0x22, 0x4b, 0x5c, 0x98 },
+ { 0x91, 0xff, 0xd2, 0x6c, 0xfa, 0x4d, 0xa5, 0x13,
+ 0x4c, 0x7e, 0xa2, 0x62, 0xf7, 0x88, 0x9c, 0x32,
+ 0x9f, 0x61, 0xf6, 0xa6, 0x57, 0x22, 0x5c, 0xc2,
+ 0x12, 0xf4, 0x00, 0x56, 0xd9, 0x86, 0xb3, 0xf4 },
+ { 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
+ 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
+ 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
+ 0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d },
+ { 0xef, 0xc0, 0x4c, 0xdc, 0x39, 0x1c, 0x7e, 0x91,
+ 0x19, 0xbd, 0x38, 0x66, 0x8a, 0x53, 0x4e, 0x65,
+ 0xfe, 0x31, 0x03, 0x6d, 0x6a, 0x62, 0x11, 0x2e,
+ 0x44, 0xeb, 0xeb, 0x11, 0xf9, 0xc5, 0x70, 0x80 },
+ { 0x99, 0x2c, 0xf5, 0xc0, 0x53, 0x44, 0x2a, 0x5f,
+ 0xbc, 0x4f, 0xaf, 0x58, 0x3e, 0x04, 0xe5, 0x0b,
+ 0xb7, 0x0d, 0x2f, 0x39, 0xfb, 0xb6, 0xa5, 0x03,
+ 0xf8, 0x9e, 0x56, 0xa6, 0x3e, 0x18, 0x57, 0x8a },
+ { 0x38, 0x64, 0x0e, 0x9f, 0x21, 0x98, 0x3e, 0x67,
+ 0xb5, 0x39, 0xca, 0xcc, 0xae, 0x5e, 0xcf, 0x61,
+ 0x5a, 0xe2, 0x76, 0x4f, 0x75, 0xa0, 0x9c, 0x9c,
+ 0x59, 0xb7, 0x64, 0x83, 0xc1, 0xfb, 0xc7, 0x35 },
+ { 0x21, 0x3d, 0xd3, 0x4c, 0x7e, 0xfe, 0x4f, 0xb2,
+ 0x7a, 0x6b, 0x35, 0xf6, 0xb4, 0x00, 0x0d, 0x1f,
+ 0xe0, 0x32, 0x81, 0xaf, 0x3c, 0x72, 0x3e, 0x5c,
+ 0x9f, 0x94, 0x74, 0x7a, 0x5f, 0x31, 0xcd, 0x3b },
+ { 0xec, 0x24, 0x6e, 0xee, 0xb9, 0xce, 0xd3, 0xf7,
+ 0xad, 0x33, 0xed, 0x28, 0x66, 0x0d, 0xd9, 0xbb,
+ 0x07, 0x32, 0x51, 0x3d, 0xb4, 0xe2, 0xfa, 0x27,
+ 0x8b, 0x60, 0xcd, 0xe3, 0x68, 0x2a, 0x4c, 0xcd },
+ { 0xac, 0x9b, 0x61, 0xd4, 0x46, 0x64, 0x8c, 0x30,
+ 0x05, 0xd7, 0x89, 0x2b, 0xf3, 0xa8, 0x71, 0x9f,
+ 0x4c, 0x81, 0x81, 0xcf, 0xdc, 0xbc, 0x2b, 0x79,
+ 0xfe, 0xf1, 0x0a, 0x27, 0x9b, 0x91, 0x10, 0x95 },
+ { 0x7b, 0xf8, 0xb2, 0x29, 0x59, 0xe3, 0x4e, 0x3a,
+ 0x43, 0xf7, 0x07, 0x92, 0x23, 0xe8, 0x3a, 0x97,
+ 0x54, 0x61, 0x7d, 0x39, 0x1e, 0x21, 0x3d, 0xfd,
+ 0x80, 0x8e, 0x41, 0xb9, 0xbe, 0xad, 0x4c, 0xe7 },
+ { 0x68, 0xd4, 0xb5, 0xd4, 0xfa, 0x0e, 0x30, 0x2b,
+ 0x64, 0xcc, 0xc5, 0xaf, 0x79, 0x29, 0x13, 0xac,
+ 0x4c, 0x88, 0xec, 0x95, 0xc0, 0x7d, 0xdf, 0x40,
+ 0x69, 0x42, 0x56, 0xeb, 0x88, 0xce, 0x9f, 0x3d },
+ { 0xb2, 0xc2, 0x42, 0x0f, 0x05, 0xf9, 0xab, 0xe3,
+ 0x63, 0x15, 0x91, 0x93, 0x36, 0xb3, 0x7e, 0x4e,
+ 0x0f, 0xa3, 0x3f, 0xf7, 0xe7, 0x6a, 0x49, 0x27,
+ 0x67, 0x00, 0x6f, 0xdb, 0x5d, 0x93, 0x54, 0x62 },
+ { 0x13, 0x4f, 0x61, 0xbb, 0xd0, 0xbb, 0xb6, 0x9a,
+ 0xed, 0x53, 0x43, 0x90, 0x45, 0x51, 0xa3, 0xe6,
+ 0xc1, 0xaa, 0x7d, 0xcd, 0xd7, 0x7e, 0x90, 0x3e,
+ 0x70, 0x23, 0xeb, 0x7c, 0x60, 0x32, 0x0a, 0xa7 },
+ { 0x46, 0x93, 0xf9, 0xbf, 0xf7, 0xd4, 0xf3, 0x98,
+ 0x6a, 0x7d, 0x17, 0x6e, 0x6e, 0x06, 0xf7, 0x2a,
+ 0xd1, 0x49, 0x0d, 0x80, 0x5c, 0x99, 0xe2, 0x53,
+ 0x47, 0xb8, 0xde, 0x77, 0xb4, 0xdb, 0x6d, 0x9b },
+ { 0x85, 0x3e, 0x26, 0xf7, 0x41, 0x95, 0x3b, 0x0f,
+ 0xd5, 0xbd, 0xb4, 0x24, 0xe8, 0xab, 0x9e, 0x8b,
+ 0x37, 0x50, 0xea, 0xa8, 0xef, 0x61, 0xe4, 0x79,
+ 0x02, 0xc9, 0x1e, 0x55, 0x4e, 0x9c, 0x73, 0xb9 },
+ { 0xf7, 0xde, 0x53, 0x63, 0x61, 0xab, 0xaa, 0x0e,
+ 0x15, 0x81, 0x56, 0xcf, 0x0e, 0xa4, 0xf6, 0x3a,
+ 0x99, 0xb5, 0xe4, 0x05, 0x4f, 0x8f, 0xa4, 0xc9,
+ 0xd4, 0x5f, 0x62, 0x85, 0xca, 0xd5, 0x56, 0x94 },
+ { 0x4c, 0x23, 0x06, 0x08, 0x86, 0x0a, 0x99, 0xae,
+ 0x8d, 0x7b, 0xd5, 0xc2, 0xcc, 0x17, 0xfa, 0x52,
+ 0x09, 0x6b, 0x9a, 0x61, 0xbe, 0xdb, 0x17, 0xcb,
+ 0x76, 0x17, 0x86, 0x4a, 0xd2, 0x9c, 0xa7, 0xa6 },
+ { 0xae, 0xb9, 0x20, 0xea, 0x87, 0x95, 0x2d, 0xad,
+ 0xb1, 0xfb, 0x75, 0x92, 0x91, 0xe3, 0x38, 0x81,
+ 0x39, 0xa8, 0x72, 0x86, 0x50, 0x01, 0x88, 0x6e,
+ 0xd8, 0x47, 0x52, 0xe9, 0x3c, 0x25, 0x0c, 0x2a },
+ { 0xab, 0xa4, 0xad, 0x9b, 0x48, 0x0b, 0x9d, 0xf3,
+ 0xd0, 0x8c, 0xa5, 0xe8, 0x7b, 0x0c, 0x24, 0x40,
+ 0xd4, 0xe4, 0xea, 0x21, 0x22, 0x4c, 0x2e, 0xb4,
+ 0x2c, 0xba, 0xe4, 0x69, 0xd0, 0x89, 0xb9, 0x31 },
+ { 0x05, 0x82, 0x56, 0x07, 0xd7, 0xfd, 0xf2, 0xd8,
+ 0x2e, 0xf4, 0xc3, 0xc8, 0xc2, 0xae, 0xa9, 0x61,
+ 0xad, 0x98, 0xd6, 0x0e, 0xdf, 0xf7, 0xd0, 0x18,
+ 0x98, 0x3e, 0x21, 0x20, 0x4c, 0x0d, 0x93, 0xd1 },
+ { 0xa7, 0x42, 0xf8, 0xb6, 0xaf, 0x82, 0xd8, 0xa6,
+ 0xca, 0x23, 0x57, 0xc5, 0xf1, 0xcf, 0x91, 0xde,
+ 0xfb, 0xd0, 0x66, 0x26, 0x7d, 0x75, 0xc0, 0x48,
+ 0xb3, 0x52, 0x36, 0x65, 0x85, 0x02, 0x59, 0x62 },
+ { 0x2b, 0xca, 0xc8, 0x95, 0x99, 0x00, 0x0b, 0x42,
+ 0xc9, 0x5a, 0xe2, 0x38, 0x35, 0xa7, 0x13, 0x70,
+ 0x4e, 0xd7, 0x97, 0x89, 0xc8, 0x4f, 0xef, 0x14,
+ 0x9a, 0x87, 0x4f, 0xf7, 0x33, 0xf0, 0x17, 0xa2 },
+ { 0xac, 0x1e, 0xd0, 0x7d, 0x04, 0x8f, 0x10, 0x5a,
+ 0x9e, 0x5b, 0x7a, 0xb8, 0x5b, 0x09, 0xa4, 0x92,
+ 0xd5, 0xba, 0xff, 0x14, 0xb8, 0xbf, 0xb0, 0xe9,
+ 0xfd, 0x78, 0x94, 0x86, 0xee, 0xa2, 0xb9, 0x74 },
+ { 0xe4, 0x8d, 0x0e, 0xcf, 0xaf, 0x49, 0x7d, 0x5b,
+ 0x27, 0xc2, 0x5d, 0x99, 0xe1, 0x56, 0xcb, 0x05,
+ 0x79, 0xd4, 0x40, 0xd6, 0xe3, 0x1f, 0xb6, 0x24,
+ 0x73, 0x69, 0x6d, 0xbf, 0x95, 0xe0, 0x10, 0xe4 },
+ { 0x12, 0xa9, 0x1f, 0xad, 0xf8, 0xb2, 0x16, 0x44,
+ 0xfd, 0x0f, 0x93, 0x4f, 0x3c, 0x4a, 0x8f, 0x62,
+ 0xba, 0x86, 0x2f, 0xfd, 0x20, 0xe8, 0xe9, 0x61,
+ 0x15, 0x4c, 0x15, 0xc1, 0x38, 0x84, 0xed, 0x3d },
+ { 0x7c, 0xbe, 0xe9, 0x6e, 0x13, 0x98, 0x97, 0xdc,
+ 0x98, 0xfb, 0xef, 0x3b, 0xe8, 0x1a, 0xd4, 0xd9,
+ 0x64, 0xd2, 0x35, 0xcb, 0x12, 0x14, 0x1f, 0xb6,
+ 0x67, 0x27, 0xe6, 0xe5, 0xdf, 0x73, 0xa8, 0x78 },
+ { 0xeb, 0xf6, 0x6a, 0xbb, 0x59, 0x7a, 0xe5, 0x72,
+ 0xa7, 0x29, 0x7c, 0xb0, 0x87, 0x1e, 0x35, 0x5a,
+ 0xcc, 0xaf, 0xad, 0x83, 0x77, 0xb8, 0xe7, 0x8b,
+ 0xf1, 0x64, 0xce, 0x2a, 0x18, 0xde, 0x4b, 0xaf },
+ { 0x71, 0xb9, 0x33, 0xb0, 0x7e, 0x4f, 0xf7, 0x81,
+ 0x8c, 0xe0, 0x59, 0xd0, 0x08, 0x82, 0x9e, 0x45,
+ 0x3c, 0x6f, 0xf0, 0x2e, 0xc0, 0xa7, 0xdb, 0x39,
+ 0x3f, 0xc2, 0xd8, 0x70, 0xf3, 0x7a, 0x72, 0x86 },
+ { 0x7c, 0xf7, 0xc5, 0x13, 0x31, 0x22, 0x0b, 0x8d,
+ 0x3e, 0xba, 0xed, 0x9c, 0x29, 0x39, 0x8a, 0x16,
+ 0xd9, 0x81, 0x56, 0xe2, 0x61, 0x3c, 0xb0, 0x88,
+ 0xf2, 0xb0, 0xe0, 0x8a, 0x1b, 0xe4, 0xcf, 0x4f },
+ { 0x3e, 0x41, 0xa1, 0x08, 0xe0, 0xf6, 0x4a, 0xd2,
+ 0x76, 0xb9, 0x79, 0xe1, 0xce, 0x06, 0x82, 0x79,
+ 0xe1, 0x6f, 0x7b, 0xc7, 0xe4, 0xaa, 0x1d, 0x21,
+ 0x1e, 0x17, 0xb8, 0x11, 0x61, 0xdf, 0x16, 0x02 },
+ { 0x88, 0x65, 0x02, 0xa8, 0x2a, 0xb4, 0x7b, 0xa8,
+ 0xd8, 0x67, 0x10, 0xaa, 0x9d, 0xe3, 0xd4, 0x6e,
+ 0xa6, 0x5c, 0x47, 0xaf, 0x6e, 0xe8, 0xde, 0x45,
+ 0x0c, 0xce, 0xb8, 0xb1, 0x1b, 0x04, 0x5f, 0x50 },
+ { 0xc0, 0x21, 0xbc, 0x5f, 0x09, 0x54, 0xfe, 0xe9,
+ 0x4f, 0x46, 0xea, 0x09, 0x48, 0x7e, 0x10, 0xa8,
+ 0x48, 0x40, 0xd0, 0x2f, 0x64, 0x81, 0x0b, 0xc0,
+ 0x8d, 0x9e, 0x55, 0x1f, 0x7d, 0x41, 0x68, 0x14 },
+ { 0x20, 0x30, 0x51, 0x6e, 0x8a, 0x5f, 0xe1, 0x9a,
+ 0xe7, 0x9c, 0x33, 0x6f, 0xce, 0x26, 0x38, 0x2a,
+ 0x74, 0x9d, 0x3f, 0xd0, 0xec, 0x91, 0xe5, 0x37,
+ 0xd4, 0xbd, 0x23, 0x58, 0xc1, 0x2d, 0xfb, 0x22 },
+ { 0x55, 0x66, 0x98, 0xda, 0xc8, 0x31, 0x7f, 0xd3,
+ 0x6d, 0xfb, 0xdf, 0x25, 0xa7, 0x9c, 0xb1, 0x12,
+ 0xd5, 0x42, 0x58, 0x60, 0x60, 0x5c, 0xba, 0xf5,
+ 0x07, 0xf2, 0x3b, 0xf7, 0xe9, 0xf4, 0x2a, 0xfe },
+ { 0x2f, 0x86, 0x7b, 0xa6, 0x77, 0x73, 0xfd, 0xc3,
+ 0xe9, 0x2f, 0xce, 0xd9, 0x9a, 0x64, 0x09, 0xad,
+ 0x39, 0xd0, 0xb8, 0x80, 0xfd, 0xe8, 0xf1, 0x09,
+ 0xa8, 0x17, 0x30, 0xc4, 0x45, 0x1d, 0x01, 0x78 },
+ { 0x17, 0x2e, 0xc2, 0x18, 0xf1, 0x19, 0xdf, 0xae,
+ 0x98, 0x89, 0x6d, 0xff, 0x29, 0xdd, 0x98, 0x76,
+ 0xc9, 0x4a, 0xf8, 0x74, 0x17, 0xf9, 0xae, 0x4c,
+ 0x70, 0x14, 0xbb, 0x4e, 0x4b, 0x96, 0xaf, 0xc7 },
+ { 0x3f, 0x85, 0x81, 0x4a, 0x18, 0x19, 0x5f, 0x87,
+ 0x9a, 0xa9, 0x62, 0xf9, 0x5d, 0x26, 0xbd, 0x82,
+ 0xa2, 0x78, 0xf2, 0xb8, 0x23, 0x20, 0x21, 0x8f,
+ 0x6b, 0x3b, 0xd6, 0xf7, 0xf6, 0x67, 0xa6, 0xd9 },
+ { 0x1b, 0x61, 0x8f, 0xba, 0xa5, 0x66, 0xb3, 0xd4,
+ 0x98, 0xc1, 0x2e, 0x98, 0x2c, 0x9e, 0xc5, 0x2e,
+ 0x4d, 0xa8, 0x5a, 0x8c, 0x54, 0xf3, 0x8f, 0x34,
+ 0xc0, 0x90, 0x39, 0x4f, 0x23, 0xc1, 0x84, 0xc1 },
+ { 0x0c, 0x75, 0x8f, 0xb5, 0x69, 0x2f, 0xfd, 0x41,
+ 0xa3, 0x57, 0x5d, 0x0a, 0xf0, 0x0c, 0xc7, 0xfb,
+ 0xf2, 0xcb, 0xe5, 0x90, 0x5a, 0x58, 0x32, 0x3a,
+ 0x88, 0xae, 0x42, 0x44, 0xf6, 0xe4, 0xc9, 0x93 },
+ { 0xa9, 0x31, 0x36, 0x0c, 0xad, 0x62, 0x8c, 0x7f,
+ 0x12, 0xa6, 0xc1, 0xc4, 0xb7, 0x53, 0xb0, 0xf4,
+ 0x06, 0x2a, 0xef, 0x3c, 0xe6, 0x5a, 0x1a, 0xe3,
+ 0xf1, 0x93, 0x69, 0xda, 0xdf, 0x3a, 0xe2, 0x3d },
+ { 0xcb, 0xac, 0x7d, 0x77, 0x3b, 0x1e, 0x3b, 0x3c,
+ 0x66, 0x91, 0xd7, 0xab, 0xb7, 0xe9, 0xdf, 0x04,
+ 0x5c, 0x8b, 0xa1, 0x92, 0x68, 0xde, 0xd1, 0x53,
+ 0x20, 0x7f, 0x5e, 0x80, 0x43, 0x52, 0xec, 0x5d },
+ { 0x23, 0xa1, 0x96, 0xd3, 0x80, 0x2e, 0xd3, 0xc1,
+ 0xb3, 0x84, 0x01, 0x9a, 0x82, 0x32, 0x58, 0x40,
+ 0xd3, 0x2f, 0x71, 0x95, 0x0c, 0x45, 0x80, 0xb0,
+ 0x34, 0x45, 0xe0, 0x89, 0x8e, 0x14, 0x05, 0x3c },
+ { 0xf4, 0x49, 0x54, 0x70, 0xf2, 0x26, 0xc8, 0xc2,
+ 0x14, 0xbe, 0x08, 0xfd, 0xfa, 0xd4, 0xbc, 0x4a,
+ 0x2a, 0x9d, 0xbe, 0xa9, 0x13, 0x6a, 0x21, 0x0d,
+ 0xf0, 0xd4, 0xb6, 0x49, 0x29, 0xe6, 0xfc, 0x14 },
+ { 0xe2, 0x90, 0xdd, 0x27, 0x0b, 0x46, 0x7f, 0x34,
+ 0xab, 0x1c, 0x00, 0x2d, 0x34, 0x0f, 0xa0, 0x16,
+ 0x25, 0x7f, 0xf1, 0x9e, 0x58, 0x33, 0xfd, 0xbb,
+ 0xf2, 0xcb, 0x40, 0x1c, 0x3b, 0x28, 0x17, 0xde },
+ { 0x9f, 0xc7, 0xb5, 0xde, 0xd3, 0xc1, 0x50, 0x42,
+ 0xb2, 0xa6, 0x58, 0x2d, 0xc3, 0x9b, 0xe0, 0x16,
+ 0xd2, 0x4a, 0x68, 0x2d, 0x5e, 0x61, 0xad, 0x1e,
+ 0xff, 0x9c, 0x63, 0x30, 0x98, 0x48, 0xf7, 0x06 },
+ { 0x8c, 0xca, 0x67, 0xa3, 0x6d, 0x17, 0xd5, 0xe6,
+ 0x34, 0x1c, 0xb5, 0x92, 0xfd, 0x7b, 0xef, 0x99,
+ 0x26, 0xc9, 0xe3, 0xaa, 0x10, 0x27, 0xea, 0x11,
+ 0xa7, 0xd8, 0xbd, 0x26, 0x0b, 0x57, 0x6e, 0x04 },
+ { 0x40, 0x93, 0x92, 0xf5, 0x60, 0xf8, 0x68, 0x31,
+ 0xda, 0x43, 0x73, 0xee, 0x5e, 0x00, 0x74, 0x26,
+ 0x05, 0x95, 0xd7, 0xbc, 0x24, 0x18, 0x3b, 0x60,
+ 0xed, 0x70, 0x0d, 0x45, 0x83, 0xd3, 0xf6, 0xf0 },
+ { 0x28, 0x02, 0x16, 0x5d, 0xe0, 0x90, 0x91, 0x55,
+ 0x46, 0xf3, 0x39, 0x8c, 0xd8, 0x49, 0x16, 0x4a,
+ 0x19, 0xf9, 0x2a, 0xdb, 0xc3, 0x61, 0xad, 0xc9,
+ 0x9b, 0x0f, 0x20, 0xc8, 0xea, 0x07, 0x10, 0x54 },
+ { 0xad, 0x83, 0x91, 0x68, 0xd9, 0xf8, 0xa4, 0xbe,
+ 0x95, 0xba, 0x9e, 0xf9, 0xa6, 0x92, 0xf0, 0x72,
+ 0x56, 0xae, 0x43, 0xfe, 0x6f, 0x98, 0x64, 0xe2,
+ 0x90, 0x69, 0x1b, 0x02, 0x56, 0xce, 0x50, 0xa9 },
+ { 0x75, 0xfd, 0xaa, 0x50, 0x38, 0xc2, 0x84, 0xb8,
+ 0x6d, 0x6e, 0x8a, 0xff, 0xe8, 0xb2, 0x80, 0x7e,
+ 0x46, 0x7b, 0x86, 0x60, 0x0e, 0x79, 0xaf, 0x36,
+ 0x89, 0xfb, 0xc0, 0x63, 0x28, 0xcb, 0xf8, 0x94 },
+ { 0xe5, 0x7c, 0xb7, 0x94, 0x87, 0xdd, 0x57, 0x90,
+ 0x24, 0x32, 0xb2, 0x50, 0x73, 0x38, 0x13, 0xbd,
+ 0x96, 0xa8, 0x4e, 0xfc, 0xe5, 0x9f, 0x65, 0x0f,
+ 0xac, 0x26, 0xe6, 0x69, 0x6a, 0xef, 0xaf, 0xc3 },
+ { 0x56, 0xf3, 0x4e, 0x8b, 0x96, 0x55, 0x7e, 0x90,
+ 0xc1, 0xf2, 0x4b, 0x52, 0xd0, 0xc8, 0x9d, 0x51,
+ 0x08, 0x6a, 0xcf, 0x1b, 0x00, 0xf6, 0x34, 0xcf,
+ 0x1d, 0xde, 0x92, 0x33, 0xb8, 0xea, 0xaa, 0x3e },
+ { 0x1b, 0x53, 0xee, 0x94, 0xaa, 0xf3, 0x4e, 0x4b,
+ 0x15, 0x9d, 0x48, 0xde, 0x35, 0x2c, 0x7f, 0x06,
+ 0x61, 0xd0, 0xa4, 0x0e, 0xdf, 0xf9, 0x5a, 0x0b,
+ 0x16, 0x39, 0xb4, 0x09, 0x0e, 0x97, 0x44, 0x72 },
+ { 0x05, 0x70, 0x5e, 0x2a, 0x81, 0x75, 0x7c, 0x14,
+ 0xbd, 0x38, 0x3e, 0xa9, 0x8d, 0xda, 0x54, 0x4e,
+ 0xb1, 0x0e, 0x6b, 0xc0, 0x7b, 0xae, 0x43, 0x5e,
+ 0x25, 0x18, 0xdb, 0xe1, 0x33, 0x52, 0x53, 0x75 },
+ { 0xd8, 0xb2, 0x86, 0x6e, 0x8a, 0x30, 0x9d, 0xb5,
+ 0x3e, 0x52, 0x9e, 0xc3, 0x29, 0x11, 0xd8, 0x2f,
+ 0x5c, 0xa1, 0x6c, 0xff, 0x76, 0x21, 0x68, 0x91,
+ 0xa9, 0x67, 0x6a, 0xa3, 0x1a, 0xaa, 0x6c, 0x42 },
+ { 0xf5, 0x04, 0x1c, 0x24, 0x12, 0x70, 0xeb, 0x04,
+ 0xc7, 0x1e, 0xc2, 0xc9, 0x5d, 0x4c, 0x38, 0xd8,
+ 0x03, 0xb1, 0x23, 0x7b, 0x0f, 0x29, 0xfd, 0x4d,
+ 0xb3, 0xeb, 0x39, 0x76, 0x69, 0xe8, 0x86, 0x99 },
+ { 0x9a, 0x4c, 0xe0, 0x77, 0xc3, 0x49, 0x32, 0x2f,
+ 0x59, 0x5e, 0x0e, 0xe7, 0x9e, 0xd0, 0xda, 0x5f,
+ 0xab, 0x66, 0x75, 0x2c, 0xbf, 0xef, 0x8f, 0x87,
+ 0xd0, 0xe9, 0xd0, 0x72, 0x3c, 0x75, 0x30, 0xdd },
+ { 0x65, 0x7b, 0x09, 0xf3, 0xd0, 0xf5, 0x2b, 0x5b,
+ 0x8f, 0x2f, 0x97, 0x16, 0x3a, 0x0e, 0xdf, 0x0c,
+ 0x04, 0xf0, 0x75, 0x40, 0x8a, 0x07, 0xbb, 0xeb,
+ 0x3a, 0x41, 0x01, 0xa8, 0x91, 0x99, 0x0d, 0x62 },
+ { 0x1e, 0x3f, 0x7b, 0xd5, 0xa5, 0x8f, 0xa5, 0x33,
+ 0x34, 0x4a, 0xa8, 0xed, 0x3a, 0xc1, 0x22, 0xbb,
+ 0x9e, 0x70, 0xd4, 0xef, 0x50, 0xd0, 0x04, 0x53,
+ 0x08, 0x21, 0x94, 0x8f, 0x5f, 0xe6, 0x31, 0x5a },
+ { 0x80, 0xdc, 0xcf, 0x3f, 0xd8, 0x3d, 0xfd, 0x0d,
+ 0x35, 0xaa, 0x28, 0x58, 0x59, 0x22, 0xab, 0x89,
+ 0xd5, 0x31, 0x39, 0x97, 0x67, 0x3e, 0xaf, 0x90,
+ 0x5c, 0xea, 0x9c, 0x0b, 0x22, 0x5c, 0x7b, 0x5f },
+ { 0x8a, 0x0d, 0x0f, 0xbf, 0x63, 0x77, 0xd8, 0x3b,
+ 0xb0, 0x8b, 0x51, 0x4b, 0x4b, 0x1c, 0x43, 0xac,
+ 0xc9, 0x5d, 0x75, 0x17, 0x14, 0xf8, 0x92, 0x56,
+ 0x45, 0xcb, 0x6b, 0xc8, 0x56, 0xca, 0x15, 0x0a },
+ { 0x9f, 0xa5, 0xb4, 0x87, 0x73, 0x8a, 0xd2, 0x84,
+ 0x4c, 0xc6, 0x34, 0x8a, 0x90, 0x19, 0x18, 0xf6,
+ 0x59, 0xa3, 0xb8, 0x9e, 0x9c, 0x0d, 0xfe, 0xea,
+ 0xd3, 0x0d, 0xd9, 0x4b, 0xcf, 0x42, 0xef, 0x8e },
+ { 0x80, 0x83, 0x2c, 0x4a, 0x16, 0x77, 0xf5, 0xea,
+ 0x25, 0x60, 0xf6, 0x68, 0xe9, 0x35, 0x4d, 0xd3,
+ 0x69, 0x97, 0xf0, 0x37, 0x28, 0xcf, 0xa5, 0x5e,
+ 0x1b, 0x38, 0x33, 0x7c, 0x0c, 0x9e, 0xf8, 0x18 },
+ { 0xab, 0x37, 0xdd, 0xb6, 0x83, 0x13, 0x7e, 0x74,
+ 0x08, 0x0d, 0x02, 0x6b, 0x59, 0x0b, 0x96, 0xae,
+ 0x9b, 0xb4, 0x47, 0x72, 0x2f, 0x30, 0x5a, 0x5a,
+ 0xc5, 0x70, 0xec, 0x1d, 0xf9, 0xb1, 0x74, 0x3c },
+ { 0x3e, 0xe7, 0x35, 0xa6, 0x94, 0xc2, 0x55, 0x9b,
+ 0x69, 0x3a, 0xa6, 0x86, 0x29, 0x36, 0x1e, 0x15,
+ 0xd1, 0x22, 0x65, 0xad, 0x6a, 0x3d, 0xed, 0xf4,
+ 0x88, 0xb0, 0xb0, 0x0f, 0xac, 0x97, 0x54, 0xba },
+ { 0xd6, 0xfc, 0xd2, 0x32, 0x19, 0xb6, 0x47, 0xe4,
+ 0xcb, 0xd5, 0xeb, 0x2d, 0x0a, 0xd0, 0x1e, 0xc8,
+ 0x83, 0x8a, 0x4b, 0x29, 0x01, 0xfc, 0x32, 0x5c,
+ 0xc3, 0x70, 0x19, 0x81, 0xca, 0x6c, 0x88, 0x8b },
+ { 0x05, 0x20, 0xec, 0x2f, 0x5b, 0xf7, 0xa7, 0x55,
+ 0xda, 0xcb, 0x50, 0xc6, 0xbf, 0x23, 0x3e, 0x35,
+ 0x15, 0x43, 0x47, 0x63, 0xdb, 0x01, 0x39, 0xcc,
+ 0xd9, 0xfa, 0xef, 0xbb, 0x82, 0x07, 0x61, 0x2d },
+ { 0xaf, 0xf3, 0xb7, 0x5f, 0x3f, 0x58, 0x12, 0x64,
+ 0xd7, 0x66, 0x16, 0x62, 0xb9, 0x2f, 0x5a, 0xd3,
+ 0x7c, 0x1d, 0x32, 0xbd, 0x45, 0xff, 0x81, 0xa4,
+ 0xed, 0x8a, 0xdc, 0x9e, 0xf3, 0x0d, 0xd9, 0x89 },
+ { 0xd0, 0xdd, 0x65, 0x0b, 0xef, 0xd3, 0xba, 0x63,
+ 0xdc, 0x25, 0x10, 0x2c, 0x62, 0x7c, 0x92, 0x1b,
+ 0x9c, 0xbe, 0xb0, 0xb1, 0x30, 0x68, 0x69, 0x35,
+ 0xb5, 0xc9, 0x27, 0xcb, 0x7c, 0xcd, 0x5e, 0x3b },
+ { 0xe1, 0x14, 0x98, 0x16, 0xb1, 0x0a, 0x85, 0x14,
+ 0xfb, 0x3e, 0x2c, 0xab, 0x2c, 0x08, 0xbe, 0xe9,
+ 0xf7, 0x3c, 0xe7, 0x62, 0x21, 0x70, 0x12, 0x46,
+ 0xa5, 0x89, 0xbb, 0xb6, 0x73, 0x02, 0xd8, 0xa9 },
+ { 0x7d, 0xa3, 0xf4, 0x41, 0xde, 0x90, 0x54, 0x31,
+ 0x7e, 0x72, 0xb5, 0xdb, 0xf9, 0x79, 0xda, 0x01,
+ 0xe6, 0xbc, 0xee, 0xbb, 0x84, 0x78, 0xea, 0xe6,
+ 0xa2, 0x28, 0x49, 0xd9, 0x02, 0x92, 0x63, 0x5c },
+ { 0x12, 0x30, 0xb1, 0xfc, 0x8a, 0x7d, 0x92, 0x15,
+ 0xed, 0xc2, 0xd4, 0xa2, 0xde, 0xcb, 0xdd, 0x0a,
+ 0x6e, 0x21, 0x6c, 0x92, 0x42, 0x78, 0xc9, 0x1f,
+ 0xc5, 0xd1, 0x0e, 0x7d, 0x60, 0x19, 0x2d, 0x94 },
+ { 0x57, 0x50, 0xd7, 0x16, 0xb4, 0x80, 0x8f, 0x75,
+ 0x1f, 0xeb, 0xc3, 0x88, 0x06, 0xba, 0x17, 0x0b,
+ 0xf6, 0xd5, 0x19, 0x9a, 0x78, 0x16, 0xbe, 0x51,
+ 0x4e, 0x3f, 0x93, 0x2f, 0xbe, 0x0c, 0xb8, 0x71 },
+ { 0x6f, 0xc5, 0x9b, 0x2f, 0x10, 0xfe, 0xba, 0x95,
+ 0x4a, 0xa6, 0x82, 0x0b, 0x3c, 0xa9, 0x87, 0xee,
+ 0x81, 0xd5, 0xcc, 0x1d, 0xa3, 0xc6, 0x3c, 0xe8,
+ 0x27, 0x30, 0x1c, 0x56, 0x9d, 0xfb, 0x39, 0xce },
+ { 0xc7, 0xc3, 0xfe, 0x1e, 0xeb, 0xdc, 0x7b, 0x5a,
+ 0x93, 0x93, 0x26, 0xe8, 0xdd, 0xb8, 0x3e, 0x8b,
+ 0xf2, 0xb7, 0x80, 0xb6, 0x56, 0x78, 0xcb, 0x62,
+ 0xf2, 0x08, 0xb0, 0x40, 0xab, 0xdd, 0x35, 0xe2 },
+ { 0x0c, 0x75, 0xc1, 0xa1, 0x5c, 0xf3, 0x4a, 0x31,
+ 0x4e, 0xe4, 0x78, 0xf4, 0xa5, 0xce, 0x0b, 0x8a,
+ 0x6b, 0x36, 0x52, 0x8e, 0xf7, 0xa8, 0x20, 0x69,
+ 0x6c, 0x3e, 0x42, 0x46, 0xc5, 0xa1, 0x58, 0x64 },
+ { 0x21, 0x6d, 0xc1, 0x2a, 0x10, 0x85, 0x69, 0xa3,
+ 0xc7, 0xcd, 0xde, 0x4a, 0xed, 0x43, 0xa6, 0xc3,
+ 0x30, 0x13, 0x9d, 0xda, 0x3c, 0xcc, 0x4a, 0x10,
+ 0x89, 0x05, 0xdb, 0x38, 0x61, 0x89, 0x90, 0x50 },
+ { 0xa5, 0x7b, 0xe6, 0xae, 0x67, 0x56, 0xf2, 0x8b,
+ 0x02, 0xf5, 0x9d, 0xad, 0xf7, 0xe0, 0xd7, 0xd8,
+ 0x80, 0x7f, 0x10, 0xfa, 0x15, 0xce, 0xd1, 0xad,
+ 0x35, 0x85, 0x52, 0x1a, 0x1d, 0x99, 0x5a, 0x89 },
+ { 0x81, 0x6a, 0xef, 0x87, 0x59, 0x53, 0x71, 0x6c,
+ 0xd7, 0xa5, 0x81, 0xf7, 0x32, 0xf5, 0x3d, 0xd4,
+ 0x35, 0xda, 0xb6, 0x6d, 0x09, 0xc3, 0x61, 0xd2,
+ 0xd6, 0x59, 0x2d, 0xe1, 0x77, 0x55, 0xd8, 0xa8 },
+ { 0x9a, 0x76, 0x89, 0x32, 0x26, 0x69, 0x3b, 0x6e,
+ 0xa9, 0x7e, 0x6a, 0x73, 0x8f, 0x9d, 0x10, 0xfb,
+ 0x3d, 0x0b, 0x43, 0xae, 0x0e, 0x8b, 0x7d, 0x81,
+ 0x23, 0xea, 0x76, 0xce, 0x97, 0x98, 0x9c, 0x7e },
+ { 0x8d, 0xae, 0xdb, 0x9a, 0x27, 0x15, 0x29, 0xdb,
+ 0xb7, 0xdc, 0x3b, 0x60, 0x7f, 0xe5, 0xeb, 0x2d,
+ 0x32, 0x11, 0x77, 0x07, 0x58, 0xdd, 0x3b, 0x0a,
+ 0x35, 0x93, 0xd2, 0xd7, 0x95, 0x4e, 0x2d, 0x5b },
+ { 0x16, 0xdb, 0xc0, 0xaa, 0x5d, 0xd2, 0xc7, 0x74,
+ 0xf5, 0x05, 0x10, 0x0f, 0x73, 0x37, 0x86, 0xd8,
+ 0xa1, 0x75, 0xfc, 0xbb, 0xb5, 0x9c, 0x43, 0xe1,
+ 0xfb, 0xff, 0x3e, 0x1e, 0xaf, 0x31, 0xcb, 0x4a },
+ { 0x86, 0x06, 0xcb, 0x89, 0x9c, 0x6a, 0xea, 0xf5,
+ 0x1b, 0x9d, 0xb0, 0xfe, 0x49, 0x24, 0xa9, 0xfd,
+ 0x5d, 0xab, 0xc1, 0x9f, 0x88, 0x26, 0xf2, 0xbc,
+ 0x1c, 0x1d, 0x7d, 0xa1, 0x4d, 0x2c, 0x2c, 0x99 },
+ { 0x84, 0x79, 0x73, 0x1a, 0xed, 0xa5, 0x7b, 0xd3,
+ 0x7e, 0xad, 0xb5, 0x1a, 0x50, 0x7e, 0x30, 0x7f,
+ 0x3b, 0xd9, 0x5e, 0x69, 0xdb, 0xca, 0x94, 0xf3,
+ 0xbc, 0x21, 0x72, 0x60, 0x66, 0xad, 0x6d, 0xfd },
+ { 0x58, 0x47, 0x3a, 0x9e, 0xa8, 0x2e, 0xfa, 0x3f,
+ 0x3b, 0x3d, 0x8f, 0xc8, 0x3e, 0xd8, 0x86, 0x31,
+ 0x27, 0xb3, 0x3a, 0xe8, 0xde, 0xae, 0x63, 0x07,
+ 0x20, 0x1e, 0xdb, 0x6d, 0xde, 0x61, 0xde, 0x29 },
+ { 0x9a, 0x92, 0x55, 0xd5, 0x3a, 0xf1, 0x16, 0xde,
+ 0x8b, 0xa2, 0x7c, 0xe3, 0x5b, 0x4c, 0x7e, 0x15,
+ 0x64, 0x06, 0x57, 0xa0, 0xfc, 0xb8, 0x88, 0xc7,
+ 0x0d, 0x95, 0x43, 0x1d, 0xac, 0xd8, 0xf8, 0x30 },
+ { 0x9e, 0xb0, 0x5f, 0xfb, 0xa3, 0x9f, 0xd8, 0x59,
+ 0x6a, 0x45, 0x49, 0x3e, 0x18, 0xd2, 0x51, 0x0b,
+ 0xf3, 0xef, 0x06, 0x5c, 0x51, 0xd6, 0xe1, 0x3a,
+ 0xbe, 0x66, 0xaa, 0x57, 0xe0, 0x5c, 0xfd, 0xb7 },
+ { 0x81, 0xdc, 0xc3, 0xa5, 0x05, 0xea, 0xce, 0x3f,
+ 0x87, 0x9d, 0x8f, 0x70, 0x27, 0x76, 0x77, 0x0f,
+ 0x9d, 0xf5, 0x0e, 0x52, 0x1d, 0x14, 0x28, 0xa8,
+ 0x5d, 0xaf, 0x04, 0xf9, 0xad, 0x21, 0x50, 0xe0 },
+ { 0xe3, 0xe3, 0xc4, 0xaa, 0x3a, 0xcb, 0xbc, 0x85,
+ 0x33, 0x2a, 0xf9, 0xd5, 0x64, 0xbc, 0x24, 0x16,
+ 0x5e, 0x16, 0x87, 0xf6, 0xb1, 0xad, 0xcb, 0xfa,
+ 0xe7, 0x7a, 0x8f, 0x03, 0xc7, 0x2a, 0xc2, 0x8c },
+ { 0x67, 0x46, 0xc8, 0x0b, 0x4e, 0xb5, 0x6a, 0xea,
+ 0x45, 0xe6, 0x4e, 0x72, 0x89, 0xbb, 0xa3, 0xed,
+ 0xbf, 0x45, 0xec, 0xf8, 0x20, 0x64, 0x81, 0xff,
+ 0x63, 0x02, 0x12, 0x29, 0x84, 0xcd, 0x52, 0x6a },
+ { 0x2b, 0x62, 0x8e, 0x52, 0x76, 0x4d, 0x7d, 0x62,
+ 0xc0, 0x86, 0x8b, 0x21, 0x23, 0x57, 0xcd, 0xd1,
+ 0x2d, 0x91, 0x49, 0x82, 0x2f, 0x4e, 0x98, 0x45,
+ 0xd9, 0x18, 0xa0, 0x8d, 0x1a, 0xe9, 0x90, 0xc0 },
+ { 0xe4, 0xbf, 0xe8, 0x0d, 0x58, 0xc9, 0x19, 0x94,
+ 0x61, 0x39, 0x09, 0xdc, 0x4b, 0x1a, 0x12, 0x49,
+ 0x68, 0x96, 0xc0, 0x04, 0xaf, 0x7b, 0x57, 0x01,
+ 0x48, 0x3d, 0xe4, 0x5d, 0x28, 0x23, 0xd7, 0x8e },
+ { 0xeb, 0xb4, 0xba, 0x15, 0x0c, 0xef, 0x27, 0x34,
+ 0x34, 0x5b, 0x5d, 0x64, 0x1b, 0xbe, 0xd0, 0x3a,
+ 0x21, 0xea, 0xfa, 0xe9, 0x33, 0xc9, 0x9e, 0x00,
+ 0x92, 0x12, 0xef, 0x04, 0x57, 0x4a, 0x85, 0x30 },
+ { 0x39, 0x66, 0xec, 0x73, 0xb1, 0x54, 0xac, 0xc6,
+ 0x97, 0xac, 0x5c, 0xf5, 0xb2, 0x4b, 0x40, 0xbd,
+ 0xb0, 0xdb, 0x9e, 0x39, 0x88, 0x36, 0xd7, 0x6d,
+ 0x4b, 0x88, 0x0e, 0x3b, 0x2a, 0xf1, 0xaa, 0x27 },
+ { 0xef, 0x7e, 0x48, 0x31, 0xb3, 0xa8, 0x46, 0x36,
+ 0x51, 0x8d, 0x6e, 0x4b, 0xfc, 0xe6, 0x4a, 0x43,
+ 0xdb, 0x2a, 0x5d, 0xda, 0x9c, 0xca, 0x2b, 0x44,
+ 0xf3, 0x90, 0x33, 0xbd, 0xc4, 0x0d, 0x62, 0x43 },
+ { 0x7a, 0xbf, 0x6a, 0xcf, 0x5c, 0x8e, 0x54, 0x9d,
+ 0xdb, 0xb1, 0x5a, 0xe8, 0xd8, 0xb3, 0x88, 0xc1,
+ 0xc1, 0x97, 0xe6, 0x98, 0x73, 0x7c, 0x97, 0x85,
+ 0x50, 0x1e, 0xd1, 0xf9, 0x49, 0x30, 0xb7, 0xd9 },
+ { 0x88, 0x01, 0x8d, 0xed, 0x66, 0x81, 0x3f, 0x0c,
+ 0xa9, 0x5d, 0xef, 0x47, 0x4c, 0x63, 0x06, 0x92,
+ 0x01, 0x99, 0x67, 0xb9, 0xe3, 0x68, 0x88, 0xda,
+ 0xdd, 0x94, 0x12, 0x47, 0x19, 0xb6, 0x82, 0xf6 },
+ { 0x39, 0x30, 0x87, 0x6b, 0x9f, 0xc7, 0x52, 0x90,
+ 0x36, 0xb0, 0x08, 0xb1, 0xb8, 0xbb, 0x99, 0x75,
+ 0x22, 0xa4, 0x41, 0x63, 0x5a, 0x0c, 0x25, 0xec,
+ 0x02, 0xfb, 0x6d, 0x90, 0x26, 0xe5, 0x5a, 0x97 },
+ { 0x0a, 0x40, 0x49, 0xd5, 0x7e, 0x83, 0x3b, 0x56,
+ 0x95, 0xfa, 0xc9, 0x3d, 0xd1, 0xfb, 0xef, 0x31,
+ 0x66, 0xb4, 0x4b, 0x12, 0xad, 0x11, 0x24, 0x86,
+ 0x62, 0x38, 0x3a, 0xe0, 0x51, 0xe1, 0x58, 0x27 },
+ { 0x81, 0xdc, 0xc0, 0x67, 0x8b, 0xb6, 0xa7, 0x65,
+ 0xe4, 0x8c, 0x32, 0x09, 0x65, 0x4f, 0xe9, 0x00,
+ 0x89, 0xce, 0x44, 0xff, 0x56, 0x18, 0x47, 0x7e,
+ 0x39, 0xab, 0x28, 0x64, 0x76, 0xdf, 0x05, 0x2b },
+ { 0xe6, 0x9b, 0x3a, 0x36, 0xa4, 0x46, 0x19, 0x12,
+ 0xdc, 0x08, 0x34, 0x6b, 0x11, 0xdd, 0xcb, 0x9d,
+ 0xb7, 0x96, 0xf8, 0x85, 0xfd, 0x01, 0x93, 0x6e,
+ 0x66, 0x2f, 0xe2, 0x92, 0x97, 0xb0, 0x99, 0xa4 },
+ { 0x5a, 0xc6, 0x50, 0x3b, 0x0d, 0x8d, 0xa6, 0x91,
+ 0x76, 0x46, 0xe6, 0xdc, 0xc8, 0x7e, 0xdc, 0x58,
+ 0xe9, 0x42, 0x45, 0x32, 0x4c, 0xc2, 0x04, 0xf4,
+ 0xdd, 0x4a, 0xf0, 0x15, 0x63, 0xac, 0xd4, 0x27 },
+ { 0xdf, 0x6d, 0xda, 0x21, 0x35, 0x9a, 0x30, 0xbc,
+ 0x27, 0x17, 0x80, 0x97, 0x1c, 0x1a, 0xbd, 0x56,
+ 0xa6, 0xef, 0x16, 0x7e, 0x48, 0x08, 0x87, 0x88,
+ 0x8e, 0x73, 0xa8, 0x6d, 0x3b, 0xf6, 0x05, 0xe9 },
+ { 0xe8, 0xe6, 0xe4, 0x70, 0x71, 0xe7, 0xb7, 0xdf,
+ 0x25, 0x80, 0xf2, 0x25, 0xcf, 0xbb, 0xed, 0xf8,
+ 0x4c, 0xe6, 0x77, 0x46, 0x62, 0x66, 0x28, 0xd3,
+ 0x30, 0x97, 0xe4, 0xb7, 0xdc, 0x57, 0x11, 0x07 },
+ { 0x53, 0xe4, 0x0e, 0xad, 0x62, 0x05, 0x1e, 0x19,
+ 0xcb, 0x9b, 0xa8, 0x13, 0x3e, 0x3e, 0x5c, 0x1c,
+ 0xe0, 0x0d, 0xdc, 0xad, 0x8a, 0xcf, 0x34, 0x2a,
+ 0x22, 0x43, 0x60, 0xb0, 0xac, 0xc1, 0x47, 0x77 },
+ { 0x9c, 0xcd, 0x53, 0xfe, 0x80, 0xbe, 0x78, 0x6a,
+ 0xa9, 0x84, 0x63, 0x84, 0x62, 0xfb, 0x28, 0xaf,
+ 0xdf, 0x12, 0x2b, 0x34, 0xd7, 0x8f, 0x46, 0x87,
+ 0xec, 0x63, 0x2b, 0xb1, 0x9d, 0xe2, 0x37, 0x1a },
+ { 0xcb, 0xd4, 0x80, 0x52, 0xc4, 0x8d, 0x78, 0x84,
+ 0x66, 0xa3, 0xe8, 0x11, 0x8c, 0x56, 0xc9, 0x7f,
+ 0xe1, 0x46, 0xe5, 0x54, 0x6f, 0xaa, 0xf9, 0x3e,
+ 0x2b, 0xc3, 0xc4, 0x7e, 0x45, 0x93, 0x97, 0x53 },
+ { 0x25, 0x68, 0x83, 0xb1, 0x4e, 0x2a, 0xf4, 0x4d,
+ 0xad, 0xb2, 0x8e, 0x1b, 0x34, 0xb2, 0xac, 0x0f,
+ 0x0f, 0x4c, 0x91, 0xc3, 0x4e, 0xc9, 0x16, 0x9e,
+ 0x29, 0x03, 0x61, 0x58, 0xac, 0xaa, 0x95, 0xb9 },
+ { 0x44, 0x71, 0xb9, 0x1a, 0xb4, 0x2d, 0xb7, 0xc4,
+ 0xdd, 0x84, 0x90, 0xab, 0x95, 0xa2, 0xee, 0x8d,
+ 0x04, 0xe3, 0xef, 0x5c, 0x3d, 0x6f, 0xc7, 0x1a,
+ 0xc7, 0x4b, 0x2b, 0x26, 0x91, 0x4d, 0x16, 0x41 },
+ { 0xa5, 0xeb, 0x08, 0x03, 0x8f, 0x8f, 0x11, 0x55,
+ 0xed, 0x86, 0xe6, 0x31, 0x90, 0x6f, 0xc1, 0x30,
+ 0x95, 0xf6, 0xbb, 0xa4, 0x1d, 0xe5, 0xd4, 0xe7,
+ 0x95, 0x75, 0x8e, 0xc8, 0xc8, 0xdf, 0x8a, 0xf1 },
+ { 0xdc, 0x1d, 0xb6, 0x4e, 0xd8, 0xb4, 0x8a, 0x91,
+ 0x0e, 0x06, 0x0a, 0x6b, 0x86, 0x63, 0x74, 0xc5,
+ 0x78, 0x78, 0x4e, 0x9a, 0xc4, 0x9a, 0xb2, 0x77,
+ 0x40, 0x92, 0xac, 0x71, 0x50, 0x19, 0x34, 0xac },
+ { 0x28, 0x54, 0x13, 0xb2, 0xf2, 0xee, 0x87, 0x3d,
+ 0x34, 0x31, 0x9e, 0xe0, 0xbb, 0xfb, 0xb9, 0x0f,
+ 0x32, 0xda, 0x43, 0x4c, 0xc8, 0x7e, 0x3d, 0xb5,
+ 0xed, 0x12, 0x1b, 0xb3, 0x98, 0xed, 0x96, 0x4b },
+ { 0x02, 0x16, 0xe0, 0xf8, 0x1f, 0x75, 0x0f, 0x26,
+ 0xf1, 0x99, 0x8b, 0xc3, 0x93, 0x4e, 0x3e, 0x12,
+ 0x4c, 0x99, 0x45, 0xe6, 0x85, 0xa6, 0x0b, 0x25,
+ 0xe8, 0xfb, 0xd9, 0x62, 0x5a, 0xb6, 0xb5, 0x99 },
+ { 0x38, 0xc4, 0x10, 0xf5, 0xb9, 0xd4, 0x07, 0x20,
+ 0x50, 0x75, 0x5b, 0x31, 0xdc, 0xa8, 0x9f, 0xd5,
+ 0x39, 0x5c, 0x67, 0x85, 0xee, 0xb3, 0xd7, 0x90,
+ 0xf3, 0x20, 0xff, 0x94, 0x1c, 0x5a, 0x93, 0xbf },
+ { 0xf1, 0x84, 0x17, 0xb3, 0x9d, 0x61, 0x7a, 0xb1,
+ 0xc1, 0x8f, 0xdf, 0x91, 0xeb, 0xd0, 0xfc, 0x6d,
+ 0x55, 0x16, 0xbb, 0x34, 0xcf, 0x39, 0x36, 0x40,
+ 0x37, 0xbc, 0xe8, 0x1f, 0xa0, 0x4c, 0xec, 0xb1 },
+ { 0x1f, 0xa8, 0x77, 0xde, 0x67, 0x25, 0x9d, 0x19,
+ 0x86, 0x3a, 0x2a, 0x34, 0xbc, 0xc6, 0x96, 0x2a,
+ 0x2b, 0x25, 0xfc, 0xbf, 0x5c, 0xbe, 0xcd, 0x7e,
+ 0xde, 0x8f, 0x1f, 0xa3, 0x66, 0x88, 0xa7, 0x96 },
+ { 0x5b, 0xd1, 0x69, 0xe6, 0x7c, 0x82, 0xc2, 0xc2,
+ 0xe9, 0x8e, 0xf7, 0x00, 0x8b, 0xdf, 0x26, 0x1f,
+ 0x2d, 0xdf, 0x30, 0xb1, 0xc0, 0x0f, 0x9e, 0x7f,
+ 0x27, 0x5b, 0xb3, 0xe8, 0xa2, 0x8d, 0xc9, 0xa2 },
+ { 0xc8, 0x0a, 0xbe, 0xeb, 0xb6, 0x69, 0xad, 0x5d,
+ 0xee, 0xb5, 0xf5, 0xec, 0x8e, 0xa6, 0xb7, 0xa0,
+ 0x5d, 0xdf, 0x7d, 0x31, 0xec, 0x4c, 0x0a, 0x2e,
+ 0xe2, 0x0b, 0x0b, 0x98, 0xca, 0xec, 0x67, 0x46 },
+ { 0xe7, 0x6d, 0x3f, 0xbd, 0xa5, 0xba, 0x37, 0x4e,
+ 0x6b, 0xf8, 0xe5, 0x0f, 0xad, 0xc3, 0xbb, 0xb9,
+ 0xba, 0x5c, 0x20, 0x6e, 0xbd, 0xec, 0x89, 0xa3,
+ 0xa5, 0x4c, 0xf3, 0xdd, 0x84, 0xa0, 0x70, 0x16 },
+ { 0x7b, 0xba, 0x9d, 0xc5, 0xb5, 0xdb, 0x20, 0x71,
+ 0xd1, 0x77, 0x52, 0xb1, 0x04, 0x4c, 0x1e, 0xce,
+ 0xd9, 0x6a, 0xaf, 0x2d, 0xd4, 0x6e, 0x9b, 0x43,
+ 0x37, 0x50, 0xe8, 0xea, 0x0d, 0xcc, 0x18, 0x70 },
+ { 0xf2, 0x9b, 0x1b, 0x1a, 0xb9, 0xba, 0xb1, 0x63,
+ 0x01, 0x8e, 0xe3, 0xda, 0x15, 0x23, 0x2c, 0xca,
+ 0x78, 0xec, 0x52, 0xdb, 0xc3, 0x4e, 0xda, 0x5b,
+ 0x82, 0x2e, 0xc1, 0xd8, 0x0f, 0xc2, 0x1b, 0xd0 },
+ { 0x9e, 0xe3, 0xe3, 0xe7, 0xe9, 0x00, 0xf1, 0xe1,
+ 0x1d, 0x30, 0x8c, 0x4b, 0x2b, 0x30, 0x76, 0xd2,
+ 0x72, 0xcf, 0x70, 0x12, 0x4f, 0x9f, 0x51, 0xe1,
+ 0xda, 0x60, 0xf3, 0x78, 0x46, 0xcd, 0xd2, 0xf4 },
+ { 0x70, 0xea, 0x3b, 0x01, 0x76, 0x92, 0x7d, 0x90,
+ 0x96, 0xa1, 0x85, 0x08, 0xcd, 0x12, 0x3a, 0x29,
+ 0x03, 0x25, 0x92, 0x0a, 0x9d, 0x00, 0xa8, 0x9b,
+ 0x5d, 0xe0, 0x42, 0x73, 0xfb, 0xc7, 0x6b, 0x85 },
+ { 0x67, 0xde, 0x25, 0xc0, 0x2a, 0x4a, 0xab, 0xa2,
+ 0x3b, 0xdc, 0x97, 0x3c, 0x8b, 0xb0, 0xb5, 0x79,
+ 0x6d, 0x47, 0xcc, 0x06, 0x59, 0xd4, 0x3d, 0xff,
+ 0x1f, 0x97, 0xde, 0x17, 0x49, 0x63, 0xb6, 0x8e },
+ { 0xb2, 0x16, 0x8e, 0x4e, 0x0f, 0x18, 0xb0, 0xe6,
+ 0x41, 0x00, 0xb5, 0x17, 0xed, 0x95, 0x25, 0x7d,
+ 0x73, 0xf0, 0x62, 0x0d, 0xf8, 0x85, 0xc1, 0x3d,
+ 0x2e, 0xcf, 0x79, 0x36, 0x7b, 0x38, 0x4c, 0xee },
+ { 0x2e, 0x7d, 0xec, 0x24, 0x28, 0x85, 0x3b, 0x2c,
+ 0x71, 0x76, 0x07, 0x45, 0x54, 0x1f, 0x7a, 0xfe,
+ 0x98, 0x25, 0xb5, 0xdd, 0x77, 0xdf, 0x06, 0x51,
+ 0x1d, 0x84, 0x41, 0xa9, 0x4b, 0xac, 0xc9, 0x27 },
+ { 0xca, 0x9f, 0xfa, 0xc4, 0xc4, 0x3f, 0x0b, 0x48,
+ 0x46, 0x1d, 0xc5, 0xc2, 0x63, 0xbe, 0xa3, 0xf6,
+ 0xf0, 0x06, 0x11, 0xce, 0xac, 0xab, 0xf6, 0xf8,
+ 0x95, 0xba, 0x2b, 0x01, 0x01, 0xdb, 0xb6, 0x8d },
+ { 0x74, 0x10, 0xd4, 0x2d, 0x8f, 0xd1, 0xd5, 0xe9,
+ 0xd2, 0xf5, 0x81, 0x5c, 0xb9, 0x34, 0x17, 0x99,
+ 0x88, 0x28, 0xef, 0x3c, 0x42, 0x30, 0xbf, 0xbd,
+ 0x41, 0x2d, 0xf0, 0xa4, 0xa7, 0xa2, 0x50, 0x7a },
+ { 0x50, 0x10, 0xf6, 0x84, 0x51, 0x6d, 0xcc, 0xd0,
+ 0xb6, 0xee, 0x08, 0x52, 0xc2, 0x51, 0x2b, 0x4d,
+ 0xc0, 0x06, 0x6c, 0xf0, 0xd5, 0x6f, 0x35, 0x30,
+ 0x29, 0x78, 0xdb, 0x8a, 0xe3, 0x2c, 0x6a, 0x81 },
+ { 0xac, 0xaa, 0xb5, 0x85, 0xf7, 0xb7, 0x9b, 0x71,
+ 0x99, 0x35, 0xce, 0xb8, 0x95, 0x23, 0xdd, 0xc5,
+ 0x48, 0x27, 0xf7, 0x5c, 0x56, 0x88, 0x38, 0x56,
+ 0x15, 0x4a, 0x56, 0xcd, 0xcd, 0x5e, 0xe9, 0x88 },
+ { 0x66, 0x6d, 0xe5, 0xd1, 0x44, 0x0f, 0xee, 0x73,
+ 0x31, 0xaa, 0xf0, 0x12, 0x3a, 0x62, 0xef, 0x2d,
+ 0x8b, 0xa5, 0x74, 0x53, 0xa0, 0x76, 0x96, 0x35,
+ 0xac, 0x6c, 0xd0, 0x1e, 0x63, 0x3f, 0x77, 0x12 },
+ { 0xa6, 0xf9, 0x86, 0x58, 0xf6, 0xea, 0xba, 0xf9,
+ 0x02, 0xd8, 0xb3, 0x87, 0x1a, 0x4b, 0x10, 0x1d,
+ 0x16, 0x19, 0x6e, 0x8a, 0x4b, 0x24, 0x1e, 0x15,
+ 0x58, 0xfe, 0x29, 0x96, 0x6e, 0x10, 0x3e, 0x8d },
+ { 0x89, 0x15, 0x46, 0xa8, 0xb2, 0x9f, 0x30, 0x47,
+ 0xdd, 0xcf, 0xe5, 0xb0, 0x0e, 0x45, 0xfd, 0x55,
+ 0x75, 0x63, 0x73, 0x10, 0x5e, 0xa8, 0x63, 0x7d,
+ 0xfc, 0xff, 0x54, 0x7b, 0x6e, 0xa9, 0x53, 0x5f },
+ { 0x18, 0xdf, 0xbc, 0x1a, 0xc5, 0xd2, 0x5b, 0x07,
+ 0x61, 0x13, 0x7d, 0xbd, 0x22, 0xc1, 0x7c, 0x82,
+ 0x9d, 0x0f, 0x0e, 0xf1, 0xd8, 0x23, 0x44, 0xe9,
+ 0xc8, 0x9c, 0x28, 0x66, 0x94, 0xda, 0x24, 0xe8 },
+ { 0xb5, 0x4b, 0x9b, 0x67, 0xf8, 0xfe, 0xd5, 0x4b,
+ 0xbf, 0x5a, 0x26, 0x66, 0xdb, 0xdf, 0x4b, 0x23,
+ 0xcf, 0xf1, 0xd1, 0xb6, 0xf4, 0xaf, 0xc9, 0x85,
+ 0xb2, 0xe6, 0xd3, 0x30, 0x5a, 0x9f, 0xf8, 0x0f },
+ { 0x7d, 0xb4, 0x42, 0xe1, 0x32, 0xba, 0x59, 0xbc,
+ 0x12, 0x89, 0xaa, 0x98, 0xb0, 0xd3, 0xe8, 0x06,
+ 0x00, 0x4f, 0x8e, 0xc1, 0x28, 0x11, 0xaf, 0x1e,
+ 0x2e, 0x33, 0xc6, 0x9b, 0xfd, 0xe7, 0x29, 0xe1 },
+ { 0x25, 0x0f, 0x37, 0xcd, 0xc1, 0x5e, 0x81, 0x7d,
+ 0x2f, 0x16, 0x0d, 0x99, 0x56, 0xc7, 0x1f, 0xe3,
+ 0xeb, 0x5d, 0xb7, 0x45, 0x56, 0xe4, 0xad, 0xf9,
+ 0xa4, 0xff, 0xaf, 0xba, 0x74, 0x01, 0x03, 0x96 },
+ { 0x4a, 0xb8, 0xa3, 0xdd, 0x1d, 0xdf, 0x8a, 0xd4,
+ 0x3d, 0xab, 0x13, 0xa2, 0x7f, 0x66, 0xa6, 0x54,
+ 0x4f, 0x29, 0x05, 0x97, 0xfa, 0x96, 0x04, 0x0e,
+ 0x0e, 0x1d, 0xb9, 0x26, 0x3a, 0xa4, 0x79, 0xf8 },
+ { 0xee, 0x61, 0x72, 0x7a, 0x07, 0x66, 0xdf, 0x93,
+ 0x9c, 0xcd, 0xc8, 0x60, 0x33, 0x40, 0x44, 0xc7,
+ 0x9a, 0x3c, 0x9b, 0x15, 0x62, 0x00, 0xbc, 0x3a,
+ 0xa3, 0x29, 0x73, 0x48, 0x3d, 0x83, 0x41, 0xae },
+ { 0x3f, 0x68, 0xc7, 0xec, 0x63, 0xac, 0x11, 0xeb,
+ 0xb9, 0x8f, 0x94, 0xb3, 0x39, 0xb0, 0x5c, 0x10,
+ 0x49, 0x84, 0xfd, 0xa5, 0x01, 0x03, 0x06, 0x01,
+ 0x44, 0xe5, 0xa2, 0xbf, 0xcc, 0xc9, 0xda, 0x95 },
+ { 0x05, 0x6f, 0x29, 0x81, 0x6b, 0x8a, 0xf8, 0xf5,
+ 0x66, 0x82, 0xbc, 0x4d, 0x7c, 0xf0, 0x94, 0x11,
+ 0x1d, 0xa7, 0x73, 0x3e, 0x72, 0x6c, 0xd1, 0x3d,
+ 0x6b, 0x3e, 0x8e, 0xa0, 0x3e, 0x92, 0xa0, 0xd5 },
+ { 0xf5, 0xec, 0x43, 0xa2, 0x8a, 0xcb, 0xef, 0xf1,
+ 0xf3, 0x31, 0x8a, 0x5b, 0xca, 0xc7, 0xc6, 0x6d,
+ 0xdb, 0x52, 0x30, 0xb7, 0x9d, 0xb2, 0xd1, 0x05,
+ 0xbc, 0xbe, 0x15, 0xf3, 0xc1, 0x14, 0x8d, 0x69 },
+ { 0x2a, 0x69, 0x60, 0xad, 0x1d, 0x8d, 0xd5, 0x47,
+ 0x55, 0x5c, 0xfb, 0xd5, 0xe4, 0x60, 0x0f, 0x1e,
+ 0xaa, 0x1c, 0x8e, 0xda, 0x34, 0xde, 0x03, 0x74,
+ 0xec, 0x4a, 0x26, 0xea, 0xaa, 0xa3, 0x3b, 0x4e },
+ { 0xdc, 0xc1, 0xea, 0x7b, 0xaa, 0xb9, 0x33, 0x84,
+ 0xf7, 0x6b, 0x79, 0x68, 0x66, 0x19, 0x97, 0x54,
+ 0x74, 0x2f, 0x7b, 0x96, 0xd6, 0xb4, 0xc1, 0x20,
+ 0x16, 0x5c, 0x04, 0xa6, 0xc4, 0xf5, 0xce, 0x10 },
+ { 0x13, 0xd5, 0xdf, 0x17, 0x92, 0x21, 0x37, 0x9c,
+ 0x6a, 0x78, 0xc0, 0x7c, 0x79, 0x3f, 0xf5, 0x34,
+ 0x87, 0xca, 0xe6, 0xbf, 0x9f, 0xe8, 0x82, 0x54,
+ 0x1a, 0xb0, 0xe7, 0x35, 0xe3, 0xea, 0xda, 0x3b },
+ { 0x8c, 0x59, 0xe4, 0x40, 0x76, 0x41, 0xa0, 0x1e,
+ 0x8f, 0xf9, 0x1f, 0x99, 0x80, 0xdc, 0x23, 0x6f,
+ 0x4e, 0xcd, 0x6f, 0xcf, 0x52, 0x58, 0x9a, 0x09,
+ 0x9a, 0x96, 0x16, 0x33, 0x96, 0x77, 0x14, 0xe1 },
+ { 0x83, 0x3b, 0x1a, 0xc6, 0xa2, 0x51, 0xfd, 0x08,
+ 0xfd, 0x6d, 0x90, 0x8f, 0xea, 0x2a, 0x4e, 0xe1,
+ 0xe0, 0x40, 0xbc, 0xa9, 0x3f, 0xc1, 0xa3, 0x8e,
+ 0xc3, 0x82, 0x0e, 0x0c, 0x10, 0xbd, 0x82, 0xea },
+ { 0xa2, 0x44, 0xf9, 0x27, 0xf3, 0xb4, 0x0b, 0x8f,
+ 0x6c, 0x39, 0x15, 0x70, 0xc7, 0x65, 0x41, 0x8f,
+ 0x2f, 0x6e, 0x70, 0x8e, 0xac, 0x90, 0x06, 0xc5,
+ 0x1a, 0x7f, 0xef, 0xf4, 0xaf, 0x3b, 0x2b, 0x9e },
+ { 0x3d, 0x99, 0xed, 0x95, 0x50, 0xcf, 0x11, 0x96,
+ 0xe6, 0xc4, 0xd2, 0x0c, 0x25, 0x96, 0x20, 0xf8,
+ 0x58, 0xc3, 0xd7, 0x03, 0x37, 0x4c, 0x12, 0x8c,
+ 0xe7, 0xb5, 0x90, 0x31, 0x0c, 0x83, 0x04, 0x6d },
+ { 0x2b, 0x35, 0xc4, 0x7d, 0x7b, 0x87, 0x76, 0x1f,
+ 0x0a, 0xe4, 0x3a, 0xc5, 0x6a, 0xc2, 0x7b, 0x9f,
+ 0x25, 0x83, 0x03, 0x67, 0xb5, 0x95, 0xbe, 0x8c,
+ 0x24, 0x0e, 0x94, 0x60, 0x0c, 0x6e, 0x33, 0x12 },
+ { 0x5d, 0x11, 0xed, 0x37, 0xd2, 0x4d, 0xc7, 0x67,
+ 0x30, 0x5c, 0xb7, 0xe1, 0x46, 0x7d, 0x87, 0xc0,
+ 0x65, 0xac, 0x4b, 0xc8, 0xa4, 0x26, 0xde, 0x38,
+ 0x99, 0x1f, 0xf5, 0x9a, 0xa8, 0x73, 0x5d, 0x02 },
+ { 0xb8, 0x36, 0x47, 0x8e, 0x1c, 0xa0, 0x64, 0x0d,
+ 0xce, 0x6f, 0xd9, 0x10, 0xa5, 0x09, 0x62, 0x72,
+ 0xc8, 0x33, 0x09, 0x90, 0xcd, 0x97, 0x86, 0x4a,
+ 0xc2, 0xbf, 0x14, 0xef, 0x6b, 0x23, 0x91, 0x4a },
+ { 0x91, 0x00, 0xf9, 0x46, 0xd6, 0xcc, 0xde, 0x3a,
+ 0x59, 0x7f, 0x90, 0xd3, 0x9f, 0xc1, 0x21, 0x5b,
+ 0xad, 0xdc, 0x74, 0x13, 0x64, 0x3d, 0x85, 0xc2,
+ 0x1c, 0x3e, 0xee, 0x5d, 0x2d, 0xd3, 0x28, 0x94 },
+ { 0xda, 0x70, 0xee, 0xdd, 0x23, 0xe6, 0x63, 0xaa,
+ 0x1a, 0x74, 0xb9, 0x76, 0x69, 0x35, 0xb4, 0x79,
+ 0x22, 0x2a, 0x72, 0xaf, 0xba, 0x5c, 0x79, 0x51,
+ 0x58, 0xda, 0xd4, 0x1a, 0x3b, 0xd7, 0x7e, 0x40 },
+ { 0xf0, 0x67, 0xed, 0x6a, 0x0d, 0xbd, 0x43, 0xaa,
+ 0x0a, 0x92, 0x54, 0xe6, 0x9f, 0xd6, 0x6b, 0xdd,
+ 0x8a, 0xcb, 0x87, 0xde, 0x93, 0x6c, 0x25, 0x8c,
+ 0xfb, 0x02, 0x28, 0x5f, 0x2c, 0x11, 0xfa, 0x79 },
+ { 0x71, 0x5c, 0x99, 0xc7, 0xd5, 0x75, 0x80, 0xcf,
+ 0x97, 0x53, 0xb4, 0xc1, 0xd7, 0x95, 0xe4, 0x5a,
+ 0x83, 0xfb, 0xb2, 0x28, 0xc0, 0xd3, 0x6f, 0xbe,
+ 0x20, 0xfa, 0xf3, 0x9b, 0xdd, 0x6d, 0x4e, 0x85 },
+ { 0xe4, 0x57, 0xd6, 0xad, 0x1e, 0x67, 0xcb, 0x9b,
+ 0xbd, 0x17, 0xcb, 0xd6, 0x98, 0xfa, 0x6d, 0x7d,
+ 0xae, 0x0c, 0x9b, 0x7a, 0xd6, 0xcb, 0xd6, 0x53,
+ 0x96, 0x34, 0xe3, 0x2a, 0x71, 0x9c, 0x84, 0x92 },
+ { 0xec, 0xe3, 0xea, 0x81, 0x03, 0xe0, 0x24, 0x83,
+ 0xc6, 0x4a, 0x70, 0xa4, 0xbd, 0xce, 0xe8, 0xce,
+ 0xb6, 0x27, 0x8f, 0x25, 0x33, 0xf3, 0xf4, 0x8d,
+ 0xbe, 0xed, 0xfb, 0xa9, 0x45, 0x31, 0xd4, 0xae },
+ { 0x38, 0x8a, 0xa5, 0xd3, 0x66, 0x7a, 0x97, 0xc6,
+ 0x8d, 0x3d, 0x56, 0xf8, 0xf3, 0xee, 0x8d, 0x3d,
+ 0x36, 0x09, 0x1f, 0x17, 0xfe, 0x5d, 0x1b, 0x0d,
+ 0x5d, 0x84, 0xc9, 0x3b, 0x2f, 0xfe, 0x40, 0xbd },
+ { 0x8b, 0x6b, 0x31, 0xb9, 0xad, 0x7c, 0x3d, 0x5c,
+ 0xd8, 0x4b, 0xf9, 0x89, 0x47, 0xb9, 0xcd, 0xb5,
+ 0x9d, 0xf8, 0xa2, 0x5f, 0xf7, 0x38, 0x10, 0x10,
+ 0x13, 0xbe, 0x4f, 0xd6, 0x5e, 0x1d, 0xd1, 0xa3 },
+ { 0x06, 0x62, 0x91, 0xf6, 0xbb, 0xd2, 0x5f, 0x3c,
+ 0x85, 0x3d, 0xb7, 0xd8, 0xb9, 0x5c, 0x9a, 0x1c,
+ 0xfb, 0x9b, 0xf1, 0xc1, 0xc9, 0x9f, 0xb9, 0x5a,
+ 0x9b, 0x78, 0x69, 0xd9, 0x0f, 0x1c, 0x29, 0x03 },
+ { 0xa7, 0x07, 0xef, 0xbc, 0xcd, 0xce, 0xed, 0x42,
+ 0x96, 0x7a, 0x66, 0xf5, 0x53, 0x9b, 0x93, 0xed,
+ 0x75, 0x60, 0xd4, 0x67, 0x30, 0x40, 0x16, 0xc4,
+ 0x78, 0x0d, 0x77, 0x55, 0xa5, 0x65, 0xd4, 0xc4 },
+ { 0x38, 0xc5, 0x3d, 0xfb, 0x70, 0xbe, 0x7e, 0x79,
+ 0x2b, 0x07, 0xa6, 0xa3, 0x5b, 0x8a, 0x6a, 0x0a,
+ 0xba, 0x02, 0xc5, 0xc5, 0xf3, 0x8b, 0xaf, 0x5c,
+ 0x82, 0x3f, 0xdf, 0xd9, 0xe4, 0x2d, 0x65, 0x7e },
+ { 0xf2, 0x91, 0x13, 0x86, 0x50, 0x1d, 0x9a, 0xb9,
+ 0xd7, 0x20, 0xcf, 0x8a, 0xd1, 0x05, 0x03, 0xd5,
+ 0x63, 0x4b, 0xf4, 0xb7, 0xd1, 0x2b, 0x56, 0xdf,
+ 0xb7, 0x4f, 0xec, 0xc6, 0xe4, 0x09, 0x3f, 0x68 },
+ { 0xc6, 0xf2, 0xbd, 0xd5, 0x2b, 0x81, 0xe6, 0xe4,
+ 0xf6, 0x59, 0x5a, 0xbd, 0x4d, 0x7f, 0xb3, 0x1f,
+ 0x65, 0x11, 0x69, 0xd0, 0x0f, 0xf3, 0x26, 0x92,
+ 0x6b, 0x34, 0x94, 0x7b, 0x28, 0xa8, 0x39, 0x59 },
+ { 0x29, 0x3d, 0x94, 0xb1, 0x8c, 0x98, 0xbb, 0x32,
+ 0x23, 0x36, 0x6b, 0x8c, 0xe7, 0x4c, 0x28, 0xfb,
+ 0xdf, 0x28, 0xe1, 0xf8, 0x4a, 0x33, 0x50, 0xb0,
+ 0xeb, 0x2d, 0x18, 0x04, 0xa5, 0x77, 0x57, 0x9b },
+ { 0x2c, 0x2f, 0xa5, 0xc0, 0xb5, 0x15, 0x33, 0x16,
+ 0x5b, 0xc3, 0x75, 0xc2, 0x2e, 0x27, 0x81, 0x76,
+ 0x82, 0x70, 0xa3, 0x83, 0x98, 0x5d, 0x13, 0xbd,
+ 0x6b, 0x67, 0xb6, 0xfd, 0x67, 0xf8, 0x89, 0xeb },
+ { 0xca, 0xa0, 0x9b, 0x82, 0xb7, 0x25, 0x62, 0xe4,
+ 0x3f, 0x4b, 0x22, 0x75, 0xc0, 0x91, 0x91, 0x8e,
+ 0x62, 0x4d, 0x91, 0x16, 0x61, 0xcc, 0x81, 0x1b,
+ 0xb5, 0xfa, 0xec, 0x51, 0xf6, 0x08, 0x8e, 0xf7 },
+ { 0x24, 0x76, 0x1e, 0x45, 0xe6, 0x74, 0x39, 0x53,
+ 0x79, 0xfb, 0x17, 0x72, 0x9c, 0x78, 0xcb, 0x93,
+ 0x9e, 0x6f, 0x74, 0xc5, 0xdf, 0xfb, 0x9c, 0x96,
+ 0x1f, 0x49, 0x59, 0x82, 0xc3, 0xed, 0x1f, 0xe3 },
+ { 0x55, 0xb7, 0x0a, 0x82, 0x13, 0x1e, 0xc9, 0x48,
+ 0x88, 0xd7, 0xab, 0x54, 0xa7, 0xc5, 0x15, 0x25,
+ 0x5c, 0x39, 0x38, 0xbb, 0x10, 0xbc, 0x78, 0x4d,
+ 0xc9, 0xb6, 0x7f, 0x07, 0x6e, 0x34, 0x1a, 0x73 },
+ { 0x6a, 0xb9, 0x05, 0x7b, 0x97, 0x7e, 0xbc, 0x3c,
+ 0xa4, 0xd4, 0xce, 0x74, 0x50, 0x6c, 0x25, 0xcc,
+ 0xcd, 0xc5, 0x66, 0x49, 0x7c, 0x45, 0x0b, 0x54,
+ 0x15, 0xa3, 0x94, 0x86, 0xf8, 0x65, 0x7a, 0x03 },
+ { 0x24, 0x06, 0x6d, 0xee, 0xe0, 0xec, 0xee, 0x15,
+ 0xa4, 0x5f, 0x0a, 0x32, 0x6d, 0x0f, 0x8d, 0xbc,
+ 0x79, 0x76, 0x1e, 0xbb, 0x93, 0xcf, 0x8c, 0x03,
+ 0x77, 0xaf, 0x44, 0x09, 0x78, 0xfc, 0xf9, 0x94 },
+ { 0x20, 0x00, 0x0d, 0x3f, 0x66, 0xba, 0x76, 0x86,
+ 0x0d, 0x5a, 0x95, 0x06, 0x88, 0xb9, 0xaa, 0x0d,
+ 0x76, 0xcf, 0xea, 0x59, 0xb0, 0x05, 0xd8, 0x59,
+ 0x91, 0x4b, 0x1a, 0x46, 0x65, 0x3a, 0x93, 0x9b },
+ { 0xb9, 0x2d, 0xaa, 0x79, 0x60, 0x3e, 0x3b, 0xdb,
+ 0xc3, 0xbf, 0xe0, 0xf4, 0x19, 0xe4, 0x09, 0xb2,
+ 0xea, 0x10, 0xdc, 0x43, 0x5b, 0xee, 0xfe, 0x29,
+ 0x59, 0xda, 0x16, 0x89, 0x5d, 0x5d, 0xca, 0x1c },
+ { 0xe9, 0x47, 0x94, 0x87, 0x05, 0xb2, 0x06, 0xd5,
+ 0x72, 0xb0, 0xe8, 0xf6, 0x2f, 0x66, 0xa6, 0x55,
+ 0x1c, 0xbd, 0x6b, 0xc3, 0x05, 0xd2, 0x6c, 0xe7,
+ 0x53, 0x9a, 0x12, 0xf9, 0xaa, 0xdf, 0x75, 0x71 },
+ { 0x3d, 0x67, 0xc1, 0xb3, 0xf9, 0xb2, 0x39, 0x10,
+ 0xe3, 0xd3, 0x5e, 0x6b, 0x0f, 0x2c, 0xcf, 0x44,
+ 0xa0, 0xb5, 0x40, 0xa4, 0x5c, 0x18, 0xba, 0x3c,
+ 0x36, 0x26, 0x4d, 0xd4, 0x8e, 0x96, 0xaf, 0x6a },
+ { 0xc7, 0x55, 0x8b, 0xab, 0xda, 0x04, 0xbc, 0xcb,
+ 0x76, 0x4d, 0x0b, 0xbf, 0x33, 0x58, 0x42, 0x51,
+ 0x41, 0x90, 0x2d, 0x22, 0x39, 0x1d, 0x9f, 0x8c,
+ 0x59, 0x15, 0x9f, 0xec, 0x9e, 0x49, 0xb1, 0x51 },
+ { 0x0b, 0x73, 0x2b, 0xb0, 0x35, 0x67, 0x5a, 0x50,
+ 0xff, 0x58, 0xf2, 0xc2, 0x42, 0xe4, 0x71, 0x0a,
+ 0xec, 0xe6, 0x46, 0x70, 0x07, 0x9c, 0x13, 0x04,
+ 0x4c, 0x79, 0xc9, 0xb7, 0x49, 0x1f, 0x70, 0x00 },
+ { 0xd1, 0x20, 0xb5, 0xef, 0x6d, 0x57, 0xeb, 0xf0,
+ 0x6e, 0xaf, 0x96, 0xbc, 0x93, 0x3c, 0x96, 0x7b,
+ 0x16, 0xcb, 0xe6, 0xe2, 0xbf, 0x00, 0x74, 0x1c,
+ 0x30, 0xaa, 0x1c, 0x54, 0xba, 0x64, 0x80, 0x1f },
+ { 0x58, 0xd2, 0x12, 0xad, 0x6f, 0x58, 0xae, 0xf0,
+ 0xf8, 0x01, 0x16, 0xb4, 0x41, 0xe5, 0x7f, 0x61,
+ 0x95, 0xbf, 0xef, 0x26, 0xb6, 0x14, 0x63, 0xed,
+ 0xec, 0x11, 0x83, 0xcd, 0xb0, 0x4f, 0xe7, 0x6d },
+ { 0xb8, 0x83, 0x6f, 0x51, 0xd1, 0xe2, 0x9b, 0xdf,
+ 0xdb, 0xa3, 0x25, 0x56, 0x53, 0x60, 0x26, 0x8b,
+ 0x8f, 0xad, 0x62, 0x74, 0x73, 0xed, 0xec, 0xef,
+ 0x7e, 0xae, 0xfe, 0xe8, 0x37, 0xc7, 0x40, 0x03 },
+ { 0xc5, 0x47, 0xa3, 0xc1, 0x24, 0xae, 0x56, 0x85,
+ 0xff, 0xa7, 0xb8, 0xed, 0xaf, 0x96, 0xec, 0x86,
+ 0xf8, 0xb2, 0xd0, 0xd5, 0x0c, 0xee, 0x8b, 0xe3,
+ 0xb1, 0xf0, 0xc7, 0x67, 0x63, 0x06, 0x9d, 0x9c },
+ { 0x5d, 0x16, 0x8b, 0x76, 0x9a, 0x2f, 0x67, 0x85,
+ 0x3d, 0x62, 0x95, 0xf7, 0x56, 0x8b, 0xe4, 0x0b,
+ 0xb7, 0xa1, 0x6b, 0x8d, 0x65, 0xba, 0x87, 0x63,
+ 0x5d, 0x19, 0x78, 0xd2, 0xab, 0x11, 0xba, 0x2a },
+ { 0xa2, 0xf6, 0x75, 0xdc, 0x73, 0x02, 0x63, 0x8c,
+ 0xb6, 0x02, 0x01, 0x06, 0x4c, 0xa5, 0x50, 0x77,
+ 0x71, 0x4d, 0x71, 0xfe, 0x09, 0x6a, 0x31, 0x5f,
+ 0x2f, 0xe7, 0x40, 0x12, 0x77, 0xca, 0xa5, 0xaf },
+ { 0xc8, 0xaa, 0xb5, 0xcd, 0x01, 0x60, 0xae, 0x78,
+ 0xcd, 0x2e, 0x8a, 0xc5, 0xfb, 0x0e, 0x09, 0x3c,
+ 0xdb, 0x5c, 0x4b, 0x60, 0x52, 0xa0, 0xa9, 0x7b,
+ 0xb0, 0x42, 0x16, 0x82, 0x6f, 0xa7, 0xa4, 0x37 },
+ { 0xff, 0x68, 0xca, 0x40, 0x35, 0xbf, 0xeb, 0x43,
+ 0xfb, 0xf1, 0x45, 0xfd, 0xdd, 0x5e, 0x43, 0xf1,
+ 0xce, 0xa5, 0x4f, 0x11, 0xf7, 0xbe, 0xe1, 0x30,
+ 0x58, 0xf0, 0x27, 0x32, 0x9a, 0x4a, 0x5f, 0xa4 },
+ { 0x1d, 0x4e, 0x54, 0x87, 0xae, 0x3c, 0x74, 0x0f,
+ 0x2b, 0xa6, 0xe5, 0x41, 0xac, 0x91, 0xbc, 0x2b,
+ 0xfc, 0xd2, 0x99, 0x9c, 0x51, 0x8d, 0x80, 0x7b,
+ 0x42, 0x67, 0x48, 0x80, 0x3a, 0x35, 0x0f, 0xd4 },
+ { 0x6d, 0x24, 0x4e, 0x1a, 0x06, 0xce, 0x4e, 0xf5,
+ 0x78, 0xdd, 0x0f, 0x63, 0xaf, 0xf0, 0x93, 0x67,
+ 0x06, 0x73, 0x51, 0x19, 0xca, 0x9c, 0x8d, 0x22,
+ 0xd8, 0x6c, 0x80, 0x14, 0x14, 0xab, 0x97, 0x41 },
+ { 0xde, 0xcf, 0x73, 0x29, 0xdb, 0xcc, 0x82, 0x7b,
+ 0x8f, 0xc5, 0x24, 0xc9, 0x43, 0x1e, 0x89, 0x98,
+ 0x02, 0x9e, 0xce, 0x12, 0xce, 0x93, 0xb7, 0xb2,
+ 0xf3, 0xe7, 0x69, 0xa9, 0x41, 0xfb, 0x8c, 0xea },
+ { 0x2f, 0xaf, 0xcc, 0x0f, 0x2e, 0x63, 0xcb, 0xd0,
+ 0x77, 0x55, 0xbe, 0x7b, 0x75, 0xec, 0xea, 0x0a,
+ 0xdf, 0xf9, 0xaa, 0x5e, 0xde, 0x2a, 0x52, 0xfd,
+ 0xab, 0x4d, 0xfd, 0x03, 0x74, 0xcd, 0x48, 0x3f },
+ { 0xaa, 0x85, 0x01, 0x0d, 0xd4, 0x6a, 0x54, 0x6b,
+ 0x53, 0x5e, 0xf4, 0xcf, 0x5f, 0x07, 0xd6, 0x51,
+ 0x61, 0xe8, 0x98, 0x28, 0xf3, 0xa7, 0x7d, 0xb7,
+ 0xb9, 0xb5, 0x6f, 0x0d, 0xf5, 0x9a, 0xae, 0x45 },
+ { 0x07, 0xe8, 0xe1, 0xee, 0x73, 0x2c, 0xb0, 0xd3,
+ 0x56, 0xc9, 0xc0, 0xd1, 0x06, 0x9c, 0x89, 0xd1,
+ 0x7a, 0xdf, 0x6a, 0x9a, 0x33, 0x4f, 0x74, 0x5e,
+ 0xc7, 0x86, 0x73, 0x32, 0x54, 0x8c, 0xa8, 0xe9 },
+ { 0x0e, 0x01, 0xe8, 0x1c, 0xad, 0xa8, 0x16, 0x2b,
+ 0xfd, 0x5f, 0x8a, 0x8c, 0x81, 0x8a, 0x6c, 0x69,
+ 0xfe, 0xdf, 0x02, 0xce, 0xb5, 0x20, 0x85, 0x23,
+ 0xcb, 0xe5, 0x31, 0x3b, 0x89, 0xca, 0x10, 0x53 },
+ { 0x6b, 0xb6, 0xc6, 0x47, 0x26, 0x55, 0x08, 0x43,
+ 0x99, 0x85, 0x2e, 0x00, 0x24, 0x9f, 0x8c, 0xb2,
+ 0x47, 0x89, 0x6d, 0x39, 0x2b, 0x02, 0xd7, 0x3b,
+ 0x7f, 0x0d, 0xd8, 0x18, 0xe1, 0xe2, 0x9b, 0x07 },
+ { 0x42, 0xd4, 0x63, 0x6e, 0x20, 0x60, 0xf0, 0x8f,
+ 0x41, 0xc8, 0x82, 0xe7, 0x6b, 0x39, 0x6b, 0x11,
+ 0x2e, 0xf6, 0x27, 0xcc, 0x24, 0xc4, 0x3d, 0xd5,
+ 0xf8, 0x3a, 0x1d, 0x1a, 0x7e, 0xad, 0x71, 0x1a },
+ { 0x48, 0x58, 0xc9, 0xa1, 0x88, 0xb0, 0x23, 0x4f,
+ 0xb9, 0xa8, 0xd4, 0x7d, 0x0b, 0x41, 0x33, 0x65,
+ 0x0a, 0x03, 0x0b, 0xd0, 0x61, 0x1b, 0x87, 0xc3,
+ 0x89, 0x2e, 0x94, 0x95, 0x1f, 0x8d, 0xf8, 0x52 },
+ { 0x3f, 0xab, 0x3e, 0x36, 0x98, 0x8d, 0x44, 0x5a,
+ 0x51, 0xc8, 0x78, 0x3e, 0x53, 0x1b, 0xe3, 0xa0,
+ 0x2b, 0xe4, 0x0c, 0xd0, 0x47, 0x96, 0xcf, 0xb6,
+ 0x1d, 0x40, 0x34, 0x74, 0x42, 0xd3, 0xf7, 0x94 },
+ { 0xeb, 0xab, 0xc4, 0x96, 0x36, 0xbd, 0x43, 0x3d,
+ 0x2e, 0xc8, 0xf0, 0xe5, 0x18, 0x73, 0x2e, 0xf8,
+ 0xfa, 0x21, 0xd4, 0xd0, 0x71, 0xcc, 0x3b, 0xc4,
+ 0x6c, 0xd7, 0x9f, 0xa3, 0x8a, 0x28, 0xb8, 0x10 },
+ { 0xa1, 0xd0, 0x34, 0x35, 0x23, 0xb8, 0x93, 0xfc,
+ 0xa8, 0x4f, 0x47, 0xfe, 0xb4, 0xa6, 0x4d, 0x35,
+ 0x0a, 0x17, 0xd8, 0xee, 0xf5, 0x49, 0x7e, 0xce,
+ 0x69, 0x7d, 0x02, 0xd7, 0x91, 0x78, 0xb5, 0x91 },
+ { 0x26, 0x2e, 0xbf, 0xd9, 0x13, 0x0b, 0x7d, 0x28,
+ 0x76, 0x0d, 0x08, 0xef, 0x8b, 0xfd, 0x3b, 0x86,
+ 0xcd, 0xd3, 0xb2, 0x11, 0x3d, 0x2c, 0xae, 0xf7,
+ 0xea, 0x95, 0x1a, 0x30, 0x3d, 0xfa, 0x38, 0x46 },
+ { 0xf7, 0x61, 0x58, 0xed, 0xd5, 0x0a, 0x15, 0x4f,
+ 0xa7, 0x82, 0x03, 0xed, 0x23, 0x62, 0x93, 0x2f,
+ 0xcb, 0x82, 0x53, 0xaa, 0xe3, 0x78, 0x90, 0x3e,
+ 0xde, 0xd1, 0xe0, 0x3f, 0x70, 0x21, 0xa2, 0x57 },
+ { 0x26, 0x17, 0x8e, 0x95, 0x0a, 0xc7, 0x22, 0xf6,
+ 0x7a, 0xe5, 0x6e, 0x57, 0x1b, 0x28, 0x4c, 0x02,
+ 0x07, 0x68, 0x4a, 0x63, 0x34, 0xa1, 0x77, 0x48,
+ 0xa9, 0x4d, 0x26, 0x0b, 0xc5, 0xf5, 0x52, 0x74 },
+ { 0xc3, 0x78, 0xd1, 0xe4, 0x93, 0xb4, 0x0e, 0xf1,
+ 0x1f, 0xe6, 0xa1, 0x5d, 0x9c, 0x27, 0x37, 0xa3,
+ 0x78, 0x09, 0x63, 0x4c, 0x5a, 0xba, 0xd5, 0xb3,
+ 0x3d, 0x7e, 0x39, 0x3b, 0x4a, 0xe0, 0x5d, 0x03 },
+ { 0x98, 0x4b, 0xd8, 0x37, 0x91, 0x01, 0xbe, 0x8f,
+ 0xd8, 0x06, 0x12, 0xd8, 0xea, 0x29, 0x59, 0xa7,
+ 0x86, 0x5e, 0xc9, 0x71, 0x85, 0x23, 0x55, 0x01,
+ 0x07, 0xae, 0x39, 0x38, 0xdf, 0x32, 0x01, 0x1b },
+ { 0xc6, 0xf2, 0x5a, 0x81, 0x2a, 0x14, 0x48, 0x58,
+ 0xac, 0x5c, 0xed, 0x37, 0xa9, 0x3a, 0x9f, 0x47,
+ 0x59, 0xba, 0x0b, 0x1c, 0x0f, 0xdc, 0x43, 0x1d,
+ 0xce, 0x35, 0xf9, 0xec, 0x1f, 0x1f, 0x4a, 0x99 },
+ { 0x92, 0x4c, 0x75, 0xc9, 0x44, 0x24, 0xff, 0x75,
+ 0xe7, 0x4b, 0x8b, 0x4e, 0x94, 0x35, 0x89, 0x58,
+ 0xb0, 0x27, 0xb1, 0x71, 0xdf, 0x5e, 0x57, 0x89,
+ 0x9a, 0xd0, 0xd4, 0xda, 0xc3, 0x73, 0x53, 0xb6 },
+ { 0x0a, 0xf3, 0x58, 0x92, 0xa6, 0x3f, 0x45, 0x93,
+ 0x1f, 0x68, 0x46, 0xed, 0x19, 0x03, 0x61, 0xcd,
+ 0x07, 0x30, 0x89, 0xe0, 0x77, 0x16, 0x57, 0x14,
+ 0xb5, 0x0b, 0x81, 0xa2, 0xe3, 0xdd, 0x9b, 0xa1 },
+ { 0xcc, 0x80, 0xce, 0xfb, 0x26, 0xc3, 0xb2, 0xb0,
+ 0xda, 0xef, 0x23, 0x3e, 0x60, 0x6d, 0x5f, 0xfc,
+ 0x80, 0xfa, 0x17, 0x42, 0x7d, 0x18, 0xe3, 0x04,
+ 0x89, 0x67, 0x3e, 0x06, 0xef, 0x4b, 0x87, 0xf7 },
+ { 0xc2, 0xf8, 0xc8, 0x11, 0x74, 0x47, 0xf3, 0x97,
+ 0x8b, 0x08, 0x18, 0xdc, 0xf6, 0xf7, 0x01, 0x16,
+ 0xac, 0x56, 0xfd, 0x18, 0x4d, 0xd1, 0x27, 0x84,
+ 0x94, 0xe1, 0x03, 0xfc, 0x6d, 0x74, 0xa8, 0x87 },
+ { 0xbd, 0xec, 0xf6, 0xbf, 0xc1, 0xba, 0x0d, 0xf6,
+ 0xe8, 0x62, 0xc8, 0x31, 0x99, 0x22, 0x07, 0x79,
+ 0x6a, 0xcc, 0x79, 0x79, 0x68, 0x35, 0x88, 0x28,
+ 0xc0, 0x6e, 0x7a, 0x51, 0xe0, 0x90, 0x09, 0x8f },
+ { 0x24, 0xd1, 0xa2, 0x6e, 0x3d, 0xab, 0x02, 0xfe,
+ 0x45, 0x72, 0xd2, 0xaa, 0x7d, 0xbd, 0x3e, 0xc3,
+ 0x0f, 0x06, 0x93, 0xdb, 0x26, 0xf2, 0x73, 0xd0,
+ 0xab, 0x2c, 0xb0, 0xc1, 0x3b, 0x5e, 0x64, 0x51 },
+ { 0xec, 0x56, 0xf5, 0x8b, 0x09, 0x29, 0x9a, 0x30,
+ 0x0b, 0x14, 0x05, 0x65, 0xd7, 0xd3, 0xe6, 0x87,
+ 0x82, 0xb6, 0xe2, 0xfb, 0xeb, 0x4b, 0x7e, 0xa9,
+ 0x7a, 0xc0, 0x57, 0x98, 0x90, 0x61, 0xdd, 0x3f },
+ { 0x11, 0xa4, 0x37, 0xc1, 0xab, 0xa3, 0xc1, 0x19,
+ 0xdd, 0xfa, 0xb3, 0x1b, 0x3e, 0x8c, 0x84, 0x1d,
+ 0xee, 0xeb, 0x91, 0x3e, 0xf5, 0x7f, 0x7e, 0x48,
+ 0xf2, 0xc9, 0xcf, 0x5a, 0x28, 0xfa, 0x42, 0xbc },
+ { 0x53, 0xc7, 0xe6, 0x11, 0x4b, 0x85, 0x0a, 0x2c,
+ 0xb4, 0x96, 0xc9, 0xb3, 0xc6, 0x9a, 0x62, 0x3e,
+ 0xae, 0xa2, 0xcb, 0x1d, 0x33, 0xdd, 0x81, 0x7e,
+ 0x47, 0x65, 0xed, 0xaa, 0x68, 0x23, 0xc2, 0x28 },
+ { 0x15, 0x4c, 0x3e, 0x96, 0xfe, 0xe5, 0xdb, 0x14,
+ 0xf8, 0x77, 0x3e, 0x18, 0xaf, 0x14, 0x85, 0x79,
+ 0x13, 0x50, 0x9d, 0xa9, 0x99, 0xb4, 0x6c, 0xdd,
+ 0x3d, 0x4c, 0x16, 0x97, 0x60, 0xc8, 0x3a, 0xd2 },
+ { 0x40, 0xb9, 0x91, 0x6f, 0x09, 0x3e, 0x02, 0x7a,
+ 0x87, 0x86, 0x64, 0x18, 0x18, 0x92, 0x06, 0x20,
+ 0x47, 0x2f, 0xbc, 0xf6, 0x8f, 0x70, 0x1d, 0x1b,
+ 0x68, 0x06, 0x32, 0xe6, 0x99, 0x6b, 0xde, 0xd3 },
+ { 0x24, 0xc4, 0xcb, 0xba, 0x07, 0x11, 0x98, 0x31,
+ 0xa7, 0x26, 0xb0, 0x53, 0x05, 0xd9, 0x6d, 0xa0,
+ 0x2f, 0xf8, 0xb1, 0x48, 0xf0, 0xda, 0x44, 0x0f,
+ 0xe2, 0x33, 0xbc, 0xaa, 0x32, 0xc7, 0x2f, 0x6f },
+ { 0x5d, 0x20, 0x15, 0x10, 0x25, 0x00, 0x20, 0xb7,
+ 0x83, 0x68, 0x96, 0x88, 0xab, 0xbf, 0x8e, 0xcf,
+ 0x25, 0x94, 0xa9, 0x6a, 0x08, 0xf2, 0xbf, 0xec,
+ 0x6c, 0xe0, 0x57, 0x44, 0x65, 0xdd, 0xed, 0x71 },
+ { 0x04, 0x3b, 0x97, 0xe3, 0x36, 0xee, 0x6f, 0xdb,
+ 0xbe, 0x2b, 0x50, 0xf2, 0x2a, 0xf8, 0x32, 0x75,
+ 0xa4, 0x08, 0x48, 0x05, 0xd2, 0xd5, 0x64, 0x59,
+ 0x62, 0x45, 0x4b, 0x6c, 0x9b, 0x80, 0x53, 0xa0 },
+ { 0x56, 0x48, 0x35, 0xcb, 0xae, 0xa7, 0x74, 0x94,
+ 0x85, 0x68, 0xbe, 0x36, 0xcf, 0x52, 0xfc, 0xdd,
+ 0x83, 0x93, 0x4e, 0xb0, 0xa2, 0x75, 0x12, 0xdb,
+ 0xe3, 0xe2, 0xdb, 0x47, 0xb9, 0xe6, 0x63, 0x5a },
+ { 0xf2, 0x1c, 0x33, 0xf4, 0x7b, 0xde, 0x40, 0xa2,
+ 0xa1, 0x01, 0xc9, 0xcd, 0xe8, 0x02, 0x7a, 0xaf,
+ 0x61, 0xa3, 0x13, 0x7d, 0xe2, 0x42, 0x2b, 0x30,
+ 0x03, 0x5a, 0x04, 0xc2, 0x70, 0x89, 0x41, 0x83 },
+ { 0x9d, 0xb0, 0xef, 0x74, 0xe6, 0x6c, 0xbb, 0x84,
+ 0x2e, 0xb0, 0xe0, 0x73, 0x43, 0xa0, 0x3c, 0x5c,
+ 0x56, 0x7e, 0x37, 0x2b, 0x3f, 0x23, 0xb9, 0x43,
+ 0xc7, 0x88, 0xa4, 0xf2, 0x50, 0xf6, 0x78, 0x91 },
+ { 0xab, 0x8d, 0x08, 0x65, 0x5f, 0xf1, 0xd3, 0xfe,
+ 0x87, 0x58, 0xd5, 0x62, 0x23, 0x5f, 0xd2, 0x3e,
+ 0x7c, 0xf9, 0xdc, 0xaa, 0xd6, 0x58, 0x87, 0x2a,
+ 0x49, 0xe5, 0xd3, 0x18, 0x3b, 0x6c, 0xce, 0xbd },
+ { 0x6f, 0x27, 0xf7, 0x7e, 0x7b, 0xcf, 0x46, 0xa1,
+ 0xe9, 0x63, 0xad, 0xe0, 0x30, 0x97, 0x33, 0x54,
+ 0x30, 0x31, 0xdc, 0xcd, 0xd4, 0x7c, 0xaa, 0xc1,
+ 0x74, 0xd7, 0xd2, 0x7c, 0xe8, 0x07, 0x7e, 0x8b },
+ { 0xe3, 0xcd, 0x54, 0xda, 0x7e, 0x44, 0x4c, 0xaa,
+ 0x62, 0x07, 0x56, 0x95, 0x25, 0xa6, 0x70, 0xeb,
+ 0xae, 0x12, 0x78, 0xde, 0x4e, 0x3f, 0xe2, 0x68,
+ 0x4b, 0x3e, 0x33, 0xf5, 0xef, 0x90, 0xcc, 0x1b },
+ { 0xb2, 0xc3, 0xe3, 0x3a, 0x51, 0xd2, 0x2c, 0x4c,
+ 0x08, 0xfc, 0x09, 0x89, 0xc8, 0x73, 0xc9, 0xcc,
+ 0x41, 0x50, 0x57, 0x9b, 0x1e, 0x61, 0x63, 0xfa,
+ 0x69, 0x4a, 0xd5, 0x1d, 0x53, 0xd7, 0x12, 0xdc },
+ { 0xbe, 0x7f, 0xda, 0x98, 0x3e, 0x13, 0x18, 0x9b,
+ 0x4c, 0x77, 0xe0, 0xa8, 0x09, 0x20, 0xb6, 0xe0,
+ 0xe0, 0xea, 0x80, 0xc3, 0xb8, 0x4d, 0xbe, 0x7e,
+ 0x71, 0x17, 0xd2, 0x53, 0xf4, 0x81, 0x12, 0xf4 },
+ { 0xb6, 0x00, 0x8c, 0x28, 0xfa, 0xe0, 0x8a, 0xa4,
+ 0x27, 0xe5, 0xbd, 0x3a, 0xad, 0x36, 0xf1, 0x00,
+ 0x21, 0xf1, 0x6c, 0x77, 0xcf, 0xea, 0xbe, 0xd0,
+ 0x7f, 0x97, 0xcc, 0x7d, 0xc1, 0xf1, 0x28, 0x4a },
+ { 0x6e, 0x4e, 0x67, 0x60, 0xc5, 0x38, 0xf2, 0xe9,
+ 0x7b, 0x3a, 0xdb, 0xfb, 0xbc, 0xde, 0x57, 0xf8,
+ 0x96, 0x6b, 0x7e, 0xa8, 0xfc, 0xb5, 0xbf, 0x7e,
+ 0xfe, 0xc9, 0x13, 0xfd, 0x2a, 0x2b, 0x0c, 0x55 },
+ { 0x4a, 0xe5, 0x1f, 0xd1, 0x83, 0x4a, 0xa5, 0xbd,
+ 0x9a, 0x6f, 0x7e, 0xc3, 0x9f, 0xc6, 0x63, 0x33,
+ 0x8d, 0xc5, 0xd2, 0xe2, 0x07, 0x61, 0x56, 0x6d,
+ 0x90, 0xcc, 0x68, 0xb1, 0xcb, 0x87, 0x5e, 0xd8 },
+ { 0xb6, 0x73, 0xaa, 0xd7, 0x5a, 0xb1, 0xfd, 0xb5,
+ 0x40, 0x1a, 0xbf, 0xa1, 0xbf, 0x89, 0xf3, 0xad,
+ 0xd2, 0xeb, 0xc4, 0x68, 0xdf, 0x36, 0x24, 0xa4,
+ 0x78, 0xf4, 0xfe, 0x85, 0x9d, 0x8d, 0x55, 0xe2 },
+ { 0x13, 0xc9, 0x47, 0x1a, 0x98, 0x55, 0x91, 0x35,
+ 0x39, 0x83, 0x66, 0x60, 0x39, 0x8d, 0xa0, 0xf3,
+ 0xf9, 0x9a, 0xda, 0x08, 0x47, 0x9c, 0x69, 0xd1,
+ 0xb7, 0xfc, 0xaa, 0x34, 0x61, 0xdd, 0x7e, 0x59 },
+ { 0x2c, 0x11, 0xf4, 0xa7, 0xf9, 0x9a, 0x1d, 0x23,
+ 0xa5, 0x8b, 0xb6, 0x36, 0x35, 0x0f, 0xe8, 0x49,
+ 0xf2, 0x9c, 0xba, 0xc1, 0xb2, 0xa1, 0x11, 0x2d,
+ 0x9f, 0x1e, 0xd5, 0xbc, 0x5b, 0x31, 0x3c, 0xcd },
+ { 0xc7, 0xd3, 0xc0, 0x70, 0x6b, 0x11, 0xae, 0x74,
+ 0x1c, 0x05, 0xa1, 0xef, 0x15, 0x0d, 0xd6, 0x5b,
+ 0x54, 0x94, 0xd6, 0xd5, 0x4c, 0x9a, 0x86, 0xe2,
+ 0x61, 0x78, 0x54, 0xe6, 0xae, 0xee, 0xbb, 0xd9 },
+ { 0x19, 0x4e, 0x10, 0xc9, 0x38, 0x93, 0xaf, 0xa0,
+ 0x64, 0xc3, 0xac, 0x04, 0xc0, 0xdd, 0x80, 0x8d,
+ 0x79, 0x1c, 0x3d, 0x4b, 0x75, 0x56, 0xe8, 0x9d,
+ 0x8d, 0x9c, 0xb2, 0x25, 0xc4, 0xb3, 0x33, 0x39 },
+ { 0x6f, 0xc4, 0x98, 0x8b, 0x8f, 0x78, 0x54, 0x6b,
+ 0x16, 0x88, 0x99, 0x18, 0x45, 0x90, 0x8f, 0x13,
+ 0x4b, 0x6a, 0x48, 0x2e, 0x69, 0x94, 0xb3, 0xd4,
+ 0x83, 0x17, 0xbf, 0x08, 0xdb, 0x29, 0x21, 0x85 },
+ { 0x56, 0x65, 0xbe, 0xb8, 0xb0, 0x95, 0x55, 0x25,
+ 0x81, 0x3b, 0x59, 0x81, 0xcd, 0x14, 0x2e, 0xd4,
+ 0xd0, 0x3f, 0xba, 0x38, 0xa6, 0xf3, 0xe5, 0xad,
+ 0x26, 0x8e, 0x0c, 0xc2, 0x70, 0xd1, 0xcd, 0x11 },
+ { 0xb8, 0x83, 0xd6, 0x8f, 0x5f, 0xe5, 0x19, 0x36,
+ 0x43, 0x1b, 0xa4, 0x25, 0x67, 0x38, 0x05, 0x3b,
+ 0x1d, 0x04, 0x26, 0xd4, 0xcb, 0x64, 0xb1, 0x6e,
+ 0x83, 0xba, 0xdc, 0x5e, 0x9f, 0xbe, 0x3b, 0x81 },
+ { 0x53, 0xe7, 0xb2, 0x7e, 0xa5, 0x9c, 0x2f, 0x6d,
+ 0xbb, 0x50, 0x76, 0x9e, 0x43, 0x55, 0x4d, 0xf3,
+ 0x5a, 0xf8, 0x9f, 0x48, 0x22, 0xd0, 0x46, 0x6b,
+ 0x00, 0x7d, 0xd6, 0xf6, 0xde, 0xaf, 0xff, 0x02 },
+ { 0x1f, 0x1a, 0x02, 0x29, 0xd4, 0x64, 0x0f, 0x01,
+ 0x90, 0x15, 0x88, 0xd9, 0xde, 0xc2, 0x2d, 0x13,
+ 0xfc, 0x3e, 0xb3, 0x4a, 0x61, 0xb3, 0x29, 0x38,
+ 0xef, 0xbf, 0x53, 0x34, 0xb2, 0x80, 0x0a, 0xfa },
+ { 0xc2, 0xb4, 0x05, 0xaf, 0xa0, 0xfa, 0x66, 0x68,
+ 0x85, 0x2a, 0xee, 0x4d, 0x88, 0x04, 0x08, 0x53,
+ 0xfa, 0xb8, 0x00, 0xe7, 0x2b, 0x57, 0x58, 0x14,
+ 0x18, 0xe5, 0x50, 0x6f, 0x21, 0x4c, 0x7d, 0x1f },
+ { 0xc0, 0x8a, 0xa1, 0xc2, 0x86, 0xd7, 0x09, 0xfd,
+ 0xc7, 0x47, 0x37, 0x44, 0x97, 0x71, 0x88, 0xc8,
+ 0x95, 0xba, 0x01, 0x10, 0x14, 0x24, 0x7e, 0x4e,
+ 0xfa, 0x8d, 0x07, 0xe7, 0x8f, 0xec, 0x69, 0x5c },
+ { 0xf0, 0x3f, 0x57, 0x89, 0xd3, 0x33, 0x6b, 0x80,
+ 0xd0, 0x02, 0xd5, 0x9f, 0xdf, 0x91, 0x8b, 0xdb,
+ 0x77, 0x5b, 0x00, 0x95, 0x6e, 0xd5, 0x52, 0x8e,
+ 0x86, 0xaa, 0x99, 0x4a, 0xcb, 0x38, 0xfe, 0x2d }
+};
+
+static const u8 blake2s_keyed_testvecs[][BLAKE2S_OUTBYTES] __initconst = {
+ { 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
+ 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
+ 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
+ 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49 },
+ { 0x40, 0xd1, 0x5f, 0xee, 0x7c, 0x32, 0x88, 0x30,
+ 0x16, 0x6a, 0xc3, 0xf9, 0x18, 0x65, 0x0f, 0x80,
+ 0x7e, 0x7e, 0x01, 0xe1, 0x77, 0x25, 0x8c, 0xdc,
+ 0x0a, 0x39, 0xb1, 0x1f, 0x59, 0x80, 0x66, 0xf1 },
+ { 0x6b, 0xb7, 0x13, 0x00, 0x64, 0x4c, 0xd3, 0x99,
+ 0x1b, 0x26, 0xcc, 0xd4, 0xd2, 0x74, 0xac, 0xd1,
+ 0xad, 0xea, 0xb8, 0xb1, 0xd7, 0x91, 0x45, 0x46,
+ 0xc1, 0x19, 0x8b, 0xbe, 0x9f, 0xc9, 0xd8, 0x03 },
+ { 0x1d, 0x22, 0x0d, 0xbe, 0x2e, 0xe1, 0x34, 0x66,
+ 0x1f, 0xdf, 0x6d, 0x9e, 0x74, 0xb4, 0x17, 0x04,
+ 0x71, 0x05, 0x56, 0xf2, 0xf6, 0xe5, 0xa0, 0x91,
+ 0xb2, 0x27, 0x69, 0x74, 0x45, 0xdb, 0xea, 0x6b },
+ { 0xf6, 0xc3, 0xfb, 0xad, 0xb4, 0xcc, 0x68, 0x7a,
+ 0x00, 0x64, 0xa5, 0xbe, 0x6e, 0x79, 0x1b, 0xec,
+ 0x63, 0xb8, 0x68, 0xad, 0x62, 0xfb, 0xa6, 0x1b,
+ 0x37, 0x57, 0xef, 0x9c, 0xa5, 0x2e, 0x05, 0xb2 },
+ { 0x49, 0xc1, 0xf2, 0x11, 0x88, 0xdf, 0xd7, 0x69,
+ 0xae, 0xa0, 0xe9, 0x11, 0xdd, 0x6b, 0x41, 0xf1,
+ 0x4d, 0xab, 0x10, 0x9d, 0x2b, 0x85, 0x97, 0x7a,
+ 0xa3, 0x08, 0x8b, 0x5c, 0x70, 0x7e, 0x85, 0x98 },
+ { 0xfd, 0xd8, 0x99, 0x3d, 0xcd, 0x43, 0xf6, 0x96,
+ 0xd4, 0x4f, 0x3c, 0xea, 0x0f, 0xf3, 0x53, 0x45,
+ 0x23, 0x4e, 0xc8, 0xee, 0x08, 0x3e, 0xb3, 0xca,
+ 0xda, 0x01, 0x7c, 0x7f, 0x78, 0xc1, 0x71, 0x43 },
+ { 0xe6, 0xc8, 0x12, 0x56, 0x37, 0x43, 0x8d, 0x09,
+ 0x05, 0xb7, 0x49, 0xf4, 0x65, 0x60, 0xac, 0x89,
+ 0xfd, 0x47, 0x1c, 0xf8, 0x69, 0x2e, 0x28, 0xfa,
+ 0xb9, 0x82, 0xf7, 0x3f, 0x01, 0x9b, 0x83, 0xa9 },
+ { 0x19, 0xfc, 0x8c, 0xa6, 0x97, 0x9d, 0x60, 0xe6,
+ 0xed, 0xd3, 0xb4, 0x54, 0x1e, 0x2f, 0x96, 0x7c,
+ 0xed, 0x74, 0x0d, 0xf6, 0xec, 0x1e, 0xae, 0xbb,
+ 0xfe, 0x81, 0x38, 0x32, 0xe9, 0x6b, 0x29, 0x74 },
+ { 0xa6, 0xad, 0x77, 0x7c, 0xe8, 0x81, 0xb5, 0x2b,
+ 0xb5, 0xa4, 0x42, 0x1a, 0xb6, 0xcd, 0xd2, 0xdf,
+ 0xba, 0x13, 0xe9, 0x63, 0x65, 0x2d, 0x4d, 0x6d,
+ 0x12, 0x2a, 0xee, 0x46, 0x54, 0x8c, 0x14, 0xa7 },
+ { 0xf5, 0xc4, 0xb2, 0xba, 0x1a, 0x00, 0x78, 0x1b,
+ 0x13, 0xab, 0xa0, 0x42, 0x52, 0x42, 0xc6, 0x9c,
+ 0xb1, 0x55, 0x2f, 0x3f, 0x71, 0xa9, 0xa3, 0xbb,
+ 0x22, 0xb4, 0xa6, 0xb4, 0x27, 0x7b, 0x46, 0xdd },
+ { 0xe3, 0x3c, 0x4c, 0x9b, 0xd0, 0xcc, 0x7e, 0x45,
+ 0xc8, 0x0e, 0x65, 0xc7, 0x7f, 0xa5, 0x99, 0x7f,
+ 0xec, 0x70, 0x02, 0x73, 0x85, 0x41, 0x50, 0x9e,
+ 0x68, 0xa9, 0x42, 0x38, 0x91, 0xe8, 0x22, 0xa3 },
+ { 0xfb, 0xa1, 0x61, 0x69, 0xb2, 0xc3, 0xee, 0x10,
+ 0x5b, 0xe6, 0xe1, 0xe6, 0x50, 0xe5, 0xcb, 0xf4,
+ 0x07, 0x46, 0xb6, 0x75, 0x3d, 0x03, 0x6a, 0xb5,
+ 0x51, 0x79, 0x01, 0x4a, 0xd7, 0xef, 0x66, 0x51 },
+ { 0xf5, 0xc4, 0xbe, 0xc6, 0xd6, 0x2f, 0xc6, 0x08,
+ 0xbf, 0x41, 0xcc, 0x11, 0x5f, 0x16, 0xd6, 0x1c,
+ 0x7e, 0xfd, 0x3f, 0xf6, 0xc6, 0x56, 0x92, 0xbb,
+ 0xe0, 0xaf, 0xff, 0xb1, 0xfe, 0xde, 0x74, 0x75 },
+ { 0xa4, 0x86, 0x2e, 0x76, 0xdb, 0x84, 0x7f, 0x05,
+ 0xba, 0x17, 0xed, 0xe5, 0xda, 0x4e, 0x7f, 0x91,
+ 0xb5, 0x92, 0x5c, 0xf1, 0xad, 0x4b, 0xa1, 0x27,
+ 0x32, 0xc3, 0x99, 0x57, 0x42, 0xa5, 0xcd, 0x6e },
+ { 0x65, 0xf4, 0xb8, 0x60, 0xcd, 0x15, 0xb3, 0x8e,
+ 0xf8, 0x14, 0xa1, 0xa8, 0x04, 0x31, 0x4a, 0x55,
+ 0xbe, 0x95, 0x3c, 0xaa, 0x65, 0xfd, 0x75, 0x8a,
+ 0xd9, 0x89, 0xff, 0x34, 0xa4, 0x1c, 0x1e, 0xea },
+ { 0x19, 0xba, 0x23, 0x4f, 0x0a, 0x4f, 0x38, 0x63,
+ 0x7d, 0x18, 0x39, 0xf9, 0xd9, 0xf7, 0x6a, 0xd9,
+ 0x1c, 0x85, 0x22, 0x30, 0x71, 0x43, 0xc9, 0x7d,
+ 0x5f, 0x93, 0xf6, 0x92, 0x74, 0xce, 0xc9, 0xa7 },
+ { 0x1a, 0x67, 0x18, 0x6c, 0xa4, 0xa5, 0xcb, 0x8e,
+ 0x65, 0xfc, 0xa0, 0xe2, 0xec, 0xbc, 0x5d, 0xdc,
+ 0x14, 0xae, 0x38, 0x1b, 0xb8, 0xbf, 0xfe, 0xb9,
+ 0xe0, 0xa1, 0x03, 0x44, 0x9e, 0x3e, 0xf0, 0x3c },
+ { 0xaf, 0xbe, 0xa3, 0x17, 0xb5, 0xa2, 0xe8, 0x9c,
+ 0x0b, 0xd9, 0x0c, 0xcf, 0x5d, 0x7f, 0xd0, 0xed,
+ 0x57, 0xfe, 0x58, 0x5e, 0x4b, 0xe3, 0x27, 0x1b,
+ 0x0a, 0x6b, 0xf0, 0xf5, 0x78, 0x6b, 0x0f, 0x26 },
+ { 0xf1, 0xb0, 0x15, 0x58, 0xce, 0x54, 0x12, 0x62,
+ 0xf5, 0xec, 0x34, 0x29, 0x9d, 0x6f, 0xb4, 0x09,
+ 0x00, 0x09, 0xe3, 0x43, 0x4b, 0xe2, 0xf4, 0x91,
+ 0x05, 0xcf, 0x46, 0xaf, 0x4d, 0x2d, 0x41, 0x24 },
+ { 0x13, 0xa0, 0xa0, 0xc8, 0x63, 0x35, 0x63, 0x5e,
+ 0xaa, 0x74, 0xca, 0x2d, 0x5d, 0x48, 0x8c, 0x79,
+ 0x7b, 0xbb, 0x4f, 0x47, 0xdc, 0x07, 0x10, 0x50,
+ 0x15, 0xed, 0x6a, 0x1f, 0x33, 0x09, 0xef, 0xce },
+ { 0x15, 0x80, 0xaf, 0xee, 0xbe, 0xbb, 0x34, 0x6f,
+ 0x94, 0xd5, 0x9f, 0xe6, 0x2d, 0xa0, 0xb7, 0x92,
+ 0x37, 0xea, 0xd7, 0xb1, 0x49, 0x1f, 0x56, 0x67,
+ 0xa9, 0x0e, 0x45, 0xed, 0xf6, 0xca, 0x8b, 0x03 },
+ { 0x20, 0xbe, 0x1a, 0x87, 0x5b, 0x38, 0xc5, 0x73,
+ 0xdd, 0x7f, 0xaa, 0xa0, 0xde, 0x48, 0x9d, 0x65,
+ 0x5c, 0x11, 0xef, 0xb6, 0xa5, 0x52, 0x69, 0x8e,
+ 0x07, 0xa2, 0xd3, 0x31, 0xb5, 0xf6, 0x55, 0xc3 },
+ { 0xbe, 0x1f, 0xe3, 0xc4, 0xc0, 0x40, 0x18, 0xc5,
+ 0x4c, 0x4a, 0x0f, 0x6b, 0x9a, 0x2e, 0xd3, 0xc5,
+ 0x3a, 0xbe, 0x3a, 0x9f, 0x76, 0xb4, 0xd2, 0x6d,
+ 0xe5, 0x6f, 0xc9, 0xae, 0x95, 0x05, 0x9a, 0x99 },
+ { 0xe3, 0xe3, 0xac, 0xe5, 0x37, 0xeb, 0x3e, 0xdd,
+ 0x84, 0x63, 0xd9, 0xad, 0x35, 0x82, 0xe1, 0x3c,
+ 0xf8, 0x65, 0x33, 0xff, 0xde, 0x43, 0xd6, 0x68,
+ 0xdd, 0x2e, 0x93, 0xbb, 0xdb, 0xd7, 0x19, 0x5a },
+ { 0x11, 0x0c, 0x50, 0xc0, 0xbf, 0x2c, 0x6e, 0x7a,
+ 0xeb, 0x7e, 0x43, 0x5d, 0x92, 0xd1, 0x32, 0xab,
+ 0x66, 0x55, 0x16, 0x8e, 0x78, 0xa2, 0xde, 0xcd,
+ 0xec, 0x33, 0x30, 0x77, 0x76, 0x84, 0xd9, 0xc1 },
+ { 0xe9, 0xba, 0x8f, 0x50, 0x5c, 0x9c, 0x80, 0xc0,
+ 0x86, 0x66, 0xa7, 0x01, 0xf3, 0x36, 0x7e, 0x6c,
+ 0xc6, 0x65, 0xf3, 0x4b, 0x22, 0xe7, 0x3c, 0x3c,
+ 0x04, 0x17, 0xeb, 0x1c, 0x22, 0x06, 0x08, 0x2f },
+ { 0x26, 0xcd, 0x66, 0xfc, 0xa0, 0x23, 0x79, 0xc7,
+ 0x6d, 0xf1, 0x23, 0x17, 0x05, 0x2b, 0xca, 0xfd,
+ 0x6c, 0xd8, 0xc3, 0xa7, 0xb8, 0x90, 0xd8, 0x05,
+ 0xf3, 0x6c, 0x49, 0x98, 0x97, 0x82, 0x43, 0x3a },
+ { 0x21, 0x3f, 0x35, 0x96, 0xd6, 0xe3, 0xa5, 0xd0,
+ 0xe9, 0x93, 0x2c, 0xd2, 0x15, 0x91, 0x46, 0x01,
+ 0x5e, 0x2a, 0xbc, 0x94, 0x9f, 0x47, 0x29, 0xee,
+ 0x26, 0x32, 0xfe, 0x1e, 0xdb, 0x78, 0xd3, 0x37 },
+ { 0x10, 0x15, 0xd7, 0x01, 0x08, 0xe0, 0x3b, 0xe1,
+ 0xc7, 0x02, 0xfe, 0x97, 0x25, 0x36, 0x07, 0xd1,
+ 0x4a, 0xee, 0x59, 0x1f, 0x24, 0x13, 0xea, 0x67,
+ 0x87, 0x42, 0x7b, 0x64, 0x59, 0xff, 0x21, 0x9a },
+ { 0x3c, 0xa9, 0x89, 0xde, 0x10, 0xcf, 0xe6, 0x09,
+ 0x90, 0x94, 0x72, 0xc8, 0xd3, 0x56, 0x10, 0x80,
+ 0x5b, 0x2f, 0x97, 0x77, 0x34, 0xcf, 0x65, 0x2c,
+ 0xc6, 0x4b, 0x3b, 0xfc, 0x88, 0x2d, 0x5d, 0x89 },
+ { 0xb6, 0x15, 0x6f, 0x72, 0xd3, 0x80, 0xee, 0x9e,
+ 0xa6, 0xac, 0xd1, 0x90, 0x46, 0x4f, 0x23, 0x07,
+ 0xa5, 0xc1, 0x79, 0xef, 0x01, 0xfd, 0x71, 0xf9,
+ 0x9f, 0x2d, 0x0f, 0x7a, 0x57, 0x36, 0x0a, 0xea },
+ { 0xc0, 0x3b, 0xc6, 0x42, 0xb2, 0x09, 0x59, 0xcb,
+ 0xe1, 0x33, 0xa0, 0x30, 0x3e, 0x0c, 0x1a, 0xbf,
+ 0xf3, 0xe3, 0x1e, 0xc8, 0xe1, 0xa3, 0x28, 0xec,
+ 0x85, 0x65, 0xc3, 0x6d, 0xec, 0xff, 0x52, 0x65 },
+ { 0x2c, 0x3e, 0x08, 0x17, 0x6f, 0x76, 0x0c, 0x62,
+ 0x64, 0xc3, 0xa2, 0xcd, 0x66, 0xfe, 0xc6, 0xc3,
+ 0xd7, 0x8d, 0xe4, 0x3f, 0xc1, 0x92, 0x45, 0x7b,
+ 0x2a, 0x4a, 0x66, 0x0a, 0x1e, 0x0e, 0xb2, 0x2b },
+ { 0xf7, 0x38, 0xc0, 0x2f, 0x3c, 0x1b, 0x19, 0x0c,
+ 0x51, 0x2b, 0x1a, 0x32, 0xde, 0xab, 0xf3, 0x53,
+ 0x72, 0x8e, 0x0e, 0x9a, 0xb0, 0x34, 0x49, 0x0e,
+ 0x3c, 0x34, 0x09, 0x94, 0x6a, 0x97, 0xae, 0xec },
+ { 0x8b, 0x18, 0x80, 0xdf, 0x30, 0x1c, 0xc9, 0x63,
+ 0x41, 0x88, 0x11, 0x08, 0x89, 0x64, 0x83, 0x92,
+ 0x87, 0xff, 0x7f, 0xe3, 0x1c, 0x49, 0xea, 0x6e,
+ 0xbd, 0x9e, 0x48, 0xbd, 0xee, 0xe4, 0x97, 0xc5 },
+ { 0x1e, 0x75, 0xcb, 0x21, 0xc6, 0x09, 0x89, 0x02,
+ 0x03, 0x75, 0xf1, 0xa7, 0xa2, 0x42, 0x83, 0x9f,
+ 0x0b, 0x0b, 0x68, 0x97, 0x3a, 0x4c, 0x2a, 0x05,
+ 0xcf, 0x75, 0x55, 0xed, 0x5a, 0xae, 0xc4, 0xc1 },
+ { 0x62, 0xbf, 0x8a, 0x9c, 0x32, 0xa5, 0xbc, 0xcf,
+ 0x29, 0x0b, 0x6c, 0x47, 0x4d, 0x75, 0xb2, 0xa2,
+ 0xa4, 0x09, 0x3f, 0x1a, 0x9e, 0x27, 0x13, 0x94,
+ 0x33, 0xa8, 0xf2, 0xb3, 0xbc, 0xe7, 0xb8, 0xd7 },
+ { 0x16, 0x6c, 0x83, 0x50, 0xd3, 0x17, 0x3b, 0x5e,
+ 0x70, 0x2b, 0x78, 0x3d, 0xfd, 0x33, 0xc6, 0x6e,
+ 0xe0, 0x43, 0x27, 0x42, 0xe9, 0xb9, 0x2b, 0x99,
+ 0x7f, 0xd2, 0x3c, 0x60, 0xdc, 0x67, 0x56, 0xca },
+ { 0x04, 0x4a, 0x14, 0xd8, 0x22, 0xa9, 0x0c, 0xac,
+ 0xf2, 0xf5, 0xa1, 0x01, 0x42, 0x8a, 0xdc, 0x8f,
+ 0x41, 0x09, 0x38, 0x6c, 0xcb, 0x15, 0x8b, 0xf9,
+ 0x05, 0xc8, 0x61, 0x8b, 0x8e, 0xe2, 0x4e, 0xc3 },
+ { 0x38, 0x7d, 0x39, 0x7e, 0xa4, 0x3a, 0x99, 0x4b,
+ 0xe8, 0x4d, 0x2d, 0x54, 0x4a, 0xfb, 0xe4, 0x81,
+ 0xa2, 0x00, 0x0f, 0x55, 0x25, 0x26, 0x96, 0xbb,
+ 0xa2, 0xc5, 0x0c, 0x8e, 0xbd, 0x10, 0x13, 0x47 },
+ { 0x56, 0xf8, 0xcc, 0xf1, 0xf8, 0x64, 0x09, 0xb4,
+ 0x6c, 0xe3, 0x61, 0x66, 0xae, 0x91, 0x65, 0x13,
+ 0x84, 0x41, 0x57, 0x75, 0x89, 0xdb, 0x08, 0xcb,
+ 0xc5, 0xf6, 0x6c, 0xa2, 0x97, 0x43, 0xb9, 0xfd },
+ { 0x97, 0x06, 0xc0, 0x92, 0xb0, 0x4d, 0x91, 0xf5,
+ 0x3d, 0xff, 0x91, 0xfa, 0x37, 0xb7, 0x49, 0x3d,
+ 0x28, 0xb5, 0x76, 0xb5, 0xd7, 0x10, 0x46, 0x9d,
+ 0xf7, 0x94, 0x01, 0x66, 0x22, 0x36, 0xfc, 0x03 },
+ { 0x87, 0x79, 0x68, 0x68, 0x6c, 0x06, 0x8c, 0xe2,
+ 0xf7, 0xe2, 0xad, 0xcf, 0xf6, 0x8b, 0xf8, 0x74,
+ 0x8e, 0xdf, 0x3c, 0xf8, 0x62, 0xcf, 0xb4, 0xd3,
+ 0x94, 0x7a, 0x31, 0x06, 0x95, 0x80, 0x54, 0xe3 },
+ { 0x88, 0x17, 0xe5, 0x71, 0x98, 0x79, 0xac, 0xf7,
+ 0x02, 0x47, 0x87, 0xec, 0xcd, 0xb2, 0x71, 0x03,
+ 0x55, 0x66, 0xcf, 0xa3, 0x33, 0xe0, 0x49, 0x40,
+ 0x7c, 0x01, 0x78, 0xcc, 0xc5, 0x7a, 0x5b, 0x9f },
+ { 0x89, 0x38, 0x24, 0x9e, 0x4b, 0x50, 0xca, 0xda,
+ 0xcc, 0xdf, 0x5b, 0x18, 0x62, 0x13, 0x26, 0xcb,
+ 0xb1, 0x52, 0x53, 0xe3, 0x3a, 0x20, 0xf5, 0x63,
+ 0x6e, 0x99, 0x5d, 0x72, 0x47, 0x8d, 0xe4, 0x72 },
+ { 0xf1, 0x64, 0xab, 0xba, 0x49, 0x63, 0xa4, 0x4d,
+ 0x10, 0x72, 0x57, 0xe3, 0x23, 0x2d, 0x90, 0xac,
+ 0xa5, 0xe6, 0x6a, 0x14, 0x08, 0x24, 0x8c, 0x51,
+ 0x74, 0x1e, 0x99, 0x1d, 0xb5, 0x22, 0x77, 0x56 },
+ { 0xd0, 0x55, 0x63, 0xe2, 0xb1, 0xcb, 0xa0, 0xc4,
+ 0xa2, 0xa1, 0xe8, 0xbd, 0xe3, 0xa1, 0xa0, 0xd9,
+ 0xf5, 0xb4, 0x0c, 0x85, 0xa0, 0x70, 0xd6, 0xf5,
+ 0xfb, 0x21, 0x06, 0x6e, 0xad, 0x5d, 0x06, 0x01 },
+ { 0x03, 0xfb, 0xb1, 0x63, 0x84, 0xf0, 0xa3, 0x86,
+ 0x6f, 0x4c, 0x31, 0x17, 0x87, 0x76, 0x66, 0xef,
+ 0xbf, 0x12, 0x45, 0x97, 0x56, 0x4b, 0x29, 0x3d,
+ 0x4a, 0xab, 0x0d, 0x26, 0x9f, 0xab, 0xdd, 0xfa },
+ { 0x5f, 0xa8, 0x48, 0x6a, 0xc0, 0xe5, 0x29, 0x64,
+ 0xd1, 0x88, 0x1b, 0xbe, 0x33, 0x8e, 0xb5, 0x4b,
+ 0xe2, 0xf7, 0x19, 0x54, 0x92, 0x24, 0x89, 0x20,
+ 0x57, 0xb4, 0xda, 0x04, 0xba, 0x8b, 0x34, 0x75 },
+ { 0xcd, 0xfa, 0xbc, 0xee, 0x46, 0x91, 0x11, 0x11,
+ 0x23, 0x6a, 0x31, 0x70, 0x8b, 0x25, 0x39, 0xd7,
+ 0x1f, 0xc2, 0x11, 0xd9, 0xb0, 0x9c, 0x0d, 0x85,
+ 0x30, 0xa1, 0x1e, 0x1d, 0xbf, 0x6e, 0xed, 0x01 },
+ { 0x4f, 0x82, 0xde, 0x03, 0xb9, 0x50, 0x47, 0x93,
+ 0xb8, 0x2a, 0x07, 0xa0, 0xbd, 0xcd, 0xff, 0x31,
+ 0x4d, 0x75, 0x9e, 0x7b, 0x62, 0xd2, 0x6b, 0x78,
+ 0x49, 0x46, 0xb0, 0xd3, 0x6f, 0x91, 0x6f, 0x52 },
+ { 0x25, 0x9e, 0xc7, 0xf1, 0x73, 0xbc, 0xc7, 0x6a,
+ 0x09, 0x94, 0xc9, 0x67, 0xb4, 0xf5, 0xf0, 0x24,
+ 0xc5, 0x60, 0x57, 0xfb, 0x79, 0xc9, 0x65, 0xc4,
+ 0xfa, 0xe4, 0x18, 0x75, 0xf0, 0x6a, 0x0e, 0x4c },
+ { 0x19, 0x3c, 0xc8, 0xe7, 0xc3, 0xe0, 0x8b, 0xb3,
+ 0x0f, 0x54, 0x37, 0xaa, 0x27, 0xad, 0xe1, 0xf1,
+ 0x42, 0x36, 0x9b, 0x24, 0x6a, 0x67, 0x5b, 0x23,
+ 0x83, 0xe6, 0xda, 0x9b, 0x49, 0xa9, 0x80, 0x9e },
+ { 0x5c, 0x10, 0x89, 0x6f, 0x0e, 0x28, 0x56, 0xb2,
+ 0xa2, 0xee, 0xe0, 0xfe, 0x4a, 0x2c, 0x16, 0x33,
+ 0x56, 0x5d, 0x18, 0xf0, 0xe9, 0x3e, 0x1f, 0xab,
+ 0x26, 0xc3, 0x73, 0xe8, 0xf8, 0x29, 0x65, 0x4d },
+ { 0xf1, 0x60, 0x12, 0xd9, 0x3f, 0x28, 0x85, 0x1a,
+ 0x1e, 0xb9, 0x89, 0xf5, 0xd0, 0xb4, 0x3f, 0x3f,
+ 0x39, 0xca, 0x73, 0xc9, 0xa6, 0x2d, 0x51, 0x81,
+ 0xbf, 0xf2, 0x37, 0x53, 0x6b, 0xd3, 0x48, 0xc3 },
+ { 0x29, 0x66, 0xb3, 0xcf, 0xae, 0x1e, 0x44, 0xea,
+ 0x99, 0x6d, 0xc5, 0xd6, 0x86, 0xcf, 0x25, 0xfa,
+ 0x05, 0x3f, 0xb6, 0xf6, 0x72, 0x01, 0xb9, 0xe4,
+ 0x6e, 0xad, 0xe8, 0x5d, 0x0a, 0xd6, 0xb8, 0x06 },
+ { 0xdd, 0xb8, 0x78, 0x24, 0x85, 0xe9, 0x00, 0xbc,
+ 0x60, 0xbc, 0xf4, 0xc3, 0x3a, 0x6f, 0xd5, 0x85,
+ 0x68, 0x0c, 0xc6, 0x83, 0xd5, 0x16, 0xef, 0xa0,
+ 0x3e, 0xb9, 0x98, 0x5f, 0xad, 0x87, 0x15, 0xfb },
+ { 0x4c, 0x4d, 0x6e, 0x71, 0xae, 0xa0, 0x57, 0x86,
+ 0x41, 0x31, 0x48, 0xfc, 0x7a, 0x78, 0x6b, 0x0e,
+ 0xca, 0xf5, 0x82, 0xcf, 0xf1, 0x20, 0x9f, 0x5a,
+ 0x80, 0x9f, 0xba, 0x85, 0x04, 0xce, 0x66, 0x2c },
+ { 0xfb, 0x4c, 0x5e, 0x86, 0xd7, 0xb2, 0x22, 0x9b,
+ 0x99, 0xb8, 0xba, 0x6d, 0x94, 0xc2, 0x47, 0xef,
+ 0x96, 0x4a, 0xa3, 0xa2, 0xba, 0xe8, 0xed, 0xc7,
+ 0x75, 0x69, 0xf2, 0x8d, 0xbb, 0xff, 0x2d, 0x4e },
+ { 0xe9, 0x4f, 0x52, 0x6d, 0xe9, 0x01, 0x96, 0x33,
+ 0xec, 0xd5, 0x4a, 0xc6, 0x12, 0x0f, 0x23, 0x95,
+ 0x8d, 0x77, 0x18, 0xf1, 0xe7, 0x71, 0x7b, 0xf3,
+ 0x29, 0x21, 0x1a, 0x4f, 0xae, 0xed, 0x4e, 0x6d },
+ { 0xcb, 0xd6, 0x66, 0x0a, 0x10, 0xdb, 0x3f, 0x23,
+ 0xf7, 0xa0, 0x3d, 0x4b, 0x9d, 0x40, 0x44, 0xc7,
+ 0x93, 0x2b, 0x28, 0x01, 0xac, 0x89, 0xd6, 0x0b,
+ 0xc9, 0xeb, 0x92, 0xd6, 0x5a, 0x46, 0xc2, 0xa0 },
+ { 0x88, 0x18, 0xbb, 0xd3, 0xdb, 0x4d, 0xc1, 0x23,
+ 0xb2, 0x5c, 0xbb, 0xa5, 0xf5, 0x4c, 0x2b, 0xc4,
+ 0xb3, 0xfc, 0xf9, 0xbf, 0x7d, 0x7a, 0x77, 0x09,
+ 0xf4, 0xae, 0x58, 0x8b, 0x26, 0x7c, 0x4e, 0xce },
+ { 0xc6, 0x53, 0x82, 0x51, 0x3f, 0x07, 0x46, 0x0d,
+ 0xa3, 0x98, 0x33, 0xcb, 0x66, 0x6c, 0x5e, 0xd8,
+ 0x2e, 0x61, 0xb9, 0xe9, 0x98, 0xf4, 0xb0, 0xc4,
+ 0x28, 0x7c, 0xee, 0x56, 0xc3, 0xcc, 0x9b, 0xcd },
+ { 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
+ 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
+ 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
+ 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4 },
+ { 0x21, 0xfe, 0x0c, 0xeb, 0x00, 0x52, 0xbe, 0x7f,
+ 0xb0, 0xf0, 0x04, 0x18, 0x7c, 0xac, 0xd7, 0xde,
+ 0x67, 0xfa, 0x6e, 0xb0, 0x93, 0x8d, 0x92, 0x76,
+ 0x77, 0xf2, 0x39, 0x8c, 0x13, 0x23, 0x17, 0xa8 },
+ { 0x2e, 0xf7, 0x3f, 0x3c, 0x26, 0xf1, 0x2d, 0x93,
+ 0x88, 0x9f, 0x3c, 0x78, 0xb6, 0xa6, 0x6c, 0x1d,
+ 0x52, 0xb6, 0x49, 0xdc, 0x9e, 0x85, 0x6e, 0x2c,
+ 0x17, 0x2e, 0xa7, 0xc5, 0x8a, 0xc2, 0xb5, 0xe3 },
+ { 0x38, 0x8a, 0x3c, 0xd5, 0x6d, 0x73, 0x86, 0x7a,
+ 0xbb, 0x5f, 0x84, 0x01, 0x49, 0x2b, 0x6e, 0x26,
+ 0x81, 0xeb, 0x69, 0x85, 0x1e, 0x76, 0x7f, 0xd8,
+ 0x42, 0x10, 0xa5, 0x60, 0x76, 0xfb, 0x3d, 0xd3 },
+ { 0xaf, 0x53, 0x3e, 0x02, 0x2f, 0xc9, 0x43, 0x9e,
+ 0x4e, 0x3c, 0xb8, 0x38, 0xec, 0xd1, 0x86, 0x92,
+ 0x23, 0x2a, 0xdf, 0x6f, 0xe9, 0x83, 0x95, 0x26,
+ 0xd3, 0xc3, 0xdd, 0x1b, 0x71, 0x91, 0x0b, 0x1a },
+ { 0x75, 0x1c, 0x09, 0xd4, 0x1a, 0x93, 0x43, 0x88,
+ 0x2a, 0x81, 0xcd, 0x13, 0xee, 0x40, 0x81, 0x8d,
+ 0x12, 0xeb, 0x44, 0xc6, 0xc7, 0xf4, 0x0d, 0xf1,
+ 0x6e, 0x4a, 0xea, 0x8f, 0xab, 0x91, 0x97, 0x2a },
+ { 0x5b, 0x73, 0xdd, 0xb6, 0x8d, 0x9d, 0x2b, 0x0a,
+ 0xa2, 0x65, 0xa0, 0x79, 0x88, 0xd6, 0xb8, 0x8a,
+ 0xe9, 0xaa, 0xc5, 0x82, 0xaf, 0x83, 0x03, 0x2f,
+ 0x8a, 0x9b, 0x21, 0xa2, 0xe1, 0xb7, 0xbf, 0x18 },
+ { 0x3d, 0xa2, 0x91, 0x26, 0xc7, 0xc5, 0xd7, 0xf4,
+ 0x3e, 0x64, 0x24, 0x2a, 0x79, 0xfe, 0xaa, 0x4e,
+ 0xf3, 0x45, 0x9c, 0xde, 0xcc, 0xc8, 0x98, 0xed,
+ 0x59, 0xa9, 0x7f, 0x6e, 0xc9, 0x3b, 0x9d, 0xab },
+ { 0x56, 0x6d, 0xc9, 0x20, 0x29, 0x3d, 0xa5, 0xcb,
+ 0x4f, 0xe0, 0xaa, 0x8a, 0xbd, 0xa8, 0xbb, 0xf5,
+ 0x6f, 0x55, 0x23, 0x13, 0xbf, 0xf1, 0x90, 0x46,
+ 0x64, 0x1e, 0x36, 0x15, 0xc1, 0xe3, 0xed, 0x3f },
+ { 0x41, 0x15, 0xbe, 0xa0, 0x2f, 0x73, 0xf9, 0x7f,
+ 0x62, 0x9e, 0x5c, 0x55, 0x90, 0x72, 0x0c, 0x01,
+ 0xe7, 0xe4, 0x49, 0xae, 0x2a, 0x66, 0x97, 0xd4,
+ 0xd2, 0x78, 0x33, 0x21, 0x30, 0x36, 0x92, 0xf9 },
+ { 0x4c, 0xe0, 0x8f, 0x47, 0x62, 0x46, 0x8a, 0x76,
+ 0x70, 0x01, 0x21, 0x64, 0x87, 0x8d, 0x68, 0x34,
+ 0x0c, 0x52, 0xa3, 0x5e, 0x66, 0xc1, 0x88, 0x4d,
+ 0x5c, 0x86, 0x48, 0x89, 0xab, 0xc9, 0x66, 0x77 },
+ { 0x81, 0xea, 0x0b, 0x78, 0x04, 0x12, 0x4e, 0x0c,
+ 0x22, 0xea, 0x5f, 0xc7, 0x11, 0x04, 0xa2, 0xaf,
+ 0xcb, 0x52, 0xa1, 0xfa, 0x81, 0x6f, 0x3e, 0xcb,
+ 0x7d, 0xcb, 0x5d, 0x9d, 0xea, 0x17, 0x86, 0xd0 },
+ { 0xfe, 0x36, 0x27, 0x33, 0xb0, 0x5f, 0x6b, 0xed,
+ 0xaf, 0x93, 0x79, 0xd7, 0xf7, 0x93, 0x6e, 0xde,
+ 0x20, 0x9b, 0x1f, 0x83, 0x23, 0xc3, 0x92, 0x25,
+ 0x49, 0xd9, 0xe7, 0x36, 0x81, 0xb5, 0xdb, 0x7b },
+ { 0xef, 0xf3, 0x7d, 0x30, 0xdf, 0xd2, 0x03, 0x59,
+ 0xbe, 0x4e, 0x73, 0xfd, 0xf4, 0x0d, 0x27, 0x73,
+ 0x4b, 0x3d, 0xf9, 0x0a, 0x97, 0xa5, 0x5e, 0xd7,
+ 0x45, 0x29, 0x72, 0x94, 0xca, 0x85, 0xd0, 0x9f },
+ { 0x17, 0x2f, 0xfc, 0x67, 0x15, 0x3d, 0x12, 0xe0,
+ 0xca, 0x76, 0xa8, 0xb6, 0xcd, 0x5d, 0x47, 0x31,
+ 0x88, 0x5b, 0x39, 0xce, 0x0c, 0xac, 0x93, 0xa8,
+ 0x97, 0x2a, 0x18, 0x00, 0x6c, 0x8b, 0x8b, 0xaf },
+ { 0xc4, 0x79, 0x57, 0xf1, 0xcc, 0x88, 0xe8, 0x3e,
+ 0xf9, 0x44, 0x58, 0x39, 0x70, 0x9a, 0x48, 0x0a,
+ 0x03, 0x6b, 0xed, 0x5f, 0x88, 0xac, 0x0f, 0xcc,
+ 0x8e, 0x1e, 0x70, 0x3f, 0xfa, 0xac, 0x13, 0x2c },
+ { 0x30, 0xf3, 0x54, 0x83, 0x70, 0xcf, 0xdc, 0xed,
+ 0xa5, 0xc3, 0x7b, 0x56, 0x9b, 0x61, 0x75, 0xe7,
+ 0x99, 0xee, 0xf1, 0xa6, 0x2a, 0xaa, 0x94, 0x32,
+ 0x45, 0xae, 0x76, 0x69, 0xc2, 0x27, 0xa7, 0xb5 },
+ { 0xc9, 0x5d, 0xcb, 0x3c, 0xf1, 0xf2, 0x7d, 0x0e,
+ 0xef, 0x2f, 0x25, 0xd2, 0x41, 0x38, 0x70, 0x90,
+ 0x4a, 0x87, 0x7c, 0x4a, 0x56, 0xc2, 0xde, 0x1e,
+ 0x83, 0xe2, 0xbc, 0x2a, 0xe2, 0xe4, 0x68, 0x21 },
+ { 0xd5, 0xd0, 0xb5, 0xd7, 0x05, 0x43, 0x4c, 0xd4,
+ 0x6b, 0x18, 0x57, 0x49, 0xf6, 0x6b, 0xfb, 0x58,
+ 0x36, 0xdc, 0xdf, 0x6e, 0xe5, 0x49, 0xa2, 0xb7,
+ 0xa4, 0xae, 0xe7, 0xf5, 0x80, 0x07, 0xca, 0xaf },
+ { 0xbb, 0xc1, 0x24, 0xa7, 0x12, 0xf1, 0x5d, 0x07,
+ 0xc3, 0x00, 0xe0, 0x5b, 0x66, 0x83, 0x89, 0xa4,
+ 0x39, 0xc9, 0x17, 0x77, 0xf7, 0x21, 0xf8, 0x32,
+ 0x0c, 0x1c, 0x90, 0x78, 0x06, 0x6d, 0x2c, 0x7e },
+ { 0xa4, 0x51, 0xb4, 0x8c, 0x35, 0xa6, 0xc7, 0x85,
+ 0x4c, 0xfa, 0xae, 0x60, 0x26, 0x2e, 0x76, 0x99,
+ 0x08, 0x16, 0x38, 0x2a, 0xc0, 0x66, 0x7e, 0x5a,
+ 0x5c, 0x9e, 0x1b, 0x46, 0xc4, 0x34, 0x2d, 0xdf },
+ { 0xb0, 0xd1, 0x50, 0xfb, 0x55, 0xe7, 0x78, 0xd0,
+ 0x11, 0x47, 0xf0, 0xb5, 0xd8, 0x9d, 0x99, 0xec,
+ 0xb2, 0x0f, 0xf0, 0x7e, 0x5e, 0x67, 0x60, 0xd6,
+ 0xb6, 0x45, 0xeb, 0x5b, 0x65, 0x4c, 0x62, 0x2b },
+ { 0x34, 0xf7, 0x37, 0xc0, 0xab, 0x21, 0x99, 0x51,
+ 0xee, 0xe8, 0x9a, 0x9f, 0x8d, 0xac, 0x29, 0x9c,
+ 0x9d, 0x4c, 0x38, 0xf3, 0x3f, 0xa4, 0x94, 0xc5,
+ 0xc6, 0xee, 0xfc, 0x92, 0xb6, 0xdb, 0x08, 0xbc },
+ { 0x1a, 0x62, 0xcc, 0x3a, 0x00, 0x80, 0x0d, 0xcb,
+ 0xd9, 0x98, 0x91, 0x08, 0x0c, 0x1e, 0x09, 0x84,
+ 0x58, 0x19, 0x3a, 0x8c, 0xc9, 0xf9, 0x70, 0xea,
+ 0x99, 0xfb, 0xef, 0xf0, 0x03, 0x18, 0xc2, 0x89 },
+ { 0xcf, 0xce, 0x55, 0xeb, 0xaf, 0xc8, 0x40, 0xd7,
+ 0xae, 0x48, 0x28, 0x1c, 0x7f, 0xd5, 0x7e, 0xc8,
+ 0xb4, 0x82, 0xd4, 0xb7, 0x04, 0x43, 0x74, 0x95,
+ 0x49, 0x5a, 0xc4, 0x14, 0xcf, 0x4a, 0x37, 0x4b },
+ { 0x67, 0x46, 0xfa, 0xcf, 0x71, 0x14, 0x6d, 0x99,
+ 0x9d, 0xab, 0xd0, 0x5d, 0x09, 0x3a, 0xe5, 0x86,
+ 0x64, 0x8d, 0x1e, 0xe2, 0x8e, 0x72, 0x61, 0x7b,
+ 0x99, 0xd0, 0xf0, 0x08, 0x6e, 0x1e, 0x45, 0xbf },
+ { 0x57, 0x1c, 0xed, 0x28, 0x3b, 0x3f, 0x23, 0xb4,
+ 0xe7, 0x50, 0xbf, 0x12, 0xa2, 0xca, 0xf1, 0x78,
+ 0x18, 0x47, 0xbd, 0x89, 0x0e, 0x43, 0x60, 0x3c,
+ 0xdc, 0x59, 0x76, 0x10, 0x2b, 0x7b, 0xb1, 0x1b },
+ { 0xcf, 0xcb, 0x76, 0x5b, 0x04, 0x8e, 0x35, 0x02,
+ 0x2c, 0x5d, 0x08, 0x9d, 0x26, 0xe8, 0x5a, 0x36,
+ 0xb0, 0x05, 0xa2, 0xb8, 0x04, 0x93, 0xd0, 0x3a,
+ 0x14, 0x4e, 0x09, 0xf4, 0x09, 0xb6, 0xaf, 0xd1 },
+ { 0x40, 0x50, 0xc7, 0xa2, 0x77, 0x05, 0xbb, 0x27,
+ 0xf4, 0x20, 0x89, 0xb2, 0x99, 0xf3, 0xcb, 0xe5,
+ 0x05, 0x4e, 0xad, 0x68, 0x72, 0x7e, 0x8e, 0xf9,
+ 0x31, 0x8c, 0xe6, 0xf2, 0x5c, 0xd6, 0xf3, 0x1d },
+ { 0x18, 0x40, 0x70, 0xbd, 0x5d, 0x26, 0x5f, 0xbd,
+ 0xc1, 0x42, 0xcd, 0x1c, 0x5c, 0xd0, 0xd7, 0xe4,
+ 0x14, 0xe7, 0x03, 0x69, 0xa2, 0x66, 0xd6, 0x27,
+ 0xc8, 0xfb, 0xa8, 0x4f, 0xa5, 0xe8, 0x4c, 0x34 },
+ { 0x9e, 0xdd, 0xa9, 0xa4, 0x44, 0x39, 0x02, 0xa9,
+ 0x58, 0x8c, 0x0d, 0x0c, 0xcc, 0x62, 0xb9, 0x30,
+ 0x21, 0x84, 0x79, 0xa6, 0x84, 0x1e, 0x6f, 0xe7,
+ 0xd4, 0x30, 0x03, 0xf0, 0x4b, 0x1f, 0xd6, 0x43 },
+ { 0xe4, 0x12, 0xfe, 0xef, 0x79, 0x08, 0x32, 0x4a,
+ 0x6d, 0xa1, 0x84, 0x16, 0x29, 0xf3, 0x5d, 0x3d,
+ 0x35, 0x86, 0x42, 0x01, 0x93, 0x10, 0xec, 0x57,
+ 0xc6, 0x14, 0x83, 0x6b, 0x63, 0xd3, 0x07, 0x63 },
+ { 0x1a, 0x2b, 0x8e, 0xdf, 0xf3, 0xf9, 0xac, 0xc1,
+ 0x55, 0x4f, 0xcb, 0xae, 0x3c, 0xf1, 0xd6, 0x29,
+ 0x8c, 0x64, 0x62, 0xe2, 0x2e, 0x5e, 0xb0, 0x25,
+ 0x96, 0x84, 0xf8, 0x35, 0x01, 0x2b, 0xd1, 0x3f },
+ { 0x28, 0x8c, 0x4a, 0xd9, 0xb9, 0x40, 0x97, 0x62,
+ 0xea, 0x07, 0xc2, 0x4a, 0x41, 0xf0, 0x4f, 0x69,
+ 0xa7, 0xd7, 0x4b, 0xee, 0x2d, 0x95, 0x43, 0x53,
+ 0x74, 0xbd, 0xe9, 0x46, 0xd7, 0x24, 0x1c, 0x7b },
+ { 0x80, 0x56, 0x91, 0xbb, 0x28, 0x67, 0x48, 0xcf,
+ 0xb5, 0x91, 0xd3, 0xae, 0xbe, 0x7e, 0x6f, 0x4e,
+ 0x4d, 0xc6, 0xe2, 0x80, 0x8c, 0x65, 0x14, 0x3c,
+ 0xc0, 0x04, 0xe4, 0xeb, 0x6f, 0xd0, 0x9d, 0x43 },
+ { 0xd4, 0xac, 0x8d, 0x3a, 0x0a, 0xfc, 0x6c, 0xfa,
+ 0x7b, 0x46, 0x0a, 0xe3, 0x00, 0x1b, 0xae, 0xb3,
+ 0x6d, 0xad, 0xb3, 0x7d, 0xa0, 0x7d, 0x2e, 0x8a,
+ 0xc9, 0x18, 0x22, 0xdf, 0x34, 0x8a, 0xed, 0x3d },
+ { 0xc3, 0x76, 0x61, 0x70, 0x14, 0xd2, 0x01, 0x58,
+ 0xbc, 0xed, 0x3d, 0x3b, 0xa5, 0x52, 0xb6, 0xec,
+ 0xcf, 0x84, 0xe6, 0x2a, 0xa3, 0xeb, 0x65, 0x0e,
+ 0x90, 0x02, 0x9c, 0x84, 0xd1, 0x3e, 0xea, 0x69 },
+ { 0xc4, 0x1f, 0x09, 0xf4, 0x3c, 0xec, 0xae, 0x72,
+ 0x93, 0xd6, 0x00, 0x7c, 0xa0, 0xa3, 0x57, 0x08,
+ 0x7d, 0x5a, 0xe5, 0x9b, 0xe5, 0x00, 0xc1, 0xcd,
+ 0x5b, 0x28, 0x9e, 0xe8, 0x10, 0xc7, 0xb0, 0x82 },
+ { 0x03, 0xd1, 0xce, 0xd1, 0xfb, 0xa5, 0xc3, 0x91,
+ 0x55, 0xc4, 0x4b, 0x77, 0x65, 0xcb, 0x76, 0x0c,
+ 0x78, 0x70, 0x8d, 0xcf, 0xc8, 0x0b, 0x0b, 0xd8,
+ 0xad, 0xe3, 0xa5, 0x6d, 0xa8, 0x83, 0x0b, 0x29 },
+ { 0x09, 0xbd, 0xe6, 0xf1, 0x52, 0x21, 0x8d, 0xc9,
+ 0x2c, 0x41, 0xd7, 0xf4, 0x53, 0x87, 0xe6, 0x3e,
+ 0x58, 0x69, 0xd8, 0x07, 0xec, 0x70, 0xb8, 0x21,
+ 0x40, 0x5d, 0xbd, 0x88, 0x4b, 0x7f, 0xcf, 0x4b },
+ { 0x71, 0xc9, 0x03, 0x6e, 0x18, 0x17, 0x9b, 0x90,
+ 0xb3, 0x7d, 0x39, 0xe9, 0xf0, 0x5e, 0xb8, 0x9c,
+ 0xc5, 0xfc, 0x34, 0x1f, 0xd7, 0xc4, 0x77, 0xd0,
+ 0xd7, 0x49, 0x32, 0x85, 0xfa, 0xca, 0x08, 0xa4 },
+ { 0x59, 0x16, 0x83, 0x3e, 0xbb, 0x05, 0xcd, 0x91,
+ 0x9c, 0xa7, 0xfe, 0x83, 0xb6, 0x92, 0xd3, 0x20,
+ 0x5b, 0xef, 0x72, 0x39, 0x2b, 0x2c, 0xf6, 0xbb,
+ 0x0a, 0x6d, 0x43, 0xf9, 0x94, 0xf9, 0x5f, 0x11 },
+ { 0xf6, 0x3a, 0xab, 0x3e, 0xc6, 0x41, 0xb3, 0xb0,
+ 0x24, 0x96, 0x4c, 0x2b, 0x43, 0x7c, 0x04, 0xf6,
+ 0x04, 0x3c, 0x4c, 0x7e, 0x02, 0x79, 0x23, 0x99,
+ 0x95, 0x40, 0x19, 0x58, 0xf8, 0x6b, 0xbe, 0x54 },
+ { 0xf1, 0x72, 0xb1, 0x80, 0xbf, 0xb0, 0x97, 0x40,
+ 0x49, 0x31, 0x20, 0xb6, 0x32, 0x6c, 0xbd, 0xc5,
+ 0x61, 0xe4, 0x77, 0xde, 0xf9, 0xbb, 0xcf, 0xd2,
+ 0x8c, 0xc8, 0xc1, 0xc5, 0xe3, 0x37, 0x9a, 0x31 },
+ { 0xcb, 0x9b, 0x89, 0xcc, 0x18, 0x38, 0x1d, 0xd9,
+ 0x14, 0x1a, 0xde, 0x58, 0x86, 0x54, 0xd4, 0xe6,
+ 0xa2, 0x31, 0xd5, 0xbf, 0x49, 0xd4, 0xd5, 0x9a,
+ 0xc2, 0x7d, 0x86, 0x9c, 0xbe, 0x10, 0x0c, 0xf3 },
+ { 0x7b, 0xd8, 0x81, 0x50, 0x46, 0xfd, 0xd8, 0x10,
+ 0xa9, 0x23, 0xe1, 0x98, 0x4a, 0xae, 0xbd, 0xcd,
+ 0xf8, 0x4d, 0x87, 0xc8, 0x99, 0x2d, 0x68, 0xb5,
+ 0xee, 0xb4, 0x60, 0xf9, 0x3e, 0xb3, 0xc8, 0xd7 },
+ { 0x60, 0x7b, 0xe6, 0x68, 0x62, 0xfd, 0x08, 0xee,
+ 0x5b, 0x19, 0xfa, 0xca, 0xc0, 0x9d, 0xfd, 0xbc,
+ 0xd4, 0x0c, 0x31, 0x21, 0x01, 0xd6, 0x6e, 0x6e,
+ 0xbd, 0x2b, 0x84, 0x1f, 0x1b, 0x9a, 0x93, 0x25 },
+ { 0x9f, 0xe0, 0x3b, 0xbe, 0x69, 0xab, 0x18, 0x34,
+ 0xf5, 0x21, 0x9b, 0x0d, 0xa8, 0x8a, 0x08, 0xb3,
+ 0x0a, 0x66, 0xc5, 0x91, 0x3f, 0x01, 0x51, 0x96,
+ 0x3c, 0x36, 0x05, 0x60, 0xdb, 0x03, 0x87, 0xb3 },
+ { 0x90, 0xa8, 0x35, 0x85, 0x71, 0x7b, 0x75, 0xf0,
+ 0xe9, 0xb7, 0x25, 0xe0, 0x55, 0xee, 0xee, 0xb9,
+ 0xe7, 0xa0, 0x28, 0xea, 0x7e, 0x6c, 0xbc, 0x07,
+ 0xb2, 0x09, 0x17, 0xec, 0x03, 0x63, 0xe3, 0x8c },
+ { 0x33, 0x6e, 0xa0, 0x53, 0x0f, 0x4a, 0x74, 0x69,
+ 0x12, 0x6e, 0x02, 0x18, 0x58, 0x7e, 0xbb, 0xde,
+ 0x33, 0x58, 0xa0, 0xb3, 0x1c, 0x29, 0xd2, 0x00,
+ 0xf7, 0xdc, 0x7e, 0xb1, 0x5c, 0x6a, 0xad, 0xd8 },
+ { 0xa7, 0x9e, 0x76, 0xdc, 0x0a, 0xbc, 0xa4, 0x39,
+ 0x6f, 0x07, 0x47, 0xcd, 0x7b, 0x74, 0x8d, 0xf9,
+ 0x13, 0x00, 0x76, 0x26, 0xb1, 0xd6, 0x59, 0xda,
+ 0x0c, 0x1f, 0x78, 0xb9, 0x30, 0x3d, 0x01, 0xa3 },
+ { 0x44, 0xe7, 0x8a, 0x77, 0x37, 0x56, 0xe0, 0x95,
+ 0x15, 0x19, 0x50, 0x4d, 0x70, 0x38, 0xd2, 0x8d,
+ 0x02, 0x13, 0xa3, 0x7e, 0x0c, 0xe3, 0x75, 0x37,
+ 0x17, 0x57, 0xbc, 0x99, 0x63, 0x11, 0xe3, 0xb8 },
+ { 0x77, 0xac, 0x01, 0x2a, 0x3f, 0x75, 0x4d, 0xcf,
+ 0xea, 0xb5, 0xeb, 0x99, 0x6b, 0xe9, 0xcd, 0x2d,
+ 0x1f, 0x96, 0x11, 0x1b, 0x6e, 0x49, 0xf3, 0x99,
+ 0x4d, 0xf1, 0x81, 0xf2, 0x85, 0x69, 0xd8, 0x25 },
+ { 0xce, 0x5a, 0x10, 0xdb, 0x6f, 0xcc, 0xda, 0xf1,
+ 0x40, 0xaa, 0xa4, 0xde, 0xd6, 0x25, 0x0a, 0x9c,
+ 0x06, 0xe9, 0x22, 0x2b, 0xc9, 0xf9, 0xf3, 0x65,
+ 0x8a, 0x4a, 0xff, 0x93, 0x5f, 0x2b, 0x9f, 0x3a },
+ { 0xec, 0xc2, 0x03, 0xa7, 0xfe, 0x2b, 0xe4, 0xab,
+ 0xd5, 0x5b, 0xb5, 0x3e, 0x6e, 0x67, 0x35, 0x72,
+ 0xe0, 0x07, 0x8d, 0xa8, 0xcd, 0x37, 0x5e, 0xf4,
+ 0x30, 0xcc, 0x97, 0xf9, 0xf8, 0x00, 0x83, 0xaf },
+ { 0x14, 0xa5, 0x18, 0x6d, 0xe9, 0xd7, 0xa1, 0x8b,
+ 0x04, 0x12, 0xb8, 0x56, 0x3e, 0x51, 0xcc, 0x54,
+ 0x33, 0x84, 0x0b, 0x4a, 0x12, 0x9a, 0x8f, 0xf9,
+ 0x63, 0xb3, 0x3a, 0x3c, 0x4a, 0xfe, 0x8e, 0xbb },
+ { 0x13, 0xf8, 0xef, 0x95, 0xcb, 0x86, 0xe6, 0xa6,
+ 0x38, 0x93, 0x1c, 0x8e, 0x10, 0x76, 0x73, 0xeb,
+ 0x76, 0xba, 0x10, 0xd7, 0xc2, 0xcd, 0x70, 0xb9,
+ 0xd9, 0x92, 0x0b, 0xbe, 0xed, 0x92, 0x94, 0x09 },
+ { 0x0b, 0x33, 0x8f, 0x4e, 0xe1, 0x2f, 0x2d, 0xfc,
+ 0xb7, 0x87, 0x13, 0x37, 0x79, 0x41, 0xe0, 0xb0,
+ 0x63, 0x21, 0x52, 0x58, 0x1d, 0x13, 0x32, 0x51,
+ 0x6e, 0x4a, 0x2c, 0xab, 0x19, 0x42, 0xcc, 0xa4 },
+ { 0xea, 0xab, 0x0e, 0xc3, 0x7b, 0x3b, 0x8a, 0xb7,
+ 0x96, 0xe9, 0xf5, 0x72, 0x38, 0xde, 0x14, 0xa2,
+ 0x64, 0xa0, 0x76, 0xf3, 0x88, 0x7d, 0x86, 0xe2,
+ 0x9b, 0xb5, 0x90, 0x6d, 0xb5, 0xa0, 0x0e, 0x02 },
+ { 0x23, 0xcb, 0x68, 0xb8, 0xc0, 0xe6, 0xdc, 0x26,
+ 0xdc, 0x27, 0x76, 0x6d, 0xdc, 0x0a, 0x13, 0xa9,
+ 0x94, 0x38, 0xfd, 0x55, 0x61, 0x7a, 0xa4, 0x09,
+ 0x5d, 0x8f, 0x96, 0x97, 0x20, 0xc8, 0x72, 0xdf },
+ { 0x09, 0x1d, 0x8e, 0xe3, 0x0d, 0x6f, 0x29, 0x68,
+ 0xd4, 0x6b, 0x68, 0x7d, 0xd6, 0x52, 0x92, 0x66,
+ 0x57, 0x42, 0xde, 0x0b, 0xb8, 0x3d, 0xcc, 0x00,
+ 0x04, 0xc7, 0x2c, 0xe1, 0x00, 0x07, 0xa5, 0x49 },
+ { 0x7f, 0x50, 0x7a, 0xbc, 0x6d, 0x19, 0xba, 0x00,
+ 0xc0, 0x65, 0xa8, 0x76, 0xec, 0x56, 0x57, 0x86,
+ 0x88, 0x82, 0xd1, 0x8a, 0x22, 0x1b, 0xc4, 0x6c,
+ 0x7a, 0x69, 0x12, 0x54, 0x1f, 0x5b, 0xc7, 0xba },
+ { 0xa0, 0x60, 0x7c, 0x24, 0xe1, 0x4e, 0x8c, 0x22,
+ 0x3d, 0xb0, 0xd7, 0x0b, 0x4d, 0x30, 0xee, 0x88,
+ 0x01, 0x4d, 0x60, 0x3f, 0x43, 0x7e, 0x9e, 0x02,
+ 0xaa, 0x7d, 0xaf, 0xa3, 0xcd, 0xfb, 0xad, 0x94 },
+ { 0xdd, 0xbf, 0xea, 0x75, 0xcc, 0x46, 0x78, 0x82,
+ 0xeb, 0x34, 0x83, 0xce, 0x5e, 0x2e, 0x75, 0x6a,
+ 0x4f, 0x47, 0x01, 0xb7, 0x6b, 0x44, 0x55, 0x19,
+ 0xe8, 0x9f, 0x22, 0xd6, 0x0f, 0xa8, 0x6e, 0x06 },
+ { 0x0c, 0x31, 0x1f, 0x38, 0xc3, 0x5a, 0x4f, 0xb9,
+ 0x0d, 0x65, 0x1c, 0x28, 0x9d, 0x48, 0x68, 0x56,
+ 0xcd, 0x14, 0x13, 0xdf, 0x9b, 0x06, 0x77, 0xf5,
+ 0x3e, 0xce, 0x2c, 0xd9, 0xe4, 0x77, 0xc6, 0x0a },
+ { 0x46, 0xa7, 0x3a, 0x8d, 0xd3, 0xe7, 0x0f, 0x59,
+ 0xd3, 0x94, 0x2c, 0x01, 0xdf, 0x59, 0x9d, 0xef,
+ 0x78, 0x3c, 0x9d, 0xa8, 0x2f, 0xd8, 0x32, 0x22,
+ 0xcd, 0x66, 0x2b, 0x53, 0xdc, 0xe7, 0xdb, 0xdf },
+ { 0xad, 0x03, 0x8f, 0xf9, 0xb1, 0x4d, 0xe8, 0x4a,
+ 0x80, 0x1e, 0x4e, 0x62, 0x1c, 0xe5, 0xdf, 0x02,
+ 0x9d, 0xd9, 0x35, 0x20, 0xd0, 0xc2, 0xfa, 0x38,
+ 0xbf, 0xf1, 0x76, 0xa8, 0xb1, 0xd1, 0x69, 0x8c },
+ { 0xab, 0x70, 0xc5, 0xdf, 0xbd, 0x1e, 0xa8, 0x17,
+ 0xfe, 0xd0, 0xcd, 0x06, 0x72, 0x93, 0xab, 0xf3,
+ 0x19, 0xe5, 0xd7, 0x90, 0x1c, 0x21, 0x41, 0xd5,
+ 0xd9, 0x9b, 0x23, 0xf0, 0x3a, 0x38, 0xe7, 0x48 },
+ { 0x1f, 0xff, 0xda, 0x67, 0x93, 0x2b, 0x73, 0xc8,
+ 0xec, 0xaf, 0x00, 0x9a, 0x34, 0x91, 0xa0, 0x26,
+ 0x95, 0x3b, 0xab, 0xfe, 0x1f, 0x66, 0x3b, 0x06,
+ 0x97, 0xc3, 0xc4, 0xae, 0x8b, 0x2e, 0x7d, 0xcb },
+ { 0xb0, 0xd2, 0xcc, 0x19, 0x47, 0x2d, 0xd5, 0x7f,
+ 0x2b, 0x17, 0xef, 0xc0, 0x3c, 0x8d, 0x58, 0xc2,
+ 0x28, 0x3d, 0xbb, 0x19, 0xda, 0x57, 0x2f, 0x77,
+ 0x55, 0x85, 0x5a, 0xa9, 0x79, 0x43, 0x17, 0xa0 },
+ { 0xa0, 0xd1, 0x9a, 0x6e, 0xe3, 0x39, 0x79, 0xc3,
+ 0x25, 0x51, 0x0e, 0x27, 0x66, 0x22, 0xdf, 0x41,
+ 0xf7, 0x15, 0x83, 0xd0, 0x75, 0x01, 0xb8, 0x70,
+ 0x71, 0x12, 0x9a, 0x0a, 0xd9, 0x47, 0x32, 0xa5 },
+ { 0x72, 0x46, 0x42, 0xa7, 0x03, 0x2d, 0x10, 0x62,
+ 0xb8, 0x9e, 0x52, 0xbe, 0xa3, 0x4b, 0x75, 0xdf,
+ 0x7d, 0x8f, 0xe7, 0x72, 0xd9, 0xfe, 0x3c, 0x93,
+ 0xdd, 0xf3, 0xc4, 0x54, 0x5a, 0xb5, 0xa9, 0x9b },
+ { 0xad, 0xe5, 0xea, 0xa7, 0xe6, 0x1f, 0x67, 0x2d,
+ 0x58, 0x7e, 0xa0, 0x3d, 0xae, 0x7d, 0x7b, 0x55,
+ 0x22, 0x9c, 0x01, 0xd0, 0x6b, 0xc0, 0xa5, 0x70,
+ 0x14, 0x36, 0xcb, 0xd1, 0x83, 0x66, 0xa6, 0x26 },
+ { 0x01, 0x3b, 0x31, 0xeb, 0xd2, 0x28, 0xfc, 0xdd,
+ 0xa5, 0x1f, 0xab, 0xb0, 0x3b, 0xb0, 0x2d, 0x60,
+ 0xac, 0x20, 0xca, 0x21, 0x5a, 0xaf, 0xa8, 0x3b,
+ 0xdd, 0x85, 0x5e, 0x37, 0x55, 0xa3, 0x5f, 0x0b },
+ { 0x33, 0x2e, 0xd4, 0x0b, 0xb1, 0x0d, 0xde, 0x3c,
+ 0x95, 0x4a, 0x75, 0xd7, 0xb8, 0x99, 0x9d, 0x4b,
+ 0x26, 0xa1, 0xc0, 0x63, 0xc1, 0xdc, 0x6e, 0x32,
+ 0xc1, 0xd9, 0x1b, 0xab, 0x7b, 0xbb, 0x7d, 0x16 },
+ { 0xc7, 0xa1, 0x97, 0xb3, 0xa0, 0x5b, 0x56, 0x6b,
+ 0xcc, 0x9f, 0xac, 0xd2, 0x0e, 0x44, 0x1d, 0x6f,
+ 0x6c, 0x28, 0x60, 0xac, 0x96, 0x51, 0xcd, 0x51,
+ 0xd6, 0xb9, 0xd2, 0xcd, 0xee, 0xea, 0x03, 0x90 },
+ { 0xbd, 0x9c, 0xf6, 0x4e, 0xa8, 0x95, 0x3c, 0x03,
+ 0x71, 0x08, 0xe6, 0xf6, 0x54, 0x91, 0x4f, 0x39,
+ 0x58, 0xb6, 0x8e, 0x29, 0xc1, 0x67, 0x00, 0xdc,
+ 0x18, 0x4d, 0x94, 0xa2, 0x17, 0x08, 0xff, 0x60 },
+ { 0x88, 0x35, 0xb0, 0xac, 0x02, 0x11, 0x51, 0xdf,
+ 0x71, 0x64, 0x74, 0xce, 0x27, 0xce, 0x4d, 0x3c,
+ 0x15, 0xf0, 0xb2, 0xda, 0xb4, 0x80, 0x03, 0xcf,
+ 0x3f, 0x3e, 0xfd, 0x09, 0x45, 0x10, 0x6b, 0x9a },
+ { 0x3b, 0xfe, 0xfa, 0x33, 0x01, 0xaa, 0x55, 0xc0,
+ 0x80, 0x19, 0x0c, 0xff, 0xda, 0x8e, 0xae, 0x51,
+ 0xd9, 0xaf, 0x48, 0x8b, 0x4c, 0x1f, 0x24, 0xc3,
+ 0xd9, 0xa7, 0x52, 0x42, 0xfd, 0x8e, 0xa0, 0x1d },
+ { 0x08, 0x28, 0x4d, 0x14, 0x99, 0x3c, 0xd4, 0x7d,
+ 0x53, 0xeb, 0xae, 0xcf, 0x0d, 0xf0, 0x47, 0x8c,
+ 0xc1, 0x82, 0xc8, 0x9c, 0x00, 0xe1, 0x85, 0x9c,
+ 0x84, 0x85, 0x16, 0x86, 0xdd, 0xf2, 0xc1, 0xb7 },
+ { 0x1e, 0xd7, 0xef, 0x9f, 0x04, 0xc2, 0xac, 0x8d,
+ 0xb6, 0xa8, 0x64, 0xdb, 0x13, 0x10, 0x87, 0xf2,
+ 0x70, 0x65, 0x09, 0x8e, 0x69, 0xc3, 0xfe, 0x78,
+ 0x71, 0x8d, 0x9b, 0x94, 0x7f, 0x4a, 0x39, 0xd0 },
+ { 0xc1, 0x61, 0xf2, 0xdc, 0xd5, 0x7e, 0x9c, 0x14,
+ 0x39, 0xb3, 0x1a, 0x9d, 0xd4, 0x3d, 0x8f, 0x3d,
+ 0x7d, 0xd8, 0xf0, 0xeb, 0x7c, 0xfa, 0xc6, 0xfb,
+ 0x25, 0xa0, 0xf2, 0x8e, 0x30, 0x6f, 0x06, 0x61 },
+ { 0xc0, 0x19, 0x69, 0xad, 0x34, 0xc5, 0x2c, 0xaf,
+ 0x3d, 0xc4, 0xd8, 0x0d, 0x19, 0x73, 0x5c, 0x29,
+ 0x73, 0x1a, 0xc6, 0xe7, 0xa9, 0x20, 0x85, 0xab,
+ 0x92, 0x50, 0xc4, 0x8d, 0xea, 0x48, 0xa3, 0xfc },
+ { 0x17, 0x20, 0xb3, 0x65, 0x56, 0x19, 0xd2, 0xa5,
+ 0x2b, 0x35, 0x21, 0xae, 0x0e, 0x49, 0xe3, 0x45,
+ 0xcb, 0x33, 0x89, 0xeb, 0xd6, 0x20, 0x8a, 0xca,
+ 0xf9, 0xf1, 0x3f, 0xda, 0xcc, 0xa8, 0xbe, 0x49 },
+ { 0x75, 0x62, 0x88, 0x36, 0x1c, 0x83, 0xe2, 0x4c,
+ 0x61, 0x7c, 0xf9, 0x5c, 0x90, 0x5b, 0x22, 0xd0,
+ 0x17, 0xcd, 0xc8, 0x6f, 0x0b, 0xf1, 0xd6, 0x58,
+ 0xf4, 0x75, 0x6c, 0x73, 0x79, 0x87, 0x3b, 0x7f },
+ { 0xe7, 0xd0, 0xed, 0xa3, 0x45, 0x26, 0x93, 0xb7,
+ 0x52, 0xab, 0xcd, 0xa1, 0xb5, 0x5e, 0x27, 0x6f,
+ 0x82, 0x69, 0x8f, 0x5f, 0x16, 0x05, 0x40, 0x3e,
+ 0xff, 0x83, 0x0b, 0xea, 0x00, 0x71, 0xa3, 0x94 },
+ { 0x2c, 0x82, 0xec, 0xaa, 0x6b, 0x84, 0x80, 0x3e,
+ 0x04, 0x4a, 0xf6, 0x31, 0x18, 0xaf, 0xe5, 0x44,
+ 0x68, 0x7c, 0xb6, 0xe6, 0xc7, 0xdf, 0x49, 0xed,
+ 0x76, 0x2d, 0xfd, 0x7c, 0x86, 0x93, 0xa1, 0xbc },
+ { 0x61, 0x36, 0xcb, 0xf4, 0xb4, 0x41, 0x05, 0x6f,
+ 0xa1, 0xe2, 0x72, 0x24, 0x98, 0x12, 0x5d, 0x6d,
+ 0xed, 0x45, 0xe1, 0x7b, 0x52, 0x14, 0x39, 0x59,
+ 0xc7, 0xf4, 0xd4, 0xe3, 0x95, 0x21, 0x8a, 0xc2 },
+ { 0x72, 0x1d, 0x32, 0x45, 0xaa, 0xfe, 0xf2, 0x7f,
+ 0x6a, 0x62, 0x4f, 0x47, 0x95, 0x4b, 0x6c, 0x25,
+ 0x50, 0x79, 0x52, 0x6f, 0xfa, 0x25, 0xe9, 0xff,
+ 0x77, 0xe5, 0xdc, 0xff, 0x47, 0x3b, 0x15, 0x97 },
+ { 0x9d, 0xd2, 0xfb, 0xd8, 0xce, 0xf1, 0x6c, 0x35,
+ 0x3c, 0x0a, 0xc2, 0x11, 0x91, 0xd5, 0x09, 0xeb,
+ 0x28, 0xdd, 0x9e, 0x3e, 0x0d, 0x8c, 0xea, 0x5d,
+ 0x26, 0xca, 0x83, 0x93, 0x93, 0x85, 0x1c, 0x3a },
+ { 0xb2, 0x39, 0x4c, 0xea, 0xcd, 0xeb, 0xf2, 0x1b,
+ 0xf9, 0xdf, 0x2c, 0xed, 0x98, 0xe5, 0x8f, 0x1c,
+ 0x3a, 0x4b, 0xbb, 0xff, 0x66, 0x0d, 0xd9, 0x00,
+ 0xf6, 0x22, 0x02, 0xd6, 0x78, 0x5c, 0xc4, 0x6e },
+ { 0x57, 0x08, 0x9f, 0x22, 0x27, 0x49, 0xad, 0x78,
+ 0x71, 0x76, 0x5f, 0x06, 0x2b, 0x11, 0x4f, 0x43,
+ 0xba, 0x20, 0xec, 0x56, 0x42, 0x2a, 0x8b, 0x1e,
+ 0x3f, 0x87, 0x19, 0x2c, 0x0e, 0xa7, 0x18, 0xc6 },
+ { 0xe4, 0x9a, 0x94, 0x59, 0x96, 0x1c, 0xd3, 0x3c,
+ 0xdf, 0x4a, 0xae, 0x1b, 0x10, 0x78, 0xa5, 0xde,
+ 0xa7, 0xc0, 0x40, 0xe0, 0xfe, 0xa3, 0x40, 0xc9,
+ 0x3a, 0x72, 0x48, 0x72, 0xfc, 0x4a, 0xf8, 0x06 },
+ { 0xed, 0xe6, 0x7f, 0x72, 0x0e, 0xff, 0xd2, 0xca,
+ 0x9c, 0x88, 0x99, 0x41, 0x52, 0xd0, 0x20, 0x1d,
+ 0xee, 0x6b, 0x0a, 0x2d, 0x2c, 0x07, 0x7a, 0xca,
+ 0x6d, 0xae, 0x29, 0xf7, 0x3f, 0x8b, 0x63, 0x09 },
+ { 0xe0, 0xf4, 0x34, 0xbf, 0x22, 0xe3, 0x08, 0x80,
+ 0x39, 0xc2, 0x1f, 0x71, 0x9f, 0xfc, 0x67, 0xf0,
+ 0xf2, 0xcb, 0x5e, 0x98, 0xa7, 0xa0, 0x19, 0x4c,
+ 0x76, 0xe9, 0x6b, 0xf4, 0xe8, 0xe1, 0x7e, 0x61 },
+ { 0x27, 0x7c, 0x04, 0xe2, 0x85, 0x34, 0x84, 0xa4,
+ 0xeb, 0xa9, 0x10, 0xad, 0x33, 0x6d, 0x01, 0xb4,
+ 0x77, 0xb6, 0x7c, 0xc2, 0x00, 0xc5, 0x9f, 0x3c,
+ 0x8d, 0x77, 0xee, 0xf8, 0x49, 0x4f, 0x29, 0xcd },
+ { 0x15, 0x6d, 0x57, 0x47, 0xd0, 0xc9, 0x9c, 0x7f,
+ 0x27, 0x09, 0x7d, 0x7b, 0x7e, 0x00, 0x2b, 0x2e,
+ 0x18, 0x5c, 0xb7, 0x2d, 0x8d, 0xd7, 0xeb, 0x42,
+ 0x4a, 0x03, 0x21, 0x52, 0x81, 0x61, 0x21, 0x9f },
+ { 0x20, 0xdd, 0xd1, 0xed, 0x9b, 0x1c, 0xa8, 0x03,
+ 0x94, 0x6d, 0x64, 0xa8, 0x3a, 0xe4, 0x65, 0x9d,
+ 0xa6, 0x7f, 0xba, 0x7a, 0x1a, 0x3e, 0xdd, 0xb1,
+ 0xe1, 0x03, 0xc0, 0xf5, 0xe0, 0x3e, 0x3a, 0x2c },
+ { 0xf0, 0xaf, 0x60, 0x4d, 0x3d, 0xab, 0xbf, 0x9a,
+ 0x0f, 0x2a, 0x7d, 0x3d, 0xda, 0x6b, 0xd3, 0x8b,
+ 0xba, 0x72, 0xc6, 0xd0, 0x9b, 0xe4, 0x94, 0xfc,
+ 0xef, 0x71, 0x3f, 0xf1, 0x01, 0x89, 0xb6, 0xe6 },
+ { 0x98, 0x02, 0xbb, 0x87, 0xde, 0xf4, 0xcc, 0x10,
+ 0xc4, 0xa5, 0xfd, 0x49, 0xaa, 0x58, 0xdf, 0xe2,
+ 0xf3, 0xfd, 0xdb, 0x46, 0xb4, 0x70, 0x88, 0x14,
+ 0xea, 0xd8, 0x1d, 0x23, 0xba, 0x95, 0x13, 0x9b },
+ { 0x4f, 0x8c, 0xe1, 0xe5, 0x1d, 0x2f, 0xe7, 0xf2,
+ 0x40, 0x43, 0xa9, 0x04, 0xd8, 0x98, 0xeb, 0xfc,
+ 0x91, 0x97, 0x54, 0x18, 0x75, 0x34, 0x13, 0xaa,
+ 0x09, 0x9b, 0x79, 0x5e, 0xcb, 0x35, 0xce, 0xdb },
+ { 0xbd, 0xdc, 0x65, 0x14, 0xd7, 0xee, 0x6a, 0xce,
+ 0x0a, 0x4a, 0xc1, 0xd0, 0xe0, 0x68, 0x11, 0x22,
+ 0x88, 0xcb, 0xcf, 0x56, 0x04, 0x54, 0x64, 0x27,
+ 0x05, 0x63, 0x01, 0x77, 0xcb, 0xa6, 0x08, 0xbd },
+ { 0xd6, 0x35, 0x99, 0x4f, 0x62, 0x91, 0x51, 0x7b,
+ 0x02, 0x81, 0xff, 0xdd, 0x49, 0x6a, 0xfa, 0x86,
+ 0x27, 0x12, 0xe5, 0xb3, 0xc4, 0xe5, 0x2e, 0x4c,
+ 0xd5, 0xfd, 0xae, 0x8c, 0x0e, 0x72, 0xfb, 0x08 },
+ { 0x87, 0x8d, 0x9c, 0xa6, 0x00, 0xcf, 0x87, 0xe7,
+ 0x69, 0xcc, 0x30, 0x5c, 0x1b, 0x35, 0x25, 0x51,
+ 0x86, 0x61, 0x5a, 0x73, 0xa0, 0xda, 0x61, 0x3b,
+ 0x5f, 0x1c, 0x98, 0xdb, 0xf8, 0x12, 0x83, 0xea },
+ { 0xa6, 0x4e, 0xbe, 0x5d, 0xc1, 0x85, 0xde, 0x9f,
+ 0xdd, 0xe7, 0x60, 0x7b, 0x69, 0x98, 0x70, 0x2e,
+ 0xb2, 0x34, 0x56, 0x18, 0x49, 0x57, 0x30, 0x7d,
+ 0x2f, 0xa7, 0x2e, 0x87, 0xa4, 0x77, 0x02, 0xd6 },
+ { 0xce, 0x50, 0xea, 0xb7, 0xb5, 0xeb, 0x52, 0xbd,
+ 0xc9, 0xad, 0x8e, 0x5a, 0x48, 0x0a, 0xb7, 0x80,
+ 0xca, 0x93, 0x20, 0xe4, 0x43, 0x60, 0xb1, 0xfe,
+ 0x37, 0xe0, 0x3f, 0x2f, 0x7a, 0xd7, 0xde, 0x01 },
+ { 0xee, 0xdd, 0xb7, 0xc0, 0xdb, 0x6e, 0x30, 0xab,
+ 0xe6, 0x6d, 0x79, 0xe3, 0x27, 0x51, 0x1e, 0x61,
+ 0xfc, 0xeb, 0xbc, 0x29, 0xf1, 0x59, 0xb4, 0x0a,
+ 0x86, 0xb0, 0x46, 0xec, 0xf0, 0x51, 0x38, 0x23 },
+ { 0x78, 0x7f, 0xc9, 0x34, 0x40, 0xc1, 0xec, 0x96,
+ 0xb5, 0xad, 0x01, 0xc1, 0x6c, 0xf7, 0x79, 0x16,
+ 0xa1, 0x40, 0x5f, 0x94, 0x26, 0x35, 0x6e, 0xc9,
+ 0x21, 0xd8, 0xdf, 0xf3, 0xea, 0x63, 0xb7, 0xe0 },
+ { 0x7f, 0x0d, 0x5e, 0xab, 0x47, 0xee, 0xfd, 0xa6,
+ 0x96, 0xc0, 0xbf, 0x0f, 0xbf, 0x86, 0xab, 0x21,
+ 0x6f, 0xce, 0x46, 0x1e, 0x93, 0x03, 0xab, 0xa6,
+ 0xac, 0x37, 0x41, 0x20, 0xe8, 0x90, 0xe8, 0xdf },
+ { 0xb6, 0x80, 0x04, 0xb4, 0x2f, 0x14, 0xad, 0x02,
+ 0x9f, 0x4c, 0x2e, 0x03, 0xb1, 0xd5, 0xeb, 0x76,
+ 0xd5, 0x71, 0x60, 0xe2, 0x64, 0x76, 0xd2, 0x11,
+ 0x31, 0xbe, 0xf2, 0x0a, 0xda, 0x7d, 0x27, 0xf4 },
+ { 0xb0, 0xc4, 0xeb, 0x18, 0xae, 0x25, 0x0b, 0x51,
+ 0xa4, 0x13, 0x82, 0xea, 0xd9, 0x2d, 0x0d, 0xc7,
+ 0x45, 0x5f, 0x93, 0x79, 0xfc, 0x98, 0x84, 0x42,
+ 0x8e, 0x47, 0x70, 0x60, 0x8d, 0xb0, 0xfa, 0xec },
+ { 0xf9, 0x2b, 0x7a, 0x87, 0x0c, 0x05, 0x9f, 0x4d,
+ 0x46, 0x46, 0x4c, 0x82, 0x4e, 0xc9, 0x63, 0x55,
+ 0x14, 0x0b, 0xdc, 0xe6, 0x81, 0x32, 0x2c, 0xc3,
+ 0xa9, 0x92, 0xff, 0x10, 0x3e, 0x3f, 0xea, 0x52 },
+ { 0x53, 0x64, 0x31, 0x26, 0x14, 0x81, 0x33, 0x98,
+ 0xcc, 0x52, 0x5d, 0x4c, 0x4e, 0x14, 0x6e, 0xde,
+ 0xb3, 0x71, 0x26, 0x5f, 0xba, 0x19, 0x13, 0x3a,
+ 0x2c, 0x3d, 0x21, 0x59, 0x29, 0x8a, 0x17, 0x42 },
+ { 0xf6, 0x62, 0x0e, 0x68, 0xd3, 0x7f, 0xb2, 0xaf,
+ 0x50, 0x00, 0xfc, 0x28, 0xe2, 0x3b, 0x83, 0x22,
+ 0x97, 0xec, 0xd8, 0xbc, 0xe9, 0x9e, 0x8b, 0xe4,
+ 0xd0, 0x4e, 0x85, 0x30, 0x9e, 0x3d, 0x33, 0x74 },
+ { 0x53, 0x16, 0xa2, 0x79, 0x69, 0xd7, 0xfe, 0x04,
+ 0xff, 0x27, 0xb2, 0x83, 0x96, 0x1b, 0xff, 0xc3,
+ 0xbf, 0x5d, 0xfb, 0x32, 0xfb, 0x6a, 0x89, 0xd1,
+ 0x01, 0xc6, 0xc3, 0xb1, 0x93, 0x7c, 0x28, 0x71 },
+ { 0x81, 0xd1, 0x66, 0x4f, 0xdf, 0x3c, 0xb3, 0x3c,
+ 0x24, 0xee, 0xba, 0xc0, 0xbd, 0x64, 0x24, 0x4b,
+ 0x77, 0xc4, 0xab, 0xea, 0x90, 0xbb, 0xe8, 0xb5,
+ 0xee, 0x0b, 0x2a, 0xaf, 0xcf, 0x2d, 0x6a, 0x53 },
+ { 0x34, 0x57, 0x82, 0xf2, 0x95, 0xb0, 0x88, 0x03,
+ 0x52, 0xe9, 0x24, 0xa0, 0x46, 0x7b, 0x5f, 0xbc,
+ 0x3e, 0x8f, 0x3b, 0xfb, 0xc3, 0xc7, 0xe4, 0x8b,
+ 0x67, 0x09, 0x1f, 0xb5, 0xe8, 0x0a, 0x94, 0x42 },
+ { 0x79, 0x41, 0x11, 0xea, 0x6c, 0xd6, 0x5e, 0x31,
+ 0x1f, 0x74, 0xee, 0x41, 0xd4, 0x76, 0xcb, 0x63,
+ 0x2c, 0xe1, 0xe4, 0xb0, 0x51, 0xdc, 0x1d, 0x9e,
+ 0x9d, 0x06, 0x1a, 0x19, 0xe1, 0xd0, 0xbb, 0x49 },
+ { 0x2a, 0x85, 0xda, 0xf6, 0x13, 0x88, 0x16, 0xb9,
+ 0x9b, 0xf8, 0xd0, 0x8b, 0xa2, 0x11, 0x4b, 0x7a,
+ 0xb0, 0x79, 0x75, 0xa7, 0x84, 0x20, 0xc1, 0xa3,
+ 0xb0, 0x6a, 0x77, 0x7c, 0x22, 0xdd, 0x8b, 0xcb },
+ { 0x89, 0xb0, 0xd5, 0xf2, 0x89, 0xec, 0x16, 0x40,
+ 0x1a, 0x06, 0x9a, 0x96, 0x0d, 0x0b, 0x09, 0x3e,
+ 0x62, 0x5d, 0xa3, 0xcf, 0x41, 0xee, 0x29, 0xb5,
+ 0x9b, 0x93, 0x0c, 0x58, 0x20, 0x14, 0x54, 0x55 },
+ { 0xd0, 0xfd, 0xcb, 0x54, 0x39, 0x43, 0xfc, 0x27,
+ 0xd2, 0x08, 0x64, 0xf5, 0x21, 0x81, 0x47, 0x1b,
+ 0x94, 0x2c, 0xc7, 0x7c, 0xa6, 0x75, 0xbc, 0xb3,
+ 0x0d, 0xf3, 0x1d, 0x35, 0x8e, 0xf7, 0xb1, 0xeb },
+ { 0xb1, 0x7e, 0xa8, 0xd7, 0x70, 0x63, 0xc7, 0x09,
+ 0xd4, 0xdc, 0x6b, 0x87, 0x94, 0x13, 0xc3, 0x43,
+ 0xe3, 0x79, 0x0e, 0x9e, 0x62, 0xca, 0x85, 0xb7,
+ 0x90, 0x0b, 0x08, 0x6f, 0x6b, 0x75, 0xc6, 0x72 },
+ { 0xe7, 0x1a, 0x3e, 0x2c, 0x27, 0x4d, 0xb8, 0x42,
+ 0xd9, 0x21, 0x14, 0xf2, 0x17, 0xe2, 0xc0, 0xea,
+ 0xc8, 0xb4, 0x50, 0x93, 0xfd, 0xfd, 0x9d, 0xf4,
+ 0xca, 0x71, 0x62, 0x39, 0x48, 0x62, 0xd5, 0x01 },
+ { 0xc0, 0x47, 0x67, 0x59, 0xab, 0x7a, 0xa3, 0x33,
+ 0x23, 0x4f, 0x6b, 0x44, 0xf5, 0xfd, 0x85, 0x83,
+ 0x90, 0xec, 0x23, 0x69, 0x4c, 0x62, 0x2c, 0xb9,
+ 0x86, 0xe7, 0x69, 0xc7, 0x8e, 0xdd, 0x73, 0x3e },
+ { 0x9a, 0xb8, 0xea, 0xbb, 0x14, 0x16, 0x43, 0x4d,
+ 0x85, 0x39, 0x13, 0x41, 0xd5, 0x69, 0x93, 0xc5,
+ 0x54, 0x58, 0x16, 0x7d, 0x44, 0x18, 0xb1, 0x9a,
+ 0x0f, 0x2a, 0xd8, 0xb7, 0x9a, 0x83, 0xa7, 0x5b },
+ { 0x79, 0x92, 0xd0, 0xbb, 0xb1, 0x5e, 0x23, 0x82,
+ 0x6f, 0x44, 0x3e, 0x00, 0x50, 0x5d, 0x68, 0xd3,
+ 0xed, 0x73, 0x72, 0x99, 0x5a, 0x5c, 0x3e, 0x49,
+ 0x86, 0x54, 0x10, 0x2f, 0xbc, 0xd0, 0x96, 0x4e },
+ { 0xc0, 0x21, 0xb3, 0x00, 0x85, 0x15, 0x14, 0x35,
+ 0xdf, 0x33, 0xb0, 0x07, 0xcc, 0xec, 0xc6, 0x9d,
+ 0xf1, 0x26, 0x9f, 0x39, 0xba, 0x25, 0x09, 0x2b,
+ 0xed, 0x59, 0xd9, 0x32, 0xac, 0x0f, 0xdc, 0x28 },
+ { 0x91, 0xa2, 0x5e, 0xc0, 0xec, 0x0d, 0x9a, 0x56,
+ 0x7f, 0x89, 0xc4, 0xbf, 0xe1, 0xa6, 0x5a, 0x0e,
+ 0x43, 0x2d, 0x07, 0x06, 0x4b, 0x41, 0x90, 0xe2,
+ 0x7d, 0xfb, 0x81, 0x90, 0x1f, 0xd3, 0x13, 0x9b },
+ { 0x59, 0x50, 0xd3, 0x9a, 0x23, 0xe1, 0x54, 0x5f,
+ 0x30, 0x12, 0x70, 0xaa, 0x1a, 0x12, 0xf2, 0xe6,
+ 0xc4, 0x53, 0x77, 0x6e, 0x4d, 0x63, 0x55, 0xde,
+ 0x42, 0x5c, 0xc1, 0x53, 0xf9, 0x81, 0x88, 0x67 },
+ { 0xd7, 0x9f, 0x14, 0x72, 0x0c, 0x61, 0x0a, 0xf1,
+ 0x79, 0xa3, 0x76, 0x5d, 0x4b, 0x7c, 0x09, 0x68,
+ 0xf9, 0x77, 0x96, 0x2d, 0xbf, 0x65, 0x5b, 0x52,
+ 0x12, 0x72, 0xb6, 0xf1, 0xe1, 0x94, 0x48, 0x8e },
+ { 0xe9, 0x53, 0x1b, 0xfc, 0x8b, 0x02, 0x99, 0x5a,
+ 0xea, 0xa7, 0x5b, 0xa2, 0x70, 0x31, 0xfa, 0xdb,
+ 0xcb, 0xf4, 0xa0, 0xda, 0xb8, 0x96, 0x1d, 0x92,
+ 0x96, 0xcd, 0x7e, 0x84, 0xd2, 0x5d, 0x60, 0x06 },
+ { 0x34, 0xe9, 0xc2, 0x6a, 0x01, 0xd7, 0xf1, 0x61,
+ 0x81, 0xb4, 0x54, 0xa9, 0xd1, 0x62, 0x3c, 0x23,
+ 0x3c, 0xb9, 0x9d, 0x31, 0xc6, 0x94, 0x65, 0x6e,
+ 0x94, 0x13, 0xac, 0xa3, 0xe9, 0x18, 0x69, 0x2f },
+ { 0xd9, 0xd7, 0x42, 0x2f, 0x43, 0x7b, 0xd4, 0x39,
+ 0xdd, 0xd4, 0xd8, 0x83, 0xda, 0xe2, 0xa0, 0x83,
+ 0x50, 0x17, 0x34, 0x14, 0xbe, 0x78, 0x15, 0x51,
+ 0x33, 0xff, 0xf1, 0x96, 0x4c, 0x3d, 0x79, 0x72 },
+ { 0x4a, 0xee, 0x0c, 0x7a, 0xaf, 0x07, 0x54, 0x14,
+ 0xff, 0x17, 0x93, 0xea, 0xd7, 0xea, 0xca, 0x60,
+ 0x17, 0x75, 0xc6, 0x15, 0xdb, 0xd6, 0x0b, 0x64,
+ 0x0b, 0x0a, 0x9f, 0x0c, 0xe5, 0x05, 0xd4, 0x35 },
+ { 0x6b, 0xfd, 0xd1, 0x54, 0x59, 0xc8, 0x3b, 0x99,
+ 0xf0, 0x96, 0xbf, 0xb4, 0x9e, 0xe8, 0x7b, 0x06,
+ 0x3d, 0x69, 0xc1, 0x97, 0x4c, 0x69, 0x28, 0xac,
+ 0xfc, 0xfb, 0x40, 0x99, 0xf8, 0xc4, 0xef, 0x67 },
+ { 0x9f, 0xd1, 0xc4, 0x08, 0xfd, 0x75, 0xc3, 0x36,
+ 0x19, 0x3a, 0x2a, 0x14, 0xd9, 0x4f, 0x6a, 0xf5,
+ 0xad, 0xf0, 0x50, 0xb8, 0x03, 0x87, 0xb4, 0xb0,
+ 0x10, 0xfb, 0x29, 0xf4, 0xcc, 0x72, 0x70, 0x7c },
+ { 0x13, 0xc8, 0x84, 0x80, 0xa5, 0xd0, 0x0d, 0x6c,
+ 0x8c, 0x7a, 0xd2, 0x11, 0x0d, 0x76, 0xa8, 0x2d,
+ 0x9b, 0x70, 0xf4, 0xfa, 0x66, 0x96, 0xd4, 0xe5,
+ 0xdd, 0x42, 0xa0, 0x66, 0xdc, 0xaf, 0x99, 0x20 },
+ { 0x82, 0x0e, 0x72, 0x5e, 0xe2, 0x5f, 0xe8, 0xfd,
+ 0x3a, 0x8d, 0x5a, 0xbe, 0x4c, 0x46, 0xc3, 0xba,
+ 0x88, 0x9d, 0xe6, 0xfa, 0x91, 0x91, 0xaa, 0x22,
+ 0xba, 0x67, 0xd5, 0x70, 0x54, 0x21, 0x54, 0x2b },
+ { 0x32, 0xd9, 0x3a, 0x0e, 0xb0, 0x2f, 0x42, 0xfb,
+ 0xbc, 0xaf, 0x2b, 0xad, 0x00, 0x85, 0xb2, 0x82,
+ 0xe4, 0x60, 0x46, 0xa4, 0xdf, 0x7a, 0xd1, 0x06,
+ 0x57, 0xc9, 0xd6, 0x47, 0x63, 0x75, 0xb9, 0x3e },
+ { 0xad, 0xc5, 0x18, 0x79, 0x05, 0xb1, 0x66, 0x9c,
+ 0xd8, 0xec, 0x9c, 0x72, 0x1e, 0x19, 0x53, 0x78,
+ 0x6b, 0x9d, 0x89, 0xa9, 0xba, 0xe3, 0x07, 0x80,
+ 0xf1, 0xe1, 0xea, 0xb2, 0x4a, 0x00, 0x52, 0x3c },
+ { 0xe9, 0x07, 0x56, 0xff, 0x7f, 0x9a, 0xd8, 0x10,
+ 0xb2, 0x39, 0xa1, 0x0c, 0xed, 0x2c, 0xf9, 0xb2,
+ 0x28, 0x43, 0x54, 0xc1, 0xf8, 0xc7, 0xe0, 0xac,
+ 0xcc, 0x24, 0x61, 0xdc, 0x79, 0x6d, 0x6e, 0x89 },
+ { 0x12, 0x51, 0xf7, 0x6e, 0x56, 0x97, 0x84, 0x81,
+ 0x87, 0x53, 0x59, 0x80, 0x1d, 0xb5, 0x89, 0xa0,
+ 0xb2, 0x2f, 0x86, 0xd8, 0xd6, 0x34, 0xdc, 0x04,
+ 0x50, 0x6f, 0x32, 0x2e, 0xd7, 0x8f, 0x17, 0xe8 },
+ { 0x3a, 0xfa, 0x89, 0x9f, 0xd9, 0x80, 0xe7, 0x3e,
+ 0xcb, 0x7f, 0x4d, 0x8b, 0x8f, 0x29, 0x1d, 0xc9,
+ 0xaf, 0x79, 0x6b, 0xc6, 0x5d, 0x27, 0xf9, 0x74,
+ 0xc6, 0xf1, 0x93, 0xc9, 0x19, 0x1a, 0x09, 0xfd },
+ { 0xaa, 0x30, 0x5b, 0xe2, 0x6e, 0x5d, 0xed, 0xdc,
+ 0x3c, 0x10, 0x10, 0xcb, 0xc2, 0x13, 0xf9, 0x5f,
+ 0x05, 0x1c, 0x78, 0x5c, 0x5b, 0x43, 0x1e, 0x6a,
+ 0x7c, 0xd0, 0x48, 0xf1, 0x61, 0x78, 0x75, 0x28 },
+ { 0x8e, 0xa1, 0x88, 0x4f, 0xf3, 0x2e, 0x9d, 0x10,
+ 0xf0, 0x39, 0xb4, 0x07, 0xd0, 0xd4, 0x4e, 0x7e,
+ 0x67, 0x0a, 0xbd, 0x88, 0x4a, 0xee, 0xe0, 0xfb,
+ 0x75, 0x7a, 0xe9, 0x4e, 0xaa, 0x97, 0x37, 0x3d },
+ { 0xd4, 0x82, 0xb2, 0x15, 0x5d, 0x4d, 0xec, 0x6b,
+ 0x47, 0x36, 0xa1, 0xf1, 0x61, 0x7b, 0x53, 0xaa,
+ 0xa3, 0x73, 0x10, 0x27, 0x7d, 0x3f, 0xef, 0x0c,
+ 0x37, 0xad, 0x41, 0x76, 0x8f, 0xc2, 0x35, 0xb4 },
+ { 0x4d, 0x41, 0x39, 0x71, 0x38, 0x7e, 0x7a, 0x88,
+ 0x98, 0xa8, 0xdc, 0x2a, 0x27, 0x50, 0x07, 0x78,
+ 0x53, 0x9e, 0xa2, 0x14, 0xa2, 0xdf, 0xe9, 0xb3,
+ 0xd7, 0xe8, 0xeb, 0xdc, 0xe5, 0xcf, 0x3d, 0xb3 },
+ { 0x69, 0x6e, 0x5d, 0x46, 0xe6, 0xc5, 0x7e, 0x87,
+ 0x96, 0xe4, 0x73, 0x5d, 0x08, 0x91, 0x6e, 0x0b,
+ 0x79, 0x29, 0xb3, 0xcf, 0x29, 0x8c, 0x29, 0x6d,
+ 0x22, 0xe9, 0xd3, 0x01, 0x96, 0x53, 0x37, 0x1c },
+ { 0x1f, 0x56, 0x47, 0xc1, 0xd3, 0xb0, 0x88, 0x22,
+ 0x88, 0x85, 0x86, 0x5c, 0x89, 0x40, 0x90, 0x8b,
+ 0xf4, 0x0d, 0x1a, 0x82, 0x72, 0x82, 0x19, 0x73,
+ 0xb1, 0x60, 0x00, 0x8e, 0x7a, 0x3c, 0xe2, 0xeb },
+ { 0xb6, 0xe7, 0x6c, 0x33, 0x0f, 0x02, 0x1a, 0x5b,
+ 0xda, 0x65, 0x87, 0x50, 0x10, 0xb0, 0xed, 0xf0,
+ 0x91, 0x26, 0xc0, 0xf5, 0x10, 0xea, 0x84, 0x90,
+ 0x48, 0x19, 0x20, 0x03, 0xae, 0xf4, 0xc6, 0x1c },
+ { 0x3c, 0xd9, 0x52, 0xa0, 0xbe, 0xad, 0xa4, 0x1a,
+ 0xbb, 0x42, 0x4c, 0xe4, 0x7f, 0x94, 0xb4, 0x2b,
+ 0xe6, 0x4e, 0x1f, 0xfb, 0x0f, 0xd0, 0x78, 0x22,
+ 0x76, 0x80, 0x79, 0x46, 0xd0, 0xd0, 0xbc, 0x55 },
+ { 0x98, 0xd9, 0x26, 0x77, 0x43, 0x9b, 0x41, 0xb7,
+ 0xbb, 0x51, 0x33, 0x12, 0xaf, 0xb9, 0x2b, 0xcc,
+ 0x8e, 0xe9, 0x68, 0xb2, 0xe3, 0xb2, 0x38, 0xce,
+ 0xcb, 0x9b, 0x0f, 0x34, 0xc9, 0xbb, 0x63, 0xd0 },
+ { 0xec, 0xbc, 0xa2, 0xcf, 0x08, 0xae, 0x57, 0xd5,
+ 0x17, 0xad, 0x16, 0x15, 0x8a, 0x32, 0xbf, 0xa7,
+ 0xdc, 0x03, 0x82, 0xea, 0xed, 0xa1, 0x28, 0xe9,
+ 0x18, 0x86, 0x73, 0x4c, 0x24, 0xa0, 0xb2, 0x9d },
+ { 0x94, 0x2c, 0xc7, 0xc0, 0xb5, 0x2e, 0x2b, 0x16,
+ 0xa4, 0xb8, 0x9f, 0xa4, 0xfc, 0x7e, 0x0b, 0xf6,
+ 0x09, 0xe2, 0x9a, 0x08, 0xc1, 0xa8, 0x54, 0x34,
+ 0x52, 0xb7, 0x7c, 0x7b, 0xfd, 0x11, 0xbb, 0x28 },
+ { 0x8a, 0x06, 0x5d, 0x8b, 0x61, 0xa0, 0xdf, 0xfb,
+ 0x17, 0x0d, 0x56, 0x27, 0x73, 0x5a, 0x76, 0xb0,
+ 0xe9, 0x50, 0x60, 0x37, 0x80, 0x8c, 0xba, 0x16,
+ 0xc3, 0x45, 0x00, 0x7c, 0x9f, 0x79, 0xcf, 0x8f },
+ { 0x1b, 0x9f, 0xa1, 0x97, 0x14, 0x65, 0x9c, 0x78,
+ 0xff, 0x41, 0x38, 0x71, 0x84, 0x92, 0x15, 0x36,
+ 0x10, 0x29, 0xac, 0x80, 0x2b, 0x1c, 0xbc, 0xd5,
+ 0x4e, 0x40, 0x8b, 0xd8, 0x72, 0x87, 0xf8, 0x1f },
+ { 0x8d, 0xab, 0x07, 0x1b, 0xcd, 0x6c, 0x72, 0x92,
+ 0xa9, 0xef, 0x72, 0x7b, 0x4a, 0xe0, 0xd8, 0x67,
+ 0x13, 0x30, 0x1d, 0xa8, 0x61, 0x8d, 0x9a, 0x48,
+ 0xad, 0xce, 0x55, 0xf3, 0x03, 0xa8, 0x69, 0xa1 },
+ { 0x82, 0x53, 0xe3, 0xe7, 0xc7, 0xb6, 0x84, 0xb9,
+ 0xcb, 0x2b, 0xeb, 0x01, 0x4c, 0xe3, 0x30, 0xff,
+ 0x3d, 0x99, 0xd1, 0x7a, 0xbb, 0xdb, 0xab, 0xe4,
+ 0xf4, 0xd6, 0x74, 0xde, 0xd5, 0x3f, 0xfc, 0x6b },
+ { 0xf1, 0x95, 0xf3, 0x21, 0xe9, 0xe3, 0xd6, 0xbd,
+ 0x7d, 0x07, 0x45, 0x04, 0xdd, 0x2a, 0xb0, 0xe6,
+ 0x24, 0x1f, 0x92, 0xe7, 0x84, 0xb1, 0xaa, 0x27,
+ 0x1f, 0xf6, 0x48, 0xb1, 0xca, 0xb6, 0xd7, 0xf6 },
+ { 0x27, 0xe4, 0xcc, 0x72, 0x09, 0x0f, 0x24, 0x12,
+ 0x66, 0x47, 0x6a, 0x7c, 0x09, 0x49, 0x5f, 0x2d,
+ 0xb1, 0x53, 0xd5, 0xbc, 0xbd, 0x76, 0x19, 0x03,
+ 0xef, 0x79, 0x27, 0x5e, 0xc5, 0x6b, 0x2e, 0xd8 },
+ { 0x89, 0x9c, 0x24, 0x05, 0x78, 0x8e, 0x25, 0xb9,
+ 0x9a, 0x18, 0x46, 0x35, 0x5e, 0x64, 0x6d, 0x77,
+ 0xcf, 0x40, 0x00, 0x83, 0x41, 0x5f, 0x7d, 0xc5,
+ 0xaf, 0xe6, 0x9d, 0x6e, 0x17, 0xc0, 0x00, 0x23 },
+ { 0xa5, 0x9b, 0x78, 0xc4, 0x90, 0x57, 0x44, 0x07,
+ 0x6b, 0xfe, 0xe8, 0x94, 0xde, 0x70, 0x7d, 0x4f,
+ 0x12, 0x0b, 0x5c, 0x68, 0x93, 0xea, 0x04, 0x00,
+ 0x29, 0x7d, 0x0b, 0xb8, 0x34, 0x72, 0x76, 0x32 },
+ { 0x59, 0xdc, 0x78, 0xb1, 0x05, 0x64, 0x97, 0x07,
+ 0xa2, 0xbb, 0x44, 0x19, 0xc4, 0x8f, 0x00, 0x54,
+ 0x00, 0xd3, 0x97, 0x3d, 0xe3, 0x73, 0x66, 0x10,
+ 0x23, 0x04, 0x35, 0xb1, 0x04, 0x24, 0xb2, 0x4f },
+ { 0xc0, 0x14, 0x9d, 0x1d, 0x7e, 0x7a, 0x63, 0x53,
+ 0xa6, 0xd9, 0x06, 0xef, 0xe7, 0x28, 0xf2, 0xf3,
+ 0x29, 0xfe, 0x14, 0xa4, 0x14, 0x9a, 0x3e, 0xa7,
+ 0x76, 0x09, 0xbc, 0x42, 0xb9, 0x75, 0xdd, 0xfa },
+ { 0xa3, 0x2f, 0x24, 0x14, 0x74, 0xa6, 0xc1, 0x69,
+ 0x32, 0xe9, 0x24, 0x3b, 0xe0, 0xcf, 0x09, 0xbc,
+ 0xdc, 0x7e, 0x0c, 0xa0, 0xe7, 0xa6, 0xa1, 0xb9,
+ 0xb1, 0xa0, 0xf0, 0x1e, 0x41, 0x50, 0x23, 0x77 },
+ { 0xb2, 0x39, 0xb2, 0xe4, 0xf8, 0x18, 0x41, 0x36,
+ 0x1c, 0x13, 0x39, 0xf6, 0x8e, 0x2c, 0x35, 0x9f,
+ 0x92, 0x9a, 0xf9, 0xad, 0x9f, 0x34, 0xe0, 0x1a,
+ 0xab, 0x46, 0x31, 0xad, 0x6d, 0x55, 0x00, 0xb0 },
+ { 0x85, 0xfb, 0x41, 0x9c, 0x70, 0x02, 0xa3, 0xe0,
+ 0xb4, 0xb6, 0xea, 0x09, 0x3b, 0x4c, 0x1a, 0xc6,
+ 0x93, 0x66, 0x45, 0xb6, 0x5d, 0xac, 0x5a, 0xc1,
+ 0x5a, 0x85, 0x28, 0xb7, 0xb9, 0x4c, 0x17, 0x54 },
+ { 0x96, 0x19, 0x72, 0x06, 0x25, 0xf1, 0x90, 0xb9,
+ 0x3a, 0x3f, 0xad, 0x18, 0x6a, 0xb3, 0x14, 0x18,
+ 0x96, 0x33, 0xc0, 0xd3, 0xa0, 0x1e, 0x6f, 0x9b,
+ 0xc8, 0xc4, 0xa8, 0xf8, 0x2f, 0x38, 0x3d, 0xbf },
+ { 0x7d, 0x62, 0x0d, 0x90, 0xfe, 0x69, 0xfa, 0x46,
+ 0x9a, 0x65, 0x38, 0x38, 0x89, 0x70, 0xa1, 0xaa,
+ 0x09, 0xbb, 0x48, 0xa2, 0xd5, 0x9b, 0x34, 0x7b,
+ 0x97, 0xe8, 0xce, 0x71, 0xf4, 0x8c, 0x7f, 0x46 },
+ { 0x29, 0x43, 0x83, 0x56, 0x85, 0x96, 0xfb, 0x37,
+ 0xc7, 0x5b, 0xba, 0xcd, 0x97, 0x9c, 0x5f, 0xf6,
+ 0xf2, 0x0a, 0x55, 0x6b, 0xf8, 0x87, 0x9c, 0xc7,
+ 0x29, 0x24, 0x85, 0x5d, 0xf9, 0xb8, 0x24, 0x0e },
+ { 0x16, 0xb1, 0x8a, 0xb3, 0x14, 0x35, 0x9c, 0x2b,
+ 0x83, 0x3c, 0x1c, 0x69, 0x86, 0xd4, 0x8c, 0x55,
+ 0xa9, 0xfc, 0x97, 0xcd, 0xe9, 0xa3, 0xc1, 0xf1,
+ 0x0a, 0x31, 0x77, 0x14, 0x0f, 0x73, 0xf7, 0x38 },
+ { 0x8c, 0xbb, 0xdd, 0x14, 0xbc, 0x33, 0xf0, 0x4c,
+ 0xf4, 0x58, 0x13, 0xe4, 0xa1, 0x53, 0xa2, 0x73,
+ 0xd3, 0x6a, 0xda, 0xd5, 0xce, 0x71, 0xf4, 0x99,
+ 0xee, 0xb8, 0x7f, 0xb8, 0xac, 0x63, 0xb7, 0x29 },
+ { 0x69, 0xc9, 0xa4, 0x98, 0xdb, 0x17, 0x4e, 0xca,
+ 0xef, 0xcc, 0x5a, 0x3a, 0xc9, 0xfd, 0xed, 0xf0,
+ 0xf8, 0x13, 0xa5, 0xbe, 0xc7, 0x27, 0xf1, 0xe7,
+ 0x75, 0xba, 0xbd, 0xec, 0x77, 0x18, 0x81, 0x6e },
+ { 0xb4, 0x62, 0xc3, 0xbe, 0x40, 0x44, 0x8f, 0x1d,
+ 0x4f, 0x80, 0x62, 0x62, 0x54, 0xe5, 0x35, 0xb0,
+ 0x8b, 0xc9, 0xcd, 0xcf, 0xf5, 0x99, 0xa7, 0x68,
+ 0x57, 0x8d, 0x4b, 0x28, 0x81, 0xa8, 0xe3, 0xf0 },
+ { 0x55, 0x3e, 0x9d, 0x9c, 0x5f, 0x36, 0x0a, 0xc0,
+ 0xb7, 0x4a, 0x7d, 0x44, 0xe5, 0xa3, 0x91, 0xda,
+ 0xd4, 0xce, 0xd0, 0x3e, 0x0c, 0x24, 0x18, 0x3b,
+ 0x7e, 0x8e, 0xca, 0xbd, 0xf1, 0x71, 0x5a, 0x64 },
+ { 0x7a, 0x7c, 0x55, 0xa5, 0x6f, 0xa9, 0xae, 0x51,
+ 0xe6, 0x55, 0xe0, 0x19, 0x75, 0xd8, 0xa6, 0xff,
+ 0x4a, 0xe9, 0xe4, 0xb4, 0x86, 0xfc, 0xbe, 0x4e,
+ 0xac, 0x04, 0x45, 0x88, 0xf2, 0x45, 0xeb, 0xea },
+ { 0x2a, 0xfd, 0xf3, 0xc8, 0x2a, 0xbc, 0x48, 0x67,
+ 0xf5, 0xde, 0x11, 0x12, 0x86, 0xc2, 0xb3, 0xbe,
+ 0x7d, 0x6e, 0x48, 0x65, 0x7b, 0xa9, 0x23, 0xcf,
+ 0xbf, 0x10, 0x1a, 0x6d, 0xfc, 0xf9, 0xdb, 0x9a },
+ { 0x41, 0x03, 0x7d, 0x2e, 0xdc, 0xdc, 0xe0, 0xc4,
+ 0x9b, 0x7f, 0xb4, 0xa6, 0xaa, 0x09, 0x99, 0xca,
+ 0x66, 0x97, 0x6c, 0x74, 0x83, 0xaf, 0xe6, 0x31,
+ 0xd4, 0xed, 0xa2, 0x83, 0x14, 0x4f, 0x6d, 0xfc },
+ { 0xc4, 0x46, 0x6f, 0x84, 0x97, 0xca, 0x2e, 0xeb,
+ 0x45, 0x83, 0xa0, 0xb0, 0x8e, 0x9d, 0x9a, 0xc7,
+ 0x43, 0x95, 0x70, 0x9f, 0xda, 0x10, 0x9d, 0x24,
+ 0xf2, 0xe4, 0x46, 0x21, 0x96, 0x77, 0x9c, 0x5d },
+ { 0x75, 0xf6, 0x09, 0x33, 0x8a, 0xa6, 0x7d, 0x96,
+ 0x9a, 0x2a, 0xe2, 0xa2, 0x36, 0x2b, 0x2d, 0xa9,
+ 0xd7, 0x7c, 0x69, 0x5d, 0xfd, 0x1d, 0xf7, 0x22,
+ 0x4a, 0x69, 0x01, 0xdb, 0x93, 0x2c, 0x33, 0x64 },
+ { 0x68, 0x60, 0x6c, 0xeb, 0x98, 0x9d, 0x54, 0x88,
+ 0xfc, 0x7c, 0xf6, 0x49, 0xf3, 0xd7, 0xc2, 0x72,
+ 0xef, 0x05, 0x5d, 0xa1, 0xa9, 0x3f, 0xae, 0xcd,
+ 0x55, 0xfe, 0x06, 0xf6, 0x96, 0x70, 0x98, 0xca },
+ { 0x44, 0x34, 0x6b, 0xde, 0xb7, 0xe0, 0x52, 0xf6,
+ 0x25, 0x50, 0x48, 0xf0, 0xd9, 0xb4, 0x2c, 0x42,
+ 0x5b, 0xab, 0x9c, 0x3d, 0xd2, 0x41, 0x68, 0x21,
+ 0x2c, 0x3e, 0xcf, 0x1e, 0xbf, 0x34, 0xe6, 0xae },
+ { 0x8e, 0x9c, 0xf6, 0xe1, 0xf3, 0x66, 0x47, 0x1f,
+ 0x2a, 0xc7, 0xd2, 0xee, 0x9b, 0x5e, 0x62, 0x66,
+ 0xfd, 0xa7, 0x1f, 0x8f, 0x2e, 0x41, 0x09, 0xf2,
+ 0x23, 0x7e, 0xd5, 0xf8, 0x81, 0x3f, 0xc7, 0x18 },
+ { 0x84, 0xbb, 0xeb, 0x84, 0x06, 0xd2, 0x50, 0x95,
+ 0x1f, 0x8c, 0x1b, 0x3e, 0x86, 0xa7, 0xc0, 0x10,
+ 0x08, 0x29, 0x21, 0x83, 0x3d, 0xfd, 0x95, 0x55,
+ 0xa2, 0xf9, 0x09, 0xb1, 0x08, 0x6e, 0xb4, 0xb8 },
+ { 0xee, 0x66, 0x6f, 0x3e, 0xef, 0x0f, 0x7e, 0x2a,
+ 0x9c, 0x22, 0x29, 0x58, 0xc9, 0x7e, 0xaf, 0x35,
+ 0xf5, 0x1c, 0xed, 0x39, 0x3d, 0x71, 0x44, 0x85,
+ 0xab, 0x09, 0xa0, 0x69, 0x34, 0x0f, 0xdf, 0x88 },
+ { 0xc1, 0x53, 0xd3, 0x4a, 0x65, 0xc4, 0x7b, 0x4a,
+ 0x62, 0xc5, 0xca, 0xcf, 0x24, 0x01, 0x09, 0x75,
+ 0xd0, 0x35, 0x6b, 0x2f, 0x32, 0xc8, 0xf5, 0xda,
+ 0x53, 0x0d, 0x33, 0x88, 0x16, 0xad, 0x5d, 0xe6 },
+ { 0x9f, 0xc5, 0x45, 0x01, 0x09, 0xe1, 0xb7, 0x79,
+ 0xf6, 0xc7, 0xae, 0x79, 0xd5, 0x6c, 0x27, 0x63,
+ 0x5c, 0x8d, 0xd4, 0x26, 0xc5, 0xa9, 0xd5, 0x4e,
+ 0x25, 0x78, 0xdb, 0x98, 0x9b, 0x8c, 0x3b, 0x4e },
+ { 0xd1, 0x2b, 0xf3, 0x73, 0x2e, 0xf4, 0xaf, 0x5c,
+ 0x22, 0xfa, 0x90, 0x35, 0x6a, 0xf8, 0xfc, 0x50,
+ 0xfc, 0xb4, 0x0f, 0x8f, 0x2e, 0xa5, 0xc8, 0x59,
+ 0x47, 0x37, 0xa3, 0xb3, 0xd5, 0xab, 0xdb, 0xd7 },
+ { 0x11, 0x03, 0x0b, 0x92, 0x89, 0xbb, 0xa5, 0xaf,
+ 0x65, 0x26, 0x06, 0x72, 0xab, 0x6f, 0xee, 0x88,
+ 0xb8, 0x74, 0x20, 0xac, 0xef, 0x4a, 0x17, 0x89,
+ 0xa2, 0x07, 0x3b, 0x7e, 0xc2, 0xf2, 0xa0, 0x9e },
+ { 0x69, 0xcb, 0x19, 0x2b, 0x84, 0x44, 0x00, 0x5c,
+ 0x8c, 0x0c, 0xeb, 0x12, 0xc8, 0x46, 0x86, 0x07,
+ 0x68, 0x18, 0x8c, 0xda, 0x0a, 0xec, 0x27, 0xa9,
+ 0xc8, 0xa5, 0x5c, 0xde, 0xe2, 0x12, 0x36, 0x32 },
+ { 0xdb, 0x44, 0x4c, 0x15, 0x59, 0x7b, 0x5f, 0x1a,
+ 0x03, 0xd1, 0xf9, 0xed, 0xd1, 0x6e, 0x4a, 0x9f,
+ 0x43, 0xa6, 0x67, 0xcc, 0x27, 0x51, 0x75, 0xdf,
+ 0xa2, 0xb7, 0x04, 0xe3, 0xbb, 0x1a, 0x9b, 0x83 },
+ { 0x3f, 0xb7, 0x35, 0x06, 0x1a, 0xbc, 0x51, 0x9d,
+ 0xfe, 0x97, 0x9e, 0x54, 0xc1, 0xee, 0x5b, 0xfa,
+ 0xd0, 0xa9, 0xd8, 0x58, 0xb3, 0x31, 0x5b, 0xad,
+ 0x34, 0xbd, 0xe9, 0x99, 0xef, 0xd7, 0x24, 0xdd }
+};
+
+bool __init blake2s_selftest(void)
+{
+ u8 key[BLAKE2S_KEYBYTES];
+ u8 buf[ARRAY_SIZE(blake2s_testvecs)];
+ u8 hash[BLAKE2S_OUTBYTES];
+ size_t i;
+ bool success = true;
+
+ for (i = 0; i < BLAKE2S_KEYBYTES; ++i)
+ key[i] = (u8)i;
+
+ for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i)
+ buf[i] = (u8)i;
+
+ for (i = 0; i < ARRAY_SIZE(blake2s_keyed_testvecs); ++i) {
+ blake2s(hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES);
+ if (memcmp(hash, blake2s_keyed_testvecs[i], BLAKE2S_OUTBYTES)) {
+ pr_info("blake2s keyed self-test %zu: FAIL\n", i + 1);
+ success = false;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) {
+ blake2s(hash, buf, NULL, BLAKE2S_OUTBYTES, i, 0);
+ if (memcmp(hash, blake2s_testvecs[i], BLAKE2S_OUTBYTES)) {
+ pr_info("blake2s unkeyed self-test %zu: FAIL\n", i + i);
+ success = false;
+ }
+ }
+
+ if (success)
+ pr_info("blake2s self-tests: pass\n");
+ return success;
+}
+#endif
--
2.19.0
^ permalink raw reply related
* [PATCH net-next v4 17/20] crypto: port Poly1305 to Zinc
From: Jason A. Donenfeld @ 2018-09-14 16:22 UTC (permalink / raw)
To: linux-kernel, netdev, linux-crypto, davem, gregkh
Cc: Jason A. Donenfeld, Samuel Neves, Andy Lutomirski,
Jean-Philippe Aumasson, Eric Biggers
In-Reply-To: <20180914162240.7925-1-Jason@zx2c4.com>
Now that Poly1305 is in Zinc, we can have the crypto API code simply
call into it. We have to do a little bit of book keeping here, because
the crypto API receives the key in the first few calls to update.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/Makefile | 3 -
arch/x86/crypto/poly1305-avx2-x86_64.S | 388 ----------------
arch/x86/crypto/poly1305-sse2-x86_64.S | 584 -------------------------
arch/x86/crypto/poly1305_glue.c | 205 ---------
crypto/Kconfig | 15 +-
crypto/Makefile | 2 +-
crypto/chacha20poly1305.c | 12 +-
crypto/poly1305_generic.c | 304 -------------
crypto/poly1305_zinc.c | 92 ++++
include/crypto/poly1305.h | 40 --
10 files changed, 101 insertions(+), 1544 deletions(-)
delete mode 100644 arch/x86/crypto/poly1305-avx2-x86_64.S
delete mode 100644 arch/x86/crypto/poly1305-sse2-x86_64.S
delete mode 100644 arch/x86/crypto/poly1305_glue.c
delete mode 100644 crypto/poly1305_generic.c
create mode 100644 crypto/poly1305_zinc.c
delete mode 100644 include/crypto/poly1305.h
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a450ad573dcb..cf830219846b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -34,7 +34,6 @@ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
-obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
@@ -110,10 +109,8 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
-poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o
ifeq ($(avx2_supported),yes)
sha1-ssse3-y += sha1_avx2_x86_64_asm.o
-poly1305-x86_64-y += poly1305-avx2-x86_64.o
endif
ifeq ($(sha1_ni_supported),yes)
sha1-ssse3-y += sha1_ni_asm.o
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
deleted file mode 100644
index 3b6e70d085da..000000000000
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section .rodata.cst32.ANMASK, "aM", @progbits, 32
-.align 32
-ANMASK: .octa 0x0000000003ffffff0000000003ffffff
- .octa 0x0000000003ffffff0000000003ffffff
-
-.section .rodata.cst32.ORMASK, "aM", @progbits, 32
-.align 32
-ORMASK: .octa 0x00000000010000000000000001000000
- .octa 0x00000000010000000000000001000000
-
-.text
-
-#define h0 0x00(%rdi)
-#define h1 0x04(%rdi)
-#define h2 0x08(%rdi)
-#define h3 0x0c(%rdi)
-#define h4 0x10(%rdi)
-#define r0 0x00(%rdx)
-#define r1 0x04(%rdx)
-#define r2 0x08(%rdx)
-#define r3 0x0c(%rdx)
-#define r4 0x10(%rdx)
-#define u0 0x00(%r8)
-#define u1 0x04(%r8)
-#define u2 0x08(%r8)
-#define u3 0x0c(%r8)
-#define u4 0x10(%r8)
-#define w0 0x14(%r8)
-#define w1 0x18(%r8)
-#define w2 0x1c(%r8)
-#define w3 0x20(%r8)
-#define w4 0x24(%r8)
-#define y0 0x28(%r8)
-#define y1 0x2c(%r8)
-#define y2 0x30(%r8)
-#define y3 0x34(%r8)
-#define y4 0x38(%r8)
-#define m %rsi
-#define hc0 %ymm0
-#define hc1 %ymm1
-#define hc2 %ymm2
-#define hc3 %ymm3
-#define hc4 %ymm4
-#define hc0x %xmm0
-#define hc1x %xmm1
-#define hc2x %xmm2
-#define hc3x %xmm3
-#define hc4x %xmm4
-#define t1 %ymm5
-#define t2 %ymm6
-#define t1x %xmm5
-#define t2x %xmm6
-#define ruwy0 %ymm7
-#define ruwy1 %ymm8
-#define ruwy2 %ymm9
-#define ruwy3 %ymm10
-#define ruwy4 %ymm11
-#define ruwy0x %xmm7
-#define ruwy1x %xmm8
-#define ruwy2x %xmm9
-#define ruwy3x %xmm10
-#define ruwy4x %xmm11
-#define svxz1 %ymm12
-#define svxz2 %ymm13
-#define svxz3 %ymm14
-#define svxz4 %ymm15
-#define d0 %r9
-#define d1 %r10
-#define d2 %r11
-#define d3 %r12
-#define d4 %r13
-
-ENTRY(poly1305_4block_avx2)
- # %rdi: Accumulator h[5]
- # %rsi: 64 byte input block m
- # %rdx: Poly1305 key r[5]
- # %rcx: Quadblock count
- # %r8: Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5],
-
- # This four-block variant uses loop unrolled block processing. It
- # requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
- # h = (h + m) * r => h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
-
- vzeroupper
- push %rbx
- push %r12
- push %r13
-
- # combine r0,u0,w0,y0
- vmovd y0,ruwy0x
- vmovd w0,t1x
- vpunpcklqdq t1,ruwy0,ruwy0
- vmovd u0,t1x
- vmovd r0,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy0,ruwy0
-
- # combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
- vmovd y1,ruwy1x
- vmovd w1,t1x
- vpunpcklqdq t1,ruwy1,ruwy1
- vmovd u1,t1x
- vmovd r1,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy1,ruwy1
- vpslld $2,ruwy1,svxz1
- vpaddd ruwy1,svxz1,svxz1
-
- # combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
- vmovd y2,ruwy2x
- vmovd w2,t1x
- vpunpcklqdq t1,ruwy2,ruwy2
- vmovd u2,t1x
- vmovd r2,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy2,ruwy2
- vpslld $2,ruwy2,svxz2
- vpaddd ruwy2,svxz2,svxz2
-
- # combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
- vmovd y3,ruwy3x
- vmovd w3,t1x
- vpunpcklqdq t1,ruwy3,ruwy3
- vmovd u3,t1x
- vmovd r3,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy3,ruwy3
- vpslld $2,ruwy3,svxz3
- vpaddd ruwy3,svxz3,svxz3
-
- # combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
- vmovd y4,ruwy4x
- vmovd w4,t1x
- vpunpcklqdq t1,ruwy4,ruwy4
- vmovd u4,t1x
- vmovd r4,t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,ruwy4,ruwy4
- vpslld $2,ruwy4,svxz4
- vpaddd ruwy4,svxz4,svxz4
-
-.Ldoblock4:
- # hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
- # m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
- vmovd 0x00(m),hc0x
- vmovd 0x10(m),t1x
- vpunpcklqdq t1,hc0,hc0
- vmovd 0x20(m),t1x
- vmovd 0x30(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc0,hc0
- vpand ANMASK(%rip),hc0,hc0
- vmovd h0,t1x
- vpaddd t1,hc0,hc0
- # hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
- # (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
- vmovd 0x03(m),hc1x
- vmovd 0x13(m),t1x
- vpunpcklqdq t1,hc1,hc1
- vmovd 0x23(m),t1x
- vmovd 0x33(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc1,hc1
- vpsrld $2,hc1,hc1
- vpand ANMASK(%rip),hc1,hc1
- vmovd h1,t1x
- vpaddd t1,hc1,hc1
- # hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
- # (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
- vmovd 0x06(m),hc2x
- vmovd 0x16(m),t1x
- vpunpcklqdq t1,hc2,hc2
- vmovd 0x26(m),t1x
- vmovd 0x36(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc2,hc2
- vpsrld $4,hc2,hc2
- vpand ANMASK(%rip),hc2,hc2
- vmovd h2,t1x
- vpaddd t1,hc2,hc2
- # hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
- # (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
- vmovd 0x09(m),hc3x
- vmovd 0x19(m),t1x
- vpunpcklqdq t1,hc3,hc3
- vmovd 0x29(m),t1x
- vmovd 0x39(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc3,hc3
- vpsrld $6,hc3,hc3
- vpand ANMASK(%rip),hc3,hc3
- vmovd h3,t1x
- vpaddd t1,hc3,hc3
- # hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
- # (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
- vmovd 0x0c(m),hc4x
- vmovd 0x1c(m),t1x
- vpunpcklqdq t1,hc4,hc4
- vmovd 0x2c(m),t1x
- vmovd 0x3c(m),t2x
- vpunpcklqdq t2,t1,t1
- vperm2i128 $0x20,t1,hc4,hc4
- vpsrld $8,hc4,hc4
- vpor ORMASK(%rip),hc4,hc4
- vmovd h4,t1x
- vpaddd t1,hc4,hc4
-
- # t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
- vpmuludq hc0,ruwy0,t1
- # t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
- vpmuludq hc1,svxz4,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
- vpmuludq hc2,svxz3,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
- vpmuludq hc3,svxz2,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
- vpmuludq hc4,svxz1,t2
- vpaddq t2,t1,t1
- # d0 = t1[0] + t1[1] + t[2] + t[3]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d0
-
- # t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ]
- vpmuludq hc0,ruwy1,t1
- # t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
- vpmuludq hc1,ruwy0,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
- vpmuludq hc2,svxz4,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
- vpmuludq hc3,svxz3,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
- vpmuludq hc4,svxz2,t2
- vpaddq t2,t1,t1
- # d1 = t1[0] + t1[1] + t1[3] + t1[4]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d1
-
- # t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
- vpmuludq hc0,ruwy2,t1
- # t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
- vpmuludq hc1,ruwy1,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
- vpmuludq hc2,ruwy0,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
- vpmuludq hc3,svxz4,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
- vpmuludq hc4,svxz3,t2
- vpaddq t2,t1,t1
- # d2 = t1[0] + t1[1] + t1[2] + t1[3]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d2
-
- # t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
- vpmuludq hc0,ruwy3,t1
- # t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
- vpmuludq hc1,ruwy2,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
- vpmuludq hc2,ruwy1,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
- vpmuludq hc3,ruwy0,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
- vpmuludq hc4,svxz4,t2
- vpaddq t2,t1,t1
- # d3 = t1[0] + t1[1] + t1[2] + t1[3]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d3
-
- # t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
- vpmuludq hc0,ruwy4,t1
- # t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
- vpmuludq hc1,ruwy3,t2
- vpaddq t2,t1,t1
- # t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
- vpmuludq hc2,ruwy2,t2
- vpaddq t2,t1,t1
- # t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
- vpmuludq hc3,ruwy1,t2
- vpaddq t2,t1,t1
- # t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
- vpmuludq hc4,ruwy0,t2
- vpaddq t2,t1,t1
- # d4 = t1[0] + t1[1] + t1[2] + t1[3]
- vpermq $0xee,t1,t2
- vpaddq t2,t1,t1
- vpsrldq $8,t1,t2
- vpaddq t2,t1,t1
- vmovq t1x,d4
-
- # d1 += d0 >> 26
- mov d0,%rax
- shr $26,%rax
- add %rax,d1
- # h0 = d0 & 0x3ffffff
- mov d0,%rbx
- and $0x3ffffff,%ebx
-
- # d2 += d1 >> 26
- mov d1,%rax
- shr $26,%rax
- add %rax,d2
- # h1 = d1 & 0x3ffffff
- mov d1,%rax
- and $0x3ffffff,%eax
- mov %eax,h1
-
- # d3 += d2 >> 26
- mov d2,%rax
- shr $26,%rax
- add %rax,d3
- # h2 = d2 & 0x3ffffff
- mov d2,%rax
- and $0x3ffffff,%eax
- mov %eax,h2
-
- # d4 += d3 >> 26
- mov d3,%rax
- shr $26,%rax
- add %rax,d4
- # h3 = d3 & 0x3ffffff
- mov d3,%rax
- and $0x3ffffff,%eax
- mov %eax,h3
-
- # h0 += (d4 >> 26) * 5
- mov d4,%rax
- shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
- # h4 = d4 & 0x3ffffff
- mov d4,%rax
- and $0x3ffffff,%eax
- mov %eax,h4
-
- # h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
- add %eax,h1
- # h0 = h0 & 0x3ffffff
- andl $0x3ffffff,%ebx
- mov %ebx,h0
-
- add $0x40,m
- dec %rcx
- jnz .Ldoblock4
-
- vzeroupper
- pop %r13
- pop %r12
- pop %rbx
- ret
-ENDPROC(poly1305_4block_avx2)
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
deleted file mode 100644
index c88c670cb5fc..000000000000
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ /dev/null
@@ -1,584 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section .rodata.cst16.ANMASK, "aM", @progbits, 16
-.align 16
-ANMASK: .octa 0x0000000003ffffff0000000003ffffff
-
-.section .rodata.cst16.ORMASK, "aM", @progbits, 16
-.align 16
-ORMASK: .octa 0x00000000010000000000000001000000
-
-.text
-
-#define h0 0x00(%rdi)
-#define h1 0x04(%rdi)
-#define h2 0x08(%rdi)
-#define h3 0x0c(%rdi)
-#define h4 0x10(%rdi)
-#define r0 0x00(%rdx)
-#define r1 0x04(%rdx)
-#define r2 0x08(%rdx)
-#define r3 0x0c(%rdx)
-#define r4 0x10(%rdx)
-#define s1 0x00(%rsp)
-#define s2 0x04(%rsp)
-#define s3 0x08(%rsp)
-#define s4 0x0c(%rsp)
-#define m %rsi
-#define h01 %xmm0
-#define h23 %xmm1
-#define h44 %xmm2
-#define t1 %xmm3
-#define t2 %xmm4
-#define t3 %xmm5
-#define t4 %xmm6
-#define mask %xmm7
-#define d0 %r8
-#define d1 %r9
-#define d2 %r10
-#define d3 %r11
-#define d4 %r12
-
-ENTRY(poly1305_block_sse2)
- # %rdi: Accumulator h[5]
- # %rsi: 16 byte input block m
- # %rdx: Poly1305 key r[5]
- # %rcx: Block count
-
- # This single block variant tries to improve performance by doing two
- # multiplications in parallel using SSE instructions. There is quite
- # some quardword packing involved, hence the speedup is marginal.
-
- push %rbx
- push %r12
- sub $0x10,%rsp
-
- # s1..s4 = r1..r4 * 5
- mov r1,%eax
- lea (%eax,%eax,4),%eax
- mov %eax,s1
- mov r2,%eax
- lea (%eax,%eax,4),%eax
- mov %eax,s2
- mov r3,%eax
- lea (%eax,%eax,4),%eax
- mov %eax,s3
- mov r4,%eax
- lea (%eax,%eax,4),%eax
- mov %eax,s4
-
- movdqa ANMASK(%rip),mask
-
-.Ldoblock:
- # h01 = [0, h1, 0, h0]
- # h23 = [0, h3, 0, h2]
- # h44 = [0, h4, 0, h4]
- movd h0,h01
- movd h1,t1
- movd h2,h23
- movd h3,t2
- movd h4,h44
- punpcklqdq t1,h01
- punpcklqdq t2,h23
- punpcklqdq h44,h44
-
- # h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ]
- movd 0x00(m),t1
- movd 0x03(m),t2
- psrld $2,t2
- punpcklqdq t2,t1
- pand mask,t1
- paddd t1,h01
- # h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ]
- movd 0x06(m),t1
- movd 0x09(m),t2
- psrld $4,t1
- psrld $6,t2
- punpcklqdq t2,t1
- pand mask,t1
- paddd t1,h23
- # h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ]
- mov 0x0c(m),%eax
- shr $8,%eax
- or $0x01000000,%eax
- movd %eax,t1
- pshufd $0xc4,t1,t1
- paddd t1,h44
-
- # t1[0] = h0 * r0 + h2 * s3
- # t1[1] = h1 * s4 + h3 * s2
- movd r0,t1
- movd s4,t2
- punpcklqdq t2,t1
- pmuludq h01,t1
- movd s3,t2
- movd s2,t3
- punpcklqdq t3,t2
- pmuludq h23,t2
- paddq t2,t1
- # t2[0] = h0 * r1 + h2 * s4
- # t2[1] = h1 * r0 + h3 * s3
- movd r1,t2
- movd r0,t3
- punpcklqdq t3,t2
- pmuludq h01,t2
- movd s4,t3
- movd s3,t4
- punpcklqdq t4,t3
- pmuludq h23,t3
- paddq t3,t2
- # t3[0] = h4 * s1
- # t3[1] = h4 * s2
- movd s1,t3
- movd s2,t4
- punpcklqdq t4,t3
- pmuludq h44,t3
- # d0 = t1[0] + t1[1] + t3[0]
- # d1 = t2[0] + t2[1] + t3[1]
- movdqa t1,t4
- punpcklqdq t2,t4
- punpckhqdq t2,t1
- paddq t4,t1
- paddq t3,t1
- movq t1,d0
- psrldq $8,t1
- movq t1,d1
-
- # t1[0] = h0 * r2 + h2 * r0
- # t1[1] = h1 * r1 + h3 * s4
- movd r2,t1
- movd r1,t2
- punpcklqdq t2,t1
- pmuludq h01,t1
- movd r0,t2
- movd s4,t3
- punpcklqdq t3,t2
- pmuludq h23,t2
- paddq t2,t1
- # t2[0] = h0 * r3 + h2 * r1
- # t2[1] = h1 * r2 + h3 * r0
- movd r3,t2
- movd r2,t3
- punpcklqdq t3,t2
- pmuludq h01,t2
- movd r1,t3
- movd r0,t4
- punpcklqdq t4,t3
- pmuludq h23,t3
- paddq t3,t2
- # t3[0] = h4 * s3
- # t3[1] = h4 * s4
- movd s3,t3
- movd s4,t4
- punpcklqdq t4,t3
- pmuludq h44,t3
- # d2 = t1[0] + t1[1] + t3[0]
- # d3 = t2[0] + t2[1] + t3[1]
- movdqa t1,t4
- punpcklqdq t2,t4
- punpckhqdq t2,t1
- paddq t4,t1
- paddq t3,t1
- movq t1,d2
- psrldq $8,t1
- movq t1,d3
-
- # t1[0] = h0 * r4 + h2 * r2
- # t1[1] = h1 * r3 + h3 * r1
- movd r4,t1
- movd r3,t2
- punpcklqdq t2,t1
- pmuludq h01,t1
- movd r2,t2
- movd r1,t3
- punpcklqdq t3,t2
- pmuludq h23,t2
- paddq t2,t1
- # t3[0] = h4 * r0
- movd r0,t3
- pmuludq h44,t3
- # d4 = t1[0] + t1[1] + t3[0]
- movdqa t1,t4
- psrldq $8,t4
- paddq t4,t1
- paddq t3,t1
- movq t1,d4
-
- # d1 += d0 >> 26
- mov d0,%rax
- shr $26,%rax
- add %rax,d1
- # h0 = d0 & 0x3ffffff
- mov d0,%rbx
- and $0x3ffffff,%ebx
-
- # d2 += d1 >> 26
- mov d1,%rax
- shr $26,%rax
- add %rax,d2
- # h1 = d1 & 0x3ffffff
- mov d1,%rax
- and $0x3ffffff,%eax
- mov %eax,h1
-
- # d3 += d2 >> 26
- mov d2,%rax
- shr $26,%rax
- add %rax,d3
- # h2 = d2 & 0x3ffffff
- mov d2,%rax
- and $0x3ffffff,%eax
- mov %eax,h2
-
- # d4 += d3 >> 26
- mov d3,%rax
- shr $26,%rax
- add %rax,d4
- # h3 = d3 & 0x3ffffff
- mov d3,%rax
- and $0x3ffffff,%eax
- mov %eax,h3
-
- # h0 += (d4 >> 26) * 5
- mov d4,%rax
- shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
- # h4 = d4 & 0x3ffffff
- mov d4,%rax
- and $0x3ffffff,%eax
- mov %eax,h4
-
- # h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
- add %eax,h1
- # h0 = h0 & 0x3ffffff
- andl $0x3ffffff,%ebx
- mov %ebx,h0
-
- add $0x10,m
- dec %rcx
- jnz .Ldoblock
-
- add $0x10,%rsp
- pop %r12
- pop %rbx
- ret
-ENDPROC(poly1305_block_sse2)
-
-
-#define u0 0x00(%r8)
-#define u1 0x04(%r8)
-#define u2 0x08(%r8)
-#define u3 0x0c(%r8)
-#define u4 0x10(%r8)
-#define hc0 %xmm0
-#define hc1 %xmm1
-#define hc2 %xmm2
-#define hc3 %xmm5
-#define hc4 %xmm6
-#define ru0 %xmm7
-#define ru1 %xmm8
-#define ru2 %xmm9
-#define ru3 %xmm10
-#define ru4 %xmm11
-#define sv1 %xmm12
-#define sv2 %xmm13
-#define sv3 %xmm14
-#define sv4 %xmm15
-#undef d0
-#define d0 %r13
-
-ENTRY(poly1305_2block_sse2)
- # %rdi: Accumulator h[5]
- # %rsi: 16 byte input block m
- # %rdx: Poly1305 key r[5]
- # %rcx: Doubleblock count
- # %r8: Poly1305 derived key r^2 u[5]
-
- # This two-block variant further improves performance by using loop
- # unrolled block processing. This is more straight forward and does
- # less byte shuffling, but requires a second Poly1305 key r^2:
- # h = (h + m) * r => h = (h + m1) * r^2 + m2 * r
-
- push %rbx
- push %r12
- push %r13
-
- # combine r0,u0
- movd u0,ru0
- movd r0,t1
- punpcklqdq t1,ru0
-
- # combine r1,u1 and s1=r1*5,v1=u1*5
- movd u1,ru1
- movd r1,t1
- punpcklqdq t1,ru1
- movdqa ru1,sv1
- pslld $2,sv1
- paddd ru1,sv1
-
- # combine r2,u2 and s2=r2*5,v2=u2*5
- movd u2,ru2
- movd r2,t1
- punpcklqdq t1,ru2
- movdqa ru2,sv2
- pslld $2,sv2
- paddd ru2,sv2
-
- # combine r3,u3 and s3=r3*5,v3=u3*5
- movd u3,ru3
- movd r3,t1
- punpcklqdq t1,ru3
- movdqa ru3,sv3
- pslld $2,sv3
- paddd ru3,sv3
-
- # combine r4,u4 and s4=r4*5,v4=u4*5
- movd u4,ru4
- movd r4,t1
- punpcklqdq t1,ru4
- movdqa ru4,sv4
- pslld $2,sv4
- paddd ru4,sv4
-
-.Ldoblock2:
- # hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ]
- movd 0x00(m),hc0
- movd 0x10(m),t1
- punpcklqdq t1,hc0
- pand ANMASK(%rip),hc0
- movd h0,t1
- paddd t1,hc0
- # hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ]
- movd 0x03(m),hc1
- movd 0x13(m),t1
- punpcklqdq t1,hc1
- psrld $2,hc1
- pand ANMASK(%rip),hc1
- movd h1,t1
- paddd t1,hc1
- # hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ]
- movd 0x06(m),hc2
- movd 0x16(m),t1
- punpcklqdq t1,hc2
- psrld $4,hc2
- pand ANMASK(%rip),hc2
- movd h2,t1
- paddd t1,hc2
- # hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ]
- movd 0x09(m),hc3
- movd 0x19(m),t1
- punpcklqdq t1,hc3
- psrld $6,hc3
- pand ANMASK(%rip),hc3
- movd h3,t1
- paddd t1,hc3
- # hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ]
- movd 0x0c(m),hc4
- movd 0x1c(m),t1
- punpcklqdq t1,hc4
- psrld $8,hc4
- por ORMASK(%rip),hc4
- movd h4,t1
- paddd t1,hc4
-
- # t1 = [ hc0[1] * r0, hc0[0] * u0 ]
- movdqa ru0,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * s4, hc1[0] * v4 ]
- movdqa sv4,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * s3, hc2[0] * v3 ]
- movdqa sv3,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * s2, hc3[0] * v2 ]
- movdqa sv2,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * s1, hc4[0] * v1 ]
- movdqa sv1,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d0 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d0
-
- # t1 = [ hc0[1] * r1, hc0[0] * u1 ]
- movdqa ru1,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * r0, hc1[0] * u0 ]
- movdqa ru0,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * s4, hc2[0] * v4 ]
- movdqa sv4,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * s3, hc3[0] * v3 ]
- movdqa sv3,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * s2, hc4[0] * v2 ]
- movdqa sv2,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d1 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d1
-
- # t1 = [ hc0[1] * r2, hc0[0] * u2 ]
- movdqa ru2,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * r1, hc1[0] * u1 ]
- movdqa ru1,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * r0, hc2[0] * u0 ]
- movdqa ru0,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * s4, hc3[0] * v4 ]
- movdqa sv4,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * s3, hc4[0] * v3 ]
- movdqa sv3,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d2 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d2
-
- # t1 = [ hc0[1] * r3, hc0[0] * u3 ]
- movdqa ru3,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * r2, hc1[0] * u2 ]
- movdqa ru2,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * r1, hc2[0] * u1 ]
- movdqa ru1,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * r0, hc3[0] * u0 ]
- movdqa ru0,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * s4, hc4[0] * v4 ]
- movdqa sv4,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d3 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d3
-
- # t1 = [ hc0[1] * r4, hc0[0] * u4 ]
- movdqa ru4,t1
- pmuludq hc0,t1
- # t1 += [ hc1[1] * r3, hc1[0] * u3 ]
- movdqa ru3,t2
- pmuludq hc1,t2
- paddq t2,t1
- # t1 += [ hc2[1] * r2, hc2[0] * u2 ]
- movdqa ru2,t2
- pmuludq hc2,t2
- paddq t2,t1
- # t1 += [ hc3[1] * r1, hc3[0] * u1 ]
- movdqa ru1,t2
- pmuludq hc3,t2
- paddq t2,t1
- # t1 += [ hc4[1] * r0, hc4[0] * u0 ]
- movdqa ru0,t2
- pmuludq hc4,t2
- paddq t2,t1
- # d4 = t1[0] + t1[1]
- movdqa t1,t2
- psrldq $8,t2
- paddq t2,t1
- movq t1,d4
-
- # d1 += d0 >> 26
- mov d0,%rax
- shr $26,%rax
- add %rax,d1
- # h0 = d0 & 0x3ffffff
- mov d0,%rbx
- and $0x3ffffff,%ebx
-
- # d2 += d1 >> 26
- mov d1,%rax
- shr $26,%rax
- add %rax,d2
- # h1 = d1 & 0x3ffffff
- mov d1,%rax
- and $0x3ffffff,%eax
- mov %eax,h1
-
- # d3 += d2 >> 26
- mov d2,%rax
- shr $26,%rax
- add %rax,d3
- # h2 = d2 & 0x3ffffff
- mov d2,%rax
- and $0x3ffffff,%eax
- mov %eax,h2
-
- # d4 += d3 >> 26
- mov d3,%rax
- shr $26,%rax
- add %rax,d4
- # h3 = d3 & 0x3ffffff
- mov d3,%rax
- and $0x3ffffff,%eax
- mov %eax,h3
-
- # h0 += (d4 >> 26) * 5
- mov d4,%rax
- shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
- # h4 = d4 & 0x3ffffff
- mov d4,%rax
- and $0x3ffffff,%eax
- mov %eax,h4
-
- # h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
- add %eax,h1
- # h0 = h0 & 0x3ffffff
- andl $0x3ffffff,%ebx
- mov %ebx,h0
-
- add $0x20,m
- dec %rcx
- jnz .Ldoblock2
-
- pop %r13
- pop %r12
- pop %rbx
- ret
-ENDPROC(poly1305_2block_sse2)
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
deleted file mode 100644
index f012b7e28ad1..000000000000
--- a/arch/x86/crypto/poly1305_glue.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/poly1305.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/simd.h>
-
-struct poly1305_simd_desc_ctx {
- struct poly1305_desc_ctx base;
- /* derived key u set? */
- bool uset;
-#ifdef CONFIG_AS_AVX2
- /* derived keys r^3, r^4 set? */
- bool wset;
-#endif
- /* derived Poly1305 key r^2 */
- u32 u[5];
- /* ... silently appended r^3 and r^4 when using AVX2 */
-};
-
-asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
- const u32 *r, unsigned int blocks);
-asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
- unsigned int blocks, const u32 *u);
-#ifdef CONFIG_AS_AVX2
-asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
- unsigned int blocks, const u32 *u);
-static bool poly1305_use_avx2;
-#endif
-
-static int poly1305_simd_init(struct shash_desc *desc)
-{
- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->uset = false;
-#ifdef CONFIG_AS_AVX2
- sctx->wset = false;
-#endif
-
- return crypto_poly1305_init(desc);
-}
-
-static void poly1305_simd_mult(u32 *a, const u32 *b)
-{
- u8 m[POLY1305_BLOCK_SIZE];
-
- memset(m, 0, sizeof(m));
- /* The poly1305 block function adds a hi-bit to the accumulator which
- * we don't need for key multiplication; compensate for it. */
- a[4] -= 1 << 24;
- poly1305_block_sse2(a, m, b, 1);
-}
-
-static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- struct poly1305_simd_desc_ctx *sctx;
- unsigned int blocks, datalen;
-
- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
-
- if (unlikely(!dctx->sset)) {
- datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
- src += srclen - datalen;
- srclen = datalen;
- }
-
-#ifdef CONFIG_AS_AVX2
- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
- if (unlikely(!sctx->wset)) {
- if (!sctx->uset) {
- memcpy(sctx->u, dctx->r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r);
- sctx->uset = true;
- }
- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 5, dctx->r);
- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 10, dctx->r);
- sctx->wset = true;
- }
- blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
- poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
- src += POLY1305_BLOCK_SIZE * 4 * blocks;
- srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
- }
-#endif
- if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
- if (unlikely(!sctx->uset)) {
- memcpy(sctx->u, dctx->r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r);
- sctx->uset = true;
- }
- blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
- poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
- src += POLY1305_BLOCK_SIZE * 2 * blocks;
- srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
- }
- if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_block_sse2(dctx->h, src, dctx->r, 1);
- srclen -= POLY1305_BLOCK_SIZE;
- }
- return srclen;
-}
-
-static int poly1305_simd_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- unsigned int bytes;
-
- /* kernel_fpu_begin/end is costly, use fallback for small updates */
- if (srclen <= 288 || !may_use_simd())
- return crypto_poly1305_update(desc, src, srclen);
-
- kernel_fpu_begin();
-
- if (unlikely(dctx->buflen)) {
- bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- srclen -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_simd_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE);
- dctx->buflen = 0;
- }
- }
-
- if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- bytes = poly1305_simd_blocks(dctx, src, srclen);
- src += srclen - bytes;
- srclen = bytes;
- }
-
- kernel_fpu_end();
-
- if (unlikely(srclen)) {
- dctx->buflen = srclen;
- memcpy(dctx->buf, src, srclen);
- }
-
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = POLY1305_DIGEST_SIZE,
- .init = poly1305_simd_init,
- .update = poly1305_simd_update,
- .final = crypto_poly1305_final,
- .descsize = sizeof(struct poly1305_simd_desc_ctx),
- .base = {
- .cra_name = "poly1305",
- .cra_driver_name = "poly1305-simd",
- .cra_priority = 300,
- .cra_blocksize = POLY1305_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- },
-};
-
-static int __init poly1305_simd_mod_init(void)
-{
- if (!boot_cpu_has(X86_FEATURE_XMM2))
- return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
- poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
- alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
- if (poly1305_use_avx2)
- alg.descsize += 10 * sizeof(u32);
-#endif
- return crypto_register_shash(&alg);
-}
-
-static void __exit poly1305_simd_mod_exit(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(poly1305_simd_mod_init);
-module_exit(poly1305_simd_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("Poly1305 authenticator");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-simd");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index f3e40ac56d93..47859a0f8052 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -656,24 +656,13 @@ config CRYPTO_GHASH
config CRYPTO_POLY1305
tristate "Poly1305 authenticator algorithm"
select CRYPTO_HASH
+ select ZINC_POLY1305
help
Poly1305 authenticator algorithm, RFC7539.
Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein.
It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use
- in IETF protocols. This is the portable C implementation of Poly1305.
-
-config CRYPTO_POLY1305_X86_64
- tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
- depends on X86 && 64BIT
- select CRYPTO_POLY1305
- help
- Poly1305 authenticator algorithm, RFC7539.
-
- Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein.
- It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use
- in IETF protocols. This is the x86_64 assembler implementation using SIMD
- instructions.
+ in IETF protocols.
config CRYPTO_MD4
tristate "MD4 digest algorithm"
diff --git a/crypto/Makefile b/crypto/Makefile
index 6d1d40eeb964..5e60348d02e2 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -118,7 +118,7 @@ obj-$(CONFIG_CRYPTO_SEED) += seed.o
obj-$(CONFIG_CRYPTO_SPECK) += speck.o
obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
-obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
+obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_zinc.o
obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index 600afa99941f..bf523797bef3 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -14,7 +14,7 @@
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <crypto/chacha20.h>
-#include <crypto/poly1305.h>
+#include <zinc/poly1305.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -62,7 +62,7 @@ struct chachapoly_req_ctx {
/* the key we generate for Poly1305 using Chacha20 */
u8 key[POLY1305_KEY_SIZE];
/* calculated Poly1305 tag */
- u8 tag[POLY1305_DIGEST_SIZE];
+ u8 tag[POLY1305_MAC_SIZE];
/* length of data to en/decrypt, without ICV */
unsigned int cryptlen;
/* Actual AD, excluding IV */
@@ -471,7 +471,7 @@ static int chachapoly_decrypt(struct aead_request *req)
{
struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
- rctx->cryptlen = req->cryptlen - POLY1305_DIGEST_SIZE;
+ rctx->cryptlen = req->cryptlen - POLY1305_MAC_SIZE;
/* decrypt call chain:
* - poly_genkey/done()
@@ -513,7 +513,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
static int chachapoly_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
- if (authsize != POLY1305_DIGEST_SIZE)
+ if (authsize != POLY1305_MAC_SIZE)
return -EINVAL;
return 0;
@@ -613,7 +613,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
poly_hash = __crypto_hash_alg_common(poly);
err = -EINVAL;
- if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+ if (poly_hash->digestsize != POLY1305_MAC_SIZE)
goto out_put_poly;
err = -ENOMEM;
@@ -666,7 +666,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
ctx->saltlen;
inst->alg.ivsize = ivsize;
inst->alg.chunksize = crypto_skcipher_alg_chunksize(chacha);
- inst->alg.maxauthsize = POLY1305_DIGEST_SIZE;
+ inst->alg.maxauthsize = POLY1305_MAC_SIZE;
inst->alg.init = chachapoly_init;
inst->alg.exit = chachapoly_exit;
inst->alg.encrypt = chachapoly_encrypt;
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
deleted file mode 100644
index 47d3a6b83931..000000000000
--- a/crypto/poly1305_generic.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/poly1305.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/unaligned.h>
-
-static inline u64 mlt(u64 a, u64 b)
-{
- return a * b;
-}
-
-static inline u32 sr(u64 v, u_char n)
-{
- return v >> n;
-}
-
-static inline u32 and(u32 v, u32 mask)
-{
- return v & mask;
-}
-
-int crypto_poly1305_init(struct shash_desc *desc)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
- memset(dctx->h, 0, sizeof(dctx->h));
- dctx->buflen = 0;
- dctx->rset = false;
- dctx->sset = false;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_init);
-
-static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
-{
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff;
- dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03;
- dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff;
- dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff;
- dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
-}
-
-static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
-{
- dctx->s[0] = get_unaligned_le32(key + 0);
- dctx->s[1] = get_unaligned_le32(key + 4);
- dctx->s[2] = get_unaligned_le32(key + 8);
- dctx->s[3] = get_unaligned_le32(key + 12);
-}
-
-/*
- * Poly1305 requires a unique key for each tag, which implies that we can't set
- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
- * expect the key as the first 32 bytes in the update() call.
- */
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- if (!dctx->sset) {
- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_setrkey(dctx, src);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- dctx->rset = true;
- }
- if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_setskey(dctx, src);
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- dctx->sset = true;
- }
- }
- return srclen;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
-
-static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen,
- u32 hibit)
-{
- u32 r0, r1, r2, r3, r4;
- u32 s1, s2, s3, s4;
- u32 h0, h1, h2, h3, h4;
- u64 d0, d1, d2, d3, d4;
- unsigned int datalen;
-
- if (unlikely(!dctx->sset)) {
- datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
- src += srclen - datalen;
- srclen = datalen;
- }
-
- r0 = dctx->r[0];
- r1 = dctx->r[1];
- r2 = dctx->r[2];
- r3 = dctx->r[3];
- r4 = dctx->r[4];
-
- s1 = r1 * 5;
- s2 = r2 * 5;
- s3 = r3 * 5;
- s4 = r4 * 5;
-
- h0 = dctx->h[0];
- h1 = dctx->h[1];
- h2 = dctx->h[2];
- h3 = dctx->h[3];
- h4 = dctx->h[4];
-
- while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-
- /* h += m[i] */
- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
- h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
-
- /* h *= r */
- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
- mlt(h3, s2) + mlt(h4, s1);
- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
- mlt(h3, s3) + mlt(h4, s2);
- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
- mlt(h3, s4) + mlt(h4, s3);
- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
- mlt(h3, r0) + mlt(h4, s4);
- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
- mlt(h3, r1) + mlt(h4, r0);
-
- /* (partial) h %= p */
- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
- h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
-
- src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- }
-
- dctx->h[0] = h0;
- dctx->h[1] = h1;
- dctx->h[2] = h2;
- dctx->h[3] = h3;
- dctx->h[4] = h4;
-
- return srclen;
-}
-
-int crypto_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- unsigned int bytes;
-
- if (unlikely(dctx->buflen)) {
- bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
- memcpy(dctx->buf + dctx->buflen, src, bytes);
- src += bytes;
- srclen -= bytes;
- dctx->buflen += bytes;
-
- if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE, 1 << 24);
- dctx->buflen = 0;
- }
- }
-
- if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- bytes = poly1305_blocks(dctx, src, srclen, 1 << 24);
- src += srclen - bytes;
- srclen = bytes;
- }
-
- if (unlikely(srclen)) {
- dctx->buflen = srclen;
- memcpy(dctx->buf, src, srclen);
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_update);
-
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
-{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- u32 h0, h1, h2, h3, h4;
- u32 g0, g1, g2, g3, g4;
- u32 mask;
- u64 f = 0;
-
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- if (unlikely(dctx->buflen)) {
- dctx->buf[dctx->buflen++] = 1;
- memset(dctx->buf + dctx->buflen, 0,
- POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
- }
-
- /* fully carry h */
- h0 = dctx->h[0];
- h1 = dctx->h[1];
- h2 = dctx->h[2];
- h3 = dctx->h[3];
- h4 = dctx->h[4];
-
- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
-
- /* compute h + -p */
- g0 = h0 + 5;
- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
-
- /* select h if h < p, or h + -p if h >= p */
- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
- g0 &= mask;
- g1 &= mask;
- g2 &= mask;
- g3 &= mask;
- g4 &= mask;
- mask = ~mask;
- h0 = (h0 & mask) | g0;
- h1 = (h1 & mask) | g1;
- h2 = (h2 & mask) | g2;
- h3 = (h3 & mask) | g3;
- h4 = (h4 & mask) | g4;
-
- /* h = h % (2^128) */
- h0 = (h0 >> 0) | (h1 << 26);
- h1 = (h1 >> 6) | (h2 << 20);
- h2 = (h2 >> 12) | (h3 << 14);
- h3 = (h3 >> 18) | (h4 << 8);
-
- /* mac = (h + s) % (2^128) */
- f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst + 0);
- f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst + 4);
- f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst + 8);
- f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_final);
-
-static struct shash_alg poly1305_alg = {
- .digestsize = POLY1305_DIGEST_SIZE,
- .init = crypto_poly1305_init,
- .update = crypto_poly1305_update,
- .final = crypto_poly1305_final,
- .descsize = sizeof(struct poly1305_desc_ctx),
- .base = {
- .cra_name = "poly1305",
- .cra_driver_name = "poly1305-generic",
- .cra_priority = 100,
- .cra_blocksize = POLY1305_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- },
-};
-
-static int __init poly1305_mod_init(void)
-{
- return crypto_register_shash(&poly1305_alg);
-}
-
-static void __exit poly1305_mod_exit(void)
-{
- crypto_unregister_shash(&poly1305_alg);
-}
-
-module_init(poly1305_mod_init);
-module_exit(poly1305_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("Poly1305 authenticator");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-generic");
diff --git a/crypto/poly1305_zinc.c b/crypto/poly1305_zinc.c
new file mode 100644
index 000000000000..630bbb1ba230
--- /dev/null
+++ b/crypto/poly1305_zinc.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <zinc/poly1305.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/simd.h>
+
+struct poly1305_desc_ctx {
+ struct poly1305_ctx ctx;
+ u8 key[POLY1305_KEY_SIZE];
+ unsigned int rem_key_bytes;
+};
+
+static int crypto_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ dctx->rem_key_bytes = POLY1305_KEY_SIZE;
+ return 0;
+}
+
+static int crypto_poly1305_update(struct shash_desc *desc, const u8 *src,
+ unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ simd_context_t simd_context = simd_get();
+
+ if (unlikely(dctx->rem_key_bytes)) {
+ unsigned int key_bytes = min(srclen, dctx->rem_key_bytes);
+ memcpy(dctx->key + (POLY1305_KEY_SIZE - dctx->rem_key_bytes),
+ src, key_bytes);
+ src += key_bytes;
+ srclen -= key_bytes;
+ dctx->rem_key_bytes -= key_bytes;
+ if (!dctx->rem_key_bytes) {
+ poly1305_init(&dctx->ctx, dctx->key, simd_context);
+ memzero_explicit(dctx->key, sizeof(dctx->key));
+ }
+ }
+ poly1305_update(&dctx->ctx, src, srclen, simd_context);
+ simd_put(simd_context);
+
+ return 0;
+}
+
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ simd_context_t simd_context = simd_get();
+ poly1305_final(&dctx->ctx, dst, simd_context);
+ simd_put(simd_context);
+ return 0;
+}
+
+static struct shash_alg poly1305_alg = {
+ .digestsize = POLY1305_MAC_SIZE,
+ .init = crypto_poly1305_init,
+ .update = crypto_poly1305_update,
+ .final = crypto_poly1305_final,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+ .base = {
+ .cra_name = "poly1305",
+ .cra_driver_name = "poly1305-software",
+ .cra_priority = 100,
+ .cra_blocksize = POLY1305_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init poly1305_mod_init(void)
+{
+ return crypto_register_shash(&poly1305_alg);
+}
+
+static void __exit poly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&poly1305_alg);
+}
+
+module_init(poly1305_mod_init);
+module_exit(poly1305_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_DESCRIPTION("Poly1305 authenticator");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-software");
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
deleted file mode 100644
index f718a19da82f..000000000000
--- a/include/crypto/poly1305.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Common values for the Poly1305 algorithm
- */
-
-#ifndef _CRYPTO_POLY1305_H
-#define _CRYPTO_POLY1305_H
-
-#include <linux/types.h>
-#include <linux/crypto.h>
-
-#define POLY1305_BLOCK_SIZE 16
-#define POLY1305_KEY_SIZE 32
-#define POLY1305_DIGEST_SIZE 16
-
-struct poly1305_desc_ctx {
- /* key */
- u32 r[5];
- /* finalize key */
- u32 s[4];
- /* accumulator */
- u32 h[5];
- /* partial buffer */
- u8 buf[POLY1305_BLOCK_SIZE];
- /* bytes used in partial buffer */
- unsigned int buflen;
- /* r key has been set */
- bool rset;
- /* s key has been set */
- bool sset;
-};
-
-int crypto_poly1305_init(struct shash_desc *desc);
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen);
-int crypto_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen);
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
-
-#endif
--
2.19.0
^ permalink raw reply related
* [PATCH net-next v4 18/20] crypto: port ChaCha20 to Zinc
From: Jason A. Donenfeld @ 2018-09-14 16:22 UTC (permalink / raw)
To: linux-kernel, netdev, linux-crypto, davem, gregkh
Cc: Jason A. Donenfeld, Samuel Neves, Andy Lutomirski,
Jean-Philippe Aumasson, Eric Biggers
In-Reply-To: <20180914162240.7925-1-Jason@zx2c4.com>
Now that ChaCha20 is in Zinc, we can have the crypto API code simply
call into it. The crypto API expects to have a stored key per instance
and independent nonces, so we follow suite and store the key and
initialize the nonce independently.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Eric Biggers <ebiggers@google.com>
---
arch/arm/configs/exynos_defconfig | 1 -
arch/arm/configs/multi_v7_defconfig | 1 -
arch/arm/configs/omap2plus_defconfig | 1 -
arch/arm/crypto/Kconfig | 6 -
arch/arm/crypto/Makefile | 2 -
arch/arm/crypto/chacha20-neon-core.S | 521 --------------------
arch/arm/crypto/chacha20-neon-glue.c | 127 -----
arch/arm64/configs/defconfig | 1 -
arch/arm64/crypto/Kconfig | 6 -
arch/arm64/crypto/Makefile | 3 -
arch/arm64/crypto/chacha20-neon-core.S | 450 -----------------
arch/arm64/crypto/chacha20-neon-glue.c | 133 -----
arch/x86/crypto/Makefile | 3 -
arch/x86/crypto/chacha20-avx2-x86_64.S | 448 -----------------
arch/x86/crypto/chacha20-ssse3-x86_64.S | 630 ------------------------
arch/x86/crypto/chacha20_glue.c | 146 ------
crypto/Kconfig | 16 -
crypto/Makefile | 2 +-
crypto/chacha20_generic.c | 136 -----
crypto/chacha20_zinc.c | 100 ++++
crypto/chacha20poly1305.c | 2 +-
include/crypto/chacha20.h | 12 -
22 files changed, 102 insertions(+), 2645 deletions(-)
delete mode 100644 arch/arm/crypto/chacha20-neon-core.S
delete mode 100644 arch/arm/crypto/chacha20-neon-glue.c
delete mode 100644 arch/arm64/crypto/chacha20-neon-core.S
delete mode 100644 arch/arm64/crypto/chacha20-neon-glue.c
delete mode 100644 arch/x86/crypto/chacha20-avx2-x86_64.S
delete mode 100644 arch/x86/crypto/chacha20-ssse3-x86_64.S
delete mode 100644 arch/x86/crypto/chacha20_glue.c
delete mode 100644 crypto/chacha20_generic.c
create mode 100644 crypto/chacha20_zinc.c
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 27ea6dfcf2f2..95929b5e7b10 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -350,7 +350,6 @@ CONFIG_CRYPTO_SHA1_ARM_NEON=m
CONFIG_CRYPTO_SHA256_ARM=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
CONFIG_CRC_CCITT=y
CONFIG_FONTS=y
CONFIG_FONT_7x14=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index fc33444e94f0..63be07724db3 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1000,4 +1000,3 @@ CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_AES_ARM_CE=m
CONFIG_CRYPTO_GHASH_ARM_CE=m
CONFIG_CRYPTO_CRC32_ARM_CE=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 6491419b1dad..f585a8ecc336 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -547,7 +547,6 @@ CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_GHASH_ARM_CE=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
CONFIG_CRC_CCITT=y
CONFIG_CRC_T10DIF=y
CONFIG_CRC_ITU_T=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 925d1364727a..fb80fd89f0e7 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -115,12 +115,6 @@ config CRYPTO_CRC32_ARM_CE
depends on KERNEL_MODE_NEON && CRC32
select CRYPTO_HASH
-config CRYPTO_CHACHA20_NEON
- tristate "NEON accelerated ChaCha20 symmetric cipher"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
-
config CRYPTO_SPECK_NEON
tristate "NEON accelerated Speck cipher algorithms"
depends on KERNEL_MODE_NEON
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8de542c48ade..bbfa98447063 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -9,7 +9,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
@@ -53,7 +52,6 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
speck-neon-y := speck-neon-core.o speck-neon-glue.o
ifdef REGENERATE_ARM_CRYPTO
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
deleted file mode 100644
index 451a849ad518..000000000000
--- a/arch/arm/crypto/chacha20-neon-core.S
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
- .text
- .fpu neon
- .align 5
-
-ENTRY(chacha20_block_xor_neon)
- // r0: Input state matrix, s
- // r1: 1 data block output, o
- // r2: 1 data block input, i
-
- //
- // This function encrypts one ChaCha20 block by loading the state matrix
- // in four NEON registers. It performs matrix operation on four words in
- // parallel, but requireds shuffling to rearrange the words after each
- // round.
- //
-
- // x0..3 = s0..3
- add ip, r0, #0x20
- vld1.32 {q0-q1}, [r0]
- vld1.32 {q2-q3}, [ip]
-
- vmov q8, q0
- vmov q9, q1
- vmov q10, q2
- vmov q11, q3
-
- mov r3, #10
-
-.Ldoubleround:
- // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
- vadd.i32 q0, q0, q1
- veor q3, q3, q0
- vrev32.16 q3, q3
-
- // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
- vadd.i32 q2, q2, q3
- veor q4, q1, q2
- vshl.u32 q1, q4, #12
- vsri.u32 q1, q4, #20
-
- // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
- vadd.i32 q0, q0, q1
- veor q4, q3, q0
- vshl.u32 q3, q4, #8
- vsri.u32 q3, q4, #24
-
- // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
- vadd.i32 q2, q2, q3
- veor q4, q1, q2
- vshl.u32 q1, q4, #7
- vsri.u32 q1, q4, #25
-
- // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
- vext.8 q1, q1, q1, #4
- // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
- vext.8 q2, q2, q2, #8
- // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
- vext.8 q3, q3, q3, #12
-
- // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
- vadd.i32 q0, q0, q1
- veor q3, q3, q0
- vrev32.16 q3, q3
-
- // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
- vadd.i32 q2, q2, q3
- veor q4, q1, q2
- vshl.u32 q1, q4, #12
- vsri.u32 q1, q4, #20
-
- // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
- vadd.i32 q0, q0, q1
- veor q4, q3, q0
- vshl.u32 q3, q4, #8
- vsri.u32 q3, q4, #24
-
- // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
- vadd.i32 q2, q2, q3
- veor q4, q1, q2
- vshl.u32 q1, q4, #7
- vsri.u32 q1, q4, #25
-
- // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
- vext.8 q1, q1, q1, #12
- // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
- vext.8 q2, q2, q2, #8
- // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
- vext.8 q3, q3, q3, #4
-
- subs r3, r3, #1
- bne .Ldoubleround
-
- add ip, r2, #0x20
- vld1.8 {q4-q5}, [r2]
- vld1.8 {q6-q7}, [ip]
-
- // o0 = i0 ^ (x0 + s0)
- vadd.i32 q0, q0, q8
- veor q0, q0, q4
-
- // o1 = i1 ^ (x1 + s1)
- vadd.i32 q1, q1, q9
- veor q1, q1, q5
-
- // o2 = i2 ^ (x2 + s2)
- vadd.i32 q2, q2, q10
- veor q2, q2, q6
-
- // o3 = i3 ^ (x3 + s3)
- vadd.i32 q3, q3, q11
- veor q3, q3, q7
-
- add ip, r1, #0x20
- vst1.8 {q0-q1}, [r1]
- vst1.8 {q2-q3}, [ip]
-
- bx lr
-ENDPROC(chacha20_block_xor_neon)
-
- .align 5
-ENTRY(chacha20_4block_xor_neon)
- push {r4-r6, lr}
- mov ip, sp // preserve the stack pointer
- sub r3, sp, #0x20 // allocate a 32 byte buffer
- bic r3, r3, #0x1f // aligned to 32 bytes
- mov sp, r3
-
- // r0: Input state matrix, s
- // r1: 4 data blocks output, o
- // r2: 4 data blocks input, i
-
- //
- // This function encrypts four consecutive ChaCha20 blocks by loading
- // the state matrix in NEON registers four times. The algorithm performs
- // each operation on the corresponding word of each state matrix, hence
- // requires no word shuffling. For final XORing step we transpose the
- // matrix by interleaving 32- and then 64-bit words, which allows us to
- // do XOR in NEON registers.
- //
-
- // x0..15[0-3] = s0..3[0..3]
- add r3, r0, #0x20
- vld1.32 {q0-q1}, [r0]
- vld1.32 {q2-q3}, [r3]
-
- adr r3, CTRINC
- vdup.32 q15, d7[1]
- vdup.32 q14, d7[0]
- vld1.32 {q11}, [r3, :128]
- vdup.32 q13, d6[1]
- vdup.32 q12, d6[0]
- vadd.i32 q12, q12, q11 // x12 += counter values 0-3
- vdup.32 q11, d5[1]
- vdup.32 q10, d5[0]
- vdup.32 q9, d4[1]
- vdup.32 q8, d4[0]
- vdup.32 q7, d3[1]
- vdup.32 q6, d3[0]
- vdup.32 q5, d2[1]
- vdup.32 q4, d2[0]
- vdup.32 q3, d1[1]
- vdup.32 q2, d1[0]
- vdup.32 q1, d0[1]
- vdup.32 q0, d0[0]
-
- mov r3, #10
-
-.Ldoubleround4:
- // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
- // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
- // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
- // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
- vadd.i32 q0, q0, q4
- vadd.i32 q1, q1, q5
- vadd.i32 q2, q2, q6
- vadd.i32 q3, q3, q7
-
- veor q12, q12, q0
- veor q13, q13, q1
- veor q14, q14, q2
- veor q15, q15, q3
-
- vrev32.16 q12, q12
- vrev32.16 q13, q13
- vrev32.16 q14, q14
- vrev32.16 q15, q15
-
- // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
- // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
- // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
- // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
- vadd.i32 q8, q8, q12
- vadd.i32 q9, q9, q13
- vadd.i32 q10, q10, q14
- vadd.i32 q11, q11, q15
-
- vst1.32 {q8-q9}, [sp, :256]
-
- veor q8, q4, q8
- veor q9, q5, q9
- vshl.u32 q4, q8, #12
- vshl.u32 q5, q9, #12
- vsri.u32 q4, q8, #20
- vsri.u32 q5, q9, #20
-
- veor q8, q6, q10
- veor q9, q7, q11
- vshl.u32 q6, q8, #12
- vshl.u32 q7, q9, #12
- vsri.u32 q6, q8, #20
- vsri.u32 q7, q9, #20
-
- // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
- // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
- // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
- // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
- vadd.i32 q0, q0, q4
- vadd.i32 q1, q1, q5
- vadd.i32 q2, q2, q6
- vadd.i32 q3, q3, q7
-
- veor q8, q12, q0
- veor q9, q13, q1
- vshl.u32 q12, q8, #8
- vshl.u32 q13, q9, #8
- vsri.u32 q12, q8, #24
- vsri.u32 q13, q9, #24
-
- veor q8, q14, q2
- veor q9, q15, q3
- vshl.u32 q14, q8, #8
- vshl.u32 q15, q9, #8
- vsri.u32 q14, q8, #24
- vsri.u32 q15, q9, #24
-
- vld1.32 {q8-q9}, [sp, :256]
-
- // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
- // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
- // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
- // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
- vadd.i32 q8, q8, q12
- vadd.i32 q9, q9, q13
- vadd.i32 q10, q10, q14
- vadd.i32 q11, q11, q15
-
- vst1.32 {q8-q9}, [sp, :256]
-
- veor q8, q4, q8
- veor q9, q5, q9
- vshl.u32 q4, q8, #7
- vshl.u32 q5, q9, #7
- vsri.u32 q4, q8, #25
- vsri.u32 q5, q9, #25
-
- veor q8, q6, q10
- veor q9, q7, q11
- vshl.u32 q6, q8, #7
- vshl.u32 q7, q9, #7
- vsri.u32 q6, q8, #25
- vsri.u32 q7, q9, #25
-
- vld1.32 {q8-q9}, [sp, :256]
-
- // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
- // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
- // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
- // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
- vadd.i32 q0, q0, q5
- vadd.i32 q1, q1, q6
- vadd.i32 q2, q2, q7
- vadd.i32 q3, q3, q4
-
- veor q15, q15, q0
- veor q12, q12, q1
- veor q13, q13, q2
- veor q14, q14, q3
-
- vrev32.16 q15, q15
- vrev32.16 q12, q12
- vrev32.16 q13, q13
- vrev32.16 q14, q14
-
- // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
- // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
- // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
- // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
- vadd.i32 q10, q10, q15
- vadd.i32 q11, q11, q12
- vadd.i32 q8, q8, q13
- vadd.i32 q9, q9, q14
-
- vst1.32 {q8-q9}, [sp, :256]
-
- veor q8, q7, q8
- veor q9, q4, q9
- vshl.u32 q7, q8, #12
- vshl.u32 q4, q9, #12
- vsri.u32 q7, q8, #20
- vsri.u32 q4, q9, #20
-
- veor q8, q5, q10
- veor q9, q6, q11
- vshl.u32 q5, q8, #12
- vshl.u32 q6, q9, #12
- vsri.u32 q5, q8, #20
- vsri.u32 q6, q9, #20
-
- // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
- // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
- // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
- // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
- vadd.i32 q0, q0, q5
- vadd.i32 q1, q1, q6
- vadd.i32 q2, q2, q7
- vadd.i32 q3, q3, q4
-
- veor q8, q15, q0
- veor q9, q12, q1
- vshl.u32 q15, q8, #8
- vshl.u32 q12, q9, #8
- vsri.u32 q15, q8, #24
- vsri.u32 q12, q9, #24
-
- veor q8, q13, q2
- veor q9, q14, q3
- vshl.u32 q13, q8, #8
- vshl.u32 q14, q9, #8
- vsri.u32 q13, q8, #24
- vsri.u32 q14, q9, #24
-
- vld1.32 {q8-q9}, [sp, :256]
-
- // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
- // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
- // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
- // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
- vadd.i32 q10, q10, q15
- vadd.i32 q11, q11, q12
- vadd.i32 q8, q8, q13
- vadd.i32 q9, q9, q14
-
- vst1.32 {q8-q9}, [sp, :256]
-
- veor q8, q7, q8
- veor q9, q4, q9
- vshl.u32 q7, q8, #7
- vshl.u32 q4, q9, #7
- vsri.u32 q7, q8, #25
- vsri.u32 q4, q9, #25
-
- veor q8, q5, q10
- veor q9, q6, q11
- vshl.u32 q5, q8, #7
- vshl.u32 q6, q9, #7
- vsri.u32 q5, q8, #25
- vsri.u32 q6, q9, #25
-
- subs r3, r3, #1
- beq 0f
-
- vld1.32 {q8-q9}, [sp, :256]
- b .Ldoubleround4
-
- // x0[0-3] += s0[0]
- // x1[0-3] += s0[1]
- // x2[0-3] += s0[2]
- // x3[0-3] += s0[3]
-0: ldmia r0!, {r3-r6}
- vdup.32 q8, r3
- vdup.32 q9, r4
- vadd.i32 q0, q0, q8
- vadd.i32 q1, q1, q9
- vdup.32 q8, r5
- vdup.32 q9, r6
- vadd.i32 q2, q2, q8
- vadd.i32 q3, q3, q9
-
- // x4[0-3] += s1[0]
- // x5[0-3] += s1[1]
- // x6[0-3] += s1[2]
- // x7[0-3] += s1[3]
- ldmia r0!, {r3-r6}
- vdup.32 q8, r3
- vdup.32 q9, r4
- vadd.i32 q4, q4, q8
- vadd.i32 q5, q5, q9
- vdup.32 q8, r5
- vdup.32 q9, r6
- vadd.i32 q6, q6, q8
- vadd.i32 q7, q7, q9
-
- // interleave 32-bit words in state n, n+1
- vzip.32 q0, q1
- vzip.32 q2, q3
- vzip.32 q4, q5
- vzip.32 q6, q7
-
- // interleave 64-bit words in state n, n+2
- vswp d1, d4
- vswp d3, d6
- vswp d9, d12
- vswp d11, d14
-
- // xor with corresponding input, write to output
- vld1.8 {q8-q9}, [r2]!
- veor q8, q8, q0
- veor q9, q9, q4
- vst1.8 {q8-q9}, [r1]!
-
- vld1.32 {q8-q9}, [sp, :256]
-
- // x8[0-3] += s2[0]
- // x9[0-3] += s2[1]
- // x10[0-3] += s2[2]
- // x11[0-3] += s2[3]
- ldmia r0!, {r3-r6}
- vdup.32 q0, r3
- vdup.32 q4, r4
- vadd.i32 q8, q8, q0
- vadd.i32 q9, q9, q4
- vdup.32 q0, r5
- vdup.32 q4, r6
- vadd.i32 q10, q10, q0
- vadd.i32 q11, q11, q4
-
- // x12[0-3] += s3[0]
- // x13[0-3] += s3[1]
- // x14[0-3] += s3[2]
- // x15[0-3] += s3[3]
- ldmia r0!, {r3-r6}
- vdup.32 q0, r3
- vdup.32 q4, r4
- adr r3, CTRINC
- vadd.i32 q12, q12, q0
- vld1.32 {q0}, [r3, :128]
- vadd.i32 q13, q13, q4
- vadd.i32 q12, q12, q0 // x12 += counter values 0-3
-
- vdup.32 q0, r5
- vdup.32 q4, r6
- vadd.i32 q14, q14, q0
- vadd.i32 q15, q15, q4
-
- // interleave 32-bit words in state n, n+1
- vzip.32 q8, q9
- vzip.32 q10, q11
- vzip.32 q12, q13
- vzip.32 q14, q15
-
- // interleave 64-bit words in state n, n+2
- vswp d17, d20
- vswp d19, d22
- vswp d25, d28
- vswp d27, d30
-
- vmov q4, q1
-
- vld1.8 {q0-q1}, [r2]!
- veor q0, q0, q8
- veor q1, q1, q12
- vst1.8 {q0-q1}, [r1]!
-
- vld1.8 {q0-q1}, [r2]!
- veor q0, q0, q2
- veor q1, q1, q6
- vst1.8 {q0-q1}, [r1]!
-
- vld1.8 {q0-q1}, [r2]!
- veor q0, q0, q10
- veor q1, q1, q14
- vst1.8 {q0-q1}, [r1]!
-
- vld1.8 {q0-q1}, [r2]!
- veor q0, q0, q4
- veor q1, q1, q5
- vst1.8 {q0-q1}, [r1]!
-
- vld1.8 {q0-q1}, [r2]!
- veor q0, q0, q9
- veor q1, q1, q13
- vst1.8 {q0-q1}, [r1]!
-
- vld1.8 {q0-q1}, [r2]!
- veor q0, q0, q3
- veor q1, q1, q7
- vst1.8 {q0-q1}, [r1]!
-
- vld1.8 {q0-q1}, [r2]
- veor q0, q0, q11
- veor q1, q1, q15
- vst1.8 {q0-q1}, [r1]
-
- mov sp, ip
- pop {r4-r6, pc}
-ENDPROC(chacha20_4block_xor_neon)
-
- .align 4
-CTRINC: .word 0, 1, 2, 3
diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c
deleted file mode 100644
index 59a7be08e80c..000000000000
--- a/arch/arm/crypto/chacha20-neon-glue.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
-{
- u8 buf[CHACHA20_BLOCK_SIZE];
-
- while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
- chacha20_4block_xor_neon(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE * 4;
- src += CHACHA20_BLOCK_SIZE * 4;
- dst += CHACHA20_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block_xor_neon(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE;
- src += CHACHA20_BLOCK_SIZE;
- dst += CHACHA20_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha20_block_xor_neon(state, buf, buf);
- memcpy(dst, buf, bytes);
- }
-}
-
-static int chacha20_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
- return crypto_chacha20_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_chacha20_init(state, ctx, walk.iv);
-
- kernel_neon_begin();
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
- kernel_neon_end();
-
- return err;
-}
-
-static struct skcipher_alg alg = {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA20_KEY_SIZE,
- .max_keysize = CHACHA20_KEY_SIZE,
- .ivsize = CHACHA20_IV_SIZE,
- .chunksize = CHACHA20_BLOCK_SIZE,
- .walksize = 4 * CHACHA20_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha20_neon,
- .decrypt = chacha20_neon,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
-
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364f8476..6cc3c8a0ad88 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -709,5 +709,4 @@ CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
CONFIG_CRYPTO_CRC32_ARM64_CE=m
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
-CONFIG_CRYPTO_CHACHA20_NEON=m
CONFIG_CRYPTO_AES_ARM64_BS=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index e3fdb0fd6f70..9db6d775a880 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -105,12 +105,6 @@ config CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_AES
select CRYPTO_SIMD
-config CRYPTO_CHACHA20_NEON
- tristate "NEON accelerated ChaCha20 symmetric cipher"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
-
config CRYPTO_AES_ARM64_BS
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index bcafd016618e..507c4bfb86e3 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -53,9 +53,6 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
-
obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
speck-neon-y := speck-neon-core.o speck-neon-glue.o
diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S
deleted file mode 100644
index 13c85e272c2a..000000000000
--- a/arch/arm64/crypto/chacha20-neon-core.S
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
- .text
- .align 6
-
-ENTRY(chacha20_block_xor_neon)
- // x0: Input state matrix, s
- // x1: 1 data block output, o
- // x2: 1 data block input, i
-
- //
- // This function encrypts one ChaCha20 block by loading the state matrix
- // in four NEON registers. It performs matrix operation on four words in
- // parallel, but requires shuffling to rearrange the words after each
- // round.
- //
-
- // x0..3 = s0..3
- adr x3, ROT8
- ld1 {v0.4s-v3.4s}, [x0]
- ld1 {v8.4s-v11.4s}, [x0]
- ld1 {v12.4s}, [x3]
-
- mov x3, #10
-
-.Ldoubleround:
- // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
- add v0.4s, v0.4s, v1.4s
- eor v3.16b, v3.16b, v0.16b
- rev32 v3.8h, v3.8h
-
- // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
- add v2.4s, v2.4s, v3.4s
- eor v4.16b, v1.16b, v2.16b
- shl v1.4s, v4.4s, #12
- sri v1.4s, v4.4s, #20
-
- // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
- add v0.4s, v0.4s, v1.4s
- eor v3.16b, v3.16b, v0.16b
- tbl v3.16b, {v3.16b}, v12.16b
-
- // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
- add v2.4s, v2.4s, v3.4s
- eor v4.16b, v1.16b, v2.16b
- shl v1.4s, v4.4s, #7
- sri v1.4s, v4.4s, #25
-
- // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
- ext v1.16b, v1.16b, v1.16b, #4
- // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
- ext v2.16b, v2.16b, v2.16b, #8
- // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
- ext v3.16b, v3.16b, v3.16b, #12
-
- // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
- add v0.4s, v0.4s, v1.4s
- eor v3.16b, v3.16b, v0.16b
- rev32 v3.8h, v3.8h
-
- // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
- add v2.4s, v2.4s, v3.4s
- eor v4.16b, v1.16b, v2.16b
- shl v1.4s, v4.4s, #12
- sri v1.4s, v4.4s, #20
-
- // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
- add v0.4s, v0.4s, v1.4s
- eor v3.16b, v3.16b, v0.16b
- tbl v3.16b, {v3.16b}, v12.16b
-
- // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
- add v2.4s, v2.4s, v3.4s
- eor v4.16b, v1.16b, v2.16b
- shl v1.4s, v4.4s, #7
- sri v1.4s, v4.4s, #25
-
- // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
- ext v1.16b, v1.16b, v1.16b, #12
- // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
- ext v2.16b, v2.16b, v2.16b, #8
- // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
- ext v3.16b, v3.16b, v3.16b, #4
-
- subs x3, x3, #1
- b.ne .Ldoubleround
-
- ld1 {v4.16b-v7.16b}, [x2]
-
- // o0 = i0 ^ (x0 + s0)
- add v0.4s, v0.4s, v8.4s
- eor v0.16b, v0.16b, v4.16b
-
- // o1 = i1 ^ (x1 + s1)
- add v1.4s, v1.4s, v9.4s
- eor v1.16b, v1.16b, v5.16b
-
- // o2 = i2 ^ (x2 + s2)
- add v2.4s, v2.4s, v10.4s
- eor v2.16b, v2.16b, v6.16b
-
- // o3 = i3 ^ (x3 + s3)
- add v3.4s, v3.4s, v11.4s
- eor v3.16b, v3.16b, v7.16b
-
- st1 {v0.16b-v3.16b}, [x1]
-
- ret
-ENDPROC(chacha20_block_xor_neon)
-
- .align 6
-ENTRY(chacha20_4block_xor_neon)
- // x0: Input state matrix, s
- // x1: 4 data blocks output, o
- // x2: 4 data blocks input, i
-
- //
- // This function encrypts four consecutive ChaCha20 blocks by loading
- // the state matrix in NEON registers four times. The algorithm performs
- // each operation on the corresponding word of each state matrix, hence
- // requires no word shuffling. For final XORing step we transpose the
- // matrix by interleaving 32- and then 64-bit words, which allows us to
- // do XOR in NEON registers.
- //
- adr x3, CTRINC // ... and ROT8
- ld1 {v30.4s-v31.4s}, [x3]
-
- // x0..15[0-3] = s0..3[0..3]
- mov x4, x0
- ld4r { v0.4s- v3.4s}, [x4], #16
- ld4r { v4.4s- v7.4s}, [x4], #16
- ld4r { v8.4s-v11.4s}, [x4], #16
- ld4r {v12.4s-v15.4s}, [x4]
-
- // x12 += counter values 0-3
- add v12.4s, v12.4s, v30.4s
-
- mov x3, #10
-
-.Ldoubleround4:
- // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
- // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
- // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
- // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
- add v0.4s, v0.4s, v4.4s
- add v1.4s, v1.4s, v5.4s
- add v2.4s, v2.4s, v6.4s
- add v3.4s, v3.4s, v7.4s
-
- eor v12.16b, v12.16b, v0.16b
- eor v13.16b, v13.16b, v1.16b
- eor v14.16b, v14.16b, v2.16b
- eor v15.16b, v15.16b, v3.16b
-
- rev32 v12.8h, v12.8h
- rev32 v13.8h, v13.8h
- rev32 v14.8h, v14.8h
- rev32 v15.8h, v15.8h
-
- // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
- // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
- // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
- // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
- add v8.4s, v8.4s, v12.4s
- add v9.4s, v9.4s, v13.4s
- add v10.4s, v10.4s, v14.4s
- add v11.4s, v11.4s, v15.4s
-
- eor v16.16b, v4.16b, v8.16b
- eor v17.16b, v5.16b, v9.16b
- eor v18.16b, v6.16b, v10.16b
- eor v19.16b, v7.16b, v11.16b
-
- shl v4.4s, v16.4s, #12
- shl v5.4s, v17.4s, #12
- shl v6.4s, v18.4s, #12
- shl v7.4s, v19.4s, #12
-
- sri v4.4s, v16.4s, #20
- sri v5.4s, v17.4s, #20
- sri v6.4s, v18.4s, #20
- sri v7.4s, v19.4s, #20
-
- // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
- // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
- // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
- // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
- add v0.4s, v0.4s, v4.4s
- add v1.4s, v1.4s, v5.4s
- add v2.4s, v2.4s, v6.4s
- add v3.4s, v3.4s, v7.4s
-
- eor v12.16b, v12.16b, v0.16b
- eor v13.16b, v13.16b, v1.16b
- eor v14.16b, v14.16b, v2.16b
- eor v15.16b, v15.16b, v3.16b
-
- tbl v12.16b, {v12.16b}, v31.16b
- tbl v13.16b, {v13.16b}, v31.16b
- tbl v14.16b, {v14.16b}, v31.16b
- tbl v15.16b, {v15.16b}, v31.16b
-
- // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
- // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
- // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
- // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
- add v8.4s, v8.4s, v12.4s
- add v9.4s, v9.4s, v13.4s
- add v10.4s, v10.4s, v14.4s
- add v11.4s, v11.4s, v15.4s
-
- eor v16.16b, v4.16b, v8.16b
- eor v17.16b, v5.16b, v9.16b
- eor v18.16b, v6.16b, v10.16b
- eor v19.16b, v7.16b, v11.16b
-
- shl v4.4s, v16.4s, #7
- shl v5.4s, v17.4s, #7
- shl v6.4s, v18.4s, #7
- shl v7.4s, v19.4s, #7
-
- sri v4.4s, v16.4s, #25
- sri v5.4s, v17.4s, #25
- sri v6.4s, v18.4s, #25
- sri v7.4s, v19.4s, #25
-
- // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
- // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
- // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
- // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
- add v0.4s, v0.4s, v5.4s
- add v1.4s, v1.4s, v6.4s
- add v2.4s, v2.4s, v7.4s
- add v3.4s, v3.4s, v4.4s
-
- eor v15.16b, v15.16b, v0.16b
- eor v12.16b, v12.16b, v1.16b
- eor v13.16b, v13.16b, v2.16b
- eor v14.16b, v14.16b, v3.16b
-
- rev32 v15.8h, v15.8h
- rev32 v12.8h, v12.8h
- rev32 v13.8h, v13.8h
- rev32 v14.8h, v14.8h
-
- // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
- // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
- // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
- // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
- add v10.4s, v10.4s, v15.4s
- add v11.4s, v11.4s, v12.4s
- add v8.4s, v8.4s, v13.4s
- add v9.4s, v9.4s, v14.4s
-
- eor v16.16b, v5.16b, v10.16b
- eor v17.16b, v6.16b, v11.16b
- eor v18.16b, v7.16b, v8.16b
- eor v19.16b, v4.16b, v9.16b
-
- shl v5.4s, v16.4s, #12
- shl v6.4s, v17.4s, #12
- shl v7.4s, v18.4s, #12
- shl v4.4s, v19.4s, #12
-
- sri v5.4s, v16.4s, #20
- sri v6.4s, v17.4s, #20
- sri v7.4s, v18.4s, #20
- sri v4.4s, v19.4s, #20
-
- // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
- // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
- // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
- // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
- add v0.4s, v0.4s, v5.4s
- add v1.4s, v1.4s, v6.4s
- add v2.4s, v2.4s, v7.4s
- add v3.4s, v3.4s, v4.4s
-
- eor v15.16b, v15.16b, v0.16b
- eor v12.16b, v12.16b, v1.16b
- eor v13.16b, v13.16b, v2.16b
- eor v14.16b, v14.16b, v3.16b
-
- tbl v15.16b, {v15.16b}, v31.16b
- tbl v12.16b, {v12.16b}, v31.16b
- tbl v13.16b, {v13.16b}, v31.16b
- tbl v14.16b, {v14.16b}, v31.16b
-
- // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
- // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
- // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
- // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
- add v10.4s, v10.4s, v15.4s
- add v11.4s, v11.4s, v12.4s
- add v8.4s, v8.4s, v13.4s
- add v9.4s, v9.4s, v14.4s
-
- eor v16.16b, v5.16b, v10.16b
- eor v17.16b, v6.16b, v11.16b
- eor v18.16b, v7.16b, v8.16b
- eor v19.16b, v4.16b, v9.16b
-
- shl v5.4s, v16.4s, #7
- shl v6.4s, v17.4s, #7
- shl v7.4s, v18.4s, #7
- shl v4.4s, v19.4s, #7
-
- sri v5.4s, v16.4s, #25
- sri v6.4s, v17.4s, #25
- sri v7.4s, v18.4s, #25
- sri v4.4s, v19.4s, #25
-
- subs x3, x3, #1
- b.ne .Ldoubleround4
-
- ld4r {v16.4s-v19.4s}, [x0], #16
- ld4r {v20.4s-v23.4s}, [x0], #16
-
- // x12 += counter values 0-3
- add v12.4s, v12.4s, v30.4s
-
- // x0[0-3] += s0[0]
- // x1[0-3] += s0[1]
- // x2[0-3] += s0[2]
- // x3[0-3] += s0[3]
- add v0.4s, v0.4s, v16.4s
- add v1.4s, v1.4s, v17.4s
- add v2.4s, v2.4s, v18.4s
- add v3.4s, v3.4s, v19.4s
-
- ld4r {v24.4s-v27.4s}, [x0], #16
- ld4r {v28.4s-v31.4s}, [x0]
-
- // x4[0-3] += s1[0]
- // x5[0-3] += s1[1]
- // x6[0-3] += s1[2]
- // x7[0-3] += s1[3]
- add v4.4s, v4.4s, v20.4s
- add v5.4s, v5.4s, v21.4s
- add v6.4s, v6.4s, v22.4s
- add v7.4s, v7.4s, v23.4s
-
- // x8[0-3] += s2[0]
- // x9[0-3] += s2[1]
- // x10[0-3] += s2[2]
- // x11[0-3] += s2[3]
- add v8.4s, v8.4s, v24.4s
- add v9.4s, v9.4s, v25.4s
- add v10.4s, v10.4s, v26.4s
- add v11.4s, v11.4s, v27.4s
-
- // x12[0-3] += s3[0]
- // x13[0-3] += s3[1]
- // x14[0-3] += s3[2]
- // x15[0-3] += s3[3]
- add v12.4s, v12.4s, v28.4s
- add v13.4s, v13.4s, v29.4s
- add v14.4s, v14.4s, v30.4s
- add v15.4s, v15.4s, v31.4s
-
- // interleave 32-bit words in state n, n+1
- zip1 v16.4s, v0.4s, v1.4s
- zip2 v17.4s, v0.4s, v1.4s
- zip1 v18.4s, v2.4s, v3.4s
- zip2 v19.4s, v2.4s, v3.4s
- zip1 v20.4s, v4.4s, v5.4s
- zip2 v21.4s, v4.4s, v5.4s
- zip1 v22.4s, v6.4s, v7.4s
- zip2 v23.4s, v6.4s, v7.4s
- zip1 v24.4s, v8.4s, v9.4s
- zip2 v25.4s, v8.4s, v9.4s
- zip1 v26.4s, v10.4s, v11.4s
- zip2 v27.4s, v10.4s, v11.4s
- zip1 v28.4s, v12.4s, v13.4s
- zip2 v29.4s, v12.4s, v13.4s
- zip1 v30.4s, v14.4s, v15.4s
- zip2 v31.4s, v14.4s, v15.4s
-
- // interleave 64-bit words in state n, n+2
- zip1 v0.2d, v16.2d, v18.2d
- zip2 v4.2d, v16.2d, v18.2d
- zip1 v8.2d, v17.2d, v19.2d
- zip2 v12.2d, v17.2d, v19.2d
- ld1 {v16.16b-v19.16b}, [x2], #64
-
- zip1 v1.2d, v20.2d, v22.2d
- zip2 v5.2d, v20.2d, v22.2d
- zip1 v9.2d, v21.2d, v23.2d
- zip2 v13.2d, v21.2d, v23.2d
- ld1 {v20.16b-v23.16b}, [x2], #64
-
- zip1 v2.2d, v24.2d, v26.2d
- zip2 v6.2d, v24.2d, v26.2d
- zip1 v10.2d, v25.2d, v27.2d
- zip2 v14.2d, v25.2d, v27.2d
- ld1 {v24.16b-v27.16b}, [x2], #64
-
- zip1 v3.2d, v28.2d, v30.2d
- zip2 v7.2d, v28.2d, v30.2d
- zip1 v11.2d, v29.2d, v31.2d
- zip2 v15.2d, v29.2d, v31.2d
- ld1 {v28.16b-v31.16b}, [x2]
-
- // xor with corresponding input, write to output
- eor v16.16b, v16.16b, v0.16b
- eor v17.16b, v17.16b, v1.16b
- eor v18.16b, v18.16b, v2.16b
- eor v19.16b, v19.16b, v3.16b
- eor v20.16b, v20.16b, v4.16b
- eor v21.16b, v21.16b, v5.16b
- st1 {v16.16b-v19.16b}, [x1], #64
- eor v22.16b, v22.16b, v6.16b
- eor v23.16b, v23.16b, v7.16b
- eor v24.16b, v24.16b, v8.16b
- eor v25.16b, v25.16b, v9.16b
- st1 {v20.16b-v23.16b}, [x1], #64
- eor v26.16b, v26.16b, v10.16b
- eor v27.16b, v27.16b, v11.16b
- eor v28.16b, v28.16b, v12.16b
- st1 {v24.16b-v27.16b}, [x1], #64
- eor v29.16b, v29.16b, v13.16b
- eor v30.16b, v30.16b, v14.16b
- eor v31.16b, v31.16b, v15.16b
- st1 {v28.16b-v31.16b}, [x1]
-
- ret
-ENDPROC(chacha20_4block_xor_neon)
-
-CTRINC: .word 0, 1, 2, 3
-ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
deleted file mode 100644
index 727579c93ded..000000000000
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
- *
- * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
-{
- u8 buf[CHACHA20_BLOCK_SIZE];
-
- while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
- kernel_neon_begin();
- chacha20_4block_xor_neon(state, dst, src);
- kernel_neon_end();
- bytes -= CHACHA20_BLOCK_SIZE * 4;
- src += CHACHA20_BLOCK_SIZE * 4;
- dst += CHACHA20_BLOCK_SIZE * 4;
- state[12] += 4;
- }
-
- if (!bytes)
- return;
-
- kernel_neon_begin();
- while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block_xor_neon(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE;
- src += CHACHA20_BLOCK_SIZE;
- dst += CHACHA20_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha20_block_xor_neon(state, buf, buf);
- memcpy(dst, buf, bytes);
- }
- kernel_neon_end();
-}
-
-static int chacha20_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
- return crypto_chacha20_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, false);
-
- crypto_chacha20_init(state, ctx, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static struct skcipher_alg alg = {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA20_KEY_SIZE,
- .max_keysize = CHACHA20_KEY_SIZE,
- .ivsize = CHACHA20_IV_SIZE,
- .chunksize = CHACHA20_BLOCK_SIZE,
- .walksize = 4 * CHACHA20_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha20_neon,
- .decrypt = chacha20_neon,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_ASIMD))
- return -ENODEV;
-
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index cf830219846b..419212c31246 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@@ -76,7 +75,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
@@ -99,7 +97,6 @@ endif
ifeq ($(avx2_supported),yes)
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
- chacha20-x86_64-y += chacha20-avx2-x86_64.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S
deleted file mode 100644
index f3cd26f48332..000000000000
--- a/arch/x86/crypto/chacha20-avx2-x86_64.S
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section .rodata.cst32.ROT8, "aM", @progbits, 32
-.align 32
-ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
- .octa 0x0e0d0c0f0a09080b0605040702010003
-
-.section .rodata.cst32.ROT16, "aM", @progbits, 32
-.align 32
-ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
- .octa 0x0d0c0f0e09080b0a0504070601000302
-
-.section .rodata.cst32.CTRINC, "aM", @progbits, 32
-.align 32
-CTRINC: .octa 0x00000003000000020000000100000000
- .octa 0x00000007000000060000000500000004
-
-.text
-
-ENTRY(chacha20_8block_xor_avx2)
- # %rdi: Input state matrix, s
- # %rsi: 8 data blocks output, o
- # %rdx: 8 data blocks input, i
-
- # This function encrypts eight consecutive ChaCha20 blocks by loading
- # the state matrix in AVX registers eight times. As we need some
- # scratch registers, we save the first four registers on the stack. The
- # algorithm performs each operation on the corresponding word of each
- # state matrix, hence requires no word shuffling. For final XORing step
- # we transpose the matrix by interleaving 32-, 64- and then 128-bit
- # words, which allows us to do XOR in AVX registers. 8/16-bit word
- # rotation is done with the slightly better performing byte shuffling,
- # 7/12-bit word rotation uses traditional shift+OR.
-
- vzeroupper
- # 4 * 32 byte stack, 32-byte aligned
- lea 8(%rsp),%r10
- and $~31, %rsp
- sub $0x80, %rsp
-
- # x0..15[0-7] = s[0..15]
- vpbroadcastd 0x00(%rdi),%ymm0
- vpbroadcastd 0x04(%rdi),%ymm1
- vpbroadcastd 0x08(%rdi),%ymm2
- vpbroadcastd 0x0c(%rdi),%ymm3
- vpbroadcastd 0x10(%rdi),%ymm4
- vpbroadcastd 0x14(%rdi),%ymm5
- vpbroadcastd 0x18(%rdi),%ymm6
- vpbroadcastd 0x1c(%rdi),%ymm7
- vpbroadcastd 0x20(%rdi),%ymm8
- vpbroadcastd 0x24(%rdi),%ymm9
- vpbroadcastd 0x28(%rdi),%ymm10
- vpbroadcastd 0x2c(%rdi),%ymm11
- vpbroadcastd 0x30(%rdi),%ymm12
- vpbroadcastd 0x34(%rdi),%ymm13
- vpbroadcastd 0x38(%rdi),%ymm14
- vpbroadcastd 0x3c(%rdi),%ymm15
- # x0..3 on stack
- vmovdqa %ymm0,0x00(%rsp)
- vmovdqa %ymm1,0x20(%rsp)
- vmovdqa %ymm2,0x40(%rsp)
- vmovdqa %ymm3,0x60(%rsp)
-
- vmovdqa CTRINC(%rip),%ymm1
- vmovdqa ROT8(%rip),%ymm2
- vmovdqa ROT16(%rip),%ymm3
-
- # x12 += counter values 0-3
- vpaddd %ymm1,%ymm12,%ymm12
-
- mov $10,%ecx
-
-.Ldoubleround8:
- # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
- vpaddd 0x00(%rsp),%ymm4,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpxor %ymm0,%ymm12,%ymm12
- vpshufb %ymm3,%ymm12,%ymm12
- # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
- vpaddd 0x20(%rsp),%ymm5,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpxor %ymm0,%ymm13,%ymm13
- vpshufb %ymm3,%ymm13,%ymm13
- # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
- vpaddd 0x40(%rsp),%ymm6,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpxor %ymm0,%ymm14,%ymm14
- vpshufb %ymm3,%ymm14,%ymm14
- # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
- vpaddd 0x60(%rsp),%ymm7,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpxor %ymm0,%ymm15,%ymm15
- vpshufb %ymm3,%ymm15,%ymm15
-
- # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
- vpaddd %ymm12,%ymm8,%ymm8
- vpxor %ymm8,%ymm4,%ymm4
- vpslld $12,%ymm4,%ymm0
- vpsrld $20,%ymm4,%ymm4
- vpor %ymm0,%ymm4,%ymm4
- # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
- vpaddd %ymm13,%ymm9,%ymm9
- vpxor %ymm9,%ymm5,%ymm5
- vpslld $12,%ymm5,%ymm0
- vpsrld $20,%ymm5,%ymm5
- vpor %ymm0,%ymm5,%ymm5
- # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
- vpaddd %ymm14,%ymm10,%ymm10
- vpxor %ymm10,%ymm6,%ymm6
- vpslld $12,%ymm6,%ymm0
- vpsrld $20,%ymm6,%ymm6
- vpor %ymm0,%ymm6,%ymm6
- # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
- vpaddd %ymm15,%ymm11,%ymm11
- vpxor %ymm11,%ymm7,%ymm7
- vpslld $12,%ymm7,%ymm0
- vpsrld $20,%ymm7,%ymm7
- vpor %ymm0,%ymm7,%ymm7
-
- # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
- vpaddd 0x00(%rsp),%ymm4,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpxor %ymm0,%ymm12,%ymm12
- vpshufb %ymm2,%ymm12,%ymm12
- # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
- vpaddd 0x20(%rsp),%ymm5,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpxor %ymm0,%ymm13,%ymm13
- vpshufb %ymm2,%ymm13,%ymm13
- # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
- vpaddd 0x40(%rsp),%ymm6,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpxor %ymm0,%ymm14,%ymm14
- vpshufb %ymm2,%ymm14,%ymm14
- # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
- vpaddd 0x60(%rsp),%ymm7,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpxor %ymm0,%ymm15,%ymm15
- vpshufb %ymm2,%ymm15,%ymm15
-
- # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
- vpaddd %ymm12,%ymm8,%ymm8
- vpxor %ymm8,%ymm4,%ymm4
- vpslld $7,%ymm4,%ymm0
- vpsrld $25,%ymm4,%ymm4
- vpor %ymm0,%ymm4,%ymm4
- # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
- vpaddd %ymm13,%ymm9,%ymm9
- vpxor %ymm9,%ymm5,%ymm5
- vpslld $7,%ymm5,%ymm0
- vpsrld $25,%ymm5,%ymm5
- vpor %ymm0,%ymm5,%ymm5
- # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
- vpaddd %ymm14,%ymm10,%ymm10
- vpxor %ymm10,%ymm6,%ymm6
- vpslld $7,%ymm6,%ymm0
- vpsrld $25,%ymm6,%ymm6
- vpor %ymm0,%ymm6,%ymm6
- # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
- vpaddd %ymm15,%ymm11,%ymm11
- vpxor %ymm11,%ymm7,%ymm7
- vpslld $7,%ymm7,%ymm0
- vpsrld $25,%ymm7,%ymm7
- vpor %ymm0,%ymm7,%ymm7
-
- # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
- vpaddd 0x00(%rsp),%ymm5,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpxor %ymm0,%ymm15,%ymm15
- vpshufb %ymm3,%ymm15,%ymm15
- # x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0
- vpaddd 0x20(%rsp),%ymm6,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpxor %ymm0,%ymm12,%ymm12
- vpshufb %ymm3,%ymm12,%ymm12
- # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
- vpaddd 0x40(%rsp),%ymm7,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpxor %ymm0,%ymm13,%ymm13
- vpshufb %ymm3,%ymm13,%ymm13
- # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
- vpaddd 0x60(%rsp),%ymm4,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpxor %ymm0,%ymm14,%ymm14
- vpshufb %ymm3,%ymm14,%ymm14
-
- # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
- vpaddd %ymm15,%ymm10,%ymm10
- vpxor %ymm10,%ymm5,%ymm5
- vpslld $12,%ymm5,%ymm0
- vpsrld $20,%ymm5,%ymm5
- vpor %ymm0,%ymm5,%ymm5
- # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
- vpaddd %ymm12,%ymm11,%ymm11
- vpxor %ymm11,%ymm6,%ymm6
- vpslld $12,%ymm6,%ymm0
- vpsrld $20,%ymm6,%ymm6
- vpor %ymm0,%ymm6,%ymm6
- # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
- vpaddd %ymm13,%ymm8,%ymm8
- vpxor %ymm8,%ymm7,%ymm7
- vpslld $12,%ymm7,%ymm0
- vpsrld $20,%ymm7,%ymm7
- vpor %ymm0,%ymm7,%ymm7
- # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
- vpaddd %ymm14,%ymm9,%ymm9
- vpxor %ymm9,%ymm4,%ymm4
- vpslld $12,%ymm4,%ymm0
- vpsrld $20,%ymm4,%ymm4
- vpor %ymm0,%ymm4,%ymm4
-
- # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
- vpaddd 0x00(%rsp),%ymm5,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpxor %ymm0,%ymm15,%ymm15
- vpshufb %ymm2,%ymm15,%ymm15
- # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
- vpaddd 0x20(%rsp),%ymm6,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpxor %ymm0,%ymm12,%ymm12
- vpshufb %ymm2,%ymm12,%ymm12
- # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
- vpaddd 0x40(%rsp),%ymm7,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpxor %ymm0,%ymm13,%ymm13
- vpshufb %ymm2,%ymm13,%ymm13
- # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
- vpaddd 0x60(%rsp),%ymm4,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpxor %ymm0,%ymm14,%ymm14
- vpshufb %ymm2,%ymm14,%ymm14
-
- # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
- vpaddd %ymm15,%ymm10,%ymm10
- vpxor %ymm10,%ymm5,%ymm5
- vpslld $7,%ymm5,%ymm0
- vpsrld $25,%ymm5,%ymm5
- vpor %ymm0,%ymm5,%ymm5
- # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
- vpaddd %ymm12,%ymm11,%ymm11
- vpxor %ymm11,%ymm6,%ymm6
- vpslld $7,%ymm6,%ymm0
- vpsrld $25,%ymm6,%ymm6
- vpor %ymm0,%ymm6,%ymm6
- # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
- vpaddd %ymm13,%ymm8,%ymm8
- vpxor %ymm8,%ymm7,%ymm7
- vpslld $7,%ymm7,%ymm0
- vpsrld $25,%ymm7,%ymm7
- vpor %ymm0,%ymm7,%ymm7
- # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
- vpaddd %ymm14,%ymm9,%ymm9
- vpxor %ymm9,%ymm4,%ymm4
- vpslld $7,%ymm4,%ymm0
- vpsrld $25,%ymm4,%ymm4
- vpor %ymm0,%ymm4,%ymm4
-
- dec %ecx
- jnz .Ldoubleround8
-
- # x0..15[0-3] += s[0..15]
- vpbroadcastd 0x00(%rdi),%ymm0
- vpaddd 0x00(%rsp),%ymm0,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpbroadcastd 0x04(%rdi),%ymm0
- vpaddd 0x20(%rsp),%ymm0,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpbroadcastd 0x08(%rdi),%ymm0
- vpaddd 0x40(%rsp),%ymm0,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpbroadcastd 0x0c(%rdi),%ymm0
- vpaddd 0x60(%rsp),%ymm0,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpbroadcastd 0x10(%rdi),%ymm0
- vpaddd %ymm0,%ymm4,%ymm4
- vpbroadcastd 0x14(%rdi),%ymm0
- vpaddd %ymm0,%ymm5,%ymm5
- vpbroadcastd 0x18(%rdi),%ymm0
- vpaddd %ymm0,%ymm6,%ymm6
- vpbroadcastd 0x1c(%rdi),%ymm0
- vpaddd %ymm0,%ymm7,%ymm7
- vpbroadcastd 0x20(%rdi),%ymm0
- vpaddd %ymm0,%ymm8,%ymm8
- vpbroadcastd 0x24(%rdi),%ymm0
- vpaddd %ymm0,%ymm9,%ymm9
- vpbroadcastd 0x28(%rdi),%ymm0
- vpaddd %ymm0,%ymm10,%ymm10
- vpbroadcastd 0x2c(%rdi),%ymm0
- vpaddd %ymm0,%ymm11,%ymm11
- vpbroadcastd 0x30(%rdi),%ymm0
- vpaddd %ymm0,%ymm12,%ymm12
- vpbroadcastd 0x34(%rdi),%ymm0
- vpaddd %ymm0,%ymm13,%ymm13
- vpbroadcastd 0x38(%rdi),%ymm0
- vpaddd %ymm0,%ymm14,%ymm14
- vpbroadcastd 0x3c(%rdi),%ymm0
- vpaddd %ymm0,%ymm15,%ymm15
-
- # x12 += counter values 0-3
- vpaddd %ymm1,%ymm12,%ymm12
-
- # interleave 32-bit words in state n, n+1
- vmovdqa 0x00(%rsp),%ymm0
- vmovdqa 0x20(%rsp),%ymm1
- vpunpckldq %ymm1,%ymm0,%ymm2
- vpunpckhdq %ymm1,%ymm0,%ymm1
- vmovdqa %ymm2,0x00(%rsp)
- vmovdqa %ymm1,0x20(%rsp)
- vmovdqa 0x40(%rsp),%ymm0
- vmovdqa 0x60(%rsp),%ymm1
- vpunpckldq %ymm1,%ymm0,%ymm2
- vpunpckhdq %ymm1,%ymm0,%ymm1
- vmovdqa %ymm2,0x40(%rsp)
- vmovdqa %ymm1,0x60(%rsp)
- vmovdqa %ymm4,%ymm0
- vpunpckldq %ymm5,%ymm0,%ymm4
- vpunpckhdq %ymm5,%ymm0,%ymm5
- vmovdqa %ymm6,%ymm0
- vpunpckldq %ymm7,%ymm0,%ymm6
- vpunpckhdq %ymm7,%ymm0,%ymm7
- vmovdqa %ymm8,%ymm0
- vpunpckldq %ymm9,%ymm0,%ymm8
- vpunpckhdq %ymm9,%ymm0,%ymm9
- vmovdqa %ymm10,%ymm0
- vpunpckldq %ymm11,%ymm0,%ymm10
- vpunpckhdq %ymm11,%ymm0,%ymm11
- vmovdqa %ymm12,%ymm0
- vpunpckldq %ymm13,%ymm0,%ymm12
- vpunpckhdq %ymm13,%ymm0,%ymm13
- vmovdqa %ymm14,%ymm0
- vpunpckldq %ymm15,%ymm0,%ymm14
- vpunpckhdq %ymm15,%ymm0,%ymm15
-
- # interleave 64-bit words in state n, n+2
- vmovdqa 0x00(%rsp),%ymm0
- vmovdqa 0x40(%rsp),%ymm2
- vpunpcklqdq %ymm2,%ymm0,%ymm1
- vpunpckhqdq %ymm2,%ymm0,%ymm2
- vmovdqa %ymm1,0x00(%rsp)
- vmovdqa %ymm2,0x40(%rsp)
- vmovdqa 0x20(%rsp),%ymm0
- vmovdqa 0x60(%rsp),%ymm2
- vpunpcklqdq %ymm2,%ymm0,%ymm1
- vpunpckhqdq %ymm2,%ymm0,%ymm2
- vmovdqa %ymm1,0x20(%rsp)
- vmovdqa %ymm2,0x60(%rsp)
- vmovdqa %ymm4,%ymm0
- vpunpcklqdq %ymm6,%ymm0,%ymm4
- vpunpckhqdq %ymm6,%ymm0,%ymm6
- vmovdqa %ymm5,%ymm0
- vpunpcklqdq %ymm7,%ymm0,%ymm5
- vpunpckhqdq %ymm7,%ymm0,%ymm7
- vmovdqa %ymm8,%ymm0
- vpunpcklqdq %ymm10,%ymm0,%ymm8
- vpunpckhqdq %ymm10,%ymm0,%ymm10
- vmovdqa %ymm9,%ymm0
- vpunpcklqdq %ymm11,%ymm0,%ymm9
- vpunpckhqdq %ymm11,%ymm0,%ymm11
- vmovdqa %ymm12,%ymm0
- vpunpcklqdq %ymm14,%ymm0,%ymm12
- vpunpckhqdq %ymm14,%ymm0,%ymm14
- vmovdqa %ymm13,%ymm0
- vpunpcklqdq %ymm15,%ymm0,%ymm13
- vpunpckhqdq %ymm15,%ymm0,%ymm15
-
- # interleave 128-bit words in state n, n+4
- vmovdqa 0x00(%rsp),%ymm0
- vperm2i128 $0x20,%ymm4,%ymm0,%ymm1
- vperm2i128 $0x31,%ymm4,%ymm0,%ymm4
- vmovdqa %ymm1,0x00(%rsp)
- vmovdqa 0x20(%rsp),%ymm0
- vperm2i128 $0x20,%ymm5,%ymm0,%ymm1
- vperm2i128 $0x31,%ymm5,%ymm0,%ymm5
- vmovdqa %ymm1,0x20(%rsp)
- vmovdqa 0x40(%rsp),%ymm0
- vperm2i128 $0x20,%ymm6,%ymm0,%ymm1
- vperm2i128 $0x31,%ymm6,%ymm0,%ymm6
- vmovdqa %ymm1,0x40(%rsp)
- vmovdqa 0x60(%rsp),%ymm0
- vperm2i128 $0x20,%ymm7,%ymm0,%ymm1
- vperm2i128 $0x31,%ymm7,%ymm0,%ymm7
- vmovdqa %ymm1,0x60(%rsp)
- vperm2i128 $0x20,%ymm12,%ymm8,%ymm0
- vperm2i128 $0x31,%ymm12,%ymm8,%ymm12
- vmovdqa %ymm0,%ymm8
- vperm2i128 $0x20,%ymm13,%ymm9,%ymm0
- vperm2i128 $0x31,%ymm13,%ymm9,%ymm13
- vmovdqa %ymm0,%ymm9
- vperm2i128 $0x20,%ymm14,%ymm10,%ymm0
- vperm2i128 $0x31,%ymm14,%ymm10,%ymm14
- vmovdqa %ymm0,%ymm10
- vperm2i128 $0x20,%ymm15,%ymm11,%ymm0
- vperm2i128 $0x31,%ymm15,%ymm11,%ymm15
- vmovdqa %ymm0,%ymm11
-
- # xor with corresponding input, write to output
- vmovdqa 0x00(%rsp),%ymm0
- vpxor 0x0000(%rdx),%ymm0,%ymm0
- vmovdqu %ymm0,0x0000(%rsi)
- vmovdqa 0x20(%rsp),%ymm0
- vpxor 0x0080(%rdx),%ymm0,%ymm0
- vmovdqu %ymm0,0x0080(%rsi)
- vmovdqa 0x40(%rsp),%ymm0
- vpxor 0x0040(%rdx),%ymm0,%ymm0
- vmovdqu %ymm0,0x0040(%rsi)
- vmovdqa 0x60(%rsp),%ymm0
- vpxor 0x00c0(%rdx),%ymm0,%ymm0
- vmovdqu %ymm0,0x00c0(%rsi)
- vpxor 0x0100(%rdx),%ymm4,%ymm4
- vmovdqu %ymm4,0x0100(%rsi)
- vpxor 0x0180(%rdx),%ymm5,%ymm5
- vmovdqu %ymm5,0x00180(%rsi)
- vpxor 0x0140(%rdx),%ymm6,%ymm6
- vmovdqu %ymm6,0x0140(%rsi)
- vpxor 0x01c0(%rdx),%ymm7,%ymm7
- vmovdqu %ymm7,0x01c0(%rsi)
- vpxor 0x0020(%rdx),%ymm8,%ymm8
- vmovdqu %ymm8,0x0020(%rsi)
- vpxor 0x00a0(%rdx),%ymm9,%ymm9
- vmovdqu %ymm9,0x00a0(%rsi)
- vpxor 0x0060(%rdx),%ymm10,%ymm10
- vmovdqu %ymm10,0x0060(%rsi)
- vpxor 0x00e0(%rdx),%ymm11,%ymm11
- vmovdqu %ymm11,0x00e0(%rsi)
- vpxor 0x0120(%rdx),%ymm12,%ymm12
- vmovdqu %ymm12,0x0120(%rsi)
- vpxor 0x01a0(%rdx),%ymm13,%ymm13
- vmovdqu %ymm13,0x01a0(%rsi)
- vpxor 0x0160(%rdx),%ymm14,%ymm14
- vmovdqu %ymm14,0x0160(%rsi)
- vpxor 0x01e0(%rdx),%ymm15,%ymm15
- vmovdqu %ymm15,0x01e0(%rsi)
-
- vzeroupper
- lea -8(%r10),%rsp
- ret
-ENDPROC(chacha20_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
deleted file mode 100644
index 512a2b500fd1..000000000000
--- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
+++ /dev/null
@@ -1,630 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section .rodata.cst16.ROT8, "aM", @progbits, 16
-.align 16
-ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
-.section .rodata.cst16.ROT16, "aM", @progbits, 16
-.align 16
-ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
-.section .rodata.cst16.CTRINC, "aM", @progbits, 16
-.align 16
-CTRINC: .octa 0x00000003000000020000000100000000
-
-.text
-
-ENTRY(chacha20_block_xor_ssse3)
- # %rdi: Input state matrix, s
- # %rsi: 1 data block output, o
- # %rdx: 1 data block input, i
-
- # This function encrypts one ChaCha20 block by loading the state matrix
- # in four SSE registers. It performs matrix operation on four words in
- # parallel, but requireds shuffling to rearrange the words after each
- # round. 8/16-bit word rotation is done with the slightly better
- # performing SSSE3 byte shuffling, 7/12-bit word rotation uses
- # traditional shift+OR.
-
- # x0..3 = s0..3
- movdqa 0x00(%rdi),%xmm0
- movdqa 0x10(%rdi),%xmm1
- movdqa 0x20(%rdi),%xmm2
- movdqa 0x30(%rdi),%xmm3
- movdqa %xmm0,%xmm8
- movdqa %xmm1,%xmm9
- movdqa %xmm2,%xmm10
- movdqa %xmm3,%xmm11
-
- movdqa ROT8(%rip),%xmm4
- movdqa ROT16(%rip),%xmm5
-
- mov $10,%ecx
-
-.Ldoubleround:
-
- # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
- paddd %xmm1,%xmm0
- pxor %xmm0,%xmm3
- pshufb %xmm5,%xmm3
-
- # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
- paddd %xmm3,%xmm2
- pxor %xmm2,%xmm1
- movdqa %xmm1,%xmm6
- pslld $12,%xmm6
- psrld $20,%xmm1
- por %xmm6,%xmm1
-
- # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
- paddd %xmm1,%xmm0
- pxor %xmm0,%xmm3
- pshufb %xmm4,%xmm3
-
- # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
- paddd %xmm3,%xmm2
- pxor %xmm2,%xmm1
- movdqa %xmm1,%xmm7
- pslld $7,%xmm7
- psrld $25,%xmm1
- por %xmm7,%xmm1
-
- # x1 = shuffle32(x1, MASK(0, 3, 2, 1))
- pshufd $0x39,%xmm1,%xmm1
- # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
- pshufd $0x4e,%xmm2,%xmm2
- # x3 = shuffle32(x3, MASK(2, 1, 0, 3))
- pshufd $0x93,%xmm3,%xmm3
-
- # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
- paddd %xmm1,%xmm0
- pxor %xmm0,%xmm3
- pshufb %xmm5,%xmm3
-
- # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
- paddd %xmm3,%xmm2
- pxor %xmm2,%xmm1
- movdqa %xmm1,%xmm6
- pslld $12,%xmm6
- psrld $20,%xmm1
- por %xmm6,%xmm1
-
- # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
- paddd %xmm1,%xmm0
- pxor %xmm0,%xmm3
- pshufb %xmm4,%xmm3
-
- # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
- paddd %xmm3,%xmm2
- pxor %xmm2,%xmm1
- movdqa %xmm1,%xmm7
- pslld $7,%xmm7
- psrld $25,%xmm1
- por %xmm7,%xmm1
-
- # x1 = shuffle32(x1, MASK(2, 1, 0, 3))
- pshufd $0x93,%xmm1,%xmm1
- # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
- pshufd $0x4e,%xmm2,%xmm2
- # x3 = shuffle32(x3, MASK(0, 3, 2, 1))
- pshufd $0x39,%xmm3,%xmm3
-
- dec %ecx
- jnz .Ldoubleround
-
- # o0 = i0 ^ (x0 + s0)
- movdqu 0x00(%rdx),%xmm4
- paddd %xmm8,%xmm0
- pxor %xmm4,%xmm0
- movdqu %xmm0,0x00(%rsi)
- # o1 = i1 ^ (x1 + s1)
- movdqu 0x10(%rdx),%xmm5
- paddd %xmm9,%xmm1
- pxor %xmm5,%xmm1
- movdqu %xmm1,0x10(%rsi)
- # o2 = i2 ^ (x2 + s2)
- movdqu 0x20(%rdx),%xmm6
- paddd %xmm10,%xmm2
- pxor %xmm6,%xmm2
- movdqu %xmm2,0x20(%rsi)
- # o3 = i3 ^ (x3 + s3)
- movdqu 0x30(%rdx),%xmm7
- paddd %xmm11,%xmm3
- pxor %xmm7,%xmm3
- movdqu %xmm3,0x30(%rsi)
-
- ret
-ENDPROC(chacha20_block_xor_ssse3)
-
-ENTRY(chacha20_4block_xor_ssse3)
- # %rdi: Input state matrix, s
- # %rsi: 4 data blocks output, o
- # %rdx: 4 data blocks input, i
-
- # This function encrypts four consecutive ChaCha20 blocks by loading the
- # the state matrix in SSE registers four times. As we need some scratch
- # registers, we save the first four registers on the stack. The
- # algorithm performs each operation on the corresponding word of each
- # state matrix, hence requires no word shuffling. For final XORing step
- # we transpose the matrix by interleaving 32- and then 64-bit words,
- # which allows us to do XOR in SSE registers. 8/16-bit word rotation is
- # done with the slightly better performing SSSE3 byte shuffling,
- # 7/12-bit word rotation uses traditional shift+OR.
-
- lea 8(%rsp),%r10
- sub $0x80,%rsp
- and $~63,%rsp
-
- # x0..15[0-3] = s0..3[0..3]
- movq 0x00(%rdi),%xmm1
- pshufd $0x00,%xmm1,%xmm0
- pshufd $0x55,%xmm1,%xmm1
- movq 0x08(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- movq 0x10(%rdi),%xmm5
- pshufd $0x00,%xmm5,%xmm4
- pshufd $0x55,%xmm5,%xmm5
- movq 0x18(%rdi),%xmm7
- pshufd $0x00,%xmm7,%xmm6
- pshufd $0x55,%xmm7,%xmm7
- movq 0x20(%rdi),%xmm9
- pshufd $0x00,%xmm9,%xmm8
- pshufd $0x55,%xmm9,%xmm9
- movq 0x28(%rdi),%xmm11
- pshufd $0x00,%xmm11,%xmm10
- pshufd $0x55,%xmm11,%xmm11
- movq 0x30(%rdi),%xmm13
- pshufd $0x00,%xmm13,%xmm12
- pshufd $0x55,%xmm13,%xmm13
- movq 0x38(%rdi),%xmm15
- pshufd $0x00,%xmm15,%xmm14
- pshufd $0x55,%xmm15,%xmm15
- # x0..3 on stack
- movdqa %xmm0,0x00(%rsp)
- movdqa %xmm1,0x10(%rsp)
- movdqa %xmm2,0x20(%rsp)
- movdqa %xmm3,0x30(%rsp)
-
- movdqa CTRINC(%rip),%xmm1
- movdqa ROT8(%rip),%xmm2
- movdqa ROT16(%rip),%xmm3
-
- # x12 += counter values 0-3
- paddd %xmm1,%xmm12
-
- mov $10,%ecx
-
-.Ldoubleround4:
- # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
- movdqa 0x00(%rsp),%xmm0
- paddd %xmm4,%xmm0
- movdqa %xmm0,0x00(%rsp)
- pxor %xmm0,%xmm12
- pshufb %xmm3,%xmm12
- # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
- movdqa 0x10(%rsp),%xmm0
- paddd %xmm5,%xmm0
- movdqa %xmm0,0x10(%rsp)
- pxor %xmm0,%xmm13
- pshufb %xmm3,%xmm13
- # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
- movdqa 0x20(%rsp),%xmm0
- paddd %xmm6,%xmm0
- movdqa %xmm0,0x20(%rsp)
- pxor %xmm0,%xmm14
- pshufb %xmm3,%xmm14
- # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
- movdqa 0x30(%rsp),%xmm0
- paddd %xmm7,%xmm0
- movdqa %xmm0,0x30(%rsp)
- pxor %xmm0,%xmm15
- pshufb %xmm3,%xmm15
-
- # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
- paddd %xmm12,%xmm8
- pxor %xmm8,%xmm4
- movdqa %xmm4,%xmm0
- pslld $12,%xmm0
- psrld $20,%xmm4
- por %xmm0,%xmm4
- # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
- paddd %xmm13,%xmm9
- pxor %xmm9,%xmm5
- movdqa %xmm5,%xmm0
- pslld $12,%xmm0
- psrld $20,%xmm5
- por %xmm0,%xmm5
- # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
- paddd %xmm14,%xmm10
- pxor %xmm10,%xmm6
- movdqa %xmm6,%xmm0
- pslld $12,%xmm0
- psrld $20,%xmm6
- por %xmm0,%xmm6
- # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
- paddd %xmm15,%xmm11
- pxor %xmm11,%xmm7
- movdqa %xmm7,%xmm0
- pslld $12,%xmm0
- psrld $20,%xmm7
- por %xmm0,%xmm7
-
- # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
- movdqa 0x00(%rsp),%xmm0
- paddd %xmm4,%xmm0
- movdqa %xmm0,0x00(%rsp)
- pxor %xmm0,%xmm12
- pshufb %xmm2,%xmm12
- # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
- movdqa 0x10(%rsp),%xmm0
- paddd %xmm5,%xmm0
- movdqa %xmm0,0x10(%rsp)
- pxor %xmm0,%xmm13
- pshufb %xmm2,%xmm13
- # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
- movdqa 0x20(%rsp),%xmm0
- paddd %xmm6,%xmm0
- movdqa %xmm0,0x20(%rsp)
- pxor %xmm0,%xmm14
- pshufb %xmm2,%xmm14
- # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
- movdqa 0x30(%rsp),%xmm0
- paddd %xmm7,%xmm0
- movdqa %xmm0,0x30(%rsp)
- pxor %xmm0,%xmm15
- pshufb %xmm2,%xmm15
-
- # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
- paddd %xmm12,%xmm8
- pxor %xmm8,%xmm4
- movdqa %xmm4,%xmm0
- pslld $7,%xmm0
- psrld $25,%xmm4
- por %xmm0,%xmm4
- # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
- paddd %xmm13,%xmm9
- pxor %xmm9,%xmm5
- movdqa %xmm5,%xmm0
- pslld $7,%xmm0
- psrld $25,%xmm5
- por %xmm0,%xmm5
- # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
- paddd %xmm14,%xmm10
- pxor %xmm10,%xmm6
- movdqa %xmm6,%xmm0
- pslld $7,%xmm0
- psrld $25,%xmm6
- por %xmm0,%xmm6
- # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
- paddd %xmm15,%xmm11
- pxor %xmm11,%xmm7
- movdqa %xmm7,%xmm0
- pslld $7,%xmm0
- psrld $25,%xmm7
- por %xmm0,%xmm7
-
- # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
- movdqa 0x00(%rsp),%xmm0
- paddd %xmm5,%xmm0
- movdqa %xmm0,0x00(%rsp)
- pxor %xmm0,%xmm15
- pshufb %xmm3,%xmm15
- # x1 += x6, x12 = rotl32(x12 ^ x1, 16)
- movdqa 0x10(%rsp),%xmm0
- paddd %xmm6,%xmm0
- movdqa %xmm0,0x10(%rsp)
- pxor %xmm0,%xmm12
- pshufb %xmm3,%xmm12
- # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
- movdqa 0x20(%rsp),%xmm0
- paddd %xmm7,%xmm0
- movdqa %xmm0,0x20(%rsp)
- pxor %xmm0,%xmm13
- pshufb %xmm3,%xmm13
- # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
- movdqa 0x30(%rsp),%xmm0
- paddd %xmm4,%xmm0
- movdqa %xmm0,0x30(%rsp)
- pxor %xmm0,%xmm14
- pshufb %xmm3,%xmm14
-
- # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
- paddd %xmm15,%xmm10
- pxor %xmm10,%xmm5
- movdqa %xmm5,%xmm0
- pslld $12,%xmm0
- psrld $20,%xmm5
- por %xmm0,%xmm5
- # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
- paddd %xmm12,%xmm11
- pxor %xmm11,%xmm6
- movdqa %xmm6,%xmm0
- pslld $12,%xmm0
- psrld $20,%xmm6
- por %xmm0,%xmm6
- # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
- paddd %xmm13,%xmm8
- pxor %xmm8,%xmm7
- movdqa %xmm7,%xmm0
- pslld $12,%xmm0
- psrld $20,%xmm7
- por %xmm0,%xmm7
- # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
- paddd %xmm14,%xmm9
- pxor %xmm9,%xmm4
- movdqa %xmm4,%xmm0
- pslld $12,%xmm0
- psrld $20,%xmm4
- por %xmm0,%xmm4
-
- # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
- movdqa 0x00(%rsp),%xmm0
- paddd %xmm5,%xmm0
- movdqa %xmm0,0x00(%rsp)
- pxor %xmm0,%xmm15
- pshufb %xmm2,%xmm15
- # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
- movdqa 0x10(%rsp),%xmm0
- paddd %xmm6,%xmm0
- movdqa %xmm0,0x10(%rsp)
- pxor %xmm0,%xmm12
- pshufb %xmm2,%xmm12
- # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
- movdqa 0x20(%rsp),%xmm0
- paddd %xmm7,%xmm0
- movdqa %xmm0,0x20(%rsp)
- pxor %xmm0,%xmm13
- pshufb %xmm2,%xmm13
- # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
- movdqa 0x30(%rsp),%xmm0
- paddd %xmm4,%xmm0
- movdqa %xmm0,0x30(%rsp)
- pxor %xmm0,%xmm14
- pshufb %xmm2,%xmm14
-
- # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
- paddd %xmm15,%xmm10
- pxor %xmm10,%xmm5
- movdqa %xmm5,%xmm0
- pslld $7,%xmm0
- psrld $25,%xmm5
- por %xmm0,%xmm5
- # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
- paddd %xmm12,%xmm11
- pxor %xmm11,%xmm6
- movdqa %xmm6,%xmm0
- pslld $7,%xmm0
- psrld $25,%xmm6
- por %xmm0,%xmm6
- # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
- paddd %xmm13,%xmm8
- pxor %xmm8,%xmm7
- movdqa %xmm7,%xmm0
- pslld $7,%xmm0
- psrld $25,%xmm7
- por %xmm0,%xmm7
- # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
- paddd %xmm14,%xmm9
- pxor %xmm9,%xmm4
- movdqa %xmm4,%xmm0
- pslld $7,%xmm0
- psrld $25,%xmm4
- por %xmm0,%xmm4
-
- dec %ecx
- jnz .Ldoubleround4
-
- # x0[0-3] += s0[0]
- # x1[0-3] += s0[1]
- movq 0x00(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- paddd 0x00(%rsp),%xmm2
- movdqa %xmm2,0x00(%rsp)
- paddd 0x10(%rsp),%xmm3
- movdqa %xmm3,0x10(%rsp)
- # x2[0-3] += s0[2]
- # x3[0-3] += s0[3]
- movq 0x08(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- paddd 0x20(%rsp),%xmm2
- movdqa %xmm2,0x20(%rsp)
- paddd 0x30(%rsp),%xmm3
- movdqa %xmm3,0x30(%rsp)
-
- # x4[0-3] += s1[0]
- # x5[0-3] += s1[1]
- movq 0x10(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- paddd %xmm2,%xmm4
- paddd %xmm3,%xmm5
- # x6[0-3] += s1[2]
- # x7[0-3] += s1[3]
- movq 0x18(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- paddd %xmm2,%xmm6
- paddd %xmm3,%xmm7
-
- # x8[0-3] += s2[0]
- # x9[0-3] += s2[1]
- movq 0x20(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- paddd %xmm2,%xmm8
- paddd %xmm3,%xmm9
- # x10[0-3] += s2[2]
- # x11[0-3] += s2[3]
- movq 0x28(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- paddd %xmm2,%xmm10
- paddd %xmm3,%xmm11
-
- # x12[0-3] += s3[0]
- # x13[0-3] += s3[1]
- movq 0x30(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- paddd %xmm2,%xmm12
- paddd %xmm3,%xmm13
- # x14[0-3] += s3[2]
- # x15[0-3] += s3[3]
- movq 0x38(%rdi),%xmm3
- pshufd $0x00,%xmm3,%xmm2
- pshufd $0x55,%xmm3,%xmm3
- paddd %xmm2,%xmm14
- paddd %xmm3,%xmm15
-
- # x12 += counter values 0-3
- paddd %xmm1,%xmm12
-
- # interleave 32-bit words in state n, n+1
- movdqa 0x00(%rsp),%xmm0
- movdqa 0x10(%rsp),%xmm1
- movdqa %xmm0,%xmm2
- punpckldq %xmm1,%xmm2
- punpckhdq %xmm1,%xmm0
- movdqa %xmm2,0x00(%rsp)
- movdqa %xmm0,0x10(%rsp)
- movdqa 0x20(%rsp),%xmm0
- movdqa 0x30(%rsp),%xmm1
- movdqa %xmm0,%xmm2
- punpckldq %xmm1,%xmm2
- punpckhdq %xmm1,%xmm0
- movdqa %xmm2,0x20(%rsp)
- movdqa %xmm0,0x30(%rsp)
- movdqa %xmm4,%xmm0
- punpckldq %xmm5,%xmm4
- punpckhdq %xmm5,%xmm0
- movdqa %xmm0,%xmm5
- movdqa %xmm6,%xmm0
- punpckldq %xmm7,%xmm6
- punpckhdq %xmm7,%xmm0
- movdqa %xmm0,%xmm7
- movdqa %xmm8,%xmm0
- punpckldq %xmm9,%xmm8
- punpckhdq %xmm9,%xmm0
- movdqa %xmm0,%xmm9
- movdqa %xmm10,%xmm0
- punpckldq %xmm11,%xmm10
- punpckhdq %xmm11,%xmm0
- movdqa %xmm0,%xmm11
- movdqa %xmm12,%xmm0
- punpckldq %xmm13,%xmm12
- punpckhdq %xmm13,%xmm0
- movdqa %xmm0,%xmm13
- movdqa %xmm14,%xmm0
- punpckldq %xmm15,%xmm14
- punpckhdq %xmm15,%xmm0
- movdqa %xmm0,%xmm15
-
- # interleave 64-bit words in state n, n+2
- movdqa 0x00(%rsp),%xmm0
- movdqa 0x20(%rsp),%xmm1
- movdqa %xmm0,%xmm2
- punpcklqdq %xmm1,%xmm2
- punpckhqdq %xmm1,%xmm0
- movdqa %xmm2,0x00(%rsp)
- movdqa %xmm0,0x20(%rsp)
- movdqa 0x10(%rsp),%xmm0
- movdqa 0x30(%rsp),%xmm1
- movdqa %xmm0,%xmm2
- punpcklqdq %xmm1,%xmm2
- punpckhqdq %xmm1,%xmm0
- movdqa %xmm2,0x10(%rsp)
- movdqa %xmm0,0x30(%rsp)
- movdqa %xmm4,%xmm0
- punpcklqdq %xmm6,%xmm4
- punpckhqdq %xmm6,%xmm0
- movdqa %xmm0,%xmm6
- movdqa %xmm5,%xmm0
- punpcklqdq %xmm7,%xmm5
- punpckhqdq %xmm7,%xmm0
- movdqa %xmm0,%xmm7
- movdqa %xmm8,%xmm0
- punpcklqdq %xmm10,%xmm8
- punpckhqdq %xmm10,%xmm0
- movdqa %xmm0,%xmm10
- movdqa %xmm9,%xmm0
- punpcklqdq %xmm11,%xmm9
- punpckhqdq %xmm11,%xmm0
- movdqa %xmm0,%xmm11
- movdqa %xmm12,%xmm0
- punpcklqdq %xmm14,%xmm12
- punpckhqdq %xmm14,%xmm0
- movdqa %xmm0,%xmm14
- movdqa %xmm13,%xmm0
- punpcklqdq %xmm15,%xmm13
- punpckhqdq %xmm15,%xmm0
- movdqa %xmm0,%xmm15
-
- # xor with corresponding input, write to output
- movdqa 0x00(%rsp),%xmm0
- movdqu 0x00(%rdx),%xmm1
- pxor %xmm1,%xmm0
- movdqu %xmm0,0x00(%rsi)
- movdqa 0x10(%rsp),%xmm0
- movdqu 0x80(%rdx),%xmm1
- pxor %xmm1,%xmm0
- movdqu %xmm0,0x80(%rsi)
- movdqa 0x20(%rsp),%xmm0
- movdqu 0x40(%rdx),%xmm1
- pxor %xmm1,%xmm0
- movdqu %xmm0,0x40(%rsi)
- movdqa 0x30(%rsp),%xmm0
- movdqu 0xc0(%rdx),%xmm1
- pxor %xmm1,%xmm0
- movdqu %xmm0,0xc0(%rsi)
- movdqu 0x10(%rdx),%xmm1
- pxor %xmm1,%xmm4
- movdqu %xmm4,0x10(%rsi)
- movdqu 0x90(%rdx),%xmm1
- pxor %xmm1,%xmm5
- movdqu %xmm5,0x90(%rsi)
- movdqu 0x50(%rdx),%xmm1
- pxor %xmm1,%xmm6
- movdqu %xmm6,0x50(%rsi)
- movdqu 0xd0(%rdx),%xmm1
- pxor %xmm1,%xmm7
- movdqu %xmm7,0xd0(%rsi)
- movdqu 0x20(%rdx),%xmm1
- pxor %xmm1,%xmm8
- movdqu %xmm8,0x20(%rsi)
- movdqu 0xa0(%rdx),%xmm1
- pxor %xmm1,%xmm9
- movdqu %xmm9,0xa0(%rsi)
- movdqu 0x60(%rdx),%xmm1
- pxor %xmm1,%xmm10
- movdqu %xmm10,0x60(%rsi)
- movdqu 0xe0(%rdx),%xmm1
- pxor %xmm1,%xmm11
- movdqu %xmm11,0xe0(%rsi)
- movdqu 0x30(%rdx),%xmm1
- pxor %xmm1,%xmm12
- movdqu %xmm12,0x30(%rsi)
- movdqu 0xb0(%rdx),%xmm1
- pxor %xmm1,%xmm13
- movdqu %xmm13,0xb0(%rsi)
- movdqu 0x70(%rdx),%xmm1
- pxor %xmm1,%xmm14
- movdqu %xmm14,0x70(%rsi)
- movdqu 0xf0(%rdx),%xmm1
- pxor %xmm1,%xmm15
- movdqu %xmm15,0xf0(%rsi)
-
- lea -8(%r10),%rsp
- ret
-ENDPROC(chacha20_4block_xor_ssse3)
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
deleted file mode 100644
index dce7c5d39c2f..000000000000
--- a/arch/x86/crypto/chacha20_glue.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/simd.h>
-
-#define CHACHA20_STATE_ALIGN 16
-
-asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
-#ifdef CONFIG_AS_AVX2
-asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
-static bool chacha20_use_avx2;
-#endif
-
-static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
-{
- u8 buf[CHACHA20_BLOCK_SIZE];
-
-#ifdef CONFIG_AS_AVX2
- if (chacha20_use_avx2) {
- while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
- chacha20_8block_xor_avx2(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE * 8;
- src += CHACHA20_BLOCK_SIZE * 8;
- dst += CHACHA20_BLOCK_SIZE * 8;
- state[12] += 8;
- }
- }
-#endif
- while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
- chacha20_4block_xor_ssse3(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE * 4;
- src += CHACHA20_BLOCK_SIZE * 4;
- dst += CHACHA20_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block_xor_ssse3(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE;
- src += CHACHA20_BLOCK_SIZE;
- dst += CHACHA20_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha20_block_xor_ssse3(state, buf, buf);
- memcpy(dst, buf, bytes);
- }
-}
-
-static int chacha20_simd(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 *state, state_buf[16 + 2] __aligned(8);
- struct skcipher_walk walk;
- int err;
-
- BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
- state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
-
- if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
- return crypto_chacha20_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_chacha20_init(state, ctx, walk.iv);
-
- kernel_fpu_begin();
-
- while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
- rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
- err = skcipher_walk_done(&walk,
- walk.nbytes % CHACHA20_BLOCK_SIZE);
- }
-
- if (walk.nbytes) {
- chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes);
- err = skcipher_walk_done(&walk, 0);
- }
-
- kernel_fpu_end();
-
- return err;
-}
-
-static struct skcipher_alg alg = {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-simd",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA20_KEY_SIZE,
- .max_keysize = CHACHA20_KEY_SIZE,
- .ivsize = CHACHA20_IV_SIZE,
- .chunksize = CHACHA20_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha20_simd,
- .decrypt = chacha20_simd,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
- if (!boot_cpu_has(X86_FEATURE_SSSE3))
- return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
- chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#endif
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-simd");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 47859a0f8052..93cd4d199447 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1433,22 +1433,6 @@ config CRYPTO_CHACHA20
ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
Bernstein and further specified in RFC7539 for use in IETF protocols.
- This is the portable C implementation of ChaCha20.
-
- See also:
- <http://cr.yp.to/chacha/chacha-20080128.pdf>
-
-config CRYPTO_CHACHA20_X86_64
- tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
- depends on X86 && 64BIT
- select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
- help
- ChaCha20 cipher algorithm, RFC7539.
-
- ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
- Bernstein and further specified in RFC7539 for use in IETF protocols.
- This is the x86_64 assembler implementation using SIMD instructions.
See also:
<http://cr.yp.to/chacha/chacha-20080128.pdf>
diff --git a/crypto/Makefile b/crypto/Makefile
index 5e60348d02e2..587103b87890 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -117,7 +117,7 @@ obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
obj-$(CONFIG_CRYPTO_SEED) += seed.o
obj-$(CONFIG_CRYPTO_SPECK) += speck.o
obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
-obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
+obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_zinc.o
obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_zinc.o
obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
deleted file mode 100644
index e451c3cb6a56..000000000000
--- a/crypto/chacha20_generic.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/module.h>
-
-static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
-{
- u32 stream[CHACHA20_BLOCK_WORDS];
-
- if (dst != src)
- memcpy(dst, src, bytes);
-
- while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block(state, stream);
- crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
- bytes -= CHACHA20_BLOCK_SIZE;
- dst += CHACHA20_BLOCK_SIZE;
- }
- if (bytes) {
- chacha20_block(state, stream);
- crypto_xor(dst, (const u8 *)stream, bytes);
- }
-}
-
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
-{
- state[0] = 0x61707865; /* "expa" */
- state[1] = 0x3320646e; /* "nd 3" */
- state[2] = 0x79622d32; /* "2-by" */
- state[3] = 0x6b206574; /* "te k" */
- state[4] = ctx->key[0];
- state[5] = ctx->key[1];
- state[6] = ctx->key[2];
- state[7] = ctx->key[3];
- state[8] = ctx->key[4];
- state[9] = ctx->key[5];
- state[10] = ctx->key[6];
- state[11] = ctx->key[7];
- state[12] = get_unaligned_le32(iv + 0);
- state[13] = get_unaligned_le32(iv + 4);
- state[14] = get_unaligned_le32(iv + 8);
- state[15] = get_unaligned_le32(iv + 12);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_init);
-
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize)
-{
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- int i;
-
- if (keysize != CHACHA20_KEY_SIZE)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
- ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
-
-int crypto_chacha20_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_chacha20_init(state, ctx, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
-
-static struct skcipher_alg alg = {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-generic",
- .base.cra_priority = 100,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA20_KEY_SIZE,
- .max_keysize = CHACHA20_KEY_SIZE,
- .ivsize = CHACHA20_IV_SIZE,
- .chunksize = CHACHA20_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = crypto_chacha20_crypt,
- .decrypt = crypto_chacha20_crypt,
-};
-
-static int __init chacha20_generic_mod_init(void)
-{
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_generic_mod_fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_generic_mod_init);
-module_exit(chacha20_generic_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-generic");
diff --git a/crypto/chacha20_zinc.c b/crypto/chacha20_zinc.c
new file mode 100644
index 000000000000..5df88fdee066
--- /dev/null
+++ b/crypto/chacha20_zinc.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <zinc/chacha20.h>
+#include <linux/module.h>
+
+struct chacha20_key_ctx {
+ u32 key[8];
+};
+
+static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ struct chacha20_key_ctx *key_ctx = crypto_skcipher_ctx(tfm);
+ int i;
+
+ if (keysize != CHACHA20_KEY_SIZE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(key_ctx->key); ++i)
+ key_ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+ return 0;
+}
+
+static int crypto_chacha20_crypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha20_key_ctx *key_ctx = crypto_skcipher_ctx(tfm);
+ struct chacha20_ctx ctx;
+ struct skcipher_walk walk;
+ simd_context_t simd_context;
+ int err, i;
+
+ err = skcipher_walk_virt(&walk, req, true);
+ if (unlikely(err))
+ return err;
+
+ memcpy(ctx.key, key_ctx->key, sizeof(ctx.key));
+ for (i = 0; i < ARRAY_SIZE(ctx.counter); ++i)
+ ctx.counter[i] = get_unaligned_le32(walk.iv + i * sizeof(u32));
+
+ simd_context = simd_get();
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ chacha20(&ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes,
+ simd_context);
+
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ simd_context = simd_relax(simd_context);
+ }
+ simd_put(simd_context);
+
+ return err;
+}
+
+static struct skcipher_alg alg = {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-software",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha20_key_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA20_KEY_SIZE,
+ .max_keysize = CHACHA20_KEY_SIZE,
+ .ivsize = CHACHA20_IV_SIZE,
+ .chunksize = CHACHA20_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = crypto_chacha20_crypt,
+ .decrypt = crypto_chacha20_crypt,
+};
+
+static int __init chacha20_mod_init(void)
+{
+ return crypto_register_skcipher(&alg);
+}
+
+static void __exit chacha20_mod_exit(void)
+{
+ crypto_unregister_skcipher(&alg);
+}
+
+module_init(chacha20_mod_init);
+module_exit(chacha20_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_DESCRIPTION("ChaCha20 stream cipher");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-software");
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index bf523797bef3..b26adb9ed898 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -13,7 +13,7 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
-#include <crypto/chacha20.h>
+#include <zinc/chacha20.h>
#include <zinc/poly1305.h>
#include <linux/err.h>
#include <linux/init.h>
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
index b83d66073db0..3b92f58f3891 100644
--- a/include/crypto/chacha20.h
+++ b/include/crypto/chacha20.h
@@ -6,23 +6,11 @@
#ifndef _CRYPTO_CHACHA20_H
#define _CRYPTO_CHACHA20_H
-#include <crypto/skcipher.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-
#define CHACHA20_IV_SIZE 16
#define CHACHA20_KEY_SIZE 32
#define CHACHA20_BLOCK_SIZE 64
#define CHACHA20_BLOCK_WORDS (CHACHA20_BLOCK_SIZE / sizeof(u32))
-struct chacha20_ctx {
- u32 key[8];
-};
-
void chacha20_block(u32 *state, u32 *stream);
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize);
-int crypto_chacha20_crypt(struct skcipher_request *req);
#endif
--
2.19.0
^ permalink raw reply related
* Re: [RFC PATCH 3/4] udp: implement GRO plain UDP sockets.
From: Eric Dumazet @ 2018-09-14 16:48 UTC (permalink / raw)
To: Paolo Abeni, netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert
In-Reply-To: <027ebc7d404af07b70f8f9816512b2a485e8f420.1536939423.git.pabeni@redhat.com>
On 09/14/2018 08:43 AM, Paolo Abeni wrote:
> This is the RX counter part of commit bec1f6f69736 ("udp: generate gso
> with UDP_SEGMENT"). When UDP_SEGMENT is enabled, such socket is also
> eligible for GRO in the rx path: UDP segments directed to such socket
> are assembled into a larger GSO_UDP_L4 packet.
>
> The core UDP GRO support is enabled/updated on setsockopt(UDP_SEGMENT) and
> disabled, if needed at socket destruction time.
>
> Initial benchmark numbers:
>
> Before:
> udp rx: 1079 MB/s 769065 calls/s
>
> After:
> udp rx: 1466 MB/s 24877 calls/s
Are you sure the data is actually fully copied to user space ?
tools/testing/selftests/net/udpgso_bench_rx.c
uses :
static char rbuf[ETH_DATA_LEN];
/* MSG_TRUNC will make return value full datagram length */
ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
So you need to change this program.
Also, GRO reception would mean that userspace can retrieve,
not only full bytes of X datagrams, but also the gso_size (or length of individual datagrams)
You can not know the size of the packets in advance, the sender will decide.
^ permalink raw reply
* Re: [PATCH 1/1] net: rds: use memset to optimize the recv
From: Santosh Shilimkar @ 2018-09-14 16:51 UTC (permalink / raw)
To: Zhu Yanjun, davem, netdev, linux-rdma, rds-devel
In-Reply-To: <20180914084538.11666-1-yanjun.zhu@oracle.com>
On 9/14/2018 1:45 AM, Zhu Yanjun wrote:
> The function rds_inc_init is in recv process. To use memset can optimize
> the function rds_inc_init.
> The test result:
>
> Before:
> 1) + 24.950 us | rds_inc_init [rds]();
> After:
> 1) + 10.990 us | rds_inc_init [rds]();
>
> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
> ---
Looks good. Thanks !!
Acked-by: Santosh Shilimkar<santosh.shilimkar@oracle.com>
^ permalink raw reply
* Re: [RFC PATCH 2/4] net: enable UDP gro on demand.
From: Willem de Bruijn @ 2018-09-14 17:16 UTC (permalink / raw)
To: Paolo Abeni
Cc: Network Development, David Miller, Willem de Bruijn,
steffen.klassert
In-Reply-To: <9732ec58cfd39528fc236c5af2024dfb88c826fd.1536939423.git.pabeni@redhat.com>
On Fri, Sep 14, 2018 at 11:47 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> Currently, the UDP GRO callback is always invoked, regardless of
> the existence of any actual user (e.g. a UDP tunnel). With retpoline
> enabled, this causes measurable overhead.
>
> This changeset introduces explicit accounting of the sockets requiring
> UDP GRO and updates the UDP offloads at runtime accordingly, so that
> the GRO callback is present (and invoked) only when there is at least
> one socket requiring it.
I have a difference solution both to the UDP socket lookup avoidance
and configurable GRO in general.
The first can be achieved by exporting the udp_encap_needed_key static key:
"
diff --git a/include/net/udp.h b/include/net/udp.h
index 8482a990b0bb..9e82cb391dea 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -443,8 +443,10 @@ int udpv4_offload_init(void);
void udp_init(void);
+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f4e35b2ff8b8..bd873a5b8a86 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1889,7 +1889,7 @@ static int __udp_queue_rcv_skb(struct sock *sk,
struct sk_buff *skb)
return 0;
}
-static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void)
{
static_branch_enable(&udp_encap_needed_key);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 4f6aa95a9b12..f44fe328aa0f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct
list_head *head,
{
struct udphdr *uh = udp_gro_udphdr(skb);
- if (unlikely(!uh))
+ if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
goto flush;
"
.. and same for ipv6.
The second is a larger patchset that converts dev_offload to
net_offload, so that all offloads share the same infrastructure, and a
sysctl interface to be able to disable all gro_receive types, not just
udp.
I've been sitting on it for too long. Let me slightly clean it up and
send it out for discussion sake..
>
> Tested with pktgen vs udpgso_bench_rx
> Before:
> udp rx: 27 MB/s 1613271 calls/s
>
> After:
> udp rx: 30 MB/s 1771537 calls/s
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
^ permalink raw reply related
* [PATCH] net: caif: remove redundant null check on frontpkt
From: Colin King @ 2018-09-14 17:19 UTC (permalink / raw)
To: Dmitry Tarnyagin, David S . Miller; +Cc: kernel-janitors, netdev
From: Colin Ian King <colin.king@canonical.com>
It is impossible for frontpkt to be null at the point of the null
check because it has been assigned from rearpkt and there is no
way realpkt can be null at the point of the assignment because
of the sanity checking and exit paths taken previously. Remove
the redundant null check.
Detected by CoverityScan, CID#114434 ("Logically dead code")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
net/caif/cfrfml.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index b82440e1fcb4..a931a71ef6df 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -264,9 +264,6 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
frontpkt = rearpkt;
rearpkt = NULL;
- err = -ENOMEM;
- if (frontpkt == NULL)
- goto out;
err = -EPROTO;
if (cfpkt_add_head(frontpkt, head, 6) < 0)
goto out;
--
2.17.1
^ permalink raw reply related
* Re: [PATCH] net/mlx4_core: print firmware version during driver loading
From: Qing Huang @ 2018-09-14 22:36 UTC (permalink / raw)
To: David Miller; +Cc: andrew, leon, netdev, linux-rdma, linux-kernel, tariqt
In-Reply-To: <20180914.141406.2211638662965115243.davem@davemloft.net>
On 9/14/2018 2:14 PM, David Miller wrote:
> From: Qing Huang<qing.huang@oracle.com>
> Date: Fri, 14 Sep 2018 11:33:40 -0700
>
>> On 9/14/2018 11:17 AM, Andrew Lunn wrote:
>>> On Fri, Sep 14, 2018 at 10:15:48AM -0700, Qing Huang wrote:
>>>> The FW version is actually a very crucial piece of information and
>>>> only
>>>> printed once here
>>>> when the driver is loaded. People tend to get confused when switching
>>>> multiple FW files
>>>> back and forth without running separate utility tools, especially at
>>>> customer sites.
>>>> IMHO, this information is very useful and only takes up very little
>>>> log file
>>>> space. :-)
>>> Why not use ethtool -i ?
>>>
>>> $ sudo ethtool -i eth0
>>> driver: r8169
>>> version: 2.3LK-NAPI
>>> firmware-version: rtl8168g-2_0.0.1 02/06/13
>>>
>>> Andrew
>> Sure. You can also use ibstat or ibv_devinfo tool if they are
>> installed. But it's not very
>> convenient in some cases.
>>
>> E.g.
>> A customer upgrades FW on HCAs and encounters issues. During triage,
>> it's much easier
>> to study customer uploaded log files when remotely testing different
>> FW files.
> Not a valid argument. You can print the ethtool output from initramfs
> if necessary for triage.
>
> I still stand by the fact that ethtool is the only fully reliable way
> to obtain this information, the kernel log is not.
This is more for Infiniband mode which depends more on features and
functionalities
provided in firmware and get much more frequent FW bug fixes than
typical Ethernet
devices. This is not meant to replace other ways of getting the
information, more like
an enhancement for checking log history.
This can provide valuable information when tracing through system log
history to
discover what happened with a specific HCA drv ver and fw ver
combination in the past.
Regards,
Qing
^ permalink raw reply
* Re: [PATCH net-next v4 18/20] crypto: port ChaCha20 to Zinc
From: Ard Biesheuvel @ 2018-09-14 17:38 UTC (permalink / raw)
To: Jason A. Donenfeld
Cc: Linux Kernel Mailing List, <netdev@vger.kernel.org>,
open list:HARDWARE RANDOM NUMBER GENERATOR CORE, David S. Miller,
Greg Kroah-Hartman, Samuel Neves, Andy Lutomirski,
Jean-Philippe Aumasson, Eric Biggers
In-Reply-To: <20180914162240.7925-19-Jason@zx2c4.com>
On 14 September 2018 at 18:22, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> Now that ChaCha20 is in Zinc, we can have the crypto API code simply
> call into it. The crypto API expects to have a stored key per instance
> and independent nonces, so we follow suite and store the key and
> initialize the nonce independently.
>
>From our exchange re v3:
>> Then there is the performance claim. We know for instance that the
>> OpenSSL ARM NEON code for ChaCha20 is faster on cores that happen to
>> possess a micro-architectural property that ALU instructions are
>> essentially free when they are interleaved with SIMD instructions. But
>> we also know that a) Cortex-A7, which is a relevant target, is not one
>> of those cores, and b) that chip designers are not likely to optimize
>> for that particular usage pattern so relying on it in generic code is
>> unwise in general.
>
> That's interesting. I'll bring this up with AndyP. FWIW, if you think
> you have a real and compelling claim here, I'd be much more likely to
> accept a different ChaCha20 implementation than I would be to accept a
> different Poly1305 implementation. (It's a *lot* harder to screw up
> ChaCha20 than it is to screw up Poly1305.)
>
so could we please bring that discussion to a close before we drop the ARM code?
I am fine with dropping the arm64 code btw.
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> Cc: Samuel Neves <sneves@dei.uc.pt>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Greg KH <gregkh@linuxfoundation.org>
> Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
> Cc: Eric Biggers <ebiggers@google.com>
> ---
> arch/arm/configs/exynos_defconfig | 1 -
> arch/arm/configs/multi_v7_defconfig | 1 -
> arch/arm/configs/omap2plus_defconfig | 1 -
> arch/arm/crypto/Kconfig | 6 -
> arch/arm/crypto/Makefile | 2 -
> arch/arm/crypto/chacha20-neon-core.S | 521 --------------------
> arch/arm/crypto/chacha20-neon-glue.c | 127 -----
> arch/arm64/configs/defconfig | 1 -
> arch/arm64/crypto/Kconfig | 6 -
> arch/arm64/crypto/Makefile | 3 -
> arch/arm64/crypto/chacha20-neon-core.S | 450 -----------------
> arch/arm64/crypto/chacha20-neon-glue.c | 133 -----
> arch/x86/crypto/Makefile | 3 -
> arch/x86/crypto/chacha20-avx2-x86_64.S | 448 -----------------
> arch/x86/crypto/chacha20-ssse3-x86_64.S | 630 ------------------------
> arch/x86/crypto/chacha20_glue.c | 146 ------
> crypto/Kconfig | 16 -
> crypto/Makefile | 2 +-
> crypto/chacha20_generic.c | 136 -----
> crypto/chacha20_zinc.c | 100 ++++
> crypto/chacha20poly1305.c | 2 +-
> include/crypto/chacha20.h | 12 -
> 22 files changed, 102 insertions(+), 2645 deletions(-)
> delete mode 100644 arch/arm/crypto/chacha20-neon-core.S
> delete mode 100644 arch/arm/crypto/chacha20-neon-glue.c
> delete mode 100644 arch/arm64/crypto/chacha20-neon-core.S
> delete mode 100644 arch/arm64/crypto/chacha20-neon-glue.c
> delete mode 100644 arch/x86/crypto/chacha20-avx2-x86_64.S
> delete mode 100644 arch/x86/crypto/chacha20-ssse3-x86_64.S
> delete mode 100644 arch/x86/crypto/chacha20_glue.c
> delete mode 100644 crypto/chacha20_generic.c
> create mode 100644 crypto/chacha20_zinc.c
>
> diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
> index 27ea6dfcf2f2..95929b5e7b10 100644
> --- a/arch/arm/configs/exynos_defconfig
> +++ b/arch/arm/configs/exynos_defconfig
> @@ -350,7 +350,6 @@ CONFIG_CRYPTO_SHA1_ARM_NEON=m
> CONFIG_CRYPTO_SHA256_ARM=m
> CONFIG_CRYPTO_SHA512_ARM=m
> CONFIG_CRYPTO_AES_ARM_BS=m
> -CONFIG_CRYPTO_CHACHA20_NEON=m
> CONFIG_CRC_CCITT=y
> CONFIG_FONTS=y
> CONFIG_FONT_7x14=y
> diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
> index fc33444e94f0..63be07724db3 100644
> --- a/arch/arm/configs/multi_v7_defconfig
> +++ b/arch/arm/configs/multi_v7_defconfig
> @@ -1000,4 +1000,3 @@ CONFIG_CRYPTO_AES_ARM_BS=m
> CONFIG_CRYPTO_AES_ARM_CE=m
> CONFIG_CRYPTO_GHASH_ARM_CE=m
> CONFIG_CRYPTO_CRC32_ARM_CE=m
> -CONFIG_CRYPTO_CHACHA20_NEON=m
> diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
> index 6491419b1dad..f585a8ecc336 100644
> --- a/arch/arm/configs/omap2plus_defconfig
> +++ b/arch/arm/configs/omap2plus_defconfig
> @@ -547,7 +547,6 @@ CONFIG_CRYPTO_SHA512_ARM=m
> CONFIG_CRYPTO_AES_ARM=m
> CONFIG_CRYPTO_AES_ARM_BS=m
> CONFIG_CRYPTO_GHASH_ARM_CE=m
> -CONFIG_CRYPTO_CHACHA20_NEON=m
> CONFIG_CRC_CCITT=y
> CONFIG_CRC_T10DIF=y
> CONFIG_CRC_ITU_T=y
> diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
> index 925d1364727a..fb80fd89f0e7 100644
> --- a/arch/arm/crypto/Kconfig
> +++ b/arch/arm/crypto/Kconfig
> @@ -115,12 +115,6 @@ config CRYPTO_CRC32_ARM_CE
> depends on KERNEL_MODE_NEON && CRC32
> select CRYPTO_HASH
>
> -config CRYPTO_CHACHA20_NEON
> - tristate "NEON accelerated ChaCha20 symmetric cipher"
> - depends on KERNEL_MODE_NEON
> - select CRYPTO_BLKCIPHER
> - select CRYPTO_CHACHA20
> -
> config CRYPTO_SPECK_NEON
> tristate "NEON accelerated Speck cipher algorithms"
> depends on KERNEL_MODE_NEON
> diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
> index 8de542c48ade..bbfa98447063 100644
> --- a/arch/arm/crypto/Makefile
> +++ b/arch/arm/crypto/Makefile
> @@ -9,7 +9,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
> obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
> obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
> obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
> -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
> obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
>
> ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
> @@ -53,7 +52,6 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
> ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
> crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
> crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
> -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
> speck-neon-y := speck-neon-core.o speck-neon-glue.o
>
> ifdef REGENERATE_ARM_CRYPTO
> diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
> deleted file mode 100644
> index 451a849ad518..000000000000
> --- a/arch/arm/crypto/chacha20-neon-core.S
> +++ /dev/null
> @@ -1,521 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
> - *
> - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Based on:
> - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <linux/linkage.h>
> -
> - .text
> - .fpu neon
> - .align 5
> -
> -ENTRY(chacha20_block_xor_neon)
> - // r0: Input state matrix, s
> - // r1: 1 data block output, o
> - // r2: 1 data block input, i
> -
> - //
> - // This function encrypts one ChaCha20 block by loading the state matrix
> - // in four NEON registers. It performs matrix operation on four words in
> - // parallel, but requireds shuffling to rearrange the words after each
> - // round.
> - //
> -
> - // x0..3 = s0..3
> - add ip, r0, #0x20
> - vld1.32 {q0-q1}, [r0]
> - vld1.32 {q2-q3}, [ip]
> -
> - vmov q8, q0
> - vmov q9, q1
> - vmov q10, q2
> - vmov q11, q3
> -
> - mov r3, #10
> -
> -.Ldoubleround:
> - // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> - vadd.i32 q0, q0, q1
> - veor q3, q3, q0
> - vrev32.16 q3, q3
> -
> - // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> - vadd.i32 q2, q2, q3
> - veor q4, q1, q2
> - vshl.u32 q1, q4, #12
> - vsri.u32 q1, q4, #20
> -
> - // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> - vadd.i32 q0, q0, q1
> - veor q4, q3, q0
> - vshl.u32 q3, q4, #8
> - vsri.u32 q3, q4, #24
> -
> - // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> - vadd.i32 q2, q2, q3
> - veor q4, q1, q2
> - vshl.u32 q1, q4, #7
> - vsri.u32 q1, q4, #25
> -
> - // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
> - vext.8 q1, q1, q1, #4
> - // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> - vext.8 q2, q2, q2, #8
> - // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
> - vext.8 q3, q3, q3, #12
> -
> - // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> - vadd.i32 q0, q0, q1
> - veor q3, q3, q0
> - vrev32.16 q3, q3
> -
> - // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> - vadd.i32 q2, q2, q3
> - veor q4, q1, q2
> - vshl.u32 q1, q4, #12
> - vsri.u32 q1, q4, #20
> -
> - // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> - vadd.i32 q0, q0, q1
> - veor q4, q3, q0
> - vshl.u32 q3, q4, #8
> - vsri.u32 q3, q4, #24
> -
> - // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> - vadd.i32 q2, q2, q3
> - veor q4, q1, q2
> - vshl.u32 q1, q4, #7
> - vsri.u32 q1, q4, #25
> -
> - // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
> - vext.8 q1, q1, q1, #12
> - // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> - vext.8 q2, q2, q2, #8
> - // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
> - vext.8 q3, q3, q3, #4
> -
> - subs r3, r3, #1
> - bne .Ldoubleround
> -
> - add ip, r2, #0x20
> - vld1.8 {q4-q5}, [r2]
> - vld1.8 {q6-q7}, [ip]
> -
> - // o0 = i0 ^ (x0 + s0)
> - vadd.i32 q0, q0, q8
> - veor q0, q0, q4
> -
> - // o1 = i1 ^ (x1 + s1)
> - vadd.i32 q1, q1, q9
> - veor q1, q1, q5
> -
> - // o2 = i2 ^ (x2 + s2)
> - vadd.i32 q2, q2, q10
> - veor q2, q2, q6
> -
> - // o3 = i3 ^ (x3 + s3)
> - vadd.i32 q3, q3, q11
> - veor q3, q3, q7
> -
> - add ip, r1, #0x20
> - vst1.8 {q0-q1}, [r1]
> - vst1.8 {q2-q3}, [ip]
> -
> - bx lr
> -ENDPROC(chacha20_block_xor_neon)
> -
> - .align 5
> -ENTRY(chacha20_4block_xor_neon)
> - push {r4-r6, lr}
> - mov ip, sp // preserve the stack pointer
> - sub r3, sp, #0x20 // allocate a 32 byte buffer
> - bic r3, r3, #0x1f // aligned to 32 bytes
> - mov sp, r3
> -
> - // r0: Input state matrix, s
> - // r1: 4 data blocks output, o
> - // r2: 4 data blocks input, i
> -
> - //
> - // This function encrypts four consecutive ChaCha20 blocks by loading
> - // the state matrix in NEON registers four times. The algorithm performs
> - // each operation on the corresponding word of each state matrix, hence
> - // requires no word shuffling. For final XORing step we transpose the
> - // matrix by interleaving 32- and then 64-bit words, which allows us to
> - // do XOR in NEON registers.
> - //
> -
> - // x0..15[0-3] = s0..3[0..3]
> - add r3, r0, #0x20
> - vld1.32 {q0-q1}, [r0]
> - vld1.32 {q2-q3}, [r3]
> -
> - adr r3, CTRINC
> - vdup.32 q15, d7[1]
> - vdup.32 q14, d7[0]
> - vld1.32 {q11}, [r3, :128]
> - vdup.32 q13, d6[1]
> - vdup.32 q12, d6[0]
> - vadd.i32 q12, q12, q11 // x12 += counter values 0-3
> - vdup.32 q11, d5[1]
> - vdup.32 q10, d5[0]
> - vdup.32 q9, d4[1]
> - vdup.32 q8, d4[0]
> - vdup.32 q7, d3[1]
> - vdup.32 q6, d3[0]
> - vdup.32 q5, d2[1]
> - vdup.32 q4, d2[0]
> - vdup.32 q3, d1[1]
> - vdup.32 q2, d1[0]
> - vdup.32 q1, d0[1]
> - vdup.32 q0, d0[0]
> -
> - mov r3, #10
> -
> -.Ldoubleround4:
> - // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
> - // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
> - // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
> - // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
> - vadd.i32 q0, q0, q4
> - vadd.i32 q1, q1, q5
> - vadd.i32 q2, q2, q6
> - vadd.i32 q3, q3, q7
> -
> - veor q12, q12, q0
> - veor q13, q13, q1
> - veor q14, q14, q2
> - veor q15, q15, q3
> -
> - vrev32.16 q12, q12
> - vrev32.16 q13, q13
> - vrev32.16 q14, q14
> - vrev32.16 q15, q15
> -
> - // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
> - // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
> - // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
> - // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
> - vadd.i32 q8, q8, q12
> - vadd.i32 q9, q9, q13
> - vadd.i32 q10, q10, q14
> - vadd.i32 q11, q11, q15
> -
> - vst1.32 {q8-q9}, [sp, :256]
> -
> - veor q8, q4, q8
> - veor q9, q5, q9
> - vshl.u32 q4, q8, #12
> - vshl.u32 q5, q9, #12
> - vsri.u32 q4, q8, #20
> - vsri.u32 q5, q9, #20
> -
> - veor q8, q6, q10
> - veor q9, q7, q11
> - vshl.u32 q6, q8, #12
> - vshl.u32 q7, q9, #12
> - vsri.u32 q6, q8, #20
> - vsri.u32 q7, q9, #20
> -
> - // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
> - // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
> - // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
> - // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
> - vadd.i32 q0, q0, q4
> - vadd.i32 q1, q1, q5
> - vadd.i32 q2, q2, q6
> - vadd.i32 q3, q3, q7
> -
> - veor q8, q12, q0
> - veor q9, q13, q1
> - vshl.u32 q12, q8, #8
> - vshl.u32 q13, q9, #8
> - vsri.u32 q12, q8, #24
> - vsri.u32 q13, q9, #24
> -
> - veor q8, q14, q2
> - veor q9, q15, q3
> - vshl.u32 q14, q8, #8
> - vshl.u32 q15, q9, #8
> - vsri.u32 q14, q8, #24
> - vsri.u32 q15, q9, #24
> -
> - vld1.32 {q8-q9}, [sp, :256]
> -
> - // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
> - // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
> - // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
> - // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
> - vadd.i32 q8, q8, q12
> - vadd.i32 q9, q9, q13
> - vadd.i32 q10, q10, q14
> - vadd.i32 q11, q11, q15
> -
> - vst1.32 {q8-q9}, [sp, :256]
> -
> - veor q8, q4, q8
> - veor q9, q5, q9
> - vshl.u32 q4, q8, #7
> - vshl.u32 q5, q9, #7
> - vsri.u32 q4, q8, #25
> - vsri.u32 q5, q9, #25
> -
> - veor q8, q6, q10
> - veor q9, q7, q11
> - vshl.u32 q6, q8, #7
> - vshl.u32 q7, q9, #7
> - vsri.u32 q6, q8, #25
> - vsri.u32 q7, q9, #25
> -
> - vld1.32 {q8-q9}, [sp, :256]
> -
> - // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
> - // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
> - // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
> - // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
> - vadd.i32 q0, q0, q5
> - vadd.i32 q1, q1, q6
> - vadd.i32 q2, q2, q7
> - vadd.i32 q3, q3, q4
> -
> - veor q15, q15, q0
> - veor q12, q12, q1
> - veor q13, q13, q2
> - veor q14, q14, q3
> -
> - vrev32.16 q15, q15
> - vrev32.16 q12, q12
> - vrev32.16 q13, q13
> - vrev32.16 q14, q14
> -
> - // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
> - // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
> - // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
> - // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
> - vadd.i32 q10, q10, q15
> - vadd.i32 q11, q11, q12
> - vadd.i32 q8, q8, q13
> - vadd.i32 q9, q9, q14
> -
> - vst1.32 {q8-q9}, [sp, :256]
> -
> - veor q8, q7, q8
> - veor q9, q4, q9
> - vshl.u32 q7, q8, #12
> - vshl.u32 q4, q9, #12
> - vsri.u32 q7, q8, #20
> - vsri.u32 q4, q9, #20
> -
> - veor q8, q5, q10
> - veor q9, q6, q11
> - vshl.u32 q5, q8, #12
> - vshl.u32 q6, q9, #12
> - vsri.u32 q5, q8, #20
> - vsri.u32 q6, q9, #20
> -
> - // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
> - // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
> - // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
> - // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
> - vadd.i32 q0, q0, q5
> - vadd.i32 q1, q1, q6
> - vadd.i32 q2, q2, q7
> - vadd.i32 q3, q3, q4
> -
> - veor q8, q15, q0
> - veor q9, q12, q1
> - vshl.u32 q15, q8, #8
> - vshl.u32 q12, q9, #8
> - vsri.u32 q15, q8, #24
> - vsri.u32 q12, q9, #24
> -
> - veor q8, q13, q2
> - veor q9, q14, q3
> - vshl.u32 q13, q8, #8
> - vshl.u32 q14, q9, #8
> - vsri.u32 q13, q8, #24
> - vsri.u32 q14, q9, #24
> -
> - vld1.32 {q8-q9}, [sp, :256]
> -
> - // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
> - // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
> - // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
> - // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
> - vadd.i32 q10, q10, q15
> - vadd.i32 q11, q11, q12
> - vadd.i32 q8, q8, q13
> - vadd.i32 q9, q9, q14
> -
> - vst1.32 {q8-q9}, [sp, :256]
> -
> - veor q8, q7, q8
> - veor q9, q4, q9
> - vshl.u32 q7, q8, #7
> - vshl.u32 q4, q9, #7
> - vsri.u32 q7, q8, #25
> - vsri.u32 q4, q9, #25
> -
> - veor q8, q5, q10
> - veor q9, q6, q11
> - vshl.u32 q5, q8, #7
> - vshl.u32 q6, q9, #7
> - vsri.u32 q5, q8, #25
> - vsri.u32 q6, q9, #25
> -
> - subs r3, r3, #1
> - beq 0f
> -
> - vld1.32 {q8-q9}, [sp, :256]
> - b .Ldoubleround4
> -
> - // x0[0-3] += s0[0]
> - // x1[0-3] += s0[1]
> - // x2[0-3] += s0[2]
> - // x3[0-3] += s0[3]
> -0: ldmia r0!, {r3-r6}
> - vdup.32 q8, r3
> - vdup.32 q9, r4
> - vadd.i32 q0, q0, q8
> - vadd.i32 q1, q1, q9
> - vdup.32 q8, r5
> - vdup.32 q9, r6
> - vadd.i32 q2, q2, q8
> - vadd.i32 q3, q3, q9
> -
> - // x4[0-3] += s1[0]
> - // x5[0-3] += s1[1]
> - // x6[0-3] += s1[2]
> - // x7[0-3] += s1[3]
> - ldmia r0!, {r3-r6}
> - vdup.32 q8, r3
> - vdup.32 q9, r4
> - vadd.i32 q4, q4, q8
> - vadd.i32 q5, q5, q9
> - vdup.32 q8, r5
> - vdup.32 q9, r6
> - vadd.i32 q6, q6, q8
> - vadd.i32 q7, q7, q9
> -
> - // interleave 32-bit words in state n, n+1
> - vzip.32 q0, q1
> - vzip.32 q2, q3
> - vzip.32 q4, q5
> - vzip.32 q6, q7
> -
> - // interleave 64-bit words in state n, n+2
> - vswp d1, d4
> - vswp d3, d6
> - vswp d9, d12
> - vswp d11, d14
> -
> - // xor with corresponding input, write to output
> - vld1.8 {q8-q9}, [r2]!
> - veor q8, q8, q0
> - veor q9, q9, q4
> - vst1.8 {q8-q9}, [r1]!
> -
> - vld1.32 {q8-q9}, [sp, :256]
> -
> - // x8[0-3] += s2[0]
> - // x9[0-3] += s2[1]
> - // x10[0-3] += s2[2]
> - // x11[0-3] += s2[3]
> - ldmia r0!, {r3-r6}
> - vdup.32 q0, r3
> - vdup.32 q4, r4
> - vadd.i32 q8, q8, q0
> - vadd.i32 q9, q9, q4
> - vdup.32 q0, r5
> - vdup.32 q4, r6
> - vadd.i32 q10, q10, q0
> - vadd.i32 q11, q11, q4
> -
> - // x12[0-3] += s3[0]
> - // x13[0-3] += s3[1]
> - // x14[0-3] += s3[2]
> - // x15[0-3] += s3[3]
> - ldmia r0!, {r3-r6}
> - vdup.32 q0, r3
> - vdup.32 q4, r4
> - adr r3, CTRINC
> - vadd.i32 q12, q12, q0
> - vld1.32 {q0}, [r3, :128]
> - vadd.i32 q13, q13, q4
> - vadd.i32 q12, q12, q0 // x12 += counter values 0-3
> -
> - vdup.32 q0, r5
> - vdup.32 q4, r6
> - vadd.i32 q14, q14, q0
> - vadd.i32 q15, q15, q4
> -
> - // interleave 32-bit words in state n, n+1
> - vzip.32 q8, q9
> - vzip.32 q10, q11
> - vzip.32 q12, q13
> - vzip.32 q14, q15
> -
> - // interleave 64-bit words in state n, n+2
> - vswp d17, d20
> - vswp d19, d22
> - vswp d25, d28
> - vswp d27, d30
> -
> - vmov q4, q1
> -
> - vld1.8 {q0-q1}, [r2]!
> - veor q0, q0, q8
> - veor q1, q1, q12
> - vst1.8 {q0-q1}, [r1]!
> -
> - vld1.8 {q0-q1}, [r2]!
> - veor q0, q0, q2
> - veor q1, q1, q6
> - vst1.8 {q0-q1}, [r1]!
> -
> - vld1.8 {q0-q1}, [r2]!
> - veor q0, q0, q10
> - veor q1, q1, q14
> - vst1.8 {q0-q1}, [r1]!
> -
> - vld1.8 {q0-q1}, [r2]!
> - veor q0, q0, q4
> - veor q1, q1, q5
> - vst1.8 {q0-q1}, [r1]!
> -
> - vld1.8 {q0-q1}, [r2]!
> - veor q0, q0, q9
> - veor q1, q1, q13
> - vst1.8 {q0-q1}, [r1]!
> -
> - vld1.8 {q0-q1}, [r2]!
> - veor q0, q0, q3
> - veor q1, q1, q7
> - vst1.8 {q0-q1}, [r1]!
> -
> - vld1.8 {q0-q1}, [r2]
> - veor q0, q0, q11
> - veor q1, q1, q15
> - vst1.8 {q0-q1}, [r1]
> -
> - mov sp, ip
> - pop {r4-r6, pc}
> -ENDPROC(chacha20_4block_xor_neon)
> -
> - .align 4
> -CTRINC: .word 0, 1, 2, 3
> diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c
> deleted file mode 100644
> index 59a7be08e80c..000000000000
> --- a/arch/arm/crypto/chacha20-neon-glue.c
> +++ /dev/null
> @@ -1,127 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
> - *
> - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Based on:
> - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <crypto/algapi.h>
> -#include <crypto/chacha20.h>
> -#include <crypto/internal/skcipher.h>
> -#include <linux/kernel.h>
> -#include <linux/module.h>
> -
> -#include <asm/hwcap.h>
> -#include <asm/neon.h>
> -#include <asm/simd.h>
> -
> -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
> -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
> -
> -static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
> - unsigned int bytes)
> -{
> - u8 buf[CHACHA20_BLOCK_SIZE];
> -
> - while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
> - chacha20_4block_xor_neon(state, dst, src);
> - bytes -= CHACHA20_BLOCK_SIZE * 4;
> - src += CHACHA20_BLOCK_SIZE * 4;
> - dst += CHACHA20_BLOCK_SIZE * 4;
> - state[12] += 4;
> - }
> - while (bytes >= CHACHA20_BLOCK_SIZE) {
> - chacha20_block_xor_neon(state, dst, src);
> - bytes -= CHACHA20_BLOCK_SIZE;
> - src += CHACHA20_BLOCK_SIZE;
> - dst += CHACHA20_BLOCK_SIZE;
> - state[12]++;
> - }
> - if (bytes) {
> - memcpy(buf, src, bytes);
> - chacha20_block_xor_neon(state, buf, buf);
> - memcpy(dst, buf, bytes);
> - }
> -}
> -
> -static int chacha20_neon(struct skcipher_request *req)
> -{
> - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> - struct skcipher_walk walk;
> - u32 state[16];
> - int err;
> -
> - if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
> - return crypto_chacha20_crypt(req);
> -
> - err = skcipher_walk_virt(&walk, req, true);
> -
> - crypto_chacha20_init(state, ctx, walk.iv);
> -
> - kernel_neon_begin();
> - while (walk.nbytes > 0) {
> - unsigned int nbytes = walk.nbytes;
> -
> - if (nbytes < walk.total)
> - nbytes = round_down(nbytes, walk.stride);
> -
> - chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
> - nbytes);
> - err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> - }
> - kernel_neon_end();
> -
> - return err;
> -}
> -
> -static struct skcipher_alg alg = {
> - .base.cra_name = "chacha20",
> - .base.cra_driver_name = "chacha20-neon",
> - .base.cra_priority = 300,
> - .base.cra_blocksize = 1,
> - .base.cra_ctxsize = sizeof(struct chacha20_ctx),
> - .base.cra_module = THIS_MODULE,
> -
> - .min_keysize = CHACHA20_KEY_SIZE,
> - .max_keysize = CHACHA20_KEY_SIZE,
> - .ivsize = CHACHA20_IV_SIZE,
> - .chunksize = CHACHA20_BLOCK_SIZE,
> - .walksize = 4 * CHACHA20_BLOCK_SIZE,
> - .setkey = crypto_chacha20_setkey,
> - .encrypt = chacha20_neon,
> - .decrypt = chacha20_neon,
> -};
> -
> -static int __init chacha20_simd_mod_init(void)
> -{
> - if (!(elf_hwcap & HWCAP_NEON))
> - return -ENODEV;
> -
> - return crypto_register_skcipher(&alg);
> -}
> -
> -static void __exit chacha20_simd_mod_fini(void)
> -{
> - crypto_unregister_skcipher(&alg);
> -}
> -
> -module_init(chacha20_simd_mod_init);
> -module_exit(chacha20_simd_mod_fini);
> -
> -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
> -MODULE_LICENSE("GPL v2");
> -MODULE_ALIAS_CRYPTO("chacha20");
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index db8d364f8476..6cc3c8a0ad88 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -709,5 +709,4 @@ CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
> CONFIG_CRYPTO_CRC32_ARM64_CE=m
> CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
> CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
> -CONFIG_CRYPTO_CHACHA20_NEON=m
> CONFIG_CRYPTO_AES_ARM64_BS=m
> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
> index e3fdb0fd6f70..9db6d775a880 100644
> --- a/arch/arm64/crypto/Kconfig
> +++ b/arch/arm64/crypto/Kconfig
> @@ -105,12 +105,6 @@ config CRYPTO_AES_ARM64_NEON_BLK
> select CRYPTO_AES
> select CRYPTO_SIMD
>
> -config CRYPTO_CHACHA20_NEON
> - tristate "NEON accelerated ChaCha20 symmetric cipher"
> - depends on KERNEL_MODE_NEON
> - select CRYPTO_BLKCIPHER
> - select CRYPTO_CHACHA20
> -
> config CRYPTO_AES_ARM64_BS
> tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
> depends on KERNEL_MODE_NEON
> diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
> index bcafd016618e..507c4bfb86e3 100644
> --- a/arch/arm64/crypto/Makefile
> +++ b/arch/arm64/crypto/Makefile
> @@ -53,9 +53,6 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
> obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
> sha512-arm64-y := sha512-glue.o sha512-core.o
>
> -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
> -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
> -
> obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
> speck-neon-y := speck-neon-core.o speck-neon-glue.o
>
> diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S
> deleted file mode 100644
> index 13c85e272c2a..000000000000
> --- a/arch/arm64/crypto/chacha20-neon-core.S
> +++ /dev/null
> @@ -1,450 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
> - *
> - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Based on:
> - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <linux/linkage.h>
> -
> - .text
> - .align 6
> -
> -ENTRY(chacha20_block_xor_neon)
> - // x0: Input state matrix, s
> - // x1: 1 data block output, o
> - // x2: 1 data block input, i
> -
> - //
> - // This function encrypts one ChaCha20 block by loading the state matrix
> - // in four NEON registers. It performs matrix operation on four words in
> - // parallel, but requires shuffling to rearrange the words after each
> - // round.
> - //
> -
> - // x0..3 = s0..3
> - adr x3, ROT8
> - ld1 {v0.4s-v3.4s}, [x0]
> - ld1 {v8.4s-v11.4s}, [x0]
> - ld1 {v12.4s}, [x3]
> -
> - mov x3, #10
> -
> -.Ldoubleround:
> - // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> - add v0.4s, v0.4s, v1.4s
> - eor v3.16b, v3.16b, v0.16b
> - rev32 v3.8h, v3.8h
> -
> - // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> - add v2.4s, v2.4s, v3.4s
> - eor v4.16b, v1.16b, v2.16b
> - shl v1.4s, v4.4s, #12
> - sri v1.4s, v4.4s, #20
> -
> - // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> - add v0.4s, v0.4s, v1.4s
> - eor v3.16b, v3.16b, v0.16b
> - tbl v3.16b, {v3.16b}, v12.16b
> -
> - // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> - add v2.4s, v2.4s, v3.4s
> - eor v4.16b, v1.16b, v2.16b
> - shl v1.4s, v4.4s, #7
> - sri v1.4s, v4.4s, #25
> -
> - // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
> - ext v1.16b, v1.16b, v1.16b, #4
> - // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> - ext v2.16b, v2.16b, v2.16b, #8
> - // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
> - ext v3.16b, v3.16b, v3.16b, #12
> -
> - // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> - add v0.4s, v0.4s, v1.4s
> - eor v3.16b, v3.16b, v0.16b
> - rev32 v3.8h, v3.8h
> -
> - // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> - add v2.4s, v2.4s, v3.4s
> - eor v4.16b, v1.16b, v2.16b
> - shl v1.4s, v4.4s, #12
> - sri v1.4s, v4.4s, #20
> -
> - // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> - add v0.4s, v0.4s, v1.4s
> - eor v3.16b, v3.16b, v0.16b
> - tbl v3.16b, {v3.16b}, v12.16b
> -
> - // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> - add v2.4s, v2.4s, v3.4s
> - eor v4.16b, v1.16b, v2.16b
> - shl v1.4s, v4.4s, #7
> - sri v1.4s, v4.4s, #25
> -
> - // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
> - ext v1.16b, v1.16b, v1.16b, #12
> - // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> - ext v2.16b, v2.16b, v2.16b, #8
> - // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
> - ext v3.16b, v3.16b, v3.16b, #4
> -
> - subs x3, x3, #1
> - b.ne .Ldoubleround
> -
> - ld1 {v4.16b-v7.16b}, [x2]
> -
> - // o0 = i0 ^ (x0 + s0)
> - add v0.4s, v0.4s, v8.4s
> - eor v0.16b, v0.16b, v4.16b
> -
> - // o1 = i1 ^ (x1 + s1)
> - add v1.4s, v1.4s, v9.4s
> - eor v1.16b, v1.16b, v5.16b
> -
> - // o2 = i2 ^ (x2 + s2)
> - add v2.4s, v2.4s, v10.4s
> - eor v2.16b, v2.16b, v6.16b
> -
> - // o3 = i3 ^ (x3 + s3)
> - add v3.4s, v3.4s, v11.4s
> - eor v3.16b, v3.16b, v7.16b
> -
> - st1 {v0.16b-v3.16b}, [x1]
> -
> - ret
> -ENDPROC(chacha20_block_xor_neon)
> -
> - .align 6
> -ENTRY(chacha20_4block_xor_neon)
> - // x0: Input state matrix, s
> - // x1: 4 data blocks output, o
> - // x2: 4 data blocks input, i
> -
> - //
> - // This function encrypts four consecutive ChaCha20 blocks by loading
> - // the state matrix in NEON registers four times. The algorithm performs
> - // each operation on the corresponding word of each state matrix, hence
> - // requires no word shuffling. For final XORing step we transpose the
> - // matrix by interleaving 32- and then 64-bit words, which allows us to
> - // do XOR in NEON registers.
> - //
> - adr x3, CTRINC // ... and ROT8
> - ld1 {v30.4s-v31.4s}, [x3]
> -
> - // x0..15[0-3] = s0..3[0..3]
> - mov x4, x0
> - ld4r { v0.4s- v3.4s}, [x4], #16
> - ld4r { v4.4s- v7.4s}, [x4], #16
> - ld4r { v8.4s-v11.4s}, [x4], #16
> - ld4r {v12.4s-v15.4s}, [x4]
> -
> - // x12 += counter values 0-3
> - add v12.4s, v12.4s, v30.4s
> -
> - mov x3, #10
> -
> -.Ldoubleround4:
> - // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
> - // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
> - // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
> - // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
> - add v0.4s, v0.4s, v4.4s
> - add v1.4s, v1.4s, v5.4s
> - add v2.4s, v2.4s, v6.4s
> - add v3.4s, v3.4s, v7.4s
> -
> - eor v12.16b, v12.16b, v0.16b
> - eor v13.16b, v13.16b, v1.16b
> - eor v14.16b, v14.16b, v2.16b
> - eor v15.16b, v15.16b, v3.16b
> -
> - rev32 v12.8h, v12.8h
> - rev32 v13.8h, v13.8h
> - rev32 v14.8h, v14.8h
> - rev32 v15.8h, v15.8h
> -
> - // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
> - // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
> - // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
> - // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
> - add v8.4s, v8.4s, v12.4s
> - add v9.4s, v9.4s, v13.4s
> - add v10.4s, v10.4s, v14.4s
> - add v11.4s, v11.4s, v15.4s
> -
> - eor v16.16b, v4.16b, v8.16b
> - eor v17.16b, v5.16b, v9.16b
> - eor v18.16b, v6.16b, v10.16b
> - eor v19.16b, v7.16b, v11.16b
> -
> - shl v4.4s, v16.4s, #12
> - shl v5.4s, v17.4s, #12
> - shl v6.4s, v18.4s, #12
> - shl v7.4s, v19.4s, #12
> -
> - sri v4.4s, v16.4s, #20
> - sri v5.4s, v17.4s, #20
> - sri v6.4s, v18.4s, #20
> - sri v7.4s, v19.4s, #20
> -
> - // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
> - // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
> - // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
> - // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
> - add v0.4s, v0.4s, v4.4s
> - add v1.4s, v1.4s, v5.4s
> - add v2.4s, v2.4s, v6.4s
> - add v3.4s, v3.4s, v7.4s
> -
> - eor v12.16b, v12.16b, v0.16b
> - eor v13.16b, v13.16b, v1.16b
> - eor v14.16b, v14.16b, v2.16b
> - eor v15.16b, v15.16b, v3.16b
> -
> - tbl v12.16b, {v12.16b}, v31.16b
> - tbl v13.16b, {v13.16b}, v31.16b
> - tbl v14.16b, {v14.16b}, v31.16b
> - tbl v15.16b, {v15.16b}, v31.16b
> -
> - // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
> - // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
> - // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
> - // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
> - add v8.4s, v8.4s, v12.4s
> - add v9.4s, v9.4s, v13.4s
> - add v10.4s, v10.4s, v14.4s
> - add v11.4s, v11.4s, v15.4s
> -
> - eor v16.16b, v4.16b, v8.16b
> - eor v17.16b, v5.16b, v9.16b
> - eor v18.16b, v6.16b, v10.16b
> - eor v19.16b, v7.16b, v11.16b
> -
> - shl v4.4s, v16.4s, #7
> - shl v5.4s, v17.4s, #7
> - shl v6.4s, v18.4s, #7
> - shl v7.4s, v19.4s, #7
> -
> - sri v4.4s, v16.4s, #25
> - sri v5.4s, v17.4s, #25
> - sri v6.4s, v18.4s, #25
> - sri v7.4s, v19.4s, #25
> -
> - // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
> - // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
> - // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
> - // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
> - add v0.4s, v0.4s, v5.4s
> - add v1.4s, v1.4s, v6.4s
> - add v2.4s, v2.4s, v7.4s
> - add v3.4s, v3.4s, v4.4s
> -
> - eor v15.16b, v15.16b, v0.16b
> - eor v12.16b, v12.16b, v1.16b
> - eor v13.16b, v13.16b, v2.16b
> - eor v14.16b, v14.16b, v3.16b
> -
> - rev32 v15.8h, v15.8h
> - rev32 v12.8h, v12.8h
> - rev32 v13.8h, v13.8h
> - rev32 v14.8h, v14.8h
> -
> - // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
> - // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
> - // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
> - // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
> - add v10.4s, v10.4s, v15.4s
> - add v11.4s, v11.4s, v12.4s
> - add v8.4s, v8.4s, v13.4s
> - add v9.4s, v9.4s, v14.4s
> -
> - eor v16.16b, v5.16b, v10.16b
> - eor v17.16b, v6.16b, v11.16b
> - eor v18.16b, v7.16b, v8.16b
> - eor v19.16b, v4.16b, v9.16b
> -
> - shl v5.4s, v16.4s, #12
> - shl v6.4s, v17.4s, #12
> - shl v7.4s, v18.4s, #12
> - shl v4.4s, v19.4s, #12
> -
> - sri v5.4s, v16.4s, #20
> - sri v6.4s, v17.4s, #20
> - sri v7.4s, v18.4s, #20
> - sri v4.4s, v19.4s, #20
> -
> - // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
> - // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
> - // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
> - // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
> - add v0.4s, v0.4s, v5.4s
> - add v1.4s, v1.4s, v6.4s
> - add v2.4s, v2.4s, v7.4s
> - add v3.4s, v3.4s, v4.4s
> -
> - eor v15.16b, v15.16b, v0.16b
> - eor v12.16b, v12.16b, v1.16b
> - eor v13.16b, v13.16b, v2.16b
> - eor v14.16b, v14.16b, v3.16b
> -
> - tbl v15.16b, {v15.16b}, v31.16b
> - tbl v12.16b, {v12.16b}, v31.16b
> - tbl v13.16b, {v13.16b}, v31.16b
> - tbl v14.16b, {v14.16b}, v31.16b
> -
> - // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
> - // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
> - // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
> - // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
> - add v10.4s, v10.4s, v15.4s
> - add v11.4s, v11.4s, v12.4s
> - add v8.4s, v8.4s, v13.4s
> - add v9.4s, v9.4s, v14.4s
> -
> - eor v16.16b, v5.16b, v10.16b
> - eor v17.16b, v6.16b, v11.16b
> - eor v18.16b, v7.16b, v8.16b
> - eor v19.16b, v4.16b, v9.16b
> -
> - shl v5.4s, v16.4s, #7
> - shl v6.4s, v17.4s, #7
> - shl v7.4s, v18.4s, #7
> - shl v4.4s, v19.4s, #7
> -
> - sri v5.4s, v16.4s, #25
> - sri v6.4s, v17.4s, #25
> - sri v7.4s, v18.4s, #25
> - sri v4.4s, v19.4s, #25
> -
> - subs x3, x3, #1
> - b.ne .Ldoubleround4
> -
> - ld4r {v16.4s-v19.4s}, [x0], #16
> - ld4r {v20.4s-v23.4s}, [x0], #16
> -
> - // x12 += counter values 0-3
> - add v12.4s, v12.4s, v30.4s
> -
> - // x0[0-3] += s0[0]
> - // x1[0-3] += s0[1]
> - // x2[0-3] += s0[2]
> - // x3[0-3] += s0[3]
> - add v0.4s, v0.4s, v16.4s
> - add v1.4s, v1.4s, v17.4s
> - add v2.4s, v2.4s, v18.4s
> - add v3.4s, v3.4s, v19.4s
> -
> - ld4r {v24.4s-v27.4s}, [x0], #16
> - ld4r {v28.4s-v31.4s}, [x0]
> -
> - // x4[0-3] += s1[0]
> - // x5[0-3] += s1[1]
> - // x6[0-3] += s1[2]
> - // x7[0-3] += s1[3]
> - add v4.4s, v4.4s, v20.4s
> - add v5.4s, v5.4s, v21.4s
> - add v6.4s, v6.4s, v22.4s
> - add v7.4s, v7.4s, v23.4s
> -
> - // x8[0-3] += s2[0]
> - // x9[0-3] += s2[1]
> - // x10[0-3] += s2[2]
> - // x11[0-3] += s2[3]
> - add v8.4s, v8.4s, v24.4s
> - add v9.4s, v9.4s, v25.4s
> - add v10.4s, v10.4s, v26.4s
> - add v11.4s, v11.4s, v27.4s
> -
> - // x12[0-3] += s3[0]
> - // x13[0-3] += s3[1]
> - // x14[0-3] += s3[2]
> - // x15[0-3] += s3[3]
> - add v12.4s, v12.4s, v28.4s
> - add v13.4s, v13.4s, v29.4s
> - add v14.4s, v14.4s, v30.4s
> - add v15.4s, v15.4s, v31.4s
> -
> - // interleave 32-bit words in state n, n+1
> - zip1 v16.4s, v0.4s, v1.4s
> - zip2 v17.4s, v0.4s, v1.4s
> - zip1 v18.4s, v2.4s, v3.4s
> - zip2 v19.4s, v2.4s, v3.4s
> - zip1 v20.4s, v4.4s, v5.4s
> - zip2 v21.4s, v4.4s, v5.4s
> - zip1 v22.4s, v6.4s, v7.4s
> - zip2 v23.4s, v6.4s, v7.4s
> - zip1 v24.4s, v8.4s, v9.4s
> - zip2 v25.4s, v8.4s, v9.4s
> - zip1 v26.4s, v10.4s, v11.4s
> - zip2 v27.4s, v10.4s, v11.4s
> - zip1 v28.4s, v12.4s, v13.4s
> - zip2 v29.4s, v12.4s, v13.4s
> - zip1 v30.4s, v14.4s, v15.4s
> - zip2 v31.4s, v14.4s, v15.4s
> -
> - // interleave 64-bit words in state n, n+2
> - zip1 v0.2d, v16.2d, v18.2d
> - zip2 v4.2d, v16.2d, v18.2d
> - zip1 v8.2d, v17.2d, v19.2d
> - zip2 v12.2d, v17.2d, v19.2d
> - ld1 {v16.16b-v19.16b}, [x2], #64
> -
> - zip1 v1.2d, v20.2d, v22.2d
> - zip2 v5.2d, v20.2d, v22.2d
> - zip1 v9.2d, v21.2d, v23.2d
> - zip2 v13.2d, v21.2d, v23.2d
> - ld1 {v20.16b-v23.16b}, [x2], #64
> -
> - zip1 v2.2d, v24.2d, v26.2d
> - zip2 v6.2d, v24.2d, v26.2d
> - zip1 v10.2d, v25.2d, v27.2d
> - zip2 v14.2d, v25.2d, v27.2d
> - ld1 {v24.16b-v27.16b}, [x2], #64
> -
> - zip1 v3.2d, v28.2d, v30.2d
> - zip2 v7.2d, v28.2d, v30.2d
> - zip1 v11.2d, v29.2d, v31.2d
> - zip2 v15.2d, v29.2d, v31.2d
> - ld1 {v28.16b-v31.16b}, [x2]
> -
> - // xor with corresponding input, write to output
> - eor v16.16b, v16.16b, v0.16b
> - eor v17.16b, v17.16b, v1.16b
> - eor v18.16b, v18.16b, v2.16b
> - eor v19.16b, v19.16b, v3.16b
> - eor v20.16b, v20.16b, v4.16b
> - eor v21.16b, v21.16b, v5.16b
> - st1 {v16.16b-v19.16b}, [x1], #64
> - eor v22.16b, v22.16b, v6.16b
> - eor v23.16b, v23.16b, v7.16b
> - eor v24.16b, v24.16b, v8.16b
> - eor v25.16b, v25.16b, v9.16b
> - st1 {v20.16b-v23.16b}, [x1], #64
> - eor v26.16b, v26.16b, v10.16b
> - eor v27.16b, v27.16b, v11.16b
> - eor v28.16b, v28.16b, v12.16b
> - st1 {v24.16b-v27.16b}, [x1], #64
> - eor v29.16b, v29.16b, v13.16b
> - eor v30.16b, v30.16b, v14.16b
> - eor v31.16b, v31.16b, v15.16b
> - st1 {v28.16b-v31.16b}, [x1]
> -
> - ret
> -ENDPROC(chacha20_4block_xor_neon)
> -
> -CTRINC: .word 0, 1, 2, 3
> -ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
> diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
> deleted file mode 100644
> index 727579c93ded..000000000000
> --- a/arch/arm64/crypto/chacha20-neon-glue.c
> +++ /dev/null
> @@ -1,133 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
> - *
> - * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Based on:
> - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <crypto/algapi.h>
> -#include <crypto/chacha20.h>
> -#include <crypto/internal/skcipher.h>
> -#include <linux/kernel.h>
> -#include <linux/module.h>
> -
> -#include <asm/hwcap.h>
> -#include <asm/neon.h>
> -#include <asm/simd.h>
> -
> -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
> -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
> -
> -static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
> - unsigned int bytes)
> -{
> - u8 buf[CHACHA20_BLOCK_SIZE];
> -
> - while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
> - kernel_neon_begin();
> - chacha20_4block_xor_neon(state, dst, src);
> - kernel_neon_end();
> - bytes -= CHACHA20_BLOCK_SIZE * 4;
> - src += CHACHA20_BLOCK_SIZE * 4;
> - dst += CHACHA20_BLOCK_SIZE * 4;
> - state[12] += 4;
> - }
> -
> - if (!bytes)
> - return;
> -
> - kernel_neon_begin();
> - while (bytes >= CHACHA20_BLOCK_SIZE) {
> - chacha20_block_xor_neon(state, dst, src);
> - bytes -= CHACHA20_BLOCK_SIZE;
> - src += CHACHA20_BLOCK_SIZE;
> - dst += CHACHA20_BLOCK_SIZE;
> - state[12]++;
> - }
> - if (bytes) {
> - memcpy(buf, src, bytes);
> - chacha20_block_xor_neon(state, buf, buf);
> - memcpy(dst, buf, bytes);
> - }
> - kernel_neon_end();
> -}
> -
> -static int chacha20_neon(struct skcipher_request *req)
> -{
> - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> - struct skcipher_walk walk;
> - u32 state[16];
> - int err;
> -
> - if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
> - return crypto_chacha20_crypt(req);
> -
> - err = skcipher_walk_virt(&walk, req, false);
> -
> - crypto_chacha20_init(state, ctx, walk.iv);
> -
> - while (walk.nbytes > 0) {
> - unsigned int nbytes = walk.nbytes;
> -
> - if (nbytes < walk.total)
> - nbytes = round_down(nbytes, walk.stride);
> -
> - chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
> - nbytes);
> - err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> - }
> -
> - return err;
> -}
> -
> -static struct skcipher_alg alg = {
> - .base.cra_name = "chacha20",
> - .base.cra_driver_name = "chacha20-neon",
> - .base.cra_priority = 300,
> - .base.cra_blocksize = 1,
> - .base.cra_ctxsize = sizeof(struct chacha20_ctx),
> - .base.cra_module = THIS_MODULE,
> -
> - .min_keysize = CHACHA20_KEY_SIZE,
> - .max_keysize = CHACHA20_KEY_SIZE,
> - .ivsize = CHACHA20_IV_SIZE,
> - .chunksize = CHACHA20_BLOCK_SIZE,
> - .walksize = 4 * CHACHA20_BLOCK_SIZE,
> - .setkey = crypto_chacha20_setkey,
> - .encrypt = chacha20_neon,
> - .decrypt = chacha20_neon,
> -};
> -
> -static int __init chacha20_simd_mod_init(void)
> -{
> - if (!(elf_hwcap & HWCAP_ASIMD))
> - return -ENODEV;
> -
> - return crypto_register_skcipher(&alg);
> -}
> -
> -static void __exit chacha20_simd_mod_fini(void)
> -{
> - crypto_unregister_skcipher(&alg);
> -}
> -
> -module_init(chacha20_simd_mod_init);
> -module_exit(chacha20_simd_mod_fini);
> -
> -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
> -MODULE_LICENSE("GPL v2");
> -MODULE_ALIAS_CRYPTO("chacha20");
> diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> index cf830219846b..419212c31246 100644
> --- a/arch/x86/crypto/Makefile
> +++ b/arch/x86/crypto/Makefile
> @@ -23,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
> obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
> obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
> obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
> -obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
> obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
> obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
> obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
> @@ -76,7 +75,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
> blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
> twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
> twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
> -chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
> serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
>
> aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
> @@ -99,7 +97,6 @@ endif
>
> ifeq ($(avx2_supported),yes)
> camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
> - chacha20-x86_64-y += chacha20-avx2-x86_64.o
> serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
>
> morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
> diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S
> deleted file mode 100644
> index f3cd26f48332..000000000000
> --- a/arch/x86/crypto/chacha20-avx2-x86_64.S
> +++ /dev/null
> @@ -1,448 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <linux/linkage.h>
> -
> -.section .rodata.cst32.ROT8, "aM", @progbits, 32
> -.align 32
> -ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
> - .octa 0x0e0d0c0f0a09080b0605040702010003
> -
> -.section .rodata.cst32.ROT16, "aM", @progbits, 32
> -.align 32
> -ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
> - .octa 0x0d0c0f0e09080b0a0504070601000302
> -
> -.section .rodata.cst32.CTRINC, "aM", @progbits, 32
> -.align 32
> -CTRINC: .octa 0x00000003000000020000000100000000
> - .octa 0x00000007000000060000000500000004
> -
> -.text
> -
> -ENTRY(chacha20_8block_xor_avx2)
> - # %rdi: Input state matrix, s
> - # %rsi: 8 data blocks output, o
> - # %rdx: 8 data blocks input, i
> -
> - # This function encrypts eight consecutive ChaCha20 blocks by loading
> - # the state matrix in AVX registers eight times. As we need some
> - # scratch registers, we save the first four registers on the stack. The
> - # algorithm performs each operation on the corresponding word of each
> - # state matrix, hence requires no word shuffling. For final XORing step
> - # we transpose the matrix by interleaving 32-, 64- and then 128-bit
> - # words, which allows us to do XOR in AVX registers. 8/16-bit word
> - # rotation is done with the slightly better performing byte shuffling,
> - # 7/12-bit word rotation uses traditional shift+OR.
> -
> - vzeroupper
> - # 4 * 32 byte stack, 32-byte aligned
> - lea 8(%rsp),%r10
> - and $~31, %rsp
> - sub $0x80, %rsp
> -
> - # x0..15[0-7] = s[0..15]
> - vpbroadcastd 0x00(%rdi),%ymm0
> - vpbroadcastd 0x04(%rdi),%ymm1
> - vpbroadcastd 0x08(%rdi),%ymm2
> - vpbroadcastd 0x0c(%rdi),%ymm3
> - vpbroadcastd 0x10(%rdi),%ymm4
> - vpbroadcastd 0x14(%rdi),%ymm5
> - vpbroadcastd 0x18(%rdi),%ymm6
> - vpbroadcastd 0x1c(%rdi),%ymm7
> - vpbroadcastd 0x20(%rdi),%ymm8
> - vpbroadcastd 0x24(%rdi),%ymm9
> - vpbroadcastd 0x28(%rdi),%ymm10
> - vpbroadcastd 0x2c(%rdi),%ymm11
> - vpbroadcastd 0x30(%rdi),%ymm12
> - vpbroadcastd 0x34(%rdi),%ymm13
> - vpbroadcastd 0x38(%rdi),%ymm14
> - vpbroadcastd 0x3c(%rdi),%ymm15
> - # x0..3 on stack
> - vmovdqa %ymm0,0x00(%rsp)
> - vmovdqa %ymm1,0x20(%rsp)
> - vmovdqa %ymm2,0x40(%rsp)
> - vmovdqa %ymm3,0x60(%rsp)
> -
> - vmovdqa CTRINC(%rip),%ymm1
> - vmovdqa ROT8(%rip),%ymm2
> - vmovdqa ROT16(%rip),%ymm3
> -
> - # x12 += counter values 0-3
> - vpaddd %ymm1,%ymm12,%ymm12
> -
> - mov $10,%ecx
> -
> -.Ldoubleround8:
> - # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
> - vpaddd 0x00(%rsp),%ymm4,%ymm0
> - vmovdqa %ymm0,0x00(%rsp)
> - vpxor %ymm0,%ymm12,%ymm12
> - vpshufb %ymm3,%ymm12,%ymm12
> - # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
> - vpaddd 0x20(%rsp),%ymm5,%ymm0
> - vmovdqa %ymm0,0x20(%rsp)
> - vpxor %ymm0,%ymm13,%ymm13
> - vpshufb %ymm3,%ymm13,%ymm13
> - # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
> - vpaddd 0x40(%rsp),%ymm6,%ymm0
> - vmovdqa %ymm0,0x40(%rsp)
> - vpxor %ymm0,%ymm14,%ymm14
> - vpshufb %ymm3,%ymm14,%ymm14
> - # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
> - vpaddd 0x60(%rsp),%ymm7,%ymm0
> - vmovdqa %ymm0,0x60(%rsp)
> - vpxor %ymm0,%ymm15,%ymm15
> - vpshufb %ymm3,%ymm15,%ymm15
> -
> - # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
> - vpaddd %ymm12,%ymm8,%ymm8
> - vpxor %ymm8,%ymm4,%ymm4
> - vpslld $12,%ymm4,%ymm0
> - vpsrld $20,%ymm4,%ymm4
> - vpor %ymm0,%ymm4,%ymm4
> - # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
> - vpaddd %ymm13,%ymm9,%ymm9
> - vpxor %ymm9,%ymm5,%ymm5
> - vpslld $12,%ymm5,%ymm0
> - vpsrld $20,%ymm5,%ymm5
> - vpor %ymm0,%ymm5,%ymm5
> - # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
> - vpaddd %ymm14,%ymm10,%ymm10
> - vpxor %ymm10,%ymm6,%ymm6
> - vpslld $12,%ymm6,%ymm0
> - vpsrld $20,%ymm6,%ymm6
> - vpor %ymm0,%ymm6,%ymm6
> - # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
> - vpaddd %ymm15,%ymm11,%ymm11
> - vpxor %ymm11,%ymm7,%ymm7
> - vpslld $12,%ymm7,%ymm0
> - vpsrld $20,%ymm7,%ymm7
> - vpor %ymm0,%ymm7,%ymm7
> -
> - # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
> - vpaddd 0x00(%rsp),%ymm4,%ymm0
> - vmovdqa %ymm0,0x00(%rsp)
> - vpxor %ymm0,%ymm12,%ymm12
> - vpshufb %ymm2,%ymm12,%ymm12
> - # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
> - vpaddd 0x20(%rsp),%ymm5,%ymm0
> - vmovdqa %ymm0,0x20(%rsp)
> - vpxor %ymm0,%ymm13,%ymm13
> - vpshufb %ymm2,%ymm13,%ymm13
> - # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
> - vpaddd 0x40(%rsp),%ymm6,%ymm0
> - vmovdqa %ymm0,0x40(%rsp)
> - vpxor %ymm0,%ymm14,%ymm14
> - vpshufb %ymm2,%ymm14,%ymm14
> - # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
> - vpaddd 0x60(%rsp),%ymm7,%ymm0
> - vmovdqa %ymm0,0x60(%rsp)
> - vpxor %ymm0,%ymm15,%ymm15
> - vpshufb %ymm2,%ymm15,%ymm15
> -
> - # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
> - vpaddd %ymm12,%ymm8,%ymm8
> - vpxor %ymm8,%ymm4,%ymm4
> - vpslld $7,%ymm4,%ymm0
> - vpsrld $25,%ymm4,%ymm4
> - vpor %ymm0,%ymm4,%ymm4
> - # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
> - vpaddd %ymm13,%ymm9,%ymm9
> - vpxor %ymm9,%ymm5,%ymm5
> - vpslld $7,%ymm5,%ymm0
> - vpsrld $25,%ymm5,%ymm5
> - vpor %ymm0,%ymm5,%ymm5
> - # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
> - vpaddd %ymm14,%ymm10,%ymm10
> - vpxor %ymm10,%ymm6,%ymm6
> - vpslld $7,%ymm6,%ymm0
> - vpsrld $25,%ymm6,%ymm6
> - vpor %ymm0,%ymm6,%ymm6
> - # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
> - vpaddd %ymm15,%ymm11,%ymm11
> - vpxor %ymm11,%ymm7,%ymm7
> - vpslld $7,%ymm7,%ymm0
> - vpsrld $25,%ymm7,%ymm7
> - vpor %ymm0,%ymm7,%ymm7
> -
> - # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
> - vpaddd 0x00(%rsp),%ymm5,%ymm0
> - vmovdqa %ymm0,0x00(%rsp)
> - vpxor %ymm0,%ymm15,%ymm15
> - vpshufb %ymm3,%ymm15,%ymm15
> - # x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0
> - vpaddd 0x20(%rsp),%ymm6,%ymm0
> - vmovdqa %ymm0,0x20(%rsp)
> - vpxor %ymm0,%ymm12,%ymm12
> - vpshufb %ymm3,%ymm12,%ymm12
> - # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
> - vpaddd 0x40(%rsp),%ymm7,%ymm0
> - vmovdqa %ymm0,0x40(%rsp)
> - vpxor %ymm0,%ymm13,%ymm13
> - vpshufb %ymm3,%ymm13,%ymm13
> - # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
> - vpaddd 0x60(%rsp),%ymm4,%ymm0
> - vmovdqa %ymm0,0x60(%rsp)
> - vpxor %ymm0,%ymm14,%ymm14
> - vpshufb %ymm3,%ymm14,%ymm14
> -
> - # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
> - vpaddd %ymm15,%ymm10,%ymm10
> - vpxor %ymm10,%ymm5,%ymm5
> - vpslld $12,%ymm5,%ymm0
> - vpsrld $20,%ymm5,%ymm5
> - vpor %ymm0,%ymm5,%ymm5
> - # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
> - vpaddd %ymm12,%ymm11,%ymm11
> - vpxor %ymm11,%ymm6,%ymm6
> - vpslld $12,%ymm6,%ymm0
> - vpsrld $20,%ymm6,%ymm6
> - vpor %ymm0,%ymm6,%ymm6
> - # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
> - vpaddd %ymm13,%ymm8,%ymm8
> - vpxor %ymm8,%ymm7,%ymm7
> - vpslld $12,%ymm7,%ymm0
> - vpsrld $20,%ymm7,%ymm7
> - vpor %ymm0,%ymm7,%ymm7
> - # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
> - vpaddd %ymm14,%ymm9,%ymm9
> - vpxor %ymm9,%ymm4,%ymm4
> - vpslld $12,%ymm4,%ymm0
> - vpsrld $20,%ymm4,%ymm4
> - vpor %ymm0,%ymm4,%ymm4
> -
> - # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
> - vpaddd 0x00(%rsp),%ymm5,%ymm0
> - vmovdqa %ymm0,0x00(%rsp)
> - vpxor %ymm0,%ymm15,%ymm15
> - vpshufb %ymm2,%ymm15,%ymm15
> - # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
> - vpaddd 0x20(%rsp),%ymm6,%ymm0
> - vmovdqa %ymm0,0x20(%rsp)
> - vpxor %ymm0,%ymm12,%ymm12
> - vpshufb %ymm2,%ymm12,%ymm12
> - # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
> - vpaddd 0x40(%rsp),%ymm7,%ymm0
> - vmovdqa %ymm0,0x40(%rsp)
> - vpxor %ymm0,%ymm13,%ymm13
> - vpshufb %ymm2,%ymm13,%ymm13
> - # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
> - vpaddd 0x60(%rsp),%ymm4,%ymm0
> - vmovdqa %ymm0,0x60(%rsp)
> - vpxor %ymm0,%ymm14,%ymm14
> - vpshufb %ymm2,%ymm14,%ymm14
> -
> - # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
> - vpaddd %ymm15,%ymm10,%ymm10
> - vpxor %ymm10,%ymm5,%ymm5
> - vpslld $7,%ymm5,%ymm0
> - vpsrld $25,%ymm5,%ymm5
> - vpor %ymm0,%ymm5,%ymm5
> - # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
> - vpaddd %ymm12,%ymm11,%ymm11
> - vpxor %ymm11,%ymm6,%ymm6
> - vpslld $7,%ymm6,%ymm0
> - vpsrld $25,%ymm6,%ymm6
> - vpor %ymm0,%ymm6,%ymm6
> - # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
> - vpaddd %ymm13,%ymm8,%ymm8
> - vpxor %ymm8,%ymm7,%ymm7
> - vpslld $7,%ymm7,%ymm0
> - vpsrld $25,%ymm7,%ymm7
> - vpor %ymm0,%ymm7,%ymm7
> - # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
> - vpaddd %ymm14,%ymm9,%ymm9
> - vpxor %ymm9,%ymm4,%ymm4
> - vpslld $7,%ymm4,%ymm0
> - vpsrld $25,%ymm4,%ymm4
> - vpor %ymm0,%ymm4,%ymm4
> -
> - dec %ecx
> - jnz .Ldoubleround8
> -
> - # x0..15[0-3] += s[0..15]
> - vpbroadcastd 0x00(%rdi),%ymm0
> - vpaddd 0x00(%rsp),%ymm0,%ymm0
> - vmovdqa %ymm0,0x00(%rsp)
> - vpbroadcastd 0x04(%rdi),%ymm0
> - vpaddd 0x20(%rsp),%ymm0,%ymm0
> - vmovdqa %ymm0,0x20(%rsp)
> - vpbroadcastd 0x08(%rdi),%ymm0
> - vpaddd 0x40(%rsp),%ymm0,%ymm0
> - vmovdqa %ymm0,0x40(%rsp)
> - vpbroadcastd 0x0c(%rdi),%ymm0
> - vpaddd 0x60(%rsp),%ymm0,%ymm0
> - vmovdqa %ymm0,0x60(%rsp)
> - vpbroadcastd 0x10(%rdi),%ymm0
> - vpaddd %ymm0,%ymm4,%ymm4
> - vpbroadcastd 0x14(%rdi),%ymm0
> - vpaddd %ymm0,%ymm5,%ymm5
> - vpbroadcastd 0x18(%rdi),%ymm0
> - vpaddd %ymm0,%ymm6,%ymm6
> - vpbroadcastd 0x1c(%rdi),%ymm0
> - vpaddd %ymm0,%ymm7,%ymm7
> - vpbroadcastd 0x20(%rdi),%ymm0
> - vpaddd %ymm0,%ymm8,%ymm8
> - vpbroadcastd 0x24(%rdi),%ymm0
> - vpaddd %ymm0,%ymm9,%ymm9
> - vpbroadcastd 0x28(%rdi),%ymm0
> - vpaddd %ymm0,%ymm10,%ymm10
> - vpbroadcastd 0x2c(%rdi),%ymm0
> - vpaddd %ymm0,%ymm11,%ymm11
> - vpbroadcastd 0x30(%rdi),%ymm0
> - vpaddd %ymm0,%ymm12,%ymm12
> - vpbroadcastd 0x34(%rdi),%ymm0
> - vpaddd %ymm0,%ymm13,%ymm13
> - vpbroadcastd 0x38(%rdi),%ymm0
> - vpaddd %ymm0,%ymm14,%ymm14
> - vpbroadcastd 0x3c(%rdi),%ymm0
> - vpaddd %ymm0,%ymm15,%ymm15
> -
> - # x12 += counter values 0-3
> - vpaddd %ymm1,%ymm12,%ymm12
> -
> - # interleave 32-bit words in state n, n+1
> - vmovdqa 0x00(%rsp),%ymm0
> - vmovdqa 0x20(%rsp),%ymm1
> - vpunpckldq %ymm1,%ymm0,%ymm2
> - vpunpckhdq %ymm1,%ymm0,%ymm1
> - vmovdqa %ymm2,0x00(%rsp)
> - vmovdqa %ymm1,0x20(%rsp)
> - vmovdqa 0x40(%rsp),%ymm0
> - vmovdqa 0x60(%rsp),%ymm1
> - vpunpckldq %ymm1,%ymm0,%ymm2
> - vpunpckhdq %ymm1,%ymm0,%ymm1
> - vmovdqa %ymm2,0x40(%rsp)
> - vmovdqa %ymm1,0x60(%rsp)
> - vmovdqa %ymm4,%ymm0
> - vpunpckldq %ymm5,%ymm0,%ymm4
> - vpunpckhdq %ymm5,%ymm0,%ymm5
> - vmovdqa %ymm6,%ymm0
> - vpunpckldq %ymm7,%ymm0,%ymm6
> - vpunpckhdq %ymm7,%ymm0,%ymm7
> - vmovdqa %ymm8,%ymm0
> - vpunpckldq %ymm9,%ymm0,%ymm8
> - vpunpckhdq %ymm9,%ymm0,%ymm9
> - vmovdqa %ymm10,%ymm0
> - vpunpckldq %ymm11,%ymm0,%ymm10
> - vpunpckhdq %ymm11,%ymm0,%ymm11
> - vmovdqa %ymm12,%ymm0
> - vpunpckldq %ymm13,%ymm0,%ymm12
> - vpunpckhdq %ymm13,%ymm0,%ymm13
> - vmovdqa %ymm14,%ymm0
> - vpunpckldq %ymm15,%ymm0,%ymm14
> - vpunpckhdq %ymm15,%ymm0,%ymm15
> -
> - # interleave 64-bit words in state n, n+2
> - vmovdqa 0x00(%rsp),%ymm0
> - vmovdqa 0x40(%rsp),%ymm2
> - vpunpcklqdq %ymm2,%ymm0,%ymm1
> - vpunpckhqdq %ymm2,%ymm0,%ymm2
> - vmovdqa %ymm1,0x00(%rsp)
> - vmovdqa %ymm2,0x40(%rsp)
> - vmovdqa 0x20(%rsp),%ymm0
> - vmovdqa 0x60(%rsp),%ymm2
> - vpunpcklqdq %ymm2,%ymm0,%ymm1
> - vpunpckhqdq %ymm2,%ymm0,%ymm2
> - vmovdqa %ymm1,0x20(%rsp)
> - vmovdqa %ymm2,0x60(%rsp)
> - vmovdqa %ymm4,%ymm0
> - vpunpcklqdq %ymm6,%ymm0,%ymm4
> - vpunpckhqdq %ymm6,%ymm0,%ymm6
> - vmovdqa %ymm5,%ymm0
> - vpunpcklqdq %ymm7,%ymm0,%ymm5
> - vpunpckhqdq %ymm7,%ymm0,%ymm7
> - vmovdqa %ymm8,%ymm0
> - vpunpcklqdq %ymm10,%ymm0,%ymm8
> - vpunpckhqdq %ymm10,%ymm0,%ymm10
> - vmovdqa %ymm9,%ymm0
> - vpunpcklqdq %ymm11,%ymm0,%ymm9
> - vpunpckhqdq %ymm11,%ymm0,%ymm11
> - vmovdqa %ymm12,%ymm0
> - vpunpcklqdq %ymm14,%ymm0,%ymm12
> - vpunpckhqdq %ymm14,%ymm0,%ymm14
> - vmovdqa %ymm13,%ymm0
> - vpunpcklqdq %ymm15,%ymm0,%ymm13
> - vpunpckhqdq %ymm15,%ymm0,%ymm15
> -
> - # interleave 128-bit words in state n, n+4
> - vmovdqa 0x00(%rsp),%ymm0
> - vperm2i128 $0x20,%ymm4,%ymm0,%ymm1
> - vperm2i128 $0x31,%ymm4,%ymm0,%ymm4
> - vmovdqa %ymm1,0x00(%rsp)
> - vmovdqa 0x20(%rsp),%ymm0
> - vperm2i128 $0x20,%ymm5,%ymm0,%ymm1
> - vperm2i128 $0x31,%ymm5,%ymm0,%ymm5
> - vmovdqa %ymm1,0x20(%rsp)
> - vmovdqa 0x40(%rsp),%ymm0
> - vperm2i128 $0x20,%ymm6,%ymm0,%ymm1
> - vperm2i128 $0x31,%ymm6,%ymm0,%ymm6
> - vmovdqa %ymm1,0x40(%rsp)
> - vmovdqa 0x60(%rsp),%ymm0
> - vperm2i128 $0x20,%ymm7,%ymm0,%ymm1
> - vperm2i128 $0x31,%ymm7,%ymm0,%ymm7
> - vmovdqa %ymm1,0x60(%rsp)
> - vperm2i128 $0x20,%ymm12,%ymm8,%ymm0
> - vperm2i128 $0x31,%ymm12,%ymm8,%ymm12
> - vmovdqa %ymm0,%ymm8
> - vperm2i128 $0x20,%ymm13,%ymm9,%ymm0
> - vperm2i128 $0x31,%ymm13,%ymm9,%ymm13
> - vmovdqa %ymm0,%ymm9
> - vperm2i128 $0x20,%ymm14,%ymm10,%ymm0
> - vperm2i128 $0x31,%ymm14,%ymm10,%ymm14
> - vmovdqa %ymm0,%ymm10
> - vperm2i128 $0x20,%ymm15,%ymm11,%ymm0
> - vperm2i128 $0x31,%ymm15,%ymm11,%ymm15
> - vmovdqa %ymm0,%ymm11
> -
> - # xor with corresponding input, write to output
> - vmovdqa 0x00(%rsp),%ymm0
> - vpxor 0x0000(%rdx),%ymm0,%ymm0
> - vmovdqu %ymm0,0x0000(%rsi)
> - vmovdqa 0x20(%rsp),%ymm0
> - vpxor 0x0080(%rdx),%ymm0,%ymm0
> - vmovdqu %ymm0,0x0080(%rsi)
> - vmovdqa 0x40(%rsp),%ymm0
> - vpxor 0x0040(%rdx),%ymm0,%ymm0
> - vmovdqu %ymm0,0x0040(%rsi)
> - vmovdqa 0x60(%rsp),%ymm0
> - vpxor 0x00c0(%rdx),%ymm0,%ymm0
> - vmovdqu %ymm0,0x00c0(%rsi)
> - vpxor 0x0100(%rdx),%ymm4,%ymm4
> - vmovdqu %ymm4,0x0100(%rsi)
> - vpxor 0x0180(%rdx),%ymm5,%ymm5
> - vmovdqu %ymm5,0x00180(%rsi)
> - vpxor 0x0140(%rdx),%ymm6,%ymm6
> - vmovdqu %ymm6,0x0140(%rsi)
> - vpxor 0x01c0(%rdx),%ymm7,%ymm7
> - vmovdqu %ymm7,0x01c0(%rsi)
> - vpxor 0x0020(%rdx),%ymm8,%ymm8
> - vmovdqu %ymm8,0x0020(%rsi)
> - vpxor 0x00a0(%rdx),%ymm9,%ymm9
> - vmovdqu %ymm9,0x00a0(%rsi)
> - vpxor 0x0060(%rdx),%ymm10,%ymm10
> - vmovdqu %ymm10,0x0060(%rsi)
> - vpxor 0x00e0(%rdx),%ymm11,%ymm11
> - vmovdqu %ymm11,0x00e0(%rsi)
> - vpxor 0x0120(%rdx),%ymm12,%ymm12
> - vmovdqu %ymm12,0x0120(%rsi)
> - vpxor 0x01a0(%rdx),%ymm13,%ymm13
> - vmovdqu %ymm13,0x01a0(%rsi)
> - vpxor 0x0160(%rdx),%ymm14,%ymm14
> - vmovdqu %ymm14,0x0160(%rsi)
> - vpxor 0x01e0(%rdx),%ymm15,%ymm15
> - vmovdqu %ymm15,0x01e0(%rsi)
> -
> - vzeroupper
> - lea -8(%r10),%rsp
> - ret
> -ENDPROC(chacha20_8block_xor_avx2)
> diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
> deleted file mode 100644
> index 512a2b500fd1..000000000000
> --- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
> +++ /dev/null
> @@ -1,630 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <linux/linkage.h>
> -
> -.section .rodata.cst16.ROT8, "aM", @progbits, 16
> -.align 16
> -ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
> -.section .rodata.cst16.ROT16, "aM", @progbits, 16
> -.align 16
> -ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
> -.section .rodata.cst16.CTRINC, "aM", @progbits, 16
> -.align 16
> -CTRINC: .octa 0x00000003000000020000000100000000
> -
> -.text
> -
> -ENTRY(chacha20_block_xor_ssse3)
> - # %rdi: Input state matrix, s
> - # %rsi: 1 data block output, o
> - # %rdx: 1 data block input, i
> -
> - # This function encrypts one ChaCha20 block by loading the state matrix
> - # in four SSE registers. It performs matrix operation on four words in
> - # parallel, but requireds shuffling to rearrange the words after each
> - # round. 8/16-bit word rotation is done with the slightly better
> - # performing SSSE3 byte shuffling, 7/12-bit word rotation uses
> - # traditional shift+OR.
> -
> - # x0..3 = s0..3
> - movdqa 0x00(%rdi),%xmm0
> - movdqa 0x10(%rdi),%xmm1
> - movdqa 0x20(%rdi),%xmm2
> - movdqa 0x30(%rdi),%xmm3
> - movdqa %xmm0,%xmm8
> - movdqa %xmm1,%xmm9
> - movdqa %xmm2,%xmm10
> - movdqa %xmm3,%xmm11
> -
> - movdqa ROT8(%rip),%xmm4
> - movdqa ROT16(%rip),%xmm5
> -
> - mov $10,%ecx
> -
> -.Ldoubleround:
> -
> - # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> - paddd %xmm1,%xmm0
> - pxor %xmm0,%xmm3
> - pshufb %xmm5,%xmm3
> -
> - # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> - paddd %xmm3,%xmm2
> - pxor %xmm2,%xmm1
> - movdqa %xmm1,%xmm6
> - pslld $12,%xmm6
> - psrld $20,%xmm1
> - por %xmm6,%xmm1
> -
> - # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> - paddd %xmm1,%xmm0
> - pxor %xmm0,%xmm3
> - pshufb %xmm4,%xmm3
> -
> - # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> - paddd %xmm3,%xmm2
> - pxor %xmm2,%xmm1
> - movdqa %xmm1,%xmm7
> - pslld $7,%xmm7
> - psrld $25,%xmm1
> - por %xmm7,%xmm1
> -
> - # x1 = shuffle32(x1, MASK(0, 3, 2, 1))
> - pshufd $0x39,%xmm1,%xmm1
> - # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> - pshufd $0x4e,%xmm2,%xmm2
> - # x3 = shuffle32(x3, MASK(2, 1, 0, 3))
> - pshufd $0x93,%xmm3,%xmm3
> -
> - # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> - paddd %xmm1,%xmm0
> - pxor %xmm0,%xmm3
> - pshufb %xmm5,%xmm3
> -
> - # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> - paddd %xmm3,%xmm2
> - pxor %xmm2,%xmm1
> - movdqa %xmm1,%xmm6
> - pslld $12,%xmm6
> - psrld $20,%xmm1
> - por %xmm6,%xmm1
> -
> - # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> - paddd %xmm1,%xmm0
> - pxor %xmm0,%xmm3
> - pshufb %xmm4,%xmm3
> -
> - # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> - paddd %xmm3,%xmm2
> - pxor %xmm2,%xmm1
> - movdqa %xmm1,%xmm7
> - pslld $7,%xmm7
> - psrld $25,%xmm1
> - por %xmm7,%xmm1
> -
> - # x1 = shuffle32(x1, MASK(2, 1, 0, 3))
> - pshufd $0x93,%xmm1,%xmm1
> - # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> - pshufd $0x4e,%xmm2,%xmm2
> - # x3 = shuffle32(x3, MASK(0, 3, 2, 1))
> - pshufd $0x39,%xmm3,%xmm3
> -
> - dec %ecx
> - jnz .Ldoubleround
> -
> - # o0 = i0 ^ (x0 + s0)
> - movdqu 0x00(%rdx),%xmm4
> - paddd %xmm8,%xmm0
> - pxor %xmm4,%xmm0
> - movdqu %xmm0,0x00(%rsi)
> - # o1 = i1 ^ (x1 + s1)
> - movdqu 0x10(%rdx),%xmm5
> - paddd %xmm9,%xmm1
> - pxor %xmm5,%xmm1
> - movdqu %xmm1,0x10(%rsi)
> - # o2 = i2 ^ (x2 + s2)
> - movdqu 0x20(%rdx),%xmm6
> - paddd %xmm10,%xmm2
> - pxor %xmm6,%xmm2
> - movdqu %xmm2,0x20(%rsi)
> - # o3 = i3 ^ (x3 + s3)
> - movdqu 0x30(%rdx),%xmm7
> - paddd %xmm11,%xmm3
> - pxor %xmm7,%xmm3
> - movdqu %xmm3,0x30(%rsi)
> -
> - ret
> -ENDPROC(chacha20_block_xor_ssse3)
> -
> -ENTRY(chacha20_4block_xor_ssse3)
> - # %rdi: Input state matrix, s
> - # %rsi: 4 data blocks output, o
> - # %rdx: 4 data blocks input, i
> -
> - # This function encrypts four consecutive ChaCha20 blocks by loading the
> - # the state matrix in SSE registers four times. As we need some scratch
> - # registers, we save the first four registers on the stack. The
> - # algorithm performs each operation on the corresponding word of each
> - # state matrix, hence requires no word shuffling. For final XORing step
> - # we transpose the matrix by interleaving 32- and then 64-bit words,
> - # which allows us to do XOR in SSE registers. 8/16-bit word rotation is
> - # done with the slightly better performing SSSE3 byte shuffling,
> - # 7/12-bit word rotation uses traditional shift+OR.
> -
> - lea 8(%rsp),%r10
> - sub $0x80,%rsp
> - and $~63,%rsp
> -
> - # x0..15[0-3] = s0..3[0..3]
> - movq 0x00(%rdi),%xmm1
> - pshufd $0x00,%xmm1,%xmm0
> - pshufd $0x55,%xmm1,%xmm1
> - movq 0x08(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - movq 0x10(%rdi),%xmm5
> - pshufd $0x00,%xmm5,%xmm4
> - pshufd $0x55,%xmm5,%xmm5
> - movq 0x18(%rdi),%xmm7
> - pshufd $0x00,%xmm7,%xmm6
> - pshufd $0x55,%xmm7,%xmm7
> - movq 0x20(%rdi),%xmm9
> - pshufd $0x00,%xmm9,%xmm8
> - pshufd $0x55,%xmm9,%xmm9
> - movq 0x28(%rdi),%xmm11
> - pshufd $0x00,%xmm11,%xmm10
> - pshufd $0x55,%xmm11,%xmm11
> - movq 0x30(%rdi),%xmm13
> - pshufd $0x00,%xmm13,%xmm12
> - pshufd $0x55,%xmm13,%xmm13
> - movq 0x38(%rdi),%xmm15
> - pshufd $0x00,%xmm15,%xmm14
> - pshufd $0x55,%xmm15,%xmm15
> - # x0..3 on stack
> - movdqa %xmm0,0x00(%rsp)
> - movdqa %xmm1,0x10(%rsp)
> - movdqa %xmm2,0x20(%rsp)
> - movdqa %xmm3,0x30(%rsp)
> -
> - movdqa CTRINC(%rip),%xmm1
> - movdqa ROT8(%rip),%xmm2
> - movdqa ROT16(%rip),%xmm3
> -
> - # x12 += counter values 0-3
> - paddd %xmm1,%xmm12
> -
> - mov $10,%ecx
> -
> -.Ldoubleround4:
> - # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
> - movdqa 0x00(%rsp),%xmm0
> - paddd %xmm4,%xmm0
> - movdqa %xmm0,0x00(%rsp)
> - pxor %xmm0,%xmm12
> - pshufb %xmm3,%xmm12
> - # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
> - movdqa 0x10(%rsp),%xmm0
> - paddd %xmm5,%xmm0
> - movdqa %xmm0,0x10(%rsp)
> - pxor %xmm0,%xmm13
> - pshufb %xmm3,%xmm13
> - # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
> - movdqa 0x20(%rsp),%xmm0
> - paddd %xmm6,%xmm0
> - movdqa %xmm0,0x20(%rsp)
> - pxor %xmm0,%xmm14
> - pshufb %xmm3,%xmm14
> - # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
> - movdqa 0x30(%rsp),%xmm0
> - paddd %xmm7,%xmm0
> - movdqa %xmm0,0x30(%rsp)
> - pxor %xmm0,%xmm15
> - pshufb %xmm3,%xmm15
> -
> - # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
> - paddd %xmm12,%xmm8
> - pxor %xmm8,%xmm4
> - movdqa %xmm4,%xmm0
> - pslld $12,%xmm0
> - psrld $20,%xmm4
> - por %xmm0,%xmm4
> - # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
> - paddd %xmm13,%xmm9
> - pxor %xmm9,%xmm5
> - movdqa %xmm5,%xmm0
> - pslld $12,%xmm0
> - psrld $20,%xmm5
> - por %xmm0,%xmm5
> - # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
> - paddd %xmm14,%xmm10
> - pxor %xmm10,%xmm6
> - movdqa %xmm6,%xmm0
> - pslld $12,%xmm0
> - psrld $20,%xmm6
> - por %xmm0,%xmm6
> - # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
> - paddd %xmm15,%xmm11
> - pxor %xmm11,%xmm7
> - movdqa %xmm7,%xmm0
> - pslld $12,%xmm0
> - psrld $20,%xmm7
> - por %xmm0,%xmm7
> -
> - # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
> - movdqa 0x00(%rsp),%xmm0
> - paddd %xmm4,%xmm0
> - movdqa %xmm0,0x00(%rsp)
> - pxor %xmm0,%xmm12
> - pshufb %xmm2,%xmm12
> - # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
> - movdqa 0x10(%rsp),%xmm0
> - paddd %xmm5,%xmm0
> - movdqa %xmm0,0x10(%rsp)
> - pxor %xmm0,%xmm13
> - pshufb %xmm2,%xmm13
> - # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
> - movdqa 0x20(%rsp),%xmm0
> - paddd %xmm6,%xmm0
> - movdqa %xmm0,0x20(%rsp)
> - pxor %xmm0,%xmm14
> - pshufb %xmm2,%xmm14
> - # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
> - movdqa 0x30(%rsp),%xmm0
> - paddd %xmm7,%xmm0
> - movdqa %xmm0,0x30(%rsp)
> - pxor %xmm0,%xmm15
> - pshufb %xmm2,%xmm15
> -
> - # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
> - paddd %xmm12,%xmm8
> - pxor %xmm8,%xmm4
> - movdqa %xmm4,%xmm0
> - pslld $7,%xmm0
> - psrld $25,%xmm4
> - por %xmm0,%xmm4
> - # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
> - paddd %xmm13,%xmm9
> - pxor %xmm9,%xmm5
> - movdqa %xmm5,%xmm0
> - pslld $7,%xmm0
> - psrld $25,%xmm5
> - por %xmm0,%xmm5
> - # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
> - paddd %xmm14,%xmm10
> - pxor %xmm10,%xmm6
> - movdqa %xmm6,%xmm0
> - pslld $7,%xmm0
> - psrld $25,%xmm6
> - por %xmm0,%xmm6
> - # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
> - paddd %xmm15,%xmm11
> - pxor %xmm11,%xmm7
> - movdqa %xmm7,%xmm0
> - pslld $7,%xmm0
> - psrld $25,%xmm7
> - por %xmm0,%xmm7
> -
> - # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
> - movdqa 0x00(%rsp),%xmm0
> - paddd %xmm5,%xmm0
> - movdqa %xmm0,0x00(%rsp)
> - pxor %xmm0,%xmm15
> - pshufb %xmm3,%xmm15
> - # x1 += x6, x12 = rotl32(x12 ^ x1, 16)
> - movdqa 0x10(%rsp),%xmm0
> - paddd %xmm6,%xmm0
> - movdqa %xmm0,0x10(%rsp)
> - pxor %xmm0,%xmm12
> - pshufb %xmm3,%xmm12
> - # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
> - movdqa 0x20(%rsp),%xmm0
> - paddd %xmm7,%xmm0
> - movdqa %xmm0,0x20(%rsp)
> - pxor %xmm0,%xmm13
> - pshufb %xmm3,%xmm13
> - # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
> - movdqa 0x30(%rsp),%xmm0
> - paddd %xmm4,%xmm0
> - movdqa %xmm0,0x30(%rsp)
> - pxor %xmm0,%xmm14
> - pshufb %xmm3,%xmm14
> -
> - # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
> - paddd %xmm15,%xmm10
> - pxor %xmm10,%xmm5
> - movdqa %xmm5,%xmm0
> - pslld $12,%xmm0
> - psrld $20,%xmm5
> - por %xmm0,%xmm5
> - # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
> - paddd %xmm12,%xmm11
> - pxor %xmm11,%xmm6
> - movdqa %xmm6,%xmm0
> - pslld $12,%xmm0
> - psrld $20,%xmm6
> - por %xmm0,%xmm6
> - # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
> - paddd %xmm13,%xmm8
> - pxor %xmm8,%xmm7
> - movdqa %xmm7,%xmm0
> - pslld $12,%xmm0
> - psrld $20,%xmm7
> - por %xmm0,%xmm7
> - # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
> - paddd %xmm14,%xmm9
> - pxor %xmm9,%xmm4
> - movdqa %xmm4,%xmm0
> - pslld $12,%xmm0
> - psrld $20,%xmm4
> - por %xmm0,%xmm4
> -
> - # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
> - movdqa 0x00(%rsp),%xmm0
> - paddd %xmm5,%xmm0
> - movdqa %xmm0,0x00(%rsp)
> - pxor %xmm0,%xmm15
> - pshufb %xmm2,%xmm15
> - # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
> - movdqa 0x10(%rsp),%xmm0
> - paddd %xmm6,%xmm0
> - movdqa %xmm0,0x10(%rsp)
> - pxor %xmm0,%xmm12
> - pshufb %xmm2,%xmm12
> - # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
> - movdqa 0x20(%rsp),%xmm0
> - paddd %xmm7,%xmm0
> - movdqa %xmm0,0x20(%rsp)
> - pxor %xmm0,%xmm13
> - pshufb %xmm2,%xmm13
> - # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
> - movdqa 0x30(%rsp),%xmm0
> - paddd %xmm4,%xmm0
> - movdqa %xmm0,0x30(%rsp)
> - pxor %xmm0,%xmm14
> - pshufb %xmm2,%xmm14
> -
> - # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
> - paddd %xmm15,%xmm10
> - pxor %xmm10,%xmm5
> - movdqa %xmm5,%xmm0
> - pslld $7,%xmm0
> - psrld $25,%xmm5
> - por %xmm0,%xmm5
> - # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
> - paddd %xmm12,%xmm11
> - pxor %xmm11,%xmm6
> - movdqa %xmm6,%xmm0
> - pslld $7,%xmm0
> - psrld $25,%xmm6
> - por %xmm0,%xmm6
> - # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
> - paddd %xmm13,%xmm8
> - pxor %xmm8,%xmm7
> - movdqa %xmm7,%xmm0
> - pslld $7,%xmm0
> - psrld $25,%xmm7
> - por %xmm0,%xmm7
> - # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
> - paddd %xmm14,%xmm9
> - pxor %xmm9,%xmm4
> - movdqa %xmm4,%xmm0
> - pslld $7,%xmm0
> - psrld $25,%xmm4
> - por %xmm0,%xmm4
> -
> - dec %ecx
> - jnz .Ldoubleround4
> -
> - # x0[0-3] += s0[0]
> - # x1[0-3] += s0[1]
> - movq 0x00(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - paddd 0x00(%rsp),%xmm2
> - movdqa %xmm2,0x00(%rsp)
> - paddd 0x10(%rsp),%xmm3
> - movdqa %xmm3,0x10(%rsp)
> - # x2[0-3] += s0[2]
> - # x3[0-3] += s0[3]
> - movq 0x08(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - paddd 0x20(%rsp),%xmm2
> - movdqa %xmm2,0x20(%rsp)
> - paddd 0x30(%rsp),%xmm3
> - movdqa %xmm3,0x30(%rsp)
> -
> - # x4[0-3] += s1[0]
> - # x5[0-3] += s1[1]
> - movq 0x10(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - paddd %xmm2,%xmm4
> - paddd %xmm3,%xmm5
> - # x6[0-3] += s1[2]
> - # x7[0-3] += s1[3]
> - movq 0x18(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - paddd %xmm2,%xmm6
> - paddd %xmm3,%xmm7
> -
> - # x8[0-3] += s2[0]
> - # x9[0-3] += s2[1]
> - movq 0x20(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - paddd %xmm2,%xmm8
> - paddd %xmm3,%xmm9
> - # x10[0-3] += s2[2]
> - # x11[0-3] += s2[3]
> - movq 0x28(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - paddd %xmm2,%xmm10
> - paddd %xmm3,%xmm11
> -
> - # x12[0-3] += s3[0]
> - # x13[0-3] += s3[1]
> - movq 0x30(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - paddd %xmm2,%xmm12
> - paddd %xmm3,%xmm13
> - # x14[0-3] += s3[2]
> - # x15[0-3] += s3[3]
> - movq 0x38(%rdi),%xmm3
> - pshufd $0x00,%xmm3,%xmm2
> - pshufd $0x55,%xmm3,%xmm3
> - paddd %xmm2,%xmm14
> - paddd %xmm3,%xmm15
> -
> - # x12 += counter values 0-3
> - paddd %xmm1,%xmm12
> -
> - # interleave 32-bit words in state n, n+1
> - movdqa 0x00(%rsp),%xmm0
> - movdqa 0x10(%rsp),%xmm1
> - movdqa %xmm0,%xmm2
> - punpckldq %xmm1,%xmm2
> - punpckhdq %xmm1,%xmm0
> - movdqa %xmm2,0x00(%rsp)
> - movdqa %xmm0,0x10(%rsp)
> - movdqa 0x20(%rsp),%xmm0
> - movdqa 0x30(%rsp),%xmm1
> - movdqa %xmm0,%xmm2
> - punpckldq %xmm1,%xmm2
> - punpckhdq %xmm1,%xmm0
> - movdqa %xmm2,0x20(%rsp)
> - movdqa %xmm0,0x30(%rsp)
> - movdqa %xmm4,%xmm0
> - punpckldq %xmm5,%xmm4
> - punpckhdq %xmm5,%xmm0
> - movdqa %xmm0,%xmm5
> - movdqa %xmm6,%xmm0
> - punpckldq %xmm7,%xmm6
> - punpckhdq %xmm7,%xmm0
> - movdqa %xmm0,%xmm7
> - movdqa %xmm8,%xmm0
> - punpckldq %xmm9,%xmm8
> - punpckhdq %xmm9,%xmm0
> - movdqa %xmm0,%xmm9
> - movdqa %xmm10,%xmm0
> - punpckldq %xmm11,%xmm10
> - punpckhdq %xmm11,%xmm0
> - movdqa %xmm0,%xmm11
> - movdqa %xmm12,%xmm0
> - punpckldq %xmm13,%xmm12
> - punpckhdq %xmm13,%xmm0
> - movdqa %xmm0,%xmm13
> - movdqa %xmm14,%xmm0
> - punpckldq %xmm15,%xmm14
> - punpckhdq %xmm15,%xmm0
> - movdqa %xmm0,%xmm15
> -
> - # interleave 64-bit words in state n, n+2
> - movdqa 0x00(%rsp),%xmm0
> - movdqa 0x20(%rsp),%xmm1
> - movdqa %xmm0,%xmm2
> - punpcklqdq %xmm1,%xmm2
> - punpckhqdq %xmm1,%xmm0
> - movdqa %xmm2,0x00(%rsp)
> - movdqa %xmm0,0x20(%rsp)
> - movdqa 0x10(%rsp),%xmm0
> - movdqa 0x30(%rsp),%xmm1
> - movdqa %xmm0,%xmm2
> - punpcklqdq %xmm1,%xmm2
> - punpckhqdq %xmm1,%xmm0
> - movdqa %xmm2,0x10(%rsp)
> - movdqa %xmm0,0x30(%rsp)
> - movdqa %xmm4,%xmm0
> - punpcklqdq %xmm6,%xmm4
> - punpckhqdq %xmm6,%xmm0
> - movdqa %xmm0,%xmm6
> - movdqa %xmm5,%xmm0
> - punpcklqdq %xmm7,%xmm5
> - punpckhqdq %xmm7,%xmm0
> - movdqa %xmm0,%xmm7
> - movdqa %xmm8,%xmm0
> - punpcklqdq %xmm10,%xmm8
> - punpckhqdq %xmm10,%xmm0
> - movdqa %xmm0,%xmm10
> - movdqa %xmm9,%xmm0
> - punpcklqdq %xmm11,%xmm9
> - punpckhqdq %xmm11,%xmm0
> - movdqa %xmm0,%xmm11
> - movdqa %xmm12,%xmm0
> - punpcklqdq %xmm14,%xmm12
> - punpckhqdq %xmm14,%xmm0
> - movdqa %xmm0,%xmm14
> - movdqa %xmm13,%xmm0
> - punpcklqdq %xmm15,%xmm13
> - punpckhqdq %xmm15,%xmm0
> - movdqa %xmm0,%xmm15
> -
> - # xor with corresponding input, write to output
> - movdqa 0x00(%rsp),%xmm0
> - movdqu 0x00(%rdx),%xmm1
> - pxor %xmm1,%xmm0
> - movdqu %xmm0,0x00(%rsi)
> - movdqa 0x10(%rsp),%xmm0
> - movdqu 0x80(%rdx),%xmm1
> - pxor %xmm1,%xmm0
> - movdqu %xmm0,0x80(%rsi)
> - movdqa 0x20(%rsp),%xmm0
> - movdqu 0x40(%rdx),%xmm1
> - pxor %xmm1,%xmm0
> - movdqu %xmm0,0x40(%rsi)
> - movdqa 0x30(%rsp),%xmm0
> - movdqu 0xc0(%rdx),%xmm1
> - pxor %xmm1,%xmm0
> - movdqu %xmm0,0xc0(%rsi)
> - movdqu 0x10(%rdx),%xmm1
> - pxor %xmm1,%xmm4
> - movdqu %xmm4,0x10(%rsi)
> - movdqu 0x90(%rdx),%xmm1
> - pxor %xmm1,%xmm5
> - movdqu %xmm5,0x90(%rsi)
> - movdqu 0x50(%rdx),%xmm1
> - pxor %xmm1,%xmm6
> - movdqu %xmm6,0x50(%rsi)
> - movdqu 0xd0(%rdx),%xmm1
> - pxor %xmm1,%xmm7
> - movdqu %xmm7,0xd0(%rsi)
> - movdqu 0x20(%rdx),%xmm1
> - pxor %xmm1,%xmm8
> - movdqu %xmm8,0x20(%rsi)
> - movdqu 0xa0(%rdx),%xmm1
> - pxor %xmm1,%xmm9
> - movdqu %xmm9,0xa0(%rsi)
> - movdqu 0x60(%rdx),%xmm1
> - pxor %xmm1,%xmm10
> - movdqu %xmm10,0x60(%rsi)
> - movdqu 0xe0(%rdx),%xmm1
> - pxor %xmm1,%xmm11
> - movdqu %xmm11,0xe0(%rsi)
> - movdqu 0x30(%rdx),%xmm1
> - pxor %xmm1,%xmm12
> - movdqu %xmm12,0x30(%rsi)
> - movdqu 0xb0(%rdx),%xmm1
> - pxor %xmm1,%xmm13
> - movdqu %xmm13,0xb0(%rsi)
> - movdqu 0x70(%rdx),%xmm1
> - pxor %xmm1,%xmm14
> - movdqu %xmm14,0x70(%rsi)
> - movdqu 0xf0(%rdx),%xmm1
> - pxor %xmm1,%xmm15
> - movdqu %xmm15,0xf0(%rsi)
> -
> - lea -8(%r10),%rsp
> - ret
> -ENDPROC(chacha20_4block_xor_ssse3)
> diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
> deleted file mode 100644
> index dce7c5d39c2f..000000000000
> --- a/arch/x86/crypto/chacha20_glue.c
> +++ /dev/null
> @@ -1,146 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <crypto/algapi.h>
> -#include <crypto/chacha20.h>
> -#include <crypto/internal/skcipher.h>
> -#include <linux/kernel.h>
> -#include <linux/module.h>
> -#include <asm/fpu/api.h>
> -#include <asm/simd.h>
> -
> -#define CHACHA20_STATE_ALIGN 16
> -
> -asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
> -asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
> -#ifdef CONFIG_AS_AVX2
> -asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
> -static bool chacha20_use_avx2;
> -#endif
> -
> -static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
> - unsigned int bytes)
> -{
> - u8 buf[CHACHA20_BLOCK_SIZE];
> -
> -#ifdef CONFIG_AS_AVX2
> - if (chacha20_use_avx2) {
> - while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
> - chacha20_8block_xor_avx2(state, dst, src);
> - bytes -= CHACHA20_BLOCK_SIZE * 8;
> - src += CHACHA20_BLOCK_SIZE * 8;
> - dst += CHACHA20_BLOCK_SIZE * 8;
> - state[12] += 8;
> - }
> - }
> -#endif
> - while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
> - chacha20_4block_xor_ssse3(state, dst, src);
> - bytes -= CHACHA20_BLOCK_SIZE * 4;
> - src += CHACHA20_BLOCK_SIZE * 4;
> - dst += CHACHA20_BLOCK_SIZE * 4;
> - state[12] += 4;
> - }
> - while (bytes >= CHACHA20_BLOCK_SIZE) {
> - chacha20_block_xor_ssse3(state, dst, src);
> - bytes -= CHACHA20_BLOCK_SIZE;
> - src += CHACHA20_BLOCK_SIZE;
> - dst += CHACHA20_BLOCK_SIZE;
> - state[12]++;
> - }
> - if (bytes) {
> - memcpy(buf, src, bytes);
> - chacha20_block_xor_ssse3(state, buf, buf);
> - memcpy(dst, buf, bytes);
> - }
> -}
> -
> -static int chacha20_simd(struct skcipher_request *req)
> -{
> - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> - u32 *state, state_buf[16 + 2] __aligned(8);
> - struct skcipher_walk walk;
> - int err;
> -
> - BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
> - state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
> -
> - if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
> - return crypto_chacha20_crypt(req);
> -
> - err = skcipher_walk_virt(&walk, req, true);
> -
> - crypto_chacha20_init(state, ctx, walk.iv);
> -
> - kernel_fpu_begin();
> -
> - while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
> - chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
> - rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
> - err = skcipher_walk_done(&walk,
> - walk.nbytes % CHACHA20_BLOCK_SIZE);
> - }
> -
> - if (walk.nbytes) {
> - chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
> - walk.nbytes);
> - err = skcipher_walk_done(&walk, 0);
> - }
> -
> - kernel_fpu_end();
> -
> - return err;
> -}
> -
> -static struct skcipher_alg alg = {
> - .base.cra_name = "chacha20",
> - .base.cra_driver_name = "chacha20-simd",
> - .base.cra_priority = 300,
> - .base.cra_blocksize = 1,
> - .base.cra_ctxsize = sizeof(struct chacha20_ctx),
> - .base.cra_module = THIS_MODULE,
> -
> - .min_keysize = CHACHA20_KEY_SIZE,
> - .max_keysize = CHACHA20_KEY_SIZE,
> - .ivsize = CHACHA20_IV_SIZE,
> - .chunksize = CHACHA20_BLOCK_SIZE,
> - .setkey = crypto_chacha20_setkey,
> - .encrypt = chacha20_simd,
> - .decrypt = chacha20_simd,
> -};
> -
> -static int __init chacha20_simd_mod_init(void)
> -{
> - if (!boot_cpu_has(X86_FEATURE_SSSE3))
> - return -ENODEV;
> -
> -#ifdef CONFIG_AS_AVX2
> - chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
> - boot_cpu_has(X86_FEATURE_AVX2) &&
> - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
> -#endif
> - return crypto_register_skcipher(&alg);
> -}
> -
> -static void __exit chacha20_simd_mod_fini(void)
> -{
> - crypto_unregister_skcipher(&alg);
> -}
> -
> -module_init(chacha20_simd_mod_init);
> -module_exit(chacha20_simd_mod_fini);
> -
> -MODULE_LICENSE("GPL");
> -MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
> -MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
> -MODULE_ALIAS_CRYPTO("chacha20");
> -MODULE_ALIAS_CRYPTO("chacha20-simd");
> diff --git a/crypto/Kconfig b/crypto/Kconfig
> index 47859a0f8052..93cd4d199447 100644
> --- a/crypto/Kconfig
> +++ b/crypto/Kconfig
> @@ -1433,22 +1433,6 @@ config CRYPTO_CHACHA20
>
> ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
> Bernstein and further specified in RFC7539 for use in IETF protocols.
> - This is the portable C implementation of ChaCha20.
> -
> - See also:
> - <http://cr.yp.to/chacha/chacha-20080128.pdf>
> -
> -config CRYPTO_CHACHA20_X86_64
> - tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
> - depends on X86 && 64BIT
> - select CRYPTO_BLKCIPHER
> - select CRYPTO_CHACHA20
> - help
> - ChaCha20 cipher algorithm, RFC7539.
> -
> - ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
> - Bernstein and further specified in RFC7539 for use in IETF protocols.
> - This is the x86_64 assembler implementation using SIMD instructions.
>
> See also:
> <http://cr.yp.to/chacha/chacha-20080128.pdf>
> diff --git a/crypto/Makefile b/crypto/Makefile
> index 5e60348d02e2..587103b87890 100644
> --- a/crypto/Makefile
> +++ b/crypto/Makefile
> @@ -117,7 +117,7 @@ obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
> obj-$(CONFIG_CRYPTO_SEED) += seed.o
> obj-$(CONFIG_CRYPTO_SPECK) += speck.o
> obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
> -obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
> +obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_zinc.o
> obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_zinc.o
> obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
> obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
> diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
> deleted file mode 100644
> index e451c3cb6a56..000000000000
> --- a/crypto/chacha20_generic.c
> +++ /dev/null
> @@ -1,136 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <asm/unaligned.h>
> -#include <crypto/algapi.h>
> -#include <crypto/chacha20.h>
> -#include <crypto/internal/skcipher.h>
> -#include <linux/module.h>
> -
> -static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
> - unsigned int bytes)
> -{
> - u32 stream[CHACHA20_BLOCK_WORDS];
> -
> - if (dst != src)
> - memcpy(dst, src, bytes);
> -
> - while (bytes >= CHACHA20_BLOCK_SIZE) {
> - chacha20_block(state, stream);
> - crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
> - bytes -= CHACHA20_BLOCK_SIZE;
> - dst += CHACHA20_BLOCK_SIZE;
> - }
> - if (bytes) {
> - chacha20_block(state, stream);
> - crypto_xor(dst, (const u8 *)stream, bytes);
> - }
> -}
> -
> -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
> -{
> - state[0] = 0x61707865; /* "expa" */
> - state[1] = 0x3320646e; /* "nd 3" */
> - state[2] = 0x79622d32; /* "2-by" */
> - state[3] = 0x6b206574; /* "te k" */
> - state[4] = ctx->key[0];
> - state[5] = ctx->key[1];
> - state[6] = ctx->key[2];
> - state[7] = ctx->key[3];
> - state[8] = ctx->key[4];
> - state[9] = ctx->key[5];
> - state[10] = ctx->key[6];
> - state[11] = ctx->key[7];
> - state[12] = get_unaligned_le32(iv + 0);
> - state[13] = get_unaligned_le32(iv + 4);
> - state[14] = get_unaligned_le32(iv + 8);
> - state[15] = get_unaligned_le32(iv + 12);
> -}
> -EXPORT_SYMBOL_GPL(crypto_chacha20_init);
> -
> -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
> - unsigned int keysize)
> -{
> - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> - int i;
> -
> - if (keysize != CHACHA20_KEY_SIZE)
> - return -EINVAL;
> -
> - for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
> - ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
> -
> - return 0;
> -}
> -EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
> -
> -int crypto_chacha20_crypt(struct skcipher_request *req)
> -{
> - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> - struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> - struct skcipher_walk walk;
> - u32 state[16];
> - int err;
> -
> - err = skcipher_walk_virt(&walk, req, true);
> -
> - crypto_chacha20_init(state, ctx, walk.iv);
> -
> - while (walk.nbytes > 0) {
> - unsigned int nbytes = walk.nbytes;
> -
> - if (nbytes < walk.total)
> - nbytes = round_down(nbytes, walk.stride);
> -
> - chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
> - nbytes);
> - err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> - }
> -
> - return err;
> -}
> -EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
> -
> -static struct skcipher_alg alg = {
> - .base.cra_name = "chacha20",
> - .base.cra_driver_name = "chacha20-generic",
> - .base.cra_priority = 100,
> - .base.cra_blocksize = 1,
> - .base.cra_ctxsize = sizeof(struct chacha20_ctx),
> - .base.cra_module = THIS_MODULE,
> -
> - .min_keysize = CHACHA20_KEY_SIZE,
> - .max_keysize = CHACHA20_KEY_SIZE,
> - .ivsize = CHACHA20_IV_SIZE,
> - .chunksize = CHACHA20_BLOCK_SIZE,
> - .setkey = crypto_chacha20_setkey,
> - .encrypt = crypto_chacha20_crypt,
> - .decrypt = crypto_chacha20_crypt,
> -};
> -
> -static int __init chacha20_generic_mod_init(void)
> -{
> - return crypto_register_skcipher(&alg);
> -}
> -
> -static void __exit chacha20_generic_mod_fini(void)
> -{
> - crypto_unregister_skcipher(&alg);
> -}
> -
> -module_init(chacha20_generic_mod_init);
> -module_exit(chacha20_generic_mod_fini);
> -
> -MODULE_LICENSE("GPL");
> -MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
> -MODULE_DESCRIPTION("chacha20 cipher algorithm");
> -MODULE_ALIAS_CRYPTO("chacha20");
> -MODULE_ALIAS_CRYPTO("chacha20-generic");
> diff --git a/crypto/chacha20_zinc.c b/crypto/chacha20_zinc.c
> new file mode 100644
> index 000000000000..5df88fdee066
> --- /dev/null
> +++ b/crypto/chacha20_zinc.c
> @@ -0,0 +1,100 @@
> +/* SPDX-License-Identifier: GPL-2.0
> + *
> + * Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> + */
> +
> +#include <asm/unaligned.h>
> +#include <crypto/algapi.h>
> +#include <crypto/internal/skcipher.h>
> +#include <zinc/chacha20.h>
> +#include <linux/module.h>
> +
> +struct chacha20_key_ctx {
> + u32 key[8];
> +};
> +
> +static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
> + unsigned int keysize)
> +{
> + struct chacha20_key_ctx *key_ctx = crypto_skcipher_ctx(tfm);
> + int i;
> +
> + if (keysize != CHACHA20_KEY_SIZE)
> + return -EINVAL;
> +
> + for (i = 0; i < ARRAY_SIZE(key_ctx->key); ++i)
> + key_ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
> +
> + return 0;
> +}
> +
> +static int crypto_chacha20_crypt(struct skcipher_request *req)
> +{
> + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> + struct chacha20_key_ctx *key_ctx = crypto_skcipher_ctx(tfm);
> + struct chacha20_ctx ctx;
> + struct skcipher_walk walk;
> + simd_context_t simd_context;
> + int err, i;
> +
> + err = skcipher_walk_virt(&walk, req, true);
> + if (unlikely(err))
> + return err;
> +
> + memcpy(ctx.key, key_ctx->key, sizeof(ctx.key));
> + for (i = 0; i < ARRAY_SIZE(ctx.counter); ++i)
> + ctx.counter[i] = get_unaligned_le32(walk.iv + i * sizeof(u32));
> +
> + simd_context = simd_get();
> + while (walk.nbytes > 0) {
> + unsigned int nbytes = walk.nbytes;
> +
> + if (nbytes < walk.total)
> + nbytes = round_down(nbytes, walk.stride);
> +
> + chacha20(&ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes,
> + simd_context);
> +
> + err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> + simd_context = simd_relax(simd_context);
> + }
> + simd_put(simd_context);
> +
> + return err;
> +}
> +
> +static struct skcipher_alg alg = {
> + .base.cra_name = "chacha20",
> + .base.cra_driver_name = "chacha20-software",
> + .base.cra_priority = 100,
> + .base.cra_blocksize = 1,
> + .base.cra_ctxsize = sizeof(struct chacha20_key_ctx),
> + .base.cra_module = THIS_MODULE,
> +
> + .min_keysize = CHACHA20_KEY_SIZE,
> + .max_keysize = CHACHA20_KEY_SIZE,
> + .ivsize = CHACHA20_IV_SIZE,
> + .chunksize = CHACHA20_BLOCK_SIZE,
> + .setkey = crypto_chacha20_setkey,
> + .encrypt = crypto_chacha20_crypt,
> + .decrypt = crypto_chacha20_crypt,
> +};
> +
> +static int __init chacha20_mod_init(void)
> +{
> + return crypto_register_skcipher(&alg);
> +}
> +
> +static void __exit chacha20_mod_exit(void)
> +{
> + crypto_unregister_skcipher(&alg);
> +}
> +
> +module_init(chacha20_mod_init);
> +module_exit(chacha20_mod_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
> +MODULE_DESCRIPTION("ChaCha20 stream cipher");
> +MODULE_ALIAS_CRYPTO("chacha20");
> +MODULE_ALIAS_CRYPTO("chacha20-software");
> diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
> index bf523797bef3..b26adb9ed898 100644
> --- a/crypto/chacha20poly1305.c
> +++ b/crypto/chacha20poly1305.c
> @@ -13,7 +13,7 @@
> #include <crypto/internal/hash.h>
> #include <crypto/internal/skcipher.h>
> #include <crypto/scatterwalk.h>
> -#include <crypto/chacha20.h>
> +#include <zinc/chacha20.h>
> #include <zinc/poly1305.h>
> #include <linux/err.h>
> #include <linux/init.h>
> diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
> index b83d66073db0..3b92f58f3891 100644
> --- a/include/crypto/chacha20.h
> +++ b/include/crypto/chacha20.h
> @@ -6,23 +6,11 @@
> #ifndef _CRYPTO_CHACHA20_H
> #define _CRYPTO_CHACHA20_H
>
> -#include <crypto/skcipher.h>
> -#include <linux/types.h>
> -#include <linux/crypto.h>
> -
> #define CHACHA20_IV_SIZE 16
> #define CHACHA20_KEY_SIZE 32
> #define CHACHA20_BLOCK_SIZE 64
> #define CHACHA20_BLOCK_WORDS (CHACHA20_BLOCK_SIZE / sizeof(u32))
>
> -struct chacha20_ctx {
> - u32 key[8];
> -};
> -
> void chacha20_block(u32 *state, u32 *stream);
> -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
> -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
> - unsigned int keysize);
> -int crypto_chacha20_crypt(struct skcipher_request *req);
>
> #endif
> --
> 2.19.0
>
^ permalink raw reply
* Re: [PATCH iproute2] libnetlink: fix leak and using unused memory on error
From: Mahesh Bandewar (महेश बंडेवार) @ 2018-09-14 17:48 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: Mahesh Bandewar, linux-netdev
In-Reply-To: <20180913193338.20233-1-stephen@networkplumber.org>
On Thu, Sep 13, 2018 at 12:33 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> If an error happens in multi-segment message (tc only)
> then report the error and stop processing further responses.
> This also fixes refering to the buffer after free.
>
> The sequence check is not necessary here because the
> response message has already been validated to be in
> the window of the sequence number of the iov.
>
> Reported-by: Mahesh Bandewar <mahesh@bandewar.net>
> Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Mahesh Bandewar <maheshb@google.com>
> ---
> lib/libnetlink.c | 23 +++++++++--------------
> 1 file changed, 9 insertions(+), 14 deletions(-)
>
> diff --git a/lib/libnetlink.c b/lib/libnetlink.c
> index 928de1dd16d8..586809292594 100644
> --- a/lib/libnetlink.c
> +++ b/lib/libnetlink.c
> @@ -617,7 +617,6 @@ static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
> msg.msg_iovlen = 1;
> i = 0;
> while (1) {
> -next:
> status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
> ++i;
>
> @@ -660,27 +659,23 @@ next:
>
> if (l < sizeof(struct nlmsgerr)) {
> fprintf(stderr, "ERROR truncated\n");
> - } else if (!err->error) {
> + free(buf);
> + return -1;
> + }
> +
> + if (!err->error)
> /* check messages from kernel */
> nl_dump_ext_ack(h, errfn);
>
> - if (answer)
> - *answer = (struct nlmsghdr *)buf;
> - else
> - free(buf);
> - if (h->nlmsg_seq == seq)
> - return 0;
> - else if (i < iovlen)
> - goto next;
> - return 0;
> - }
> -
> if (rtnl->proto != NETLINK_SOCK_DIAG &&
> show_rtnl_err)
> rtnl_talk_error(h, err, errfn);
>
> errno = -err->error;
> - free(buf);
> + if (answer)
> + *answer = (struct nlmsghdr *)buf;
> + else
> + free(buf);
> return -i;
> }
>
> --
> 2.18.0
>
^ permalink raw reply
* Re: [PATCH] net: caif: remove redundant null check on frontpkt
From: Sergei Shtylyov @ 2018-09-14 17:54 UTC (permalink / raw)
To: Colin King, Dmitry Tarnyagin, David S . Miller; +Cc: kernel-janitors, netdev
In-Reply-To: <20180914171916.21298-1-colin.king@canonical.com>
Hello!
On 09/14/2018 08:19 PM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
>
> It is impossible for frontpkt to be null at the point of the null
> check because it has been assigned from rearpkt and there is no
> way realpkt can be null at the point of the assignment because
rearpkt?
> of the sanity checking and exit paths taken previously. Remove
> the redundant null check.
>
> Detected by CoverityScan, CID#114434 ("Logically dead code")
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
[...]
MBR, Sergei
^ permalink raw reply
* [PATCH net-next RFC 0/8] udp and configurable gro
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
From: Willem de Bruijn <willemb@google.com>
This is a *very rough* draft. Mainly for discussion while we also
look at another partially overlapping approach [1].
Reduce UDP receive cost for bulk traffic by enabling datagram
coalescing with GRO.
Before adding more GRO callbacks, make GRO configurable by the
administrator to optionally reduce the attack surface of this
early receive path. See also [2].
Introduce sysctls net.(core|ipv4|ipv6).gro that expose the table of
protocols for which GRO is support. Allow the administrator to disable
individual entries in the table.
To have a single infrastructure, convert dev_offloads to the
table-based approach to existing inet(6)_offloads. Additional small
benefit is that ipv6 will no longer take two list lookups to find.
Patch 1 converts dev_offloads to the infra of inet(6)_offloads
Patch 2 deduplicates gro_complete logic now that all share infra
Patch 3 does the same for gro_receive, in anticipation of adding
a branch to check whether gro_receive is enabled
Patch 4 harmonizes ipv6 header opts, so that those, too can be
optionally disabled.
Patch 5 makes inet(6)_offloads non-const to allow disabling a flag
Patch 6 introduces the administrative sysctl
Patch 7 avoids udp gro cost if no udp gro callback is register
Patch 8 introduces udp gro
[1] http://patchwork.ozlabs.org/project/netdev/list/?series=65741
[2] http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf
Willem de Bruijn (8):
gro: convert device offloads to net_offload
gro: deduplicate gro_complete
gro: add net_gro_receive
ipv6: remove offload exception for hopopts
net: deconstify net_offload
net: make gro configurable
udp: gro behind static key
udp: add gro
drivers/net/geneve.c | 11 +---
drivers/net/vxlan.c | 8 +++
include/linux/netdevice.h | 64 +++++++++++++++++++--
include/net/protocol.h | 19 ++-----
include/net/udp.h | 2 +
include/uapi/linux/udp.h | 1 +
net/8021q/vlan.c | 12 +---
net/core/dev.c | 112 ++++++++-----------------------------
net/core/sysctl_net_core.c | 60 ++++++++++++++++++++
net/ethernet/eth.c | 13 +----
net/ipv4/af_inet.c | 21 ++-----
net/ipv4/esp4_offload.c | 2 +-
net/ipv4/fou.c | 41 ++++----------
net/ipv4/gre_offload.c | 26 ++++-----
net/ipv4/protocol.c | 10 ++--
net/ipv4/sysctl_net_ipv4.c | 7 +++
net/ipv4/tcp_offload.c | 2 +-
net/ipv4/udp.c | 73 +++++++++++++++++++++++-
net/ipv4/udp_offload.c | 19 +++----
net/ipv6/esp6_offload.c | 2 +-
net/ipv6/exthdrs_offload.c | 17 +++++-
net/ipv6/ip6_offload.c | 69 +++++++++--------------
net/ipv6/protocol.c | 10 ++--
net/ipv6/sysctl_net_ipv6.c | 8 +++
net/ipv6/tcpv6_offload.c | 2 +-
net/ipv6/udp.c | 2 +-
net/ipv6/udp_offload.c | 4 +-
net/sctp/offload.c | 2 +-
28 files changed, 344 insertions(+), 275 deletions(-)
--
2.19.0.397.gdd90340f6a-goog
^ permalink raw reply
* [PATCH net-next RFC 1/8] gro: convert device offloads to net_offload
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
In preparation of making GRO receive configurable, have all offloads
share the same infrastructure.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
include/linux/netdevice.h | 17 +++++-
include/net/protocol.h | 7 ---
net/core/dev.c | 105 +++++++++++++-------------------------
3 files changed, 51 insertions(+), 78 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e2b3bd750c98..7425068fa249 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2366,13 +2366,18 @@ struct offload_callbacks {
int (*gro_complete)(struct sk_buff *skb, int nhoff);
};
-struct packet_offload {
+struct net_offload {
__be16 type; /* This is really htons(ether_type). */
u16 priority;
struct offload_callbacks callbacks;
- struct list_head list;
+ unsigned int flags; /* Flags used by IPv6 for now */
};
+#define packet_offload net_offload
+
+/* This should be set for any extension header which is compatible with GSO. */
+#define INET6_PROTO_GSO_EXTHDR 0x1
+
/* often modified stats are per-CPU, other are shared (netdev->stats) */
struct pcpu_sw_netstats {
u64 rx_packets;
@@ -3554,6 +3559,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi);
struct packet_offload *gro_find_receive_by_type(__be16 type);
struct packet_offload *gro_find_complete_by_type(__be16 type);
+static inline u8 net_offload_from_type(u16 type)
+{
+ /* Do not bother handling collisions. There are none.
+ * If they do occur with new offloads, add a mapping function here.
+ */
+ return type & 0xFF;
+}
+
static inline void napi_free_frags(struct napi_struct *napi)
{
kfree_skb(napi->skb);
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 4fc75f7ae23b..53a0322ee545 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -69,13 +69,6 @@ struct inet6_protocol {
#define INET6_PROTO_FINAL 0x2
#endif
-struct net_offload {
- struct offload_callbacks callbacks;
- unsigned int flags; /* Flags used by IPv6 for now */
-};
-/* This should be set for any extension header which is compatible with GSO. */
-#define INET6_PROTO_GSO_EXTHDR 0x1
-
/* This is used to register socket interfaces for IP protocols. */
struct inet_protosw {
struct list_head list;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b2d777e5b9e..55f86b6d3182 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -154,7 +154,6 @@
#define GRO_MAX_HEAD (MAX_HEADER + 128)
static DEFINE_SPINLOCK(ptype_lock);
-static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
static struct list_head offload_base __read_mostly;
@@ -467,6 +466,9 @@ void dev_remove_pack(struct packet_type *pt)
EXPORT_SYMBOL(dev_remove_pack);
+const struct net_offload __rcu *dev_offloads[256] __read_mostly;
+EXPORT_SYMBOL(dev_offloads);
+
/**
* dev_add_offload - register offload handlers
* @po: protocol offload declaration
@@ -481,15 +483,9 @@ EXPORT_SYMBOL(dev_remove_pack);
*/
void dev_add_offload(struct packet_offload *po)
{
- struct packet_offload *elem;
-
- spin_lock(&offload_lock);
- list_for_each_entry(elem, &offload_base, list) {
- if (po->priority < elem->priority)
- break;
- }
- list_add_rcu(&po->list, elem->list.prev);
- spin_unlock(&offload_lock);
+ cmpxchg((const struct net_offload **)
+ &dev_offloads[net_offload_from_type(po->type)],
+ NULL, po);
}
EXPORT_SYMBOL(dev_add_offload);
@@ -506,23 +502,11 @@ EXPORT_SYMBOL(dev_add_offload);
* and must not be freed until after all the CPU's have gone
* through a quiescent state.
*/
-static void __dev_remove_offload(struct packet_offload *po)
+static int __dev_remove_offload(struct packet_offload *po)
{
- struct list_head *head = &offload_base;
- struct packet_offload *po1;
-
- spin_lock(&offload_lock);
-
- list_for_each_entry(po1, head, list) {
- if (po == po1) {
- list_del_rcu(&po->list);
- goto out;
- }
- }
-
- pr_warn("dev_remove_offload: %p not found\n", po);
-out:
- spin_unlock(&offload_lock);
+ return (cmpxchg((const struct net_offload **)
+ &dev_offloads[net_offload_from_type(po->type)],
+ po, NULL) == po) ? 0 : -1;
}
/**
@@ -2962,7 +2946,7 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_offload *ptype;
+ const struct net_offload *off;
int vlan_depth = skb->mac_len;
__be16 type = skb_network_protocol(skb, &vlan_depth);
@@ -2972,12 +2956,9 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
__skb_pull(skb, vlan_depth);
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
- if (ptype->type == type && ptype->callbacks.gso_segment) {
- segs = ptype->callbacks.gso_segment(skb, features);
- break;
- }
- }
+ off = rcu_dereference(dev_offloads[net_offload_from_type(type)]);
+ if (off && off->type == type && off->callbacks.gso_segment)
+ segs = off->callbacks.gso_segment(skb, features);
rcu_read_unlock();
__skb_push(skb, skb->data - skb_mac_header(skb));
@@ -5254,9 +5235,8 @@ static void flush_all_backlogs(void)
static int napi_gro_complete(struct sk_buff *skb)
{
- struct packet_offload *ptype;
+ const struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &offload_base;
int err = -ENOENT;
BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
@@ -5267,17 +5247,12 @@ static int napi_gro_complete(struct sk_buff *skb)
}
rcu_read_lock();
- list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || !ptype->callbacks.gro_complete)
- continue;
-
+ ptype = dev_offloads[net_offload_from_type(type)];
+ if (ptype && ptype->callbacks.gro_complete)
err = ptype->callbacks.gro_complete(skb, 0);
- break;
- }
rcu_read_unlock();
if (err) {
- WARN_ON(&ptype->list == head);
kfree_skb(skb);
return NET_RX_SUCCESS;
}
@@ -5417,8 +5392,7 @@ static void gro_flush_oldest(struct list_head *head)
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
- struct list_head *head = &offload_base;
- struct packet_offload *ptype;
+ const struct packet_offload *ptype;
__be16 type = skb->protocol;
struct list_head *gro_head;
struct sk_buff *pp = NULL;
@@ -5432,10 +5406,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
gro_head = gro_list_prepare(napi, skb);
rcu_read_lock();
- list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || !ptype->callbacks.gro_receive)
- continue;
-
+ ptype = dev_offloads[net_offload_from_type(type)];
+ if (ptype && ptype->callbacks.gro_receive) {
skb_set_network_header(skb, skb_gro_offset(skb));
skb_reset_mac_len(skb);
NAPI_GRO_CB(skb)->same_flow = 0;
@@ -5464,12 +5436,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
}
pp = ptype->callbacks.gro_receive(gro_head, skb);
- break;
- }
- rcu_read_unlock();
-
- if (&ptype->list == head)
+ rcu_read_unlock();
+ } else {
+ rcu_read_unlock();
goto normal;
+ }
if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) {
ret = GRO_CONSUMED;
@@ -5524,29 +5495,25 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
struct packet_offload *gro_find_receive_by_type(__be16 type)
{
- struct list_head *offload_head = &offload_base;
- struct packet_offload *ptype;
+ struct net_offload *off;
- list_for_each_entry_rcu(ptype, offload_head, list) {
- if (ptype->type != type || !ptype->callbacks.gro_receive)
- continue;
- return ptype;
- }
- return NULL;
+ off = (struct net_offload *) rcu_dereference(dev_offloads[type & 0xFF]);
+ if (off && off->type == type && off->callbacks.gro_receive)
+ return off;
+ else
+ return NULL;
}
EXPORT_SYMBOL(gro_find_receive_by_type);
struct packet_offload *gro_find_complete_by_type(__be16 type)
{
- struct list_head *offload_head = &offload_base;
- struct packet_offload *ptype;
+ struct net_offload *off;
- list_for_each_entry_rcu(ptype, offload_head, list) {
- if (ptype->type != type || !ptype->callbacks.gro_complete)
- continue;
- return ptype;
- }
- return NULL;
+ off = (struct net_offload *) rcu_dereference(dev_offloads[type & 0xFF]);
+ if (off && off->type == type && off->callbacks.gro_complete)
+ return off;
+ else
+ return NULL;
}
EXPORT_SYMBOL(gro_find_complete_by_type);
--
2.19.0.397.gdd90340f6a-goog
^ permalink raw reply related
* [PATCH net-next RFC 2/8] gro: deduplicate gro_complete
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
The gro completion datapath is open coded for all protocols.
Deduplicate with new helper function net_gro_complete.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
drivers/net/geneve.c | 9 +--------
include/linux/netdevice.h | 19 ++++++++++++++++++-
net/8021q/vlan.c | 10 +---------
net/core/dev.c | 24 +-----------------------
net/ethernet/eth.c | 11 +----------
net/ipv4/af_inet.c | 15 ++-------------
net/ipv4/fou.c | 25 +++----------------------
net/ipv4/gre_offload.c | 12 +++---------
net/ipv6/ip6_offload.c | 13 +------------
9 files changed, 31 insertions(+), 107 deletions(-)
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 6625fabe2c88..a3a4621d9bee 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -488,7 +488,6 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
int nhoff)
{
struct genevehdr *gh;
- struct packet_offload *ptype;
__be16 type;
int gh_len;
int err = -ENOSYS;
@@ -497,13 +496,7 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
gh_len = geneve_hlen(gh);
type = gh->proto_type;
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype)
- err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
-
- rcu_read_unlock();
-
+ err = net_gro_complete(dev_offloads, type, skb, nhoff + gh_len);
skb_set_inner_mac_header(skb, nhoff + gh_len);
return err;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7425068fa249..0d292ea6716e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3557,7 +3557,8 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
struct packet_offload *gro_find_receive_by_type(__be16 type);
-struct packet_offload *gro_find_complete_by_type(__be16 type);
+
+extern const struct net_offload __rcu *dev_offloads[256];
static inline u8 net_offload_from_type(u16 type)
{
@@ -3567,6 +3568,22 @@ static inline u8 net_offload_from_type(u16 type)
return type & 0xFF;
}
+static inline int net_gro_complete(const struct net_offload __rcu **offs,
+ u16 type, struct sk_buff *skb, int nhoff)
+{
+ const struct net_offload *off;
+ int ret = -ENOENT;
+
+ rcu_read_lock();
+ off = rcu_dereference(offs[net_offload_from_type(type)]);
+ if (off && off->callbacks.gro_complete &&
+ (!off->type || off->type == type))
+ ret = off->callbacks.gro_complete(skb, nhoff);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static inline void napi_free_frags(struct napi_struct *napi)
{
kfree_skb(napi->skb);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5e9950453955..6ac27aa9f158 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -703,16 +703,8 @@ static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
{
struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
__be16 type = vhdr->h_vlan_encapsulated_proto;
- struct packet_offload *ptype;
- int err = -ENOENT;
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype)
- err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
-
- rcu_read_unlock();
- return err;
+ return net_gro_complete(dev_offloads, type, skb, nhoff + sizeof(*vhdr));
}
static struct packet_offload vlan_packet_offloads[] __read_mostly = {
diff --git a/net/core/dev.c b/net/core/dev.c
index 55f86b6d3182..2c21e507291f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5235,10 +5235,6 @@ static void flush_all_backlogs(void)
static int napi_gro_complete(struct sk_buff *skb)
{
- const struct packet_offload *ptype;
- __be16 type = skb->protocol;
- int err = -ENOENT;
-
BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
if (NAPI_GRO_CB(skb)->count == 1) {
@@ -5246,13 +5242,7 @@ static int napi_gro_complete(struct sk_buff *skb)
goto out;
}
- rcu_read_lock();
- ptype = dev_offloads[net_offload_from_type(type)];
- if (ptype && ptype->callbacks.gro_complete)
- err = ptype->callbacks.gro_complete(skb, 0);
- rcu_read_unlock();
-
- if (err) {
+ if (net_gro_complete(dev_offloads, skb->protocol, skb, 0)) {
kfree_skb(skb);
return NET_RX_SUCCESS;
}
@@ -5505,18 +5495,6 @@ struct packet_offload *gro_find_receive_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_receive_by_type);
-struct packet_offload *gro_find_complete_by_type(__be16 type)
-{
- struct net_offload *off;
-
- off = (struct net_offload *) rcu_dereference(dev_offloads[type & 0xFF]);
- if (off && off->type == type && off->callbacks.gro_complete)
- return off;
- else
- return NULL;
-}
-EXPORT_SYMBOL(gro_find_complete_by_type);
-
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index fd8faa0dfa61..fb17a13722e8 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -485,20 +485,11 @@ int eth_gro_complete(struct sk_buff *skb, int nhoff)
{
struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff);
__be16 type = eh->h_proto;
- struct packet_offload *ptype;
- int err = -ENOSYS;
if (skb->encapsulation)
skb_set_inner_mac_header(skb, nhoff);
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype != NULL)
- err = ptype->callbacks.gro_complete(skb, nhoff +
- sizeof(struct ethhdr));
-
- rcu_read_unlock();
- return err;
+ return net_gro_complete(dev_offloads, type, skb, nhoff + sizeof(*eh));
}
EXPORT_SYMBOL(eth_gro_complete);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1fbe2f815474..1b72ee4a7811 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1560,9 +1560,7 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
- const struct net_offload *ops;
int proto = iph->protocol;
- int err = -ENOSYS;
if (skb->encapsulation) {
skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
@@ -1572,21 +1570,12 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
- rcu_read_lock();
- ops = rcu_dereference(inet_offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_complete))
- goto out_unlock;
-
/* Only need to add sizeof(*iph) to get to the next hdr below
* because any hdr with option will have been flushed in
* inet_gro_receive().
*/
- err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
-
-out_unlock:
- rcu_read_unlock();
-
- return err;
+ return net_gro_complete(inet_offloads, proto, skb,
+ nhoff + sizeof(*iph));
}
EXPORT_SYMBOL(inet_gro_complete);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 500a59906b87..c42a3ef17864 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -261,24 +261,14 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
int nhoff)
{
- const struct net_offload *ops;
u8 proto = fou_from_sock(sk)->protocol;
- int err = -ENOSYS;
const struct net_offload **offloads;
+ int err;
- rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_complete))
- goto out_unlock;
-
- err = ops->callbacks.gro_complete(skb, nhoff);
-
+ err = net_gro_complete(offloads, proto, skb, nhoff);
skb_set_inner_mac_header(skb, nhoff);
-out_unlock:
- rcu_read_unlock();
-
return err;
}
@@ -457,7 +447,6 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
{
const struct net_offload **offloads;
struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
- const struct net_offload *ops;
unsigned int guehlen = 0;
u8 proto;
int err = -ENOENT;
@@ -483,18 +472,10 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
return err;
}
- rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_complete))
- goto out_unlock;
-
- err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
-
+ err = net_gro_complete(offloads, proto, skb, nhoff + guehlen);
skb_set_inner_mac_header(skb, nhoff + guehlen);
-out_unlock:
- rcu_read_unlock();
return err;
}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 6c63524f598a..fc8c99e4a058 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -231,10 +231,9 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
static int gre_gro_complete(struct sk_buff *skb, int nhoff)
{
struct gre_base_hdr *greh = (struct gre_base_hdr *)(skb->data + nhoff);
- struct packet_offload *ptype;
unsigned int grehlen = sizeof(*greh);
- int err = -ENOENT;
__be16 type;
+ int err;
skb->encapsulation = 1;
skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
@@ -246,13 +245,8 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
if (greh->flags & GRE_CSUM)
grehlen += GRE_HEADER_SECTION;
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype)
- err = ptype->callbacks.gro_complete(skb, nhoff + grehlen);
-
- rcu_read_unlock();
-
+ err = net_gro_complete(dev_offloads, type, skb,
+ nhoff + sizeof(*greh));
skb_set_inner_mac_header(skb, nhoff + grehlen);
return err;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index c7e495f12011..e8bf554ae611 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -298,7 +298,6 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
- int err = -ENOSYS;
if (skb->encapsulation) {
skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
@@ -307,18 +306,8 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
- rcu_read_lock();
-
nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
- if (WARN_ON(!ops || !ops->callbacks.gro_complete))
- goto out_unlock;
-
- err = ops->callbacks.gro_complete(skb, nhoff);
-
-out_unlock:
- rcu_read_unlock();
-
- return err;
+ return net_gro_complete(inet6_offloads, ops->type, skb, nhoff);
}
static int sit_gro_complete(struct sk_buff *skb, int nhoff)
--
2.19.0.397.gdd90340f6a-goog
^ permalink raw reply related
* [PATCH net-next RFC 3/8] gro: add net_gro_receive
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
For configurable gro_receive all callsites need to be updated. Similar
to gro_complete, introduce a single shared helper, net_gro_receive.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
drivers/net/geneve.c | 2 +-
include/linux/netdevice.h | 14 +++++++++++++-
net/8021q/vlan.c | 2 +-
net/core/dev.c | 20 ++++----------------
net/ethernet/eth.c | 2 +-
net/ipv4/af_inet.c | 4 ++--
net/ipv4/fou.c | 8 ++++----
net/ipv4/gre_offload.c | 12 ++++++------
net/ipv6/ip6_offload.c | 8 ++++----
9 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index a3a4621d9bee..a812a774e5fd 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -467,7 +467,7 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
type = gh->proto_type;
rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
+ ptype = net_gro_receive(dev_offloads, type);
if (!ptype)
goto out_unlock;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0d292ea6716e..0be594f8d1ce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3556,7 +3556,6 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
-struct packet_offload *gro_find_receive_by_type(__be16 type);
extern const struct net_offload __rcu *dev_offloads[256];
@@ -3568,6 +3567,19 @@ static inline u8 net_offload_from_type(u16 type)
return type & 0xFF;
}
+static inline const struct net_offload *
+net_gro_receive(const struct net_offload __rcu **offs, u16 type)
+{
+ const struct net_offload *off;
+
+ off = rcu_dereference(offs[net_offload_from_type(type)]);
+ if (off && off->callbacks.gro_receive &&
+ (!off->type || off->type == type))
+ return off;
+ else
+ return NULL;
+}
+
static inline int net_gro_complete(const struct net_offload __rcu **offs,
u16 type, struct sk_buff *skb, int nhoff)
{
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6ac27aa9f158..a106c5373b1d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -670,7 +670,7 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head,
type = vhdr->h_vlan_encapsulated_proto;
rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
+ ptype = net_gro_receive(dev_offloads, type);
if (!ptype)
goto out_unlock;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2c21e507291f..ae5fbd4114d2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5382,7 +5382,7 @@ static void gro_flush_oldest(struct list_head *head)
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
- const struct packet_offload *ptype;
+ const struct net_offload *ops;
__be16 type = skb->protocol;
struct list_head *gro_head;
struct sk_buff *pp = NULL;
@@ -5396,8 +5396,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
gro_head = gro_list_prepare(napi, skb);
rcu_read_lock();
- ptype = dev_offloads[net_offload_from_type(type)];
- if (ptype && ptype->callbacks.gro_receive) {
+ ops = net_gro_receive(dev_offloads, type);
+ if (ops) {
skb_set_network_header(skb, skb_gro_offset(skb));
skb_reset_mac_len(skb);
NAPI_GRO_CB(skb)->same_flow = 0;
@@ -5425,7 +5425,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->csum_valid = 0;
}
- pp = ptype->callbacks.gro_receive(gro_head, skb);
+ pp = ops->callbacks.gro_receive(gro_head, skb);
rcu_read_unlock();
} else {
rcu_read_unlock();
@@ -5483,18 +5483,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
goto pull;
}
-struct packet_offload *gro_find_receive_by_type(__be16 type)
-{
- struct net_offload *off;
-
- off = (struct net_offload *) rcu_dereference(dev_offloads[type & 0xFF]);
- if (off && off->type == type && off->callbacks.gro_receive)
- return off;
- else
- return NULL;
-}
-EXPORT_SYMBOL(gro_find_receive_by_type);
-
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index fb17a13722e8..542dbc2ec956 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -462,7 +462,7 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
type = eh->h_proto;
rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
+ ptype = net_gro_receive(dev_offloads, type);
if (ptype == NULL) {
flush = 1;
goto out_unlock;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1b72ee4a7811..28b7c7671789 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1409,8 +1409,8 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
proto = iph->protocol;
rcu_read_lock();
- ops = rcu_dereference(inet_offloads[proto]);
- if (!ops || !ops->callbacks.gro_receive)
+ ops = net_gro_receive(inet_offloads, proto);
+ if (!ops)
goto out_unlock;
if (*(u8 *)iph != 0x45)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index c42a3ef17864..13401cb2e7a4 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -246,8 +246,8 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[proto]);
- if (!ops || !ops->callbacks.gro_receive)
+ ops = net_gro_receive(offloads, proto);
+ if (!ops)
goto out_unlock;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
@@ -428,8 +428,8 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[proto]);
- if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
+ ops = net_gro_receive(offloads, proto);
+ if (WARN_ON_ONCE(!ops))
goto out_unlock;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index fc8c99e4a058..4f9237a4bea1 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -111,13 +111,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
static struct sk_buff *gre_gro_receive(struct list_head *head,
struct sk_buff *skb)
{
- struct sk_buff *pp = NULL;
- struct sk_buff *p;
const struct gre_base_hdr *greh;
+ const struct net_offload *ops;
unsigned int hlen, grehlen;
+ struct sk_buff *pp = NULL;
+ struct sk_buff *p;
unsigned int off;
int flush = 1;
- struct packet_offload *ptype;
__be16 type;
if (NAPI_GRO_CB(skb)->encap_mark)
@@ -154,8 +154,8 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
type = greh->protocol;
rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
- if (!ptype)
+ ops = net_gro_receive(dev_offloads, type);
+ if (!ops)
goto out_unlock;
grehlen = GRE_HEADER_SECTION;
@@ -217,7 +217,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
skb_gro_postpull_rcsum(skb, greh, grehlen);
- pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
flush = 0;
out_unlock:
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index e8bf554ae611..9d301bef0e23 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -194,8 +194,8 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head,
rcu_read_lock();
proto = iph->nexthdr;
- ops = rcu_dereference(inet6_offloads[proto]);
- if (!ops || !ops->callbacks.gro_receive) {
+ ops = net_gro_receive(inet6_offloads, proto);
+ if (!ops) {
__pskb_pull(skb, skb_gro_offset(skb));
skb_gro_frag0_invalidate(skb);
proto = ipv6_gso_pull_exthdrs(skb, proto);
@@ -203,8 +203,8 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head,
skb_reset_transport_header(skb);
__skb_push(skb, skb_gro_offset(skb));
- ops = rcu_dereference(inet6_offloads[proto]);
- if (!ops || !ops->callbacks.gro_receive)
+ ops = net_gro_receive(inet6_offloads, proto);
+ if (!ops)
goto out_unlock;
iph = ipv6_hdr(skb);
--
2.19.0.397.gdd90340f6a-goog
^ permalink raw reply related
* [PATCH net-next RFC 4/8] ipv6: remove offload exception for hopopts
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
Extension headers in ipv6 are pulled without calling a callback
function. An inet6_offload signals this feature with flag
INET6_PROTO_GSO_EXTHDR.
Add net_has_flag helper to hide implementation details and in
prepartion for configurable gro.
Convert NEXTHDR_HOP from a special case branch to a standard
extension header offload.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
include/linux/netdevice.h | 9 +++++++++
net/ipv6/exthdrs_offload.c | 17 ++++++++++++++---
net/ipv6/ip6_offload.c | 36 +++++++++++++-----------------------
3 files changed, 36 insertions(+), 26 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0be594f8d1ce..1c97a048506f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3567,6 +3567,15 @@ static inline u8 net_offload_from_type(u16 type)
return type & 0xFF;
}
+static inline bool net_offload_has_flag(const struct net_offload __rcu **offs,
+ u16 type, u16 flag)
+{
+ const struct net_offload *off;
+
+ off = offs ? rcu_dereference(offs[net_offload_from_type(type)]) : NULL;
+ return off && off->flags & flag;
+}
+
static inline const struct net_offload *
net_gro_receive(const struct net_offload __rcu **offs, u16 type)
{
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index f5e2ba1c18bf..2230331c6012 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -12,11 +12,15 @@
#include <net/protocol.h>
#include "ip6_offload.h"
-static const struct net_offload rthdr_offload = {
+static struct net_offload hophdr_offload = {
.flags = INET6_PROTO_GSO_EXTHDR,
};
-static const struct net_offload dstopt_offload = {
+static struct net_offload rthdr_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
+static struct net_offload dstopt_offload = {
.flags = INET6_PROTO_GSO_EXTHDR,
};
@@ -24,10 +28,14 @@ int __init ipv6_exthdrs_offload_init(void)
{
int ret;
- ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
+ ret = inet6_add_offload(&hophdr_offload, IPPROTO_HOPOPTS);
if (ret)
goto out;
+ ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
+ if (ret)
+ goto out_hop;
+
ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
if (ret)
goto out_rt;
@@ -37,5 +45,8 @@ int __init ipv6_exthdrs_offload_init(void)
out_rt:
inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+
+out_hop:
+ inet6_del_offload(&rthdr_offload, IPPROTO_HOPOPTS);
goto out;
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 9d301bef0e23..4854509a2c5d 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -22,21 +22,13 @@
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
- const struct net_offload *ops = NULL;
-
for (;;) {
struct ipv6_opt_hdr *opth;
int len;
- if (proto != NEXTHDR_HOP) {
- ops = rcu_dereference(inet6_offloads[proto]);
-
- if (unlikely(!ops))
- break;
-
- if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
+ if (!net_offload_has_flag(inet6_offloads, proto,
+ INET6_PROTO_GSO_EXTHDR))
+ break;
if (unlikely(!pskb_may_pull(skb, 8)))
break;
@@ -141,26 +133,24 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
/* Return the total length of all the extension hdrs, following the same
* logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs.
*/
-static int ipv6_exthdrs_len(struct ipv6hdr *iph,
- const struct net_offload **opps)
+static int ipv6_exthdrs_len(struct ipv6hdr *iph, u8 *pproto)
{
struct ipv6_opt_hdr *opth = (void *)iph;
int len = 0, proto, optlen = sizeof(*iph);
proto = iph->nexthdr;
for (;;) {
- if (proto != NEXTHDR_HOP) {
- *opps = rcu_dereference(inet6_offloads[proto]);
- if (unlikely(!(*opps)))
- break;
- if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
+ if (!net_offload_has_flag(inet6_offloads, proto,
+ INET6_PROTO_GSO_EXTHDR))
+ break;
+
opth = (void *)opth + optlen;
optlen = ipv6_optlen(opth);
len += optlen;
proto = opth->nexthdr;
}
+
+ *pproto = proto;
return len;
}
@@ -296,8 +286,8 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
- const struct net_offload *ops;
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
+ u8 proto;
if (skb->encapsulation) {
skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
@@ -306,8 +296,8 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
- nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
- return net_gro_complete(inet6_offloads, ops->type, skb, nhoff);
+ nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &proto);
+ return net_gro_complete(inet6_offloads, proto, skb, nhoff);
}
static int sit_gro_complete(struct sk_buff *skb, int nhoff)
--
2.19.0.397.gdd90340f6a-goog
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox