Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next RFC 4/8] ipv6: remove offload exception for hopopts
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
  To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Extension headers in ipv6 are pulled without calling a callback
function. An inet6_offload signals this feature with flag
INET6_PROTO_GSO_EXTHDR.

Add net_has_flag helper to hide implementation details and in
prepartion for configurable gro.

Convert NEXTHDR_HOP from a special case branch to a standard
extension header offload.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/netdevice.h  |  9 +++++++++
 net/ipv6/exthdrs_offload.c | 17 ++++++++++++++---
 net/ipv6/ip6_offload.c     | 36 +++++++++++++-----------------------
 3 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0be594f8d1ce..1c97a048506f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3567,6 +3567,15 @@ static inline u8 net_offload_from_type(u16 type)
 	return type & 0xFF;
 }
 
+static inline bool net_offload_has_flag(const struct net_offload __rcu **offs,
+					u16 type, u16 flag)
+{
+	const struct net_offload *off;
+
+	off = offs ? rcu_dereference(offs[net_offload_from_type(type)]) : NULL;
+	return off && off->flags & flag;
+}
+
 static inline const struct net_offload *
 net_gro_receive(const struct net_offload __rcu **offs, u16 type)
 {
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index f5e2ba1c18bf..2230331c6012 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -12,11 +12,15 @@
 #include <net/protocol.h>
 #include "ip6_offload.h"
 
-static const struct net_offload rthdr_offload = {
+static struct net_offload hophdr_offload = {
 	.flags		=	INET6_PROTO_GSO_EXTHDR,
 };
 
-static const struct net_offload dstopt_offload = {
+static struct net_offload rthdr_offload = {
+	.flags		=	INET6_PROTO_GSO_EXTHDR,
+};
+
+static struct net_offload dstopt_offload = {
 	.flags		=	INET6_PROTO_GSO_EXTHDR,
 };
 
@@ -24,10 +28,14 @@ int __init ipv6_exthdrs_offload_init(void)
 {
 	int ret;
 
-	ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
+	ret = inet6_add_offload(&hophdr_offload, IPPROTO_HOPOPTS);
 	if (ret)
 		goto out;
 
+	ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
+	if (ret)
+		goto out_hop;
+
 	ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
 	if (ret)
 		goto out_rt;
@@ -37,5 +45,8 @@ int __init ipv6_exthdrs_offload_init(void)
 
 out_rt:
 	inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+
+out_hop:
+	inet6_del_offload(&rthdr_offload, IPPROTO_HOPOPTS);
 	goto out;
 }
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 9d301bef0e23..4854509a2c5d 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -22,21 +22,13 @@
 
 static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
 {
-	const struct net_offload *ops = NULL;
-
 	for (;;) {
 		struct ipv6_opt_hdr *opth;
 		int len;
 
-		if (proto != NEXTHDR_HOP) {
-			ops = rcu_dereference(inet6_offloads[proto]);
-
-			if (unlikely(!ops))
-				break;
-
-			if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
-				break;
-		}
+		if (!net_offload_has_flag(inet6_offloads, proto,
+					  INET6_PROTO_GSO_EXTHDR))
+			break;
 
 		if (unlikely(!pskb_may_pull(skb, 8)))
 			break;
@@ -141,26 +133,24 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 /* Return the total length of all the extension hdrs, following the same
  * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs.
  */
-static int ipv6_exthdrs_len(struct ipv6hdr *iph,
-			    const struct net_offload **opps)
+static int ipv6_exthdrs_len(struct ipv6hdr *iph, u8 *pproto)
 {
 	struct ipv6_opt_hdr *opth = (void *)iph;
 	int len = 0, proto, optlen = sizeof(*iph);
 
 	proto = iph->nexthdr;
 	for (;;) {
-		if (proto != NEXTHDR_HOP) {
-			*opps = rcu_dereference(inet6_offloads[proto]);
-			if (unlikely(!(*opps)))
-				break;
-			if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
-				break;
-		}
+		if (!net_offload_has_flag(inet6_offloads, proto,
+					  INET6_PROTO_GSO_EXTHDR))
+			break;
+
 		opth = (void *)opth + optlen;
 		optlen = ipv6_optlen(opth);
 		len += optlen;
 		proto = opth->nexthdr;
 	}
+
+	*pproto = proto;
 	return len;
 }
 
@@ -296,8 +286,8 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
 
 static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 {
-	const struct net_offload *ops;
 	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
+	u8 proto;
 
 	if (skb->encapsulation) {
 		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
@@ -306,8 +296,8 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 
 	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
 
-	nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
-	return net_gro_complete(inet6_offloads, ops->type, skb, nhoff);
+	nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &proto);
+	return net_gro_complete(inet6_offloads, proto, skb, nhoff);
 }
 
 static int sit_gro_complete(struct sk_buff *skb, int nhoff)
-- 
2.19.0.397.gdd90340f6a-goog

^ permalink raw reply related

* [PATCH net-next RFC 3/8] gro: add net_gro_receive
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
  To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

For configurable gro_receive all callsites need to be updated. Similar
to gro_complete, introduce a single shared helper, net_gro_receive.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 drivers/net/geneve.c      |  2 +-
 include/linux/netdevice.h | 14 +++++++++++++-
 net/8021q/vlan.c          |  2 +-
 net/core/dev.c            | 20 ++++----------------
 net/ethernet/eth.c        |  2 +-
 net/ipv4/af_inet.c        |  4 ++--
 net/ipv4/fou.c            |  8 ++++----
 net/ipv4/gre_offload.c    | 12 ++++++------
 net/ipv6/ip6_offload.c    |  8 ++++----
 9 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index a3a4621d9bee..a812a774e5fd 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -467,7 +467,7 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
 	type = gh->proto_type;
 
 	rcu_read_lock();
-	ptype = gro_find_receive_by_type(type);
+	ptype = net_gro_receive(dev_offloads, type);
 	if (!ptype)
 		goto out_unlock;
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0d292ea6716e..0be594f8d1ce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3556,7 +3556,6 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
 gro_result_t napi_gro_frags(struct napi_struct *napi);
-struct packet_offload *gro_find_receive_by_type(__be16 type);
 
 extern const struct net_offload __rcu *dev_offloads[256];
 
@@ -3568,6 +3567,19 @@ static inline u8 net_offload_from_type(u16 type)
 	return type & 0xFF;
 }
 
+static inline const struct net_offload *
+net_gro_receive(const struct net_offload __rcu **offs, u16 type)
+{
+	const struct net_offload *off;
+
+	off = rcu_dereference(offs[net_offload_from_type(type)]);
+	if (off && off->callbacks.gro_receive &&
+	    (!off->type || off->type == type))
+		return off;
+	else
+		return NULL;
+}
+
 static inline int net_gro_complete(const struct net_offload __rcu **offs,
 				   u16 type, struct sk_buff *skb, int nhoff)
 {
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 6ac27aa9f158..a106c5373b1d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -670,7 +670,7 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head,
 	type = vhdr->h_vlan_encapsulated_proto;
 
 	rcu_read_lock();
-	ptype = gro_find_receive_by_type(type);
+	ptype = net_gro_receive(dev_offloads, type);
 	if (!ptype)
 		goto out_unlock;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 2c21e507291f..ae5fbd4114d2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5382,7 +5382,7 @@ static void gro_flush_oldest(struct list_head *head)
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
-	const struct packet_offload *ptype;
+	const struct net_offload *ops;
 	__be16 type = skb->protocol;
 	struct list_head *gro_head;
 	struct sk_buff *pp = NULL;
@@ -5396,8 +5396,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	gro_head = gro_list_prepare(napi, skb);
 
 	rcu_read_lock();
-	ptype = dev_offloads[net_offload_from_type(type)];
-	if (ptype && ptype->callbacks.gro_receive) {
+	ops = net_gro_receive(dev_offloads, type);
+	if (ops) {
 		skb_set_network_header(skb, skb_gro_offset(skb));
 		skb_reset_mac_len(skb);
 		NAPI_GRO_CB(skb)->same_flow = 0;
@@ -5425,7 +5425,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			NAPI_GRO_CB(skb)->csum_valid = 0;
 		}
 
-		pp = ptype->callbacks.gro_receive(gro_head, skb);
+		pp = ops->callbacks.gro_receive(gro_head, skb);
 		rcu_read_unlock();
 	} else {
 		rcu_read_unlock();
@@ -5483,18 +5483,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	goto pull;
 }
 
-struct packet_offload *gro_find_receive_by_type(__be16 type)
-{
-	struct net_offload *off;
-
-	off = (struct net_offload *) rcu_dereference(dev_offloads[type & 0xFF]);
-	if (off && off->type == type && off->callbacks.gro_receive)
-		return off;
-	else
-		return NULL;
-}
-EXPORT_SYMBOL(gro_find_receive_by_type);
-
 static void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
 	skb_dst_drop(skb);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index fb17a13722e8..542dbc2ec956 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -462,7 +462,7 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
 	type = eh->h_proto;
 
 	rcu_read_lock();
-	ptype = gro_find_receive_by_type(type);
+	ptype = net_gro_receive(dev_offloads, type);
 	if (ptype == NULL) {
 		flush = 1;
 		goto out_unlock;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1b72ee4a7811..28b7c7671789 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1409,8 +1409,8 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 	proto = iph->protocol;
 
 	rcu_read_lock();
-	ops = rcu_dereference(inet_offloads[proto]);
-	if (!ops || !ops->callbacks.gro_receive)
+	ops = net_gro_receive(inet_offloads, proto);
+	if (!ops)
 		goto out_unlock;
 
 	if (*(u8 *)iph != 0x45)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index c42a3ef17864..13401cb2e7a4 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -246,8 +246,8 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
 
 	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
-	ops = rcu_dereference(offloads[proto]);
-	if (!ops || !ops->callbacks.gro_receive)
+	ops = net_gro_receive(offloads, proto);
+	if (!ops)
 		goto out_unlock;
 
 	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
@@ -428,8 +428,8 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
 
 	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
-	ops = rcu_dereference(offloads[proto]);
-	if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
+	ops = net_gro_receive(offloads, proto);
+	if (WARN_ON_ONCE(!ops))
 		goto out_unlock;
 
 	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index fc8c99e4a058..4f9237a4bea1 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -111,13 +111,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 static struct sk_buff *gre_gro_receive(struct list_head *head,
 				       struct sk_buff *skb)
 {
-	struct sk_buff *pp = NULL;
-	struct sk_buff *p;
 	const struct gre_base_hdr *greh;
+	const struct net_offload *ops;
 	unsigned int hlen, grehlen;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
 	unsigned int off;
 	int flush = 1;
-	struct packet_offload *ptype;
 	__be16 type;
 
 	if (NAPI_GRO_CB(skb)->encap_mark)
@@ -154,8 +154,8 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
 	type = greh->protocol;
 
 	rcu_read_lock();
-	ptype = gro_find_receive_by_type(type);
-	if (!ptype)
+	ops = net_gro_receive(dev_offloads, type);
+	if (!ops)
 		goto out_unlock;
 
 	grehlen = GRE_HEADER_SECTION;
@@ -217,7 +217,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
 	/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
 	skb_gro_postpull_rcsum(skb, greh, grehlen);
 
-	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
 	flush = 0;
 
 out_unlock:
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index e8bf554ae611..9d301bef0e23 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -194,8 +194,8 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head,
 
 	rcu_read_lock();
 	proto = iph->nexthdr;
-	ops = rcu_dereference(inet6_offloads[proto]);
-	if (!ops || !ops->callbacks.gro_receive) {
+	ops = net_gro_receive(inet6_offloads, proto);
+	if (!ops) {
 		__pskb_pull(skb, skb_gro_offset(skb));
 		skb_gro_frag0_invalidate(skb);
 		proto = ipv6_gso_pull_exthdrs(skb, proto);
@@ -203,8 +203,8 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head,
 		skb_reset_transport_header(skb);
 		__skb_push(skb, skb_gro_offset(skb));
 
-		ops = rcu_dereference(inet6_offloads[proto]);
-		if (!ops || !ops->callbacks.gro_receive)
+		ops = net_gro_receive(inet6_offloads, proto);
+		if (!ops)
 			goto out_unlock;
 
 		iph = ipv6_hdr(skb);
-- 
2.19.0.397.gdd90340f6a-goog

^ permalink raw reply related

* [PATCH net-next RFC 2/8] gro: deduplicate gro_complete
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
  To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

The gro completion datapath is open coded for all protocols.
Deduplicate with new helper function net_gro_complete.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 drivers/net/geneve.c      |  9 +--------
 include/linux/netdevice.h | 19 ++++++++++++++++++-
 net/8021q/vlan.c          | 10 +---------
 net/core/dev.c            | 24 +-----------------------
 net/ethernet/eth.c        | 11 +----------
 net/ipv4/af_inet.c        | 15 ++-------------
 net/ipv4/fou.c            | 25 +++----------------------
 net/ipv4/gre_offload.c    | 12 +++---------
 net/ipv6/ip6_offload.c    | 13 +------------
 9 files changed, 31 insertions(+), 107 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 6625fabe2c88..a3a4621d9bee 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -488,7 +488,6 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
 			       int nhoff)
 {
 	struct genevehdr *gh;
-	struct packet_offload *ptype;
 	__be16 type;
 	int gh_len;
 	int err = -ENOSYS;
@@ -497,13 +496,7 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
 	gh_len = geneve_hlen(gh);
 	type = gh->proto_type;
 
-	rcu_read_lock();
-	ptype = gro_find_complete_by_type(type);
-	if (ptype)
-		err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
-
-	rcu_read_unlock();
-
+	err = net_gro_complete(dev_offloads, type, skb, nhoff + gh_len);
 	skb_set_inner_mac_header(skb, nhoff + gh_len);
 
 	return err;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7425068fa249..0d292ea6716e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3557,7 +3557,8 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
 gro_result_t napi_gro_frags(struct napi_struct *napi);
 struct packet_offload *gro_find_receive_by_type(__be16 type);
-struct packet_offload *gro_find_complete_by_type(__be16 type);
+
+extern const struct net_offload __rcu *dev_offloads[256];
 
 static inline u8 net_offload_from_type(u16 type)
 {
@@ -3567,6 +3568,22 @@ static inline u8 net_offload_from_type(u16 type)
 	return type & 0xFF;
 }
 
+static inline int net_gro_complete(const struct net_offload __rcu **offs,
+				   u16 type, struct sk_buff *skb, int nhoff)
+{
+	const struct net_offload *off;
+	int ret = -ENOENT;
+
+	rcu_read_lock();
+	off = rcu_dereference(offs[net_offload_from_type(type)]);
+	if (off && off->callbacks.gro_complete &&
+	    (!off->type || off->type == type))
+		ret = off->callbacks.gro_complete(skb, nhoff);
+	rcu_read_unlock();
+
+	return ret;
+}
+
 static inline void napi_free_frags(struct napi_struct *napi)
 {
 	kfree_skb(napi->skb);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5e9950453955..6ac27aa9f158 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -703,16 +703,8 @@ static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
 	__be16 type = vhdr->h_vlan_encapsulated_proto;
-	struct packet_offload *ptype;
-	int err = -ENOENT;
 
-	rcu_read_lock();
-	ptype = gro_find_complete_by_type(type);
-	if (ptype)
-		err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
-
-	rcu_read_unlock();
-	return err;
+	return net_gro_complete(dev_offloads, type, skb, nhoff + sizeof(*vhdr));
 }
 
 static struct packet_offload vlan_packet_offloads[] __read_mostly = {
diff --git a/net/core/dev.c b/net/core/dev.c
index 55f86b6d3182..2c21e507291f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5235,10 +5235,6 @@ static void flush_all_backlogs(void)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-	const struct packet_offload *ptype;
-	__be16 type = skb->protocol;
-	int err = -ENOENT;
-
 	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
 
 	if (NAPI_GRO_CB(skb)->count == 1) {
@@ -5246,13 +5242,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 		goto out;
 	}
 
-	rcu_read_lock();
-	ptype = dev_offloads[net_offload_from_type(type)];
-	if (ptype && ptype->callbacks.gro_complete)
-		err = ptype->callbacks.gro_complete(skb, 0);
-	rcu_read_unlock();
-
-	if (err) {
+	if (net_gro_complete(dev_offloads, skb->protocol, skb, 0)) {
 		kfree_skb(skb);
 		return NET_RX_SUCCESS;
 	}
@@ -5505,18 +5495,6 @@ struct packet_offload *gro_find_receive_by_type(__be16 type)
 }
 EXPORT_SYMBOL(gro_find_receive_by_type);
 
-struct packet_offload *gro_find_complete_by_type(__be16 type)
-{
-	struct net_offload *off;
-
-	off = (struct net_offload *) rcu_dereference(dev_offloads[type & 0xFF]);
-	if (off && off->type == type && off->callbacks.gro_complete)
-		return off;
-	else
-		return NULL;
-}
-EXPORT_SYMBOL(gro_find_complete_by_type);
-
 static void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
 	skb_dst_drop(skb);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index fd8faa0dfa61..fb17a13722e8 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -485,20 +485,11 @@ int eth_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff);
 	__be16 type = eh->h_proto;
-	struct packet_offload *ptype;
-	int err = -ENOSYS;
 
 	if (skb->encapsulation)
 		skb_set_inner_mac_header(skb, nhoff);
 
-	rcu_read_lock();
-	ptype = gro_find_complete_by_type(type);
-	if (ptype != NULL)
-		err = ptype->callbacks.gro_complete(skb, nhoff +
-						    sizeof(struct ethhdr));
-
-	rcu_read_unlock();
-	return err;
+	return net_gro_complete(dev_offloads, type, skb, nhoff + sizeof(*eh));
 }
 EXPORT_SYMBOL(eth_gro_complete);
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1fbe2f815474..1b72ee4a7811 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1560,9 +1560,7 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	__be16 newlen = htons(skb->len - nhoff);
 	struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
-	const struct net_offload *ops;
 	int proto = iph->protocol;
-	int err = -ENOSYS;
 
 	if (skb->encapsulation) {
 		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
@@ -1572,21 +1570,12 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
 	csum_replace2(&iph->check, iph->tot_len, newlen);
 	iph->tot_len = newlen;
 
-	rcu_read_lock();
-	ops = rcu_dereference(inet_offloads[proto]);
-	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
-		goto out_unlock;
-
 	/* Only need to add sizeof(*iph) to get to the next hdr below
 	 * because any hdr with option will have been flushed in
 	 * inet_gro_receive().
 	 */
-	err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
-
-out_unlock:
-	rcu_read_unlock();
-
-	return err;
+	return net_gro_complete(inet_offloads, proto, skb,
+				nhoff + sizeof(*iph));
 }
 EXPORT_SYMBOL(inet_gro_complete);
 
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 500a59906b87..c42a3ef17864 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -261,24 +261,14 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
 static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
 			    int nhoff)
 {
-	const struct net_offload *ops;
 	u8 proto = fou_from_sock(sk)->protocol;
-	int err = -ENOSYS;
 	const struct net_offload **offloads;
+	int err;
 
-	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
-	ops = rcu_dereference(offloads[proto]);
-	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
-		goto out_unlock;
-
-	err = ops->callbacks.gro_complete(skb, nhoff);
-
+	err = net_gro_complete(offloads, proto, skb, nhoff);
 	skb_set_inner_mac_header(skb, nhoff);
 
-out_unlock:
-	rcu_read_unlock();
-
 	return err;
 }
 
@@ -457,7 +447,6 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
 {
 	const struct net_offload **offloads;
 	struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
-	const struct net_offload *ops;
 	unsigned int guehlen = 0;
 	u8 proto;
 	int err = -ENOENT;
@@ -483,18 +472,10 @@ static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
 		return err;
 	}
 
-	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
-	ops = rcu_dereference(offloads[proto]);
-	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
-		goto out_unlock;
-
-	err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
-
+	err = net_gro_complete(offloads, proto, skb, nhoff + guehlen);
 	skb_set_inner_mac_header(skb, nhoff + guehlen);
 
-out_unlock:
-	rcu_read_unlock();
 	return err;
 }
 
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 6c63524f598a..fc8c99e4a058 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -231,10 +231,9 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
 static int gre_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	struct gre_base_hdr *greh = (struct gre_base_hdr *)(skb->data + nhoff);
-	struct packet_offload *ptype;
 	unsigned int grehlen = sizeof(*greh);
-	int err = -ENOENT;
 	__be16 type;
+	int err;
 
 	skb->encapsulation = 1;
 	skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
@@ -246,13 +245,8 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
 	if (greh->flags & GRE_CSUM)
 		grehlen += GRE_HEADER_SECTION;
 
-	rcu_read_lock();
-	ptype = gro_find_complete_by_type(type);
-	if (ptype)
-		err = ptype->callbacks.gro_complete(skb, nhoff + grehlen);
-
-	rcu_read_unlock();
-
+	err = net_gro_complete(dev_offloads, type, skb,
+				nhoff + sizeof(*greh));
 	skb_set_inner_mac_header(skb, nhoff + grehlen);
 
 	return err;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index c7e495f12011..e8bf554ae611 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -298,7 +298,6 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct net_offload *ops;
 	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
-	int err = -ENOSYS;
 
 	if (skb->encapsulation) {
 		skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
@@ -307,18 +306,8 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 
 	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
 
-	rcu_read_lock();
-
 	nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
-	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
-		goto out_unlock;
-
-	err = ops->callbacks.gro_complete(skb, nhoff);
-
-out_unlock:
-	rcu_read_unlock();
-
-	return err;
+	return net_gro_complete(inet6_offloads, ops->type, skb, nhoff);
 }
 
 static int sit_gro_complete(struct sk_buff *skb, int nhoff)
-- 
2.19.0.397.gdd90340f6a-goog

^ permalink raw reply related

* [PATCH net-next RFC 1/8] gro: convert device offloads to net_offload
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
  To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

In preparation of making GRO receive configurable, have all offloads
share the same infrastructure.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/netdevice.h |  17 +++++-
 include/net/protocol.h    |   7 ---
 net/core/dev.c            | 105 +++++++++++++-------------------------
 3 files changed, 51 insertions(+), 78 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e2b3bd750c98..7425068fa249 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2366,13 +2366,18 @@ struct offload_callbacks {
 	int			(*gro_complete)(struct sk_buff *skb, int nhoff);
 };
 
-struct packet_offload {
+struct net_offload {
 	__be16			 type;	/* This is really htons(ether_type). */
 	u16			 priority;
 	struct offload_callbacks callbacks;
-	struct list_head	 list;
+	unsigned int		 flags;	/* Flags used by IPv6 for now */
 };
 
+#define packet_offload	net_offload
+
+/* This should be set for any extension header which is compatible with GSO. */
+#define INET6_PROTO_GSO_EXTHDR	0x1
+
 /* often modified stats are per-CPU, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
 	u64     rx_packets;
@@ -3554,6 +3559,14 @@ gro_result_t napi_gro_frags(struct napi_struct *napi);
 struct packet_offload *gro_find_receive_by_type(__be16 type);
 struct packet_offload *gro_find_complete_by_type(__be16 type);
 
+static inline u8 net_offload_from_type(u16 type)
+{
+	/* Do not bother handling collisions. There are none.
+	 * If they do occur with new offloads, add a mapping function here.
+	 */
+	return type & 0xFF;
+}
+
 static inline void napi_free_frags(struct napi_struct *napi)
 {
 	kfree_skb(napi->skb);
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 4fc75f7ae23b..53a0322ee545 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -69,13 +69,6 @@ struct inet6_protocol {
 #define INET6_PROTO_FINAL	0x2
 #endif
 
-struct net_offload {
-	struct offload_callbacks callbacks;
-	unsigned int		 flags;	/* Flags used by IPv6 for now */
-};
-/* This should be set for any extension header which is compatible with GSO. */
-#define INET6_PROTO_GSO_EXTHDR	0x1
-
 /* This is used to register socket interfaces for IP protocols.  */
 struct inet_protosw {
 	struct list_head list;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b2d777e5b9e..55f86b6d3182 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -154,7 +154,6 @@
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
 static DEFINE_SPINLOCK(ptype_lock);
-static DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
@@ -467,6 +466,9 @@ void dev_remove_pack(struct packet_type *pt)
 EXPORT_SYMBOL(dev_remove_pack);
 
 
+const struct net_offload __rcu *dev_offloads[256] __read_mostly;
+EXPORT_SYMBOL(dev_offloads);
+
 /**
  *	dev_add_offload - register offload handlers
  *	@po: protocol offload declaration
@@ -481,15 +483,9 @@ EXPORT_SYMBOL(dev_remove_pack);
  */
 void dev_add_offload(struct packet_offload *po)
 {
-	struct packet_offload *elem;
-
-	spin_lock(&offload_lock);
-	list_for_each_entry(elem, &offload_base, list) {
-		if (po->priority < elem->priority)
-			break;
-	}
-	list_add_rcu(&po->list, elem->list.prev);
-	spin_unlock(&offload_lock);
+	cmpxchg((const struct net_offload **)
+		&dev_offloads[net_offload_from_type(po->type)],
+			NULL, po);
 }
 EXPORT_SYMBOL(dev_add_offload);
 
@@ -506,23 +502,11 @@ EXPORT_SYMBOL(dev_add_offload);
  *	and must not be freed until after all the CPU's have gone
  *	through a quiescent state.
  */
-static void __dev_remove_offload(struct packet_offload *po)
+static int __dev_remove_offload(struct packet_offload *po)
 {
-	struct list_head *head = &offload_base;
-	struct packet_offload *po1;
-
-	spin_lock(&offload_lock);
-
-	list_for_each_entry(po1, head, list) {
-		if (po == po1) {
-			list_del_rcu(&po->list);
-			goto out;
-		}
-	}
-
-	pr_warn("dev_remove_offload: %p not found\n", po);
-out:
-	spin_unlock(&offload_lock);
+	return (cmpxchg((const struct net_offload **)
+			&dev_offloads[net_offload_from_type(po->type)],
+		       po, NULL) == po) ? 0 : -1;
 }
 
 /**
@@ -2962,7 +2946,7 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 				    netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-	struct packet_offload *ptype;
+	const struct net_offload *off;
 	int vlan_depth = skb->mac_len;
 	__be16 type = skb_network_protocol(skb, &vlan_depth);
 
@@ -2972,12 +2956,9 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 	__skb_pull(skb, vlan_depth);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, &offload_base, list) {
-		if (ptype->type == type && ptype->callbacks.gso_segment) {
-			segs = ptype->callbacks.gso_segment(skb, features);
-			break;
-		}
-	}
+	off = rcu_dereference(dev_offloads[net_offload_from_type(type)]);
+	if (off && off->type == type && off->callbacks.gso_segment)
+		segs = off->callbacks.gso_segment(skb, features);
 	rcu_read_unlock();
 
 	__skb_push(skb, skb->data - skb_mac_header(skb));
@@ -5254,9 +5235,8 @@ static void flush_all_backlogs(void)
 
 static int napi_gro_complete(struct sk_buff *skb)
 {
-	struct packet_offload *ptype;
+	const struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &offload_base;
 	int err = -ENOENT;
 
 	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
@@ -5267,17 +5247,12 @@ static int napi_gro_complete(struct sk_buff *skb)
 	}
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || !ptype->callbacks.gro_complete)
-			continue;
-
+	ptype = dev_offloads[net_offload_from_type(type)];
+	if (ptype && ptype->callbacks.gro_complete)
 		err = ptype->callbacks.gro_complete(skb, 0);
-		break;
-	}
 	rcu_read_unlock();
 
 	if (err) {
-		WARN_ON(&ptype->list == head);
 		kfree_skb(skb);
 		return NET_RX_SUCCESS;
 	}
@@ -5417,8 +5392,7 @@ static void gro_flush_oldest(struct list_head *head)
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
-	struct list_head *head = &offload_base;
-	struct packet_offload *ptype;
+	const struct packet_offload *ptype;
 	__be16 type = skb->protocol;
 	struct list_head *gro_head;
 	struct sk_buff *pp = NULL;
@@ -5432,10 +5406,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	gro_head = gro_list_prepare(napi, skb);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || !ptype->callbacks.gro_receive)
-			continue;
-
+	ptype = dev_offloads[net_offload_from_type(type)];
+	if (ptype && ptype->callbacks.gro_receive) {
 		skb_set_network_header(skb, skb_gro_offset(skb));
 		skb_reset_mac_len(skb);
 		NAPI_GRO_CB(skb)->same_flow = 0;
@@ -5464,12 +5436,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		}
 
 		pp = ptype->callbacks.gro_receive(gro_head, skb);
-		break;
-	}
-	rcu_read_unlock();
-
-	if (&ptype->list == head)
+		rcu_read_unlock();
+	} else {
+		rcu_read_unlock();
 		goto normal;
+	}
 
 	if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) {
 		ret = GRO_CONSUMED;
@@ -5524,29 +5495,25 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 
 struct packet_offload *gro_find_receive_by_type(__be16 type)
 {
-	struct list_head *offload_head = &offload_base;
-	struct packet_offload *ptype;
+	struct net_offload *off;
 
-	list_for_each_entry_rcu(ptype, offload_head, list) {
-		if (ptype->type != type || !ptype->callbacks.gro_receive)
-			continue;
-		return ptype;
-	}
-	return NULL;
+	off = (struct net_offload *) rcu_dereference(dev_offloads[type & 0xFF]);
+	if (off && off->type == type && off->callbacks.gro_receive)
+		return off;
+	else
+		return NULL;
 }
 EXPORT_SYMBOL(gro_find_receive_by_type);
 
 struct packet_offload *gro_find_complete_by_type(__be16 type)
 {
-	struct list_head *offload_head = &offload_base;
-	struct packet_offload *ptype;
+	struct net_offload *off;
 
-	list_for_each_entry_rcu(ptype, offload_head, list) {
-		if (ptype->type != type || !ptype->callbacks.gro_complete)
-			continue;
-		return ptype;
-	}
-	return NULL;
+	off = (struct net_offload *) rcu_dereference(dev_offloads[type & 0xFF]);
+	if (off && off->type == type && off->callbacks.gro_complete)
+		return off;
+	else
+		return NULL;
 }
 EXPORT_SYMBOL(gro_find_complete_by_type);
 
-- 
2.19.0.397.gdd90340f6a-goog

^ permalink raw reply related

* [PATCH net-next RFC 0/8] udp and configurable gro
From: Willem de Bruijn @ 2018-09-14 17:59 UTC (permalink / raw)
  To: netdev; +Cc: pabeni, steffen.klassert, davem, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

This is a *very rough* draft. Mainly for discussion while we also
look at another partially overlapping approach [1].

Reduce UDP receive cost for bulk traffic by enabling datagram
coalescing with GRO.

Before adding more GRO callbacks, make GRO configurable by the
administrator to optionally reduce the attack surface of this
early receive path. See also [2].

Introduce sysctls net.(core|ipv4|ipv6).gro that expose the table of
protocols for which GRO is support. Allow the administrator to disable
individual entries in the table.

To have a single infrastructure, convert dev_offloads to the
table-based approach to existing inet(6)_offloads. Additional small
benefit is that ipv6 will no longer take two list lookups to find.

Patch 1 converts dev_offloads to the infra of inet(6)_offloads
Patch 2 deduplicates gro_complete logic now that all share infra
Patch 3 does the same for gro_receive, in anticipation of adding
        a branch to check whether gro_receive is enabled
Patch 4 harmonizes ipv6 header opts, so that those, too can be
        optionally disabled.
Patch 5 makes inet(6)_offloads non-const to allow disabling a flag
Patch 6 introduces the administrative sysctl
Patch 7 avoids udp gro cost if no udp gro callback is register
Patch 8 introduces udp gro

[1] http://patchwork.ozlabs.org/project/netdev/list/?series=65741
[2] http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf

Willem de Bruijn (8):
  gro: convert device offloads to net_offload
  gro: deduplicate gro_complete
  gro: add net_gro_receive
  ipv6: remove offload exception for hopopts
  net: deconstify net_offload
  net: make gro configurable
  udp: gro behind static key
  udp: add gro

 drivers/net/geneve.c       |  11 +---
 drivers/net/vxlan.c        |   8 +++
 include/linux/netdevice.h  |  64 +++++++++++++++++++--
 include/net/protocol.h     |  19 ++-----
 include/net/udp.h          |   2 +
 include/uapi/linux/udp.h   |   1 +
 net/8021q/vlan.c           |  12 +---
 net/core/dev.c             | 112 ++++++++-----------------------------
 net/core/sysctl_net_core.c |  60 ++++++++++++++++++++
 net/ethernet/eth.c         |  13 +----
 net/ipv4/af_inet.c         |  21 ++-----
 net/ipv4/esp4_offload.c    |   2 +-
 net/ipv4/fou.c             |  41 ++++----------
 net/ipv4/gre_offload.c     |  26 ++++-----
 net/ipv4/protocol.c        |  10 ++--
 net/ipv4/sysctl_net_ipv4.c |   7 +++
 net/ipv4/tcp_offload.c     |   2 +-
 net/ipv4/udp.c             |  73 +++++++++++++++++++++++-
 net/ipv4/udp_offload.c     |  19 +++----
 net/ipv6/esp6_offload.c    |   2 +-
 net/ipv6/exthdrs_offload.c |  17 +++++-
 net/ipv6/ip6_offload.c     |  69 +++++++++--------------
 net/ipv6/protocol.c        |  10 ++--
 net/ipv6/sysctl_net_ipv6.c |   8 +++
 net/ipv6/tcpv6_offload.c   |   2 +-
 net/ipv6/udp.c             |   2 +-
 net/ipv6/udp_offload.c     |   4 +-
 net/sctp/offload.c         |   2 +-
 28 files changed, 344 insertions(+), 275 deletions(-)

-- 
2.19.0.397.gdd90340f6a-goog

^ permalink raw reply

* Re: [PATCH] net: caif: remove redundant null check on frontpkt
From: Sergei Shtylyov @ 2018-09-14 17:54 UTC (permalink / raw)
  To: Colin King, Dmitry Tarnyagin, David S . Miller; +Cc: kernel-janitors, netdev
In-Reply-To: <20180914171916.21298-1-colin.king@canonical.com>

Hello!

On 09/14/2018 08:19 PM, Colin King wrote:

> From: Colin Ian King <colin.king@canonical.com>
> 
> It is impossible for frontpkt to be null at the point of the null
> check because it has been assigned from rearpkt and there is no
> way realpkt can be null at the point of the assignment because

   rearpkt?

> of the sanity checking and exit paths taken previously. Remove
> the redundant null check.
> 
> Detected by CoverityScan, CID#114434 ("Logically dead code")
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
[...]

MBR, Sergei

^ permalink raw reply

* Re: [PATCH iproute2] libnetlink: fix leak and using unused memory on error
From: Mahesh Bandewar (महेश बंडेवार) @ 2018-09-14 17:48 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Mahesh Bandewar, linux-netdev
In-Reply-To: <20180913193338.20233-1-stephen@networkplumber.org>

On Thu, Sep 13, 2018 at 12:33 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> If an error happens in multi-segment message (tc only)
> then report the error and stop processing further responses.
> This also fixes refering to the buffer after free.
>
> The sequence check is not necessary here because the
> response message has already been validated to be in
> the window of the sequence number of the iov.
>
> Reported-by: Mahesh Bandewar <mahesh@bandewar.net>
> Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Mahesh Bandewar <maheshb@google.com>
> ---
>  lib/libnetlink.c | 23 +++++++++--------------
>  1 file changed, 9 insertions(+), 14 deletions(-)
>
> diff --git a/lib/libnetlink.c b/lib/libnetlink.c
> index 928de1dd16d8..586809292594 100644
> --- a/lib/libnetlink.c
> +++ b/lib/libnetlink.c
> @@ -617,7 +617,6 @@ static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
>         msg.msg_iovlen = 1;
>         i = 0;
>         while (1) {
> -next:
>                 status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
>                 ++i;
>
> @@ -660,27 +659,23 @@ next:
>
>                                 if (l < sizeof(struct nlmsgerr)) {
>                                         fprintf(stderr, "ERROR truncated\n");
> -                               } else if (!err->error) {
> +                                       free(buf);
> +                                       return -1;
> +                               }
> +
> +                               if (!err->error)
>                                         /* check messages from kernel */
>                                         nl_dump_ext_ack(h, errfn);
>
> -                                       if (answer)
> -                                               *answer = (struct nlmsghdr *)buf;
> -                                       else
> -                                               free(buf);
> -                                       if (h->nlmsg_seq == seq)
> -                                               return 0;
> -                                       else if (i < iovlen)
> -                                               goto next;
> -                                       return 0;
> -                               }
> -
>                                 if (rtnl->proto != NETLINK_SOCK_DIAG &&
>                                     show_rtnl_err)
>                                         rtnl_talk_error(h, err, errfn);
>
>                                 errno = -err->error;
> -                               free(buf);
> +                               if (answer)
> +                                       *answer = (struct nlmsghdr *)buf;
> +                               else
> +                                       free(buf);
>                                 return -i;
>                         }
>
> --
> 2.18.0
>

^ permalink raw reply

* Re: [PATCH net-next v4 18/20] crypto: port ChaCha20 to Zinc
From: Ard Biesheuvel @ 2018-09-14 17:38 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Linux Kernel Mailing List, <netdev@vger.kernel.org>,
	open list:HARDWARE RANDOM NUMBER GENERATOR CORE, David S. Miller,
	Greg Kroah-Hartman, Samuel Neves, Andy Lutomirski,
	Jean-Philippe Aumasson, Eric Biggers
In-Reply-To: <20180914162240.7925-19-Jason@zx2c4.com>

On 14 September 2018 at 18:22, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> Now that ChaCha20 is in Zinc, we can have the crypto API code simply
> call into it. The crypto API expects to have a stored key per instance
> and independent nonces, so we follow suite and store the key and
> initialize the nonce independently.
>

>From our exchange re v3:

>> Then there is the performance claim. We know for instance that the
>> OpenSSL ARM NEON code for ChaCha20 is faster on cores that happen to
>> possess a micro-architectural property that ALU instructions are
>> essentially free when they are interleaved with SIMD instructions. But
>> we also know that a) Cortex-A7, which is a relevant target, is not one
>> of those cores, and b) that chip designers are not likely to optimize
>> for that particular usage pattern so relying on it in generic code is
>> unwise in general.
>
> That's interesting. I'll bring this up with AndyP. FWIW, if you think
> you have a real and compelling claim here, I'd be much more likely to
> accept a different ChaCha20 implementation than I would be to accept a
> different Poly1305 implementation. (It's a *lot* harder to screw up
> ChaCha20 than it is to screw up Poly1305.)
>

so could we please bring that discussion to a close before we drop the ARM code?

I am fine with dropping the arm64 code btw.

> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> Cc: Samuel Neves <sneves@dei.uc.pt>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Greg KH <gregkh@linuxfoundation.org>
> Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
> Cc: Eric Biggers <ebiggers@google.com>
> ---
>  arch/arm/configs/exynos_defconfig       |   1 -
>  arch/arm/configs/multi_v7_defconfig     |   1 -
>  arch/arm/configs/omap2plus_defconfig    |   1 -
>  arch/arm/crypto/Kconfig                 |   6 -
>  arch/arm/crypto/Makefile                |   2 -
>  arch/arm/crypto/chacha20-neon-core.S    | 521 --------------------
>  arch/arm/crypto/chacha20-neon-glue.c    | 127 -----
>  arch/arm64/configs/defconfig            |   1 -
>  arch/arm64/crypto/Kconfig               |   6 -
>  arch/arm64/crypto/Makefile              |   3 -
>  arch/arm64/crypto/chacha20-neon-core.S  | 450 -----------------
>  arch/arm64/crypto/chacha20-neon-glue.c  | 133 -----
>  arch/x86/crypto/Makefile                |   3 -
>  arch/x86/crypto/chacha20-avx2-x86_64.S  | 448 -----------------
>  arch/x86/crypto/chacha20-ssse3-x86_64.S | 630 ------------------------
>  arch/x86/crypto/chacha20_glue.c         | 146 ------
>  crypto/Kconfig                          |  16 -
>  crypto/Makefile                         |   2 +-
>  crypto/chacha20_generic.c               | 136 -----
>  crypto/chacha20_zinc.c                  | 100 ++++
>  crypto/chacha20poly1305.c               |   2 +-
>  include/crypto/chacha20.h               |  12 -
>  22 files changed, 102 insertions(+), 2645 deletions(-)
>  delete mode 100644 arch/arm/crypto/chacha20-neon-core.S
>  delete mode 100644 arch/arm/crypto/chacha20-neon-glue.c
>  delete mode 100644 arch/arm64/crypto/chacha20-neon-core.S
>  delete mode 100644 arch/arm64/crypto/chacha20-neon-glue.c
>  delete mode 100644 arch/x86/crypto/chacha20-avx2-x86_64.S
>  delete mode 100644 arch/x86/crypto/chacha20-ssse3-x86_64.S
>  delete mode 100644 arch/x86/crypto/chacha20_glue.c
>  delete mode 100644 crypto/chacha20_generic.c
>  create mode 100644 crypto/chacha20_zinc.c
>
> diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
> index 27ea6dfcf2f2..95929b5e7b10 100644
> --- a/arch/arm/configs/exynos_defconfig
> +++ b/arch/arm/configs/exynos_defconfig
> @@ -350,7 +350,6 @@ CONFIG_CRYPTO_SHA1_ARM_NEON=m
>  CONFIG_CRYPTO_SHA256_ARM=m
>  CONFIG_CRYPTO_SHA512_ARM=m
>  CONFIG_CRYPTO_AES_ARM_BS=m
> -CONFIG_CRYPTO_CHACHA20_NEON=m
>  CONFIG_CRC_CCITT=y
>  CONFIG_FONTS=y
>  CONFIG_FONT_7x14=y
> diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
> index fc33444e94f0..63be07724db3 100644
> --- a/arch/arm/configs/multi_v7_defconfig
> +++ b/arch/arm/configs/multi_v7_defconfig
> @@ -1000,4 +1000,3 @@ CONFIG_CRYPTO_AES_ARM_BS=m
>  CONFIG_CRYPTO_AES_ARM_CE=m
>  CONFIG_CRYPTO_GHASH_ARM_CE=m
>  CONFIG_CRYPTO_CRC32_ARM_CE=m
> -CONFIG_CRYPTO_CHACHA20_NEON=m
> diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
> index 6491419b1dad..f585a8ecc336 100644
> --- a/arch/arm/configs/omap2plus_defconfig
> +++ b/arch/arm/configs/omap2plus_defconfig
> @@ -547,7 +547,6 @@ CONFIG_CRYPTO_SHA512_ARM=m
>  CONFIG_CRYPTO_AES_ARM=m
>  CONFIG_CRYPTO_AES_ARM_BS=m
>  CONFIG_CRYPTO_GHASH_ARM_CE=m
> -CONFIG_CRYPTO_CHACHA20_NEON=m
>  CONFIG_CRC_CCITT=y
>  CONFIG_CRC_T10DIF=y
>  CONFIG_CRC_ITU_T=y
> diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
> index 925d1364727a..fb80fd89f0e7 100644
> --- a/arch/arm/crypto/Kconfig
> +++ b/arch/arm/crypto/Kconfig
> @@ -115,12 +115,6 @@ config CRYPTO_CRC32_ARM_CE
>         depends on KERNEL_MODE_NEON && CRC32
>         select CRYPTO_HASH
>
> -config CRYPTO_CHACHA20_NEON
> -       tristate "NEON accelerated ChaCha20 symmetric cipher"
> -       depends on KERNEL_MODE_NEON
> -       select CRYPTO_BLKCIPHER
> -       select CRYPTO_CHACHA20
> -
>  config CRYPTO_SPECK_NEON
>         tristate "NEON accelerated Speck cipher algorithms"
>         depends on KERNEL_MODE_NEON
> diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
> index 8de542c48ade..bbfa98447063 100644
> --- a/arch/arm/crypto/Makefile
> +++ b/arch/arm/crypto/Makefile
> @@ -9,7 +9,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
>  obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
>  obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
>  obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
> -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
>  obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
>
>  ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
> @@ -53,7 +52,6 @@ aes-arm-ce-y  := aes-ce-core.o aes-ce-glue.o
>  ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
>  crct10dif-arm-ce-y     := crct10dif-ce-core.o crct10dif-ce-glue.o
>  crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
> -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
>  speck-neon-y := speck-neon-core.o speck-neon-glue.o
>
>  ifdef REGENERATE_ARM_CRYPTO
> diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
> deleted file mode 100644
> index 451a849ad518..000000000000
> --- a/arch/arm/crypto/chacha20-neon-core.S
> +++ /dev/null
> @@ -1,521 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
> - *
> - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Based on:
> - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <linux/linkage.h>
> -
> -       .text
> -       .fpu            neon
> -       .align          5
> -
> -ENTRY(chacha20_block_xor_neon)
> -       // r0: Input state matrix, s
> -       // r1: 1 data block output, o
> -       // r2: 1 data block input, i
> -
> -       //
> -       // This function encrypts one ChaCha20 block by loading the state matrix
> -       // in four NEON registers. It performs matrix operation on four words in
> -       // parallel, but requireds shuffling to rearrange the words after each
> -       // round.
> -       //
> -
> -       // x0..3 = s0..3
> -       add             ip, r0, #0x20
> -       vld1.32         {q0-q1}, [r0]
> -       vld1.32         {q2-q3}, [ip]
> -
> -       vmov            q8, q0
> -       vmov            q9, q1
> -       vmov            q10, q2
> -       vmov            q11, q3
> -
> -       mov             r3, #10
> -
> -.Ldoubleround:
> -       // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> -       vadd.i32        q0, q0, q1
> -       veor            q3, q3, q0
> -       vrev32.16       q3, q3
> -
> -       // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> -       vadd.i32        q2, q2, q3
> -       veor            q4, q1, q2
> -       vshl.u32        q1, q4, #12
> -       vsri.u32        q1, q4, #20
> -
> -       // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> -       vadd.i32        q0, q0, q1
> -       veor            q4, q3, q0
> -       vshl.u32        q3, q4, #8
> -       vsri.u32        q3, q4, #24
> -
> -       // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> -       vadd.i32        q2, q2, q3
> -       veor            q4, q1, q2
> -       vshl.u32        q1, q4, #7
> -       vsri.u32        q1, q4, #25
> -
> -       // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
> -       vext.8          q1, q1, q1, #4
> -       // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> -       vext.8          q2, q2, q2, #8
> -       // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
> -       vext.8          q3, q3, q3, #12
> -
> -       // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> -       vadd.i32        q0, q0, q1
> -       veor            q3, q3, q0
> -       vrev32.16       q3, q3
> -
> -       // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> -       vadd.i32        q2, q2, q3
> -       veor            q4, q1, q2
> -       vshl.u32        q1, q4, #12
> -       vsri.u32        q1, q4, #20
> -
> -       // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> -       vadd.i32        q0, q0, q1
> -       veor            q4, q3, q0
> -       vshl.u32        q3, q4, #8
> -       vsri.u32        q3, q4, #24
> -
> -       // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> -       vadd.i32        q2, q2, q3
> -       veor            q4, q1, q2
> -       vshl.u32        q1, q4, #7
> -       vsri.u32        q1, q4, #25
> -
> -       // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
> -       vext.8          q1, q1, q1, #12
> -       // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> -       vext.8          q2, q2, q2, #8
> -       // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
> -       vext.8          q3, q3, q3, #4
> -
> -       subs            r3, r3, #1
> -       bne             .Ldoubleround
> -
> -       add             ip, r2, #0x20
> -       vld1.8          {q4-q5}, [r2]
> -       vld1.8          {q6-q7}, [ip]
> -
> -       // o0 = i0 ^ (x0 + s0)
> -       vadd.i32        q0, q0, q8
> -       veor            q0, q0, q4
> -
> -       // o1 = i1 ^ (x1 + s1)
> -       vadd.i32        q1, q1, q9
> -       veor            q1, q1, q5
> -
> -       // o2 = i2 ^ (x2 + s2)
> -       vadd.i32        q2, q2, q10
> -       veor            q2, q2, q6
> -
> -       // o3 = i3 ^ (x3 + s3)
> -       vadd.i32        q3, q3, q11
> -       veor            q3, q3, q7
> -
> -       add             ip, r1, #0x20
> -       vst1.8          {q0-q1}, [r1]
> -       vst1.8          {q2-q3}, [ip]
> -
> -       bx              lr
> -ENDPROC(chacha20_block_xor_neon)
> -
> -       .align          5
> -ENTRY(chacha20_4block_xor_neon)
> -       push            {r4-r6, lr}
> -       mov             ip, sp                  // preserve the stack pointer
> -       sub             r3, sp, #0x20           // allocate a 32 byte buffer
> -       bic             r3, r3, #0x1f           // aligned to 32 bytes
> -       mov             sp, r3
> -
> -       // r0: Input state matrix, s
> -       // r1: 4 data blocks output, o
> -       // r2: 4 data blocks input, i
> -
> -       //
> -       // This function encrypts four consecutive ChaCha20 blocks by loading
> -       // the state matrix in NEON registers four times. The algorithm performs
> -       // each operation on the corresponding word of each state matrix, hence
> -       // requires no word shuffling. For final XORing step we transpose the
> -       // matrix by interleaving 32- and then 64-bit words, which allows us to
> -       // do XOR in NEON registers.
> -       //
> -
> -       // x0..15[0-3] = s0..3[0..3]
> -       add             r3, r0, #0x20
> -       vld1.32         {q0-q1}, [r0]
> -       vld1.32         {q2-q3}, [r3]
> -
> -       adr             r3, CTRINC
> -       vdup.32         q15, d7[1]
> -       vdup.32         q14, d7[0]
> -       vld1.32         {q11}, [r3, :128]
> -       vdup.32         q13, d6[1]
> -       vdup.32         q12, d6[0]
> -       vadd.i32        q12, q12, q11           // x12 += counter values 0-3
> -       vdup.32         q11, d5[1]
> -       vdup.32         q10, d5[0]
> -       vdup.32         q9, d4[1]
> -       vdup.32         q8, d4[0]
> -       vdup.32         q7, d3[1]
> -       vdup.32         q6, d3[0]
> -       vdup.32         q5, d2[1]
> -       vdup.32         q4, d2[0]
> -       vdup.32         q3, d1[1]
> -       vdup.32         q2, d1[0]
> -       vdup.32         q1, d0[1]
> -       vdup.32         q0, d0[0]
> -
> -       mov             r3, #10
> -
> -.Ldoubleround4:
> -       // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
> -       // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
> -       // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
> -       // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
> -       vadd.i32        q0, q0, q4
> -       vadd.i32        q1, q1, q5
> -       vadd.i32        q2, q2, q6
> -       vadd.i32        q3, q3, q7
> -
> -       veor            q12, q12, q0
> -       veor            q13, q13, q1
> -       veor            q14, q14, q2
> -       veor            q15, q15, q3
> -
> -       vrev32.16       q12, q12
> -       vrev32.16       q13, q13
> -       vrev32.16       q14, q14
> -       vrev32.16       q15, q15
> -
> -       // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
> -       // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
> -       // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
> -       // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
> -       vadd.i32        q8, q8, q12
> -       vadd.i32        q9, q9, q13
> -       vadd.i32        q10, q10, q14
> -       vadd.i32        q11, q11, q15
> -
> -       vst1.32         {q8-q9}, [sp, :256]
> -
> -       veor            q8, q4, q8
> -       veor            q9, q5, q9
> -       vshl.u32        q4, q8, #12
> -       vshl.u32        q5, q9, #12
> -       vsri.u32        q4, q8, #20
> -       vsri.u32        q5, q9, #20
> -
> -       veor            q8, q6, q10
> -       veor            q9, q7, q11
> -       vshl.u32        q6, q8, #12
> -       vshl.u32        q7, q9, #12
> -       vsri.u32        q6, q8, #20
> -       vsri.u32        q7, q9, #20
> -
> -       // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
> -       // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
> -       // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
> -       // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
> -       vadd.i32        q0, q0, q4
> -       vadd.i32        q1, q1, q5
> -       vadd.i32        q2, q2, q6
> -       vadd.i32        q3, q3, q7
> -
> -       veor            q8, q12, q0
> -       veor            q9, q13, q1
> -       vshl.u32        q12, q8, #8
> -       vshl.u32        q13, q9, #8
> -       vsri.u32        q12, q8, #24
> -       vsri.u32        q13, q9, #24
> -
> -       veor            q8, q14, q2
> -       veor            q9, q15, q3
> -       vshl.u32        q14, q8, #8
> -       vshl.u32        q15, q9, #8
> -       vsri.u32        q14, q8, #24
> -       vsri.u32        q15, q9, #24
> -
> -       vld1.32         {q8-q9}, [sp, :256]
> -
> -       // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
> -       // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
> -       // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
> -       // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
> -       vadd.i32        q8, q8, q12
> -       vadd.i32        q9, q9, q13
> -       vadd.i32        q10, q10, q14
> -       vadd.i32        q11, q11, q15
> -
> -       vst1.32         {q8-q9}, [sp, :256]
> -
> -       veor            q8, q4, q8
> -       veor            q9, q5, q9
> -       vshl.u32        q4, q8, #7
> -       vshl.u32        q5, q9, #7
> -       vsri.u32        q4, q8, #25
> -       vsri.u32        q5, q9, #25
> -
> -       veor            q8, q6, q10
> -       veor            q9, q7, q11
> -       vshl.u32        q6, q8, #7
> -       vshl.u32        q7, q9, #7
> -       vsri.u32        q6, q8, #25
> -       vsri.u32        q7, q9, #25
> -
> -       vld1.32         {q8-q9}, [sp, :256]
> -
> -       // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
> -       // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
> -       // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
> -       // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
> -       vadd.i32        q0, q0, q5
> -       vadd.i32        q1, q1, q6
> -       vadd.i32        q2, q2, q7
> -       vadd.i32        q3, q3, q4
> -
> -       veor            q15, q15, q0
> -       veor            q12, q12, q1
> -       veor            q13, q13, q2
> -       veor            q14, q14, q3
> -
> -       vrev32.16       q15, q15
> -       vrev32.16       q12, q12
> -       vrev32.16       q13, q13
> -       vrev32.16       q14, q14
> -
> -       // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
> -       // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
> -       // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
> -       // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
> -       vadd.i32        q10, q10, q15
> -       vadd.i32        q11, q11, q12
> -       vadd.i32        q8, q8, q13
> -       vadd.i32        q9, q9, q14
> -
> -       vst1.32         {q8-q9}, [sp, :256]
> -
> -       veor            q8, q7, q8
> -       veor            q9, q4, q9
> -       vshl.u32        q7, q8, #12
> -       vshl.u32        q4, q9, #12
> -       vsri.u32        q7, q8, #20
> -       vsri.u32        q4, q9, #20
> -
> -       veor            q8, q5, q10
> -       veor            q9, q6, q11
> -       vshl.u32        q5, q8, #12
> -       vshl.u32        q6, q9, #12
> -       vsri.u32        q5, q8, #20
> -       vsri.u32        q6, q9, #20
> -
> -       // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
> -       // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
> -       // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
> -       // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
> -       vadd.i32        q0, q0, q5
> -       vadd.i32        q1, q1, q6
> -       vadd.i32        q2, q2, q7
> -       vadd.i32        q3, q3, q4
> -
> -       veor            q8, q15, q0
> -       veor            q9, q12, q1
> -       vshl.u32        q15, q8, #8
> -       vshl.u32        q12, q9, #8
> -       vsri.u32        q15, q8, #24
> -       vsri.u32        q12, q9, #24
> -
> -       veor            q8, q13, q2
> -       veor            q9, q14, q3
> -       vshl.u32        q13, q8, #8
> -       vshl.u32        q14, q9, #8
> -       vsri.u32        q13, q8, #24
> -       vsri.u32        q14, q9, #24
> -
> -       vld1.32         {q8-q9}, [sp, :256]
> -
> -       // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
> -       // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
> -       // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
> -       // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
> -       vadd.i32        q10, q10, q15
> -       vadd.i32        q11, q11, q12
> -       vadd.i32        q8, q8, q13
> -       vadd.i32        q9, q9, q14
> -
> -       vst1.32         {q8-q9}, [sp, :256]
> -
> -       veor            q8, q7, q8
> -       veor            q9, q4, q9
> -       vshl.u32        q7, q8, #7
> -       vshl.u32        q4, q9, #7
> -       vsri.u32        q7, q8, #25
> -       vsri.u32        q4, q9, #25
> -
> -       veor            q8, q5, q10
> -       veor            q9, q6, q11
> -       vshl.u32        q5, q8, #7
> -       vshl.u32        q6, q9, #7
> -       vsri.u32        q5, q8, #25
> -       vsri.u32        q6, q9, #25
> -
> -       subs            r3, r3, #1
> -       beq             0f
> -
> -       vld1.32         {q8-q9}, [sp, :256]
> -       b               .Ldoubleround4
> -
> -       // x0[0-3] += s0[0]
> -       // x1[0-3] += s0[1]
> -       // x2[0-3] += s0[2]
> -       // x3[0-3] += s0[3]
> -0:     ldmia           r0!, {r3-r6}
> -       vdup.32         q8, r3
> -       vdup.32         q9, r4
> -       vadd.i32        q0, q0, q8
> -       vadd.i32        q1, q1, q9
> -       vdup.32         q8, r5
> -       vdup.32         q9, r6
> -       vadd.i32        q2, q2, q8
> -       vadd.i32        q3, q3, q9
> -
> -       // x4[0-3] += s1[0]
> -       // x5[0-3] += s1[1]
> -       // x6[0-3] += s1[2]
> -       // x7[0-3] += s1[3]
> -       ldmia           r0!, {r3-r6}
> -       vdup.32         q8, r3
> -       vdup.32         q9, r4
> -       vadd.i32        q4, q4, q8
> -       vadd.i32        q5, q5, q9
> -       vdup.32         q8, r5
> -       vdup.32         q9, r6
> -       vadd.i32        q6, q6, q8
> -       vadd.i32        q7, q7, q9
> -
> -       // interleave 32-bit words in state n, n+1
> -       vzip.32         q0, q1
> -       vzip.32         q2, q3
> -       vzip.32         q4, q5
> -       vzip.32         q6, q7
> -
> -       // interleave 64-bit words in state n, n+2
> -       vswp            d1, d4
> -       vswp            d3, d6
> -       vswp            d9, d12
> -       vswp            d11, d14
> -
> -       // xor with corresponding input, write to output
> -       vld1.8          {q8-q9}, [r2]!
> -       veor            q8, q8, q0
> -       veor            q9, q9, q4
> -       vst1.8          {q8-q9}, [r1]!
> -
> -       vld1.32         {q8-q9}, [sp, :256]
> -
> -       // x8[0-3] += s2[0]
> -       // x9[0-3] += s2[1]
> -       // x10[0-3] += s2[2]
> -       // x11[0-3] += s2[3]
> -       ldmia           r0!, {r3-r6}
> -       vdup.32         q0, r3
> -       vdup.32         q4, r4
> -       vadd.i32        q8, q8, q0
> -       vadd.i32        q9, q9, q4
> -       vdup.32         q0, r5
> -       vdup.32         q4, r6
> -       vadd.i32        q10, q10, q0
> -       vadd.i32        q11, q11, q4
> -
> -       // x12[0-3] += s3[0]
> -       // x13[0-3] += s3[1]
> -       // x14[0-3] += s3[2]
> -       // x15[0-3] += s3[3]
> -       ldmia           r0!, {r3-r6}
> -       vdup.32         q0, r3
> -       vdup.32         q4, r4
> -       adr             r3, CTRINC
> -       vadd.i32        q12, q12, q0
> -       vld1.32         {q0}, [r3, :128]
> -       vadd.i32        q13, q13, q4
> -       vadd.i32        q12, q12, q0            // x12 += counter values 0-3
> -
> -       vdup.32         q0, r5
> -       vdup.32         q4, r6
> -       vadd.i32        q14, q14, q0
> -       vadd.i32        q15, q15, q4
> -
> -       // interleave 32-bit words in state n, n+1
> -       vzip.32         q8, q9
> -       vzip.32         q10, q11
> -       vzip.32         q12, q13
> -       vzip.32         q14, q15
> -
> -       // interleave 64-bit words in state n, n+2
> -       vswp            d17, d20
> -       vswp            d19, d22
> -       vswp            d25, d28
> -       vswp            d27, d30
> -
> -       vmov            q4, q1
> -
> -       vld1.8          {q0-q1}, [r2]!
> -       veor            q0, q0, q8
> -       veor            q1, q1, q12
> -       vst1.8          {q0-q1}, [r1]!
> -
> -       vld1.8          {q0-q1}, [r2]!
> -       veor            q0, q0, q2
> -       veor            q1, q1, q6
> -       vst1.8          {q0-q1}, [r1]!
> -
> -       vld1.8          {q0-q1}, [r2]!
> -       veor            q0, q0, q10
> -       veor            q1, q1, q14
> -       vst1.8          {q0-q1}, [r1]!
> -
> -       vld1.8          {q0-q1}, [r2]!
> -       veor            q0, q0, q4
> -       veor            q1, q1, q5
> -       vst1.8          {q0-q1}, [r1]!
> -
> -       vld1.8          {q0-q1}, [r2]!
> -       veor            q0, q0, q9
> -       veor            q1, q1, q13
> -       vst1.8          {q0-q1}, [r1]!
> -
> -       vld1.8          {q0-q1}, [r2]!
> -       veor            q0, q0, q3
> -       veor            q1, q1, q7
> -       vst1.8          {q0-q1}, [r1]!
> -
> -       vld1.8          {q0-q1}, [r2]
> -       veor            q0, q0, q11
> -       veor            q1, q1, q15
> -       vst1.8          {q0-q1}, [r1]
> -
> -       mov             sp, ip
> -       pop             {r4-r6, pc}
> -ENDPROC(chacha20_4block_xor_neon)
> -
> -       .align          4
> -CTRINC:        .word           0, 1, 2, 3
> diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c
> deleted file mode 100644
> index 59a7be08e80c..000000000000
> --- a/arch/arm/crypto/chacha20-neon-glue.c
> +++ /dev/null
> @@ -1,127 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
> - *
> - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Based on:
> - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <crypto/algapi.h>
> -#include <crypto/chacha20.h>
> -#include <crypto/internal/skcipher.h>
> -#include <linux/kernel.h>
> -#include <linux/module.h>
> -
> -#include <asm/hwcap.h>
> -#include <asm/neon.h>
> -#include <asm/simd.h>
> -
> -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
> -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
> -
> -static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
> -                           unsigned int bytes)
> -{
> -       u8 buf[CHACHA20_BLOCK_SIZE];
> -
> -       while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
> -               chacha20_4block_xor_neon(state, dst, src);
> -               bytes -= CHACHA20_BLOCK_SIZE * 4;
> -               src += CHACHA20_BLOCK_SIZE * 4;
> -               dst += CHACHA20_BLOCK_SIZE * 4;
> -               state[12] += 4;
> -       }
> -       while (bytes >= CHACHA20_BLOCK_SIZE) {
> -               chacha20_block_xor_neon(state, dst, src);
> -               bytes -= CHACHA20_BLOCK_SIZE;
> -               src += CHACHA20_BLOCK_SIZE;
> -               dst += CHACHA20_BLOCK_SIZE;
> -               state[12]++;
> -       }
> -       if (bytes) {
> -               memcpy(buf, src, bytes);
> -               chacha20_block_xor_neon(state, buf, buf);
> -               memcpy(dst, buf, bytes);
> -       }
> -}
> -
> -static int chacha20_neon(struct skcipher_request *req)
> -{
> -       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> -       struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> -       struct skcipher_walk walk;
> -       u32 state[16];
> -       int err;
> -
> -       if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
> -               return crypto_chacha20_crypt(req);
> -
> -       err = skcipher_walk_virt(&walk, req, true);
> -
> -       crypto_chacha20_init(state, ctx, walk.iv);
> -
> -       kernel_neon_begin();
> -       while (walk.nbytes > 0) {
> -               unsigned int nbytes = walk.nbytes;
> -
> -               if (nbytes < walk.total)
> -                       nbytes = round_down(nbytes, walk.stride);
> -
> -               chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
> -                               nbytes);
> -               err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> -       }
> -       kernel_neon_end();
> -
> -       return err;
> -}
> -
> -static struct skcipher_alg alg = {
> -       .base.cra_name          = "chacha20",
> -       .base.cra_driver_name   = "chacha20-neon",
> -       .base.cra_priority      = 300,
> -       .base.cra_blocksize     = 1,
> -       .base.cra_ctxsize       = sizeof(struct chacha20_ctx),
> -       .base.cra_module        = THIS_MODULE,
> -
> -       .min_keysize            = CHACHA20_KEY_SIZE,
> -       .max_keysize            = CHACHA20_KEY_SIZE,
> -       .ivsize                 = CHACHA20_IV_SIZE,
> -       .chunksize              = CHACHA20_BLOCK_SIZE,
> -       .walksize               = 4 * CHACHA20_BLOCK_SIZE,
> -       .setkey                 = crypto_chacha20_setkey,
> -       .encrypt                = chacha20_neon,
> -       .decrypt                = chacha20_neon,
> -};
> -
> -static int __init chacha20_simd_mod_init(void)
> -{
> -       if (!(elf_hwcap & HWCAP_NEON))
> -               return -ENODEV;
> -
> -       return crypto_register_skcipher(&alg);
> -}
> -
> -static void __exit chacha20_simd_mod_fini(void)
> -{
> -       crypto_unregister_skcipher(&alg);
> -}
> -
> -module_init(chacha20_simd_mod_init);
> -module_exit(chacha20_simd_mod_fini);
> -
> -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
> -MODULE_LICENSE("GPL v2");
> -MODULE_ALIAS_CRYPTO("chacha20");
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index db8d364f8476..6cc3c8a0ad88 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -709,5 +709,4 @@ CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
>  CONFIG_CRYPTO_CRC32_ARM64_CE=m
>  CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
>  CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
> -CONFIG_CRYPTO_CHACHA20_NEON=m
>  CONFIG_CRYPTO_AES_ARM64_BS=m
> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
> index e3fdb0fd6f70..9db6d775a880 100644
> --- a/arch/arm64/crypto/Kconfig
> +++ b/arch/arm64/crypto/Kconfig
> @@ -105,12 +105,6 @@ config CRYPTO_AES_ARM64_NEON_BLK
>         select CRYPTO_AES
>         select CRYPTO_SIMD
>
> -config CRYPTO_CHACHA20_NEON
> -       tristate "NEON accelerated ChaCha20 symmetric cipher"
> -       depends on KERNEL_MODE_NEON
> -       select CRYPTO_BLKCIPHER
> -       select CRYPTO_CHACHA20
> -
>  config CRYPTO_AES_ARM64_BS
>         tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
>         depends on KERNEL_MODE_NEON
> diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
> index bcafd016618e..507c4bfb86e3 100644
> --- a/arch/arm64/crypto/Makefile
> +++ b/arch/arm64/crypto/Makefile
> @@ -53,9 +53,6 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
>  obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
>  sha512-arm64-y := sha512-glue.o sha512-core.o
>
> -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
> -chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
> -
>  obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
>  speck-neon-y := speck-neon-core.o speck-neon-glue.o
>
> diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S
> deleted file mode 100644
> index 13c85e272c2a..000000000000
> --- a/arch/arm64/crypto/chacha20-neon-core.S
> +++ /dev/null
> @@ -1,450 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
> - *
> - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Based on:
> - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <linux/linkage.h>
> -
> -       .text
> -       .align          6
> -
> -ENTRY(chacha20_block_xor_neon)
> -       // x0: Input state matrix, s
> -       // x1: 1 data block output, o
> -       // x2: 1 data block input, i
> -
> -       //
> -       // This function encrypts one ChaCha20 block by loading the state matrix
> -       // in four NEON registers. It performs matrix operation on four words in
> -       // parallel, but requires shuffling to rearrange the words after each
> -       // round.
> -       //
> -
> -       // x0..3 = s0..3
> -       adr             x3, ROT8
> -       ld1             {v0.4s-v3.4s}, [x0]
> -       ld1             {v8.4s-v11.4s}, [x0]
> -       ld1             {v12.4s}, [x3]
> -
> -       mov             x3, #10
> -
> -.Ldoubleround:
> -       // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> -       add             v0.4s, v0.4s, v1.4s
> -       eor             v3.16b, v3.16b, v0.16b
> -       rev32           v3.8h, v3.8h
> -
> -       // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> -       add             v2.4s, v2.4s, v3.4s
> -       eor             v4.16b, v1.16b, v2.16b
> -       shl             v1.4s, v4.4s, #12
> -       sri             v1.4s, v4.4s, #20
> -
> -       // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> -       add             v0.4s, v0.4s, v1.4s
> -       eor             v3.16b, v3.16b, v0.16b
> -       tbl             v3.16b, {v3.16b}, v12.16b
> -
> -       // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> -       add             v2.4s, v2.4s, v3.4s
> -       eor             v4.16b, v1.16b, v2.16b
> -       shl             v1.4s, v4.4s, #7
> -       sri             v1.4s, v4.4s, #25
> -
> -       // x1 = shuffle32(x1, MASK(0, 3, 2, 1))
> -       ext             v1.16b, v1.16b, v1.16b, #4
> -       // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> -       ext             v2.16b, v2.16b, v2.16b, #8
> -       // x3 = shuffle32(x3, MASK(2, 1, 0, 3))
> -       ext             v3.16b, v3.16b, v3.16b, #12
> -
> -       // x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> -       add             v0.4s, v0.4s, v1.4s
> -       eor             v3.16b, v3.16b, v0.16b
> -       rev32           v3.8h, v3.8h
> -
> -       // x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> -       add             v2.4s, v2.4s, v3.4s
> -       eor             v4.16b, v1.16b, v2.16b
> -       shl             v1.4s, v4.4s, #12
> -       sri             v1.4s, v4.4s, #20
> -
> -       // x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> -       add             v0.4s, v0.4s, v1.4s
> -       eor             v3.16b, v3.16b, v0.16b
> -       tbl             v3.16b, {v3.16b}, v12.16b
> -
> -       // x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> -       add             v2.4s, v2.4s, v3.4s
> -       eor             v4.16b, v1.16b, v2.16b
> -       shl             v1.4s, v4.4s, #7
> -       sri             v1.4s, v4.4s, #25
> -
> -       // x1 = shuffle32(x1, MASK(2, 1, 0, 3))
> -       ext             v1.16b, v1.16b, v1.16b, #12
> -       // x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> -       ext             v2.16b, v2.16b, v2.16b, #8
> -       // x3 = shuffle32(x3, MASK(0, 3, 2, 1))
> -       ext             v3.16b, v3.16b, v3.16b, #4
> -
> -       subs            x3, x3, #1
> -       b.ne            .Ldoubleround
> -
> -       ld1             {v4.16b-v7.16b}, [x2]
> -
> -       // o0 = i0 ^ (x0 + s0)
> -       add             v0.4s, v0.4s, v8.4s
> -       eor             v0.16b, v0.16b, v4.16b
> -
> -       // o1 = i1 ^ (x1 + s1)
> -       add             v1.4s, v1.4s, v9.4s
> -       eor             v1.16b, v1.16b, v5.16b
> -
> -       // o2 = i2 ^ (x2 + s2)
> -       add             v2.4s, v2.4s, v10.4s
> -       eor             v2.16b, v2.16b, v6.16b
> -
> -       // o3 = i3 ^ (x3 + s3)
> -       add             v3.4s, v3.4s, v11.4s
> -       eor             v3.16b, v3.16b, v7.16b
> -
> -       st1             {v0.16b-v3.16b}, [x1]
> -
> -       ret
> -ENDPROC(chacha20_block_xor_neon)
> -
> -       .align          6
> -ENTRY(chacha20_4block_xor_neon)
> -       // x0: Input state matrix, s
> -       // x1: 4 data blocks output, o
> -       // x2: 4 data blocks input, i
> -
> -       //
> -       // This function encrypts four consecutive ChaCha20 blocks by loading
> -       // the state matrix in NEON registers four times. The algorithm performs
> -       // each operation on the corresponding word of each state matrix, hence
> -       // requires no word shuffling. For final XORing step we transpose the
> -       // matrix by interleaving 32- and then 64-bit words, which allows us to
> -       // do XOR in NEON registers.
> -       //
> -       adr             x3, CTRINC              // ... and ROT8
> -       ld1             {v30.4s-v31.4s}, [x3]
> -
> -       // x0..15[0-3] = s0..3[0..3]
> -       mov             x4, x0
> -       ld4r            { v0.4s- v3.4s}, [x4], #16
> -       ld4r            { v4.4s- v7.4s}, [x4], #16
> -       ld4r            { v8.4s-v11.4s}, [x4], #16
> -       ld4r            {v12.4s-v15.4s}, [x4]
> -
> -       // x12 += counter values 0-3
> -       add             v12.4s, v12.4s, v30.4s
> -
> -       mov             x3, #10
> -
> -.Ldoubleround4:
> -       // x0 += x4, x12 = rotl32(x12 ^ x0, 16)
> -       // x1 += x5, x13 = rotl32(x13 ^ x1, 16)
> -       // x2 += x6, x14 = rotl32(x14 ^ x2, 16)
> -       // x3 += x7, x15 = rotl32(x15 ^ x3, 16)
> -       add             v0.4s, v0.4s, v4.4s
> -       add             v1.4s, v1.4s, v5.4s
> -       add             v2.4s, v2.4s, v6.4s
> -       add             v3.4s, v3.4s, v7.4s
> -
> -       eor             v12.16b, v12.16b, v0.16b
> -       eor             v13.16b, v13.16b, v1.16b
> -       eor             v14.16b, v14.16b, v2.16b
> -       eor             v15.16b, v15.16b, v3.16b
> -
> -       rev32           v12.8h, v12.8h
> -       rev32           v13.8h, v13.8h
> -       rev32           v14.8h, v14.8h
> -       rev32           v15.8h, v15.8h
> -
> -       // x8 += x12, x4 = rotl32(x4 ^ x8, 12)
> -       // x9 += x13, x5 = rotl32(x5 ^ x9, 12)
> -       // x10 += x14, x6 = rotl32(x6 ^ x10, 12)
> -       // x11 += x15, x7 = rotl32(x7 ^ x11, 12)
> -       add             v8.4s, v8.4s, v12.4s
> -       add             v9.4s, v9.4s, v13.4s
> -       add             v10.4s, v10.4s, v14.4s
> -       add             v11.4s, v11.4s, v15.4s
> -
> -       eor             v16.16b, v4.16b, v8.16b
> -       eor             v17.16b, v5.16b, v9.16b
> -       eor             v18.16b, v6.16b, v10.16b
> -       eor             v19.16b, v7.16b, v11.16b
> -
> -       shl             v4.4s, v16.4s, #12
> -       shl             v5.4s, v17.4s, #12
> -       shl             v6.4s, v18.4s, #12
> -       shl             v7.4s, v19.4s, #12
> -
> -       sri             v4.4s, v16.4s, #20
> -       sri             v5.4s, v17.4s, #20
> -       sri             v6.4s, v18.4s, #20
> -       sri             v7.4s, v19.4s, #20
> -
> -       // x0 += x4, x12 = rotl32(x12 ^ x0, 8)
> -       // x1 += x5, x13 = rotl32(x13 ^ x1, 8)
> -       // x2 += x6, x14 = rotl32(x14 ^ x2, 8)
> -       // x3 += x7, x15 = rotl32(x15 ^ x3, 8)
> -       add             v0.4s, v0.4s, v4.4s
> -       add             v1.4s, v1.4s, v5.4s
> -       add             v2.4s, v2.4s, v6.4s
> -       add             v3.4s, v3.4s, v7.4s
> -
> -       eor             v12.16b, v12.16b, v0.16b
> -       eor             v13.16b, v13.16b, v1.16b
> -       eor             v14.16b, v14.16b, v2.16b
> -       eor             v15.16b, v15.16b, v3.16b
> -
> -       tbl             v12.16b, {v12.16b}, v31.16b
> -       tbl             v13.16b, {v13.16b}, v31.16b
> -       tbl             v14.16b, {v14.16b}, v31.16b
> -       tbl             v15.16b, {v15.16b}, v31.16b
> -
> -       // x8 += x12, x4 = rotl32(x4 ^ x8, 7)
> -       // x9 += x13, x5 = rotl32(x5 ^ x9, 7)
> -       // x10 += x14, x6 = rotl32(x6 ^ x10, 7)
> -       // x11 += x15, x7 = rotl32(x7 ^ x11, 7)
> -       add             v8.4s, v8.4s, v12.4s
> -       add             v9.4s, v9.4s, v13.4s
> -       add             v10.4s, v10.4s, v14.4s
> -       add             v11.4s, v11.4s, v15.4s
> -
> -       eor             v16.16b, v4.16b, v8.16b
> -       eor             v17.16b, v5.16b, v9.16b
> -       eor             v18.16b, v6.16b, v10.16b
> -       eor             v19.16b, v7.16b, v11.16b
> -
> -       shl             v4.4s, v16.4s, #7
> -       shl             v5.4s, v17.4s, #7
> -       shl             v6.4s, v18.4s, #7
> -       shl             v7.4s, v19.4s, #7
> -
> -       sri             v4.4s, v16.4s, #25
> -       sri             v5.4s, v17.4s, #25
> -       sri             v6.4s, v18.4s, #25
> -       sri             v7.4s, v19.4s, #25
> -
> -       // x0 += x5, x15 = rotl32(x15 ^ x0, 16)
> -       // x1 += x6, x12 = rotl32(x12 ^ x1, 16)
> -       // x2 += x7, x13 = rotl32(x13 ^ x2, 16)
> -       // x3 += x4, x14 = rotl32(x14 ^ x3, 16)
> -       add             v0.4s, v0.4s, v5.4s
> -       add             v1.4s, v1.4s, v6.4s
> -       add             v2.4s, v2.4s, v7.4s
> -       add             v3.4s, v3.4s, v4.4s
> -
> -       eor             v15.16b, v15.16b, v0.16b
> -       eor             v12.16b, v12.16b, v1.16b
> -       eor             v13.16b, v13.16b, v2.16b
> -       eor             v14.16b, v14.16b, v3.16b
> -
> -       rev32           v15.8h, v15.8h
> -       rev32           v12.8h, v12.8h
> -       rev32           v13.8h, v13.8h
> -       rev32           v14.8h, v14.8h
> -
> -       // x10 += x15, x5 = rotl32(x5 ^ x10, 12)
> -       // x11 += x12, x6 = rotl32(x6 ^ x11, 12)
> -       // x8 += x13, x7 = rotl32(x7 ^ x8, 12)
> -       // x9 += x14, x4 = rotl32(x4 ^ x9, 12)
> -       add             v10.4s, v10.4s, v15.4s
> -       add             v11.4s, v11.4s, v12.4s
> -       add             v8.4s, v8.4s, v13.4s
> -       add             v9.4s, v9.4s, v14.4s
> -
> -       eor             v16.16b, v5.16b, v10.16b
> -       eor             v17.16b, v6.16b, v11.16b
> -       eor             v18.16b, v7.16b, v8.16b
> -       eor             v19.16b, v4.16b, v9.16b
> -
> -       shl             v5.4s, v16.4s, #12
> -       shl             v6.4s, v17.4s, #12
> -       shl             v7.4s, v18.4s, #12
> -       shl             v4.4s, v19.4s, #12
> -
> -       sri             v5.4s, v16.4s, #20
> -       sri             v6.4s, v17.4s, #20
> -       sri             v7.4s, v18.4s, #20
> -       sri             v4.4s, v19.4s, #20
> -
> -       // x0 += x5, x15 = rotl32(x15 ^ x0, 8)
> -       // x1 += x6, x12 = rotl32(x12 ^ x1, 8)
> -       // x2 += x7, x13 = rotl32(x13 ^ x2, 8)
> -       // x3 += x4, x14 = rotl32(x14 ^ x3, 8)
> -       add             v0.4s, v0.4s, v5.4s
> -       add             v1.4s, v1.4s, v6.4s
> -       add             v2.4s, v2.4s, v7.4s
> -       add             v3.4s, v3.4s, v4.4s
> -
> -       eor             v15.16b, v15.16b, v0.16b
> -       eor             v12.16b, v12.16b, v1.16b
> -       eor             v13.16b, v13.16b, v2.16b
> -       eor             v14.16b, v14.16b, v3.16b
> -
> -       tbl             v15.16b, {v15.16b}, v31.16b
> -       tbl             v12.16b, {v12.16b}, v31.16b
> -       tbl             v13.16b, {v13.16b}, v31.16b
> -       tbl             v14.16b, {v14.16b}, v31.16b
> -
> -       // x10 += x15, x5 = rotl32(x5 ^ x10, 7)
> -       // x11 += x12, x6 = rotl32(x6 ^ x11, 7)
> -       // x8 += x13, x7 = rotl32(x7 ^ x8, 7)
> -       // x9 += x14, x4 = rotl32(x4 ^ x9, 7)
> -       add             v10.4s, v10.4s, v15.4s
> -       add             v11.4s, v11.4s, v12.4s
> -       add             v8.4s, v8.4s, v13.4s
> -       add             v9.4s, v9.4s, v14.4s
> -
> -       eor             v16.16b, v5.16b, v10.16b
> -       eor             v17.16b, v6.16b, v11.16b
> -       eor             v18.16b, v7.16b, v8.16b
> -       eor             v19.16b, v4.16b, v9.16b
> -
> -       shl             v5.4s, v16.4s, #7
> -       shl             v6.4s, v17.4s, #7
> -       shl             v7.4s, v18.4s, #7
> -       shl             v4.4s, v19.4s, #7
> -
> -       sri             v5.4s, v16.4s, #25
> -       sri             v6.4s, v17.4s, #25
> -       sri             v7.4s, v18.4s, #25
> -       sri             v4.4s, v19.4s, #25
> -
> -       subs            x3, x3, #1
> -       b.ne            .Ldoubleround4
> -
> -       ld4r            {v16.4s-v19.4s}, [x0], #16
> -       ld4r            {v20.4s-v23.4s}, [x0], #16
> -
> -       // x12 += counter values 0-3
> -       add             v12.4s, v12.4s, v30.4s
> -
> -       // x0[0-3] += s0[0]
> -       // x1[0-3] += s0[1]
> -       // x2[0-3] += s0[2]
> -       // x3[0-3] += s0[3]
> -       add             v0.4s, v0.4s, v16.4s
> -       add             v1.4s, v1.4s, v17.4s
> -       add             v2.4s, v2.4s, v18.4s
> -       add             v3.4s, v3.4s, v19.4s
> -
> -       ld4r            {v24.4s-v27.4s}, [x0], #16
> -       ld4r            {v28.4s-v31.4s}, [x0]
> -
> -       // x4[0-3] += s1[0]
> -       // x5[0-3] += s1[1]
> -       // x6[0-3] += s1[2]
> -       // x7[0-3] += s1[3]
> -       add             v4.4s, v4.4s, v20.4s
> -       add             v5.4s, v5.4s, v21.4s
> -       add             v6.4s, v6.4s, v22.4s
> -       add             v7.4s, v7.4s, v23.4s
> -
> -       // x8[0-3] += s2[0]
> -       // x9[0-3] += s2[1]
> -       // x10[0-3] += s2[2]
> -       // x11[0-3] += s2[3]
> -       add             v8.4s, v8.4s, v24.4s
> -       add             v9.4s, v9.4s, v25.4s
> -       add             v10.4s, v10.4s, v26.4s
> -       add             v11.4s, v11.4s, v27.4s
> -
> -       // x12[0-3] += s3[0]
> -       // x13[0-3] += s3[1]
> -       // x14[0-3] += s3[2]
> -       // x15[0-3] += s3[3]
> -       add             v12.4s, v12.4s, v28.4s
> -       add             v13.4s, v13.4s, v29.4s
> -       add             v14.4s, v14.4s, v30.4s
> -       add             v15.4s, v15.4s, v31.4s
> -
> -       // interleave 32-bit words in state n, n+1
> -       zip1            v16.4s, v0.4s, v1.4s
> -       zip2            v17.4s, v0.4s, v1.4s
> -       zip1            v18.4s, v2.4s, v3.4s
> -       zip2            v19.4s, v2.4s, v3.4s
> -       zip1            v20.4s, v4.4s, v5.4s
> -       zip2            v21.4s, v4.4s, v5.4s
> -       zip1            v22.4s, v6.4s, v7.4s
> -       zip2            v23.4s, v6.4s, v7.4s
> -       zip1            v24.4s, v8.4s, v9.4s
> -       zip2            v25.4s, v8.4s, v9.4s
> -       zip1            v26.4s, v10.4s, v11.4s
> -       zip2            v27.4s, v10.4s, v11.4s
> -       zip1            v28.4s, v12.4s, v13.4s
> -       zip2            v29.4s, v12.4s, v13.4s
> -       zip1            v30.4s, v14.4s, v15.4s
> -       zip2            v31.4s, v14.4s, v15.4s
> -
> -       // interleave 64-bit words in state n, n+2
> -       zip1            v0.2d, v16.2d, v18.2d
> -       zip2            v4.2d, v16.2d, v18.2d
> -       zip1            v8.2d, v17.2d, v19.2d
> -       zip2            v12.2d, v17.2d, v19.2d
> -       ld1             {v16.16b-v19.16b}, [x2], #64
> -
> -       zip1            v1.2d, v20.2d, v22.2d
> -       zip2            v5.2d, v20.2d, v22.2d
> -       zip1            v9.2d, v21.2d, v23.2d
> -       zip2            v13.2d, v21.2d, v23.2d
> -       ld1             {v20.16b-v23.16b}, [x2], #64
> -
> -       zip1            v2.2d, v24.2d, v26.2d
> -       zip2            v6.2d, v24.2d, v26.2d
> -       zip1            v10.2d, v25.2d, v27.2d
> -       zip2            v14.2d, v25.2d, v27.2d
> -       ld1             {v24.16b-v27.16b}, [x2], #64
> -
> -       zip1            v3.2d, v28.2d, v30.2d
> -       zip2            v7.2d, v28.2d, v30.2d
> -       zip1            v11.2d, v29.2d, v31.2d
> -       zip2            v15.2d, v29.2d, v31.2d
> -       ld1             {v28.16b-v31.16b}, [x2]
> -
> -       // xor with corresponding input, write to output
> -       eor             v16.16b, v16.16b, v0.16b
> -       eor             v17.16b, v17.16b, v1.16b
> -       eor             v18.16b, v18.16b, v2.16b
> -       eor             v19.16b, v19.16b, v3.16b
> -       eor             v20.16b, v20.16b, v4.16b
> -       eor             v21.16b, v21.16b, v5.16b
> -       st1             {v16.16b-v19.16b}, [x1], #64
> -       eor             v22.16b, v22.16b, v6.16b
> -       eor             v23.16b, v23.16b, v7.16b
> -       eor             v24.16b, v24.16b, v8.16b
> -       eor             v25.16b, v25.16b, v9.16b
> -       st1             {v20.16b-v23.16b}, [x1], #64
> -       eor             v26.16b, v26.16b, v10.16b
> -       eor             v27.16b, v27.16b, v11.16b
> -       eor             v28.16b, v28.16b, v12.16b
> -       st1             {v24.16b-v27.16b}, [x1], #64
> -       eor             v29.16b, v29.16b, v13.16b
> -       eor             v30.16b, v30.16b, v14.16b
> -       eor             v31.16b, v31.16b, v15.16b
> -       st1             {v28.16b-v31.16b}, [x1]
> -
> -       ret
> -ENDPROC(chacha20_4block_xor_neon)
> -
> -CTRINC:        .word           0, 1, 2, 3
> -ROT8:  .word           0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
> diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
> deleted file mode 100644
> index 727579c93ded..000000000000
> --- a/arch/arm64/crypto/chacha20-neon-glue.c
> +++ /dev/null
> @@ -1,133 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
> - *
> - * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Based on:
> - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <crypto/algapi.h>
> -#include <crypto/chacha20.h>
> -#include <crypto/internal/skcipher.h>
> -#include <linux/kernel.h>
> -#include <linux/module.h>
> -
> -#include <asm/hwcap.h>
> -#include <asm/neon.h>
> -#include <asm/simd.h>
> -
> -asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
> -asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
> -
> -static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
> -                           unsigned int bytes)
> -{
> -       u8 buf[CHACHA20_BLOCK_SIZE];
> -
> -       while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
> -               kernel_neon_begin();
> -               chacha20_4block_xor_neon(state, dst, src);
> -               kernel_neon_end();
> -               bytes -= CHACHA20_BLOCK_SIZE * 4;
> -               src += CHACHA20_BLOCK_SIZE * 4;
> -               dst += CHACHA20_BLOCK_SIZE * 4;
> -               state[12] += 4;
> -       }
> -
> -       if (!bytes)
> -               return;
> -
> -       kernel_neon_begin();
> -       while (bytes >= CHACHA20_BLOCK_SIZE) {
> -               chacha20_block_xor_neon(state, dst, src);
> -               bytes -= CHACHA20_BLOCK_SIZE;
> -               src += CHACHA20_BLOCK_SIZE;
> -               dst += CHACHA20_BLOCK_SIZE;
> -               state[12]++;
> -       }
> -       if (bytes) {
> -               memcpy(buf, src, bytes);
> -               chacha20_block_xor_neon(state, buf, buf);
> -               memcpy(dst, buf, bytes);
> -       }
> -       kernel_neon_end();
> -}
> -
> -static int chacha20_neon(struct skcipher_request *req)
> -{
> -       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> -       struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> -       struct skcipher_walk walk;
> -       u32 state[16];
> -       int err;
> -
> -       if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
> -               return crypto_chacha20_crypt(req);
> -
> -       err = skcipher_walk_virt(&walk, req, false);
> -
> -       crypto_chacha20_init(state, ctx, walk.iv);
> -
> -       while (walk.nbytes > 0) {
> -               unsigned int nbytes = walk.nbytes;
> -
> -               if (nbytes < walk.total)
> -                       nbytes = round_down(nbytes, walk.stride);
> -
> -               chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
> -                               nbytes);
> -               err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> -       }
> -
> -       return err;
> -}
> -
> -static struct skcipher_alg alg = {
> -       .base.cra_name          = "chacha20",
> -       .base.cra_driver_name   = "chacha20-neon",
> -       .base.cra_priority      = 300,
> -       .base.cra_blocksize     = 1,
> -       .base.cra_ctxsize       = sizeof(struct chacha20_ctx),
> -       .base.cra_module        = THIS_MODULE,
> -
> -       .min_keysize            = CHACHA20_KEY_SIZE,
> -       .max_keysize            = CHACHA20_KEY_SIZE,
> -       .ivsize                 = CHACHA20_IV_SIZE,
> -       .chunksize              = CHACHA20_BLOCK_SIZE,
> -       .walksize               = 4 * CHACHA20_BLOCK_SIZE,
> -       .setkey                 = crypto_chacha20_setkey,
> -       .encrypt                = chacha20_neon,
> -       .decrypt                = chacha20_neon,
> -};
> -
> -static int __init chacha20_simd_mod_init(void)
> -{
> -       if (!(elf_hwcap & HWCAP_ASIMD))
> -               return -ENODEV;
> -
> -       return crypto_register_skcipher(&alg);
> -}
> -
> -static void __exit chacha20_simd_mod_fini(void)
> -{
> -       crypto_unregister_skcipher(&alg);
> -}
> -
> -module_init(chacha20_simd_mod_init);
> -module_exit(chacha20_simd_mod_fini);
> -
> -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
> -MODULE_LICENSE("GPL v2");
> -MODULE_ALIAS_CRYPTO("chacha20");
> diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> index cf830219846b..419212c31246 100644
> --- a/arch/x86/crypto/Makefile
> +++ b/arch/x86/crypto/Makefile
> @@ -23,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
>  obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
>  obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
>  obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
> -obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
>  obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
>  obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
>  obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
> @@ -76,7 +75,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
>  blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
>  twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
>  twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
> -chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
>  serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
>
>  aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
> @@ -99,7 +97,6 @@ endif
>
>  ifeq ($(avx2_supported),yes)
>         camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
> -       chacha20-x86_64-y += chacha20-avx2-x86_64.o
>         serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
>
>         morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
> diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S
> deleted file mode 100644
> index f3cd26f48332..000000000000
> --- a/arch/x86/crypto/chacha20-avx2-x86_64.S
> +++ /dev/null
> @@ -1,448 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <linux/linkage.h>
> -
> -.section       .rodata.cst32.ROT8, "aM", @progbits, 32
> -.align 32
> -ROT8:  .octa 0x0e0d0c0f0a09080b0605040702010003
> -       .octa 0x0e0d0c0f0a09080b0605040702010003
> -
> -.section       .rodata.cst32.ROT16, "aM", @progbits, 32
> -.align 32
> -ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
> -       .octa 0x0d0c0f0e09080b0a0504070601000302
> -
> -.section       .rodata.cst32.CTRINC, "aM", @progbits, 32
> -.align 32
> -CTRINC:        .octa 0x00000003000000020000000100000000
> -       .octa 0x00000007000000060000000500000004
> -
> -.text
> -
> -ENTRY(chacha20_8block_xor_avx2)
> -       # %rdi: Input state matrix, s
> -       # %rsi: 8 data blocks output, o
> -       # %rdx: 8 data blocks input, i
> -
> -       # This function encrypts eight consecutive ChaCha20 blocks by loading
> -       # the state matrix in AVX registers eight times. As we need some
> -       # scratch registers, we save the first four registers on the stack. The
> -       # algorithm performs each operation on the corresponding word of each
> -       # state matrix, hence requires no word shuffling. For final XORing step
> -       # we transpose the matrix by interleaving 32-, 64- and then 128-bit
> -       # words, which allows us to do XOR in AVX registers. 8/16-bit word
> -       # rotation is done with the slightly better performing byte shuffling,
> -       # 7/12-bit word rotation uses traditional shift+OR.
> -
> -       vzeroupper
> -       # 4 * 32 byte stack, 32-byte aligned
> -       lea             8(%rsp),%r10
> -       and             $~31, %rsp
> -       sub             $0x80, %rsp
> -
> -       # x0..15[0-7] = s[0..15]
> -       vpbroadcastd    0x00(%rdi),%ymm0
> -       vpbroadcastd    0x04(%rdi),%ymm1
> -       vpbroadcastd    0x08(%rdi),%ymm2
> -       vpbroadcastd    0x0c(%rdi),%ymm3
> -       vpbroadcastd    0x10(%rdi),%ymm4
> -       vpbroadcastd    0x14(%rdi),%ymm5
> -       vpbroadcastd    0x18(%rdi),%ymm6
> -       vpbroadcastd    0x1c(%rdi),%ymm7
> -       vpbroadcastd    0x20(%rdi),%ymm8
> -       vpbroadcastd    0x24(%rdi),%ymm9
> -       vpbroadcastd    0x28(%rdi),%ymm10
> -       vpbroadcastd    0x2c(%rdi),%ymm11
> -       vpbroadcastd    0x30(%rdi),%ymm12
> -       vpbroadcastd    0x34(%rdi),%ymm13
> -       vpbroadcastd    0x38(%rdi),%ymm14
> -       vpbroadcastd    0x3c(%rdi),%ymm15
> -       # x0..3 on stack
> -       vmovdqa         %ymm0,0x00(%rsp)
> -       vmovdqa         %ymm1,0x20(%rsp)
> -       vmovdqa         %ymm2,0x40(%rsp)
> -       vmovdqa         %ymm3,0x60(%rsp)
> -
> -       vmovdqa         CTRINC(%rip),%ymm1
> -       vmovdqa         ROT8(%rip),%ymm2
> -       vmovdqa         ROT16(%rip),%ymm3
> -
> -       # x12 += counter values 0-3
> -       vpaddd          %ymm1,%ymm12,%ymm12
> -
> -       mov             $10,%ecx
> -
> -.Ldoubleround8:
> -       # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
> -       vpaddd          0x00(%rsp),%ymm4,%ymm0
> -       vmovdqa         %ymm0,0x00(%rsp)
> -       vpxor           %ymm0,%ymm12,%ymm12
> -       vpshufb         %ymm3,%ymm12,%ymm12
> -       # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
> -       vpaddd          0x20(%rsp),%ymm5,%ymm0
> -       vmovdqa         %ymm0,0x20(%rsp)
> -       vpxor           %ymm0,%ymm13,%ymm13
> -       vpshufb         %ymm3,%ymm13,%ymm13
> -       # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
> -       vpaddd          0x40(%rsp),%ymm6,%ymm0
> -       vmovdqa         %ymm0,0x40(%rsp)
> -       vpxor           %ymm0,%ymm14,%ymm14
> -       vpshufb         %ymm3,%ymm14,%ymm14
> -       # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
> -       vpaddd          0x60(%rsp),%ymm7,%ymm0
> -       vmovdqa         %ymm0,0x60(%rsp)
> -       vpxor           %ymm0,%ymm15,%ymm15
> -       vpshufb         %ymm3,%ymm15,%ymm15
> -
> -       # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
> -       vpaddd          %ymm12,%ymm8,%ymm8
> -       vpxor           %ymm8,%ymm4,%ymm4
> -       vpslld          $12,%ymm4,%ymm0
> -       vpsrld          $20,%ymm4,%ymm4
> -       vpor            %ymm0,%ymm4,%ymm4
> -       # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
> -       vpaddd          %ymm13,%ymm9,%ymm9
> -       vpxor           %ymm9,%ymm5,%ymm5
> -       vpslld          $12,%ymm5,%ymm0
> -       vpsrld          $20,%ymm5,%ymm5
> -       vpor            %ymm0,%ymm5,%ymm5
> -       # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
> -       vpaddd          %ymm14,%ymm10,%ymm10
> -       vpxor           %ymm10,%ymm6,%ymm6
> -       vpslld          $12,%ymm6,%ymm0
> -       vpsrld          $20,%ymm6,%ymm6
> -       vpor            %ymm0,%ymm6,%ymm6
> -       # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
> -       vpaddd          %ymm15,%ymm11,%ymm11
> -       vpxor           %ymm11,%ymm7,%ymm7
> -       vpslld          $12,%ymm7,%ymm0
> -       vpsrld          $20,%ymm7,%ymm7
> -       vpor            %ymm0,%ymm7,%ymm7
> -
> -       # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
> -       vpaddd          0x00(%rsp),%ymm4,%ymm0
> -       vmovdqa         %ymm0,0x00(%rsp)
> -       vpxor           %ymm0,%ymm12,%ymm12
> -       vpshufb         %ymm2,%ymm12,%ymm12
> -       # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
> -       vpaddd          0x20(%rsp),%ymm5,%ymm0
> -       vmovdqa         %ymm0,0x20(%rsp)
> -       vpxor           %ymm0,%ymm13,%ymm13
> -       vpshufb         %ymm2,%ymm13,%ymm13
> -       # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
> -       vpaddd          0x40(%rsp),%ymm6,%ymm0
> -       vmovdqa         %ymm0,0x40(%rsp)
> -       vpxor           %ymm0,%ymm14,%ymm14
> -       vpshufb         %ymm2,%ymm14,%ymm14
> -       # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
> -       vpaddd          0x60(%rsp),%ymm7,%ymm0
> -       vmovdqa         %ymm0,0x60(%rsp)
> -       vpxor           %ymm0,%ymm15,%ymm15
> -       vpshufb         %ymm2,%ymm15,%ymm15
> -
> -       # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
> -       vpaddd          %ymm12,%ymm8,%ymm8
> -       vpxor           %ymm8,%ymm4,%ymm4
> -       vpslld          $7,%ymm4,%ymm0
> -       vpsrld          $25,%ymm4,%ymm4
> -       vpor            %ymm0,%ymm4,%ymm4
> -       # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
> -       vpaddd          %ymm13,%ymm9,%ymm9
> -       vpxor           %ymm9,%ymm5,%ymm5
> -       vpslld          $7,%ymm5,%ymm0
> -       vpsrld          $25,%ymm5,%ymm5
> -       vpor            %ymm0,%ymm5,%ymm5
> -       # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
> -       vpaddd          %ymm14,%ymm10,%ymm10
> -       vpxor           %ymm10,%ymm6,%ymm6
> -       vpslld          $7,%ymm6,%ymm0
> -       vpsrld          $25,%ymm6,%ymm6
> -       vpor            %ymm0,%ymm6,%ymm6
> -       # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
> -       vpaddd          %ymm15,%ymm11,%ymm11
> -       vpxor           %ymm11,%ymm7,%ymm7
> -       vpslld          $7,%ymm7,%ymm0
> -       vpsrld          $25,%ymm7,%ymm7
> -       vpor            %ymm0,%ymm7,%ymm7
> -
> -       # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
> -       vpaddd          0x00(%rsp),%ymm5,%ymm0
> -       vmovdqa         %ymm0,0x00(%rsp)
> -       vpxor           %ymm0,%ymm15,%ymm15
> -       vpshufb         %ymm3,%ymm15,%ymm15
> -       # x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0
> -       vpaddd          0x20(%rsp),%ymm6,%ymm0
> -       vmovdqa         %ymm0,0x20(%rsp)
> -       vpxor           %ymm0,%ymm12,%ymm12
> -       vpshufb         %ymm3,%ymm12,%ymm12
> -       # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
> -       vpaddd          0x40(%rsp),%ymm7,%ymm0
> -       vmovdqa         %ymm0,0x40(%rsp)
> -       vpxor           %ymm0,%ymm13,%ymm13
> -       vpshufb         %ymm3,%ymm13,%ymm13
> -       # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
> -       vpaddd          0x60(%rsp),%ymm4,%ymm0
> -       vmovdqa         %ymm0,0x60(%rsp)
> -       vpxor           %ymm0,%ymm14,%ymm14
> -       vpshufb         %ymm3,%ymm14,%ymm14
> -
> -       # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
> -       vpaddd          %ymm15,%ymm10,%ymm10
> -       vpxor           %ymm10,%ymm5,%ymm5
> -       vpslld          $12,%ymm5,%ymm0
> -       vpsrld          $20,%ymm5,%ymm5
> -       vpor            %ymm0,%ymm5,%ymm5
> -       # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
> -       vpaddd          %ymm12,%ymm11,%ymm11
> -       vpxor           %ymm11,%ymm6,%ymm6
> -       vpslld          $12,%ymm6,%ymm0
> -       vpsrld          $20,%ymm6,%ymm6
> -       vpor            %ymm0,%ymm6,%ymm6
> -       # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
> -       vpaddd          %ymm13,%ymm8,%ymm8
> -       vpxor           %ymm8,%ymm7,%ymm7
> -       vpslld          $12,%ymm7,%ymm0
> -       vpsrld          $20,%ymm7,%ymm7
> -       vpor            %ymm0,%ymm7,%ymm7
> -       # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
> -       vpaddd          %ymm14,%ymm9,%ymm9
> -       vpxor           %ymm9,%ymm4,%ymm4
> -       vpslld          $12,%ymm4,%ymm0
> -       vpsrld          $20,%ymm4,%ymm4
> -       vpor            %ymm0,%ymm4,%ymm4
> -
> -       # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
> -       vpaddd          0x00(%rsp),%ymm5,%ymm0
> -       vmovdqa         %ymm0,0x00(%rsp)
> -       vpxor           %ymm0,%ymm15,%ymm15
> -       vpshufb         %ymm2,%ymm15,%ymm15
> -       # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
> -       vpaddd          0x20(%rsp),%ymm6,%ymm0
> -       vmovdqa         %ymm0,0x20(%rsp)
> -       vpxor           %ymm0,%ymm12,%ymm12
> -       vpshufb         %ymm2,%ymm12,%ymm12
> -       # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
> -       vpaddd          0x40(%rsp),%ymm7,%ymm0
> -       vmovdqa         %ymm0,0x40(%rsp)
> -       vpxor           %ymm0,%ymm13,%ymm13
> -       vpshufb         %ymm2,%ymm13,%ymm13
> -       # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
> -       vpaddd          0x60(%rsp),%ymm4,%ymm0
> -       vmovdqa         %ymm0,0x60(%rsp)
> -       vpxor           %ymm0,%ymm14,%ymm14
> -       vpshufb         %ymm2,%ymm14,%ymm14
> -
> -       # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
> -       vpaddd          %ymm15,%ymm10,%ymm10
> -       vpxor           %ymm10,%ymm5,%ymm5
> -       vpslld          $7,%ymm5,%ymm0
> -       vpsrld          $25,%ymm5,%ymm5
> -       vpor            %ymm0,%ymm5,%ymm5
> -       # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
> -       vpaddd          %ymm12,%ymm11,%ymm11
> -       vpxor           %ymm11,%ymm6,%ymm6
> -       vpslld          $7,%ymm6,%ymm0
> -       vpsrld          $25,%ymm6,%ymm6
> -       vpor            %ymm0,%ymm6,%ymm6
> -       # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
> -       vpaddd          %ymm13,%ymm8,%ymm8
> -       vpxor           %ymm8,%ymm7,%ymm7
> -       vpslld          $7,%ymm7,%ymm0
> -       vpsrld          $25,%ymm7,%ymm7
> -       vpor            %ymm0,%ymm7,%ymm7
> -       # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
> -       vpaddd          %ymm14,%ymm9,%ymm9
> -       vpxor           %ymm9,%ymm4,%ymm4
> -       vpslld          $7,%ymm4,%ymm0
> -       vpsrld          $25,%ymm4,%ymm4
> -       vpor            %ymm0,%ymm4,%ymm4
> -
> -       dec             %ecx
> -       jnz             .Ldoubleround8
> -
> -       # x0..15[0-3] += s[0..15]
> -       vpbroadcastd    0x00(%rdi),%ymm0
> -       vpaddd          0x00(%rsp),%ymm0,%ymm0
> -       vmovdqa         %ymm0,0x00(%rsp)
> -       vpbroadcastd    0x04(%rdi),%ymm0
> -       vpaddd          0x20(%rsp),%ymm0,%ymm0
> -       vmovdqa         %ymm0,0x20(%rsp)
> -       vpbroadcastd    0x08(%rdi),%ymm0
> -       vpaddd          0x40(%rsp),%ymm0,%ymm0
> -       vmovdqa         %ymm0,0x40(%rsp)
> -       vpbroadcastd    0x0c(%rdi),%ymm0
> -       vpaddd          0x60(%rsp),%ymm0,%ymm0
> -       vmovdqa         %ymm0,0x60(%rsp)
> -       vpbroadcastd    0x10(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm4,%ymm4
> -       vpbroadcastd    0x14(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm5,%ymm5
> -       vpbroadcastd    0x18(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm6,%ymm6
> -       vpbroadcastd    0x1c(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm7,%ymm7
> -       vpbroadcastd    0x20(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm8,%ymm8
> -       vpbroadcastd    0x24(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm9,%ymm9
> -       vpbroadcastd    0x28(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm10,%ymm10
> -       vpbroadcastd    0x2c(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm11,%ymm11
> -       vpbroadcastd    0x30(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm12,%ymm12
> -       vpbroadcastd    0x34(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm13,%ymm13
> -       vpbroadcastd    0x38(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm14,%ymm14
> -       vpbroadcastd    0x3c(%rdi),%ymm0
> -       vpaddd          %ymm0,%ymm15,%ymm15
> -
> -       # x12 += counter values 0-3
> -       vpaddd          %ymm1,%ymm12,%ymm12
> -
> -       # interleave 32-bit words in state n, n+1
> -       vmovdqa         0x00(%rsp),%ymm0
> -       vmovdqa         0x20(%rsp),%ymm1
> -       vpunpckldq      %ymm1,%ymm0,%ymm2
> -       vpunpckhdq      %ymm1,%ymm0,%ymm1
> -       vmovdqa         %ymm2,0x00(%rsp)
> -       vmovdqa         %ymm1,0x20(%rsp)
> -       vmovdqa         0x40(%rsp),%ymm0
> -       vmovdqa         0x60(%rsp),%ymm1
> -       vpunpckldq      %ymm1,%ymm0,%ymm2
> -       vpunpckhdq      %ymm1,%ymm0,%ymm1
> -       vmovdqa         %ymm2,0x40(%rsp)
> -       vmovdqa         %ymm1,0x60(%rsp)
> -       vmovdqa         %ymm4,%ymm0
> -       vpunpckldq      %ymm5,%ymm0,%ymm4
> -       vpunpckhdq      %ymm5,%ymm0,%ymm5
> -       vmovdqa         %ymm6,%ymm0
> -       vpunpckldq      %ymm7,%ymm0,%ymm6
> -       vpunpckhdq      %ymm7,%ymm0,%ymm7
> -       vmovdqa         %ymm8,%ymm0
> -       vpunpckldq      %ymm9,%ymm0,%ymm8
> -       vpunpckhdq      %ymm9,%ymm0,%ymm9
> -       vmovdqa         %ymm10,%ymm0
> -       vpunpckldq      %ymm11,%ymm0,%ymm10
> -       vpunpckhdq      %ymm11,%ymm0,%ymm11
> -       vmovdqa         %ymm12,%ymm0
> -       vpunpckldq      %ymm13,%ymm0,%ymm12
> -       vpunpckhdq      %ymm13,%ymm0,%ymm13
> -       vmovdqa         %ymm14,%ymm0
> -       vpunpckldq      %ymm15,%ymm0,%ymm14
> -       vpunpckhdq      %ymm15,%ymm0,%ymm15
> -
> -       # interleave 64-bit words in state n, n+2
> -       vmovdqa         0x00(%rsp),%ymm0
> -       vmovdqa         0x40(%rsp),%ymm2
> -       vpunpcklqdq     %ymm2,%ymm0,%ymm1
> -       vpunpckhqdq     %ymm2,%ymm0,%ymm2
> -       vmovdqa         %ymm1,0x00(%rsp)
> -       vmovdqa         %ymm2,0x40(%rsp)
> -       vmovdqa         0x20(%rsp),%ymm0
> -       vmovdqa         0x60(%rsp),%ymm2
> -       vpunpcklqdq     %ymm2,%ymm0,%ymm1
> -       vpunpckhqdq     %ymm2,%ymm0,%ymm2
> -       vmovdqa         %ymm1,0x20(%rsp)
> -       vmovdqa         %ymm2,0x60(%rsp)
> -       vmovdqa         %ymm4,%ymm0
> -       vpunpcklqdq     %ymm6,%ymm0,%ymm4
> -       vpunpckhqdq     %ymm6,%ymm0,%ymm6
> -       vmovdqa         %ymm5,%ymm0
> -       vpunpcklqdq     %ymm7,%ymm0,%ymm5
> -       vpunpckhqdq     %ymm7,%ymm0,%ymm7
> -       vmovdqa         %ymm8,%ymm0
> -       vpunpcklqdq     %ymm10,%ymm0,%ymm8
> -       vpunpckhqdq     %ymm10,%ymm0,%ymm10
> -       vmovdqa         %ymm9,%ymm0
> -       vpunpcklqdq     %ymm11,%ymm0,%ymm9
> -       vpunpckhqdq     %ymm11,%ymm0,%ymm11
> -       vmovdqa         %ymm12,%ymm0
> -       vpunpcklqdq     %ymm14,%ymm0,%ymm12
> -       vpunpckhqdq     %ymm14,%ymm0,%ymm14
> -       vmovdqa         %ymm13,%ymm0
> -       vpunpcklqdq     %ymm15,%ymm0,%ymm13
> -       vpunpckhqdq     %ymm15,%ymm0,%ymm15
> -
> -       # interleave 128-bit words in state n, n+4
> -       vmovdqa         0x00(%rsp),%ymm0
> -       vperm2i128      $0x20,%ymm4,%ymm0,%ymm1
> -       vperm2i128      $0x31,%ymm4,%ymm0,%ymm4
> -       vmovdqa         %ymm1,0x00(%rsp)
> -       vmovdqa         0x20(%rsp),%ymm0
> -       vperm2i128      $0x20,%ymm5,%ymm0,%ymm1
> -       vperm2i128      $0x31,%ymm5,%ymm0,%ymm5
> -       vmovdqa         %ymm1,0x20(%rsp)
> -       vmovdqa         0x40(%rsp),%ymm0
> -       vperm2i128      $0x20,%ymm6,%ymm0,%ymm1
> -       vperm2i128      $0x31,%ymm6,%ymm0,%ymm6
> -       vmovdqa         %ymm1,0x40(%rsp)
> -       vmovdqa         0x60(%rsp),%ymm0
> -       vperm2i128      $0x20,%ymm7,%ymm0,%ymm1
> -       vperm2i128      $0x31,%ymm7,%ymm0,%ymm7
> -       vmovdqa         %ymm1,0x60(%rsp)
> -       vperm2i128      $0x20,%ymm12,%ymm8,%ymm0
> -       vperm2i128      $0x31,%ymm12,%ymm8,%ymm12
> -       vmovdqa         %ymm0,%ymm8
> -       vperm2i128      $0x20,%ymm13,%ymm9,%ymm0
> -       vperm2i128      $0x31,%ymm13,%ymm9,%ymm13
> -       vmovdqa         %ymm0,%ymm9
> -       vperm2i128      $0x20,%ymm14,%ymm10,%ymm0
> -       vperm2i128      $0x31,%ymm14,%ymm10,%ymm14
> -       vmovdqa         %ymm0,%ymm10
> -       vperm2i128      $0x20,%ymm15,%ymm11,%ymm0
> -       vperm2i128      $0x31,%ymm15,%ymm11,%ymm15
> -       vmovdqa         %ymm0,%ymm11
> -
> -       # xor with corresponding input, write to output
> -       vmovdqa         0x00(%rsp),%ymm0
> -       vpxor           0x0000(%rdx),%ymm0,%ymm0
> -       vmovdqu         %ymm0,0x0000(%rsi)
> -       vmovdqa         0x20(%rsp),%ymm0
> -       vpxor           0x0080(%rdx),%ymm0,%ymm0
> -       vmovdqu         %ymm0,0x0080(%rsi)
> -       vmovdqa         0x40(%rsp),%ymm0
> -       vpxor           0x0040(%rdx),%ymm0,%ymm0
> -       vmovdqu         %ymm0,0x0040(%rsi)
> -       vmovdqa         0x60(%rsp),%ymm0
> -       vpxor           0x00c0(%rdx),%ymm0,%ymm0
> -       vmovdqu         %ymm0,0x00c0(%rsi)
> -       vpxor           0x0100(%rdx),%ymm4,%ymm4
> -       vmovdqu         %ymm4,0x0100(%rsi)
> -       vpxor           0x0180(%rdx),%ymm5,%ymm5
> -       vmovdqu         %ymm5,0x00180(%rsi)
> -       vpxor           0x0140(%rdx),%ymm6,%ymm6
> -       vmovdqu         %ymm6,0x0140(%rsi)
> -       vpxor           0x01c0(%rdx),%ymm7,%ymm7
> -       vmovdqu         %ymm7,0x01c0(%rsi)
> -       vpxor           0x0020(%rdx),%ymm8,%ymm8
> -       vmovdqu         %ymm8,0x0020(%rsi)
> -       vpxor           0x00a0(%rdx),%ymm9,%ymm9
> -       vmovdqu         %ymm9,0x00a0(%rsi)
> -       vpxor           0x0060(%rdx),%ymm10,%ymm10
> -       vmovdqu         %ymm10,0x0060(%rsi)
> -       vpxor           0x00e0(%rdx),%ymm11,%ymm11
> -       vmovdqu         %ymm11,0x00e0(%rsi)
> -       vpxor           0x0120(%rdx),%ymm12,%ymm12
> -       vmovdqu         %ymm12,0x0120(%rsi)
> -       vpxor           0x01a0(%rdx),%ymm13,%ymm13
> -       vmovdqu         %ymm13,0x01a0(%rsi)
> -       vpxor           0x0160(%rdx),%ymm14,%ymm14
> -       vmovdqu         %ymm14,0x0160(%rsi)
> -       vpxor           0x01e0(%rdx),%ymm15,%ymm15
> -       vmovdqu         %ymm15,0x01e0(%rsi)
> -
> -       vzeroupper
> -       lea             -8(%r10),%rsp
> -       ret
> -ENDPROC(chacha20_8block_xor_avx2)
> diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
> deleted file mode 100644
> index 512a2b500fd1..000000000000
> --- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
> +++ /dev/null
> @@ -1,630 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <linux/linkage.h>
> -
> -.section       .rodata.cst16.ROT8, "aM", @progbits, 16
> -.align 16
> -ROT8:  .octa 0x0e0d0c0f0a09080b0605040702010003
> -.section       .rodata.cst16.ROT16, "aM", @progbits, 16
> -.align 16
> -ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
> -.section       .rodata.cst16.CTRINC, "aM", @progbits, 16
> -.align 16
> -CTRINC:        .octa 0x00000003000000020000000100000000
> -
> -.text
> -
> -ENTRY(chacha20_block_xor_ssse3)
> -       # %rdi: Input state matrix, s
> -       # %rsi: 1 data block output, o
> -       # %rdx: 1 data block input, i
> -
> -       # This function encrypts one ChaCha20 block by loading the state matrix
> -       # in four SSE registers. It performs matrix operation on four words in
> -       # parallel, but requireds shuffling to rearrange the words after each
> -       # round. 8/16-bit word rotation is done with the slightly better
> -       # performing SSSE3 byte shuffling, 7/12-bit word rotation uses
> -       # traditional shift+OR.
> -
> -       # x0..3 = s0..3
> -       movdqa          0x00(%rdi),%xmm0
> -       movdqa          0x10(%rdi),%xmm1
> -       movdqa          0x20(%rdi),%xmm2
> -       movdqa          0x30(%rdi),%xmm3
> -       movdqa          %xmm0,%xmm8
> -       movdqa          %xmm1,%xmm9
> -       movdqa          %xmm2,%xmm10
> -       movdqa          %xmm3,%xmm11
> -
> -       movdqa          ROT8(%rip),%xmm4
> -       movdqa          ROT16(%rip),%xmm5
> -
> -       mov     $10,%ecx
> -
> -.Ldoubleround:
> -
> -       # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> -       paddd           %xmm1,%xmm0
> -       pxor            %xmm0,%xmm3
> -       pshufb          %xmm5,%xmm3
> -
> -       # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> -       paddd           %xmm3,%xmm2
> -       pxor            %xmm2,%xmm1
> -       movdqa          %xmm1,%xmm6
> -       pslld           $12,%xmm6
> -       psrld           $20,%xmm1
> -       por             %xmm6,%xmm1
> -
> -       # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> -       paddd           %xmm1,%xmm0
> -       pxor            %xmm0,%xmm3
> -       pshufb          %xmm4,%xmm3
> -
> -       # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> -       paddd           %xmm3,%xmm2
> -       pxor            %xmm2,%xmm1
> -       movdqa          %xmm1,%xmm7
> -       pslld           $7,%xmm7
> -       psrld           $25,%xmm1
> -       por             %xmm7,%xmm1
> -
> -       # x1 = shuffle32(x1, MASK(0, 3, 2, 1))
> -       pshufd          $0x39,%xmm1,%xmm1
> -       # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> -       pshufd          $0x4e,%xmm2,%xmm2
> -       # x3 = shuffle32(x3, MASK(2, 1, 0, 3))
> -       pshufd          $0x93,%xmm3,%xmm3
> -
> -       # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
> -       paddd           %xmm1,%xmm0
> -       pxor            %xmm0,%xmm3
> -       pshufb          %xmm5,%xmm3
> -
> -       # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
> -       paddd           %xmm3,%xmm2
> -       pxor            %xmm2,%xmm1
> -       movdqa          %xmm1,%xmm6
> -       pslld           $12,%xmm6
> -       psrld           $20,%xmm1
> -       por             %xmm6,%xmm1
> -
> -       # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
> -       paddd           %xmm1,%xmm0
> -       pxor            %xmm0,%xmm3
> -       pshufb          %xmm4,%xmm3
> -
> -       # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
> -       paddd           %xmm3,%xmm2
> -       pxor            %xmm2,%xmm1
> -       movdqa          %xmm1,%xmm7
> -       pslld           $7,%xmm7
> -       psrld           $25,%xmm1
> -       por             %xmm7,%xmm1
> -
> -       # x1 = shuffle32(x1, MASK(2, 1, 0, 3))
> -       pshufd          $0x93,%xmm1,%xmm1
> -       # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
> -       pshufd          $0x4e,%xmm2,%xmm2
> -       # x3 = shuffle32(x3, MASK(0, 3, 2, 1))
> -       pshufd          $0x39,%xmm3,%xmm3
> -
> -       dec             %ecx
> -       jnz             .Ldoubleround
> -
> -       # o0 = i0 ^ (x0 + s0)
> -       movdqu          0x00(%rdx),%xmm4
> -       paddd           %xmm8,%xmm0
> -       pxor            %xmm4,%xmm0
> -       movdqu          %xmm0,0x00(%rsi)
> -       # o1 = i1 ^ (x1 + s1)
> -       movdqu          0x10(%rdx),%xmm5
> -       paddd           %xmm9,%xmm1
> -       pxor            %xmm5,%xmm1
> -       movdqu          %xmm1,0x10(%rsi)
> -       # o2 = i2 ^ (x2 + s2)
> -       movdqu          0x20(%rdx),%xmm6
> -       paddd           %xmm10,%xmm2
> -       pxor            %xmm6,%xmm2
> -       movdqu          %xmm2,0x20(%rsi)
> -       # o3 = i3 ^ (x3 + s3)
> -       movdqu          0x30(%rdx),%xmm7
> -       paddd           %xmm11,%xmm3
> -       pxor            %xmm7,%xmm3
> -       movdqu          %xmm3,0x30(%rsi)
> -
> -       ret
> -ENDPROC(chacha20_block_xor_ssse3)
> -
> -ENTRY(chacha20_4block_xor_ssse3)
> -       # %rdi: Input state matrix, s
> -       # %rsi: 4 data blocks output, o
> -       # %rdx: 4 data blocks input, i
> -
> -       # This function encrypts four consecutive ChaCha20 blocks by loading the
> -       # the state matrix in SSE registers four times. As we need some scratch
> -       # registers, we save the first four registers on the stack. The
> -       # algorithm performs each operation on the corresponding word of each
> -       # state matrix, hence requires no word shuffling. For final XORing step
> -       # we transpose the matrix by interleaving 32- and then 64-bit words,
> -       # which allows us to do XOR in SSE registers. 8/16-bit word rotation is
> -       # done with the slightly better performing SSSE3 byte shuffling,
> -       # 7/12-bit word rotation uses traditional shift+OR.
> -
> -       lea             8(%rsp),%r10
> -       sub             $0x80,%rsp
> -       and             $~63,%rsp
> -
> -       # x0..15[0-3] = s0..3[0..3]
> -       movq            0x00(%rdi),%xmm1
> -       pshufd          $0x00,%xmm1,%xmm0
> -       pshufd          $0x55,%xmm1,%xmm1
> -       movq            0x08(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       movq            0x10(%rdi),%xmm5
> -       pshufd          $0x00,%xmm5,%xmm4
> -       pshufd          $0x55,%xmm5,%xmm5
> -       movq            0x18(%rdi),%xmm7
> -       pshufd          $0x00,%xmm7,%xmm6
> -       pshufd          $0x55,%xmm7,%xmm7
> -       movq            0x20(%rdi),%xmm9
> -       pshufd          $0x00,%xmm9,%xmm8
> -       pshufd          $0x55,%xmm9,%xmm9
> -       movq            0x28(%rdi),%xmm11
> -       pshufd          $0x00,%xmm11,%xmm10
> -       pshufd          $0x55,%xmm11,%xmm11
> -       movq            0x30(%rdi),%xmm13
> -       pshufd          $0x00,%xmm13,%xmm12
> -       pshufd          $0x55,%xmm13,%xmm13
> -       movq            0x38(%rdi),%xmm15
> -       pshufd          $0x00,%xmm15,%xmm14
> -       pshufd          $0x55,%xmm15,%xmm15
> -       # x0..3 on stack
> -       movdqa          %xmm0,0x00(%rsp)
> -       movdqa          %xmm1,0x10(%rsp)
> -       movdqa          %xmm2,0x20(%rsp)
> -       movdqa          %xmm3,0x30(%rsp)
> -
> -       movdqa          CTRINC(%rip),%xmm1
> -       movdqa          ROT8(%rip),%xmm2
> -       movdqa          ROT16(%rip),%xmm3
> -
> -       # x12 += counter values 0-3
> -       paddd           %xmm1,%xmm12
> -
> -       mov             $10,%ecx
> -
> -.Ldoubleround4:
> -       # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
> -       movdqa          0x00(%rsp),%xmm0
> -       paddd           %xmm4,%xmm0
> -       movdqa          %xmm0,0x00(%rsp)
> -       pxor            %xmm0,%xmm12
> -       pshufb          %xmm3,%xmm12
> -       # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
> -       movdqa          0x10(%rsp),%xmm0
> -       paddd           %xmm5,%xmm0
> -       movdqa          %xmm0,0x10(%rsp)
> -       pxor            %xmm0,%xmm13
> -       pshufb          %xmm3,%xmm13
> -       # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
> -       movdqa          0x20(%rsp),%xmm0
> -       paddd           %xmm6,%xmm0
> -       movdqa          %xmm0,0x20(%rsp)
> -       pxor            %xmm0,%xmm14
> -       pshufb          %xmm3,%xmm14
> -       # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
> -       movdqa          0x30(%rsp),%xmm0
> -       paddd           %xmm7,%xmm0
> -       movdqa          %xmm0,0x30(%rsp)
> -       pxor            %xmm0,%xmm15
> -       pshufb          %xmm3,%xmm15
> -
> -       # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
> -       paddd           %xmm12,%xmm8
> -       pxor            %xmm8,%xmm4
> -       movdqa          %xmm4,%xmm0
> -       pslld           $12,%xmm0
> -       psrld           $20,%xmm4
> -       por             %xmm0,%xmm4
> -       # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
> -       paddd           %xmm13,%xmm9
> -       pxor            %xmm9,%xmm5
> -       movdqa          %xmm5,%xmm0
> -       pslld           $12,%xmm0
> -       psrld           $20,%xmm5
> -       por             %xmm0,%xmm5
> -       # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
> -       paddd           %xmm14,%xmm10
> -       pxor            %xmm10,%xmm6
> -       movdqa          %xmm6,%xmm0
> -       pslld           $12,%xmm0
> -       psrld           $20,%xmm6
> -       por             %xmm0,%xmm6
> -       # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
> -       paddd           %xmm15,%xmm11
> -       pxor            %xmm11,%xmm7
> -       movdqa          %xmm7,%xmm0
> -       pslld           $12,%xmm0
> -       psrld           $20,%xmm7
> -       por             %xmm0,%xmm7
> -
> -       # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
> -       movdqa          0x00(%rsp),%xmm0
> -       paddd           %xmm4,%xmm0
> -       movdqa          %xmm0,0x00(%rsp)
> -       pxor            %xmm0,%xmm12
> -       pshufb          %xmm2,%xmm12
> -       # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
> -       movdqa          0x10(%rsp),%xmm0
> -       paddd           %xmm5,%xmm0
> -       movdqa          %xmm0,0x10(%rsp)
> -       pxor            %xmm0,%xmm13
> -       pshufb          %xmm2,%xmm13
> -       # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
> -       movdqa          0x20(%rsp),%xmm0
> -       paddd           %xmm6,%xmm0
> -       movdqa          %xmm0,0x20(%rsp)
> -       pxor            %xmm0,%xmm14
> -       pshufb          %xmm2,%xmm14
> -       # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
> -       movdqa          0x30(%rsp),%xmm0
> -       paddd           %xmm7,%xmm0
> -       movdqa          %xmm0,0x30(%rsp)
> -       pxor            %xmm0,%xmm15
> -       pshufb          %xmm2,%xmm15
> -
> -       # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
> -       paddd           %xmm12,%xmm8
> -       pxor            %xmm8,%xmm4
> -       movdqa          %xmm4,%xmm0
> -       pslld           $7,%xmm0
> -       psrld           $25,%xmm4
> -       por             %xmm0,%xmm4
> -       # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
> -       paddd           %xmm13,%xmm9
> -       pxor            %xmm9,%xmm5
> -       movdqa          %xmm5,%xmm0
> -       pslld           $7,%xmm0
> -       psrld           $25,%xmm5
> -       por             %xmm0,%xmm5
> -       # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
> -       paddd           %xmm14,%xmm10
> -       pxor            %xmm10,%xmm6
> -       movdqa          %xmm6,%xmm0
> -       pslld           $7,%xmm0
> -       psrld           $25,%xmm6
> -       por             %xmm0,%xmm6
> -       # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
> -       paddd           %xmm15,%xmm11
> -       pxor            %xmm11,%xmm7
> -       movdqa          %xmm7,%xmm0
> -       pslld           $7,%xmm0
> -       psrld           $25,%xmm7
> -       por             %xmm0,%xmm7
> -
> -       # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
> -       movdqa          0x00(%rsp),%xmm0
> -       paddd           %xmm5,%xmm0
> -       movdqa          %xmm0,0x00(%rsp)
> -       pxor            %xmm0,%xmm15
> -       pshufb          %xmm3,%xmm15
> -       # x1 += x6, x12 = rotl32(x12 ^ x1, 16)
> -       movdqa          0x10(%rsp),%xmm0
> -       paddd           %xmm6,%xmm0
> -       movdqa          %xmm0,0x10(%rsp)
> -       pxor            %xmm0,%xmm12
> -       pshufb          %xmm3,%xmm12
> -       # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
> -       movdqa          0x20(%rsp),%xmm0
> -       paddd           %xmm7,%xmm0
> -       movdqa          %xmm0,0x20(%rsp)
> -       pxor            %xmm0,%xmm13
> -       pshufb          %xmm3,%xmm13
> -       # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
> -       movdqa          0x30(%rsp),%xmm0
> -       paddd           %xmm4,%xmm0
> -       movdqa          %xmm0,0x30(%rsp)
> -       pxor            %xmm0,%xmm14
> -       pshufb          %xmm3,%xmm14
> -
> -       # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
> -       paddd           %xmm15,%xmm10
> -       pxor            %xmm10,%xmm5
> -       movdqa          %xmm5,%xmm0
> -       pslld           $12,%xmm0
> -       psrld           $20,%xmm5
> -       por             %xmm0,%xmm5
> -       # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
> -       paddd           %xmm12,%xmm11
> -       pxor            %xmm11,%xmm6
> -       movdqa          %xmm6,%xmm0
> -       pslld           $12,%xmm0
> -       psrld           $20,%xmm6
> -       por             %xmm0,%xmm6
> -       # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
> -       paddd           %xmm13,%xmm8
> -       pxor            %xmm8,%xmm7
> -       movdqa          %xmm7,%xmm0
> -       pslld           $12,%xmm0
> -       psrld           $20,%xmm7
> -       por             %xmm0,%xmm7
> -       # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
> -       paddd           %xmm14,%xmm9
> -       pxor            %xmm9,%xmm4
> -       movdqa          %xmm4,%xmm0
> -       pslld           $12,%xmm0
> -       psrld           $20,%xmm4
> -       por             %xmm0,%xmm4
> -
> -       # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
> -       movdqa          0x00(%rsp),%xmm0
> -       paddd           %xmm5,%xmm0
> -       movdqa          %xmm0,0x00(%rsp)
> -       pxor            %xmm0,%xmm15
> -       pshufb          %xmm2,%xmm15
> -       # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
> -       movdqa          0x10(%rsp),%xmm0
> -       paddd           %xmm6,%xmm0
> -       movdqa          %xmm0,0x10(%rsp)
> -       pxor            %xmm0,%xmm12
> -       pshufb          %xmm2,%xmm12
> -       # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
> -       movdqa          0x20(%rsp),%xmm0
> -       paddd           %xmm7,%xmm0
> -       movdqa          %xmm0,0x20(%rsp)
> -       pxor            %xmm0,%xmm13
> -       pshufb          %xmm2,%xmm13
> -       # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
> -       movdqa          0x30(%rsp),%xmm0
> -       paddd           %xmm4,%xmm0
> -       movdqa          %xmm0,0x30(%rsp)
> -       pxor            %xmm0,%xmm14
> -       pshufb          %xmm2,%xmm14
> -
> -       # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
> -       paddd           %xmm15,%xmm10
> -       pxor            %xmm10,%xmm5
> -       movdqa          %xmm5,%xmm0
> -       pslld           $7,%xmm0
> -       psrld           $25,%xmm5
> -       por             %xmm0,%xmm5
> -       # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
> -       paddd           %xmm12,%xmm11
> -       pxor            %xmm11,%xmm6
> -       movdqa          %xmm6,%xmm0
> -       pslld           $7,%xmm0
> -       psrld           $25,%xmm6
> -       por             %xmm0,%xmm6
> -       # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
> -       paddd           %xmm13,%xmm8
> -       pxor            %xmm8,%xmm7
> -       movdqa          %xmm7,%xmm0
> -       pslld           $7,%xmm0
> -       psrld           $25,%xmm7
> -       por             %xmm0,%xmm7
> -       # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
> -       paddd           %xmm14,%xmm9
> -       pxor            %xmm9,%xmm4
> -       movdqa          %xmm4,%xmm0
> -       pslld           $7,%xmm0
> -       psrld           $25,%xmm4
> -       por             %xmm0,%xmm4
> -
> -       dec             %ecx
> -       jnz             .Ldoubleround4
> -
> -       # x0[0-3] += s0[0]
> -       # x1[0-3] += s0[1]
> -       movq            0x00(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       paddd           0x00(%rsp),%xmm2
> -       movdqa          %xmm2,0x00(%rsp)
> -       paddd           0x10(%rsp),%xmm3
> -       movdqa          %xmm3,0x10(%rsp)
> -       # x2[0-3] += s0[2]
> -       # x3[0-3] += s0[3]
> -       movq            0x08(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       paddd           0x20(%rsp),%xmm2
> -       movdqa          %xmm2,0x20(%rsp)
> -       paddd           0x30(%rsp),%xmm3
> -       movdqa          %xmm3,0x30(%rsp)
> -
> -       # x4[0-3] += s1[0]
> -       # x5[0-3] += s1[1]
> -       movq            0x10(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       paddd           %xmm2,%xmm4
> -       paddd           %xmm3,%xmm5
> -       # x6[0-3] += s1[2]
> -       # x7[0-3] += s1[3]
> -       movq            0x18(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       paddd           %xmm2,%xmm6
> -       paddd           %xmm3,%xmm7
> -
> -       # x8[0-3] += s2[0]
> -       # x9[0-3] += s2[1]
> -       movq            0x20(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       paddd           %xmm2,%xmm8
> -       paddd           %xmm3,%xmm9
> -       # x10[0-3] += s2[2]
> -       # x11[0-3] += s2[3]
> -       movq            0x28(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       paddd           %xmm2,%xmm10
> -       paddd           %xmm3,%xmm11
> -
> -       # x12[0-3] += s3[0]
> -       # x13[0-3] += s3[1]
> -       movq            0x30(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       paddd           %xmm2,%xmm12
> -       paddd           %xmm3,%xmm13
> -       # x14[0-3] += s3[2]
> -       # x15[0-3] += s3[3]
> -       movq            0x38(%rdi),%xmm3
> -       pshufd          $0x00,%xmm3,%xmm2
> -       pshufd          $0x55,%xmm3,%xmm3
> -       paddd           %xmm2,%xmm14
> -       paddd           %xmm3,%xmm15
> -
> -       # x12 += counter values 0-3
> -       paddd           %xmm1,%xmm12
> -
> -       # interleave 32-bit words in state n, n+1
> -       movdqa          0x00(%rsp),%xmm0
> -       movdqa          0x10(%rsp),%xmm1
> -       movdqa          %xmm0,%xmm2
> -       punpckldq       %xmm1,%xmm2
> -       punpckhdq       %xmm1,%xmm0
> -       movdqa          %xmm2,0x00(%rsp)
> -       movdqa          %xmm0,0x10(%rsp)
> -       movdqa          0x20(%rsp),%xmm0
> -       movdqa          0x30(%rsp),%xmm1
> -       movdqa          %xmm0,%xmm2
> -       punpckldq       %xmm1,%xmm2
> -       punpckhdq       %xmm1,%xmm0
> -       movdqa          %xmm2,0x20(%rsp)
> -       movdqa          %xmm0,0x30(%rsp)
> -       movdqa          %xmm4,%xmm0
> -       punpckldq       %xmm5,%xmm4
> -       punpckhdq       %xmm5,%xmm0
> -       movdqa          %xmm0,%xmm5
> -       movdqa          %xmm6,%xmm0
> -       punpckldq       %xmm7,%xmm6
> -       punpckhdq       %xmm7,%xmm0
> -       movdqa          %xmm0,%xmm7
> -       movdqa          %xmm8,%xmm0
> -       punpckldq       %xmm9,%xmm8
> -       punpckhdq       %xmm9,%xmm0
> -       movdqa          %xmm0,%xmm9
> -       movdqa          %xmm10,%xmm0
> -       punpckldq       %xmm11,%xmm10
> -       punpckhdq       %xmm11,%xmm0
> -       movdqa          %xmm0,%xmm11
> -       movdqa          %xmm12,%xmm0
> -       punpckldq       %xmm13,%xmm12
> -       punpckhdq       %xmm13,%xmm0
> -       movdqa          %xmm0,%xmm13
> -       movdqa          %xmm14,%xmm0
> -       punpckldq       %xmm15,%xmm14
> -       punpckhdq       %xmm15,%xmm0
> -       movdqa          %xmm0,%xmm15
> -
> -       # interleave 64-bit words in state n, n+2
> -       movdqa          0x00(%rsp),%xmm0
> -       movdqa          0x20(%rsp),%xmm1
> -       movdqa          %xmm0,%xmm2
> -       punpcklqdq      %xmm1,%xmm2
> -       punpckhqdq      %xmm1,%xmm0
> -       movdqa          %xmm2,0x00(%rsp)
> -       movdqa          %xmm0,0x20(%rsp)
> -       movdqa          0x10(%rsp),%xmm0
> -       movdqa          0x30(%rsp),%xmm1
> -       movdqa          %xmm0,%xmm2
> -       punpcklqdq      %xmm1,%xmm2
> -       punpckhqdq      %xmm1,%xmm0
> -       movdqa          %xmm2,0x10(%rsp)
> -       movdqa          %xmm0,0x30(%rsp)
> -       movdqa          %xmm4,%xmm0
> -       punpcklqdq      %xmm6,%xmm4
> -       punpckhqdq      %xmm6,%xmm0
> -       movdqa          %xmm0,%xmm6
> -       movdqa          %xmm5,%xmm0
> -       punpcklqdq      %xmm7,%xmm5
> -       punpckhqdq      %xmm7,%xmm0
> -       movdqa          %xmm0,%xmm7
> -       movdqa          %xmm8,%xmm0
> -       punpcklqdq      %xmm10,%xmm8
> -       punpckhqdq      %xmm10,%xmm0
> -       movdqa          %xmm0,%xmm10
> -       movdqa          %xmm9,%xmm0
> -       punpcklqdq      %xmm11,%xmm9
> -       punpckhqdq      %xmm11,%xmm0
> -       movdqa          %xmm0,%xmm11
> -       movdqa          %xmm12,%xmm0
> -       punpcklqdq      %xmm14,%xmm12
> -       punpckhqdq      %xmm14,%xmm0
> -       movdqa          %xmm0,%xmm14
> -       movdqa          %xmm13,%xmm0
> -       punpcklqdq      %xmm15,%xmm13
> -       punpckhqdq      %xmm15,%xmm0
> -       movdqa          %xmm0,%xmm15
> -
> -       # xor with corresponding input, write to output
> -       movdqa          0x00(%rsp),%xmm0
> -       movdqu          0x00(%rdx),%xmm1
> -       pxor            %xmm1,%xmm0
> -       movdqu          %xmm0,0x00(%rsi)
> -       movdqa          0x10(%rsp),%xmm0
> -       movdqu          0x80(%rdx),%xmm1
> -       pxor            %xmm1,%xmm0
> -       movdqu          %xmm0,0x80(%rsi)
> -       movdqa          0x20(%rsp),%xmm0
> -       movdqu          0x40(%rdx),%xmm1
> -       pxor            %xmm1,%xmm0
> -       movdqu          %xmm0,0x40(%rsi)
> -       movdqa          0x30(%rsp),%xmm0
> -       movdqu          0xc0(%rdx),%xmm1
> -       pxor            %xmm1,%xmm0
> -       movdqu          %xmm0,0xc0(%rsi)
> -       movdqu          0x10(%rdx),%xmm1
> -       pxor            %xmm1,%xmm4
> -       movdqu          %xmm4,0x10(%rsi)
> -       movdqu          0x90(%rdx),%xmm1
> -       pxor            %xmm1,%xmm5
> -       movdqu          %xmm5,0x90(%rsi)
> -       movdqu          0x50(%rdx),%xmm1
> -       pxor            %xmm1,%xmm6
> -       movdqu          %xmm6,0x50(%rsi)
> -       movdqu          0xd0(%rdx),%xmm1
> -       pxor            %xmm1,%xmm7
> -       movdqu          %xmm7,0xd0(%rsi)
> -       movdqu          0x20(%rdx),%xmm1
> -       pxor            %xmm1,%xmm8
> -       movdqu          %xmm8,0x20(%rsi)
> -       movdqu          0xa0(%rdx),%xmm1
> -       pxor            %xmm1,%xmm9
> -       movdqu          %xmm9,0xa0(%rsi)
> -       movdqu          0x60(%rdx),%xmm1
> -       pxor            %xmm1,%xmm10
> -       movdqu          %xmm10,0x60(%rsi)
> -       movdqu          0xe0(%rdx),%xmm1
> -       pxor            %xmm1,%xmm11
> -       movdqu          %xmm11,0xe0(%rsi)
> -       movdqu          0x30(%rdx),%xmm1
> -       pxor            %xmm1,%xmm12
> -       movdqu          %xmm12,0x30(%rsi)
> -       movdqu          0xb0(%rdx),%xmm1
> -       pxor            %xmm1,%xmm13
> -       movdqu          %xmm13,0xb0(%rsi)
> -       movdqu          0x70(%rdx),%xmm1
> -       pxor            %xmm1,%xmm14
> -       movdqu          %xmm14,0x70(%rsi)
> -       movdqu          0xf0(%rdx),%xmm1
> -       pxor            %xmm1,%xmm15
> -       movdqu          %xmm15,0xf0(%rsi)
> -
> -       lea             -8(%r10),%rsp
> -       ret
> -ENDPROC(chacha20_4block_xor_ssse3)
> diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
> deleted file mode 100644
> index dce7c5d39c2f..000000000000
> --- a/arch/x86/crypto/chacha20_glue.c
> +++ /dev/null
> @@ -1,146 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <crypto/algapi.h>
> -#include <crypto/chacha20.h>
> -#include <crypto/internal/skcipher.h>
> -#include <linux/kernel.h>
> -#include <linux/module.h>
> -#include <asm/fpu/api.h>
> -#include <asm/simd.h>
> -
> -#define CHACHA20_STATE_ALIGN 16
> -
> -asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
> -asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
> -#ifdef CONFIG_AS_AVX2
> -asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
> -static bool chacha20_use_avx2;
> -#endif
> -
> -static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
> -                           unsigned int bytes)
> -{
> -       u8 buf[CHACHA20_BLOCK_SIZE];
> -
> -#ifdef CONFIG_AS_AVX2
> -       if (chacha20_use_avx2) {
> -               while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
> -                       chacha20_8block_xor_avx2(state, dst, src);
> -                       bytes -= CHACHA20_BLOCK_SIZE * 8;
> -                       src += CHACHA20_BLOCK_SIZE * 8;
> -                       dst += CHACHA20_BLOCK_SIZE * 8;
> -                       state[12] += 8;
> -               }
> -       }
> -#endif
> -       while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
> -               chacha20_4block_xor_ssse3(state, dst, src);
> -               bytes -= CHACHA20_BLOCK_SIZE * 4;
> -               src += CHACHA20_BLOCK_SIZE * 4;
> -               dst += CHACHA20_BLOCK_SIZE * 4;
> -               state[12] += 4;
> -       }
> -       while (bytes >= CHACHA20_BLOCK_SIZE) {
> -               chacha20_block_xor_ssse3(state, dst, src);
> -               bytes -= CHACHA20_BLOCK_SIZE;
> -               src += CHACHA20_BLOCK_SIZE;
> -               dst += CHACHA20_BLOCK_SIZE;
> -               state[12]++;
> -       }
> -       if (bytes) {
> -               memcpy(buf, src, bytes);
> -               chacha20_block_xor_ssse3(state, buf, buf);
> -               memcpy(dst, buf, bytes);
> -       }
> -}
> -
> -static int chacha20_simd(struct skcipher_request *req)
> -{
> -       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> -       struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> -       u32 *state, state_buf[16 + 2] __aligned(8);
> -       struct skcipher_walk walk;
> -       int err;
> -
> -       BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
> -       state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
> -
> -       if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
> -               return crypto_chacha20_crypt(req);
> -
> -       err = skcipher_walk_virt(&walk, req, true);
> -
> -       crypto_chacha20_init(state, ctx, walk.iv);
> -
> -       kernel_fpu_begin();
> -
> -       while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
> -               chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
> -                               rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
> -               err = skcipher_walk_done(&walk,
> -                                        walk.nbytes % CHACHA20_BLOCK_SIZE);
> -       }
> -
> -       if (walk.nbytes) {
> -               chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
> -                               walk.nbytes);
> -               err = skcipher_walk_done(&walk, 0);
> -       }
> -
> -       kernel_fpu_end();
> -
> -       return err;
> -}
> -
> -static struct skcipher_alg alg = {
> -       .base.cra_name          = "chacha20",
> -       .base.cra_driver_name   = "chacha20-simd",
> -       .base.cra_priority      = 300,
> -       .base.cra_blocksize     = 1,
> -       .base.cra_ctxsize       = sizeof(struct chacha20_ctx),
> -       .base.cra_module        = THIS_MODULE,
> -
> -       .min_keysize            = CHACHA20_KEY_SIZE,
> -       .max_keysize            = CHACHA20_KEY_SIZE,
> -       .ivsize                 = CHACHA20_IV_SIZE,
> -       .chunksize              = CHACHA20_BLOCK_SIZE,
> -       .setkey                 = crypto_chacha20_setkey,
> -       .encrypt                = chacha20_simd,
> -       .decrypt                = chacha20_simd,
> -};
> -
> -static int __init chacha20_simd_mod_init(void)
> -{
> -       if (!boot_cpu_has(X86_FEATURE_SSSE3))
> -               return -ENODEV;
> -
> -#ifdef CONFIG_AS_AVX2
> -       chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
> -                           boot_cpu_has(X86_FEATURE_AVX2) &&
> -                           cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
> -#endif
> -       return crypto_register_skcipher(&alg);
> -}
> -
> -static void __exit chacha20_simd_mod_fini(void)
> -{
> -       crypto_unregister_skcipher(&alg);
> -}
> -
> -module_init(chacha20_simd_mod_init);
> -module_exit(chacha20_simd_mod_fini);
> -
> -MODULE_LICENSE("GPL");
> -MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
> -MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
> -MODULE_ALIAS_CRYPTO("chacha20");
> -MODULE_ALIAS_CRYPTO("chacha20-simd");
> diff --git a/crypto/Kconfig b/crypto/Kconfig
> index 47859a0f8052..93cd4d199447 100644
> --- a/crypto/Kconfig
> +++ b/crypto/Kconfig
> @@ -1433,22 +1433,6 @@ config CRYPTO_CHACHA20
>
>           ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
>           Bernstein and further specified in RFC7539 for use in IETF protocols.
> -         This is the portable C implementation of ChaCha20.
> -
> -         See also:
> -         <http://cr.yp.to/chacha/chacha-20080128.pdf>
> -
> -config CRYPTO_CHACHA20_X86_64
> -       tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
> -       depends on X86 && 64BIT
> -       select CRYPTO_BLKCIPHER
> -       select CRYPTO_CHACHA20
> -       help
> -         ChaCha20 cipher algorithm, RFC7539.
> -
> -         ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
> -         Bernstein and further specified in RFC7539 for use in IETF protocols.
> -         This is the x86_64 assembler implementation using SIMD instructions.
>
>           See also:
>           <http://cr.yp.to/chacha/chacha-20080128.pdf>
> diff --git a/crypto/Makefile b/crypto/Makefile
> index 5e60348d02e2..587103b87890 100644
> --- a/crypto/Makefile
> +++ b/crypto/Makefile
> @@ -117,7 +117,7 @@ obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
>  obj-$(CONFIG_CRYPTO_SEED) += seed.o
>  obj-$(CONFIG_CRYPTO_SPECK) += speck.o
>  obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
> -obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
> +obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_zinc.o
>  obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_zinc.o
>  obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
>  obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
> diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
> deleted file mode 100644
> index e451c3cb6a56..000000000000
> --- a/crypto/chacha20_generic.c
> +++ /dev/null
> @@ -1,136 +0,0 @@
> -/*
> - * ChaCha20 256-bit cipher algorithm, RFC7539
> - *
> - * Copyright (C) 2015 Martin Willi
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License as published by
> - * the Free Software Foundation; either version 2 of the License, or
> - * (at your option) any later version.
> - */
> -
> -#include <asm/unaligned.h>
> -#include <crypto/algapi.h>
> -#include <crypto/chacha20.h>
> -#include <crypto/internal/skcipher.h>
> -#include <linux/module.h>
> -
> -static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
> -                            unsigned int bytes)
> -{
> -       u32 stream[CHACHA20_BLOCK_WORDS];
> -
> -       if (dst != src)
> -               memcpy(dst, src, bytes);
> -
> -       while (bytes >= CHACHA20_BLOCK_SIZE) {
> -               chacha20_block(state, stream);
> -               crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
> -               bytes -= CHACHA20_BLOCK_SIZE;
> -               dst += CHACHA20_BLOCK_SIZE;
> -       }
> -       if (bytes) {
> -               chacha20_block(state, stream);
> -               crypto_xor(dst, (const u8 *)stream, bytes);
> -       }
> -}
> -
> -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
> -{
> -       state[0]  = 0x61707865; /* "expa" */
> -       state[1]  = 0x3320646e; /* "nd 3" */
> -       state[2]  = 0x79622d32; /* "2-by" */
> -       state[3]  = 0x6b206574; /* "te k" */
> -       state[4]  = ctx->key[0];
> -       state[5]  = ctx->key[1];
> -       state[6]  = ctx->key[2];
> -       state[7]  = ctx->key[3];
> -       state[8]  = ctx->key[4];
> -       state[9]  = ctx->key[5];
> -       state[10] = ctx->key[6];
> -       state[11] = ctx->key[7];
> -       state[12] = get_unaligned_le32(iv +  0);
> -       state[13] = get_unaligned_le32(iv +  4);
> -       state[14] = get_unaligned_le32(iv +  8);
> -       state[15] = get_unaligned_le32(iv + 12);
> -}
> -EXPORT_SYMBOL_GPL(crypto_chacha20_init);
> -
> -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
> -                          unsigned int keysize)
> -{
> -       struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> -       int i;
> -
> -       if (keysize != CHACHA20_KEY_SIZE)
> -               return -EINVAL;
> -
> -       for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
> -               ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
> -
> -       return 0;
> -}
> -EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
> -
> -int crypto_chacha20_crypt(struct skcipher_request *req)
> -{
> -       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> -       struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
> -       struct skcipher_walk walk;
> -       u32 state[16];
> -       int err;
> -
> -       err = skcipher_walk_virt(&walk, req, true);
> -
> -       crypto_chacha20_init(state, ctx, walk.iv);
> -
> -       while (walk.nbytes > 0) {
> -               unsigned int nbytes = walk.nbytes;
> -
> -               if (nbytes < walk.total)
> -                       nbytes = round_down(nbytes, walk.stride);
> -
> -               chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
> -                                nbytes);
> -               err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> -       }
> -
> -       return err;
> -}
> -EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
> -
> -static struct skcipher_alg alg = {
> -       .base.cra_name          = "chacha20",
> -       .base.cra_driver_name   = "chacha20-generic",
> -       .base.cra_priority      = 100,
> -       .base.cra_blocksize     = 1,
> -       .base.cra_ctxsize       = sizeof(struct chacha20_ctx),
> -       .base.cra_module        = THIS_MODULE,
> -
> -       .min_keysize            = CHACHA20_KEY_SIZE,
> -       .max_keysize            = CHACHA20_KEY_SIZE,
> -       .ivsize                 = CHACHA20_IV_SIZE,
> -       .chunksize              = CHACHA20_BLOCK_SIZE,
> -       .setkey                 = crypto_chacha20_setkey,
> -       .encrypt                = crypto_chacha20_crypt,
> -       .decrypt                = crypto_chacha20_crypt,
> -};
> -
> -static int __init chacha20_generic_mod_init(void)
> -{
> -       return crypto_register_skcipher(&alg);
> -}
> -
> -static void __exit chacha20_generic_mod_fini(void)
> -{
> -       crypto_unregister_skcipher(&alg);
> -}
> -
> -module_init(chacha20_generic_mod_init);
> -module_exit(chacha20_generic_mod_fini);
> -
> -MODULE_LICENSE("GPL");
> -MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
> -MODULE_DESCRIPTION("chacha20 cipher algorithm");
> -MODULE_ALIAS_CRYPTO("chacha20");
> -MODULE_ALIAS_CRYPTO("chacha20-generic");
> diff --git a/crypto/chacha20_zinc.c b/crypto/chacha20_zinc.c
> new file mode 100644
> index 000000000000..5df88fdee066
> --- /dev/null
> +++ b/crypto/chacha20_zinc.c
> @@ -0,0 +1,100 @@
> +/* SPDX-License-Identifier: GPL-2.0
> + *
> + * Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> + */
> +
> +#include <asm/unaligned.h>
> +#include <crypto/algapi.h>
> +#include <crypto/internal/skcipher.h>
> +#include <zinc/chacha20.h>
> +#include <linux/module.h>
> +
> +struct chacha20_key_ctx {
> +       u32 key[8];
> +};
> +
> +static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
> +                                 unsigned int keysize)
> +{
> +       struct chacha20_key_ctx *key_ctx = crypto_skcipher_ctx(tfm);
> +       int i;
> +
> +       if (keysize != CHACHA20_KEY_SIZE)
> +               return -EINVAL;
> +
> +       for (i = 0; i < ARRAY_SIZE(key_ctx->key); ++i)
> +               key_ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
> +
> +       return 0;
> +}
> +
> +static int crypto_chacha20_crypt(struct skcipher_request *req)
> +{
> +       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> +       struct chacha20_key_ctx *key_ctx = crypto_skcipher_ctx(tfm);
> +       struct chacha20_ctx ctx;
> +       struct skcipher_walk walk;
> +       simd_context_t simd_context;
> +       int err, i;
> +
> +       err = skcipher_walk_virt(&walk, req, true);
> +       if (unlikely(err))
> +               return err;
> +
> +       memcpy(ctx.key, key_ctx->key, sizeof(ctx.key));
> +       for (i = 0; i < ARRAY_SIZE(ctx.counter); ++i)
> +               ctx.counter[i] = get_unaligned_le32(walk.iv + i * sizeof(u32));
> +
> +       simd_context = simd_get();
> +       while (walk.nbytes > 0) {
> +               unsigned int nbytes = walk.nbytes;
> +
> +               if (nbytes < walk.total)
> +                       nbytes = round_down(nbytes, walk.stride);
> +
> +               chacha20(&ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes,
> +                        simd_context);
> +
> +               err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
> +               simd_context = simd_relax(simd_context);
> +       }
> +       simd_put(simd_context);
> +
> +       return err;
> +}
> +
> +static struct skcipher_alg alg = {
> +       .base.cra_name          = "chacha20",
> +       .base.cra_driver_name   = "chacha20-software",
> +       .base.cra_priority      = 100,
> +       .base.cra_blocksize     = 1,
> +       .base.cra_ctxsize       = sizeof(struct chacha20_key_ctx),
> +       .base.cra_module        = THIS_MODULE,
> +
> +       .min_keysize            = CHACHA20_KEY_SIZE,
> +       .max_keysize            = CHACHA20_KEY_SIZE,
> +       .ivsize                 = CHACHA20_IV_SIZE,
> +       .chunksize              = CHACHA20_BLOCK_SIZE,
> +       .setkey                 = crypto_chacha20_setkey,
> +       .encrypt                = crypto_chacha20_crypt,
> +       .decrypt                = crypto_chacha20_crypt,
> +};
> +
> +static int __init chacha20_mod_init(void)
> +{
> +       return crypto_register_skcipher(&alg);
> +}
> +
> +static void __exit chacha20_mod_exit(void)
> +{
> +       crypto_unregister_skcipher(&alg);
> +}
> +
> +module_init(chacha20_mod_init);
> +module_exit(chacha20_mod_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
> +MODULE_DESCRIPTION("ChaCha20 stream cipher");
> +MODULE_ALIAS_CRYPTO("chacha20");
> +MODULE_ALIAS_CRYPTO("chacha20-software");
> diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
> index bf523797bef3..b26adb9ed898 100644
> --- a/crypto/chacha20poly1305.c
> +++ b/crypto/chacha20poly1305.c
> @@ -13,7 +13,7 @@
>  #include <crypto/internal/hash.h>
>  #include <crypto/internal/skcipher.h>
>  #include <crypto/scatterwalk.h>
> -#include <crypto/chacha20.h>
> +#include <zinc/chacha20.h>
>  #include <zinc/poly1305.h>
>  #include <linux/err.h>
>  #include <linux/init.h>
> diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
> index b83d66073db0..3b92f58f3891 100644
> --- a/include/crypto/chacha20.h
> +++ b/include/crypto/chacha20.h
> @@ -6,23 +6,11 @@
>  #ifndef _CRYPTO_CHACHA20_H
>  #define _CRYPTO_CHACHA20_H
>
> -#include <crypto/skcipher.h>
> -#include <linux/types.h>
> -#include <linux/crypto.h>
> -
>  #define CHACHA20_IV_SIZE       16
>  #define CHACHA20_KEY_SIZE      32
>  #define CHACHA20_BLOCK_SIZE    64
>  #define CHACHA20_BLOCK_WORDS   (CHACHA20_BLOCK_SIZE / sizeof(u32))
>
> -struct chacha20_ctx {
> -       u32 key[8];
> -};
> -
>  void chacha20_block(u32 *state, u32 *stream);
> -void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
> -int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
> -                          unsigned int keysize);
> -int crypto_chacha20_crypt(struct skcipher_request *req);
>
>  #endif
> --
> 2.19.0
>

^ permalink raw reply

* Re: [PATCH] net/mlx4_core: print firmware version during driver loading
From: Qing Huang @ 2018-09-14 22:36 UTC (permalink / raw)
  To: David Miller; +Cc: andrew, leon, netdev, linux-rdma, linux-kernel, tariqt
In-Reply-To: <20180914.141406.2211638662965115243.davem@davemloft.net>



On 9/14/2018 2:14 PM, David Miller wrote:
> From: Qing Huang<qing.huang@oracle.com>
> Date: Fri, 14 Sep 2018 11:33:40 -0700
>
>> On 9/14/2018 11:17 AM, Andrew Lunn wrote:
>>> On Fri, Sep 14, 2018 at 10:15:48AM -0700, Qing Huang wrote:
>>>> The FW version is actually a very crucial piece of information and
>>>> only
>>>> printed once here
>>>> when the driver is loaded. People tend to get confused when switching
>>>> multiple FW files
>>>> back and forth without running separate utility tools, especially at
>>>> customer sites.
>>>> IMHO, this information is very useful and only takes up very little
>>>> log file
>>>> space. :-)
>>> Why not use ethtool -i ?
>>>
>>> $ sudo ethtool -i eth0
>>> driver: r8169
>>> version: 2.3LK-NAPI
>>> firmware-version: rtl8168g-2_0.0.1 02/06/13
>>>
>>>       Andrew
>> Sure. You can also use ibstat or ibv_devinfo tool if they are
>> installed. But it's not very
>> convenient in some cases.
>>
>> E.g.
>> A customer upgrades FW on HCAs and encounters issues. During triage,
>> it's much easier
>> to study customer uploaded log files when remotely testing different
>> FW files.
> Not a valid argument.  You can print the ethtool output from initramfs
> if necessary for triage.
>
> I still stand by the fact that ethtool is the only fully reliable way
> to obtain this information, the kernel log is not.

This is more for Infiniband mode which depends more on features and 
functionalities
provided in firmware and get much more frequent FW bug fixes than 
typical Ethernet
devices. This is not meant to replace other ways of getting the 
information, more like
an enhancement for checking log history.

This can provide valuable information when tracing through system log 
history to
discover what happened with a specific HCA drv ver and fw ver 
combination in the past.

Regards,
Qing

^ permalink raw reply

* [PATCH] net: caif: remove redundant null check on frontpkt
From: Colin King @ 2018-09-14 17:19 UTC (permalink / raw)
  To: Dmitry Tarnyagin, David S . Miller; +Cc: kernel-janitors, netdev

From: Colin Ian King <colin.king@canonical.com>

It is impossible for frontpkt to be null at the point of the null
check because it has been assigned from rearpkt and there is no
way realpkt can be null at the point of the assignment because
of the sanity checking and exit paths taken previously. Remove
the redundant null check.

Detected by CoverityScan, CID#114434 ("Logically dead code")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 net/caif/cfrfml.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index b82440e1fcb4..a931a71ef6df 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -264,9 +264,6 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 		frontpkt = rearpkt;
 		rearpkt = NULL;
 
-		err = -ENOMEM;
-		if (frontpkt == NULL)
-			goto out;
 		err = -EPROTO;
 		if (cfpkt_add_head(frontpkt, head, 6) < 0)
 			goto out;
-- 
2.17.1

^ permalink raw reply related

* Re: [RFC PATCH 2/4] net: enable UDP gro on demand.
From: Willem de Bruijn @ 2018-09-14 17:16 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: Network Development, David Miller, Willem de Bruijn,
	steffen.klassert
In-Reply-To: <9732ec58cfd39528fc236c5af2024dfb88c826fd.1536939423.git.pabeni@redhat.com>

On Fri, Sep 14, 2018 at 11:47 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> Currently, the UDP GRO callback is always invoked, regardless of
> the existence of any actual user (e.g. a UDP tunnel). With retpoline
> enabled, this causes measurable overhead.
>
> This changeset introduces explicit accounting of the sockets requiring
> UDP GRO and updates the UDP offloads at runtime accordingly, so that
> the GRO callback is present (and invoked) only when there is at least
> one socket requiring it.

I have a difference solution both to the UDP socket lookup avoidance
and configurable GRO in general.

The first can be achieved by exporting the udp_encap_needed_key static key:

"
diff --git a/include/net/udp.h b/include/net/udp.h
index 8482a990b0bb..9e82cb391dea 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -443,8 +443,10 @@ int udpv4_offload_init(void);

 void udp_init(void);

+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void);

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f4e35b2ff8b8..bd873a5b8a86 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1889,7 +1889,7 @@ static int __udp_queue_rcv_skb(struct sock *sk,
struct sk_buff *skb)
        return 0;
 }

-static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void)
 {
        static_branch_enable(&udp_encap_needed_key);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 4f6aa95a9b12..f44fe328aa0f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct
list_head *head,
 {
        struct udphdr *uh = udp_gro_udphdr(skb);

-       if (unlikely(!uh))
+       if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
                goto flush;
 "

.. and same for ipv6.

The second is a larger patchset that converts dev_offload to
net_offload, so that all offloads share the same infrastructure, and a
sysctl interface to be able to disable all gro_receive types, not just
udp.

I've been sitting on it for too long. Let me slightly clean it up and
send it out for discussion sake..

>
> Tested with pktgen vs udpgso_bench_rx
> Before:
> udp rx:     27 MB/s  1613271 calls/s
>
> After:
> udp rx:     30 MB/s  1771537 calls/s
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

^ permalink raw reply related

* Re: [PATCH 1/1] net: rds: use memset to optimize the recv
From: Santosh Shilimkar @ 2018-09-14 16:51 UTC (permalink / raw)
  To: Zhu Yanjun, davem, netdev, linux-rdma, rds-devel
In-Reply-To: <20180914084538.11666-1-yanjun.zhu@oracle.com>

On 9/14/2018 1:45 AM, Zhu Yanjun wrote:
> The function rds_inc_init is in recv process. To use memset can optimize
> the function rds_inc_init.
> The test result:
> 
>      Before:
>      1) + 24.950 us   |        rds_inc_init [rds]();
>      After:
>      1) + 10.990 us   |        rds_inc_init [rds]();
> 
> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
> ---
Looks good. Thanks !!

Acked-by: Santosh Shilimkar<santosh.shilimkar@oracle.com>

^ permalink raw reply

* Re: [RFC PATCH 3/4] udp: implement GRO plain UDP sockets.
From: Eric Dumazet @ 2018-09-14 16:48 UTC (permalink / raw)
  To: Paolo Abeni, netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert
In-Reply-To: <027ebc7d404af07b70f8f9816512b2a485e8f420.1536939423.git.pabeni@redhat.com>



On 09/14/2018 08:43 AM, Paolo Abeni wrote:
> This is the RX counter part of commit bec1f6f69736 ("udp: generate gso
> with UDP_SEGMENT"). When UDP_SEGMENT is enabled, such socket is also
> eligible for GRO in the rx path: UDP segments directed to such socket
> are assembled into a larger GSO_UDP_L4 packet.
> 
> The core UDP GRO support is enabled/updated on setsockopt(UDP_SEGMENT) and
> disabled, if needed at socket destruction time.
> 
> Initial benchmark numbers:
> 
> Before:
> udp rx:   1079 MB/s   769065 calls/s
> 
> After:
> udp rx:   1466 MB/s    24877 calls/s


Are you sure the data is actually fully copied to user space ?

tools/testing/selftests/net/udpgso_bench_rx.c

uses :

static char rbuf[ETH_DATA_LEN];
   /* MSG_TRUNC will make return value full datagram length */
   ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);

So you need to change this program.

Also, GRO reception would mean that userspace can retrieve,
not only full bytes of X datagrams, but also the gso_size (or length of individual datagrams)

You can not know the size of the packets in advance, the sender will decide.

^ permalink raw reply

* [PATCH net-next v4 18/20] crypto: port ChaCha20 to Zinc
From: Jason A. Donenfeld @ 2018-09-14 16:22 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-crypto, davem, gregkh
  Cc: Jason A. Donenfeld, Samuel Neves, Andy Lutomirski,
	Jean-Philippe Aumasson, Eric Biggers
In-Reply-To: <20180914162240.7925-1-Jason@zx2c4.com>

Now that ChaCha20 is in Zinc, we can have the crypto API code simply
call into it. The crypto API expects to have a stored key per instance
and independent nonces, so we follow suite and store the key and
initialize the nonce independently.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Eric Biggers <ebiggers@google.com>
---
 arch/arm/configs/exynos_defconfig       |   1 -
 arch/arm/configs/multi_v7_defconfig     |   1 -
 arch/arm/configs/omap2plus_defconfig    |   1 -
 arch/arm/crypto/Kconfig                 |   6 -
 arch/arm/crypto/Makefile                |   2 -
 arch/arm/crypto/chacha20-neon-core.S    | 521 --------------------
 arch/arm/crypto/chacha20-neon-glue.c    | 127 -----
 arch/arm64/configs/defconfig            |   1 -
 arch/arm64/crypto/Kconfig               |   6 -
 arch/arm64/crypto/Makefile              |   3 -
 arch/arm64/crypto/chacha20-neon-core.S  | 450 -----------------
 arch/arm64/crypto/chacha20-neon-glue.c  | 133 -----
 arch/x86/crypto/Makefile                |   3 -
 arch/x86/crypto/chacha20-avx2-x86_64.S  | 448 -----------------
 arch/x86/crypto/chacha20-ssse3-x86_64.S | 630 ------------------------
 arch/x86/crypto/chacha20_glue.c         | 146 ------
 crypto/Kconfig                          |  16 -
 crypto/Makefile                         |   2 +-
 crypto/chacha20_generic.c               | 136 -----
 crypto/chacha20_zinc.c                  | 100 ++++
 crypto/chacha20poly1305.c               |   2 +-
 include/crypto/chacha20.h               |  12 -
 22 files changed, 102 insertions(+), 2645 deletions(-)
 delete mode 100644 arch/arm/crypto/chacha20-neon-core.S
 delete mode 100644 arch/arm/crypto/chacha20-neon-glue.c
 delete mode 100644 arch/arm64/crypto/chacha20-neon-core.S
 delete mode 100644 arch/arm64/crypto/chacha20-neon-glue.c
 delete mode 100644 arch/x86/crypto/chacha20-avx2-x86_64.S
 delete mode 100644 arch/x86/crypto/chacha20-ssse3-x86_64.S
 delete mode 100644 arch/x86/crypto/chacha20_glue.c
 delete mode 100644 crypto/chacha20_generic.c
 create mode 100644 crypto/chacha20_zinc.c

diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 27ea6dfcf2f2..95929b5e7b10 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -350,7 +350,6 @@ CONFIG_CRYPTO_SHA1_ARM_NEON=m
 CONFIG_CRYPTO_SHA256_ARM=m
 CONFIG_CRYPTO_SHA512_ARM=m
 CONFIG_CRYPTO_AES_ARM_BS=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
 CONFIG_CRC_CCITT=y
 CONFIG_FONTS=y
 CONFIG_FONT_7x14=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index fc33444e94f0..63be07724db3 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1000,4 +1000,3 @@ CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_AES_ARM_CE=m
 CONFIG_CRYPTO_GHASH_ARM_CE=m
 CONFIG_CRYPTO_CRC32_ARM_CE=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 6491419b1dad..f585a8ecc336 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -547,7 +547,6 @@ CONFIG_CRYPTO_SHA512_ARM=m
 CONFIG_CRYPTO_AES_ARM=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_GHASH_ARM_CE=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
 CONFIG_CRC_CCITT=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRC_ITU_T=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 925d1364727a..fb80fd89f0e7 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -115,12 +115,6 @@ config CRYPTO_CRC32_ARM_CE
 	depends on KERNEL_MODE_NEON && CRC32
 	select CRYPTO_HASH
 
-config CRYPTO_CHACHA20_NEON
-	tristate "NEON accelerated ChaCha20 symmetric cipher"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
-
 config CRYPTO_SPECK_NEON
 	tristate "NEON accelerated Speck cipher algorithms"
 	depends on KERNEL_MODE_NEON
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8de542c48ade..bbfa98447063 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -9,7 +9,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
 obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
@@ -53,7 +52,6 @@ aes-arm-ce-y	:= aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
 speck-neon-y := speck-neon-core.o speck-neon-glue.o
 
 ifdef REGENERATE_ARM_CRYPTO
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
deleted file mode 100644
index 451a849ad518..000000000000
--- a/arch/arm/crypto/chacha20-neon-core.S
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-	.text
-	.fpu		neon
-	.align		5
-
-ENTRY(chacha20_block_xor_neon)
-	// r0: Input state matrix, s
-	// r1: 1 data block output, o
-	// r2: 1 data block input, i
-
-	//
-	// This function encrypts one ChaCha20 block by loading the state matrix
-	// in four NEON registers. It performs matrix operation on four words in
-	// parallel, but requireds shuffling to rearrange the words after each
-	// round.
-	//
-
-	// x0..3 = s0..3
-	add		ip, r0, #0x20
-	vld1.32		{q0-q1}, [r0]
-	vld1.32		{q2-q3}, [ip]
-
-	vmov		q8, q0
-	vmov		q9, q1
-	vmov		q10, q2
-	vmov		q11, q3
-
-	mov		r3, #10
-
-.Ldoubleround:
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
-	vadd.i32	q0, q0, q1
-	veor		q3, q3, q0
-	vrev32.16	q3, q3
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
-	vadd.i32	q2, q2, q3
-	veor		q4, q1, q2
-	vshl.u32	q1, q4, #12
-	vsri.u32	q1, q4, #20
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
-	vadd.i32	q0, q0, q1
-	veor		q4, q3, q0
-	vshl.u32	q3, q4, #8
-	vsri.u32	q3, q4, #24
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
-	vadd.i32	q2, q2, q3
-	veor		q4, q1, q2
-	vshl.u32	q1, q4, #7
-	vsri.u32	q1, q4, #25
-
-	// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
-	vext.8		q1, q1, q1, #4
-	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
-	vext.8		q2, q2, q2, #8
-	// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
-	vext.8		q3, q3, q3, #12
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
-	vadd.i32	q0, q0, q1
-	veor		q3, q3, q0
-	vrev32.16	q3, q3
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
-	vadd.i32	q2, q2, q3
-	veor		q4, q1, q2
-	vshl.u32	q1, q4, #12
-	vsri.u32	q1, q4, #20
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
-	vadd.i32	q0, q0, q1
-	veor		q4, q3, q0
-	vshl.u32	q3, q4, #8
-	vsri.u32	q3, q4, #24
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
-	vadd.i32	q2, q2, q3
-	veor		q4, q1, q2
-	vshl.u32	q1, q4, #7
-	vsri.u32	q1, q4, #25
-
-	// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
-	vext.8		q1, q1, q1, #12
-	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
-	vext.8		q2, q2, q2, #8
-	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
-	vext.8		q3, q3, q3, #4
-
-	subs		r3, r3, #1
-	bne		.Ldoubleround
-
-	add		ip, r2, #0x20
-	vld1.8		{q4-q5}, [r2]
-	vld1.8		{q6-q7}, [ip]
-
-	// o0 = i0 ^ (x0 + s0)
-	vadd.i32	q0, q0, q8
-	veor		q0, q0, q4
-
-	// o1 = i1 ^ (x1 + s1)
-	vadd.i32	q1, q1, q9
-	veor		q1, q1, q5
-
-	// o2 = i2 ^ (x2 + s2)
-	vadd.i32	q2, q2, q10
-	veor		q2, q2, q6
-
-	// o3 = i3 ^ (x3 + s3)
-	vadd.i32	q3, q3, q11
-	veor		q3, q3, q7
-
-	add		ip, r1, #0x20
-	vst1.8		{q0-q1}, [r1]
-	vst1.8		{q2-q3}, [ip]
-
-	bx		lr
-ENDPROC(chacha20_block_xor_neon)
-
-	.align		5
-ENTRY(chacha20_4block_xor_neon)
-	push		{r4-r6, lr}
-	mov		ip, sp			// preserve the stack pointer
-	sub		r3, sp, #0x20		// allocate a 32 byte buffer
-	bic		r3, r3, #0x1f		// aligned to 32 bytes
-	mov		sp, r3
-
-	// r0: Input state matrix, s
-	// r1: 4 data blocks output, o
-	// r2: 4 data blocks input, i
-
-	//
-	// This function encrypts four consecutive ChaCha20 blocks by loading
-	// the state matrix in NEON registers four times. The algorithm performs
-	// each operation on the corresponding word of each state matrix, hence
-	// requires no word shuffling. For final XORing step we transpose the
-	// matrix by interleaving 32- and then 64-bit words, which allows us to
-	// do XOR in NEON registers.
-	//
-
-	// x0..15[0-3] = s0..3[0..3]
-	add		r3, r0, #0x20
-	vld1.32		{q0-q1}, [r0]
-	vld1.32		{q2-q3}, [r3]
-
-	adr		r3, CTRINC
-	vdup.32		q15, d7[1]
-	vdup.32		q14, d7[0]
-	vld1.32		{q11}, [r3, :128]
-	vdup.32		q13, d6[1]
-	vdup.32		q12, d6[0]
-	vadd.i32	q12, q12, q11		// x12 += counter values 0-3
-	vdup.32		q11, d5[1]
-	vdup.32		q10, d5[0]
-	vdup.32		q9, d4[1]
-	vdup.32		q8, d4[0]
-	vdup.32		q7, d3[1]
-	vdup.32		q6, d3[0]
-	vdup.32		q5, d2[1]
-	vdup.32		q4, d2[0]
-	vdup.32		q3, d1[1]
-	vdup.32		q2, d1[0]
-	vdup.32		q1, d0[1]
-	vdup.32		q0, d0[0]
-
-	mov		r3, #10
-
-.Ldoubleround4:
-	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
-	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
-	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
-	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
-	vadd.i32	q0, q0, q4
-	vadd.i32	q1, q1, q5
-	vadd.i32	q2, q2, q6
-	vadd.i32	q3, q3, q7
-
-	veor		q12, q12, q0
-	veor		q13, q13, q1
-	veor		q14, q14, q2
-	veor		q15, q15, q3
-
-	vrev32.16	q12, q12
-	vrev32.16	q13, q13
-	vrev32.16	q14, q14
-	vrev32.16	q15, q15
-
-	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
-	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
-	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
-	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
-	vadd.i32	q8, q8, q12
-	vadd.i32	q9, q9, q13
-	vadd.i32	q10, q10, q14
-	vadd.i32	q11, q11, q15
-
-	vst1.32		{q8-q9}, [sp, :256]
-
-	veor		q8, q4, q8
-	veor		q9, q5, q9
-	vshl.u32	q4, q8, #12
-	vshl.u32	q5, q9, #12
-	vsri.u32	q4, q8, #20
-	vsri.u32	q5, q9, #20
-
-	veor		q8, q6, q10
-	veor		q9, q7, q11
-	vshl.u32	q6, q8, #12
-	vshl.u32	q7, q9, #12
-	vsri.u32	q6, q8, #20
-	vsri.u32	q7, q9, #20
-
-	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
-	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
-	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
-	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
-	vadd.i32	q0, q0, q4
-	vadd.i32	q1, q1, q5
-	vadd.i32	q2, q2, q6
-	vadd.i32	q3, q3, q7
-
-	veor		q8, q12, q0
-	veor		q9, q13, q1
-	vshl.u32	q12, q8, #8
-	vshl.u32	q13, q9, #8
-	vsri.u32	q12, q8, #24
-	vsri.u32	q13, q9, #24
-
-	veor		q8, q14, q2
-	veor		q9, q15, q3
-	vshl.u32	q14, q8, #8
-	vshl.u32	q15, q9, #8
-	vsri.u32	q14, q8, #24
-	vsri.u32	q15, q9, #24
-
-	vld1.32		{q8-q9}, [sp, :256]
-
-	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
-	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
-	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
-	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
-	vadd.i32	q8, q8, q12
-	vadd.i32	q9, q9, q13
-	vadd.i32	q10, q10, q14
-	vadd.i32	q11, q11, q15
-
-	vst1.32		{q8-q9}, [sp, :256]
-
-	veor		q8, q4, q8
-	veor		q9, q5, q9
-	vshl.u32	q4, q8, #7
-	vshl.u32	q5, q9, #7
-	vsri.u32	q4, q8, #25
-	vsri.u32	q5, q9, #25
-
-	veor		q8, q6, q10
-	veor		q9, q7, q11
-	vshl.u32	q6, q8, #7
-	vshl.u32	q7, q9, #7
-	vsri.u32	q6, q8, #25
-	vsri.u32	q7, q9, #25
-
-	vld1.32		{q8-q9}, [sp, :256]
-
-	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
-	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
-	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
-	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
-	vadd.i32	q0, q0, q5
-	vadd.i32	q1, q1, q6
-	vadd.i32	q2, q2, q7
-	vadd.i32	q3, q3, q4
-
-	veor		q15, q15, q0
-	veor		q12, q12, q1
-	veor		q13, q13, q2
-	veor		q14, q14, q3
-
-	vrev32.16	q15, q15
-	vrev32.16	q12, q12
-	vrev32.16	q13, q13
-	vrev32.16	q14, q14
-
-	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
-	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
-	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
-	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
-	vadd.i32	q10, q10, q15
-	vadd.i32	q11, q11, q12
-	vadd.i32	q8, q8, q13
-	vadd.i32	q9, q9, q14
-
-	vst1.32		{q8-q9}, [sp, :256]
-
-	veor		q8, q7, q8
-	veor		q9, q4, q9
-	vshl.u32	q7, q8, #12
-	vshl.u32	q4, q9, #12
-	vsri.u32	q7, q8, #20
-	vsri.u32	q4, q9, #20
-
-	veor		q8, q5, q10
-	veor		q9, q6, q11
-	vshl.u32	q5, q8, #12
-	vshl.u32	q6, q9, #12
-	vsri.u32	q5, q8, #20
-	vsri.u32	q6, q9, #20
-
-	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
-	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
-	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
-	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
-	vadd.i32	q0, q0, q5
-	vadd.i32	q1, q1, q6
-	vadd.i32	q2, q2, q7
-	vadd.i32	q3, q3, q4
-
-	veor		q8, q15, q0
-	veor		q9, q12, q1
-	vshl.u32	q15, q8, #8
-	vshl.u32	q12, q9, #8
-	vsri.u32	q15, q8, #24
-	vsri.u32	q12, q9, #24
-
-	veor		q8, q13, q2
-	veor		q9, q14, q3
-	vshl.u32	q13, q8, #8
-	vshl.u32	q14, q9, #8
-	vsri.u32	q13, q8, #24
-	vsri.u32	q14, q9, #24
-
-	vld1.32		{q8-q9}, [sp, :256]
-
-	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
-	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
-	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
-	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
-	vadd.i32	q10, q10, q15
-	vadd.i32	q11, q11, q12
-	vadd.i32	q8, q8, q13
-	vadd.i32	q9, q9, q14
-
-	vst1.32		{q8-q9}, [sp, :256]
-
-	veor		q8, q7, q8
-	veor		q9, q4, q9
-	vshl.u32	q7, q8, #7
-	vshl.u32	q4, q9, #7
-	vsri.u32	q7, q8, #25
-	vsri.u32	q4, q9, #25
-
-	veor		q8, q5, q10
-	veor		q9, q6, q11
-	vshl.u32	q5, q8, #7
-	vshl.u32	q6, q9, #7
-	vsri.u32	q5, q8, #25
-	vsri.u32	q6, q9, #25
-
-	subs		r3, r3, #1
-	beq		0f
-
-	vld1.32		{q8-q9}, [sp, :256]
-	b		.Ldoubleround4
-
-	// x0[0-3] += s0[0]
-	// x1[0-3] += s0[1]
-	// x2[0-3] += s0[2]
-	// x3[0-3] += s0[3]
-0:	ldmia		r0!, {r3-r6}
-	vdup.32		q8, r3
-	vdup.32		q9, r4
-	vadd.i32	q0, q0, q8
-	vadd.i32	q1, q1, q9
-	vdup.32		q8, r5
-	vdup.32		q9, r6
-	vadd.i32	q2, q2, q8
-	vadd.i32	q3, q3, q9
-
-	// x4[0-3] += s1[0]
-	// x5[0-3] += s1[1]
-	// x6[0-3] += s1[2]
-	// x7[0-3] += s1[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q8, r3
-	vdup.32		q9, r4
-	vadd.i32	q4, q4, q8
-	vadd.i32	q5, q5, q9
-	vdup.32		q8, r5
-	vdup.32		q9, r6
-	vadd.i32	q6, q6, q8
-	vadd.i32	q7, q7, q9
-
-	// interleave 32-bit words in state n, n+1
-	vzip.32		q0, q1
-	vzip.32		q2, q3
-	vzip.32		q4, q5
-	vzip.32		q6, q7
-
-	// interleave 64-bit words in state n, n+2
-	vswp		d1, d4
-	vswp		d3, d6
-	vswp		d9, d12
-	vswp		d11, d14
-
-	// xor with corresponding input, write to output
-	vld1.8		{q8-q9}, [r2]!
-	veor		q8, q8, q0
-	veor		q9, q9, q4
-	vst1.8		{q8-q9}, [r1]!
-
-	vld1.32		{q8-q9}, [sp, :256]
-
-	// x8[0-3] += s2[0]
-	// x9[0-3] += s2[1]
-	// x10[0-3] += s2[2]
-	// x11[0-3] += s2[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q0, r3
-	vdup.32		q4, r4
-	vadd.i32	q8, q8, q0
-	vadd.i32	q9, q9, q4
-	vdup.32		q0, r5
-	vdup.32		q4, r6
-	vadd.i32	q10, q10, q0
-	vadd.i32	q11, q11, q4
-
-	// x12[0-3] += s3[0]
-	// x13[0-3] += s3[1]
-	// x14[0-3] += s3[2]
-	// x15[0-3] += s3[3]
-	ldmia		r0!, {r3-r6}
-	vdup.32		q0, r3
-	vdup.32		q4, r4
-	adr		r3, CTRINC
-	vadd.i32	q12, q12, q0
-	vld1.32		{q0}, [r3, :128]
-	vadd.i32	q13, q13, q4
-	vadd.i32	q12, q12, q0		// x12 += counter values 0-3
-
-	vdup.32		q0, r5
-	vdup.32		q4, r6
-	vadd.i32	q14, q14, q0
-	vadd.i32	q15, q15, q4
-
-	// interleave 32-bit words in state n, n+1
-	vzip.32		q8, q9
-	vzip.32		q10, q11
-	vzip.32		q12, q13
-	vzip.32		q14, q15
-
-	// interleave 64-bit words in state n, n+2
-	vswp		d17, d20
-	vswp		d19, d22
-	vswp		d25, d28
-	vswp		d27, d30
-
-	vmov		q4, q1
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q8
-	veor		q1, q1, q12
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q2
-	veor		q1, q1, q6
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q10
-	veor		q1, q1, q14
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q4
-	veor		q1, q1, q5
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q9
-	veor		q1, q1, q13
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]!
-	veor		q0, q0, q3
-	veor		q1, q1, q7
-	vst1.8		{q0-q1}, [r1]!
-
-	vld1.8		{q0-q1}, [r2]
-	veor		q0, q0, q11
-	veor		q1, q1, q15
-	vst1.8		{q0-q1}, [r1]
-
-	mov		sp, ip
-	pop		{r4-r6, pc}
-ENDPROC(chacha20_4block_xor_neon)
-
-	.align		4
-CTRINC:	.word		0, 1, 2, 3
diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c
deleted file mode 100644
index 59a7be08e80c..000000000000
--- a/arch/arm/crypto/chacha20-neon-glue.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
-			    unsigned int bytes)
-{
-	u8 buf[CHACHA20_BLOCK_SIZE];
-
-	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
-		chacha20_4block_xor_neon(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE * 4;
-		src += CHACHA20_BLOCK_SIZE * 4;
-		dst += CHACHA20_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_block_xor_neon(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		src += CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
-		state[12]++;
-	}
-	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha20_block_xor_neon(state, buf, buf);
-		memcpy(dst, buf, bytes);
-	}
-}
-
-static int chacha20_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
-		return crypto_chacha20_crypt(req);
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_chacha20_init(state, ctx, walk.iv);
-
-	kernel_neon_begin();
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-				nbytes);
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-	kernel_neon_end();
-
-	return err;
-}
-
-static struct skcipher_alg alg = {
-	.base.cra_name		= "chacha20",
-	.base.cra_driver_name	= "chacha20-neon",
-	.base.cra_priority	= 300,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_module	= THIS_MODULE,
-
-	.min_keysize		= CHACHA20_KEY_SIZE,
-	.max_keysize		= CHACHA20_KEY_SIZE,
-	.ivsize			= CHACHA20_IV_SIZE,
-	.chunksize		= CHACHA20_BLOCK_SIZE,
-	.walksize		= 4 * CHACHA20_BLOCK_SIZE,
-	.setkey			= crypto_chacha20_setkey,
-	.encrypt		= chacha20_neon,
-	.decrypt		= chacha20_neon,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
-	if (!(elf_hwcap & HWCAP_NEON))
-		return -ENODEV;
-
-	return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
-	crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364f8476..6cc3c8a0ad88 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -709,5 +709,4 @@ CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
 CONFIG_CRYPTO_CRC32_ARM64_CE=m
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
-CONFIG_CRYPTO_CHACHA20_NEON=m
 CONFIG_CRYPTO_AES_ARM64_BS=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index e3fdb0fd6f70..9db6d775a880 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -105,12 +105,6 @@ config CRYPTO_AES_ARM64_NEON_BLK
 	select CRYPTO_AES
 	select CRYPTO_SIMD
 
-config CRYPTO_CHACHA20_NEON
-	tristate "NEON accelerated ChaCha20 symmetric cipher"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
-
 config CRYPTO_AES_ARM64_BS
 	tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
 	depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index bcafd016618e..507c4bfb86e3 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -53,9 +53,6 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
 sha512-arm64-y := sha512-glue.o sha512-core.o
 
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
-
 obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
 speck-neon-y := speck-neon-core.o speck-neon-glue.o
 
diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha20-neon-core.S
deleted file mode 100644
index 13c85e272c2a..000000000000
--- a/arch/arm64/crypto/chacha20-neon-core.S
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-	.text
-	.align		6
-
-ENTRY(chacha20_block_xor_neon)
-	// x0: Input state matrix, s
-	// x1: 1 data block output, o
-	// x2: 1 data block input, i
-
-	//
-	// This function encrypts one ChaCha20 block by loading the state matrix
-	// in four NEON registers. It performs matrix operation on four words in
-	// parallel, but requires shuffling to rearrange the words after each
-	// round.
-	//
-
-	// x0..3 = s0..3
-	adr		x3, ROT8
-	ld1		{v0.4s-v3.4s}, [x0]
-	ld1		{v8.4s-v11.4s}, [x0]
-	ld1		{v12.4s}, [x3]
-
-	mov		x3, #10
-
-.Ldoubleround:
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
-	add		v0.4s, v0.4s, v1.4s
-	eor		v3.16b, v3.16b, v0.16b
-	rev32		v3.8h, v3.8h
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
-	add		v2.4s, v2.4s, v3.4s
-	eor		v4.16b, v1.16b, v2.16b
-	shl		v1.4s, v4.4s, #12
-	sri		v1.4s, v4.4s, #20
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
-	add		v0.4s, v0.4s, v1.4s
-	eor		v3.16b, v3.16b, v0.16b
-	tbl		v3.16b, {v3.16b}, v12.16b
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
-	add		v2.4s, v2.4s, v3.4s
-	eor		v4.16b, v1.16b, v2.16b
-	shl		v1.4s, v4.4s, #7
-	sri		v1.4s, v4.4s, #25
-
-	// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
-	ext		v1.16b, v1.16b, v1.16b, #4
-	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
-	ext		v2.16b, v2.16b, v2.16b, #8
-	// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
-	ext		v3.16b, v3.16b, v3.16b, #12
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
-	add		v0.4s, v0.4s, v1.4s
-	eor		v3.16b, v3.16b, v0.16b
-	rev32		v3.8h, v3.8h
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
-	add		v2.4s, v2.4s, v3.4s
-	eor		v4.16b, v1.16b, v2.16b
-	shl		v1.4s, v4.4s, #12
-	sri		v1.4s, v4.4s, #20
-
-	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
-	add		v0.4s, v0.4s, v1.4s
-	eor		v3.16b, v3.16b, v0.16b
-	tbl		v3.16b, {v3.16b}, v12.16b
-
-	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
-	add		v2.4s, v2.4s, v3.4s
-	eor		v4.16b, v1.16b, v2.16b
-	shl		v1.4s, v4.4s, #7
-	sri		v1.4s, v4.4s, #25
-
-	// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
-	ext		v1.16b, v1.16b, v1.16b, #12
-	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
-	ext		v2.16b, v2.16b, v2.16b, #8
-	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
-	ext		v3.16b, v3.16b, v3.16b, #4
-
-	subs		x3, x3, #1
-	b.ne		.Ldoubleround
-
-	ld1		{v4.16b-v7.16b}, [x2]
-
-	// o0 = i0 ^ (x0 + s0)
-	add		v0.4s, v0.4s, v8.4s
-	eor		v0.16b, v0.16b, v4.16b
-
-	// o1 = i1 ^ (x1 + s1)
-	add		v1.4s, v1.4s, v9.4s
-	eor		v1.16b, v1.16b, v5.16b
-
-	// o2 = i2 ^ (x2 + s2)
-	add		v2.4s, v2.4s, v10.4s
-	eor		v2.16b, v2.16b, v6.16b
-
-	// o3 = i3 ^ (x3 + s3)
-	add		v3.4s, v3.4s, v11.4s
-	eor		v3.16b, v3.16b, v7.16b
-
-	st1		{v0.16b-v3.16b}, [x1]
-
-	ret
-ENDPROC(chacha20_block_xor_neon)
-
-	.align		6
-ENTRY(chacha20_4block_xor_neon)
-	// x0: Input state matrix, s
-	// x1: 4 data blocks output, o
-	// x2: 4 data blocks input, i
-
-	//
-	// This function encrypts four consecutive ChaCha20 blocks by loading
-	// the state matrix in NEON registers four times. The algorithm performs
-	// each operation on the corresponding word of each state matrix, hence
-	// requires no word shuffling. For final XORing step we transpose the
-	// matrix by interleaving 32- and then 64-bit words, which allows us to
-	// do XOR in NEON registers.
-	//
-	adr		x3, CTRINC		// ... and ROT8
-	ld1		{v30.4s-v31.4s}, [x3]
-
-	// x0..15[0-3] = s0..3[0..3]
-	mov		x4, x0
-	ld4r		{ v0.4s- v3.4s}, [x4], #16
-	ld4r		{ v4.4s- v7.4s}, [x4], #16
-	ld4r		{ v8.4s-v11.4s}, [x4], #16
-	ld4r		{v12.4s-v15.4s}, [x4]
-
-	// x12 += counter values 0-3
-	add		v12.4s, v12.4s, v30.4s
-
-	mov		x3, #10
-
-.Ldoubleround4:
-	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
-	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
-	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
-	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
-	add		v0.4s, v0.4s, v4.4s
-	add		v1.4s, v1.4s, v5.4s
-	add		v2.4s, v2.4s, v6.4s
-	add		v3.4s, v3.4s, v7.4s
-
-	eor		v12.16b, v12.16b, v0.16b
-	eor		v13.16b, v13.16b, v1.16b
-	eor		v14.16b, v14.16b, v2.16b
-	eor		v15.16b, v15.16b, v3.16b
-
-	rev32		v12.8h, v12.8h
-	rev32		v13.8h, v13.8h
-	rev32		v14.8h, v14.8h
-	rev32		v15.8h, v15.8h
-
-	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
-	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
-	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
-	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
-	add		v8.4s, v8.4s, v12.4s
-	add		v9.4s, v9.4s, v13.4s
-	add		v10.4s, v10.4s, v14.4s
-	add		v11.4s, v11.4s, v15.4s
-
-	eor		v16.16b, v4.16b, v8.16b
-	eor		v17.16b, v5.16b, v9.16b
-	eor		v18.16b, v6.16b, v10.16b
-	eor		v19.16b, v7.16b, v11.16b
-
-	shl		v4.4s, v16.4s, #12
-	shl		v5.4s, v17.4s, #12
-	shl		v6.4s, v18.4s, #12
-	shl		v7.4s, v19.4s, #12
-
-	sri		v4.4s, v16.4s, #20
-	sri		v5.4s, v17.4s, #20
-	sri		v6.4s, v18.4s, #20
-	sri		v7.4s, v19.4s, #20
-
-	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
-	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
-	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
-	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
-	add		v0.4s, v0.4s, v4.4s
-	add		v1.4s, v1.4s, v5.4s
-	add		v2.4s, v2.4s, v6.4s
-	add		v3.4s, v3.4s, v7.4s
-
-	eor		v12.16b, v12.16b, v0.16b
-	eor		v13.16b, v13.16b, v1.16b
-	eor		v14.16b, v14.16b, v2.16b
-	eor		v15.16b, v15.16b, v3.16b
-
-	tbl		v12.16b, {v12.16b}, v31.16b
-	tbl		v13.16b, {v13.16b}, v31.16b
-	tbl		v14.16b, {v14.16b}, v31.16b
-	tbl		v15.16b, {v15.16b}, v31.16b
-
-	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
-	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
-	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
-	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
-	add		v8.4s, v8.4s, v12.4s
-	add		v9.4s, v9.4s, v13.4s
-	add		v10.4s, v10.4s, v14.4s
-	add		v11.4s, v11.4s, v15.4s
-
-	eor		v16.16b, v4.16b, v8.16b
-	eor		v17.16b, v5.16b, v9.16b
-	eor		v18.16b, v6.16b, v10.16b
-	eor		v19.16b, v7.16b, v11.16b
-
-	shl		v4.4s, v16.4s, #7
-	shl		v5.4s, v17.4s, #7
-	shl		v6.4s, v18.4s, #7
-	shl		v7.4s, v19.4s, #7
-
-	sri		v4.4s, v16.4s, #25
-	sri		v5.4s, v17.4s, #25
-	sri		v6.4s, v18.4s, #25
-	sri		v7.4s, v19.4s, #25
-
-	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
-	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
-	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
-	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
-	add		v0.4s, v0.4s, v5.4s
-	add		v1.4s, v1.4s, v6.4s
-	add		v2.4s, v2.4s, v7.4s
-	add		v3.4s, v3.4s, v4.4s
-
-	eor		v15.16b, v15.16b, v0.16b
-	eor		v12.16b, v12.16b, v1.16b
-	eor		v13.16b, v13.16b, v2.16b
-	eor		v14.16b, v14.16b, v3.16b
-
-	rev32		v15.8h, v15.8h
-	rev32		v12.8h, v12.8h
-	rev32		v13.8h, v13.8h
-	rev32		v14.8h, v14.8h
-
-	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
-	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
-	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
-	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
-	add		v10.4s, v10.4s, v15.4s
-	add		v11.4s, v11.4s, v12.4s
-	add		v8.4s, v8.4s, v13.4s
-	add		v9.4s, v9.4s, v14.4s
-
-	eor		v16.16b, v5.16b, v10.16b
-	eor		v17.16b, v6.16b, v11.16b
-	eor		v18.16b, v7.16b, v8.16b
-	eor		v19.16b, v4.16b, v9.16b
-
-	shl		v5.4s, v16.4s, #12
-	shl		v6.4s, v17.4s, #12
-	shl		v7.4s, v18.4s, #12
-	shl		v4.4s, v19.4s, #12
-
-	sri		v5.4s, v16.4s, #20
-	sri		v6.4s, v17.4s, #20
-	sri		v7.4s, v18.4s, #20
-	sri		v4.4s, v19.4s, #20
-
-	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
-	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
-	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
-	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
-	add		v0.4s, v0.4s, v5.4s
-	add		v1.4s, v1.4s, v6.4s
-	add		v2.4s, v2.4s, v7.4s
-	add		v3.4s, v3.4s, v4.4s
-
-	eor		v15.16b, v15.16b, v0.16b
-	eor		v12.16b, v12.16b, v1.16b
-	eor		v13.16b, v13.16b, v2.16b
-	eor		v14.16b, v14.16b, v3.16b
-
-	tbl		v15.16b, {v15.16b}, v31.16b
-	tbl		v12.16b, {v12.16b}, v31.16b
-	tbl		v13.16b, {v13.16b}, v31.16b
-	tbl		v14.16b, {v14.16b}, v31.16b
-
-	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
-	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
-	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
-	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
-	add		v10.4s, v10.4s, v15.4s
-	add		v11.4s, v11.4s, v12.4s
-	add		v8.4s, v8.4s, v13.4s
-	add		v9.4s, v9.4s, v14.4s
-
-	eor		v16.16b, v5.16b, v10.16b
-	eor		v17.16b, v6.16b, v11.16b
-	eor		v18.16b, v7.16b, v8.16b
-	eor		v19.16b, v4.16b, v9.16b
-
-	shl		v5.4s, v16.4s, #7
-	shl		v6.4s, v17.4s, #7
-	shl		v7.4s, v18.4s, #7
-	shl		v4.4s, v19.4s, #7
-
-	sri		v5.4s, v16.4s, #25
-	sri		v6.4s, v17.4s, #25
-	sri		v7.4s, v18.4s, #25
-	sri		v4.4s, v19.4s, #25
-
-	subs		x3, x3, #1
-	b.ne		.Ldoubleround4
-
-	ld4r		{v16.4s-v19.4s}, [x0], #16
-	ld4r		{v20.4s-v23.4s}, [x0], #16
-
-	// x12 += counter values 0-3
-	add		v12.4s, v12.4s, v30.4s
-
-	// x0[0-3] += s0[0]
-	// x1[0-3] += s0[1]
-	// x2[0-3] += s0[2]
-	// x3[0-3] += s0[3]
-	add		v0.4s, v0.4s, v16.4s
-	add		v1.4s, v1.4s, v17.4s
-	add		v2.4s, v2.4s, v18.4s
-	add		v3.4s, v3.4s, v19.4s
-
-	ld4r		{v24.4s-v27.4s}, [x0], #16
-	ld4r		{v28.4s-v31.4s}, [x0]
-
-	// x4[0-3] += s1[0]
-	// x5[0-3] += s1[1]
-	// x6[0-3] += s1[2]
-	// x7[0-3] += s1[3]
-	add		v4.4s, v4.4s, v20.4s
-	add		v5.4s, v5.4s, v21.4s
-	add		v6.4s, v6.4s, v22.4s
-	add		v7.4s, v7.4s, v23.4s
-
-	// x8[0-3] += s2[0]
-	// x9[0-3] += s2[1]
-	// x10[0-3] += s2[2]
-	// x11[0-3] += s2[3]
-	add		v8.4s, v8.4s, v24.4s
-	add		v9.4s, v9.4s, v25.4s
-	add		v10.4s, v10.4s, v26.4s
-	add		v11.4s, v11.4s, v27.4s
-
-	// x12[0-3] += s3[0]
-	// x13[0-3] += s3[1]
-	// x14[0-3] += s3[2]
-	// x15[0-3] += s3[3]
-	add		v12.4s, v12.4s, v28.4s
-	add		v13.4s, v13.4s, v29.4s
-	add		v14.4s, v14.4s, v30.4s
-	add		v15.4s, v15.4s, v31.4s
-
-	// interleave 32-bit words in state n, n+1
-	zip1		v16.4s, v0.4s, v1.4s
-	zip2		v17.4s, v0.4s, v1.4s
-	zip1		v18.4s, v2.4s, v3.4s
-	zip2		v19.4s, v2.4s, v3.4s
-	zip1		v20.4s, v4.4s, v5.4s
-	zip2		v21.4s, v4.4s, v5.4s
-	zip1		v22.4s, v6.4s, v7.4s
-	zip2		v23.4s, v6.4s, v7.4s
-	zip1		v24.4s, v8.4s, v9.4s
-	zip2		v25.4s, v8.4s, v9.4s
-	zip1		v26.4s, v10.4s, v11.4s
-	zip2		v27.4s, v10.4s, v11.4s
-	zip1		v28.4s, v12.4s, v13.4s
-	zip2		v29.4s, v12.4s, v13.4s
-	zip1		v30.4s, v14.4s, v15.4s
-	zip2		v31.4s, v14.4s, v15.4s
-
-	// interleave 64-bit words in state n, n+2
-	zip1		v0.2d, v16.2d, v18.2d
-	zip2		v4.2d, v16.2d, v18.2d
-	zip1		v8.2d, v17.2d, v19.2d
-	zip2		v12.2d, v17.2d, v19.2d
-	ld1		{v16.16b-v19.16b}, [x2], #64
-
-	zip1		v1.2d, v20.2d, v22.2d
-	zip2		v5.2d, v20.2d, v22.2d
-	zip1		v9.2d, v21.2d, v23.2d
-	zip2		v13.2d, v21.2d, v23.2d
-	ld1		{v20.16b-v23.16b}, [x2], #64
-
-	zip1		v2.2d, v24.2d, v26.2d
-	zip2		v6.2d, v24.2d, v26.2d
-	zip1		v10.2d, v25.2d, v27.2d
-	zip2		v14.2d, v25.2d, v27.2d
-	ld1		{v24.16b-v27.16b}, [x2], #64
-
-	zip1		v3.2d, v28.2d, v30.2d
-	zip2		v7.2d, v28.2d, v30.2d
-	zip1		v11.2d, v29.2d, v31.2d
-	zip2		v15.2d, v29.2d, v31.2d
-	ld1		{v28.16b-v31.16b}, [x2]
-
-	// xor with corresponding input, write to output
-	eor		v16.16b, v16.16b, v0.16b
-	eor		v17.16b, v17.16b, v1.16b
-	eor		v18.16b, v18.16b, v2.16b
-	eor		v19.16b, v19.16b, v3.16b
-	eor		v20.16b, v20.16b, v4.16b
-	eor		v21.16b, v21.16b, v5.16b
-	st1		{v16.16b-v19.16b}, [x1], #64
-	eor		v22.16b, v22.16b, v6.16b
-	eor		v23.16b, v23.16b, v7.16b
-	eor		v24.16b, v24.16b, v8.16b
-	eor		v25.16b, v25.16b, v9.16b
-	st1		{v20.16b-v23.16b}, [x1], #64
-	eor		v26.16b, v26.16b, v10.16b
-	eor		v27.16b, v27.16b, v11.16b
-	eor		v28.16b, v28.16b, v12.16b
-	st1		{v24.16b-v27.16b}, [x1], #64
-	eor		v29.16b, v29.16b, v13.16b
-	eor		v30.16b, v30.16b, v14.16b
-	eor		v31.16b, v31.16b, v15.16b
-	st1		{v28.16b-v31.16b}, [x1]
-
-	ret
-ENDPROC(chacha20_4block_xor_neon)
-
-CTRINC:	.word		0, 1, 2, 3
-ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
deleted file mode 100644
index 727579c93ded..000000000000
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
- *
- * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
-			    unsigned int bytes)
-{
-	u8 buf[CHACHA20_BLOCK_SIZE];
-
-	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
-		kernel_neon_begin();
-		chacha20_4block_xor_neon(state, dst, src);
-		kernel_neon_end();
-		bytes -= CHACHA20_BLOCK_SIZE * 4;
-		src += CHACHA20_BLOCK_SIZE * 4;
-		dst += CHACHA20_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-
-	if (!bytes)
-		return;
-
-	kernel_neon_begin();
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_block_xor_neon(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		src += CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
-		state[12]++;
-	}
-	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha20_block_xor_neon(state, buf, buf);
-		memcpy(dst, buf, bytes);
-	}
-	kernel_neon_end();
-}
-
-static int chacha20_neon(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
-		return crypto_chacha20_crypt(req);
-
-	err = skcipher_walk_virt(&walk, req, false);
-
-	crypto_chacha20_init(state, ctx, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-				nbytes);
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-
-	return err;
-}
-
-static struct skcipher_alg alg = {
-	.base.cra_name		= "chacha20",
-	.base.cra_driver_name	= "chacha20-neon",
-	.base.cra_priority	= 300,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_module	= THIS_MODULE,
-
-	.min_keysize		= CHACHA20_KEY_SIZE,
-	.max_keysize		= CHACHA20_KEY_SIZE,
-	.ivsize			= CHACHA20_IV_SIZE,
-	.chunksize		= CHACHA20_BLOCK_SIZE,
-	.walksize		= 4 * CHACHA20_BLOCK_SIZE,
-	.setkey			= crypto_chacha20_setkey,
-	.encrypt		= chacha20_neon,
-	.decrypt		= chacha20_neon,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
-	if (!(elf_hwcap & HWCAP_ASIMD))
-		return -ENODEV;
-
-	return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
-	crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index cf830219846b..419212c31246 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@@ -76,7 +75,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 
 aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
@@ -99,7 +97,6 @@ endif
 
 ifeq ($(avx2_supported),yes)
 	camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
-	chacha20-x86_64-y += chacha20-avx2-x86_64.o
 	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
 
 	morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S
deleted file mode 100644
index f3cd26f48332..000000000000
--- a/arch/x86/crypto/chacha20-avx2-x86_64.S
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section	.rodata.cst32.ROT8, "aM", @progbits, 32
-.align 32
-ROT8:	.octa 0x0e0d0c0f0a09080b0605040702010003
-	.octa 0x0e0d0c0f0a09080b0605040702010003
-
-.section	.rodata.cst32.ROT16, "aM", @progbits, 32
-.align 32
-ROT16:	.octa 0x0d0c0f0e09080b0a0504070601000302
-	.octa 0x0d0c0f0e09080b0a0504070601000302
-
-.section	.rodata.cst32.CTRINC, "aM", @progbits, 32
-.align 32
-CTRINC:	.octa 0x00000003000000020000000100000000
-	.octa 0x00000007000000060000000500000004
-
-.text
-
-ENTRY(chacha20_8block_xor_avx2)
-	# %rdi: Input state matrix, s
-	# %rsi: 8 data blocks output, o
-	# %rdx: 8 data blocks input, i
-
-	# This function encrypts eight consecutive ChaCha20 blocks by loading
-	# the state matrix in AVX registers eight times. As we need some
-	# scratch registers, we save the first four registers on the stack. The
-	# algorithm performs each operation on the corresponding word of each
-	# state matrix, hence requires no word shuffling. For final XORing step
-	# we transpose the matrix by interleaving 32-, 64- and then 128-bit
-	# words, which allows us to do XOR in AVX registers. 8/16-bit word
-	# rotation is done with the slightly better performing byte shuffling,
-	# 7/12-bit word rotation uses traditional shift+OR.
-
-	vzeroupper
-	# 4 * 32 byte stack, 32-byte aligned
-	lea		8(%rsp),%r10
-	and		$~31, %rsp
-	sub		$0x80, %rsp
-
-	# x0..15[0-7] = s[0..15]
-	vpbroadcastd	0x00(%rdi),%ymm0
-	vpbroadcastd	0x04(%rdi),%ymm1
-	vpbroadcastd	0x08(%rdi),%ymm2
-	vpbroadcastd	0x0c(%rdi),%ymm3
-	vpbroadcastd	0x10(%rdi),%ymm4
-	vpbroadcastd	0x14(%rdi),%ymm5
-	vpbroadcastd	0x18(%rdi),%ymm6
-	vpbroadcastd	0x1c(%rdi),%ymm7
-	vpbroadcastd	0x20(%rdi),%ymm8
-	vpbroadcastd	0x24(%rdi),%ymm9
-	vpbroadcastd	0x28(%rdi),%ymm10
-	vpbroadcastd	0x2c(%rdi),%ymm11
-	vpbroadcastd	0x30(%rdi),%ymm12
-	vpbroadcastd	0x34(%rdi),%ymm13
-	vpbroadcastd	0x38(%rdi),%ymm14
-	vpbroadcastd	0x3c(%rdi),%ymm15
-	# x0..3 on stack
-	vmovdqa		%ymm0,0x00(%rsp)
-	vmovdqa		%ymm1,0x20(%rsp)
-	vmovdqa		%ymm2,0x40(%rsp)
-	vmovdqa		%ymm3,0x60(%rsp)
-
-	vmovdqa		CTRINC(%rip),%ymm1
-	vmovdqa		ROT8(%rip),%ymm2
-	vmovdqa		ROT16(%rip),%ymm3
-
-	# x12 += counter values 0-3
-	vpaddd		%ymm1,%ymm12,%ymm12
-
-	mov		$10,%ecx
-
-.Ldoubleround8:
-	# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
-	vpaddd		0x00(%rsp),%ymm4,%ymm0
-	vmovdqa		%ymm0,0x00(%rsp)
-	vpxor		%ymm0,%ymm12,%ymm12
-	vpshufb		%ymm3,%ymm12,%ymm12
-	# x1 += x5, x13 = rotl32(x13 ^ x1, 16)
-	vpaddd		0x20(%rsp),%ymm5,%ymm0
-	vmovdqa		%ymm0,0x20(%rsp)
-	vpxor		%ymm0,%ymm13,%ymm13
-	vpshufb		%ymm3,%ymm13,%ymm13
-	# x2 += x6, x14 = rotl32(x14 ^ x2, 16)
-	vpaddd		0x40(%rsp),%ymm6,%ymm0
-	vmovdqa		%ymm0,0x40(%rsp)
-	vpxor		%ymm0,%ymm14,%ymm14
-	vpshufb		%ymm3,%ymm14,%ymm14
-	# x3 += x7, x15 = rotl32(x15 ^ x3, 16)
-	vpaddd		0x60(%rsp),%ymm7,%ymm0
-	vmovdqa		%ymm0,0x60(%rsp)
-	vpxor		%ymm0,%ymm15,%ymm15
-	vpshufb		%ymm3,%ymm15,%ymm15
-
-	# x8 += x12, x4 = rotl32(x4 ^ x8, 12)
-	vpaddd		%ymm12,%ymm8,%ymm8
-	vpxor		%ymm8,%ymm4,%ymm4
-	vpslld		$12,%ymm4,%ymm0
-	vpsrld		$20,%ymm4,%ymm4
-	vpor		%ymm0,%ymm4,%ymm4
-	# x9 += x13, x5 = rotl32(x5 ^ x9, 12)
-	vpaddd		%ymm13,%ymm9,%ymm9
-	vpxor		%ymm9,%ymm5,%ymm5
-	vpslld		$12,%ymm5,%ymm0
-	vpsrld		$20,%ymm5,%ymm5
-	vpor		%ymm0,%ymm5,%ymm5
-	# x10 += x14, x6 = rotl32(x6 ^ x10, 12)
-	vpaddd		%ymm14,%ymm10,%ymm10
-	vpxor		%ymm10,%ymm6,%ymm6
-	vpslld		$12,%ymm6,%ymm0
-	vpsrld		$20,%ymm6,%ymm6
-	vpor		%ymm0,%ymm6,%ymm6
-	# x11 += x15, x7 = rotl32(x7 ^ x11, 12)
-	vpaddd		%ymm15,%ymm11,%ymm11
-	vpxor		%ymm11,%ymm7,%ymm7
-	vpslld		$12,%ymm7,%ymm0
-	vpsrld		$20,%ymm7,%ymm7
-	vpor		%ymm0,%ymm7,%ymm7
-
-	# x0 += x4, x12 = rotl32(x12 ^ x0, 8)
-	vpaddd		0x00(%rsp),%ymm4,%ymm0
-	vmovdqa		%ymm0,0x00(%rsp)
-	vpxor		%ymm0,%ymm12,%ymm12
-	vpshufb		%ymm2,%ymm12,%ymm12
-	# x1 += x5, x13 = rotl32(x13 ^ x1, 8)
-	vpaddd		0x20(%rsp),%ymm5,%ymm0
-	vmovdqa		%ymm0,0x20(%rsp)
-	vpxor		%ymm0,%ymm13,%ymm13
-	vpshufb		%ymm2,%ymm13,%ymm13
-	# x2 += x6, x14 = rotl32(x14 ^ x2, 8)
-	vpaddd		0x40(%rsp),%ymm6,%ymm0
-	vmovdqa		%ymm0,0x40(%rsp)
-	vpxor		%ymm0,%ymm14,%ymm14
-	vpshufb		%ymm2,%ymm14,%ymm14
-	# x3 += x7, x15 = rotl32(x15 ^ x3, 8)
-	vpaddd		0x60(%rsp),%ymm7,%ymm0
-	vmovdqa		%ymm0,0x60(%rsp)
-	vpxor		%ymm0,%ymm15,%ymm15
-	vpshufb		%ymm2,%ymm15,%ymm15
-
-	# x8 += x12, x4 = rotl32(x4 ^ x8, 7)
-	vpaddd		%ymm12,%ymm8,%ymm8
-	vpxor		%ymm8,%ymm4,%ymm4
-	vpslld		$7,%ymm4,%ymm0
-	vpsrld		$25,%ymm4,%ymm4
-	vpor		%ymm0,%ymm4,%ymm4
-	# x9 += x13, x5 = rotl32(x5 ^ x9, 7)
-	vpaddd		%ymm13,%ymm9,%ymm9
-	vpxor		%ymm9,%ymm5,%ymm5
-	vpslld		$7,%ymm5,%ymm0
-	vpsrld		$25,%ymm5,%ymm5
-	vpor		%ymm0,%ymm5,%ymm5
-	# x10 += x14, x6 = rotl32(x6 ^ x10, 7)
-	vpaddd		%ymm14,%ymm10,%ymm10
-	vpxor		%ymm10,%ymm6,%ymm6
-	vpslld		$7,%ymm6,%ymm0
-	vpsrld		$25,%ymm6,%ymm6
-	vpor		%ymm0,%ymm6,%ymm6
-	# x11 += x15, x7 = rotl32(x7 ^ x11, 7)
-	vpaddd		%ymm15,%ymm11,%ymm11
-	vpxor		%ymm11,%ymm7,%ymm7
-	vpslld		$7,%ymm7,%ymm0
-	vpsrld		$25,%ymm7,%ymm7
-	vpor		%ymm0,%ymm7,%ymm7
-
-	# x0 += x5, x15 = rotl32(x15 ^ x0, 16)
-	vpaddd		0x00(%rsp),%ymm5,%ymm0
-	vmovdqa		%ymm0,0x00(%rsp)
-	vpxor		%ymm0,%ymm15,%ymm15
-	vpshufb		%ymm3,%ymm15,%ymm15
-	# x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0
-	vpaddd		0x20(%rsp),%ymm6,%ymm0
-	vmovdqa		%ymm0,0x20(%rsp)
-	vpxor		%ymm0,%ymm12,%ymm12
-	vpshufb		%ymm3,%ymm12,%ymm12
-	# x2 += x7, x13 = rotl32(x13 ^ x2, 16)
-	vpaddd		0x40(%rsp),%ymm7,%ymm0
-	vmovdqa		%ymm0,0x40(%rsp)
-	vpxor		%ymm0,%ymm13,%ymm13
-	vpshufb		%ymm3,%ymm13,%ymm13
-	# x3 += x4, x14 = rotl32(x14 ^ x3, 16)
-	vpaddd		0x60(%rsp),%ymm4,%ymm0
-	vmovdqa		%ymm0,0x60(%rsp)
-	vpxor		%ymm0,%ymm14,%ymm14
-	vpshufb		%ymm3,%ymm14,%ymm14
-
-	# x10 += x15, x5 = rotl32(x5 ^ x10, 12)
-	vpaddd		%ymm15,%ymm10,%ymm10
-	vpxor		%ymm10,%ymm5,%ymm5
-	vpslld		$12,%ymm5,%ymm0
-	vpsrld		$20,%ymm5,%ymm5
-	vpor		%ymm0,%ymm5,%ymm5
-	# x11 += x12, x6 = rotl32(x6 ^ x11, 12)
-	vpaddd		%ymm12,%ymm11,%ymm11
-	vpxor		%ymm11,%ymm6,%ymm6
-	vpslld		$12,%ymm6,%ymm0
-	vpsrld		$20,%ymm6,%ymm6
-	vpor		%ymm0,%ymm6,%ymm6
-	# x8 += x13, x7 = rotl32(x7 ^ x8, 12)
-	vpaddd		%ymm13,%ymm8,%ymm8
-	vpxor		%ymm8,%ymm7,%ymm7
-	vpslld		$12,%ymm7,%ymm0
-	vpsrld		$20,%ymm7,%ymm7
-	vpor		%ymm0,%ymm7,%ymm7
-	# x9 += x14, x4 = rotl32(x4 ^ x9, 12)
-	vpaddd		%ymm14,%ymm9,%ymm9
-	vpxor		%ymm9,%ymm4,%ymm4
-	vpslld		$12,%ymm4,%ymm0
-	vpsrld		$20,%ymm4,%ymm4
-	vpor		%ymm0,%ymm4,%ymm4
-
-	# x0 += x5, x15 = rotl32(x15 ^ x0, 8)
-	vpaddd		0x00(%rsp),%ymm5,%ymm0
-	vmovdqa		%ymm0,0x00(%rsp)
-	vpxor		%ymm0,%ymm15,%ymm15
-	vpshufb		%ymm2,%ymm15,%ymm15
-	# x1 += x6, x12 = rotl32(x12 ^ x1, 8)
-	vpaddd		0x20(%rsp),%ymm6,%ymm0
-	vmovdqa		%ymm0,0x20(%rsp)
-	vpxor		%ymm0,%ymm12,%ymm12
-	vpshufb		%ymm2,%ymm12,%ymm12
-	# x2 += x7, x13 = rotl32(x13 ^ x2, 8)
-	vpaddd		0x40(%rsp),%ymm7,%ymm0
-	vmovdqa		%ymm0,0x40(%rsp)
-	vpxor		%ymm0,%ymm13,%ymm13
-	vpshufb		%ymm2,%ymm13,%ymm13
-	# x3 += x4, x14 = rotl32(x14 ^ x3, 8)
-	vpaddd		0x60(%rsp),%ymm4,%ymm0
-	vmovdqa		%ymm0,0x60(%rsp)
-	vpxor		%ymm0,%ymm14,%ymm14
-	vpshufb		%ymm2,%ymm14,%ymm14
-
-	# x10 += x15, x5 = rotl32(x5 ^ x10, 7)
-	vpaddd		%ymm15,%ymm10,%ymm10
-	vpxor		%ymm10,%ymm5,%ymm5
-	vpslld		$7,%ymm5,%ymm0
-	vpsrld		$25,%ymm5,%ymm5
-	vpor		%ymm0,%ymm5,%ymm5
-	# x11 += x12, x6 = rotl32(x6 ^ x11, 7)
-	vpaddd		%ymm12,%ymm11,%ymm11
-	vpxor		%ymm11,%ymm6,%ymm6
-	vpslld		$7,%ymm6,%ymm0
-	vpsrld		$25,%ymm6,%ymm6
-	vpor		%ymm0,%ymm6,%ymm6
-	# x8 += x13, x7 = rotl32(x7 ^ x8, 7)
-	vpaddd		%ymm13,%ymm8,%ymm8
-	vpxor		%ymm8,%ymm7,%ymm7
-	vpslld		$7,%ymm7,%ymm0
-	vpsrld		$25,%ymm7,%ymm7
-	vpor		%ymm0,%ymm7,%ymm7
-	# x9 += x14, x4 = rotl32(x4 ^ x9, 7)
-	vpaddd		%ymm14,%ymm9,%ymm9
-	vpxor		%ymm9,%ymm4,%ymm4
-	vpslld		$7,%ymm4,%ymm0
-	vpsrld		$25,%ymm4,%ymm4
-	vpor		%ymm0,%ymm4,%ymm4
-
-	dec		%ecx
-	jnz		.Ldoubleround8
-
-	# x0..15[0-3] += s[0..15]
-	vpbroadcastd	0x00(%rdi),%ymm0
-	vpaddd		0x00(%rsp),%ymm0,%ymm0
-	vmovdqa		%ymm0,0x00(%rsp)
-	vpbroadcastd	0x04(%rdi),%ymm0
-	vpaddd		0x20(%rsp),%ymm0,%ymm0
-	vmovdqa		%ymm0,0x20(%rsp)
-	vpbroadcastd	0x08(%rdi),%ymm0
-	vpaddd		0x40(%rsp),%ymm0,%ymm0
-	vmovdqa		%ymm0,0x40(%rsp)
-	vpbroadcastd	0x0c(%rdi),%ymm0
-	vpaddd		0x60(%rsp),%ymm0,%ymm0
-	vmovdqa		%ymm0,0x60(%rsp)
-	vpbroadcastd	0x10(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm4,%ymm4
-	vpbroadcastd	0x14(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm5,%ymm5
-	vpbroadcastd	0x18(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm6,%ymm6
-	vpbroadcastd	0x1c(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm7,%ymm7
-	vpbroadcastd	0x20(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm8,%ymm8
-	vpbroadcastd	0x24(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm9,%ymm9
-	vpbroadcastd	0x28(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm10,%ymm10
-	vpbroadcastd	0x2c(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm11,%ymm11
-	vpbroadcastd	0x30(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm12,%ymm12
-	vpbroadcastd	0x34(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm13,%ymm13
-	vpbroadcastd	0x38(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm14,%ymm14
-	vpbroadcastd	0x3c(%rdi),%ymm0
-	vpaddd		%ymm0,%ymm15,%ymm15
-
-	# x12 += counter values 0-3
-	vpaddd		%ymm1,%ymm12,%ymm12
-
-	# interleave 32-bit words in state n, n+1
-	vmovdqa		0x00(%rsp),%ymm0
-	vmovdqa		0x20(%rsp),%ymm1
-	vpunpckldq	%ymm1,%ymm0,%ymm2
-	vpunpckhdq	%ymm1,%ymm0,%ymm1
-	vmovdqa		%ymm2,0x00(%rsp)
-	vmovdqa		%ymm1,0x20(%rsp)
-	vmovdqa		0x40(%rsp),%ymm0
-	vmovdqa		0x60(%rsp),%ymm1
-	vpunpckldq	%ymm1,%ymm0,%ymm2
-	vpunpckhdq	%ymm1,%ymm0,%ymm1
-	vmovdqa		%ymm2,0x40(%rsp)
-	vmovdqa		%ymm1,0x60(%rsp)
-	vmovdqa		%ymm4,%ymm0
-	vpunpckldq	%ymm5,%ymm0,%ymm4
-	vpunpckhdq	%ymm5,%ymm0,%ymm5
-	vmovdqa		%ymm6,%ymm0
-	vpunpckldq	%ymm7,%ymm0,%ymm6
-	vpunpckhdq	%ymm7,%ymm0,%ymm7
-	vmovdqa		%ymm8,%ymm0
-	vpunpckldq	%ymm9,%ymm0,%ymm8
-	vpunpckhdq	%ymm9,%ymm0,%ymm9
-	vmovdqa		%ymm10,%ymm0
-	vpunpckldq	%ymm11,%ymm0,%ymm10
-	vpunpckhdq	%ymm11,%ymm0,%ymm11
-	vmovdqa		%ymm12,%ymm0
-	vpunpckldq	%ymm13,%ymm0,%ymm12
-	vpunpckhdq	%ymm13,%ymm0,%ymm13
-	vmovdqa		%ymm14,%ymm0
-	vpunpckldq	%ymm15,%ymm0,%ymm14
-	vpunpckhdq	%ymm15,%ymm0,%ymm15
-
-	# interleave 64-bit words in state n, n+2
-	vmovdqa		0x00(%rsp),%ymm0
-	vmovdqa		0x40(%rsp),%ymm2
-	vpunpcklqdq	%ymm2,%ymm0,%ymm1
-	vpunpckhqdq	%ymm2,%ymm0,%ymm2
-	vmovdqa		%ymm1,0x00(%rsp)
-	vmovdqa		%ymm2,0x40(%rsp)
-	vmovdqa		0x20(%rsp),%ymm0
-	vmovdqa		0x60(%rsp),%ymm2
-	vpunpcklqdq	%ymm2,%ymm0,%ymm1
-	vpunpckhqdq	%ymm2,%ymm0,%ymm2
-	vmovdqa		%ymm1,0x20(%rsp)
-	vmovdqa		%ymm2,0x60(%rsp)
-	vmovdqa		%ymm4,%ymm0
-	vpunpcklqdq	%ymm6,%ymm0,%ymm4
-	vpunpckhqdq	%ymm6,%ymm0,%ymm6
-	vmovdqa		%ymm5,%ymm0
-	vpunpcklqdq	%ymm7,%ymm0,%ymm5
-	vpunpckhqdq	%ymm7,%ymm0,%ymm7
-	vmovdqa		%ymm8,%ymm0
-	vpunpcklqdq	%ymm10,%ymm0,%ymm8
-	vpunpckhqdq	%ymm10,%ymm0,%ymm10
-	vmovdqa		%ymm9,%ymm0
-	vpunpcklqdq	%ymm11,%ymm0,%ymm9
-	vpunpckhqdq	%ymm11,%ymm0,%ymm11
-	vmovdqa		%ymm12,%ymm0
-	vpunpcklqdq	%ymm14,%ymm0,%ymm12
-	vpunpckhqdq	%ymm14,%ymm0,%ymm14
-	vmovdqa		%ymm13,%ymm0
-	vpunpcklqdq	%ymm15,%ymm0,%ymm13
-	vpunpckhqdq	%ymm15,%ymm0,%ymm15
-
-	# interleave 128-bit words in state n, n+4
-	vmovdqa		0x00(%rsp),%ymm0
-	vperm2i128	$0x20,%ymm4,%ymm0,%ymm1
-	vperm2i128	$0x31,%ymm4,%ymm0,%ymm4
-	vmovdqa		%ymm1,0x00(%rsp)
-	vmovdqa		0x20(%rsp),%ymm0
-	vperm2i128	$0x20,%ymm5,%ymm0,%ymm1
-	vperm2i128	$0x31,%ymm5,%ymm0,%ymm5
-	vmovdqa		%ymm1,0x20(%rsp)
-	vmovdqa		0x40(%rsp),%ymm0
-	vperm2i128	$0x20,%ymm6,%ymm0,%ymm1
-	vperm2i128	$0x31,%ymm6,%ymm0,%ymm6
-	vmovdqa		%ymm1,0x40(%rsp)
-	vmovdqa		0x60(%rsp),%ymm0
-	vperm2i128	$0x20,%ymm7,%ymm0,%ymm1
-	vperm2i128	$0x31,%ymm7,%ymm0,%ymm7
-	vmovdqa		%ymm1,0x60(%rsp)
-	vperm2i128	$0x20,%ymm12,%ymm8,%ymm0
-	vperm2i128	$0x31,%ymm12,%ymm8,%ymm12
-	vmovdqa		%ymm0,%ymm8
-	vperm2i128	$0x20,%ymm13,%ymm9,%ymm0
-	vperm2i128	$0x31,%ymm13,%ymm9,%ymm13
-	vmovdqa		%ymm0,%ymm9
-	vperm2i128	$0x20,%ymm14,%ymm10,%ymm0
-	vperm2i128	$0x31,%ymm14,%ymm10,%ymm14
-	vmovdqa		%ymm0,%ymm10
-	vperm2i128	$0x20,%ymm15,%ymm11,%ymm0
-	vperm2i128	$0x31,%ymm15,%ymm11,%ymm15
-	vmovdqa		%ymm0,%ymm11
-
-	# xor with corresponding input, write to output
-	vmovdqa		0x00(%rsp),%ymm0
-	vpxor		0x0000(%rdx),%ymm0,%ymm0
-	vmovdqu		%ymm0,0x0000(%rsi)
-	vmovdqa		0x20(%rsp),%ymm0
-	vpxor		0x0080(%rdx),%ymm0,%ymm0
-	vmovdqu		%ymm0,0x0080(%rsi)
-	vmovdqa		0x40(%rsp),%ymm0
-	vpxor		0x0040(%rdx),%ymm0,%ymm0
-	vmovdqu		%ymm0,0x0040(%rsi)
-	vmovdqa		0x60(%rsp),%ymm0
-	vpxor		0x00c0(%rdx),%ymm0,%ymm0
-	vmovdqu		%ymm0,0x00c0(%rsi)
-	vpxor		0x0100(%rdx),%ymm4,%ymm4
-	vmovdqu		%ymm4,0x0100(%rsi)
-	vpxor		0x0180(%rdx),%ymm5,%ymm5
-	vmovdqu		%ymm5,0x00180(%rsi)
-	vpxor		0x0140(%rdx),%ymm6,%ymm6
-	vmovdqu		%ymm6,0x0140(%rsi)
-	vpxor		0x01c0(%rdx),%ymm7,%ymm7
-	vmovdqu		%ymm7,0x01c0(%rsi)
-	vpxor		0x0020(%rdx),%ymm8,%ymm8
-	vmovdqu		%ymm8,0x0020(%rsi)
-	vpxor		0x00a0(%rdx),%ymm9,%ymm9
-	vmovdqu		%ymm9,0x00a0(%rsi)
-	vpxor		0x0060(%rdx),%ymm10,%ymm10
-	vmovdqu		%ymm10,0x0060(%rsi)
-	vpxor		0x00e0(%rdx),%ymm11,%ymm11
-	vmovdqu		%ymm11,0x00e0(%rsi)
-	vpxor		0x0120(%rdx),%ymm12,%ymm12
-	vmovdqu		%ymm12,0x0120(%rsi)
-	vpxor		0x01a0(%rdx),%ymm13,%ymm13
-	vmovdqu		%ymm13,0x01a0(%rsi)
-	vpxor		0x0160(%rdx),%ymm14,%ymm14
-	vmovdqu		%ymm14,0x0160(%rsi)
-	vpxor		0x01e0(%rdx),%ymm15,%ymm15
-	vmovdqu		%ymm15,0x01e0(%rsi)
-
-	vzeroupper
-	lea		-8(%r10),%rsp
-	ret
-ENDPROC(chacha20_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
deleted file mode 100644
index 512a2b500fd1..000000000000
--- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
+++ /dev/null
@@ -1,630 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section	.rodata.cst16.ROT8, "aM", @progbits, 16
-.align 16
-ROT8:	.octa 0x0e0d0c0f0a09080b0605040702010003
-.section	.rodata.cst16.ROT16, "aM", @progbits, 16
-.align 16
-ROT16:	.octa 0x0d0c0f0e09080b0a0504070601000302
-.section	.rodata.cst16.CTRINC, "aM", @progbits, 16
-.align 16
-CTRINC:	.octa 0x00000003000000020000000100000000
-
-.text
-
-ENTRY(chacha20_block_xor_ssse3)
-	# %rdi: Input state matrix, s
-	# %rsi: 1 data block output, o
-	# %rdx: 1 data block input, i
-
-	# This function encrypts one ChaCha20 block by loading the state matrix
-	# in four SSE registers. It performs matrix operation on four words in
-	# parallel, but requireds shuffling to rearrange the words after each
-	# round. 8/16-bit word rotation is done with the slightly better
-	# performing SSSE3 byte shuffling, 7/12-bit word rotation uses
-	# traditional shift+OR.
-
-	# x0..3 = s0..3
-	movdqa		0x00(%rdi),%xmm0
-	movdqa		0x10(%rdi),%xmm1
-	movdqa		0x20(%rdi),%xmm2
-	movdqa		0x30(%rdi),%xmm3
-	movdqa		%xmm0,%xmm8
-	movdqa		%xmm1,%xmm9
-	movdqa		%xmm2,%xmm10
-	movdqa		%xmm3,%xmm11
-
-	movdqa		ROT8(%rip),%xmm4
-	movdqa		ROT16(%rip),%xmm5
-
-	mov	$10,%ecx
-
-.Ldoubleround:
-
-	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
-	paddd		%xmm1,%xmm0
-	pxor		%xmm0,%xmm3
-	pshufb		%xmm5,%xmm3
-
-	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
-	paddd		%xmm3,%xmm2
-	pxor		%xmm2,%xmm1
-	movdqa		%xmm1,%xmm6
-	pslld		$12,%xmm6
-	psrld		$20,%xmm1
-	por		%xmm6,%xmm1
-
-	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
-	paddd		%xmm1,%xmm0
-	pxor		%xmm0,%xmm3
-	pshufb		%xmm4,%xmm3
-
-	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
-	paddd		%xmm3,%xmm2
-	pxor		%xmm2,%xmm1
-	movdqa		%xmm1,%xmm7
-	pslld		$7,%xmm7
-	psrld		$25,%xmm1
-	por		%xmm7,%xmm1
-
-	# x1 = shuffle32(x1, MASK(0, 3, 2, 1))
-	pshufd		$0x39,%xmm1,%xmm1
-	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
-	pshufd		$0x4e,%xmm2,%xmm2
-	# x3 = shuffle32(x3, MASK(2, 1, 0, 3))
-	pshufd		$0x93,%xmm3,%xmm3
-
-	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
-	paddd		%xmm1,%xmm0
-	pxor		%xmm0,%xmm3
-	pshufb		%xmm5,%xmm3
-
-	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
-	paddd		%xmm3,%xmm2
-	pxor		%xmm2,%xmm1
-	movdqa		%xmm1,%xmm6
-	pslld		$12,%xmm6
-	psrld		$20,%xmm1
-	por		%xmm6,%xmm1
-
-	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
-	paddd		%xmm1,%xmm0
-	pxor		%xmm0,%xmm3
-	pshufb		%xmm4,%xmm3
-
-	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
-	paddd		%xmm3,%xmm2
-	pxor		%xmm2,%xmm1
-	movdqa		%xmm1,%xmm7
-	pslld		$7,%xmm7
-	psrld		$25,%xmm1
-	por		%xmm7,%xmm1
-
-	# x1 = shuffle32(x1, MASK(2, 1, 0, 3))
-	pshufd		$0x93,%xmm1,%xmm1
-	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
-	pshufd		$0x4e,%xmm2,%xmm2
-	# x3 = shuffle32(x3, MASK(0, 3, 2, 1))
-	pshufd		$0x39,%xmm3,%xmm3
-
-	dec		%ecx
-	jnz		.Ldoubleround
-
-	# o0 = i0 ^ (x0 + s0)
-	movdqu		0x00(%rdx),%xmm4
-	paddd		%xmm8,%xmm0
-	pxor		%xmm4,%xmm0
-	movdqu		%xmm0,0x00(%rsi)
-	# o1 = i1 ^ (x1 + s1)
-	movdqu		0x10(%rdx),%xmm5
-	paddd		%xmm9,%xmm1
-	pxor		%xmm5,%xmm1
-	movdqu		%xmm1,0x10(%rsi)
-	# o2 = i2 ^ (x2 + s2)
-	movdqu		0x20(%rdx),%xmm6
-	paddd		%xmm10,%xmm2
-	pxor		%xmm6,%xmm2
-	movdqu		%xmm2,0x20(%rsi)
-	# o3 = i3 ^ (x3 + s3)
-	movdqu		0x30(%rdx),%xmm7
-	paddd		%xmm11,%xmm3
-	pxor		%xmm7,%xmm3
-	movdqu		%xmm3,0x30(%rsi)
-
-	ret
-ENDPROC(chacha20_block_xor_ssse3)
-
-ENTRY(chacha20_4block_xor_ssse3)
-	# %rdi: Input state matrix, s
-	# %rsi: 4 data blocks output, o
-	# %rdx: 4 data blocks input, i
-
-	# This function encrypts four consecutive ChaCha20 blocks by loading the
-	# the state matrix in SSE registers four times. As we need some scratch
-	# registers, we save the first four registers on the stack. The
-	# algorithm performs each operation on the corresponding word of each
-	# state matrix, hence requires no word shuffling. For final XORing step
-	# we transpose the matrix by interleaving 32- and then 64-bit words,
-	# which allows us to do XOR in SSE registers. 8/16-bit word rotation is
-	# done with the slightly better performing SSSE3 byte shuffling,
-	# 7/12-bit word rotation uses traditional shift+OR.
-
-	lea		8(%rsp),%r10
-	sub		$0x80,%rsp
-	and		$~63,%rsp
-
-	# x0..15[0-3] = s0..3[0..3]
-	movq		0x00(%rdi),%xmm1
-	pshufd		$0x00,%xmm1,%xmm0
-	pshufd		$0x55,%xmm1,%xmm1
-	movq		0x08(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	movq		0x10(%rdi),%xmm5
-	pshufd		$0x00,%xmm5,%xmm4
-	pshufd		$0x55,%xmm5,%xmm5
-	movq		0x18(%rdi),%xmm7
-	pshufd		$0x00,%xmm7,%xmm6
-	pshufd		$0x55,%xmm7,%xmm7
-	movq		0x20(%rdi),%xmm9
-	pshufd		$0x00,%xmm9,%xmm8
-	pshufd		$0x55,%xmm9,%xmm9
-	movq		0x28(%rdi),%xmm11
-	pshufd		$0x00,%xmm11,%xmm10
-	pshufd		$0x55,%xmm11,%xmm11
-	movq		0x30(%rdi),%xmm13
-	pshufd		$0x00,%xmm13,%xmm12
-	pshufd		$0x55,%xmm13,%xmm13
-	movq		0x38(%rdi),%xmm15
-	pshufd		$0x00,%xmm15,%xmm14
-	pshufd		$0x55,%xmm15,%xmm15
-	# x0..3 on stack
-	movdqa		%xmm0,0x00(%rsp)
-	movdqa		%xmm1,0x10(%rsp)
-	movdqa		%xmm2,0x20(%rsp)
-	movdqa		%xmm3,0x30(%rsp)
-
-	movdqa		CTRINC(%rip),%xmm1
-	movdqa		ROT8(%rip),%xmm2
-	movdqa		ROT16(%rip),%xmm3
-
-	# x12 += counter values 0-3
-	paddd		%xmm1,%xmm12
-
-	mov		$10,%ecx
-
-.Ldoubleround4:
-	# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
-	movdqa		0x00(%rsp),%xmm0
-	paddd		%xmm4,%xmm0
-	movdqa		%xmm0,0x00(%rsp)
-	pxor		%xmm0,%xmm12
-	pshufb		%xmm3,%xmm12
-	# x1 += x5, x13 = rotl32(x13 ^ x1, 16)
-	movdqa		0x10(%rsp),%xmm0
-	paddd		%xmm5,%xmm0
-	movdqa		%xmm0,0x10(%rsp)
-	pxor		%xmm0,%xmm13
-	pshufb		%xmm3,%xmm13
-	# x2 += x6, x14 = rotl32(x14 ^ x2, 16)
-	movdqa		0x20(%rsp),%xmm0
-	paddd		%xmm6,%xmm0
-	movdqa		%xmm0,0x20(%rsp)
-	pxor		%xmm0,%xmm14
-	pshufb		%xmm3,%xmm14
-	# x3 += x7, x15 = rotl32(x15 ^ x3, 16)
-	movdqa		0x30(%rsp),%xmm0
-	paddd		%xmm7,%xmm0
-	movdqa		%xmm0,0x30(%rsp)
-	pxor		%xmm0,%xmm15
-	pshufb		%xmm3,%xmm15
-
-	# x8 += x12, x4 = rotl32(x4 ^ x8, 12)
-	paddd		%xmm12,%xmm8
-	pxor		%xmm8,%xmm4
-	movdqa		%xmm4,%xmm0
-	pslld		$12,%xmm0
-	psrld		$20,%xmm4
-	por		%xmm0,%xmm4
-	# x9 += x13, x5 = rotl32(x5 ^ x9, 12)
-	paddd		%xmm13,%xmm9
-	pxor		%xmm9,%xmm5
-	movdqa		%xmm5,%xmm0
-	pslld		$12,%xmm0
-	psrld		$20,%xmm5
-	por		%xmm0,%xmm5
-	# x10 += x14, x6 = rotl32(x6 ^ x10, 12)
-	paddd		%xmm14,%xmm10
-	pxor		%xmm10,%xmm6
-	movdqa		%xmm6,%xmm0
-	pslld		$12,%xmm0
-	psrld		$20,%xmm6
-	por		%xmm0,%xmm6
-	# x11 += x15, x7 = rotl32(x7 ^ x11, 12)
-	paddd		%xmm15,%xmm11
-	pxor		%xmm11,%xmm7
-	movdqa		%xmm7,%xmm0
-	pslld		$12,%xmm0
-	psrld		$20,%xmm7
-	por		%xmm0,%xmm7
-
-	# x0 += x4, x12 = rotl32(x12 ^ x0, 8)
-	movdqa		0x00(%rsp),%xmm0
-	paddd		%xmm4,%xmm0
-	movdqa		%xmm0,0x00(%rsp)
-	pxor		%xmm0,%xmm12
-	pshufb		%xmm2,%xmm12
-	# x1 += x5, x13 = rotl32(x13 ^ x1, 8)
-	movdqa		0x10(%rsp),%xmm0
-	paddd		%xmm5,%xmm0
-	movdqa		%xmm0,0x10(%rsp)
-	pxor		%xmm0,%xmm13
-	pshufb		%xmm2,%xmm13
-	# x2 += x6, x14 = rotl32(x14 ^ x2, 8)
-	movdqa		0x20(%rsp),%xmm0
-	paddd		%xmm6,%xmm0
-	movdqa		%xmm0,0x20(%rsp)
-	pxor		%xmm0,%xmm14
-	pshufb		%xmm2,%xmm14
-	# x3 += x7, x15 = rotl32(x15 ^ x3, 8)
-	movdqa		0x30(%rsp),%xmm0
-	paddd		%xmm7,%xmm0
-	movdqa		%xmm0,0x30(%rsp)
-	pxor		%xmm0,%xmm15
-	pshufb		%xmm2,%xmm15
-
-	# x8 += x12, x4 = rotl32(x4 ^ x8, 7)
-	paddd		%xmm12,%xmm8
-	pxor		%xmm8,%xmm4
-	movdqa		%xmm4,%xmm0
-	pslld		$7,%xmm0
-	psrld		$25,%xmm4
-	por		%xmm0,%xmm4
-	# x9 += x13, x5 = rotl32(x5 ^ x9, 7)
-	paddd		%xmm13,%xmm9
-	pxor		%xmm9,%xmm5
-	movdqa		%xmm5,%xmm0
-	pslld		$7,%xmm0
-	psrld		$25,%xmm5
-	por		%xmm0,%xmm5
-	# x10 += x14, x6 = rotl32(x6 ^ x10, 7)
-	paddd		%xmm14,%xmm10
-	pxor		%xmm10,%xmm6
-	movdqa		%xmm6,%xmm0
-	pslld		$7,%xmm0
-	psrld		$25,%xmm6
-	por		%xmm0,%xmm6
-	# x11 += x15, x7 = rotl32(x7 ^ x11, 7)
-	paddd		%xmm15,%xmm11
-	pxor		%xmm11,%xmm7
-	movdqa		%xmm7,%xmm0
-	pslld		$7,%xmm0
-	psrld		$25,%xmm7
-	por		%xmm0,%xmm7
-
-	# x0 += x5, x15 = rotl32(x15 ^ x0, 16)
-	movdqa		0x00(%rsp),%xmm0
-	paddd		%xmm5,%xmm0
-	movdqa		%xmm0,0x00(%rsp)
-	pxor		%xmm0,%xmm15
-	pshufb		%xmm3,%xmm15
-	# x1 += x6, x12 = rotl32(x12 ^ x1, 16)
-	movdqa		0x10(%rsp),%xmm0
-	paddd		%xmm6,%xmm0
-	movdqa		%xmm0,0x10(%rsp)
-	pxor		%xmm0,%xmm12
-	pshufb		%xmm3,%xmm12
-	# x2 += x7, x13 = rotl32(x13 ^ x2, 16)
-	movdqa		0x20(%rsp),%xmm0
-	paddd		%xmm7,%xmm0
-	movdqa		%xmm0,0x20(%rsp)
-	pxor		%xmm0,%xmm13
-	pshufb		%xmm3,%xmm13
-	# x3 += x4, x14 = rotl32(x14 ^ x3, 16)
-	movdqa		0x30(%rsp),%xmm0
-	paddd		%xmm4,%xmm0
-	movdqa		%xmm0,0x30(%rsp)
-	pxor		%xmm0,%xmm14
-	pshufb		%xmm3,%xmm14
-
-	# x10 += x15, x5 = rotl32(x5 ^ x10, 12)
-	paddd		%xmm15,%xmm10
-	pxor		%xmm10,%xmm5
-	movdqa		%xmm5,%xmm0
-	pslld		$12,%xmm0
-	psrld		$20,%xmm5
-	por		%xmm0,%xmm5
-	# x11 += x12, x6 = rotl32(x6 ^ x11, 12)
-	paddd		%xmm12,%xmm11
-	pxor		%xmm11,%xmm6
-	movdqa		%xmm6,%xmm0
-	pslld		$12,%xmm0
-	psrld		$20,%xmm6
-	por		%xmm0,%xmm6
-	# x8 += x13, x7 = rotl32(x7 ^ x8, 12)
-	paddd		%xmm13,%xmm8
-	pxor		%xmm8,%xmm7
-	movdqa		%xmm7,%xmm0
-	pslld		$12,%xmm0
-	psrld		$20,%xmm7
-	por		%xmm0,%xmm7
-	# x9 += x14, x4 = rotl32(x4 ^ x9, 12)
-	paddd		%xmm14,%xmm9
-	pxor		%xmm9,%xmm4
-	movdqa		%xmm4,%xmm0
-	pslld		$12,%xmm0
-	psrld		$20,%xmm4
-	por		%xmm0,%xmm4
-
-	# x0 += x5, x15 = rotl32(x15 ^ x0, 8)
-	movdqa		0x00(%rsp),%xmm0
-	paddd		%xmm5,%xmm0
-	movdqa		%xmm0,0x00(%rsp)
-	pxor		%xmm0,%xmm15
-	pshufb		%xmm2,%xmm15
-	# x1 += x6, x12 = rotl32(x12 ^ x1, 8)
-	movdqa		0x10(%rsp),%xmm0
-	paddd		%xmm6,%xmm0
-	movdqa		%xmm0,0x10(%rsp)
-	pxor		%xmm0,%xmm12
-	pshufb		%xmm2,%xmm12
-	# x2 += x7, x13 = rotl32(x13 ^ x2, 8)
-	movdqa		0x20(%rsp),%xmm0
-	paddd		%xmm7,%xmm0
-	movdqa		%xmm0,0x20(%rsp)
-	pxor		%xmm0,%xmm13
-	pshufb		%xmm2,%xmm13
-	# x3 += x4, x14 = rotl32(x14 ^ x3, 8)
-	movdqa		0x30(%rsp),%xmm0
-	paddd		%xmm4,%xmm0
-	movdqa		%xmm0,0x30(%rsp)
-	pxor		%xmm0,%xmm14
-	pshufb		%xmm2,%xmm14
-
-	# x10 += x15, x5 = rotl32(x5 ^ x10, 7)
-	paddd		%xmm15,%xmm10
-	pxor		%xmm10,%xmm5
-	movdqa		%xmm5,%xmm0
-	pslld		$7,%xmm0
-	psrld		$25,%xmm5
-	por		%xmm0,%xmm5
-	# x11 += x12, x6 = rotl32(x6 ^ x11, 7)
-	paddd		%xmm12,%xmm11
-	pxor		%xmm11,%xmm6
-	movdqa		%xmm6,%xmm0
-	pslld		$7,%xmm0
-	psrld		$25,%xmm6
-	por		%xmm0,%xmm6
-	# x8 += x13, x7 = rotl32(x7 ^ x8, 7)
-	paddd		%xmm13,%xmm8
-	pxor		%xmm8,%xmm7
-	movdqa		%xmm7,%xmm0
-	pslld		$7,%xmm0
-	psrld		$25,%xmm7
-	por		%xmm0,%xmm7
-	# x9 += x14, x4 = rotl32(x4 ^ x9, 7)
-	paddd		%xmm14,%xmm9
-	pxor		%xmm9,%xmm4
-	movdqa		%xmm4,%xmm0
-	pslld		$7,%xmm0
-	psrld		$25,%xmm4
-	por		%xmm0,%xmm4
-
-	dec		%ecx
-	jnz		.Ldoubleround4
-
-	# x0[0-3] += s0[0]
-	# x1[0-3] += s0[1]
-	movq		0x00(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	paddd		0x00(%rsp),%xmm2
-	movdqa		%xmm2,0x00(%rsp)
-	paddd		0x10(%rsp),%xmm3
-	movdqa		%xmm3,0x10(%rsp)
-	# x2[0-3] += s0[2]
-	# x3[0-3] += s0[3]
-	movq		0x08(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	paddd		0x20(%rsp),%xmm2
-	movdqa		%xmm2,0x20(%rsp)
-	paddd		0x30(%rsp),%xmm3
-	movdqa		%xmm3,0x30(%rsp)
-
-	# x4[0-3] += s1[0]
-	# x5[0-3] += s1[1]
-	movq		0x10(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	paddd		%xmm2,%xmm4
-	paddd		%xmm3,%xmm5
-	# x6[0-3] += s1[2]
-	# x7[0-3] += s1[3]
-	movq		0x18(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	paddd		%xmm2,%xmm6
-	paddd		%xmm3,%xmm7
-
-	# x8[0-3] += s2[0]
-	# x9[0-3] += s2[1]
-	movq		0x20(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	paddd		%xmm2,%xmm8
-	paddd		%xmm3,%xmm9
-	# x10[0-3] += s2[2]
-	# x11[0-3] += s2[3]
-	movq		0x28(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	paddd		%xmm2,%xmm10
-	paddd		%xmm3,%xmm11
-
-	# x12[0-3] += s3[0]
-	# x13[0-3] += s3[1]
-	movq		0x30(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	paddd		%xmm2,%xmm12
-	paddd		%xmm3,%xmm13
-	# x14[0-3] += s3[2]
-	# x15[0-3] += s3[3]
-	movq		0x38(%rdi),%xmm3
-	pshufd		$0x00,%xmm3,%xmm2
-	pshufd		$0x55,%xmm3,%xmm3
-	paddd		%xmm2,%xmm14
-	paddd		%xmm3,%xmm15
-
-	# x12 += counter values 0-3
-	paddd		%xmm1,%xmm12
-
-	# interleave 32-bit words in state n, n+1
-	movdqa		0x00(%rsp),%xmm0
-	movdqa		0x10(%rsp),%xmm1
-	movdqa		%xmm0,%xmm2
-	punpckldq	%xmm1,%xmm2
-	punpckhdq	%xmm1,%xmm0
-	movdqa		%xmm2,0x00(%rsp)
-	movdqa		%xmm0,0x10(%rsp)
-	movdqa		0x20(%rsp),%xmm0
-	movdqa		0x30(%rsp),%xmm1
-	movdqa		%xmm0,%xmm2
-	punpckldq	%xmm1,%xmm2
-	punpckhdq	%xmm1,%xmm0
-	movdqa		%xmm2,0x20(%rsp)
-	movdqa		%xmm0,0x30(%rsp)
-	movdqa		%xmm4,%xmm0
-	punpckldq	%xmm5,%xmm4
-	punpckhdq	%xmm5,%xmm0
-	movdqa		%xmm0,%xmm5
-	movdqa		%xmm6,%xmm0
-	punpckldq	%xmm7,%xmm6
-	punpckhdq	%xmm7,%xmm0
-	movdqa		%xmm0,%xmm7
-	movdqa		%xmm8,%xmm0
-	punpckldq	%xmm9,%xmm8
-	punpckhdq	%xmm9,%xmm0
-	movdqa		%xmm0,%xmm9
-	movdqa		%xmm10,%xmm0
-	punpckldq	%xmm11,%xmm10
-	punpckhdq	%xmm11,%xmm0
-	movdqa		%xmm0,%xmm11
-	movdqa		%xmm12,%xmm0
-	punpckldq	%xmm13,%xmm12
-	punpckhdq	%xmm13,%xmm0
-	movdqa		%xmm0,%xmm13
-	movdqa		%xmm14,%xmm0
-	punpckldq	%xmm15,%xmm14
-	punpckhdq	%xmm15,%xmm0
-	movdqa		%xmm0,%xmm15
-
-	# interleave 64-bit words in state n, n+2
-	movdqa		0x00(%rsp),%xmm0
-	movdqa		0x20(%rsp),%xmm1
-	movdqa		%xmm0,%xmm2
-	punpcklqdq	%xmm1,%xmm2
-	punpckhqdq	%xmm1,%xmm0
-	movdqa		%xmm2,0x00(%rsp)
-	movdqa		%xmm0,0x20(%rsp)
-	movdqa		0x10(%rsp),%xmm0
-	movdqa		0x30(%rsp),%xmm1
-	movdqa		%xmm0,%xmm2
-	punpcklqdq	%xmm1,%xmm2
-	punpckhqdq	%xmm1,%xmm0
-	movdqa		%xmm2,0x10(%rsp)
-	movdqa		%xmm0,0x30(%rsp)
-	movdqa		%xmm4,%xmm0
-	punpcklqdq	%xmm6,%xmm4
-	punpckhqdq	%xmm6,%xmm0
-	movdqa		%xmm0,%xmm6
-	movdqa		%xmm5,%xmm0
-	punpcklqdq	%xmm7,%xmm5
-	punpckhqdq	%xmm7,%xmm0
-	movdqa		%xmm0,%xmm7
-	movdqa		%xmm8,%xmm0
-	punpcklqdq	%xmm10,%xmm8
-	punpckhqdq	%xmm10,%xmm0
-	movdqa		%xmm0,%xmm10
-	movdqa		%xmm9,%xmm0
-	punpcklqdq	%xmm11,%xmm9
-	punpckhqdq	%xmm11,%xmm0
-	movdqa		%xmm0,%xmm11
-	movdqa		%xmm12,%xmm0
-	punpcklqdq	%xmm14,%xmm12
-	punpckhqdq	%xmm14,%xmm0
-	movdqa		%xmm0,%xmm14
-	movdqa		%xmm13,%xmm0
-	punpcklqdq	%xmm15,%xmm13
-	punpckhqdq	%xmm15,%xmm0
-	movdqa		%xmm0,%xmm15
-
-	# xor with corresponding input, write to output
-	movdqa		0x00(%rsp),%xmm0
-	movdqu		0x00(%rdx),%xmm1
-	pxor		%xmm1,%xmm0
-	movdqu		%xmm0,0x00(%rsi)
-	movdqa		0x10(%rsp),%xmm0
-	movdqu		0x80(%rdx),%xmm1
-	pxor		%xmm1,%xmm0
-	movdqu		%xmm0,0x80(%rsi)
-	movdqa		0x20(%rsp),%xmm0
-	movdqu		0x40(%rdx),%xmm1
-	pxor		%xmm1,%xmm0
-	movdqu		%xmm0,0x40(%rsi)
-	movdqa		0x30(%rsp),%xmm0
-	movdqu		0xc0(%rdx),%xmm1
-	pxor		%xmm1,%xmm0
-	movdqu		%xmm0,0xc0(%rsi)
-	movdqu		0x10(%rdx),%xmm1
-	pxor		%xmm1,%xmm4
-	movdqu		%xmm4,0x10(%rsi)
-	movdqu		0x90(%rdx),%xmm1
-	pxor		%xmm1,%xmm5
-	movdqu		%xmm5,0x90(%rsi)
-	movdqu		0x50(%rdx),%xmm1
-	pxor		%xmm1,%xmm6
-	movdqu		%xmm6,0x50(%rsi)
-	movdqu		0xd0(%rdx),%xmm1
-	pxor		%xmm1,%xmm7
-	movdqu		%xmm7,0xd0(%rsi)
-	movdqu		0x20(%rdx),%xmm1
-	pxor		%xmm1,%xmm8
-	movdqu		%xmm8,0x20(%rsi)
-	movdqu		0xa0(%rdx),%xmm1
-	pxor		%xmm1,%xmm9
-	movdqu		%xmm9,0xa0(%rsi)
-	movdqu		0x60(%rdx),%xmm1
-	pxor		%xmm1,%xmm10
-	movdqu		%xmm10,0x60(%rsi)
-	movdqu		0xe0(%rdx),%xmm1
-	pxor		%xmm1,%xmm11
-	movdqu		%xmm11,0xe0(%rsi)
-	movdqu		0x30(%rdx),%xmm1
-	pxor		%xmm1,%xmm12
-	movdqu		%xmm12,0x30(%rsi)
-	movdqu		0xb0(%rdx),%xmm1
-	pxor		%xmm1,%xmm13
-	movdqu		%xmm13,0xb0(%rsi)
-	movdqu		0x70(%rdx),%xmm1
-	pxor		%xmm1,%xmm14
-	movdqu		%xmm14,0x70(%rsi)
-	movdqu		0xf0(%rdx),%xmm1
-	pxor		%xmm1,%xmm15
-	movdqu		%xmm15,0xf0(%rsi)
-
-	lea		-8(%r10),%rsp
-	ret
-ENDPROC(chacha20_4block_xor_ssse3)
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
deleted file mode 100644
index dce7c5d39c2f..000000000000
--- a/arch/x86/crypto/chacha20_glue.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/simd.h>
-
-#define CHACHA20_STATE_ALIGN 16
-
-asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
-#ifdef CONFIG_AS_AVX2
-asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
-static bool chacha20_use_avx2;
-#endif
-
-static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
-			    unsigned int bytes)
-{
-	u8 buf[CHACHA20_BLOCK_SIZE];
-
-#ifdef CONFIG_AS_AVX2
-	if (chacha20_use_avx2) {
-		while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
-			chacha20_8block_xor_avx2(state, dst, src);
-			bytes -= CHACHA20_BLOCK_SIZE * 8;
-			src += CHACHA20_BLOCK_SIZE * 8;
-			dst += CHACHA20_BLOCK_SIZE * 8;
-			state[12] += 8;
-		}
-	}
-#endif
-	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
-		chacha20_4block_xor_ssse3(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE * 4;
-		src += CHACHA20_BLOCK_SIZE * 4;
-		dst += CHACHA20_BLOCK_SIZE * 4;
-		state[12] += 4;
-	}
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_block_xor_ssse3(state, dst, src);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		src += CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
-		state[12]++;
-	}
-	if (bytes) {
-		memcpy(buf, src, bytes);
-		chacha20_block_xor_ssse3(state, buf, buf);
-		memcpy(dst, buf, bytes);
-	}
-}
-
-static int chacha20_simd(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u32 *state, state_buf[16 + 2] __aligned(8);
-	struct skcipher_walk walk;
-	int err;
-
-	BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
-	state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
-
-	if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
-		return crypto_chacha20_crypt(req);
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_chacha20_init(state, ctx, walk.iv);
-
-	kernel_fpu_begin();
-
-	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
-				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
-		err = skcipher_walk_done(&walk,
-					 walk.nbytes % CHACHA20_BLOCK_SIZE);
-	}
-
-	if (walk.nbytes) {
-		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
-				walk.nbytes);
-		err = skcipher_walk_done(&walk, 0);
-	}
-
-	kernel_fpu_end();
-
-	return err;
-}
-
-static struct skcipher_alg alg = {
-	.base.cra_name		= "chacha20",
-	.base.cra_driver_name	= "chacha20-simd",
-	.base.cra_priority	= 300,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_module	= THIS_MODULE,
-
-	.min_keysize		= CHACHA20_KEY_SIZE,
-	.max_keysize		= CHACHA20_KEY_SIZE,
-	.ivsize			= CHACHA20_IV_SIZE,
-	.chunksize		= CHACHA20_BLOCK_SIZE,
-	.setkey			= crypto_chacha20_setkey,
-	.encrypt		= chacha20_simd,
-	.decrypt		= chacha20_simd,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_SSSE3))
-		return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
-	chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
-			    boot_cpu_has(X86_FEATURE_AVX2) &&
-			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#endif
-	return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
-	crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-simd");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 47859a0f8052..93cd4d199447 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1433,22 +1433,6 @@ config CRYPTO_CHACHA20
 
 	  ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
 	  Bernstein and further specified in RFC7539 for use in IETF protocols.
-	  This is the portable C implementation of ChaCha20.
-
-	  See also:
-	  <http://cr.yp.to/chacha/chacha-20080128.pdf>
-
-config CRYPTO_CHACHA20_X86_64
-	tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
-	depends on X86 && 64BIT
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
-	help
-	  ChaCha20 cipher algorithm, RFC7539.
-
-	  ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
-	  Bernstein and further specified in RFC7539 for use in IETF protocols.
-	  This is the x86_64 assembler implementation using SIMD instructions.
 
 	  See also:
 	  <http://cr.yp.to/chacha/chacha-20080128.pdf>
diff --git a/crypto/Makefile b/crypto/Makefile
index 5e60348d02e2..587103b87890 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -117,7 +117,7 @@ obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
 obj-$(CONFIG_CRYPTO_SPECK) += speck.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
-obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
+obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_zinc.o
 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_zinc.o
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
deleted file mode 100644
index e451c3cb6a56..000000000000
--- a/crypto/chacha20_generic.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/module.h>
-
-static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
-			     unsigned int bytes)
-{
-	u32 stream[CHACHA20_BLOCK_WORDS];
-
-	if (dst != src)
-		memcpy(dst, src, bytes);
-
-	while (bytes >= CHACHA20_BLOCK_SIZE) {
-		chacha20_block(state, stream);
-		crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
-		bytes -= CHACHA20_BLOCK_SIZE;
-		dst += CHACHA20_BLOCK_SIZE;
-	}
-	if (bytes) {
-		chacha20_block(state, stream);
-		crypto_xor(dst, (const u8 *)stream, bytes);
-	}
-}
-
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
-{
-	state[0]  = 0x61707865; /* "expa" */
-	state[1]  = 0x3320646e; /* "nd 3" */
-	state[2]  = 0x79622d32; /* "2-by" */
-	state[3]  = 0x6b206574; /* "te k" */
-	state[4]  = ctx->key[0];
-	state[5]  = ctx->key[1];
-	state[6]  = ctx->key[2];
-	state[7]  = ctx->key[3];
-	state[8]  = ctx->key[4];
-	state[9]  = ctx->key[5];
-	state[10] = ctx->key[6];
-	state[11] = ctx->key[7];
-	state[12] = get_unaligned_le32(iv +  0);
-	state[13] = get_unaligned_le32(iv +  4);
-	state[14] = get_unaligned_le32(iv +  8);
-	state[15] = get_unaligned_le32(iv + 12);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_init);
-
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			   unsigned int keysize)
-{
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int i;
-
-	if (keysize != CHACHA20_KEY_SIZE)
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
-		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
-
-int crypto_chacha20_crypt(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_chacha20_init(state, ctx, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
-
-static struct skcipher_alg alg = {
-	.base.cra_name		= "chacha20",
-	.base.cra_driver_name	= "chacha20-generic",
-	.base.cra_priority	= 100,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_module	= THIS_MODULE,
-
-	.min_keysize		= CHACHA20_KEY_SIZE,
-	.max_keysize		= CHACHA20_KEY_SIZE,
-	.ivsize			= CHACHA20_IV_SIZE,
-	.chunksize		= CHACHA20_BLOCK_SIZE,
-	.setkey			= crypto_chacha20_setkey,
-	.encrypt		= crypto_chacha20_crypt,
-	.decrypt		= crypto_chacha20_crypt,
-};
-
-static int __init chacha20_generic_mod_init(void)
-{
-	return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_generic_mod_fini(void)
-{
-	crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_generic_mod_init);
-module_exit(chacha20_generic_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-generic");
diff --git a/crypto/chacha20_zinc.c b/crypto/chacha20_zinc.c
new file mode 100644
index 000000000000..5df88fdee066
--- /dev/null
+++ b/crypto/chacha20_zinc.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <zinc/chacha20.h>
+#include <linux/module.h>
+
+struct chacha20_key_ctx {
+	u32 key[8];
+};
+
+static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+				  unsigned int keysize)
+{
+	struct chacha20_key_ctx *key_ctx = crypto_skcipher_ctx(tfm);
+	int i;
+
+	if (keysize != CHACHA20_KEY_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(key_ctx->key); ++i)
+		key_ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+	return 0;
+}
+
+static int crypto_chacha20_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha20_key_ctx *key_ctx = crypto_skcipher_ctx(tfm);
+	struct chacha20_ctx ctx;
+	struct skcipher_walk walk;
+	simd_context_t simd_context;
+	int err, i;
+
+	err = skcipher_walk_virt(&walk, req, true);
+	if (unlikely(err))
+		return err;
+
+	memcpy(ctx.key, key_ctx->key, sizeof(ctx.key));
+	for (i = 0; i < ARRAY_SIZE(ctx.counter); ++i)
+		ctx.counter[i] = get_unaligned_le32(walk.iv + i * sizeof(u32));
+
+	simd_context = simd_get();
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		chacha20(&ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes,
+			 simd_context);
+
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+		simd_context = simd_relax(simd_context);
+	}
+	simd_put(simd_context);
+
+	return err;
+}
+
+static struct skcipher_alg alg = {
+	.base.cra_name		= "chacha20",
+	.base.cra_driver_name	= "chacha20-software",
+	.base.cra_priority	= 100,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct chacha20_key_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= CHACHA20_KEY_SIZE,
+	.max_keysize		= CHACHA20_KEY_SIZE,
+	.ivsize			= CHACHA20_IV_SIZE,
+	.chunksize		= CHACHA20_BLOCK_SIZE,
+	.setkey			= crypto_chacha20_setkey,
+	.encrypt		= crypto_chacha20_crypt,
+	.decrypt		= crypto_chacha20_crypt,
+};
+
+static int __init chacha20_mod_init(void)
+{
+	return crypto_register_skcipher(&alg);
+}
+
+static void __exit chacha20_mod_exit(void)
+{
+	crypto_unregister_skcipher(&alg);
+}
+
+module_init(chacha20_mod_init);
+module_exit(chacha20_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_DESCRIPTION("ChaCha20 stream cipher");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-software");
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index bf523797bef3..b26adb9ed898 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -13,7 +13,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
-#include <crypto/chacha20.h>
+#include <zinc/chacha20.h>
 #include <zinc/poly1305.h>
 #include <linux/err.h>
 #include <linux/init.h>
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
index b83d66073db0..3b92f58f3891 100644
--- a/include/crypto/chacha20.h
+++ b/include/crypto/chacha20.h
@@ -6,23 +6,11 @@
 #ifndef _CRYPTO_CHACHA20_H
 #define _CRYPTO_CHACHA20_H
 
-#include <crypto/skcipher.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-
 #define CHACHA20_IV_SIZE	16
 #define CHACHA20_KEY_SIZE	32
 #define CHACHA20_BLOCK_SIZE	64
 #define CHACHA20_BLOCK_WORDS	(CHACHA20_BLOCK_SIZE / sizeof(u32))
 
-struct chacha20_ctx {
-	u32 key[8];
-};
-
 void chacha20_block(u32 *state, u32 *stream);
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			   unsigned int keysize);
-int crypto_chacha20_crypt(struct skcipher_request *req);
 
 #endif
-- 
2.19.0

^ permalink raw reply related

* [PATCH net-next v4 17/20] crypto: port Poly1305 to Zinc
From: Jason A. Donenfeld @ 2018-09-14 16:22 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-crypto, davem, gregkh
  Cc: Jason A. Donenfeld, Samuel Neves, Andy Lutomirski,
	Jean-Philippe Aumasson, Eric Biggers
In-Reply-To: <20180914162240.7925-1-Jason@zx2c4.com>

Now that Poly1305 is in Zinc, we can have the crypto API code simply
call into it. We have to do a little bit of book keeping here, because
the crypto API receives the key in the first few calls to update.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/Makefile               |   3 -
 arch/x86/crypto/poly1305-avx2-x86_64.S | 388 ----------------
 arch/x86/crypto/poly1305-sse2-x86_64.S | 584 -------------------------
 arch/x86/crypto/poly1305_glue.c        | 205 ---------
 crypto/Kconfig                         |  15 +-
 crypto/Makefile                        |   2 +-
 crypto/chacha20poly1305.c              |  12 +-
 crypto/poly1305_generic.c              | 304 -------------
 crypto/poly1305_zinc.c                 |  92 ++++
 include/crypto/poly1305.h              |  40 --
 10 files changed, 101 insertions(+), 1544 deletions(-)
 delete mode 100644 arch/x86/crypto/poly1305-avx2-x86_64.S
 delete mode 100644 arch/x86/crypto/poly1305-sse2-x86_64.S
 delete mode 100644 arch/x86/crypto/poly1305_glue.c
 delete mode 100644 crypto/poly1305_generic.c
 create mode 100644 crypto/poly1305_zinc.c
 delete mode 100644 include/crypto/poly1305.h

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a450ad573dcb..cf830219846b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -34,7 +34,6 @@ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
-obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
 
 obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
 obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
@@ -110,10 +109,8 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
-poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o
 ifeq ($(avx2_supported),yes)
 sha1-ssse3-y += sha1_avx2_x86_64_asm.o
-poly1305-x86_64-y += poly1305-avx2-x86_64.o
 endif
 ifeq ($(sha1_ni_supported),yes)
 sha1-ssse3-y += sha1_ni_asm.o
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
deleted file mode 100644
index 3b6e70d085da..000000000000
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section	.rodata.cst32.ANMASK, "aM", @progbits, 32
-.align 32
-ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
-	.octa 0x0000000003ffffff0000000003ffffff
-
-.section	.rodata.cst32.ORMASK, "aM", @progbits, 32
-.align 32
-ORMASK:	.octa 0x00000000010000000000000001000000
-	.octa 0x00000000010000000000000001000000
-
-.text
-
-#define h0 0x00(%rdi)
-#define h1 0x04(%rdi)
-#define h2 0x08(%rdi)
-#define h3 0x0c(%rdi)
-#define h4 0x10(%rdi)
-#define r0 0x00(%rdx)
-#define r1 0x04(%rdx)
-#define r2 0x08(%rdx)
-#define r3 0x0c(%rdx)
-#define r4 0x10(%rdx)
-#define u0 0x00(%r8)
-#define u1 0x04(%r8)
-#define u2 0x08(%r8)
-#define u3 0x0c(%r8)
-#define u4 0x10(%r8)
-#define w0 0x14(%r8)
-#define w1 0x18(%r8)
-#define w2 0x1c(%r8)
-#define w3 0x20(%r8)
-#define w4 0x24(%r8)
-#define y0 0x28(%r8)
-#define y1 0x2c(%r8)
-#define y2 0x30(%r8)
-#define y3 0x34(%r8)
-#define y4 0x38(%r8)
-#define m %rsi
-#define hc0 %ymm0
-#define hc1 %ymm1
-#define hc2 %ymm2
-#define hc3 %ymm3
-#define hc4 %ymm4
-#define hc0x %xmm0
-#define hc1x %xmm1
-#define hc2x %xmm2
-#define hc3x %xmm3
-#define hc4x %xmm4
-#define t1 %ymm5
-#define t2 %ymm6
-#define t1x %xmm5
-#define t2x %xmm6
-#define ruwy0 %ymm7
-#define ruwy1 %ymm8
-#define ruwy2 %ymm9
-#define ruwy3 %ymm10
-#define ruwy4 %ymm11
-#define ruwy0x %xmm7
-#define ruwy1x %xmm8
-#define ruwy2x %xmm9
-#define ruwy3x %xmm10
-#define ruwy4x %xmm11
-#define svxz1 %ymm12
-#define svxz2 %ymm13
-#define svxz3 %ymm14
-#define svxz4 %ymm15
-#define d0 %r9
-#define d1 %r10
-#define d2 %r11
-#define d3 %r12
-#define d4 %r13
-
-ENTRY(poly1305_4block_avx2)
-	# %rdi: Accumulator h[5]
-	# %rsi: 64 byte input block m
-	# %rdx: Poly1305 key r[5]
-	# %rcx: Quadblock count
-	# %r8:  Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5],
-
-	# This four-block variant uses loop unrolled block processing. It
-	# requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
-	# h = (h + m) * r  =>  h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
-
-	vzeroupper
-	push		%rbx
-	push		%r12
-	push		%r13
-
-	# combine r0,u0,w0,y0
-	vmovd		y0,ruwy0x
-	vmovd		w0,t1x
-	vpunpcklqdq	t1,ruwy0,ruwy0
-	vmovd		u0,t1x
-	vmovd		r0,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy0,ruwy0
-
-	# combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
-	vmovd		y1,ruwy1x
-	vmovd		w1,t1x
-	vpunpcklqdq	t1,ruwy1,ruwy1
-	vmovd		u1,t1x
-	vmovd		r1,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy1,ruwy1
-	vpslld		$2,ruwy1,svxz1
-	vpaddd		ruwy1,svxz1,svxz1
-
-	# combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
-	vmovd		y2,ruwy2x
-	vmovd		w2,t1x
-	vpunpcklqdq	t1,ruwy2,ruwy2
-	vmovd		u2,t1x
-	vmovd		r2,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy2,ruwy2
-	vpslld		$2,ruwy2,svxz2
-	vpaddd		ruwy2,svxz2,svxz2
-
-	# combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
-	vmovd		y3,ruwy3x
-	vmovd		w3,t1x
-	vpunpcklqdq	t1,ruwy3,ruwy3
-	vmovd		u3,t1x
-	vmovd		r3,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy3,ruwy3
-	vpslld		$2,ruwy3,svxz3
-	vpaddd		ruwy3,svxz3,svxz3
-
-	# combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
-	vmovd		y4,ruwy4x
-	vmovd		w4,t1x
-	vpunpcklqdq	t1,ruwy4,ruwy4
-	vmovd		u4,t1x
-	vmovd		r4,t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,ruwy4,ruwy4
-	vpslld		$2,ruwy4,svxz4
-	vpaddd		ruwy4,svxz4,svxz4
-
-.Ldoblock4:
-	# hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
-	#	 m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
-	vmovd		0x00(m),hc0x
-	vmovd		0x10(m),t1x
-	vpunpcklqdq	t1,hc0,hc0
-	vmovd		0x20(m),t1x
-	vmovd		0x30(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc0,hc0
-	vpand		ANMASK(%rip),hc0,hc0
-	vmovd		h0,t1x
-	vpaddd		t1,hc0,hc0
-	# hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
-	#	 (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
-	vmovd		0x03(m),hc1x
-	vmovd		0x13(m),t1x
-	vpunpcklqdq	t1,hc1,hc1
-	vmovd		0x23(m),t1x
-	vmovd		0x33(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc1,hc1
-	vpsrld		$2,hc1,hc1
-	vpand		ANMASK(%rip),hc1,hc1
-	vmovd		h1,t1x
-	vpaddd		t1,hc1,hc1
-	# hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
-	#	 (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
-	vmovd		0x06(m),hc2x
-	vmovd		0x16(m),t1x
-	vpunpcklqdq	t1,hc2,hc2
-	vmovd		0x26(m),t1x
-	vmovd		0x36(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc2,hc2
-	vpsrld		$4,hc2,hc2
-	vpand		ANMASK(%rip),hc2,hc2
-	vmovd		h2,t1x
-	vpaddd		t1,hc2,hc2
-	# hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
-	#	 (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
-	vmovd		0x09(m),hc3x
-	vmovd		0x19(m),t1x
-	vpunpcklqdq	t1,hc3,hc3
-	vmovd		0x29(m),t1x
-	vmovd		0x39(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc3,hc3
-	vpsrld		$6,hc3,hc3
-	vpand		ANMASK(%rip),hc3,hc3
-	vmovd		h3,t1x
-	vpaddd		t1,hc3,hc3
-	# hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
-	#	 (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
-	vmovd		0x0c(m),hc4x
-	vmovd		0x1c(m),t1x
-	vpunpcklqdq	t1,hc4,hc4
-	vmovd		0x2c(m),t1x
-	vmovd		0x3c(m),t2x
-	vpunpcklqdq	t2,t1,t1
-	vperm2i128	$0x20,t1,hc4,hc4
-	vpsrld		$8,hc4,hc4
-	vpor		ORMASK(%rip),hc4,hc4
-	vmovd		h4,t1x
-	vpaddd		t1,hc4,hc4
-
-	# t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
-	vpmuludq	hc0,ruwy0,t1
-	# t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
-	vpmuludq	hc1,svxz4,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
-	vpmuludq	hc2,svxz3,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
-	vpmuludq	hc3,svxz2,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
-	vpmuludq	hc4,svxz1,t2
-	vpaddq		t2,t1,t1
-	# d0 = t1[0] + t1[1] + t[2] + t[3]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d0
-
-	# t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ]
-	vpmuludq	hc0,ruwy1,t1
-	# t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
-	vpmuludq	hc1,ruwy0,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
-	vpmuludq	hc2,svxz4,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
-	vpmuludq	hc3,svxz3,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
-	vpmuludq	hc4,svxz2,t2
-	vpaddq		t2,t1,t1
-	# d1 = t1[0] + t1[1] + t1[3] + t1[4]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d1
-
-	# t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
-	vpmuludq	hc0,ruwy2,t1
-	# t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
-	vpmuludq	hc1,ruwy1,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
-	vpmuludq	hc2,ruwy0,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
-	vpmuludq	hc3,svxz4,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
-	vpmuludq	hc4,svxz3,t2
-	vpaddq		t2,t1,t1
-	# d2 = t1[0] + t1[1] + t1[2] + t1[3]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d2
-
-	# t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
-	vpmuludq	hc0,ruwy3,t1
-	# t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
-	vpmuludq	hc1,ruwy2,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
-	vpmuludq	hc2,ruwy1,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
-	vpmuludq	hc3,ruwy0,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
-	vpmuludq	hc4,svxz4,t2
-	vpaddq		t2,t1,t1
-	# d3 = t1[0] + t1[1] + t1[2] + t1[3]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d3
-
-	# t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
-	vpmuludq	hc0,ruwy4,t1
-	# t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
-	vpmuludq	hc1,ruwy3,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
-	vpmuludq	hc2,ruwy2,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
-	vpmuludq	hc3,ruwy1,t2
-	vpaddq		t2,t1,t1
-	# t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
-	vpmuludq	hc4,ruwy0,t2
-	vpaddq		t2,t1,t1
-	# d4 = t1[0] + t1[1] + t1[2] + t1[3]
-	vpermq		$0xee,t1,t2
-	vpaddq		t2,t1,t1
-	vpsrldq		$8,t1,t2
-	vpaddq		t2,t1,t1
-	vmovq		t1x,d4
-
-	# d1 += d0 >> 26
-	mov		d0,%rax
-	shr		$26,%rax
-	add		%rax,d1
-	# h0 = d0 & 0x3ffffff
-	mov		d0,%rbx
-	and		$0x3ffffff,%ebx
-
-	# d2 += d1 >> 26
-	mov		d1,%rax
-	shr		$26,%rax
-	add		%rax,d2
-	# h1 = d1 & 0x3ffffff
-	mov		d1,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h1
-
-	# d3 += d2 >> 26
-	mov		d2,%rax
-	shr		$26,%rax
-	add		%rax,d3
-	# h2 = d2 & 0x3ffffff
-	mov		d2,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h2
-
-	# d4 += d3 >> 26
-	mov		d3,%rax
-	shr		$26,%rax
-	add		%rax,d4
-	# h3 = d3 & 0x3ffffff
-	mov		d3,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h3
-
-	# h0 += (d4 >> 26) * 5
-	mov		d4,%rax
-	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
-	# h4 = d4 & 0x3ffffff
-	mov		d4,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h4
-
-	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
-	add		%eax,h1
-	# h0 = h0 & 0x3ffffff
-	andl		$0x3ffffff,%ebx
-	mov		%ebx,h0
-
-	add		$0x40,m
-	dec		%rcx
-	jnz		.Ldoblock4
-
-	vzeroupper
-	pop		%r13
-	pop		%r12
-	pop		%rbx
-	ret
-ENDPROC(poly1305_4block_avx2)
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
deleted file mode 100644
index c88c670cb5fc..000000000000
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ /dev/null
@@ -1,584 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section	.rodata.cst16.ANMASK, "aM", @progbits, 16
-.align 16
-ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
-
-.section	.rodata.cst16.ORMASK, "aM", @progbits, 16
-.align 16
-ORMASK:	.octa 0x00000000010000000000000001000000
-
-.text
-
-#define h0 0x00(%rdi)
-#define h1 0x04(%rdi)
-#define h2 0x08(%rdi)
-#define h3 0x0c(%rdi)
-#define h4 0x10(%rdi)
-#define r0 0x00(%rdx)
-#define r1 0x04(%rdx)
-#define r2 0x08(%rdx)
-#define r3 0x0c(%rdx)
-#define r4 0x10(%rdx)
-#define s1 0x00(%rsp)
-#define s2 0x04(%rsp)
-#define s3 0x08(%rsp)
-#define s4 0x0c(%rsp)
-#define m %rsi
-#define h01 %xmm0
-#define h23 %xmm1
-#define h44 %xmm2
-#define t1 %xmm3
-#define t2 %xmm4
-#define t3 %xmm5
-#define t4 %xmm6
-#define mask %xmm7
-#define d0 %r8
-#define d1 %r9
-#define d2 %r10
-#define d3 %r11
-#define d4 %r12
-
-ENTRY(poly1305_block_sse2)
-	# %rdi: Accumulator h[5]
-	# %rsi: 16 byte input block m
-	# %rdx: Poly1305 key r[5]
-	# %rcx: Block count
-
-	# This single block variant tries to improve performance by doing two
-	# multiplications in parallel using SSE instructions. There is quite
-	# some quardword packing involved, hence the speedup is marginal.
-
-	push		%rbx
-	push		%r12
-	sub		$0x10,%rsp
-
-	# s1..s4 = r1..r4 * 5
-	mov		r1,%eax
-	lea		(%eax,%eax,4),%eax
-	mov		%eax,s1
-	mov		r2,%eax
-	lea		(%eax,%eax,4),%eax
-	mov		%eax,s2
-	mov		r3,%eax
-	lea		(%eax,%eax,4),%eax
-	mov		%eax,s3
-	mov		r4,%eax
-	lea		(%eax,%eax,4),%eax
-	mov		%eax,s4
-
-	movdqa		ANMASK(%rip),mask
-
-.Ldoblock:
-	# h01 = [0, h1, 0, h0]
-	# h23 = [0, h3, 0, h2]
-	# h44 = [0, h4, 0, h4]
-	movd		h0,h01
-	movd		h1,t1
-	movd		h2,h23
-	movd		h3,t2
-	movd		h4,h44
-	punpcklqdq	t1,h01
-	punpcklqdq	t2,h23
-	punpcklqdq	h44,h44
-
-	# h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ]
-	movd		0x00(m),t1
-	movd		0x03(m),t2
-	psrld		$2,t2
-	punpcklqdq	t2,t1
-	pand		mask,t1
-	paddd		t1,h01
-	# h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ]
-	movd		0x06(m),t1
-	movd		0x09(m),t2
-	psrld		$4,t1
-	psrld		$6,t2
-	punpcklqdq	t2,t1
-	pand		mask,t1
-	paddd		t1,h23
-	# h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ]
-	mov		0x0c(m),%eax
-	shr		$8,%eax
-	or		$0x01000000,%eax
-	movd		%eax,t1
-	pshufd		$0xc4,t1,t1
-	paddd		t1,h44
-
-	# t1[0] = h0 * r0 + h2 * s3
-	# t1[1] = h1 * s4 + h3 * s2
-	movd		r0,t1
-	movd		s4,t2
-	punpcklqdq	t2,t1
-	pmuludq		h01,t1
-	movd		s3,t2
-	movd		s2,t3
-	punpcklqdq	t3,t2
-	pmuludq		h23,t2
-	paddq		t2,t1
-	# t2[0] = h0 * r1 + h2 * s4
-	# t2[1] = h1 * r0 + h3 * s3
-	movd		r1,t2
-	movd		r0,t3
-	punpcklqdq	t3,t2
-	pmuludq		h01,t2
-	movd		s4,t3
-	movd		s3,t4
-	punpcklqdq	t4,t3
-	pmuludq		h23,t3
-	paddq		t3,t2
-	# t3[0] = h4 * s1
-	# t3[1] = h4 * s2
-	movd		s1,t3
-	movd		s2,t4
-	punpcklqdq	t4,t3
-	pmuludq		h44,t3
-	# d0 = t1[0] + t1[1] + t3[0]
-	# d1 = t2[0] + t2[1] + t3[1]
-	movdqa		t1,t4
-	punpcklqdq	t2,t4
-	punpckhqdq	t2,t1
-	paddq		t4,t1
-	paddq		t3,t1
-	movq		t1,d0
-	psrldq		$8,t1
-	movq		t1,d1
-
-	# t1[0] = h0 * r2 + h2 * r0
-	# t1[1] = h1 * r1 + h3 * s4
-	movd		r2,t1
-	movd		r1,t2
-	punpcklqdq 	t2,t1
-	pmuludq		h01,t1
-	movd		r0,t2
-	movd		s4,t3
-	punpcklqdq	t3,t2
-	pmuludq		h23,t2
-	paddq		t2,t1
-	# t2[0] = h0 * r3 + h2 * r1
-	# t2[1] = h1 * r2 + h3 * r0
-	movd		r3,t2
-	movd		r2,t3
-	punpcklqdq	t3,t2
-	pmuludq		h01,t2
-	movd		r1,t3
-	movd		r0,t4
-	punpcklqdq	t4,t3
-	pmuludq		h23,t3
-	paddq		t3,t2
-	# t3[0] = h4 * s3
-	# t3[1] = h4 * s4
-	movd		s3,t3
-	movd		s4,t4
-	punpcklqdq	t4,t3
-	pmuludq		h44,t3
-	# d2 = t1[0] + t1[1] + t3[0]
-	# d3 = t2[0] + t2[1] + t3[1]
-	movdqa		t1,t4
-	punpcklqdq	t2,t4
-	punpckhqdq	t2,t1
-	paddq		t4,t1
-	paddq		t3,t1
-	movq		t1,d2
-	psrldq		$8,t1
-	movq		t1,d3
-
-	# t1[0] = h0 * r4 + h2 * r2
-	# t1[1] = h1 * r3 + h3 * r1
-	movd		r4,t1
-	movd		r3,t2
-	punpcklqdq	t2,t1
-	pmuludq		h01,t1
-	movd		r2,t2
-	movd		r1,t3
-	punpcklqdq	t3,t2
-	pmuludq		h23,t2
-	paddq		t2,t1
-	# t3[0] = h4 * r0
-	movd		r0,t3
-	pmuludq		h44,t3
-	# d4 = t1[0] + t1[1] + t3[0]
-	movdqa		t1,t4
-	psrldq		$8,t4
-	paddq		t4,t1
-	paddq		t3,t1
-	movq		t1,d4
-
-	# d1 += d0 >> 26
-	mov		d0,%rax
-	shr		$26,%rax
-	add		%rax,d1
-	# h0 = d0 & 0x3ffffff
-	mov		d0,%rbx
-	and		$0x3ffffff,%ebx
-
-	# d2 += d1 >> 26
-	mov		d1,%rax
-	shr		$26,%rax
-	add		%rax,d2
-	# h1 = d1 & 0x3ffffff
-	mov		d1,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h1
-
-	# d3 += d2 >> 26
-	mov		d2,%rax
-	shr		$26,%rax
-	add		%rax,d3
-	# h2 = d2 & 0x3ffffff
-	mov		d2,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h2
-
-	# d4 += d3 >> 26
-	mov		d3,%rax
-	shr		$26,%rax
-	add		%rax,d4
-	# h3 = d3 & 0x3ffffff
-	mov		d3,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h3
-
-	# h0 += (d4 >> 26) * 5
-	mov		d4,%rax
-	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
-	# h4 = d4 & 0x3ffffff
-	mov		d4,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h4
-
-	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
-	add		%eax,h1
-	# h0 = h0 & 0x3ffffff
-	andl		$0x3ffffff,%ebx
-	mov		%ebx,h0
-
-	add		$0x10,m
-	dec		%rcx
-	jnz		.Ldoblock
-
-	add		$0x10,%rsp
-	pop		%r12
-	pop		%rbx
-	ret
-ENDPROC(poly1305_block_sse2)
-
-
-#define u0 0x00(%r8)
-#define u1 0x04(%r8)
-#define u2 0x08(%r8)
-#define u3 0x0c(%r8)
-#define u4 0x10(%r8)
-#define hc0 %xmm0
-#define hc1 %xmm1
-#define hc2 %xmm2
-#define hc3 %xmm5
-#define hc4 %xmm6
-#define ru0 %xmm7
-#define ru1 %xmm8
-#define ru2 %xmm9
-#define ru3 %xmm10
-#define ru4 %xmm11
-#define sv1 %xmm12
-#define sv2 %xmm13
-#define sv3 %xmm14
-#define sv4 %xmm15
-#undef d0
-#define d0 %r13
-
-ENTRY(poly1305_2block_sse2)
-	# %rdi: Accumulator h[5]
-	# %rsi: 16 byte input block m
-	# %rdx: Poly1305 key r[5]
-	# %rcx: Doubleblock count
-	# %r8:  Poly1305 derived key r^2 u[5]
-
-	# This two-block variant further improves performance by using loop
-	# unrolled block processing. This is more straight forward and does
-	# less byte shuffling, but requires a second Poly1305 key r^2:
-	# h = (h + m) * r    =>    h = (h + m1) * r^2 + m2 * r
-
-	push		%rbx
-	push		%r12
-	push		%r13
-
-	# combine r0,u0
-	movd		u0,ru0
-	movd		r0,t1
-	punpcklqdq	t1,ru0
-
-	# combine r1,u1 and s1=r1*5,v1=u1*5
-	movd		u1,ru1
-	movd		r1,t1
-	punpcklqdq	t1,ru1
-	movdqa		ru1,sv1
-	pslld		$2,sv1
-	paddd		ru1,sv1
-
-	# combine r2,u2 and s2=r2*5,v2=u2*5
-	movd		u2,ru2
-	movd		r2,t1
-	punpcklqdq	t1,ru2
-	movdqa		ru2,sv2
-	pslld		$2,sv2
-	paddd		ru2,sv2
-
-	# combine r3,u3 and s3=r3*5,v3=u3*5
-	movd		u3,ru3
-	movd		r3,t1
-	punpcklqdq	t1,ru3
-	movdqa		ru3,sv3
-	pslld		$2,sv3
-	paddd		ru3,sv3
-
-	# combine r4,u4 and s4=r4*5,v4=u4*5
-	movd		u4,ru4
-	movd		r4,t1
-	punpcklqdq	t1,ru4
-	movdqa		ru4,sv4
-	pslld		$2,sv4
-	paddd		ru4,sv4
-
-.Ldoblock2:
-	# hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ]
-	movd		0x00(m),hc0
-	movd		0x10(m),t1
-	punpcklqdq	t1,hc0
-	pand		ANMASK(%rip),hc0
-	movd		h0,t1
-	paddd		t1,hc0
-	# hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ]
-	movd		0x03(m),hc1
-	movd		0x13(m),t1
-	punpcklqdq	t1,hc1
-	psrld		$2,hc1
-	pand		ANMASK(%rip),hc1
-	movd		h1,t1
-	paddd		t1,hc1
-	# hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ]
-	movd		0x06(m),hc2
-	movd		0x16(m),t1
-	punpcklqdq	t1,hc2
-	psrld		$4,hc2
-	pand		ANMASK(%rip),hc2
-	movd		h2,t1
-	paddd		t1,hc2
-	# hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ]
-	movd		0x09(m),hc3
-	movd		0x19(m),t1
-	punpcklqdq	t1,hc3
-	psrld		$6,hc3
-	pand		ANMASK(%rip),hc3
-	movd		h3,t1
-	paddd		t1,hc3
-	# hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ]
-	movd		0x0c(m),hc4
-	movd		0x1c(m),t1
-	punpcklqdq	t1,hc4
-	psrld		$8,hc4
-	por		ORMASK(%rip),hc4
-	movd		h4,t1
-	paddd		t1,hc4
-
-	# t1 = [ hc0[1] * r0, hc0[0] * u0 ]
-	movdqa		ru0,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * s4, hc1[0] * v4 ]
-	movdqa		sv4,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * s3, hc2[0] * v3 ]
-	movdqa		sv3,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * s2, hc3[0] * v2 ]
-	movdqa		sv2,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * s1, hc4[0] * v1 ]
-	movdqa		sv1,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d0 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d0
-
-	# t1 = [ hc0[1] * r1, hc0[0] * u1 ]
-	movdqa		ru1,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * r0, hc1[0] * u0 ]
-	movdqa		ru0,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * s4, hc2[0] * v4 ]
-	movdqa		sv4,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * s3, hc3[0] * v3 ]
-	movdqa		sv3,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * s2, hc4[0] * v2 ]
-	movdqa		sv2,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d1 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d1
-
-	# t1 = [ hc0[1] * r2, hc0[0] * u2 ]
-	movdqa		ru2,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * r1, hc1[0] * u1 ]
-	movdqa		ru1,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * r0, hc2[0] * u0 ]
-	movdqa		ru0,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * s4, hc3[0] * v4 ]
-	movdqa		sv4,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * s3, hc4[0] * v3 ]
-	movdqa		sv3,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d2 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d2
-
-	# t1 = [ hc0[1] * r3, hc0[0] * u3 ]
-	movdqa		ru3,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * r2, hc1[0] * u2 ]
-	movdqa		ru2,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * r1, hc2[0] * u1 ]
-	movdqa		ru1,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * r0, hc3[0] * u0 ]
-	movdqa		ru0,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * s4, hc4[0] * v4 ]
-	movdqa		sv4,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d3 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d3
-
-	# t1 = [ hc0[1] * r4, hc0[0] * u4 ]
-	movdqa		ru4,t1
-	pmuludq		hc0,t1
-	# t1 += [ hc1[1] * r3, hc1[0] * u3 ]
-	movdqa		ru3,t2
-	pmuludq		hc1,t2
-	paddq		t2,t1
-	# t1 += [ hc2[1] * r2, hc2[0] * u2 ]
-	movdqa		ru2,t2
-	pmuludq		hc2,t2
-	paddq		t2,t1
-	# t1 += [ hc3[1] * r1, hc3[0] * u1 ]
-	movdqa		ru1,t2
-	pmuludq		hc3,t2
-	paddq		t2,t1
-	# t1 += [ hc4[1] * r0, hc4[0] * u0 ]
-	movdqa		ru0,t2
-	pmuludq		hc4,t2
-	paddq		t2,t1
-	# d4 = t1[0] + t1[1]
-	movdqa		t1,t2
-	psrldq		$8,t2
-	paddq		t2,t1
-	movq		t1,d4
-
-	# d1 += d0 >> 26
-	mov		d0,%rax
-	shr		$26,%rax
-	add		%rax,d1
-	# h0 = d0 & 0x3ffffff
-	mov		d0,%rbx
-	and		$0x3ffffff,%ebx
-
-	# d2 += d1 >> 26
-	mov		d1,%rax
-	shr		$26,%rax
-	add		%rax,d2
-	# h1 = d1 & 0x3ffffff
-	mov		d1,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h1
-
-	# d3 += d2 >> 26
-	mov		d2,%rax
-	shr		$26,%rax
-	add		%rax,d3
-	# h2 = d2 & 0x3ffffff
-	mov		d2,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h2
-
-	# d4 += d3 >> 26
-	mov		d3,%rax
-	shr		$26,%rax
-	add		%rax,d4
-	# h3 = d3 & 0x3ffffff
-	mov		d3,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h3
-
-	# h0 += (d4 >> 26) * 5
-	mov		d4,%rax
-	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
-	# h4 = d4 & 0x3ffffff
-	mov		d4,%rax
-	and		$0x3ffffff,%eax
-	mov		%eax,h4
-
-	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
-	add		%eax,h1
-	# h0 = h0 & 0x3ffffff
-	andl		$0x3ffffff,%ebx
-	mov		%ebx,h0
-
-	add		$0x20,m
-	dec		%rcx
-	jnz		.Ldoblock2
-
-	pop		%r13
-	pop		%r12
-	pop		%rbx
-	ret
-ENDPROC(poly1305_2block_sse2)
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
deleted file mode 100644
index f012b7e28ad1..000000000000
--- a/arch/x86/crypto/poly1305_glue.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/poly1305.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/simd.h>
-
-struct poly1305_simd_desc_ctx {
-	struct poly1305_desc_ctx base;
-	/* derived key u set? */
-	bool uset;
-#ifdef CONFIG_AS_AVX2
-	/* derived keys r^3, r^4 set? */
-	bool wset;
-#endif
-	/* derived Poly1305 key r^2 */
-	u32 u[5];
-	/* ... silently appended r^3 and r^4 when using AVX2 */
-};
-
-asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
-				    const u32 *r, unsigned int blocks);
-asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
-				     unsigned int blocks, const u32 *u);
-#ifdef CONFIG_AS_AVX2
-asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
-				     unsigned int blocks, const u32 *u);
-static bool poly1305_use_avx2;
-#endif
-
-static int poly1305_simd_init(struct shash_desc *desc)
-{
-	struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
-
-	sctx->uset = false;
-#ifdef CONFIG_AS_AVX2
-	sctx->wset = false;
-#endif
-
-	return crypto_poly1305_init(desc);
-}
-
-static void poly1305_simd_mult(u32 *a, const u32 *b)
-{
-	u8 m[POLY1305_BLOCK_SIZE];
-
-	memset(m, 0, sizeof(m));
-	/* The poly1305 block function adds a hi-bit to the accumulator which
-	 * we don't need for key multiplication; compensate for it. */
-	a[4] -= 1 << 24;
-	poly1305_block_sse2(a, m, b, 1);
-}
-
-static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
-					 const u8 *src, unsigned int srclen)
-{
-	struct poly1305_simd_desc_ctx *sctx;
-	unsigned int blocks, datalen;
-
-	BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
-	sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
-
-	if (unlikely(!dctx->sset)) {
-		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
-		src += srclen - datalen;
-		srclen = datalen;
-	}
-
-#ifdef CONFIG_AS_AVX2
-	if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
-		if (unlikely(!sctx->wset)) {
-			if (!sctx->uset) {
-				memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-				poly1305_simd_mult(sctx->u, dctx->r);
-				sctx->uset = true;
-			}
-			memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 5, dctx->r);
-			memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 10, dctx->r);
-			sctx->wset = true;
-		}
-		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
-		poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
-		src += POLY1305_BLOCK_SIZE * 4 * blocks;
-		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
-	}
-#endif
-	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
-		if (unlikely(!sctx->uset)) {
-			memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u, dctx->r);
-			sctx->uset = true;
-		}
-		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
-		poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
-		src += POLY1305_BLOCK_SIZE * 2 * blocks;
-		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
-	}
-	if (srclen >= POLY1305_BLOCK_SIZE) {
-		poly1305_block_sse2(dctx->h, src, dctx->r, 1);
-		srclen -= POLY1305_BLOCK_SIZE;
-	}
-	return srclen;
-}
-
-static int poly1305_simd_update(struct shash_desc *desc,
-				const u8 *src, unsigned int srclen)
-{
-	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-	unsigned int bytes;
-
-	/* kernel_fpu_begin/end is costly, use fallback for small updates */
-	if (srclen <= 288 || !may_use_simd())
-		return crypto_poly1305_update(desc, src, srclen);
-
-	kernel_fpu_begin();
-
-	if (unlikely(dctx->buflen)) {
-		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
-		memcpy(dctx->buf + dctx->buflen, src, bytes);
-		src += bytes;
-		srclen -= bytes;
-		dctx->buflen += bytes;
-
-		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
-			poly1305_simd_blocks(dctx, dctx->buf,
-					     POLY1305_BLOCK_SIZE);
-			dctx->buflen = 0;
-		}
-	}
-
-	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-		bytes = poly1305_simd_blocks(dctx, src, srclen);
-		src += srclen - bytes;
-		srclen = bytes;
-	}
-
-	kernel_fpu_end();
-
-	if (unlikely(srclen)) {
-		dctx->buflen = srclen;
-		memcpy(dctx->buf, src, srclen);
-	}
-
-	return 0;
-}
-
-static struct shash_alg alg = {
-	.digestsize	= POLY1305_DIGEST_SIZE,
-	.init		= poly1305_simd_init,
-	.update		= poly1305_simd_update,
-	.final		= crypto_poly1305_final,
-	.descsize	= sizeof(struct poly1305_simd_desc_ctx),
-	.base		= {
-		.cra_name		= "poly1305",
-		.cra_driver_name	= "poly1305-simd",
-		.cra_priority		= 300,
-		.cra_blocksize		= POLY1305_BLOCK_SIZE,
-		.cra_module		= THIS_MODULE,
-	},
-};
-
-static int __init poly1305_simd_mod_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_XMM2))
-		return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
-	poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
-			    boot_cpu_has(X86_FEATURE_AVX2) &&
-			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-	alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
-	if (poly1305_use_avx2)
-		alg.descsize += 10 * sizeof(u32);
-#endif
-	return crypto_register_shash(&alg);
-}
-
-static void __exit poly1305_simd_mod_exit(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(poly1305_simd_mod_init);
-module_exit(poly1305_simd_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("Poly1305 authenticator");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-simd");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index f3e40ac56d93..47859a0f8052 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -656,24 +656,13 @@ config CRYPTO_GHASH
 config CRYPTO_POLY1305
 	tristate "Poly1305 authenticator algorithm"
 	select CRYPTO_HASH
+	select ZINC_POLY1305
 	help
 	  Poly1305 authenticator algorithm, RFC7539.
 
 	  Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein.
 	  It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use
-	  in IETF protocols. This is the portable C implementation of Poly1305.
-
-config CRYPTO_POLY1305_X86_64
-	tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
-	depends on X86 && 64BIT
-	select CRYPTO_POLY1305
-	help
-	  Poly1305 authenticator algorithm, RFC7539.
-
-	  Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein.
-	  It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use
-	  in IETF protocols. This is the x86_64 assembler implementation using SIMD
-	  instructions.
+	  in IETF protocols.
 
 config CRYPTO_MD4
 	tristate "MD4 digest algorithm"
diff --git a/crypto/Makefile b/crypto/Makefile
index 6d1d40eeb964..5e60348d02e2 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -118,7 +118,7 @@ obj-$(CONFIG_CRYPTO_SEED) += seed.o
 obj-$(CONFIG_CRYPTO_SPECK) += speck.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
 obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
-obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
+obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_zinc.o
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index 600afa99941f..bf523797bef3 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -14,7 +14,7 @@
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/chacha20.h>
-#include <crypto/poly1305.h>
+#include <zinc/poly1305.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -62,7 +62,7 @@ struct chachapoly_req_ctx {
 	/* the key we generate for Poly1305 using Chacha20 */
 	u8 key[POLY1305_KEY_SIZE];
 	/* calculated Poly1305 tag */
-	u8 tag[POLY1305_DIGEST_SIZE];
+	u8 tag[POLY1305_MAC_SIZE];
 	/* length of data to en/decrypt, without ICV */
 	unsigned int cryptlen;
 	/* Actual AD, excluding IV */
@@ -471,7 +471,7 @@ static int chachapoly_decrypt(struct aead_request *req)
 {
 	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
 
-	rctx->cryptlen = req->cryptlen - POLY1305_DIGEST_SIZE;
+	rctx->cryptlen = req->cryptlen - POLY1305_MAC_SIZE;
 
 	/* decrypt call chain:
 	 * - poly_genkey/done()
@@ -513,7 +513,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 static int chachapoly_setauthsize(struct crypto_aead *tfm,
 				  unsigned int authsize)
 {
-	if (authsize != POLY1305_DIGEST_SIZE)
+	if (authsize != POLY1305_MAC_SIZE)
 		return -EINVAL;
 
 	return 0;
@@ -613,7 +613,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 	poly_hash = __crypto_hash_alg_common(poly);
 
 	err = -EINVAL;
-	if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+	if (poly_hash->digestsize != POLY1305_MAC_SIZE)
 		goto out_put_poly;
 
 	err = -ENOMEM;
@@ -666,7 +666,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 				     ctx->saltlen;
 	inst->alg.ivsize = ivsize;
 	inst->alg.chunksize = crypto_skcipher_alg_chunksize(chacha);
-	inst->alg.maxauthsize = POLY1305_DIGEST_SIZE;
+	inst->alg.maxauthsize = POLY1305_MAC_SIZE;
 	inst->alg.init = chachapoly_init;
 	inst->alg.exit = chachapoly_exit;
 	inst->alg.encrypt = chachapoly_encrypt;
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
deleted file mode 100644
index 47d3a6b83931..000000000000
--- a/crypto/poly1305_generic.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Poly1305 authenticator algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/hash.h>
-#include <crypto/poly1305.h>
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/unaligned.h>
-
-static inline u64 mlt(u64 a, u64 b)
-{
-	return a * b;
-}
-
-static inline u32 sr(u64 v, u_char n)
-{
-	return v >> n;
-}
-
-static inline u32 and(u32 v, u32 mask)
-{
-	return v & mask;
-}
-
-int crypto_poly1305_init(struct shash_desc *desc)
-{
-	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-
-	memset(dctx->h, 0, sizeof(dctx->h));
-	dctx->buflen = 0;
-	dctx->rset = false;
-	dctx->sset = false;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_init);
-
-static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
-{
-	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-	dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
-	dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
-	dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
-	dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
-	dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
-}
-
-static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
-{
-	dctx->s[0] = get_unaligned_le32(key +  0);
-	dctx->s[1] = get_unaligned_le32(key +  4);
-	dctx->s[2] = get_unaligned_le32(key +  8);
-	dctx->s[3] = get_unaligned_le32(key + 12);
-}
-
-/*
- * Poly1305 requires a unique key for each tag, which implies that we can't set
- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
- * expect the key as the first 32 bytes in the update() call.
- */
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
-					const u8 *src, unsigned int srclen)
-{
-	if (!dctx->sset) {
-		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
-			poly1305_setrkey(dctx, src);
-			src += POLY1305_BLOCK_SIZE;
-			srclen -= POLY1305_BLOCK_SIZE;
-			dctx->rset = true;
-		}
-		if (srclen >= POLY1305_BLOCK_SIZE) {
-			poly1305_setskey(dctx, src);
-			src += POLY1305_BLOCK_SIZE;
-			srclen -= POLY1305_BLOCK_SIZE;
-			dctx->sset = true;
-		}
-	}
-	return srclen;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
-
-static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
-				    const u8 *src, unsigned int srclen,
-				    u32 hibit)
-{
-	u32 r0, r1, r2, r3, r4;
-	u32 s1, s2, s3, s4;
-	u32 h0, h1, h2, h3, h4;
-	u64 d0, d1, d2, d3, d4;
-	unsigned int datalen;
-
-	if (unlikely(!dctx->sset)) {
-		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
-		src += srclen - datalen;
-		srclen = datalen;
-	}
-
-	r0 = dctx->r[0];
-	r1 = dctx->r[1];
-	r2 = dctx->r[2];
-	r3 = dctx->r[3];
-	r4 = dctx->r[4];
-
-	s1 = r1 * 5;
-	s2 = r2 * 5;
-	s3 = r3 * 5;
-	s4 = r4 * 5;
-
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
-
-	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-
-		/* h += m[i] */
-		h0 += (get_unaligned_le32(src +  0) >> 0) & 0x3ffffff;
-		h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
-		h2 += (get_unaligned_le32(src +  6) >> 4) & 0x3ffffff;
-		h3 += (get_unaligned_le32(src +  9) >> 6) & 0x3ffffff;
-		h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
-
-		/* h *= r */
-		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
-		     mlt(h3, s2) + mlt(h4, s1);
-		d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
-		     mlt(h3, s3) + mlt(h4, s2);
-		d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
-		     mlt(h3, s4) + mlt(h4, s3);
-		d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
-		     mlt(h3, r0) + mlt(h4, s4);
-		d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
-		     mlt(h3, r1) + mlt(h4, r0);
-
-		/* (partial) h %= p */
-		d1 += sr(d0, 26);     h0 = and(d0, 0x3ffffff);
-		d2 += sr(d1, 26);     h1 = and(d1, 0x3ffffff);
-		d3 += sr(d2, 26);     h2 = and(d2, 0x3ffffff);
-		d4 += sr(d3, 26);     h3 = and(d3, 0x3ffffff);
-		h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
-		h1 += h0 >> 26;       h0 = h0 & 0x3ffffff;
-
-		src += POLY1305_BLOCK_SIZE;
-		srclen -= POLY1305_BLOCK_SIZE;
-	}
-
-	dctx->h[0] = h0;
-	dctx->h[1] = h1;
-	dctx->h[2] = h2;
-	dctx->h[3] = h3;
-	dctx->h[4] = h4;
-
-	return srclen;
-}
-
-int crypto_poly1305_update(struct shash_desc *desc,
-			   const u8 *src, unsigned int srclen)
-{
-	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-	unsigned int bytes;
-
-	if (unlikely(dctx->buflen)) {
-		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
-		memcpy(dctx->buf + dctx->buflen, src, bytes);
-		src += bytes;
-		srclen -= bytes;
-		dctx->buflen += bytes;
-
-		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
-			poly1305_blocks(dctx, dctx->buf,
-					POLY1305_BLOCK_SIZE, 1 << 24);
-			dctx->buflen = 0;
-		}
-	}
-
-	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
-		bytes = poly1305_blocks(dctx, src, srclen, 1 << 24);
-		src += srclen - bytes;
-		srclen = bytes;
-	}
-
-	if (unlikely(srclen)) {
-		dctx->buflen = srclen;
-		memcpy(dctx->buf, src, srclen);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_update);
-
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
-{
-	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-	u32 h0, h1, h2, h3, h4;
-	u32 g0, g1, g2, g3, g4;
-	u32 mask;
-	u64 f = 0;
-
-	if (unlikely(!dctx->sset))
-		return -ENOKEY;
-
-	if (unlikely(dctx->buflen)) {
-		dctx->buf[dctx->buflen++] = 1;
-		memset(dctx->buf + dctx->buflen, 0,
-		       POLY1305_BLOCK_SIZE - dctx->buflen);
-		poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
-	}
-
-	/* fully carry h */
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
-
-	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
-	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
-	h4 += (h3 >> 26);     h3 = h3 & 0x3ffffff;
-	h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
-	h1 += (h0 >> 26);     h0 = h0 & 0x3ffffff;
-
-	/* compute h + -p */
-	g0 = h0 + 5;
-	g1 = h1 + (g0 >> 26);             g0 &= 0x3ffffff;
-	g2 = h2 + (g1 >> 26);             g1 &= 0x3ffffff;
-	g3 = h3 + (g2 >> 26);             g2 &= 0x3ffffff;
-	g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
-
-	/* select h if h < p, or h + -p if h >= p */
-	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
-	g0 &= mask;
-	g1 &= mask;
-	g2 &= mask;
-	g3 &= mask;
-	g4 &= mask;
-	mask = ~mask;
-	h0 = (h0 & mask) | g0;
-	h1 = (h1 & mask) | g1;
-	h2 = (h2 & mask) | g2;
-	h3 = (h3 & mask) | g3;
-	h4 = (h4 & mask) | g4;
-
-	/* h = h % (2^128) */
-	h0 = (h0 >>  0) | (h1 << 26);
-	h1 = (h1 >>  6) | (h2 << 20);
-	h2 = (h2 >> 12) | (h3 << 14);
-	h3 = (h3 >> 18) | (h4 <<  8);
-
-	/* mac = (h + s) % (2^128) */
-	f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst +  0);
-	f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst +  4);
-	f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst +  8);
-	f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_final);
-
-static struct shash_alg poly1305_alg = {
-	.digestsize	= POLY1305_DIGEST_SIZE,
-	.init		= crypto_poly1305_init,
-	.update		= crypto_poly1305_update,
-	.final		= crypto_poly1305_final,
-	.descsize	= sizeof(struct poly1305_desc_ctx),
-	.base		= {
-		.cra_name		= "poly1305",
-		.cra_driver_name	= "poly1305-generic",
-		.cra_priority		= 100,
-		.cra_blocksize		= POLY1305_BLOCK_SIZE,
-		.cra_module		= THIS_MODULE,
-	},
-};
-
-static int __init poly1305_mod_init(void)
-{
-	return crypto_register_shash(&poly1305_alg);
-}
-
-static void __exit poly1305_mod_exit(void)
-{
-	crypto_unregister_shash(&poly1305_alg);
-}
-
-module_init(poly1305_mod_init);
-module_exit(poly1305_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("Poly1305 authenticator");
-MODULE_ALIAS_CRYPTO("poly1305");
-MODULE_ALIAS_CRYPTO("poly1305-generic");
diff --git a/crypto/poly1305_zinc.c b/crypto/poly1305_zinc.c
new file mode 100644
index 000000000000..630bbb1ba230
--- /dev/null
+++ b/crypto/poly1305_zinc.c
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <zinc/poly1305.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/simd.h>
+
+struct poly1305_desc_ctx {
+	struct poly1305_ctx ctx;
+	u8 key[POLY1305_KEY_SIZE];
+	unsigned int rem_key_bytes;
+};
+
+static int crypto_poly1305_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	dctx->rem_key_bytes = POLY1305_KEY_SIZE;
+	return 0;
+}
+
+static int crypto_poly1305_update(struct shash_desc *desc, const u8 *src,
+				  unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	simd_context_t simd_context = simd_get();
+
+	if (unlikely(dctx->rem_key_bytes)) {
+		unsigned int key_bytes = min(srclen, dctx->rem_key_bytes);
+		memcpy(dctx->key + (POLY1305_KEY_SIZE - dctx->rem_key_bytes),
+		       src, key_bytes);
+		src += key_bytes;
+		srclen -= key_bytes;
+		dctx->rem_key_bytes -= key_bytes;
+		if (!dctx->rem_key_bytes) {
+			poly1305_init(&dctx->ctx, dctx->key, simd_context);
+			memzero_explicit(dctx->key, sizeof(dctx->key));
+		}
+	}
+	poly1305_update(&dctx->ctx, src, srclen, simd_context);
+	simd_put(simd_context);
+
+	return 0;
+}
+
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	simd_context_t simd_context = simd_get();
+	poly1305_final(&dctx->ctx, dst, simd_context);
+	simd_put(simd_context);
+	return 0;
+}
+
+static struct shash_alg poly1305_alg = {
+	.digestsize	= POLY1305_MAC_SIZE,
+	.init		= crypto_poly1305_init,
+	.update		= crypto_poly1305_update,
+	.final		= crypto_poly1305_final,
+	.descsize	= sizeof(struct poly1305_desc_ctx),
+	.base		= {
+		.cra_name		= "poly1305",
+		.cra_driver_name	= "poly1305-software",
+		.cra_priority		= 100,
+		.cra_blocksize		= POLY1305_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static int __init poly1305_mod_init(void)
+{
+	return crypto_register_shash(&poly1305_alg);
+}
+
+static void __exit poly1305_mod_exit(void)
+{
+	crypto_unregister_shash(&poly1305_alg);
+}
+
+module_init(poly1305_mod_init);
+module_exit(poly1305_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_DESCRIPTION("Poly1305 authenticator");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-software");
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
deleted file mode 100644
index f718a19da82f..000000000000
--- a/include/crypto/poly1305.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Common values for the Poly1305 algorithm
- */
-
-#ifndef _CRYPTO_POLY1305_H
-#define _CRYPTO_POLY1305_H
-
-#include <linux/types.h>
-#include <linux/crypto.h>
-
-#define POLY1305_BLOCK_SIZE	16
-#define POLY1305_KEY_SIZE	32
-#define POLY1305_DIGEST_SIZE	16
-
-struct poly1305_desc_ctx {
-	/* key */
-	u32 r[5];
-	/* finalize key */
-	u32 s[4];
-	/* accumulator */
-	u32 h[5];
-	/* partial buffer */
-	u8 buf[POLY1305_BLOCK_SIZE];
-	/* bytes used in partial buffer */
-	unsigned int buflen;
-	/* r key has been set */
-	bool rset;
-	/* s key has been set */
-	bool sset;
-};
-
-int crypto_poly1305_init(struct shash_desc *desc);
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
-					const u8 *src, unsigned int srclen);
-int crypto_poly1305_update(struct shash_desc *desc,
-			   const u8 *src, unsigned int srclen);
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
-
-#endif
-- 
2.19.0

^ permalink raw reply related

* [PATCH net-next v4 12/20] zinc: BLAKE2s generic C implementation and selftest
From: Jason A. Donenfeld @ 2018-09-14 16:22 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-crypto, davem, gregkh
  Cc: Jason A. Donenfeld, Samuel Neves, Andy Lutomirski,
	Jean-Philippe Aumasson
In-Reply-To: <20180914162240.7925-1-Jason@zx2c4.com>

The C implementation was originally based on Samuel Neves' public
domain reference implementation but has since been heavily modified
for the kernel. We're able to do compile-time optimizations by moving
some scaffolding around the final function into the header file.

Information: https://blake2.net/

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
---
 include/zinc/blake2s.h      |  101 ++
 lib/zinc/Kconfig            |    4 +
 lib/zinc/Makefile           |    4 +
 lib/zinc/blake2s/blake2s.c  |  274 +++++
 lib/zinc/main.c             |    5 +
 lib/zinc/selftest/blake2s.h | 2095 +++++++++++++++++++++++++++++++++++
 6 files changed, 2483 insertions(+)
 create mode 100644 include/zinc/blake2s.h
 create mode 100644 lib/zinc/blake2s/blake2s.c
 create mode 100644 lib/zinc/selftest/blake2s.h

diff --git a/include/zinc/blake2s.h b/include/zinc/blake2s.h
new file mode 100644
index 000000000000..5e32d576e86a
--- /dev/null
+++ b/include/zinc/blake2s.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _ZINC_BLAKE2S_H
+#define _ZINC_BLAKE2S_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <crypto/algapi.h>
+
+enum blake2s_lengths {
+	BLAKE2S_BLOCKBYTES = 64,
+	BLAKE2S_OUTBYTES = 32,
+	BLAKE2S_KEYBYTES = 32
+};
+
+struct blake2s_state {
+	u32 h[8];
+	u32 t[2];
+	u32 f[2];
+	u8 buf[BLAKE2S_BLOCKBYTES];
+	size_t buflen;
+	u8 last_node;
+};
+
+void blake2s_init(struct blake2s_state *state, const size_t outlen);
+void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
+		      const void *key, const size_t keylen);
+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
+void __blake2s_final(struct blake2s_state *state);
+static inline void blake2s_final(struct blake2s_state *state, u8 *out,
+				 const size_t outlen)
+{
+	int i;
+
+#ifdef DEBUG
+	BUG_ON(!out || !outlen || outlen > BLAKE2S_OUTBYTES);
+#endif
+	__blake2s_final(state);
+
+	if (__builtin_constant_p(outlen) && !(outlen % sizeof(u32))) {
+		if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+		    IS_ALIGNED((unsigned long)out, __alignof__(u32))) {
+			__le32 *outwords = (__le32 *)out;
+
+			for (i = 0; i < outlen / sizeof(u32); ++i)
+				outwords[i] = cpu_to_le32(state->h[i]);
+		} else {
+			__le32 buffer[BLAKE2S_OUTBYTES];
+
+			for (i = 0; i < outlen / sizeof(u32); ++i)
+				buffer[i] = cpu_to_le32(state->h[i]);
+			memcpy(out, buffer, outlen);
+			memzero_explicit(buffer, sizeof(buffer));
+		}
+	} else {
+		u8 buffer[BLAKE2S_OUTBYTES] __aligned(__alignof__(u32));
+		__le32 *outwords = (__le32 *)buffer;
+
+		for (i = 0; i < 8; ++i)
+			outwords[i] = cpu_to_le32(state->h[i]);
+		memcpy(out, buffer, outlen);
+		memzero_explicit(buffer, sizeof(buffer));
+	}
+
+	memzero_explicit(state, sizeof(*state));
+}
+
+static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
+			   const size_t outlen, const size_t inlen,
+			   const size_t keylen)
+{
+	struct blake2s_state state;
+
+#ifdef DEBUG
+	BUG_ON((!in && inlen > 0) || !out || !outlen ||
+	       outlen > BLAKE2S_OUTBYTES || keylen > BLAKE2S_KEYBYTES ||
+	       (!key && keylen));
+#endif
+
+	if (keylen)
+		blake2s_init_key(&state, outlen, key, keylen);
+	else
+		blake2s_init(&state, outlen);
+
+	blake2s_update(&state, in, inlen);
+	blake2s_final(&state, out, outlen);
+}
+
+void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
+		  const size_t inlen, const size_t keylen);
+
+void blake2s_fpu_init(void);
+
+#ifdef DEBUG
+bool blake2s_selftest(void);
+#endif
+
+#endif /* _ZINC_BLAKE2S_H */
diff --git a/lib/zinc/Kconfig b/lib/zinc/Kconfig
index 98d095ed2418..4db30012c2d6 100644
--- a/lib/zinc/Kconfig
+++ b/lib/zinc/Kconfig
@@ -17,6 +17,10 @@ config ZINC_CHACHA20POLY1305
 	select ZINC_POLY1305
 	select CRYPTO_BLKCIPHER
 
+config ZINC_BLAKE2S
+	bool
+	select ZINC
+
 config ZINC_DEBUG
 	bool "Zinc cryptography library debugging and self-tests"
 	depends on ZINC
diff --git a/lib/zinc/Makefile b/lib/zinc/Makefile
index 1664871aaf71..45817ec5539c 100644
--- a/lib/zinc/Makefile
+++ b/lib/zinc/Makefile
@@ -51,6 +51,10 @@ ifeq ($(CONFIG_ZINC_CHACHA20POLY1305),y)
 zinc-y += chacha20poly1305.o
 endif
 
+ifeq ($(CONFIG_ZINC_BLAKE2S),y)
+zinc-y += blake2s/blake2s.o
+endif
+
 zinc-y += main.o
 
 obj-$(CONFIG_ZINC) := zinc.o
diff --git a/lib/zinc/blake2s/blake2s.c b/lib/zinc/blake2s/blake2s.c
new file mode 100644
index 000000000000..13765d327589
--- /dev/null
+++ b/lib/zinc/blake2s/blake2s.c
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2012 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is an implementation of the BLAKE2s hash and PRF functions.
+ *
+ * Information: https://blake2.net/
+ *
+ */
+
+#include <zinc/blake2s.h>
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <asm/unaligned.h>
+
+typedef union {
+	struct {
+		u8 digest_length;
+		u8 key_length;
+		u8 fanout;
+		u8 depth;
+		u32 leaf_length;
+		u32 node_offset;
+		u16 xof_length;
+		u8 node_depth;
+		u8 inner_length;
+		u8 salt[8];
+		u8 personal[8];
+	};
+	__le32 words[8];
+} __packed blake2s_param;
+
+static const u32 blake2s_iv[8] = {
+	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
+};
+
+static const u8 blake2s_sigma[10][16] = {
+	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+	{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+	{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+	{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+	{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+	{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+	{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+	{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+	{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+	{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+};
+
+static inline void blake2s_set_lastblock(struct blake2s_state *state)
+{
+	if (state->last_node)
+		state->f[1] = -1;
+	state->f[0] = -1;
+}
+
+static inline void blake2s_increment_counter(struct blake2s_state *state,
+					     const u32 inc)
+{
+	state->t[0] += inc;
+	state->t[1] += (state->t[0] < inc);
+}
+
+static inline void blake2s_init_param(struct blake2s_state *state,
+				      const blake2s_param *param)
+{
+	int i;
+
+	memset(state, 0, sizeof(*state));
+	for (i = 0; i < 8; ++i)
+		state->h[i] = blake2s_iv[i] ^ le32_to_cpu(param->words[i]);
+}
+
+void blake2s_init(struct blake2s_state *state, const size_t outlen)
+{
+	blake2s_param param __aligned(__alignof__(u32)) = {
+		.digest_length = outlen,
+		.fanout = 1,
+		.depth = 1
+	};
+
+#ifdef DEBUG
+	BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES);
+#endif
+	blake2s_init_param(state, &param);
+}
+EXPORT_SYMBOL(blake2s_init);
+
+void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
+		      const void *key, const size_t keylen)
+{
+	blake2s_param param = { .digest_length = outlen,
+				.key_length = keylen,
+				.fanout = 1,
+				.depth = 1 };
+	u8 block[BLAKE2S_BLOCKBYTES] = { 0 };
+
+#ifdef DEBUG
+	BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES || !key || !keylen ||
+	       keylen > BLAKE2S_KEYBYTES);
+#endif
+	blake2s_init_param(state, &param);
+	memcpy(block, key, keylen);
+	blake2s_update(state, block, BLAKE2S_BLOCKBYTES);
+	memzero_explicit(block, BLAKE2S_BLOCKBYTES);
+}
+EXPORT_SYMBOL(blake2s_init_key);
+
+#ifndef HAVE_BLAKE2S_ARCH_IMPLEMENTATION
+void __init blake2s_fpu_init(void)
+{
+}
+static inline bool blake2s_arch(struct blake2s_state *state, const u8 *block,
+				const size_t nblocks, const u32 inc)
+{
+	return false;
+}
+#endif
+
+static inline void blake2s_compress(struct blake2s_state *state,
+				    const u8 *block, size_t nblocks,
+				    const u32 inc)
+{
+	u32 m[16];
+	u32 v[16];
+	int i;
+
+#ifdef DEBUG
+	BUG_ON(nblocks > 1 && inc != BLAKE2S_BLOCKBYTES);
+#endif
+
+	if (blake2s_arch(state, block, nblocks, inc))
+		return;
+
+	while (nblocks > 0) {
+		blake2s_increment_counter(state, inc);
+
+#ifdef __LITTLE_ENDIAN
+		memcpy(m, block, BLAKE2S_BLOCKBYTES);
+#else
+		for (i = 0; i < 16; ++i)
+			m[i] = get_unaligned_le32(block + i * sizeof(m[i]));
+#endif
+		memcpy(v, state->h, 32);
+		v[ 8] = blake2s_iv[0];
+		v[ 9] = blake2s_iv[1];
+		v[10] = blake2s_iv[2];
+		v[11] = blake2s_iv[3];
+		v[12] = blake2s_iv[4] ^ state->t[0];
+		v[13] = blake2s_iv[5] ^ state->t[1];
+		v[14] = blake2s_iv[6] ^ state->f[0];
+		v[15] = blake2s_iv[7] ^ state->f[1];
+
+#define G(r, i, a, b, c, d) do { \
+	a += b + m[blake2s_sigma[r][2 * i + 0]]; \
+	d = ror32(d ^ a, 16); \
+	c += d; \
+	b = ror32(b ^ c, 12); \
+	a += b + m[blake2s_sigma[r][2 * i + 1]]; \
+	d = ror32(d ^ a, 8); \
+	c += d; \
+	b = ror32(b ^ c, 7); \
+} while (0)
+
+#define ROUND(r) do { \
+	G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
+	G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
+	G(r, 2, v[2], v[ 6], v[10], v[14]); \
+	G(r, 3, v[3], v[ 7], v[11], v[15]); \
+	G(r, 4, v[0], v[ 5], v[10], v[15]); \
+	G(r, 5, v[1], v[ 6], v[11], v[12]); \
+	G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
+	G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
+} while (0)
+		ROUND(0);
+		ROUND(1);
+		ROUND(2);
+		ROUND(3);
+		ROUND(4);
+		ROUND(5);
+		ROUND(6);
+		ROUND(7);
+		ROUND(8);
+		ROUND(9);
+
+#undef G
+#undef ROUND
+
+		for (i = 0; i < 8; ++i)
+			state->h[i] ^= v[i] ^ v[i + 8];
+
+		block += BLAKE2S_BLOCKBYTES;
+		--nblocks;
+	}
+}
+
+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
+{
+	const size_t fill = BLAKE2S_BLOCKBYTES - state->buflen;
+
+	if (unlikely(!inlen))
+		return;
+	if (inlen > fill) {
+		memcpy(state->buf + state->buflen, in, fill);
+		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCKBYTES);
+		state->buflen = 0;
+		in += fill;
+		inlen -= fill;
+	}
+	if (inlen > BLAKE2S_BLOCKBYTES) {
+		const size_t nblocks =
+			(inlen + BLAKE2S_BLOCKBYTES - 1) / BLAKE2S_BLOCKBYTES;
+		/* Hash one less (full) block than strictly possible */
+		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCKBYTES);
+		in += BLAKE2S_BLOCKBYTES * (nblocks - 1);
+		inlen -= BLAKE2S_BLOCKBYTES * (nblocks - 1);
+	}
+	memcpy(state->buf + state->buflen, in, inlen);
+	state->buflen += inlen;
+}
+EXPORT_SYMBOL(blake2s_update);
+
+void __blake2s_final(struct blake2s_state *state)
+{
+	blake2s_set_lastblock(state);
+	memset(state->buf + state->buflen, 0,
+	       BLAKE2S_BLOCKBYTES - state->buflen); /* Padding */
+	blake2s_compress(state, state->buf, 1, state->buflen);
+}
+EXPORT_SYMBOL(__blake2s_final);
+
+void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
+		  const size_t inlen, const size_t keylen)
+{
+	struct blake2s_state state;
+	u8 x_key[BLAKE2S_BLOCKBYTES] __aligned(__alignof__(u32)) = { 0 };
+	u8 i_hash[BLAKE2S_OUTBYTES] __aligned(__alignof__(u32));
+	int i;
+
+	if (keylen > BLAKE2S_BLOCKBYTES) {
+		blake2s_init(&state, BLAKE2S_OUTBYTES);
+		blake2s_update(&state, key, keylen);
+		blake2s_final(&state, x_key, BLAKE2S_OUTBYTES);
+	} else
+		memcpy(x_key, key, keylen);
+
+	for (i = 0; i < BLAKE2S_BLOCKBYTES; ++i)
+		x_key[i] ^= 0x36;
+
+	blake2s_init(&state, BLAKE2S_OUTBYTES);
+	blake2s_update(&state, x_key, BLAKE2S_BLOCKBYTES);
+	blake2s_update(&state, in, inlen);
+	blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES);
+
+	for (i = 0; i < BLAKE2S_BLOCKBYTES; ++i)
+		x_key[i] ^= 0x5c ^ 0x36;
+
+	blake2s_init(&state, BLAKE2S_OUTBYTES);
+	blake2s_update(&state, x_key, BLAKE2S_BLOCKBYTES);
+	blake2s_update(&state, i_hash, BLAKE2S_OUTBYTES);
+	blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES);
+
+	memcpy(out, i_hash, outlen);
+	memzero_explicit(x_key, BLAKE2S_BLOCKBYTES);
+	memzero_explicit(i_hash, BLAKE2S_OUTBYTES);
+}
+EXPORT_SYMBOL(blake2s_hmac);
+
+#include "../selftest/blake2s.h"
diff --git a/lib/zinc/main.c b/lib/zinc/main.c
index a15b27fd0e4c..b1d650d82edb 100644
--- a/lib/zinc/main.c
+++ b/lib/zinc/main.c
@@ -6,6 +6,7 @@
 #include <zinc/chacha20poly1305.h>
 #include <zinc/chacha20.h>
 #include <zinc/poly1305.h>
+#include <zinc/blake2s.h>
 
 #include <linux/init.h>
 #include <linux/module.h>
@@ -30,6 +31,10 @@ static int __init mod_init(void)
 #endif
 #ifdef CONFIG_ZINC_CHACHA20POLY1305
 	selftest(chacha20poly1305);
+#endif
+#ifdef CONFIG_ZINC_BLAKE2S
+	blake2s_fpu_init();
+	selftest(blake2s);
 #endif
 	return 0;
 }
diff --git a/lib/zinc/selftest/blake2s.h b/lib/zinc/selftest/blake2s.h
new file mode 100644
index 000000000000..ddd6a862b344
--- /dev/null
+++ b/lib/zinc/selftest/blake2s.h
@@ -0,0 +1,2095 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+static const u8 blake2s_testvecs[][BLAKE2S_OUTBYTES] __initconst = {
+	{ 0x69, 0x21, 0x7a, 0x30, 0x79, 0x90, 0x80, 0x94,
+	  0xe1, 0x11, 0x21, 0xd0, 0x42, 0x35, 0x4a, 0x7c,
+	  0x1f, 0x55, 0xb6, 0x48, 0x2c, 0xa1, 0xa5, 0x1e,
+	  0x1b, 0x25, 0x0d, 0xfd, 0x1e, 0xd0, 0xee, 0xf9 },
+	{ 0xe3, 0x4d, 0x74, 0xdb, 0xaf, 0x4f, 0xf4, 0xc6,
+	  0xab, 0xd8, 0x71, 0xcc, 0x22, 0x04, 0x51, 0xd2,
+	  0xea, 0x26, 0x48, 0x84, 0x6c, 0x77, 0x57, 0xfb,
+	  0xaa, 0xc8, 0x2f, 0xe5, 0x1a, 0xd6, 0x4b, 0xea },
+	{ 0xdd, 0xad, 0x9a, 0xb1, 0x5d, 0xac, 0x45, 0x49,
+	  0xba, 0x42, 0xf4, 0x9d, 0x26, 0x24, 0x96, 0xbe,
+	  0xf6, 0xc0, 0xba, 0xe1, 0xdd, 0x34, 0x2a, 0x88,
+	  0x08, 0xf8, 0xea, 0x26, 0x7c, 0x6e, 0x21, 0x0c },
+	{ 0xe8, 0xf9, 0x1c, 0x6e, 0xf2, 0x32, 0xa0, 0x41,
+	  0x45, 0x2a, 0xb0, 0xe1, 0x49, 0x07, 0x0c, 0xdd,
+	  0x7d, 0xd1, 0x76, 0x9e, 0x75, 0xb3, 0xa5, 0x92,
+	  0x1b, 0xe3, 0x78, 0x76, 0xc4, 0x5c, 0x99, 0x00 },
+	{ 0x0c, 0xc7, 0x0e, 0x00, 0x34, 0x8b, 0x86, 0xba,
+	  0x29, 0x44, 0xd0, 0xc3, 0x20, 0x38, 0xb2, 0x5c,
+	  0x55, 0x58, 0x4f, 0x90, 0xdf, 0x23, 0x04, 0xf5,
+	  0x5f, 0xa3, 0x32, 0xaf, 0x5f, 0xb0, 0x1e, 0x20 },
+	{ 0xec, 0x19, 0x64, 0x19, 0x10, 0x87, 0xa4, 0xfe,
+	  0x9d, 0xf1, 0xc7, 0x95, 0x34, 0x2a, 0x02, 0xff,
+	  0xc1, 0x91, 0xa5, 0xb2, 0x51, 0x76, 0x48, 0x56,
+	  0xae, 0x5b, 0x8b, 0x57, 0x69, 0xf0, 0xc6, 0xcd },
+	{ 0xe1, 0xfa, 0x51, 0x61, 0x8d, 0x7d, 0xf4, 0xeb,
+	  0x70, 0xcf, 0x0d, 0x5a, 0x9e, 0x90, 0x6f, 0x80,
+	  0x6e, 0x9d, 0x19, 0xf7, 0xf4, 0xf0, 0x1e, 0x3b,
+	  0x62, 0x12, 0x88, 0xe4, 0x12, 0x04, 0x05, 0xd6 },
+	{ 0x59, 0x80, 0x01, 0xfa, 0xfb, 0xe8, 0xf9, 0x4e,
+	  0xc6, 0x6d, 0xc8, 0x27, 0xd0, 0x12, 0xcf, 0xcb,
+	  0xba, 0x22, 0x28, 0x56, 0x9f, 0x44, 0x8e, 0x89,
+	  0xea, 0x22, 0x08, 0xc8, 0xbf, 0x76, 0x92, 0x93 },
+	{ 0xc7, 0xe8, 0x87, 0xb5, 0x46, 0x62, 0x36, 0x35,
+	  0xe9, 0x3e, 0x04, 0x95, 0x59, 0x8f, 0x17, 0x26,
+	  0x82, 0x19, 0x96, 0xc2, 0x37, 0x77, 0x05, 0xb9,
+	  0x3a, 0x1f, 0x63, 0x6f, 0x87, 0x2b, 0xfa, 0x2d },
+	{ 0xc3, 0x15, 0xa4, 0x37, 0xdd, 0x28, 0x06, 0x2a,
+	  0x77, 0x0d, 0x48, 0x19, 0x67, 0x13, 0x6b, 0x1b,
+	  0x5e, 0xb8, 0x8b, 0x21, 0xee, 0x53, 0xd0, 0x32,
+	  0x9c, 0x58, 0x97, 0x12, 0x6e, 0x9d, 0xb0, 0x2c },
+	{ 0xbb, 0x47, 0x3d, 0xed, 0xdc, 0x05, 0x5f, 0xea,
+	  0x62, 0x28, 0xf2, 0x07, 0xda, 0x57, 0x53, 0x47,
+	  0xbb, 0x00, 0x40, 0x4c, 0xd3, 0x49, 0xd3, 0x8c,
+	  0x18, 0x02, 0x63, 0x07, 0xa2, 0x24, 0xcb, 0xff },
+	{ 0x68, 0x7e, 0x18, 0x73, 0xa8, 0x27, 0x75, 0x91,
+	  0xbb, 0x33, 0xd9, 0xad, 0xf9, 0xa1, 0x39, 0x12,
+	  0xef, 0xef, 0xe5, 0x57, 0xca, 0xfc, 0x39, 0xa7,
+	  0x95, 0x26, 0x23, 0xe4, 0x72, 0x55, 0xf1, 0x6d },
+	{ 0x1a, 0xc7, 0xba, 0x75, 0x4d, 0x6e, 0x2f, 0x94,
+	  0xe0, 0xe8, 0x6c, 0x46, 0xbf, 0xb2, 0x62, 0xab,
+	  0xbb, 0x74, 0xf4, 0x50, 0xef, 0x45, 0x6d, 0x6b,
+	  0x4d, 0x97, 0xaa, 0x80, 0xce, 0x6d, 0xa7, 0x67 },
+	{ 0x01, 0x2c, 0x97, 0x80, 0x96, 0x14, 0x81, 0x6b,
+	  0x5d, 0x94, 0x94, 0x47, 0x7d, 0x4b, 0x68, 0x7d,
+	  0x15, 0xb9, 0x6e, 0xb6, 0x9c, 0x0e, 0x80, 0x74,
+	  0xa8, 0x51, 0x6f, 0x31, 0x22, 0x4b, 0x5c, 0x98 },
+	{ 0x91, 0xff, 0xd2, 0x6c, 0xfa, 0x4d, 0xa5, 0x13,
+	  0x4c, 0x7e, 0xa2, 0x62, 0xf7, 0x88, 0x9c, 0x32,
+	  0x9f, 0x61, 0xf6, 0xa6, 0x57, 0x22, 0x5c, 0xc2,
+	  0x12, 0xf4, 0x00, 0x56, 0xd9, 0x86, 0xb3, 0xf4 },
+	{ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
+	  0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
+	  0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
+	  0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d },
+	{ 0xef, 0xc0, 0x4c, 0xdc, 0x39, 0x1c, 0x7e, 0x91,
+	  0x19, 0xbd, 0x38, 0x66, 0x8a, 0x53, 0x4e, 0x65,
+	  0xfe, 0x31, 0x03, 0x6d, 0x6a, 0x62, 0x11, 0x2e,
+	  0x44, 0xeb, 0xeb, 0x11, 0xf9, 0xc5, 0x70, 0x80 },
+	{ 0x99, 0x2c, 0xf5, 0xc0, 0x53, 0x44, 0x2a, 0x5f,
+	  0xbc, 0x4f, 0xaf, 0x58, 0x3e, 0x04, 0xe5, 0x0b,
+	  0xb7, 0x0d, 0x2f, 0x39, 0xfb, 0xb6, 0xa5, 0x03,
+	  0xf8, 0x9e, 0x56, 0xa6, 0x3e, 0x18, 0x57, 0x8a },
+	{ 0x38, 0x64, 0x0e, 0x9f, 0x21, 0x98, 0x3e, 0x67,
+	  0xb5, 0x39, 0xca, 0xcc, 0xae, 0x5e, 0xcf, 0x61,
+	  0x5a, 0xe2, 0x76, 0x4f, 0x75, 0xa0, 0x9c, 0x9c,
+	  0x59, 0xb7, 0x64, 0x83, 0xc1, 0xfb, 0xc7, 0x35 },
+	{ 0x21, 0x3d, 0xd3, 0x4c, 0x7e, 0xfe, 0x4f, 0xb2,
+	  0x7a, 0x6b, 0x35, 0xf6, 0xb4, 0x00, 0x0d, 0x1f,
+	  0xe0, 0x32, 0x81, 0xaf, 0x3c, 0x72, 0x3e, 0x5c,
+	  0x9f, 0x94, 0x74, 0x7a, 0x5f, 0x31, 0xcd, 0x3b },
+	{ 0xec, 0x24, 0x6e, 0xee, 0xb9, 0xce, 0xd3, 0xf7,
+	  0xad, 0x33, 0xed, 0x28, 0x66, 0x0d, 0xd9, 0xbb,
+	  0x07, 0x32, 0x51, 0x3d, 0xb4, 0xe2, 0xfa, 0x27,
+	  0x8b, 0x60, 0xcd, 0xe3, 0x68, 0x2a, 0x4c, 0xcd },
+	{ 0xac, 0x9b, 0x61, 0xd4, 0x46, 0x64, 0x8c, 0x30,
+	  0x05, 0xd7, 0x89, 0x2b, 0xf3, 0xa8, 0x71, 0x9f,
+	  0x4c, 0x81, 0x81, 0xcf, 0xdc, 0xbc, 0x2b, 0x79,
+	  0xfe, 0xf1, 0x0a, 0x27, 0x9b, 0x91, 0x10, 0x95 },
+	{ 0x7b, 0xf8, 0xb2, 0x29, 0x59, 0xe3, 0x4e, 0x3a,
+	  0x43, 0xf7, 0x07, 0x92, 0x23, 0xe8, 0x3a, 0x97,
+	  0x54, 0x61, 0x7d, 0x39, 0x1e, 0x21, 0x3d, 0xfd,
+	  0x80, 0x8e, 0x41, 0xb9, 0xbe, 0xad, 0x4c, 0xe7 },
+	{ 0x68, 0xd4, 0xb5, 0xd4, 0xfa, 0x0e, 0x30, 0x2b,
+	  0x64, 0xcc, 0xc5, 0xaf, 0x79, 0x29, 0x13, 0xac,
+	  0x4c, 0x88, 0xec, 0x95, 0xc0, 0x7d, 0xdf, 0x40,
+	  0x69, 0x42, 0x56, 0xeb, 0x88, 0xce, 0x9f, 0x3d },
+	{ 0xb2, 0xc2, 0x42, 0x0f, 0x05, 0xf9, 0xab, 0xe3,
+	  0x63, 0x15, 0x91, 0x93, 0x36, 0xb3, 0x7e, 0x4e,
+	  0x0f, 0xa3, 0x3f, 0xf7, 0xe7, 0x6a, 0x49, 0x27,
+	  0x67, 0x00, 0x6f, 0xdb, 0x5d, 0x93, 0x54, 0x62 },
+	{ 0x13, 0x4f, 0x61, 0xbb, 0xd0, 0xbb, 0xb6, 0x9a,
+	  0xed, 0x53, 0x43, 0x90, 0x45, 0x51, 0xa3, 0xe6,
+	  0xc1, 0xaa, 0x7d, 0xcd, 0xd7, 0x7e, 0x90, 0x3e,
+	  0x70, 0x23, 0xeb, 0x7c, 0x60, 0x32, 0x0a, 0xa7 },
+	{ 0x46, 0x93, 0xf9, 0xbf, 0xf7, 0xd4, 0xf3, 0x98,
+	  0x6a, 0x7d, 0x17, 0x6e, 0x6e, 0x06, 0xf7, 0x2a,
+	  0xd1, 0x49, 0x0d, 0x80, 0x5c, 0x99, 0xe2, 0x53,
+	  0x47, 0xb8, 0xde, 0x77, 0xb4, 0xdb, 0x6d, 0x9b },
+	{ 0x85, 0x3e, 0x26, 0xf7, 0x41, 0x95, 0x3b, 0x0f,
+	  0xd5, 0xbd, 0xb4, 0x24, 0xe8, 0xab, 0x9e, 0x8b,
+	  0x37, 0x50, 0xea, 0xa8, 0xef, 0x61, 0xe4, 0x79,
+	  0x02, 0xc9, 0x1e, 0x55, 0x4e, 0x9c, 0x73, 0xb9 },
+	{ 0xf7, 0xde, 0x53, 0x63, 0x61, 0xab, 0xaa, 0x0e,
+	  0x15, 0x81, 0x56, 0xcf, 0x0e, 0xa4, 0xf6, 0x3a,
+	  0x99, 0xb5, 0xe4, 0x05, 0x4f, 0x8f, 0xa4, 0xc9,
+	  0xd4, 0x5f, 0x62, 0x85, 0xca, 0xd5, 0x56, 0x94 },
+	{ 0x4c, 0x23, 0x06, 0x08, 0x86, 0x0a, 0x99, 0xae,
+	  0x8d, 0x7b, 0xd5, 0xc2, 0xcc, 0x17, 0xfa, 0x52,
+	  0x09, 0x6b, 0x9a, 0x61, 0xbe, 0xdb, 0x17, 0xcb,
+	  0x76, 0x17, 0x86, 0x4a, 0xd2, 0x9c, 0xa7, 0xa6 },
+	{ 0xae, 0xb9, 0x20, 0xea, 0x87, 0x95, 0x2d, 0xad,
+	  0xb1, 0xfb, 0x75, 0x92, 0x91, 0xe3, 0x38, 0x81,
+	  0x39, 0xa8, 0x72, 0x86, 0x50, 0x01, 0x88, 0x6e,
+	  0xd8, 0x47, 0x52, 0xe9, 0x3c, 0x25, 0x0c, 0x2a },
+	{ 0xab, 0xa4, 0xad, 0x9b, 0x48, 0x0b, 0x9d, 0xf3,
+	  0xd0, 0x8c, 0xa5, 0xe8, 0x7b, 0x0c, 0x24, 0x40,
+	  0xd4, 0xe4, 0xea, 0x21, 0x22, 0x4c, 0x2e, 0xb4,
+	  0x2c, 0xba, 0xe4, 0x69, 0xd0, 0x89, 0xb9, 0x31 },
+	{ 0x05, 0x82, 0x56, 0x07, 0xd7, 0xfd, 0xf2, 0xd8,
+	  0x2e, 0xf4, 0xc3, 0xc8, 0xc2, 0xae, 0xa9, 0x61,
+	  0xad, 0x98, 0xd6, 0x0e, 0xdf, 0xf7, 0xd0, 0x18,
+	  0x98, 0x3e, 0x21, 0x20, 0x4c, 0x0d, 0x93, 0xd1 },
+	{ 0xa7, 0x42, 0xf8, 0xb6, 0xaf, 0x82, 0xd8, 0xa6,
+	  0xca, 0x23, 0x57, 0xc5, 0xf1, 0xcf, 0x91, 0xde,
+	  0xfb, 0xd0, 0x66, 0x26, 0x7d, 0x75, 0xc0, 0x48,
+	  0xb3, 0x52, 0x36, 0x65, 0x85, 0x02, 0x59, 0x62 },
+	{ 0x2b, 0xca, 0xc8, 0x95, 0x99, 0x00, 0x0b, 0x42,
+	  0xc9, 0x5a, 0xe2, 0x38, 0x35, 0xa7, 0x13, 0x70,
+	  0x4e, 0xd7, 0x97, 0x89, 0xc8, 0x4f, 0xef, 0x14,
+	  0x9a, 0x87, 0x4f, 0xf7, 0x33, 0xf0, 0x17, 0xa2 },
+	{ 0xac, 0x1e, 0xd0, 0x7d, 0x04, 0x8f, 0x10, 0x5a,
+	  0x9e, 0x5b, 0x7a, 0xb8, 0x5b, 0x09, 0xa4, 0x92,
+	  0xd5, 0xba, 0xff, 0x14, 0xb8, 0xbf, 0xb0, 0xe9,
+	  0xfd, 0x78, 0x94, 0x86, 0xee, 0xa2, 0xb9, 0x74 },
+	{ 0xe4, 0x8d, 0x0e, 0xcf, 0xaf, 0x49, 0x7d, 0x5b,
+	  0x27, 0xc2, 0x5d, 0x99, 0xe1, 0x56, 0xcb, 0x05,
+	  0x79, 0xd4, 0x40, 0xd6, 0xe3, 0x1f, 0xb6, 0x24,
+	  0x73, 0x69, 0x6d, 0xbf, 0x95, 0xe0, 0x10, 0xe4 },
+	{ 0x12, 0xa9, 0x1f, 0xad, 0xf8, 0xb2, 0x16, 0x44,
+	  0xfd, 0x0f, 0x93, 0x4f, 0x3c, 0x4a, 0x8f, 0x62,
+	  0xba, 0x86, 0x2f, 0xfd, 0x20, 0xe8, 0xe9, 0x61,
+	  0x15, 0x4c, 0x15, 0xc1, 0x38, 0x84, 0xed, 0x3d },
+	{ 0x7c, 0xbe, 0xe9, 0x6e, 0x13, 0x98, 0x97, 0xdc,
+	  0x98, 0xfb, 0xef, 0x3b, 0xe8, 0x1a, 0xd4, 0xd9,
+	  0x64, 0xd2, 0x35, 0xcb, 0x12, 0x14, 0x1f, 0xb6,
+	  0x67, 0x27, 0xe6, 0xe5, 0xdf, 0x73, 0xa8, 0x78 },
+	{ 0xeb, 0xf6, 0x6a, 0xbb, 0x59, 0x7a, 0xe5, 0x72,
+	  0xa7, 0x29, 0x7c, 0xb0, 0x87, 0x1e, 0x35, 0x5a,
+	  0xcc, 0xaf, 0xad, 0x83, 0x77, 0xb8, 0xe7, 0x8b,
+	  0xf1, 0x64, 0xce, 0x2a, 0x18, 0xde, 0x4b, 0xaf },
+	{ 0x71, 0xb9, 0x33, 0xb0, 0x7e, 0x4f, 0xf7, 0x81,
+	  0x8c, 0xe0, 0x59, 0xd0, 0x08, 0x82, 0x9e, 0x45,
+	  0x3c, 0x6f, 0xf0, 0x2e, 0xc0, 0xa7, 0xdb, 0x39,
+	  0x3f, 0xc2, 0xd8, 0x70, 0xf3, 0x7a, 0x72, 0x86 },
+	{ 0x7c, 0xf7, 0xc5, 0x13, 0x31, 0x22, 0x0b, 0x8d,
+	  0x3e, 0xba, 0xed, 0x9c, 0x29, 0x39, 0x8a, 0x16,
+	  0xd9, 0x81, 0x56, 0xe2, 0x61, 0x3c, 0xb0, 0x88,
+	  0xf2, 0xb0, 0xe0, 0x8a, 0x1b, 0xe4, 0xcf, 0x4f },
+	{ 0x3e, 0x41, 0xa1, 0x08, 0xe0, 0xf6, 0x4a, 0xd2,
+	  0x76, 0xb9, 0x79, 0xe1, 0xce, 0x06, 0x82, 0x79,
+	  0xe1, 0x6f, 0x7b, 0xc7, 0xe4, 0xaa, 0x1d, 0x21,
+	  0x1e, 0x17, 0xb8, 0x11, 0x61, 0xdf, 0x16, 0x02 },
+	{ 0x88, 0x65, 0x02, 0xa8, 0x2a, 0xb4, 0x7b, 0xa8,
+	  0xd8, 0x67, 0x10, 0xaa, 0x9d, 0xe3, 0xd4, 0x6e,
+	  0xa6, 0x5c, 0x47, 0xaf, 0x6e, 0xe8, 0xde, 0x45,
+	  0x0c, 0xce, 0xb8, 0xb1, 0x1b, 0x04, 0x5f, 0x50 },
+	{ 0xc0, 0x21, 0xbc, 0x5f, 0x09, 0x54, 0xfe, 0xe9,
+	  0x4f, 0x46, 0xea, 0x09, 0x48, 0x7e, 0x10, 0xa8,
+	  0x48, 0x40, 0xd0, 0x2f, 0x64, 0x81, 0x0b, 0xc0,
+	  0x8d, 0x9e, 0x55, 0x1f, 0x7d, 0x41, 0x68, 0x14 },
+	{ 0x20, 0x30, 0x51, 0x6e, 0x8a, 0x5f, 0xe1, 0x9a,
+	  0xe7, 0x9c, 0x33, 0x6f, 0xce, 0x26, 0x38, 0x2a,
+	  0x74, 0x9d, 0x3f, 0xd0, 0xec, 0x91, 0xe5, 0x37,
+	  0xd4, 0xbd, 0x23, 0x58, 0xc1, 0x2d, 0xfb, 0x22 },
+	{ 0x55, 0x66, 0x98, 0xda, 0xc8, 0x31, 0x7f, 0xd3,
+	  0x6d, 0xfb, 0xdf, 0x25, 0xa7, 0x9c, 0xb1, 0x12,
+	  0xd5, 0x42, 0x58, 0x60, 0x60, 0x5c, 0xba, 0xf5,
+	  0x07, 0xf2, 0x3b, 0xf7, 0xe9, 0xf4, 0x2a, 0xfe },
+	{ 0x2f, 0x86, 0x7b, 0xa6, 0x77, 0x73, 0xfd, 0xc3,
+	  0xe9, 0x2f, 0xce, 0xd9, 0x9a, 0x64, 0x09, 0xad,
+	  0x39, 0xd0, 0xb8, 0x80, 0xfd, 0xe8, 0xf1, 0x09,
+	  0xa8, 0x17, 0x30, 0xc4, 0x45, 0x1d, 0x01, 0x78 },
+	{ 0x17, 0x2e, 0xc2, 0x18, 0xf1, 0x19, 0xdf, 0xae,
+	  0x98, 0x89, 0x6d, 0xff, 0x29, 0xdd, 0x98, 0x76,
+	  0xc9, 0x4a, 0xf8, 0x74, 0x17, 0xf9, 0xae, 0x4c,
+	  0x70, 0x14, 0xbb, 0x4e, 0x4b, 0x96, 0xaf, 0xc7 },
+	{ 0x3f, 0x85, 0x81, 0x4a, 0x18, 0x19, 0x5f, 0x87,
+	  0x9a, 0xa9, 0x62, 0xf9, 0x5d, 0x26, 0xbd, 0x82,
+	  0xa2, 0x78, 0xf2, 0xb8, 0x23, 0x20, 0x21, 0x8f,
+	  0x6b, 0x3b, 0xd6, 0xf7, 0xf6, 0x67, 0xa6, 0xd9 },
+	{ 0x1b, 0x61, 0x8f, 0xba, 0xa5, 0x66, 0xb3, 0xd4,
+	  0x98, 0xc1, 0x2e, 0x98, 0x2c, 0x9e, 0xc5, 0x2e,
+	  0x4d, 0xa8, 0x5a, 0x8c, 0x54, 0xf3, 0x8f, 0x34,
+	  0xc0, 0x90, 0x39, 0x4f, 0x23, 0xc1, 0x84, 0xc1 },
+	{ 0x0c, 0x75, 0x8f, 0xb5, 0x69, 0x2f, 0xfd, 0x41,
+	  0xa3, 0x57, 0x5d, 0x0a, 0xf0, 0x0c, 0xc7, 0xfb,
+	  0xf2, 0xcb, 0xe5, 0x90, 0x5a, 0x58, 0x32, 0x3a,
+	  0x88, 0xae, 0x42, 0x44, 0xf6, 0xe4, 0xc9, 0x93 },
+	{ 0xa9, 0x31, 0x36, 0x0c, 0xad, 0x62, 0x8c, 0x7f,
+	  0x12, 0xa6, 0xc1, 0xc4, 0xb7, 0x53, 0xb0, 0xf4,
+	  0x06, 0x2a, 0xef, 0x3c, 0xe6, 0x5a, 0x1a, 0xe3,
+	  0xf1, 0x93, 0x69, 0xda, 0xdf, 0x3a, 0xe2, 0x3d },
+	{ 0xcb, 0xac, 0x7d, 0x77, 0x3b, 0x1e, 0x3b, 0x3c,
+	  0x66, 0x91, 0xd7, 0xab, 0xb7, 0xe9, 0xdf, 0x04,
+	  0x5c, 0x8b, 0xa1, 0x92, 0x68, 0xde, 0xd1, 0x53,
+	  0x20, 0x7f, 0x5e, 0x80, 0x43, 0x52, 0xec, 0x5d },
+	{ 0x23, 0xa1, 0x96, 0xd3, 0x80, 0x2e, 0xd3, 0xc1,
+	  0xb3, 0x84, 0x01, 0x9a, 0x82, 0x32, 0x58, 0x40,
+	  0xd3, 0x2f, 0x71, 0x95, 0x0c, 0x45, 0x80, 0xb0,
+	  0x34, 0x45, 0xe0, 0x89, 0x8e, 0x14, 0x05, 0x3c },
+	{ 0xf4, 0x49, 0x54, 0x70, 0xf2, 0x26, 0xc8, 0xc2,
+	  0x14, 0xbe, 0x08, 0xfd, 0xfa, 0xd4, 0xbc, 0x4a,
+	  0x2a, 0x9d, 0xbe, 0xa9, 0x13, 0x6a, 0x21, 0x0d,
+	  0xf0, 0xd4, 0xb6, 0x49, 0x29, 0xe6, 0xfc, 0x14 },
+	{ 0xe2, 0x90, 0xdd, 0x27, 0x0b, 0x46, 0x7f, 0x34,
+	  0xab, 0x1c, 0x00, 0x2d, 0x34, 0x0f, 0xa0, 0x16,
+	  0x25, 0x7f, 0xf1, 0x9e, 0x58, 0x33, 0xfd, 0xbb,
+	  0xf2, 0xcb, 0x40, 0x1c, 0x3b, 0x28, 0x17, 0xde },
+	{ 0x9f, 0xc7, 0xb5, 0xde, 0xd3, 0xc1, 0x50, 0x42,
+	  0xb2, 0xa6, 0x58, 0x2d, 0xc3, 0x9b, 0xe0, 0x16,
+	  0xd2, 0x4a, 0x68, 0x2d, 0x5e, 0x61, 0xad, 0x1e,
+	  0xff, 0x9c, 0x63, 0x30, 0x98, 0x48, 0xf7, 0x06 },
+	{ 0x8c, 0xca, 0x67, 0xa3, 0x6d, 0x17, 0xd5, 0xe6,
+	  0x34, 0x1c, 0xb5, 0x92, 0xfd, 0x7b, 0xef, 0x99,
+	  0x26, 0xc9, 0xe3, 0xaa, 0x10, 0x27, 0xea, 0x11,
+	  0xa7, 0xd8, 0xbd, 0x26, 0x0b, 0x57, 0x6e, 0x04 },
+	{ 0x40, 0x93, 0x92, 0xf5, 0x60, 0xf8, 0x68, 0x31,
+	  0xda, 0x43, 0x73, 0xee, 0x5e, 0x00, 0x74, 0x26,
+	  0x05, 0x95, 0xd7, 0xbc, 0x24, 0x18, 0x3b, 0x60,
+	  0xed, 0x70, 0x0d, 0x45, 0x83, 0xd3, 0xf6, 0xf0 },
+	{ 0x28, 0x02, 0x16, 0x5d, 0xe0, 0x90, 0x91, 0x55,
+	  0x46, 0xf3, 0x39, 0x8c, 0xd8, 0x49, 0x16, 0x4a,
+	  0x19, 0xf9, 0x2a, 0xdb, 0xc3, 0x61, 0xad, 0xc9,
+	  0x9b, 0x0f, 0x20, 0xc8, 0xea, 0x07, 0x10, 0x54 },
+	{ 0xad, 0x83, 0x91, 0x68, 0xd9, 0xf8, 0xa4, 0xbe,
+	  0x95, 0xba, 0x9e, 0xf9, 0xa6, 0x92, 0xf0, 0x72,
+	  0x56, 0xae, 0x43, 0xfe, 0x6f, 0x98, 0x64, 0xe2,
+	  0x90, 0x69, 0x1b, 0x02, 0x56, 0xce, 0x50, 0xa9 },
+	{ 0x75, 0xfd, 0xaa, 0x50, 0x38, 0xc2, 0x84, 0xb8,
+	  0x6d, 0x6e, 0x8a, 0xff, 0xe8, 0xb2, 0x80, 0x7e,
+	  0x46, 0x7b, 0x86, 0x60, 0x0e, 0x79, 0xaf, 0x36,
+	  0x89, 0xfb, 0xc0, 0x63, 0x28, 0xcb, 0xf8, 0x94 },
+	{ 0xe5, 0x7c, 0xb7, 0x94, 0x87, 0xdd, 0x57, 0x90,
+	  0x24, 0x32, 0xb2, 0x50, 0x73, 0x38, 0x13, 0xbd,
+	  0x96, 0xa8, 0x4e, 0xfc, 0xe5, 0x9f, 0x65, 0x0f,
+	  0xac, 0x26, 0xe6, 0x69, 0x6a, 0xef, 0xaf, 0xc3 },
+	{ 0x56, 0xf3, 0x4e, 0x8b, 0x96, 0x55, 0x7e, 0x90,
+	  0xc1, 0xf2, 0x4b, 0x52, 0xd0, 0xc8, 0x9d, 0x51,
+	  0x08, 0x6a, 0xcf, 0x1b, 0x00, 0xf6, 0x34, 0xcf,
+	  0x1d, 0xde, 0x92, 0x33, 0xb8, 0xea, 0xaa, 0x3e },
+	{ 0x1b, 0x53, 0xee, 0x94, 0xaa, 0xf3, 0x4e, 0x4b,
+	  0x15, 0x9d, 0x48, 0xde, 0x35, 0x2c, 0x7f, 0x06,
+	  0x61, 0xd0, 0xa4, 0x0e, 0xdf, 0xf9, 0x5a, 0x0b,
+	  0x16, 0x39, 0xb4, 0x09, 0x0e, 0x97, 0x44, 0x72 },
+	{ 0x05, 0x70, 0x5e, 0x2a, 0x81, 0x75, 0x7c, 0x14,
+	  0xbd, 0x38, 0x3e, 0xa9, 0x8d, 0xda, 0x54, 0x4e,
+	  0xb1, 0x0e, 0x6b, 0xc0, 0x7b, 0xae, 0x43, 0x5e,
+	  0x25, 0x18, 0xdb, 0xe1, 0x33, 0x52, 0x53, 0x75 },
+	{ 0xd8, 0xb2, 0x86, 0x6e, 0x8a, 0x30, 0x9d, 0xb5,
+	  0x3e, 0x52, 0x9e, 0xc3, 0x29, 0x11, 0xd8, 0x2f,
+	  0x5c, 0xa1, 0x6c, 0xff, 0x76, 0x21, 0x68, 0x91,
+	  0xa9, 0x67, 0x6a, 0xa3, 0x1a, 0xaa, 0x6c, 0x42 },
+	{ 0xf5, 0x04, 0x1c, 0x24, 0x12, 0x70, 0xeb, 0x04,
+	  0xc7, 0x1e, 0xc2, 0xc9, 0x5d, 0x4c, 0x38, 0xd8,
+	  0x03, 0xb1, 0x23, 0x7b, 0x0f, 0x29, 0xfd, 0x4d,
+	  0xb3, 0xeb, 0x39, 0x76, 0x69, 0xe8, 0x86, 0x99 },
+	{ 0x9a, 0x4c, 0xe0, 0x77, 0xc3, 0x49, 0x32, 0x2f,
+	  0x59, 0x5e, 0x0e, 0xe7, 0x9e, 0xd0, 0xda, 0x5f,
+	  0xab, 0x66, 0x75, 0x2c, 0xbf, 0xef, 0x8f, 0x87,
+	  0xd0, 0xe9, 0xd0, 0x72, 0x3c, 0x75, 0x30, 0xdd },
+	{ 0x65, 0x7b, 0x09, 0xf3, 0xd0, 0xf5, 0x2b, 0x5b,
+	  0x8f, 0x2f, 0x97, 0x16, 0x3a, 0x0e, 0xdf, 0x0c,
+	  0x04, 0xf0, 0x75, 0x40, 0x8a, 0x07, 0xbb, 0xeb,
+	  0x3a, 0x41, 0x01, 0xa8, 0x91, 0x99, 0x0d, 0x62 },
+	{ 0x1e, 0x3f, 0x7b, 0xd5, 0xa5, 0x8f, 0xa5, 0x33,
+	  0x34, 0x4a, 0xa8, 0xed, 0x3a, 0xc1, 0x22, 0xbb,
+	  0x9e, 0x70, 0xd4, 0xef, 0x50, 0xd0, 0x04, 0x53,
+	  0x08, 0x21, 0x94, 0x8f, 0x5f, 0xe6, 0x31, 0x5a },
+	{ 0x80, 0xdc, 0xcf, 0x3f, 0xd8, 0x3d, 0xfd, 0x0d,
+	  0x35, 0xaa, 0x28, 0x58, 0x59, 0x22, 0xab, 0x89,
+	  0xd5, 0x31, 0x39, 0x97, 0x67, 0x3e, 0xaf, 0x90,
+	  0x5c, 0xea, 0x9c, 0x0b, 0x22, 0x5c, 0x7b, 0x5f },
+	{ 0x8a, 0x0d, 0x0f, 0xbf, 0x63, 0x77, 0xd8, 0x3b,
+	  0xb0, 0x8b, 0x51, 0x4b, 0x4b, 0x1c, 0x43, 0xac,
+	  0xc9, 0x5d, 0x75, 0x17, 0x14, 0xf8, 0x92, 0x56,
+	  0x45, 0xcb, 0x6b, 0xc8, 0x56, 0xca, 0x15, 0x0a },
+	{ 0x9f, 0xa5, 0xb4, 0x87, 0x73, 0x8a, 0xd2, 0x84,
+	  0x4c, 0xc6, 0x34, 0x8a, 0x90, 0x19, 0x18, 0xf6,
+	  0x59, 0xa3, 0xb8, 0x9e, 0x9c, 0x0d, 0xfe, 0xea,
+	  0xd3, 0x0d, 0xd9, 0x4b, 0xcf, 0x42, 0xef, 0x8e },
+	{ 0x80, 0x83, 0x2c, 0x4a, 0x16, 0x77, 0xf5, 0xea,
+	  0x25, 0x60, 0xf6, 0x68, 0xe9, 0x35, 0x4d, 0xd3,
+	  0x69, 0x97, 0xf0, 0x37, 0x28, 0xcf, 0xa5, 0x5e,
+	  0x1b, 0x38, 0x33, 0x7c, 0x0c, 0x9e, 0xf8, 0x18 },
+	{ 0xab, 0x37, 0xdd, 0xb6, 0x83, 0x13, 0x7e, 0x74,
+	  0x08, 0x0d, 0x02, 0x6b, 0x59, 0x0b, 0x96, 0xae,
+	  0x9b, 0xb4, 0x47, 0x72, 0x2f, 0x30, 0x5a, 0x5a,
+	  0xc5, 0x70, 0xec, 0x1d, 0xf9, 0xb1, 0x74, 0x3c },
+	{ 0x3e, 0xe7, 0x35, 0xa6, 0x94, 0xc2, 0x55, 0x9b,
+	  0x69, 0x3a, 0xa6, 0x86, 0x29, 0x36, 0x1e, 0x15,
+	  0xd1, 0x22, 0x65, 0xad, 0x6a, 0x3d, 0xed, 0xf4,
+	  0x88, 0xb0, 0xb0, 0x0f, 0xac, 0x97, 0x54, 0xba },
+	{ 0xd6, 0xfc, 0xd2, 0x32, 0x19, 0xb6, 0x47, 0xe4,
+	  0xcb, 0xd5, 0xeb, 0x2d, 0x0a, 0xd0, 0x1e, 0xc8,
+	  0x83, 0x8a, 0x4b, 0x29, 0x01, 0xfc, 0x32, 0x5c,
+	  0xc3, 0x70, 0x19, 0x81, 0xca, 0x6c, 0x88, 0x8b },
+	{ 0x05, 0x20, 0xec, 0x2f, 0x5b, 0xf7, 0xa7, 0x55,
+	  0xda, 0xcb, 0x50, 0xc6, 0xbf, 0x23, 0x3e, 0x35,
+	  0x15, 0x43, 0x47, 0x63, 0xdb, 0x01, 0x39, 0xcc,
+	  0xd9, 0xfa, 0xef, 0xbb, 0x82, 0x07, 0x61, 0x2d },
+	{ 0xaf, 0xf3, 0xb7, 0x5f, 0x3f, 0x58, 0x12, 0x64,
+	  0xd7, 0x66, 0x16, 0x62, 0xb9, 0x2f, 0x5a, 0xd3,
+	  0x7c, 0x1d, 0x32, 0xbd, 0x45, 0xff, 0x81, 0xa4,
+	  0xed, 0x8a, 0xdc, 0x9e, 0xf3, 0x0d, 0xd9, 0x89 },
+	{ 0xd0, 0xdd, 0x65, 0x0b, 0xef, 0xd3, 0xba, 0x63,
+	  0xdc, 0x25, 0x10, 0x2c, 0x62, 0x7c, 0x92, 0x1b,
+	  0x9c, 0xbe, 0xb0, 0xb1, 0x30, 0x68, 0x69, 0x35,
+	  0xb5, 0xc9, 0x27, 0xcb, 0x7c, 0xcd, 0x5e, 0x3b },
+	{ 0xe1, 0x14, 0x98, 0x16, 0xb1, 0x0a, 0x85, 0x14,
+	  0xfb, 0x3e, 0x2c, 0xab, 0x2c, 0x08, 0xbe, 0xe9,
+	  0xf7, 0x3c, 0xe7, 0x62, 0x21, 0x70, 0x12, 0x46,
+	  0xa5, 0x89, 0xbb, 0xb6, 0x73, 0x02, 0xd8, 0xa9 },
+	{ 0x7d, 0xa3, 0xf4, 0x41, 0xde, 0x90, 0x54, 0x31,
+	  0x7e, 0x72, 0xb5, 0xdb, 0xf9, 0x79, 0xda, 0x01,
+	  0xe6, 0xbc, 0xee, 0xbb, 0x84, 0x78, 0xea, 0xe6,
+	  0xa2, 0x28, 0x49, 0xd9, 0x02, 0x92, 0x63, 0x5c },
+	{ 0x12, 0x30, 0xb1, 0xfc, 0x8a, 0x7d, 0x92, 0x15,
+	  0xed, 0xc2, 0xd4, 0xa2, 0xde, 0xcb, 0xdd, 0x0a,
+	  0x6e, 0x21, 0x6c, 0x92, 0x42, 0x78, 0xc9, 0x1f,
+	  0xc5, 0xd1, 0x0e, 0x7d, 0x60, 0x19, 0x2d, 0x94 },
+	{ 0x57, 0x50, 0xd7, 0x16, 0xb4, 0x80, 0x8f, 0x75,
+	  0x1f, 0xeb, 0xc3, 0x88, 0x06, 0xba, 0x17, 0x0b,
+	  0xf6, 0xd5, 0x19, 0x9a, 0x78, 0x16, 0xbe, 0x51,
+	  0x4e, 0x3f, 0x93, 0x2f, 0xbe, 0x0c, 0xb8, 0x71 },
+	{ 0x6f, 0xc5, 0x9b, 0x2f, 0x10, 0xfe, 0xba, 0x95,
+	  0x4a, 0xa6, 0x82, 0x0b, 0x3c, 0xa9, 0x87, 0xee,
+	  0x81, 0xd5, 0xcc, 0x1d, 0xa3, 0xc6, 0x3c, 0xe8,
+	  0x27, 0x30, 0x1c, 0x56, 0x9d, 0xfb, 0x39, 0xce },
+	{ 0xc7, 0xc3, 0xfe, 0x1e, 0xeb, 0xdc, 0x7b, 0x5a,
+	  0x93, 0x93, 0x26, 0xe8, 0xdd, 0xb8, 0x3e, 0x8b,
+	  0xf2, 0xb7, 0x80, 0xb6, 0x56, 0x78, 0xcb, 0x62,
+	  0xf2, 0x08, 0xb0, 0x40, 0xab, 0xdd, 0x35, 0xe2 },
+	{ 0x0c, 0x75, 0xc1, 0xa1, 0x5c, 0xf3, 0x4a, 0x31,
+	  0x4e, 0xe4, 0x78, 0xf4, 0xa5, 0xce, 0x0b, 0x8a,
+	  0x6b, 0x36, 0x52, 0x8e, 0xf7, 0xa8, 0x20, 0x69,
+	  0x6c, 0x3e, 0x42, 0x46, 0xc5, 0xa1, 0x58, 0x64 },
+	{ 0x21, 0x6d, 0xc1, 0x2a, 0x10, 0x85, 0x69, 0xa3,
+	  0xc7, 0xcd, 0xde, 0x4a, 0xed, 0x43, 0xa6, 0xc3,
+	  0x30, 0x13, 0x9d, 0xda, 0x3c, 0xcc, 0x4a, 0x10,
+	  0x89, 0x05, 0xdb, 0x38, 0x61, 0x89, 0x90, 0x50 },
+	{ 0xa5, 0x7b, 0xe6, 0xae, 0x67, 0x56, 0xf2, 0x8b,
+	  0x02, 0xf5, 0x9d, 0xad, 0xf7, 0xe0, 0xd7, 0xd8,
+	  0x80, 0x7f, 0x10, 0xfa, 0x15, 0xce, 0xd1, 0xad,
+	  0x35, 0x85, 0x52, 0x1a, 0x1d, 0x99, 0x5a, 0x89 },
+	{ 0x81, 0x6a, 0xef, 0x87, 0x59, 0x53, 0x71, 0x6c,
+	  0xd7, 0xa5, 0x81, 0xf7, 0x32, 0xf5, 0x3d, 0xd4,
+	  0x35, 0xda, 0xb6, 0x6d, 0x09, 0xc3, 0x61, 0xd2,
+	  0xd6, 0x59, 0x2d, 0xe1, 0x77, 0x55, 0xd8, 0xa8 },
+	{ 0x9a, 0x76, 0x89, 0x32, 0x26, 0x69, 0x3b, 0x6e,
+	  0xa9, 0x7e, 0x6a, 0x73, 0x8f, 0x9d, 0x10, 0xfb,
+	  0x3d, 0x0b, 0x43, 0xae, 0x0e, 0x8b, 0x7d, 0x81,
+	  0x23, 0xea, 0x76, 0xce, 0x97, 0x98, 0x9c, 0x7e },
+	{ 0x8d, 0xae, 0xdb, 0x9a, 0x27, 0x15, 0x29, 0xdb,
+	  0xb7, 0xdc, 0x3b, 0x60, 0x7f, 0xe5, 0xeb, 0x2d,
+	  0x32, 0x11, 0x77, 0x07, 0x58, 0xdd, 0x3b, 0x0a,
+	  0x35, 0x93, 0xd2, 0xd7, 0x95, 0x4e, 0x2d, 0x5b },
+	{ 0x16, 0xdb, 0xc0, 0xaa, 0x5d, 0xd2, 0xc7, 0x74,
+	  0xf5, 0x05, 0x10, 0x0f, 0x73, 0x37, 0x86, 0xd8,
+	  0xa1, 0x75, 0xfc, 0xbb, 0xb5, 0x9c, 0x43, 0xe1,
+	  0xfb, 0xff, 0x3e, 0x1e, 0xaf, 0x31, 0xcb, 0x4a },
+	{ 0x86, 0x06, 0xcb, 0x89, 0x9c, 0x6a, 0xea, 0xf5,
+	  0x1b, 0x9d, 0xb0, 0xfe, 0x49, 0x24, 0xa9, 0xfd,
+	  0x5d, 0xab, 0xc1, 0x9f, 0x88, 0x26, 0xf2, 0xbc,
+	  0x1c, 0x1d, 0x7d, 0xa1, 0x4d, 0x2c, 0x2c, 0x99 },
+	{ 0x84, 0x79, 0x73, 0x1a, 0xed, 0xa5, 0x7b, 0xd3,
+	  0x7e, 0xad, 0xb5, 0x1a, 0x50, 0x7e, 0x30, 0x7f,
+	  0x3b, 0xd9, 0x5e, 0x69, 0xdb, 0xca, 0x94, 0xf3,
+	  0xbc, 0x21, 0x72, 0x60, 0x66, 0xad, 0x6d, 0xfd },
+	{ 0x58, 0x47, 0x3a, 0x9e, 0xa8, 0x2e, 0xfa, 0x3f,
+	  0x3b, 0x3d, 0x8f, 0xc8, 0x3e, 0xd8, 0x86, 0x31,
+	  0x27, 0xb3, 0x3a, 0xe8, 0xde, 0xae, 0x63, 0x07,
+	  0x20, 0x1e, 0xdb, 0x6d, 0xde, 0x61, 0xde, 0x29 },
+	{ 0x9a, 0x92, 0x55, 0xd5, 0x3a, 0xf1, 0x16, 0xde,
+	  0x8b, 0xa2, 0x7c, 0xe3, 0x5b, 0x4c, 0x7e, 0x15,
+	  0x64, 0x06, 0x57, 0xa0, 0xfc, 0xb8, 0x88, 0xc7,
+	  0x0d, 0x95, 0x43, 0x1d, 0xac, 0xd8, 0xf8, 0x30 },
+	{ 0x9e, 0xb0, 0x5f, 0xfb, 0xa3, 0x9f, 0xd8, 0x59,
+	  0x6a, 0x45, 0x49, 0x3e, 0x18, 0xd2, 0x51, 0x0b,
+	  0xf3, 0xef, 0x06, 0x5c, 0x51, 0xd6, 0xe1, 0x3a,
+	  0xbe, 0x66, 0xaa, 0x57, 0xe0, 0x5c, 0xfd, 0xb7 },
+	{ 0x81, 0xdc, 0xc3, 0xa5, 0x05, 0xea, 0xce, 0x3f,
+	  0x87, 0x9d, 0x8f, 0x70, 0x27, 0x76, 0x77, 0x0f,
+	  0x9d, 0xf5, 0x0e, 0x52, 0x1d, 0x14, 0x28, 0xa8,
+	  0x5d, 0xaf, 0x04, 0xf9, 0xad, 0x21, 0x50, 0xe0 },
+	{ 0xe3, 0xe3, 0xc4, 0xaa, 0x3a, 0xcb, 0xbc, 0x85,
+	  0x33, 0x2a, 0xf9, 0xd5, 0x64, 0xbc, 0x24, 0x16,
+	  0x5e, 0x16, 0x87, 0xf6, 0xb1, 0xad, 0xcb, 0xfa,
+	  0xe7, 0x7a, 0x8f, 0x03, 0xc7, 0x2a, 0xc2, 0x8c },
+	{ 0x67, 0x46, 0xc8, 0x0b, 0x4e, 0xb5, 0x6a, 0xea,
+	  0x45, 0xe6, 0x4e, 0x72, 0x89, 0xbb, 0xa3, 0xed,
+	  0xbf, 0x45, 0xec, 0xf8, 0x20, 0x64, 0x81, 0xff,
+	  0x63, 0x02, 0x12, 0x29, 0x84, 0xcd, 0x52, 0x6a },
+	{ 0x2b, 0x62, 0x8e, 0x52, 0x76, 0x4d, 0x7d, 0x62,
+	  0xc0, 0x86, 0x8b, 0x21, 0x23, 0x57, 0xcd, 0xd1,
+	  0x2d, 0x91, 0x49, 0x82, 0x2f, 0x4e, 0x98, 0x45,
+	  0xd9, 0x18, 0xa0, 0x8d, 0x1a, 0xe9, 0x90, 0xc0 },
+	{ 0xe4, 0xbf, 0xe8, 0x0d, 0x58, 0xc9, 0x19, 0x94,
+	  0x61, 0x39, 0x09, 0xdc, 0x4b, 0x1a, 0x12, 0x49,
+	  0x68, 0x96, 0xc0, 0x04, 0xaf, 0x7b, 0x57, 0x01,
+	  0x48, 0x3d, 0xe4, 0x5d, 0x28, 0x23, 0xd7, 0x8e },
+	{ 0xeb, 0xb4, 0xba, 0x15, 0x0c, 0xef, 0x27, 0x34,
+	  0x34, 0x5b, 0x5d, 0x64, 0x1b, 0xbe, 0xd0, 0x3a,
+	  0x21, 0xea, 0xfa, 0xe9, 0x33, 0xc9, 0x9e, 0x00,
+	  0x92, 0x12, 0xef, 0x04, 0x57, 0x4a, 0x85, 0x30 },
+	{ 0x39, 0x66, 0xec, 0x73, 0xb1, 0x54, 0xac, 0xc6,
+	  0x97, 0xac, 0x5c, 0xf5, 0xb2, 0x4b, 0x40, 0xbd,
+	  0xb0, 0xdb, 0x9e, 0x39, 0x88, 0x36, 0xd7, 0x6d,
+	  0x4b, 0x88, 0x0e, 0x3b, 0x2a, 0xf1, 0xaa, 0x27 },
+	{ 0xef, 0x7e, 0x48, 0x31, 0xb3, 0xa8, 0x46, 0x36,
+	  0x51, 0x8d, 0x6e, 0x4b, 0xfc, 0xe6, 0x4a, 0x43,
+	  0xdb, 0x2a, 0x5d, 0xda, 0x9c, 0xca, 0x2b, 0x44,
+	  0xf3, 0x90, 0x33, 0xbd, 0xc4, 0x0d, 0x62, 0x43 },
+	{ 0x7a, 0xbf, 0x6a, 0xcf, 0x5c, 0x8e, 0x54, 0x9d,
+	  0xdb, 0xb1, 0x5a, 0xe8, 0xd8, 0xb3, 0x88, 0xc1,
+	  0xc1, 0x97, 0xe6, 0x98, 0x73, 0x7c, 0x97, 0x85,
+	  0x50, 0x1e, 0xd1, 0xf9, 0x49, 0x30, 0xb7, 0xd9 },
+	{ 0x88, 0x01, 0x8d, 0xed, 0x66, 0x81, 0x3f, 0x0c,
+	  0xa9, 0x5d, 0xef, 0x47, 0x4c, 0x63, 0x06, 0x92,
+	  0x01, 0x99, 0x67, 0xb9, 0xe3, 0x68, 0x88, 0xda,
+	  0xdd, 0x94, 0x12, 0x47, 0x19, 0xb6, 0x82, 0xf6 },
+	{ 0x39, 0x30, 0x87, 0x6b, 0x9f, 0xc7, 0x52, 0x90,
+	  0x36, 0xb0, 0x08, 0xb1, 0xb8, 0xbb, 0x99, 0x75,
+	  0x22, 0xa4, 0x41, 0x63, 0x5a, 0x0c, 0x25, 0xec,
+	  0x02, 0xfb, 0x6d, 0x90, 0x26, 0xe5, 0x5a, 0x97 },
+	{ 0x0a, 0x40, 0x49, 0xd5, 0x7e, 0x83, 0x3b, 0x56,
+	  0x95, 0xfa, 0xc9, 0x3d, 0xd1, 0xfb, 0xef, 0x31,
+	  0x66, 0xb4, 0x4b, 0x12, 0xad, 0x11, 0x24, 0x86,
+	  0x62, 0x38, 0x3a, 0xe0, 0x51, 0xe1, 0x58, 0x27 },
+	{ 0x81, 0xdc, 0xc0, 0x67, 0x8b, 0xb6, 0xa7, 0x65,
+	  0xe4, 0x8c, 0x32, 0x09, 0x65, 0x4f, 0xe9, 0x00,
+	  0x89, 0xce, 0x44, 0xff, 0x56, 0x18, 0x47, 0x7e,
+	  0x39, 0xab, 0x28, 0x64, 0x76, 0xdf, 0x05, 0x2b },
+	{ 0xe6, 0x9b, 0x3a, 0x36, 0xa4, 0x46, 0x19, 0x12,
+	  0xdc, 0x08, 0x34, 0x6b, 0x11, 0xdd, 0xcb, 0x9d,
+	  0xb7, 0x96, 0xf8, 0x85, 0xfd, 0x01, 0x93, 0x6e,
+	  0x66, 0x2f, 0xe2, 0x92, 0x97, 0xb0, 0x99, 0xa4 },
+	{ 0x5a, 0xc6, 0x50, 0x3b, 0x0d, 0x8d, 0xa6, 0x91,
+	  0x76, 0x46, 0xe6, 0xdc, 0xc8, 0x7e, 0xdc, 0x58,
+	  0xe9, 0x42, 0x45, 0x32, 0x4c, 0xc2, 0x04, 0xf4,
+	  0xdd, 0x4a, 0xf0, 0x15, 0x63, 0xac, 0xd4, 0x27 },
+	{ 0xdf, 0x6d, 0xda, 0x21, 0x35, 0x9a, 0x30, 0xbc,
+	  0x27, 0x17, 0x80, 0x97, 0x1c, 0x1a, 0xbd, 0x56,
+	  0xa6, 0xef, 0x16, 0x7e, 0x48, 0x08, 0x87, 0x88,
+	  0x8e, 0x73, 0xa8, 0x6d, 0x3b, 0xf6, 0x05, 0xe9 },
+	{ 0xe8, 0xe6, 0xe4, 0x70, 0x71, 0xe7, 0xb7, 0xdf,
+	  0x25, 0x80, 0xf2, 0x25, 0xcf, 0xbb, 0xed, 0xf8,
+	  0x4c, 0xe6, 0x77, 0x46, 0x62, 0x66, 0x28, 0xd3,
+	  0x30, 0x97, 0xe4, 0xb7, 0xdc, 0x57, 0x11, 0x07 },
+	{ 0x53, 0xe4, 0x0e, 0xad, 0x62, 0x05, 0x1e, 0x19,
+	  0xcb, 0x9b, 0xa8, 0x13, 0x3e, 0x3e, 0x5c, 0x1c,
+	  0xe0, 0x0d, 0xdc, 0xad, 0x8a, 0xcf, 0x34, 0x2a,
+	  0x22, 0x43, 0x60, 0xb0, 0xac, 0xc1, 0x47, 0x77 },
+	{ 0x9c, 0xcd, 0x53, 0xfe, 0x80, 0xbe, 0x78, 0x6a,
+	  0xa9, 0x84, 0x63, 0x84, 0x62, 0xfb, 0x28, 0xaf,
+	  0xdf, 0x12, 0x2b, 0x34, 0xd7, 0x8f, 0x46, 0x87,
+	  0xec, 0x63, 0x2b, 0xb1, 0x9d, 0xe2, 0x37, 0x1a },
+	{ 0xcb, 0xd4, 0x80, 0x52, 0xc4, 0x8d, 0x78, 0x84,
+	  0x66, 0xa3, 0xe8, 0x11, 0x8c, 0x56, 0xc9, 0x7f,
+	  0xe1, 0x46, 0xe5, 0x54, 0x6f, 0xaa, 0xf9, 0x3e,
+	  0x2b, 0xc3, 0xc4, 0x7e, 0x45, 0x93, 0x97, 0x53 },
+	{ 0x25, 0x68, 0x83, 0xb1, 0x4e, 0x2a, 0xf4, 0x4d,
+	  0xad, 0xb2, 0x8e, 0x1b, 0x34, 0xb2, 0xac, 0x0f,
+	  0x0f, 0x4c, 0x91, 0xc3, 0x4e, 0xc9, 0x16, 0x9e,
+	  0x29, 0x03, 0x61, 0x58, 0xac, 0xaa, 0x95, 0xb9 },
+	{ 0x44, 0x71, 0xb9, 0x1a, 0xb4, 0x2d, 0xb7, 0xc4,
+	  0xdd, 0x84, 0x90, 0xab, 0x95, 0xa2, 0xee, 0x8d,
+	  0x04, 0xe3, 0xef, 0x5c, 0x3d, 0x6f, 0xc7, 0x1a,
+	  0xc7, 0x4b, 0x2b, 0x26, 0x91, 0x4d, 0x16, 0x41 },
+	{ 0xa5, 0xeb, 0x08, 0x03, 0x8f, 0x8f, 0x11, 0x55,
+	  0xed, 0x86, 0xe6, 0x31, 0x90, 0x6f, 0xc1, 0x30,
+	  0x95, 0xf6, 0xbb, 0xa4, 0x1d, 0xe5, 0xd4, 0xe7,
+	  0x95, 0x75, 0x8e, 0xc8, 0xc8, 0xdf, 0x8a, 0xf1 },
+	{ 0xdc, 0x1d, 0xb6, 0x4e, 0xd8, 0xb4, 0x8a, 0x91,
+	  0x0e, 0x06, 0x0a, 0x6b, 0x86, 0x63, 0x74, 0xc5,
+	  0x78, 0x78, 0x4e, 0x9a, 0xc4, 0x9a, 0xb2, 0x77,
+	  0x40, 0x92, 0xac, 0x71, 0x50, 0x19, 0x34, 0xac },
+	{ 0x28, 0x54, 0x13, 0xb2, 0xf2, 0xee, 0x87, 0x3d,
+	  0x34, 0x31, 0x9e, 0xe0, 0xbb, 0xfb, 0xb9, 0x0f,
+	  0x32, 0xda, 0x43, 0x4c, 0xc8, 0x7e, 0x3d, 0xb5,
+	  0xed, 0x12, 0x1b, 0xb3, 0x98, 0xed, 0x96, 0x4b },
+	{ 0x02, 0x16, 0xe0, 0xf8, 0x1f, 0x75, 0x0f, 0x26,
+	  0xf1, 0x99, 0x8b, 0xc3, 0x93, 0x4e, 0x3e, 0x12,
+	  0x4c, 0x99, 0x45, 0xe6, 0x85, 0xa6, 0x0b, 0x25,
+	  0xe8, 0xfb, 0xd9, 0x62, 0x5a, 0xb6, 0xb5, 0x99 },
+	{ 0x38, 0xc4, 0x10, 0xf5, 0xb9, 0xd4, 0x07, 0x20,
+	  0x50, 0x75, 0x5b, 0x31, 0xdc, 0xa8, 0x9f, 0xd5,
+	  0x39, 0x5c, 0x67, 0x85, 0xee, 0xb3, 0xd7, 0x90,
+	  0xf3, 0x20, 0xff, 0x94, 0x1c, 0x5a, 0x93, 0xbf },
+	{ 0xf1, 0x84, 0x17, 0xb3, 0x9d, 0x61, 0x7a, 0xb1,
+	  0xc1, 0x8f, 0xdf, 0x91, 0xeb, 0xd0, 0xfc, 0x6d,
+	  0x55, 0x16, 0xbb, 0x34, 0xcf, 0x39, 0x36, 0x40,
+	  0x37, 0xbc, 0xe8, 0x1f, 0xa0, 0x4c, 0xec, 0xb1 },
+	{ 0x1f, 0xa8, 0x77, 0xde, 0x67, 0x25, 0x9d, 0x19,
+	  0x86, 0x3a, 0x2a, 0x34, 0xbc, 0xc6, 0x96, 0x2a,
+	  0x2b, 0x25, 0xfc, 0xbf, 0x5c, 0xbe, 0xcd, 0x7e,
+	  0xde, 0x8f, 0x1f, 0xa3, 0x66, 0x88, 0xa7, 0x96 },
+	{ 0x5b, 0xd1, 0x69, 0xe6, 0x7c, 0x82, 0xc2, 0xc2,
+	  0xe9, 0x8e, 0xf7, 0x00, 0x8b, 0xdf, 0x26, 0x1f,
+	  0x2d, 0xdf, 0x30, 0xb1, 0xc0, 0x0f, 0x9e, 0x7f,
+	  0x27, 0x5b, 0xb3, 0xe8, 0xa2, 0x8d, 0xc9, 0xa2 },
+	{ 0xc8, 0x0a, 0xbe, 0xeb, 0xb6, 0x69, 0xad, 0x5d,
+	  0xee, 0xb5, 0xf5, 0xec, 0x8e, 0xa6, 0xb7, 0xa0,
+	  0x5d, 0xdf, 0x7d, 0x31, 0xec, 0x4c, 0x0a, 0x2e,
+	  0xe2, 0x0b, 0x0b, 0x98, 0xca, 0xec, 0x67, 0x46 },
+	{ 0xe7, 0x6d, 0x3f, 0xbd, 0xa5, 0xba, 0x37, 0x4e,
+	  0x6b, 0xf8, 0xe5, 0x0f, 0xad, 0xc3, 0xbb, 0xb9,
+	  0xba, 0x5c, 0x20, 0x6e, 0xbd, 0xec, 0x89, 0xa3,
+	  0xa5, 0x4c, 0xf3, 0xdd, 0x84, 0xa0, 0x70, 0x16 },
+	{ 0x7b, 0xba, 0x9d, 0xc5, 0xb5, 0xdb, 0x20, 0x71,
+	  0xd1, 0x77, 0x52, 0xb1, 0x04, 0x4c, 0x1e, 0xce,
+	  0xd9, 0x6a, 0xaf, 0x2d, 0xd4, 0x6e, 0x9b, 0x43,
+	  0x37, 0x50, 0xe8, 0xea, 0x0d, 0xcc, 0x18, 0x70 },
+	{ 0xf2, 0x9b, 0x1b, 0x1a, 0xb9, 0xba, 0xb1, 0x63,
+	  0x01, 0x8e, 0xe3, 0xda, 0x15, 0x23, 0x2c, 0xca,
+	  0x78, 0xec, 0x52, 0xdb, 0xc3, 0x4e, 0xda, 0x5b,
+	  0x82, 0x2e, 0xc1, 0xd8, 0x0f, 0xc2, 0x1b, 0xd0 },
+	{ 0x9e, 0xe3, 0xe3, 0xe7, 0xe9, 0x00, 0xf1, 0xe1,
+	  0x1d, 0x30, 0x8c, 0x4b, 0x2b, 0x30, 0x76, 0xd2,
+	  0x72, 0xcf, 0x70, 0x12, 0x4f, 0x9f, 0x51, 0xe1,
+	  0xda, 0x60, 0xf3, 0x78, 0x46, 0xcd, 0xd2, 0xf4 },
+	{ 0x70, 0xea, 0x3b, 0x01, 0x76, 0x92, 0x7d, 0x90,
+	  0x96, 0xa1, 0x85, 0x08, 0xcd, 0x12, 0x3a, 0x29,
+	  0x03, 0x25, 0x92, 0x0a, 0x9d, 0x00, 0xa8, 0x9b,
+	  0x5d, 0xe0, 0x42, 0x73, 0xfb, 0xc7, 0x6b, 0x85 },
+	{ 0x67, 0xde, 0x25, 0xc0, 0x2a, 0x4a, 0xab, 0xa2,
+	  0x3b, 0xdc, 0x97, 0x3c, 0x8b, 0xb0, 0xb5, 0x79,
+	  0x6d, 0x47, 0xcc, 0x06, 0x59, 0xd4, 0x3d, 0xff,
+	  0x1f, 0x97, 0xde, 0x17, 0x49, 0x63, 0xb6, 0x8e },
+	{ 0xb2, 0x16, 0x8e, 0x4e, 0x0f, 0x18, 0xb0, 0xe6,
+	  0x41, 0x00, 0xb5, 0x17, 0xed, 0x95, 0x25, 0x7d,
+	  0x73, 0xf0, 0x62, 0x0d, 0xf8, 0x85, 0xc1, 0x3d,
+	  0x2e, 0xcf, 0x79, 0x36, 0x7b, 0x38, 0x4c, 0xee },
+	{ 0x2e, 0x7d, 0xec, 0x24, 0x28, 0x85, 0x3b, 0x2c,
+	  0x71, 0x76, 0x07, 0x45, 0x54, 0x1f, 0x7a, 0xfe,
+	  0x98, 0x25, 0xb5, 0xdd, 0x77, 0xdf, 0x06, 0x51,
+	  0x1d, 0x84, 0x41, 0xa9, 0x4b, 0xac, 0xc9, 0x27 },
+	{ 0xca, 0x9f, 0xfa, 0xc4, 0xc4, 0x3f, 0x0b, 0x48,
+	  0x46, 0x1d, 0xc5, 0xc2, 0x63, 0xbe, 0xa3, 0xf6,
+	  0xf0, 0x06, 0x11, 0xce, 0xac, 0xab, 0xf6, 0xf8,
+	  0x95, 0xba, 0x2b, 0x01, 0x01, 0xdb, 0xb6, 0x8d },
+	{ 0x74, 0x10, 0xd4, 0x2d, 0x8f, 0xd1, 0xd5, 0xe9,
+	  0xd2, 0xf5, 0x81, 0x5c, 0xb9, 0x34, 0x17, 0x99,
+	  0x88, 0x28, 0xef, 0x3c, 0x42, 0x30, 0xbf, 0xbd,
+	  0x41, 0x2d, 0xf0, 0xa4, 0xa7, 0xa2, 0x50, 0x7a },
+	{ 0x50, 0x10, 0xf6, 0x84, 0x51, 0x6d, 0xcc, 0xd0,
+	  0xb6, 0xee, 0x08, 0x52, 0xc2, 0x51, 0x2b, 0x4d,
+	  0xc0, 0x06, 0x6c, 0xf0, 0xd5, 0x6f, 0x35, 0x30,
+	  0x29, 0x78, 0xdb, 0x8a, 0xe3, 0x2c, 0x6a, 0x81 },
+	{ 0xac, 0xaa, 0xb5, 0x85, 0xf7, 0xb7, 0x9b, 0x71,
+	  0x99, 0x35, 0xce, 0xb8, 0x95, 0x23, 0xdd, 0xc5,
+	  0x48, 0x27, 0xf7, 0x5c, 0x56, 0x88, 0x38, 0x56,
+	  0x15, 0x4a, 0x56, 0xcd, 0xcd, 0x5e, 0xe9, 0x88 },
+	{ 0x66, 0x6d, 0xe5, 0xd1, 0x44, 0x0f, 0xee, 0x73,
+	  0x31, 0xaa, 0xf0, 0x12, 0x3a, 0x62, 0xef, 0x2d,
+	  0x8b, 0xa5, 0x74, 0x53, 0xa0, 0x76, 0x96, 0x35,
+	  0xac, 0x6c, 0xd0, 0x1e, 0x63, 0x3f, 0x77, 0x12 },
+	{ 0xa6, 0xf9, 0x86, 0x58, 0xf6, 0xea, 0xba, 0xf9,
+	  0x02, 0xd8, 0xb3, 0x87, 0x1a, 0x4b, 0x10, 0x1d,
+	  0x16, 0x19, 0x6e, 0x8a, 0x4b, 0x24, 0x1e, 0x15,
+	  0x58, 0xfe, 0x29, 0x96, 0x6e, 0x10, 0x3e, 0x8d },
+	{ 0x89, 0x15, 0x46, 0xa8, 0xb2, 0x9f, 0x30, 0x47,
+	  0xdd, 0xcf, 0xe5, 0xb0, 0x0e, 0x45, 0xfd, 0x55,
+	  0x75, 0x63, 0x73, 0x10, 0x5e, 0xa8, 0x63, 0x7d,
+	  0xfc, 0xff, 0x54, 0x7b, 0x6e, 0xa9, 0x53, 0x5f },
+	{ 0x18, 0xdf, 0xbc, 0x1a, 0xc5, 0xd2, 0x5b, 0x07,
+	  0x61, 0x13, 0x7d, 0xbd, 0x22, 0xc1, 0x7c, 0x82,
+	  0x9d, 0x0f, 0x0e, 0xf1, 0xd8, 0x23, 0x44, 0xe9,
+	  0xc8, 0x9c, 0x28, 0x66, 0x94, 0xda, 0x24, 0xe8 },
+	{ 0xb5, 0x4b, 0x9b, 0x67, 0xf8, 0xfe, 0xd5, 0x4b,
+	  0xbf, 0x5a, 0x26, 0x66, 0xdb, 0xdf, 0x4b, 0x23,
+	  0xcf, 0xf1, 0xd1, 0xb6, 0xf4, 0xaf, 0xc9, 0x85,
+	  0xb2, 0xe6, 0xd3, 0x30, 0x5a, 0x9f, 0xf8, 0x0f },
+	{ 0x7d, 0xb4, 0x42, 0xe1, 0x32, 0xba, 0x59, 0xbc,
+	  0x12, 0x89, 0xaa, 0x98, 0xb0, 0xd3, 0xe8, 0x06,
+	  0x00, 0x4f, 0x8e, 0xc1, 0x28, 0x11, 0xaf, 0x1e,
+	  0x2e, 0x33, 0xc6, 0x9b, 0xfd, 0xe7, 0x29, 0xe1 },
+	{ 0x25, 0x0f, 0x37, 0xcd, 0xc1, 0x5e, 0x81, 0x7d,
+	  0x2f, 0x16, 0x0d, 0x99, 0x56, 0xc7, 0x1f, 0xe3,
+	  0xeb, 0x5d, 0xb7, 0x45, 0x56, 0xe4, 0xad, 0xf9,
+	  0xa4, 0xff, 0xaf, 0xba, 0x74, 0x01, 0x03, 0x96 },
+	{ 0x4a, 0xb8, 0xa3, 0xdd, 0x1d, 0xdf, 0x8a, 0xd4,
+	  0x3d, 0xab, 0x13, 0xa2, 0x7f, 0x66, 0xa6, 0x54,
+	  0x4f, 0x29, 0x05, 0x97, 0xfa, 0x96, 0x04, 0x0e,
+	  0x0e, 0x1d, 0xb9, 0x26, 0x3a, 0xa4, 0x79, 0xf8 },
+	{ 0xee, 0x61, 0x72, 0x7a, 0x07, 0x66, 0xdf, 0x93,
+	  0x9c, 0xcd, 0xc8, 0x60, 0x33, 0x40, 0x44, 0xc7,
+	  0x9a, 0x3c, 0x9b, 0x15, 0x62, 0x00, 0xbc, 0x3a,
+	  0xa3, 0x29, 0x73, 0x48, 0x3d, 0x83, 0x41, 0xae },
+	{ 0x3f, 0x68, 0xc7, 0xec, 0x63, 0xac, 0x11, 0xeb,
+	  0xb9, 0x8f, 0x94, 0xb3, 0x39, 0xb0, 0x5c, 0x10,
+	  0x49, 0x84, 0xfd, 0xa5, 0x01, 0x03, 0x06, 0x01,
+	  0x44, 0xe5, 0xa2, 0xbf, 0xcc, 0xc9, 0xda, 0x95 },
+	{ 0x05, 0x6f, 0x29, 0x81, 0x6b, 0x8a, 0xf8, 0xf5,
+	  0x66, 0x82, 0xbc, 0x4d, 0x7c, 0xf0, 0x94, 0x11,
+	  0x1d, 0xa7, 0x73, 0x3e, 0x72, 0x6c, 0xd1, 0x3d,
+	  0x6b, 0x3e, 0x8e, 0xa0, 0x3e, 0x92, 0xa0, 0xd5 },
+	{ 0xf5, 0xec, 0x43, 0xa2, 0x8a, 0xcb, 0xef, 0xf1,
+	  0xf3, 0x31, 0x8a, 0x5b, 0xca, 0xc7, 0xc6, 0x6d,
+	  0xdb, 0x52, 0x30, 0xb7, 0x9d, 0xb2, 0xd1, 0x05,
+	  0xbc, 0xbe, 0x15, 0xf3, 0xc1, 0x14, 0x8d, 0x69 },
+	{ 0x2a, 0x69, 0x60, 0xad, 0x1d, 0x8d, 0xd5, 0x47,
+	  0x55, 0x5c, 0xfb, 0xd5, 0xe4, 0x60, 0x0f, 0x1e,
+	  0xaa, 0x1c, 0x8e, 0xda, 0x34, 0xde, 0x03, 0x74,
+	  0xec, 0x4a, 0x26, 0xea, 0xaa, 0xa3, 0x3b, 0x4e },
+	{ 0xdc, 0xc1, 0xea, 0x7b, 0xaa, 0xb9, 0x33, 0x84,
+	  0xf7, 0x6b, 0x79, 0x68, 0x66, 0x19, 0x97, 0x54,
+	  0x74, 0x2f, 0x7b, 0x96, 0xd6, 0xb4, 0xc1, 0x20,
+	  0x16, 0x5c, 0x04, 0xa6, 0xc4, 0xf5, 0xce, 0x10 },
+	{ 0x13, 0xd5, 0xdf, 0x17, 0x92, 0x21, 0x37, 0x9c,
+	  0x6a, 0x78, 0xc0, 0x7c, 0x79, 0x3f, 0xf5, 0x34,
+	  0x87, 0xca, 0xe6, 0xbf, 0x9f, 0xe8, 0x82, 0x54,
+	  0x1a, 0xb0, 0xe7, 0x35, 0xe3, 0xea, 0xda, 0x3b },
+	{ 0x8c, 0x59, 0xe4, 0x40, 0x76, 0x41, 0xa0, 0x1e,
+	  0x8f, 0xf9, 0x1f, 0x99, 0x80, 0xdc, 0x23, 0x6f,
+	  0x4e, 0xcd, 0x6f, 0xcf, 0x52, 0x58, 0x9a, 0x09,
+	  0x9a, 0x96, 0x16, 0x33, 0x96, 0x77, 0x14, 0xe1 },
+	{ 0x83, 0x3b, 0x1a, 0xc6, 0xa2, 0x51, 0xfd, 0x08,
+	  0xfd, 0x6d, 0x90, 0x8f, 0xea, 0x2a, 0x4e, 0xe1,
+	  0xe0, 0x40, 0xbc, 0xa9, 0x3f, 0xc1, 0xa3, 0x8e,
+	  0xc3, 0x82, 0x0e, 0x0c, 0x10, 0xbd, 0x82, 0xea },
+	{ 0xa2, 0x44, 0xf9, 0x27, 0xf3, 0xb4, 0x0b, 0x8f,
+	  0x6c, 0x39, 0x15, 0x70, 0xc7, 0x65, 0x41, 0x8f,
+	  0x2f, 0x6e, 0x70, 0x8e, 0xac, 0x90, 0x06, 0xc5,
+	  0x1a, 0x7f, 0xef, 0xf4, 0xaf, 0x3b, 0x2b, 0x9e },
+	{ 0x3d, 0x99, 0xed, 0x95, 0x50, 0xcf, 0x11, 0x96,
+	  0xe6, 0xc4, 0xd2, 0x0c, 0x25, 0x96, 0x20, 0xf8,
+	  0x58, 0xc3, 0xd7, 0x03, 0x37, 0x4c, 0x12, 0x8c,
+	  0xe7, 0xb5, 0x90, 0x31, 0x0c, 0x83, 0x04, 0x6d },
+	{ 0x2b, 0x35, 0xc4, 0x7d, 0x7b, 0x87, 0x76, 0x1f,
+	  0x0a, 0xe4, 0x3a, 0xc5, 0x6a, 0xc2, 0x7b, 0x9f,
+	  0x25, 0x83, 0x03, 0x67, 0xb5, 0x95, 0xbe, 0x8c,
+	  0x24, 0x0e, 0x94, 0x60, 0x0c, 0x6e, 0x33, 0x12 },
+	{ 0x5d, 0x11, 0xed, 0x37, 0xd2, 0x4d, 0xc7, 0x67,
+	  0x30, 0x5c, 0xb7, 0xe1, 0x46, 0x7d, 0x87, 0xc0,
+	  0x65, 0xac, 0x4b, 0xc8, 0xa4, 0x26, 0xde, 0x38,
+	  0x99, 0x1f, 0xf5, 0x9a, 0xa8, 0x73, 0x5d, 0x02 },
+	{ 0xb8, 0x36, 0x47, 0x8e, 0x1c, 0xa0, 0x64, 0x0d,
+	  0xce, 0x6f, 0xd9, 0x10, 0xa5, 0x09, 0x62, 0x72,
+	  0xc8, 0x33, 0x09, 0x90, 0xcd, 0x97, 0x86, 0x4a,
+	  0xc2, 0xbf, 0x14, 0xef, 0x6b, 0x23, 0x91, 0x4a },
+	{ 0x91, 0x00, 0xf9, 0x46, 0xd6, 0xcc, 0xde, 0x3a,
+	  0x59, 0x7f, 0x90, 0xd3, 0x9f, 0xc1, 0x21, 0x5b,
+	  0xad, 0xdc, 0x74, 0x13, 0x64, 0x3d, 0x85, 0xc2,
+	  0x1c, 0x3e, 0xee, 0x5d, 0x2d, 0xd3, 0x28, 0x94 },
+	{ 0xda, 0x70, 0xee, 0xdd, 0x23, 0xe6, 0x63, 0xaa,
+	  0x1a, 0x74, 0xb9, 0x76, 0x69, 0x35, 0xb4, 0x79,
+	  0x22, 0x2a, 0x72, 0xaf, 0xba, 0x5c, 0x79, 0x51,
+	  0x58, 0xda, 0xd4, 0x1a, 0x3b, 0xd7, 0x7e, 0x40 },
+	{ 0xf0, 0x67, 0xed, 0x6a, 0x0d, 0xbd, 0x43, 0xaa,
+	  0x0a, 0x92, 0x54, 0xe6, 0x9f, 0xd6, 0x6b, 0xdd,
+	  0x8a, 0xcb, 0x87, 0xde, 0x93, 0x6c, 0x25, 0x8c,
+	  0xfb, 0x02, 0x28, 0x5f, 0x2c, 0x11, 0xfa, 0x79 },
+	{ 0x71, 0x5c, 0x99, 0xc7, 0xd5, 0x75, 0x80, 0xcf,
+	  0x97, 0x53, 0xb4, 0xc1, 0xd7, 0x95, 0xe4, 0x5a,
+	  0x83, 0xfb, 0xb2, 0x28, 0xc0, 0xd3, 0x6f, 0xbe,
+	  0x20, 0xfa, 0xf3, 0x9b, 0xdd, 0x6d, 0x4e, 0x85 },
+	{ 0xe4, 0x57, 0xd6, 0xad, 0x1e, 0x67, 0xcb, 0x9b,
+	  0xbd, 0x17, 0xcb, 0xd6, 0x98, 0xfa, 0x6d, 0x7d,
+	  0xae, 0x0c, 0x9b, 0x7a, 0xd6, 0xcb, 0xd6, 0x53,
+	  0x96, 0x34, 0xe3, 0x2a, 0x71, 0x9c, 0x84, 0x92 },
+	{ 0xec, 0xe3, 0xea, 0x81, 0x03, 0xe0, 0x24, 0x83,
+	  0xc6, 0x4a, 0x70, 0xa4, 0xbd, 0xce, 0xe8, 0xce,
+	  0xb6, 0x27, 0x8f, 0x25, 0x33, 0xf3, 0xf4, 0x8d,
+	  0xbe, 0xed, 0xfb, 0xa9, 0x45, 0x31, 0xd4, 0xae },
+	{ 0x38, 0x8a, 0xa5, 0xd3, 0x66, 0x7a, 0x97, 0xc6,
+	  0x8d, 0x3d, 0x56, 0xf8, 0xf3, 0xee, 0x8d, 0x3d,
+	  0x36, 0x09, 0x1f, 0x17, 0xfe, 0x5d, 0x1b, 0x0d,
+	  0x5d, 0x84, 0xc9, 0x3b, 0x2f, 0xfe, 0x40, 0xbd },
+	{ 0x8b, 0x6b, 0x31, 0xb9, 0xad, 0x7c, 0x3d, 0x5c,
+	  0xd8, 0x4b, 0xf9, 0x89, 0x47, 0xb9, 0xcd, 0xb5,
+	  0x9d, 0xf8, 0xa2, 0x5f, 0xf7, 0x38, 0x10, 0x10,
+	  0x13, 0xbe, 0x4f, 0xd6, 0x5e, 0x1d, 0xd1, 0xa3 },
+	{ 0x06, 0x62, 0x91, 0xf6, 0xbb, 0xd2, 0x5f, 0x3c,
+	  0x85, 0x3d, 0xb7, 0xd8, 0xb9, 0x5c, 0x9a, 0x1c,
+	  0xfb, 0x9b, 0xf1, 0xc1, 0xc9, 0x9f, 0xb9, 0x5a,
+	  0x9b, 0x78, 0x69, 0xd9, 0x0f, 0x1c, 0x29, 0x03 },
+	{ 0xa7, 0x07, 0xef, 0xbc, 0xcd, 0xce, 0xed, 0x42,
+	  0x96, 0x7a, 0x66, 0xf5, 0x53, 0x9b, 0x93, 0xed,
+	  0x75, 0x60, 0xd4, 0x67, 0x30, 0x40, 0x16, 0xc4,
+	  0x78, 0x0d, 0x77, 0x55, 0xa5, 0x65, 0xd4, 0xc4 },
+	{ 0x38, 0xc5, 0x3d, 0xfb, 0x70, 0xbe, 0x7e, 0x79,
+	  0x2b, 0x07, 0xa6, 0xa3, 0x5b, 0x8a, 0x6a, 0x0a,
+	  0xba, 0x02, 0xc5, 0xc5, 0xf3, 0x8b, 0xaf, 0x5c,
+	  0x82, 0x3f, 0xdf, 0xd9, 0xe4, 0x2d, 0x65, 0x7e },
+	{ 0xf2, 0x91, 0x13, 0x86, 0x50, 0x1d, 0x9a, 0xb9,
+	  0xd7, 0x20, 0xcf, 0x8a, 0xd1, 0x05, 0x03, 0xd5,
+	  0x63, 0x4b, 0xf4, 0xb7, 0xd1, 0x2b, 0x56, 0xdf,
+	  0xb7, 0x4f, 0xec, 0xc6, 0xe4, 0x09, 0x3f, 0x68 },
+	{ 0xc6, 0xf2, 0xbd, 0xd5, 0x2b, 0x81, 0xe6, 0xe4,
+	  0xf6, 0x59, 0x5a, 0xbd, 0x4d, 0x7f, 0xb3, 0x1f,
+	  0x65, 0x11, 0x69, 0xd0, 0x0f, 0xf3, 0x26, 0x92,
+	  0x6b, 0x34, 0x94, 0x7b, 0x28, 0xa8, 0x39, 0x59 },
+	{ 0x29, 0x3d, 0x94, 0xb1, 0x8c, 0x98, 0xbb, 0x32,
+	  0x23, 0x36, 0x6b, 0x8c, 0xe7, 0x4c, 0x28, 0xfb,
+	  0xdf, 0x28, 0xe1, 0xf8, 0x4a, 0x33, 0x50, 0xb0,
+	  0xeb, 0x2d, 0x18, 0x04, 0xa5, 0x77, 0x57, 0x9b },
+	{ 0x2c, 0x2f, 0xa5, 0xc0, 0xb5, 0x15, 0x33, 0x16,
+	  0x5b, 0xc3, 0x75, 0xc2, 0x2e, 0x27, 0x81, 0x76,
+	  0x82, 0x70, 0xa3, 0x83, 0x98, 0x5d, 0x13, 0xbd,
+	  0x6b, 0x67, 0xb6, 0xfd, 0x67, 0xf8, 0x89, 0xeb },
+	{ 0xca, 0xa0, 0x9b, 0x82, 0xb7, 0x25, 0x62, 0xe4,
+	  0x3f, 0x4b, 0x22, 0x75, 0xc0, 0x91, 0x91, 0x8e,
+	  0x62, 0x4d, 0x91, 0x16, 0x61, 0xcc, 0x81, 0x1b,
+	  0xb5, 0xfa, 0xec, 0x51, 0xf6, 0x08, 0x8e, 0xf7 },
+	{ 0x24, 0x76, 0x1e, 0x45, 0xe6, 0x74, 0x39, 0x53,
+	  0x79, 0xfb, 0x17, 0x72, 0x9c, 0x78, 0xcb, 0x93,
+	  0x9e, 0x6f, 0x74, 0xc5, 0xdf, 0xfb, 0x9c, 0x96,
+	  0x1f, 0x49, 0x59, 0x82, 0xc3, 0xed, 0x1f, 0xe3 },
+	{ 0x55, 0xb7, 0x0a, 0x82, 0x13, 0x1e, 0xc9, 0x48,
+	  0x88, 0xd7, 0xab, 0x54, 0xa7, 0xc5, 0x15, 0x25,
+	  0x5c, 0x39, 0x38, 0xbb, 0x10, 0xbc, 0x78, 0x4d,
+	  0xc9, 0xb6, 0x7f, 0x07, 0x6e, 0x34, 0x1a, 0x73 },
+	{ 0x6a, 0xb9, 0x05, 0x7b, 0x97, 0x7e, 0xbc, 0x3c,
+	  0xa4, 0xd4, 0xce, 0x74, 0x50, 0x6c, 0x25, 0xcc,
+	  0xcd, 0xc5, 0x66, 0x49, 0x7c, 0x45, 0x0b, 0x54,
+	  0x15, 0xa3, 0x94, 0x86, 0xf8, 0x65, 0x7a, 0x03 },
+	{ 0x24, 0x06, 0x6d, 0xee, 0xe0, 0xec, 0xee, 0x15,
+	  0xa4, 0x5f, 0x0a, 0x32, 0x6d, 0x0f, 0x8d, 0xbc,
+	  0x79, 0x76, 0x1e, 0xbb, 0x93, 0xcf, 0x8c, 0x03,
+	  0x77, 0xaf, 0x44, 0x09, 0x78, 0xfc, 0xf9, 0x94 },
+	{ 0x20, 0x00, 0x0d, 0x3f, 0x66, 0xba, 0x76, 0x86,
+	  0x0d, 0x5a, 0x95, 0x06, 0x88, 0xb9, 0xaa, 0x0d,
+	  0x76, 0xcf, 0xea, 0x59, 0xb0, 0x05, 0xd8, 0x59,
+	  0x91, 0x4b, 0x1a, 0x46, 0x65, 0x3a, 0x93, 0x9b },
+	{ 0xb9, 0x2d, 0xaa, 0x79, 0x60, 0x3e, 0x3b, 0xdb,
+	  0xc3, 0xbf, 0xe0, 0xf4, 0x19, 0xe4, 0x09, 0xb2,
+	  0xea, 0x10, 0xdc, 0x43, 0x5b, 0xee, 0xfe, 0x29,
+	  0x59, 0xda, 0x16, 0x89, 0x5d, 0x5d, 0xca, 0x1c },
+	{ 0xe9, 0x47, 0x94, 0x87, 0x05, 0xb2, 0x06, 0xd5,
+	  0x72, 0xb0, 0xe8, 0xf6, 0x2f, 0x66, 0xa6, 0x55,
+	  0x1c, 0xbd, 0x6b, 0xc3, 0x05, 0xd2, 0x6c, 0xe7,
+	  0x53, 0x9a, 0x12, 0xf9, 0xaa, 0xdf, 0x75, 0x71 },
+	{ 0x3d, 0x67, 0xc1, 0xb3, 0xf9, 0xb2, 0x39, 0x10,
+	  0xe3, 0xd3, 0x5e, 0x6b, 0x0f, 0x2c, 0xcf, 0x44,
+	  0xa0, 0xb5, 0x40, 0xa4, 0x5c, 0x18, 0xba, 0x3c,
+	  0x36, 0x26, 0x4d, 0xd4, 0x8e, 0x96, 0xaf, 0x6a },
+	{ 0xc7, 0x55, 0x8b, 0xab, 0xda, 0x04, 0xbc, 0xcb,
+	  0x76, 0x4d, 0x0b, 0xbf, 0x33, 0x58, 0x42, 0x51,
+	  0x41, 0x90, 0x2d, 0x22, 0x39, 0x1d, 0x9f, 0x8c,
+	  0x59, 0x15, 0x9f, 0xec, 0x9e, 0x49, 0xb1, 0x51 },
+	{ 0x0b, 0x73, 0x2b, 0xb0, 0x35, 0x67, 0x5a, 0x50,
+	  0xff, 0x58, 0xf2, 0xc2, 0x42, 0xe4, 0x71, 0x0a,
+	  0xec, 0xe6, 0x46, 0x70, 0x07, 0x9c, 0x13, 0x04,
+	  0x4c, 0x79, 0xc9, 0xb7, 0x49, 0x1f, 0x70, 0x00 },
+	{ 0xd1, 0x20, 0xb5, 0xef, 0x6d, 0x57, 0xeb, 0xf0,
+	  0x6e, 0xaf, 0x96, 0xbc, 0x93, 0x3c, 0x96, 0x7b,
+	  0x16, 0xcb, 0xe6, 0xe2, 0xbf, 0x00, 0x74, 0x1c,
+	  0x30, 0xaa, 0x1c, 0x54, 0xba, 0x64, 0x80, 0x1f },
+	{ 0x58, 0xd2, 0x12, 0xad, 0x6f, 0x58, 0xae, 0xf0,
+	  0xf8, 0x01, 0x16, 0xb4, 0x41, 0xe5, 0x7f, 0x61,
+	  0x95, 0xbf, 0xef, 0x26, 0xb6, 0x14, 0x63, 0xed,
+	  0xec, 0x11, 0x83, 0xcd, 0xb0, 0x4f, 0xe7, 0x6d },
+	{ 0xb8, 0x83, 0x6f, 0x51, 0xd1, 0xe2, 0x9b, 0xdf,
+	  0xdb, 0xa3, 0x25, 0x56, 0x53, 0x60, 0x26, 0x8b,
+	  0x8f, 0xad, 0x62, 0x74, 0x73, 0xed, 0xec, 0xef,
+	  0x7e, 0xae, 0xfe, 0xe8, 0x37, 0xc7, 0x40, 0x03 },
+	{ 0xc5, 0x47, 0xa3, 0xc1, 0x24, 0xae, 0x56, 0x85,
+	  0xff, 0xa7, 0xb8, 0xed, 0xaf, 0x96, 0xec, 0x86,
+	  0xf8, 0xb2, 0xd0, 0xd5, 0x0c, 0xee, 0x8b, 0xe3,
+	  0xb1, 0xf0, 0xc7, 0x67, 0x63, 0x06, 0x9d, 0x9c },
+	{ 0x5d, 0x16, 0x8b, 0x76, 0x9a, 0x2f, 0x67, 0x85,
+	  0x3d, 0x62, 0x95, 0xf7, 0x56, 0x8b, 0xe4, 0x0b,
+	  0xb7, 0xa1, 0x6b, 0x8d, 0x65, 0xba, 0x87, 0x63,
+	  0x5d, 0x19, 0x78, 0xd2, 0xab, 0x11, 0xba, 0x2a },
+	{ 0xa2, 0xf6, 0x75, 0xdc, 0x73, 0x02, 0x63, 0x8c,
+	  0xb6, 0x02, 0x01, 0x06, 0x4c, 0xa5, 0x50, 0x77,
+	  0x71, 0x4d, 0x71, 0xfe, 0x09, 0x6a, 0x31, 0x5f,
+	  0x2f, 0xe7, 0x40, 0x12, 0x77, 0xca, 0xa5, 0xaf },
+	{ 0xc8, 0xaa, 0xb5, 0xcd, 0x01, 0x60, 0xae, 0x78,
+	  0xcd, 0x2e, 0x8a, 0xc5, 0xfb, 0x0e, 0x09, 0x3c,
+	  0xdb, 0x5c, 0x4b, 0x60, 0x52, 0xa0, 0xa9, 0x7b,
+	  0xb0, 0x42, 0x16, 0x82, 0x6f, 0xa7, 0xa4, 0x37 },
+	{ 0xff, 0x68, 0xca, 0x40, 0x35, 0xbf, 0xeb, 0x43,
+	  0xfb, 0xf1, 0x45, 0xfd, 0xdd, 0x5e, 0x43, 0xf1,
+	  0xce, 0xa5, 0x4f, 0x11, 0xf7, 0xbe, 0xe1, 0x30,
+	  0x58, 0xf0, 0x27, 0x32, 0x9a, 0x4a, 0x5f, 0xa4 },
+	{ 0x1d, 0x4e, 0x54, 0x87, 0xae, 0x3c, 0x74, 0x0f,
+	  0x2b, 0xa6, 0xe5, 0x41, 0xac, 0x91, 0xbc, 0x2b,
+	  0xfc, 0xd2, 0x99, 0x9c, 0x51, 0x8d, 0x80, 0x7b,
+	  0x42, 0x67, 0x48, 0x80, 0x3a, 0x35, 0x0f, 0xd4 },
+	{ 0x6d, 0x24, 0x4e, 0x1a, 0x06, 0xce, 0x4e, 0xf5,
+	  0x78, 0xdd, 0x0f, 0x63, 0xaf, 0xf0, 0x93, 0x67,
+	  0x06, 0x73, 0x51, 0x19, 0xca, 0x9c, 0x8d, 0x22,
+	  0xd8, 0x6c, 0x80, 0x14, 0x14, 0xab, 0x97, 0x41 },
+	{ 0xde, 0xcf, 0x73, 0x29, 0xdb, 0xcc, 0x82, 0x7b,
+	  0x8f, 0xc5, 0x24, 0xc9, 0x43, 0x1e, 0x89, 0x98,
+	  0x02, 0x9e, 0xce, 0x12, 0xce, 0x93, 0xb7, 0xb2,
+	  0xf3, 0xe7, 0x69, 0xa9, 0x41, 0xfb, 0x8c, 0xea },
+	{ 0x2f, 0xaf, 0xcc, 0x0f, 0x2e, 0x63, 0xcb, 0xd0,
+	  0x77, 0x55, 0xbe, 0x7b, 0x75, 0xec, 0xea, 0x0a,
+	  0xdf, 0xf9, 0xaa, 0x5e, 0xde, 0x2a, 0x52, 0xfd,
+	  0xab, 0x4d, 0xfd, 0x03, 0x74, 0xcd, 0x48, 0x3f },
+	{ 0xaa, 0x85, 0x01, 0x0d, 0xd4, 0x6a, 0x54, 0x6b,
+	  0x53, 0x5e, 0xf4, 0xcf, 0x5f, 0x07, 0xd6, 0x51,
+	  0x61, 0xe8, 0x98, 0x28, 0xf3, 0xa7, 0x7d, 0xb7,
+	  0xb9, 0xb5, 0x6f, 0x0d, 0xf5, 0x9a, 0xae, 0x45 },
+	{ 0x07, 0xe8, 0xe1, 0xee, 0x73, 0x2c, 0xb0, 0xd3,
+	  0x56, 0xc9, 0xc0, 0xd1, 0x06, 0x9c, 0x89, 0xd1,
+	  0x7a, 0xdf, 0x6a, 0x9a, 0x33, 0x4f, 0x74, 0x5e,
+	  0xc7, 0x86, 0x73, 0x32, 0x54, 0x8c, 0xa8, 0xe9 },
+	{ 0x0e, 0x01, 0xe8, 0x1c, 0xad, 0xa8, 0x16, 0x2b,
+	  0xfd, 0x5f, 0x8a, 0x8c, 0x81, 0x8a, 0x6c, 0x69,
+	  0xfe, 0xdf, 0x02, 0xce, 0xb5, 0x20, 0x85, 0x23,
+	  0xcb, 0xe5, 0x31, 0x3b, 0x89, 0xca, 0x10, 0x53 },
+	{ 0x6b, 0xb6, 0xc6, 0x47, 0x26, 0x55, 0x08, 0x43,
+	  0x99, 0x85, 0x2e, 0x00, 0x24, 0x9f, 0x8c, 0xb2,
+	  0x47, 0x89, 0x6d, 0x39, 0x2b, 0x02, 0xd7, 0x3b,
+	  0x7f, 0x0d, 0xd8, 0x18, 0xe1, 0xe2, 0x9b, 0x07 },
+	{ 0x42, 0xd4, 0x63, 0x6e, 0x20, 0x60, 0xf0, 0x8f,
+	  0x41, 0xc8, 0x82, 0xe7, 0x6b, 0x39, 0x6b, 0x11,
+	  0x2e, 0xf6, 0x27, 0xcc, 0x24, 0xc4, 0x3d, 0xd5,
+	  0xf8, 0x3a, 0x1d, 0x1a, 0x7e, 0xad, 0x71, 0x1a },
+	{ 0x48, 0x58, 0xc9, 0xa1, 0x88, 0xb0, 0x23, 0x4f,
+	  0xb9, 0xa8, 0xd4, 0x7d, 0x0b, 0x41, 0x33, 0x65,
+	  0x0a, 0x03, 0x0b, 0xd0, 0x61, 0x1b, 0x87, 0xc3,
+	  0x89, 0x2e, 0x94, 0x95, 0x1f, 0x8d, 0xf8, 0x52 },
+	{ 0x3f, 0xab, 0x3e, 0x36, 0x98, 0x8d, 0x44, 0x5a,
+	  0x51, 0xc8, 0x78, 0x3e, 0x53, 0x1b, 0xe3, 0xa0,
+	  0x2b, 0xe4, 0x0c, 0xd0, 0x47, 0x96, 0xcf, 0xb6,
+	  0x1d, 0x40, 0x34, 0x74, 0x42, 0xd3, 0xf7, 0x94 },
+	{ 0xeb, 0xab, 0xc4, 0x96, 0x36, 0xbd, 0x43, 0x3d,
+	  0x2e, 0xc8, 0xf0, 0xe5, 0x18, 0x73, 0x2e, 0xf8,
+	  0xfa, 0x21, 0xd4, 0xd0, 0x71, 0xcc, 0x3b, 0xc4,
+	  0x6c, 0xd7, 0x9f, 0xa3, 0x8a, 0x28, 0xb8, 0x10 },
+	{ 0xa1, 0xd0, 0x34, 0x35, 0x23, 0xb8, 0x93, 0xfc,
+	  0xa8, 0x4f, 0x47, 0xfe, 0xb4, 0xa6, 0x4d, 0x35,
+	  0x0a, 0x17, 0xd8, 0xee, 0xf5, 0x49, 0x7e, 0xce,
+	  0x69, 0x7d, 0x02, 0xd7, 0x91, 0x78, 0xb5, 0x91 },
+	{ 0x26, 0x2e, 0xbf, 0xd9, 0x13, 0x0b, 0x7d, 0x28,
+	  0x76, 0x0d, 0x08, 0xef, 0x8b, 0xfd, 0x3b, 0x86,
+	  0xcd, 0xd3, 0xb2, 0x11, 0x3d, 0x2c, 0xae, 0xf7,
+	  0xea, 0x95, 0x1a, 0x30, 0x3d, 0xfa, 0x38, 0x46 },
+	{ 0xf7, 0x61, 0x58, 0xed, 0xd5, 0x0a, 0x15, 0x4f,
+	  0xa7, 0x82, 0x03, 0xed, 0x23, 0x62, 0x93, 0x2f,
+	  0xcb, 0x82, 0x53, 0xaa, 0xe3, 0x78, 0x90, 0x3e,
+	  0xde, 0xd1, 0xe0, 0x3f, 0x70, 0x21, 0xa2, 0x57 },
+	{ 0x26, 0x17, 0x8e, 0x95, 0x0a, 0xc7, 0x22, 0xf6,
+	  0x7a, 0xe5, 0x6e, 0x57, 0x1b, 0x28, 0x4c, 0x02,
+	  0x07, 0x68, 0x4a, 0x63, 0x34, 0xa1, 0x77, 0x48,
+	  0xa9, 0x4d, 0x26, 0x0b, 0xc5, 0xf5, 0x52, 0x74 },
+	{ 0xc3, 0x78, 0xd1, 0xe4, 0x93, 0xb4, 0x0e, 0xf1,
+	  0x1f, 0xe6, 0xa1, 0x5d, 0x9c, 0x27, 0x37, 0xa3,
+	  0x78, 0x09, 0x63, 0x4c, 0x5a, 0xba, 0xd5, 0xb3,
+	  0x3d, 0x7e, 0x39, 0x3b, 0x4a, 0xe0, 0x5d, 0x03 },
+	{ 0x98, 0x4b, 0xd8, 0x37, 0x91, 0x01, 0xbe, 0x8f,
+	  0xd8, 0x06, 0x12, 0xd8, 0xea, 0x29, 0x59, 0xa7,
+	  0x86, 0x5e, 0xc9, 0x71, 0x85, 0x23, 0x55, 0x01,
+	  0x07, 0xae, 0x39, 0x38, 0xdf, 0x32, 0x01, 0x1b },
+	{ 0xc6, 0xf2, 0x5a, 0x81, 0x2a, 0x14, 0x48, 0x58,
+	  0xac, 0x5c, 0xed, 0x37, 0xa9, 0x3a, 0x9f, 0x47,
+	  0x59, 0xba, 0x0b, 0x1c, 0x0f, 0xdc, 0x43, 0x1d,
+	  0xce, 0x35, 0xf9, 0xec, 0x1f, 0x1f, 0x4a, 0x99 },
+	{ 0x92, 0x4c, 0x75, 0xc9, 0x44, 0x24, 0xff, 0x75,
+	  0xe7, 0x4b, 0x8b, 0x4e, 0x94, 0x35, 0x89, 0x58,
+	  0xb0, 0x27, 0xb1, 0x71, 0xdf, 0x5e, 0x57, 0x89,
+	  0x9a, 0xd0, 0xd4, 0xda, 0xc3, 0x73, 0x53, 0xb6 },
+	{ 0x0a, 0xf3, 0x58, 0x92, 0xa6, 0x3f, 0x45, 0x93,
+	  0x1f, 0x68, 0x46, 0xed, 0x19, 0x03, 0x61, 0xcd,
+	  0x07, 0x30, 0x89, 0xe0, 0x77, 0x16, 0x57, 0x14,
+	  0xb5, 0x0b, 0x81, 0xa2, 0xe3, 0xdd, 0x9b, 0xa1 },
+	{ 0xcc, 0x80, 0xce, 0xfb, 0x26, 0xc3, 0xb2, 0xb0,
+	  0xda, 0xef, 0x23, 0x3e, 0x60, 0x6d, 0x5f, 0xfc,
+	  0x80, 0xfa, 0x17, 0x42, 0x7d, 0x18, 0xe3, 0x04,
+	  0x89, 0x67, 0x3e, 0x06, 0xef, 0x4b, 0x87, 0xf7 },
+	{ 0xc2, 0xf8, 0xc8, 0x11, 0x74, 0x47, 0xf3, 0x97,
+	  0x8b, 0x08, 0x18, 0xdc, 0xf6, 0xf7, 0x01, 0x16,
+	  0xac, 0x56, 0xfd, 0x18, 0x4d, 0xd1, 0x27, 0x84,
+	  0x94, 0xe1, 0x03, 0xfc, 0x6d, 0x74, 0xa8, 0x87 },
+	{ 0xbd, 0xec, 0xf6, 0xbf, 0xc1, 0xba, 0x0d, 0xf6,
+	  0xe8, 0x62, 0xc8, 0x31, 0x99, 0x22, 0x07, 0x79,
+	  0x6a, 0xcc, 0x79, 0x79, 0x68, 0x35, 0x88, 0x28,
+	  0xc0, 0x6e, 0x7a, 0x51, 0xe0, 0x90, 0x09, 0x8f },
+	{ 0x24, 0xd1, 0xa2, 0x6e, 0x3d, 0xab, 0x02, 0xfe,
+	  0x45, 0x72, 0xd2, 0xaa, 0x7d, 0xbd, 0x3e, 0xc3,
+	  0x0f, 0x06, 0x93, 0xdb, 0x26, 0xf2, 0x73, 0xd0,
+	  0xab, 0x2c, 0xb0, 0xc1, 0x3b, 0x5e, 0x64, 0x51 },
+	{ 0xec, 0x56, 0xf5, 0x8b, 0x09, 0x29, 0x9a, 0x30,
+	  0x0b, 0x14, 0x05, 0x65, 0xd7, 0xd3, 0xe6, 0x87,
+	  0x82, 0xb6, 0xe2, 0xfb, 0xeb, 0x4b, 0x7e, 0xa9,
+	  0x7a, 0xc0, 0x57, 0x98, 0x90, 0x61, 0xdd, 0x3f },
+	{ 0x11, 0xa4, 0x37, 0xc1, 0xab, 0xa3, 0xc1, 0x19,
+	  0xdd, 0xfa, 0xb3, 0x1b, 0x3e, 0x8c, 0x84, 0x1d,
+	  0xee, 0xeb, 0x91, 0x3e, 0xf5, 0x7f, 0x7e, 0x48,
+	  0xf2, 0xc9, 0xcf, 0x5a, 0x28, 0xfa, 0x42, 0xbc },
+	{ 0x53, 0xc7, 0xe6, 0x11, 0x4b, 0x85, 0x0a, 0x2c,
+	  0xb4, 0x96, 0xc9, 0xb3, 0xc6, 0x9a, 0x62, 0x3e,
+	  0xae, 0xa2, 0xcb, 0x1d, 0x33, 0xdd, 0x81, 0x7e,
+	  0x47, 0x65, 0xed, 0xaa, 0x68, 0x23, 0xc2, 0x28 },
+	{ 0x15, 0x4c, 0x3e, 0x96, 0xfe, 0xe5, 0xdb, 0x14,
+	  0xf8, 0x77, 0x3e, 0x18, 0xaf, 0x14, 0x85, 0x79,
+	  0x13, 0x50, 0x9d, 0xa9, 0x99, 0xb4, 0x6c, 0xdd,
+	  0x3d, 0x4c, 0x16, 0x97, 0x60, 0xc8, 0x3a, 0xd2 },
+	{ 0x40, 0xb9, 0x91, 0x6f, 0x09, 0x3e, 0x02, 0x7a,
+	  0x87, 0x86, 0x64, 0x18, 0x18, 0x92, 0x06, 0x20,
+	  0x47, 0x2f, 0xbc, 0xf6, 0x8f, 0x70, 0x1d, 0x1b,
+	  0x68, 0x06, 0x32, 0xe6, 0x99, 0x6b, 0xde, 0xd3 },
+	{ 0x24, 0xc4, 0xcb, 0xba, 0x07, 0x11, 0x98, 0x31,
+	  0xa7, 0x26, 0xb0, 0x53, 0x05, 0xd9, 0x6d, 0xa0,
+	  0x2f, 0xf8, 0xb1, 0x48, 0xf0, 0xda, 0x44, 0x0f,
+	  0xe2, 0x33, 0xbc, 0xaa, 0x32, 0xc7, 0x2f, 0x6f },
+	{ 0x5d, 0x20, 0x15, 0x10, 0x25, 0x00, 0x20, 0xb7,
+	  0x83, 0x68, 0x96, 0x88, 0xab, 0xbf, 0x8e, 0xcf,
+	  0x25, 0x94, 0xa9, 0x6a, 0x08, 0xf2, 0xbf, 0xec,
+	  0x6c, 0xe0, 0x57, 0x44, 0x65, 0xdd, 0xed, 0x71 },
+	{ 0x04, 0x3b, 0x97, 0xe3, 0x36, 0xee, 0x6f, 0xdb,
+	  0xbe, 0x2b, 0x50, 0xf2, 0x2a, 0xf8, 0x32, 0x75,
+	  0xa4, 0x08, 0x48, 0x05, 0xd2, 0xd5, 0x64, 0x59,
+	  0x62, 0x45, 0x4b, 0x6c, 0x9b, 0x80, 0x53, 0xa0 },
+	{ 0x56, 0x48, 0x35, 0xcb, 0xae, 0xa7, 0x74, 0x94,
+	  0x85, 0x68, 0xbe, 0x36, 0xcf, 0x52, 0xfc, 0xdd,
+	  0x83, 0x93, 0x4e, 0xb0, 0xa2, 0x75, 0x12, 0xdb,
+	  0xe3, 0xe2, 0xdb, 0x47, 0xb9, 0xe6, 0x63, 0x5a },
+	{ 0xf2, 0x1c, 0x33, 0xf4, 0x7b, 0xde, 0x40, 0xa2,
+	  0xa1, 0x01, 0xc9, 0xcd, 0xe8, 0x02, 0x7a, 0xaf,
+	  0x61, 0xa3, 0x13, 0x7d, 0xe2, 0x42, 0x2b, 0x30,
+	  0x03, 0x5a, 0x04, 0xc2, 0x70, 0x89, 0x41, 0x83 },
+	{ 0x9d, 0xb0, 0xef, 0x74, 0xe6, 0x6c, 0xbb, 0x84,
+	  0x2e, 0xb0, 0xe0, 0x73, 0x43, 0xa0, 0x3c, 0x5c,
+	  0x56, 0x7e, 0x37, 0x2b, 0x3f, 0x23, 0xb9, 0x43,
+	  0xc7, 0x88, 0xa4, 0xf2, 0x50, 0xf6, 0x78, 0x91 },
+	{ 0xab, 0x8d, 0x08, 0x65, 0x5f, 0xf1, 0xd3, 0xfe,
+	  0x87, 0x58, 0xd5, 0x62, 0x23, 0x5f, 0xd2, 0x3e,
+	  0x7c, 0xf9, 0xdc, 0xaa, 0xd6, 0x58, 0x87, 0x2a,
+	  0x49, 0xe5, 0xd3, 0x18, 0x3b, 0x6c, 0xce, 0xbd },
+	{ 0x6f, 0x27, 0xf7, 0x7e, 0x7b, 0xcf, 0x46, 0xa1,
+	  0xe9, 0x63, 0xad, 0xe0, 0x30, 0x97, 0x33, 0x54,
+	  0x30, 0x31, 0xdc, 0xcd, 0xd4, 0x7c, 0xaa, 0xc1,
+	  0x74, 0xd7, 0xd2, 0x7c, 0xe8, 0x07, 0x7e, 0x8b },
+	{ 0xe3, 0xcd, 0x54, 0xda, 0x7e, 0x44, 0x4c, 0xaa,
+	  0x62, 0x07, 0x56, 0x95, 0x25, 0xa6, 0x70, 0xeb,
+	  0xae, 0x12, 0x78, 0xde, 0x4e, 0x3f, 0xe2, 0x68,
+	  0x4b, 0x3e, 0x33, 0xf5, 0xef, 0x90, 0xcc, 0x1b },
+	{ 0xb2, 0xc3, 0xe3, 0x3a, 0x51, 0xd2, 0x2c, 0x4c,
+	  0x08, 0xfc, 0x09, 0x89, 0xc8, 0x73, 0xc9, 0xcc,
+	  0x41, 0x50, 0x57, 0x9b, 0x1e, 0x61, 0x63, 0xfa,
+	  0x69, 0x4a, 0xd5, 0x1d, 0x53, 0xd7, 0x12, 0xdc },
+	{ 0xbe, 0x7f, 0xda, 0x98, 0x3e, 0x13, 0x18, 0x9b,
+	  0x4c, 0x77, 0xe0, 0xa8, 0x09, 0x20, 0xb6, 0xe0,
+	  0xe0, 0xea, 0x80, 0xc3, 0xb8, 0x4d, 0xbe, 0x7e,
+	  0x71, 0x17, 0xd2, 0x53, 0xf4, 0x81, 0x12, 0xf4 },
+	{ 0xb6, 0x00, 0x8c, 0x28, 0xfa, 0xe0, 0x8a, 0xa4,
+	  0x27, 0xe5, 0xbd, 0x3a, 0xad, 0x36, 0xf1, 0x00,
+	  0x21, 0xf1, 0x6c, 0x77, 0xcf, 0xea, 0xbe, 0xd0,
+	  0x7f, 0x97, 0xcc, 0x7d, 0xc1, 0xf1, 0x28, 0x4a },
+	{ 0x6e, 0x4e, 0x67, 0x60, 0xc5, 0x38, 0xf2, 0xe9,
+	  0x7b, 0x3a, 0xdb, 0xfb, 0xbc, 0xde, 0x57, 0xf8,
+	  0x96, 0x6b, 0x7e, 0xa8, 0xfc, 0xb5, 0xbf, 0x7e,
+	  0xfe, 0xc9, 0x13, 0xfd, 0x2a, 0x2b, 0x0c, 0x55 },
+	{ 0x4a, 0xe5, 0x1f, 0xd1, 0x83, 0x4a, 0xa5, 0xbd,
+	  0x9a, 0x6f, 0x7e, 0xc3, 0x9f, 0xc6, 0x63, 0x33,
+	  0x8d, 0xc5, 0xd2, 0xe2, 0x07, 0x61, 0x56, 0x6d,
+	  0x90, 0xcc, 0x68, 0xb1, 0xcb, 0x87, 0x5e, 0xd8 },
+	{ 0xb6, 0x73, 0xaa, 0xd7, 0x5a, 0xb1, 0xfd, 0xb5,
+	  0x40, 0x1a, 0xbf, 0xa1, 0xbf, 0x89, 0xf3, 0xad,
+	  0xd2, 0xeb, 0xc4, 0x68, 0xdf, 0x36, 0x24, 0xa4,
+	  0x78, 0xf4, 0xfe, 0x85, 0x9d, 0x8d, 0x55, 0xe2 },
+	{ 0x13, 0xc9, 0x47, 0x1a, 0x98, 0x55, 0x91, 0x35,
+	  0x39, 0x83, 0x66, 0x60, 0x39, 0x8d, 0xa0, 0xf3,
+	  0xf9, 0x9a, 0xda, 0x08, 0x47, 0x9c, 0x69, 0xd1,
+	  0xb7, 0xfc, 0xaa, 0x34, 0x61, 0xdd, 0x7e, 0x59 },
+	{ 0x2c, 0x11, 0xf4, 0xa7, 0xf9, 0x9a, 0x1d, 0x23,
+	  0xa5, 0x8b, 0xb6, 0x36, 0x35, 0x0f, 0xe8, 0x49,
+	  0xf2, 0x9c, 0xba, 0xc1, 0xb2, 0xa1, 0x11, 0x2d,
+	  0x9f, 0x1e, 0xd5, 0xbc, 0x5b, 0x31, 0x3c, 0xcd },
+	{ 0xc7, 0xd3, 0xc0, 0x70, 0x6b, 0x11, 0xae, 0x74,
+	  0x1c, 0x05, 0xa1, 0xef, 0x15, 0x0d, 0xd6, 0x5b,
+	  0x54, 0x94, 0xd6, 0xd5, 0x4c, 0x9a, 0x86, 0xe2,
+	  0x61, 0x78, 0x54, 0xe6, 0xae, 0xee, 0xbb, 0xd9 },
+	{ 0x19, 0x4e, 0x10, 0xc9, 0x38, 0x93, 0xaf, 0xa0,
+	  0x64, 0xc3, 0xac, 0x04, 0xc0, 0xdd, 0x80, 0x8d,
+	  0x79, 0x1c, 0x3d, 0x4b, 0x75, 0x56, 0xe8, 0x9d,
+	  0x8d, 0x9c, 0xb2, 0x25, 0xc4, 0xb3, 0x33, 0x39 },
+	{ 0x6f, 0xc4, 0x98, 0x8b, 0x8f, 0x78, 0x54, 0x6b,
+	  0x16, 0x88, 0x99, 0x18, 0x45, 0x90, 0x8f, 0x13,
+	  0x4b, 0x6a, 0x48, 0x2e, 0x69, 0x94, 0xb3, 0xd4,
+	  0x83, 0x17, 0xbf, 0x08, 0xdb, 0x29, 0x21, 0x85 },
+	{ 0x56, 0x65, 0xbe, 0xb8, 0xb0, 0x95, 0x55, 0x25,
+	  0x81, 0x3b, 0x59, 0x81, 0xcd, 0x14, 0x2e, 0xd4,
+	  0xd0, 0x3f, 0xba, 0x38, 0xa6, 0xf3, 0xe5, 0xad,
+	  0x26, 0x8e, 0x0c, 0xc2, 0x70, 0xd1, 0xcd, 0x11 },
+	{ 0xb8, 0x83, 0xd6, 0x8f, 0x5f, 0xe5, 0x19, 0x36,
+	  0x43, 0x1b, 0xa4, 0x25, 0x67, 0x38, 0x05, 0x3b,
+	  0x1d, 0x04, 0x26, 0xd4, 0xcb, 0x64, 0xb1, 0x6e,
+	  0x83, 0xba, 0xdc, 0x5e, 0x9f, 0xbe, 0x3b, 0x81 },
+	{ 0x53, 0xe7, 0xb2, 0x7e, 0xa5, 0x9c, 0x2f, 0x6d,
+	  0xbb, 0x50, 0x76, 0x9e, 0x43, 0x55, 0x4d, 0xf3,
+	  0x5a, 0xf8, 0x9f, 0x48, 0x22, 0xd0, 0x46, 0x6b,
+	  0x00, 0x7d, 0xd6, 0xf6, 0xde, 0xaf, 0xff, 0x02 },
+	{ 0x1f, 0x1a, 0x02, 0x29, 0xd4, 0x64, 0x0f, 0x01,
+	  0x90, 0x15, 0x88, 0xd9, 0xde, 0xc2, 0x2d, 0x13,
+	  0xfc, 0x3e, 0xb3, 0x4a, 0x61, 0xb3, 0x29, 0x38,
+	  0xef, 0xbf, 0x53, 0x34, 0xb2, 0x80, 0x0a, 0xfa },
+	{ 0xc2, 0xb4, 0x05, 0xaf, 0xa0, 0xfa, 0x66, 0x68,
+	  0x85, 0x2a, 0xee, 0x4d, 0x88, 0x04, 0x08, 0x53,
+	  0xfa, 0xb8, 0x00, 0xe7, 0x2b, 0x57, 0x58, 0x14,
+	  0x18, 0xe5, 0x50, 0x6f, 0x21, 0x4c, 0x7d, 0x1f },
+	{ 0xc0, 0x8a, 0xa1, 0xc2, 0x86, 0xd7, 0x09, 0xfd,
+	  0xc7, 0x47, 0x37, 0x44, 0x97, 0x71, 0x88, 0xc8,
+	  0x95, 0xba, 0x01, 0x10, 0x14, 0x24, 0x7e, 0x4e,
+	  0xfa, 0x8d, 0x07, 0xe7, 0x8f, 0xec, 0x69, 0x5c },
+	{ 0xf0, 0x3f, 0x57, 0x89, 0xd3, 0x33, 0x6b, 0x80,
+	  0xd0, 0x02, 0xd5, 0x9f, 0xdf, 0x91, 0x8b, 0xdb,
+	  0x77, 0x5b, 0x00, 0x95, 0x6e, 0xd5, 0x52, 0x8e,
+	  0x86, 0xaa, 0x99, 0x4a, 0xcb, 0x38, 0xfe, 0x2d }
+};
+
+static const u8 blake2s_keyed_testvecs[][BLAKE2S_OUTBYTES] __initconst = {
+	{ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
+	  0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
+	  0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
+	  0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49 },
+	{ 0x40, 0xd1, 0x5f, 0xee, 0x7c, 0x32, 0x88, 0x30,
+	  0x16, 0x6a, 0xc3, 0xf9, 0x18, 0x65, 0x0f, 0x80,
+	  0x7e, 0x7e, 0x01, 0xe1, 0x77, 0x25, 0x8c, 0xdc,
+	  0x0a, 0x39, 0xb1, 0x1f, 0x59, 0x80, 0x66, 0xf1 },
+	{ 0x6b, 0xb7, 0x13, 0x00, 0x64, 0x4c, 0xd3, 0x99,
+	  0x1b, 0x26, 0xcc, 0xd4, 0xd2, 0x74, 0xac, 0xd1,
+	  0xad, 0xea, 0xb8, 0xb1, 0xd7, 0x91, 0x45, 0x46,
+	  0xc1, 0x19, 0x8b, 0xbe, 0x9f, 0xc9, 0xd8, 0x03 },
+	{ 0x1d, 0x22, 0x0d, 0xbe, 0x2e, 0xe1, 0x34, 0x66,
+	  0x1f, 0xdf, 0x6d, 0x9e, 0x74, 0xb4, 0x17, 0x04,
+	  0x71, 0x05, 0x56, 0xf2, 0xf6, 0xe5, 0xa0, 0x91,
+	  0xb2, 0x27, 0x69, 0x74, 0x45, 0xdb, 0xea, 0x6b },
+	{ 0xf6, 0xc3, 0xfb, 0xad, 0xb4, 0xcc, 0x68, 0x7a,
+	  0x00, 0x64, 0xa5, 0xbe, 0x6e, 0x79, 0x1b, 0xec,
+	  0x63, 0xb8, 0x68, 0xad, 0x62, 0xfb, 0xa6, 0x1b,
+	  0x37, 0x57, 0xef, 0x9c, 0xa5, 0x2e, 0x05, 0xb2 },
+	{ 0x49, 0xc1, 0xf2, 0x11, 0x88, 0xdf, 0xd7, 0x69,
+	  0xae, 0xa0, 0xe9, 0x11, 0xdd, 0x6b, 0x41, 0xf1,
+	  0x4d, 0xab, 0x10, 0x9d, 0x2b, 0x85, 0x97, 0x7a,
+	  0xa3, 0x08, 0x8b, 0x5c, 0x70, 0x7e, 0x85, 0x98 },
+	{ 0xfd, 0xd8, 0x99, 0x3d, 0xcd, 0x43, 0xf6, 0x96,
+	  0xd4, 0x4f, 0x3c, 0xea, 0x0f, 0xf3, 0x53, 0x45,
+	  0x23, 0x4e, 0xc8, 0xee, 0x08, 0x3e, 0xb3, 0xca,
+	  0xda, 0x01, 0x7c, 0x7f, 0x78, 0xc1, 0x71, 0x43 },
+	{ 0xe6, 0xc8, 0x12, 0x56, 0x37, 0x43, 0x8d, 0x09,
+	  0x05, 0xb7, 0x49, 0xf4, 0x65, 0x60, 0xac, 0x89,
+	  0xfd, 0x47, 0x1c, 0xf8, 0x69, 0x2e, 0x28, 0xfa,
+	  0xb9, 0x82, 0xf7, 0x3f, 0x01, 0x9b, 0x83, 0xa9 },
+	{ 0x19, 0xfc, 0x8c, 0xa6, 0x97, 0x9d, 0x60, 0xe6,
+	  0xed, 0xd3, 0xb4, 0x54, 0x1e, 0x2f, 0x96, 0x7c,
+	  0xed, 0x74, 0x0d, 0xf6, 0xec, 0x1e, 0xae, 0xbb,
+	  0xfe, 0x81, 0x38, 0x32, 0xe9, 0x6b, 0x29, 0x74 },
+	{ 0xa6, 0xad, 0x77, 0x7c, 0xe8, 0x81, 0xb5, 0x2b,
+	  0xb5, 0xa4, 0x42, 0x1a, 0xb6, 0xcd, 0xd2, 0xdf,
+	  0xba, 0x13, 0xe9, 0x63, 0x65, 0x2d, 0x4d, 0x6d,
+	  0x12, 0x2a, 0xee, 0x46, 0x54, 0x8c, 0x14, 0xa7 },
+	{ 0xf5, 0xc4, 0xb2, 0xba, 0x1a, 0x00, 0x78, 0x1b,
+	  0x13, 0xab, 0xa0, 0x42, 0x52, 0x42, 0xc6, 0x9c,
+	  0xb1, 0x55, 0x2f, 0x3f, 0x71, 0xa9, 0xa3, 0xbb,
+	  0x22, 0xb4, 0xa6, 0xb4, 0x27, 0x7b, 0x46, 0xdd },
+	{ 0xe3, 0x3c, 0x4c, 0x9b, 0xd0, 0xcc, 0x7e, 0x45,
+	  0xc8, 0x0e, 0x65, 0xc7, 0x7f, 0xa5, 0x99, 0x7f,
+	  0xec, 0x70, 0x02, 0x73, 0x85, 0x41, 0x50, 0x9e,
+	  0x68, 0xa9, 0x42, 0x38, 0x91, 0xe8, 0x22, 0xa3 },
+	{ 0xfb, 0xa1, 0x61, 0x69, 0xb2, 0xc3, 0xee, 0x10,
+	  0x5b, 0xe6, 0xe1, 0xe6, 0x50, 0xe5, 0xcb, 0xf4,
+	  0x07, 0x46, 0xb6, 0x75, 0x3d, 0x03, 0x6a, 0xb5,
+	  0x51, 0x79, 0x01, 0x4a, 0xd7, 0xef, 0x66, 0x51 },
+	{ 0xf5, 0xc4, 0xbe, 0xc6, 0xd6, 0x2f, 0xc6, 0x08,
+	  0xbf, 0x41, 0xcc, 0x11, 0x5f, 0x16, 0xd6, 0x1c,
+	  0x7e, 0xfd, 0x3f, 0xf6, 0xc6, 0x56, 0x92, 0xbb,
+	  0xe0, 0xaf, 0xff, 0xb1, 0xfe, 0xde, 0x74, 0x75 },
+	{ 0xa4, 0x86, 0x2e, 0x76, 0xdb, 0x84, 0x7f, 0x05,
+	  0xba, 0x17, 0xed, 0xe5, 0xda, 0x4e, 0x7f, 0x91,
+	  0xb5, 0x92, 0x5c, 0xf1, 0xad, 0x4b, 0xa1, 0x27,
+	  0x32, 0xc3, 0x99, 0x57, 0x42, 0xa5, 0xcd, 0x6e },
+	{ 0x65, 0xf4, 0xb8, 0x60, 0xcd, 0x15, 0xb3, 0x8e,
+	  0xf8, 0x14, 0xa1, 0xa8, 0x04, 0x31, 0x4a, 0x55,
+	  0xbe, 0x95, 0x3c, 0xaa, 0x65, 0xfd, 0x75, 0x8a,
+	  0xd9, 0x89, 0xff, 0x34, 0xa4, 0x1c, 0x1e, 0xea },
+	{ 0x19, 0xba, 0x23, 0x4f, 0x0a, 0x4f, 0x38, 0x63,
+	  0x7d, 0x18, 0x39, 0xf9, 0xd9, 0xf7, 0x6a, 0xd9,
+	  0x1c, 0x85, 0x22, 0x30, 0x71, 0x43, 0xc9, 0x7d,
+	  0x5f, 0x93, 0xf6, 0x92, 0x74, 0xce, 0xc9, 0xa7 },
+	{ 0x1a, 0x67, 0x18, 0x6c, 0xa4, 0xa5, 0xcb, 0x8e,
+	  0x65, 0xfc, 0xa0, 0xe2, 0xec, 0xbc, 0x5d, 0xdc,
+	  0x14, 0xae, 0x38, 0x1b, 0xb8, 0xbf, 0xfe, 0xb9,
+	  0xe0, 0xa1, 0x03, 0x44, 0x9e, 0x3e, 0xf0, 0x3c },
+	{ 0xaf, 0xbe, 0xa3, 0x17, 0xb5, 0xa2, 0xe8, 0x9c,
+	  0x0b, 0xd9, 0x0c, 0xcf, 0x5d, 0x7f, 0xd0, 0xed,
+	  0x57, 0xfe, 0x58, 0x5e, 0x4b, 0xe3, 0x27, 0x1b,
+	  0x0a, 0x6b, 0xf0, 0xf5, 0x78, 0x6b, 0x0f, 0x26 },
+	{ 0xf1, 0xb0, 0x15, 0x58, 0xce, 0x54, 0x12, 0x62,
+	  0xf5, 0xec, 0x34, 0x29, 0x9d, 0x6f, 0xb4, 0x09,
+	  0x00, 0x09, 0xe3, 0x43, 0x4b, 0xe2, 0xf4, 0x91,
+	  0x05, 0xcf, 0x46, 0xaf, 0x4d, 0x2d, 0x41, 0x24 },
+	{ 0x13, 0xa0, 0xa0, 0xc8, 0x63, 0x35, 0x63, 0x5e,
+	  0xaa, 0x74, 0xca, 0x2d, 0x5d, 0x48, 0x8c, 0x79,
+	  0x7b, 0xbb, 0x4f, 0x47, 0xdc, 0x07, 0x10, 0x50,
+	  0x15, 0xed, 0x6a, 0x1f, 0x33, 0x09, 0xef, 0xce },
+	{ 0x15, 0x80, 0xaf, 0xee, 0xbe, 0xbb, 0x34, 0x6f,
+	  0x94, 0xd5, 0x9f, 0xe6, 0x2d, 0xa0, 0xb7, 0x92,
+	  0x37, 0xea, 0xd7, 0xb1, 0x49, 0x1f, 0x56, 0x67,
+	  0xa9, 0x0e, 0x45, 0xed, 0xf6, 0xca, 0x8b, 0x03 },
+	{ 0x20, 0xbe, 0x1a, 0x87, 0x5b, 0x38, 0xc5, 0x73,
+	  0xdd, 0x7f, 0xaa, 0xa0, 0xde, 0x48, 0x9d, 0x65,
+	  0x5c, 0x11, 0xef, 0xb6, 0xa5, 0x52, 0x69, 0x8e,
+	  0x07, 0xa2, 0xd3, 0x31, 0xb5, 0xf6, 0x55, 0xc3 },
+	{ 0xbe, 0x1f, 0xe3, 0xc4, 0xc0, 0x40, 0x18, 0xc5,
+	  0x4c, 0x4a, 0x0f, 0x6b, 0x9a, 0x2e, 0xd3, 0xc5,
+	  0x3a, 0xbe, 0x3a, 0x9f, 0x76, 0xb4, 0xd2, 0x6d,
+	  0xe5, 0x6f, 0xc9, 0xae, 0x95, 0x05, 0x9a, 0x99 },
+	{ 0xe3, 0xe3, 0xac, 0xe5, 0x37, 0xeb, 0x3e, 0xdd,
+	  0x84, 0x63, 0xd9, 0xad, 0x35, 0x82, 0xe1, 0x3c,
+	  0xf8, 0x65, 0x33, 0xff, 0xde, 0x43, 0xd6, 0x68,
+	  0xdd, 0x2e, 0x93, 0xbb, 0xdb, 0xd7, 0x19, 0x5a },
+	{ 0x11, 0x0c, 0x50, 0xc0, 0xbf, 0x2c, 0x6e, 0x7a,
+	  0xeb, 0x7e, 0x43, 0x5d, 0x92, 0xd1, 0x32, 0xab,
+	  0x66, 0x55, 0x16, 0x8e, 0x78, 0xa2, 0xde, 0xcd,
+	  0xec, 0x33, 0x30, 0x77, 0x76, 0x84, 0xd9, 0xc1 },
+	{ 0xe9, 0xba, 0x8f, 0x50, 0x5c, 0x9c, 0x80, 0xc0,
+	  0x86, 0x66, 0xa7, 0x01, 0xf3, 0x36, 0x7e, 0x6c,
+	  0xc6, 0x65, 0xf3, 0x4b, 0x22, 0xe7, 0x3c, 0x3c,
+	  0x04, 0x17, 0xeb, 0x1c, 0x22, 0x06, 0x08, 0x2f },
+	{ 0x26, 0xcd, 0x66, 0xfc, 0xa0, 0x23, 0x79, 0xc7,
+	  0x6d, 0xf1, 0x23, 0x17, 0x05, 0x2b, 0xca, 0xfd,
+	  0x6c, 0xd8, 0xc3, 0xa7, 0xb8, 0x90, 0xd8, 0x05,
+	  0xf3, 0x6c, 0x49, 0x98, 0x97, 0x82, 0x43, 0x3a },
+	{ 0x21, 0x3f, 0x35, 0x96, 0xd6, 0xe3, 0xa5, 0xd0,
+	  0xe9, 0x93, 0x2c, 0xd2, 0x15, 0x91, 0x46, 0x01,
+	  0x5e, 0x2a, 0xbc, 0x94, 0x9f, 0x47, 0x29, 0xee,
+	  0x26, 0x32, 0xfe, 0x1e, 0xdb, 0x78, 0xd3, 0x37 },
+	{ 0x10, 0x15, 0xd7, 0x01, 0x08, 0xe0, 0x3b, 0xe1,
+	  0xc7, 0x02, 0xfe, 0x97, 0x25, 0x36, 0x07, 0xd1,
+	  0x4a, 0xee, 0x59, 0x1f, 0x24, 0x13, 0xea, 0x67,
+	  0x87, 0x42, 0x7b, 0x64, 0x59, 0xff, 0x21, 0x9a },
+	{ 0x3c, 0xa9, 0x89, 0xde, 0x10, 0xcf, 0xe6, 0x09,
+	  0x90, 0x94, 0x72, 0xc8, 0xd3, 0x56, 0x10, 0x80,
+	  0x5b, 0x2f, 0x97, 0x77, 0x34, 0xcf, 0x65, 0x2c,
+	  0xc6, 0x4b, 0x3b, 0xfc, 0x88, 0x2d, 0x5d, 0x89 },
+	{ 0xb6, 0x15, 0x6f, 0x72, 0xd3, 0x80, 0xee, 0x9e,
+	  0xa6, 0xac, 0xd1, 0x90, 0x46, 0x4f, 0x23, 0x07,
+	  0xa5, 0xc1, 0x79, 0xef, 0x01, 0xfd, 0x71, 0xf9,
+	  0x9f, 0x2d, 0x0f, 0x7a, 0x57, 0x36, 0x0a, 0xea },
+	{ 0xc0, 0x3b, 0xc6, 0x42, 0xb2, 0x09, 0x59, 0xcb,
+	  0xe1, 0x33, 0xa0, 0x30, 0x3e, 0x0c, 0x1a, 0xbf,
+	  0xf3, 0xe3, 0x1e, 0xc8, 0xe1, 0xa3, 0x28, 0xec,
+	  0x85, 0x65, 0xc3, 0x6d, 0xec, 0xff, 0x52, 0x65 },
+	{ 0x2c, 0x3e, 0x08, 0x17, 0x6f, 0x76, 0x0c, 0x62,
+	  0x64, 0xc3, 0xa2, 0xcd, 0x66, 0xfe, 0xc6, 0xc3,
+	  0xd7, 0x8d, 0xe4, 0x3f, 0xc1, 0x92, 0x45, 0x7b,
+	  0x2a, 0x4a, 0x66, 0x0a, 0x1e, 0x0e, 0xb2, 0x2b },
+	{ 0xf7, 0x38, 0xc0, 0x2f, 0x3c, 0x1b, 0x19, 0x0c,
+	  0x51, 0x2b, 0x1a, 0x32, 0xde, 0xab, 0xf3, 0x53,
+	  0x72, 0x8e, 0x0e, 0x9a, 0xb0, 0x34, 0x49, 0x0e,
+	  0x3c, 0x34, 0x09, 0x94, 0x6a, 0x97, 0xae, 0xec },
+	{ 0x8b, 0x18, 0x80, 0xdf, 0x30, 0x1c, 0xc9, 0x63,
+	  0x41, 0x88, 0x11, 0x08, 0x89, 0x64, 0x83, 0x92,
+	  0x87, 0xff, 0x7f, 0xe3, 0x1c, 0x49, 0xea, 0x6e,
+	  0xbd, 0x9e, 0x48, 0xbd, 0xee, 0xe4, 0x97, 0xc5 },
+	{ 0x1e, 0x75, 0xcb, 0x21, 0xc6, 0x09, 0x89, 0x02,
+	  0x03, 0x75, 0xf1, 0xa7, 0xa2, 0x42, 0x83, 0x9f,
+	  0x0b, 0x0b, 0x68, 0x97, 0x3a, 0x4c, 0x2a, 0x05,
+	  0xcf, 0x75, 0x55, 0xed, 0x5a, 0xae, 0xc4, 0xc1 },
+	{ 0x62, 0xbf, 0x8a, 0x9c, 0x32, 0xa5, 0xbc, 0xcf,
+	  0x29, 0x0b, 0x6c, 0x47, 0x4d, 0x75, 0xb2, 0xa2,
+	  0xa4, 0x09, 0x3f, 0x1a, 0x9e, 0x27, 0x13, 0x94,
+	  0x33, 0xa8, 0xf2, 0xb3, 0xbc, 0xe7, 0xb8, 0xd7 },
+	{ 0x16, 0x6c, 0x83, 0x50, 0xd3, 0x17, 0x3b, 0x5e,
+	  0x70, 0x2b, 0x78, 0x3d, 0xfd, 0x33, 0xc6, 0x6e,
+	  0xe0, 0x43, 0x27, 0x42, 0xe9, 0xb9, 0x2b, 0x99,
+	  0x7f, 0xd2, 0x3c, 0x60, 0xdc, 0x67, 0x56, 0xca },
+	{ 0x04, 0x4a, 0x14, 0xd8, 0x22, 0xa9, 0x0c, 0xac,
+	  0xf2, 0xf5, 0xa1, 0x01, 0x42, 0x8a, 0xdc, 0x8f,
+	  0x41, 0x09, 0x38, 0x6c, 0xcb, 0x15, 0x8b, 0xf9,
+	  0x05, 0xc8, 0x61, 0x8b, 0x8e, 0xe2, 0x4e, 0xc3 },
+	{ 0x38, 0x7d, 0x39, 0x7e, 0xa4, 0x3a, 0x99, 0x4b,
+	  0xe8, 0x4d, 0x2d, 0x54, 0x4a, 0xfb, 0xe4, 0x81,
+	  0xa2, 0x00, 0x0f, 0x55, 0x25, 0x26, 0x96, 0xbb,
+	  0xa2, 0xc5, 0x0c, 0x8e, 0xbd, 0x10, 0x13, 0x47 },
+	{ 0x56, 0xf8, 0xcc, 0xf1, 0xf8, 0x64, 0x09, 0xb4,
+	  0x6c, 0xe3, 0x61, 0x66, 0xae, 0x91, 0x65, 0x13,
+	  0x84, 0x41, 0x57, 0x75, 0x89, 0xdb, 0x08, 0xcb,
+	  0xc5, 0xf6, 0x6c, 0xa2, 0x97, 0x43, 0xb9, 0xfd },
+	{ 0x97, 0x06, 0xc0, 0x92, 0xb0, 0x4d, 0x91, 0xf5,
+	  0x3d, 0xff, 0x91, 0xfa, 0x37, 0xb7, 0x49, 0x3d,
+	  0x28, 0xb5, 0x76, 0xb5, 0xd7, 0x10, 0x46, 0x9d,
+	  0xf7, 0x94, 0x01, 0x66, 0x22, 0x36, 0xfc, 0x03 },
+	{ 0x87, 0x79, 0x68, 0x68, 0x6c, 0x06, 0x8c, 0xe2,
+	  0xf7, 0xe2, 0xad, 0xcf, 0xf6, 0x8b, 0xf8, 0x74,
+	  0x8e, 0xdf, 0x3c, 0xf8, 0x62, 0xcf, 0xb4, 0xd3,
+	  0x94, 0x7a, 0x31, 0x06, 0x95, 0x80, 0x54, 0xe3 },
+	{ 0x88, 0x17, 0xe5, 0x71, 0x98, 0x79, 0xac, 0xf7,
+	  0x02, 0x47, 0x87, 0xec, 0xcd, 0xb2, 0x71, 0x03,
+	  0x55, 0x66, 0xcf, 0xa3, 0x33, 0xe0, 0x49, 0x40,
+	  0x7c, 0x01, 0x78, 0xcc, 0xc5, 0x7a, 0x5b, 0x9f },
+	{ 0x89, 0x38, 0x24, 0x9e, 0x4b, 0x50, 0xca, 0xda,
+	  0xcc, 0xdf, 0x5b, 0x18, 0x62, 0x13, 0x26, 0xcb,
+	  0xb1, 0x52, 0x53, 0xe3, 0x3a, 0x20, 0xf5, 0x63,
+	  0x6e, 0x99, 0x5d, 0x72, 0x47, 0x8d, 0xe4, 0x72 },
+	{ 0xf1, 0x64, 0xab, 0xba, 0x49, 0x63, 0xa4, 0x4d,
+	  0x10, 0x72, 0x57, 0xe3, 0x23, 0x2d, 0x90, 0xac,
+	  0xa5, 0xe6, 0x6a, 0x14, 0x08, 0x24, 0x8c, 0x51,
+	  0x74, 0x1e, 0x99, 0x1d, 0xb5, 0x22, 0x77, 0x56 },
+	{ 0xd0, 0x55, 0x63, 0xe2, 0xb1, 0xcb, 0xa0, 0xc4,
+	  0xa2, 0xa1, 0xe8, 0xbd, 0xe3, 0xa1, 0xa0, 0xd9,
+	  0xf5, 0xb4, 0x0c, 0x85, 0xa0, 0x70, 0xd6, 0xf5,
+	  0xfb, 0x21, 0x06, 0x6e, 0xad, 0x5d, 0x06, 0x01 },
+	{ 0x03, 0xfb, 0xb1, 0x63, 0x84, 0xf0, 0xa3, 0x86,
+	  0x6f, 0x4c, 0x31, 0x17, 0x87, 0x76, 0x66, 0xef,
+	  0xbf, 0x12, 0x45, 0x97, 0x56, 0x4b, 0x29, 0x3d,
+	  0x4a, 0xab, 0x0d, 0x26, 0x9f, 0xab, 0xdd, 0xfa },
+	{ 0x5f, 0xa8, 0x48, 0x6a, 0xc0, 0xe5, 0x29, 0x64,
+	  0xd1, 0x88, 0x1b, 0xbe, 0x33, 0x8e, 0xb5, 0x4b,
+	  0xe2, 0xf7, 0x19, 0x54, 0x92, 0x24, 0x89, 0x20,
+	  0x57, 0xb4, 0xda, 0x04, 0xba, 0x8b, 0x34, 0x75 },
+	{ 0xcd, 0xfa, 0xbc, 0xee, 0x46, 0x91, 0x11, 0x11,
+	  0x23, 0x6a, 0x31, 0x70, 0x8b, 0x25, 0x39, 0xd7,
+	  0x1f, 0xc2, 0x11, 0xd9, 0xb0, 0x9c, 0x0d, 0x85,
+	  0x30, 0xa1, 0x1e, 0x1d, 0xbf, 0x6e, 0xed, 0x01 },
+	{ 0x4f, 0x82, 0xde, 0x03, 0xb9, 0x50, 0x47, 0x93,
+	  0xb8, 0x2a, 0x07, 0xa0, 0xbd, 0xcd, 0xff, 0x31,
+	  0x4d, 0x75, 0x9e, 0x7b, 0x62, 0xd2, 0x6b, 0x78,
+	  0x49, 0x46, 0xb0, 0xd3, 0x6f, 0x91, 0x6f, 0x52 },
+	{ 0x25, 0x9e, 0xc7, 0xf1, 0x73, 0xbc, 0xc7, 0x6a,
+	  0x09, 0x94, 0xc9, 0x67, 0xb4, 0xf5, 0xf0, 0x24,
+	  0xc5, 0x60, 0x57, 0xfb, 0x79, 0xc9, 0x65, 0xc4,
+	  0xfa, 0xe4, 0x18, 0x75, 0xf0, 0x6a, 0x0e, 0x4c },
+	{ 0x19, 0x3c, 0xc8, 0xe7, 0xc3, 0xe0, 0x8b, 0xb3,
+	  0x0f, 0x54, 0x37, 0xaa, 0x27, 0xad, 0xe1, 0xf1,
+	  0x42, 0x36, 0x9b, 0x24, 0x6a, 0x67, 0x5b, 0x23,
+	  0x83, 0xe6, 0xda, 0x9b, 0x49, 0xa9, 0x80, 0x9e },
+	{ 0x5c, 0x10, 0x89, 0x6f, 0x0e, 0x28, 0x56, 0xb2,
+	  0xa2, 0xee, 0xe0, 0xfe, 0x4a, 0x2c, 0x16, 0x33,
+	  0x56, 0x5d, 0x18, 0xf0, 0xe9, 0x3e, 0x1f, 0xab,
+	  0x26, 0xc3, 0x73, 0xe8, 0xf8, 0x29, 0x65, 0x4d },
+	{ 0xf1, 0x60, 0x12, 0xd9, 0x3f, 0x28, 0x85, 0x1a,
+	  0x1e, 0xb9, 0x89, 0xf5, 0xd0, 0xb4, 0x3f, 0x3f,
+	  0x39, 0xca, 0x73, 0xc9, 0xa6, 0x2d, 0x51, 0x81,
+	  0xbf, 0xf2, 0x37, 0x53, 0x6b, 0xd3, 0x48, 0xc3 },
+	{ 0x29, 0x66, 0xb3, 0xcf, 0xae, 0x1e, 0x44, 0xea,
+	  0x99, 0x6d, 0xc5, 0xd6, 0x86, 0xcf, 0x25, 0xfa,
+	  0x05, 0x3f, 0xb6, 0xf6, 0x72, 0x01, 0xb9, 0xe4,
+	  0x6e, 0xad, 0xe8, 0x5d, 0x0a, 0xd6, 0xb8, 0x06 },
+	{ 0xdd, 0xb8, 0x78, 0x24, 0x85, 0xe9, 0x00, 0xbc,
+	  0x60, 0xbc, 0xf4, 0xc3, 0x3a, 0x6f, 0xd5, 0x85,
+	  0x68, 0x0c, 0xc6, 0x83, 0xd5, 0x16, 0xef, 0xa0,
+	  0x3e, 0xb9, 0x98, 0x5f, 0xad, 0x87, 0x15, 0xfb },
+	{ 0x4c, 0x4d, 0x6e, 0x71, 0xae, 0xa0, 0x57, 0x86,
+	  0x41, 0x31, 0x48, 0xfc, 0x7a, 0x78, 0x6b, 0x0e,
+	  0xca, 0xf5, 0x82, 0xcf, 0xf1, 0x20, 0x9f, 0x5a,
+	  0x80, 0x9f, 0xba, 0x85, 0x04, 0xce, 0x66, 0x2c },
+	{ 0xfb, 0x4c, 0x5e, 0x86, 0xd7, 0xb2, 0x22, 0x9b,
+	  0x99, 0xb8, 0xba, 0x6d, 0x94, 0xc2, 0x47, 0xef,
+	  0x96, 0x4a, 0xa3, 0xa2, 0xba, 0xe8, 0xed, 0xc7,
+	  0x75, 0x69, 0xf2, 0x8d, 0xbb, 0xff, 0x2d, 0x4e },
+	{ 0xe9, 0x4f, 0x52, 0x6d, 0xe9, 0x01, 0x96, 0x33,
+	  0xec, 0xd5, 0x4a, 0xc6, 0x12, 0x0f, 0x23, 0x95,
+	  0x8d, 0x77, 0x18, 0xf1, 0xe7, 0x71, 0x7b, 0xf3,
+	  0x29, 0x21, 0x1a, 0x4f, 0xae, 0xed, 0x4e, 0x6d },
+	{ 0xcb, 0xd6, 0x66, 0x0a, 0x10, 0xdb, 0x3f, 0x23,
+	  0xf7, 0xa0, 0x3d, 0x4b, 0x9d, 0x40, 0x44, 0xc7,
+	  0x93, 0x2b, 0x28, 0x01, 0xac, 0x89, 0xd6, 0x0b,
+	  0xc9, 0xeb, 0x92, 0xd6, 0x5a, 0x46, 0xc2, 0xa0 },
+	{ 0x88, 0x18, 0xbb, 0xd3, 0xdb, 0x4d, 0xc1, 0x23,
+	  0xb2, 0x5c, 0xbb, 0xa5, 0xf5, 0x4c, 0x2b, 0xc4,
+	  0xb3, 0xfc, 0xf9, 0xbf, 0x7d, 0x7a, 0x77, 0x09,
+	  0xf4, 0xae, 0x58, 0x8b, 0x26, 0x7c, 0x4e, 0xce },
+	{ 0xc6, 0x53, 0x82, 0x51, 0x3f, 0x07, 0x46, 0x0d,
+	  0xa3, 0x98, 0x33, 0xcb, 0x66, 0x6c, 0x5e, 0xd8,
+	  0x2e, 0x61, 0xb9, 0xe9, 0x98, 0xf4, 0xb0, 0xc4,
+	  0x28, 0x7c, 0xee, 0x56, 0xc3, 0xcc, 0x9b, 0xcd },
+	{ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
+	  0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
+	  0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
+	  0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4 },
+	{ 0x21, 0xfe, 0x0c, 0xeb, 0x00, 0x52, 0xbe, 0x7f,
+	  0xb0, 0xf0, 0x04, 0x18, 0x7c, 0xac, 0xd7, 0xde,
+	  0x67, 0xfa, 0x6e, 0xb0, 0x93, 0x8d, 0x92, 0x76,
+	  0x77, 0xf2, 0x39, 0x8c, 0x13, 0x23, 0x17, 0xa8 },
+	{ 0x2e, 0xf7, 0x3f, 0x3c, 0x26, 0xf1, 0x2d, 0x93,
+	  0x88, 0x9f, 0x3c, 0x78, 0xb6, 0xa6, 0x6c, 0x1d,
+	  0x52, 0xb6, 0x49, 0xdc, 0x9e, 0x85, 0x6e, 0x2c,
+	  0x17, 0x2e, 0xa7, 0xc5, 0x8a, 0xc2, 0xb5, 0xe3 },
+	{ 0x38, 0x8a, 0x3c, 0xd5, 0x6d, 0x73, 0x86, 0x7a,
+	  0xbb, 0x5f, 0x84, 0x01, 0x49, 0x2b, 0x6e, 0x26,
+	  0x81, 0xeb, 0x69, 0x85, 0x1e, 0x76, 0x7f, 0xd8,
+	  0x42, 0x10, 0xa5, 0x60, 0x76, 0xfb, 0x3d, 0xd3 },
+	{ 0xaf, 0x53, 0x3e, 0x02, 0x2f, 0xc9, 0x43, 0x9e,
+	  0x4e, 0x3c, 0xb8, 0x38, 0xec, 0xd1, 0x86, 0x92,
+	  0x23, 0x2a, 0xdf, 0x6f, 0xe9, 0x83, 0x95, 0x26,
+	  0xd3, 0xc3, 0xdd, 0x1b, 0x71, 0x91, 0x0b, 0x1a },
+	{ 0x75, 0x1c, 0x09, 0xd4, 0x1a, 0x93, 0x43, 0x88,
+	  0x2a, 0x81, 0xcd, 0x13, 0xee, 0x40, 0x81, 0x8d,
+	  0x12, 0xeb, 0x44, 0xc6, 0xc7, 0xf4, 0x0d, 0xf1,
+	  0x6e, 0x4a, 0xea, 0x8f, 0xab, 0x91, 0x97, 0x2a },
+	{ 0x5b, 0x73, 0xdd, 0xb6, 0x8d, 0x9d, 0x2b, 0x0a,
+	  0xa2, 0x65, 0xa0, 0x79, 0x88, 0xd6, 0xb8, 0x8a,
+	  0xe9, 0xaa, 0xc5, 0x82, 0xaf, 0x83, 0x03, 0x2f,
+	  0x8a, 0x9b, 0x21, 0xa2, 0xe1, 0xb7, 0xbf, 0x18 },
+	{ 0x3d, 0xa2, 0x91, 0x26, 0xc7, 0xc5, 0xd7, 0xf4,
+	  0x3e, 0x64, 0x24, 0x2a, 0x79, 0xfe, 0xaa, 0x4e,
+	  0xf3, 0x45, 0x9c, 0xde, 0xcc, 0xc8, 0x98, 0xed,
+	  0x59, 0xa9, 0x7f, 0x6e, 0xc9, 0x3b, 0x9d, 0xab },
+	{ 0x56, 0x6d, 0xc9, 0x20, 0x29, 0x3d, 0xa5, 0xcb,
+	  0x4f, 0xe0, 0xaa, 0x8a, 0xbd, 0xa8, 0xbb, 0xf5,
+	  0x6f, 0x55, 0x23, 0x13, 0xbf, 0xf1, 0x90, 0x46,
+	  0x64, 0x1e, 0x36, 0x15, 0xc1, 0xe3, 0xed, 0x3f },
+	{ 0x41, 0x15, 0xbe, 0xa0, 0x2f, 0x73, 0xf9, 0x7f,
+	  0x62, 0x9e, 0x5c, 0x55, 0x90, 0x72, 0x0c, 0x01,
+	  0xe7, 0xe4, 0x49, 0xae, 0x2a, 0x66, 0x97, 0xd4,
+	  0xd2, 0x78, 0x33, 0x21, 0x30, 0x36, 0x92, 0xf9 },
+	{ 0x4c, 0xe0, 0x8f, 0x47, 0x62, 0x46, 0x8a, 0x76,
+	  0x70, 0x01, 0x21, 0x64, 0x87, 0x8d, 0x68, 0x34,
+	  0x0c, 0x52, 0xa3, 0x5e, 0x66, 0xc1, 0x88, 0x4d,
+	  0x5c, 0x86, 0x48, 0x89, 0xab, 0xc9, 0x66, 0x77 },
+	{ 0x81, 0xea, 0x0b, 0x78, 0x04, 0x12, 0x4e, 0x0c,
+	  0x22, 0xea, 0x5f, 0xc7, 0x11, 0x04, 0xa2, 0xaf,
+	  0xcb, 0x52, 0xa1, 0xfa, 0x81, 0x6f, 0x3e, 0xcb,
+	  0x7d, 0xcb, 0x5d, 0x9d, 0xea, 0x17, 0x86, 0xd0 },
+	{ 0xfe, 0x36, 0x27, 0x33, 0xb0, 0x5f, 0x6b, 0xed,
+	  0xaf, 0x93, 0x79, 0xd7, 0xf7, 0x93, 0x6e, 0xde,
+	  0x20, 0x9b, 0x1f, 0x83, 0x23, 0xc3, 0x92, 0x25,
+	  0x49, 0xd9, 0xe7, 0x36, 0x81, 0xb5, 0xdb, 0x7b },
+	{ 0xef, 0xf3, 0x7d, 0x30, 0xdf, 0xd2, 0x03, 0x59,
+	  0xbe, 0x4e, 0x73, 0xfd, 0xf4, 0x0d, 0x27, 0x73,
+	  0x4b, 0x3d, 0xf9, 0x0a, 0x97, 0xa5, 0x5e, 0xd7,
+	  0x45, 0x29, 0x72, 0x94, 0xca, 0x85, 0xd0, 0x9f },
+	{ 0x17, 0x2f, 0xfc, 0x67, 0x15, 0x3d, 0x12, 0xe0,
+	  0xca, 0x76, 0xa8, 0xb6, 0xcd, 0x5d, 0x47, 0x31,
+	  0x88, 0x5b, 0x39, 0xce, 0x0c, 0xac, 0x93, 0xa8,
+	  0x97, 0x2a, 0x18, 0x00, 0x6c, 0x8b, 0x8b, 0xaf },
+	{ 0xc4, 0x79, 0x57, 0xf1, 0xcc, 0x88, 0xe8, 0x3e,
+	  0xf9, 0x44, 0x58, 0x39, 0x70, 0x9a, 0x48, 0x0a,
+	  0x03, 0x6b, 0xed, 0x5f, 0x88, 0xac, 0x0f, 0xcc,
+	  0x8e, 0x1e, 0x70, 0x3f, 0xfa, 0xac, 0x13, 0x2c },
+	{ 0x30, 0xf3, 0x54, 0x83, 0x70, 0xcf, 0xdc, 0xed,
+	  0xa5, 0xc3, 0x7b, 0x56, 0x9b, 0x61, 0x75, 0xe7,
+	  0x99, 0xee, 0xf1, 0xa6, 0x2a, 0xaa, 0x94, 0x32,
+	  0x45, 0xae, 0x76, 0x69, 0xc2, 0x27, 0xa7, 0xb5 },
+	{ 0xc9, 0x5d, 0xcb, 0x3c, 0xf1, 0xf2, 0x7d, 0x0e,
+	  0xef, 0x2f, 0x25, 0xd2, 0x41, 0x38, 0x70, 0x90,
+	  0x4a, 0x87, 0x7c, 0x4a, 0x56, 0xc2, 0xde, 0x1e,
+	  0x83, 0xe2, 0xbc, 0x2a, 0xe2, 0xe4, 0x68, 0x21 },
+	{ 0xd5, 0xd0, 0xb5, 0xd7, 0x05, 0x43, 0x4c, 0xd4,
+	  0x6b, 0x18, 0x57, 0x49, 0xf6, 0x6b, 0xfb, 0x58,
+	  0x36, 0xdc, 0xdf, 0x6e, 0xe5, 0x49, 0xa2, 0xb7,
+	  0xa4, 0xae, 0xe7, 0xf5, 0x80, 0x07, 0xca, 0xaf },
+	{ 0xbb, 0xc1, 0x24, 0xa7, 0x12, 0xf1, 0x5d, 0x07,
+	  0xc3, 0x00, 0xe0, 0x5b, 0x66, 0x83, 0x89, 0xa4,
+	  0x39, 0xc9, 0x17, 0x77, 0xf7, 0x21, 0xf8, 0x32,
+	  0x0c, 0x1c, 0x90, 0x78, 0x06, 0x6d, 0x2c, 0x7e },
+	{ 0xa4, 0x51, 0xb4, 0x8c, 0x35, 0xa6, 0xc7, 0x85,
+	  0x4c, 0xfa, 0xae, 0x60, 0x26, 0x2e, 0x76, 0x99,
+	  0x08, 0x16, 0x38, 0x2a, 0xc0, 0x66, 0x7e, 0x5a,
+	  0x5c, 0x9e, 0x1b, 0x46, 0xc4, 0x34, 0x2d, 0xdf },
+	{ 0xb0, 0xd1, 0x50, 0xfb, 0x55, 0xe7, 0x78, 0xd0,
+	  0x11, 0x47, 0xf0, 0xb5, 0xd8, 0x9d, 0x99, 0xec,
+	  0xb2, 0x0f, 0xf0, 0x7e, 0x5e, 0x67, 0x60, 0xd6,
+	  0xb6, 0x45, 0xeb, 0x5b, 0x65, 0x4c, 0x62, 0x2b },
+	{ 0x34, 0xf7, 0x37, 0xc0, 0xab, 0x21, 0x99, 0x51,
+	  0xee, 0xe8, 0x9a, 0x9f, 0x8d, 0xac, 0x29, 0x9c,
+	  0x9d, 0x4c, 0x38, 0xf3, 0x3f, 0xa4, 0x94, 0xc5,
+	  0xc6, 0xee, 0xfc, 0x92, 0xb6, 0xdb, 0x08, 0xbc },
+	{ 0x1a, 0x62, 0xcc, 0x3a, 0x00, 0x80, 0x0d, 0xcb,
+	  0xd9, 0x98, 0x91, 0x08, 0x0c, 0x1e, 0x09, 0x84,
+	  0x58, 0x19, 0x3a, 0x8c, 0xc9, 0xf9, 0x70, 0xea,
+	  0x99, 0xfb, 0xef, 0xf0, 0x03, 0x18, 0xc2, 0x89 },
+	{ 0xcf, 0xce, 0x55, 0xeb, 0xaf, 0xc8, 0x40, 0xd7,
+	  0xae, 0x48, 0x28, 0x1c, 0x7f, 0xd5, 0x7e, 0xc8,
+	  0xb4, 0x82, 0xd4, 0xb7, 0x04, 0x43, 0x74, 0x95,
+	  0x49, 0x5a, 0xc4, 0x14, 0xcf, 0x4a, 0x37, 0x4b },
+	{ 0x67, 0x46, 0xfa, 0xcf, 0x71, 0x14, 0x6d, 0x99,
+	  0x9d, 0xab, 0xd0, 0x5d, 0x09, 0x3a, 0xe5, 0x86,
+	  0x64, 0x8d, 0x1e, 0xe2, 0x8e, 0x72, 0x61, 0x7b,
+	  0x99, 0xd0, 0xf0, 0x08, 0x6e, 0x1e, 0x45, 0xbf },
+	{ 0x57, 0x1c, 0xed, 0x28, 0x3b, 0x3f, 0x23, 0xb4,
+	  0xe7, 0x50, 0xbf, 0x12, 0xa2, 0xca, 0xf1, 0x78,
+	  0x18, 0x47, 0xbd, 0x89, 0x0e, 0x43, 0x60, 0x3c,
+	  0xdc, 0x59, 0x76, 0x10, 0x2b, 0x7b, 0xb1, 0x1b },
+	{ 0xcf, 0xcb, 0x76, 0x5b, 0x04, 0x8e, 0x35, 0x02,
+	  0x2c, 0x5d, 0x08, 0x9d, 0x26, 0xe8, 0x5a, 0x36,
+	  0xb0, 0x05, 0xa2, 0xb8, 0x04, 0x93, 0xd0, 0x3a,
+	  0x14, 0x4e, 0x09, 0xf4, 0x09, 0xb6, 0xaf, 0xd1 },
+	{ 0x40, 0x50, 0xc7, 0xa2, 0x77, 0x05, 0xbb, 0x27,
+	  0xf4, 0x20, 0x89, 0xb2, 0x99, 0xf3, 0xcb, 0xe5,
+	  0x05, 0x4e, 0xad, 0x68, 0x72, 0x7e, 0x8e, 0xf9,
+	  0x31, 0x8c, 0xe6, 0xf2, 0x5c, 0xd6, 0xf3, 0x1d },
+	{ 0x18, 0x40, 0x70, 0xbd, 0x5d, 0x26, 0x5f, 0xbd,
+	  0xc1, 0x42, 0xcd, 0x1c, 0x5c, 0xd0, 0xd7, 0xe4,
+	  0x14, 0xe7, 0x03, 0x69, 0xa2, 0x66, 0xd6, 0x27,
+	  0xc8, 0xfb, 0xa8, 0x4f, 0xa5, 0xe8, 0x4c, 0x34 },
+	{ 0x9e, 0xdd, 0xa9, 0xa4, 0x44, 0x39, 0x02, 0xa9,
+	  0x58, 0x8c, 0x0d, 0x0c, 0xcc, 0x62, 0xb9, 0x30,
+	  0x21, 0x84, 0x79, 0xa6, 0x84, 0x1e, 0x6f, 0xe7,
+	  0xd4, 0x30, 0x03, 0xf0, 0x4b, 0x1f, 0xd6, 0x43 },
+	{ 0xe4, 0x12, 0xfe, 0xef, 0x79, 0x08, 0x32, 0x4a,
+	  0x6d, 0xa1, 0x84, 0x16, 0x29, 0xf3, 0x5d, 0x3d,
+	  0x35, 0x86, 0x42, 0x01, 0x93, 0x10, 0xec, 0x57,
+	  0xc6, 0x14, 0x83, 0x6b, 0x63, 0xd3, 0x07, 0x63 },
+	{ 0x1a, 0x2b, 0x8e, 0xdf, 0xf3, 0xf9, 0xac, 0xc1,
+	  0x55, 0x4f, 0xcb, 0xae, 0x3c, 0xf1, 0xd6, 0x29,
+	  0x8c, 0x64, 0x62, 0xe2, 0x2e, 0x5e, 0xb0, 0x25,
+	  0x96, 0x84, 0xf8, 0x35, 0x01, 0x2b, 0xd1, 0x3f },
+	{ 0x28, 0x8c, 0x4a, 0xd9, 0xb9, 0x40, 0x97, 0x62,
+	  0xea, 0x07, 0xc2, 0x4a, 0x41, 0xf0, 0x4f, 0x69,
+	  0xa7, 0xd7, 0x4b, 0xee, 0x2d, 0x95, 0x43, 0x53,
+	  0x74, 0xbd, 0xe9, 0x46, 0xd7, 0x24, 0x1c, 0x7b },
+	{ 0x80, 0x56, 0x91, 0xbb, 0x28, 0x67, 0x48, 0xcf,
+	  0xb5, 0x91, 0xd3, 0xae, 0xbe, 0x7e, 0x6f, 0x4e,
+	  0x4d, 0xc6, 0xe2, 0x80, 0x8c, 0x65, 0x14, 0x3c,
+	  0xc0, 0x04, 0xe4, 0xeb, 0x6f, 0xd0, 0x9d, 0x43 },
+	{ 0xd4, 0xac, 0x8d, 0x3a, 0x0a, 0xfc, 0x6c, 0xfa,
+	  0x7b, 0x46, 0x0a, 0xe3, 0x00, 0x1b, 0xae, 0xb3,
+	  0x6d, 0xad, 0xb3, 0x7d, 0xa0, 0x7d, 0x2e, 0x8a,
+	  0xc9, 0x18, 0x22, 0xdf, 0x34, 0x8a, 0xed, 0x3d },
+	{ 0xc3, 0x76, 0x61, 0x70, 0x14, 0xd2, 0x01, 0x58,
+	  0xbc, 0xed, 0x3d, 0x3b, 0xa5, 0x52, 0xb6, 0xec,
+	  0xcf, 0x84, 0xe6, 0x2a, 0xa3, 0xeb, 0x65, 0x0e,
+	  0x90, 0x02, 0x9c, 0x84, 0xd1, 0x3e, 0xea, 0x69 },
+	{ 0xc4, 0x1f, 0x09, 0xf4, 0x3c, 0xec, 0xae, 0x72,
+	  0x93, 0xd6, 0x00, 0x7c, 0xa0, 0xa3, 0x57, 0x08,
+	  0x7d, 0x5a, 0xe5, 0x9b, 0xe5, 0x00, 0xc1, 0xcd,
+	  0x5b, 0x28, 0x9e, 0xe8, 0x10, 0xc7, 0xb0, 0x82 },
+	{ 0x03, 0xd1, 0xce, 0xd1, 0xfb, 0xa5, 0xc3, 0x91,
+	  0x55, 0xc4, 0x4b, 0x77, 0x65, 0xcb, 0x76, 0x0c,
+	  0x78, 0x70, 0x8d, 0xcf, 0xc8, 0x0b, 0x0b, 0xd8,
+	  0xad, 0xe3, 0xa5, 0x6d, 0xa8, 0x83, 0x0b, 0x29 },
+	{ 0x09, 0xbd, 0xe6, 0xf1, 0x52, 0x21, 0x8d, 0xc9,
+	  0x2c, 0x41, 0xd7, 0xf4, 0x53, 0x87, 0xe6, 0x3e,
+	  0x58, 0x69, 0xd8, 0x07, 0xec, 0x70, 0xb8, 0x21,
+	  0x40, 0x5d, 0xbd, 0x88, 0x4b, 0x7f, 0xcf, 0x4b },
+	{ 0x71, 0xc9, 0x03, 0x6e, 0x18, 0x17, 0x9b, 0x90,
+	  0xb3, 0x7d, 0x39, 0xe9, 0xf0, 0x5e, 0xb8, 0x9c,
+	  0xc5, 0xfc, 0x34, 0x1f, 0xd7, 0xc4, 0x77, 0xd0,
+	  0xd7, 0x49, 0x32, 0x85, 0xfa, 0xca, 0x08, 0xa4 },
+	{ 0x59, 0x16, 0x83, 0x3e, 0xbb, 0x05, 0xcd, 0x91,
+	  0x9c, 0xa7, 0xfe, 0x83, 0xb6, 0x92, 0xd3, 0x20,
+	  0x5b, 0xef, 0x72, 0x39, 0x2b, 0x2c, 0xf6, 0xbb,
+	  0x0a, 0x6d, 0x43, 0xf9, 0x94, 0xf9, 0x5f, 0x11 },
+	{ 0xf6, 0x3a, 0xab, 0x3e, 0xc6, 0x41, 0xb3, 0xb0,
+	  0x24, 0x96, 0x4c, 0x2b, 0x43, 0x7c, 0x04, 0xf6,
+	  0x04, 0x3c, 0x4c, 0x7e, 0x02, 0x79, 0x23, 0x99,
+	  0x95, 0x40, 0x19, 0x58, 0xf8, 0x6b, 0xbe, 0x54 },
+	{ 0xf1, 0x72, 0xb1, 0x80, 0xbf, 0xb0, 0x97, 0x40,
+	  0x49, 0x31, 0x20, 0xb6, 0x32, 0x6c, 0xbd, 0xc5,
+	  0x61, 0xe4, 0x77, 0xde, 0xf9, 0xbb, 0xcf, 0xd2,
+	  0x8c, 0xc8, 0xc1, 0xc5, 0xe3, 0x37, 0x9a, 0x31 },
+	{ 0xcb, 0x9b, 0x89, 0xcc, 0x18, 0x38, 0x1d, 0xd9,
+	  0x14, 0x1a, 0xde, 0x58, 0x86, 0x54, 0xd4, 0xe6,
+	  0xa2, 0x31, 0xd5, 0xbf, 0x49, 0xd4, 0xd5, 0x9a,
+	  0xc2, 0x7d, 0x86, 0x9c, 0xbe, 0x10, 0x0c, 0xf3 },
+	{ 0x7b, 0xd8, 0x81, 0x50, 0x46, 0xfd, 0xd8, 0x10,
+	  0xa9, 0x23, 0xe1, 0x98, 0x4a, 0xae, 0xbd, 0xcd,
+	  0xf8, 0x4d, 0x87, 0xc8, 0x99, 0x2d, 0x68, 0xb5,
+	  0xee, 0xb4, 0x60, 0xf9, 0x3e, 0xb3, 0xc8, 0xd7 },
+	{ 0x60, 0x7b, 0xe6, 0x68, 0x62, 0xfd, 0x08, 0xee,
+	  0x5b, 0x19, 0xfa, 0xca, 0xc0, 0x9d, 0xfd, 0xbc,
+	  0xd4, 0x0c, 0x31, 0x21, 0x01, 0xd6, 0x6e, 0x6e,
+	  0xbd, 0x2b, 0x84, 0x1f, 0x1b, 0x9a, 0x93, 0x25 },
+	{ 0x9f, 0xe0, 0x3b, 0xbe, 0x69, 0xab, 0x18, 0x34,
+	  0xf5, 0x21, 0x9b, 0x0d, 0xa8, 0x8a, 0x08, 0xb3,
+	  0x0a, 0x66, 0xc5, 0x91, 0x3f, 0x01, 0x51, 0x96,
+	  0x3c, 0x36, 0x05, 0x60, 0xdb, 0x03, 0x87, 0xb3 },
+	{ 0x90, 0xa8, 0x35, 0x85, 0x71, 0x7b, 0x75, 0xf0,
+	  0xe9, 0xb7, 0x25, 0xe0, 0x55, 0xee, 0xee, 0xb9,
+	  0xe7, 0xa0, 0x28, 0xea, 0x7e, 0x6c, 0xbc, 0x07,
+	  0xb2, 0x09, 0x17, 0xec, 0x03, 0x63, 0xe3, 0x8c },
+	{ 0x33, 0x6e, 0xa0, 0x53, 0x0f, 0x4a, 0x74, 0x69,
+	  0x12, 0x6e, 0x02, 0x18, 0x58, 0x7e, 0xbb, 0xde,
+	  0x33, 0x58, 0xa0, 0xb3, 0x1c, 0x29, 0xd2, 0x00,
+	  0xf7, 0xdc, 0x7e, 0xb1, 0x5c, 0x6a, 0xad, 0xd8 },
+	{ 0xa7, 0x9e, 0x76, 0xdc, 0x0a, 0xbc, 0xa4, 0x39,
+	  0x6f, 0x07, 0x47, 0xcd, 0x7b, 0x74, 0x8d, 0xf9,
+	  0x13, 0x00, 0x76, 0x26, 0xb1, 0xd6, 0x59, 0xda,
+	  0x0c, 0x1f, 0x78, 0xb9, 0x30, 0x3d, 0x01, 0xa3 },
+	{ 0x44, 0xe7, 0x8a, 0x77, 0x37, 0x56, 0xe0, 0x95,
+	  0x15, 0x19, 0x50, 0x4d, 0x70, 0x38, 0xd2, 0x8d,
+	  0x02, 0x13, 0xa3, 0x7e, 0x0c, 0xe3, 0x75, 0x37,
+	  0x17, 0x57, 0xbc, 0x99, 0x63, 0x11, 0xe3, 0xb8 },
+	{ 0x77, 0xac, 0x01, 0x2a, 0x3f, 0x75, 0x4d, 0xcf,
+	  0xea, 0xb5, 0xeb, 0x99, 0x6b, 0xe9, 0xcd, 0x2d,
+	  0x1f, 0x96, 0x11, 0x1b, 0x6e, 0x49, 0xf3, 0x99,
+	  0x4d, 0xf1, 0x81, 0xf2, 0x85, 0x69, 0xd8, 0x25 },
+	{ 0xce, 0x5a, 0x10, 0xdb, 0x6f, 0xcc, 0xda, 0xf1,
+	  0x40, 0xaa, 0xa4, 0xde, 0xd6, 0x25, 0x0a, 0x9c,
+	  0x06, 0xe9, 0x22, 0x2b, 0xc9, 0xf9, 0xf3, 0x65,
+	  0x8a, 0x4a, 0xff, 0x93, 0x5f, 0x2b, 0x9f, 0x3a },
+	{ 0xec, 0xc2, 0x03, 0xa7, 0xfe, 0x2b, 0xe4, 0xab,
+	  0xd5, 0x5b, 0xb5, 0x3e, 0x6e, 0x67, 0x35, 0x72,
+	  0xe0, 0x07, 0x8d, 0xa8, 0xcd, 0x37, 0x5e, 0xf4,
+	  0x30, 0xcc, 0x97, 0xf9, 0xf8, 0x00, 0x83, 0xaf },
+	{ 0x14, 0xa5, 0x18, 0x6d, 0xe9, 0xd7, 0xa1, 0x8b,
+	  0x04, 0x12, 0xb8, 0x56, 0x3e, 0x51, 0xcc, 0x54,
+	  0x33, 0x84, 0x0b, 0x4a, 0x12, 0x9a, 0x8f, 0xf9,
+	  0x63, 0xb3, 0x3a, 0x3c, 0x4a, 0xfe, 0x8e, 0xbb },
+	{ 0x13, 0xf8, 0xef, 0x95, 0xcb, 0x86, 0xe6, 0xa6,
+	  0x38, 0x93, 0x1c, 0x8e, 0x10, 0x76, 0x73, 0xeb,
+	  0x76, 0xba, 0x10, 0xd7, 0xc2, 0xcd, 0x70, 0xb9,
+	  0xd9, 0x92, 0x0b, 0xbe, 0xed, 0x92, 0x94, 0x09 },
+	{ 0x0b, 0x33, 0x8f, 0x4e, 0xe1, 0x2f, 0x2d, 0xfc,
+	  0xb7, 0x87, 0x13, 0x37, 0x79, 0x41, 0xe0, 0xb0,
+	  0x63, 0x21, 0x52, 0x58, 0x1d, 0x13, 0x32, 0x51,
+	  0x6e, 0x4a, 0x2c, 0xab, 0x19, 0x42, 0xcc, 0xa4 },
+	{ 0xea, 0xab, 0x0e, 0xc3, 0x7b, 0x3b, 0x8a, 0xb7,
+	  0x96, 0xe9, 0xf5, 0x72, 0x38, 0xde, 0x14, 0xa2,
+	  0x64, 0xa0, 0x76, 0xf3, 0x88, 0x7d, 0x86, 0xe2,
+	  0x9b, 0xb5, 0x90, 0x6d, 0xb5, 0xa0, 0x0e, 0x02 },
+	{ 0x23, 0xcb, 0x68, 0xb8, 0xc0, 0xe6, 0xdc, 0x26,
+	  0xdc, 0x27, 0x76, 0x6d, 0xdc, 0x0a, 0x13, 0xa9,
+	  0x94, 0x38, 0xfd, 0x55, 0x61, 0x7a, 0xa4, 0x09,
+	  0x5d, 0x8f, 0x96, 0x97, 0x20, 0xc8, 0x72, 0xdf },
+	{ 0x09, 0x1d, 0x8e, 0xe3, 0x0d, 0x6f, 0x29, 0x68,
+	  0xd4, 0x6b, 0x68, 0x7d, 0xd6, 0x52, 0x92, 0x66,
+	  0x57, 0x42, 0xde, 0x0b, 0xb8, 0x3d, 0xcc, 0x00,
+	  0x04, 0xc7, 0x2c, 0xe1, 0x00, 0x07, 0xa5, 0x49 },
+	{ 0x7f, 0x50, 0x7a, 0xbc, 0x6d, 0x19, 0xba, 0x00,
+	  0xc0, 0x65, 0xa8, 0x76, 0xec, 0x56, 0x57, 0x86,
+	  0x88, 0x82, 0xd1, 0x8a, 0x22, 0x1b, 0xc4, 0x6c,
+	  0x7a, 0x69, 0x12, 0x54, 0x1f, 0x5b, 0xc7, 0xba },
+	{ 0xa0, 0x60, 0x7c, 0x24, 0xe1, 0x4e, 0x8c, 0x22,
+	  0x3d, 0xb0, 0xd7, 0x0b, 0x4d, 0x30, 0xee, 0x88,
+	  0x01, 0x4d, 0x60, 0x3f, 0x43, 0x7e, 0x9e, 0x02,
+	  0xaa, 0x7d, 0xaf, 0xa3, 0xcd, 0xfb, 0xad, 0x94 },
+	{ 0xdd, 0xbf, 0xea, 0x75, 0xcc, 0x46, 0x78, 0x82,
+	  0xeb, 0x34, 0x83, 0xce, 0x5e, 0x2e, 0x75, 0x6a,
+	  0x4f, 0x47, 0x01, 0xb7, 0x6b, 0x44, 0x55, 0x19,
+	  0xe8, 0x9f, 0x22, 0xd6, 0x0f, 0xa8, 0x6e, 0x06 },
+	{ 0x0c, 0x31, 0x1f, 0x38, 0xc3, 0x5a, 0x4f, 0xb9,
+	  0x0d, 0x65, 0x1c, 0x28, 0x9d, 0x48, 0x68, 0x56,
+	  0xcd, 0x14, 0x13, 0xdf, 0x9b, 0x06, 0x77, 0xf5,
+	  0x3e, 0xce, 0x2c, 0xd9, 0xe4, 0x77, 0xc6, 0x0a },
+	{ 0x46, 0xa7, 0x3a, 0x8d, 0xd3, 0xe7, 0x0f, 0x59,
+	  0xd3, 0x94, 0x2c, 0x01, 0xdf, 0x59, 0x9d, 0xef,
+	  0x78, 0x3c, 0x9d, 0xa8, 0x2f, 0xd8, 0x32, 0x22,
+	  0xcd, 0x66, 0x2b, 0x53, 0xdc, 0xe7, 0xdb, 0xdf },
+	{ 0xad, 0x03, 0x8f, 0xf9, 0xb1, 0x4d, 0xe8, 0x4a,
+	  0x80, 0x1e, 0x4e, 0x62, 0x1c, 0xe5, 0xdf, 0x02,
+	  0x9d, 0xd9, 0x35, 0x20, 0xd0, 0xc2, 0xfa, 0x38,
+	  0xbf, 0xf1, 0x76, 0xa8, 0xb1, 0xd1, 0x69, 0x8c },
+	{ 0xab, 0x70, 0xc5, 0xdf, 0xbd, 0x1e, 0xa8, 0x17,
+	  0xfe, 0xd0, 0xcd, 0x06, 0x72, 0x93, 0xab, 0xf3,
+	  0x19, 0xe5, 0xd7, 0x90, 0x1c, 0x21, 0x41, 0xd5,
+	  0xd9, 0x9b, 0x23, 0xf0, 0x3a, 0x38, 0xe7, 0x48 },
+	{ 0x1f, 0xff, 0xda, 0x67, 0x93, 0x2b, 0x73, 0xc8,
+	  0xec, 0xaf, 0x00, 0x9a, 0x34, 0x91, 0xa0, 0x26,
+	  0x95, 0x3b, 0xab, 0xfe, 0x1f, 0x66, 0x3b, 0x06,
+	  0x97, 0xc3, 0xc4, 0xae, 0x8b, 0x2e, 0x7d, 0xcb },
+	{ 0xb0, 0xd2, 0xcc, 0x19, 0x47, 0x2d, 0xd5, 0x7f,
+	  0x2b, 0x17, 0xef, 0xc0, 0x3c, 0x8d, 0x58, 0xc2,
+	  0x28, 0x3d, 0xbb, 0x19, 0xda, 0x57, 0x2f, 0x77,
+	  0x55, 0x85, 0x5a, 0xa9, 0x79, 0x43, 0x17, 0xa0 },
+	{ 0xa0, 0xd1, 0x9a, 0x6e, 0xe3, 0x39, 0x79, 0xc3,
+	  0x25, 0x51, 0x0e, 0x27, 0x66, 0x22, 0xdf, 0x41,
+	  0xf7, 0x15, 0x83, 0xd0, 0x75, 0x01, 0xb8, 0x70,
+	  0x71, 0x12, 0x9a, 0x0a, 0xd9, 0x47, 0x32, 0xa5 },
+	{ 0x72, 0x46, 0x42, 0xa7, 0x03, 0x2d, 0x10, 0x62,
+	  0xb8, 0x9e, 0x52, 0xbe, 0xa3, 0x4b, 0x75, 0xdf,
+	  0x7d, 0x8f, 0xe7, 0x72, 0xd9, 0xfe, 0x3c, 0x93,
+	  0xdd, 0xf3, 0xc4, 0x54, 0x5a, 0xb5, 0xa9, 0x9b },
+	{ 0xad, 0xe5, 0xea, 0xa7, 0xe6, 0x1f, 0x67, 0x2d,
+	  0x58, 0x7e, 0xa0, 0x3d, 0xae, 0x7d, 0x7b, 0x55,
+	  0x22, 0x9c, 0x01, 0xd0, 0x6b, 0xc0, 0xa5, 0x70,
+	  0x14, 0x36, 0xcb, 0xd1, 0x83, 0x66, 0xa6, 0x26 },
+	{ 0x01, 0x3b, 0x31, 0xeb, 0xd2, 0x28, 0xfc, 0xdd,
+	  0xa5, 0x1f, 0xab, 0xb0, 0x3b, 0xb0, 0x2d, 0x60,
+	  0xac, 0x20, 0xca, 0x21, 0x5a, 0xaf, 0xa8, 0x3b,
+	  0xdd, 0x85, 0x5e, 0x37, 0x55, 0xa3, 0x5f, 0x0b },
+	{ 0x33, 0x2e, 0xd4, 0x0b, 0xb1, 0x0d, 0xde, 0x3c,
+	  0x95, 0x4a, 0x75, 0xd7, 0xb8, 0x99, 0x9d, 0x4b,
+	  0x26, 0xa1, 0xc0, 0x63, 0xc1, 0xdc, 0x6e, 0x32,
+	  0xc1, 0xd9, 0x1b, 0xab, 0x7b, 0xbb, 0x7d, 0x16 },
+	{ 0xc7, 0xa1, 0x97, 0xb3, 0xa0, 0x5b, 0x56, 0x6b,
+	  0xcc, 0x9f, 0xac, 0xd2, 0x0e, 0x44, 0x1d, 0x6f,
+	  0x6c, 0x28, 0x60, 0xac, 0x96, 0x51, 0xcd, 0x51,
+	  0xd6, 0xb9, 0xd2, 0xcd, 0xee, 0xea, 0x03, 0x90 },
+	{ 0xbd, 0x9c, 0xf6, 0x4e, 0xa8, 0x95, 0x3c, 0x03,
+	  0x71, 0x08, 0xe6, 0xf6, 0x54, 0x91, 0x4f, 0x39,
+	  0x58, 0xb6, 0x8e, 0x29, 0xc1, 0x67, 0x00, 0xdc,
+	  0x18, 0x4d, 0x94, 0xa2, 0x17, 0x08, 0xff, 0x60 },
+	{ 0x88, 0x35, 0xb0, 0xac, 0x02, 0x11, 0x51, 0xdf,
+	  0x71, 0x64, 0x74, 0xce, 0x27, 0xce, 0x4d, 0x3c,
+	  0x15, 0xf0, 0xb2, 0xda, 0xb4, 0x80, 0x03, 0xcf,
+	  0x3f, 0x3e, 0xfd, 0x09, 0x45, 0x10, 0x6b, 0x9a },
+	{ 0x3b, 0xfe, 0xfa, 0x33, 0x01, 0xaa, 0x55, 0xc0,
+	  0x80, 0x19, 0x0c, 0xff, 0xda, 0x8e, 0xae, 0x51,
+	  0xd9, 0xaf, 0x48, 0x8b, 0x4c, 0x1f, 0x24, 0xc3,
+	  0xd9, 0xa7, 0x52, 0x42, 0xfd, 0x8e, 0xa0, 0x1d },
+	{ 0x08, 0x28, 0x4d, 0x14, 0x99, 0x3c, 0xd4, 0x7d,
+	  0x53, 0xeb, 0xae, 0xcf, 0x0d, 0xf0, 0x47, 0x8c,
+	  0xc1, 0x82, 0xc8, 0x9c, 0x00, 0xe1, 0x85, 0x9c,
+	  0x84, 0x85, 0x16, 0x86, 0xdd, 0xf2, 0xc1, 0xb7 },
+	{ 0x1e, 0xd7, 0xef, 0x9f, 0x04, 0xc2, 0xac, 0x8d,
+	  0xb6, 0xa8, 0x64, 0xdb, 0x13, 0x10, 0x87, 0xf2,
+	  0x70, 0x65, 0x09, 0x8e, 0x69, 0xc3, 0xfe, 0x78,
+	  0x71, 0x8d, 0x9b, 0x94, 0x7f, 0x4a, 0x39, 0xd0 },
+	{ 0xc1, 0x61, 0xf2, 0xdc, 0xd5, 0x7e, 0x9c, 0x14,
+	  0x39, 0xb3, 0x1a, 0x9d, 0xd4, 0x3d, 0x8f, 0x3d,
+	  0x7d, 0xd8, 0xf0, 0xeb, 0x7c, 0xfa, 0xc6, 0xfb,
+	  0x25, 0xa0, 0xf2, 0x8e, 0x30, 0x6f, 0x06, 0x61 },
+	{ 0xc0, 0x19, 0x69, 0xad, 0x34, 0xc5, 0x2c, 0xaf,
+	  0x3d, 0xc4, 0xd8, 0x0d, 0x19, 0x73, 0x5c, 0x29,
+	  0x73, 0x1a, 0xc6, 0xe7, 0xa9, 0x20, 0x85, 0xab,
+	  0x92, 0x50, 0xc4, 0x8d, 0xea, 0x48, 0xa3, 0xfc },
+	{ 0x17, 0x20, 0xb3, 0x65, 0x56, 0x19, 0xd2, 0xa5,
+	  0x2b, 0x35, 0x21, 0xae, 0x0e, 0x49, 0xe3, 0x45,
+	  0xcb, 0x33, 0x89, 0xeb, 0xd6, 0x20, 0x8a, 0xca,
+	  0xf9, 0xf1, 0x3f, 0xda, 0xcc, 0xa8, 0xbe, 0x49 },
+	{ 0x75, 0x62, 0x88, 0x36, 0x1c, 0x83, 0xe2, 0x4c,
+	  0x61, 0x7c, 0xf9, 0x5c, 0x90, 0x5b, 0x22, 0xd0,
+	  0x17, 0xcd, 0xc8, 0x6f, 0x0b, 0xf1, 0xd6, 0x58,
+	  0xf4, 0x75, 0x6c, 0x73, 0x79, 0x87, 0x3b, 0x7f },
+	{ 0xe7, 0xd0, 0xed, 0xa3, 0x45, 0x26, 0x93, 0xb7,
+	  0x52, 0xab, 0xcd, 0xa1, 0xb5, 0x5e, 0x27, 0x6f,
+	  0x82, 0x69, 0x8f, 0x5f, 0x16, 0x05, 0x40, 0x3e,
+	  0xff, 0x83, 0x0b, 0xea, 0x00, 0x71, 0xa3, 0x94 },
+	{ 0x2c, 0x82, 0xec, 0xaa, 0x6b, 0x84, 0x80, 0x3e,
+	  0x04, 0x4a, 0xf6, 0x31, 0x18, 0xaf, 0xe5, 0x44,
+	  0x68, 0x7c, 0xb6, 0xe6, 0xc7, 0xdf, 0x49, 0xed,
+	  0x76, 0x2d, 0xfd, 0x7c, 0x86, 0x93, 0xa1, 0xbc },
+	{ 0x61, 0x36, 0xcb, 0xf4, 0xb4, 0x41, 0x05, 0x6f,
+	  0xa1, 0xe2, 0x72, 0x24, 0x98, 0x12, 0x5d, 0x6d,
+	  0xed, 0x45, 0xe1, 0x7b, 0x52, 0x14, 0x39, 0x59,
+	  0xc7, 0xf4, 0xd4, 0xe3, 0x95, 0x21, 0x8a, 0xc2 },
+	{ 0x72, 0x1d, 0x32, 0x45, 0xaa, 0xfe, 0xf2, 0x7f,
+	  0x6a, 0x62, 0x4f, 0x47, 0x95, 0x4b, 0x6c, 0x25,
+	  0x50, 0x79, 0x52, 0x6f, 0xfa, 0x25, 0xe9, 0xff,
+	  0x77, 0xe5, 0xdc, 0xff, 0x47, 0x3b, 0x15, 0x97 },
+	{ 0x9d, 0xd2, 0xfb, 0xd8, 0xce, 0xf1, 0x6c, 0x35,
+	  0x3c, 0x0a, 0xc2, 0x11, 0x91, 0xd5, 0x09, 0xeb,
+	  0x28, 0xdd, 0x9e, 0x3e, 0x0d, 0x8c, 0xea, 0x5d,
+	  0x26, 0xca, 0x83, 0x93, 0x93, 0x85, 0x1c, 0x3a },
+	{ 0xb2, 0x39, 0x4c, 0xea, 0xcd, 0xeb, 0xf2, 0x1b,
+	  0xf9, 0xdf, 0x2c, 0xed, 0x98, 0xe5, 0x8f, 0x1c,
+	  0x3a, 0x4b, 0xbb, 0xff, 0x66, 0x0d, 0xd9, 0x00,
+	  0xf6, 0x22, 0x02, 0xd6, 0x78, 0x5c, 0xc4, 0x6e },
+	{ 0x57, 0x08, 0x9f, 0x22, 0x27, 0x49, 0xad, 0x78,
+	  0x71, 0x76, 0x5f, 0x06, 0x2b, 0x11, 0x4f, 0x43,
+	  0xba, 0x20, 0xec, 0x56, 0x42, 0x2a, 0x8b, 0x1e,
+	  0x3f, 0x87, 0x19, 0x2c, 0x0e, 0xa7, 0x18, 0xc6 },
+	{ 0xe4, 0x9a, 0x94, 0x59, 0x96, 0x1c, 0xd3, 0x3c,
+	  0xdf, 0x4a, 0xae, 0x1b, 0x10, 0x78, 0xa5, 0xde,
+	  0xa7, 0xc0, 0x40, 0xe0, 0xfe, 0xa3, 0x40, 0xc9,
+	  0x3a, 0x72, 0x48, 0x72, 0xfc, 0x4a, 0xf8, 0x06 },
+	{ 0xed, 0xe6, 0x7f, 0x72, 0x0e, 0xff, 0xd2, 0xca,
+	  0x9c, 0x88, 0x99, 0x41, 0x52, 0xd0, 0x20, 0x1d,
+	  0xee, 0x6b, 0x0a, 0x2d, 0x2c, 0x07, 0x7a, 0xca,
+	  0x6d, 0xae, 0x29, 0xf7, 0x3f, 0x8b, 0x63, 0x09 },
+	{ 0xe0, 0xf4, 0x34, 0xbf, 0x22, 0xe3, 0x08, 0x80,
+	  0x39, 0xc2, 0x1f, 0x71, 0x9f, 0xfc, 0x67, 0xf0,
+	  0xf2, 0xcb, 0x5e, 0x98, 0xa7, 0xa0, 0x19, 0x4c,
+	  0x76, 0xe9, 0x6b, 0xf4, 0xe8, 0xe1, 0x7e, 0x61 },
+	{ 0x27, 0x7c, 0x04, 0xe2, 0x85, 0x34, 0x84, 0xa4,
+	  0xeb, 0xa9, 0x10, 0xad, 0x33, 0x6d, 0x01, 0xb4,
+	  0x77, 0xb6, 0x7c, 0xc2, 0x00, 0xc5, 0x9f, 0x3c,
+	  0x8d, 0x77, 0xee, 0xf8, 0x49, 0x4f, 0x29, 0xcd },
+	{ 0x15, 0x6d, 0x57, 0x47, 0xd0, 0xc9, 0x9c, 0x7f,
+	  0x27, 0x09, 0x7d, 0x7b, 0x7e, 0x00, 0x2b, 0x2e,
+	  0x18, 0x5c, 0xb7, 0x2d, 0x8d, 0xd7, 0xeb, 0x42,
+	  0x4a, 0x03, 0x21, 0x52, 0x81, 0x61, 0x21, 0x9f },
+	{ 0x20, 0xdd, 0xd1, 0xed, 0x9b, 0x1c, 0xa8, 0x03,
+	  0x94, 0x6d, 0x64, 0xa8, 0x3a, 0xe4, 0x65, 0x9d,
+	  0xa6, 0x7f, 0xba, 0x7a, 0x1a, 0x3e, 0xdd, 0xb1,
+	  0xe1, 0x03, 0xc0, 0xf5, 0xe0, 0x3e, 0x3a, 0x2c },
+	{ 0xf0, 0xaf, 0x60, 0x4d, 0x3d, 0xab, 0xbf, 0x9a,
+	  0x0f, 0x2a, 0x7d, 0x3d, 0xda, 0x6b, 0xd3, 0x8b,
+	  0xba, 0x72, 0xc6, 0xd0, 0x9b, 0xe4, 0x94, 0xfc,
+	  0xef, 0x71, 0x3f, 0xf1, 0x01, 0x89, 0xb6, 0xe6 },
+	{ 0x98, 0x02, 0xbb, 0x87, 0xde, 0xf4, 0xcc, 0x10,
+	  0xc4, 0xa5, 0xfd, 0x49, 0xaa, 0x58, 0xdf, 0xe2,
+	  0xf3, 0xfd, 0xdb, 0x46, 0xb4, 0x70, 0x88, 0x14,
+	  0xea, 0xd8, 0x1d, 0x23, 0xba, 0x95, 0x13, 0x9b },
+	{ 0x4f, 0x8c, 0xe1, 0xe5, 0x1d, 0x2f, 0xe7, 0xf2,
+	  0x40, 0x43, 0xa9, 0x04, 0xd8, 0x98, 0xeb, 0xfc,
+	  0x91, 0x97, 0x54, 0x18, 0x75, 0x34, 0x13, 0xaa,
+	  0x09, 0x9b, 0x79, 0x5e, 0xcb, 0x35, 0xce, 0xdb },
+	{ 0xbd, 0xdc, 0x65, 0x14, 0xd7, 0xee, 0x6a, 0xce,
+	  0x0a, 0x4a, 0xc1, 0xd0, 0xe0, 0x68, 0x11, 0x22,
+	  0x88, 0xcb, 0xcf, 0x56, 0x04, 0x54, 0x64, 0x27,
+	  0x05, 0x63, 0x01, 0x77, 0xcb, 0xa6, 0x08, 0xbd },
+	{ 0xd6, 0x35, 0x99, 0x4f, 0x62, 0x91, 0x51, 0x7b,
+	  0x02, 0x81, 0xff, 0xdd, 0x49, 0x6a, 0xfa, 0x86,
+	  0x27, 0x12, 0xe5, 0xb3, 0xc4, 0xe5, 0x2e, 0x4c,
+	  0xd5, 0xfd, 0xae, 0x8c, 0x0e, 0x72, 0xfb, 0x08 },
+	{ 0x87, 0x8d, 0x9c, 0xa6, 0x00, 0xcf, 0x87, 0xe7,
+	  0x69, 0xcc, 0x30, 0x5c, 0x1b, 0x35, 0x25, 0x51,
+	  0x86, 0x61, 0x5a, 0x73, 0xa0, 0xda, 0x61, 0x3b,
+	  0x5f, 0x1c, 0x98, 0xdb, 0xf8, 0x12, 0x83, 0xea },
+	{ 0xa6, 0x4e, 0xbe, 0x5d, 0xc1, 0x85, 0xde, 0x9f,
+	  0xdd, 0xe7, 0x60, 0x7b, 0x69, 0x98, 0x70, 0x2e,
+	  0xb2, 0x34, 0x56, 0x18, 0x49, 0x57, 0x30, 0x7d,
+	  0x2f, 0xa7, 0x2e, 0x87, 0xa4, 0x77, 0x02, 0xd6 },
+	{ 0xce, 0x50, 0xea, 0xb7, 0xb5, 0xeb, 0x52, 0xbd,
+	  0xc9, 0xad, 0x8e, 0x5a, 0x48, 0x0a, 0xb7, 0x80,
+	  0xca, 0x93, 0x20, 0xe4, 0x43, 0x60, 0xb1, 0xfe,
+	  0x37, 0xe0, 0x3f, 0x2f, 0x7a, 0xd7, 0xde, 0x01 },
+	{ 0xee, 0xdd, 0xb7, 0xc0, 0xdb, 0x6e, 0x30, 0xab,
+	  0xe6, 0x6d, 0x79, 0xe3, 0x27, 0x51, 0x1e, 0x61,
+	  0xfc, 0xeb, 0xbc, 0x29, 0xf1, 0x59, 0xb4, 0x0a,
+	  0x86, 0xb0, 0x46, 0xec, 0xf0, 0x51, 0x38, 0x23 },
+	{ 0x78, 0x7f, 0xc9, 0x34, 0x40, 0xc1, 0xec, 0x96,
+	  0xb5, 0xad, 0x01, 0xc1, 0x6c, 0xf7, 0x79, 0x16,
+	  0xa1, 0x40, 0x5f, 0x94, 0x26, 0x35, 0x6e, 0xc9,
+	  0x21, 0xd8, 0xdf, 0xf3, 0xea, 0x63, 0xb7, 0xe0 },
+	{ 0x7f, 0x0d, 0x5e, 0xab, 0x47, 0xee, 0xfd, 0xa6,
+	  0x96, 0xc0, 0xbf, 0x0f, 0xbf, 0x86, 0xab, 0x21,
+	  0x6f, 0xce, 0x46, 0x1e, 0x93, 0x03, 0xab, 0xa6,
+	  0xac, 0x37, 0x41, 0x20, 0xe8, 0x90, 0xe8, 0xdf },
+	{ 0xb6, 0x80, 0x04, 0xb4, 0x2f, 0x14, 0xad, 0x02,
+	  0x9f, 0x4c, 0x2e, 0x03, 0xb1, 0xd5, 0xeb, 0x76,
+	  0xd5, 0x71, 0x60, 0xe2, 0x64, 0x76, 0xd2, 0x11,
+	  0x31, 0xbe, 0xf2, 0x0a, 0xda, 0x7d, 0x27, 0xf4 },
+	{ 0xb0, 0xc4, 0xeb, 0x18, 0xae, 0x25, 0x0b, 0x51,
+	  0xa4, 0x13, 0x82, 0xea, 0xd9, 0x2d, 0x0d, 0xc7,
+	  0x45, 0x5f, 0x93, 0x79, 0xfc, 0x98, 0x84, 0x42,
+	  0x8e, 0x47, 0x70, 0x60, 0x8d, 0xb0, 0xfa, 0xec },
+	{ 0xf9, 0x2b, 0x7a, 0x87, 0x0c, 0x05, 0x9f, 0x4d,
+	  0x46, 0x46, 0x4c, 0x82, 0x4e, 0xc9, 0x63, 0x55,
+	  0x14, 0x0b, 0xdc, 0xe6, 0x81, 0x32, 0x2c, 0xc3,
+	  0xa9, 0x92, 0xff, 0x10, 0x3e, 0x3f, 0xea, 0x52 },
+	{ 0x53, 0x64, 0x31, 0x26, 0x14, 0x81, 0x33, 0x98,
+	  0xcc, 0x52, 0x5d, 0x4c, 0x4e, 0x14, 0x6e, 0xde,
+	  0xb3, 0x71, 0x26, 0x5f, 0xba, 0x19, 0x13, 0x3a,
+	  0x2c, 0x3d, 0x21, 0x59, 0x29, 0x8a, 0x17, 0x42 },
+	{ 0xf6, 0x62, 0x0e, 0x68, 0xd3, 0x7f, 0xb2, 0xaf,
+	  0x50, 0x00, 0xfc, 0x28, 0xe2, 0x3b, 0x83, 0x22,
+	  0x97, 0xec, 0xd8, 0xbc, 0xe9, 0x9e, 0x8b, 0xe4,
+	  0xd0, 0x4e, 0x85, 0x30, 0x9e, 0x3d, 0x33, 0x74 },
+	{ 0x53, 0x16, 0xa2, 0x79, 0x69, 0xd7, 0xfe, 0x04,
+	  0xff, 0x27, 0xb2, 0x83, 0x96, 0x1b, 0xff, 0xc3,
+	  0xbf, 0x5d, 0xfb, 0x32, 0xfb, 0x6a, 0x89, 0xd1,
+	  0x01, 0xc6, 0xc3, 0xb1, 0x93, 0x7c, 0x28, 0x71 },
+	{ 0x81, 0xd1, 0x66, 0x4f, 0xdf, 0x3c, 0xb3, 0x3c,
+	  0x24, 0xee, 0xba, 0xc0, 0xbd, 0x64, 0x24, 0x4b,
+	  0x77, 0xc4, 0xab, 0xea, 0x90, 0xbb, 0xe8, 0xb5,
+	  0xee, 0x0b, 0x2a, 0xaf, 0xcf, 0x2d, 0x6a, 0x53 },
+	{ 0x34, 0x57, 0x82, 0xf2, 0x95, 0xb0, 0x88, 0x03,
+	  0x52, 0xe9, 0x24, 0xa0, 0x46, 0x7b, 0x5f, 0xbc,
+	  0x3e, 0x8f, 0x3b, 0xfb, 0xc3, 0xc7, 0xe4, 0x8b,
+	  0x67, 0x09, 0x1f, 0xb5, 0xe8, 0x0a, 0x94, 0x42 },
+	{ 0x79, 0x41, 0x11, 0xea, 0x6c, 0xd6, 0x5e, 0x31,
+	  0x1f, 0x74, 0xee, 0x41, 0xd4, 0x76, 0xcb, 0x63,
+	  0x2c, 0xe1, 0xe4, 0xb0, 0x51, 0xdc, 0x1d, 0x9e,
+	  0x9d, 0x06, 0x1a, 0x19, 0xe1, 0xd0, 0xbb, 0x49 },
+	{ 0x2a, 0x85, 0xda, 0xf6, 0x13, 0x88, 0x16, 0xb9,
+	  0x9b, 0xf8, 0xd0, 0x8b, 0xa2, 0x11, 0x4b, 0x7a,
+	  0xb0, 0x79, 0x75, 0xa7, 0x84, 0x20, 0xc1, 0xa3,
+	  0xb0, 0x6a, 0x77, 0x7c, 0x22, 0xdd, 0x8b, 0xcb },
+	{ 0x89, 0xb0, 0xd5, 0xf2, 0x89, 0xec, 0x16, 0x40,
+	  0x1a, 0x06, 0x9a, 0x96, 0x0d, 0x0b, 0x09, 0x3e,
+	  0x62, 0x5d, 0xa3, 0xcf, 0x41, 0xee, 0x29, 0xb5,
+	  0x9b, 0x93, 0x0c, 0x58, 0x20, 0x14, 0x54, 0x55 },
+	{ 0xd0, 0xfd, 0xcb, 0x54, 0x39, 0x43, 0xfc, 0x27,
+	  0xd2, 0x08, 0x64, 0xf5, 0x21, 0x81, 0x47, 0x1b,
+	  0x94, 0x2c, 0xc7, 0x7c, 0xa6, 0x75, 0xbc, 0xb3,
+	  0x0d, 0xf3, 0x1d, 0x35, 0x8e, 0xf7, 0xb1, 0xeb },
+	{ 0xb1, 0x7e, 0xa8, 0xd7, 0x70, 0x63, 0xc7, 0x09,
+	  0xd4, 0xdc, 0x6b, 0x87, 0x94, 0x13, 0xc3, 0x43,
+	  0xe3, 0x79, 0x0e, 0x9e, 0x62, 0xca, 0x85, 0xb7,
+	  0x90, 0x0b, 0x08, 0x6f, 0x6b, 0x75, 0xc6, 0x72 },
+	{ 0xe7, 0x1a, 0x3e, 0x2c, 0x27, 0x4d, 0xb8, 0x42,
+	  0xd9, 0x21, 0x14, 0xf2, 0x17, 0xe2, 0xc0, 0xea,
+	  0xc8, 0xb4, 0x50, 0x93, 0xfd, 0xfd, 0x9d, 0xf4,
+	  0xca, 0x71, 0x62, 0x39, 0x48, 0x62, 0xd5, 0x01 },
+	{ 0xc0, 0x47, 0x67, 0x59, 0xab, 0x7a, 0xa3, 0x33,
+	  0x23, 0x4f, 0x6b, 0x44, 0xf5, 0xfd, 0x85, 0x83,
+	  0x90, 0xec, 0x23, 0x69, 0x4c, 0x62, 0x2c, 0xb9,
+	  0x86, 0xe7, 0x69, 0xc7, 0x8e, 0xdd, 0x73, 0x3e },
+	{ 0x9a, 0xb8, 0xea, 0xbb, 0x14, 0x16, 0x43, 0x4d,
+	  0x85, 0x39, 0x13, 0x41, 0xd5, 0x69, 0x93, 0xc5,
+	  0x54, 0x58, 0x16, 0x7d, 0x44, 0x18, 0xb1, 0x9a,
+	  0x0f, 0x2a, 0xd8, 0xb7, 0x9a, 0x83, 0xa7, 0x5b },
+	{ 0x79, 0x92, 0xd0, 0xbb, 0xb1, 0x5e, 0x23, 0x82,
+	  0x6f, 0x44, 0x3e, 0x00, 0x50, 0x5d, 0x68, 0xd3,
+	  0xed, 0x73, 0x72, 0x99, 0x5a, 0x5c, 0x3e, 0x49,
+	  0x86, 0x54, 0x10, 0x2f, 0xbc, 0xd0, 0x96, 0x4e },
+	{ 0xc0, 0x21, 0xb3, 0x00, 0x85, 0x15, 0x14, 0x35,
+	  0xdf, 0x33, 0xb0, 0x07, 0xcc, 0xec, 0xc6, 0x9d,
+	  0xf1, 0x26, 0x9f, 0x39, 0xba, 0x25, 0x09, 0x2b,
+	  0xed, 0x59, 0xd9, 0x32, 0xac, 0x0f, 0xdc, 0x28 },
+	{ 0x91, 0xa2, 0x5e, 0xc0, 0xec, 0x0d, 0x9a, 0x56,
+	  0x7f, 0x89, 0xc4, 0xbf, 0xe1, 0xa6, 0x5a, 0x0e,
+	  0x43, 0x2d, 0x07, 0x06, 0x4b, 0x41, 0x90, 0xe2,
+	  0x7d, 0xfb, 0x81, 0x90, 0x1f, 0xd3, 0x13, 0x9b },
+	{ 0x59, 0x50, 0xd3, 0x9a, 0x23, 0xe1, 0x54, 0x5f,
+	  0x30, 0x12, 0x70, 0xaa, 0x1a, 0x12, 0xf2, 0xe6,
+	  0xc4, 0x53, 0x77, 0x6e, 0x4d, 0x63, 0x55, 0xde,
+	  0x42, 0x5c, 0xc1, 0x53, 0xf9, 0x81, 0x88, 0x67 },
+	{ 0xd7, 0x9f, 0x14, 0x72, 0x0c, 0x61, 0x0a, 0xf1,
+	  0x79, 0xa3, 0x76, 0x5d, 0x4b, 0x7c, 0x09, 0x68,
+	  0xf9, 0x77, 0x96, 0x2d, 0xbf, 0x65, 0x5b, 0x52,
+	  0x12, 0x72, 0xb6, 0xf1, 0xe1, 0x94, 0x48, 0x8e },
+	{ 0xe9, 0x53, 0x1b, 0xfc, 0x8b, 0x02, 0x99, 0x5a,
+	  0xea, 0xa7, 0x5b, 0xa2, 0x70, 0x31, 0xfa, 0xdb,
+	  0xcb, 0xf4, 0xa0, 0xda, 0xb8, 0x96, 0x1d, 0x92,
+	  0x96, 0xcd, 0x7e, 0x84, 0xd2, 0x5d, 0x60, 0x06 },
+	{ 0x34, 0xe9, 0xc2, 0x6a, 0x01, 0xd7, 0xf1, 0x61,
+	  0x81, 0xb4, 0x54, 0xa9, 0xd1, 0x62, 0x3c, 0x23,
+	  0x3c, 0xb9, 0x9d, 0x31, 0xc6, 0x94, 0x65, 0x6e,
+	  0x94, 0x13, 0xac, 0xa3, 0xe9, 0x18, 0x69, 0x2f },
+	{ 0xd9, 0xd7, 0x42, 0x2f, 0x43, 0x7b, 0xd4, 0x39,
+	  0xdd, 0xd4, 0xd8, 0x83, 0xda, 0xe2, 0xa0, 0x83,
+	  0x50, 0x17, 0x34, 0x14, 0xbe, 0x78, 0x15, 0x51,
+	  0x33, 0xff, 0xf1, 0x96, 0x4c, 0x3d, 0x79, 0x72 },
+	{ 0x4a, 0xee, 0x0c, 0x7a, 0xaf, 0x07, 0x54, 0x14,
+	  0xff, 0x17, 0x93, 0xea, 0xd7, 0xea, 0xca, 0x60,
+	  0x17, 0x75, 0xc6, 0x15, 0xdb, 0xd6, 0x0b, 0x64,
+	  0x0b, 0x0a, 0x9f, 0x0c, 0xe5, 0x05, 0xd4, 0x35 },
+	{ 0x6b, 0xfd, 0xd1, 0x54, 0x59, 0xc8, 0x3b, 0x99,
+	  0xf0, 0x96, 0xbf, 0xb4, 0x9e, 0xe8, 0x7b, 0x06,
+	  0x3d, 0x69, 0xc1, 0x97, 0x4c, 0x69, 0x28, 0xac,
+	  0xfc, 0xfb, 0x40, 0x99, 0xf8, 0xc4, 0xef, 0x67 },
+	{ 0x9f, 0xd1, 0xc4, 0x08, 0xfd, 0x75, 0xc3, 0x36,
+	  0x19, 0x3a, 0x2a, 0x14, 0xd9, 0x4f, 0x6a, 0xf5,
+	  0xad, 0xf0, 0x50, 0xb8, 0x03, 0x87, 0xb4, 0xb0,
+	  0x10, 0xfb, 0x29, 0xf4, 0xcc, 0x72, 0x70, 0x7c },
+	{ 0x13, 0xc8, 0x84, 0x80, 0xa5, 0xd0, 0x0d, 0x6c,
+	  0x8c, 0x7a, 0xd2, 0x11, 0x0d, 0x76, 0xa8, 0x2d,
+	  0x9b, 0x70, 0xf4, 0xfa, 0x66, 0x96, 0xd4, 0xe5,
+	  0xdd, 0x42, 0xa0, 0x66, 0xdc, 0xaf, 0x99, 0x20 },
+	{ 0x82, 0x0e, 0x72, 0x5e, 0xe2, 0x5f, 0xe8, 0xfd,
+	  0x3a, 0x8d, 0x5a, 0xbe, 0x4c, 0x46, 0xc3, 0xba,
+	  0x88, 0x9d, 0xe6, 0xfa, 0x91, 0x91, 0xaa, 0x22,
+	  0xba, 0x67, 0xd5, 0x70, 0x54, 0x21, 0x54, 0x2b },
+	{ 0x32, 0xd9, 0x3a, 0x0e, 0xb0, 0x2f, 0x42, 0xfb,
+	  0xbc, 0xaf, 0x2b, 0xad, 0x00, 0x85, 0xb2, 0x82,
+	  0xe4, 0x60, 0x46, 0xa4, 0xdf, 0x7a, 0xd1, 0x06,
+	  0x57, 0xc9, 0xd6, 0x47, 0x63, 0x75, 0xb9, 0x3e },
+	{ 0xad, 0xc5, 0x18, 0x79, 0x05, 0xb1, 0x66, 0x9c,
+	  0xd8, 0xec, 0x9c, 0x72, 0x1e, 0x19, 0x53, 0x78,
+	  0x6b, 0x9d, 0x89, 0xa9, 0xba, 0xe3, 0x07, 0x80,
+	  0xf1, 0xe1, 0xea, 0xb2, 0x4a, 0x00, 0x52, 0x3c },
+	{ 0xe9, 0x07, 0x56, 0xff, 0x7f, 0x9a, 0xd8, 0x10,
+	  0xb2, 0x39, 0xa1, 0x0c, 0xed, 0x2c, 0xf9, 0xb2,
+	  0x28, 0x43, 0x54, 0xc1, 0xf8, 0xc7, 0xe0, 0xac,
+	  0xcc, 0x24, 0x61, 0xdc, 0x79, 0x6d, 0x6e, 0x89 },
+	{ 0x12, 0x51, 0xf7, 0x6e, 0x56, 0x97, 0x84, 0x81,
+	  0x87, 0x53, 0x59, 0x80, 0x1d, 0xb5, 0x89, 0xa0,
+	  0xb2, 0x2f, 0x86, 0xd8, 0xd6, 0x34, 0xdc, 0x04,
+	  0x50, 0x6f, 0x32, 0x2e, 0xd7, 0x8f, 0x17, 0xe8 },
+	{ 0x3a, 0xfa, 0x89, 0x9f, 0xd9, 0x80, 0xe7, 0x3e,
+	  0xcb, 0x7f, 0x4d, 0x8b, 0x8f, 0x29, 0x1d, 0xc9,
+	  0xaf, 0x79, 0x6b, 0xc6, 0x5d, 0x27, 0xf9, 0x74,
+	  0xc6, 0xf1, 0x93, 0xc9, 0x19, 0x1a, 0x09, 0xfd },
+	{ 0xaa, 0x30, 0x5b, 0xe2, 0x6e, 0x5d, 0xed, 0xdc,
+	  0x3c, 0x10, 0x10, 0xcb, 0xc2, 0x13, 0xf9, 0x5f,
+	  0x05, 0x1c, 0x78, 0x5c, 0x5b, 0x43, 0x1e, 0x6a,
+	  0x7c, 0xd0, 0x48, 0xf1, 0x61, 0x78, 0x75, 0x28 },
+	{ 0x8e, 0xa1, 0x88, 0x4f, 0xf3, 0x2e, 0x9d, 0x10,
+	  0xf0, 0x39, 0xb4, 0x07, 0xd0, 0xd4, 0x4e, 0x7e,
+	  0x67, 0x0a, 0xbd, 0x88, 0x4a, 0xee, 0xe0, 0xfb,
+	  0x75, 0x7a, 0xe9, 0x4e, 0xaa, 0x97, 0x37, 0x3d },
+	{ 0xd4, 0x82, 0xb2, 0x15, 0x5d, 0x4d, 0xec, 0x6b,
+	  0x47, 0x36, 0xa1, 0xf1, 0x61, 0x7b, 0x53, 0xaa,
+	  0xa3, 0x73, 0x10, 0x27, 0x7d, 0x3f, 0xef, 0x0c,
+	  0x37, 0xad, 0x41, 0x76, 0x8f, 0xc2, 0x35, 0xb4 },
+	{ 0x4d, 0x41, 0x39, 0x71, 0x38, 0x7e, 0x7a, 0x88,
+	  0x98, 0xa8, 0xdc, 0x2a, 0x27, 0x50, 0x07, 0x78,
+	  0x53, 0x9e, 0xa2, 0x14, 0xa2, 0xdf, 0xe9, 0xb3,
+	  0xd7, 0xe8, 0xeb, 0xdc, 0xe5, 0xcf, 0x3d, 0xb3 },
+	{ 0x69, 0x6e, 0x5d, 0x46, 0xe6, 0xc5, 0x7e, 0x87,
+	  0x96, 0xe4, 0x73, 0x5d, 0x08, 0x91, 0x6e, 0x0b,
+	  0x79, 0x29, 0xb3, 0xcf, 0x29, 0x8c, 0x29, 0x6d,
+	  0x22, 0xe9, 0xd3, 0x01, 0x96, 0x53, 0x37, 0x1c },
+	{ 0x1f, 0x56, 0x47, 0xc1, 0xd3, 0xb0, 0x88, 0x22,
+	  0x88, 0x85, 0x86, 0x5c, 0x89, 0x40, 0x90, 0x8b,
+	  0xf4, 0x0d, 0x1a, 0x82, 0x72, 0x82, 0x19, 0x73,
+	  0xb1, 0x60, 0x00, 0x8e, 0x7a, 0x3c, 0xe2, 0xeb },
+	{ 0xb6, 0xe7, 0x6c, 0x33, 0x0f, 0x02, 0x1a, 0x5b,
+	  0xda, 0x65, 0x87, 0x50, 0x10, 0xb0, 0xed, 0xf0,
+	  0x91, 0x26, 0xc0, 0xf5, 0x10, 0xea, 0x84, 0x90,
+	  0x48, 0x19, 0x20, 0x03, 0xae, 0xf4, 0xc6, 0x1c },
+	{ 0x3c, 0xd9, 0x52, 0xa0, 0xbe, 0xad, 0xa4, 0x1a,
+	  0xbb, 0x42, 0x4c, 0xe4, 0x7f, 0x94, 0xb4, 0x2b,
+	  0xe6, 0x4e, 0x1f, 0xfb, 0x0f, 0xd0, 0x78, 0x22,
+	  0x76, 0x80, 0x79, 0x46, 0xd0, 0xd0, 0xbc, 0x55 },
+	{ 0x98, 0xd9, 0x26, 0x77, 0x43, 0x9b, 0x41, 0xb7,
+	  0xbb, 0x51, 0x33, 0x12, 0xaf, 0xb9, 0x2b, 0xcc,
+	  0x8e, 0xe9, 0x68, 0xb2, 0xe3, 0xb2, 0x38, 0xce,
+	  0xcb, 0x9b, 0x0f, 0x34, 0xc9, 0xbb, 0x63, 0xd0 },
+	{ 0xec, 0xbc, 0xa2, 0xcf, 0x08, 0xae, 0x57, 0xd5,
+	  0x17, 0xad, 0x16, 0x15, 0x8a, 0x32, 0xbf, 0xa7,
+	  0xdc, 0x03, 0x82, 0xea, 0xed, 0xa1, 0x28, 0xe9,
+	  0x18, 0x86, 0x73, 0x4c, 0x24, 0xa0, 0xb2, 0x9d },
+	{ 0x94, 0x2c, 0xc7, 0xc0, 0xb5, 0x2e, 0x2b, 0x16,
+	  0xa4, 0xb8, 0x9f, 0xa4, 0xfc, 0x7e, 0x0b, 0xf6,
+	  0x09, 0xe2, 0x9a, 0x08, 0xc1, 0xa8, 0x54, 0x34,
+	  0x52, 0xb7, 0x7c, 0x7b, 0xfd, 0x11, 0xbb, 0x28 },
+	{ 0x8a, 0x06, 0x5d, 0x8b, 0x61, 0xa0, 0xdf, 0xfb,
+	  0x17, 0x0d, 0x56, 0x27, 0x73, 0x5a, 0x76, 0xb0,
+	  0xe9, 0x50, 0x60, 0x37, 0x80, 0x8c, 0xba, 0x16,
+	  0xc3, 0x45, 0x00, 0x7c, 0x9f, 0x79, 0xcf, 0x8f },
+	{ 0x1b, 0x9f, 0xa1, 0x97, 0x14, 0x65, 0x9c, 0x78,
+	  0xff, 0x41, 0x38, 0x71, 0x84, 0x92, 0x15, 0x36,
+	  0x10, 0x29, 0xac, 0x80, 0x2b, 0x1c, 0xbc, 0xd5,
+	  0x4e, 0x40, 0x8b, 0xd8, 0x72, 0x87, 0xf8, 0x1f },
+	{ 0x8d, 0xab, 0x07, 0x1b, 0xcd, 0x6c, 0x72, 0x92,
+	  0xa9, 0xef, 0x72, 0x7b, 0x4a, 0xe0, 0xd8, 0x67,
+	  0x13, 0x30, 0x1d, 0xa8, 0x61, 0x8d, 0x9a, 0x48,
+	  0xad, 0xce, 0x55, 0xf3, 0x03, 0xa8, 0x69, 0xa1 },
+	{ 0x82, 0x53, 0xe3, 0xe7, 0xc7, 0xb6, 0x84, 0xb9,
+	  0xcb, 0x2b, 0xeb, 0x01, 0x4c, 0xe3, 0x30, 0xff,
+	  0x3d, 0x99, 0xd1, 0x7a, 0xbb, 0xdb, 0xab, 0xe4,
+	  0xf4, 0xd6, 0x74, 0xde, 0xd5, 0x3f, 0xfc, 0x6b },
+	{ 0xf1, 0x95, 0xf3, 0x21, 0xe9, 0xe3, 0xd6, 0xbd,
+	  0x7d, 0x07, 0x45, 0x04, 0xdd, 0x2a, 0xb0, 0xe6,
+	  0x24, 0x1f, 0x92, 0xe7, 0x84, 0xb1, 0xaa, 0x27,
+	  0x1f, 0xf6, 0x48, 0xb1, 0xca, 0xb6, 0xd7, 0xf6 },
+	{ 0x27, 0xe4, 0xcc, 0x72, 0x09, 0x0f, 0x24, 0x12,
+	  0x66, 0x47, 0x6a, 0x7c, 0x09, 0x49, 0x5f, 0x2d,
+	  0xb1, 0x53, 0xd5, 0xbc, 0xbd, 0x76, 0x19, 0x03,
+	  0xef, 0x79, 0x27, 0x5e, 0xc5, 0x6b, 0x2e, 0xd8 },
+	{ 0x89, 0x9c, 0x24, 0x05, 0x78, 0x8e, 0x25, 0xb9,
+	  0x9a, 0x18, 0x46, 0x35, 0x5e, 0x64, 0x6d, 0x77,
+	  0xcf, 0x40, 0x00, 0x83, 0x41, 0x5f, 0x7d, 0xc5,
+	  0xaf, 0xe6, 0x9d, 0x6e, 0x17, 0xc0, 0x00, 0x23 },
+	{ 0xa5, 0x9b, 0x78, 0xc4, 0x90, 0x57, 0x44, 0x07,
+	  0x6b, 0xfe, 0xe8, 0x94, 0xde, 0x70, 0x7d, 0x4f,
+	  0x12, 0x0b, 0x5c, 0x68, 0x93, 0xea, 0x04, 0x00,
+	  0x29, 0x7d, 0x0b, 0xb8, 0x34, 0x72, 0x76, 0x32 },
+	{ 0x59, 0xdc, 0x78, 0xb1, 0x05, 0x64, 0x97, 0x07,
+	  0xa2, 0xbb, 0x44, 0x19, 0xc4, 0x8f, 0x00, 0x54,
+	  0x00, 0xd3, 0x97, 0x3d, 0xe3, 0x73, 0x66, 0x10,
+	  0x23, 0x04, 0x35, 0xb1, 0x04, 0x24, 0xb2, 0x4f },
+	{ 0xc0, 0x14, 0x9d, 0x1d, 0x7e, 0x7a, 0x63, 0x53,
+	  0xa6, 0xd9, 0x06, 0xef, 0xe7, 0x28, 0xf2, 0xf3,
+	  0x29, 0xfe, 0x14, 0xa4, 0x14, 0x9a, 0x3e, 0xa7,
+	  0x76, 0x09, 0xbc, 0x42, 0xb9, 0x75, 0xdd, 0xfa },
+	{ 0xa3, 0x2f, 0x24, 0x14, 0x74, 0xa6, 0xc1, 0x69,
+	  0x32, 0xe9, 0x24, 0x3b, 0xe0, 0xcf, 0x09, 0xbc,
+	  0xdc, 0x7e, 0x0c, 0xa0, 0xe7, 0xa6, 0xa1, 0xb9,
+	  0xb1, 0xa0, 0xf0, 0x1e, 0x41, 0x50, 0x23, 0x77 },
+	{ 0xb2, 0x39, 0xb2, 0xe4, 0xf8, 0x18, 0x41, 0x36,
+	  0x1c, 0x13, 0x39, 0xf6, 0x8e, 0x2c, 0x35, 0x9f,
+	  0x92, 0x9a, 0xf9, 0xad, 0x9f, 0x34, 0xe0, 0x1a,
+	  0xab, 0x46, 0x31, 0xad, 0x6d, 0x55, 0x00, 0xb0 },
+	{ 0x85, 0xfb, 0x41, 0x9c, 0x70, 0x02, 0xa3, 0xe0,
+	  0xb4, 0xb6, 0xea, 0x09, 0x3b, 0x4c, 0x1a, 0xc6,
+	  0x93, 0x66, 0x45, 0xb6, 0x5d, 0xac, 0x5a, 0xc1,
+	  0x5a, 0x85, 0x28, 0xb7, 0xb9, 0x4c, 0x17, 0x54 },
+	{ 0x96, 0x19, 0x72, 0x06, 0x25, 0xf1, 0x90, 0xb9,
+	  0x3a, 0x3f, 0xad, 0x18, 0x6a, 0xb3, 0x14, 0x18,
+	  0x96, 0x33, 0xc0, 0xd3, 0xa0, 0x1e, 0x6f, 0x9b,
+	  0xc8, 0xc4, 0xa8, 0xf8, 0x2f, 0x38, 0x3d, 0xbf },
+	{ 0x7d, 0x62, 0x0d, 0x90, 0xfe, 0x69, 0xfa, 0x46,
+	  0x9a, 0x65, 0x38, 0x38, 0x89, 0x70, 0xa1, 0xaa,
+	  0x09, 0xbb, 0x48, 0xa2, 0xd5, 0x9b, 0x34, 0x7b,
+	  0x97, 0xe8, 0xce, 0x71, 0xf4, 0x8c, 0x7f, 0x46 },
+	{ 0x29, 0x43, 0x83, 0x56, 0x85, 0x96, 0xfb, 0x37,
+	  0xc7, 0x5b, 0xba, 0xcd, 0x97, 0x9c, 0x5f, 0xf6,
+	  0xf2, 0x0a, 0x55, 0x6b, 0xf8, 0x87, 0x9c, 0xc7,
+	  0x29, 0x24, 0x85, 0x5d, 0xf9, 0xb8, 0x24, 0x0e },
+	{ 0x16, 0xb1, 0x8a, 0xb3, 0x14, 0x35, 0x9c, 0x2b,
+	  0x83, 0x3c, 0x1c, 0x69, 0x86, 0xd4, 0x8c, 0x55,
+	  0xa9, 0xfc, 0x97, 0xcd, 0xe9, 0xa3, 0xc1, 0xf1,
+	  0x0a, 0x31, 0x77, 0x14, 0x0f, 0x73, 0xf7, 0x38 },
+	{ 0x8c, 0xbb, 0xdd, 0x14, 0xbc, 0x33, 0xf0, 0x4c,
+	  0xf4, 0x58, 0x13, 0xe4, 0xa1, 0x53, 0xa2, 0x73,
+	  0xd3, 0x6a, 0xda, 0xd5, 0xce, 0x71, 0xf4, 0x99,
+	  0xee, 0xb8, 0x7f, 0xb8, 0xac, 0x63, 0xb7, 0x29 },
+	{ 0x69, 0xc9, 0xa4, 0x98, 0xdb, 0x17, 0x4e, 0xca,
+	  0xef, 0xcc, 0x5a, 0x3a, 0xc9, 0xfd, 0xed, 0xf0,
+	  0xf8, 0x13, 0xa5, 0xbe, 0xc7, 0x27, 0xf1, 0xe7,
+	  0x75, 0xba, 0xbd, 0xec, 0x77, 0x18, 0x81, 0x6e },
+	{ 0xb4, 0x62, 0xc3, 0xbe, 0x40, 0x44, 0x8f, 0x1d,
+	  0x4f, 0x80, 0x62, 0x62, 0x54, 0xe5, 0x35, 0xb0,
+	  0x8b, 0xc9, 0xcd, 0xcf, 0xf5, 0x99, 0xa7, 0x68,
+	  0x57, 0x8d, 0x4b, 0x28, 0x81, 0xa8, 0xe3, 0xf0 },
+	{ 0x55, 0x3e, 0x9d, 0x9c, 0x5f, 0x36, 0x0a, 0xc0,
+	  0xb7, 0x4a, 0x7d, 0x44, 0xe5, 0xa3, 0x91, 0xda,
+	  0xd4, 0xce, 0xd0, 0x3e, 0x0c, 0x24, 0x18, 0x3b,
+	  0x7e, 0x8e, 0xca, 0xbd, 0xf1, 0x71, 0x5a, 0x64 },
+	{ 0x7a, 0x7c, 0x55, 0xa5, 0x6f, 0xa9, 0xae, 0x51,
+	  0xe6, 0x55, 0xe0, 0x19, 0x75, 0xd8, 0xa6, 0xff,
+	  0x4a, 0xe9, 0xe4, 0xb4, 0x86, 0xfc, 0xbe, 0x4e,
+	  0xac, 0x04, 0x45, 0x88, 0xf2, 0x45, 0xeb, 0xea },
+	{ 0x2a, 0xfd, 0xf3, 0xc8, 0x2a, 0xbc, 0x48, 0x67,
+	  0xf5, 0xde, 0x11, 0x12, 0x86, 0xc2, 0xb3, 0xbe,
+	  0x7d, 0x6e, 0x48, 0x65, 0x7b, 0xa9, 0x23, 0xcf,
+	  0xbf, 0x10, 0x1a, 0x6d, 0xfc, 0xf9, 0xdb, 0x9a },
+	{ 0x41, 0x03, 0x7d, 0x2e, 0xdc, 0xdc, 0xe0, 0xc4,
+	  0x9b, 0x7f, 0xb4, 0xa6, 0xaa, 0x09, 0x99, 0xca,
+	  0x66, 0x97, 0x6c, 0x74, 0x83, 0xaf, 0xe6, 0x31,
+	  0xd4, 0xed, 0xa2, 0x83, 0x14, 0x4f, 0x6d, 0xfc },
+	{ 0xc4, 0x46, 0x6f, 0x84, 0x97, 0xca, 0x2e, 0xeb,
+	  0x45, 0x83, 0xa0, 0xb0, 0x8e, 0x9d, 0x9a, 0xc7,
+	  0x43, 0x95, 0x70, 0x9f, 0xda, 0x10, 0x9d, 0x24,
+	  0xf2, 0xe4, 0x46, 0x21, 0x96, 0x77, 0x9c, 0x5d },
+	{ 0x75, 0xf6, 0x09, 0x33, 0x8a, 0xa6, 0x7d, 0x96,
+	  0x9a, 0x2a, 0xe2, 0xa2, 0x36, 0x2b, 0x2d, 0xa9,
+	  0xd7, 0x7c, 0x69, 0x5d, 0xfd, 0x1d, 0xf7, 0x22,
+	  0x4a, 0x69, 0x01, 0xdb, 0x93, 0x2c, 0x33, 0x64 },
+	{ 0x68, 0x60, 0x6c, 0xeb, 0x98, 0x9d, 0x54, 0x88,
+	  0xfc, 0x7c, 0xf6, 0x49, 0xf3, 0xd7, 0xc2, 0x72,
+	  0xef, 0x05, 0x5d, 0xa1, 0xa9, 0x3f, 0xae, 0xcd,
+	  0x55, 0xfe, 0x06, 0xf6, 0x96, 0x70, 0x98, 0xca },
+	{ 0x44, 0x34, 0x6b, 0xde, 0xb7, 0xe0, 0x52, 0xf6,
+	  0x25, 0x50, 0x48, 0xf0, 0xd9, 0xb4, 0x2c, 0x42,
+	  0x5b, 0xab, 0x9c, 0x3d, 0xd2, 0x41, 0x68, 0x21,
+	  0x2c, 0x3e, 0xcf, 0x1e, 0xbf, 0x34, 0xe6, 0xae },
+	{ 0x8e, 0x9c, 0xf6, 0xe1, 0xf3, 0x66, 0x47, 0x1f,
+	  0x2a, 0xc7, 0xd2, 0xee, 0x9b, 0x5e, 0x62, 0x66,
+	  0xfd, 0xa7, 0x1f, 0x8f, 0x2e, 0x41, 0x09, 0xf2,
+	  0x23, 0x7e, 0xd5, 0xf8, 0x81, 0x3f, 0xc7, 0x18 },
+	{ 0x84, 0xbb, 0xeb, 0x84, 0x06, 0xd2, 0x50, 0x95,
+	  0x1f, 0x8c, 0x1b, 0x3e, 0x86, 0xa7, 0xc0, 0x10,
+	  0x08, 0x29, 0x21, 0x83, 0x3d, 0xfd, 0x95, 0x55,
+	  0xa2, 0xf9, 0x09, 0xb1, 0x08, 0x6e, 0xb4, 0xb8 },
+	{ 0xee, 0x66, 0x6f, 0x3e, 0xef, 0x0f, 0x7e, 0x2a,
+	  0x9c, 0x22, 0x29, 0x58, 0xc9, 0x7e, 0xaf, 0x35,
+	  0xf5, 0x1c, 0xed, 0x39, 0x3d, 0x71, 0x44, 0x85,
+	  0xab, 0x09, 0xa0, 0x69, 0x34, 0x0f, 0xdf, 0x88 },
+	{ 0xc1, 0x53, 0xd3, 0x4a, 0x65, 0xc4, 0x7b, 0x4a,
+	  0x62, 0xc5, 0xca, 0xcf, 0x24, 0x01, 0x09, 0x75,
+	  0xd0, 0x35, 0x6b, 0x2f, 0x32, 0xc8, 0xf5, 0xda,
+	  0x53, 0x0d, 0x33, 0x88, 0x16, 0xad, 0x5d, 0xe6 },
+	{ 0x9f, 0xc5, 0x45, 0x01, 0x09, 0xe1, 0xb7, 0x79,
+	  0xf6, 0xc7, 0xae, 0x79, 0xd5, 0x6c, 0x27, 0x63,
+	  0x5c, 0x8d, 0xd4, 0x26, 0xc5, 0xa9, 0xd5, 0x4e,
+	  0x25, 0x78, 0xdb, 0x98, 0x9b, 0x8c, 0x3b, 0x4e },
+	{ 0xd1, 0x2b, 0xf3, 0x73, 0x2e, 0xf4, 0xaf, 0x5c,
+	  0x22, 0xfa, 0x90, 0x35, 0x6a, 0xf8, 0xfc, 0x50,
+	  0xfc, 0xb4, 0x0f, 0x8f, 0x2e, 0xa5, 0xc8, 0x59,
+	  0x47, 0x37, 0xa3, 0xb3, 0xd5, 0xab, 0xdb, 0xd7 },
+	{ 0x11, 0x03, 0x0b, 0x92, 0x89, 0xbb, 0xa5, 0xaf,
+	  0x65, 0x26, 0x06, 0x72, 0xab, 0x6f, 0xee, 0x88,
+	  0xb8, 0x74, 0x20, 0xac, 0xef, 0x4a, 0x17, 0x89,
+	  0xa2, 0x07, 0x3b, 0x7e, 0xc2, 0xf2, 0xa0, 0x9e },
+	{ 0x69, 0xcb, 0x19, 0x2b, 0x84, 0x44, 0x00, 0x5c,
+	  0x8c, 0x0c, 0xeb, 0x12, 0xc8, 0x46, 0x86, 0x07,
+	  0x68, 0x18, 0x8c, 0xda, 0x0a, 0xec, 0x27, 0xa9,
+	  0xc8, 0xa5, 0x5c, 0xde, 0xe2, 0x12, 0x36, 0x32 },
+	{ 0xdb, 0x44, 0x4c, 0x15, 0x59, 0x7b, 0x5f, 0x1a,
+	  0x03, 0xd1, 0xf9, 0xed, 0xd1, 0x6e, 0x4a, 0x9f,
+	  0x43, 0xa6, 0x67, 0xcc, 0x27, 0x51, 0x75, 0xdf,
+	  0xa2, 0xb7, 0x04, 0xe3, 0xbb, 0x1a, 0x9b, 0x83 },
+	{ 0x3f, 0xb7, 0x35, 0x06, 0x1a, 0xbc, 0x51, 0x9d,
+	  0xfe, 0x97, 0x9e, 0x54, 0xc1, 0xee, 0x5b, 0xfa,
+	  0xd0, 0xa9, 0xd8, 0x58, 0xb3, 0x31, 0x5b, 0xad,
+	  0x34, 0xbd, 0xe9, 0x99, 0xef, 0xd7, 0x24, 0xdd }
+};
+
+bool __init blake2s_selftest(void)
+{
+	u8 key[BLAKE2S_KEYBYTES];
+	u8 buf[ARRAY_SIZE(blake2s_testvecs)];
+	u8 hash[BLAKE2S_OUTBYTES];
+	size_t i;
+	bool success = true;
+
+	for (i = 0; i < BLAKE2S_KEYBYTES; ++i)
+		key[i] = (u8)i;
+
+	for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i)
+		buf[i] = (u8)i;
+
+	for (i = 0; i < ARRAY_SIZE(blake2s_keyed_testvecs); ++i) {
+		blake2s(hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES);
+		if (memcmp(hash, blake2s_keyed_testvecs[i], BLAKE2S_OUTBYTES)) {
+			pr_info("blake2s keyed self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) {
+		blake2s(hash, buf, NULL, BLAKE2S_OUTBYTES, i, 0);
+		if (memcmp(hash, blake2s_testvecs[i], BLAKE2S_OUTBYTES)) {
+			pr_info("blake2s unkeyed self-test %zu: FAIL\n", i + i);
+			success = false;
+		}
+	}
+
+	if (success)
+		pr_info("blake2s self-tests: pass\n");
+	return success;
+}
+#endif
-- 
2.19.0

^ permalink raw reply related

* Re: [PATCH net] pppoe: fix reception of frames with no mac header
From: Alexander Potapenko @ 2018-09-14 16:21 UTC (permalink / raw)
  To: g.nault; +Cc: Networking, mostrows, Eric Dumazet
In-Reply-To: <20180914143459.GC1507@alphalink.fr>

On Fri, Sep 14, 2018 at 4:35 PM Guillaume Nault <g.nault@alphalink.fr> wrote:
>
> On Fri, Sep 14, 2018 at 04:28:05PM +0200, Guillaume Nault wrote:
> > pppoe_rcv() needs to look back at the Ethernet header in order to
> > lookup the PPPoE session. Therefore we need to ensure that the mac
> > header is big enough to contain an Ethernet header. Otherwise
> > eth_hdr(skb)->h_source might access invalid data.
> >
> Forgot to Cc Alexander :/
> Sorry...
> BTW, thanks for your first analysis.
Thank you!



-- 
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Geschäftsführer: Paul Manicle, Halimah DeLaine Prado
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg

^ permalink raw reply

* Re: Project Financing
From: Gabriel Walker @ 2018-09-14 16:13 UTC (permalink / raw)


Thank you for your time,

We are looking for clients in your country with good business or project that requires financing to execute.

Do get back to me if you are interested in this or you know anybody who has good business ideas but lack the necessary capital to fund his projects so we can establish working relationship.

Sincerely,
 
John Hanan, MBA, CFA
General Investment Consultant 

^ permalink raw reply

* Re: [PATCH] net/mlx4_core: print firmware version during driver loading
From: David Miller @ 2018-09-14 21:14 UTC (permalink / raw)
  To: andrew; +Cc: qing.huang, leon, netdev, linux-rdma, linux-kernel, tariqt
In-Reply-To: <20180914181718.GD3811@lunn.ch>

From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 14 Sep 2018 20:17:18 +0200

> On Fri, Sep 14, 2018 at 10:15:48AM -0700, Qing Huang wrote:
>> The FW version is actually a very crucial piece of information and only
>> printed once here
>> when the driver is loaded. People tend to get confused when switching
>> multiple FW files
>> back and forth without running separate utility tools, especially at
>> customer sites.
>> IMHO, this information is very useful and only takes up very little log file
>> space. :-)
> 
> Why not use ethtool -i ?
> 
> $ sudo ethtool -i eth0
> driver: r8169
> version: 2.3LK-NAPI
> firmware-version: rtl8168g-2_0.0.1 02/06/13

+1

^ permalink raw reply

* Re: [PATCH] net/mlx4_core: print firmware version during driver loading
From: David Miller @ 2018-09-14 21:14 UTC (permalink / raw)
  To: qing.huang; +Cc: andrew, leon, netdev, linux-rdma, linux-kernel, tariqt
In-Reply-To: <c332f10a-5ad0-53fd-5111-1964443b3092@oracle.com>

From: Qing Huang <qing.huang@oracle.com>
Date: Fri, 14 Sep 2018 11:33:40 -0700

> 
> 
> On 9/14/2018 11:17 AM, Andrew Lunn wrote:
>> On Fri, Sep 14, 2018 at 10:15:48AM -0700, Qing Huang wrote:
>>> The FW version is actually a very crucial piece of information and
>>> only
>>> printed once here
>>> when the driver is loaded. People tend to get confused when switching
>>> multiple FW files
>>> back and forth without running separate utility tools, especially at
>>> customer sites.
>>> IMHO, this information is very useful and only takes up very little
>>> log file
>>> space. :-)
>> Why not use ethtool -i ?
>>
>> $ sudo ethtool -i eth0
>> driver: r8169
>> version: 2.3LK-NAPI
>> firmware-version: rtl8168g-2_0.0.1 02/06/13
>>
>>      Andrew
> Sure. You can also use ibstat or ibv_devinfo tool if they are
> installed. But it's not very
> convenient in some cases.
> 
> E.g.
> A customer upgrades FW on HCAs and encounters issues. During triage,
> it's much easier
> to study customer uploaded log files when remotely testing different
> FW files.

Not a valid argument.  You can print the ethtool output from initramfs
if necessary for triage.

I still stand by the fact that ethtool is the only fully reliable way
to obtain this information, the kernel log is not.

^ permalink raw reply

* Re: [PATCH] net/mlx4_core: print firmware version during driver loading
From: David Miller @ 2018-09-14 21:09 UTC (permalink / raw)
  To: qing.huang; +Cc: leon, netdev, linux-rdma, linux-kernel, tariqt
In-Reply-To: <c580ad9d-b63d-743b-2278-1c4cf3553186@oracle.com>

From: Qing Huang <qing.huang@oracle.com>
Date: Fri, 14 Sep 2018 10:15:48 -0700

> IMHO, this information is very useful and only takes up very little
> log file space. :-)

If it's critical then the log is the wrong place for it as the log
is lossy.

The proper place to obtain this information is via the fw_version
field of the ethtool_drvinfo struct.  This can be obtained at any time
and is reliable.  And if it isn't reliable or correct, we must fix
that.

^ permalink raw reply

* Re: [PATCH net-next] cxgb4: update supported DCB version
From: David Miller @ 2018-09-14 15:52 UTC (permalink / raw)
  To: ganeshgr; +Cc: netdev, nirranjan, indranil, dt, varun
In-Reply-To: <1536926755-29242-1-git-send-email-ganeshgr@chelsio.com>

From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 14 Sep 2018 17:35:55 +0530

> - In CXGB4_DCB_STATE_FW_INCOMPLETE state check if the dcb
>   version is changed and update the dcb supported version.
> 
> - Also, fill the priority code point value for priority
>   based flow control.
> 
> Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH net] net/sched: act_sample: fix NULL dereference in the data path
From: David Miller @ 2018-09-14 15:47 UTC (permalink / raw)
  To: dcaratti; +Cc: mcroce, jhs, xiyou.wangcong, jiri, netdev
In-Reply-To: <2fcef6665503c5e3b17676daf45c4166cf130cdb.1536919144.git.dcaratti@redhat.com>

From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 14 Sep 2018 12:03:18 +0200

> Matteo reported the following splat, testing the datapath of TC 'sample':
 ...
> tcf_sample_act() tried to update its per-cpu stats, but tcf_sample_init()
> forgot to allocate them, because tcf_idr_create() was called with a wrong
> value of 'cpustats'. Setting it to true proved to fix the reported crash.
> 
> Reported-by: Matteo Croce <mcroce@redhat.com>
> Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
> Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action")
> Tested-by: Matteo Croce <mcroce@redhat.com>
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* [RFC PATCH 4/4] selftests: add GRO support, fix port option processing
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert
In-Reply-To: <cover.1536939423.git.pabeni@redhat.com>

Not a full test-case yet, but allows triggering the UDP GSO code
path.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/udpgso_bench_rx.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index 727cf67a3f75..f8bb7ea6bd25 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -31,9 +31,14 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT		103
+#endif
+
 static int  cfg_port		= 8000;
 static bool cfg_tcp;
 static bool cfg_verify;
+static bool cfg_gro_segment;
 
 static bool interrupted;
 static unsigned long packets, bytes;
@@ -199,10 +204,13 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "ptv")) != -1) {
+	while ((c = getopt(argc, argv, "p:Stv")) != -1) {
 		switch (c) {
 		case 'p':
-			cfg_port = htons(strtoul(optarg, NULL, 0));
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 'S':
+			cfg_gro_segment = true;
 			break;
 		case 't':
 			cfg_tcp = true;
@@ -227,6 +235,12 @@ static void do_recv(void)
 
 	fd = do_socket(cfg_tcp);
 
+	if (cfg_gro_segment) {
+		int val = 1;
+		if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT, &val, sizeof(val)))
+			error(1, errno, "setsockopt UDP_SEGMENT");
+	}
+
 	treport = gettimeofday_ms() + 1000;
 	do {
 		do_poll(fd);
-- 
2.17.1

^ permalink raw reply related

* [RFC PATCH 3/4] udp: implement GRO plain UDP sockets.
From: Paolo Abeni @ 2018-09-14 15:43 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Willem de Bruijn, Steffen Klassert
In-Reply-To: <cover.1536939423.git.pabeni@redhat.com>

This is the RX counter part of commit bec1f6f69736 ("udp: generate gso
with UDP_SEGMENT"). When UDP_SEGMENT is enabled, such socket is also
eligible for GRO in the rx path: UDP segments directed to such socket
are assembled into a larger GSO_UDP_L4 packet.

The core UDP GRO support is enabled/updated on setsockopt(UDP_SEGMENT) and
disabled, if needed at socket destruction time.

Initial benchmark numbers:

Before:
udp rx:   1079 MB/s   769065 calls/s

After:
udp rx:   1466 MB/s    24877 calls/s

This change introduces a side effect in respect to UDP tunnels:
after an UDP tunnel creation, now the kernel performs a lookup per ingress UDP
packet, before such lookup happended only if the ingress packet carried a valid
internal header csum.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/udp.h    |   2 +-
 net/ipv4/udp.c         |   1 +
 net/ipv4/udp_offload.c | 107 +++++++++++++++++++++++++++++++++--------
 net/ipv6/udp_offload.c |   6 +--
 4 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 56a321a55ba1..27dea956ef6e 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -128,7 +128,7 @@ static inline bool udp_get_gro_in_use(struct sock *sk)
 
 static inline bool udp_want_gro(struct sock *sk)
 {
-	return udp_sk(sk)->gro_receive;
+	return udp_sk(sk)->gro_receive || udp_sk(sk)->gso_size;
 }
 
 #define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5ac794230013..871ee55afd96 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2450,6 +2450,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		if (val < 0 || val > USHRT_MAX)
 			return -EINVAL;
 		up->gso_size = val;
+		udp_update_gro_in_use(sk, udp_want_gro(sk));
 		break;
 
 	/*
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 08b225adf763..4ff150bb84de 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -347,6 +347,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	return segs;
 }
 
+#define UDO_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+					       struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	struct sk_buff *pp = NULL;
+	struct udphdr *uh2;
+	struct sk_buff *p;
+
+	/* requires non zero csum, for simmetry with GSO */
+	if (!uh->check) {
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+	/* pull encapsulating udp header */
+	skb_gro_pull(skb, sizeof(struct udphdr));
+	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+	list_for_each_entry(p, head, list) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		uh2 = udp_hdr(p);
+
+		/* Match ports only, as csum is always non zero */
+		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		/* Terminate the flow on len mismatch or if it grow "too much".
+		 * Under small packet flood GRO count could elsewhere grow a lot
+		 * leading to execessive truesize values
+		 */
+		if (!skb_gro_receive(p, skb) &&
+		    NAPI_GRO_CB(p)->count > UDO_GRO_CNT_MAX)
+			pp = p;
+		else if (uh->len != uh2->len)
+			pp = p;
+
+		return pp;
+	}
+
+	/* mismatch, but we never need to flush */
+	return NULL;
+}
+
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct udphdr *uh, udp_lookup_t lookup)
 {
@@ -357,23 +405,29 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	int flush = 1;
 	struct sock *sk;
 
+	rcu_read_lock();
+	sk = (*lookup)(skb, uh->source, uh->dest);
+	if (!sk)
+		goto out_unlock;
+
+	if (udp_sk(sk)->gso_size) {
+		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+		rcu_read_unlock();
+		return pp;
+	}
+
 	if (NAPI_GRO_CB(skb)->encap_mark ||
 	    (skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 	     !NAPI_GRO_CB(skb)->csum_valid))
-		goto out;
+		goto out_unlock;
 
 	/* mark that this skb passed once through the tunnel gro layer */
 	NAPI_GRO_CB(skb)->encap_mark = 1;
 
-	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
-
-	if (sk && udp_sk(sk)->gro_receive)
-		goto unflush;
-	goto out_unlock;
+	if (!udp_sk(sk)->gro_receive)
+		goto out_unlock;
 
-unflush:
 	flush = 0;
 
 	list_for_each_entry(p, head, list) {
@@ -398,7 +452,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 
 out_unlock:
 	rcu_read_unlock();
-out:
 	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
@@ -431,6 +484,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
 	return NULL;
 }
 
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	skb->csum_start = (unsigned char *)uh - skb->head;
+	skb->csum_offset = offsetof(struct udphdr, check);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+	return 0;
+}
+
 int udp_gro_complete(struct sk_buff *skb, int nhoff,
 		     udp_lookup_t lookup)
 {
@@ -441,16 +507,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 
 	uh->len = newlen;
 
-	/* Set encapsulation before calling into inner gro_complete() functions
-	 * to make them set up the inner offsets.
-	 */
-	skb->encapsulation = 1;
-
 	rcu_read_lock();
 	sk = (*lookup)(skb, uh->source, uh->dest);
-	if (sk && udp_sk(sk)->gro_complete)
+	if (sk && udp_sk(sk)->gso_size) {
+		err = udp_gro_complete_segment(skb);
+	} else if (sk && udp_sk(sk)->gro_complete) {
+		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+					: SKB_GSO_UDP_TUNNEL;
+
+		/* Set encapsulation before calling into inner gro_complete()
+		 * functions to make them set up the inner offsets.
+		 */
+		skb->encapsulation = 1;
 		err = udp_sk(sk)->gro_complete(sk, skb,
 				nhoff + sizeof(struct udphdr));
+	}
 	rcu_read_unlock();
 
 	if (skb->remcsum_offload)
@@ -465,13 +536,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 1df968a3e788..f2731b7b4c75 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
 					  &ipv6h->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
 }
-- 
2.17.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox