[PATCH] lib/crc: arm64: Assume a little-endian kernel

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Eric Biggers <ebiggers@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: linux-crypto@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	linux-arm-kernel@lists.infradead.org,
	Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH] lib/crc: arm64: Assume a little-endian kernel
Date: Tue, 31 Mar 2026 17:44:31 -0700	[thread overview]
Message-ID: <20260401004431.151432-1-ebiggers@kernel.org> (raw)

Since support for big-endian arm64 kernels was removed, the CPU_LE()
macro now unconditionally emits the code it is passed, and the CPU_BE()
macro now unconditionally discards the code it is passed.

Simplify the assembly code in lib/crc/arm64/ accordingly.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---

This patch is targeting crc-next

 lib/crc/arm64/crc-t10dif-core.S | 56 ++++++++++++++++-----------------
 lib/crc/arm64/crc32-core.S      |  9 ++----
 2 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/lib/crc/arm64/crc-t10dif-core.S b/lib/crc/arm64/crc-t10dif-core.S
index 87dd6d46224d8..71388466825b9 100644
--- a/lib/crc/arm64/crc-t10dif-core.S
+++ b/lib/crc/arm64/crc-t10dif-core.S
@@ -179,17 +179,17 @@ SYM_FUNC_END(__pmull_p8_16x64)
 	.macro		fold_32_bytes, p, reg1, reg2
 	ldp		q11, q12, [buf], #0x20
 
 	pmull16x64_\p	fold_consts, \reg1, v8
 
-CPU_LE(	rev64		v11.16b, v11.16b		)
-CPU_LE(	rev64		v12.16b, v12.16b		)
+	rev64		v11.16b, v11.16b
+	rev64		v12.16b, v12.16b
 
 	pmull16x64_\p	fold_consts, \reg2, v9
 
-CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
-CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
+	ext		v11.16b, v11.16b, v11.16b, #8
+	ext		v12.16b, v12.16b, v12.16b, #8
 
 	eor		\reg1\().16b, \reg1\().16b, v8.16b
 	eor		\reg2\().16b, \reg2\().16b, v9.16b
 	eor		\reg1\().16b, \reg1\().16b, v11.16b
 	eor		\reg2\().16b, \reg2\().16b, v12.16b
@@ -218,26 +218,26 @@ CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
 	ldp		q0, q1, [buf]
 	ldp		q2, q3, [buf, #0x20]
 	ldp		q4, q5, [buf, #0x40]
 	ldp		q6, q7, [buf, #0x60]
 	add		buf, buf, #0x80
-CPU_LE(	rev64		v0.16b, v0.16b			)
-CPU_LE(	rev64		v1.16b, v1.16b			)
-CPU_LE(	rev64		v2.16b, v2.16b			)
-CPU_LE(	rev64		v3.16b, v3.16b			)
-CPU_LE(	rev64		v4.16b, v4.16b			)
-CPU_LE(	rev64		v5.16b, v5.16b			)
-CPU_LE(	rev64		v6.16b, v6.16b			)
-CPU_LE(	rev64		v7.16b, v7.16b			)
-CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
-CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
-CPU_LE(	ext		v2.16b, v2.16b, v2.16b, #8	)
-CPU_LE(	ext		v3.16b, v3.16b, v3.16b, #8	)
-CPU_LE(	ext		v4.16b, v4.16b, v4.16b, #8	)
-CPU_LE(	ext		v5.16b, v5.16b, v5.16b, #8	)
-CPU_LE(	ext		v6.16b, v6.16b, v6.16b, #8	)
-CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
+	rev64		v0.16b, v0.16b
+	rev64		v1.16b, v1.16b
+	rev64		v2.16b, v2.16b
+	rev64		v3.16b, v3.16b
+	rev64		v4.16b, v4.16b
+	rev64		v5.16b, v5.16b
+	rev64		v6.16b, v6.16b
+	rev64		v7.16b, v7.16b
+	ext		v0.16b, v0.16b, v0.16b, #8
+	ext		v1.16b, v1.16b, v1.16b, #8
+	ext		v2.16b, v2.16b, v2.16b, #8
+	ext		v3.16b, v3.16b, v3.16b, #8
+	ext		v4.16b, v4.16b, v4.16b, #8
+	ext		v5.16b, v5.16b, v5.16b, #8
+	ext		v6.16b, v6.16b, v6.16b, #8
+	ext		v7.16b, v7.16b, v7.16b, #8
 
 	// XOR the first 16 data *bits* with the initial CRC value.
 	movi		v8.16b, #0
 	mov		v8.h[7], init_crc
 	eor		v0.16b, v0.16b, v8.16b
@@ -286,12 +286,12 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	b.lt		.Lfold_16_bytes_loop_done_\@
 .Lfold_16_bytes_loop_\@:
 	pmull16x64_\p	fold_consts, v7, v8
 	eor		v7.16b, v7.16b, v8.16b
 	ldr		q0, [buf], #16
-CPU_LE(	rev64		v0.16b, v0.16b			)
-CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
+	rev64		v0.16b, v0.16b
+	ext		v0.16b, v0.16b, v0.16b, #8
 	eor		v7.16b, v7.16b, v0.16b
 	subs		len, len, #16
 	b.ge		.Lfold_16_bytes_loop_\@
 
 .Lfold_16_bytes_loop_done_\@:
@@ -308,12 +308,12 @@ CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
 	// chunk of 16 bytes, then fold the first chunk into the second.
 
 	// v0 = last 16 original data bytes
 	add		buf, buf, len
 	ldr		q0, [buf, #-16]
-CPU_LE(	rev64		v0.16b, v0.16b			)
-CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
+	rev64		v0.16b, v0.16b
+	ext		v0.16b, v0.16b, v0.16b, #8
 
 	// v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
 	adr_l		x4, .Lbyteshift_table + 16
 	sub		x4, x4, len
 	ld1		{v2.16b}, [x4]
@@ -342,12 +342,12 @@ CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
 
 	adr_l		fold_consts_ptr, .Lfold_across_16_bytes_consts
 
 	// Load the first 16 data bytes.
 	ldr		q7, [buf], #0x10
-CPU_LE(	rev64		v7.16b, v7.16b			)
-CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
+	rev64		v7.16b, v7.16b
+	ext		v7.16b, v7.16b, v7.16b, #8
 
 	// XOR the first 16 data *bits* with the initial CRC value.
 	movi		v0.16b, #0
 	mov		v0.h[7], init_crc
 	eor		v7.16b, v7.16b, v0.16b
@@ -380,12 +380,12 @@ SYM_FUNC_START(crc_t10dif_pmull_p8)
 	zip1		perm.16b, perm.16b, perm.16b
 	zip1		perm.16b, perm.16b, perm.16b
 
 	crc_t10dif_pmull p8
 
-CPU_LE(	rev64		v7.16b, v7.16b			)
-CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
+	rev64		v7.16b, v7.16b
+	ext		v7.16b, v7.16b, v7.16b, #8
 	str		q7, [x3]
 
 	frame_pop
 	ret
 SYM_FUNC_END(crc_t10dif_pmull_p8)
diff --git a/lib/crc/arm64/crc32-core.S b/lib/crc/arm64/crc32-core.S
index 68825317460fc..49d02cc485b3e 100644
--- a/lib/crc/arm64/crc32-core.S
+++ b/lib/crc/arm64/crc32-core.S
@@ -27,28 +27,23 @@
 	rbit		\reg, \reg
 	lsr		\reg, \reg, #24
 	.endm
 
 	.macro		hwordle, reg
-CPU_BE(	rev16		\reg, \reg	)
 	.endm
 
 	.macro		hwordbe, reg
-CPU_LE(	rev		\reg, \reg	)
+	rev		\reg, \reg
 	rbit		\reg, \reg
-CPU_BE(	lsr		\reg, \reg, #16	)
 	.endm
 
 	.macro		le, regs:vararg
-	.irp		r, \regs
-CPU_BE(	rev		\r, \r		)
-	.endr
 	.endm
 
 	.macro		be, regs:vararg
 	.irp		r, \regs
-CPU_LE(	rev		\r, \r		)
+	rev		\r, \r
 	.endr
 	.irp		r, \regs
 	rbit		\r, \r
 	.endr
 	.endm

base-commit: 63432fd625372a0e79fb00a4009af204f4edc013
-- 
2.53.0

next             reply	other threads:[~2026-04-01  0:44 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-01  0:44 Eric Biggers [this message]
2026-04-01  7:31 ` [PATCH] lib/crc: arm64: Assume a little-endian kernel Ard Biesheuvel
2026-04-02 23:14 ` Eric Biggers

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:87dd6d46224d dfblob:71388466825b dfblob:68825317460f
dfblob:49d02cc485b3 )
 OR (
bs:"[PATCH] lib/crc: arm64: Assume a little-endian kernel" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260401004431.151432-1-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=ardb@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.