[PATCH 2/3] crypto: x86/crc32c - access 32-bit arguments as 32-bit

linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	Ard Biesheuvel <ardb@kernel.org>,
	Josh Poimboeuf <jpoimboe@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH 2/3] crypto: x86/crc32c - access 32-bit arguments as 32-bit
Date: Sun, 13 Oct 2024 21:24:46 -0700	[thread overview]
Message-ID: <20241014042447.50197-3-ebiggers@kernel.org> (raw)
In-Reply-To: <20241014042447.50197-1-ebiggers@kernel.org>

From: Eric Biggers <ebiggers@google.com>

Fix crc32c-pcl-intel-asm_64.S to access 32-bit arguments as 32-bit
values instead of 64-bit, since the upper bits of the corresponding
64-bit registers are not guaranteed to be zero.  Also update the type of
the length argument to be unsigned int rather than int, as the assembly
code treats it as unsigned.

Note: there haven't been any reports of this bug actually causing
incorrect behavior.  Neither gcc nor clang guarantees zero-extension to
64 bits, but zero-extension is likely to happen in practice because most
instructions that operate on 32-bit registers zero-extend to 64 bits.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/crc32c-intel_glue.c       |  2 +-
 arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 57 +++++++++++------------
 2 files changed, 27 insertions(+), 32 deletions(-)

diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index feccb5254c7e5..52c5d47ef5a14 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -39,11 +39,11 @@
  * size is >= 512 to account
  * for fpu state save/restore overhead.
  */
 #define CRC32C_PCL_BREAKEVEN	512
 
-asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
+asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
 				unsigned int crc_init);
 #endif /* CONFIG_X86_64 */
 
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
 {
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 466cea4943963..bbf860e90951d 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -58,11 +58,11 @@
 
 # Define threshold below which buffers are considered "small" and routed to
 # regular CRC code that does not interleave the CRC instructions.
 #define SMALL_SIZE 200
 
-# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
+# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
 
 .text
 SYM_FUNC_START(crc_pcl)
 #define    bufp		rdi
 #define    bufp_dw	%edi
@@ -70,18 +70,15 @@ SYM_FUNC_START(crc_pcl)
 #define    bufp_b	%dil
 #define    bufptmp	%rcx
 #define    block_0	%rcx
 #define    block_1	%rdx
 #define    block_2	%r11
-#define    len		%rsi
-#define    len_dw	%esi
-#define    len_w	%si
-#define    len_b	%sil
-#define    crc_init_arg %rdx
+#define    len		%esi
+#define    crc_init_arg %edx
 #define    tmp		%rbx
-#define    crc_init	%r8
-#define    crc_init_dw	%r8d
+#define    crc_init	%r8d
+#define    crc_init_q	%r8
 #define    crc1		%r9
 #define    crc2		%r10
 
 	pushq   %rbx
 	pushq   %rdi
@@ -105,13 +102,13 @@ SYM_FUNC_START(crc_pcl)
 .Ldo_align:
 	#### Calculate CRC of unaligned bytes of the buffer (if any)
 	movq    (bufptmp), tmp		# load a quadward from the buffer
 	add     %bufp, bufptmp		# align buffer pointer for quadword
 					# processing
-	sub     %bufp, len		# update buffer length
+	sub	bufp_dw, len		# update buffer length
 .Lalign_loop:
-	crc32b  %bl, crc_init_dw 	# compute crc32 of 1-byte
+	crc32b	%bl, crc_init		# compute crc32 of 1-byte
 	shr     $8, tmp			# get next byte
 	dec     %bufp
 	jne     .Lalign_loop
 
 .Lproc_block:
@@ -119,19 +116,18 @@ SYM_FUNC_START(crc_pcl)
 	################################################################
 	## 2) PROCESS  BLOCKS:
 	################################################################
 
 	## compute num of bytes to be processed
-	movq    len, tmp		# save num bytes in tmp
 
-	cmpq    $128*24, len
+	cmp	$128*24, len
 	jae     .Lfull_block
 
 .Lcontinue_block:
 	## len < 128*24
 	movq    $2731, %rax		# 2731 = ceil(2^16 / 24)
-	mul     len_dw
+	mul	len
 	shrq    $16, %rax
 
 	## eax contains floor(bytes / 24) = num 24-byte chunks to do
 
 	## process rax 24-byte chunks (128 >= rax >= 0)
@@ -174,21 +170,21 @@ SYM_FUNC_START(crc_pcl)
 .rept 128-1
 .altmacro
 LABEL crc_ %i
 .noaltmacro
 	ENDBR
-	crc32q   -i*8(block_0), crc_init
+	crc32q   -i*8(block_0), crc_init_q
 	crc32q   -i*8(block_1), crc1
 	crc32q   -i*8(block_2), crc2
 	i=(i-1)
 .endr
 
 .altmacro
 LABEL crc_ %i
 .noaltmacro
 	ENDBR
-	crc32q   -i*8(block_0), crc_init
+	crc32q   -i*8(block_0), crc_init_q
 	crc32q   -i*8(block_1), crc1
 # SKIP  crc32  -i*8(block_2), crc2 ; Don't do this one yet
 
 	mov     block_2, block_0
 
@@ -198,66 +194,65 @@ LABEL crc_ %i
 
 	lea	(K_table-8)(%rip), %bufp		# first entry is for idx 1
 	shlq    $3, %rax			# rax *= 8
 	pmovzxdq (%bufp,%rax), %xmm0		# 2 consts: K1:K2
 	leal	(%eax,%eax,2), %eax		# rax *= 3 (total *24)
-	subq    %rax, tmp			# tmp -= rax*24
+	sub	%eax, len			# len -= rax*24
 
-	movq    crc_init, %xmm1			# CRC for block 1
+	movq	crc_init_q, %xmm1		# CRC for block 1
 	pclmulqdq $0x00, %xmm0, %xmm1		# Multiply by K2
 
 	movq    crc1, %xmm2			# CRC for block 2
 	pclmulqdq $0x10, %xmm0, %xmm2		# Multiply by K1
 
 	pxor    %xmm2,%xmm1
 	movq    %xmm1, %rax
 	xor     -i*8(block_2), %rax
-	mov     crc2, crc_init
-	crc32   %rax, crc_init
+	mov	crc2, crc_init_q
+	crc32	%rax, crc_init_q
 
 	################################################################
 	## 5) Check for end:
 	################################################################
 
 LABEL crc_ 0
 	ENDBR
-	mov     tmp, len
-	cmp     $128*24, tmp
+	cmp	$128*24, len
 	jae     .Lfull_block
-	cmp	$SMALL_SIZE, tmp
+	cmp	$SMALL_SIZE, len
 	jae     .Lcontinue_block
 
 	#######################################################################
 	## 6) Process any remainder without interleaving:
 	#######################################################################
 .Lsmall:
 	test	len, len
 	jz	.Ldone
-	mov	len_dw, %eax
+	mov	len, %eax
 	shr	$3, %eax
 	jz	.Ldo_dword
 .Ldo_qwords:
-	crc32q	(bufptmp), crc_init
+	crc32q	(bufptmp), crc_init_q
 	add	$8, bufptmp
 	dec	%eax
 	jnz	.Ldo_qwords
 .Ldo_dword:
-	test	$4, len_dw
+	test	$4, len
 	jz	.Ldo_word
-	crc32l	(bufptmp), crc_init_dw
+	crc32l	(bufptmp), crc_init
 	add	$4, bufptmp
 .Ldo_word:
-	test	$2, len_dw
+	test	$2, len
 	jz	.Ldo_byte
-	crc32w	(bufptmp), crc_init_dw
+	crc32w	(bufptmp), crc_init
 	add	$2, bufptmp
 .Ldo_byte:
-	test	$1, len_dw
+	test	$1, len
 	jz	.Ldone
-	crc32b	(bufptmp), crc_init_dw
+	crc32b	(bufptmp), crc_init
 .Ldone:
-	movq    crc_init, %rax
+	mov	crc_init, %eax
 	popq    %rsi
 	popq    %rdi
 	popq    %rbx
         RET
 SYM_FUNC_END(crc_pcl)
-- 
2.47.0

next prev parent reply	other threads:[~2024-10-14  4:25 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-14  4:24 [PATCH 0/3] crypto: x86/crc32c - jump table elimination and other cleanups Eric Biggers
2024-10-14  4:24 ` [PATCH 1/3] crypto: x86/crc32c - simplify code for handling fewer than 200 bytes Eric Biggers
2024-10-14  4:24 ` Eric Biggers [this message]
2024-10-14  4:24 ` [PATCH 3/3] crypto: x86/crc32c - eliminate jump table and excessive unrolling Eric Biggers
2024-10-14 16:30   ` David Laight
2024-10-14 19:01     ` Eric Biggers
2024-10-14 22:32       ` David Laight
2024-10-14 23:59         ` Eric Biggers
2024-10-15 10:55 ` [PATCH 0/3] crypto: x86/crc32c - jump table elimination and other cleanups Ard Biesheuvel
2024-10-26  6:53 ` Herbert Xu

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:feccb5254c7e dfblob:52c5d47ef5a1 dfblob:466cea494396
dfblob:bbf860e90951 )
 OR (
bs:"[PATCH 2/3] crypto: x86/crc32c - access 32-bit arguments as 32-bit" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241014042447.50197-3-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=ardb@kernel.org \
    --cc=jpoimboe@kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the NNTP newsgroup(s).