All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
To: Yury Norov <yury.norov@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Will Deacon <will@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-arm-kernel@lists.infradead.org
Subject: [PATCH 4/5] ARM: findbit: operate by words
Date: Fri, 28 Oct 2022 17:48:08 +0100	[thread overview]
Message-ID: <E1ooSWe-000FEG-G7@rmk-PC.armlinux.org.uk> (raw)
In-Reply-To: <Y1wHlSE0S5QZ+QCI@shell.armlinux.org.uk>

Convert the implementations to operate on words rather than bytes
which makes bitmap searching faster.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/assembler.h |  6 +++
 arch/arm/lib/findbit.S           | 78 ++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 90fbe4a3f9c8..28e18f79c300 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -761,6 +761,12 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	.endif
 	.endm
 
+	.if		__LINUX_ARM_ARCH__ < 6
+	.set		.Lrev_l_uses_tmp, 1
+	.else
+	.set		.Lrev_l_uses_tmp, 0
+	.endif
+
 	/*
 	 * bl_r - branch and link to register
 	 *
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 8280f66d38a5..6ec584d16d46 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -14,32 +14,32 @@
 #include <asm/assembler.h>
                 .text
 
+#ifdef __ARMEB__
+#define SWAB_ENDIAN le
+#else
+#define SWAB_ENDIAN be
+#endif
+
 		.macro	find_first, endian, set, name
 ENTRY(_find_first_\name\()bit_\endian)
 		teq	r1, #0
 		beq	3f
 		mov	r2, #0
-1:
-		.ifc	\endian, be
-		eor	r3, r2, #0x18
- ARM(		ldrb	r3, [r0, r3, lsr #3]	)
- THUMB(		lsr	r3, #3			)
- THUMB(		ldrb	r3, [r0, r3]		)
+1:		ldr	r3, [r0], #4
+		.ifeq \set
+		mvns	r3, r3			@ invert/test bits
 		.else
- ARM(		ldrb	r3, [r0, r2, lsr #3]	)
- THUMB(		lsr	r3, r2, #3		)
- THUMB(		ldrb	r3, [r0, r3]		)
+		movs	r3, r3			@ test bits
 		.endif
-		.ifeq	\set
-		eors	r3, r3, #0xff		@ invert bits
+		.ifc \endian, SWAB_ENDIAN
+		bne	.L_found_swab
 		.else
-		movs	r3, r3
+		bne	.L_found		@ found the bit?
 		.endif
-		bne	.L_found		@ any now set - found zero bit
-		add	r2, r2, #8		@ next bit pointer
+		add	r2, r2, #32		@ next index
 2:		cmp	r2, r1			@ any more?
 		blo	1b
-3:		mov	r0, r1			@ no free bits
+3:		mov	r0, r1			@ no more bits
 		ret	lr
 ENDPROC(_find_first_\name\()bit_\endian)
 		.endm
@@ -48,24 +48,25 @@ ENDPROC(_find_first_\name\()bit_\endian)
 ENTRY(_find_next_\name\()bit_\endian)
 		cmp	r2, r1
 		bhs	3b
-		ands	ip, r2, #7
-		beq	1b			@ If new byte, goto old routine
-		.ifc	\endian, be
-		eor	r3, r2, #0x18
- ARM(		ldrb	r3, [r0, r3, lsr #3]	)
- THUMB(		lsr	r3, #3			)
- THUMB(		ldrb	r3, [r0, r3]		)
-		.else
- ARM(		ldrb	r3, [r0, r2, lsr #3]	)
- THUMB(		lsr	r3, r2, #3		)
- THUMB(		ldrb	r3, [r0, r3]		)
+		mov	ip, r2, lsr #5		@ word index
+		add	r0, r0, ip, lsl #2
+		ands	ip, r2, #31		@ bit position
+		beq	1b
+		ldr	r3, [r0], #4
+		.ifeq \set
+		mvn	r3, r3			@ invert bits
+		.endif
+		.ifc \endian, SWAB_ENDIAN
+		rev_l	r3, ip
+		.if	.Lrev_l_uses_tmp
+		@ we need to recompute ip because rev_l will have overwritten
+		@ it.
+		and	ip, r2, #31		@ bit position
 		.endif
-		.ifeq	\set
-		eor	r3, r3, #0xff		@ now looking for a 1 bit
 		.endif
 		movs	r3, r3, lsr ip		@ shift off unused bits
 		bne	.L_found
-		orr	r2, r2, #7		@ if zero, then no bits here
+		orr	r2, r2, #31		@ no zero bits
 		add	r2, r2, #1		@ align bit pointer
 		b	2b			@ loop for next bit
 ENDPROC(_find_next_\name\()bit_\endian)
@@ -95,6 +96,8 @@ ENDPROC(_find_next_\name\()bit_\endian)
 /*
  * One or more bits in the LSB of r3 are assumed to be set.
  */
+.L_found_swab:
+		rev_l	r3, ip
 .L_found:
 #if __LINUX_ARM_ARCH__ >= 7
 		rbit	r3, r3			@ reverse bits
@@ -107,13 +110,20 @@ ENDPROC(_find_next_\name\()bit_\endian)
 		rsb	r3, r3, #31		@ offset of first set bit
 		add	r0, r2, r3		@ add offset of first set bit
 #else
-		tst	r3, #0x0f
+		mov	ip, #~0
+		tst	r3, ip, lsr #16		@ test bits 0-15
+		addeq	r2, r2, #16
+		moveq	r3, r3, lsr #16
+		tst	r3, #0x00ff
+		addeq	r2, r2, #8
+		moveq	r3, r3, lsr #8
+		tst	r3, #0x000f
 		addeq	r2, r2, #4
-		movne	r3, r3, lsl #4
-		tst	r3, #0x30
+		moveq	r3, r3, lsr #4
+		tst	r3, #0x0003
 		addeq	r2, r2, #2
-		movne	r3, r3, lsl #2
-		tst	r3, #0x40
+		moveq	r3, r3, lsr #2
+		tst	r3, #0x0001
 		addeq	r2, r2, #1
 		mov	r0, r2
 #endif
-- 
2.30.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

WARNING: multiple messages have this Message-ID (diff)
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
To: Yury Norov <yury.norov@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Will Deacon <will@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-arm-kernel@lists.infradead.org
Subject: [PATCH 4/5] ARM: findbit: operate by words
Date: Fri, 28 Oct 2022 17:48:08 +0100	[thread overview]
Message-ID: <E1ooSWe-000FEG-G7@rmk-PC.armlinux.org.uk> (raw)
In-Reply-To: <Y1wHlSE0S5QZ+QCI@shell.armlinux.org.uk>

Convert the implementations to operate on words rather than bytes
which makes bitmap searching faster.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/assembler.h |  6 +++
 arch/arm/lib/findbit.S           | 78 ++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 90fbe4a3f9c8..28e18f79c300 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -761,6 +761,12 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	.endif
 	.endm
 
+	.if		__LINUX_ARM_ARCH__ < 6
+	.set		.Lrev_l_uses_tmp, 1
+	.else
+	.set		.Lrev_l_uses_tmp, 0
+	.endif
+
 	/*
 	 * bl_r - branch and link to register
 	 *
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 8280f66d38a5..6ec584d16d46 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -14,32 +14,32 @@
 #include <asm/assembler.h>
                 .text
 
+#ifdef __ARMEB__
+#define SWAB_ENDIAN le
+#else
+#define SWAB_ENDIAN be
+#endif
+
 		.macro	find_first, endian, set, name
 ENTRY(_find_first_\name\()bit_\endian)
 		teq	r1, #0
 		beq	3f
 		mov	r2, #0
-1:
-		.ifc	\endian, be
-		eor	r3, r2, #0x18
- ARM(		ldrb	r3, [r0, r3, lsr #3]	)
- THUMB(		lsr	r3, #3			)
- THUMB(		ldrb	r3, [r0, r3]		)
+1:		ldr	r3, [r0], #4
+		.ifeq \set
+		mvns	r3, r3			@ invert/test bits
 		.else
- ARM(		ldrb	r3, [r0, r2, lsr #3]	)
- THUMB(		lsr	r3, r2, #3		)
- THUMB(		ldrb	r3, [r0, r3]		)
+		movs	r3, r3			@ test bits
 		.endif
-		.ifeq	\set
-		eors	r3, r3, #0xff		@ invert bits
+		.ifc \endian, SWAB_ENDIAN
+		bne	.L_found_swab
 		.else
-		movs	r3, r3
+		bne	.L_found		@ found the bit?
 		.endif
-		bne	.L_found		@ any now set - found zero bit
-		add	r2, r2, #8		@ next bit pointer
+		add	r2, r2, #32		@ next index
 2:		cmp	r2, r1			@ any more?
 		blo	1b
-3:		mov	r0, r1			@ no free bits
+3:		mov	r0, r1			@ no more bits
 		ret	lr
 ENDPROC(_find_first_\name\()bit_\endian)
 		.endm
@@ -48,24 +48,25 @@ ENDPROC(_find_first_\name\()bit_\endian)
 ENTRY(_find_next_\name\()bit_\endian)
 		cmp	r2, r1
 		bhs	3b
-		ands	ip, r2, #7
-		beq	1b			@ If new byte, goto old routine
-		.ifc	\endian, be
-		eor	r3, r2, #0x18
- ARM(		ldrb	r3, [r0, r3, lsr #3]	)
- THUMB(		lsr	r3, #3			)
- THUMB(		ldrb	r3, [r0, r3]		)
-		.else
- ARM(		ldrb	r3, [r0, r2, lsr #3]	)
- THUMB(		lsr	r3, r2, #3		)
- THUMB(		ldrb	r3, [r0, r3]		)
+		mov	ip, r2, lsr #5		@ word index
+		add	r0, r0, ip, lsl #2
+		ands	ip, r2, #31		@ bit position
+		beq	1b
+		ldr	r3, [r0], #4
+		.ifeq \set
+		mvn	r3, r3			@ invert bits
+		.endif
+		.ifc \endian, SWAB_ENDIAN
+		rev_l	r3, ip
+		.if	.Lrev_l_uses_tmp
+		@ we need to recompute ip because rev_l will have overwritten
+		@ it.
+		and	ip, r2, #31		@ bit position
 		.endif
-		.ifeq	\set
-		eor	r3, r3, #0xff		@ now looking for a 1 bit
 		.endif
 		movs	r3, r3, lsr ip		@ shift off unused bits
 		bne	.L_found
-		orr	r2, r2, #7		@ if zero, then no bits here
+		orr	r2, r2, #31		@ no zero bits
 		add	r2, r2, #1		@ align bit pointer
 		b	2b			@ loop for next bit
 ENDPROC(_find_next_\name\()bit_\endian)
@@ -95,6 +96,8 @@ ENDPROC(_find_next_\name\()bit_\endian)
 /*
  * One or more bits in the LSB of r3 are assumed to be set.
  */
+.L_found_swab:
+		rev_l	r3, ip
 .L_found:
 #if __LINUX_ARM_ARCH__ >= 7
 		rbit	r3, r3			@ reverse bits
@@ -107,13 +110,20 @@ ENDPROC(_find_next_\name\()bit_\endian)
 		rsb	r3, r3, #31		@ offset of first set bit
 		add	r0, r2, r3		@ add offset of first set bit
 #else
-		tst	r3, #0x0f
+		mov	ip, #~0
+		tst	r3, ip, lsr #16		@ test bits 0-15
+		addeq	r2, r2, #16
+		moveq	r3, r3, lsr #16
+		tst	r3, #0x00ff
+		addeq	r2, r2, #8
+		moveq	r3, r3, lsr #8
+		tst	r3, #0x000f
 		addeq	r2, r2, #4
-		movne	r3, r3, lsl #4
-		tst	r3, #0x30
+		moveq	r3, r3, lsr #4
+		tst	r3, #0x0003
 		addeq	r2, r2, #2
-		movne	r3, r3, lsl #2
-		tst	r3, #0x40
+		moveq	r3, r3, lsr #2
+		tst	r3, #0x0001
 		addeq	r2, r2, #1
 		mov	r0, r2
 #endif
-- 
2.30.2


  parent reply	other threads:[~2022-10-28 16:52 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-28 16:47 [PATCH 0/5] ARM: findbit assembly updates Russell King (Oracle)
2022-10-28 16:47 ` Russell King (Oracle)
2022-10-28 16:47 ` [PATCH 1/5] ARM: findbit: document ARMv5 bit offset calculation Russell King (Oracle)
2022-10-28 16:47   ` Russell King (Oracle)
2022-10-28 17:05   ` Linus Torvalds
2022-10-28 17:05     ` Linus Torvalds
2022-10-28 17:45     ` Russell King (Oracle)
2022-10-28 17:45       ` Russell King (Oracle)
2022-10-28 18:37       ` Yury Norov
2022-10-28 18:37         ` Yury Norov
2022-10-28 19:42         ` Russell King (Oracle)
2022-10-28 19:42           ` Russell King (Oracle)
2022-10-28 19:01       ` Linus Torvalds
2022-10-28 19:01         ` Linus Torvalds
2022-10-28 19:10         ` Linus Torvalds
2022-10-28 19:10           ` Linus Torvalds
2022-10-28 19:46         ` Russell King (Oracle)
2022-10-28 19:46           ` Russell King (Oracle)
2022-10-28 20:26           ` Linus Torvalds
2022-10-28 20:26             ` Linus Torvalds
2022-10-28 16:47 ` [PATCH 2/5] ARM: findbit: provide more efficient ARMv7 implementation Russell King (Oracle)
2022-10-28 16:47   ` Russell King (Oracle)
2022-10-28 16:48 ` [PATCH 3/5] ARM: findbit: convert to macros Russell King (Oracle)
2022-10-28 16:48   ` Russell King (Oracle)
2022-10-28 16:48 ` Russell King (Oracle) [this message]
2022-10-28 16:48   ` [PATCH 4/5] ARM: findbit: operate by words Russell King (Oracle)
2022-10-28 16:48 ` [PATCH 5/5] ARM: findbit: add unwinder information Russell King (Oracle)
2022-10-28 16:48   ` Russell King (Oracle)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=E1ooSWe-000FEG-G7@rmk-PC.armlinux.org.uk \
    --to=rmk+kernel@armlinux.org.uk \
    --cc=catalin.marinas@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=torvalds@linux-foundation.org \
    --cc=will@kernel.org \
    --cc=yury.norov@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.