From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
To: Yury Norov <yury.norov@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>,
Mark Rutland <mark.rutland@arm.com>,
Will Deacon <will@kernel.org>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
linux-arm-kernel@lists.infradead.org
Subject: [PATCH 4/5] ARM: findbit: operate by words
Date: Fri, 28 Oct 2022 17:48:08 +0100 [thread overview]
Message-ID: <E1ooSWe-000FEG-G7@rmk-PC.armlinux.org.uk> (raw)
In-Reply-To: <Y1wHlSE0S5QZ+QCI@shell.armlinux.org.uk>
Convert the implementations to operate on words rather than bytes,
which makes bitmap searching faster.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm/include/asm/assembler.h | 6 +++
arch/arm/lib/findbit.S | 78 ++++++++++++++++++--------------
2 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 90fbe4a3f9c8..28e18f79c300 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -761,6 +761,12 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
.endif
.endm
+ .if __LINUX_ARM_ARCH__ < 6
+ .set .Lrev_l_uses_tmp, 1
+ .else
+ .set .Lrev_l_uses_tmp, 0
+ .endif
+
/*
* bl_r - branch and link to register
*
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 8280f66d38a5..6ec584d16d46 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -14,32 +14,32 @@
#include <asm/assembler.h>
.text
+#ifdef __ARMEB__
+#define SWAB_ENDIAN le
+#else
+#define SWAB_ENDIAN be
+#endif
+
.macro find_first, endian, set, name
ENTRY(_find_first_\name\()bit_\endian)
teq r1, #0
beq 3f
mov r2, #0
-1:
- .ifc \endian, be
- eor r3, r2, #0x18
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
+1: ldr r3, [r0], #4
+ .ifeq \set
+ mvns r3, r3 @ invert/test bits
.else
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3 @ test bits
.endif
- .ifeq \set
- eors r3, r3, #0xff @ invert bits
+ .ifc \endian, SWAB_ENDIAN
+ bne .L_found_swab
.else
- movs r3, r3
+ bne .L_found @ found the bit?
.endif
- bne .L_found @ any now set - found zero bit
- add r2, r2, #8 @ next bit pointer
+ add r2, r2, #32 @ next index
2: cmp r2, r1 @ any more?
blo 1b
-3: mov r0, r1 @ no free bits
+3: mov r0, r1 @ no more bits
ret lr
ENDPROC(_find_first_\name\()bit_\endian)
.endm
@@ -48,24 +48,25 @@ ENDPROC(_find_first_\name\()bit_\endian)
ENTRY(_find_next_\name\()bit_\endian)
cmp r2, r1
bhs 3b
- ands ip, r2, #7
- beq 1b @ If new byte, goto old routine
- .ifc \endian, be
- eor r3, r2, #0x18
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
- .else
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
+ mov ip, r2, lsr #5 @ word index
+ add r0, r0, ip, lsl #2
+ ands ip, r2, #31 @ bit position
+ beq 1b
+ ldr r3, [r0], #4
+ .ifeq \set
+ mvn r3, r3 @ invert bits
+ .endif
+ .ifc \endian, SWAB_ENDIAN
+ rev_l r3, ip
+ .if .Lrev_l_uses_tmp
+ @ we need to recompute ip because rev_l will have overwritten
+ @ it.
+ and ip, r2, #31 @ bit position
.endif
- .ifeq \set
- eor r3, r3, #0xff @ now looking for a 1 bit
.endif
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
- orr r2, r2, #7 @ if zero, then no bits here
+ orr r2, r2, #31 @ no matching bits left in this word
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
ENDPROC(_find_next_\name\()bit_\endian)
@@ -95,6 +96,8 @@ ENDPROC(_find_next_\name\()bit_\endian)
/*
* One or more bits in the LSB of r3 are assumed to be set.
*/
+.L_found_swab:
+ rev_l r3, ip
.L_found:
#if __LINUX_ARM_ARCH__ >= 7
rbit r3, r3 @ reverse bits
@@ -107,13 +110,20 @@ ENDPROC(_find_next_\name\()bit_\endian)
rsb r3, r3, #31 @ offset of first set bit
add r0, r2, r3 @ add offset of first set bit
#else
- tst r3, #0x0f
+ mov ip, #~0
+ tst r3, ip, lsr #16 @ test bits 0-15
+ addeq r2, r2, #16
+ moveq r3, r3, lsr #16
+ tst r3, #0x00ff
+ addeq r2, r2, #8
+ moveq r3, r3, lsr #8
+ tst r3, #0x000f
addeq r2, r2, #4
- movne r3, r3, lsl #4
- tst r3, #0x30
+ moveq r3, r3, lsr #4
+ tst r3, #0x0003
addeq r2, r2, #2
- movne r3, r3, lsl #2
- tst r3, #0x40
+ moveq r3, r3, lsr #2
+ tst r3, #0x0001
addeq r2, r2, #1
mov r0, r2
#endif
--
2.30.2
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
WARNING: multiple messages have this Message-ID (diff)
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
To: Yury Norov <yury.norov@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>,
Mark Rutland <mark.rutland@arm.com>,
Will Deacon <will@kernel.org>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
linux-arm-kernel@lists.infradead.org
Subject: [PATCH 4/5] ARM: findbit: operate by words
Date: Fri, 28 Oct 2022 17:48:08 +0100 [thread overview]
Message-ID: <E1ooSWe-000FEG-G7@rmk-PC.armlinux.org.uk> (raw)
In-Reply-To: <Y1wHlSE0S5QZ+QCI@shell.armlinux.org.uk>
Convert the implementations to operate on words rather than bytes,
which makes bitmap searching faster.
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
arch/arm/include/asm/assembler.h | 6 +++
arch/arm/lib/findbit.S | 78 ++++++++++++++++++--------------
2 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 90fbe4a3f9c8..28e18f79c300 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -761,6 +761,12 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
.endif
.endm
+ .if __LINUX_ARM_ARCH__ < 6
+ .set .Lrev_l_uses_tmp, 1
+ .else
+ .set .Lrev_l_uses_tmp, 0
+ .endif
+
/*
* bl_r - branch and link to register
*
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 8280f66d38a5..6ec584d16d46 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -14,32 +14,32 @@
#include <asm/assembler.h>
.text
+#ifdef __ARMEB__
+#define SWAB_ENDIAN le
+#else
+#define SWAB_ENDIAN be
+#endif
+
.macro find_first, endian, set, name
ENTRY(_find_first_\name\()bit_\endian)
teq r1, #0
beq 3f
mov r2, #0
-1:
- .ifc \endian, be
- eor r3, r2, #0x18
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
+1: ldr r3, [r0], #4
+ .ifeq \set
+ mvns r3, r3 @ invert/test bits
.else
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3 @ test bits
.endif
- .ifeq \set
- eors r3, r3, #0xff @ invert bits
+ .ifc \endian, SWAB_ENDIAN
+ bne .L_found_swab
.else
- movs r3, r3
+ bne .L_found @ found the bit?
.endif
- bne .L_found @ any now set - found zero bit
- add r2, r2, #8 @ next bit pointer
+ add r2, r2, #32 @ next index
2: cmp r2, r1 @ any more?
blo 1b
-3: mov r0, r1 @ no free bits
+3: mov r0, r1 @ no more bits
ret lr
ENDPROC(_find_first_\name\()bit_\endian)
.endm
@@ -48,24 +48,25 @@ ENDPROC(_find_first_\name\()bit_\endian)
ENTRY(_find_next_\name\()bit_\endian)
cmp r2, r1
bhs 3b
- ands ip, r2, #7
- beq 1b @ If new byte, goto old routine
- .ifc \endian, be
- eor r3, r2, #0x18
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
- .else
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
+ mov ip, r2, lsr #5 @ word index
+ add r0, r0, ip, lsl #2
+ ands ip, r2, #31 @ bit position
+ beq 1b
+ ldr r3, [r0], #4
+ .ifeq \set
+ mvn r3, r3 @ invert bits
+ .endif
+ .ifc \endian, SWAB_ENDIAN
+ rev_l r3, ip
+ .if .Lrev_l_uses_tmp
+ @ we need to recompute ip because rev_l will have overwritten
+ @ it.
+ and ip, r2, #31 @ bit position
.endif
- .ifeq \set
- eor r3, r3, #0xff @ now looking for a 1 bit
.endif
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
- orr r2, r2, #7 @ if zero, then no bits here
+ orr r2, r2, #31 @ no matching bits left in this word
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
ENDPROC(_find_next_\name\()bit_\endian)
@@ -95,6 +96,8 @@ ENDPROC(_find_next_\name\()bit_\endian)
/*
* One or more bits in the LSB of r3 are assumed to be set.
*/
+.L_found_swab:
+ rev_l r3, ip
.L_found:
#if __LINUX_ARM_ARCH__ >= 7
rbit r3, r3 @ reverse bits
@@ -107,13 +110,20 @@ ENDPROC(_find_next_\name\()bit_\endian)
rsb r3, r3, #31 @ offset of first set bit
add r0, r2, r3 @ add offset of first set bit
#else
- tst r3, #0x0f
+ mov ip, #~0
+ tst r3, ip, lsr #16 @ test bits 0-15
+ addeq r2, r2, #16
+ moveq r3, r3, lsr #16
+ tst r3, #0x00ff
+ addeq r2, r2, #8
+ moveq r3, r3, lsr #8
+ tst r3, #0x000f
addeq r2, r2, #4
- movne r3, r3, lsl #4
- tst r3, #0x30
+ moveq r3, r3, lsr #4
+ tst r3, #0x0003
addeq r2, r2, #2
- movne r3, r3, lsl #2
- tst r3, #0x40
+ moveq r3, r3, lsr #2
+ tst r3, #0x0001
addeq r2, r2, #1
mov r0, r2
#endif
--
2.30.2
next prev parent reply other threads:[~2022-10-28 16:52 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-28 16:47 [PATCH 0/5] ARM: findbit assembly updates Russell King (Oracle)
2022-10-28 16:47 ` Russell King (Oracle)
2022-10-28 16:47 ` [PATCH 1/5] ARM: findbit: document ARMv5 bit offset calculation Russell King (Oracle)
2022-10-28 16:47 ` Russell King (Oracle)
2022-10-28 17:05 ` Linus Torvalds
2022-10-28 17:05 ` Linus Torvalds
2022-10-28 17:45 ` Russell King (Oracle)
2022-10-28 17:45 ` Russell King (Oracle)
2022-10-28 18:37 ` Yury Norov
2022-10-28 18:37 ` Yury Norov
2022-10-28 19:42 ` Russell King (Oracle)
2022-10-28 19:42 ` Russell King (Oracle)
2022-10-28 19:01 ` Linus Torvalds
2022-10-28 19:01 ` Linus Torvalds
2022-10-28 19:10 ` Linus Torvalds
2022-10-28 19:10 ` Linus Torvalds
2022-10-28 19:46 ` Russell King (Oracle)
2022-10-28 19:46 ` Russell King (Oracle)
2022-10-28 20:26 ` Linus Torvalds
2022-10-28 20:26 ` Linus Torvalds
2022-10-28 16:47 ` [PATCH 2/5] ARM: findbit: provide more efficient ARMv7 implementation Russell King (Oracle)
2022-10-28 16:47 ` Russell King (Oracle)
2022-10-28 16:48 ` [PATCH 3/5] ARM: findbit: convert to macros Russell King (Oracle)
2022-10-28 16:48 ` Russell King (Oracle)
2022-10-28 16:48 ` Russell King (Oracle) [this message]
2022-10-28 16:48 ` [PATCH 4/5] ARM: findbit: operate by words Russell King (Oracle)
2022-10-28 16:48 ` [PATCH 5/5] ARM: findbit: add unwinder information Russell King (Oracle)
2022-10-28 16:48 ` Russell King (Oracle)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=E1ooSWe-000FEG-G7@rmk-PC.armlinux.org.uk \
--to=rmk+kernel@armlinux.org.uk \
--cc=catalin.marinas@arm.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=torvalds@linux-foundation.org \
--cc=will@kernel.org \
--cc=yury.norov@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.