public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed
From: Yury Norov <yury.norov@gmail.com>
To: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Arnd Bergmann <arnd@arndb.de>,
	Andy Shevchenko <andriy.shevchenko@linux.intel.com>,
	Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Yury Norov <yury.norov@gmail.com>
Subject: [PATCH 2/3] bitmap: improve small_const case for find_next() functions
Date: Wed, 26 Oct 2022 21:38:09 -0700	[thread overview]
Message-ID: <20221027043810.350460-3-yury.norov@gmail.com> (raw)
In-Reply-To: <20221027043810.350460-1-yury.norov@gmail.com>

find_next_bit() and friends use small_const_nbits() to detect possibility
of compile-time optimization. It works well for nbits up to BITS_PER_LONG,
i.e. for 1-word bitmaps. When nbits belongs to 2nd, 3rd or any other word,
small_const_nbits() returns false.

But when both offset and size are within the same word boundary, we still
can make a small_const optimization because the whole business is just
fetching a single word, and masking head and tail bits if needed.

This patch adds a new small_const_nbits_off() macro doing that. It replaces
small_const_nbits() in find_next_bit() functions.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/asm-generic/bitsperlong.h | 12 +++++++++
 include/linux/find.h              | 45 ++++++++++++-------------------
 2 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h
index 1023e2a4bd37..c294ff798154 100644
--- a/include/asm-generic/bitsperlong.h
+++ b/include/asm-generic/bitsperlong.h
@@ -35,4 +35,16 @@
 #define small_const_nbits(nbits) \
 	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)
 
+/*
+ * small_const_nbits_off(nbits, off) is true precisely when it is known at
+ * compile-time that all bits in range [off, nbits) belong to the same word.
+ * Bitmaps of size 0 are very rare, and a compile-time-known-size 0 is most
+ * likely a sign of error. They will be handled correctly by the bit/bitmap
+ * APIs using the out-of-line functions, so that the inline implementations
+ * can unconditionally dereference the pointer(s).
+ */
+#define small_const_nbits_off(nbits, off) \
+	(__builtin_constant_p(nbits) && __builtin_constant_p(off) && (nbits) > 0 && \
+	 (nbits) > (off) && (off) / BITS_PER_LONG == ((nbits) - 1) / BITS_PER_LONG)
+
 #endif /* __ASM_GENERIC_BITS_PER_LONG */
diff --git a/include/linux/find.h b/include/linux/find.h
index db2f2851601d..df5c4d1adf4c 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -7,6 +7,7 @@
 #endif
 
 #include <linux/bitops.h>
+#include <linux/math.h>
 
 unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits,
 				unsigned long start);
@@ -49,14 +50,11 @@ static __always_inline
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 			    unsigned long offset)
 {
-	if (small_const_nbits(size)) {
-		unsigned long val;
+	if (small_const_nbits_off(size, offset)) {
+		unsigned long val = addr[offset/BITS_PER_LONG] &
+				    GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);
 
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
+		return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
 	}
 
 	return _find_next_bit(addr, size, offset);
@@ -79,14 +77,11 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long size,
 		unsigned long offset)
 {
-	if (small_const_nbits(size)) {
-		unsigned long val;
+	if (small_const_nbits_off(size, offset)) {
+		unsigned long val = addr1[offset/BITS_PER_LONG] & addr2[offset/BITS_PER_LONG] &
+				    GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);
 
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr1 & *addr2 & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
+		return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
 	}
 
 	return _find_next_and_bit(addr1, addr2, size, offset);
@@ -110,14 +105,11 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long size,
 		unsigned long offset)
 {
-	if (small_const_nbits(size)) {
-		unsigned long val;
+	if (small_const_nbits_off(size, offset)) {
+		unsigned long val = addr1[offset/BITS_PER_LONG] & ~addr2[offset/BITS_PER_LONG] &
+				    GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);
 
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr1 & ~*addr2 & GENMASK(size - 1, offset);
-		return val ? __ffs(val) : size;
+		return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size;
 	}
 
 	return _find_next_andnot_bit(addr1, addr2, size, offset);
@@ -138,14 +130,11 @@ static __always_inline
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 				 unsigned long offset)
 {
-	if (small_const_nbits(size)) {
-		unsigned long val;
+	if (small_const_nbits_off(size, offset)) {
+		unsigned long val = addr[offset/BITS_PER_LONG] |
+				    ~GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG);
 
-		if (unlikely(offset >= size))
-			return size;
-
-		val = *addr | ~GENMASK(size - 1, offset);
-		return val == ~0UL ? size : ffz(val);
+		return val == ~0UL ? size : round_down(offset, BITS_PER_LONG) + ffz(val);
 	}
 
 	return _find_next_zero_bit(addr, size, offset);
-- 
2.34.1


  parent reply	other threads:[~2022-10-27  4:38 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-27  4:38 [PATCH 0/3] bitmap: optimize small_const path for Yury Norov
2022-10-27  4:38 ` [PATCH 1/3] bitmap: switch from inline to __always_inline Yury Norov
2022-10-27  4:38 ` Yury Norov [this message]
2022-10-27  4:38 ` [PATCH 3/3] bitmap: add tests for find_next_bit() Yury Norov
2022-11-03  2:09 ` [PATCH 0/3] bitmap: optimize small_const path for Yury Norov
2022-12-08  3:32   ` Yury Norov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221027043810.350460-3-yury.norov@gmail.com \
    --to=yury.norov@gmail.com \
    --cc=andriy.shevchenko@linux.intel.com \
    --cc=arnd@arndb.de \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@rasmusvillemoes.dk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox