All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexey Dobriyan <adobriyan@gmail.com>
To: akpm@linux-foundation.org
Cc: adobriyan@gmail.com, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, pmladek@suse.com,
	rostedt@goodmis.org, sergey.senozhatsky@gmail.com,
	andriy.shevchenko@linux.intel.com, linux@rasmusvillemoes.dk
Subject: [PATCH 14/15] print_integer, printf: rewrite the rest of lib/vsprintf.c via print_integer()
Date: Mon, 20 Apr 2020 23:57:42 +0300	[thread overview]
Message-ID: <20200420205743.19964-14-adobriyan@gmail.com> (raw)
In-Reply-To: <20200420205743.19964-1-adobriyan@gmail.com>

Lookup tables are cheating.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
 lib/vsprintf.c | 236 +++++--------------------------------------------
 1 file changed, 20 insertions(+), 216 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index df2b5ce08fe9..b29fbd0da53f 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -138,204 +138,6 @@ int skip_atoi(const char **s)
 	return i;
 }
 
-/*
- * Decimal conversion is by far the most typical, and is used for
- * /proc and /sys data. This directly impacts e.g. top performance
- * with many processes running. We optimize it for speed by emitting
- * two characters at a time, using a 200 byte lookup table. This
- * roughly halves the number of multiplications compared to computing
- * the digits one at a time. Implementation strongly inspired by the
- * previous version, which in turn used ideas described at
- * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission
- * from the author, Douglas W. Jones).
- *
- * It turns out there is precisely one 26 bit fixed-point
- * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32
- * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual
- * range happens to be somewhat larger (x <= 1073741898), but that's
- * irrelevant for our purpose.
- *
- * For dividing a number in the range [10^4, 10^6-1] by 100, we still
- * need a 32x32->64 bit multiply, so we simply use the same constant.
- *
- * For dividing a number in the range [100, 10^4-1] by 100, there are
- * several options. The simplest is (x * 0x147b) >> 19, which is valid
- * for all x <= 43698.
- */
-
-static const u16 decpair[100] = {
-#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030)
-	_( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9),
-	_(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19),
-	_(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29),
-	_(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39),
-	_(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49),
-	_(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59),
-	_(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69),
-	_(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79),
-	_(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89),
-	_(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99),
-#undef _
-};
-
-/*
- * This will print a single '0' even if r == 0, since we would
- * immediately jump to out_r where two 0s would be written but only
- * one of them accounted for in buf. This is needed by ip4_string
- * below. All other callers pass a non-zero value of r.
-*/
-static noinline_for_stack
-char *put_dec_trunc8(char *buf, unsigned r)
-{
-	unsigned q;
-
-	/* 1 <= r < 10^8 */
-	if (r < 100)
-		goto out_r;
-
-	/* 100 <= r < 10^8 */
-	q = (r * (u64)0x28f5c29) >> 32;
-	*((u16 *)buf) = decpair[r - 100*q];
-	buf += 2;
-
-	/* 1 <= q < 10^6 */
-	if (q < 100)
-		goto out_q;
-
-	/*  100 <= q < 10^6 */
-	r = (q * (u64)0x28f5c29) >> 32;
-	*((u16 *)buf) = decpair[q - 100*r];
-	buf += 2;
-
-	/* 1 <= r < 10^4 */
-	if (r < 100)
-		goto out_r;
-
-	/* 100 <= r < 10^4 */
-	q = (r * 0x147b) >> 19;
-	*((u16 *)buf) = decpair[r - 100*q];
-	buf += 2;
-out_q:
-	/* 1 <= q < 100 */
-	r = q;
-out_r:
-	/* 1 <= r < 100 */
-	*((u16 *)buf) = decpair[r];
-	buf += r < 10 ? 1 : 2;
-	return buf;
-}
-
-#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64
-static noinline_for_stack
-char *put_dec_full8(char *buf, unsigned r)
-{
-	unsigned q;
-
-	/* 0 <= r < 10^8 */
-	q = (r * (u64)0x28f5c29) >> 32;
-	*((u16 *)buf) = decpair[r - 100*q];
-	buf += 2;
-
-	/* 0 <= q < 10^6 */
-	r = (q * (u64)0x28f5c29) >> 32;
-	*((u16 *)buf) = decpair[q - 100*r];
-	buf += 2;
-
-	/* 0 <= r < 10^4 */
-	q = (r * 0x147b) >> 19;
-	*((u16 *)buf) = decpair[r - 100*q];
-	buf += 2;
-
-	/* 0 <= q < 100 */
-	*((u16 *)buf) = decpair[q];
-	buf += 2;
-	return buf;
-}
-
-static noinline_for_stack
-char *put_dec(char *buf, unsigned long long n)
-{
-	if (n >= 100*1000*1000)
-		buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
-	/* 1 <= n <= 1.6e11 */
-	if (n >= 100*1000*1000)
-		buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
-	/* 1 <= n < 1e8 */
-	return put_dec_trunc8(buf, n);
-}
-
-#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64
-
-static void
-put_dec_full4(char *buf, unsigned r)
-{
-	unsigned q;
-
-	/* 0 <= r < 10^4 */
-	q = (r * 0x147b) >> 19;
-	*((u16 *)buf) = decpair[r - 100*q];
-	buf += 2;
-	/* 0 <= q < 100 */
-	*((u16 *)buf) = decpair[q];
-}
-
-/*
- * Call put_dec_full4 on x % 10000, return x / 10000.
- * The approximation x/10000 == (x * 0x346DC5D7) >> 43
- * holds for all x < 1,128,869,999.  The largest value this
- * helper will ever be asked to convert is 1,125,520,955.
- * (second call in the put_dec code, assuming n is all-ones).
- */
-static noinline_for_stack
-unsigned put_dec_helper4(char *buf, unsigned x)
-{
-        uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43;
-
-        put_dec_full4(buf, x - q * 10000);
-        return q;
-}
-
-/* Based on code by Douglas W. Jones found at
- * <http://www.cs.uiowa.edu/~jones/bcd/decimal.html#sixtyfour>
- * (with permission from the author).
- * Performs no 64-bit division and hence should be fast on 32-bit machines.
- */
-static
-char *put_dec(char *buf, unsigned long long n)
-{
-	uint32_t d3, d2, d1, q, h;
-
-	if (n < 100*1000*1000)
-		return put_dec_trunc8(buf, n);
-
-	d1  = ((uint32_t)n >> 16); /* implicit "& 0xffff" */
-	h   = (n >> 32);
-	d2  = (h      ) & 0xffff;
-	d3  = (h >> 16); /* implicit "& 0xffff" */
-
-	/* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0
-	     = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */
-	q   = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
-	q = put_dec_helper4(buf, q);
-
-	q += 7671 * d3 + 9496 * d2 + 6 * d1;
-	q = put_dec_helper4(buf+4, q);
-
-	q += 4749 * d3 + 42 * d2;
-	q = put_dec_helper4(buf+8, q);
-
-	q += 281 * d3;
-	buf += 12;
-	if (q)
-		buf = put_dec_trunc8(buf, q);
-	else while (buf[-1] == '0')
-		--buf;
-
-	return buf;
-}
-
-#endif
-
 /*
  * Convert passed number to decimal string.
  * Returns the length of string.  On buffer overflow, returns 0.
@@ -410,8 +212,6 @@ static noinline_for_stack
 char *number(char *buf, char *end, unsigned long long num,
 	     struct printf_spec spec)
 {
-	/* put_dec requires 2-byte alignment of the buffer. */
-	char tmp[3 * sizeof(num)] __aligned(2);
 	char sign;
 	char locase;
 	int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
@@ -419,6 +219,8 @@ char *number(char *buf, char *end, unsigned long long num,
 	bool is_zero = num == 0LL;
 	int field_width = spec.field_width;
 	int precision = spec.precision;
+	char tmp[22];
+	char *p = tmp + sizeof(tmp);
 
 	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
 	 * produces same digits or (maybe lowercased) letters */
@@ -446,10 +248,9 @@ char *number(char *buf, char *end, unsigned long long num,
 			field_width--;
 	}
 
-	/* generate full string in tmp[], in reverse order */
-	i = 0;
+	/* generate full string in tmp[] */
 	if (num < spec.base)
-		tmp[i++] = hex_asc_upper[num] | locase;
+		*--p = hex_asc_upper[num] | locase;
 	else if (spec.base != 10) { /* 8 or 16 */
 		int mask = spec.base - 1;
 		int shift = 3;
@@ -457,12 +258,13 @@ char *number(char *buf, char *end, unsigned long long num,
 		if (spec.base == 16)
 			shift = 4;
 		do {
-			tmp[i++] = (hex_asc_upper[((unsigned char)num) & mask] | locase);
+			*--p = hex_asc_upper[((unsigned char)num) & mask] | locase;
 			num >>= shift;
 		} while (num);
 	} else { /* base 10 */
-		i = put_dec(tmp, num) - tmp;
+		p = _print_integer_u64(p, num);
 	}
+	i = tmp + sizeof(tmp) - p;
 
 	/* printing 100 using %2d gives "100", not "00" */
 	if (i > precision)
@@ -512,10 +314,11 @@ char *number(char *buf, char *end, unsigned long long num,
 		++buf;
 	}
 	/* actual digits of result */
-	while (--i >= 0) {
+	while (p != tmp + sizeof(tmp)) {
 		if (buf < end)
-			*buf = tmp[i];
+			*buf = *p;
 		++buf;
+		p++;
 	}
 	/* trailing space padding */
 	while (--field_width >= 0) {
@@ -1297,17 +1100,18 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)
 		break;
 	}
 	for (i = 0; i < 4; i++) {
-		char temp[4] __aligned(2);	/* hold each IP quad in reverse order */
-		int digits = put_dec_trunc8(temp, addr[index]) - temp;
+		char tmp[3];
+		char *q = tmp + sizeof(tmp);
+
+		tmp[0] = tmp[1] = '0';
+		q = _print_integer_u32(q, addr[index]);
 		if (leading_zeros) {
-			if (digits < 3)
-				*p++ = '0';
-			if (digits < 2)
-				*p++ = '0';
+			q = tmp;
 		}
-		/* reverse the digits in the quad */
-		while (digits--)
-			*p++ = temp[digits];
+		do {
+			*p++ = *q++;
+		} while (q != tmp + sizeof(tmp));
+
 		if (i < 3)
 			*p++ = '.';
 		index += step;
-- 
2.24.1


  parent reply	other threads:[~2020-04-20 20:58 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-20 20:57 [PATCH 01/15] sched: make nr_running() return "unsigned int" Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 02/15] sched: make nr_iowait_cpu() " Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 03/15] print_integer: new and improved way of printing integers Alexey Dobriyan
2020-04-20 21:19   ` Andy Shevchenko
2020-04-20 21:27     ` Andy Shevchenko
2020-04-21  1:54       ` Steven Rostedt
2020-04-21 16:49         ` Alexey Dobriyan
2020-04-21 21:08           ` Steven Rostedt
2020-04-23  3:42           ` Sergey Senozhatsky
2020-04-21 16:31     ` Alexey Dobriyan
2020-04-21 16:59   ` Matthew Wilcox
2020-04-20 20:57 ` [PATCH 04/15] print_integer, proc: rewrite /proc/self via print_integer() Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 05/15] print_integer, proc: rewrite /proc/thread-self " Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 06/15] print_integer, proc: rewrite /proc/loadavg " Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 07/15] print_integer, proc: rewrite /proc/stat " Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 08/15] print_integer, proc: rewrite /proc/uptime " Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 09/15] proc: s/p/tsk/ Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 10/15] print_integer, proc: rewrite /proc/*/fd via print_integer() Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 11/15] print_integer, proc: rewrite /proc/*/stat " Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 12/15] print_integer, proc: rewrite /proc/*/statm " Alexey Dobriyan
2020-04-20 20:57 ` [PATCH 13/15] print_integer, printf: rewrite num_to_str() " Alexey Dobriyan
2020-04-20 20:57 ` Alexey Dobriyan [this message]
2020-04-21  2:01   ` [PATCH 14/15] print_integer, printf: rewrite the rest of lib/vsprintf.c " Steven Rostedt
2020-04-20 20:57 ` [PATCH 15/15] print_integer, proc: rewrite /proc/meminfo " Alexey Dobriyan
2020-04-20 21:05 ` [PATCH 01/15] sched: make nr_running() return "unsigned int" Matthew Wilcox
2020-04-21 17:06   ` Alexey Dobriyan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200420205743.19964-14-adobriyan@gmail.com \
    --to=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=andriy.shevchenko@linux.intel.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=pmladek@suse.com \
    --cc=rostedt@goodmis.org \
    --cc=sergey.senozhatsky@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.