All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Laight <david.laight.linux@gmail.com>
To: "Willy Tarreau" <w@1wt.eu>,
	"Thomas Weißschuh" <linux@weissschuh.net>,
	linux-kernel@vger.kernel.org, "Cheng Li" <lechain@gmail.com>
Subject: Re: [PATCH v2 next 00/11] tools/nolibc: Enhance printf()
Date: Fri, 6 Feb 2026 21:36:08 +0000	[thread overview]
Message-ID: <20260206213608.1bbad591@pumpkin> (raw)
In-Reply-To: <20260206191121.3602-1-david.laight.linux@gmail.com>

On Fri,  6 Feb 2026 19:11:10 +0000
david.laight.linux@gmail.com wrote:

> From: David Laight <david.laight.linux@gmail.com>
> 
> Update printf() so that it handles almost all the non-fp formats.
> In particular:
> - Left alignment.
> - Zero padding.
> - Field precision.
> - Variable field width and precision.
> - Width modifiers q, L, t and z.
> - Conversion specifiers i and X (X generates lower case).
> About the only things that are missing are octal and floating point.

Since it is pretty much a re-write, a copy of the new version:

/* printf(). Supports most of the normal integer and string formats.
 *  - %[#0-+ ][{width|*}][.{precision|*}][{l,t,z,ll,L,j,q}]{d,i,u,c,x,X,p,s,m,%}
 *  - %% generates a single %
 *  - %m outputs strerror(errno).
 *  - # only affects %x and prepends 0x to non-zero values.
 *  - %o (octal) isn't supported.
 *  - %X outputs a..f the same as %x.
 *  - No support for wide characters.
 *  - invalid formats are copied to the output buffer.
 */

/* This code uses 'flag' variables that are indexed by the low 5 bits
 * of characters to optimise checks for multiple characters.
 * Characters that only differ in the higher bits (eg 'x' and 'X', '0' and 'p',
 * '#' and 'c') therefore share the same flag bit.
 *
 * _NOLIBC_PF_FLAG(ch)
 * returns the flag bit for ch.
 *
 * _NOLIBC_PF_FLAGS_CONTAIN(flags, 'a', 'b', ...)
 * returns non-zero if the bit for any of the specified characters is set.
 * Between one and eight characters can be specified, a character value of
 * zero is ignored.
 *
 * _NOLIBC_PF_CHAR_IS_ONE_OF(ch, 'a', 'b', ...)
 * returns the flag bit for ch if it is one of the specified characters,
 * otherwise zero. Between two and eight characters can be specified.
 * All the characters must be in the same 32 character block (non-alphabetic,
 * upper case, or lower case) of the ASCII character set.
 * 'ch' is evaluated more than once so must not have side effects.
 */
#define _NOLIBC_PF_FLAG(ch) (1u << ((ch) & 0x1f))
#define _NOLIBC_PF_FLAG_NZ(ch) ((ch) ? _NOLIBC_PF_FLAG(ch) : 0u)
#define _NOLIBC_PF_FLAG8(cmp_1, cmp_2, cmp_3, cmp_4, cmp_5, cmp_6, cmp_7, cmp_8, ...) \
	(_NOLIBC_PF_FLAG_NZ(cmp_1) | _NOLIBC_PF_FLAG_NZ(cmp_2) | \
	 _NOLIBC_PF_FLAG_NZ(cmp_3) | _NOLIBC_PF_FLAG_NZ(cmp_4) | \
	 _NOLIBC_PF_FLAG_NZ(cmp_5) | _NOLIBC_PF_FLAG_NZ(cmp_6) | \
	 _NOLIBC_PF_FLAG_NZ(cmp_7) | _NOLIBC_PF_FLAG_NZ(cmp_8))
/* Eight padding zeros so that the '...' of _NOLIBC_PF_FLAG8() is never empty,
 * even when only a single character is specified.
 */
#define _NOLIBC_PF_FLAGS_CONTAIN(flags, ...) \
	((flags) & _NOLIBC_PF_FLAG8(__VA_ARGS__, 0, 0, 0, 0, 0, 0, 0, 0))
/* The range check rejects characters from a different 32 character block
 * that would otherwise alias one of the requested flag bits.
 */
#define _NOLIBC_PF_CHAR_IS_ONE_OF(ch, cmp_1, ...) \
	((ch) < ((cmp_1) & ~0x1f) || (ch) > ((cmp_1) | 0x1f) ? 0 : \
		_NOLIBC_PF_FLAGS_CONTAIN(_NOLIBC_PF_FLAG(ch), cmp_1, __VA_ARGS__))

/* Output callback used by __nolibc_printf().
 * It is called with 'size' bytes at 'buf' for every piece of output (text,
 * converted value or padding) and finally with (NULL, 0) to flush/terminate.
 * A non-zero return value makes __nolibc_printf() return -1.
 */
typedef int (*__nolibc_printf_cb)(void *state, const char *buf, size_t size);

/* Format 'fmt' using 'args' and hand the output, piece by piece, to cb().
 *
 *  - cb:    called as cb(state, buf, size) for every piece of output and
 *           finally as cb(state, NULL, 0) to flush/terminate any buffer.
 *  - state: opaque pointer passed to every cb() call.
 *  - fmt:   printf style format string.
 *  - args:  the arguments consumed by the conversions in fmt.
 *
 * Returns the number of characters passed to cb() (values plus padding),
 * or -1 as soon as cb() returns non-zero.
 *
 * A negative '*' field width is treated as the '-' flag followed by a positive
 * width, a negative '*' precision as if the precision had been omitted.
 */
static __attribute__((unused, format(printf, 3, 0)))
int __nolibc_printf(__nolibc_printf_cb cb, void *state, const char *fmt, va_list args)
{
	char ch;
	int len, written, width, precision;
	unsigned int flags, ch_flag;
	/* Numbers are converted at tmpbuf + 32. The 32 bytes before that are for
	 * up to 30 zero pad characters and up to 2 sign/prefix characters.
	 */
	char tmpbuf[32 + 24];
	const char *outstr;

	written = 0;
	while (1) {
		outstr = fmt;
		ch = *fmt++;
		if (!ch)
			break;

		width = 0;
		flags = 0;
		if (ch != '%') {
			/* Literal text, output everything up to the next '%' (or the end). */
			while (*fmt && *fmt != '%')
				fmt++;
			len = fmt - outstr;
		} else {
			/* we're in a format sequence */

			ch = *fmt++;

			/* Conversion flag characters */
			for (;; ch = *fmt++) {
				ch_flag = _NOLIBC_PF_CHAR_IS_ONE_OF(ch, ' ', '#', '+', '-', '0');
				if (!ch_flag)
					break;
				flags |= ch_flag;
			}

			/* Width and precision.
			 * The first pass parses the width, the second (only after a '.')
			 * the precision. The '.' flag bit tells the passes apart.
			 */
			for (;; ch = *fmt++) {
				if (ch == '*') {
					/* The caller passes an 'int' which may be negative. */
					precision = va_arg(args, int);
					ch = *fmt++;
				} else {
					for (precision = 0; ch >= '0' && ch <= '9'; ch = *fmt++)
						precision = precision * 10 + (ch - '0');
				}
				if (_NOLIBC_PF_FLAGS_CONTAIN(flags, '.')) {
					if (precision < 0) {
						/* Negative precision: as if it were omitted. */
						flags &= ~_NOLIBC_PF_FLAG('.');
						precision = INT_MAX;
					}
					break;
				}
				width = precision;
				if (width < 0) {
					/* Negative width: '-' flag and a positive width. */
					flags |= _NOLIBC_PF_FLAG('-');
					if (width < -INT_MAX)
						/* Negating INT_MIN would overflow. */
						width = -INT_MAX;
					width = -width;
				}
				if (ch != '.') {
					/* Default precision for strings */
					precision = INT_MAX;
					break;
				}
				flags |= _NOLIBC_PF_FLAG('.');
			}

			/* Length modifier.
			 * They miss the conversion flags characters " #+-0" so can go into flags.
			 * Change both L and ll to q.
			 */
			if (ch == 'L')
				ch = 'q';
			ch_flag = _NOLIBC_PF_CHAR_IS_ONE_OF(ch, 'l', 't', 'z', 'j', 'q');
			if (ch_flag != 0) {
				if (ch == 'l' && fmt[0] == 'l') {
					fmt++;
					ch_flag = _NOLIBC_PF_FLAG('q');
				}
				flags |= ch_flag;
				ch = *fmt++;
			}

			/* Conversion specifiers. */

			/* Numeric and pointer conversion specifiers.
			 *
			 * Use an explicit bound check (rather than _NOLIBC_PF_CHAR_IS_ONE_OF())
			 * so that 'X' can be allowed through.
			 * 'X' gets treated as 'x' because _NOLIBC_PF_FLAG() returns the same
			 * value for both.
			 */
			if ((ch < 'a' || ch > 'z') && ch != 'X')
				goto non_numeric_conversion;

			/* We need to check for "%p" or "%#x" later, merging here gives better code.
			 * But '#' collides with 'c' so shift right.
			 */
			ch_flag = _NOLIBC_PF_FLAG(ch) | (flags & _NOLIBC_PF_FLAG('#')) >> 1;
			if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'c', 'd', 'i', 'u', 'x', 'p', 's')) {
				unsigned long long v;
				long long signed_v;
				char *out = tmpbuf + 32;
				/* Up to two characters to prepend, the first in the low byte. */
				int sign = 0;

				/* 'long' is needed for pointer/string conversions and ltz lengths.
				 * A single test can be used provided 'p' (the same bit as '0')
				 * is masked from flags.
				 */
				if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag | (flags & ~_NOLIBC_PF_FLAG('p')),
							     'p', 's', 'l', 't', 'z')) {
					v = va_arg(args, unsigned long);
					signed_v = (long)v;
				} else if (_NOLIBC_PF_FLAGS_CONTAIN(flags, 'j', 'q')) {
					v = va_arg(args, unsigned long long);
					signed_v = v;
				} else {
					v = va_arg(args, unsigned int);
					signed_v = (int)v;
				}

				if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'c')) {
					/* "%c" - single character. */
					tmpbuf[0] = v;
					len = 1;
					outstr = tmpbuf;
					goto do_output;
				}

				if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 's')) {
					/* "%s" - character string. */
					if (!v) {
						outstr = "(null)";
						/* Match glibc, nothing output if precision too small */
						len = precision >= 6 ? 6 : 0;
						goto do_output;
					}
					outstr = (void *)v;
do_strnlen_output:
					/* Also the target for "%m", precision limits the length. */
					len = strnlen(outstr, precision);
					goto do_output;
				}

				if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'd', 'i')) {
					/* "%d" and "%i" - signed decimal numbers. */
					if (signed_v < 0) {
						sign = '-';
						/* Negate in two steps so the most negative value
						 * doesn't overflow.
						 */
						v = -(signed_v + 1);
						v++;
					} else if (_NOLIBC_PF_FLAGS_CONTAIN(flags, '+')) {
						sign = '+';
					} else if (_NOLIBC_PF_FLAGS_CONTAIN(flags, ' ')) {
						sign = ' ';
					}
				}

				if (v == 0) {
					/* There are special rules for zero. */
					if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'p')) {
						/* "%p" match glibc, precision is ignored */
						outstr = "(nil)";
						len = 5;
						goto do_output;
					}
					if (!precision) {
						/* Explicit %nn.0d, no digits output */
						len = 0;
						goto prepend_sign;
					}
					/* All formats (including "%#x") just output "0". */
					*out = '0';
					len = 1;
				} else {
					/* Convert the number to ascii in the required base. */
					if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'd', 'i', 'u')) {
						/* Base 10 */
						len = u64toa_r(v, out);
					} else {
						/* Base 16 */
						if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'p', '#' - 1)) {
							/* "%p" and "%#x" need "0x" prepending. */
							sign = 'x' | '0' << 8;
						}
						len = u64toh_r(v, out);
					}
				}

				/* Add zero padding */
				if (_NOLIBC_PF_FLAGS_CONTAIN(flags, '0', '.')) {
					if (!_NOLIBC_PF_FLAGS_CONTAIN(flags, '.')) {
						if (_NOLIBC_PF_FLAGS_CONTAIN(flags, '-'))
							/* Left justify overrides zero pad */
							goto prepend_sign;
						/* eg "%05d", Zero pad to field width less sign */
						precision = width;
						if (sign) {
							precision--;
							if (sign >= 256)
								precision--;
						}
					}
					if (precision > 30)
						/* Don't run off the start of tmpbuf[] */
						precision = 30;
					for (; len < precision; len++) {
						/* Stop gcc generating horrid code and memset().
						 * This is OPTIMIZER_HIDE_VAR() from compiler.h.
						 */
						__asm__ volatile("" : "=r"(len) : "0"(len));
						*--out = '0';
					}
				}

prepend_sign:
				/* Add 0, 1 or 2 ("0x") sign characters left of any zero padding */
				for (; sign; sign >>= 8) {
					len++;
					*--out = sign;
				}
				outstr = out;
				goto do_output;
			}

non_numeric_conversion:
			if (ch == 'm') {
#ifdef NOLIBC_IGNORE_ERRNO
				outstr = "unknown error";
				len = __builtin_strlen(outstr);
#else
				outstr = strerror(errno);
				goto do_strnlen_output;
#endif /* NOLIBC_IGNORE_ERRNO */
			} else {
				if (ch != '%') {
					/* Invalid format: back up to output the format characters */
					fmt = outstr + 1;
					/* and output a '%' now. */
				}
				/* %% is documented as a 'conversion specifier'.
				 * Any flags, precision or length modifier are ignored.
				 */
				len = 1;
				width = 0;
				outstr = fmt - 1;
			}
		}

do_output:
		written += len;

		/* An OPTIMIZER_HIDE_VAR() seems to stop gcc back-merging this
		 * code into one of the conditionals above.
		 */
		__asm__ volatile("" : "=r"(len) : "0"(len));

		/* Output 'left pad', 'value' then 'right pad'. */
		width -= len;
		/* From here on 'flags' is just "left justify requested". */
		flags = _NOLIBC_PF_FLAGS_CONTAIN(flags, '-');
		if (flags && cb(state, outstr, len) != 0)
			return -1;
		while (width > 0) {
			/* Pad in chunks of at most 16, the odd sized chunk goes first. */
			int pad_len = ((width - 1) & 15) + 1;
			width -= pad_len;
			written += pad_len;
			if (cb(state, "                ", pad_len) != 0)
				return -1;
		}
		if (!flags && cb(state, outstr, len) != 0)
			return -1;
	}

	/* Flush/terminate any buffer. */
	if (cb(state, NULL, 0) != 0)
		return -1;

	return written;
}

/* State for __nolibc_fprintf_cb(): output is collected in buf[] and written
 * to 'stream' in larger pieces.
 */
struct __nolibc_fprintf_cb_state {
	FILE *stream;
	/* Number of bytes currently held in buf[]. */
	unsigned int buf_offset;
	char buf[128];
};

/* Buffering output callback.
 * Appends 'size' bytes from 'buf' to the state's buffer. The buffer is written
 * out first when the new data doesn't fit or when 'buf' is NULL (flush request).
 * Data larger than the whole buffer is written directly.
 * Returns 0 on success, -1 if writing the buffered data failed, or the result
 * of _fwrite() for a direct write.
 */
static int __nolibc_fprintf_cb(void *v_state, const char *buf, size_t size)
{
	struct __nolibc_fprintf_cb_state *st = v_state;
	const size_t capacity = sizeof(st->buf);
	unsigned int pending = st->buf_offset;
	int must_flush = !buf || pending + size > capacity;

	if (must_flush) {
		/* The buffer counts as empty even if the write below fails. */
		st->buf_offset = 0;
		if (pending != 0 && _fwrite(st->buf, pending, st->stream))
			return -1;
		if (size > capacity)
			return _fwrite(buf, size, st->stream);
		pending = 0;
	}

	if (size == 0)
		return 0;

	memcpy(st->buf + pending, buf, size);
	st->buf_offset = pending + size;
	return 0;
}

...

/* State for __nolibc_sprintf_cb(). */
struct __nolibc_sprintf_cb_state {
	/* Where the next output byte goes. */
	char *buf;
	/* Space left at 'buf', including the byte kept for the terminator. */
	size_t size;
};

/* Output callback that copies into a bounded buffer.
 * Output that doesn't fit is silently truncated so that one byte always
 * remains for the terminating '\0', which is written when called with a
 * size of zero (provided there is any space at all).
 * Always returns 0.
 */
static int __nolibc_sprintf_cb(void *v_state, const char *buf, size_t size)
{
	struct __nolibc_sprintf_cb_state *st = v_state;
	size_t room = st->size;
	char *dst = st->buf;

	if (size >= room) {
		/* Nothing but (at most) the terminator's byte is left. */
		if (room <= 1)
			return 0;
		size = room - 1;
	}

	if (size == 0) {
		/* In particular from cb(NULL, 0) at the end of __nolibc_printf(). */
		*dst = '\0';
		return 0;
	}

	memcpy(dst, buf, size);
	st->buf = dst + size;
	st->size = room - size;
	return 0;
}

      parent reply	other threads:[~2026-02-06 21:36 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-06 19:11 [PATCH v2 next 00/11] tools/nolibc: Enhance printf() david.laight.linux
2026-02-06 19:11 ` [PATCH v2 next 01/11] tools/nolibc/printf: Change variable used for format chars from 'c' to 'ch' david.laight.linux
2026-02-07 18:51   ` Willy Tarreau
2026-02-16 18:52   ` Thomas Weißschuh
2026-02-06 19:11 ` [PATCH v2 next 02/11] tools/nolibc/printf: Move snprintf length check to callback david.laight.linux
2026-02-07 19:12   ` Willy Tarreau
2026-02-07 23:28     ` David Laight
2026-02-08 15:12       ` Willy Tarreau
2026-02-08 22:49         ` David Laight
2026-02-06 19:11 ` [PATCH v2 next 03/11] tools/nolibc/printf: Add buffering to vfprintf() callback david.laight.linux
2026-02-07 19:29   ` Willy Tarreau
2026-02-07 23:36     ` David Laight
2026-02-16 19:07       ` Thomas Weißschuh
2026-02-17 11:51         ` David Laight
2026-02-18 17:52           ` Thomas Weißschuh
2026-02-06 19:11 ` [PATCH v2 next 04/11] tools/nolibc/printf: Output pad characters in 16 byte chunks david.laight.linux
2026-02-07 19:38   ` Willy Tarreau
2026-02-07 23:43     ` David Laight
2026-02-08 15:14       ` Willy Tarreau
2026-02-16 19:30   ` Thomas Weißschuh
2026-02-16 22:29     ` David Laight
2026-02-18 17:30       ` Thomas Weißschuh
2026-02-06 19:11 ` [PATCH v2 next 05/11] tools/nolibc/printf: Simplify __nolibc_printf() david.laight.linux
2026-02-07 20:05   ` Willy Tarreau
2026-02-07 23:50     ` David Laight
2026-02-08 12:20       ` David Laight
2026-02-08 14:44         ` Willy Tarreau
2026-02-08 16:54           ` David Laight
2026-02-08 17:06             ` Willy Tarreau
2026-02-06 19:11 ` [PATCH v2 next 06/11] tools/nolibc/printf: Use bit-masks to hold requested flag, length and conversion chars david.laight.linux
2026-02-08 15:22   ` Willy Tarreau
2026-02-16 19:52   ` Thomas Weißschuh
2026-02-16 22:47     ` David Laight
2026-02-18 17:36       ` Thomas Weißschuh
2026-02-18 22:57         ` David Laight
2026-02-06 19:11 ` [PATCH v2 next 07/11] tools/nolibc/printf: Add support for conversion flags "#- +" and format "%X" david.laight.linux
2026-02-08 15:47   ` Willy Tarreau
2026-02-08 17:14     ` David Laight
2026-02-08 16:06   ` Willy Tarreau
2026-02-16 19:57   ` Thomas Weißschuh
2026-02-16 22:50     ` David Laight
2026-02-18 17:39       ` Thomas Weißschuh
2026-02-16 20:11   ` Thomas Weißschuh
2026-02-16 22:52     ` David Laight
2026-02-06 19:11 ` [PATCH v2 next 08/11] tools/nolibc/printf: Add support for zero padding and field precision david.laight.linux
2026-02-08 16:16   ` Willy Tarreau
2026-02-08 17:31     ` David Laight
2026-02-06 19:11 ` [PATCH v2 next 09/11] selftests/nolibc: Improve reporting of vfprintf() errors david.laight.linux
2026-02-16 20:05   ` Thomas Weißschuh
2026-02-17 10:48     ` David Laight
2026-02-18 17:48       ` Thomas Weißschuh
2026-02-06 19:11 ` [PATCH v2 next 10/11] selftests/nolibc: Increase coverage of printf format tests david.laight.linux
2026-02-16 20:14   ` Thomas Weißschuh
2026-02-16 20:23   ` Thomas Weißschuh
2026-02-16 22:54     ` David Laight
2026-02-18 17:41       ` Thomas Weißschuh
2026-02-06 19:11 ` [PATCH v2 next 11/11] selftests/nolibc: Use printf("%.*s", n, "") to align output david.laight.linux
2026-02-08 16:20   ` Willy Tarreau
2026-02-16 20:22   ` Thomas Weißschuh
2026-02-06 21:36 ` David Laight [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260206213608.1bbad591@pumpkin \
    --to=david.laight.linux@gmail.com \
    --cc=lechain@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@weissschuh.net \
    --cc=w@1wt.eu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.