[PATCH 01/14] numparse: new module for parsing integral numbers

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Michael Haggerty <mhagger@alum.mit.edu>
To: Junio C Hamano <gitster@pobox.com>
Cc: Jeff King <peff@peff.org>,
	git@vger.kernel.org, Michael Haggerty <mhagger@alum.mit.edu>
Subject: [PATCH 01/14] numparse: new module for parsing integral numbers
Date: Tue, 17 Mar 2015 17:00:03 +0100	[thread overview]
Message-ID: <1426608016-2978-2-git-send-email-mhagger@alum.mit.edu> (raw)
In-Reply-To: <1426608016-2978-1-git-send-email-mhagger@alum.mit.edu>

Implement wrappers for strtol() and strtoul() that are safer and more
convenient to use.

Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
---
 Makefile   |   1 +
 numparse.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 numparse.h | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 388 insertions(+)
 create mode 100644 numparse.c
 create mode 100644 numparse.h

diff --git a/Makefile b/Makefile
index 44f1dd1..6c0cfcc 100644
--- a/Makefile
+++ b/Makefile
@@ -732,6 +732,7 @@ LIB_OBJS += notes.o
 LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
+LIB_OBJS += numparse.o
 LIB_OBJS += object.o
 LIB_OBJS += pack-bitmap.o
 LIB_OBJS += pack-bitmap-write.o
diff --git a/numparse.c b/numparse.c
new file mode 100644
index 0000000..90b44ce
--- /dev/null
+++ b/numparse.c
@@ -0,0 +1,180 @@
+#include "git-compat-util.h"
+#include "numparse.h"
+
+#define NUM_NEGATIVE (1 << 16)
+
+
+/*
+ * Check the whitespace/sign prefix of s against *flags before s goes
+ * to strtol()/strtoul(). Return 0, or the negated NUM_* flag of the
+ * first feature s uses that is not enabled. NUM_NEGATIVE in *flags is
+ * reset on entry and set again iff the number has a leading '-'.
+ */
+static int parse_precheck(const char *s, unsigned int *flags)
+{
+	*flags &= ~NUM_NEGATIVE;
+
+	if (isspace(*s)) {
+		if (!(*flags & NUM_LEADING_WHITESPACE))
+			return -NUM_LEADING_WHITESPACE;
+		while (isspace(*s))
+			s++;
+	}
+
+	if (*s == '+') {
+		if (!(*flags & NUM_PLUS))
+			return -NUM_PLUS;
+		s++;
+	} else if (*s == '-') {
+		if (!(*flags & NUM_MINUS))
+			return -NUM_MINUS;
+		*flags |= NUM_NEGATIVE;
+		s++;
+	}
+
+	/* s now points just past any sign. */
+	if (!(*flags & NUM_BASE_SPECIFIER)) {
+		unsigned int base = *flags & NUM_BASE_MASK;
+
+		if (!base)
+			die("BUG: base=0 specified without NUM_BASE_SPECIFIER");
+		/*
+		 * strtol()/strtoul() in base 16 silently eat a "0x" or
+		 * "0X" prefix. Parse in base 10 instead, so that the
+		 * number is the leading zero, terminated by the 'x'/'X'.
+		 */
+		if (base == 16 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
+			*flags = (*flags & ~NUM_BASE_MASK) | 10;
+	}
+	return 0;
+}
+
+/*
+ * Parse a long from the front of s; see numparse.h for the contract.
+ */
+int parse_l(const char *s, unsigned int flags, long *result, char **endptr)
+{
+	long l;
+	char *end;
+	int base, err = parse_precheck(s, &flags);
+
+	if (err)
+		return err;
+	/* ISO C defines strtol() only for base 0 and 2 <= base <= 36. */
+	base = flags & NUM_BASE_MASK;
+	if (base == 1 || base > 36)
+		return -NUM_OTHER_ERROR;
+
+	errno = 0;
+	l = strtol(s, &end, base);
+	/* Before errno: POSIX permits EINVAL when nothing was converted. */
+	if (end == s)
+		return -NUM_NO_DIGITS;
+	if (errno == ERANGE) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+	} else if (errno) {
+		return -NUM_OTHER_ERROR;
+	}
+	if (*end && !(flags & NUM_TRAILING))
+		return -NUM_TRAILING;
+
+	*result = l;
+	if (endptr)
+		*endptr = end;
+	return 0;
+}
+
+/*
+ * Parse an unsigned long from the front of s; see numparse.h for the
+ * contract. A nonzero negative number counts as out of range.
+ */
+int parse_ul(const char *s, unsigned int flags,
+	     unsigned long *result, char **endptr)
+{
+	unsigned long ul;
+	char *end;
+	int base, err = parse_precheck(s, &flags);
+
+	if (err)
+		return err;
+	/* ISO C defines strtoul() only for base 0 and 2 <= base <= 36. */
+	base = flags & NUM_BASE_MASK;
+	if (base == 1 || base > 36)
+		return -NUM_OTHER_ERROR;
+
+	errno = 0;
+	ul = strtoul(s, &end, base);
+	/* Before errno: POSIX permits EINVAL when nothing was converted. */
+	if (end == s)
+		return -NUM_NO_DIGITS;
+	if (errno == ERANGE) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+	} else if (errno) {
+		return -NUM_OTHER_ERROR;
+	}
+
+	/*
+	 * strtoul() accepts "-N" and returns N negated as an unsigned
+	 * value. We never want that: treat it as saturating to zero.
+	 */
+	if ((flags & NUM_NEGATIVE) && ul) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+		ul = 0;
+	}
+	if (*end && !(flags & NUM_TRAILING))
+		return -NUM_TRAILING;
+
+	*result = ul;
+	if (endptr)
+		*endptr = end;
+	return 0;
+}
+
+/*
+ * Like parse_l(), but the value must also fit in an int.
+ */
+int parse_i(const char *s, unsigned int flags, int *result, char **endptr)
+{
+	long l;
+	char *end;
+	int err = parse_l(s, flags, &l, &end);
+
+	if (err)
+		return err;
+	/* Compare instead of casting: narrowing is implementation-defined. */
+	if (l < INT_MIN || l > INT_MAX) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+		l = (l < 0) ? INT_MIN : INT_MAX;
+	}
+	*result = l;
+	if (endptr)
+		*endptr = end;
+	return 0;
+}
+
+/*
+ * Like parse_ul(), but the value must also fit in an unsigned int.
+ */
+int parse_ui(const char *s, unsigned int flags, unsigned int *result, char **endptr)
+{
+	unsigned long value;
+	char *rest;
+	int status = parse_ul(s, flags, &value, &rest);
+
+	if (status)
+		return status;
+	/* The cast is lossless exactly when value fits in unsigned int. */
+	if ((unsigned int)value != value) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+		value = UINT_MAX;
+	}
+	*result = value;
+	if (endptr)
+		*endptr = rest;
+	return 0;
+}
diff --git a/numparse.h b/numparse.h
new file mode 100644
index 0000000..4de5e10
--- /dev/null
+++ b/numparse.h
@@ -0,0 +1,207 @@
+#ifndef NUMPARSE_H
+#define NUMPARSE_H
+
+/*
+ * Functions for parsing integral numbers.
+ *
+ * strtol() and strtoul() are very flexible, in fact too flexible for
+ * many purposes. These functions wrap them to make them easier to use
+ * in a stricter way.
+ *
+ * There are two classes of function, parse_*() and convert_*(). The
+ * former try to read a number from the front of a string and report a
+ * pointer to the character following the number. The latter don't
+ * report the end of the number, and are meant to be used when the
+ * input string should contain only a single number, with no trailing
+ * characters.
+ *
+ * Each class of functions has four variants:
+ *
+ * - parse_l(), convert_l() -- parse long ints
+ * - parse_ul(), convert_ul() -- parse unsigned long ints
+ * - parse_i(), convert_i() -- parse ints
+ * - parse_ui(), convert_ui() -- parse unsigned ints
+ *
+ * The style of parsing is controlled by a flags argument which
+ * encodes both the base of the number and many other options. The
+ * base is encoded by its numerical value (2 <= base <= 36), or zero
+ * if it should be determined automatically based on whether the
+ * number has a "0x" or "0" prefix.
+ *
+ * The functions all return zero on success. On error, they return a
+ * negative integer indicating the first error that was detected. For
+ * example, if no sign characters were allowed but the string
+ * contained a '-', the function will return -NUM_MINUS. If there is
+ * any kind of error, *result and *endptr are unchanged.
+ *
+ * Examples:
+ *
+ * - Convert hexadecimal string s into an unsigned int. Die if there
+ *   are any characters in s besides hexadecimal digits, or if the
+ *   result exceeds the range of an unsigned int:
+ *
+ *     if (convert_ui(s, 16, &result))
+ *             die("...");
+ *
+ * - Read a base-ten long number from the front of a string, allowing
+ *   sign characters and setting endptr to point at any trailing
+ *   characters:
+ *
+ *     if (parse_l(s, 10 | NUM_SIGN | NUM_TRAILING, &result, &endptr))
+ *             die("...");
+ *
+ * - Convert decimal string s into a signed int, but not allowing the
+ *   string to contain a '+' or '-' prefix (and thereby indirectly
+ *   ensuring that the result will be non-negative):
+ *
+ *     if (convert_i(s, 10, &result))
+ *             die("...");
+ *
+ * - Convert s into a signed int, interpreting prefix "0x" to mean
+ *   hexadecimal and "0" to mean octal. If the value doesn't fit in
+ *   an int, set result to INT_MIN or INT_MAX:
+ *
+ *     if (convert_i(s, NUM_SLOPPY, &result))
+ *             die("...");
+ */
+
+
+/*
+ * Constants for parsing numbers.
+ *
+ * These can be passed in flags to allow the specified features. Also,
+ * if there is an error parsing a number, the parsing functions return
+ * the negated value of one of these constants (or NUM_NO_DIGITS or
+ * NUM_OTHER_ERROR) to indicate the first error detected.
+ */
+
+/*
+ * The lowest 6 bits of flags hold the numerical base that should be
+ * used to parse the number, 2 <= base <= 36. If base is set to 0,
+ * then NUM_BASE_SPECIFIER must be set too; in this case, the base is
+ * detected automatically from the string's prefix.
+ */
+#define NUM_BASE_MASK 0x3f
+
+/* Allow leading whitespace before the number, and skip it. */
+#define NUM_LEADING_WHITESPACE (1 << 8)
+
+/* Allow a leading '+'. */
+#define NUM_PLUS               (1 << 9)
+
+/* Allow a leading '-'. */
+#define NUM_MINUS              (1 << 10)
+
+/*
+ * Allow a leading base specifier:
+ * - If base is 0: a leading "0x" indicates base 16; a leading "0"
+ *   indicates base 8; otherwise, assume base 10.
+ * - If base is 16: a leading "0x" is allowed and skipped over.
+ */
+#define NUM_BASE_SPECIFIER     (1 << 11)
+
+/*
+ * If the number is outside the range of the result type, store the
+ * smallest or largest representable value instead of failing.
+ */
+#define NUM_SATURATE           (1 << 12)
+
+/*
+ * Just parse until the end of the number, ignoring any subsequent
+ * characters. If this option is not specified, then it is an error
+ * (-NUM_TRAILING) if anything follows the number in the string.
+ */
+#define NUM_TRAILING           (1 << 13)
+
+
+/* Additional errors that can come from parsing numbers: */
+
+/* No digits were found where the number was expected. */
+#define NUM_NO_DIGITS          (1 << 14)
+/* There was some other error reported by strtol()/strtoul(): */
+#define NUM_OTHER_ERROR        (1 << 15)
+
+/*
+ * Please note that numparse.c also uses a NUM_NEGATIVE bit (1 << 16)
+ * internally; whatever the caller passes for that bit is ignored.
+ */
+
+/*
+ * Now define some useful combinations of parsing options:
+ */
+
+/* Allow an optional leading sign, either '+' or '-'. */
+#define NUM_SIGN (NUM_PLUS | NUM_MINUS)
+
+/*
+ * Be as liberal as possible with the form of the number itself
+ * (though if you also want to allow leading whitespace and/or
+ * trailing characters, you should combine this with
+ * NUM_LEADING_WHITESPACE and/or NUM_TRAILING).
+ */
+#define NUM_SLOPPY (NUM_SIGN | NUM_SATURATE | NUM_BASE_SPECIFIER)
+
+
+/*
+ * Number parsing functions:
+ *
+ * The following functions parse a number (long, unsigned long, int,
+ * or unsigned int respectively) from the front of s, storing the
+ * value to *result and, if endptr is not NULL, storing a pointer to
+ * the first character after the number to *endptr. flags is a
+ * combination of the numerical base (0 or 2-36) and the NUM_*
+ * constants above, which see. Return 0 on success or the negated
+ * NUM_* constant of the first error detected. On failure, *result
+ * and *endptr are left unchanged.
+ *
+ * Please note that even when endptr is supplied, it is an error if
+ * there are any characters between the end of the number and the end
+ * of the string, unless NUM_TRAILING is set.
+ */
+
+int parse_l(const char *s, unsigned int flags,
+	    long *result, char **endptr);
+
+int parse_ul(const char *s, unsigned int flags,
+	     unsigned long *result, char **endptr);
+
+int parse_i(const char *s, unsigned int flags,
+	    int *result, char **endptr);
+
+int parse_ui(const char *s, unsigned int flags,
+	     unsigned int *result, char **endptr);
+
+
+/*
+ * Number conversion functions:
+ *
+ * The following functions parse a string into a number. They call the
+ * corresponding parse_*() function with a NULL endptr, so the end of
+ * the number is not reported. They are most useful for converting a
+ * whole string; i.e., when (flags & NUM_TRAILING) is unset.
+ */
+static inline int convert_l(const char *s, unsigned int flags,
+			    long *result)
+{
+	return parse_l(s, flags, result, NULL);
+}
+
+static inline int convert_ul(const char *s, unsigned int flags,
+			     unsigned long *result)
+{
+	return parse_ul(s, flags, result, NULL);
+}
+
+static inline int convert_i(const char *s, unsigned int flags,
+			    int *result)
+{
+	return parse_i(s, flags, result, NULL);
+}
+
+static inline int convert_ui(const char *s, unsigned int flags,
+			     unsigned int *result)
+{
+	return parse_ui(s, flags, result, NULL);
+}
+
+#endif /* NUMPARSE_H */
-- 
2.1.4

next prev parent reply	other threads:[~2015-03-17 16:00 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-17 16:00 [PATCH 00/14] numparse module: systematically tighten up integer parsing Michael Haggerty
2015-03-17 16:00 ` Michael Haggerty [this message]
2015-03-18 18:27   ` [PATCH 01/14] numparse: new module for parsing integral numbers Eric Sunshine
2015-03-18 22:47     ` Michael Haggerty
2015-03-20  8:54       ` Eric Sunshine
2015-03-20 17:51   ` Junio C Hamano
2015-03-17 16:00 ` [PATCH 02/14] cacheinfo_callback(): use convert_ui() when handling "--cacheinfo" Michael Haggerty
2015-03-17 16:00 ` [PATCH 03/14] write_subdirectory(): use convert_ui() for parsing mode Michael Haggerty
2015-03-17 16:00 ` [PATCH 04/14] handle_revision_opt(): use skip_prefix() in many places Michael Haggerty
2015-03-17 16:00 ` [PATCH 05/14] handle_revision_opt(): use convert_i() when handling "-<digit>" Michael Haggerty
2015-03-19  6:34   ` Junio C Hamano
2015-03-17 16:00 ` [PATCH 06/14] strtoul_ui(), strtol_i(): remove functions Michael Haggerty
2015-03-17 16:00 ` [PATCH 07/14] handle_revision_opt(): use convert_ui() when handling "--abbrev=" Michael Haggerty
2015-03-17 16:00 ` [PATCH 08/14] builtin_diff(): detect errors when parsing --unified argument Michael Haggerty
2015-03-17 16:00 ` [PATCH 09/14] opt_arg(): val is always non-NULL Michael Haggerty
2015-03-17 16:00 ` [PATCH 10/14] opt_arg(): use convert_i() in implementation Michael Haggerty
2015-03-17 16:00 ` [PATCH 11/14] opt_arg(): report errors parsing option values Michael Haggerty
2015-03-17 16:00 ` [PATCH 12/14] opt_arg(): simplify pointer handling Michael Haggerty
2015-03-17 16:00 ` [PATCH 13/14] diff_opt_parse(): use convert_i() when handling "-l<num>" Michael Haggerty
2015-03-17 16:00 ` [PATCH 14/14] diff_opt_parse(): use convert_i() when handling --abbrev=<num> Michael Haggerty
2015-03-19  6:37   ` Junio C Hamano
2015-03-17 18:48 ` [PATCH 00/14] numparse module: systematically tighten up integer parsing Junio C Hamano
2015-03-17 19:46   ` Michael Haggerty
2015-03-19  6:31     ` Junio C Hamano
2015-03-17 23:05 ` Duy Nguyen
2015-03-18  9:47   ` Michael Haggerty
2015-03-18  9:58     ` Duy Nguyen
2015-03-18 10:03     ` Jeff King
2015-03-18 10:20       ` Michael Haggerty
2015-03-19  5:26 ` Jeff King
2015-03-19  6:41   ` Junio C Hamano
2015-03-19  7:32   ` Junio C Hamano
2015-03-24 16:06     ` Michael Haggerty
2015-03-24 16:49       ` René Scharfe
2015-03-25 21:14         ` Michael Haggerty
2015-03-25 21:59           ` Junio C Hamano
2015-03-24 15:05   ` Michael Haggerty
2015-03-19  6:22 ` Junio C Hamano
2015-03-24 15:42   ` Michael Haggerty
2015-03-24 15:58     ` Junio C Hamano
2015-03-24 16:09       ` Junio C Hamano
2015-03-24 17:39       ` Michael Haggerty
2015-03-24 18:08         ` Junio C Hamano

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:44f1dd1 dfblob:6c0cfcc dfblob:90b44ce dfblob:4de5e10 )
 OR (
bs:"[PATCH 01/14] numparse: new module for parsing integral numbers" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1426608016-2978-2-git-send-email-mhagger@alum.mit.edu \
    --to=mhagger@alum.mit.edu \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).