From: Michael Haggerty <mhagger@alum.mit.edu>
To: Junio C Hamano <gitster@pobox.com>
Cc: Jeff King <peff@peff.org>,
git@vger.kernel.org, Michael Haggerty <mhagger@alum.mit.edu>
Subject: [PATCH 01/14] numparse: new module for parsing integral numbers
Date: Tue, 17 Mar 2015 17:00:03 +0100 [thread overview]
Message-ID: <1426608016-2978-2-git-send-email-mhagger@alum.mit.edu> (raw)
In-Reply-To: <1426608016-2978-1-git-send-email-mhagger@alum.mit.edu>
Implement wrappers for strtol() and strtoul() that are safer and more
convenient to use.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
---
Makefile | 1 +
numparse.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++
numparse.h | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 388 insertions(+)
create mode 100644 numparse.c
create mode 100644 numparse.h
diff --git a/Makefile b/Makefile
index 44f1dd1..6c0cfcc 100644
--- a/Makefile
+++ b/Makefile
@@ -732,6 +732,7 @@ LIB_OBJS += notes.o
LIB_OBJS += notes-cache.o
LIB_OBJS += notes-merge.o
LIB_OBJS += notes-utils.o
+LIB_OBJS += numparse.o
LIB_OBJS += object.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-bitmap-write.o
diff --git a/numparse.c b/numparse.c
new file mode 100644
index 0000000..90b44ce
--- /dev/null
+++ b/numparse.c
@@ -0,0 +1,180 @@
+#include "git-compat-util.h"
+#include "numparse.h"
+
+#define NUM_NEGATIVE (1 << 16)
+
+
+/*
+ * Vet the parts of a number that strtol()/strtoul() would accept more
+ * liberally than the caller asked for: leading whitespace, a sign
+ * character, and a hexadecimal base prefix.
+ *
+ * s is the string that is about to be parsed. On entry, *flags holds
+ * the caller's numerical base and NUM_* options. On success,
+ * NUM_NEGATIVE is set in *flags if the number starts with '-' and
+ * cleared otherwise, and the base stored in *flags may have been
+ * changed from 16 to 10 (see below).
+ *
+ * Return 0 if the prefix is acceptable; otherwise return the negated
+ * NUM_* constant naming the first feature that was present but not
+ * allowed. Die if base 0 was requested without NUM_BASE_SPECIFIER.
+ */
+static int parse_precheck(const char *s, unsigned int *flags)
+{
+	const char *number;
+
+	if (isspace(*s)) {
+		if (!(*flags & NUM_LEADING_WHITESPACE))
+			return -NUM_LEADING_WHITESPACE;
+		do {
+			s++;
+		} while (isspace(*s));
+	}
+
+	if (*s == '+') {
+		if (!(*flags & NUM_PLUS))
+			return -NUM_PLUS;
+		number = s + 1;
+		*flags &= ~NUM_NEGATIVE;
+	} else if (*s == '-') {
+		if (!(*flags & NUM_MINUS))
+			return -NUM_MINUS;
+		number = s + 1;
+		*flags |= NUM_NEGATIVE;
+	} else {
+		number = s;
+		*flags &= ~NUM_NEGATIVE;
+	}
+
+	if (!(*flags & NUM_BASE_SPECIFIER)) {
+		int base = *flags & NUM_BASE_MASK;
+		if (base == 0) {
+			/* This is a pointless combination of options. */
+			die("BUG: base=0 specified without NUM_BASE_SPECIFIER");
+		} else if (base == 16 && number[0] == '0' &&
+			   (number[1] == 'x' || number[1] == 'X')) {
+			/*
+			 * We want to treat this as zero terminated by
+			 * an 'x' or 'X', whereas strtol()/strtoul()
+			 * would silently eat the "0x" or "0X" prefix.
+			 * We accomplish this by treating it as a base
+			 * 10 number. (number[1] is only inspected
+			 * once number[0] is known to be '0', so we
+			 * never read past the terminating NUL.)
+			 */
+			*flags = (*flags & ~NUM_BASE_MASK) | 10;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Parse a long from the front of s as directed by flags. On success,
+ * store the value to *result and, if endptr is non-NULL, the position
+ * just past the number to *endptr, then return 0. On failure, return
+ * a negated NUM_* constant and leave *result and *endptr alone.
+ */
+int parse_l(const char *s, unsigned int flags, long *result, char **endptr)
+{
+	char *tail;
+	long value;
+	int status = parse_precheck(s, &flags);
+
+	if (status)
+		return status;
+
+	/*
+	 * Clear errno first so that a nonzero value afterwards can
+	 * only have been set by strtol().
+	 */
+	errno = 0;
+	value = strtol(s, &tail, flags & NUM_BASE_MASK);
+
+	if (errno && errno != ERANGE)
+		return -NUM_OTHER_ERROR;
+	/* Out of range: keep strtol()'s value only if saturation is allowed. */
+	if (errno == ERANGE && !(flags & NUM_SATURATE))
+		return -NUM_SATURATE;
+	if (tail == s)
+		return -NUM_NO_DIGITS;
+	if (*tail != '\0' && !(flags & NUM_TRAILING))
+		return -NUM_TRAILING;
+
+	*result = value;
+	if (endptr)
+		*endptr = tail;
+	return 0;
+}
+
+/*
+ * Parse an unsigned long from the front of s as directed by flags.
+ * On success, store the value to *result and, if endptr is non-NULL,
+ * the position just past the number to *endptr, then return 0. On
+ * failure, return a negated NUM_* constant and leave *result and
+ * *endptr alone.
+ */
+int parse_ul(const char *s, unsigned int flags,
+	     unsigned long *result, char **endptr)
+{
+	char *tail;
+	unsigned long value;
+	int status = parse_precheck(s, &flags);
+
+	if (status)
+		return status;
+
+	/*
+	 * Clear errno first so that a nonzero value afterwards can
+	 * only have been set by strtoul().
+	 */
+	errno = 0;
+	value = strtoul(s, &tail, flags & NUM_BASE_MASK);
+
+	if (errno && errno != ERANGE)
+		return -NUM_OTHER_ERROR;
+	if (errno == ERANGE && !(flags & NUM_SATURATE))
+		return -NUM_SATURATE;
+	if (tail == s)
+		return -NUM_NO_DIGITS;
+
+	/*
+	 * Left to itself, strtoul() accepts a '-' and hands back the
+	 * positive number with the same bit pattern. Treat a nonzero
+	 * negative input as out of range instead, clamping it to zero
+	 * when saturation is allowed.
+	 */
+	if (value && (flags & NUM_NEGATIVE)) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+		value = 0;
+	}
+
+	if (*tail != '\0' && !(flags & NUM_TRAILING))
+		return -NUM_TRAILING;
+
+	*result = value;
+	if (endptr)
+		*endptr = tail;
+	return 0;
+}
+
+/*
+ * Parse an int from the front of s as directed by flags. The number
+ * is first parsed as a long by parse_l() and then narrowed. A value
+ * outside the range of int is an error (-NUM_SATURATE) unless
+ * NUM_SATURATE is set, in which case it is clamped to INT_MIN or
+ * INT_MAX.
+ *
+ * On success, store the value to *result and, if endptr is non-NULL,
+ * the position just past the number to *endptr, then return 0. On
+ * failure, return a negated NUM_* constant and leave *result and
+ * *endptr alone.
+ */
+int parse_i(const char *s, unsigned int flags, int *result, char **endptr)
+{
+	long l;
+	int err;
+	char *end;
+
+	err = parse_l(s, flags, &l, &end);
+	if (err)
+		return err;
+
+	/*
+	 * Compare against the limits explicitly rather than casting
+	 * to int and back: converting an out-of-range long to int is
+	 * implementation-defined.
+	 */
+	if (l < INT_MIN || l > INT_MAX) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+		l = (l < 0) ? INT_MIN : INT_MAX;
+	}
+
+	*result = (int)l;
+	if (endptr)
+		*endptr = end;
+
+	return 0;
+}
+
+/*
+ * Parse an unsigned int from the front of s as directed by flags.
+ * The number is first parsed as an unsigned long by parse_ul() and
+ * then narrowed. A value above UINT_MAX is an error (-NUM_SATURATE)
+ * unless NUM_SATURATE is set, in which case it is clamped to
+ * UINT_MAX.
+ */
+int parse_ui(const char *s, unsigned int flags, unsigned int *result, char **endptr)
+{
+	unsigned long wide;
+	char *tail;
+	int status = parse_ul(s, flags, &wide, &tail);
+
+	if (status)
+		return status;
+
+	if (wide > UINT_MAX) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+		wide = UINT_MAX;
+	}
+
+	*result = wide;
+	if (endptr)
+		*endptr = tail;
+
+	return 0;
+}
diff --git a/numparse.h b/numparse.h
new file mode 100644
index 0000000..4de5e10
--- /dev/null
+++ b/numparse.h
@@ -0,0 +1,207 @@
+#ifndef NUMPARSE_H
+#define NUMPARSE_H
+
+/*
+ * Functions for parsing integral numbers.
+ *
+ * strtol() and strtoul() are very flexible, in fact too flexible for
+ * many purposes. These functions wrap them to make them easier to use
+ * in a stricter way.
+ *
+ * There are two classes of function, parse_*() and convert_*(). The
+ * former try to read a number from the front of a string and report a
+ * pointer to the character following the number. The latter don't
+ * report the end of the number, and are meant to be used when the
+ * input string should contain only a single number, with no trailing
+ * characters.
+ *
+ * Each class of functions has four variants:
+ *
+ * - parse_l(), convert_l() -- parse long ints
+ * - parse_ul(), convert_ul() -- parse unsigned long ints
+ * - parse_i(), convert_i() -- parse ints
+ * - parse_ui(), convert_ui() -- parse unsigned ints
+ *
+ * The style of parsing is controlled by a flags argument which
+ * encodes both the base of the number and many other options. The
+ * base is encoded by its numerical value (2 <= base <= 36), or zero
+ * if it should be determined automatically based on whether the
+ * number has a "0x" or "0" prefix.
+ *
+ * The functions all return zero on success. On error, they return a
+ * negative integer indicating the first error that was detected. For
+ * example, if no sign characters were allowed but the string
+ * contained a '-', the function will return -NUM_MINUS. If there is
+ * any kind of error, *result and *endptr are unchanged.
+ *
+ * Examples:
+ *
+ * - Convert hexadecimal string s into an unsigned int. Die if there
+ * are any characters in s besides hexadecimal digits, or if the
+ * result exceeds the range of an unsigned int:
+ *
+ * if (convert_ui(s, 16, &result))
+ * die("...");
+ *
+ * - Read a base-ten long number from the front of a string, allowing
+ * sign characters and setting endptr to point at any trailing
+ * characters:
+ *
+ * if (parse_l(s, 10 | NUM_SIGN | NUM_TRAILING, &result, &endptr))
+ * die("...");
+ *
+ * - Convert decimal string s into a signed int, but not allowing the
+ * string to contain a '+' or '-' prefix (and thereby indirectly
+ * ensuring that the result will be non-negative):
+ *
+ * if (convert_i(s, 10, &result))
+ * die("...");
+ *
+ * - Convert s into a signed int, interpreting prefix "0x" to mean
+ * hexadecimal and "0" to mean octal. If the value doesn't fit in an
+ * int, set result to INT_MIN or INT_MAX.
+ *
+ * if (convert_i(s, NUM_SLOPPY, &result))
+ * die("...");
+ */
+
+
+/*
+ * Constants for parsing numbers.
+ *
+ * These can be passed in flags to allow the specified features. Also,
+ * if there is an error parsing a number, the parsing functions return
+ * the negated value of one of these constants (or NUM_NO_DIGITS or
+ * NUM_OTHER_ERROR) to indicate the first error detected.
+ */
+
+/*
+ * The lowest 6 bits of flags hold the numerical base that should be
+ * used to parse the number, 2 <= base <= 36. If base is set to 0,
+ * then NUM_BASE_SPECIFIER must be set too; in this case, the base is
+ * detected automatically from the string's prefix.
+ */
+#define NUM_BASE_MASK 0x3f
+
+/* Skip any whitespace before the number. */
+#define NUM_LEADING_WHITESPACE (1 << 8)
+
+/* Allow a leading '+'. */
+#define NUM_PLUS (1 << 9)
+
+/* Allow a leading '-'. */
+#define NUM_MINUS (1 << 10)
+
+/*
+ * Allow a leading base specifier:
+ * - If base is 0: a leading "0x" indicates base 16; a leading "0"
+ * indicates base 8; otherwise, assume base 10.
+ * - If base is 16: a leading "0x" is allowed and skipped over.
+ */
+#define NUM_BASE_SPECIFIER (1 << 11)
+
+/*
+ * If the number is not in the allowed range, return the smallest or
+ * largest representable value instead.
+ */
+#define NUM_SATURATE (1 << 12)
+
+/*
+ * Just parse until the end of the number, ignoring any subsequent
+ * characters. If this option is not specified, then it is an error if
+ * the whole string cannot be parsed.
+ */
+#define NUM_TRAILING (1 << 13)
+
+
+/* Additional errors that can come from parsing numbers: */
+
+/* There were no valid digits */
+#define NUM_NO_DIGITS (1 << 14)
+/* There was some other error reported by strtol()/strtoul(): */
+#define NUM_OTHER_ERROR (1 << 15)
+
+/*
+ * Please note that there is also a NUM_NEGATIVE, which is used
+ * internally.
+ */
+
+/*
+ * Now define some useful combinations of parsing options:
+ */
+
+/* A bunch of digits with an optional sign. */
+#define NUM_SIGN (NUM_PLUS | NUM_MINUS)
+
+/*
+ * Be as liberal as possible with the form of the number itself
+ * (though if you also want to allow leading whitespace and/or
+ * trailing characters, you should combine this with
+ * NUM_LEADING_WHITESPACE and/or NUM_TRAILING).
+ */
+#define NUM_SLOPPY (NUM_SIGN | NUM_SATURATE | NUM_BASE_SPECIFIER)
+
+
+/*
+ * Number parsing functions:
+ *
+ * The following functions parse a number (long, unsigned long, int,
+ * or unsigned int respectively) from the front of s, storing the
+ * value to *result and storing a pointer to the first character after
+ * the number to *endptr. flags specifies how the number should be
+ * parsed, including which base should be used. flags is a combination
+ * of the numerical base (2-36) and the NUM_* constants defined above.
+ * Return 0 on success or a negative value if there was an error. On
+ * failure, *result and *endptr are left unchanged.
+ *
+ * Please note that if NUM_TRAILING is not set, then it is
+ * nevertheless an error if there are any characters between the end
+ * of the number and the end of the string.
+ */
+
+int parse_l(const char *s, unsigned int flags,
+ long *result, char **endptr);
+
+int parse_ul(const char *s, unsigned int flags,
+ unsigned long *result, char **endptr);
+
+int parse_i(const char *s, unsigned int flags,
+ int *result, char **endptr);
+
+int parse_ui(const char *s, unsigned int flags,
+ unsigned int *result, char **endptr);
+
+
+/*
+ * Number conversion functions:
+ *
+ * The following functions parse a string into a number. They are
+ * identical to the parse_*() functions above, except that the endptr
+ * is not returned. These are most useful when parsing a whole string
+ * into a number; i.e., when (flags & NUM_TRAILING) is unset.
+ */
+/* Like parse_l(), but without reporting where parsing stopped. */
+static inline int convert_l(const char *s, unsigned int flags, long *result)
+{
+	char **no_endptr = NULL;
+
+	return parse_l(s, flags, result, no_endptr);
+}
+
+/* Like parse_ul(), but without reporting where parsing stopped. */
+static inline int convert_ul(const char *s, unsigned int flags, unsigned long *result)
+{
+	char **no_endptr = NULL;
+
+	return parse_ul(s, flags, result, no_endptr);
+}
+
+/* Like parse_i(), but without reporting where parsing stopped. */
+static inline int convert_i(const char *s, unsigned int flags, int *result)
+{
+	char **no_endptr = NULL;
+
+	return parse_i(s, flags, result, no_endptr);
+}
+
+/* Like parse_ui(), but without reporting where parsing stopped. */
+static inline int convert_ui(const char *s, unsigned int flags, unsigned int *result)
+{
+	char **no_endptr = NULL;
+
+	return parse_ui(s, flags, result, no_endptr);
+}
+
+#endif /* NUMPARSE_H */
--
2.1.4
next prev parent reply other threads:[~2015-03-17 16:00 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-03-17 16:00 [PATCH 00/14] numparse module: systematically tighten up integer parsing Michael Haggerty
2015-03-17 16:00 ` Michael Haggerty [this message]
2015-03-18 18:27 ` [PATCH 01/14] numparse: new module for parsing integral numbers Eric Sunshine
2015-03-18 22:47 ` Michael Haggerty
2015-03-20 8:54 ` Eric Sunshine
2015-03-20 17:51 ` Junio C Hamano
2015-03-17 16:00 ` [PATCH 02/14] cacheinfo_callback(): use convert_ui() when handling "--cacheinfo" Michael Haggerty
2015-03-17 16:00 ` [PATCH 03/14] write_subdirectory(): use convert_ui() for parsing mode Michael Haggerty
2015-03-17 16:00 ` [PATCH 04/14] handle_revision_opt(): use skip_prefix() in many places Michael Haggerty
2015-03-17 16:00 ` [PATCH 05/14] handle_revision_opt(): use convert_i() when handling "-<digit>" Michael Haggerty
2015-03-19 6:34 ` Junio C Hamano
2015-03-17 16:00 ` [PATCH 06/14] strtoul_ui(), strtol_i(): remove functions Michael Haggerty
2015-03-17 16:00 ` [PATCH 07/14] handle_revision_opt(): use convert_ui() when handling "--abbrev=" Michael Haggerty
2015-03-17 16:00 ` [PATCH 08/14] builtin_diff(): detect errors when parsing --unified argument Michael Haggerty
2015-03-17 16:00 ` [PATCH 09/14] opt_arg(): val is always non-NULL Michael Haggerty
2015-03-17 16:00 ` [PATCH 10/14] opt_arg(): use convert_i() in implementation Michael Haggerty
2015-03-17 16:00 ` [PATCH 11/14] opt_arg(): report errors parsing option values Michael Haggerty
2015-03-17 16:00 ` [PATCH 12/14] opt_arg(): simplify pointer handling Michael Haggerty
2015-03-17 16:00 ` [PATCH 13/14] diff_opt_parse(): use convert_i() when handling "-l<num>" Michael Haggerty
2015-03-17 16:00 ` [PATCH 14/14] diff_opt_parse(): use convert_i() when handling --abbrev=<num> Michael Haggerty
2015-03-19 6:37 ` Junio C Hamano
2015-03-17 18:48 ` [PATCH 00/14] numparse module: systematically tighten up integer parsing Junio C Hamano
2015-03-17 19:46 ` Michael Haggerty
2015-03-19 6:31 ` Junio C Hamano
2015-03-17 23:05 ` Duy Nguyen
2015-03-18 9:47 ` Michael Haggerty
2015-03-18 9:58 ` Duy Nguyen
2015-03-18 10:03 ` Jeff King
2015-03-18 10:20 ` Michael Haggerty
2015-03-19 5:26 ` Jeff King
2015-03-19 6:41 ` Junio C Hamano
2015-03-19 7:32 ` Junio C Hamano
2015-03-24 16:06 ` Michael Haggerty
2015-03-24 16:49 ` René Scharfe
2015-03-25 21:14 ` Michael Haggerty
2015-03-25 21:59 ` Junio C Hamano
2015-03-24 15:05 ` Michael Haggerty
2015-03-19 6:22 ` Junio C Hamano
2015-03-24 15:42 ` Michael Haggerty
2015-03-24 15:58 ` Junio C Hamano
2015-03-24 16:09 ` Junio C Hamano
2015-03-24 17:39 ` Michael Haggerty
2015-03-24 18:08 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1426608016-2978-2-git-send-email-mhagger@alum.mit.edu \
--to=mhagger@alum.mit.edu \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).