git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Rast <trast@student.ethz.ch>
To: git@vger.kernel.org
Cc: Junio C Hamano <junio@pobox.com>,
	Johannes Schindelin <johannes.schindelin@gmx.de>,
	Johannes Sixt <johannes.sixt@telecom.at>,
	Johan Herland <johan@herland.net>
Subject: [PATCH/RFC v3 1/6] reflog: refactor parsing and checking
Date: Sat, 17 Jan 2009 04:30:06 +0100	[thread overview]
Message-ID: <1232163011-20088-2-git-send-email-trast@student.ethz.ch> (raw)
In-Reply-To: <1232163011-20088-1-git-send-email-trast@student.ethz.ch>
In-Reply-To: <7v8wpcs38c.fsf@gitster.siamese.dyndns.org>

read_ref_at() and for_each_reflog_ent() each had their own parsing and
error-checking code.  Refactor it into a separate function that fully
parses a single entry.  Note that this changes for_each_reflog_ent()
from silently skipping corrupt entries to calling die() on them.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---
 refs.c |  197 ++++++++++++++++++++++++++++++++--------------------------------
 1 files changed, 98 insertions(+), 99 deletions(-)

diff --git a/refs.c b/refs.c
index 33ced65..4571fac 100644
--- a/refs.c
+++ b/refs.c
@@ -1337,24 +1337,68 @@ int create_symref(const char *ref_target, const char *refs_heads_master,
 	return 0;
 }
 
-static char *ref_msg(const char *line, const char *endp)
+/*
+ * Check and parse a single reflog line.  Assumes that there is only
+ * one newline in the range buf[0]..buf[len-1] (but does check that it
+ * is at buf[len-1]).
+ */
+static void parse_reflog_line(const char *buf, int len,
+			      unsigned char *osha1, unsigned char *nsha1,
+			      char **email,
+			      unsigned long *timestamp, int *tz,
+			      char **message,
+			      const char *logname)
 {
-	const char *ep;
-	line += 82;
-	ep = memchr(line, '\n', endp - line);
-	if (!ep)
-		ep = endp;
-	return xmemdupz(line, ep - line);
+	static char *retbuf = NULL;
+	static int retbufsz = 0;
+	char *tzstr, *email_end;
+
+	if (len < 83 || buf[len-1] != '\n')
+		die("Log %s is corrupt (entry too short or unterminated).", logname);
+
+	if (get_sha1_hex(buf, osha1) || buf[40] != ' ')
+		die("Log %s is corrupt (malformed old sha1).", logname);
+
+	if (get_sha1_hex(buf + 41, nsha1) || buf[81] != ' ')
+		die("Log %s is corrupt (malformed new sha1).", logname);
+
+	ALLOC_GROW(retbuf, len-82+1, retbufsz);
+	memcpy(retbuf, buf+82, len-82);
+	retbuf[len-82] = '\0';
+
+	email_end = strchr(retbuf, '>');
+	if (!email_end || email_end[1] != ' ')
+		die("Log %s is corrupt (malformed email field).", logname);
+
+	*email = retbuf;
+	email_end[1] = '\0';
+
+	*timestamp = strtoul(email_end + 2, &tzstr, 10);
+	if (!(*timestamp) || !tzstr || tzstr[0] != ' ' ||
+	    (tzstr[1] != '+' && tzstr[1] != '-') ||
+	    !isdigit(tzstr[2]) || !isdigit(tzstr[3]) ||
+	    !isdigit(tzstr[4]) || !isdigit(tzstr[5]))
+		die("Log %s is corrupt (malformed timezone).", logname);
+	if (!(tzstr[6] == '\t' || tzstr[6] == '\n'))
+		die("Log %s is corrupt (bad message field separator).", logname);
+	*tz = strtoul(tzstr, NULL, 10);
+
+	if (tzstr[6] == '\t')
+		*message = tzstr+7;
+	else
+		*message = tzstr+6;
 }
 
 int read_ref_at(const char *ref, unsigned long at_time, int cnt, unsigned char *sha1, char **msg, unsigned long *cutoff_time, int *cutoff_tz, int *cutoff_cnt)
 {
-	const char *logfile, *logdata, *logend, *rec, *lastgt, *lastrec;
-	char *tz_c;
+	const char *logfile, *logdata, *logend, *rec, *start;
+	char *email, *message;
 	int logfd, tz, reccnt = 0;
 	struct stat st;
 	unsigned long date;
-	unsigned char logged_sha1[20];
+	unsigned char new_sha1[20];
+	unsigned char old_sha1[20];
+	unsigned char next_sha1[20];
 	void *log_mapped;
 	size_t mapsz;
 
@@ -1370,86 +1414,55 @@ int read_ref_at(const char *ref, unsigned long at_time, int cnt, unsigned char *
 	logdata = log_mapped;
 	close(logfd);
 
-	lastrec = NULL;
 	rec = logend = logdata + st.st_size;
+	if (logdata < rec && *(rec-1) == '\n')
+		rec--;
 	while (logdata < rec) {
-		reccnt++;
-		if (logdata < rec && *(rec-1) == '\n')
-			rec--;
-		lastgt = NULL;
-		while (logdata < rec && *(rec-1) != '\n') {
-			rec--;
-			if (*rec == '>')
-				lastgt = rec;
-		}
-		if (!lastgt)
-			die("Log %s is corrupt.", logfile);
-		date = strtoul(lastgt + 1, &tz_c, 10);
+		start = memrchr(logdata, '\n', rec-logdata);
+		if (start)
+			start++;
+		else
+			start = logdata;
+		parse_reflog_line(start, rec-start+1,
+				  old_sha1, new_sha1,
+				  &email, &date, &tz, &message,
+				  logfile);
+
+		if (cutoff_time)
+			*cutoff_time = date;
+		if (cutoff_tz)
+			*cutoff_tz = tz;
+		if (cutoff_cnt)
+			*cutoff_cnt = reccnt;
+		if (msg)
+			*msg = message;
+
 		if (date <= at_time || cnt == 0) {
-			tz = strtoul(tz_c, NULL, 10);
-			if (msg)
-				*msg = ref_msg(rec, logend);
-			if (cutoff_time)
-				*cutoff_time = date;
-			if (cutoff_tz)
-				*cutoff_tz = tz;
-			if (cutoff_cnt)
-				*cutoff_cnt = reccnt - 1;
-			if (lastrec) {
-				if (get_sha1_hex(lastrec, logged_sha1))
-					die("Log %s is corrupt.", logfile);
-				if (get_sha1_hex(rec + 41, sha1))
-					die("Log %s is corrupt.", logfile);
-				if (hashcmp(logged_sha1, sha1)) {
-					fprintf(stderr,
-						"warning: Log %s has gap after %s.\n",
-						logfile, show_date(date, tz, DATE_RFC2822));
-				}
-			}
-			else if (date == at_time) {
-				if (get_sha1_hex(rec + 41, sha1))
-					die("Log %s is corrupt.", logfile);
-			}
-			else {
-				if (get_sha1_hex(rec + 41, logged_sha1))
-					die("Log %s is corrupt.", logfile);
-				if (hashcmp(logged_sha1, sha1)) {
-					fprintf(stderr,
-						"warning: Log %s unexpectedly ended on %s.\n",
-						logfile, show_date(date, tz, DATE_RFC2822));
-				}
-			}
+			if (reccnt && hashcmp(new_sha1, next_sha1))
+				fprintf(stderr,
+					"warning: Log %s has gap after %s.\n",
+					logfile, show_date(date, tz, DATE_RFC2822));
+			if (!reccnt && date < at_time && hashcmp(new_sha1, next_sha1))
+				fprintf(stderr,
+					"warning: Log %s unexpectedly ended on %s.\n",
+					logfile, show_date(date, tz, DATE_RFC2822));
+				/* leave caller's sha1 untouched */
+			else
+				hashcpy(sha1, new_sha1);
 			munmap(log_mapped, mapsz);
 			return 0;
 		}
-		lastrec = rec;
+
+		hashcpy(next_sha1, old_sha1);
+		rec = start-1;
 		if (cnt > 0)
 			cnt--;
+		reccnt++;
 	}
 
-	rec = logdata;
-	while (rec < logend && *rec != '>' && *rec != '\n')
-		rec++;
-	if (rec == logend || *rec == '\n')
-		die("Log %s is corrupt.", logfile);
-	date = strtoul(rec + 1, &tz_c, 10);
-	tz = strtoul(tz_c, NULL, 10);
-	if (get_sha1_hex(logdata, sha1))
-		die("Log %s is corrupt.", logfile);
-	if (is_null_sha1(sha1)) {
-		if (get_sha1_hex(logdata + 41, sha1))
-			die("Log %s is corrupt.", logfile);
-	}
-	if (msg)
-		*msg = ref_msg(logdata, logend);
-	munmap(log_mapped, mapsz);
+	hashcpy(sha1, new_sha1);
 
-	if (cutoff_time)
-		*cutoff_time = date;
-	if (cutoff_tz)
-		*cutoff_tz = tz;
-	if (cutoff_cnt)
-		*cutoff_cnt = reccnt;
+	munmap(log_mapped, mapsz);
 	return 1;
 }
 
@@ -1466,30 +1479,16 @@ int for_each_reflog_ent(const char *ref, each_reflog_ent_fn fn, void *cb_data)
 		return -1;
 	while (fgets(buf, sizeof(buf), logfp)) {
 		unsigned char osha1[20], nsha1[20];
-		char *email_end, *message;
+		char *email, *message;
 		unsigned long timestamp;
 		int len, tz;
 
-		/* old SP new SP name <email> SP time TAB msg LF */
 		len = strlen(buf);
-		if (len < 83 || buf[len-1] != '\n' ||
-		    get_sha1_hex(buf, osha1) || buf[40] != ' ' ||
-		    get_sha1_hex(buf + 41, nsha1) || buf[81] != ' ' ||
-		    !(email_end = strchr(buf + 82, '>')) ||
-		    email_end[1] != ' ' ||
-		    !(timestamp = strtoul(email_end + 2, &message, 10)) ||
-		    !message || message[0] != ' ' ||
-		    (message[1] != '+' && message[1] != '-') ||
-		    !isdigit(message[2]) || !isdigit(message[3]) ||
-		    !isdigit(message[4]) || !isdigit(message[5]))
-			continue; /* corrupt? */
-		email_end[1] = '\0';
-		tz = strtol(message + 1, NULL, 10);
-		if (message[6] != '\t')
-			message += 6;
-		else
-			message += 7;
-		ret = fn(osha1, nsha1, buf+82, timestamp, tz, message, cb_data);
+		parse_reflog_line(buf, len,
+				  osha1, nsha1,
+				  &email, &timestamp, &tz, &message,
+				  logfile);
+		ret = fn(osha1, nsha1, email, timestamp, tz, message, cb_data);
 		if (ret)
 			break;
 	}
-- 
1.6.1.315.g92577

  parent reply	other threads:[~2009-01-17  3:32 UTC|newest]

Thread overview: 102+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-01-15  0:06 [PATCH] checkout: implement "-" shortcut name for last branch Thomas Rast
2009-01-15  0:12 ` [PATCH v2] " Thomas Rast
2009-01-15  7:27   ` Johannes Sixt
2009-01-15 13:15     ` Johannes Schindelin
2009-01-15 13:59       ` Thomas Rast
2009-01-15 14:09         ` Johannes Schindelin
2009-01-15 14:17           ` Johannes Schindelin
2009-01-15 20:12             ` Junio C Hamano
2009-01-15 20:35               ` Johannes Schindelin
2009-01-16 12:52               ` [PATCH] revision walker: include a detached HEAD in --all Johannes Schindelin
2009-01-16 13:12                 ` Santi Béjar
2009-01-16 13:17                   ` Johannes Schindelin
2009-01-16 13:22                     ` David Kastrup
2009-01-16 13:46                     ` Santi Béjar
2009-01-16 13:50                       ` Santi Béjar
2009-01-18  6:01                 ` Junio C Hamano
2009-01-18  6:36                   ` Junio C Hamano
2009-01-18 14:06                     ` Johannes Schindelin
2009-01-18 13:42                   ` Johannes Schindelin
2009-01-15 16:32       ` [PATCH v2] checkout: implement "-" shortcut name for last branch Johan Herland
2009-01-15 16:50         ` Johannes Schindelin
2009-01-15 20:11   ` Junio C Hamano
2009-01-15 20:50     ` Junio C Hamano
2009-01-17  3:30       ` [PATCH/RFC v3 0/6] N-th last checked out branch Thomas Rast
2009-01-17  5:52         ` Johannes Schindelin
2009-01-17 13:38           ` Thomas Rast
2009-01-17 13:40             ` [PATCH/RFC v3bis 1/2] sha1_name: implement @{-N} syntax for N-th last checked out Thomas Rast
2009-01-17 13:40               ` [PATCH/RFC v3bis 2/2] checkout: implement '@{-N}' and '-' special abbreviations Thomas Rast
2009-01-17 15:04             ` [PATCH] interpret_nth_last_branch(): avoid traversing the reflogs twice Johannes Schindelin
2009-01-17 16:09               ` [PATCH/RFC v4 0/5] N-th last checked out branch Thomas Rast
2009-01-17 16:09                 ` [PATCH/RFC v4 1/5] checkout: implement "@{-N}" shortcut name for N-th last branch Thomas Rast
2009-01-17 16:09                   ` [PATCH/RFC v4 2/5] sha1_name: tweak @{-N} lookup Thomas Rast
2009-01-17 16:09                     ` [PATCH/RFC v4 3/5] sha1_name: support @{-N} syntax in get_sha1() Thomas Rast
2009-01-17 16:09                       ` [PATCH/RFC v4 4/5] checkout: implement "-" abbreviation, add docs and tests Thomas Rast
2009-01-17 16:09                         ` [PATCH/RFC v4 5/5] interpret_nth_last_branch(): avoid traversing the reflogs twice Thomas Rast
2009-01-17 18:08                           ` [PATCH 6/5] Fix parsing of @{-1}@{1} Johannes Schindelin
2009-01-17 20:02                             ` Junio C Hamano
2009-01-17 21:22                               ` Johannes Schindelin
2009-01-17 19:57                         ` [PATCH/RFC v4 4/5] checkout: implement "-" abbreviation, add docs and tests Junio C Hamano
2009-01-17 17:55                       ` [PATCH/RFC v4 3/5] sha1_name: support @{-N} syntax in get_sha1() Johannes Schindelin
2009-01-17 19:37                       ` Junio C Hamano
2009-01-18  0:54                     ` [PATCH/RFC v4 2/5] sha1_name: tweak @{-N} lookup Junio C Hamano
2009-01-17 16:49                 ` [PATCH/RFC v4 0/5] N-th last checked out branch Johannes Schindelin
2009-01-17 19:13               ` [PATCH] interpret_nth_last_branch(): avoid traversing the reflogs twice Junio C Hamano
2009-01-17 19:29                 ` Johannes Schindelin
2009-01-18  0:43                   ` Junio C Hamano
2009-01-18  1:12                     ` Johannes Schindelin
2009-01-18  7:25                       ` Junio C Hamano
2009-01-18 20:59                         ` Johannes Schindelin
2009-01-19  8:08                           ` Junio C Hamano
2009-01-19  8:19                             ` Junio C Hamano
2009-01-19 12:33                               ` Johannes Schindelin
2009-01-20  0:11                                 ` Thomas Rast
2009-01-20  0:23                                   ` Johannes Schindelin
2009-01-20  0:41                                     ` Thomas Rast
2009-01-20  6:21                                 ` [PATCH] interpret_nth_last_branch(): plug small memleak Junio C Hamano
2009-01-20 10:15                                   ` Johannes Schindelin
2009-01-20  6:22                                 ` [PATCH] Introduce for_each_recent_reflog_ent() Junio C Hamano
2009-01-20 10:15                                   ` Johannes Schindelin
2009-01-20  8:35                                 ` [PATCH] interpret_nth_last_branch(): avoid traversing the reflogs twice Junio C Hamano
2009-01-21  0:16                                 ` [VALGRIND PATCH for nth_last patch series] Fix invalid memory access Johannes Schindelin
2009-01-21  8:45                                   ` Junio C Hamano
2009-01-21  9:18                                     ` Thomas Rast
2009-01-21 10:13                                       ` Junio C Hamano
2009-01-21 12:06                                         ` Johannes Schindelin
2009-01-24 22:21                                       ` Thomas Rast
2009-01-24 22:23                                         ` [PATCH next] t1505: remove debugging cruft Thomas Rast
2009-01-25 20:35                                           ` Junio C Hamano
2009-01-21 11:56                                     ` [VALGRIND PATCH for nth_last patch series] Fix invalid memory access Johannes Schindelin
2009-01-19 12:41                               ` [PATCH] @{-<n>}: avoid crash with corrupt reflog Johannes Schindelin
2009-01-19 14:57                                 ` Johannes Schindelin
2009-01-17  3:30       ` Thomas Rast [this message]
2009-01-17  5:35         ` [PATCH/RFC v3 1/6] reflog: refactor parsing and checking Johannes Schindelin
2009-01-17  3:30       ` [PATCH/RFC v3 2/6] reflog: refactor log open+mmap Thomas Rast
2009-01-17  5:40         ` Johannes Schindelin
2009-01-17  3:30       ` [PATCH/RFC v3 3/6] reflog: make for_each_reflog_ent use mmap Thomas Rast
2009-01-17  3:30       ` [PATCH/RFC v3 4/6] reflog: add backwards iterator Thomas Rast
2009-01-17  3:30       ` [PATCH/RFC v3 5/6] sha1_name: implement @{-N} syntax for N-th last checked out Thomas Rast
2009-01-17  3:30       ` [PATCH/RFC v3 6/6] checkout: implement '@{-N}' and '-' special abbreviations Thomas Rast
2009-01-16 12:31     ` [PATCH v2] checkout: implement "-" shortcut name for last branch Johannes Schindelin
2009-01-15  0:45 ` [PATCH] " Johannes Schindelin
2009-01-15 14:01   ` Thomas Rast
2009-01-15 14:14     ` Johannes Schindelin
2009-01-15 17:05       ` Thomas Rast
2009-01-15 18:34         ` Johannes Schindelin
2009-01-16  9:08       ` Thomas Rast
2009-01-16 11:18         ` Johannes Schindelin
2009-01-18  1:38           ` [TOY PATCH] git-resurrect: find traces of a branch name and resurrect it Thomas Rast
2009-01-18 16:19             ` Johannes Schindelin
2009-01-20  9:01               ` Thomas Rast
2009-01-20 16:57                 ` Boyd Stephen Smith Jr.
2009-01-20 20:50                   ` Boyd Stephen Smith Jr.
2009-01-23 20:03                     ` [PATCH] contrib " Thomas Rast
2009-01-23 21:00                       ` Boyd Stephen Smith Jr.
2009-01-26 11:54                         ` Thomas Rast
2009-01-26 12:40                           ` [PATCH v2] " Thomas Rast
2009-01-27  6:31                             ` Junio C Hamano
2009-01-30 22:52                               ` Thomas Rast
2009-02-01 21:34                               ` [PATCH v3] " Thomas Rast
2009-02-02  2:31                                 ` Junio C Hamano
2009-02-04 10:04                                   ` [PATCH v4] " Thomas Rast
2009-02-05  8:38                                     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1232163011-20088-2-git-send-email-trast@student.ethz.ch \
    --to=trast@student.ethz.ch \
    --cc=git@vger.kernel.org \
    --cc=johan@herland.net \
    --cc=johannes.schindelin@gmx.de \
    --cc=johannes.sixt@telecom.at \
    --cc=junio@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
and for the URLs of the NNTP newsgroup(s).