[PATCH/RFC] git-mailinfo: use strbuf's instead of fixed buffers

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Lukas Sandström" <lukass@etek.chalmers.se>
To: Junio C Hamano <gitster@pobox.com>
Cc: "Lukas Sandström" <lukass@etek.chalmers.se>,
	"Git Mailing List" <git@vger.kernel.org>
Subject: [PATCH/RFC] git-mailinfo: use strbuf's instead of fixed buffers
Date: Fri, 11 Jul 2008 01:43:13 +0200	[thread overview]
Message-ID: <48769E91.60205@etek.chalmers.se> (raw)
In-Reply-To: <48769E40.8030303@etek.chalmers.se>

Signed-off-by: Lukas Sandström <lukass@etek.chalmers.se>
---
 builtin-mailinfo.c |  705 +++++++++++++++++++++++++---------------------------
 1 files changed, 333 insertions(+), 372 deletions(-)

diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index 2d1520f..254a97c 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -5,14 +5,15 @@
 #include "cache.h"
 #include "builtin.h"
 #include "utf8.h"
+#include "strbuf.h"
 
 static FILE *cmitmsg, *patchfile, *fin, *fout;
 
 static int keep_subject;
 static const char *metainfo_charset;
-static char line[1000];
-static char name[1000];
-static char email[1000];
+static struct strbuf line = STRBUF_INIT;
+static struct strbuf name = STRBUF_INIT;
+static struct strbuf email = STRBUF_INIT;
 
 static enum  {
 	TE_DONTCARE, TE_QP, TE_BASE64,
@@ -21,74 +22,74 @@ static enum  {
 	TYPE_TEXT, TYPE_OTHER,
 } message_type;
 
-static char charset[256];
+static struct strbuf charset = STRBUF_INIT;
 static int patch_lines;
-static char **p_hdr_data, **s_hdr_data;
+static struct strbuf **p_hdr_data, **s_hdr_data;
 
 #define MAX_HDR_PARSED 10
 #define MAX_BOUNDARIES 5
 
-static char *sanity_check(char *name, char *email)
+static void sanity_check(struct strbuf *out, struct strbuf *name, struct strbuf *email)
 {
-	int len = strlen(name);
-	if (len < 3 || len > 60)
-		return email;
-	if (strchr(name, '@') || strchr(name, '<') || strchr(name, '>'))
-		return email;
-	return name;
+	struct strbuf o = STRBUF_INIT;
+	if (name->len < 3 || name->len > 60)
+		strbuf_addbuf(&o, email);
+	if (strchr(name->buf, '@') || strchr(name->buf, '<') ||
+		strchr(name->buf, '>'))
+		strbuf_addbuf(&o, email);
+	strbuf_addbuf(&o, name);
+	strbuf_reset(out);
+	strbuf_addbuf(out, &o);
+	strbuf_release(&o);
 }
 
-static int bogus_from(char *line)
+static int bogus_from(const struct strbuf *line)
 {
 	/* John Doe <johndoe> */
-	char *bra, *ket, *dst, *cp;
 
+	char *bra, *ket;
 	/* This is fallback, so do not bother if we already have an
 	 * e-mail address.
 	 */
-	if (*email)
+	if (email.len)
 		return 0;
 
-	bra = strchr(line, '<');
+	bra = strchr(line->buf, '<');
 	if (!bra)
 		return 0;
 	ket = strchr(bra, '>');
 	if (!ket)
 		return 0;
 
-	for (dst = email, cp = bra+1; cp < ket; )
-		*dst++ = *cp++;
-	*dst = 0;
-	for (cp = line; isspace(*cp); cp++)
-		;
-	for (bra--; isspace(*bra); bra--)
-		*bra = 0;
-	cp = sanity_check(cp, email);
-	strcpy(name, cp);
+	strbuf_reset(&email);
+	strbuf_add(&email, bra + 1, ket - bra - 1);
+
+	strbuf_reset(&name);
+	strbuf_add(&name, line->buf, bra - line->buf);
+	strbuf_trim(&name);
+	sanity_check(&name, &name, &email);
 	return 1;
 }
 
-static int handle_from(char *in_line)
+static int handle_from(struct strbuf *from)
 {
-	char line[1000];
 	char *at;
-	char *dst;
+	size_t el;
 
-	strcpy(line, in_line);
-	at = strchr(line, '@');
+	at = strchr(from->buf, '@');
 	if (!at)
-		return bogus_from(line);
+		return bogus_from(from);
 
 	/*
 	 * If we already have one email, don't take any confusing lines
 	 */
-	if (*email && strchr(at+1, '@'))
+	if (email.len && strchr(at + 1, '@'))
 		return 0;
 
 	/* Pick up the string around '@', possibly delimited with <>
-	 * pair; that is the email part.  White them out while copying.
+	 * pair; that is the email part.
 	 */
-	while (at > line) {
+	while (at > from->buf) {
 		char c = at[-1];
 		if (isspace(c))
 			break;
@@ -98,56 +99,35 @@ static int handle_from(char *in_line)
 		}
 		at--;
 	}
-	dst = email;
-	for (;;) {
-		unsigned char c = *at;
-		if (!c || c == '>' || isspace(c)) {
-			if (c == '>')
-				*at = ' ';
-			break;
-		}
-		*at++ = ' ';
-		*dst++ = c;
-	}
-	*dst++ = 0;
-
+	el = strcspn(at, " \n\t\r\v\f>");
+	strbuf_reset(&email);
+	strbuf_add(&email, at, el);
+	strbuf_remove(from, at - from->buf, el + 1);
 	/* The remainder is name.  It could be "John Doe <john.doe@xz>"
 	 * or "john.doe@xz (John Doe)", but we have whited out the
 	 * email part, so trim from both ends, possibly removing
 	 * the () pair at the end.
 	 */
-	at = line + strlen(line);
-	while (at > line) {
-		unsigned char c = *--at;
-		if (!isspace(c)) {
-			at[(c == ')') ? 0 : 1] = 0;
-			break;
-		}
-	}
 
-	at = line;
-	for (;;) {
-		unsigned char c = *at;
-		if (!c || !isspace(c)) {
-			if (c == '(')
-				at++;
-			break;
-		}
-		at++;
-	}
-	at = sanity_check(at, email);
-	strcpy(name, at);
+	strbuf_trim(from);
+	if (*from->buf == '(')
+		strbuf_remove(&name, 0, 1);
+	if (*(from->buf + from->len - 1) == ')')
+		strbuf_setlen(from, from->len - 1);
+
+	sanity_check(&name, from, &email);
 	return 1;
 }
 
-static int handle_header(char *line, char *data, int ofs)
+static void handle_header(struct strbuf **out, const struct strbuf *line)
 {
-	if (!line || !data)
-		return 1;
-
-	strcpy(data, line+ofs);
+	if (!*out) {
+		*out = xmalloc(sizeof(struct strbuf));
+		strbuf_init(*out, line->len);
+	} else
+		strbuf_reset(*out);
 
-	return 0;
+	strbuf_addbuf(*out, (struct strbuf *)line); /* const warning */
 }
 
 /* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
@@ -156,13 +136,13 @@ static int handle_header(char *line, char *data, int ofs)
  * case insensitively.
  */
 
-static int slurp_attr(const char *line, const char *name, char *attr)
+static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
 {
 	const char *ends, *ap = strcasestr(line, name);
 	size_t sz;
 
 	if (!ap) {
-		*attr = 0;
+		strbuf_setlen(attr, 0);
 		return 0;
 	}
 	ap += strlen(name);
@@ -173,180 +153,176 @@ static int slurp_attr(const char *line, const char *name, char *attr)
 	else
 		ends = "; \t";
 	sz = strcspn(ap, ends);
-	memcpy(attr, ap, sz);
-	attr[sz] = 0;
+	strbuf_add(attr, ap, sz);
 	return 1;
 }
 
 struct content_type {
-	char *boundary;
-	int boundary_len;
+	struct strbuf *boundary;
 };
 
 static struct content_type content[MAX_BOUNDARIES];
 
 static struct content_type *content_top = content;
 
-static int handle_content_type(char *line)
+static int handle_content_type(struct strbuf *line)
 {
-	char boundary[256];
+	struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
+	strbuf_init(boundary, line->len);
 
-	if (strcasestr(line, "text/") == NULL)
+	if (!strcasestr(line->buf, "text/"))
 		 message_type = TYPE_OTHER;
-	if (slurp_attr(line, "boundary=", boundary + 2)) {
-		memcpy(boundary, "--", 2);
+	if (slurp_attr(line->buf, "boundary=", boundary)) {
+		strbuf_insert(boundary, 0, "--", 2);
 		if (content_top++ >= &content[MAX_BOUNDARIES]) {
 			fprintf(stderr, "Too many boundaries to handle\n");
 			exit(1);
 		}
-		content_top->boundary_len = strlen(boundary);
-		content_top->boundary = xmalloc(content_top->boundary_len+1);
-		strcpy(content_top->boundary, boundary);
-	}
-	if (slurp_attr(line, "charset=", charset)) {
-		int i, c;
-		for (i = 0; (c = charset[i]) != 0; i++)
-			charset[i] = tolower(c);
+		content_top->boundary = boundary;
+	} else {
+		strbuf_release(boundary);
+		free(boundary);
 	}
+	if (slurp_attr(line->buf, "charset=", &charset))
+		strbuf_tolower(&charset);
 	return 0;
 }
 
-static int handle_content_transfer_encoding(char *line)
+static int handle_content_transfer_encoding(struct strbuf *line)
 {
-	if (strcasestr(line, "base64"))
+	if (strcasestr(line->buf, "base64"))
 		transfer_encoding = TE_BASE64;
-	else if (strcasestr(line, "quoted-printable"))
+	else if (strcasestr(line->buf, "quoted-printable"))
 		transfer_encoding = TE_QP;
 	else
 		transfer_encoding = TE_DONTCARE;
 	return 0;
 }
 
-static int is_multipart_boundary(const char *line)
-{
-	return (!memcmp(line, content_top->boundary, content_top->boundary_len));
-}
-
-static int eatspace(char *line)
+static int is_multipart_boundary(struct strbuf *line)
 {
-	int len = strlen(line);
-	while (len > 0 && isspace(line[len-1]))
-		line[--len] = 0;
-	return len;
+	return !strbuf_cmp(line, content_top->boundary);
 }
 
-static char *cleanup_subject(char *subject)
+static void cleanup_subject(struct strbuf *subject)
 {
-	for (;;) {
-		char *p;
-		int len, remove;
-		switch (*subject) {
+	char *pos;
+	size_t remove;
+	while (subject->len) {
+		switch (*subject->buf) {
 		case 'r': case 'R':
-			if (!memcmp("e:", subject+1, 2)) {
-				subject += 3;
+			if (subject->len <= 3)
+				break;
+			if (!memcmp(subject->buf + 1, "e:", 2)) {
+				strbuf_remove(subject, 0, 3);
 				continue;
 			}
 			break;
 		case ' ': case '\t': case ':':
-			subject++;
+			strbuf_remove(subject, 0, 1);
 			continue;
-
+			break;
 		case '[':
-			p = strchr(subject, ']');
-			if (!p) {
-				subject++;
-				continue;
-			}
-			len = strlen(p);
-			remove = p - subject;
-			if (remove <= len *2) {
-				subject = p+1;
-				continue;
-			}
+			if ((pos = strchr(subject->buf, ']'))) {
+				remove = pos - subject->buf + 1;
+				/* Don't remove too much. */
+				if (remove <= (subject->len - remove + 1) * 2) {
+					strbuf_remove(subject, 0, remove);
+					continue;
+				}
+			} else
+				strbuf_remove(subject, 0, 1);
 			break;
 		}
-		eatspace(subject);
-		return subject;
+		strbuf_trim(subject);
+		return;
 	}
 }
 
-static void cleanup_space(char *buf)
+static void cleanup_space(struct strbuf *sb)
 {
-	unsigned char c;
-	while ((c = *buf) != 0) {
-		buf++;
-		if (isspace(c)) {
-			buf[-1] = ' ';
-			c = *buf;
-			while (isspace(c)) {
-				int len = strlen(buf);
-				memmove(buf, buf+1, len);
-				c = *buf;
-			}
+	size_t pos, cnt;
+	for (pos = 0; pos < sb->len; pos++) {
+		if (isspace(sb->buf[pos])) {
+			sb->buf[pos] = ' ';
+			for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
+			strbuf_remove(sb, pos + 1, cnt);
 		}
 	}
 }
 
-static void decode_header(char *it, unsigned itsize);
+static void decode_header(struct strbuf *line);
 static const char *header[MAX_HDR_PARSED] = {
 	"From","Subject","Date",
 };
 
-static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite)
+static int cmp_header(const struct strbuf *line, const char *hdr)
 {
-	int i;
+	int len = strlen(hdr);
+	return !strncasecmp(line->buf, hdr, len) && line->len > len &&
+			line->buf[len] == ':' && isspace(line->buf[len + 1]);
+}
 
+static int check_header(const struct strbuf *line, struct strbuf *hdr_data[], int overwrite)
+{
+	int i, ret = 0, len;
+	struct strbuf sb = STRBUF_INIT;
 	/* search for the interesting parts */
 	for (i = 0; header[i]; i++) {
 		int len = strlen(header[i]);
-		if ((!hdr_data[i] || overwrite) &&
-		    !strncasecmp(line, header[i], len) &&
-		    line[len] == ':' && isspace(line[len + 1])) {
+		if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
 			/* Unwrap inline B and Q encoding, and optionally
 			 * normalize the meta information to utf8.
 			 */
-			decode_header(line + len + 2, linesize - len - 2);
-			hdr_data[i] = xmalloc(1000 * sizeof(char));
-			if (! handle_header(line, hdr_data[i], len + 2)) {
-				return 1;
-			}
+			strbuf_add(&sb, line->buf + len + 2, line->len - len -2);
+			decode_header(&sb);
+			handle_header(&hdr_data[i], &sb);
+			ret = 1;
+			goto check_header_out;
 		}
 	}
 
 	/* Content stuff */
-	if (!strncasecmp(line, "Content-Type", 12) &&
-		line[12] == ':' && isspace(line[12 + 1])) {
-		decode_header(line + 12 + 2, linesize - 12 - 2);
-		if (! handle_content_type(line)) {
-			return 1;
-		}
-	}
-	if (!strncasecmp(line, "Content-Transfer-Encoding", 25) &&
-		line[25] == ':' && isspace(line[25 + 1])) {
-		decode_header(line + 25 + 2, linesize - 25 - 2);
-		if (! handle_content_transfer_encoding(line)) {
-			return 1;
-		}
+	if (cmp_header(line, "Content-Type")) {
+		len = strlen("Content-Type: ");
+		strbuf_reset(&sb);
+		strbuf_add(&sb, line->buf + len, line->len - len);
+		decode_header(&sb);
+		strbuf_insert(&sb, 0, "Content-Type: ", len);
+		if (!handle_content_type(&sb))
+			ret = 1;
+			goto check_header_out;
+	}
+	if (cmp_header(line, "Content-Transfer-Encoding")) {
+		len = strlen("Content-Transfer-Encoding: ");
+		strbuf_reset(&sb);
+		strbuf_add(&sb, line->buf + len, line->len - len);
+		decode_header(&sb);
+		if (!handle_content_transfer_encoding(&sb))
+			ret = 1;
+			goto check_header_out;
 	}
 
 	/* for inbody stuff */
-	if (!memcmp(">From", line, 5) && isspace(line[5]))
-		return 1;
-	if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) {
+	if (!prefixcmp(line->buf, ">From") && isspace(line->buf[5]))
+		ret = 1;
+		goto check_header_out;
+	if (!prefixcmp(line->buf, "[PATCH]") && isspace(line->buf[7])) {
 		for (i = 0; header[i]; i++) {
 			if (!memcmp("Subject", header[i], 7)) {
-				if (! handle_header(line, hdr_data[i], 0)) {
-					return 1;
-				}
+				handle_header(&hdr_data[i], line);
+				ret = 1;
+				goto check_header_out;
 			}
 		}
 	}
 
-	/* no match */
-	return 0;
+check_header_out:
+	strbuf_release(&sb);
+	return ret;
 }
 
-static int is_rfc2822_header(char *line)
+static int is_rfc2822_header(const struct strbuf *line)
 {
 	/*
 	 * The section that defines the loosest possible
@@ -357,15 +333,15 @@ static int is_rfc2822_header(char *line)
 	 * ftext = %d33-57 / %59-126
 	 */
 	int ch;
-	char *cp = line;
+	char *cp = line->buf;
 
 	/* Count mbox From headers as headers */
-	if (!memcmp(line, "From ", 5) || !memcmp(line, ">From ", 6))
+	if (line->len >= 6 && (!memcmp(cp, "From ", 5) || !memcmp(cp, ">From ", 6)))
 		return 1;
 
 	while ((ch = *cp++)) {
 		if (ch == ':')
-			return cp != line;
+			return 1;
 		if ((33 <= ch && ch <= 57) ||
 		    (59 <= ch && ch <= 126))
 			continue;
@@ -374,34 +350,20 @@ static int is_rfc2822_header(char *line)
 	return 0;
 }
 
-/*
- * sz is size of 'line' buffer in bytes.  Must be reasonably
- * long enough to hold one physical real-world e-mail line.
- */
-static int read_one_header_line(char *line, int sz, FILE *in)
+static int read_one_header_line(struct strbuf *line, FILE *in)
 {
-	int len;
-
-	/*
-	 * We will read at most (sz-1) bytes and then potentially
-	 * re-add NUL after it.  Accessing line[sz] after this is safe
-	 * and we can allow len to grow up to and including sz.
-	 */
-	sz--;
-
 	/* Get the first part of the line. */
-	if (!fgets(line, sz, in))
+	if (strbuf_getline(line, in, '\n'))
 		return 0;
 
 	/*
 	 * Is it an empty line or not a valid rfc2822 header?
 	 * If so, stop here, and return false ("not a header")
 	 */
-	len = eatspace(line);
-	if (!len || !is_rfc2822_header(line)) {
+	strbuf_rtrim(line);
+	if (!line->len || !is_rfc2822_header(line)) {
 		/* Re-add the newline */
-		line[len] = '\n';
-		line[len + 1] = '\0';
+		strbuf_addch(line, '\n');
 		return 0;
 	}
 
@@ -410,65 +372,53 @@ static int read_one_header_line(char *line, int sz, FILE *in)
 	 * Yuck, 2822 header "folding"
 	 */
 	for (;;) {
-		int peek, addlen;
-		static char continuation[1000];
+		int peek;
+		struct strbuf continuation = STRBUF_INIT;
 
 		peek = fgetc(in); ungetc(peek, in);
 		if (peek != ' ' && peek != '\t')
 			break;
-		if (!fgets(continuation, sizeof(continuation), in))
+		if (strbuf_getline(&continuation, in, '\n'))
 			break;
-		addlen = eatspace(continuation);
-		if (len < sz - 1) {
-			if (addlen >= sz - len)
-				addlen = sz - len - 1;
-			memcpy(line + len, continuation, addlen);
-			line[len] = '\n';
-			len += addlen;
-		}
+		continuation.buf[0] = '\n';
+		strbuf_rtrim(&continuation);
+		strbuf_addbuf(line, &continuation);
 	}
-	line[len] = 0;
 
 	return 1;
 }
 
-static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
+static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
 {
-	char *otbegin = ot;
-	char *otend = ot + otsize;
+	const char *in = q_seg->buf;
 	int c;
-	while ((c = *in++) != 0 && (in <= ep)) {
-		if (ot == otend) {
-			*--ot = '\0';
-			return -1;
-		}
+	struct strbuf *out = xmalloc(sizeof(struct strbuf));
+	strbuf_init(out, q_seg->len);
+
+	while ((c = *in++) != 0) {
 		if (c == '=') {
 			int d = *in++;
 			if (d == '\n' || !d)
 				break; /* drop trailing newline */
-			*ot++ = ((hexval(d) << 4) | hexval(*in++));
+			strbuf_addch(out, (hexval(d) << 4) | hexval(*in++));
 			continue;
 		}
 		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
 			c = 0x20;
-		*ot++ = c;
+		strbuf_addch(out, c);
 	}
-	*ot = 0;
-	return (ot - otbegin);
+	return out;
 }
 
-static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
+static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
 {
 	/* Decode in..ep, possibly in-place to ot */
 	int c, pos = 0, acc = 0;
-	char *otbegin = ot;
-	char *otend = ot + otsize;
+	const char *in = b_seg->buf;
+	struct strbuf *out = xmalloc(sizeof(struct strbuf));
+	strbuf_init(out, b_seg->len);
 
-	while ((c = *in++) != 0 && (in <= ep)) {
-		if (ot == otend) {
-			*--ot = '\0';
-			return -1;
-		}
+	while ((c = *in++) != 0) {
 		if (c == '+')
 			c = 62;
 		else if (c == '/')
@@ -493,21 +443,20 @@ static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
 			acc = (c << 2);
 			break;
 		case 1:
-			*ot++ = (acc | (c >> 4));
+			strbuf_addch(out, (acc | (c >> 4)));
 			acc = (c & 15) << 4;
 			break;
 		case 2:
-			*ot++ = (acc | (c >> 2));
+			strbuf_addch(out, (acc | (c >> 2)));
 			acc = (c & 3) << 6;
 			break;
 		case 3:
-			*ot++ = (acc | c);
+			strbuf_addch(out, (acc | c));
 			acc = pos = 0;
 			break;
 		}
 	}
-	*ot = 0;
-	return (ot - otbegin);
+	return out;
 }
 
 /*
@@ -521,16 +470,16 @@ static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
  * Otherwise, we default to assuming it is Latin1 for historical
  * reasons.
  */
-static const char *guess_charset(const char *line, const char *target_charset)
+static const char *guess_charset(const struct strbuf *line, const char *target_charset)
 {
 	if (is_encoding_utf8(target_charset)) {
-		if (is_utf8(line))
+		if (is_utf8(line->buf))
 			return NULL;
 	}
 	return "latin1";
 }
 
-static void convert_to_utf8(char *line, unsigned linesize, const char *charset)
+static void convert_to_utf8(struct strbuf *line, const char *charset)
 {
 	char *out;
 
@@ -542,112 +491,119 @@ static void convert_to_utf8(char *line, unsigned linesize, const char *charset)
 
 	if (!strcmp(metainfo_charset, charset))
 		return;
-	out = reencode_string(line, metainfo_charset, charset);
+	out = reencode_string(line->buf, metainfo_charset, charset);
 	if (!out)
 		die("cannot convert from %s to %s\n",
 		    charset, metainfo_charset);
-	strlcpy(line, out, linesize);
-	free(out);
+	strbuf_attach(line, out, strlen(out), strlen(out));
 }
 
-static int decode_header_bq(char *it, unsigned itsize)
+static int decode_header_bq(struct strbuf *it)
 {
 	char *in, *out, *ep, *cp, *sp;
-	char outbuf[1000];
+	struct strbuf outbuf = STRBUF_INIT, *dec;
+	struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
 	int rfc2047 = 0;
 
-	in = it;
-	out = outbuf;
-	while ((ep = strstr(in, "=?")) != NULL) {
-		int sz, encoding;
-		char charset_q[256], piecebuf[256];
+	in = it->buf;
+	while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
+		int encoding;
+		strbuf_reset(&charset_q);
+		strbuf_reset(&piecebuf);
 		rfc2047 = 1;
 
 		if (in != ep) {
-			sz = ep - in;
-			memcpy(out, in, sz);
-			out += sz;
-			in += sz;
+			strbuf_add(&outbuf, in, ep - in);
+			in = ep;
 		}
 		/* E.g.
 		 * ep : "=?iso-2022-jp?B?GyR...?= foo"
 		 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
 		 */
 		ep += 2;
-		cp = strchr(ep, '?');
-		if (!cp)
-			return rfc2047; /* no munging */
-		for (sp = ep; sp < cp; sp++)
-			charset_q[sp - ep] = tolower(*sp);
-		charset_q[cp - ep] = 0;
+
+		if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
+			goto decode_header_bq_out;
+
+		if (cp + 3 - it->buf > it->len)
+			goto decode_header_bq_out;
+		strbuf_add(&charset_q, ep, cp - ep);
+		strbuf_tolower(&charset_q);
+
 		encoding = cp[1];
 		if (!encoding || cp[2] != '?')
-			return rfc2047; /* no munging */
+			goto decode_header_bq_out;
 		ep = strstr(cp + 3, "?=");
 		if (!ep)
-			return rfc2047; /* no munging */
+			goto decode_header_bq_out;
+		strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
 		switch (tolower(encoding)) {
 		default:
-			return rfc2047; /* no munging */
+			goto decode_header_bq_out;
 		case 'b':
-			sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep);
+			dec = decode_b_segment(&piecebuf);
 			break;
 		case 'q':
-			sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1);
+			dec = decode_q_segment(&piecebuf, 1);
 			break;
 		}
-		if (sz < 0)
-			return rfc2047;
 		if (metainfo_charset)
-			convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q);
+			convert_to_utf8(dec, charset_q.buf);
 
-		sz = strlen(piecebuf);
-		if (outbuf + sizeof(outbuf) <= out + sz)
-			return rfc2047; /* no munging */
-		strcpy(out, piecebuf);
-		out += sz;
+		strbuf_addbuf(&outbuf, dec);
+		strbuf_release(dec);
+		free(dec);
 		in = ep + 2;
 	}
-	strcpy(out, in);
-	strlcpy(it, outbuf, itsize);
+	strbuf_addstr(&outbuf, in);
+	strbuf_reset(it);
+	strbuf_addbuf(it, &outbuf);
+decode_header_bq_out:
+	strbuf_release(&outbuf);
+	strbuf_release(&charset_q);
+	strbuf_release(&piecebuf);
 	return rfc2047;
 }
 
-static void decode_header(char *it, unsigned itsize)
+static void decode_header(struct strbuf *it)
 {
-
-	if (decode_header_bq(it, itsize))
+	if (decode_header_bq(it))
 		return;
 	/* otherwise "it" is a straight copy of the input.
 	 * This can be binary guck but there is no charset specified.
 	 */
 	if (metainfo_charset)
-		convert_to_utf8(it, itsize, "");
+		convert_to_utf8(it, "");
 }
 
-static int decode_transfer_encoding(char *line, unsigned linesize, int inputlen)
+static void decode_transfer_encoding(struct strbuf *line)
 {
-	char *ep;
+	struct strbuf *ret;
+	int len;
 
 	switch (transfer_encoding) {
 	case TE_QP:
-		ep = line + inputlen;
-		return decode_q_segment(line, line, linesize, ep, 0);
+		ret = decode_q_segment(line, 0);
+		break;
 	case TE_BASE64:
-		ep = line + inputlen;
-		return decode_b_segment(line, line, linesize, ep);
+		ret = decode_b_segment(line);
+		break;
 	case TE_DONTCARE:
 	default:
-		return inputlen;
+		return;
 	}
+	strbuf_reset(line);
+	strbuf_addbuf(line, ret);
+	strbuf_release(ret);
+	free(ret);
 }
 
-static int handle_filter(char *line, unsigned linesize, int linelen);
+static int handle_filter(struct strbuf *line);
 
 static int find_boundary(void)
 {
-	while(fgets(line, sizeof(line), fin) != NULL) {
-		if (is_multipart_boundary(line))
+	while(!strbuf_getline(&line, fin, '\n')) {
+		if (is_multipart_boundary(&line))
 			return 1;
 	}
 	return 0;
@@ -655,11 +611,15 @@ static int find_boundary(void)
 
 static int handle_boundary(void)
 {
-	char newline[]="\n";
+	struct strbuf newline = STRBUF_INIT;
+
+	strbuf_addch(&newline, '\n');
 again:
-	if (!memcmp(line+content_top->boundary_len, "--", 2)) {
+	if (line.len >= content_top->boundary->len + 2 &&
+	    !memcmp(line.buf + content_top->boundary->len, "--", 2)) {
 		/* we hit an end boundary */
 		/* pop the current boundary off the stack */
+		strbuf_release(content_top->boundary);
 		free(content_top->boundary);
 
 		/* technically won't happen as is_multipart_boundary()
@@ -670,7 +630,8 @@ again:
 					"can't recover\n");
 			exit(1);
 		}
-		handle_filter(newline, sizeof(newline), strlen(newline));
+		handle_filter(&newline);
+		strbuf_release(&newline);
 
 		/* skip to the next boundary */
 		if (!find_boundary())
@@ -680,39 +641,44 @@ again:
 
 	/* set some defaults */
 	transfer_encoding = TE_DONTCARE;
-	charset[0] = 0;
+	strbuf_reset(&charset);
 	message_type = TYPE_TEXT;
 
 	/* slurp in this section's info */
-	while (read_one_header_line(line, sizeof(line), fin))
-		check_header(line, sizeof(line), p_hdr_data, 0);
+	while (read_one_header_line(&line, fin))
+		check_header(&line, p_hdr_data, 0);
 
+	strbuf_release(&newline);
 	/* eat the blank line after section info */
-	return (fgets(line, sizeof(line), fin) != NULL);
+	return (strbuf_getline(&line, fin, '\n') == 0);
 }
 
-static inline int patchbreak(const char *line)
+static inline int patchbreak(const struct strbuf *line)
 {
+	size_t i;
+
 	/* Beginning of a "diff -" header? */
-	if (!memcmp("diff -", line, 6))
+	if (!prefixcmp(line->buf, "diff -"))
 		return 1;
 
 	/* CVS "Index: " line? */
-	if (!memcmp("Index: ", line, 7))
+	if (!prefixcmp(line->buf, "Index: "))
 		return 1;
 
 	/*
 	 * "--- <filename>" starts patches without headers
 	 * "---<sp>*" is a manual separator
 	 */
-	if (!memcmp("---", line, 3)) {
-		line += 3;
+	if (line->len < 4)
+		return 0;
+
+	if (!prefixcmp(line->buf, "---")) {
 		/* space followed by a filename? */
-		if (line[0] == ' ' && !isspace(line[1]))
+		if (line->buf[3] == ' ' && !isspace(line->buf[4]))
 			return 1;
 		/* Just whitespace? */
-		for (;;) {
-			unsigned char c = *line++;
+		for (i = 3; i < line->len; i++) {
+			unsigned char c = line->buf[i];
 			if (c == '\n')
 				return 1;
 			if (!isspace(c))
@@ -723,32 +689,25 @@ static inline int patchbreak(const char *line)
 	return 0;
 }
 
-
-static int handle_commit_msg(char *line, unsigned linesize)
+static int handle_commit_msg(struct strbuf *line)
 {
 	static int still_looking = 1;
-	char *endline = line + linesize;
+	char *c;
 
 	if (!cmitmsg)
 		return 0;
 
 	if (still_looking) {
-		char *cp = line;
-		if (isspace(*line)) {
-			for (cp = line + 1; *cp; cp++) {
-				if (!isspace(*cp))
-					break;
-			}
-			if (!*cp)
-				return 0;
-		}
-		if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0)
+		strbuf_ltrim(line);
+		if (!line->len)
+			return 0;
+		if ((still_looking = check_header(line, s_hdr_data, 0)) != 0)
 			return 0;
 	}
 
 	/* normalize the log message to UTF-8. */
 	if (metainfo_charset)
-		convert_to_utf8(line, endline - line, charset);
+		convert_to_utf8(line, charset.buf);
 
 	if (patchbreak(line)) {
 		fclose(cmitmsg);
@@ -756,18 +715,18 @@ static int handle_commit_msg(char *line, unsigned linesize)
 		return 1;
 	}
 
-	fputs(line, cmitmsg);
+	fputs(line->buf, cmitmsg);
 	return 0;
 }
 
-static int handle_patch(char *line, int len)
+static int handle_patch(const struct strbuf *line)
 {
-	fwrite(line, 1, len, patchfile);
+	fwrite(line->buf, 1, line->len, patchfile);
 	patch_lines++;
 	return 0;
 }
 
-static int handle_filter(char *line, unsigned linesize, int linelen)
+static int handle_filter(struct strbuf *line)
 {
 	static int filter = 0;
 
@@ -776,11 +735,11 @@ static int handle_filter(char *line, unsigned linesize, int linelen)
 	 */
 	switch (filter) {
 	case 0:
-		if (!handle_commit_msg(line, linesize))
+		if (!handle_commit_msg(line))
 			break;
 		filter++;
 	case 1:
-		if (!handle_patch(line, linelen))
+		if (!handle_patch(line))
 			break;
 		filter++;
 	default:
@@ -793,101 +752,105 @@ static int handle_filter(char *line, unsigned linesize, int linelen)
 static void handle_body(void)
 {
 	int rc = 0;
-	static char newline[2000];
-	static char *np = newline;
-	int len = strlen(line);
+	int len = 0;
+	struct strbuf prev = STRBUF_INIT;
 
 	/* Skip up to the first boundary */
 	if (content_top->boundary) {
 		if (!find_boundary())
-			return;
+			goto handle_body_out;
 	}
 
 	do {
+		strbuf_setlen(&line, line.len + len);
+
 		/* process any boundary lines */
-		if (content_top->boundary && is_multipart_boundary(line)) {
+		if (content_top->boundary && is_multipart_boundary(&line)) {
 			/* flush any leftover */
-			if (np != newline)
-				handle_filter(newline, sizeof(newline),
-					      np - newline);
+			if (line.len)
+				handle_filter(&line);
+
 			if (!handle_boundary())
-				return;
-			len = strlen(line);
+				goto handle_body_out;
 		}
 
 		/* Unwrap transfer encoding */
-		len = decode_transfer_encoding(line, sizeof(line), len);
-		if (len < 0) {
-			error("Malformed input line");
-			return;
-		}
+		decode_transfer_encoding(&line);
 
 		switch (transfer_encoding) {
 		case TE_BASE64:
 		case TE_QP:
 		{
-			char *op = line;
+			struct strbuf **lines, **it, *sb;
+
+			/* Prepend any previous partial lines */
+			strbuf_insert(&line, 0, prev.buf, prev.len);
+			strbuf_reset(&prev);
 
 			/* binary data most likely doesn't have newlines */
 			if (message_type != TYPE_TEXT) {
-				rc = handle_filter(line, sizeof(line), len);
+				rc = handle_filter(&line);
 				break;
 			}
-
 			/*
 			 * This is a decoded line that may contain
 			 * multiple new lines.  Pass only one chunk
 			 * at a time to handle_filter()
 			 */
-			do {
-				while (op < line + len && *op != '\n')
-					*np++ = *op++;
-				*np = *op;
-				if (*np != 0) {
-					/* should be sitting on a new line */
-					*(++np) = 0;
-					op++;
-					rc = handle_filter(newline, sizeof(newline), np - newline);
-					np = newline;
-				}
-			} while (op < line + len);
+			lines = strbuf_split(&line, '\n');
+			strbuf_reset(&line);
+			for (it = lines; (sb = *it); it++) {
+				if (*(it + 1) == NULL) /* The last token */
+					if (sb->buf[sb->len - 1] != '\n') {
+						/* Partial line, save it for later. */
+						strbuf_addbuf(&prev, sb);
+						break;
+					}
+				rc = handle_filter(sb);
+			}
 			/*
-			 * The partial chunk is saved in newline and will be
+			 * The partial chunk is saved in "prev" and will be
 			 * appended by the next iteration of read_line_with_nul().
 			 */
+			strbuf_list_free(lines);
 			break;
 		}
 		default:
-			rc = handle_filter(line, sizeof(line), len);
+			rc = handle_filter(&line);
+			strbuf_reset(&line);
 		}
 		if (rc)
 			/* nothing left to filter */
 			break;
-	} while ((len = read_line_with_nul(line, sizeof(line), fin)));
+		if (strbuf_avail(&line) < 100)
+			strbuf_grow(&line, 100);
+	} while ((len = read_line_with_nul(line.buf, strbuf_avail(&line), fin)));
 
+handle_body_out:
+	strbuf_release(&prev);
 	return;
 }
 
-static void output_header_lines(FILE *fout, const char *hdr, char *data)
+static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
 {
+	char *sp = data->buf;
 	while (1) {
-		char *ep = strchr(data, '\n');
+		char *ep = strchr(sp, '\n');
 		int len;
 		if (!ep)
-			len = strlen(data);
+			len = strlen(sp);
 		else
-			len = ep - data;
-		fprintf(fout, "%s: %.*s\n", hdr, len, data);
+			len = ep - sp;
+		fprintf(fout, "%s: %.*s\n", hdr, len, sp);
 		if (!ep)
 			break;
-		data = ep + 1;
+		sp = ep + 1;
 	}
 }
 
 static void handle_info(void)
 {
-	char *sub;
-	char *hdr;
+	struct strbuf *hdr;
 	int i;
 
 	for (i = 0; header[i]; i++) {
@@ -901,20 +864,18 @@ static void handle_info(void)
 			continue;
 
 		if (!memcmp(header[i], "Subject", 7)) {
-			if (keep_subject)
-				sub = hdr;
-			else {
-				sub = cleanup_subject(hdr);
-				cleanup_space(sub);
+			if (!keep_subject) {
+				cleanup_subject(hdr);
+				cleanup_space(hdr);
 			}
-			output_header_lines(fout, "Subject", sub);
+			output_header_lines(fout, "Subject", hdr);
 		} else if (!memcmp(header[i], "From", 4)) {
 			handle_from(hdr);
-			fprintf(fout, "Author: %s\n", name);
-			fprintf(fout, "Email: %s\n", email);
+			fprintf(fout, "Author: %s\n", name.buf);
+			fprintf(fout, "Email: %s\n", email.buf);
 		} else {
 			cleanup_space(hdr);
-			fprintf(fout, "%s: %s\n", header[i], hdr);
+			fprintf(fout, "%s: %s\n", header[i], hdr->buf);
 		}
 	}
 	fprintf(fout, "\n");
@@ -941,8 +902,8 @@ static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
 		return -1;
 	}
 
-	p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
-	s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
+	p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*p_hdr_data));
+	s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*s_hdr_data));
 
 	do {
 		peek = fgetc(in);
@@ -950,8 +911,8 @@ static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
 	ungetc(peek, in);
 
 	/* process the email header */
-	while (read_one_header_line(line, sizeof(line), fin))
-		check_header(line, sizeof(line), p_hdr_data, 1);
+	while (read_one_header_line(&line, fin))
+		check_header(&line, p_hdr_data, 1);
 
 	handle_body();
 	handle_info();
-- 
1.5.4.5

next prev parent reply	other threads:[~2008-07-10 23:44 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-10 21:41 [PATCH] git-mailinfo: Fix getting the subject from the body Lukas Sandström
     [not found] ` <7vod55o0tx.fsf@gitster.siamese.dyndns.org>
2008-07-10 22:37   ` Lukas Sandström
2008-07-10 23:25     ` Junio C Hamano
2008-07-10 23:41       ` [PATCH] Add some useful functions for strbuf manipulation Lukas Sandström
2008-07-10 23:43         ` Lukas Sandström [this message]
2008-07-12  6:10           ` Re:! [PATCH/RFC] git-mailinfo: use strbuf's instead of fixed buffers Junio C Hamano
2008-07-13 18:17             ` ! " Lukas Sandström
2008-07-13 18:28               ` [PATCH] Make some strbuf_*() struct strbuf arguments const Lukas Sandström
2008-07-13 18:29                 ` [PATCH] Add some useful functions for strbuf manipulation Lukas Sandström
2008-07-13 18:30                   ` [PATCH] git-mailinfo: use strbuf's instead of fixed buffers Lukas Sandström
2008-07-13 21:37                     ` Junio C Hamano
2008-07-12  9:36 ` [PATCH] git-mailinfo: Fix getting the subject from the body Junio C Hamano
2008-07-12 21:45   ` Lukas Sandström
2008-07-15  3:13   ` Don Zickus

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:2d1520f dfblob:254a97c )
 OR (
bs:"[PATCH/RFC] git-mailinfo: use strbuf's instead of fixed buffers" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48769E91.60205@etek.chalmers.se \
    --to=lukass@etek.chalmers.se \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.