git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: git@vger.kernel.org
Cc: sunshine@sunshineco.com, jrnieder@gmail.com, peff@peff.net,
	gitster@pobox.com
Subject: [PATCH v5 4/9] patch-id: make it stable against hunk reordering
Date: Thu, 24 Apr 2014 12:31:00 +0300	[thread overview]
Message-ID: <1398331809-11309-4-git-send-email-mst@redhat.com> (raw)
In-Reply-To: <1398331809-11309-1-git-send-email-mst@redhat.com>

Patch id changes if users
1. reorder file diffs that make up a patch
or
2. split a patch up into multiple diffs that touch the same path
(keeping hunks within a single diff ordered to make the patch valid).

As the result is functionally equivalent, a different patch id is
surprising to many users.
In particular, reordering files using diff -O is helpful to make patches
more readable (e.g. API header diff before implementation diff).

Add an option to change patch-id behaviour making it stable against
these two kinds of patch change:
1. calculate SHA1 hash for each hunk separately and sum all hashes
(using a symmetrical sum) to get patch id
2. hash the file-level headers together with each hunk (not just the
first hunk)

We use a 20-byte sum and not xor, since xor would give 0 output
for patches that have two identical diffs, which isn't all that
unlikely (e.g. append the same line in two places).

The new behaviour is enabled
- when patchid.stable is true, or
- when the --stable flag is present

Using the new flag --unstable or setting patchid.stable to false forces
the historical behaviour.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 builtin/patch-id.c | 89 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 73 insertions(+), 16 deletions(-)

diff --git a/builtin/patch-id.c b/builtin/patch-id.c
index 3cfe02d..037cf2f 100644
--- a/builtin/patch-id.c
+++ b/builtin/patch-id.c
@@ -1,17 +1,14 @@
 #include "builtin.h"
 
-static void flush_current_id(int patchlen, unsigned char *id, git_SHA_CTX *c)
+static void flush_current_id(int patchlen, unsigned char *id, unsigned char *result)
 {
-	unsigned char result[20];
 	char name[50];
 
 	if (!patchlen)
 		return;
 
-	git_SHA1_Final(result, c);
 	memcpy(name, sha1_to_hex(id), 41);
 	printf("%s %s\n", sha1_to_hex(result), name);
-	git_SHA1_Init(c);
 }
 
 static int remove_space(char *line)
@@ -56,10 +53,31 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after)
 	return 1;
 }
 
-static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct strbuf *line_buf)
+static void flush_one_hunk(unsigned char *result, git_SHA_CTX *ctx)
 {
-	int patchlen = 0, found_next = 0;
+	unsigned char hash[20];
+	unsigned short carry = 0;
+	int i;
+
+	git_SHA1_Final(hash, ctx);
+	git_SHA1_Init(ctx);
+	/* 20-byte sum, with carry */
+	for (i = 0; i < 20; ++i) {
+		carry += result[i] + hash[i];
+		result[i] = carry;
+		carry >>= 8;
+	}
+}
+
+static int get_one_patchid(unsigned char *next_sha1, unsigned char *result,
+			   struct strbuf *line_buf, int stable)
+{
+	int patchlen = 0, found_next = 0, hunks = 0;
 	int before = -1, after = -1;
+	git_SHA_CTX ctx, header_ctx;
+
+	git_SHA1_Init(&ctx);
+	hashclr(result);
 
 	while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) {
 		char *line = line_buf->buf;
@@ -98,7 +116,19 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 		if (before == 0 && after == 0) {
 			if (!memcmp(line, "@@ -", 4)) {
 				/* Parse next hunk, but ignore line numbers.  */
+				if (stable) {
+					/* Hash the file-level headers together with each hunk. */
+					if (hunks) {
+						flush_one_hunk(result, &ctx);
+						/* Prepend saved header ctx for next hunk.  */
+						memcpy(&ctx, &header_ctx, sizeof(ctx));
+					} else {
+						/* Save header ctx for next hunk.  */
+						memcpy(&header_ctx, &ctx, sizeof(ctx));
+					}
+				}
 				scan_hunk_header(line, &before, &after);
+				hunks++;
 				continue;
 			}
 
@@ -107,7 +137,10 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 				break;
 
 			/* Else we're parsing another header.  */
+			if (stable && hunks)
+				flush_one_hunk(result, &ctx);
 			before = after = -1;
+			hunks = 0;
 		}
 
 		/* If we get here, we're inside a hunk.  */
@@ -119,39 +152,63 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 		/* Compute the sha without whitespace */
 		len = remove_space(line);
 		patchlen += len;
-		git_SHA1_Update(ctx, line, len);
+		git_SHA1_Update(&ctx, line, len);
 	}
 
 	if (!found_next)
 		hashclr(next_sha1);
 
+	flush_one_hunk(result, &ctx);
+
 	return patchlen;
 }
 
-static void generate_id_list(void)
+static void generate_id_list(int stable)
 {
-	unsigned char sha1[20], n[20];
-	git_SHA_CTX ctx;
+	unsigned char sha1[20], n[20], result[20];
 	int patchlen;
 	struct strbuf line_buf = STRBUF_INIT;
 
-	git_SHA1_Init(&ctx);
 	hashclr(sha1);
 	while (!feof(stdin)) {
-		patchlen = get_one_patchid(n, &ctx, &line_buf);
-		flush_current_id(patchlen, sha1, &ctx);
+		patchlen = get_one_patchid(n, result, &line_buf, stable);
+		flush_current_id(patchlen, sha1, result);
 		hashcpy(sha1, n);
 	}
 	strbuf_release(&line_buf);
 }
 
-static const char patch_id_usage[] = "git patch-id < patch";
+static const char patch_id_usage[] = "git patch-id [--stable | --unstable] < patch";
+
+static int git_patch_id_config(const char *var, const char *value, void *cb)
+{
+	int *stable = cb;
+
+	if (!strcmp(var, "patchid.stable")) {
+		*stable = git_config_bool(var, value);
+		return 0;
+	}
+
+	return git_default_config(var, value, cb);
+}
 
 int cmd_patch_id(int argc, const char **argv, const char *prefix)
 {
-	if (argc != 1)
+	int stable = -1;
+
+	git_config(git_patch_id_config, &stable);
+
+	/* If nothing is set, default to unstable. */
+	if (stable < 0)
+		stable = 0;
+
+	if (argc == 2 && !strcmp(argv[1], "--stable"))
+		stable = 1;
+	else if (argc == 2 && !strcmp(argv[1], "--unstable"))
+		stable = 0;
+	else if (argc != 1)
 		usage(patch_id_usage);
 
-	generate_id_list();
+	generate_id_list(stable);
 	return 0;
 }
-- 
MST

  parent reply	other threads:[~2014-04-24  9:52 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-24  9:30 [PATCH v5 1/9] diff: add a config option to control orderfile Michael S. Tsirkin
2014-04-24  9:30 ` [PATCH v5 2/9] test: add test_write_lines helper Michael S. Tsirkin
2014-04-24 17:08   ` Jonathan Nieder
2014-04-24 18:31     ` Junio C Hamano
2014-04-24  9:30 ` [PATCH v5 3/9] tests: new test for orderfile options Michael S. Tsirkin
2014-04-24 17:11   ` Jonathan Nieder
2014-04-24 18:45   ` Junio C Hamano
2014-04-24 21:39     ` Michael S. Tsirkin
2014-04-24  9:31 ` Michael S. Tsirkin [this message]
2014-04-24 17:30   ` [PATCH v5 4/9] patch-id: make it stable against hunk reordering Jonathan Nieder
2014-04-24 19:12     ` Junio C Hamano
2014-04-24 21:32     ` Michael S. Tsirkin
2014-04-24  9:31 ` [PATCH v5 5/9] patch-id: document new behaviour Michael S. Tsirkin
2014-04-24 17:33   ` Jonathan Nieder
2014-04-24 21:26     ` Michael S. Tsirkin
2014-04-24 22:12       ` Junio C Hamano
2014-04-27 18:26         ` Michael S. Tsirkin
2014-04-24  9:31 ` [PATCH v5 6/9] patch-id-test: test stable and unstable behaviour Michael S. Tsirkin
2014-04-24  9:31 ` [PATCH v5 7/9] patch-id: change default to stable Michael S. Tsirkin
2014-04-24  9:31 ` [PATCH v5 8/9] t4204-patch-id.sh: default is now stable Michael S. Tsirkin
2014-04-24  9:31 ` [PATCH v5 9/9] Documentation/git-patch-id.txt: default is stable Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1398331809-11309-4-git-send-email-mst@redhat.com \
    --to=mst@redhat.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jrnieder@gmail.com \
    --cc=peff@peff.net \
    --cc=sunshine@sunshineco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).