From: "Michael S. Tsirkin" <mst@redhat.com>
To: git@vger.kernel.org
Cc: sunshine@sunshineco.com, jrnieder@gmail.com, peff@peff.net,
gitster@pobox.com
Subject: [PATCH v3 1/3] patch-id: make it stable against hunk reordering
Date: Sun, 30 Mar 2014 21:09:22 +0300 [thread overview]
Message-ID: <1396202583-2572-1-git-send-email-mst@redhat.com> (raw)
The patch id changes if users
1. reorder file diffs that make up a patch
or
2. split a patch up into multiple diffs that touch the same path
(keeping hunks within a single diff ordered to keep the patch valid).
As the result is functionally equivalent, a different patch id is
surprising to many users.
In particular, reordering files using diff -O is helpful to make patches
more readable (e.g. API header diff before implementation diff).
Change patch-id behaviour to make it stable against these two kinds
of patch change:
1. calculate SHA1 hash for each hunk separately and sum all hashes
(using a symmetrical sum) to get patch id
2. hash the file-level headers together with each hunk (not just the
first hunk)
We use a 20-byte sum rather than xor, since xor would give 0 output
for patches that have two identical diffs, which isn't all that
unlikely (e.g. append the same line in two places).
Add a new flag --unstable to get the historical behaviour.
Add --stable which is a nop, for symmetry.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
changes from v2:
several bugfixes
changes from v1:
documented motivation for supporting
diff splitting (and not just file reordering).
No code changes.
builtin/patch-id.c | 72 ++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 56 insertions(+), 16 deletions(-)
diff --git a/builtin/patch-id.c b/builtin/patch-id.c
index 3cfe02d..7fd7007 100644
--- a/builtin/patch-id.c
+++ b/builtin/patch-id.c
@@ -1,17 +1,14 @@
#include "builtin.h"
-static void flush_current_id(int patchlen, unsigned char *id, git_SHA_CTX *c)
+static void flush_current_id(int patchlen, unsigned char *id, unsigned char *result)
{
- unsigned char result[20];
char name[50];
if (!patchlen)
return;
- git_SHA1_Final(result, c);
memcpy(name, sha1_to_hex(id), 41);
printf("%s %s\n", sha1_to_hex(result), name);
- git_SHA1_Init(c);
}
static int remove_space(char *line)
@@ -56,10 +53,31 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after)
return 1;
}
-static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct strbuf *line_buf)
+static void flush_one_hunk(unsigned char *result, git_SHA_CTX *ctx)
{
- int patchlen = 0, found_next = 0;
+ unsigned char hash[20];
+ unsigned short carry = 0;
+ int i;
+
+ git_SHA1_Final(hash, ctx);
+ git_SHA1_Init(ctx);
+ /* 20-byte sum, with carry */
+ for (i = 0; i < 20; ++i) {
+ carry += result[i] + hash[i];
+ result[i] = carry;
+ carry >>= 8;
+ }
+}
+
+static int get_one_patchid(unsigned char *next_sha1, unsigned char *result,
+ struct strbuf *line_buf, int stable)
+{
+ int patchlen = 0, found_next = 0, hunks = 0;
int before = -1, after = -1;
+ git_SHA_CTX ctx, header_ctx;
+
+ git_SHA1_Init(&ctx);
+ hashclr(result);
while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) {
char *line = line_buf->buf;
@@ -98,7 +116,19 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
if (before == 0 && after == 0) {
if (!memcmp(line, "@@ -", 4)) {
/* Parse next hunk, but ignore line numbers. */
+ if (stable) {
+ /* Hash the file-level headers together with each hunk. */
+ if (hunks) {
+ flush_one_hunk(result, &ctx);
+ /* Prepend saved header ctx for next hunk. */
+ memcpy(&ctx, &header_ctx, sizeof ctx);
+ } else {
+ /* Save header ctx for next hunk. */
+ memcpy(&header_ctx, &ctx, sizeof ctx);
+ }
+ }
scan_hunk_header(line, &before, &after);
+ hunks++;
continue;
}
@@ -107,7 +137,10 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
break;
/* Else we're parsing another header. */
+ if (stable && hunks)
+ flush_one_hunk(result, &ctx);
before = after = -1;
+ hunks = 0;
}
/* If we get here, we're inside a hunk. */
@@ -119,39 +152,46 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
/* Compute the sha without whitespace */
len = remove_space(line);
patchlen += len;
- git_SHA1_Update(ctx, line, len);
+ git_SHA1_Update(&ctx, line, len);
}
if (!found_next)
hashclr(next_sha1);
+ flush_one_hunk(result, &ctx);
+
return patchlen;
}
-static void generate_id_list(void)
+static void generate_id_list(int stable)
{
- unsigned char sha1[20], n[20];
- git_SHA_CTX ctx;
+ unsigned char sha1[20], n[20], result[20];
int patchlen;
struct strbuf line_buf = STRBUF_INIT;
- git_SHA1_Init(&ctx);
hashclr(sha1);
while (!feof(stdin)) {
- patchlen = get_one_patchid(n, &ctx, &line_buf);
- flush_current_id(patchlen, sha1, &ctx);
+ patchlen = get_one_patchid(n, result, &line_buf, stable);
+ flush_current_id(patchlen, sha1, result);
hashcpy(sha1, n);
}
strbuf_release(&line_buf);
}
-static const char patch_id_usage[] = "git patch-id < patch";
+static const char patch_id_usage[] = "git patch-id [--stable | --unstable] < patch";
int cmd_patch_id(int argc, const char **argv, const char *prefix)
{
- if (argc != 1)
+ int stable;
+ if (argc == 2 && !strcmp(argv[1], "--stable"))
+ stable = 1;
+ else if (argc == 2 && !strcmp(argv[1], "--unstable"))
+ stable = 0;
+ else if (argc == 1)
+ stable = 1;
+ else
usage(patch_id_usage);
- generate_id_list();
+ generate_id_list(stable);
return 0;
}
--
MST
next reply other threads:[~2014-03-30 18:09 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-03-30 18:09 Michael S. Tsirkin [this message]
2014-03-30 18:09 ` [PATCH v3 2/3] patch-id: document new behaviour Michael S. Tsirkin
2014-03-31 19:08 ` Junio C Hamano
2014-03-31 19:26 ` Michael S. Tsirkin
2014-03-31 19:54 ` Junio C Hamano
2014-03-31 20:42 ` Michael S. Tsirkin
2014-04-02 18:18 ` Junio C Hamano
2014-04-02 19:02 ` Michael S. Tsirkin
2014-04-03 15:42 ` Junio C Hamano
2014-03-30 18:09 ` [PATCH v3 3/3] patch-id-test: test --stable and --unstable flags Michael S. Tsirkin
2014-03-31 19:29 ` Junio C Hamano
2014-03-31 17:59 ` [PATCH v3 1/3] patch-id: make it stable against hunk reordering Junio C Hamano
2014-03-31 19:04 ` Michael S. Tsirkin
2014-03-31 19:35 ` Junio C Hamano
2014-03-31 22:05 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1396202583-2572-1-git-send-email-mst@redhat.com \
--to=mst@redhat.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=jrnieder@gmail.com \
--cc=peff@peff.net \
--cc=sunshine@sunshineco.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).