git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Barr <david.barr@cordelta.com>
To: Git Mailing List <git@vger.kernel.org>
Cc: Jonathan Nieder <jrnieder@gmail.com>,
	Ramkumar Ramachandra <artagnon@gmail.com>,
	Sverre Rabbelier <srabbelier@gmail.com>,
	Sam Vilain <sam@vilain.net>, Stephen Bash <bash@genarts.com>,
	Tomas Carnecky <tom@dbservice.com>,
	David Barr <david.barr@cordelta.com>
Subject: [PATCH 14/16] vcs-svn: let deltas use data from preimage
Date: Sat, 19 Mar 2011 18:20:52 +1100	[thread overview]
Message-ID: <1300519254-20201-15-git-send-email-david.barr@cordelta.com> (raw)
In-Reply-To: <1300519254-20201-1-git-send-email-david.barr@cordelta.com>

From: Jonathan Nieder <jrnieder@gmail.com>

The copyfrom_source instruction appends data from the preimage buffer
to the end of output.  Its arguments are a length and an offset
relative to the beginning of the source view.

With this change, the delta applier is able to reproduce all 5,636,613
blobs in the early history of the ASF repository.  Tested with

	mkfifo backflow
	svn-fe <svn-asf-public-r0:940166 3<backflow |
	git fast-import --cat-blob-fd=3 3>backflow

with svn-asf-public-r0:940166 produced by whatever version of
Subversion the dumps in /dump/ on svn.apache.org use (presumably
1.6.something).

Improved-by: Ramkumar Ramachandra <artagnon@gmail.com>
Improved-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Acked-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: David Barr <david.barr@cordelta.com>
---
 t/t9011-svn-da.sh |   35 +++++++++++++++++++++++++++++++++++
 vcs-svn/svndiff.c |   28 +++++++++++++++++++++++-----
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/t/t9011-svn-da.sh b/t/t9011-svn-da.sh
index d13115a..45e396f 100755
--- a/t/t9011-svn-da.sh
+++ b/t/t9011-svn-da.sh
@@ -212,4 +212,39 @@ test_expect_success 'catch copy that overflows' '
 	test_must_fail test-svn-fe -d preimage copytarget.overflow $len
 '
 
+test_expect_success 'copyfrom source' '
+	printf foo >expect &&
+	printf "SVNQ%b%b" "Q\003\003\002Q" "\003Q" | q_to_nul >copysource.all &&
+	test-svn-fe -d preimage copysource.all 11 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'copy backwards' '
+	printf oof >expect &&
+	printf "SVNQ%b%b" "Q\003\003\006Q" "\001\002\001\001\001Q" |
+		q_to_nul >copysource.rev &&
+	test-svn-fe -d preimage copysource.rev 15 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'offsets are relative to window' '
+	printf fo >expect &&
+	printf "SVNQ%b%b%b%b" "Q\003\001\002Q" "\001Q" \
+		"\002\001\001\002Q" "\001Q" |
+		q_to_nul >copysource.two &&
+	test-svn-fe -d preimage copysource.two 18 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'example from notes/svndiff' '
+	printf aaaaccccdddddddd >expect &&
+	printf aaaabbbbcccc >source &&
+	printf "SVNQ%b%b%s" "Q\014\020\007\001" \
+		"\004Q\004\010\0201\0107\010" d |
+		q_to_nul >delta.example &&
+	len=$(wc -c <delta.example) &&
+	test-svn-fe -d source delta.example $len >actual &&
+	test_cmp expect actual
+'
+
 test_done
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c
index f4c5dae..b7d40c8 100644
--- a/vcs-svn/svndiff.c
+++ b/vcs-svn/svndiff.c
@@ -24,6 +24,7 @@
  * view_selector ::= copyfrom_source
  *   | copyfrom_target
  *   ;
+ * copyfrom_source ::= # binary 00 000000;
  * copyfrom_target ::= # binary 01 000000;
  * copyfrom_data ::= # binary 10 000000;
  * packed_view_selector ::= # view_selector OR-ed with 6 bit value;
@@ -34,6 +35,7 @@
  */
 
 #define INSN_MASK	0xc0
+#define INSN_COPYFROM_SOURCE	0x00
 #define INSN_COPYFROM_TARGET	0x40
 #define INSN_COPYFROM_DATA	0x80
 #define OPERAND_MASK	0x3f
@@ -43,12 +45,13 @@
 #define VLI_BITS_PER_DIGIT 7
 
 struct window {
+	struct sliding_view *in;
 	struct strbuf out;
 	struct strbuf instructions;
 	struct strbuf data;
 };
 
-#define WINDOW_INIT	{ STRBUF_INIT, STRBUF_INIT, STRBUF_INIT }
+#define WINDOW_INIT(w)	{ w, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT }
 
 static void window_release(struct window *ctx)
 {
@@ -161,6 +164,19 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len)
 	return 0;
 }
 
+static int copyfrom_source(struct window *ctx, const char **instructions,
+			   size_t nbytes, const char *insns_end)
+{
+	size_t offset;
+	if (parse_int(instructions, &offset, insns_end))
+		return -1;
+	if (unsigned_add_overflows(offset, nbytes) ||
+	    offset + nbytes > ctx->in->width)
+		return error("invalid delta: copies source data outside view");
+	strbuf_add(&ctx->out, ctx->in->buf.buf + offset, nbytes);
+	return 0;
+}
+
 static int copyfrom_target(struct window *ctx, const char **instructions,
 			   size_t nbytes, const char *instructions_end)
 {
@@ -209,12 +225,14 @@ static int execute_one_instruction(struct window *ctx,
 	if (parse_first_operand(instructions, &nbytes, insns_end))
 		return -1;
 	switch (instruction & INSN_MASK) {
+	case INSN_COPYFROM_SOURCE:
+		return copyfrom_source(ctx, instructions, nbytes, insns_end);
 	case INSN_COPYFROM_TARGET:
 		return copyfrom_target(ctx, instructions, nbytes, insns_end);
 	case INSN_COPYFROM_DATA:
 		return copyfrom_data(ctx, data_pos, nbytes);
 	default:
-		return error("Unknown instruction %x", instruction);
+		return error("invalid delta: unrecognized instruction");
 	}
 }
 
@@ -238,9 +256,9 @@ static int apply_window_in_core(struct window *ctx)
 }
 
 static int apply_one_window(struct line_buffer *delta, off_t *delta_len,
-			    FILE *out)
+			    struct sliding_view *preimage, FILE *out)
 {
-	struct window ctx = WINDOW_INIT;
+	struct window ctx = WINDOW_INIT(preimage);
 	size_t out_len;
 	size_t instructions_len;
 	size_t data_len;
@@ -285,7 +303,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len,
 		if (read_offset(delta, &pre_off, &delta_len) ||
 		    read_length(delta, &pre_len, &delta_len) ||
 		    move_window(preimage, pre_off, pre_len) ||
-		    apply_one_window(delta, &delta_len, postimage))
+		    apply_one_window(delta, &delta_len, preimage, postimage))
 			return -1;
 	}
 	return 0;
-- 
1.7.3.2.846.gf4b062

  parent reply	other threads:[~2011-03-19  7:22 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-03-19  7:03 vcs-svn: purge obsolete data structures and code David Barr
2011-03-19  7:03 ` [PATCH 1/9] vcs-svn: pass paths through to fast-import David Barr
2011-03-19  7:50   ` Jonathan Nieder
2011-03-19  7:03 ` [PATCH 2/9] vcs-svn: avoid using ls command twice David Barr
2011-03-19  8:01   ` Jonathan Nieder
2011-03-19  7:03 ` [PATCH 3/9] vcs-svn: implement perfect hash for node-prop keys David Barr
2011-03-19  8:51   ` Jonathan Nieder
2011-03-21  1:26     ` [PATCH 1/3] " David Barr
2011-03-21  1:26       ` [PATCH 2/3] vcs-svn: implement perfect hash for top-level keys David Barr
2011-03-21  1:26       ` [PATCH 3/3] vcs-svn: use switch rather than cascading ifs David Barr
2011-03-21  1:38         ` [PATCHv2] " David Barr
2011-03-19  7:03 ` [PATCH 4/9] vcs-svn: implement perfect hash for top-level keys David Barr
2011-03-19  8:57   ` Jonathan Nieder
2011-03-19  7:03 ` [PATCH 5/9] vcs-svn: factor out usage of string_pool David Barr
2011-03-19  9:08   ` Jonathan Nieder
2011-03-19  7:03 ` [PATCH 6/9] vcs-svn: drop string_pool David Barr
2011-03-19  7:03 ` [PATCH 7/9] vcs-svn: drop trp.h David Barr
2011-03-19  7:03 ` [PATCH 8/9] vcs-svn: drop obj_pool.h David Barr
2011-03-19  7:03 ` [PATCH 9/9] vcs-svn: use strchr to find RFC822 delimiter David Barr
2011-03-19  9:10   ` Jonathan Nieder
2011-03-19  7:20 ` vcs-svn: integrate support for text deltas David Barr
2011-03-19  7:20   ` [PATCH 01/16] vcs-svn: improve support for reading large files David Barr
2011-03-19  7:20   ` [PATCH 02/16] vcs-svn: make buffer_skip_bytes return length read David Barr
2011-03-19  7:20   ` [PATCH 03/16] vcs-svn: make buffer_copy_bytes " David Barr
2011-03-19  7:20   ` [PATCH 04/16] vcs-svn: improve reporting of input errors David Barr
2011-03-19  7:20   ` [PATCH 05/16] vcs-svn: learn to maintain a sliding view of a file David Barr
2011-03-19  7:20   ` [PATCH 06/16] vcs-svn: skeleton of an svn delta parser David Barr
2011-03-28  3:30     ` Jonathan Nieder
2011-03-19  7:20   ` [PATCH 07/16] vcs-svn: parse svndiff0 window header David Barr
2011-03-19  7:20   ` [PATCH 08/16] vcs-svn: read the preimage when applying deltas David Barr
2011-03-19  7:20   ` [PATCH 09/16] vcs-svn: read inline data from deltas David Barr
2011-03-19  7:20   ` [PATCH 10/16] vcs-svn: read instructions " David Barr
2011-03-19  7:20   ` [PATCH 11/16] vcs-svn: implement copyfrom_data delta instruction David Barr
2011-03-19  7:20   ` [PATCH 12/16] vcs-svn: verify that deltas consume all inline data David Barr
2011-03-19  7:20   ` [PATCH 13/16] vcs-svn: let deltas use data from postimage David Barr
2011-03-19  7:20   ` David Barr [this message]
2011-03-19  7:20   ` [PATCH 15/16] vcs-svn: microcleanup in svndiff0 window-reading code David Barr
2011-03-19  7:20   ` [PATCH 16/16] vcs-svn: implement text-delta handling David Barr
2011-03-28  7:00   ` vcs-svn: integrate support for text deltas Jonathan Nieder
2011-03-28 11:56     ` David Barr
2011-03-21 23:49 ` [PATCHv2 00/11] vcs-svn: purge obsolete data structures and code David Barr
2011-03-21 23:49   ` [PATCH 01/11] vcs-svn: use strbuf for revision log David Barr
2011-03-21 23:49   ` [PATCH 02/11] vcs-svn: pass paths through to fast-import David Barr
2011-03-21 23:49   ` [PATCH 03/11] vcs-svn: avoid using ls command twice David Barr
2011-03-21 23:49   ` [PATCH 04/11] vcs-svn: implement perfect hash for node-prop keys David Barr
2011-03-21 23:49   ` [PATCH 05/11] vcs-svn: implement perfect hash for top-level keys David Barr
2011-03-21 23:49   ` [PATCH 06/11] vcs-svn: use switch rather than cascading ifs David Barr
2011-03-21 23:49   ` [PATCH 07/11] vcs-svn: factor out usage of string_pool David Barr
2011-03-21 23:49   ` [PATCH 08/11] vcs-svn: drop string_pool David Barr
2011-03-21 23:49   ` [PATCH 09/11] vcs-svn: drop trp.h David Barr
2011-03-21 23:49   ` [PATCH 10/11] vcs-svn: drop obj_pool.h David Barr
2011-03-21 23:50   ` [PATCH 11/11] vcs-svn: use strchr to find RFC822 delimiter David Barr
2011-03-23  0:32   ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
2011-03-23  5:46     ` Junio C Hamano
2011-03-23  6:03       ` Junio C Hamano
2011-03-26  6:42         ` Jonathan Nieder
2011-03-26  9:49           ` t0081-line-buffer.sh hangs (Re: [PULL svn-fe] vcs-svn: simplifications, error handling improvements) Jonathan Nieder
2011-03-23  7:11       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements David Barr
2011-03-24 12:43       ` [PATCH] fixup! vcs-svn: improve reporting of input errors David Barr
2011-03-25  1:12         ` Jonathan Nieder
2011-03-25  3:34         ` [PATCH svn-fe 0/4] vcs-svn: null bytes in properties Jonathan Nieder
2011-03-25  4:07           ` [PATCH 1/4] vcs-svn: make reading of properties binary-safe Jonathan Nieder
2011-03-28 15:34             ` tb
2011-03-28 19:41               ` Jonathan Nieder
2011-03-28 20:30                 ` Torsten Bögershausen
2011-03-28 20:44                   ` Jonathan Nieder
2011-03-25  4:09           ` [PATCH 2/4] vcs-svn: remove buffer_read_string Jonathan Nieder
2011-03-25  4:10           ` [PATCH 3/4] vcs-svn: avoid unnecessary copying of log message and author Jonathan Nieder
2011-03-25  4:11           ` [PATCH 4/4] vcs-svn: handle log message with embedded null bytes Jonathan Nieder
2011-03-26  6:46       ` [PULL svn-fe] vcs-svn: simplifications, error handling improvements Jonathan Nieder
2011-03-26 18:36         ` Junio C Hamano
2011-03-28  0:38           ` [PATCH svn-fe] vcs-svn: add missing cast to printf argument Jonathan Nieder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1300519254-20201-15-git-send-email-david.barr@cordelta.com \
    --to=david.barr@cordelta.com \
    --cc=artagnon@gmail.com \
    --cc=bash@genarts.com \
    --cc=git@vger.kernel.org \
    --cc=jrnieder@gmail.com \
    --cc=sam@vilain.net \
    --cc=srabbelier@gmail.com \
    --cc=tom@dbservice.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).