* [RFH] Exploration of an alternative diff_delta() algorithm
@ 2006-04-09 14:31 Peter Eriksen
2006-04-09 17:14 ` Nicolas Pitre
0 siblings, 1 reply; 9+ messages in thread
From: Peter Eriksen @ 2006-04-09 14:31 UTC (permalink / raw)
To: git
Greetings Gitlings,
I've been trying to implement an alternative algorithm
for diff_delta(). I'm getting close to something that
works, but now I'm stuck! I think it has something to
do with pack-objects.c, but I'm not sure. Here's the
first test that fails:
*** t5500-fetch-pack.sh ***
* FAIL 1: 1st pull
git-fetch-pack -v .. B A > log.txt 2>&1
* FAIL 2: fsck
git-fsck-objects --full > fsck.txt 2>&1
* FAIL 3: new object count after 1st pull
test 33 = 0
* FAIL 4: minimal count
test 33 = 0
* FAIL 5: repack && prune-packed in client
(git-repack && git-prune-packed)2>>log.txt
* ok 5: 2nd pull
* ok 6: fsck
* FAIL 7: new object count after 2nd pull
test 192 = 198
* FAIL 8: minimal count
test 192 = 198
* FAIL 9: repack && prune-packed in client
(git-repack && git-prune-packed)2>>log.txt
* ok 9: 3rd pull
* ok 10: fsck
* FAIL 11: new object count after 3rd pull
test 3 = 228
* FAIL 12: minimal count
test 3 = 30
* failed 8 among 12 test(s)
I've been looking all around the current diff_delta(), and I
can't see, what I'm missing. Any ideas? The file is meant to
replace the current diff-delta.c.
Peter
----->8--diff-delta.c----->8----
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "delta.h"
#define BASE 257
#define PREFIX_SIZE 3
#define SIZE 10
#define HASH_TABLE_SIZE (1<<SIZE)
#define DELTA_SIZE (1024 * 1024)
unsigned int init_hash(unsigned char* data) {
return data[0]*BASE*BASE + data[1]*BASE + data[2];
}
unsigned int hash(unsigned char* data, unsigned int hash) {
return (hash - data[-1]*BASE*BASE)*BASE + data[2];
}
#define GR_PRIME 0x9e370001
#define HASH(v) ((v * GR_PRIME) >> (32 - SIZE))
struct entry {
char file;
char* offset;
};
void flush(struct entry* table) {
memset(table, 0, HASH_TABLE_SIZE * sizeof(struct entry));
}
int same_prefixes(char* data1, char* data2) {
return !memcmp(data1, data2, PREFIX_SIZE);
}
void encode_add(char* out, int* outpos, char* version_start, char* version_copy) {
unsigned int size = version_copy - version_start;
if (!size) return;
int pos = *outpos;
while(size > 127) {
out[pos++] = 127;
memcpy(out + pos, version_start, 127);
pos += 127;
version_start += 127;
size -= 127;
}
out[pos++] = size;
memcpy(out + pos, version_start, size);
pos += size;
*outpos = pos;
}
void encode_copy(char* out, int* outpos, int offset, int size) {
int pos = (*outpos) + 1;
int i = 0x80;
if (offset & 0xff) { out[pos++] = offset; i |= 0x01; }
offset >>= 8;
if (offset & 0xff) { out[pos++] = offset; i |= 0x02; }
offset >>= 8;
if (offset & 0xff) { out[pos++] = offset; i |= 0x04; }
offset >>= 8;
if (offset & 0xff) { out[pos++] = offset; i |= 0x08; }
if (size & 0xff) { out[pos++] = size; i |= 0x10; }
size >>= 8;
if (size & 0xff) { out[pos++] = size; i |= 0x20; }
out[*outpos] = i;
*outpos = pos;
}
void encode_size(char* out, int* outpos, unsigned long size) {
int pos = *outpos;
out[pos] = size;
size >>= 7;
while (size) {
out[pos++] |= 0x80;
out[pos] = size;
size >>= 7;
}
*outpos = ++pos;
}
void *diff_delta(void *from_buf, unsigned long from_size,
void *to_buf, unsigned long to_size,
unsigned long *delta_size,
unsigned long max_size) {
int index;
int l;
char* base = from_buf;
char* version = to_buf;
unsigned long base_size = from_size;
unsigned long version_size = to_size;
char* base_copy = base;
char* version_copy = version;
struct entry* table = calloc(HASH_TABLE_SIZE, sizeof(struct entry));
//int delta_alloc = DELTA_SIZE;
char* delta = malloc(DELTA_SIZE);
int deltapos = 0;
char* base_top = base + base_size;
char* version_top = version + version_size;
encode_size(delta, &deltapos, base_size);
encode_size(delta, &deltapos, version_size);
char* base_offset = base;
char* version_offset = version;
unsigned int base_hash = init_hash(base);
unsigned int version_hash = init_hash(version);
char* version_start = version;
while(base_offset + PREFIX_SIZE < base_top &&
version_offset + PREFIX_SIZE < version_top) {
// step2:
index = HASH(base_hash);
switch (table[index].file) {
case '\0': {
table[index].file = 'b';
table[index].offset = base_offset;
break;
}
case 'v': {
if (same_prefixes(base_offset, table[index].offset)) {
base_copy = base_offset;
version_copy = table[index].offset;
goto step3;
} else break;
}
case 'b': break;
default: printf("AAAAAARGH 2b\n");
}
index = HASH(version_hash);
switch (table[index].file) {
case '\0': {
table[index].file = 'v';
table[index].offset = version_offset;
break;
}
case 'b': {
if (same_prefixes(table[index].offset, version_offset)) {
base_copy = table[index].offset;
version_copy = version_offset;
goto step3;
} else break;
}
case 'v': break;
default: printf("AAAAAARGH 2v\n");
}
base_offset++;
version_offset++;
base_hash = hash(base_offset, base_hash);
version_hash = hash(version_offset, version_hash);
continue; // goto step2;
step3:
l = 0;
while(base_copy[l] == version_copy[l]) l++;
base_offset = base_copy + l;
version_offset = version_copy + l;
/*
// Make sure we don't run out of delta buffer when encoding.
if((delta_alloc - deltapos) <
(version_start - version_copy) + 1 + 8 + (PREFIX_SIZE + 1)) {
delta_alloc = delta_alloc * 3 / 2;
delta = (char*) realloc(delta, delta_alloc);
}
*/
if(max_size && deltapos > max_size) {
free(delta);
free(table);
return NULL;
}
// step4:
encode_add(delta, &deltapos, version_start, version_copy);
encode_copy(delta, &deltapos, base_copy - base, l);
// step5:
flush(table);
version_start = version_offset;
base_hash = init_hash(base_offset);
version_hash = init_hash(version_offset);
} // goto step2;
encode_add(delta, &deltapos, version_start, version + version_size);
*delta_size = deltapos;
free(table);
return delta;
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFH] Exploration of an alternative diff_delta() algorithm
2006-04-09 14:31 [RFH] Exploration of an alternative diff_delta() algorithm Peter Eriksen
@ 2006-04-09 17:14 ` Nicolas Pitre
2006-04-09 17:34 ` Peter Eriksen
2006-04-09 17:40 ` Nicolas Pitre
0 siblings, 2 replies; 9+ messages in thread
From: Nicolas Pitre @ 2006-04-09 17:14 UTC (permalink / raw)
To: Peter Eriksen; +Cc: git
On Sun, 9 Apr 2006, Peter Eriksen wrote:
> Greetings Gitlings,
>
> I've been trying to implement an alternative algorithm
> for diff_delta(). I'm getting close to something that
> works, but now I'm stuck! I think it has something to
> do with pack-objects.c, but I'm not sure. Here's the
> first test that fails:
>
> *** t5500-fetch-pack.sh ***
> * FAIL 1: 1st pull
> git-fetch-pack -v .. B A > log.txt 2>&1
> * FAIL 2: fsck
> git-fsck-objects --full > fsck.txt 2>&1
> * FAIL 3: new object count after 1st pull
> test 33 = 0
> * FAIL 4: minimal count
> test 33 = 0
> * FAIL 5: repack && prune-packed in client
> (git-repack && git-prune-packed)2>>log.txt
> * ok 5: 2nd pull
> * ok 6: fsck
> * FAIL 7: new object count after 2nd pull
> test 192 = 198
> * FAIL 8: minimal count
> test 192 = 198
> * FAIL 9: repack && prune-packed in client
> (git-repack && git-prune-packed)2>>log.txt
> * ok 9: 3rd pull
> * ok 10: fsck
> * FAIL 11: new object count after 3rd pull
> test 3 = 228
> * FAIL 12: minimal count
> test 3 = 30
> * failed 8 among 12 test(s)
>
> I've been looking all around the current diff_delta(), and I
> can't see, what I'm missing. Any ideas? The file is meant to
> replace the current diff-delta.c.
Nothing outside diff-delta.c and patch-delta.c is aware of the delta
data format. So if your version is meant to be a transparent
replacement then it should pass all tests. If it doesn't then it is
broken.
To help you play around you could try the test-delta utility (make
test-delta to build it).
So:
test-delta -d file1 file2 delta_file
test-delta -p file1 delta_file file3
cmp file2 file3
You should always have file3 identical to file2.
Nicolas
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFH] Exploration of an alternative diff_delta() algorithm
2006-04-09 17:14 ` Nicolas Pitre
@ 2006-04-09 17:34 ` Peter Eriksen
2006-04-09 17:45 ` Nicolas Pitre
2006-04-09 17:40 ` Nicolas Pitre
1 sibling, 1 reply; 9+ messages in thread
From: Peter Eriksen @ 2006-04-09 17:34 UTC (permalink / raw)
To: git
On Sun, Apr 09, 2006 at 01:14:31PM -0400, Nicolas Pitre wrote:
...
> Nothing outside diff-delta.c and patch-delta.c is aware of the delta
> data format. So if your version is meant to be a transparent
> replacement then it should pass all tests. If it doesn't then it is
> broken.
>
> To help you play around you could try the test-delta utility (make
> test-delta to build it).
>
> So:
>
> test-delta -d file1 file2 delta_file
> test-delta -p file1 delta_file file3
> cmp file2 file3
My tests of these kinds doesn't show any errors. Though, if file2 is
empty, test-delta writes: "file2: Invalid argument".
Peter
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFH] Exploration of an alternative diff_delta() algorithm
2006-04-09 17:14 ` Nicolas Pitre
2006-04-09 17:34 ` Peter Eriksen
@ 2006-04-09 17:40 ` Nicolas Pitre
2006-04-09 17:53 ` Peter Eriksen
1 sibling, 1 reply; 9+ messages in thread
From: Nicolas Pitre @ 2006-04-09 17:40 UTC (permalink / raw)
To: Peter Eriksen; +Cc: git
On Sun, 9 Apr 2006, Nicolas Pitre wrote:
> On Sun, 9 Apr 2006, Peter Eriksen wrote:
>
> > Greetings Gitlings,
> >
> > I've been trying to implement an alternative algorithm
> > for diff_delta(). I'm getting close to something that
> > works, but now I'm stuck!
>
> Nothing outside diff-delta.c and patch-delta.c is aware of the delta
> data format. So if your version is meant to be a transparent
> replacement then it should pass all tests. If it doesn't then it is
> broken.
>
> To help you play around you could try the test-delta utility (make
> test-delta to build it).
>
> So:
>
> test-delta -d file1 file2 delta_file
> test-delta -p file1 delta_file file3
> cmp file2 file3
>
> You should always have file3 identical to file2.
Out of curiosity I just tried your diff-delta version with test-delta
and it produced a segmentation fault on the first attempt.
It also has lots of compilation warnings.
Nicolas
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFH] Exploration of an alternative diff_delta() algorithm
2006-04-09 17:34 ` Peter Eriksen
@ 2006-04-09 17:45 ` Nicolas Pitre
2006-04-09 22:45 ` Peter Eriksen
0 siblings, 1 reply; 9+ messages in thread
From: Nicolas Pitre @ 2006-04-09 17:45 UTC (permalink / raw)
To: Peter Eriksen; +Cc: git
On Sun, 9 Apr 2006, Peter Eriksen wrote:
> On Sun, Apr 09, 2006 at 01:14:31PM -0400, Nicolas Pitre wrote:
> ...
> > Nothing outside diff-delta.c and patch-delta.c is aware of the delta
> > data format. So if your version is meant to be a transparent
> > replacement then it should pass all tests. If it doesn't then it is
> > broken.
> >
> > To help you play around you could try the test-delta utility (make
> > test-delta to build it).
> >
> > So:
> >
> > test-delta -d file1 file2 delta_file
> > test-delta -p file1 delta_file file3
> > cmp file2 file3
>
> My tests of these kinds doesn't show any errors.
Try this with the README file from the git source tree:
sed s/git/GIT/g < ./README > /tmp/README.mod
test-delta -d ./README /tmp/README.mod /tmp/README.delta
[BOOM!]
> Though, if file2 is empty, test-delta writes: "file2: Invalid
> argument".
We never delta against or towards empty files.
Nicolas
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFH] Exploration of an alternative diff_delta() algorithm
2006-04-09 17:40 ` Nicolas Pitre
@ 2006-04-09 17:53 ` Peter Eriksen
2006-04-09 18:08 ` Nicolas Pitre
0 siblings, 1 reply; 9+ messages in thread
From: Peter Eriksen @ 2006-04-09 17:53 UTC (permalink / raw)
To: git
On Sun, Apr 09, 2006 at 01:40:14PM -0400, Nicolas Pitre wrote:
...
> Out of curiosity I just tried your diff-delta version with test-delta
> and it produced a segmentation fault on the first attempt.
Yes, I get that too with your README example.
> It also has lots of compilation warnings.
Hm, I don't get any warnings. Would you mind pasting them, so I
can see what it's about?
At least now I have one segmentation fault to work on.
Thanks.
Peter
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFH] Exploration of an alternative diff_delta() algorithm
2006-04-09 17:53 ` Peter Eriksen
@ 2006-04-09 18:08 ` Nicolas Pitre
0 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2006-04-09 18:08 UTC (permalink / raw)
To: Peter Eriksen; +Cc: git
On Sun, 9 Apr 2006, Peter Eriksen wrote:
> On Sun, Apr 09, 2006 at 01:40:14PM -0400, Nicolas Pitre wrote:
> ...
> > It also has lots of compilation warnings.
>
> Hm, I don't get any warnings. Would you mind pasting them, so I
> can see what it's about?
gcc -o diff-delta.o -c -g -O2 -Wall -DSHA1_HEADER='<openssl/sha.h>' diff-delta.c
diff-delta.c: In function 'diff_delta':
diff-delta.c:123: warning: pointer targets in passing argument 1 of 'init_hash' differ in signedness
diff-delta.c:124: warning: pointer targets in passing argument 1 of 'init_hash' differ in signedness
diff-delta.c:170: warning: pointer targets in passing argument 1 of 'hash' differ in signedness
diff-delta.c:171: warning: pointer targets in passing argument 1 of 'hash' differ in signedness
diff-delta.c:203: warning: pointer targets in passing argument 1 of 'init_hash' differ in signedness
diff-delta.c:204: warning: pointer targets in passing argument 1 of 'init_hash' differ in signedness
Also you should avoid declaring new variables after code in the same
scope, like you do with version_offset for example. This is a feature
that many C compilers don't support.
Nicolas
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFH] Exploration of an alternative diff_delta() algorithm
2006-04-09 17:45 ` Nicolas Pitre
@ 2006-04-09 22:45 ` Peter Eriksen
2006-04-10 3:29 ` Nicolas Pitre
0 siblings, 1 reply; 9+ messages in thread
From: Peter Eriksen @ 2006-04-09 22:45 UTC (permalink / raw)
To: git
On Sun, Apr 09, 2006 at 01:45:00PM -0400, Nicolas Pitre wrote:
...
> Try this with the README file from the git source tree:
>
> sed s/git/GIT/g < ./README > /tmp/README.mod
> test-delta -d ./README /tmp/README.mod /tmp/README.delta
> [BOOM!]
I found the bug. The code still has some limitations, but now
it passes the test suite. Thanks for your help, Nicolas.
Peter
----->8---diff-delta.c---->8-------
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "delta.h"
#define BASE 257
#define PREFIX_SIZE 3
#define SIZE 10
#define HASH_TABLE_SIZE (1<<SIZE)
#define DELTA_SIZE (1024 * 1024)
unsigned int init_hash(unsigned char* data) {
return data[0]*BASE*BASE + data[1]*BASE + data[2];
}
unsigned int hash(unsigned char* data, unsigned int hash) {
return (hash - data[-1]*BASE*BASE)*BASE + data[2];
}
#define GR_PRIME 0x9e370001
#define HASH(v) ((v * GR_PRIME) >> (32 - SIZE))
struct entry {
char file;
char* offset;
};
void flush(struct entry* table) {
memset(table, 0, HASH_TABLE_SIZE * sizeof(struct entry));
}
int same_prefixes(char* data1, char* data2) {
return !memcmp(data1, data2, PREFIX_SIZE);
}
void encode_add(char* out, int* outpos, char* version_start, char* version_copy) {
unsigned int size = version_copy - version_start;
if (!size) return;
int pos = *outpos;
while(size > 127) {
out[pos++] = 127;
memcpy(out + pos, version_start, 127);
pos += 127;
version_start += 127;
size -= 127;
}
out[pos++] = size;
memcpy(out + pos, version_start, size);
pos += size;
*outpos = pos;
}
void encode_copy(char* out, int* outpos, int offset, int size) {
int pos = (*outpos) + 1;
int i = 0x80;
if (offset & 0xff) { out[pos++] = offset; i |= 0x01; }
offset >>= 8;
if (offset & 0xff) { out[pos++] = offset; i |= 0x02; }
offset >>= 8;
if (offset & 0xff) { out[pos++] = offset; i |= 0x04; }
offset >>= 8;
if (offset & 0xff) { out[pos++] = offset; i |= 0x08; }
if (size & 0xff) { out[pos++] = size; i |= 0x10; }
size >>= 8;
if (size & 0xff) { out[pos++] = size; i |= 0x20; }
out[*outpos] = i;
*outpos = pos;
}
void encode_size(char* out, int* outpos, unsigned long size) {
int pos = *outpos;
out[pos] = size;
size >>= 7;
while (size) {
out[pos++] |= 0x80;
out[pos] = size;
size >>= 7;
}
*outpos = ++pos;
}
void *diff_delta(void *from_buf, unsigned long from_size,
void *to_buf, unsigned long to_size,
unsigned long *delta_size,
unsigned long max_size) {
unsigned int index;
unsigned int l;
unsigned char* base = from_buf;
unsigned char* version = to_buf;
unsigned long base_size = from_size;
unsigned long version_size = to_size;
unsigned char* base_copy = base;
unsigned char* version_copy = version;
struct entry* table = calloc(HASH_TABLE_SIZE, sizeof(struct entry));
//int delta_alloc = DELTA_SIZE;
unsigned char* delta = malloc(DELTA_SIZE);
unsigned int deltapos = 0;
unsigned char* base_top = base + base_size;
unsigned char* version_top = version + version_size;
encode_size(delta, &deltapos, base_size);
encode_size(delta, &deltapos, version_size);
unsigned char* base_offset = base;
unsigned char* version_offset = version;
unsigned int base_hash = init_hash(base);
unsigned int version_hash = init_hash(version);
unsigned char* version_start = version;
while(base_offset - base + PREFIX_SIZE < base_top - base &&
version_offset - version + PREFIX_SIZE < version_top - version) {
// step2:
index = HASH(base_hash);
switch (table[index].file) {
case '\0': {
table[index].file = 'b';
table[index].offset = base_offset;
break;
}
case 'v': {
if (same_prefixes(base_offset, table[index].offset)) {
base_copy = base_offset;
version_copy = table[index].offset;
goto step3;
} else break;
}
case 'b': break;
default: printf("AAAAAARGH 2b\n");
}
index = HASH(version_hash);
switch (table[index].file) {
case '\0': {
table[index].file = 'v';
table[index].offset = version_offset;
break;
}
case 'b': {
if (same_prefixes(table[index].offset, version_offset)) {
base_copy = table[index].offset;
version_copy = version_offset;
goto step3;
} else break;
}
case 'v': break;
default: printf("AAAAAARGH 2v\n");
}
base_offset++;
version_offset++;
base_hash = hash(base_offset, base_hash);
version_hash = hash(version_offset, version_hash);
continue; // goto step2;
step3:
l = 0;
while(base_copy[l] == version_copy[l] && base_copy + l < base_top && version_copy + l < version_top) l++;
base_offset = base_copy + l;
version_offset = version_copy + l;
/*
// Make sure we don't run out of delta buffer when encoding.
if((delta_alloc - deltapos) <
(version_start - version_copy) + 1 + 8 + (PREFIX_SIZE + 1)) {
delta_alloc = delta_alloc * 3 / 2;
delta = (char*) realloc(delta, delta_alloc);
}
*/
if(max_size && deltapos > max_size) {
free(delta);
free(table);
return NULL;
}
//fprintf(stdout, "add: pos %u, v_start %u, v_copy %u\n",
// deltapos, version_start - version, version_copy - version);
// step4:
encode_add(delta, &deltapos, version_start, version_copy);
//fprintf(stdout, "copy: pos %u, v_copy %u, l %u\n",
// deltapos, base_copy - base, l);
encode_copy(delta, &deltapos, base_copy - base, l);
// step5:
flush(table);
version_start = version_offset;
base_hash = init_hash(base_offset);
version_hash = init_hash(version_offset);
//fprintf(stdout, "3) pos %u, v_start %u, v %u, b %u\n",
// deltapos, version_start - version, version_offset - version, base_offset- base);
} // goto step2;
//fprintf(stdout, "pos %u, v_start %u, v_top %u\n",
// deltapos, version_start - version, version_size);
encode_add(delta, &deltapos, version_start, version + version_size);
*delta_size = deltapos;
free(table);
return delta;
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFH] Exploration of an alternative diff_delta() algorithm
2006-04-09 22:45 ` Peter Eriksen
@ 2006-04-10 3:29 ` Nicolas Pitre
0 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2006-04-10 3:29 UTC (permalink / raw)
To: Peter Eriksen; +Cc: git
[-- Attachment #1: Type: TEXT/PLAIN, Size: 1001 bytes --]
On Mon, 10 Apr 2006, Peter Eriksen wrote:
> On Sun, Apr 09, 2006 at 01:45:00PM -0400, Nicolas Pitre wrote:
> ...
> > Try this with the README file from the git source tree:
> >
> > sed s/git/GIT/g < ./README > /tmp/README.mod
> > test-delta -d ./README /tmp/README.mod /tmp/README.delta
> > [BOOM!]
>
> I found the bug. The code still has some limitations, but now
> it passes the test suite. Thanks for your help, Nicolas.
OK here's some more meat for you:
Copy the same README file from the git source tree, then edit the copied
version so the "Blob Object" section and the "Tree Object" section are
swapped around like shown in the attached patch.
The best delta that can be achieved is 24 bytes.
With the current code the produced delta is 42 bytes.
With your code the resulting delta is 4978 bytes, about twice as large
as the attached patch.
One major limitation of your algorithm appears to not have a global view
of the base buffer before starting to find matches.
Nicolas
[-- Attachment #2: Type: TEXT/PLAIN, Size: 2372 bytes --]
--- f1 2006-04-09 13:31:26.000000000 -0400
+++ f2 2006-04-09 23:04:10.000000000 -0400
@@ -87,26 +87,6 @@
The object types in some more detail:
-Blob Object
-~~~~~~~~~~~
-A "blob" object is nothing but a binary blob of data, and doesn't
-refer to anything else. There is no signature or any other
-verification of the data, so while the object is consistent (it 'is'
-indexed by its sha1 hash, so the data itself is certainly correct), it
-has absolutely no other attributes. No name associations, no
-permissions. It is purely a blob of data (i.e. normally "file
-contents").
-
-In particular, since the blob is entirely defined by its data, if two
-files in a directory tree (or in multiple different versions of the
-repository) have the same contents, they will share the same blob
-object. The object is totally independent of its location in the
-directory tree, and renaming a file does not change the object that
-file is associated with in any way.
-
-A blob is typically created when gitlink:git-update-index[1]
-is run, and its data can be accessed by gitlink:git-cat-file[1].
-
Tree Object
~~~~~~~~~~~
The next hierarchical object type is the "tree" object. A tree object
@@ -147,6 +127,26 @@
its data can be accessed by gitlink:git-ls-tree[1].
Two trees can be compared with gitlink:git-diff-tree[1].
+Blob Object
+~~~~~~~~~~~
+A "blob" object is nothing but a binary blob of data, and doesn't
+refer to anything else. There is no signature or any other
+verification of the data, so while the object is consistent (it 'is'
+indexed by its sha1 hash, so the data itself is certainly correct), it
+has absolutely no other attributes. No name associations, no
+permissions. It is purely a blob of data (i.e. normally "file
+contents").
+
+In particular, since the blob is entirely defined by its data, if two
+files in a directory tree (or in multiple different versions of the
+repository) have the same contents, they will share the same blob
+object. The object is totally independent of its location in the
+directory tree, and renaming a file does not change the object that
+file is associated with in any way.
+
+A blob is typically created when gitlink:git-update-index[1]
+is run, and its data can be accessed by gitlink:git-cat-file[1].
+
Commit Object
~~~~~~~~~~~~~
The "commit" object is an object that introduces the notion of
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2006-04-10 3:29 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-04-09 14:31 [RFH] Exploration of an alternative diff_delta() algorithm Peter Eriksen
2006-04-09 17:14 ` Nicolas Pitre
2006-04-09 17:34 ` Peter Eriksen
2006-04-09 17:45 ` Nicolas Pitre
2006-04-09 22:45 ` Peter Eriksen
2006-04-10 3:29 ` Nicolas Pitre
2006-04-09 17:40 ` Nicolas Pitre
2006-04-09 17:53 ` Peter Eriksen
2006-04-09 18:08 ` Nicolas Pitre
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).