git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC/PATCH 3/7] replace_object: add mechanism to replace objects found in "refs/replace/"
@ 2009-01-12 17:44 Christian Couder
  2009-01-13  1:31 ` Junio C Hamano
  0 siblings, 1 reply; 4+ messages in thread
From: Christian Couder @ 2009-01-12 17:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

The code implementing this mechanism has been copied from the commit
graft code.

This mechanism is used in "read_sha1_file". sha1 passed to this
function that match a ref name in "refs/replace/" are replaced by
the sha1 that has been read in the ref.

We "die" if the replacement recursion depth is too high or if we
can't read the replacement object.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Makefile         |    1 +
 commit.h         |    2 +
 replace_object.c |  116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 sha1_file.c      |   14 +++++-
 4 files changed, 130 insertions(+), 3 deletions(-)
 create mode 100644 replace_object.c

diff --git a/Makefile b/Makefile
index dee97c1..7cf5d9a 100644
--- a/Makefile
+++ b/Makefile
@@ -471,6 +471,7 @@ LIB_OBJS += read-cache.o
 LIB_OBJS += reflog-walk.o
 LIB_OBJS += refs.o
 LIB_OBJS += remote.o
+LIB_OBJS += replace_object.o
 LIB_OBJS += rerere.o
 LIB_OBJS += revision.o
 LIB_OBJS += run-command.o
diff --git a/commit.h b/commit.h
index 3a7b06a..37aa763 100644
--- a/commit.h
+++ b/commit.h
@@ -122,6 +122,8 @@ struct commit_graft *read_graft_line(char *buf, int len);
 int register_commit_graft(struct commit_graft *, int);
 struct commit_graft *lookup_commit_graft(const unsigned char *sha1);
 
+const unsigned char *lookup_replace_object(const unsigned char *sha1);
+
 extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2, int cleanup);
 extern struct commit_list *get_merge_bases_many(struct commit *one, int n, struct commit **twos, int cleanup);
 extern struct commit_list *get_octopus_merge_bases(struct commit_list *in);
diff --git a/replace_object.c b/replace_object.c
new file mode 100644
index 0000000..25b3ef3
--- /dev/null
+++ b/replace_object.c
@@ -0,0 +1,116 @@
+#include "cache.h"
+#include "refs.h"
+
+static struct replace_object {
+	unsigned char sha1[2][20];
+} **replace_object;
+
+static int replace_object_alloc, replace_object_nr;
+
+static int replace_object_pos(const unsigned char *sha1)
+{
+	int lo, hi;
+	lo = 0;
+	hi = replace_object_nr;
+	while (lo < hi) {
+		int mi = (lo + hi) / 2;
+		struct replace_object *rep = replace_object[mi];
+		int cmp = hashcmp(sha1, rep->sha1[0]);
+		if (!cmp)
+			return mi;
+		if (cmp < 0)
+			hi = mi;
+		else
+			lo = mi + 1;
+	}
+	return -lo - 1;
+}
+
+static int register_replace_object(struct replace_object *replace,
+				   int ignore_dups)
+{
+	int pos = replace_object_pos(replace->sha1[0]);
+
+	if (0 <= pos) {
+		if (ignore_dups)
+			free(replace);
+		else {
+			free(replace_object[pos]);
+			replace_object[pos] = replace;
+		}
+		return 1;
+	}
+	pos = -pos - 1;
+	if (replace_object_alloc <= ++replace_object_nr) {
+		replace_object_alloc = alloc_nr(replace_object_alloc);
+		replace_object = xrealloc(replace_object,
+					  sizeof(*replace_object) *
+					  replace_object_alloc);
+	}
+	if (pos < replace_object_nr)
+		memmove(replace_object + pos + 1,
+			replace_object + pos,
+			(replace_object_nr - pos - 1) *
+			sizeof(*replace_object));
+	replace_object[pos] = replace;
+	return 0;
+}
+
+static int register_replace_ref(const char *refname,
+				const unsigned char *sha1,
+				int flag, void *cb_data)
+{
+	/* Get sha1 from refname */
+	const char *slash = strrchr(refname, '/');
+	const char *hash = slash ? slash + 1 : refname;
+	struct replace_object * repl_obj = xmalloc(sizeof(*repl_obj));
+
+	if (strlen(hash) != 40 || get_sha1_hex(hash, repl_obj->sha1[0])) {
+		free(repl_obj);
+		warning("bad replace ref name: %s", refname);
+	}
+
+	/* Copy sha1 from the read ref */
+	hashcpy(repl_obj->sha1[1], sha1);
+
+	/* Register new object */
+	if (register_replace_object(repl_obj, 1))
+		warning("duplicate replace ref: %s", refname);
+
+	return 0;
+}
+
+static void prepare_replace_object(void)
+{
+	static int replace_object_prepared;
+
+	if (replace_object_prepared)
+		return;
+
+	for_each_replace_ref(register_replace_ref, NULL);
+	replace_object_prepared = 1;
+}
+
+/* We allow "recursive" replacement. Only within reason, though */
+#define MAXREPLACEDEPTH 5
+
+const unsigned char *lookup_replace_object(const unsigned char *sha1)
+{
+	int pos, depth = MAXREPLACEDEPTH;
+	const unsigned char *cur = sha1;
+
+	prepare_replace_object();
+
+	/* Try to recursively replace the object */
+	do {
+		if (--depth < 0)
+			die("replace depth too high for object %s",
+			    sha1_to_hex(sha1));
+
+		pos = replace_object_pos(cur);
+		if (0 <= pos)
+			cur = replace_object[pos]->sha1[1];
+	} while (0 <= pos);
+
+	return cur;
+}
diff --git a/sha1_file.c b/sha1_file.c
index f08493f..4f2fd10 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2163,10 +2163,18 @@ static void *read_object(const unsigned char *sha1, enum object_type *type,
 void *read_sha1_file(const unsigned char *sha1, enum object_type *type,
 		     unsigned long *size)
 {
-	void *data = read_object(sha1, type, size);
+	const unsigned char *repl = lookup_replace_object(sha1);
+	void *data = read_object(repl, type, size);
+
+	/* die if we replaced an object with one that does not exist */
+	if (!data && repl != sha1)
+		die("replacement %s not found for %s",
+		    sha1_to_hex(repl), sha1_to_hex(sha1));
+
 	/* legacy behavior is to die on corrupted objects */
-	if (!data && (has_loose_object(sha1) || has_packed_and_bad(sha1)))
-		die("object %s is corrupted", sha1_to_hex(sha1));
+	if (!data && (has_loose_object(repl) || has_packed_and_bad(repl)))
+		die("object %s is corrupted", sha1_to_hex(repl));
+
 	return data;
 }
 
-- 
1.6.1.83.g16e5

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC/PATCH 3/7] replace_object: add mechanism to replace objects found in "refs/replace/"
  2009-01-12 17:44 [RFC/PATCH 3/7] replace_object: add mechanism to replace objects found in "refs/replace/" Christian Couder
@ 2009-01-13  1:31 ` Junio C Hamano
  2009-01-15  9:44   ` Christian Couder
  0 siblings, 1 reply; 4+ messages in thread
From: Junio C Hamano @ 2009-01-13  1:31 UTC (permalink / raw)
  To: Christian Couder; +Cc: git

Christian Couder <chriscool@tuxfamily.org> writes:

> diff --git a/replace_object.c b/replace_object.c
> new file mode 100644
> index 0000000..25b3ef3
> --- /dev/null
> +++ b/replace_object.c
> @@ -0,0 +1,116 @@
> +#include "cache.h"
> +#include "refs.h"
> +
> +static struct replace_object {
> +	unsigned char sha1[2][20];
> +} **replace_object;
> +
> +static int replace_object_alloc, replace_object_nr;
> +
> +static int replace_object_pos(const unsigned char *sha1)
> +{
> +	int lo, hi;
> +	lo = 0;
> +	hi = replace_object_nr;
> +	while (lo < hi) {
> +		int mi = (lo + hi) / 2;
> +		struct replace_object *rep = replace_object[mi];
> +		int cmp = hashcmp(sha1, rep->sha1[0]);
> +		if (!cmp)
> +			return mi;
> +		if (cmp < 0)
> +			hi = mi;
> +		else
> +			lo = mi + 1;
> +	}
> +	return -lo - 1;
> +}

Hmm, this is a tangent of this topic, but I wonder if we can do something
more generic to factor out many binary search like this we have throughout
the code.  Also I wonder if they can be made more efficient by taking
advantage of the fact that our hash is expected to produce a good uniform
distribution, similar to the way patch-ids.c::patch_pos() does this.

I guess the performance should not matter much for this table, as we
expect there won't be massive object replacements.

Also I recall Dscho muttered something about hashmap I didn't quite
understand.

> +static int register_replace_ref(const char *refname,
> +				const unsigned char *sha1,
> +				int flag, void *cb_data)
> +{
> +	/* Get sha1 from refname */
> +	const char *slash = strrchr(refname, '/');
> +	const char *hash = slash ? slash + 1 : refname;
> +	struct replace_object * repl_obj = xmalloc(sizeof(*repl_obj));

	struct replace_object *repl_obj = ...

> +	if (strlen(hash) != 40 || get_sha1_hex(hash, repl_obj->sha1[0])) {
> +		free(repl_obj);
> +		warning("bad replace ref name: %s", refname);
> +	}
> +
> +	/* Copy sha1 from the read ref */
> +	hashcpy(repl_obj->sha1[1], sha1);

Upon an error, you free and warn and then still copy into it?

> +	/* Register new object */
> +	if (register_replace_object(repl_obj, 1))
> +		warning("duplicate replace ref: %s", refname);

I'd say this is a grave error and should be reported as a repository
corruption.

> +static void prepare_replace_object(void)
> +{
> +	static int replace_object_prepared;
> +
> +	if (replace_object_prepared)
> +		return;
> +
> +	for_each_replace_ref(register_replace_ref, NULL);
> +	replace_object_prepared = 1;
> +}
> +
> +/* We allow "recursive" replacement. Only within reason, though */
> +#define MAXREPLACEDEPTH 5
> +
> +const unsigned char *lookup_replace_object(const unsigned char *sha1)
> +{
> +	int pos, depth = MAXREPLACEDEPTH;
> +	const unsigned char *cur = sha1;
> +
> +	prepare_replace_object();
> +
> +	/* Try to recursively replace the object */
> +	do {
> +		if (--depth < 0)
> +			die("replace depth too high for object %s",
> +			    sha1_to_hex(sha1));
> +
> +		pos = replace_object_pos(cur);
> +		if (0 <= pos)
> +			cur = replace_object[pos]->sha1[1];
> +	} while (0 <= pos);
> +
> +	return cur;
> +}

Since your paradigm is prepare replacement once at the beginning, never
allowing to update it, I think you can update the table while you look it
up.  E.g. if A->B and B->C exists, and A is asked for, you find A->B (to
tentatively make cur to point at B) and then you find B->C, and before
returning you can rewrite the first mapping to A->C.  Later look-up won't
need to dereference the table twice that way.

This assumes that there will be small number of replacements, but the same
object can be asked for more than once during the process.

> diff --git a/sha1_file.c b/sha1_file.c
> index f08493f..4f2fd10 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2163,10 +2163,18 @@ static void *read_object(const unsigned char *sha1, enum object_type *type,
>  void *read_sha1_file(const unsigned char *sha1, enum object_type *type,
>  		     unsigned long *size)
>  {
> -	void *data = read_object(sha1, type, size);
> +	const unsigned char *repl = lookup_replace_object(sha1);
> +	void *data = read_object(repl, type, size);
> +
> +	/* die if we replaced an object with one that does not exist */
> +	if (!data && repl != sha1)
> +		die("replacement %s not found for %s",
> +		    sha1_to_hex(repl), sha1_to_hex(sha1));
> +
>  	/* legacy behavior is to die on corrupted objects */
> -	if (!data && (has_loose_object(sha1) || has_packed_and_bad(sha1)))
> -		die("object %s is corrupted", sha1_to_hex(sha1));
> +	if (!data && (has_loose_object(repl) || has_packed_and_bad(repl)))
> +		die("object %s is corrupted", sha1_to_hex(repl));
> +
>  	return data;
>  }

Later we'd need a global switch to forbid the replacement for connectivity
walkers, but other than that I think this is sane.

I also looked at 1/ and 2/ which looked Ok.

Thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC/PATCH 3/7] replace_object: add mechanism to replace objects found in "refs/replace/"
  2009-01-13  1:31 ` Junio C Hamano
@ 2009-01-15  9:44   ` Christian Couder
  2009-01-15 17:40     ` Christian Couder
  0 siblings, 1 reply; 4+ messages in thread
From: Christian Couder @ 2009-01-15  9:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Le mardi 13 janvier 2009, Junio C Hamano a écrit :
> Christian Couder <chriscool@tuxfamily.org> writes:
> > diff --git a/replace_object.c b/replace_object.c
> > new file mode 100644
> > index 0000000..25b3ef3
> > --- /dev/null
> > +++ b/replace_object.c
> > @@ -0,0 +1,116 @@
> > +#include "cache.h"
> > +#include "refs.h"
> > +
> > +static struct replace_object {
> > +	unsigned char sha1[2][20];
> > +} **replace_object;
> > +
> > +static int replace_object_alloc, replace_object_nr;
> > +
> > +static int replace_object_pos(const unsigned char *sha1)
> > +{
> > +	int lo, hi;
> > +	lo = 0;
> > +	hi = replace_object_nr;
> > +	while (lo < hi) {
> > +		int mi = (lo + hi) / 2;
> > +		struct replace_object *rep = replace_object[mi];
> > +		int cmp = hashcmp(sha1, rep->sha1[0]);
> > +		if (!cmp)
> > +			return mi;
> > +		if (cmp < 0)
> > +			hi = mi;
> > +		else
> > +			lo = mi + 1;
> > +	}
> > +	return -lo - 1;
> > +}
>
> Hmm, this is a tangent of this topic, but I wonder if we can do something
> more generic to factor out many binary search like this we have
> throughout the code.  Also I wonder if they can be made more efficient by
> taking advantage of the fact that our hash is expected to produce a good
> uniform distribution, similar to the way patch-ids.c::patch_pos() does
> this.
>
> I guess the performance should not matter much for this table, as we
> expect there won't be massive object replacements.
>
> Also I recall Dscho muttered something about hashmap I didn't quite
> understand.

Yeah, maybe it's possible to get faster code and to refactor the 
many binary search we have, but I will leave that for latter or for other 
people interested in these topics, if you let me.

> > +static int register_replace_ref(const char *refname,
> > +				const unsigned char *sha1,
> > +				int flag, void *cb_data)
> > +{
> > +	/* Get sha1 from refname */
> > +	const char *slash = strrchr(refname, '/');
> > +	const char *hash = slash ? slash + 1 : refname;
> > +	struct replace_object * repl_obj = xmalloc(sizeof(*repl_obj));
>
> 	struct replace_object *repl_obj = ...

Ok.

> > +	if (strlen(hash) != 40 || get_sha1_hex(hash, repl_obj->sha1[0])) {
> > +		free(repl_obj);
> > +		warning("bad replace ref name: %s", refname);
> > +	}
> > +
> > +	/* Copy sha1 from the read ref */
> > +	hashcpy(repl_obj->sha1[1], sha1);
>
> Upon an error, you free and warn and then still copy into it?

Ooops, I forgot a "return 0;" statement after the warning.

> > +	/* Register new object */
> > +	if (register_replace_object(repl_obj, 1))
> > +		warning("duplicate replace ref: %s", refname);
>
> I'd say this is a grave error and should be reported as a repository
> corruption.

If we let people have a set of replace refs in "refs/replace/" and another 
one in "refs/replace/bisect/", and the latter one is used only when 
bisecting, then it may happen that the same commit has one ref 
in "refs/replace/" and another one in "refs/replace/bisect/". In this case 
it should probably be considered as something we should not even warn on, 
and the replace ref in "refs/replace/bisect/" should be used when 
bisecting.

But, as we don't have a mechanism to do that yet, you are right, we should 
probably "die" for now here.

> > +static void prepare_replace_object(void)
> > +{
> > +	static int replace_object_prepared;
> > +
> > +	if (replace_object_prepared)
> > +		return;
> > +
> > +	for_each_replace_ref(register_replace_ref, NULL);
> > +	replace_object_prepared = 1;
> > +}
> > +
> > +/* We allow "recursive" replacement. Only within reason, though */
> > +#define MAXREPLACEDEPTH 5
> > +
> > +const unsigned char *lookup_replace_object(const unsigned char *sha1)
> > +{
> > +	int pos, depth = MAXREPLACEDEPTH;
> > +	const unsigned char *cur = sha1;
> > +
> > +	prepare_replace_object();
> > +
> > +	/* Try to recursively replace the object */
> > +	do {
> > +		if (--depth < 0)
> > +			die("replace depth too high for object %s",
> > +			    sha1_to_hex(sha1));
> > +
> > +		pos = replace_object_pos(cur);
> > +		if (0 <= pos)
> > +			cur = replace_object[pos]->sha1[1];
> > +	} while (0 <= pos);
> > +
> > +	return cur;
> > +}
>
> Since your paradigm is prepare replacement once at the beginning, never
> allowing to update it, I think you can update the table while you look it
> up.  E.g. if A->B and B->C exists, and A is asked for, you find A->B (to
> tentatively make cur to point at B) and then you find B->C, and before
> returning you can rewrite the first mapping to A->C.  Later look-up won't
> need to dereference the table twice that way.
>
> This assumes that there will be small number of replacements, but the
> same object can be asked for more than once during the process.

If we allow different sets of replace refs, for example A->B 
in "refs/replace/" and B->C in "refs/replace/bisect/", then we cannot 
rewrite as you suggest. We could add A->C in "refs/replace/bisect/", so 
that it overcomes A->B and B->C when we bisect, but we would not gain much.

So I prefer not to do that right now. Maybe later if we decide we really 
don't want to allow different sets of replace refs, we can do what you 
suggest. 

> > diff --git a/sha1_file.c b/sha1_file.c
> > index f08493f..4f2fd10 100644
> > --- a/sha1_file.c
> > +++ b/sha1_file.c
> > @@ -2163,10 +2163,18 @@ static void *read_object(const unsigned char
> > *sha1, enum object_type *type, void *read_sha1_file(const unsigned char
> > *sha1, enum object_type *type, unsigned long *size)
> >  {
> > -	void *data = read_object(sha1, type, size);
> > +	const unsigned char *repl = lookup_replace_object(sha1);
> > +	void *data = read_object(repl, type, size);
> > +
> > +	/* die if we replaced an object with one that does not exist */
> > +	if (!data && repl != sha1)
> > +		die("replacement %s not found for %s",
> > +		    sha1_to_hex(repl), sha1_to_hex(sha1));
> > +
> >  	/* legacy behavior is to die on corrupted objects */
> > -	if (!data && (has_loose_object(sha1) || has_packed_and_bad(sha1)))
> > -		die("object %s is corrupted", sha1_to_hex(sha1));
> > +	if (!data && (has_loose_object(repl) || has_packed_and_bad(repl)))
> > +		die("object %s is corrupted", sha1_to_hex(repl));
> > +
> >  	return data;
> >  }
>
> Later we'd need a global switch to forbid the replacement for
> connectivity walkers, 

Yeah, I am slowly working on it. My next series (hopefully in a few days) 
where the above errors are fixed will include it.

> but other than that I think this is sane. 
>
> I also looked at 1/ and 2/ which looked Ok.

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC/PATCH 3/7] replace_object: add mechanism to replace objects found in "refs/replace/"
  2009-01-15  9:44   ` Christian Couder
@ 2009-01-15 17:40     ` Christian Couder
  0 siblings, 0 replies; 4+ messages in thread
From: Christian Couder @ 2009-01-15 17:40 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Le jeudi 15 janvier 2009, Christian Couder a écrit :
> Le mardi 13 janvier 2009, Junio C Hamano a écrit :
>
> > Since your paradigm is prepare replacement once at the beginning, never
> > allowing to update it, I think you can update the table while you look
> > it up.  E.g. if A->B and B->C exists, and A is asked for, you find A->B
> > (to tentatively make cur to point at B) and then you find B->C, and
> > before returning you can rewrite the first mapping to A->C.  Later
> > look-up won't need to dereference the table twice that way.
> >
> > This assumes that there will be small number of replacements, but the
> > same object can be asked for more than once during the process.
>
> If we allow different sets of replace refs, for example A->B
> in "refs/replace/" and B->C in "refs/replace/bisect/", then we cannot
> rewrite as you suggest. We could add A->C in "refs/replace/bisect/", so
> that it overcomes A->B and B->C when we bisect, but we would not gain
> much.

Sorry, I just realized I misunderstood what you said. I don't know why but I 
thought you talked about updating the refs. But in fact you are right it 
should be possible to update the "replace_object" table.

Regards,
Christian.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-01-15 17:41 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-12 17:44 [RFC/PATCH 3/7] replace_object: add mechanism to replace objects found in "refs/replace/" Christian Couder
2009-01-13  1:31 ` Junio C Hamano
2009-01-15  9:44   ` Christian Couder
2009-01-15 17:40     ` Christian Couder

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).