git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* git-rev-list  in local commit order
@ 2005-05-14 21:44 Sean
  2005-05-15 19:48 ` Thomas Gleixner
  0 siblings, 1 reply; 17+ messages in thread
From: Sean @ 2005-05-14 21:44 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 1317 bytes --]

Attached is a preliminary hackish patch to sort git-rev-list in local
commit order.   While I don't know how useful this really is, it's
presented as an alternative to the repo-id proposal.  This will work even
if the branch happens to be from a single repository, where repo-id will
not.  However, shared commit objects can cause problems so for best
results use private commit objects for each repository.

For purposes of testing, this patch changes the Cogito default of linking
objects to copying, for local repository pull operations.   This patch
will work with _existing_ repositories where local commit times have been
maintained.

Also attached, is a little test script that demonstrates the local commit
time order.  After running the test script, you can use the cg-log command
in each of the M and R directories to see the difference even though the
two repositories share a head commit.

This patch is not nearly ready for inclusion anywhere; it is just meant for
comment.  It is based off Petr's cogito tree (commit
fa6e9eb368e949e78c4e66217461cf624b52b0a2).

 cache.h     |    1
 cg-pull     |    4 -
 commit.c    |  121
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 commit.h    |    6 ++
 rev-list.c  |    2
 sha1_file.c |    8 +++
 6 files changed, 137 insertions(+), 5 deletions(-)

Sean


[-- Attachment #2: local-rev-list-v1.patch --]
[-- Type: application/octet-stream, Size: 6552 bytes --]

Index: cache.h
===================================================================
--- a/cache.h  (mode:100644)
+++ b/cache.h  (mode:100644)
@@ -157,6 +157,7 @@
 extern int write_sha1_from_fd(const unsigned char *sha1, int fd);
 
 extern int has_sha1_file(const unsigned char *sha1);
+extern unsigned long sha1_local_date(const unsigned char *sha1);
 
 /* Convert to/from hex/sha1 representation */
 extern int get_sha1(const char *str, unsigned char *sha1);
Index: cg-pull
===================================================================
--- a/cg-pull  (mode:100755)
+++ b/cg-pull  (mode:100755)
@@ -143,7 +143,7 @@
 	[ "$1" = "-i" ] && shift
 	[ "$1" = "-s" ] && shift
 
-	cp_flags_l="-va"
+	cp_flags_l="-vdR"
 	if [ "$1" = "-u" ]; then
 		cp_flags_l="$cp_flags_l -lu"
 		shift
@@ -163,7 +163,7 @@
 }
 
 pull_local () {
-	git-local-pull -a -l -v "$(cat "$_git/refs/heads/$1")" "$2"
+	git-local-pull -a -v "$(cat "$_git/refs/heads/$1")" "$2"
 }
 
 if echo "$uri" | grep -q "^http://"; then
Index: commit.c
===================================================================
--- a/commit.c  (mode:100644)
+++ b/commit.c  (mode:100644)
@@ -2,6 +2,7 @@
 #include "cache.h"
 #include <string.h>
 #include <limits.h>
+#include <stdlib.h>
 
 const char *commit_type = "commit";
 
@@ -13,6 +14,7 @@
 		memset(ret, 0, sizeof(struct commit));
 		created_object(sha1, &ret->object);
 		ret->object.type = commit_type;
+		ret->local_date = sha1_local_date(sha1);
 		return ret;
 	}
 	if (obj->type != commit_type) {
@@ -41,6 +43,18 @@
 	return date;
 }
 
+static void insert_by_local_date(struct commit_list **list, struct commit *item)
+{
+	struct commit_list **pp = list;
+	struct commit_list *p;
+	while ((p = *pp) != NULL) {
+		if (p->item->local_date > item->local_date) 
+			break;
+		pp = &p->next;
+	}
+	commit_list_insert(item, pp);
+}
+
 int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
 {
 	void *bufptr = buffer;
@@ -58,12 +72,13 @@
 	       !get_sha1_hex(bufptr + 7, parent)) {
 		struct commit *new_parent = lookup_commit(parent);
 		if (new_parent) {
-			commit_list_insert(new_parent, &item->parents);
+ 			insert_by_local_date(&item->parents, new_parent);
 			add_ref(&item->object, &new_parent->object);
 		}
 		bufptr += 48;
 	}
 	item->date = parse_commit_date(bufptr);
+	item->merge_nodes = NULL;
 	return 0;
 }
 
@@ -152,3 +167,107 @@
 	}
 	return ret;
 }
+
+struct commit_list *copy_commit_list(struct commit_list *list)
+{
+	struct commit_list *copy = NULL;
+	while (list) {
+		commit_list_insert(list->item, &copy); 
+		list = list->next;
+	}
+	return copy;
+}
+
+int found_on_list(struct commit *item, struct commit_list *list)
+{
+	while (list) {
+		if (list->item == item)
+			return 1;
+		list = list->next;
+	}
+	return 0;
+}
+
+static struct commit *process_local_list(struct commit_list **list_p, 
+					int this_mark, int other_mark)
+{
+	struct commit *item = (*list_p)->item;
+
+	if (item->object.flags & other_mark) {
+		return item;
+	} else {
+		pop_most_recent_commit(list_p, this_mark);
+	}
+	return NULL;
+}
+
+struct commit *common_local_ancestor(struct commit *rev1, struct commit *rev2)
+{
+	struct commit_list *rev1list = NULL;
+	struct commit_list *rev2list = NULL;
+
+	commit_list_insert(rev1, &rev1list); rev1->object.flags |= 0x1;
+	commit_list_insert(rev2, &rev2list); rev2->object.flags |= 0x2;
+	parse_commit(rev1); parse_commit(rev2);
+
+	while (rev1list || rev2list) {
+		struct commit *ret;
+		if (!rev1list) {
+			// process 2
+			ret = process_local_list(&rev2list, 0x2, 0x1);
+		} else if (!rev2list) {
+			// process 1
+			ret = process_local_list(&rev1list, 0x1, 0x2);
+		} else if (rev1list->item->local_date 
+				< rev2list->item->local_date) {
+			// process 2
+			ret = process_local_list(&rev2list, 0x2, 0x1);
+		} else {
+			// process 1
+			ret = process_local_list(&rev1list, 0x1, 0x2);
+		}
+		if (ret) {
+			free_commit_list(rev1list);
+			free_commit_list(rev2list);
+			return ret;
+		}
+	}
+	return NULL;
+}
+
+void insert_merge_nodes(struct commit_list *plist,
+			struct commit_list *stop,
+			struct commit *node)
+{
+	struct commit_list *p;
+	for (p=plist; p != stop; p=p->next)
+		commit_list_insert(	common_local_ancestor(node, p->item),
+					&node->merge_nodes);
+}
+
+struct commit *pop_newest_local_commit(	struct commit_list **list,
+					unsigned int mark)
+{
+	struct commit *ret = (*list)->item;
+	struct commit_list *parents = ret->parents;
+	struct commit_list *old = *list;
+	struct commit_list *prev = ret->merge_nodes;
+
+	*list = (*list)->next;
+	free(old);
+
+	/* Loop expects parents to be ordered oldest to newest on local time */
+	while (parents) {
+		struct commit *commit = parents->item;
+		parse_commit(commit);
+		if (!((commit->object.flags & mark) | 
+                       found_on_list(commit, ret->merge_nodes))) {
+			commit->object.flags |= mark;
+			prev = commit->merge_nodes = copy_commit_list(prev);
+			insert_merge_nodes(ret->parents, parents, commit);
+			commit_list_insert(commit, list);
+		}
+		parents = parents->next;
+	}
+	return ret;
+}
Index: commit.h
===================================================================
--- a/commit.h  (mode:100644)
+++ b/commit.h  (mode:100644)
@@ -11,8 +11,9 @@
 
 struct commit {
 	struct object object;
-	unsigned long date;
+	unsigned long date, local_date;
 	struct commit_list *parents;
+	struct commit_list *merge_nodes;
 	struct tree *tree;
 };
 
@@ -36,4 +37,7 @@
 struct commit *pop_most_recent_commit(struct commit_list **list, 
 				      unsigned int mark);
 
+struct commit *pop_newest_local_commit(	struct commit_list **list,
+					unsigned int mark);
+
 #endif /* COMMIT_H */
Index: rev-list.c
===================================================================
--- a/rev-list.c  (mode:100644)
+++ b/rev-list.c  (mode:100644)
@@ -38,7 +38,7 @@
 
 	commit_list_insert(commit, &list);
 	do {
-		struct commit *commit = pop_most_recent_commit(&list, 0x1);
+		struct commit *commit = pop_newest_local_commit(&list, 0x4);
 
 		if (min_age != -1 && (commit->date > min_age))
 			continue;
Index: sha1_file.c
===================================================================
--- a/sha1_file.c  (mode:100644)
+++ b/sha1_file.c  (mode:100644)
@@ -577,6 +577,14 @@
 	return !!find_sha1_file(sha1, &st);
 }
 
+unsigned long sha1_local_date(const unsigned char *sha1)
+{
+	struct stat st;
+	if (find_sha1_file(sha1, &st))
+		return st.st_mtime;
+	return 0;
+}
+
 int index_fd(unsigned char *sha1, int fd, struct stat *st)
 {
 	unsigned long size = st->st_size;

[-- Attachment #3: test-local-rev-list-v1.sh --]
[-- Type: application/octet-stream, Size: 921 bytes --]

#!/bin/bash
# Demonstration script for local-commit-order sorting.
#
# Builds two Cogito repositories in the current directory, R and M (M is a
# clone of R), interleaves commits between them, merges M into R, and then
# updates M from R.  The commit messages (Rn-3, Mn-1, Rn-2, Mn, Rn-1, Rn)
# label each commit by the repository it was made in.
# Afterwards, run cg-log in each of R and M to compare the resulting order.

# Print an error message and abort the whole script.
die() { echo "death: $*" ; exit 1; }

# Create repository R; cg-init reads stdin from /dev/null
# (presumably to avoid an interactive prompt -- confirm against cg-init).
mkdir R && cd R || die "on mkdir R"
cg-init < /dev/null || die "R init"

# First commit in R ("Rn-3"); it is made before M is cloned.
touch one ; cg-add one || die "adding Rn-3"
echo "Rn-3" | cg-commit || die "committing Rn-3"

# Clone R into a sibling repository M.
cd .. || die "cd base"
cg-clone R M || die "cloning"

# From here on, commits alternate between M and R.
cd M || die "cd M"
touch two; cg-add two || die "adding Mn-1"
echo "Mn-1" | cg-commit || die "committing Mn-1"

cd ../R || die "cd ../R"
touch three; cg-add three || die "adding Rn-2"
echo "Rn-2" | cg-commit || die "committing Rn-2"

cd ../M || die "cd ../M"
touch four; cg-add four || die "adding Mn"
echo "Mn" | cg-commit || die "committing Mn"

cd ../R || die "cd ../R"
touch five; cg-add five || die "adding Rn-1"
echo "Rn-1" | cg-commit || die "committing Rn-1"

# In R: register M as a branch and merge it; "Rn" is the merge commit message.
# NOTE(review): the sleep presumably ensures the objects pulled in get a
# later file timestamp than the commits made above -- confirm.
sleep 1
cg-branch-add M ../M/.git || die "adding M branch"
echo "Rn" | cg-update M || die "merging M"

# In M: update from origin (R); per the die message this is expected to be
# a fast-forward to R's head.
sleep 1
cd ../M || die "cd ../M"
cg-update origin || die "fast forwarding to R"

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-14 21:44 git-rev-list in local commit order Sean
@ 2005-05-15 19:48 ` Thomas Gleixner
  2005-05-15 19:57   ` Sean
  0 siblings, 1 reply; 17+ messages in thread
From: Thomas Gleixner @ 2005-05-15 19:48 UTC (permalink / raw)
  To: Sean; +Cc: git

On Sat, 2005-05-14 at 17:44 -0400, Sean wrote:
> Attached is a preliminary hackish patch to sort git-rev-list in local
> commit order. 

+unsigned long sha1_local_date(const unsigned char *sha1)
+{
+       struct stat st;
+       if (find_sha1_file(sha1, &st))
+               return st.st_mtime;
+       return 0;
+}

Do you really want to base workflow and history information on file
times ?


File times are local and completely error prone in distributed
environments. 


tglx



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 19:48 ` Thomas Gleixner
@ 2005-05-15 19:57   ` Sean
  2005-05-15 20:44     ` Thomas Gleixner
  0 siblings, 1 reply; 17+ messages in thread
From: Sean @ 2005-05-15 19:57 UTC (permalink / raw)
  To: tglx; +Cc: git

On Sun, May 15, 2005 3:48 pm, Thomas Gleixner said:
> On Sat, 2005-05-14 at 17:44 -0400, Sean wrote:
>> Attached is a preliminary hackish patch to sort git-rev-list in local
>> commit order.
>
> +unsigned long sha1_local_date(const unsigned char *sha1)
> +{
> +       struct stat st;
> +       if (find_sha1_file(sha1, &st))
> +               return st.st_mtime;
> +       return 0;
> +}
>
> Do you really want to base workflow and history information on file
> times ?

The local commit order just isn't all that important in many situations. 
And for situations where it is important, this proposal seems completely
adequate.   Mind you, the patch in question is complete crap.

> File times are local and completely error prone in distributed
> environments.

I disagree that they're inherently error prone, steps can be taken to make
them as secure as you desire.  Also, many people just will not care about
this local-commit-time as they will simply be tracking a remote
repository.   For applications like David Woodhouse's need to present the
newest commits first on a web page, this is _completely_ adequate.   I've
yet to see an intended use for this information that isn't completely
handled by this proposal.  After all, most people using git are getting by
just fine without such a facility today.

Regards,
Sean



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 19:57   ` Sean
@ 2005-05-15 20:44     ` Thomas Gleixner
  2005-05-15 20:45       ` Sean
  0 siblings, 1 reply; 17+ messages in thread
From: Thomas Gleixner @ 2005-05-15 20:44 UTC (permalink / raw)
  To: Sean; +Cc: git

On Sun, 2005-05-15 at 15:57 -0400, Sean wrote:
> Afterall, most people using git are getting by
> just fine without such a facility today.


Axiom 1:
Sean knows exactly what people care about

Axiom 2:
Time is a reliable source of information.

Axiom 3:
All information except X can be derived from time.

Axiom 4:
Most people don't care about X, therefore X is irrelevant.

Axiom 5:
If doubts, see Axiom 1

tglx



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 20:44     ` Thomas Gleixner
@ 2005-05-15 20:45       ` Sean
  2005-05-15 21:13         ` Thomas Gleixner
  0 siblings, 1 reply; 17+ messages in thread
From: Sean @ 2005-05-15 20:45 UTC (permalink / raw)
  To: tglx; +Cc: git

On Sun, May 15, 2005 4:44 pm, Thomas Gleixner said:
> On Sun, 2005-05-15 at 15:57 -0400, Sean wrote:
>> Afterall, most people using git are getting by
>> just fine without such a facility today.
>
> Axiom 1:
> Sean knows exactly what people care about
>
> Axiom 2:
> Time is a reliable source of information.
>
> Axiom 3:
> All information except X can be derived from time.
>
> Axiom 4:
> Most people dont care about X, therefor X is irrelevant.
>
> Axiom 5:
> If doubts, see Axiom 1
>

Thomas,

You can continue the personal attacks or you can simply explain to the
list what you are trying to accomplish and why it is important and why any
other proposal besides yours isn't worthy.

Sean




^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 20:45       ` Sean
@ 2005-05-15 21:13         ` Thomas Gleixner
  2005-05-15 21:21           ` Sean
  0 siblings, 1 reply; 17+ messages in thread
From: Thomas Gleixner @ 2005-05-15 21:13 UTC (permalink / raw)
  To: Sean; +Cc: git

On Sun, 2005-05-15 at 16:45 -0400, Sean wrote:

> You can continue the personal attacks or you can simply explain to the
> list what you are trying to accomplish and why it is important and why any
> other proposal besides yours isn't worthy.

I did never say, that my proposal is the world formula, but I have more
than once explained, why time is the worst source of information.

You keep beating on time as a reliable source of information and tell me
that most people are completely happy with it. You must have access to a
quite good opinion survey.


Time of files or time in commit blobs is not a reliable information to
keep track of
 - workflows
 - history
That's all I'm talking about and it is the concern of others too.

In "git" repositories the only reliable source of information is the
parent child relationship. This information is only partially reliable
due to the head forward scenario. I think we agreed on this, right ?
You have no other reliable source of information due to the fact that
committer names are not unique.

> I disagree that they're inherently error prone, 
> steps can be taken to make them as secure as you desire.

You continue to propose stuff which is not viable. Can you enforce 
- NTP synchronisation
- the correct usage of rsync options 
- timestamp aware backups 

No, you can't.

Why did the mail people resort to "In-Reply-To", "Message-ID" and
"References" ? Because time turned out to be an unreliable source of
information. Please read the related discussions before you argue that
time based solutions are sufficient.

Time is illusion. 


tglx




^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 21:13         ` Thomas Gleixner
@ 2005-05-15 21:21           ` Sean
  2005-05-15 21:30             ` Thomas Gleixner
  0 siblings, 1 reply; 17+ messages in thread
From: Sean @ 2005-05-15 21:21 UTC (permalink / raw)
  To: tglx; +Cc: git

On Sun, May 15, 2005 5:13 pm, Thomas Gleixner said:
> On Sun, 2005-05-15 at 16:45 -0400, Sean wrote:
>
>> You can continue the personal attacks or you can simply explain to the
>> list what you are trying to accomplish and why it is important and why
>> any
>> other proposal besides yours isn't worthy.
>
> I did never say, that my proposal is the world formula, but I have more
> than once explained, why time is the worst source of information.
>
> You keep beating on time as a reliable source of information and tell me
> that most people are completely happy with it. You must have access to a
> quite good opinion survey.
>
> Time of files or time in commit blobs is not a reliable information to
> keep track of
>  - workflows
>  - history
> Thats all I'm talking about and it is the concern of others too.
>
> In "git" repositories the only reliable source of information is the
> parent child relationship. This information is only partially reliable
> due to the head forward scenario. I think we agreed on this, right ?
> You have no other reliable source of information due to the fact that
> committer names are not unique.
>
>> I disagree that they're inherently error prone,
>> steps can be taken to make them as secure as you desire.
>
> You continue to propose stuff which is not viable. Can you enforce
> - NTP syncronisation
> - the correct usage of rsync options
> - timestamp aware backups
>
> No, you can't.
>
> Why did the mail people resort to "In-Reply-To", "Message-ID" and
> "References" ? Because time turned out to be an inreliable source of
> information. Please read the related discussions before you argue that
> time based solutions are sufficient.
>
> Time is illusion.

What you're missing is that time is only important in this case to deduce
the relative age of each commit LOCALLY.   The intention of this proposal
is not to allow time comparison of commits between repositories.  In fact,
you'll see if you look closely, that you don't need to do that in order to
solve the stated problem of sorting the commits by the time they were
merged LOCALLY.

Cheers,
Sean



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 21:21           ` Sean
@ 2005-05-15 21:30             ` Thomas Gleixner
  2005-05-15 21:43               ` Sean
  0 siblings, 1 reply; 17+ messages in thread
From: Thomas Gleixner @ 2005-05-15 21:30 UTC (permalink / raw)
  To: Sean; +Cc: git

On Sun, 2005-05-15 at 17:21 -0400, Sean wrote:
> > Time is illusion.
> 
> What you're missing is that time is only important in this case to deduce
> the relative age of each commit LOCALLY.   The intention of this proposal
> is not to allow time comparison of commits between repositories. 


I do not want to compare times. I want to figure out workflows and
histories between different repositories.

>  In fact,
> you'll see if you look closely, that you don't need to do that in order to
> solve the stated problem of sorting the commits by the time they were
> merged LOCALLY.

Even LOCALLY is no guarantee for correct timestamps.


tglx



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 21:30             ` Thomas Gleixner
@ 2005-05-15 21:43               ` Sean
  2005-05-15 22:13                 ` Thomas Gleixner
  0 siblings, 1 reply; 17+ messages in thread
From: Sean @ 2005-05-15 21:43 UTC (permalink / raw)
  To: tglx; +Cc: git

On Sun, May 15, 2005 5:30 pm, Thomas Gleixner said:
> On Sun, 2005-05-15 at 17:21 -0400, Sean wrote:
>> > Time is illusion.
>>
>> What you're missing is that time is only important in this case to
>> deduce
>> the relative age of each commit LOCALLY.   The intention of this
>> proposal
>> is not to allow time comparison of commits between repositories.
>
> I do not want to compare times. I want to figure out workflows and
> histories between different repositories.

Well I honestly don't know what you want.   If I wanted to include a
"fortune" line in every commit and couldn't explain what value it
provided, i'd expect you or others to object.

My time based proposal solves the issue of :

Rn------\
Rn-1    Mn
Rn-2    Mn-1
Rn-3 ---/
Initial

Showing up in two repositories sorted based on the order they were
committed locally.  This was an issue that you stated you were trying to
solve.  The test case works just as advertised.  Remote times don't
matter, all that matters is the time you merge the objects locally.

> Even LOCALLY is no guarantee for correct timestamps.

Sure, but then your repoid might have gone missing or be set incorrectly
too.   One nice thing if your time is wrong, you can simply reset the
timestamp on the file.   If your repo-id is wrong, you have to recast the
commit object which will get a different SHA1 number and make things more
difficult.

Sean



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 21:43               ` Sean
@ 2005-05-15 22:13                 ` Thomas Gleixner
  2005-05-16 21:25                   ` Sean
  0 siblings, 1 reply; 17+ messages in thread
From: Thomas Gleixner @ 2005-05-15 22:13 UTC (permalink / raw)
  To: Sean; +Cc: git

On Sun, 2005-05-15 at 17:43 -0400, Sean wrote:

> Well I honestly don't know what you want.   If I wanted to include a
> "fortune" line in every commit and couldn't explain what value it
> provided, i'd expect you or others to object.

Last try.

A repository Id makes it possible to identify workflows in and across
repositories. 

This information is valuable for me and others due to already discussed
reasons. 

I accept that it is irrelevant for you.

tglx



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-15 22:13                 ` Thomas Gleixner
@ 2005-05-16 21:25                   ` Sean
  2005-05-16 23:46                     ` Linus Torvalds
  0 siblings, 1 reply; 17+ messages in thread
From: Sean @ 2005-05-16 21:25 UTC (permalink / raw)
  To: tglx; +Cc: git

On Sun, May 15, 2005 6:13 pm, Thomas Gleixner said:
> Last try.
>
> A repository Id makes it possible to identify workflows in and across
> repositories.

Sorry, your proposal falls short, accurate work flow would allow you to
show every repository a commit passed through on the way to its final
destination.  Your proposal does not allow that; as discussed.  Nor does
it handle multiple projects or branches within a single repository.

As noted by others, using git often means the creation of temporary
repositories, hardly something that deserves an identifier.  Git, by
design, doesn't give a hoot about individual repositories.

And you also haven't addressed what to do when someone else uses say,
Linus' repoid, as their own.  It seems like a risk to have the operation
of each repository depend on a value anyone else can duplicate.  Linus
can't control what repoid everyone else uses, he can control the time on
his own machine.  Unique repoid's are an illusion.

> This information is valuable for me and others due to already discussed
> reasons.

Why should everyone else manage repoids in their own personal repository
for you; what value will _they_ get out of it?

> I accept that is irrelevant for you.

Personally I don't really care either way.  But you haven't given one real
example where it is actually needed to do useful work.  Making pretty
graphs on a web page doesn't count if they're not useful to anyone.  You
shouldn't force everyone else to manage repoid's unless there is some
value for _them_.

If you're still going to pursue this, at least make sure repoid is not
mandatory.  If a local repository identifier isn't defined, don't create a
repoid line in the commits.

Sean



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-16 21:25                   ` Sean
@ 2005-05-16 23:46                     ` Linus Torvalds
  2005-05-17  9:52                       ` Thomas Gleixner
  0 siblings, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2005-05-16 23:46 UTC (permalink / raw)
  To: Sean; +Cc: tglx, git



On Mon, 16 May 2005, Sean wrote:
> 
> And you also haven't addressed what to do when someone else uses say,
> Linus' repoid, as their own.  It seems like a risk to have the operation
> of each repository depend on a value anyone else can duplicate.  Linus
> can't control what repoid everyone else uses, he can control the time on
> his own machine.  Unique repoid's are an illusion.

Yes. I'm not a huge fan of the notion of "repo ID's". One reason is that I
actually really really like the notion of anonymous repositories, so that
when I do something stupid, and blow away one of my less successful
repositories and continue with another one, nobody ever sees it (and yes,
this happens - in my BK usage I occasionally cloned my repo for some
testing, and then ended up using the _cloned_ repo for the real work, and
totally blowing away my original one, and renamed my cloned one back to
where my main one is).

That said, while I actually think that time/date matters, I don't think it 
should matter a lot.

I don't see why people don't just use the "committer" name for this.  
That's really what you want, and it ends up being a very good
approximation of "repository ID" for a commit. Sure, people end up having
multiple repositories, and thus you'll occasionally see merges that end up
merging two heads with the same "repo ID", but does anybody really care? I
doubt it.

For example, if you have a company Q&A policy that says that you want to 
keep commits to different repos separate, just make sure that those repos 
are on different machines or are accessed with different users. Or write 
some simple wrapper scripts that make sure to set GIT_COMMITTER_EMAIL to 
the proper value (say, the wrapper could be as simple as

	#!/bin/sh
	export GIT_COMMITTER_EMAIL=$(cat .git/committer_email)
	real-git-commit "$@"

and then you just create a ".git/committer_email" file per repository that 
contains the "repo ID" you want to fake.

		Linus

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-16 23:46                     ` Linus Torvalds
@ 2005-05-17  9:52                       ` Thomas Gleixner
  2005-05-17 15:43                         ` Linus Torvalds
  0 siblings, 1 reply; 17+ messages in thread
From: Thomas Gleixner @ 2005-05-17  9:52 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Sean, git

On Mon, 2005-05-16 at 16:46 -0700, Linus Torvalds wrote:
> Yes. I'm not ahuge fan of the notion of "repo ID's". One reason is that I
> actually really really like the notion of anonymous repositories, so that
> when I do something stupid, and blow away one of my less successful
> repositories and continue with another one, nobody ever sees it (and yes,
> this happens - in my BK usage I occasionally cloned my repo for some
> testing, and then ended up using the _cloned_ repo for the real work, and
> totally blowing away ymy original one, and renamed my cloned one back to
> where my main one is).

What you blow away is a work space. But at the end you push the result
of whatever work space you kept into a public available repository. Also
BK stores a somewhat hidden repository (not workspace) id.

My idea of repository id was not the notion of workspace separation. I
don't care in which directory and on which machine you or whoever
commits a line of code. I care where the change appears in a public
repository, which is unique.

> I don't see why people don't just use the "committer" name for this.  
> That's really what you want, and it ends up being a very good
> approximation of "repository ID" for a commit. Sure, people end up having
> multiple reposiories, and thus you'll occasionally see merges that end up
> merging two heads with the same "repo ID", but does anybody really care? I
> doubt it.

I came up with this after I started "git tracker" and David Woodhouse
asked me to make it possible to look at the history of his repository
from the repository POV rather than from the cloned global history of
torvalds/linux-2.6.git. 

Sure I have retrieved the information from committer name and committer
mail, but when I tried to do the same with Dave Miller's and Greg's
repositories it turned out to be impossible as they use the same
name/mail for each of their repositories.

> For example, if you have a company Q&A policy that says that you want to 
> keep commits to different repos separate, just make sure that those repos 
> are on different machines or are accessed with different users. Or write 
> some simple wrapper scripts that make sure to set GIT_COMMITTER_EMAIL to 
> the proper value (say, the wrapper could be as simple as
> 
> 	#!/bin/sh
> 	export GIT_COMMITTER_EMAIL=$(cat .git/committer_email)
> 	real-git-commit "$@"
> 
> and then you just create a ".git/committer_email" file per repository that 
> contains the "repo ID" you want to fake.

Might be a workable solution. I would prefer if this would be a part of
the core plumbing.
Would you accept a patch for commit-tree which checks this file
for existence and uses the content in case it's there?

tglx



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-17  9:52                       ` Thomas Gleixner
@ 2005-05-17 15:43                         ` Linus Torvalds
  2005-05-17 17:05                           ` Thomas Gleixner
  0 siblings, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2005-05-17 15:43 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Sean, git



On Tue, 17 May 2005, Thomas Gleixner wrote:
> 
> What you blow away is a work space. But at the end you push the result
> of whatever work space you kept into a public available repository. Also
> BK stores a somewhat hidden repository (not workspace) id.

No.

The public repo is secondary. Really. It has no meaning. The only thing 
that matters is what you call "workspace".

> My idea of repository id was not the notion of workspace seperation. I
> dont care in which directory and on which machine you or who ever
> commits a line of code. I care where the change appears in a public
> repository, which is unique.

You seem to think that the repository on master.kernel.org is more 
important than the one on my private machine, and you're _wrong_.

It's the _private_ repositories that are the important ones. The public 
ones are a communication channel, nothing more. They have no importance on 
their own.

I've blown the public one away several times. With BK, we've had disk
corruption on kernel.org, we've had break-ins on bkbits.net, and we've had
repository corruption due to people editing the SCCS files by hand. Any
number of silly problems, in other words. The result? Blow the public tree
away, restore it from one of the private ones from a machine that you
trust.

I _never_ look at my public tree. I literally have a small script called 
"push-all" in my git repositories, and it does:

	#!/bin/sh
	echo master.kernel.org:
	rsync -av --delete --exclude-from=.exclude .git/ master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
	...

ie it just pushes my stuff to a few other places.

In other words, the public stuff is the _slave_. It has no meaning. The 
only important one is the one that the _developer_ works on.

Of course, this is not to say that everybody needs to take my approach.
The nice thing about distributed systems is that a centralized system is
just a trivial special case of them, so somebody else, who uses git as if
it were CVS, could say "repo xxxx at git-master:/pub/git-root/project is
the 'main' repository, and all the workspaces are just temporary
workspaces".

But from a git _design_ point (and from a kernel usage point), the belief
that a "workspace" is somehow less important than a "central repository"
is just very very very wrong. Each workspace is its own repository, and 
it's the _local_ ones that matter, not some "central repository".

		Linus

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-17 15:43                         ` Linus Torvalds
@ 2005-05-17 17:05                           ` Thomas Gleixner
  2005-05-17 17:44                             ` Linus Torvalds
  0 siblings, 1 reply; 17+ messages in thread
From: Thomas Gleixner @ 2005-05-17 17:05 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Sean, git

On Tue, 2005-05-17 at 08:43 -0700, Linus Torvalds wrote:
> > My idea of repository id was not the notion of workspace separation. I
> > don't care in which directory and on which machine you or whoever
> > commits a line of code. I care where the change appears in a public
> > repository, which is unique.
> 
> You seem to think that the repository on master.kernel.org is more 
> important than the one on my private machine, and you're _wrong_.

For me yes, as I have no access to your private ones and I can only rely
on the integrity of the publicly accessible ones.

For the individual developer the private workspaces are surely more
important. I never doubted that, but I do not care whether you use one
or ten workspaces and which one of them you blow away or use for
updating of master.kernel.org. 

tglx



^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list  in local commit order
  2005-05-17 17:05                           ` Thomas Gleixner
@ 2005-05-17 17:44                             ` Linus Torvalds
  2005-05-18  5:16                               ` Jon Seymour
  0 siblings, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2005-05-17 17:44 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Sean, git



On Tue, 17 May 2005, Thomas Gleixner wrote:
>
> On Tue, 2005-05-17 at 08:43 -0700, Linus Torvalds wrote:
> > > My idea of repository id was not the notion of workspace separation. I
> > > don't care in which directory and on which machine you or whoever
> > > commits a line of code. I care where the change appears in a public
> > > repository, which is unique.
> > 
> > You seem to think that the repository on master.kernel.org is more 
> > important than the one on my private machine, and you're _wrong_.
> 
> For me yes, as I have no access to your private ones and I can only rely
> on the integrity of the publicly accessible ones.
> 
> For the individual developer the private workspaces are surely more
> important. I never doubted that, but I do not care whether you use one
> or ten workspaces and which one of them you blow away or use for
> updating of master.kernel.org. 

But how would you track "repositoryness", when the repository you care 
about has absolutely nothing to do with the repositories that any of the 
developers who created it in the first place care about?

See the problem? You can't. You seem to want to track information that
simply does not _exist_.

Put another way: the repository ID of the eventual public "target"  
repository only becomes available once the information has been pushed
there, not before. So a "commit" cannot contain that information, because
at commit time, you fundamentally cannot know what the eventual public
repository (if any) will be.

So the public repo really is nothing but a shadow of the real work, and 
the only reliable ordering you can do will have to depend on local 
information (ie things like the committer "email" value).

Now, what you _can_ do (and what the snapshot mechanism and the commit 
mailing list scripts do) is to create a "publicly visible timeline" thing, 
ie you can at regular intervals generate a snapshot of "what is the state 
of public repo X" and you'll get a "local commit ordering" from that.

But that local commit ordering will fundamentally depend on exactly when
and how often you do the snapshotting and when I (or somebody else)
happened to push to that public repo, so it will inevitably be something
you can never re-create later from just the final repository contents.  
IOW, it's not something that "git-rev-list" can re-create - the only way
to recreate it is literally to build up a separate list of "what was the
head commit at time X" outside of the repository.

		Linus

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: git-rev-list in local commit order
  2005-05-17 17:44                             ` Linus Torvalds
@ 2005-05-18  5:16                               ` Jon Seymour
  0 siblings, 0 replies; 17+ messages in thread
From: Jon Seymour @ 2005-05-18  5:16 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Thomas Gleixner, Sean, git

On 5/18/05, Linus Torvalds <torvalds@osdl.org> wrote:
> 
> 
> On Tue, 17 May 2005, Thomas Gleixner wrote:
> >
> > On Tue, 2005-05-17 at 08:43 -0700, Linus Torvalds wrote:
> > > > My idea of repository id was not the notion of workspace separation. I
> > > > don't care in which directory and on which machine you or whoever
> > > > commits a line of code. I care where the change appears in a public
> > > > repository, which is unique.
> > >
> > > You seem to think that the repository on master.kernel.org is more
> > > important than the one on my private machine, and you're _wrong_.
> >
> > For me yes, as I have no access to your private ones and I can only rely
> > on the integrity of the publicly accessible ones.
> >
> > For the individual developer the private workspaces are surely more
> > important. I never doubted that, but I do not care whether you use one
> > or ten workspaces and which one of them you blow away or use for
> > updating of master.kernel.org.
> 
> But how would you track "repositoryness", when the repository you care
> about has absolutely nothing to do with the repositories that any of the
> developers who created it in the first place care about?
> 
> See the problem? You can't. You seem to want to track information that
> simply does not _exist_.
> 
> Put another way: the repository ID of the eventual public "target"
> repository only becomes available once the information has been pushed
> there, not before. So a "commit" cannot contain that information, because
> at commit time, you fundamentally cannot know what the eventual public
> repository (if any) will be.

Earlier in a related thread, I argued that what everyone else has been
calling a repo-id is actually a workspace id. Your GIT_COMMITTER_EMAIL
idea would have the same practical effect as a separate workspace id,
though it does pollute the interpretation of the e-mail id's since
they are no longer pure e-mail id's...

Would you be amenable to a patch that allowed tools to put attributes
of the form:

   x-"some-attribute" (' ' [^\0]*)+ '\0'

into a commit header?

If, over time, x-"some-attribute" became universally accepted as
useful, a new release of git could bless it as official and the 'x-'
prefix could be dropped.

In the meantime, tools could experiment with additional commit markers
as they see fit without affecting the interoperability of other tools
which use only the blessed markers.

Of course, a constraint on the semantics of an x-* attribute would
ideally be that its value must be fixed for all time once the commit
happens since there is no way to change it without creating a new
commit.

jon.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2005-05-18  5:16 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-05-14 21:44 git-rev-list in local commit order Sean
2005-05-15 19:48 ` Thomas Gleixner
2005-05-15 19:57   ` Sean
2005-05-15 20:44     ` Thomas Gleixner
2005-05-15 20:45       ` Sean
2005-05-15 21:13         ` Thomas Gleixner
2005-05-15 21:21           ` Sean
2005-05-15 21:30             ` Thomas Gleixner
2005-05-15 21:43               ` Sean
2005-05-15 22:13                 ` Thomas Gleixner
2005-05-16 21:25                   ` Sean
2005-05-16 23:46                     ` Linus Torvalds
2005-05-17  9:52                       ` Thomas Gleixner
2005-05-17 15:43                         ` Linus Torvalds
2005-05-17 17:05                           ` Thomas Gleixner
2005-05-17 17:44                             ` Linus Torvalds
2005-05-18  5:16                               ` Jon Seymour

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).