Git development
 help / color / mirror / Atom feed
* Re: [PATCH] git-grep: --and to combine patterns with and instead of or
From: Junio C Hamano @ 2006-06-30 18:16 UTC (permalink / raw)
  To: jnareb; +Cc: git
In-Reply-To: <e83p0q$dla$1@sea.gmane.org>

Jakub Narebski <jnareb@gmail.com> writes:

> Because --near needs an expression it check context for (context is for
> found match of lhs expression). So
>
>   -e foo --near \( -e A --or -e B \)
>
> means lines containing foo and either A or B in the context _for "foo"_.

The syntax and semantics of --near I suggested (and you are
following) and what Matthias discusses are different and I think
that is why you two are talking past each other.

What I originally suggested is that you can (syntactically)
replace --near with --and.  That is, the LHS is the match and
RHS is "the LHS must match, but in addition RHS must match but
unlike --and RHS does not have to be exactly on the same line
but it is OK if it is a line somewhere nearby".

The --near Matthias talk about is syntactically not like --and
but more like --not.  It takes a condition for a line after
that, and loosens it to cover nearby lines.  So "-e A"
means "the line must have A on it" but "--near -e A" means "the
line must be nearby a line that satisfies `-e A'".

Matthias's "--near EXP" is spelled as "-e '' --near EXP" (the
first one is always true) with our syntax, in other words.

I do not think either of these semantics is invalid; they are
just different.  The version by Matthias is more general and
more expressive.

^ permalink raw reply

* Re: [PATCH] git-grep: --and to combine patterns with and instead of or
From: Matthias Lederhofer @ 2006-06-30 18:20 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jakub Narebski
In-Reply-To: <7vfyhmil07.fsf@assigned-by-dhcp.cox.net>

Junio C Hamano wrote:
> Matthias Lederhofer <matled@gmx.net> writes:
> 
> > -e foo --or  --near \( -e A -- or -e B \)
> > would mean lines containing foo or having A or B in the context.
> 
> How would that "--near" be useful?  You will see A or B either way.
Ok, this example was quite bad.

If --near is binary
-e foo --and ( --near=3:0 -e A --or --near=0:3 -e B )
could not be done anymore, could it (without repeating the first
pattern)? (Find foo with A in the 3 lines before or B in the 3 lines
after the line.)
Without different contexts for multiple --near it probably does not
matter if --near is binary or unary.

^ permalink raw reply

* git object hash cleanups
From: Linus Torvalds @ 2006-06-30 18:20 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List


This IMNSHO cleans up the object hashing.

The hash expansion is separated out into a function of its own, the hash 
array (and size) names are made more obvious, and the code is generally 
made to look a bit more like the object-ref hashing.

It also gets rid of "find_object()" returning an index (or negative 
position if no object is found), since that is made redundant by the 
simplified object rehashing. The basic operation is now "lookup_object()" 
which just returns the object itself.

There's an almost unmeasurable speed increase, but more importantly, I 
think the end result is more readable.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---

I tried to be really careful, and this should all be good, but I'm still 
embarrassed about my hash insertion bug in object-refs.c, so people should 
double- and triple-check this.

diff --git a/object.c b/object.c
index 31c77ea..37277f9 100644
--- a/object.c
+++ b/object.c
@@ -5,88 +5,97 @@ #include "tree.h"
 #include "commit.h"
 #include "tag.h"
 
-static struct object **objs;
-static int nr_objs, obj_allocs;
+static struct object **obj_hash;
+static int nr_objs, obj_hash_size;
 
 unsigned int get_max_object_index(void)
 {
-	return obj_allocs;
+	return obj_hash_size;
 }
 
 struct object *get_indexed_object(unsigned int idx)
 {
-	return objs[idx];
+	return obj_hash[idx];
 }
 
 const char *type_names[] = {
 	"none", "blob", "tree", "commit", "bad"
 };
 
+static unsigned int hash_obj(struct object *obj, unsigned int n)
+{
+	unsigned int hash = *(unsigned int *)obj->sha1;
+	return hash % n;
+}
+
+static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size)
+{
+	int j = hash_obj(obj, size);
+
+	while (hash[j]) {
+		j++;
+		if (j >= size)
+			j = 0;
+	}
+	hash[j] = obj;
+}
+
 static int hashtable_index(const unsigned char *sha1)
 {
 	unsigned int i;
 	memcpy(&i, sha1, sizeof(unsigned int));
-	return (int)(i % obj_allocs);
+	return (int)(i % obj_hash_size);
 }
 
-static int find_object(const unsigned char *sha1)
+struct object *lookup_object(const unsigned char *sha1)
 {
 	int i;
+	struct object *obj;
 
-	if (!objs)
-		return -1;
+	if (!obj_hash)
+		return NULL;
 
 	i = hashtable_index(sha1);
-	while (objs[i]) {
-		if (memcmp(sha1, objs[i]->sha1, 20) == 0)
-			return i;
+	while ((obj = obj_hash[i]) != NULL) {
+		if (!memcmp(sha1, obj->sha1, 20))
+			break;
 		i++;
-		if (i == obj_allocs)
+		if (i == obj_hash_size)
 			i = 0;
 	}
-	return -1 - i;
+	return obj;
 }
 
-struct object *lookup_object(const unsigned char *sha1)
+static void grow_object_hash(void)
 {
-	int pos = find_object(sha1);
-	if (pos >= 0)
-		return objs[pos];
-	return NULL;
+	int i;
+	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	struct object **new_hash;
+
+	new_hash = calloc(new_hash_size, sizeof(struct object *));
+	for (i = 0; i < obj_hash_size; i++) {
+		struct object *obj = obj_hash[i];
+		if (!obj)
+			continue;
+		insert_obj_hash(obj, new_hash, new_hash_size);
+	}
+	free(obj_hash);
+	obj_hash = new_hash;
+	obj_hash_size = new_hash_size;
 }
 
 void created_object(const unsigned char *sha1, struct object *obj)
 {
-	int pos;
-
 	obj->parsed = 0;
-	memcpy(obj->sha1, sha1, 20);
-	obj->type = TYPE_NONE;
 	obj->used = 0;
+	obj->type = TYPE_NONE;
+	obj->flags = 0;
+	memcpy(obj->sha1, sha1, 20);
 
-	if (obj_allocs - 1 <= nr_objs * 2) {
-		int i, count = obj_allocs;
-		obj_allocs = (obj_allocs < 32 ? 32 : 2 * obj_allocs);
-		objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
-		memset(objs + count, 0, (obj_allocs - count)
-				* sizeof(struct object *));
-		for (i = 0; i < obj_allocs; i++)
-			if (objs[i]) {
-				int j = find_object(objs[i]->sha1);
-				if (j != i) {
-					j = -1 - j;
-					objs[j] = objs[i];
-					objs[i] = NULL;
-				}
-			}
-	}
-
-	pos = find_object(sha1);
-	if (pos >= 0)
-		die("Inserting %s twice\n", sha1_to_hex(sha1));
-	pos = -pos-1;
+	if (obj_hash_size - 1 <= nr_objs * 2)
+		grow_object_hash();
 
-	objs[pos] = obj;
+	insert_obj_hash(obj, obj_hash, obj_hash_size);
 	nr_objs++;
 }
 

^ permalink raw reply related

* Re: [PATCH/RFC] Add git-instaweb, instantly browse the working repo with gitweb
From: Junio C Hamano @ 2006-06-30 18:24 UTC (permalink / raw)
  To: Eric Wong; +Cc: git
In-Reply-To: <11513991301372-git-send-email-normalperson@yhbt.net>

Eric Wong <normalperson@yhbt.net> writes:

> I got tired of having to configure gitweb for every repository
> I work on.  I sometimes prefer gitweb to standard GUIs like gitk
> or gitview; so this lets me automatically configure gitweb to
> browse my working repository and also opens my browser to it.
>
> Signed-off-by: Eric Wong <normalperson@yhbt.net>

This is cute but I haven't seen much responses to it yet.  Are
people uninterested?

^ permalink raw reply

* [PATCH/RFT] upload-pack.c: <sys/poll.h> includes <ctype.h> on OpenBSD 3.8
From: Junio C Hamano @ 2006-06-30 18:30 UTC (permalink / raw)
  To: git; +Cc: Randal L. Schwartz
In-Reply-To: <7vk66yilxd.fsf@assigned-by-dhcp.cox.net>

Merlyn reports that <sys/poll.h> on OpenBSD 3.8 includes <ctype.h>
and having our custom ctype (done in git-compat-util.h which is
included via cache.h) makes upload-pack.c uncompilable.  Try to
work it around by including the system headers first.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

 * Can somebody with OpenBSD who can reproduce the original
   problem confirm or reject this patch, so that the issue can
   be resolved before 1.4.1, please?

diff --git a/upload-pack.c b/upload-pack.c
index 2b70c3d..b18eb9b 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1,3 +1,6 @@
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/poll.h>
 #include "cache.h"
 #include "refs.h"
 #include "pkt-line.h"
@@ -5,9 +8,6 @@ #include "tag.h"
 #include "object.h"
 #include "commit.h"
 #include "exec_cmd.h"
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/wait.h>
 
 static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=nn] <dir>";
 

^ permalink raw reply related

* Re: [PATCH/RFT] upload-pack.c: <sys/poll.h> includes <ctype.h> on OpenBSD 3.8
From: Jakub Narebski @ 2006-06-30 19:00 UTC (permalink / raw)
  To: git
In-Reply-To: <7vr716h4xm.fsf_-_@assigned-by-dhcp.cox.net>

Junio C Hamano wrote:

> Try to work it around by including the system headers first.

Shouldn't it be always the case?

-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git

^ permalink raw reply

* Re: [PATCH] git-grep: --and to combine patterns with and instead of or
From: Jakub Narebski @ 2006-06-30 19:11 UTC (permalink / raw)
  To: git
In-Reply-To: <7v1wt6ik4x.fsf@assigned-by-dhcp.cox.net>

Junio C Hamano wrote:

> The --near Matthias talk about is syntactically not like --and
> but more like --not.  It takes a condition for a line after
> that, and loosens it to cover nearby lines.  So "-e A"
> means "the line must have A on it" but "--near -e A" means "the
> line must be nearby a line that satisfies `-e A'".
> 
> Matthias's "--near EXP" is spelled as "-e '' --near EXP" (the
> first one is always true) with our syntax, in other words.
> 
> I do not think either of these semantics is invalid; they are
> just different.  The version by Matthias is more general and
> more expressive.

It also uses the fact that grep search for _lines_, the fact I have forgot
about. But if we cannot search for multiline regexp using git-grep,
Matthias version is truly more expressive, especially with context limiting
extension. 

-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git

^ permalink raw reply

* Re: [PATCH] git-grep: --and to combine patterns with and instead of or
From: Junio C Hamano @ 2006-06-30 20:26 UTC (permalink / raw)
  To: jnareb; +Cc: git
In-Reply-To: <e83t0m$4s0$2@sea.gmane.org>

Jakub Narebski <jnareb@gmail.com> writes:

> Matthias version is truly more expressive, especially with context limiting
> extension. 

That's orthogonal.  I do not think there is any reason you
cannot make the version whose --near is similar to --and to
understand different ranges for each "neighbor search"
expression using --near=M:N syntax.

Now stop talking and code it up, please ;-).

^ permalink raw reply

* Re: [PATCH 13] autoconf: Append '-include config.mak.autogen' to config.mak if it is not present
From: Jakub Narebski @ 2006-06-30 20:29 UTC (permalink / raw)
  To: git
In-Reply-To: <200606301711.39635.jnareb@gmail.com>

Jakub Narebski wrote:

> +grep -q -s -F "-include ${config_file}" config.mak || \
> +        echo  "-include ${config_file}" >> config.mak

Gah, should be of course

+grep -q -s -F -e "-include ${config_file}" config.mak || \
+            echo "-include ${config_file}" >> config.mak

-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git

^ permalink raw reply

* Re: Prepare "git-merge-tree" for future work
From: Junio C Hamano @ 2006-06-30 20:52 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.64.0606291925230.12404@g5.osdl.org>

Linus Torvalds <torvalds@osdl.org> writes:

> In contrast, the ones to diff_filespec I've never really used, and I did 
> not want to compare blob objects, I very much wanted to compare in-memory 
> buffers (_and_ potentially blobs).
>
> So if you can show an easy example of how to populate a set of filespec 
> pairs (not with blobs - with in-memory generated data) and insert them 
> onto the lists, that would be good.

Ah, I see.  Your origin() function always returns a in-core
buffer from an existing blob (or NULL if it is a new file) but
result() returns either an existing blob resulting from the
tree-level merge, or a 3-way content merge result that does not
have an existing blob, and it is not obvious how to express the
latter as a filespec (all other cases you can stuff the blob
object name in the sha1[] member and if you choose to do the
read_sha1_file() yourself store the result in data member or you
can let diff_populate_filespec() read the data when diff
machinery needs it).

I think a filespec that has 0{40} sha1, with data already
populated and should_free/should_munmap members both set to
false might work for the in-memory data but I haven't tried.

I'd take the hint, and I will (eventually) take a look at it
if nobody beats me to it, but most likely not now, sorry.

^ permalink raw reply

* [RFC/PATCH 14] autoconf: Added --with/--without for openssl, curl, expat to ./configure
From: Jakub Narebski @ 2006-06-30 21:45 UTC (permalink / raw)
  To: git
In-Reply-To: <200606301711.39635.jnareb@gmail.com>


Added --with-PACKAGE[=PATH] (where PATH is prefix for PACKAGE
libraries and includes) and --without-PACKAGE (--with-PACKAGE=no)
support for curl, openssl, expat to configure.ac

Signed-off-by: Jakub Narebski <jnareb@gmail.com>

---

I'm not autoconf/m4 expert: could anyone tell me how to uppercase
PACKAGE name, so one could write MY_PARSE_WITH(openssl)?

How to add [=PATH] to --with-PACKAGE option description in a way
which does not screw up AS_HELP_WITH calculations. I could use
@<:@=PATH@:>@ which transforms to [=PATH], but AS_HELP_WITH counts
number of characters in source I think.

 configure.ac |   34 ++++++++++++++++++++++++++++++++++
 1 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/configure.ac b/configure.ac
index fcfc9ce..26d6f4d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -19,6 +19,22 @@ # Append LINE to file ${config_append}
 AC_DEFUN([MY_APPEND_LINE],
 [[echo "$1" >> "${config_append}"]])# MY_APPEND_LINE
 
+# MY_PARSE_WITH(PACKAGE)
+# ----------------------
+# For use in AC_ARG_WITH action-if-found, for packages default ON. 
+# * Set NO_PACKAGE=YesPlease  for --without-PACKAGE
+# * Set PACKAGEDIR=ARG for --with-PACKAGE=ARG
+# * Do nothing for --with-PACKAGE without ARG
+# PACKAGE option must be all uppercase
+AC_DEFUN([MY_PARSE_WITH],
+[[if test "$withval" = "no"; then \
+    MY_APPEND_LINE(NO_$1=YesPlease); \
+  elif test "$withval" != "yes"; then \
+    MY_APPEND_LINE($1DIR=$withval); \
+  fi; \
+]])# MY_PARSE_WITH
+
+
 # Checks for libraries.
 AC_MSG_NOTICE([CHECKS for libraries])
 AC_CHECK_LIB([crypto], [SHA1_Init],,MY_APPEND_LINE(NO_OPENSSL=YesPlease))
@@ -48,6 +64,24 @@ AC_CHECK_FUNC(strlcpy,,MY_APPEND_LINE(NO
 AC_CHECK_FUNC(setenv,,MY_APPEND_LINE(NO_SETENV=YesPlease))
 
 
+# Site configuration
+AC_MSG_NOTICE([CHECKS for site configuration])
+AC_ARG_WITH(curl, 
+AS_HELP_STRING([--with-curl],[support http(s):// transports (default is YES)])
+AS_HELP_STRING([],           [ARG can be also prefix for curl library and headers]),\
+MY_PARSE_WITH(CURL))
+
+AC_ARG_WITH(openssl,
+AS_HELP_STRING([--with-openssl],[use OpenSSL library (default is YES)])
+AS_HELP_STRING([],              [ARG can be prefix for openssl library and headers]),\
+MY_PARSE_WITH(OPENSSL))
+
+AC_ARG_WITH(expat,
+AS_HELP_STRING([--with-expat],[support git-push using http:// and https:// transports via WebDAV (default is YES)])
+AS_HELP_STRING([],            [ARG can be also prefix for expat library and headers]),\
+MY_PARSE_WITH(EXPAT))
+
+
 # Output files
 AC_CONFIG_FILES(["${config_file}":"${config_in}":"${config_append}"])
 AC_OUTPUT
-- 
1.4.0

^ permalink raw reply related

* Re: [RFC/PATCH 14] autoconf: Added --with/--without for openssl, curl, expat to ./configure
From: Pavel Roskin @ 2006-06-30 21:57 UTC (permalink / raw)
  To: Jakub Narebski; +Cc: git
In-Reply-To: <200606302345.17045.jnareb@gmail.com>

Hi Jakub,

On Fri, 2006-06-30 at 23:45 +0200, Jakub Narebski wrote:
> I'm not autoconf/m4 expert: could anyone tell me how to uppercase
> PACKAGE name, so one could write MY_PARSE_WITH(openssl)?

I don't quite understand what you want, but you could check m4_toupper.

> How to add [=PATH] to --with-PACKAGE option description in a way
> which does not screw up AS_HELP_WITH calculations. I could use
> @<:@=PATH@:>@ which transforms to [=PATH], but AS_HELP_WITH counts
> number of characters in source I think.

Try another pair of square brackets as quotes for literal contents.  In
this case, use [[=PATH]]

> +# MY_PARSE_WITH(PACKAGE)

By the way, it's better to use a prefix other than MY.  I thing
GIT_PARSE_WITH would be great.

-- 
Regards,
Pavel Roskin

^ permalink raw reply

* Re: [RFC/PATCH 14] autoconf: Added --with/--without for openssl, curl, expat to ./configure
From: Jakub Narebski @ 2006-06-30 22:32 UTC (permalink / raw)
  To: git
In-Reply-To: <1151704626.12008.5.camel@dv>

<posted & mailed>

Pavel Roskin wrote:

> On Fri, 2006-06-30 at 23:45 +0200, Jakub Narebski wrote:
>> I'm not autoconf/m4 expert: could anyone tell me how to uppercase
>> PACKAGE name, so one could write MY_PARSE_WITH(openssl)?
> 
> I don't quite understand what you want, but you could check m4_toupper.

Thanks. It works great. GIT_PARSE_WITH is now:

AC_DEFUN([GIT_PARSE_WITH],
[[PACKAGE=m4_toupper($1); \
if test "$withval" = "no"; then \
    GIT_APPEND_LINE(NO_${PACKAGE}=YesPlease); \
  elif test "$withval" != "yes"; then \
    GIT_APPEND_LINE(${PACKAGE}DIR=$withval); \
  fi; \
]])# GIT_PARSE_WITH

>> How to add [=PATH] to --with-PACKAGE option description in a way
>> which does not screw up AS_HELP_WITH calculations. I could use
>> @<:@=PATH@:>@ which transforms to [=PATH], but AS_HELP_WITH counts
>> number of characters in source I think.
> 
> Try another pair of square brackets as quotes for literal contents.  In
> this case, use [[=PATH]]

I suspect that AS_HELP_WITH does some strange quoting, or stripping. Both
[=PATH] and [[=PATH]] produces =PATH in ./configure --help output.
When using @<:@=PATH@:>@ I get [=PATH], but the description of option begins
line below.

>> +# MY_PARSE_WITH(PACKAGE)
> 
> By the way, it's better to use a prefix other than MY.  I thing
> GIT_PARSE_WITH would be great.

Any ideas for name of MY_APPEND_LINE(LINE)/GIT_APPEND_LINE(LINE) macro,
which as a result adds line to output (e.g. LINE = "NO_CURL=YesPlease")?

Thanks for all suggestions
-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git

^ permalink raw reply

* Re: [RFC/PATCH 14] autoconf: Added --with/--without for openssl, curl, expat to ./configure
From: Jakub Narebski @ 2006-06-30 22:34 UTC (permalink / raw)
  To: git
In-Reply-To: <200606302345.17045.jnareb@gmail.com>

Jakub Narebski wrote:

> +AC_ARG_WITH(expat,
> +AS_HELP_STRING([--with-expat],[support git-push using http:// and https:// transports via WebDAV (default is YES)])
> +AS_HELP_STRING([],            [ARG can be also prefix for expat library and headers]),\
> +MY_PARSE_WITH(EXPAT))

Of course to do anything for --with-expat=PATH case this patch needs
the one that introduces EXPATDIR (there was such patch on the list,
I'm not sure if it got accepted).

-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git

^ permalink raw reply

* [PATCH] Speed up history generation
From: Luben Tuikov @ 2006-07-01  0:59 UTC (permalink / raw)
  To: git

Speed up history generation as suggested by Linus.

Signed-off-by: Luben Tuikov <ltuikov@yahoo.com>
---
 gitweb/gitweb.cgi |    9 ++-------
 1 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/gitweb/gitweb.cgi b/gitweb/gitweb.cgi
index 035e76d..2705a93 100755
--- a/gitweb/gitweb.cgi
+++ b/gitweb/gitweb.cgi
@@ -2295,16 +2295,12 @@ sub git_history {
 	      "</div>\n";
 	print "<div class=\"page_path\"><b>/" . esc_html($file_name) . "</b><br/></div>\n";
 
-	open my $fd, "-|", "$gitbin/git-rev-list $hash | $gitbin/git-diff-tree -r --stdin --
\'$file_name\'";
-	my $commit;
+	open my $fd, "-|", "$gitbin/git-rev-list $hash -- \'$file_name\'";
 	print "<table cellspacing=\"0\">\n";
 	my $alternate = 0;
 	while (my $line = <$fd>) {
 		if ($line =~ m/^([0-9a-fA-F]{40})/){
-			$commit = $1;
-			next;
-		}
-		if ($line =~ m/^:([0-7]{6}) ([0-7]{6}) ([0-9a-fA-F]{40}) ([0-9a-fA-F]{40}) (.)\t(.*)$/ &&
(defined $commit)) {
+			my $commit = $1;
 			my %co = git_read_commit($commit);
 			if (!%co) {
 				next;
@@ -2336,7 +2332,6 @@ sub git_history {
 			}
 			print "</td>\n" .
 			      "</tr>\n";
-			undef $commit;
 		}
 	}
 	print "</table>\n";
-- 
1.4.1.rc2.g4ce4

^ permalink raw reply related

* [PATCH] Enable tree (directory) history display
From: Luben Tuikov @ 2006-07-01  1:00 UTC (permalink / raw)
  To: git

This patch allows history display of whole trees/directories a la
"git-rev-list HEAD -- <dir or file>".  I find this useful especially
when a project lives in its own subdirectory, as opposed to being all
of the GIT repository (i.e. when a sub-project is merged into a
super-project).

Signed-off-by: Luben Tuikov <ltuikov@yahoo.com>
---
 gitweb/gitweb.cgi |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/gitweb/gitweb.cgi b/gitweb/gitweb.cgi
index 2705a93..d808900 100755
--- a/gitweb/gitweb.cgi
+++ b/gitweb/gitweb.cgi
@@ -1676,6 +1676,7 @@ #			      " | " . $cgi->a({-href => "$my
 			      "</td>\n" .
 			      "<td class=\"link\">" .
 			      $cgi->a({-href => "$my_uri?" .
esc_param("p=$project;a=tree;h=$t_hash$base_key;f=$base$t_name")}, "tree") .
+			      " | " . $cgi->a({-href => "$my_uri?" .
esc_param("p=$project;a=history;h=$hash_base;f=$base$t_name")}, "history") .
 			      "</td>\n";
 		}
 		print "</tr>\n";
-- 
1.4.1.rc2.g4ce4

^ permalink raw reply related

* Re: [PATCH] Speed up history generation
From: Junio C Hamano @ 2006-07-01  1:04 UTC (permalink / raw)
  To: ltuikov; +Cc: git
In-Reply-To: <20060701005924.7726.qmail@web31812.mail.mud.yahoo.com>

Luben Tuikov <ltuikov@yahoo.com> writes:

> Speed up history generation as suggested by Linus.
> @@ -2295,16 +2295,12 @@ sub git_history {
>  	      "</div>\n";
>  	print "<div class=\"page_path\"><b>/" . esc_html($file_name) . "</b><br/></div>\n";
>  
> -	open my $fd, "-|", "$gitbin/git-rev-list $hash | $gitbin/git-diff-tree -r --stdin --
> \'$file_name\'";
> -	my $commit;
> +	open my $fd, "-|", "$gitbin/git-rev-list $hash -- \'$file_name\'";

This would speed things up but at the same time it changes the
semantics because it involves merge simplification, no?

At least that should be noted in the commit log.

^ permalink raw reply

* Re: [PATCH] Speed up history generation
From: Luben Tuikov @ 2006-07-01  1:11 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7v7j2ygmou.fsf@assigned-by-dhcp.cox.net>

--- Junio C Hamano <junkio@cox.net> wrote:
> Luben Tuikov <ltuikov@yahoo.com> writes:
> 
> > Speed up history generation as suggested by Linus.
> > @@ -2295,16 +2295,12 @@ sub git_history {
> >  	      "</div>\n";
> >  	print "<div class=\"page_path\"><b>/" . esc_html($file_name) . "</b><br/></div>\n";
> >  
> > -	open my $fd, "-|", "$gitbin/git-rev-list $hash | $gitbin/git-diff-tree -r --stdin --
> > \'$file_name\'";
> > -	my $commit;
> > +	open my $fd, "-|", "$gitbin/git-rev-list $hash -- \'$file_name\'";
> 
> This would speed things up but at the same time it changes the
> semantics because it involves merge simplification, no?
> 
> At least that should be noted in the commit log.

Ok, I guess this should be in the log.  Can you add it please when
commiting to the master git branch?

   Luben

^ permalink raw reply

* Re: [PATCH] Speed up history generation
From: Linus Torvalds @ 2006-07-01  1:20 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: ltuikov, git
In-Reply-To: <7v7j2ygmou.fsf@assigned-by-dhcp.cox.net>



On Fri, 30 Jun 2006, Junio C Hamano wrote:

> Luben Tuikov <ltuikov@yahoo.com> writes:
> 
> > Speed up history generation as suggested by Linus.
> > @@ -2295,16 +2295,12 @@ sub git_history {
> >  	      "</div>\n";
> >  	print "<div class=\"page_path\"><b>/" . esc_html($file_name) . "</b><br/></div>\n";
> >  
> > -	open my $fd, "-|", "$gitbin/git-rev-list $hash | $gitbin/git-diff-tree -r --stdin --
> > \'$file_name\'";
> > -	my $commit;
> > +	open my $fd, "-|", "$gitbin/git-rev-list $hash -- \'$file_name\'";
> 
> This would speed things up but at the same time it changes the
> semantics because it involves merge simplification, no?

Or just add a flag or config option that enables "--full-history" on that 
git-rev-list. Perhaps it should be the default fir gitweb.

With --full-history, it should still be better to do this inside 
git-rev-list than piping things into git-diff-tree just to limit by 
pathname..

		Linus

^ permalink raw reply

* Re: [PATCH] Speed up history generation
From: Junio C Hamano @ 2006-07-01  1:20 UTC (permalink / raw)
  To: ltuikov; +Cc: git
In-Reply-To: <20060701011112.892.qmail@web31813.mail.mud.yahoo.com>

Luben Tuikov <ltuikov@yahoo.com> writes:

> --- Junio C Hamano <junkio@cox.net> wrote:
>...
>> > @@ -2295,16 +2295,12 @@ sub git_history {
>> >  	      "</div>\n";
>> >  	print "<div class=\"page_path\"><b>/" . esc_html($file_name) . "</b><br/></div>\n";
>> >  
>> > -	open my $fd, "-|", "$gitbin/git-rev-list $hash | $gitbin/git-diff-tree -r --stdin --
>> > \'$file_name\'";
>> > -	my $commit;
>> > +	open my $fd, "-|", "$gitbin/git-rev-list $hash -- \'$file_name\'";
>> 
>> This would speed things up but at the same time it changes the
>> semantics because it involves merge simplification, no?
>> 
>> At least that should be noted in the commit log.
>
> Ok, I guess this should be in the log.  Can you add it please when
> commiting to the master git branch?

Well, by "at least", what I meant was that it might make sense
to pass "--full-history" option to be more compatible with the
original output.  For graphical output like gitk, --full-history
makes a mess on the screen, but a list-oriented output like
gitweb it might be less confusing to show all the alternate
paths that touched the path than leaving some histories out.

^ permalink raw reply

* Re: [PATCH] Speed up history generation
From: Luben Tuikov @ 2006-07-01  1:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7v3bdmglxo.fsf@assigned-by-dhcp.cox.net>

--- Junio C Hamano <junkio@cox.net> wrote:
> Luben Tuikov <ltuikov@yahoo.com> writes:
> 
> > --- Junio C Hamano <junkio@cox.net> wrote:
> >...
> >> > @@ -2295,16 +2295,12 @@ sub git_history {
> >> >  	      "</div>\n";
> >> >  	print "<div class=\"page_path\"><b>/" . esc_html($file_name) . "</b><br/></div>\n";
> >> >  
> >> > -	open my $fd, "-|", "$gitbin/git-rev-list $hash | $gitbin/git-diff-tree -r --stdin --
> >> > \'$file_name\'";
> >> > -	my $commit;
> >> > +	open my $fd, "-|", "$gitbin/git-rev-list $hash -- \'$file_name\'";
> >> 
> >> This would speed things up but at the same time it changes the
> >> semantics because it involves merge simplification, no?
> >> 
> >> At least that should be noted in the commit log.
> >
> > Ok, I guess this should be in the log.  Can you add it please when
> > commiting to the master git branch?
> 
> Well, by "at least", what I meant was that it might make sense
> to pass "--full-history" option to be more compatible with the
> original output.  For graphical output like gitk, --full-history
> makes a mess on the screen, but a list-oriented output like
> gitweb it might be less confusing to show all the alternate
> paths that touched the path than leaving some histories out.

Ok, I can add the --full-history option, test it out and resubmit.

    Luben

^ permalink raw reply

* Re: [PATCH] Enable tree (directory) history display
From: Junio C Hamano @ 2006-07-01  2:10 UTC (permalink / raw)
  To: ltuikov; +Cc: git
In-Reply-To: <20060701010012.1559.qmail@web31809.mail.mud.yahoo.com>

Luben Tuikov <ltuikov@yahoo.com> writes:

> This patch allows history display of whole trees/directories a la
> "git-rev-list HEAD -- <dir or file>".  I find this useful especially
> when a project lives in its own subdirectory, as opposed to being all
> of the GIT repository (i.e. when a sub-project is merged into a
> super-project).

Both patches from you were seriously damaged.  Check your MUA
before sending further patches, please.

They were trivial for me to apply by hand, so no need to resend
them.  I like the effect of this one I am replying to very much.

^ permalink raw reply

* Re: [PATCH] Enable tree (directory) history display
From: Luben Tuikov @ 2006-07-01  2:43 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Linus Torvalds
In-Reply-To: <7vy7vef52m.fsf@assigned-by-dhcp.cox.net>

--- Junio C Hamano <junkio@cox.net> wrote:
> Luben Tuikov <ltuikov@yahoo.com> writes:
> 
> > This patch allows history display of whole trees/directories a la
> > "git-rev-list HEAD -- <dir or file>".  I find this useful especially
> > when a project lives in its own subdirectory, as opposed to being all
> > of the GIT repository (i.e. when a sub-project is merged into a
> > super-project).
> 
> Both patches from you were seriously damaged.  Check your MUA
> before sending further patches, please.

Yes, I'll take a look.

> 
> They were trivial for me to apply by hand, so no need to resend
> them.  I like the effect of this one I am replying to very much.

Junio, Linus,

I took a comparative look with and without "--full-history",
and FWIW, enabling full history just clobbers the output with a lot
of unnecessary information.  I.e. it shows merges which do not have
direct consequence or change to the files in the path spec specified
after the "--".

I.e. no new, relevant information is being shown when "--full-history"
is enabled.  In fact the default git-rev-list case, simplify_history=1,
still shows a merge here and there which doesn't have any direct
changes into what is being sought, but the difference is
about 48% less clobber.

Can you consider the default case to be simplify_history=1,
which is currently the default behaviour of git-rev-list.

If not, is it possible to make this a configurable flag
in gitweb, so that people can decide how much non-related
history to show?  I.e. I'd opt for the default case,
no full-history.

FWIW, I think that the original intention (had there been a choice)
would've been to show only most relevant history, i.e. changes
directly related to paths and files after the "--".

Thanks,
  Luben

^ permalink raw reply

* A note on merging conflicts..
From: Linus Torvalds @ 2006-07-01  2:44 UTC (permalink / raw)
  To: Git Mailing List, Junio C Hamano


Ok, over the last week or so, I've been having a lot more content 
conflicts than usual, mostly because of 

 (a) just the fact that the way the merge window happens for the kernel 
     these days, rather than have incremental small merges, we often end 
     up having lots of big ones.
and

 (b) I ended up merging a few trees that had lots of small changes all 
     over, notably to header files having their <config.h> include 
     removed, causing trivial conflicts.

Now, the good news is that I have to say that our conflict resolution 
rocks. It's all been _very_ easy to do. In fact, it's been even more 
pleasant than BK was, because of one big issue: you could resolve the 
conflict in the tree, then _test_ it (perhaps just compile-test it), and 
commit the resolved result separately. With BK, you had to resolve and 
commit atomically, and you never had access to a "preliminary resolve" to 
test.

However, I also notived one particular thing that I did that we make less 
than perfectly easy.

One thing that is _very_ useful to do is to do when you have a conflict is 
this:

	git log -p HEAD MERGE_BASE..MERGE_HEAD -- conflicting-filename

because this shows all the changes (with their explanations) for that 
filename since the MERGE_BASE in _both_ branches you're trying to merge. 
This simple command really makes conflict resolution a hell of a lot 
easier, because you can see what caused the conflict, and you get a real 
feel for what both branches were doing, making it a _lot_ more likely that 
you actually do the right thing.

Now, the downside is that the above is both a pain to type, and we don't 
actually even save the MERGE_BASE as a head, so you actually have to 
compute it yourself. It's easy enough to do:

	git-merge-base HEAD MERGE_HEAD > .git/MERGE_BASE

will do it, but the fact is, we should make this even easier.

In fact, after writing the above a few times, I really think there's a 
case for making a helper function that does exactly the above for us. 
Including all the "conflicting-filename" thing. It would be nice if

	git log -p --merge [[--] filenames...]

would basically expand to

	git log -p HEAD MERGE_HEAD
		^$(git-merge-base HEAD MERGE_HEAD)
		-- $(git-ls-files -u [filenames...])

so that I wouldn't have to type that by hand ever again, and doing a

	git log -p --merge drivers/

would automatically give me exactly that for all the unmerged files in 
drivers/.

Anybody want to try to make me happy, and learn some git internals at the 
same time?

			Linus

^ permalink raw reply

* [PATCH] don't load objects needlessly when repacking
From: Nicolas Pitre @ 2006-07-01  2:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

If no delta is attempted on some objects then it is useless to load them 
in memory, neither create any delta index for them.  The best thing to 
do is therefore to load and index them only when really needed.

Signed-off-by: Nicolas Pitre <nico@cam.org>

---

With this patch, a git-repack -a on the Linux kernel repo takes 19 
seconds instead of 25 seconds on my machine.  At this point the cost of 
creating a pack is largely dominated by git-rev-list alone while the 
actual pack creation is basically free.

diff --git a/pack-objects.c b/pack-objects.c
index 47da33b..b486ea5 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -970,11 +970,12 @@ struct unpacked {
  * one.
  */
 static int try_delta(struct unpacked *trg, struct unpacked *src,
-		     struct delta_index *src_index, unsigned max_depth)
+		     unsigned max_depth)
 {
 	struct object_entry *trg_entry = trg->entry;
 	struct object_entry *src_entry = src->entry;
-	unsigned long size, src_size, delta_size, sizediff, max_size;
+	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
+	char type[10];
 	void *delta_buf;
 
 	/* Don't bother doing diffs between different types */
@@ -1009,19 +1010,38 @@ static int try_delta(struct unpacked *tr
 		return 0;
 
 	/* Now some size filtering heuristics. */
-	size = trg_entry->size;
-	max_size = size/2 - 20;
+	trg_size = trg_entry->size;
+	max_size = trg_size/2 - 20;
 	max_size = max_size * (max_depth - src_entry->depth) / max_depth;
 	if (max_size == 0)
 		return 0;
 	if (trg_entry->delta && trg_entry->delta_size <= max_size)
 		max_size = trg_entry->delta_size-1;
 	src_size = src_entry->size;
-	sizediff = src_size < size ? size - src_size : 0;
+	sizediff = src_size < trg_size ? trg_size - src_size : 0;
 	if (sizediff >= max_size)
 		return 0;
 
-	delta_buf = create_delta(src_index, trg->data, size, &delta_size, max_size);
+	/* Load data if not already done */
+	if (!trg->data) {
+		trg->data = read_sha1_file(trg_entry->sha1, type, &sz);
+		if (sz != trg_size)
+			die("object %s inconsistent object length (%lu vs %lu)",
+			    sha1_to_hex(trg_entry->sha1), sz, trg_size);
+	}
+	if (!src->data) {
+		src->data = read_sha1_file(src_entry->sha1, type, &sz);
+		if (sz != src_size)
+			die("object %s inconsistent object length (%lu vs %lu)",
+			    sha1_to_hex(src_entry->sha1), sz, src_size);
+	}
+	if (!src->index) {
+		src->index = create_delta_index(src->data, src_size);
+		if (!src->index)
+			die("out of memory");
+	}
+
+	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
 	if (!delta_buf)
 		return 0;
 
@@ -1054,8 +1074,6 @@ static void find_deltas(struct object_en
 	while (--i >= 0) {
 		struct object_entry *entry = list[i];
 		struct unpacked *n = array + idx;
-		unsigned long size;
-		char type[10];
 		int j;
 
 		if (!entry->preferred_base)
@@ -1082,11 +1100,8 @@ static void find_deltas(struct object_en
 		free_delta_index(n->index);
 		n->index = NULL;
 		free(n->data);
+		n->data = NULL;
 		n->entry = entry;
-		n->data = read_sha1_file(entry->sha1, type, &size);
-		if (size != entry->size)
-			die("object %s inconsistent object length (%lu vs %lu)",
-			    sha1_to_hex(entry->sha1), size, entry->size);
 
 		j = window;
 		while (--j > 0) {
@@ -1097,7 +1112,7 @@ static void find_deltas(struct object_en
 			m = array + other_idx;
 			if (!m->entry)
 				break;
-			if (try_delta(n, m, m->index, depth) < 0)
+			if (try_delta(n, m, depth) < 0)
 				break;
 		}
 		/* if we made n a delta, and if n is already at max
@@ -1107,10 +1122,6 @@ static void find_deltas(struct object_en
 		if (entry->delta && depth <= entry->depth)
 			continue;
 
-		n->index = create_delta_index(n->data, size);
-		if (!n->index)
-			die("out of memory");
-
 		idx++;
 		if (idx >= window)
 			idx = 0;

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox