git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] git-mktree: reverse of git-ls-tree.
@ 2006-02-21  6:37 Junio C Hamano
  2006-02-21  6:52 ` Keith Packard
  0 siblings, 1 reply; 10+ messages in thread
From: Junio C Hamano @ 2006-02-21  6:37 UTC (permalink / raw)
  To: Tommi Virtanen; +Cc: git

This reads data in the format a (non recursive) ls-tree outputs
and writes a tree object to the object database.  The created
tree object name is output to the standard output.

For convenience, the input data does not need to be sorted; the
command sorts the input lines internally.

Signed-off-by: Junio C Hamano <junkio@cox.net>

---

 * For the purposes of filesystem backend, I think completely
   bypassing the index and having a way to handcraft a tree
   object, especially if you are writing things in Python, would
   be much easier to use.  Hence this command.

   Comments?

 Makefile |    2 -
 mktree.c |  137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 138 insertions(+), 1 deletions(-)
 create mode 100644 mktree.c

7e37266ced5edd9a70b03ea07b5eca0d7ee82039
diff --git a/Makefile b/Makefile
index 317be3c..2f73b86 100644
--- a/Makefile
+++ b/Makefile
@@ -143,7 +143,7 @@ PROGRAMS = \
 	git-diff-tree$X git-fetch-pack$X git-fsck-objects$X \
 	git-hash-object$X git-index-pack$X git-init-db$X \
 	git-local-fetch$X git-ls-files$X git-ls-tree$X git-merge-base$X \
-	git-merge-index$X git-mktag$X git-pack-objects$X git-patch-id$X \
+	git-merge-index$X git-mktag$X git-mktree$X git-pack-objects$X git-patch-id$X \
 	git-peek-remote$X git-prune-packed$X git-read-tree$X \
 	git-receive-pack$X git-rev-list$X git-rev-parse$X \
 	git-send-pack$X git-show-branch$X git-shell$X \
diff --git a/mktree.c b/mktree.c
new file mode 100644
index 0000000..f853585
--- /dev/null
+++ b/mktree.c
@@ -0,0 +1,137 @@
+/*
+ * GIT - the stupid content tracker
+ *
+ * Copyright (c) Junio C Hamano, 2006
+ */
+#include "cache.h"
+#include "strbuf.h"
+#include "quote.h"
+
+static struct treeent {
+	unsigned mode;
+	unsigned char sha1[20];
+	int len;
+	char name[FLEX_ARRAY];
+} **entries;
+static int alloc, used;
+
+static void append_to_tree(unsigned mode, unsigned char *sha1, char *path)
+{
+	struct treeent *ent;
+	int len = strlen(path);
+	if (strchr(path, '/'))
+		die("path %s contains slash", path);
+
+	if (alloc <= used) {
+		alloc = alloc_nr(used);
+		entries = xrealloc(entries, sizeof(*entries) * alloc);
+	}
+	ent = entries[used++] = xmalloc(sizeof(**entries) + len + 1);
+	ent->mode = mode;
+	ent->len = len;
+	memcpy(ent->sha1, sha1, 20);
+	memcpy(ent->name, path, len+1);
+}
+
+static int ent_compare(const void *a_, const void *b_)
+{
+	struct treeent *a = *(struct treeent **)a_;
+	struct treeent *b = *(struct treeent **)b_;
+	return base_name_compare(a->name, a->len, a->mode,
+				 b->name, b->len, b->mode);
+}
+
+static void write_tree(unsigned char *sha1)
+{
+	char *buffer;
+	unsigned long size, offset;
+	int i;
+
+	qsort(entries, used, sizeof(*entries), ent_compare);
+	size = 100;
+	for (size = i = 0; i < used; i++)
+		size += 32 + entries[i]->len;
+	buffer = xmalloc(size);
+	offset = 0;
+
+	for (i = 0; i < used; i++) {
+		struct treeent *ent = entries[i];
+
+		if (offset + ent->len + 100 < size) {
+			size = alloc_nr(offset + ent->len + 100);
+			buffer = xrealloc(buffer, size);
+		}
+		offset += sprintf(buffer + offset, "%o ", ent->mode);
+		offset += sprintf(buffer + offset, "%s", ent->name);
+		buffer[offset++] = 0;
+		memcpy(buffer + offset, ent->sha1, 20);
+		offset += 20;
+	}
+	write_sha1_file(buffer, offset, "tree", sha1);
+}
+
+static const char mktree_usage[] = "mktree [-z]";
+
+int main(int ac, char **av)
+{
+	struct strbuf sb;
+	unsigned char sha1[20];
+	int line_termination = '\n';
+
+	setup_git_directory();
+
+	while ((1 < ac) && av[1][0] == '-') {
+		char *arg = av[1];
+		if (!strcmp("-z", arg))
+			line_termination = 0;
+		else
+			usage(mktree_usage);
+		ac--;
+		av++;
+	}
+
+	strbuf_init(&sb);
+	while (1) {
+		int len;
+		char *ptr, *ntr;
+		unsigned mode;
+		char type[20];
+		char *path;
+
+		read_line(&sb, stdin, line_termination);
+		if (sb.eof)
+			break;
+		len = sb.len;
+		ptr = sb.buf;
+		/* Input is non-recursive ls-tree output format
+		 * mode SP type SP sha1 TAB name
+		 */
+		mode = strtoul(ptr, &ntr, 8);
+		if (ptr == ntr || !ntr || *ntr != ' ')
+			die("input format error: %s", sb.buf);
+		ptr = ntr + 1; /* type */
+		ntr = strchr(ptr, ' ');
+		if (!ntr || sb.buf + len <= ntr + 41 ||
+		    ntr[41] != '\t' ||
+		    get_sha1_hex(ntr + 1, sha1))
+			die("input format error: %s", sb.buf);
+		if (sha1_object_info(sha1, type, NULL))
+			die("object %s unavailable", sha1_to_hex(sha1));
+		*ntr++ = 0; /* now at the beginning of SHA1 */
+		if (strcmp(ptr, type))
+			die("object type %s mismatch (%s)", ptr, type);
+		ntr += 41; /* at the beginning of name */
+		if (line_termination && ntr[0] == '"')
+			path = unquote_c_style(ntr, NULL);
+		else
+			path = ntr;
+
+		append_to_tree(mode, sha1, path);
+
+		if (path != ntr)
+			free(path);
+	}
+	write_tree(sha1);
+	puts(sha1_to_hex(sha1));
+	exit(0);
+}
-- 
1.2.2.g9896

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21  6:37 [PATCH] git-mktree: reverse of git-ls-tree Junio C Hamano
@ 2006-02-21  6:52 ` Keith Packard
  2006-02-21  8:46   ` Andreas Ericsson
  2006-02-21 17:40   ` Linus Torvalds
  0 siblings, 2 replies; 10+ messages in thread
From: Keith Packard @ 2006-02-21  6:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: keithp, Tommi Virtanen, git

[-- Attachment #1: Type: text/plain, Size: 809 bytes --]

<internationalization-pedant-mode>
On Mon, 2006-02-20 at 22:37 -0800, Junio C Hamano wrote:

> + * Copyright (c) Junio C Hamano, 2006

I've been told by at least two lawyers that the string '(c)' has no
legal meaning in the US. If you want to indicate copyright, the only
symbol which does carry legal weight is the c-in-a-circle mark '©'. 

Of course, this does force the issue of what encoding to present source
files in. I suggest that sources should be UTF-8, which also provides
opportunities to encode author names correctly, rather than
transliterating them to Latin. X.org uses UTF-8 for source files now
without difficulty across a wide range of compilers. Of course,
non-ascii glyphs are present only in comments.

</internationalization-pedant-mode>
-- 
keith.packard@intel.com

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21  6:52 ` Keith Packard
@ 2006-02-21  8:46   ` Andreas Ericsson
  2006-02-21  9:49     ` Junio C Hamano
  2006-02-21 17:40   ` Linus Torvalds
  1 sibling, 1 reply; 10+ messages in thread
From: Andreas Ericsson @ 2006-02-21  8:46 UTC (permalink / raw)
  To: Keith Packard; +Cc: Junio C Hamano, Tommi Virtanen, git

Keith Packard wrote:
> <internationalization-pedant-mode>
> On Mon, 2006-02-20 at 22:37 -0800, Junio C Hamano wrote:
> 
> 
>>+ * Copyright (c) Junio C Hamano, 2006
> 
> 
> I've been told by at least two lawyers that the string '(c)' has no
> legal meaning in the US. If you want to indicate copyright, the only
> symbol which does carry legal weight is the c-in-a-circle mark '©'. 
> 
> Of course, this does force the issue of what encoding to present source
> files in. I suggest that sources should be UTF-8, which also provides
> opportunities to encode author names correctly, rather than
> transliterating them to Latin. X.org uses UTF-8 for source files now
> without difficulty across a wide range of compilers. Of course,
> non-ascii glyphs are present only in comments.
> 
> </internationalization-pedant-mode>


In most countries the copyright is implied unless explicitly voided by the 
author.

In other sane countries (I don't argue that USA is necessarily any such 
country), the law is such that if the copier understands that there is a 
copyright and violates it, he or she is in error and thus liable.

I'm not sure how mad such a law can be written, but what you describe goes 
against both common sense and common practice since it puts the burden 
of protection on the victim-to-be before the crime is even committed. It 
would be like a rapist being let off because his victims were where he 
happened to be.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21  8:46   ` Andreas Ericsson
@ 2006-02-21  9:49     ` Junio C Hamano
  2006-02-21 10:20       ` Martin Langhoff
  2006-02-21 17:23       ` Keith Packard
  0 siblings, 2 replies; 10+ messages in thread
From: Junio C Hamano @ 2006-02-21  9:49 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: git, Keith Packard

Andreas Ericsson <ae@op5.se> writes:

> Keith Packard wrote:
>> <internationalization-pedant-mode>
>> On Mon, 2006-02-20 at 22:37 -0800, Junio C Hamano wrote:
>>
>>>+ * Copyright (c) Junio C Hamano, 2006
>> I've been told by at least two lawyers that the string '(c)' has no
>> legal meaning in the US. If you want to indicate copyright, the only
>> symbol which does carry legal weight is the c-in-a-circle mark
>...
> I'm not sure how mad such a law can be written, but what you describe
> go against both common sense and common practice since it puts the
> burden of protection on the victim-to-be before the crime is even
> committed...

Keith is saying that unlike circle-c, (c) is meaningless.  While
he is right about that, it does not matter, as long as he is
talking about "legal meaning in the US".  It is my understanding
that spelled out "Copyright" (or its abbreviation, "Copr.")
weighs as much as the circle-c mark.

And it matters even less these days.  US law traditionally
required copyright notice to be protected, but after 1989 US
Copyright Act, made in line with Berne convention, the notice is
not even necessary.  It used to be that you would want circle-c,
Copyright and "All Rights Reserved", if you really wanted to be
anal.  Buenos Aires signatories are all Berne members these
days, it became just obsolete inertia.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21  9:49     ` Junio C Hamano
@ 2006-02-21 10:20       ` Martin Langhoff
  2006-02-21 17:23       ` Keith Packard
  1 sibling, 0 replies; 10+ messages in thread
From: Martin Langhoff @ 2006-02-21 10:20 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Andreas Ericsson, git, Keith Packard

On 2/21/06, Junio C Hamano <junkio@cox.net> wrote:
> Keith is saying that unlike circle-c, (c) is meaningless.  While
> he is right about that, it does not matter, as long as he is
> talking about "legal meaning in the US".  It is my understanding
> that spelled out "Copyright" (or its abbreviation, "Copr.")
> weighs as much as the circle-c mark.
>
> And it matters even less these days.  US law traditionally
> required copyright notice to be protected, but after 1989 US
> Copyright Act, made in line with Berne convention, the notice is
> not even necessary.  It used to be that you would want circle-c,
> Copyright and "All Rights Reserved", if you really wanted to be
> anal.  Buenos Aires signatories are all Berne members these
> days, it became just obsolete inertia.

Not entirely useless though. It serves as an indication of who to
contact for permission to copy. It's not as good as it should be
("which of the 35 L.Torvalds in the phonebook does this mean?").
Copyright law reform proponents do sometimes talk of a central
database with contact details for copyright owners and other measures
to facilitate brokerage of licenses.

It also serves as a reminder of the need for a license, though legally
it's not required. Just like a lock in the door, while often easy to
pick or crack open and definitely not required, reminds people that
trespassing is a crime.

Bah, social mores, laws and other vices of humanity...


martin

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21  9:49     ` Junio C Hamano
  2006-02-21 10:20       ` Martin Langhoff
@ 2006-02-21 17:23       ` Keith Packard
  2006-02-21 17:57         ` Linus Torvalds
  1 sibling, 1 reply; 10+ messages in thread
From: Keith Packard @ 2006-02-21 17:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: keithp, Andreas Ericsson, git

[-- Attachment #1: Type: text/plain, Size: 931 bytes --]

On Tue, 2006-02-21 at 01:49 -0800, Junio C Hamano wrote:

> Keith is saying that unlike circle-c, (c) is meaningless.  While
> he is right about that, it does not matter, as long as he is
> talking about "legal meaning in the US".  It is my understanding
> that spelled out "Copyright" (or its abbreviation, "Copr.")
> weighs as much as the circle-c mark.

Precisely. I'd say either remove the (c) or replace it with the correct
symbol. As including the correct symbol requires selecting between an
8859 and 10646 encoding, one shows one's i18n-fu by including the © in
UTF-8.

Once this choice is firmly in place, the opportunities for excessive
character usage really flourish:

http://gitweb.freedesktop.org/?p=cairo;a=blobdiff;h=1c63adec9b6547c6446548dc3a877e1e4ba29bfd;hp=a6ebc5e04773c8b544cfb721b635ef5534be235f;hb=cc890b9cf4d2a38e13ae48e16589c4fd02678f99;f=src/cairo-pen.c
  
-- 
keith.packard@intel.com

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21  6:52 ` Keith Packard
  2006-02-21  8:46   ` Andreas Ericsson
@ 2006-02-21 17:40   ` Linus Torvalds
  2006-02-21 17:46     ` Keith Packard
  1 sibling, 1 reply; 10+ messages in thread
From: Linus Torvalds @ 2006-02-21 17:40 UTC (permalink / raw)
  To: Keith Packard; +Cc: Junio C Hamano, Tommi Virtanen, git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 2509 bytes --]



On Mon, 20 Feb 2006, Keith Packard wrote:
> 
> > + * Copyright (c) Junio C Hamano, 2006
> 
> I've been told by at least two lawyers that the string '(c)' has no
> legal meaning in the US. If you want to indicate copyright, the only
> symbol which does carry legal weight is the c-in-a-circle mark '©'. 

You should change lawyers, methinks.

The thing is, once the same line says "Copyright", the string '(c)' may be 
meaningless, but more importantly, your lawyers are wasting your time with 
pointless and mindless "punktknulleri" (literal meaning: "the f*cking of 
points", aka being anal retentive).

Of course, they are probably also charging you for that time they are 
wasting, which is why you should fire them and find somebody who tells you 
anything relevant.

The FACT is that
 (a) You can write out the word "copyright" in its entirety.
 (b) the US legal system very much takes intent into account, so even if 
     you don't, if the meaning is clear, it's not like it matters. This
     is even more true on most of the rest of the civilized world, btw (ie 
     Europe in general gives authors _more_ rights than the US, since they 
     recognize the notion of "moral rights")
 (c) you own the copyright anyway ever since 1988, when the US ratified 
     the Berne convention. In fact, even before then, the US had adopted 
     the notion of automatic copyrights, and any work created after 1978 
     falls under this.

Anyway, for more details if you _really_ care, look up "Circular 3" by the 
United States Copyright Office. The very first sentence of that paper 
talks about how the notice isn't even required any more, but if you want 
to talk to your punktknullande lawyers, point them to the section called 
"form of notice". Which mentions the © letter, but makes it very very 
clear that "Copyright" or the abbreviation "Copr." are totally 
interchangeable in the US.

Now, in some _other_ countries, the © mark may be special, but quite 
frankly, you won't really care. If it matters, those countries haven't 
ratified the Berne convention, and you'll never ever in a million years 
care about them. It's absolutely certainly not the case in any relevant 
country.

Any country where "©" matters likely has many bigger problems wrt 
copyrights, like not honoring them at all.

So: if you care about the copyright law in Ulan Bator, you may have to use 
the © character. But the likelihood is that it's not an issue even there.

So tell your lawyers to f*ck the hell off.

		Linus

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21 17:40   ` Linus Torvalds
@ 2006-02-21 17:46     ` Keith Packard
  2006-02-21 18:00       ` Linus Torvalds
  0 siblings, 1 reply; 10+ messages in thread
From: Keith Packard @ 2006-02-21 17:46 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: keithp, Junio C Hamano, Tommi Virtanen, git

[-- Attachment #1: Type: text/plain, Size: 362 bytes --]

On Tue, 2006-02-21 at 09:40 -0800, Linus Torvalds wrote:

> Which mentions the © letter, but makes it very very 
> clear that "Copyright" or the abbreviation "Copr." are totally 
> interchangeable in the US.

Yes, all three are equivalent, my only point was that '(c)' is
meaningless. Which, as I noted is just pedantry.

-- 
keith.packard@intel.com

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21 17:23       ` Keith Packard
@ 2006-02-21 17:57         ` Linus Torvalds
  0 siblings, 0 replies; 10+ messages in thread
From: Linus Torvalds @ 2006-02-21 17:57 UTC (permalink / raw)
  To: Keith Packard; +Cc: Junio C Hamano, Andreas Ericsson, git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 603 bytes --]



On Tue, 21 Feb 2006, Keith Packard wrote:
> 
> Precisely. I'd say either remove the (c) or replace it with the correct
> symbol.

Taking that to its logical conclusion, you should remove the whole line, 
since none of it is in any way required and it's all unnecessary.

The thing is, the string "(c)" is a _lot_ more readable than the sanest 
alternative for © (\c2\a9, ie the appropriate UTF8 string). Because even 
that sane representation will actually show up as something else in a 
_lot_ of editors, and is often hard to type for people. 

There's a reason people use "(c)" rather than "©".

		Linus

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] git-mktree: reverse of git-ls-tree.
  2006-02-21 17:46     ` Keith Packard
@ 2006-02-21 18:00       ` Linus Torvalds
  0 siblings, 0 replies; 10+ messages in thread
From: Linus Torvalds @ 2006-02-21 18:00 UTC (permalink / raw)
  To: Keith Packard; +Cc: Junio C Hamano, Tommi Virtanen, git



On Tue, 21 Feb 2006, Keith Packard wrote:
>
> Yes, all three are equivalent, my only point was that '(c)' is
> meaningless. Which, as I noted is just pedantry.

Pedantry is fine on a mailing list. But meaningless pedantry in a lawyer 
is bad. My point was that your _lawyers_ are bad. Tell them to concentrate 
on something that matters.

		Linus

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2006-02-21 18:00 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-21  6:37 [PATCH] git-mktree: reverse of git-ls-tree Junio C Hamano
2006-02-21  6:52 ` Keith Packard
2006-02-21  8:46   ` Andreas Ericsson
2006-02-21  9:49     ` Junio C Hamano
2006-02-21 10:20       ` Martin Langhoff
2006-02-21 17:23       ` Keith Packard
2006-02-21 17:57         ` Linus Torvalds
2006-02-21 17:40   ` Linus Torvalds
2006-02-21 17:46     ` Keith Packard
2006-02-21 18:00       ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).