* Experimental "git prune"
@ 2006-07-04 22:41 Linus Torvalds
2006-07-04 22:55 ` Junio C Hamano
2006-07-05 7:57 ` Johannes Schindelin
0 siblings, 2 replies; 4+ messages in thread
From: Linus Torvalds @ 2006-07-04 22:41 UTC (permalink / raw)
To: Junio C Hamano, Git Mailing List
This is an example of how "git prune" could be speeded up a lot.
This needs the builtin magic to actually enable it, and the "printf" needs
to be made a real unlink, but as far as I can tell it actually works.
Comments?
Linus
----
/*
* builtin-prune.c
*/
#include "cache.h"
#include "refs.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "blob.h"
#include "tree-walk.h"
#include "diff.h"
#include "revision.h"
#include "builtin.h"
#include "cache-tree.h"
/*
 * Revision-walk state shared by the whole file: add_one_ref() and
 * add_one_tree() queue pending objects on it, and cmd_prune()
 * initializes it and hands it to the walker.
 */
static struct rev_info revs;
/*
 * Report one loose object file as a prune candidate.
 *
 * path     - directory that holds the object file
 * filename - name of the object file inside that directory
 * sha1     - binary object name; not used by this reporting-only version
 *
 * Nothing is removed here: the candidate is only printed to stdout.
 * Always returns 0.
 */
static int prune_object(char *path, const char *filename, const unsigned char *sha1)
{
	(void)sha1;
	fprintf(stdout, "%s/%s should be pruned\n", path, filename);
	return 0;
}
/*
 * Scan one fan-out directory of the object store and report every
 * loose object that was never looked up.
 *
 * i    - fan-out value; its two-digit hex form is prepended to each
 *        38-character file name to rebuild the full hex object name
 * path - directory to scan
 *
 * "." and ".." are skipped silently.  Any other entry that is not a
 * 38-character name forming a valid hex object name is reported on
 * stderr as a bad sha1 file.  A directory that cannot be opened is
 * ignored.  Always returns 0.
 */
static int prune_dir(int i, char *path)
{
	struct dirent *entry;
	DIR *handle = opendir(path);

	if (!handle)
		return 0;

	for (entry = readdir(handle); entry != NULL; entry = readdir(handle)) {
		const char *fname = entry->d_name;
		int fname_len = strlen(fname);
		char hex[100];
		unsigned char sha1[20];

		/* The two directory self-references are not objects */
		if (fname_len == 1 && fname[0] == '.')
			continue;
		if (fname_len == 2 && fname[0] == '.' && fname[1] == '.')
			continue;

		if (fname_len == 38) {
			/* fan-out byte + file name (with its NUL) = full hex name */
			sprintf(hex, "%02x", i);
			memcpy(hex + 2, fname, fname_len + 1);
			if (get_sha1_hex(hex, sha1) >= 0) {
				/*
				 * An object we already know about must have
				 * been reachable; anything else is a candidate.
				 */
				if (!lookup_object(sha1))
					prune_object(path, fname, sha1);
				continue;
			}
		}
		fprintf(stderr, "bad sha1 file: %s/%s\n", path, fname);
	}
	closedir(handle);
	return 0;
}
/*
 * Walk all 256 fan-out subdirectories ("00" .. "ff") of the object
 * directory `path` and run prune_dir() on each of them.
 *
 * The subdirectory name is built in a fixed-size buffer.  If `path`
 * is too long to fit, a message is printed on stderr and nothing is
 * scanned, rather than writing past the end of the buffer.
 */
static void prune_object_dir(const char *path)
{
	int i;

	for (i = 0; i < 256; i++) {
		static char dir[4096];
		int n = snprintf(dir, sizeof(dir), "%s/%02x", path, i);

		/*
		 * Every iteration appends the same three characters, so a
		 * path that does not fit once will never fit.
		 */
		if (n < 0 || (size_t)n >= sizeof(dir)) {
			fprintf(stderr, "object directory path too long: %s\n", path);
			return;
		}
		prune_dir(i, dir);
	}
}
/*
 * Flag a blob as SEEN.
 *
 * p, path and name are accepted so the signature parallels
 * process_tree(), but they are not used: a blob has nothing below it
 * to walk, so the lookup that produced `blob` was the important part.
 */
static void process_blob(struct blob *blob,
			 struct object_array *p,
			 struct name_path *path,
			 const char *name)
{
	struct object *o = &blob->object;

	if (!(o->flags & SEEN))
		o->flags |= SEEN;
}
/*
 * Flag a tree as SEEN and recurse into everything it contains.
 *
 * tree - tree to process; nothing is done if it is already SEEN
 * p    - object array that the tree is recorded in via add_object()
 * path - name-path element of the containing tree, or NULL at the top
 * name - name of this tree within its parent
 *
 * Directory entries recurse through process_tree(); all other entries
 * go to process_blob().  A tree that fails to parse, or a failure to
 * duplicate the name, is reported through die().  The tree buffer is
 * freed once its entries have been walked.
 */
static void process_tree(struct tree *tree,
			 struct object_array *p,
			 struct name_path *path,
			 const char *name)
{
	struct object *obj = &tree->object;
	struct tree_desc desc;
	struct name_entry entry;
	struct name_path me;

	if (obj->flags & SEEN)
		return;
	obj->flags |= SEEN;
	if (parse_tree(tree) < 0)
		die("bad tree object %s", sha1_to_hex(obj->sha1));

	/*
	 * Keep a private copy of the name.  NOTE(review): presumably this
	 * is needed because a recursive caller passes entry.path, which
	 * points into a tree buffer that is freed later - confirm.
	 */
	name = strdup(name);
	if (!name)
		die("out of memory duplicating tree name");
	add_object(obj, p, path, name);

	/* Path element describing this tree, handed to its children */
	me.up = path;
	me.elem = name;
	me.elem_len = strlen(name);

	desc.buf = tree->buffer;
	desc.size = tree->size;

	while (tree_entry(&desc, &entry)) {
		if (S_ISDIR(entry.mode))
			process_tree(lookup_tree(entry.sha1), p, &me, entry.path);
		else
			process_blob(lookup_blob(entry.sha1), p, &me, entry.path);
	}

	/* All entries have been walked; the raw buffer is no longer needed */
	free(tree->buffer);
	tree->buffer = NULL;
}
/*
 * Flag a tag as SEEN, parse it, and record the object it points at.
 *
 * tag  - tag to process; nothing is done if it is already SEEN
 * p    - object array that the tagged object is recorded in
 * name - name passed through to add_object() for the tagged object
 *
 * The tagged object is recorded under a "tag:/" path element.  That
 * element was previously built but never passed on (NULL was handed
 * to add_object() instead), leaving the local variable dead.
 * A tag that fails to parse is reported through die().
 */
static void process_tag(struct tag *tag, struct object_array *p, const char *name)
{
	struct object *obj = &tag->object;
	struct name_path me;

	if (obj->flags & SEEN)
		return;
	obj->flags |= SEEN;

	me.up = NULL;
	me.elem = "tag:/";
	me.elem_len = 5;

	if (parse_tag(tag) < 0)
		die("bad tag object %s", sha1_to_hex(obj->sha1));
	add_object(tag->tagged, p, &me, name);
}
/*
 * Visit everything reachable from the prepared revision walk.
 *
 * First every commit returned by get_revision() has its tree
 * processed.  Then each entry left on revs->pending is dispatched by
 * object type to process_tag(), process_tree() or process_blob().
 * A pending object of any other type is reported through die().
 */
static void walk_commit_list(struct rev_info *revs)
{
	struct object_array objects = { 0, 0, NULL };
	struct commit *commit;
	int i;

	/* Commits first: each one contributes its root tree */
	for (;;) {
		commit = get_revision(revs);
		if (commit == NULL)
			break;
		process_tree(commit->tree, &objects, NULL, "");
	}

	/* Then the non-commit objects that stayed on the pending list */
	for (i = 0; i < revs->pending.nr; i++) {
		struct object_array_entry *entry = &revs->pending.objects[i];
		struct object *obj = entry->item;
		const char *name = entry->name;

		if (obj->type == TYPE_TAG)
			process_tag((struct tag *) obj, &objects, name);
		else if (obj->type == TYPE_TREE)
			process_tree((struct tree *)obj, &objects, NULL, name);
		else if (obj->type == TYPE_BLOB)
			process_blob((struct blob *)obj, &objects, NULL, name);
		else
			die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name);
	}
}
/*
 * Ref callback (passed to for_each_ref() by cmd_prune()): parse the
 * object a ref points at and queue it on the shared `revs` pending
 * list.
 *
 * path - ref name, used only in the error message
 * sha1 - object name the ref points at
 *
 * An object that cannot be parsed is reported through die().
 * Returns 0.
 */
static int add_one_ref(const char *path, const unsigned char *sha1)
{
	struct object *target;

	target = parse_object(sha1);
	if (target == NULL)
		die("bad object ref: %s:%s", path, sha1_to_hex(sha1));

	add_pending_object(&revs, target, "");
	return 0;
}
static void add_one_tree(const unsigned char *sha1)
{
struct tree *tree = lookup_tree(sha1);
add_pending_object(&revs, &tree->object, "");
}
/*
 * Queue the tree of a cache-tree node and, recursively, of all its
 * subtrees.  A node with a negative entry_count is not queued itself,
 * but its subtrees are still visited.
 */
static void add_cache_tree(struct cache_tree *it)
{
	int n = 0;

	if (!(it->entry_count < 0))
		add_one_tree(it->sha1);

	while (n < it->subtree_nr) {
		add_cache_tree(it->down[n]->cache_tree);
		n++;
	}
}
/*
 * Make everything the index file refers to known to the walk.
 *
 * Each index entry's blob is looked up.  The blobs are not added to
 * the pending list: once looked up and added as objects, there is
 * nothing further to do for a blob.  If a cache tree is present, its
 * trees are queued through add_cache_tree().
 */
static void add_cache_refs(void)
{
	int pos = 0;

	read_cache();

	while (pos < active_nr) {
		lookup_blob(active_cache[pos]->sha1);
		pos++;
	}

	if (!active_cache_tree)
		return;
	add_cache_tree(active_cache_tree);
}
/*
 * Entry point of the experimental "git prune".
 *
 * argc, argv and envp are not looked at.  The steps are:
 *   1. set up the revision walk for all object types, not just commits;
 *   2. queue every ref, then everything the index file refers to;
 *   3. prepare and run the walk, which flags what is reachable;
 *   4. scan the object directory and report what was never looked up.
 *
 * Returns 0.
 */
int cmd_prune(int argc, const char **argv, char **envp)
{
	init_revisions(&revs);

	/* We care about blobs, trees and tags as well as commits */
	revs.blob_objects = 1;
	revs.tree_objects = 1;
	revs.tag_objects = 1;

	/* Starting points: all refs first, then the index file */
	for_each_ref(add_one_ref);
	add_cache_refs();

	/*
	 * Preparing the walk moves all commits from the pending list
	 * to the commit walking list.
	 */
	prepare_revision_walk(&revs);
	walk_commit_list(&revs);

	prune_object_dir(get_object_directory());
	return 0;
}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Experimental "git prune"
2006-07-04 22:41 Experimental "git prune" Linus Torvalds
@ 2006-07-04 22:55 ` Junio C Hamano
2006-07-05 7:57 ` Johannes Schindelin
1 sibling, 0 replies; 4+ messages in thread
From: Junio C Hamano @ 2006-07-04 22:55 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git
Linus Torvalds <torvalds@osdl.org> writes:
> This is an example of how "git prune" could be speeded up a lot.
Ah, you are avoiding the verification cost of fsck-objects. Neat.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Experimental "git prune"
2006-07-04 22:41 Experimental "git prune" Linus Torvalds
2006-07-04 22:55 ` Junio C Hamano
@ 2006-07-05 7:57 ` Johannes Schindelin
2006-07-05 16:24 ` Linus Torvalds
1 sibling, 1 reply; 4+ messages in thread
From: Johannes Schindelin @ 2006-07-05 7:57 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Junio C Hamano, Git Mailing List
Hi,
On Tue, 4 Jul 2006, Linus Torvalds wrote:
> static void process_tag(struct tag *tag, struct object_array *p, const char *name)
> {
> struct object *obj = &tag->object;
> struct name_path me;
AFAICS this variable is set, but not used:
>
> if (obj->flags & SEEN)
> return;
> obj->flags |= SEEN;
>
> me.up = NULL;
> me.elem = "tag:/";
> me.elem_len = 5;
>
> if (parse_tag(tag) < 0)
> die("bad tag object %s", sha1_to_hex(obj->sha1));
> add_object(tag->tagged, p, NULL, name);
> }
As for the rest, I like it.
Ciao,
Dscho
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Experimental "git prune"
2006-07-05 7:57 ` Johannes Schindelin
@ 2006-07-05 16:24 ` Linus Torvalds
0 siblings, 0 replies; 4+ messages in thread
From: Linus Torvalds @ 2006-07-05 16:24 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Junio C Hamano, Git Mailing List
On Wed, 5 Jul 2006, Johannes Schindelin wrote:
> Hi,
>
> On Tue, 4 Jul 2006, Linus Torvalds wrote:
>
> > static void process_tag(struct tag *tag, struct object_array *p, const char *name)
> > {
> > struct object *obj = &tag->object;
> > struct name_path me;
>
> AFAICS this variable is set, but not used:
>
> >
> > if (obj->flags & SEEN)
> > return;
> > obj->flags |= SEEN;
> >
> > me.up = NULL;
> > me.elem = "tag:/";
> > me.elem_len = 5;
> >
> > if (parse_tag(tag) < 0)
> > die("bad tag object %s", sha1_to_hex(obj->sha1));
> > add_object(tag->tagged, p, NULL, name);
Yeah, that NULL should be "&me".
That said, we don't actually use the object name logic at all, so we could
drop it entirely. Having it _might_ allow some of the error messages to be
better, who knows..
Linus
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2006-07-05 16:25 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2006-07-04 22:41 Experimental "git prune" Linus Torvalds
2006-07-04 22:55 ` Junio C Hamano
2006-07-05 7:57 ` Johannes Schindelin
2006-07-05 16:24 ` Linus Torvalds
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).