* [PATCH 1/2] remote: Make ref_remove_duplicates faster for large numbers of refs
2009-10-22 21:06 [PATCH 0/2] Speedup fetch with large numbers of refs Julian Phillips
@ 2009-10-22 21:06 ` Julian Phillips
2009-10-22 21:06 ` [PATCH 2/2] fetch: Speed up fetch of " Julian Phillips
2009-10-25 7:16 ` [PATCH 0/2] Speedup fetch with " Junio C Hamano
2 siblings, 0 replies; 5+ messages in thread
From: Julian Phillips @ 2009-10-22 21:06 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
The ref_remove_duplicates function was very slow at dealing with very
large numbers of refs. This is because it was using a linear search
through all remaining refs to find any duplicates of the current ref.
Rewriting it to use a string list to keep track of which refs have
already been seen and removing duplicates when they are found is much
more efficient.
Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk>
---
remote.c | 39 +++++++++++++++++++++------------------
1 files changed, 21 insertions(+), 18 deletions(-)
diff --git a/remote.c b/remote.c
index 73d33f2..a97c2c3 100644
--- a/remote.c
+++ b/remote.c
@@ -6,6 +6,7 @@
#include "revision.h"
#include "dir.h"
#include "tag.h"
+#include "string-list.h"
static struct refspec s_tag_refspec = {
0,
@@ -734,29 +735,31 @@ int for_each_remote(each_remote_fn fn, void *priv)
void ref_remove_duplicates(struct ref *ref_map)
{
- struct ref **posn;
- struct ref *next;
+ struct string_list refs = { NULL, 0, 0, 0 };
+ struct string_list_item *item = NULL;
+ struct ref *prev = NULL;
for (; ref_map; ref_map = ref_map->next) {
if (!ref_map->peer_ref)
continue;
- posn = &ref_map->next;
- while (*posn) {
- if ((*posn)->peer_ref &&
- !strcmp((*posn)->peer_ref->name,
- ref_map->peer_ref->name)) {
- if (strcmp((*posn)->name, ref_map->name))
- die("%s tracks both %s and %s",
- ref_map->peer_ref->name,
- (*posn)->name, ref_map->name);
- next = (*posn)->next;
- free((*posn)->peer_ref);
- free(*posn);
- *posn = next;
- } else {
- posn = &(*posn)->next;
- }
+
+ item = string_list_lookup(ref_map->peer_ref->name, &refs);
+ if (item) {
+ if (strcmp(((struct ref *)item->util)->name,
+ ref_map->name))
+ die("%s tracks both %s and %s",
+ ref_map->peer_ref->name,
+ ((struct ref *)item->util)->name,
+ ref_map->name);
+ prev->next = ref_map->next;
+ free(ref_map->peer_ref);
+ free(ref_map);
}
+
+ item = string_list_insert(ref_map->peer_ref->name, &refs);
+ item->util = ref_map;
+ prev = ref_map;
}
+ string_list_clear(&refs, 0);
}
int remote_has_url(struct remote *remote, const char *url)
--
1.6.5.rc2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] fetch: Speed up fetch of large numbers of refs
2009-10-22 21:06 [PATCH 0/2] Speedup fetch with large numbers of refs Julian Phillips
2009-10-22 21:06 ` [PATCH 1/2] remote: Make ref_remove_duplicates faster for " Julian Phillips
@ 2009-10-22 21:06 ` Julian Phillips
2009-10-25 7:16 ` [PATCH 0/2] Speedup fetch with " Junio C Hamano
2 siblings, 0 replies; 5+ messages in thread
From: Julian Phillips @ 2009-10-22 21:06 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
When there are large numbers of refs, calling read_ref for each ref is
inefficent (and infact downright slow) - so instead use for_each_ref
to build up a string list of all the refs that we currently have,
which significantly improves the volume.
Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk>
---
builtin-fetch.c | 17 ++++++++++++++---
1 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/builtin-fetch.c b/builtin-fetch.c
index acb08e4..0f53cbd 100644
--- a/builtin-fetch.c
+++ b/builtin-fetch.c
@@ -489,7 +489,8 @@ static int add_existing(const char *refname, const unsigned char *sha1,
int flag, void *cbdata)
{
struct string_list *list = (struct string_list *)cbdata;
- string_list_insert(refname, list);
+ struct string_list_item *item = string_list_insert(refname, list);
+ item->util = (void *)sha1;
return 0;
}
@@ -606,9 +607,14 @@ static void check_not_current_branch(struct ref *ref_map)
static int do_fetch(struct transport *transport,
struct refspec *refs, int ref_count)
{
+ struct string_list existing_refs = { NULL, 0, 0, 0 };
+ struct string_list_item *peer_item = NULL;
struct ref *ref_map;
struct ref *rm;
int autotags = (transport->remote->fetch_tags == 1);
+
+ for_each_ref(add_existing, &existing_refs);
+
if (transport->remote->fetch_tags == 2 && tags != TAGS_UNSET)
tags = TAGS_SET;
if (transport->remote->fetch_tags == -1)
@@ -631,8 +637,13 @@ static int do_fetch(struct transport *transport,
check_not_current_branch(ref_map);
for (rm = ref_map; rm; rm = rm->next) {
- if (rm->peer_ref)
- read_ref(rm->peer_ref->name, rm->peer_ref->old_sha1);
+ if (rm->peer_ref) {
+ peer_item = string_list_lookup(rm->peer_ref->name,
+ &existing_refs);
+ if (peer_item)
+ hashcpy(rm->peer_ref->old_sha1,
+ peer_item->util);
+ }
}
if (tags == TAGS_DEFAULT && autotags)
--
1.6.5.rc2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 0/2] Speedup fetch with large numbers of refs
2009-10-22 21:06 [PATCH 0/2] Speedup fetch with large numbers of refs Julian Phillips
2009-10-22 21:06 ` [PATCH 1/2] remote: Make ref_remove_duplicates faster for " Julian Phillips
2009-10-22 21:06 ` [PATCH 2/2] fetch: Speed up fetch of " Julian Phillips
@ 2009-10-25 7:16 ` Junio C Hamano
2009-10-25 20:48 ` Julian Phillips
2 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2009-10-25 7:16 UTC (permalink / raw)
To: Julian Phillips; +Cc: Junio C Hamano, git
Hmm, t5515 does not seem to pass with this series for me.
^ permalink raw reply [flat|nested] 5+ messages in thread