From: Stefan Beller <sbeller@google.com>
To: peff@peff.net
Cc: git@vger.kernel.org, Stefan Beller <sbeller@google.com>
Subject: [PATCH] object: measure time needed for resolving hash collisions
Date: Wed, 14 Sep 2016 19:01:41 -0700 [thread overview]
Message-ID: <20160915020141.32000-1-sbeller@google.com> (raw)
$ time ./git rev-list HEAD --count
44271
print_time_spent_object: measure time needed for resolving hash collisions
This shows that we roughly spent a third of the time in resolving
hash collisions:
I am using linux.git to measure:
$ time git rev-list --objects --count HEAD >/dev/null
print_time_spent_resolving_hash_collisions 9:445591664
real 0m31.733s
user 0m31.220s
sys 0m0.463s
For fun I reverted 9a414486d9f0:
$ time git rev-list --objects --count HEAD >/dev/null
print_time_spent_resolving_hash_collisions 14:338605504
real 0m37.008s
user 0m36.524s
sys 0m0.423s
The 17% that Jeff measured in there, equals to 1-31.733/37.008 = 0.14
in these measurements.
The time spent in collision resolving went down by 1/3 on itself
(14.33s to 9.44s), so there is still room for improvement.
Signed-off-by: Stefan Beller <sbeller@google.com>
---
According to Jeff, sending patches that don't get accepted is the new hype!
builtin/rev-list.c | 2 ++
object.c | 36 ++++++++++++++++++++++++++++++++++++
object.h | 2 ++
3 files changed, 40 insertions(+)
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 8479f6e..d0e5922 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -412,5 +412,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
printf("%d\n", revs.count_left + revs.count_right);
}
+ print_time_spent_resolving_hash_collisions();
+
return 0;
}
diff --git a/object.c b/object.c
index 67d9a9e..e9e73e0 100644
--- a/object.c
+++ b/object.c
@@ -5,9 +5,40 @@
#include "commit.h"
#include "tag.h"
+#include <time.h>
+
static struct object **obj_hash;
static int nr_objs, obj_hash_size;
+struct timespec caching = {0, 0};
+
+void diff(struct timespec *start, struct timespec *end, struct timespec *dst)
+{
+ if ((end->tv_nsec-start->tv_nsec)<0) {
+ dst->tv_sec = end->tv_sec-start->tv_sec-1;
+ dst->tv_nsec = 1000000000+end->tv_nsec-start->tv_nsec;
+ } else {
+ dst->tv_sec = end->tv_sec-start->tv_sec;
+ dst->tv_nsec = end->tv_nsec-start->tv_nsec;
+ }
+}
+
+void add_time_to(struct timespec *dst, struct timespec *addend)
+{
+ dst->tv_sec += addend->tv_sec;
+ dst->tv_nsec += addend->tv_nsec;
+ while (dst->tv_nsec > 1000000000) {
+ dst->tv_nsec -= 1000000000;
+ dst->tv_sec ++;
+ }
+}
+
+void print_time_spent_resolving_hash_collisions(void)
+{
+ fprintf(stderr, "print_time_spent_resolving_hash_collisions %ld:%9ld\n",
+ (long)caching.tv_sec, (long)caching.tv_nsec);
+}
+
unsigned int get_max_object_index(void)
{
return obj_hash_size;
@@ -86,11 +117,13 @@ struct object *lookup_object(const unsigned char *sha1)
{
unsigned int i, first;
struct object *obj;
+ struct timespec time1, time2, t_diff;
if (!obj_hash)
return NULL;
first = i = hash_obj(sha1, obj_hash_size);
+ clock_gettime(CLOCK_MONOTONIC, &time1);
while ((obj = obj_hash[i]) != NULL) {
if (!hashcmp(sha1, obj->oid.hash))
break;
@@ -98,6 +131,9 @@ struct object *lookup_object(const unsigned char *sha1)
if (i == obj_hash_size)
i = 0;
}
+ clock_gettime(CLOCK_MONOTONIC, &time2);
+ diff(&time1, &time2, &t_diff);
+ add_time_to(&caching, &t_diff);
if (obj && i != first) {
/*
* Move object to where we started to look for it so
diff --git a/object.h b/object.h
index f8b6442..ee6ab3a 100644
--- a/object.h
+++ b/object.h
@@ -56,6 +56,8 @@ extern const char *typename(unsigned int type);
extern int type_from_string_gently(const char *str, ssize_t, int gentle);
#define type_from_string(str) type_from_string_gently(str, -1, 0)
+void print_time_spent_resolving_hash_collisions(void);
+
/*
* Return the current number of buckets in the object hashmap.
*/
--
2.10.0.130.gef2bcd7.dirty
next reply other threads:[~2016-09-15 2:01 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-15 2:01 Stefan Beller [this message]
2016-09-15 6:47 ` [PATCH] object: measure time needed for resolving hash collisions Jeff King
2016-09-15 7:01 ` Jeff King
2016-09-15 16:26 ` Stefan Beller
2016-09-15 18:49 ` Jeff King
2016-09-15 17:45 ` Junio C Hamano
2016-09-15 18:47 ` Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160915020141.32000-1-sbeller@google.com \
--to=sbeller@google.com \
--cc=git@vger.kernel.org \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).