* [PATCH] preload-index: optimize for sequential IO
2014-06-24 22:52 Git-status / preload_index() performance Karsten Blees
@ 2014-06-24 22:53 ` Karsten Blees
2014-06-24 22:54 ` [PATCH (experimental)] preload-index: make parallel IO configurable Karsten Blees
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Karsten Blees @ 2014-06-24 22:53 UTC (permalink / raw)
To: Git List
Enabling core.preloadIndex on a real HD reduces cold cache performance by
~5%. This is because the threads read from up to 20 different locations on
disk.
Additionally, some threads finish early (each thread is assigned a fixed
number of cache entries to process in advance), i.e. preloading is not as
parallel as we would like. With hot cache, threads finish so quickly that
most run in sequence rather than in parallel.
Change background threads so that they run until all work is done. Use a
central mutex-protected counter to iterate over available cache entries. As
cache entries are sorted by path, this implicitly increases IO locality.
This improves cold cache performance of preload_index() by ~20% and
hot cache performance by ~15%. Total improvement of e.g. 'git status -uno'
on WebKit is ~15% (cold cache) and ~5% (hot cache).
Signed-off-by: Karsten Blees <blees@dcon.de>
---
preload-index.c | 76 ++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 45 insertions(+), 31 deletions(-)
diff --git a/preload-index.c b/preload-index.c
index 968ee25..6ac368d 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -28,50 +28,65 @@ struct thread_data {
pthread_t pthread;
struct index_state *index;
struct pathspec pathspec;
- int offset, nr;
+ pthread_mutex_t *pmutex;
+ int *pnr;
};
static void *preload_thread(void *_data)
{
- int nr;
+ int nr, max_nr;
struct thread_data *p = _data;
struct index_state *index = p->index;
- struct cache_entry **cep = index->cache + p->offset;
struct cache_def cache;
memset(&cache, 0, sizeof(cache));
- nr = p->nr;
- if (nr + p->offset > index->cache_nr)
- nr = index->cache_nr - p->offset;
+ for (;;) {
+ /* get next batch of entries to check */
+ pthread_mutex_lock(p->pmutex);
+ nr = *p->pnr;
+ *p->pnr += THREAD_COST;
+ pthread_mutex_unlock(p->pmutex);
- do {
- struct cache_entry *ce = *cep++;
- struct stat st;
+ max_nr = nr + THREAD_COST;
+ if (max_nr > index->cache_nr)
+ max_nr = index->cache_nr;
- if (ce_stage(ce))
- continue;
- if (S_ISGITLINK(ce->ce_mode))
- continue;
- if (ce_uptodate(ce))
- continue;
- if (!ce_path_match(ce, &p->pathspec, NULL))
- continue;
- if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce)))
- continue;
- if (lstat(ce->name, &st))
- continue;
- if (ie_match_stat(index, ce, &st, CE_MATCH_RACY_IS_DIRTY))
- continue;
- ce_mark_uptodate(ce);
- } while (--nr > 0);
+ /* break loop if no more work to do */
+ if (nr >= max_nr)
+ break;
+
+ for (; nr < max_nr; nr++) {
+ struct cache_entry *ce = index->cache[nr];
+ struct stat st;
+
+ if (ce_stage(ce))
+ continue;
+ if (S_ISGITLINK(ce->ce_mode))
+ continue;
+ if (ce_uptodate(ce))
+ continue;
+ if (!ce_path_match(ce, &p->pathspec, NULL))
+ continue;
+ if (threaded_has_symlink_leading_path(&cache, ce->name,
+ ce_namelen(ce)))
+ continue;
+ if (lstat(ce->name, &st))
+ continue;
+ if (ie_match_stat(index, ce, &st,
+ CE_MATCH_RACY_IS_DIRTY))
+ continue;
+ ce_mark_uptodate(ce);
+ }
+ }
return NULL;
}
static void preload_index(struct index_state *index,
const struct pathspec *pathspec)
{
- int threads, i, work, offset;
+ int threads, i, nr = 0;
struct thread_data data[MAX_PARALLEL];
+ pthread_mutex_t mutex;
if (!core_preload_index)
return;
@@ -81,17 +96,15 @@ static void preload_index(struct index_state *index,
return;
if (threads > MAX_PARALLEL)
threads = MAX_PARALLEL;
- offset = 0;
- work = DIV_ROUND_UP(index->cache_nr, threads);
memset(&data, 0, sizeof(data));
+ pthread_mutex_init(&mutex, NULL);
for (i = 0; i < threads; i++) {
struct thread_data *p = data+i;
p->index = index;
if (pathspec)
copy_pathspec(&p->pathspec, pathspec);
- p->offset = offset;
- p->nr = work;
- offset += work;
+ p->pnr = &nr;
+ p->pmutex = &mutex;
if (pthread_create(&p->pthread, NULL, preload_thread, p))
die("unable to create threaded lstat");
}
@@ -100,6 +113,7 @@ static void preload_index(struct index_state *index,
if (pthread_join(p->pthread, NULL))
die("unable to join threaded lstat");
}
+ pthread_mutex_destroy(&mutex);
}
#endif
--
1.9.4.msysgit.0.1.gc8a51b4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH (experimental)] preload-index: make parallel IO configurable
2014-06-24 22:52 Git-status / preload_index() performance Karsten Blees
2014-06-24 22:53 ` [PATCH] preload-index: optimize for sequential IO Karsten Blees
@ 2014-06-24 22:54 ` Karsten Blees
2014-06-24 22:56 ` [PATCH (performance tracing)] test git-status performance Karsten Blees
2014-06-24 23:12 ` Git-status / preload_index() performance David Turner
3 siblings, 0 replies; 9+ messages in thread
From: Karsten Blees @ 2014-06-24 22:54 UTC (permalink / raw)
To: Git List
Make the amount of IO parallelism configurable (i.e. number of separate
index sections to be processed in parallel).
Defining PARALLEL_IO = 20 restores approximately the IO pattern of the
original preload_index() implementation. Best performance is achieved with
PARALLEL_IO = 1 (i.e. sequential IO).
Signed-off-by: Karsten Blees <blees@dcon.de>
---
Applies on top of "preload-index: optimize for sequential IO".
preload-index.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/preload-index.c b/preload-index.c
index 6ac368d..5fe5521 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -23,6 +23,7 @@ static void preload_index(struct index_state *index,
*/
#define MAX_PARALLEL (20)
#define THREAD_COST (500)
+#define PARALLEL_IO (1)
struct thread_data {
pthread_t pthread;
@@ -38,23 +39,34 @@ static void *preload_thread(void *_data)
struct thread_data *p = _data;
struct index_state *index = p->index;
struct cache_def cache;
+ int blocks = DIV_ROUND_UP(index->cache_nr, THREAD_COST);
+ int step = DIV_ROUND_UP(blocks, PARALLEL_IO);
memset(&cache, 0, sizeof(cache));
for (;;) {
/* get next batch of entries to check */
pthread_mutex_lock(p->pmutex);
nr = *p->pnr;
- *p->pnr += THREAD_COST;
+ (*p->pnr)++;
pthread_mutex_unlock(p->pmutex);
+ /* break loop if no more work to do */
+ if (nr >= blocks + PARALLEL_IO - 1)
+ break;
+
+ /*
+ * rearrange iteration order, i.e. with PARALLEL_IO = 2,
+ * [0, 1, 2, 3, 4, 5, 6, 7] becomes [0, 4, 1, 5, 2, 6, 3, 7]
+ */
+ nr = (nr / PARALLEL_IO) + (nr % PARALLEL_IO) * step;
+ if (nr >= blocks)
+ continue;
+ nr *= THREAD_COST;
+
max_nr = nr + THREAD_COST;
if (max_nr > index->cache_nr)
max_nr = index->cache_nr;
- /* break loop if no more work to do */
- if (nr >= max_nr)
- break;
-
for (; nr < max_nr; nr++) {
struct cache_entry *ce = index->cache[nr];
struct stat st;
--
1.9.4.msysgit.0.1.gc8a51b4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH (performance tracing)] test git-status performance
2014-06-24 22:52 Git-status / preload_index() performance Karsten Blees
2014-06-24 22:53 ` [PATCH] preload-index: optimize for sequential IO Karsten Blees
2014-06-24 22:54 ` [PATCH (experimental)] preload-index: make parallel IO configurable Karsten Blees
@ 2014-06-24 22:56 ` Karsten Blees
2014-06-26 12:30 ` Duy Nguyen
2014-07-26 9:59 ` Duy Nguyen
2014-06-24 23:12 ` Git-status / preload_index() performance David Turner
3 siblings, 2 replies; 9+ messages in thread
From: Karsten Blees @ 2014-06-24 22:56 UTC (permalink / raw)
To: Git List
Add trace_performance output to functions involved in git-status.
Signed-off-by: Karsten Blees <blees@dcon.de>
---
Applies on top of performance-tracing topic.
builtin/commit.c | 8 ++++++++
preload-index.c | 4 ++++
read-cache.c | 2 ++
wt-status.c | 11 +++++++++++
4 files changed, 25 insertions(+)
diff --git a/builtin/commit.c b/builtin/commit.c
index 94eb8a3..6a38fa2 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1322,9 +1322,11 @@ static int git_status_config(const char *k, const char *v, void *cb)
int cmd_status(int argc, const char **argv, const char *prefix)
{
+ uint64_t start = getnanotime();
static struct wt_status s;
int fd;
unsigned char sha1[20];
+
static struct option builtin_status_options[] = {
OPT__VERBOSE(&verbose, N_("be verbose")),
OPT_SET_INT('s', "short", &status_format,
@@ -1369,13 +1371,19 @@ int cmd_status(int argc, const char **argv, const char *prefix)
PATHSPEC_PREFER_FULL,
prefix, argv);
+ trace_performance_since(start, "cmd_status:setup");
+
read_cache_preload(&s.pathspec);
refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL);
+ start = getnanotime();
+
fd = hold_locked_index(&index_lock, 0);
if (0 <= fd)
update_index_if_able(&the_index, &index_lock);
+ trace_performance_since(start, "cmd_status:update_index");
+
s.is_initial = get_sha1(s.reference, sha1) ? 1 : 0;
s.ignore_submodule_arg = ignore_submodule_arg;
wt_status_collect(&s);
diff --git a/preload-index.c b/preload-index.c
index 968ee25..189c5a4 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -33,6 +33,7 @@ struct thread_data {
static void *preload_thread(void *_data)
{
+ uint64_t start = getnanotime();
int nr;
struct thread_data *p = _data;
struct index_state *index = p->index;
@@ -64,6 +65,7 @@ static void *preload_thread(void *_data)
continue;
ce_mark_uptodate(ce);
} while (--nr > 0);
+ trace_performance_since(start, "preload_thread");
return NULL;
}
@@ -106,8 +108,10 @@ static void preload_index(struct index_state *index,
int read_index_preload(struct index_state *index,
const struct pathspec *pathspec)
{
+ uint64_t start = getnanotime();
int retval = read_index(index);
preload_index(index, pathspec);
+ trace_performance_since(start, "read_index_preload");
return retval;
}
diff --git a/read-cache.c b/read-cache.c
index 132d032..5c6b763 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1136,6 +1136,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
const struct pathspec *pathspec,
char *seen, const char *header_msg)
{
+ uint64_t start = getnanotime();
int i;
int has_errors = 0;
int really = (flags & REFRESH_REALLY) != 0;
@@ -1222,6 +1223,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
replace_index_entry(istate, i, new);
}
+ trace_performance_since(start, "refresh_index");
return has_errors;
}
diff --git a/wt-status.c b/wt-status.c
index 318a191..a744565 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -623,13 +623,24 @@ static void wt_status_collect_untracked(struct wt_status *s)
void wt_status_collect(struct wt_status *s)
{
+ uint64_t start = getnanotime();
+
wt_status_collect_changes_worktree(s);
+ trace_performance_since(start, "wt_status_collect_changes_worktree");
+ start = getnanotime();
+
if (s->is_initial)
wt_status_collect_changes_initial(s);
else
wt_status_collect_changes_index(s);
+
+ trace_performance_since(start, "wt_status_collect_changes_index");
+ start = getnanotime();
+
wt_status_collect_untracked(s);
+
+ trace_performance_since(start, "wt_status_collect_untracked");
}
static void wt_status_print_unmerged(struct wt_status *s)
--
1.9.4.msysgit.0.1.gc8a51b4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH (performance tracing)] test git-status performance
2014-06-24 22:56 ` [PATCH (performance tracing)] test git-status performance Karsten Blees
@ 2014-06-26 12:30 ` Duy Nguyen
2014-07-26 9:59 ` Duy Nguyen
1 sibling, 0 replies; 9+ messages in thread
From: Duy Nguyen @ 2014-06-26 12:30 UTC (permalink / raw)
To: Karsten Blees; +Cc: Git List
On Wed, Jun 25, 2014 at 5:56 AM, Karsten Blees <karsten.blees@gmail.com> wrote:
> void wt_status_collect(struct wt_status *s)
> {
> + uint64_t start = getnanotime();
> +
> wt_status_collect_changes_worktree(s);
>
> + trace_performance_since(start, "wt_status_collect_changes_worktree");
> + start = getnanotime();
> +
> if (s->is_initial)
> wt_status_collect_changes_initial(s);
> else
> wt_status_collect_changes_index(s);
> +
> + trace_performance_since(start, "wt_status_collect_changes_index");
> + start = getnanotime();
> +
> wt_status_collect_untracked(s);
> +
> + trace_performance_since(start, "wt_status_collect_untracked");
> }
Now that we have good perf measuring support, perhaps the next step is
remove gettimeofday() in advice_status_u_option related code in
wt_status_collect_untracked().
--
Duy
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH (performance tracing)] test git-status performance
2014-06-24 22:56 ` [PATCH (performance tracing)] test git-status performance Karsten Blees
2014-06-26 12:30 ` Duy Nguyen
@ 2014-07-26 9:59 ` Duy Nguyen
2014-07-26 10:33 ` Duy Nguyen
1 sibling, 1 reply; 9+ messages in thread
From: Duy Nguyen @ 2014-07-26 9:59 UTC (permalink / raw)
To: Karsten Blees; +Cc: Git List
On Wed, Jun 25, 2014 at 5:56 AM, Karsten Blees <karsten.blees@gmail.com> wrote:
> Add trace_performance output to functions involved in git-status.
>
> Signed-off-by: Karsten Blees <blees@dcon.de>
Should this patch be merged to git.git? It's useful for tracking
performance (of git-status) and does not seem to have any negative
impacts.
> ---
>
> Applies on top of performance-tracing topic.
>
> builtin/commit.c | 8 ++++++++
> preload-index.c | 4 ++++
> read-cache.c | 2 ++
> wt-status.c | 11 +++++++++++
> 4 files changed, 25 insertions(+)
>
> diff --git a/builtin/commit.c b/builtin/commit.c
> index 94eb8a3..6a38fa2 100644
> --- a/builtin/commit.c
> +++ b/builtin/commit.c
> @@ -1322,9 +1322,11 @@ static int git_status_config(const char *k, const char *v, void *cb)
>
> int cmd_status(int argc, const char **argv, const char *prefix)
> {
> + uint64_t start = getnanotime();
> static struct wt_status s;
> int fd;
> unsigned char sha1[20];
> +
> static struct option builtin_status_options[] = {
> OPT__VERBOSE(&verbose, N_("be verbose")),
> OPT_SET_INT('s', "short", &status_format,
> @@ -1369,13 +1371,19 @@ int cmd_status(int argc, const char **argv, const char *prefix)
> PATHSPEC_PREFER_FULL,
> prefix, argv);
>
> + trace_performance_since(start, "cmd_status:setup");
> +
> read_cache_preload(&s.pathspec);
> refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL);
>
> + start = getnanotime();
> +
> fd = hold_locked_index(&index_lock, 0);
> if (0 <= fd)
> update_index_if_able(&the_index, &index_lock);
>
> + trace_performance_since(start, "cmd_status:update_index");
> +
> s.is_initial = get_sha1(s.reference, sha1) ? 1 : 0;
> s.ignore_submodule_arg = ignore_submodule_arg;
> wt_status_collect(&s);
> diff --git a/preload-index.c b/preload-index.c
> index 968ee25..189c5a4 100644
> --- a/preload-index.c
> +++ b/preload-index.c
> @@ -33,6 +33,7 @@ struct thread_data {
>
> static void *preload_thread(void *_data)
> {
> + uint64_t start = getnanotime();
> int nr;
> struct thread_data *p = _data;
> struct index_state *index = p->index;
> @@ -64,6 +65,7 @@ static void *preload_thread(void *_data)
> continue;
> ce_mark_uptodate(ce);
> } while (--nr > 0);
> + trace_performance_since(start, "preload_thread");
> return NULL;
> }
>
> @@ -106,8 +108,10 @@ static void preload_index(struct index_state *index,
> int read_index_preload(struct index_state *index,
> const struct pathspec *pathspec)
> {
> + uint64_t start = getnanotime();
> int retval = read_index(index);
>
> preload_index(index, pathspec);
> + trace_performance_since(start, "read_index_preload");
> return retval;
> }
> diff --git a/read-cache.c b/read-cache.c
> index 132d032..5c6b763 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -1136,6 +1136,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
> const struct pathspec *pathspec,
> char *seen, const char *header_msg)
> {
> + uint64_t start = getnanotime();
> int i;
> int has_errors = 0;
> int really = (flags & REFRESH_REALLY) != 0;
> @@ -1222,6 +1223,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
>
> replace_index_entry(istate, i, new);
> }
> + trace_performance_since(start, "refresh_index");
> return has_errors;
> }
>
> diff --git a/wt-status.c b/wt-status.c
> index 318a191..a744565 100644
> --- a/wt-status.c
> +++ b/wt-status.c
> @@ -623,13 +623,24 @@ static void wt_status_collect_untracked(struct wt_status *s)
>
> void wt_status_collect(struct wt_status *s)
> {
> + uint64_t start = getnanotime();
> +
> wt_status_collect_changes_worktree(s);
>
> + trace_performance_since(start, "wt_status_collect_changes_worktree");
> + start = getnanotime();
> +
> if (s->is_initial)
> wt_status_collect_changes_initial(s);
> else
> wt_status_collect_changes_index(s);
> +
> + trace_performance_since(start, "wt_status_collect_changes_index");
> + start = getnanotime();
> +
> wt_status_collect_untracked(s);
> +
> + trace_performance_since(start, "wt_status_collect_untracked");
> }
>
> static void wt_status_print_unmerged(struct wt_status *s)
--
Duy
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH (performance tracing)] test git-status performance
2014-07-26 9:59 ` Duy Nguyen
@ 2014-07-26 10:33 ` Duy Nguyen
0 siblings, 0 replies; 9+ messages in thread
From: Duy Nguyen @ 2014-07-26 10:33 UTC (permalink / raw)
To: Karsten Blees; +Cc: Git List
On Sat, Jul 26, 2014 at 4:59 PM, Duy Nguyen <pclouds@gmail.com> wrote:
> On Wed, Jun 25, 2014 at 5:56 AM, Karsten Blees <karsten.blees@gmail.com> wrote:
>> Add trace_performance output to functions involved in git-status.
>>
>> Signed-off-by: Karsten Blees <blees@dcon.de>
>
> Should this patch be merged to git.git? It's useful for tracking
> performance (of git-status) and does not seem to have any negative
> impacts.
Hm... I don't like the changes in commit.c and rather print running
time inside read_index_from() and update_index_if_able(). So people
may prefer different ways of measuring and these patches should
probably stay private. Although if we have something like this shipped
in every distro, asking the user to run with $GIT_TRACE_PERFORMANCE
could help..
--
Duy
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: Git-status / preload_index() performance
2014-06-24 22:52 Git-status / preload_index() performance Karsten Blees
` (2 preceding siblings ...)
2014-06-24 22:56 ` [PATCH (performance tracing)] test git-status performance Karsten Blees
@ 2014-06-24 23:12 ` David Turner
2014-06-24 23:25 ` Karsten Blees
3 siblings, 1 reply; 9+ messages in thread
From: David Turner @ 2014-06-24 23:12 UTC (permalink / raw)
To: Karsten Blees; +Cc: Git List
On Wed, 2014-06-25 at 00:52 +0200, Karsten Blees wrote:
> Even more time is spent unpacking the HEAD tree, even with hot cache (repacking with depth 10 reduces this to ~250ms, on SSD its just 7ms). Perhaps caching the HEAD tree in an index extension could help here?
This is approximately what the cache-tree extension does. However, it's
a bit broken. I've been working on a fix, but slowly because my other
work has taken me longer than expected. You can see the effect of the
cache-tree extension by doing git reset --hard HEAD; this temporarily
restores that extension.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: Git-status / preload_index() performance
2014-06-24 23:12 ` Git-status / preload_index() performance David Turner
@ 2014-06-24 23:25 ` Karsten Blees
0 siblings, 0 replies; 9+ messages in thread
From: Karsten Blees @ 2014-06-24 23:25 UTC (permalink / raw)
To: David Turner; +Cc: Git List
Am 25.06.2014 01:12, schrieb David Turner:
> On Wed, 2014-06-25 at 00:52 +0200, Karsten Blees wrote:
>> Even more time is spent unpacking the HEAD tree, even with hot cache (repacking with depth 10 reduces this to ~250ms, on SSD its just 7ms). Perhaps caching the HEAD tree in an index extension could help here?
>
> This is approximately what the cache-tree extension does. However, it's
> a bit broken. I've been working on a fix, but slowly because my other
> work has taken me longer than expected. You can see the effect of the
> cache-tree extension by doing git reset --hard HEAD; this temporarily
> restores that extension.
>
Indeed:
01:32:35.965910 builtin/commit.c:1374 performance: 0.097505786 s: cmd_status:setup
...
01:32:36.047534 preload-index.c:129 performance: 0.081458337 s: read_index_preload
01:32:36.056204 read-cache.c:1226 performance: 0.008641527 s: refresh_index
01:32:36.059237 builtin/commit.c:1385 performance: 0.002997060 s: cmd_status:update_index
01:32:36.065163 wt-status.c:630 performance: 0.005732979 s: wt_status_collect_changes_worktree
01:32:36.072078 wt-status.c:638 performance: 0.006832976 s: wt_status_collect_changes_index
01:32:36.072150 wt-status.c:643 performance: 0.000000374 s: wt_status_collect_untracked
01:32:36.072211 trace.c:414 performance: 0.204069579 s: git command: 'git' 'status' '-s' '-uno'
Thanks
^ permalink raw reply [flat|nested] 9+ messages in thread