All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	benpeart@microsoft.com, alexmv@dropbox.com,
	christian.couder@gmail.com, t.gummerer@gmail.com,
	"Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2] trace: measure where the time is spent in the index-heavy operations
Date: Sat, 27 Jan 2018 19:27:56 +0700	[thread overview]
Message-ID: <20180127122756.16971-1-pclouds@gmail.com> (raw)
In-Reply-To: <20180127115811.GB2130@hank>

All the known heavy code blocks are measured (except object database
access). This should help identify if an optimization is effective or
not. An unoptimized git-status would give something like below:

    0.001791141 s: read cache ...
    0.004011363 s: preload index
    0.000516161 s: refresh index
    0.003139257 s: git command: ... 'status' '--porcelain=2'
    0.006788129 s: diff-files
    0.002090267 s: diff-index
    0.001885735 s: initialize name hash
    0.032013138 s: read directory
    0.051781209 s: git command: './git' 'status'

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 > Would it be worth doing this on top of tg/split-index-fixes?  OTOH
 > this will only give a wrong output when tracing performance is on, and
 > it should be easy enough to figure out where the sharedindex actually
 > is.  So it might be better to keep this separate, and then just add a
 > patch on top for fixing the path later, which might be less work for
 > Junio.

 I updated the patch a bit to avoid git_path(). A merge on 'pu' still
 conflicts, but it's much easier to resolve by making sure free() is
 called after the trace_performance_since() line in read_index_from().

 It's technically dangerous to re-use base_path again this way, too
 far away from its assignment since 4 other git_path() calls may have
 been done and changed base_path value. But since tg/split-index-fixes
 should enter 'master' eventually and make it safe to re-use
 base_path, I think it's ok.

 diff-lib.c      | 4 ++++
 dir.c           | 2 ++
 name-hash.c     | 3 +++
 preload-index.c | 2 ++
 read-cache.c    | 7 +++++++
 5 files changed, 18 insertions(+)

diff --git a/diff-lib.c b/diff-lib.c
index 8104603a3b..a228e1a219 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -92,6 +92,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option)
 	int diff_unmerged_stage = revs->max_count;
 	unsigned ce_option = ((option & DIFF_RACY_IS_MODIFIED)
 			      ? CE_MATCH_RACY_IS_DIRTY : 0);
+	uint64_t start = getnanotime();
 
 	diff_set_mnemonic_prefix(&revs->diffopt, "i/", "w/");
 
@@ -246,6 +247,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option)
 	}
 	diffcore_std(&revs->diffopt);
 	diff_flush(&revs->diffopt);
+	trace_performance_since(start, "diff-files");
 	return 0;
 }
 
@@ -512,6 +514,7 @@ static int diff_cache(struct rev_info *revs,
 int run_diff_index(struct rev_info *revs, int cached)
 {
 	struct object_array_entry *ent;
+	uint64_t start = getnanotime();
 
 	ent = revs->pending.objects;
 	if (diff_cache(revs, &ent->item->oid, ent->name, cached))
@@ -521,6 +524,7 @@ int run_diff_index(struct rev_info *revs, int cached)
 	diffcore_fix_diff_index(&revs->diffopt);
 	diffcore_std(&revs->diffopt);
 	diff_flush(&revs->diffopt);
+	trace_performance_since(start, "diff-index");
 	return 0;
 }
 
diff --git a/dir.c b/dir.c
index 7c4b45e30e..4479a02a49 100644
--- a/dir.c
+++ b/dir.c
@@ -2248,6 +2248,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
 		   const char *path, int len, const struct pathspec *pathspec)
 {
 	struct untracked_cache_dir *untracked;
+	uint64_t start = getnanotime();
 
 	if (has_symlink_leading_path(path, len))
 		return dir->nr;
@@ -2286,6 +2287,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
 		dir->nr = i;
 	}
 
+	trace_performance_since(start, "read directory %.*s", len, path);
 	if (dir->untracked) {
 		static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
 		trace_printf_key(&trace_untracked_stats,
diff --git a/name-hash.c b/name-hash.c
index 45c98db0a0..ada66f066a 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -578,6 +578,8 @@ static void threaded_lazy_init_name_hash(
 
 static void lazy_init_name_hash(struct index_state *istate)
 {
+	uint64_t start = getnanotime();
+
 	if (istate->name_hash_initialized)
 		return;
 	hashmap_init(&istate->name_hash, cache_entry_cmp, NULL, istate->cache_nr);
@@ -600,6 +602,7 @@ static void lazy_init_name_hash(struct index_state *istate)
 	}
 
 	istate->name_hash_initialized = 1;
+	trace_performance_since(start, "initialize name hash");
 }
 
 /*
diff --git a/preload-index.c b/preload-index.c
index 2a83255e4e..4d08d44874 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -78,6 +78,7 @@ static void preload_index(struct index_state *index,
 {
 	int threads, i, work, offset;
 	struct thread_data data[MAX_PARALLEL];
+	uint64_t start = getnanotime();
 
 	if (!core_preload_index)
 		return;
@@ -108,6 +109,7 @@ static void preload_index(struct index_state *index,
 		if (pthread_join(p->pthread, NULL))
 			die("unable to join threaded lstat");
 	}
+	trace_performance_since(start, "preload index");
 }
 #endif
 
diff --git a/read-cache.c b/read-cache.c
index 2eb81a66b9..eac74bc9f1 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1372,6 +1372,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 	const char *typechange_fmt;
 	const char *added_fmt;
 	const char *unmerged_fmt;
+	uint64_t start = getnanotime();
 
 	modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n");
 	deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n");
@@ -1442,6 +1443,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 
 		replace_index_entry(istate, i, new);
 	}
+	trace_performance_since(start, "refresh index");
 	return has_errors;
 }
 
@@ -1873,6 +1875,7 @@ static void freshen_shared_index(char *base_sha1_hex, int warn)
 
 int read_index_from(struct index_state *istate, const char *path)
 {
+	uint64_t start = getnanotime();
 	struct split_index *split_index;
 	int ret;
 	char *base_sha1_hex;
@@ -1883,6 +1886,7 @@ int read_index_from(struct index_state *istate, const char *path)
 		return istate->cache_nr;
 
 	ret = do_read_index(istate, path, 0);
+	trace_performance_since(start, "read cache %s", path);
 
 	split_index = istate->split_index;
 	if (!split_index || is_null_sha1(split_index->base_sha1)) {
@@ -1906,6 +1910,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	freshen_shared_index(base_sha1_hex, 0);
 	merge_base_index(istate);
 	post_read_index_from(istate);
+	trace_performance_since(start, "read cache %s", base_path);
 	return ret;
 }
 
@@ -2234,6 +2239,7 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
 static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 			  int strip_extensions)
 {
+	uint64_t start = getnanotime();
 	int newfd = tempfile->fd;
 	git_SHA_CTX c;
 	struct cache_header hdr;
@@ -2374,6 +2380,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 		return -1;
 	istate->timestamp.sec = (unsigned int)st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
+	trace_performance_since(start, "write index, changed mask = %x", istate->cache_changed);
 	return 0;
 }
 
-- 
2.16.1.205.g271f633410


  reply	other threads:[~2018-01-27 12:28 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-27  0:28 Some rough edges of core.fsmonitor Ævar Arnfjörð Bjarmason
2018-01-27  1:36 ` Duy Nguyen
2018-01-27  1:39   ` [PATCH] trace: measure where the time is spent in the index-heavy operations Nguyễn Thái Ngọc Duy
2018-01-27 11:58     ` Thomas Gummerer
2018-01-27 12:27       ` Nguyễn Thái Ngọc Duy [this message]
2018-01-27 11:43   ` Some rough edges of core.fsmonitor Ævar Arnfjörð Bjarmason
2018-01-27 12:39     ` Duy Nguyen
2018-01-27 13:09       ` Duy Nguyen
2018-01-27 19:01         ` Ævar Arnfjörð Bjarmason
2018-01-30 22:41           ` Ben Peart
2018-01-29  9:40     ` Duy Nguyen
2018-01-29 23:16       ` Ben Peart
2018-02-01 10:40         ` Duy Nguyen
2018-01-28 20:44 ` Johannes Schindelin
2018-01-28 22:28   ` Ævar Arnfjörð Bjarmason
2018-01-30  1:21     ` Ben Peart
2018-01-31 10:15       ` Duy Nguyen
2018-02-04  9:38         ` [PATCH] dir.c: ignore paths containing .git when invalidating untracked cache Nguyễn Thái Ngọc Duy
2018-02-05 17:44           ` Ben Peart
2018-02-06 12:02             ` Duy Nguyen
2018-02-07  9:21           ` [PATCH v2] " Nguyễn Thái Ngọc Duy
2018-02-07  9:21             ` Nguyễn Thái Ngọc Duy
2018-02-07 16:59               ` Ben Peart
2018-02-13 10:00                 ` Duy Nguyen
2018-02-13 17:57                   ` Junio C Hamano
2018-02-14  1:24                     ` Duy Nguyen
2018-02-14  8:00                       ` Junio C Hamano
2018-01-30 22:57 ` Some rough edges of core.fsmonitor Ben Peart
2018-01-30 23:16   ` Ævar Arnfjörð Bjarmason
2018-01-31 16:12     ` Ben Peart

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180127122756.16971-1-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=alexmv@dropbox.com \
    --cc=avarab@gmail.com \
    --cc=benpeart@microsoft.com \
    --cc=christian.couder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=t.gummerer@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.