git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Christian Couder" <christian.couder@gmail.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 4/5] index-helper: use watchman to avoid refreshing index with lstat()
Date: Sun,  1 Nov 2015 14:55:45 +0100	[thread overview]
Message-ID: <1446386146-10438-5-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1446386146-10438-1-git-send-email-pclouds@gmail.com>

Watchman is hidden behind index-helper. Before git tries to read the
index from shm, it notifies index-helper with SIGUSR1 and sleeps,
waiting for index-helper to prepare shm. index-helper then contacts
watchman, updates the 'WAMA' extension, puts it in a separate shm and
wakes git up with SIGHUP.

Git uses this extension to avoid lstat()ing unchanged entries. Git only
trusts the 'WAMA' extension when it's received from the separate shm,
not from disk. Unmarked entries are "clean". Marked entries are dirty
from watchman's point of view. If Git finds out that some entries are
'watchman-dirty', but are really unchanged (e.g. the file was changed,
then reverted back), then it will clear the marking in 'WAMA' before
writing it down.

Hiding watchman behind index-helper means you need both daemons. You
can't run watchman alone. Not so good. But on the other hand, the 'git'
binary is not linked to watchman/json libraries, which is good for
packaging. The core git package will run fine without watchman-related
packages. Users who need watchman can install git-index-helper and its
dependencies.

Another reason for tying watchman to index-helper is, when used with
untracked cache, we need to keep track of $GIT_WORK_TREE file
listing. That kind of list can be kept in index-helper.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Makefile           |  5 ++++
 cache.h            |  2 ++
 index-helper.c     | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 read-cache.c       | 43 ++++++++++++++++++++++++++--
 watchman-support.h |  1 -
 5 files changed, 127 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index 761acb6..3f5eac8 100644
--- a/Makefile
+++ b/Makefile
@@ -434,6 +434,7 @@ MSGFMT = msgfmt
 CURL_CONFIG = curl-config
 PTHREAD_LIBS = -lpthread
 PTHREAD_CFLAGS =
+WATCHMAN_LIBS =
 GCOV = gcov
 
 export TCL_PATH TCLTK_PATH
@@ -1392,6 +1393,7 @@ endif
 ifdef USE_WATCHMAN
 	LIB_H += watchman-support.h
 	LIB_OBJS += watchman-support.o
+	WATCHMAN_LIBS = -lwatchman
 	BASIC_CFLAGS += -DUSE_WATCHMAN
 endif
 
@@ -2005,6 +2007,9 @@ git-remote-testsvn$X: remote-testsvn.o GIT-LDFLAGS $(GITLIBS) $(VCSSVN_LIB)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) \
 	$(VCSSVN_LIB)
 
+git-index-helper$X: index-helper.o GIT-LDFLAGS $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) $(WATCHMAN_LIBS)
+
 $(REMOTE_CURL_ALIASES): $(REMOTE_CURL_PRIMARY)
 	$(QUIET_LNCP)$(RM) $@ && \
 	ln $< $@ 2>/dev/null || \
diff --git a/cache.h b/cache.h
index 572299c..c04141b 100644
--- a/cache.h
+++ b/cache.h
@@ -518,6 +518,7 @@ extern int daemonize(int *);
 
 /* Initialize and use the cache information */
 struct lock_file;
+extern int verify_index(const struct index_state *);
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
 extern int do_read_index(struct index_state *istate, const char *path,
@@ -525,6 +526,7 @@ extern int do_read_index(struct index_state *istate, const char *path,
 extern int read_index_from(struct index_state *, const char *path);
 extern int is_index_unborn(struct index_state *);
 extern int read_index_unmerged(struct index_state *);
+extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate);
 #define COMMIT_LOCK		(1 << 0)
 #define CLOSE_LOCK		(1 << 1)
 #define REFRESH_DAEMON		(1 << 2)
diff --git a/index-helper.c b/index-helper.c
index cf26da7..421887e 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -5,15 +5,18 @@
 #include "split-index.h"
 #include "shm.h"
 #include "lockfile.h"
+#include "watchman-support.h"
 
 struct shm {
 	unsigned char sha1[20];
 	void *shm;
 	size_t size;
+	pid_t pid;
 };
 
 static struct shm shm_index;
 static struct shm shm_base_index;
+static struct shm shm_watchman;
 static int daemonized, to_verify = 1;
 
 static void release_index_shm(struct shm *is)
@@ -25,10 +28,21 @@ static void release_index_shm(struct shm *is)
 	is->shm = NULL;
 }
 
+static void release_watchman_shm(struct shm *is)
+{
+	if (!is->shm)
+		return;
+	munmap(is->shm, is->size);
+	git_shm_unlink("git-watchman-%s-%" PRIuMAX,
+		       sha1_to_hex(is->sha1), (uintmax_t)is->pid);
+	is->shm = NULL;
+}
+
 static void cleanup_shm(void)
 {
 	release_index_shm(&shm_index);
 	release_index_shm(&shm_base_index);
+	release_watchman_shm(&shm_watchman);
 }
 
 static void cleanup(void)
@@ -120,13 +134,15 @@ static void share_the_index(void)
 	if (the_index.split_index && the_index.split_index->base)
 		share_index(the_index.split_index->base, &shm_base_index);
 	share_index(&the_index, &shm_index);
-	if (to_verify && !verify_shm())
+	if (to_verify && !verify_shm()) {
 		cleanup_shm();
-	discard_index(&the_index);
+		discard_index(&the_index);
+	}
 }
 
 static void refresh(int sig)
 {
+	discard_index(&the_index);
 	the_index.keep_mmap = 1;
 	the_index.to_shm    = 1;
 	if (read_cache() < 0)
@@ -136,7 +152,55 @@ static void refresh(int sig)
 
 #ifdef HAVE_SHM
 
-static void do_nothing(int sig)
+#ifdef USE_WATCHMAN
+static void share_watchman(struct index_state *istate,
+			   struct shm *is, pid_t pid)
+{
+	struct strbuf sb = STRBUF_INIT;
+	void *shm;
+
+	write_watchman_ext(&sb, istate);
+	if (git_shm_map(O_CREAT | O_EXCL | O_RDWR, 0700, sb.len + 20,
+			&shm, PROT_READ | PROT_WRITE, MAP_SHARED,
+			"git-watchman-%s-%" PRIuMAX,
+			sha1_to_hex(istate->sha1), (uintmax_t)pid) == sb.len + 20) {
+		is->size = sb.len + 20;
+		is->shm = shm;
+		is->pid = pid;
+		hashcpy(is->sha1, istate->sha1);
+
+		memcpy(shm, sb.buf, sb.len);
+		hashcpy((unsigned char *)shm + is->size - 20, is->sha1);
+	}
+	strbuf_release(&sb);
+}
+
+static void prepare_with_watchman(pid_t pid)
+{
+	/*
+	 * with the help of watchman, maybe we could detect if
+	 * $GIT_DIR/index is updated..
+	 */
+	if (!verify_index(&the_index))
+		refresh(0);
+
+	if (check_watchman(&the_index))
+		return;
+
+	share_watchman(&the_index, &shm_watchman, pid);
+}
+
+static void prepare_index(int sig, siginfo_t *si, void *context) /* SIGUSR1 handler */
+{
+	release_watchman_shm(&shm_watchman); /* unmap+unlink shm of an earlier request */
+	if (the_index.last_update)
+		prepare_with_watchman(si->si_pid);
+	kill(si->si_pid, SIGHUP); /* stop the waiting in poke_daemon() */
+}
+
+#else
+
+static void prepare_index(int sig, siginfo_t *si, void *context)
 {
 	/*
 	 * what we need is the signal received and interrupts
@@ -145,11 +209,21 @@ static void do_nothing(int sig)
 	 */
 }
 
+#endif
+
 static void loop(const char *pid_file, int idle_in_seconds)
 {
+	struct sigaction sa;
+
 	sigchain_pop(SIGHUP);	/* pushed by sigchain_push_common */
 	sigchain_push(SIGHUP, refresh);
-	sigchain_push(SIGUSR1, do_nothing);
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = prepare_index;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_SIGINFO;
+	sigaction(SIGUSR1, &sa, NULL);
+
 	refresh(0);
 	while (sleep(idle_in_seconds))
 		; /* do nothing, all is handled by signal handlers already */
@@ -245,6 +319,8 @@ int main(int argc, char **argv)
 				       LOCK_DIE_ON_ERROR);
 #ifdef GIT_WINDOWS_NATIVE
 	strbuf_addstr(&sb, "HWND");
+#elif defined(USE_WATCHMAN)
+	strbuf_addch(&sb, 'W');	/* see poke_daemon() */
 #endif
 	strbuf_addf(&sb, "%" PRIuMAX, (uintmax_t) getpid());
 	write_in_full(fd, sb.buf, sb.len);
diff --git a/read-cache.c b/read-cache.c
index ae33951..1b84538 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1405,7 +1405,7 @@ static int read_watchman_ext(struct index_state *istate, const void *data,
 	return 0;
 }
 
-static void write_watchman_ext(struct strbuf *sb, struct index_state* istate)
+void write_watchman_ext(struct strbuf *sb, struct index_state* istate)
 {
 	struct ewah_bitmap *bitmap;
 	int i;
@@ -1678,6 +1678,39 @@ static int try_shm(struct index_state *istate)
 	return 0;
 }
 
+static void refresh_by_watchman(struct index_state *istate)
+{
+	void *shm = NULL;
+	int length;
+	int i;
+
+	length = git_shm_map(O_RDONLY, 0700, -1, &shm,
+			     PROT_READ, MAP_SHARED,
+			     "git-watchman-%s-%" PRIuMAX,
+			     sha1_to_hex(istate->sha1),
+			     (uintmax_t)getpid());
+
+	if (length <= 20 ||
+	    hashcmp(istate->sha1, (unsigned char *)shm + length - 20) ||
+	    /*
+	     * No need to clear CE_NO_WATCH set by 'WAMA' on
+	     * disk. Watchman can only set more, not clear any, so
+	     * this is OR mask.
+	     */
+	    read_watchman_ext(istate, shm, length - 20))
+		goto done;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+		if (ce_stage(ce) || (ce->ce_flags & CE_NO_WATCH))
+			continue;
+		ce_mark_uptodate(ce);
+	}
+done:
+	if (shm)
+		munmap(shm, length);
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -1797,7 +1830,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	split_index = istate->split_index;
 	if (!split_index || is_null_sha1(split_index->base_sha1)) {
 		check_ce_order(istate);
-		return ret;
+		goto done;
 	}
 
 	if (split_index->base)
@@ -1818,6 +1851,10 @@ int read_index_from(struct index_state *istate, const char *path)
 		    sha1_to_hex(split_index->base->sha1));
 	merge_base_index(istate);
 	check_ce_order(istate);
+
+done:
+	if (ret > 0 && istate->from_shm && istate->last_update)
+		refresh_by_watchman(istate);
 	return ret;
 }
 
@@ -2119,7 +2156,7 @@ out:
 	return 0;
 }
 
-static int verify_index(const struct index_state *istate)
+int verify_index(const struct index_state *istate)
 {
 	return verify_index_from(istate, get_index_file());
 }
diff --git a/watchman-support.h b/watchman-support.h
index 5610409..ee1ef2c 100644
--- a/watchman-support.h
+++ b/watchman-support.h
@@ -4,5 +4,4 @@
 struct index_state;
 int check_watchman(struct index_state *index);
 
-
 #endif /* WATCHMAN_SUPPORT_H */
-- 
2.2.0.513.g477eb31

  parent reply	other threads:[~2015-11-01 13:56 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-01 13:55 [PATCH 0/5] Use watchman to reduce index refresh time Nguyễn Thái Ngọc Duy
2015-11-01 13:55 ` [PATCH 1/5] read-cache: add watchman 'WAMA' extension Nguyễn Thái Ngọc Duy
2015-11-02 22:03   ` David Turner
2015-11-03 19:17     ` Duy Nguyen
2015-11-03 19:49       ` David Turner
2015-11-01 13:55 ` [PATCH 2/5] Add watchman support to reduce index refresh cost Nguyễn Thái Ngọc Duy
2015-11-02 21:19   ` David Turner
2015-11-01 13:55 ` [PATCH 3/5] read-cache: allow index-helper to prepare shm before git reads it Nguyễn Thái Ngọc Duy
2015-11-01 13:55 ` Nguyễn Thái Ngọc Duy [this message]
2015-11-01 13:55 ` [PATCH 5/5] update-index: enable/disable watchman support Nguyễn Thái Ngọc Duy
2015-11-02 14:54 ` [PATCH 0/5] Use watchman to reduce index refresh time Paolo Ciarrocchi
2015-11-02 19:23   ` Duy Nguyen
2015-11-03  9:21     ` Duy Nguyen
2015-11-03 10:26       ` Paolo Ciarrocchi
2015-11-09 20:06       ` Christian Couder
2015-11-10 21:04         ` David Turner
2015-11-20  9:45           ` Christian Couder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1446386146-10438-5-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=christian.couder@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).