git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v3 13/26] read-cache: ask file watcher to watch files
Date: Mon,  3 Feb 2014 11:29:01 +0700	[thread overview]
Message-ID: <1391401754-15347-14-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1391401754-15347-1-git-send-email-pclouds@gmail.com>

We want to watch files that are never changed because lstat() on those
files is a wasted effort. So we sort unwatched files by date and start
adding them to the file watcher until it barfs (e.g. hits inotify
limit).

Note that we still do lstat() on these new watched files because they
could have changed before the file watcher could watch them. Watched
files may only skip lstat() at the next run.

Also, at this early in the index loading process, we don't know what
files are dirty and thus can skip watching (we do clear CE_WATCHED on
entries that are not verified clean before writing index). So the
watches are set, but git ignores its results. Maybe in future we could
store the list of dirty files in WATC extension and use it as a hint
to skip watching.

In the future, file watcher should figure out what paths are
watchable, what not (e.g. network filesystems) and reject them. For
now it's the user resposibility to set (or unset) filewatcher.path
properly.

The previous attempt sends paths in batch, 64k per pkt-line, then wait
for response. It's designed to stop short in case file watcher is out
of resources. But that's a rare case, and send/wait cycles increase
latency.

Instead we now send everything in one packet, and not in pkt-line to
avoid the 64k limit. Then we wait for the response. On webkit.git,
normal "status -uno" takes 0m1.138s. The sending 14M (of 182k paths)
takes 52ms extra. Previous approach takes 213ms extra. Of course in
the end, extra time is longer because file watcher is basically no-op
so far.

There is not much room for improvement. If we compress paths to reduce
payload, zlib time costs about 300ms (so how small the end result is
no longer matters). Even simple prefix compressing (index v4 style)
would cost 76ms on processing time alone (reducing payload to 3M).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 file-watcher-lib.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++
 file-watcher-lib.h |  1 +
 file-watcher.c     | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 read-cache.c       | 18 +++++++++--
 4 files changed, 191 insertions(+), 2 deletions(-)

diff --git a/file-watcher-lib.c b/file-watcher-lib.c
index d0636cc..791faae 100644
--- a/file-watcher-lib.c
+++ b/file-watcher-lib.c
@@ -89,3 +89,86 @@ void open_watcher(struct index_state *istate)
 		return;
 	}
 }
+
+static int sort_by_date(const void *a_, const void *b_)
+{
+	const struct cache_entry *a = *(const struct cache_entry **)a_;
+	const struct cache_entry *b = *(const struct cache_entry **)b_;
+	uint32_t seca = a->ce_stat_data.sd_mtime.sec;
+	uint32_t secb = b->ce_stat_data.sd_mtime.sec;
+	return seca - secb;
+}
+
+static inline int ce_watchable(struct cache_entry *ce)
+{
+	return
+		!(ce->ce_flags & CE_WATCHED) &&
+		!(ce->ce_flags & CE_VALID) &&
+		/*
+		 * S_IFGITLINK should not be watched
+		 * obviously. S_IFLNK could be problematic because
+		 * inotify may follow symlinks without IN_DONT_FOLLOW
+		 */
+		S_ISREG(ce->ce_mode);
+}
+
+static void send_watches(struct index_state *istate,
+			 struct cache_entry **sorted, int nr)
+{
+	struct strbuf sb = STRBUF_INIT;
+	int i, len = 0;
+
+	for (i = 0; i < nr; i++)
+		len += ce_namelen(sorted[i]) + 1;
+
+	if (packet_write_timeout(istate->watcher, WAIT_TIME, "watch %d", len) <= 0)
+		return;
+
+	strbuf_grow(&sb, len);
+	for (i = 0; i < nr; i++)
+		strbuf_add(&sb, sorted[i]->name, ce_namelen(sorted[i]) + 1);
+
+	if (write_in_full_timeout(istate->watcher, sb.buf,
+				  sb.len, WAIT_TIME) != sb.len) {
+		strbuf_release(&sb);
+		return;
+	}
+	strbuf_release(&sb);
+
+	for (;;) {
+		char *line, *end;
+		unsigned long n;
+
+		if (!(line = packet_read_line_timeout(istate->watcher,
+						      WAIT_TIME, &len)))
+			return;
+		if (starts_with(line, "watching "))
+			continue;
+		if (!starts_with(line, "watched "))
+			return;
+		n = strtoul(line + 8, &end, 10);
+		for (i = 0; i < n; i++)
+			sorted[i]->ce_flags |= CE_WATCHED;
+		istate->cache_changed = 1;
+		break;
+	}
+}
+
+void watch_entries(struct index_state *istate)
+{
+	int i, nr;
+	struct cache_entry **sorted;
+
+	if (istate->watcher <= 0)
+		return;
+	for (i = nr = 0; i < istate->cache_nr; i++)
+		if (ce_watchable(istate->cache[i]))
+			nr++;
+	sorted = xmalloc(sizeof(*sorted) * nr);
+	for (i = nr = 0; i < istate->cache_nr; i++)
+		if (ce_watchable(istate->cache[i]))
+			sorted[nr++] = istate->cache[i];
+	qsort(sorted, nr, sizeof(*sorted), sort_by_date);
+	send_watches(istate, sorted, nr);
+	free(sorted);
+}
diff --git a/file-watcher-lib.h b/file-watcher-lib.h
index eb6edf5..1641024 100644
--- a/file-watcher-lib.h
+++ b/file-watcher-lib.h
@@ -2,5 +2,6 @@
 #define __FILE_WATCHER_LIB__
 
 void open_watcher(struct index_state *istate);
+void watch_entries(struct index_state *istate);
 
 #endif
diff --git a/file-watcher.c b/file-watcher.c
index 6df3a48..c257414 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -37,6 +37,70 @@ static struct connection **conns;
 static struct pollfd *pfd;
 static int conns_alloc, pfd_nr, pfd_alloc;
 
+static int watch_path(struct repository *repo, char *path)
+{
+	return -1;
+}
+
+static inline uint64_t stamp(void)
+{
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	return (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+static int shutdown_connection(int id);
+static void watch_paths(int conn_id, char *buf, int maxlen)
+{
+	int ret, len, n;
+	uint64_t start, now;
+	char *end;
+
+	n = strtol(buf, &end, 10);
+	if (end != buf + maxlen) {
+		packet_write(conns[conn_id]->sock,
+			     "error invalid watch number %s", buf);
+		shutdown_connection(conn_id);
+		return;
+	}
+
+	buf = xmallocz(n);
+	end = buf + n;
+	/*
+	 * Careful if this takes longer than 50ms, it'll upset other
+	 * connections
+	 */
+	if (read_in_full(conns[conn_id]->sock, buf, n) != n) {
+		shutdown_connection(conn_id);
+		return;
+	}
+	if (chdir(conns[conn_id]->repo->work_tree)) {
+		packet_write(conns[conn_id]->sock,
+			     "error chdir %s", strerror(errno));
+		return;
+	}
+	start = stamp();
+	for (n = ret = 0; buf < end; buf += len + 1) {
+		len = strlen(buf);
+		if (watch_path(conns[conn_id]->repo, buf))
+			break;
+		n++;
+		if (n & 0x3ff)
+			continue;
+		now = stamp();
+		/*
+		 * If we process for too long, the client may timeout
+		 * and give up. Let the client know we're not dead
+		 * yet, every 30ms.
+		 */
+		if (start + 30000 < now) {
+			packet_write(conns[conn_id]->sock, "watching %d", n);
+			start = now;
+		}
+	}
+	packet_write(conns[conn_id]->sock, "watched %u", n);
+}
+
 static struct repository *get_repo(const char *work_tree)
 {
 	int first, last;
@@ -177,6 +241,33 @@ static int handle_command(int conn_id)
 		}
 		packet_write(fd, "ok");
 	}
+
+	/*
+	 * > "watch" SP LENGTH
+	 * > PATH-LIST
+	 * < "watching" SP NUM
+	 * < "watched" SP NUM
+	 *
+	 * PATH-LIST is the list of paths, each terminated with
+	 * NUL. PATH-LIST is not wrapped in pkt-line format. LENGTH is
+	 * the size of PATH-LIST in bytes.
+	 *
+	 * The client asks file watcher to watcher a number of
+	 * paths. File watcher starts to process from path by path in
+	 * received order. File watcher returns the actual number of
+	 * watched paths with "watched" command.
+	 *
+	 * File watcher may send any number of "watching" messages
+	 * before "watched". This packet is to keep the connection
+	 * alive and has no other values.
+	 */
+	else if (starts_with(msg, "watch ")) {
+		if (!conns[conn_id]->repo) {
+			packet_write(fd, "error have not received index command");
+			return shutdown_connection(conn_id);
+		}
+		watch_paths(conn_id, msg + 6, len - 6);
+	}
 	else {
 		packet_write(fd, "error unrecognized command %s", msg);
 		return shutdown_connection(conn_id);
diff --git a/read-cache.c b/read-cache.c
index a7e5735..cb2188f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1530,6 +1530,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	}
 	munmap(mmap, mmap_size);
 	open_watcher(istate);
+	watch_entries(istate);
 	return istate->cache_nr;
 
 unmap:
@@ -1809,8 +1810,21 @@ int write_index(struct index_state *istate, int newfd)
 	for (i = removed = extended = 0; i < entries; i++) {
 		if (cache[i]->ce_flags & CE_REMOVE)
 			removed++;
-		else if (cache[i]->ce_flags & CE_WATCHED)
-			has_watches++;
+		else if (cache[i]->ce_flags & CE_WATCHED) {
+			/*
+			 * We may set CE_WATCHED (but not CE_VALID)
+			 * early when refresh has not been done
+			 * yet. At that time we had no idea if the
+			 * entry may have been updated. If it has
+			 * been, remove CE_WATCHED so CE_VALID won't
+			 * incorrectly be set next time if the file
+			 * watcher reports no changes.
+			 */
+			if (!ce_uptodate(cache[i]))
+				cache[i]->ce_flags &= ~CE_WATCHED;
+			else
+				has_watches++;
+		}
 
 		/* reduce extended entries if possible */
 		cache[i]->ce_flags &= ~CE_EXTENDED;
-- 
1.8.5.2.240.g8478abd

  parent reply	other threads:[~2014-02-03  4:30 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-12 11:03 [PATCH 0/6] inotify support Nguyễn Thái Ngọc Duy
2014-01-12 11:03 ` [PATCH 1/6] read-cache: save trailing sha-1 Nguyễn Thái Ngọc Duy
2014-01-12 11:03 ` [PATCH 2/6] read-cache: new extension to mark what file is watched Nguyễn Thái Ngọc Duy
2014-01-13 17:02   ` Jonathan Nieder
2014-01-14  1:25     ` Duy Nguyen
2014-01-14  1:39   ` Duy Nguyen
2014-01-12 11:03 ` [PATCH 3/6] read-cache: connect to file watcher Nguyễn Thái Ngọc Duy
2014-01-15 10:58   ` Jeff King
2014-01-12 11:03 ` [PATCH 4/6] read-cache: get "updated" path list from " Nguyễn Thái Ngọc Duy
2014-01-12 11:03 ` [PATCH 5/6] read-cache: ask file watcher to watch files Nguyễn Thái Ngọc Duy
2014-01-12 11:03 ` [PATCH 6/6] file-watcher: support inotify Nguyễn Thái Ngọc Duy
2014-01-17  9:47 ` [PATCH/WIP v2 00/14] inotify support Nguyễn Thái Ngọc Duy
2014-01-17  9:47   ` [PATCH/WIP v2 01/14] read-cache: save trailing sha-1 Nguyễn Thái Ngọc Duy
2014-01-17  9:47   ` [PATCH/WIP v2 02/14] read-cache: new extension to mark what file is watched Nguyễn Thái Ngọc Duy
2014-01-17 11:19     ` Thomas Gummerer
2014-01-19 17:06     ` Thomas Rast
2014-01-20  1:38       ` Duy Nguyen
2014-01-17  9:47   ` [PATCH/WIP v2 03/14] read-cache: connect to file watcher Nguyễn Thái Ngọc Duy
2014-01-17 15:24     ` Torsten Bögershausen
2014-01-17 16:21       ` Duy Nguyen
2014-01-17  9:47   ` [PATCH/WIP v2 04/14] read-cache: ask file watcher to watch files Nguyễn Thái Ngọc Duy
2014-01-17  9:47   ` [PATCH/WIP v2 05/14] read-cache: put some limits on file watching Nguyễn Thái Ngọc Duy
2014-01-19 17:06     ` Thomas Rast
2014-01-20  1:36       ` Duy Nguyen
2014-01-17  9:47   ` [PATCH/WIP v2 06/14] read-cache: get modified file list from file watcher Nguyễn Thái Ngọc Duy
2014-01-17  9:47   ` [PATCH/WIP v2 07/14] read-cache: add config to start file watcher automatically Nguyễn Thái Ngọc Duy
2014-01-17  9:47   ` [PATCH/WIP v2 08/14] read-cache: add GIT_TEST_FORCE_WATCHER for testing Nguyễn Thái Ngọc Duy
2014-01-19 17:04     ` Thomas Rast
2014-01-20  1:32       ` Duy Nguyen
2014-01-17  9:47   ` [PATCH/WIP v2 09/14] file-watcher: add --shutdown and --log options Nguyễn Thái Ngọc Duy
2014-01-17  9:47   ` [PATCH/WIP v2 10/14] file-watcher: automatically quit Nguyễn Thái Ngọc Duy
2014-01-17  9:47   ` [PATCH/WIP v2 11/14] file-watcher: support inotify Nguyễn Thái Ngọc Duy
2014-01-19 17:04   ` [PATCH/WIP v2 00/14] inotify support Thomas Rast
2014-01-20  1:28     ` Duy Nguyen
2014-01-20 21:51       ` Thomas Rast
2014-01-28 10:46     ` Duy Nguyen
2014-02-03  4:28   ` [PATCH v3 00/26] " Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 01/26] pkt-line.c: rename global variable buffer[] to something less generic Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 02/26] pkt-line.c: add packet_write_timeout() Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 03/26] pkt-line.c: add packet_read_line_timeout() Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 04/26] unix-socket: make unlink() optional in unix_stream_listen() Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 05/26] Add git-file-watcher and basic connection handling logic Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 06/26] file-watcher: check socket directory permission Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 07/26] file-watcher: remove socket on exit Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 08/26] file-watcher: add --detach Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 09/26] read-cache: save trailing sha-1 Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 10/26] read-cache: new flag CE_WATCHED to mark what file is watched Nguyễn Thái Ngọc Duy
2014-02-03  4:28     ` [PATCH v3 11/26] Clear CE_WATCHED when set CE_VALID alone Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 12/26] read-cache: basic hand shaking to the file watcher Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` Nguyễn Thái Ngọc Duy [this message]
2014-02-03  4:29     ` [PATCH v3 14/26] read-cache: put some limits on file watching Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 15/26] read-cache: get changed file list from file watcher Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 16/26] git-compat-util.h: add inotify stubs on non-Linux platforms Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 17/26] file-watcher: inotify support, watching part Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 18/26] file-watcher: inotify support, notification part Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 19/26] Wrap CE_VALID test with ce_valid() Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 20/26] read-cache: new variable to verify file-watcher results Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 21/26] Support running file watcher with the test suite Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 22/26] file-watcher: quit if $WATCHER/socket is gone Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 23/26] file-watcher: tests for the daemon Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 24/26] ls-files: print CE_WATCHED as W (or "w" with CE_VALID) Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 25/26] file-watcher: tests for the client side Nguyễn Thái Ngọc Duy
2014-02-03  4:29     ` [PATCH v3 26/26] Disable file-watcher with system inotify on some tests Nguyễn Thái Ngọc Duy
2014-02-08  8:04     ` [PATCH v3 00/26] inotify support Torsten Bögershausen
2014-02-08  8:53       ` Duy Nguyen
2014-02-09 20:19         ` Torsten Bögershausen
2014-02-10 10:37           ` Duy Nguyen
2014-02-10 16:55             ` Torsten Bögershausen
2014-02-10 23:34               ` Duy Nguyen
2014-02-17 12:36           ` Duy Nguyen
2014-02-19 20:35 ` [PATCH 0/6] " Shawn Pearce
2014-02-19 23:45   ` Duy Nguyen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1391401754-15347-14-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).