git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Turner <dturner@twopensource.com>
To: git@vger.kernel.org, pclouds@gmail.co,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	"Ramsay Jones" <ramsay@ramsayjones.plus.com>
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
	"David Turner" <dturner@twopensource.com>
Subject: [PATCH v4 07/16] read-cache: add watchman 'WAMA' extension
Date: Tue, 12 Apr 2016 20:33:00 -0400	[thread overview]
Message-ID: <1460507589-25525-8-git-send-email-dturner@twopensource.com> (raw)
In-Reply-To: <1460507589-25525-1-git-send-email-dturner@twopensource.com>

From: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>

The extension contains a bitmap, one bit for each entry in the
index. If the n-th bit is zero, the n-th entry is considered
unchanged, we can ce_mark_uptodate() it without refreshing. If the bit
is non-zero and we found out the corresponding file is clean after
refresh, we can clear the bit.

In addition, there's a list of directories in the untracked-cache
to invalidate (because they have new or modified entries).

The 'skipping refresh' bit is not in this patch yet as we would need
watchman. More details in later patches.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: David Turner <dturner@twopensource.com>
---
 cache.h      |   4 ++
 dir.h        |   3 ++
 read-cache.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index 0aeb994..f4f7eef 100644
--- a/cache.h
+++ b/cache.h
@@ -182,6 +182,8 @@ struct cache_entry {
 #define CE_VALID     (0x8000)
 #define CE_STAGESHIFT 12
 
+#define CE_WATCHMAN_DIRTY  (0x0001)
+
 /*
  * Range 0xFFFF0FFF in ce_flags is divided into
  * two parts: in-memory flags and on-disk ones.
@@ -320,6 +322,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CACHE_TREE_CHANGED	(1 << 5)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
 #define UNTRACKED_CHANGED	(1 << 7)
+#define WATCHMAN_CHANGED	(1 << 8)
 
 struct split_index;
 struct untracked_cache;
@@ -344,6 +347,7 @@ struct index_state {
 	struct untracked_cache *untracked;
 	void *mmap;
 	size_t mmap_size;
+	char *last_update;
 };
 
 extern struct index_state the_index;
diff --git a/dir.h b/dir.h
index 3ec3fb0..3d540de 100644
--- a/dir.h
+++ b/dir.h
@@ -142,6 +142,9 @@ struct untracked_cache {
 	int gitignore_invalidated;
 	int dir_invalidated;
 	int dir_opened;
+	/* watchman invalidation data */
+	unsigned int use_watchman : 1;
+	struct string_list invalid_untracked;
 };
 
 struct dir_struct {
diff --git a/read-cache.c b/read-cache.c
index e7d2688..19b0c85 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -19,6 +19,7 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "unix-socket.h"
+#include "ewah/ewok.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 					       unsigned int options);
@@ -41,11 +42,13 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b	  /* "link" */
 #define CACHE_EXT_UNTRACKED 0x554E5452	  /* "UNTR" */
+#define CACHE_EXT_WATCHMAN 0x57414D41	  /* "WAMA" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
 		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
-		 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED)
+		 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | \
+		 WATCHMAN_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
@@ -1221,8 +1224,13 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 			continue;
 
 		new = refresh_cache_ent(istate, ce, options, &cache_errno, &changed);
-		if (new == ce)
+		if (new == ce) {
+			if (ce->ce_flags & CE_WATCHMAN_DIRTY) {
+				ce->ce_flags          &= ~CE_WATCHMAN_DIRTY;
+				istate->cache_changed |= WATCHMAN_CHANGED;
+			}
 			continue;
+		}
 		if (!new) {
 			const char *fmt;
 
@@ -1366,6 +1374,94 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size)
 	return 0;
 }
 
+static void mark_no_watchman(size_t pos, void *data)
+{
+	struct index_state *istate = data;
+	assert(pos < istate->cache_nr);
+	istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY;
+}
+
+static int read_watchman_ext(struct index_state *istate, const void *data,
+			     unsigned long sz)
+{
+	struct ewah_bitmap *bitmap;
+	int ret, len;
+	uint32_t bitmap_size;
+	uint32_t untracked_nr;
+
+	if (memchr(data, 0, sz) == NULL)
+		return error("invalid extension");
+
+	len = strlen(data) + 1;
+	memcpy(&bitmap_size, (const char *)data + len, 4);
+	memcpy(&untracked_nr, (const char *)data + len + 4, 4);
+	untracked_nr = ntohl(untracked_nr);
+	bitmap_size = ntohl(bitmap_size);
+
+	bitmap = ewah_new();
+	ret = ewah_read_mmap(bitmap, (const char *)data + len + 8, bitmap_size);
+	if (ret != bitmap_size) {
+		ewah_free(bitmap);
+		return error("failed to parse ewah bitmap reading watchman index extension");
+	}
+	istate->last_update = xstrdup(data);
+	ewah_each_bit(bitmap, mark_no_watchman, istate);
+	ewah_free(bitmap);
+
+	/*
+	 * TODO: update the untracked cache from the untracked data in this
+	 * extension.
+	 */
+	return 0;
+}
+
+static int untracked_entry_append(struct string_list_item *item, void *sbvoid)
+{
+	struct strbuf *sb = sbvoid;
+
+	strbuf_addstr(sb, item->string);
+	strbuf_addch(sb, 0);
+	return 0;
+}
+
+void write_watchman_ext(struct strbuf *sb, struct index_state* istate)
+{
+	struct ewah_bitmap *bitmap;
+	int i;
+	int ewah_start;
+	int ewah_size = 0;
+	int fixup = 0;
+
+	strbuf_add(sb, istate->last_update, strlen(istate->last_update) + 1);
+	fixup = sb->len;
+	strbuf_add(sb, &ewah_size, 4); /* we'll fix this up later */
+	if (istate->untracked) {
+		uint32_t nr = istate->untracked->invalid_untracked.nr;
+		nr = htonl(nr);
+		strbuf_add(sb, &nr, 4);
+	} else {
+		/* zero */
+		strbuf_add(sb, &ewah_size, 4);
+	}
+
+	ewah_start = sb->len;
+	bitmap = ewah_new();
+	for (i = 0; i < istate->cache_nr; i++)
+		if (istate->cache[i]->ce_flags & CE_WATCHMAN_DIRTY)
+			ewah_set(bitmap, i);
+	ewah_serialize_strbuf(bitmap, sb);
+	ewah_free(bitmap);
+
+	/* fix up size field */
+	ewah_size = sb->len - ewah_start;
+	ewah_size = htonl(ewah_size);
+	memcpy(sb->buf + fixup, &ewah_size, 4);
+
+	if (istate->untracked)
+		for_each_string_list(&istate->untracked->invalid_untracked,
+				     untracked_entry_append, sb);
+}
+
 static int read_index_extension(struct index_state *istate,
 				const char *ext, void *data, unsigned long sz)
 {
@@ -1383,6 +1479,11 @@ static int read_index_extension(struct index_state *istate,
 	case CACHE_EXT_UNTRACKED:
 		istate->untracked = read_untracked_extension(data, sz);
 		break;
+
+	case CACHE_EXT_WATCHMAN:
+		read_watchman_ext(istate, data, sz);
+		break;
+
 	default:
 		if (*ext < 'A' || 'Z' < *ext)
 			return error("index uses %.4s extension, which we do not understand",
@@ -1864,6 +1965,8 @@ int discard_index(struct index_state *istate)
 	istate->untracked = NULL;
 	istate->from_shm = 0;
 	istate->to_shm   = 0;
+	free(istate->last_update);
+	istate->last_update = NULL;
 	return 0;
 }
 
@@ -2261,6 +2364,16 @@ static int do_write_index(struct index_state *istate, int newfd,
 		if (err)
 			return -1;
 	}
+	if (!strip_extensions && istate->last_update) {
+		struct strbuf sb = STRBUF_INIT;
+
+		write_watchman_ext(&sb, istate);
+		err = write_index_ext_header(&c, newfd, CACHE_EXT_WATCHMAN, sb.len) < 0
+			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		strbuf_release(&sb);
+		if (err)
+			return -1;
+	}
 
 	if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st))
 		return -1;
-- 
2.4.2.767.g62658d5-twtrsrc

  parent reply	other threads:[~2016-04-13  0:33 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-13  0:32 [PATCH v4 00/16] index-helper, watchman David Turner
2016-04-13  0:32 ` [PATCH v4 01/16] read-cache.c: fix constness of verify_hdr() David Turner
2016-04-13  0:32 ` [PATCH v4 02/16] read-cache: allow to keep mmap'd memory after reading David Turner
2016-04-13  0:32 ` [PATCH v4 03/16] index-helper: new daemon for caching index and related stuff David Turner
2016-04-13 13:43   ` Duy Nguyen
2016-04-13 18:47     ` David Turner
2016-04-14 10:04       ` Duy Nguyen
2016-04-15 20:19         ` David Turner
2016-04-15 11:25       ` Duy Nguyen
2016-04-15 20:21         ` David Turner
2016-04-16  0:22           ` Duy Nguyen
2016-04-13 17:00   ` Ramsay Jones
2016-04-13 17:10     ` David Turner
2016-04-14 16:56   ` Junio C Hamano
2016-04-14 17:02     ` David Turner
2016-04-15  0:07       ` Duy Nguyen
2016-04-16  0:04   ` Stefan Beller
2016-04-19 22:00     ` David Turner
2016-04-19 22:00     ` David Turner
2016-04-13  0:32 ` [PATCH v4 04/16] index-helper: add --strict David Turner
2016-04-13  0:32 ` [PATCH v4 05/16] daemonize(): set a flag before exiting the main process David Turner
2016-04-13  0:32 ` [PATCH v4 06/16] index-helper: add --detach David Turner
2016-04-13  0:33 ` David Turner [this message]
2016-04-13  0:33 ` [PATCH v4 08/16] Add watchman support to reduce index refresh cost David Turner
2016-04-17  5:36   ` Duy Nguyen
2016-04-13  0:33 ` [PATCH v4 09/16] index-helper: use watchman to avoid refreshing index with lstat() David Turner
2016-04-16  0:07   ` Stefan Beller
2016-04-19 18:54     ` David Turner
2016-04-13  0:33 ` [PATCH v4 10/16] update-index: enable/disable watchman support David Turner
2016-04-13  0:33 ` [PATCH v4 11/16] unpack-trees: preserve index extensions David Turner
2016-04-13  0:33 ` [PATCH v4 12/16] index-helper: kill mode David Turner
2016-04-16 16:08   ` Ævar Arnfjörð Bjarmason
2016-04-19 22:14     ` David Turner
2016-04-13  0:33 ` [PATCH v4 13/16] index-helper: don't run if already running David Turner
2016-04-13  0:33 ` [PATCH v4 14/16] index-helper: autorun mode David Turner
2016-04-13  0:33 ` [PATCH v4 15/16] index-helper: optionally automatically run David Turner
2016-04-17  5:19   ` Duy Nguyen
2016-04-19 22:31     ` David Turner
2016-04-13  0:33 ` [PATCH v4 16/16] read-cache: config for waiting for index-helper David Turner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1460507589-25525-8-git-send-email-dturner@twopensource.com \
    --to=dturner@twopensource.com \
    --cc=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=pclouds@gmail.co \
    --cc=pclouds@gmail.com \
    --cc=ramsay@ramsayjones.plus.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).