From: David Turner <dturner@twopensource.com>
To: git@vger.kernel.org, pclouds@gmail.com
Cc: David Turner <dturner@twopensource.com>
Subject: [PATCH 19/19] hack: watchman/untracked cache mashup
Date: Wed, 9 Mar 2016 13:36:22 -0500 [thread overview]
Message-ID: <1457548582-28302-20-git-send-email-dturner@twopensource.com> (raw)
In-Reply-To: <1457548582-28302-1-git-send-email-dturner@twopensource.com>
DO NOT APPLY THIS
This is a hack to add untracked cache info to the watchman 'WAMA' index
extension. The idea is that we don't have to stat directories if
watchman hasn't told us of any changes in those directories.
If this is the right idea, I can go back and add it to the relevant
patch(es). But I want to see if other folks think it's the right idea
first.
Signed-off-by: David Turner <dturner@twopensource.com>
---
dir.c | 23 +++++++--
dir.h | 6 +++
read-cache.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++--
watchman-support.c | 21 +++++++-
4 files changed, 182 insertions(+), 10 deletions(-)
diff --git a/dir.c b/dir.c
index 69e0be6..5058b29 100644
--- a/dir.c
+++ b/dir.c
@@ -597,9 +597,9 @@ static void trim_trailing_spaces(char *buf)
*
* If "name" has the trailing slash, it'll be excluded in the search.
*/
-static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
- struct untracked_cache_dir *dir,
- const char *name, int len)
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+ struct untracked_cache_dir *dir,
+ const char *name, int len)
{
int first, last;
struct untracked_cache_dir *d;
@@ -1726,6 +1726,17 @@ static int valid_cached_dir(struct dir_struct *dir,
if (!untracked)
return 0;
+ if (dir->untracked->use_watchman) {
+ /*
+ * With watchman, we can trust the untracked cache's
+ * valid field.
+ */
+ if (untracked->valid)
+ goto skip_stat;
+ else
+ invalidate_directory(dir->untracked, untracked);
+ }
+
if (stat(path->len ? path->buf : ".", &st)) {
invalidate_directory(dir->untracked, untracked);
memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
@@ -1739,6 +1750,7 @@ static int valid_cached_dir(struct dir_struct *dir,
return 0;
}
+skip_stat:
if (untracked->check_only != !!check_only) {
invalidate_directory(dir->untracked, untracked);
return 0;
@@ -2625,8 +2637,10 @@ static void free_untracked(struct untracked_cache_dir *ucd)
void free_untracked_cache(struct untracked_cache *uc)
{
- if (uc)
+ if (uc) {
free_untracked(uc->root);
+ string_list_clear(&uc->invalid_untracked, 0);
+ }
free(uc);
}
@@ -2775,6 +2789,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long
return NULL;
uc = xcalloc(1, sizeof(*uc));
+ string_list_init(&uc->invalid_untracked, 1);
strbuf_init(&uc->ident, ident_len);
strbuf_add(&uc->ident, ident, ident_len);
load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
diff --git a/dir.h b/dir.h
index 3ec3fb0..8fd3f9e 100644
--- a/dir.h
+++ b/dir.h
@@ -142,6 +142,9 @@ struct untracked_cache {
int gitignore_invalidated;
int dir_invalidated;
int dir_opened;
+ /* watchman invalidation data */
+ unsigned int use_watchman : 1;
+ struct string_list invalid_untracked;
};
struct dir_struct {
@@ -312,4 +315,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long
void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
void add_untracked_cache(struct index_state *istate);
void remove_untracked_cache(struct index_state *istate);
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+ struct untracked_cache_dir *dir,
+ const char *name, int len);
#endif
diff --git a/read-cache.c b/read-cache.c
index 40d789a..66438d8 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1237,7 +1237,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
if (!new) {
const char *fmt;
- if (really && cache_errno == EINVAL) {
+ if (really || cache_errno == EINVAL) {
/* If we are doing --really-refresh that
* means the index is not valid anymore.
*/
@@ -1377,11 +1377,82 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size)
return 0;
}
+/*
+ * Resolve the '/'-separated path "name" relative to the untracked cache
+ * node "ucd", one component at a time via lookup_untracked().
+ *
+ * Returns "ucd" itself for an empty "name", the node for the last
+ * component otherwise, or NULL as soon as lookup_untracked() returns
+ * NULL for a component.
+ */
+static struct untracked_cache_dir *find_untracked_cache_dir(
+	struct untracked_cache *uc, struct untracked_cache_dir *ucd,
+	const char *name)
+{
+	int component_len;
+	const char *end;
+	struct untracked_cache_dir *dir;
+
+	if (!*name)
+		return ucd;
+
+	end = strchr(name, '/');
+	if (end)
+		component_len = end - name;
+	else
+		component_len = strlen(name);
+
+	dir = lookup_untracked(uc, ucd, name, component_len);
+	if (!dir)
+		return NULL;
+
+	/*
+	 * No '/' means this was the last component.  Stepping over a
+	 * separator that is not there would start the next lookup one
+	 * byte past the terminating NUL.
+	 */
+	if (!end)
+		return dir;
+
+	return find_untracked_cache_dir(uc, dir, end + 1);
+}
+
+/*
+ * ewah_each_bit() callback: "pos" is the position of a set bit and "data"
+ * is the index_state being read.  Flags cache entry "pos" as
+ * CE_WATCHMAN_DIRTY and, when an untracked cache with a root is present,
+ * clears the valid flag of the untracked-cache node for the entry's
+ * parent directory.
+ */
static void mark_no_watchman(size_t pos, void *data)
{
struct index_state *istate = data;
+	struct cache_entry *ce;
+	struct strbuf sb = STRBUF_INIT;
+	const char *slash;
+	struct untracked_cache_dir *dir;
+
assert(pos < istate->cache_nr);
- istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY;
+	/* Index the cache only after "pos" has been checked above. */
+	ce = istate->cache[pos];
+	ce->ce_flags |= CE_WATCHMAN_DIRTY;
+
+	if (!istate->untracked || !istate->untracked->root)
+		return;
+
+	/*
+	 * Cut the entry's path down to its parent directory.  A path
+	 * without '/' lives in the root, which is looked up as "".
+	 */
+	strbuf_add(&sb, ce->name, ce_namelen(ce));
+	slash = strrchr(sb.buf, '/');
+	strbuf_setlen(&sb, slash ? slash - sb.buf : 0);
+
+	dir = find_untracked_cache_dir(istate->untracked,
+				       istate->untracked->root, sb.buf);
+	/* NOTE(review): both branches print to stderr unconditionally; looks like debugging output. */
+	if (dir) {
+		fprintf(stderr, "from '%s', dir '%s', invalidated '%s'\n",
+			ce->name, sb.buf, dir->name);
+		dir->valid = 0;
+	} else {
+		fprintf(stderr, "from '%s', dir '%s', not found\n",
+			ce->name, sb.buf);
+	}
+
+	strbuf_release(&sb);
+}
+
+/*
+ * for_each_string_list() callback: "item" names a directory and "uc" is
+ * the untracked cache.  If find_untracked_cache_dir() locates a node for
+ * that directory, its valid flag is cleared.  Always returns 0.
+ */
+static int mark_untracked_invalid(struct string_list_item *item, void *uc)
+{
+	struct untracked_cache *cache = uc;
+	struct untracked_cache_dir *node =
+		find_untracked_cache_dir(cache, cache->root, item->string);
+
+	if (node)
+		node->valid = 0;
+
+	return 0;
}
static int read_watchman_ext(struct index_state *istate, const void *data,
@@ -1389,19 +1460,56 @@ static int read_watchman_ext(struct index_state *istate, const void *data,
{
struct ewah_bitmap *bitmap;
int ret, len;
+ uint32_t bitmap_size;
+ uint32_t untracked_nr;
+ const char *untracked;
if (memchr(data, 0, sz) == NULL)
return error("invalid extension");
+
len = strlen(data) + 1;
+ memcpy(&bitmap_size, (const char *)data + len, 4);
+ memcpy(&untracked_nr, (const char *)data + len + 4, 4);
+ untracked_nr = ntohl(untracked_nr);
+ bitmap_size = ntohl(bitmap_size);
+
bitmap = ewah_new();
- ret = ewah_read_mmap(bitmap, (const char *)data + len, sz - len);
- if (ret != sz - len) {
+ ret = ewah_read_mmap(bitmap, (const char *)data + len + 8, bitmap_size);
+ if (ret != bitmap_size) {
ewah_free(bitmap);
return error("failed to parse ewah bitmap reading watchman index extension");
}
istate->last_update = xstrdup(data);
ewah_each_bit(bitmap, mark_no_watchman, istate);
ewah_free(bitmap);
+
+ if (istate->untracked && istate->untracked->root) {
+ int i;
+
+ istate->untracked->use_watchman = 1;
+ untracked = data + len + 8 + bitmap_size;
+ for (i = 0; i < untracked_nr; ++i) {
+ int len = strlen(untracked);
+ string_list_append(&istate->untracked->invalid_untracked,
+ untracked);
+ untracked += len + 1;
+ }
+
+ for_each_string_list(&istate->untracked->invalid_untracked,
+ mark_untracked_invalid, istate->untracked);
+
+ string_list_clear(&istate->untracked->invalid_untracked, 0);
+ istate->cache_changed |= WATCHMAN_CHANGED;
+ }
+ return 0;
+}
+
+/*
+ * for_each_string_list() callback: appends item->string followed by a
+ * NUL byte to the strbuf passed as "sbvoid".  Always returns 0.
+ *
+ * Static because this patch only uses it as a callback from within this
+ * file and declares no prototype for it.
+ */
+static int untracked_entry_append(struct string_list_item *item, void *sbvoid)
+{
+	struct strbuf *sb = sbvoid;
+
+	strbuf_addstr(sb, item->string);
+	strbuf_addch(sb, '\0');
return 0;
}
@@ -1409,14 +1517,38 @@ void write_watchman_ext(struct strbuf *sb, struct index_state* istate)
{
struct ewah_bitmap *bitmap;
int i;
+ int ewah_start;
+ int ewah_size = 0;
+ int fixup = 0;
strbuf_add(sb, istate->last_update, strlen(istate->last_update) + 1);
+ fixup = sb->len;
+ strbuf_add(sb, &ewah_size, 4); /* we'll fix this up later */
+ if (istate->untracked) {
+ uint32_t nr = istate->untracked->invalid_untracked.nr;
+ nr = htonl(nr);
+ strbuf_add(sb, &nr, 4);
+ } else {
+ /* zero */
+ strbuf_add(sb, &ewah_size, 4);
+ }
+
+ ewah_start = sb->len;
bitmap = ewah_new();
for (i = 0; i < istate->cache_nr; i++)
if (istate->cache[i]->ce_flags & CE_WATCHMAN_DIRTY)
ewah_set(bitmap, i);
ewah_serialize_strbuf(bitmap, sb);
ewah_free(bitmap);
+
+ /* fix up size field */
+ ewah_size = sb->len - ewah_start;
+ ewah_size = htonl(ewah_size);
+ memcpy(sb->buf + fixup, &ewah_size, 4);
+
+ if (istate->untracked)
+ for_each_string_list(&istate->untracked->invalid_untracked,
+ untracked_entry_append, sb);
}
static int read_index_extension(struct index_state *istate,
@@ -1956,7 +2088,7 @@ int unmerged_index(const struct index_state *istate)
return 0;
}
-#define WRITE_BUFFER_SIZE 8192
+#define WRITE_BUFFER_SIZE (8192*16) /* parenthesized so it expands safely inside larger expressions */
static unsigned char write_buffer[WRITE_BUFFER_SIZE];
static unsigned long write_buffer_len;
diff --git a/watchman-support.c b/watchman-support.c
index 08e37ae..b7302b9 100644
--- a/watchman-support.c
+++ b/watchman-support.c
@@ -1,6 +1,7 @@
#include "cache.h"
#include "watchman-support.h"
#include "strbuf.h"
+#include "dir.h"
#include <watchman.h>
static struct watchman_query *make_query(const char *last_update)
@@ -60,8 +61,24 @@ static void update_index(struct index_state *istate,
continue;
pos = index_name_pos(istate, wm->name, strlen(wm->name));
- if (pos < 0)
+ if (pos < 0) {
+ if (istate->untracked) {
+ char *name = xstrdup(wm->name);
+ char *dname = dirname(name);
+
+ /*
+ * dirname() returns '.' for the root,
+ * but we call it ''.
+ */
+ if (dname[0] == '.' && dname[1] == 0)
+ string_list_append(&istate->untracked->invalid_untracked, "");
+ else
+ string_list_append(&istate->untracked->invalid_untracked,
+ dname);
+ free(name);
+ }
continue;
+ }
/* FIXME: ignore staged entries and gitlinks too? */
istate->cache[pos]->ce_flags |= CE_WATCHMAN_DIRTY;
@@ -71,6 +88,8 @@ done:
free(istate->last_update);
istate->last_update = xstrdup(result->clock);
istate->cache_changed |= WATCHMAN_CHANGED;
+ if (istate->untracked)
+ string_list_remove_duplicates(&istate->untracked->invalid_untracked, 0);
}
int check_watchman(struct index_state *istate)
--
2.4.2.767.g62658d5-twtrsrc
next prev parent reply other threads:[~2016-03-09 18:38 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-09 18:36 [PATCH 00/19] index-helper, watchman David Turner
2016-03-09 18:36 ` [PATCH 01/19] trace.c: add GIT_TRACE_PACK_STATS for pack usage statistics David Turner
2016-03-09 22:58 ` Junio C Hamano
2016-03-10 0:05 ` David Turner
2016-03-10 10:59 ` Duy Nguyen
2016-03-09 18:36 ` [PATCH 02/19] read-cache.c: fix constness of verify_hdr() David Turner
2016-03-09 18:36 ` [PATCH 03/19] read-cache: allow to keep mmap'd memory after reading David Turner
2016-03-09 23:02 ` Junio C Hamano
2016-03-10 0:09 ` David Turner
2016-03-09 18:36 ` [PATCH 04/19] index-helper: new daemon for caching index and related stuff David Turner
2016-03-09 23:09 ` Junio C Hamano
2016-03-09 23:21 ` Junio C Hamano
2016-03-10 0:01 ` David Turner
2016-03-10 11:17 ` Duy Nguyen
2016-03-10 20:22 ` David Turner
2016-03-11 1:11 ` Duy Nguyen
2016-03-10 0:18 ` David Turner
2016-03-15 11:56 ` Duy Nguyen
2016-03-15 15:56 ` Junio C Hamano
2016-03-15 11:52 ` Duy Nguyen
2016-03-09 18:36 ` [PATCH 05/19] trace.c: add GIT_TRACE_INDEX_STATS for index statistics David Turner
2016-03-09 18:36 ` [PATCH 06/19] index-helper: add --strict David Turner
2016-03-09 18:36 ` [PATCH 07/19] daemonize(): set a flag before exiting the main process David Turner
2016-03-09 18:36 ` [PATCH 08/19] index-helper: add --detach David Turner
2016-03-09 18:36 ` [PATCH 09/19] index-helper: add Windows support David Turner
2016-03-16 11:42 ` Duy Nguyen
2016-03-17 12:18 ` Johannes Schindelin
2016-03-17 12:59 ` Duy Nguyen
2016-03-09 18:36 ` [PATCH 10/19] read-cache: add watchman 'WAMA' extension David Turner
2016-03-09 18:36 ` [PATCH 11/19] Add watchman support to reduce index refresh cost David Turner
2016-03-09 18:36 ` [PATCH 12/19] read-cache: allow index-helper to prepare shm before git reads it David Turner
2016-03-09 18:36 ` [PATCH 13/19] index-helper: use watchman to avoid refreshing index with lstat() David Turner
2016-03-09 18:36 ` [PATCH 14/19] update-index: enable/disable watchman support David Turner
2016-03-09 18:36 ` [PATCH 15/19] unpack-trees: preserve index extensions David Turner
2016-03-09 18:36 ` [PATCH 16/19] index-helper: rewrite pidfile after daemonizing David Turner
2016-03-09 18:36 ` [PATCH 17/19] index-helper: process management David Turner
2016-03-09 18:36 ` [PATCH 18/19] index-helper: autorun David Turner
2016-03-15 12:12 ` Duy Nguyen
2016-03-15 14:26 ` Johannes Schindelin
2016-03-16 11:37 ` Duy Nguyen
2016-03-16 18:11 ` David Turner
2016-03-16 18:27 ` Johannes Schindelin
2016-03-17 13:02 ` Duy Nguyen
2016-03-17 14:43 ` Johannes Schindelin
2016-03-17 18:31 ` David Turner
2016-03-18 0:50 ` Duy Nguyen
2016-03-18 7:14 ` Johannes Schindelin
2016-03-18 7:44 ` Duy Nguyen
2016-03-18 17:22 ` David Turner
2016-03-18 23:09 ` Duy Nguyen
2016-03-18 7:17 ` Johannes Schindelin
2016-03-18 7:34 ` Duy Nguyen
2016-03-18 15:57 ` Johannes Schindelin
2016-03-09 18:36 ` David Turner [this message]
2016-03-15 12:31 ` [PATCH 19/19] hack: watchman/untracked cache mashup Duy Nguyen
2016-03-17 0:56 ` David Turner
2016-03-17 13:06 ` Duy Nguyen
2016-03-17 18:08 ` David Turner
2016-03-29 17:09 ` [PATCH 00/19] index-helper, watchman Torsten Bögershausen
2016-03-29 21:51 ` David Turner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1457548582-28302-20-git-send-email-dturner@twopensource.com \
--to=dturner@twopensource.com \
--cc=git@vger.kernel.org \
--cc=pclouds@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).