git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 10/24] untracked cache: save to an index extension
Date: Tue, 20 Jan 2015 20:03:19 +0700	[thread overview]
Message-ID: <1421759013-8494-11-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1421759013-8494-1-git-send-email-pclouds@gmail.com>

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/technical/index-format.txt |  58 +++++++++++++
 cache.h                                  |   3 +
 dir.c                                    | 136 +++++++++++++++++++++++++++++++
 dir.h                                    |   1 +
 read-cache.c                             |  12 +++
 5 files changed, 210 insertions(+)

diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index fe6f316..b97ac8d 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -233,3 +233,61 @@ Git index format
   The remaining index entries after replaced ones will be added to the
   final index. These added entries are also sorted by entry namme then
   stage.
+
+== Untracked cache
+
+  Untracked cache saves the untracked file list and necessary data to
+  verify the cache. The signature for this extension is { 'U', 'N',
+  'T', 'R' }.
+
+  The extension starts with
+
+  - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from
+    ctime field until "file size".
+
+  - Stat data of core.excludesfile
+
+  - 32-bit dir_flags (see struct dir_struct)
+
+  - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file
+    does not exist.
+
+  - 160-bit SHA-1 of core.excludesfile. Null SHA-1 means the file does
+    not exist.
+
+  - NUL-terminated string of per-dir exclude file name. This usually
+    is ".gitignore".
+
+  - The number of following directory blocks, variable width
+    encoding. If this number is zero, the extension ends here with a
+    following NUL.
+
+  - A number of directory blocks in depth-first-search order, each
+    consists of
+
+    - The number of untracked entries, variable witdh encoding.
+
+    - The number of sub-directory blocks, variable with encoding.
+
+    - The directory name terminated by NUL.
+
+    - A number of untrached file/dir names terminated by NUL.
+
+The remaining data of each directory block is grouped by type:
+
+  - An ewah bitmap, the n-th bit marks whether the n-th directory has
+    valid untracked cache entries.
+
+  - An ewah bitmap, the n-th bit records "check-only" bit of
+    read_directory_recursive() for the n-th directory.
+
+  - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data
+    is valid for the n-th directory and exists in the next data.
+
+  - An array of stat data. The n-th data corresponds with the n-th
+    "one" bit in the previous ewah bitmap.
+
+  - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit
+    in the previous ewah bitmap.
+
+  - One NUL.
diff --git a/cache.h b/cache.h
index dcf3a2a..b14d6e2 100644
--- a/cache.h
+++ b/cache.h
@@ -297,6 +297,8 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
 
 struct split_index;
+struct untracked_cache;
+
 struct index_state {
 	struct cache_entry **cache;
 	unsigned int version;
@@ -310,6 +312,7 @@ struct index_state {
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
 	unsigned char sha1[20];
+	struct untracked_cache *untracked;
 };
 
 extern struct index_state the_index;
diff --git a/dir.c b/dir.c
index 95a0f0a..1f2d701 100644
--- a/dir.c
+++ b/dir.c
@@ -12,6 +12,8 @@
 #include "refs.h"
 #include "wildmatch.h"
 #include "pathspec.h"
+#include "varint.h"
+#include "ewah/ewok.h"
 
 struct path_simplify {
 	int len;
@@ -2139,3 +2141,137 @@ void clear_directory(struct dir_struct *dir)
 	}
 	strbuf_release(&dir->basebuf);
 }
+
+struct ondisk_untracked_cache {
+	struct stat_data info_exclude_stat;
+	struct stat_data excludes_file_stat;
+	uint32_t dir_flags;
+	unsigned char info_exclude_sha1[20];
+	unsigned char excludes_file_sha1[20];
+	char exclude_per_dir[FLEX_ARRAY];
+};
+
+#define ouc_size(len) (offsetof(struct ondisk_untracked_cache, exclude_per_dir) + len + 1)
+
+struct write_data {
+	int index;	   /* number of written untracked_cache_dir */
+	struct ewah_bitmap *check_only; /* from untracked_cache_dir */
+	struct ewah_bitmap *valid;	/* from untracked_cache_dir */
+	struct ewah_bitmap *sha1_valid; /* set if exclude_sha1 is not null */
+	struct strbuf out;
+	struct strbuf sb_stat;
+	struct strbuf sb_sha1;
+};
+
+static void stat_data_to_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = htonl(from->sd_ctime.sec);
+	to->sd_ctime.nsec = htonl(from->sd_ctime.nsec);
+	to->sd_mtime.sec  = htonl(from->sd_mtime.sec);
+	to->sd_mtime.nsec = htonl(from->sd_mtime.nsec);
+	to->sd_dev	  = htonl(from->sd_dev);
+	to->sd_ino	  = htonl(from->sd_ino);
+	to->sd_uid	  = htonl(from->sd_uid);
+	to->sd_gid	  = htonl(from->sd_gid);
+	to->sd_size	  = htonl(from->sd_size);
+}
+
+static void write_one_dir(struct untracked_cache_dir *untracked,
+			  struct write_data *wd)
+{
+	struct stat_data stat_data;
+	struct strbuf *out = &wd->out;
+	unsigned char intbuf[16];
+	unsigned int intlen, value;
+	int i = wd->index++;
+
+	/*
+	 * untracked_nr should be reset whenever valid is clear, but
+	 * for safety..
+	 */
+	if (!untracked->valid) {
+		untracked->untracked_nr = 0;
+		untracked->check_only = 0;
+	}
+
+	if (untracked->check_only)
+		ewah_set(wd->check_only, i);
+	if (untracked->valid) {
+		ewah_set(wd->valid, i);
+		stat_data_to_disk(&stat_data, &untracked->stat_data);
+		strbuf_add(&wd->sb_stat, &stat_data, sizeof(stat_data));
+	}
+	if (!is_null_sha1(untracked->exclude_sha1)) {
+		ewah_set(wd->sha1_valid, i);
+		strbuf_add(&wd->sb_sha1, untracked->exclude_sha1, 20);
+	}
+
+	intlen = encode_varint(untracked->untracked_nr, intbuf);
+	strbuf_add(out, intbuf, intlen);
+
+	/* skip non-recurse directories */
+	for (i = 0, value = 0; i < untracked->dirs_nr; i++)
+		if (untracked->dirs[i]->recurse)
+			value++;
+	intlen = encode_varint(value, intbuf);
+	strbuf_add(out, intbuf, intlen);
+
+	strbuf_add(out, untracked->name, strlen(untracked->name) + 1);
+
+	for (i = 0; i < untracked->untracked_nr; i++)
+		strbuf_add(out, untracked->untracked[i],
+			   strlen(untracked->untracked[i]) + 1);
+
+	for (i = 0; i < untracked->dirs_nr; i++)
+		if (untracked->dirs[i]->recurse)
+			write_one_dir(untracked->dirs[i], wd);
+}
+
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
+{
+	struct ondisk_untracked_cache *ouc;
+	struct write_data wd;
+	unsigned char varbuf[16];
+	int len = 0, varint_len;
+	if (untracked->exclude_per_dir)
+		len = strlen(untracked->exclude_per_dir);
+	ouc = xmalloc(sizeof(*ouc) + len + 1);
+	stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat);
+	stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat);
+	hashcpy(ouc->info_exclude_sha1, untracked->ss_info_exclude.sha1);
+	hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
+	ouc->dir_flags = htonl(untracked->dir_flags);
+	memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1);
+	strbuf_add(out, ouc, ouc_size(len));
+	if (!untracked->root) {
+		varint_len = encode_varint(0, varbuf);
+		strbuf_add(out, varbuf, varint_len);
+		return;
+	}
+
+	wd.index      = 0;
+	wd.check_only = ewah_new();
+	wd.valid      = ewah_new();
+	wd.sha1_valid = ewah_new();
+	strbuf_init(&wd.out, 1024);
+	strbuf_init(&wd.sb_stat, 1024);
+	strbuf_init(&wd.sb_sha1, 1024);
+	write_one_dir(untracked->root, &wd);
+
+	varint_len = encode_varint(wd.index, varbuf);
+	strbuf_add(out, varbuf, varint_len);
+	strbuf_addbuf(out, &wd.out);
+	ewah_serialize_strbuf(wd.valid, out);
+	ewah_serialize_strbuf(wd.check_only, out);
+	ewah_serialize_strbuf(wd.sha1_valid, out);
+	strbuf_addbuf(out, &wd.sb_stat);
+	strbuf_addbuf(out, &wd.sb_sha1);
+	strbuf_addch(out, '\0'); /* safe guard for string lists */
+
+	ewah_free(wd.valid);
+	ewah_free(wd.check_only);
+	ewah_free(wd.sha1_valid);
+	strbuf_release(&wd.out);
+	strbuf_release(&wd.sb_stat);
+	strbuf_release(&wd.sb_sha1);
+}
diff --git a/dir.h b/dir.h
index 95baf01..dc3ee0b 100644
--- a/dir.h
+++ b/dir.h
@@ -298,4 +298,5 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index 6f0057f..baf3057 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -37,6 +37,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 #define CACHE_EXT_TREE 0x54524545	/* "TREE" */
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b	  /* "link" */
+#define CACHE_EXT_UNTRACKED 0x554E5452	  /* "UNTR" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -2016,6 +2017,17 @@ static int do_write_index(struct index_state *istate, int newfd,
 		if (err)
 			return -1;
 	}
+	if (!strip_extensions && istate->untracked) {
+		struct strbuf sb = STRBUF_INIT;
+
+		write_untracked_extension(&sb, istate->untracked);
+		err = write_index_ext_header(&c, newfd, CACHE_EXT_UNTRACKED,
+					     sb.len) < 0 ||
+			ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		strbuf_release(&sb);
+		if (err)
+			return -1;
+	}
 
 	if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st))
 		return -1;
-- 
2.2.0.84.ge9c7a8a

  parent reply	other threads:[~2015-01-20 13:04 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-20 13:03 [PATCH 00/24] nd/untracked-cache update Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 02/24] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 03/24] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 04/24] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 05/24] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 06/24] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 07/24] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 08/24] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 09/24] ewah: add convenient wrapper ewah_serialize_strbuf() Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` Nguyễn Thái Ngọc Duy [this message]
2015-01-20 13:03 ` [PATCH 11/24] untracked cache: load from UNTR index extension Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 12/24] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 13/24] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 14/24] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 15/24] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 16/24] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 17/24] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 18/24] status: enable untracked cache Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 19/24] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 20/24] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
2015-01-21  8:32   ` Junio C Hamano
2015-01-21  9:46     ` Duy Nguyen
2015-01-21 18:51       ` Junio C Hamano
2015-01-22 10:26         ` Duy Nguyen
2015-01-22 18:49           ` Junio C Hamano
2015-01-24  2:51             ` Duy Nguyen
2015-01-20 13:03 ` [PATCH 21/24] t7063: tests for " Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 22/24] mingw32: add uname() Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 23/24] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 24/24] git-status.txt: advertisement for untracked cache Nguyễn Thái Ngọc Duy
2015-01-22 20:16   ` brian m. carlson
2015-01-20 19:28 ` [PATCH 00/24] nd/untracked-cache update Torsten Bögershausen
2015-01-21  9:49   ` Duy Nguyen
  -- strict thread matches above, loose matches on Subject: below --
2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 10/24] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
2015-03-07 19:08   ` Stefan Beller
2015-03-08 10:12 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
2015-03-08 10:12 ` [PATCH 10/24] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1421759013-8494-11-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).