git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 25/32] update-index: new options to enable/disable split index mode
Date: Fri, 13 Jun 2014 19:19:44 +0700	[thread overview]
Message-ID: <1402661991-14977-26-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1402661991-14977-1-git-send-email-pclouds@gmail.com>

If you have a large work tree but only make changes in a subset, then
$GIT_DIR/index's size should be stable after a while. If you change
branches that touch something else, $GIT_DIR/index's size may grow
large that it becomes as slow as the unified index. Do --split-index
again occasionally to force all changes back to the shared index and
keep $GIT_DIR/index small.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/git-update-index.txt | 11 +++++++
 builtin/update-index.c             | 18 ++++++++++
 cache.h                            |  1 +
 read-cache.c                       | 67 ++++++++++++++++++++++++++++++++++----
 split-index.c                      | 23 +++++++++++++
 5 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index d6de4a0..dfc09d9 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -161,6 +161,17 @@ may not support it yet.
 	Only meaningful with `--stdin` or `--index-info`; paths are
 	separated with NUL character instead of LF.
 
+--split-index::
+--no-split-index::
+	Enable or disable split index mode. If enabled, the index is
+	split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.<SHA-1>.
+	Changes are accumulated in $GIT_DIR/index while the shared
+	index file contains all index entries stays unchanged. If
+	split-index mode is already enabled and `--split-index` is
+	given again, all changes in $GIT_DIR/index are pushed back to
+	the shared index file. This mode is designed for very large
+	indexes that take a signficant amount of time to read or write.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index f7a19c4..b0503f4 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -13,6 +13,7 @@
 #include "parse-options.h"
 #include "pathspec.h"
 #include "dir.h"
+#include "split-index.h"
 
 /*
  * Default to not allowing changes to the list of files. The
@@ -742,6 +743,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 	char set_executable_bit = 0;
 	struct refresh_params refresh_args = {0, &has_errors};
 	int lock_error = 0;
+	int split_index = -1;
 	struct lock_file *lock_file;
 	struct parse_opt_ctx_t ctx;
 	int parseopt_state = PARSE_OPT_UNKNOWN;
@@ -824,6 +826,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			resolve_undo_clear_callback},
 		OPT_INTEGER(0, "index-version", &preferred_index_format,
 			N_("write index in this format")),
+		OPT_BOOL(0, "split-index", &split_index,
+			N_("enable or disable split index")),
 		OPT_END()
 	};
 
@@ -917,6 +921,20 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		strbuf_release(&buf);
 	}
 
+	if (split_index > 0) {
+		init_split_index(&the_index);
+		the_index.cache_changed |= SPLIT_INDEX_ORDERED;
+	} else if (!split_index && the_index.split_index) {
+		/*
+		 * can't discard_split_index(&the_index); because that
+		 * will destroy split_index->base->cache[], which may
+		 * be shared with the_index.cache[]. So yeah we're
+		 * leaking a bit here.
+		 */
+		the_index.split_index = NULL;
+		the_index.cache_changed |= SOMETHING_CHANGED;
+	}
+
 	if (active_cache_changed) {
 		if (newfd < 0) {
 			if (refresh_args.flags & REFRESH_QUIET)
diff --git a/cache.h b/cache.h
index ddb7cd2..7523c28 100644
--- a/cache.h
+++ b/cache.h
@@ -278,6 +278,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CE_ENTRY_ADDED		(1 << 3)
 #define RESOLVE_UNDO_CHANGED	(1 << 4)
 #define CACHE_TREE_CHANGED	(1 << 5)
+#define SPLIT_INDEX_ORDERED	(1 << 6)
 
 struct split_index;
 struct index_state {
diff --git a/read-cache.c b/read-cache.c
index af475dc..8ecb959 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -15,6 +15,7 @@
 #include "strbuf.h"
 #include "varint.h"
 #include "split-index.h"
+#include "sigchain.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 					       unsigned int options);
@@ -39,7 +40,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
-		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED)
+		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
+		 SPLIT_INDEX_ORDERED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
@@ -1860,7 +1862,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
 		rollback_lock_file(lockfile);
 }
 
-static int do_write_index(struct index_state *istate, int newfd)
+static int do_write_index(struct index_state *istate, int newfd,
+			  int strip_extensions)
 {
 	git_SHA_CTX c;
 	struct cache_header hdr;
@@ -1923,7 +1926,7 @@ static int do_write_index(struct index_state *istate, int newfd)
 	strbuf_release(&previous_name_buf);
 
 	/* Write extension data here */
-	if (istate->split_index) {
+	if (!strip_extensions && istate->split_index) {
 		struct strbuf sb = STRBUF_INIT;
 
 		err = write_link_extension(&sb, istate) < 0 ||
@@ -1934,7 +1937,7 @@ static int do_write_index(struct index_state *istate, int newfd)
 		if (err)
 			return -1;
 	}
-	if (istate->cache_tree) {
+	if (!strip_extensions && istate->cache_tree) {
 		struct strbuf sb = STRBUF_INIT;
 
 		cache_tree_write(&sb, istate->cache_tree);
@@ -1944,7 +1947,7 @@ static int do_write_index(struct index_state *istate, int newfd)
 		if (err)
 			return -1;
 	}
-	if (istate->resolve_undo) {
+	if (!strip_extensions && istate->resolve_undo) {
 		struct strbuf sb = STRBUF_INIT;
 
 		resolve_undo_write(&sb, istate->resolve_undo);
@@ -1985,7 +1988,7 @@ static int commit_locked_index(struct lock_file *lk)
 static int do_write_locked_index(struct index_state *istate, struct lock_file *lock,
 				 unsigned flags)
 {
-	int ret = do_write_index(istate, lock->fd);
+	int ret = do_write_index(istate, lock->fd, 0);
 	if (ret)
 		return ret;
 	assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) !=
@@ -2009,6 +2012,52 @@ static int write_split_index(struct index_state *istate,
 	return ret;
 }
 
+static char *temporary_sharedindex;
+
+static void remove_temporary_sharedindex(void)
+{
+	if (temporary_sharedindex) {
+		unlink_or_warn(temporary_sharedindex);
+		free(temporary_sharedindex);
+		temporary_sharedindex = NULL;
+	}
+}
+
+static void remove_temporary_sharedindex_on_signal(int signo)
+{
+	remove_temporary_sharedindex();
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+static int write_shared_index(struct index_state *istate)
+{
+	struct split_index *si = istate->split_index;
+	static int installed_handler;
+	int fd, ret;
+
+	temporary_sharedindex = git_pathdup("sharedindex_XXXXXX");
+	fd = xmkstemp(temporary_sharedindex);
+	if (!installed_handler) {
+		atexit(remove_temporary_sharedindex);
+		sigchain_push_common(remove_temporary_sharedindex_on_signal);
+	}
+	move_cache_to_base_index(istate);
+	ret = do_write_index(si->base, fd, 1);
+	close(fd);
+	if (ret) {
+		remove_temporary_sharedindex();
+		return ret;
+	}
+	ret = rename(temporary_sharedindex,
+		     git_path("sharedindex.%s", sha1_to_hex(si->base->sha1)));
+	free(temporary_sharedindex);
+	temporary_sharedindex = NULL;
+	if (!ret)
+		hashcpy(si->base_sha1, si->base->sha1);
+	return ret;
+}
+
 int write_locked_index(struct index_state *istate, struct lock_file *lock,
 		       unsigned flags)
 {
@@ -2020,6 +2069,12 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
 		return do_write_locked_index(istate, lock, flags);
 	}
 
+	if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
+		int ret = write_shared_index(istate);
+		if (ret)
+			return ret;
+	}
+
 	return write_split_index(istate, lock, flags);
 }
 
diff --git a/split-index.c b/split-index.c
index ee3246f..21485e2 100644
--- a/split-index.c
+++ b/split-index.c
@@ -74,6 +74,29 @@ static void mark_base_index_entries(struct index_state *base)
 		base->cache[i]->index = i + 1;
 }
 
+void move_cache_to_base_index(struct index_state *istate)
+{
+	struct split_index *si = istate->split_index;
+	int i;
+
+	/*
+	 * do not delete old si->base, its index entries may be shared
+	 * with istate->cache[]. Accept a bit of leaking here because
+	 * this code is only used by short-lived update-index.
+	 */
+	si->base = xcalloc(1, sizeof(*si->base));
+	si->base->version = istate->version;
+	/* zero timestamp disables racy test in ce_write_index() */
+	si->base->timestamp = istate->timestamp;
+	ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
+	si->base->cache_nr = istate->cache_nr;
+	memcpy(si->base->cache, istate->cache,
+	       sizeof(*istate->cache) * istate->cache_nr);
+	mark_base_index_entries(si->base);
+	for (i = 0; i < si->base->cache_nr; i++)
+		si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
+}
+
 static void mark_entry_for_delete(size_t pos, void *data)
 {
 	struct index_state *istate = data;
-- 
1.9.1.346.ga2b5940

  parent reply	other threads:[~2014-06-13 12:22 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-13 12:19 [PATCH 00/32] Split index resend Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 01/32] ewah: fix constness of ewah_read_mmap Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 02/32] ewah: delete unused ewah_read_mmap_native declaration Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 03/32] sequencer: do not update/refresh index if the lock cannot be held Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 04/32] read-cache: new API write_locked_index instead of write_index/write_cache Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 05/32] read-cache: relocate and unexport commit_locked_index() Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 06/32] read-cache: store in-memory flags in the first 12 bits of ce_flags Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 07/32] read-cache: be strict about "changed" in remove_marked_cache_entries() Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 08/32] read-cache: be specific what part of the index has changed Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 09/32] update-index: " Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 10/32] resolve-undo: " Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 11/32] unpack-trees: " Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 12/32] cache-tree: mark istate->cache_changed on cache tree invalidation Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 13/32] cache-tree: mark istate->cache_changed on cache tree update Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 14/32] cache-tree: mark istate->cache_changed on prime_cache_tree() Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 15/32] entry.c: update cache_changed if refresh_cache is set in checkout_entry() Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 16/32] read-cache: save index SHA-1 after reading Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 17/32] read-cache: split-index mode Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 18/32] read-cache: mark new entries for split index Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 19/32] read-cache: save deleted entries in " Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 20/32] read-cache: mark updated entries for " Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 21/32] split-index: the writing part Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 22/32] split-index: the reading part Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 23/32] split-index: do not invalidate cache-tree at read time Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 24/32] split-index: strip pathname of on-disk replaced entries Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` Nguyễn Thái Ngọc Duy [this message]
2014-06-13 12:19 ` [PATCH 26/32] update-index --split-index: do not split if $GIT_DIR is read only Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 27/32] rev-parse: add --shared-index-path to get shared index path Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 28/32] read-tree: force split-index mode off on --index-output Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 29/32] read-tree: note about dropping split-index mode or index version Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 30/32] read-cache: force split index mode with GIT_TEST_SPLIT_INDEX Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 31/32] t2104: make sure split index mode is off for the version test Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 32/32] t1700: new tests for split-index mode Nguyễn Thái Ngọc Duy
  -- strict thread matches above, loose matches on Subject: below --
2014-04-28 10:55 [PATCH 00/32] Split index mode for very large indexes Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 25/32] update-index: new options to enable/disable split index mode Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1402661991-14977-26-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).