From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 25/32] update-index: new options to enable/disable split index mode
Date: Mon, 28 Apr 2014 17:55:46 +0700 [thread overview]
Message-ID: <1398682553-11634-26-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1398682553-11634-1-git-send-email-pclouds@gmail.com>
If you have a large work tree but only make changes in a subset, then
$GIT_DIR/index's size should be stable after a while. If you change
branches that touch something else, $GIT_DIR/index's size may grow so
large that it becomes as slow as the unified index. Do --split-index
again occasionally to force all changes back to the shared index and
keep $GIT_DIR/index small.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
Documentation/git-update-index.txt | 11 +++++++
builtin/update-index.c | 18 ++++++++++
cache.h | 1 +
read-cache.c | 67 ++++++++++++++++++++++++++++++++++----
split-index.c | 23 +++++++++++++
5 files changed, 114 insertions(+), 6 deletions(-)
diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index d6de4a0..dfc09d9 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -161,6 +161,17 @@ may not support it yet.
Only meaningful with `--stdin` or `--index-info`; paths are
separated with NUL character instead of LF.
+--split-index::
+--no-split-index::
+ Enable or disable split index mode. If enabled, the index is
+ split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.<SHA-1>.
+ Changes are accumulated in $GIT_DIR/index while the shared
+ index file, which contains all index entries, stays unchanged. If
+ split-index mode is already enabled and `--split-index` is
+ given again, all changes in $GIT_DIR/index are pushed back to
+ the shared index file. This mode is designed for very large
+ indexes that take a significant amount of time to read or write.
+
\--::
Do not interpret any more arguments as options.
diff --git a/builtin/update-index.c b/builtin/update-index.c
index f7a19c4..b0503f4 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -13,6 +13,7 @@
#include "parse-options.h"
#include "pathspec.h"
#include "dir.h"
+#include "split-index.h"
/*
* Default to not allowing changes to the list of files. The
@@ -742,6 +743,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
char set_executable_bit = 0;
struct refresh_params refresh_args = {0, &has_errors};
int lock_error = 0;
+ int split_index = -1;
struct lock_file *lock_file;
struct parse_opt_ctx_t ctx;
int parseopt_state = PARSE_OPT_UNKNOWN;
@@ -824,6 +826,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
resolve_undo_clear_callback},
OPT_INTEGER(0, "index-version", &preferred_index_format,
N_("write index in this format")),
+ OPT_BOOL(0, "split-index", &split_index,
+ N_("enable or disable split index")),
OPT_END()
};
@@ -917,6 +921,20 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
strbuf_release(&buf);
}
+ if (split_index > 0) {
+ init_split_index(&the_index);
+ the_index.cache_changed |= SPLIT_INDEX_ORDERED;
+ } else if (!split_index && the_index.split_index) {
+ /*
+ * can't discard_split_index(&the_index); because that
+ * will destroy split_index->base->cache[], which may
+ * be shared with the_index.cache[]. So yeah we're
+ * leaking a bit here.
+ */
+ the_index.split_index = NULL;
+ the_index.cache_changed |= SOMETHING_CHANGED;
+ }
+
if (active_cache_changed) {
if (newfd < 0) {
if (refresh_args.flags & REFRESH_QUIET)
diff --git a/cache.h b/cache.h
index 604328b..42cdfe6 100644
--- a/cache.h
+++ b/cache.h
@@ -278,6 +278,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define SOMETHING_CHANGED (1 << 3) /* unclassified changes go here */
#define RESOLVE_UNDO_CHANGED (1 << 4)
#define CACHE_TREE_CHANGED (1 << 5)
+#define SPLIT_INDEX_ORDERED (1 << 6)
struct split_index;
struct index_state {
diff --git a/read-cache.c b/read-cache.c
index 81835a6..a6c9407 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -15,6 +15,7 @@
#include "strbuf.h"
#include "varint.h"
#include "split-index.h"
+#include "sigchain.h"
static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
unsigned int options);
@@ -39,7 +40,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
- CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED)
+ CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
+ SPLIT_INDEX_ORDERED)
struct index_state the_index;
static const char *alternate_index_output;
@@ -1860,7 +1862,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
rollback_lock_file(lockfile);
}
-static int do_write_index(struct index_state *istate, int newfd)
+static int do_write_index(struct index_state *istate, int newfd,
+ int strip_extensions)
{
git_SHA_CTX c;
struct cache_header hdr;
@@ -1923,7 +1926,7 @@ static int do_write_index(struct index_state *istate, int newfd)
strbuf_release(&previous_name_buf);
/* Write extension data here */
- if (istate->split_index) {
+ if (!strip_extensions && istate->split_index) {
struct strbuf sb = STRBUF_INIT;
err = write_link_extension(&sb, istate) < 0 ||
@@ -1934,7 +1937,7 @@ static int do_write_index(struct index_state *istate, int newfd)
if (err)
return -1;
}
- if (istate->cache_tree) {
+ if (!strip_extensions && istate->cache_tree) {
struct strbuf sb = STRBUF_INIT;
cache_tree_write(&sb, istate->cache_tree);
@@ -1944,7 +1947,7 @@ static int do_write_index(struct index_state *istate, int newfd)
if (err)
return -1;
}
- if (istate->resolve_undo) {
+ if (!strip_extensions && istate->resolve_undo) {
struct strbuf sb = STRBUF_INIT;
resolve_undo_write(&sb, istate->resolve_undo);
@@ -1985,7 +1988,7 @@ static int commit_locked_index(struct lock_file *lk)
static int do_write_locked_index(struct index_state *istate, struct lock_file *lock,
unsigned flags)
{
- int ret = do_write_index(istate, lock->fd);
+ int ret = do_write_index(istate, lock->fd, 0);
if (ret)
return ret;
assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) !=
@@ -2009,6 +2012,52 @@ static int write_split_index(struct index_state *istate,
return ret;
}
+static char *temporary_sharedindex;
+
+static void remove_temporary_sharedindex(void)
+{
+ if (temporary_sharedindex) {
+ unlink_or_warn(temporary_sharedindex);
+ free(temporary_sharedindex);
+ temporary_sharedindex = NULL;
+ }
+}
+
+static void remove_temporary_sharedindex_on_signal(int signo)
+{
+ remove_temporary_sharedindex();
+ sigchain_pop(signo);
+ raise(signo);
+}
+
+static int write_shared_index(struct index_state *istate)
+{
+ struct split_index *si = istate->split_index;
+ static int installed_handler;
+ int fd, ret;
+
+ temporary_sharedindex = git_pathdup("sharedindex_XXXXXX");
+ fd = xmkstemp(temporary_sharedindex);
+ if (!installed_handler) {
+ atexit(remove_temporary_sharedindex);
+ sigchain_push_common(remove_temporary_sharedindex_on_signal);
+ }
+ move_cache_to_base_index(istate);
+ ret = do_write_index(si->base, fd, 1);
+ close(fd);
+ if (ret) {
+ remove_temporary_sharedindex();
+ return ret;
+ }
+ ret = rename(temporary_sharedindex,
+ git_path("sharedindex.%s", sha1_to_hex(si->base->sha1)));
+ free(temporary_sharedindex);
+ temporary_sharedindex = NULL;
+ if (!ret)
+ hashcpy(si->base_sha1, si->base->sha1);
+ return ret;
+}
+
int write_locked_index(struct index_state *istate, struct lock_file *lock,
unsigned flags)
{
@@ -2020,6 +2069,12 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
return do_write_locked_index(istate, lock, flags);
}
+ if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
+ int ret = write_shared_index(istate);
+ if (ret)
+ return ret;
+ }
+
return write_split_index(istate, lock, flags);
}
diff --git a/split-index.c b/split-index.c
index ee3246f..21485e2 100644
--- a/split-index.c
+++ b/split-index.c
@@ -74,6 +74,29 @@ static void mark_base_index_entries(struct index_state *base)
base->cache[i]->index = i + 1;
}
+void move_cache_to_base_index(struct index_state *istate)
+{
+ struct split_index *si = istate->split_index;
+ int i;
+
+ /*
+ * do not delete old si->base, its index entries may be shared
+ * with istate->cache[]. Accept a bit of leaking here because
+ * this code is only used by short-lived update-index.
+ */
+ si->base = xcalloc(1, sizeof(*si->base));
+ si->base->version = istate->version;
+ /* zero timestamp disables racy test in ce_write_index() */
+ si->base->timestamp = istate->timestamp;
+ ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
+ si->base->cache_nr = istate->cache_nr;
+ memcpy(si->base->cache, istate->cache,
+ sizeof(*istate->cache) * istate->cache_nr);
+ mark_base_index_entries(si->base);
+ for (i = 0; i < si->base->cache_nr; i++)
+ si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
+}
+
static void mark_entry_for_delete(size_t pos, void *data)
{
struct index_state *istate = data;
--
1.9.1.346.ga2b5940
next prev parent reply other threads:[~2014-04-28 10:57 UTC|newest]
Thread overview: 76+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-28 10:55 [PATCH 00/32] Split index mode for very large indexes Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 01/32] ewah: fix constness of ewah_read_mmap Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 02/32] ewah: delete unused ewah_read_mmap_native declaration Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 03/32] sequencer: do not update/refresh index if the lock cannot be held Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 04/32] read-cache: new API write_locked_index instead of write_index/write_cache Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 05/32] read-cache: relocate and unexport commit_locked_index() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 06/32] read-cache: store in-memory flags in the first 12 bits of ce_flags Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 07/32] read-cache: be strict about "changed" in remove_marked_cache_entries() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 08/32] read-cache: be specific what part of the index has changed Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 09/32] update-index: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 10/32] resolve-undo: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 11/32] unpack-trees: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 12/32] cache-tree: mark istate->cache_changed on cache tree invalidation Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 13/32] cache-tree: mark istate->cache_changed on cache tree update Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 14/32] cache-tree: mark istate->cache_changed on prime_cache_tree() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 15/32] entry.c: update cache_changed if refresh_cache is set in checkout_entry() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 16/32] read-cache: save index SHA-1 after reading Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 17/32] read-cache: split-index mode Nguyễn Thái Ngọc Duy
2014-04-28 22:46 ` Junio C Hamano
2014-04-29 1:43 ` Duy Nguyen
2014-04-29 17:23 ` Junio C Hamano
2014-04-29 22:45 ` Duy Nguyen
2014-04-30 13:57 ` Junio C Hamano
2014-04-28 10:55 ` [PATCH 18/32] read-cache: mark new entries for split index Nguyễn Thái Ngọc Duy
2014-04-30 20:35 ` Eric Sunshine
2014-04-28 10:55 ` [PATCH 19/32] read-cache: save deleted entries in " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 20/32] read-cache: mark updated entries for " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 21/32] split-index: the writing part Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 22/32] split-index: the reading part Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 23/32] split-index: do not invalidate cache-tree at read time Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 24/32] split-index: strip pathname of on-disk replaced entries Nguyễn Thái Ngọc Duy
2014-04-29 20:25 ` Junio C Hamano
2014-04-28 10:55 ` Nguyễn Thái Ngọc Duy [this message]
2014-04-28 10:55 ` [PATCH 26/32] update-index --split-index: do not split if $GIT_DIR is read only Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 27/32] rev-parse: add --shared-index-path to get shared index path Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 28/32] read-tree: force split-index mode off on --index-output Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 29/32] read-tree: note about dropping split-index mode or index version Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 30/32] read-cache: force split index mode with GIT_TEST_SPLIT_INDEX Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 31/32] t2104: make sure split index mode is off for the version test Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 32/32] t1700: new tests for split-index mode Nguyễn Thái Ngọc Duy
2014-04-28 21:18 ` [PATCH 00/32] Split index mode for very large indexes Shawn Pearce
2014-04-29 1:52 ` Duy Nguyen
2014-05-09 10:27 ` Duy Nguyen
2014-05-09 17:55 ` Junio C Hamano
2014-05-13 11:15 ` [PATCH 0/8] Speed up cache loading time Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 1/8] read-cache: allow to keep mmap'd memory after reading Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 2/3] Add read-cache--daemon Nguyễn Thái Ngọc Duy
2014-05-13 11:52 ` Erik Faye-Lund
2014-05-13 12:01 ` Duy Nguyen
2014-05-13 13:01 ` Duy Nguyen
2014-05-13 13:37 ` Erik Faye-Lund
2014-05-13 13:49 ` Duy Nguyen
2014-05-13 14:06 ` Erik Faye-Lund
2014-05-13 14:10 ` Duy Nguyen
2014-05-13 14:16 ` Erik Faye-Lund
2014-05-13 11:15 ` [PATCH 2/8] unix-socket: stub impl. for platforms with no unix socket support Nguyễn Thái Ngọc Duy
2014-05-13 11:59 ` Erik Faye-Lund
2014-05-13 12:03 ` Erik Faye-Lund
2014-05-13 11:15 ` [PATCH 3/8] daemonize: set a flag before exiting the main process Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 3/3] read-cache: try index data from shared memory Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 4/8] Add read-cache--daemon for caching index and related stuff Nguyễn Thái Ngọc Duy
2014-05-13 11:56 ` Erik Faye-Lund
2014-05-13 11:15 ` [PATCH 5/8] read-cache: try index data from shared memory Nguyễn Thái Ngọc Duy
2014-05-13 12:13 ` Erik Faye-Lund
2014-05-13 11:15 ` [PATCH 6/8] read-cache--daemon: do not read index " Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 7/8] read-cache: skip verifying trailing SHA-1 on cached index Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 8/8] read-cache: inform the daemon that the index has been updated Nguyễn Thái Ngọc Duy
2014-05-13 12:17 ` Erik Faye-Lund
2014-05-22 16:38 ` David Turner
2014-05-13 14:24 ` [PATCH 0/8] Speed up cache loading time Stefan Beller
2014-05-13 14:35 ` Duy Nguyen
2014-05-13 11:20 ` [PATCH 9/8] even faster loading time with index version 254 Nguyễn Thái Ngọc Duy
2014-04-28 22:23 ` [PATCH 00/32] Split index mode for very large indexes Junio C Hamano
2014-04-30 20:48 ` Richard Hansen
2014-05-01 0:09 ` Duy Nguyen
-- strict thread matches above, loose matches on Subject: below --
2014-06-13 12:19 [PATCH 00/32] Split index resend Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 25/32] update-index: new options to enable/disable split index mode Nguyễn Thái Ngọc Duy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1398682553-11634-26-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).