From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 25/32] update-index: new options to enable/disable split index mode
Date: Mon, 28 Apr 2014 17:55:46 +0700 [thread overview]
Message-ID: <1398682553-11634-26-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1398682553-11634-1-git-send-email-pclouds@gmail.com>
If you have a large work tree but only make changes in a subset, then
$GIT_DIR/index's size should be stable after a while. If you change
branches that touch something else, $GIT_DIR/index's size may grow
large that it becomes as slow as the unified index. Do --split-index
again occasionally to force all changes back to the shared index and
keep $GIT_DIR/index small.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
Documentation/git-update-index.txt | 11 +++++++
builtin/update-index.c | 18 ++++++++++
cache.h | 1 +
read-cache.c | 67 ++++++++++++++++++++++++++++++++++----
split-index.c | 23 +++++++++++++
5 files changed, 114 insertions(+), 6 deletions(-)
diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index d6de4a0..dfc09d9 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -161,6 +161,17 @@ may not support it yet.
Only meaningful with `--stdin` or `--index-info`; paths are
separated with NUL character instead of LF.
+--split-index::
+--no-split-index::
+ Enable or disable split index mode. If enabled, the index is
+ split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.<SHA-1>.
+ Changes are accumulated in $GIT_DIR/index while the shared
+ index file contains all index entries stays unchanged. If
+ split-index mode is already enabled and `--split-index` is
+ given again, all changes in $GIT_DIR/index are pushed back to
+ the shared index file. This mode is designed for very large
+ indexes that take a signficant amount of time to read or write.
+
\--::
Do not interpret any more arguments as options.
diff --git a/builtin/update-index.c b/builtin/update-index.c
index f7a19c4..b0503f4 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -13,6 +13,7 @@
#include "parse-options.h"
#include "pathspec.h"
#include "dir.h"
+#include "split-index.h"
/*
* Default to not allowing changes to the list of files. The
@@ -742,6 +743,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
char set_executable_bit = 0;
struct refresh_params refresh_args = {0, &has_errors};
int lock_error = 0;
+ int split_index = -1;
struct lock_file *lock_file;
struct parse_opt_ctx_t ctx;
int parseopt_state = PARSE_OPT_UNKNOWN;
@@ -824,6 +826,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
resolve_undo_clear_callback},
OPT_INTEGER(0, "index-version", &preferred_index_format,
N_("write index in this format")),
+ OPT_BOOL(0, "split-index", &split_index,
+ N_("enable or disable split index")),
OPT_END()
};
@@ -917,6 +921,20 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
strbuf_release(&buf);
}
+ if (split_index > 0) {
+ init_split_index(&the_index);
+ the_index.cache_changed |= SPLIT_INDEX_ORDERED;
+ } else if (!split_index && the_index.split_index) {
+ /*
+ * can't discard_split_index(&the_index); because that
+ * will destroy split_index->base->cache[], which may
+ * be shared with the_index.cache[]. So yeah we're
+ * leaking a bit here.
+ */
+ the_index.split_index = NULL;
+ the_index.cache_changed |= SOMETHING_CHANGED;
+ }
+
if (active_cache_changed) {
if (newfd < 0) {
if (refresh_args.flags & REFRESH_QUIET)
diff --git a/cache.h b/cache.h
index 604328b..42cdfe6 100644
--- a/cache.h
+++ b/cache.h
@@ -278,6 +278,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define SOMETHING_CHANGED (1 << 3) /* unclassified changes go here */
#define RESOLVE_UNDO_CHANGED (1 << 4)
#define CACHE_TREE_CHANGED (1 << 5)
+#define SPLIT_INDEX_ORDERED (1 << 6)
struct split_index;
struct index_state {
diff --git a/read-cache.c b/read-cache.c
index 81835a6..a6c9407 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -15,6 +15,7 @@
#include "strbuf.h"
#include "varint.h"
#include "split-index.h"
+#include "sigchain.h"
static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
unsigned int options);
@@ -39,7 +40,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
- CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED)
+ CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
+ SPLIT_INDEX_ORDERED)
struct index_state the_index;
static const char *alternate_index_output;
@@ -1860,7 +1862,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
rollback_lock_file(lockfile);
}
-static int do_write_index(struct index_state *istate, int newfd)
+static int do_write_index(struct index_state *istate, int newfd,
+ int strip_extensions)
{
git_SHA_CTX c;
struct cache_header hdr;
@@ -1923,7 +1926,7 @@ static int do_write_index(struct index_state *istate, int newfd)
strbuf_release(&previous_name_buf);
/* Write extension data here */
- if (istate->split_index) {
+ if (!strip_extensions && istate->split_index) {
struct strbuf sb = STRBUF_INIT;
err = write_link_extension(&sb, istate) < 0 ||
@@ -1934,7 +1937,7 @@ static int do_write_index(struct index_state *istate, int newfd)
if (err)
return -1;
}
- if (istate->cache_tree) {
+ if (!strip_extensions && istate->cache_tree) {
struct strbuf sb = STRBUF_INIT;
cache_tree_write(&sb, istate->cache_tree);
@@ -1944,7 +1947,7 @@ static int do_write_index(struct index_state *istate, int newfd)
if (err)
return -1;
}
- if (istate->resolve_undo) {
+ if (!strip_extensions && istate->resolve_undo) {
struct strbuf sb = STRBUF_INIT;
resolve_undo_write(&sb, istate->resolve_undo);
@@ -1985,7 +1988,7 @@ static int commit_locked_index(struct lock_file *lk)
static int do_write_locked_index(struct index_state *istate, struct lock_file *lock,
unsigned flags)
{
- int ret = do_write_index(istate, lock->fd);
+ int ret = do_write_index(istate, lock->fd, 0);
if (ret)
return ret;
assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) !=
@@ -2009,6 +2012,52 @@ static int write_split_index(struct index_state *istate,
return ret;
}
+static char *temporary_sharedindex;
+
+static void remove_temporary_sharedindex(void)
+{
+ if (temporary_sharedindex) {
+ unlink_or_warn(temporary_sharedindex);
+ free(temporary_sharedindex);
+ temporary_sharedindex = NULL;
+ }
+}
+
+static void remove_temporary_sharedindex_on_signal(int signo)
+{
+ remove_temporary_sharedindex();
+ sigchain_pop(signo);
+ raise(signo);
+}
+
+static int write_shared_index(struct index_state *istate)
+{
+ struct split_index *si = istate->split_index;
+ static int installed_handler;
+ int fd, ret;
+
+ temporary_sharedindex = git_pathdup("sharedindex_XXXXXX");
+ fd = xmkstemp(temporary_sharedindex);
+ if (!installed_handler) {
+ atexit(remove_temporary_sharedindex);
+ sigchain_push_common(remove_temporary_sharedindex_on_signal);
+ }
+ move_cache_to_base_index(istate);
+ ret = do_write_index(si->base, fd, 1);
+ close(fd);
+ if (ret) {
+ remove_temporary_sharedindex();
+ return ret;
+ }
+ ret = rename(temporary_sharedindex,
+ git_path("sharedindex.%s", sha1_to_hex(si->base->sha1)));
+ free(temporary_sharedindex);
+ temporary_sharedindex = NULL;
+ if (!ret)
+ hashcpy(si->base_sha1, si->base->sha1);
+ return ret;
+}
+
int write_locked_index(struct index_state *istate, struct lock_file *lock,
unsigned flags)
{
@@ -2020,6 +2069,12 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
return do_write_locked_index(istate, lock, flags);
}
+ if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
+ int ret = write_shared_index(istate);
+ if (ret)
+ return ret;
+ }
+
return write_split_index(istate, lock, flags);
}
diff --git a/split-index.c b/split-index.c
index ee3246f..21485e2 100644
--- a/split-index.c
+++ b/split-index.c
@@ -74,6 +74,29 @@ static void mark_base_index_entries(struct index_state *base)
base->cache[i]->index = i + 1;
}
+void move_cache_to_base_index(struct index_state *istate)
+{
+ struct split_index *si = istate->split_index;
+ int i;
+
+ /*
+ * do not delete old si->base, its index entries may be shared
+ * with istate->cache[]. Accept a bit of leaking here because
+ * this code is only used by short-lived update-index.
+ */
+ si->base = xcalloc(1, sizeof(*si->base));
+ si->base->version = istate->version;
+ /* zero timestamp disables racy test in ce_write_index() */
+ si->base->timestamp = istate->timestamp;
+ ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
+ si->base->cache_nr = istate->cache_nr;
+ memcpy(si->base->cache, istate->cache,
+ sizeof(*istate->cache) * istate->cache_nr);
+ mark_base_index_entries(si->base);
+ for (i = 0; i < si->base->cache_nr; i++)
+ si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
+}
+
static void mark_entry_for_delete(size_t pos, void *data)
{
struct index_state *istate = data;
--
1.9.1.346.ga2b5940
next prev parent reply other threads:[~2014-04-28 10:57 UTC|newest]
Thread overview: 76+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-28 10:55 [PATCH 00/32] Split index mode for very large indexes Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 01/32] ewah: fix constness of ewah_read_mmap Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 02/32] ewah: delete unused ewah_read_mmap_native declaration Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 03/32] sequencer: do not update/refresh index if the lock cannot be held Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 04/32] read-cache: new API write_locked_index instead of write_index/write_cache Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 05/32] read-cache: relocate and unexport commit_locked_index() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 06/32] read-cache: store in-memory flags in the first 12 bits of ce_flags Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 07/32] read-cache: be strict about "changed" in remove_marked_cache_entries() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 08/32] read-cache: be specific what part of the index has changed Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 09/32] update-index: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 10/32] resolve-undo: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 11/32] unpack-trees: " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 12/32] cache-tree: mark istate->cache_changed on cache tree invalidation Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 13/32] cache-tree: mark istate->cache_changed on cache tree update Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 14/32] cache-tree: mark istate->cache_changed on prime_cache_tree() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 15/32] entry.c: update cache_changed if refresh_cache is set in checkout_entry() Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 16/32] read-cache: save index SHA-1 after reading Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 17/32] read-cache: split-index mode Nguyễn Thái Ngọc Duy
2014-04-28 22:46 ` Junio C Hamano
2014-04-29 1:43 ` Duy Nguyen
2014-04-29 17:23 ` Junio C Hamano
2014-04-29 22:45 ` Duy Nguyen
2014-04-30 13:57 ` Junio C Hamano
2014-04-28 10:55 ` [PATCH 18/32] read-cache: mark new entries for split index Nguyễn Thái Ngọc Duy
2014-04-30 20:35 ` Eric Sunshine
2014-04-28 10:55 ` [PATCH 19/32] read-cache: save deleted entries in " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 20/32] read-cache: mark updated entries for " Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 21/32] split-index: the writing part Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 22/32] split-index: the reading part Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 23/32] split-index: do not invalidate cache-tree at read time Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 24/32] split-index: strip pathname of on-disk replaced entries Nguyễn Thái Ngọc Duy
2014-04-29 20:25 ` Junio C Hamano
2014-04-28 10:55 ` Nguyễn Thái Ngọc Duy [this message]
2014-04-28 10:55 ` [PATCH 26/32] update-index --split-index: do not split if $GIT_DIR is read only Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 27/32] rev-parse: add --shared-index-path to get shared index path Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 28/32] read-tree: force split-index mode off on --index-output Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 29/32] read-tree: note about dropping split-index mode or index version Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 30/32] read-cache: force split index mode with GIT_TEST_SPLIT_INDEX Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 31/32] t2104: make sure split index mode is off for the version test Nguyễn Thái Ngọc Duy
2014-04-28 10:55 ` [PATCH 32/32] t1700: new tests for split-index mode Nguyễn Thái Ngọc Duy
2014-04-28 21:18 ` [PATCH 00/32] Split index mode for very large indexes Shawn Pearce
2014-04-29 1:52 ` Duy Nguyen
2014-05-09 10:27 ` Duy Nguyen
2014-05-09 17:55 ` Junio C Hamano
2014-05-13 11:15 ` [PATCH 0/8] Speed up cache loading time Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 1/8] read-cache: allow to keep mmap'd memory after reading Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 2/3] Add read-cache--daemon Nguyễn Thái Ngọc Duy
2014-05-13 11:52 ` Erik Faye-Lund
2014-05-13 12:01 ` Duy Nguyen
2014-05-13 13:01 ` Duy Nguyen
2014-05-13 13:37 ` Erik Faye-Lund
2014-05-13 13:49 ` Duy Nguyen
2014-05-13 14:06 ` Erik Faye-Lund
2014-05-13 14:10 ` Duy Nguyen
2014-05-13 14:16 ` Erik Faye-Lund
2014-05-13 11:15 ` [PATCH 2/8] unix-socket: stub impl. for platforms with no unix socket support Nguyễn Thái Ngọc Duy
2014-05-13 11:59 ` Erik Faye-Lund
2014-05-13 12:03 ` Erik Faye-Lund
2014-05-13 11:15 ` [PATCH 3/8] daemonize: set a flag before exiting the main process Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 3/3] read-cache: try index data from shared memory Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 4/8] Add read-cache--daemon for caching index and related stuff Nguyễn Thái Ngọc Duy
2014-05-13 11:56 ` Erik Faye-Lund
2014-05-13 11:15 ` [PATCH 5/8] read-cache: try index data from shared memory Nguyễn Thái Ngọc Duy
2014-05-13 12:13 ` Erik Faye-Lund
2014-05-13 11:15 ` [PATCH 6/8] read-cache--daemon: do not read index " Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 7/8] read-cache: skip verifying trailing SHA-1 on cached index Nguyễn Thái Ngọc Duy
2014-05-13 11:15 ` [PATCH 8/8] read-cache: inform the daemon that the index has been updated Nguyễn Thái Ngọc Duy
2014-05-13 12:17 ` Erik Faye-Lund
2014-05-22 16:38 ` David Turner
2014-05-13 14:24 ` [PATCH 0/8] Speed up cache loading time Stefan Beller
2014-05-13 14:35 ` Duy Nguyen
2014-05-13 11:20 ` [PATCH 9/8] even faster loading time with index version 254 Nguyễn Thái Ngọc Duy
2014-04-28 22:23 ` [PATCH 00/32] Split index mode for very large indexes Junio C Hamano
2014-04-30 20:48 ` Richard Hansen
2014-05-01 0:09 ` Duy Nguyen
-- strict thread matches above, loose matches on Subject: below --
2014-06-13 12:19 [PATCH 00/32] Split index resend Nguyễn Thái Ngọc Duy
2014-06-13 12:19 ` [PATCH 25/32] update-index: new options to enable/disable split index mode Nguyễn Thái Ngọc Duy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1398682553-11634-26-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.