From: Thomas Gummerer <t.gummerer@gmail.com>
To: git@vger.kernel.org
Cc: t.gummerer@gmail.com, gitster@pobox.com, tr@thomasrast.ch,
mhagger@alum.mit.edu, pclouds@gmail.com,
robin.rosenberg@dewire.com, sunshine@sunshineco.com,
ramsay@ramsay1.demon.co.uk
Subject: [PATCH v4 15/24] read-cache: write index-v5
Date: Wed, 27 Nov 2013 13:00:50 +0100 [thread overview]
Message-ID: <1385553659-9928-16-git-send-email-t.gummerer@gmail.com> (raw)
In-Reply-To: <1385553659-9928-1-git-send-email-t.gummerer@gmail.com>
Write the index version 5 file format to disk. This version doesn't
write the cache-tree data and resolve-undo data to the file.
The main work is done when filtering out the directories from the
current in-memory format, where in the same turn also the conflicts
and the file data is calculated.
Helped-by: Nguyen Thai Ngoc Duy <pclouds@gmail.com>
Helped-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
cache.h | 1 +
read-cache-v5.c | 431 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
read-cache.c | 4 +-
read-cache.h | 1 +
4 files changed, 435 insertions(+), 2 deletions(-)
diff --git a/cache.h b/cache.h
index 65171e4..71b98cf 100644
--- a/cache.h
+++ b/cache.h
@@ -138,6 +138,7 @@ struct cache_entry {
unsigned char sha1[20];
uint32_t ce_stat_crc;
struct cache_entry *next; /* used by name_hash */
+ struct cache_entry *next_ce;
char name[FLEX_ARRAY]; /* more */
};
diff --git a/read-cache-v5.c b/read-cache-v5.c
index 01f1c88..797022f 100644
--- a/read-cache-v5.c
+++ b/read-cache-v5.c
@@ -631,9 +631,438 @@ static int read_index_v5(struct index_state *istate, void *mmap,
return 0;
}
+#define WRITE_BUFFER_SIZE 8192
+static unsigned char write_buffer[WRITE_BUFFER_SIZE];
+static unsigned long write_buffer_len;
+
+static int ce_write_flush(int fd)
+{
+ unsigned int buffered = write_buffer_len;
+ if (buffered) {
+ if (write_in_full(fd, write_buffer, buffered) != buffered)
+ return -1;
+ write_buffer_len = 0;
+ }
+ return 0;
+}
+
+static int ce_write(uint32_t *crc, int fd, void *data, unsigned int len)
+{
+ if (crc)
+ *crc = crc32(*crc, (Bytef*)data, len);
+ while (len) {
+ unsigned int buffered = write_buffer_len;
+ unsigned int partial = WRITE_BUFFER_SIZE - buffered;
+ if (partial > len)
+ partial = len;
+ memcpy(write_buffer + buffered, data, partial);
+ buffered += partial;
+ if (buffered == WRITE_BUFFER_SIZE) {
+ write_buffer_len = buffered;
+ if (ce_write_flush(fd))
+ return -1;
+ buffered = 0;
+ }
+ write_buffer_len = buffered;
+ len -= partial;
+ data = (char *) data + partial;
+ }
+ return 0;
+}
+
+static int ce_flush(int fd)
+{
+ unsigned int left = write_buffer_len;
+
+ if (left)
+ write_buffer_len = 0;
+
+ if (write_in_full(fd, write_buffer, left) != left)
+ return -1;
+
+ return 0;
+}
+
+static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
+{
+ /*
+ * This method shall only be called if the timestamp of ce
+ * is racy (check with is_racy_timestamp). If the timestamp
+ * is racy, the writer will set the CE_SMUDGED flag.
+ *
+ * The reader (match_stat_basic) will then take care
+ * of checking if the entry is really changed or not, by
+ * taking into account the size and the stat_crc and if
+ * that hasn't changed checking the sha1.
+ */
+ ce->ce_flags |= CE_SMUDGED;
+}
+
+static char *super_directory(char *filename)
+{
+ char *super = dirname(filename);
+ if (!strcmp(super, "."))
+ return NULL;
+ return super;
+}
+
+static void ondisk_from_directory_entry(struct directory_entry *de,
+ struct ondisk_directory_entry *ondisk)
+{
+ ondisk->foffset = htonl(de->de_foffset);
+ ondisk->nsubtrees = htonl(de->de_nsubtrees);
+ ondisk->nfiles = htonl(de->de_nfiles);
+ ondisk->nentries = htonl(de->de_nentries);
+ hashcpy(ondisk->sha1, de->sha1);
+ ondisk->flags = htons(de->de_flags);
+ if (de->de_pathlen == 0) {
+ memcpy(ondisk->name, "\0", 1);
+ } else {
+ memcpy(ondisk->name, de->pathname, de->de_pathlen);
+ memcpy(ondisk->name + de->de_pathlen, "/\0", 2);
+ }
+}
+
+static void insert_directory_entry(struct directory_entry *de,
+ struct hash_table *table,
+ unsigned int *total_dir_len,
+ unsigned int *ndir,
+ uint32_t crc)
+{
+ struct directory_entry *insert;
+
+ insert = (struct directory_entry *)insert_hash(crc, de, table);
+ if (insert) {
+ de->next_hash = insert->next_hash;
+ insert->next_hash = de;
+ }
+ (*ndir)++;
+ if (de->de_pathlen == 0)
+ (*total_dir_len)++;
+ else
+ *total_dir_len += de->de_pathlen + 2;
+}
+
+static struct directory_entry *find_directory(char *dir, int dir_len, uint32_t *crc,
+ struct hash_table *table)
+{
+ struct directory_entry *search;
+
+ *crc = crc32(0, (Bytef*)dir, dir_len);
+ search = lookup_hash(*crc, table);
+ while (search &&
+ cache_name_compare(dir, dir_len, search->pathname, search->de_pathlen))
+ search = search->next_hash;
+ return search;
+}
+
+static struct directory_entry *get_directory(char *dir, unsigned int dir_len,
+ struct hash_table *table,
+ unsigned int *total_dir_len,
+ unsigned int *ndir,
+ struct directory_entry **current)
+{
+ struct directory_entry *tmp = NULL, *search, *new, *ret;
+ uint32_t crc;
+
+ search = find_directory(dir, dir_len, &crc, table);
+ if (search)
+ return search;
+ while (!search) {
+ new = init_directory_entry(dir, dir_len);
+ insert_directory_entry(new, table, total_dir_len, ndir, crc);
+ if (!tmp)
+ ret = new;
+ else
+ new->de_nsubtrees = 1;
+ new->next = tmp;
+ tmp = new;
+ dir = super_directory(dir);
+ dir_len = dir ? strlen(dir) : 0;
+ search = find_directory(dir, dir_len, &crc, table);
+ }
+ search->de_nsubtrees++;
+ (*current)->next = tmp;
+ while ((*current)->next)
+ *current = (*current)->next;
+
+ return ret;
+}
+
+static void ce_queue_push(struct cache_entry **head,
+ struct cache_entry **tail,
+ struct cache_entry *ce)
+{
+ if (!*head) {
+ *head = *tail = ce;
+ (*tail)->next_ce = NULL;
+ return;
+ }
+
+ (*tail)->next_ce = ce;
+ ce->next_ce = NULL;
+ *tail = (*tail)->next_ce;
+}
+
+static struct directory_entry *compile_directory_data(struct index_state *istate,
+ int nfile, unsigned int *ndir,
+ unsigned int *total_dir_len,
+ unsigned int *total_file_len)
+{
+ int i, dir_len = -1;
+ char *dir;
+ struct directory_entry *de, *current, *search;
+ struct cache_entry **cache = istate->cache;
+ struct hash_table table;
+ uint32_t crc;
+
+ init_hash(&table);
+ de = init_directory_entry("", 0);
+ current = de;
+ *ndir = 1;
+ *total_dir_len = 1;
+ crc = crc32(0, (Bytef*)de->pathname, de->de_pathlen);
+ insert_hash(crc, de, &table);
+ for (i = 0; i < nfile; i++) {
+ if (cache[i]->ce_flags & CE_REMOVE)
+ continue;
+
+ if (dir_len < 0
+ || !(!(dir_len < ce_namelen(cache[i]) && cache[i]->name[dir_len] != '/')
+ && !strchr(cache[i]->name + dir_len + 1, '/')
+ && !cache_name_compare(cache[i]->name, ce_namelen(cache[i]),
+ dir, dir_len))) {
+ dir = super_directory(strdup(cache[i]->name));
+ dir_len = dir ? strlen(dir) : 0;
+ search = get_directory(dir, dir_len, &table,
+ total_dir_len, ndir,
+ ¤t);
+ }
+ search->de_nfiles++;
+ *total_file_len += ce_namelen(cache[i]) + 1;
+ if (search->de_pathlen)
+ *total_file_len -= search->de_pathlen + 1;
+ ce_queue_push(&(search->ce), &(search->ce_last), cache[i]);
+ }
+ return de;
+}
+
+static void ondisk_from_cache_entry(struct cache_entry *ce,
+ struct ondisk_cache_entry *ondisk,
+ int pathlen)
+{
+ unsigned int flags;
+
+ flags = ce->ce_flags & CE_STAGEMASK;
+ flags |= ce->ce_flags & CE_VALID;
+ flags |= ce->ce_flags & CE_SMUDGED;
+ if (ce->ce_flags & CE_INTENT_TO_ADD)
+ flags |= CE_INTENT_TO_ADD_V5;
+ if (ce->ce_flags & CE_SKIP_WORKTREE)
+ flags |= CE_SKIP_WORKTREE_V5;
+ ondisk->flags = htons(flags);
+ ondisk->mode = htons(ce->ce_mode);
+ ondisk->mtime.sec = htonl(ce->ce_stat_data.sd_mtime.sec);
+#ifdef USE_NSEC
+ ondisk->mtime.nsec = htonl(ce->ce_stat_data.sd_mtime.nsec);
+#else
+ ondisk->mtime.nsec = 0;
+#endif
+ ondisk->size = htonl(ce->ce_stat_data.sd_size);
+ if (!ce->ce_stat_crc)
+ ce->ce_stat_crc = calculate_stat_crc(ce);
+ ondisk->stat_crc = htonl(ce->ce_stat_crc);
+ hashcpy(ondisk->sha1, ce->sha1);
+ memcpy(ondisk->name, ce->name + pathlen, ce_namelen(ce) - pathlen);
+ ondisk->name[ce_namelen(ce) - pathlen] = '\0';
+}
+
+static int write_directories(struct directory_entry *de, int fd)
+{
+ struct directory_entry *current;
+ struct ondisk_directory_entry *ondisk;
+ int current_offset, offset_write, ondisk_size, foffset;
+ uint32_t crc;
+
+ ondisk_size = offsetof(struct ondisk_directory_entry, name);
+ current = de;
+ current_offset = 0;
+ foffset = 0;
+ /* Write directory offsets */
+ while (current) {
+ int pathlen;
+
+ offset_write = htonl(current_offset);
+ if (ce_write(NULL, fd, &offset_write, 4) < 0)
+ return -1;
+ if (current->de_pathlen == 0)
+ pathlen = 0;
+ else
+ pathlen = current->de_pathlen + 1;
+ current_offset += pathlen + 1 + ondisk_size + 4;
+ current = current->next;
+ }
+ /*
+ * Write one more offset, which points to the end of the entries,
+ * because we use it for calculating the dir length, instead of
+ * using strlen.
+ */
+ offset_write = htonl(current_offset);
+ if (ce_write(NULL, fd, &offset_write, 4) < 0)
+ return -1;
+ current = de;
+ /* Write directory entries */
+ while (current) {
+ int size = ondisk_size + current->de_pathlen + 1;
+
+ crc = 0;
+ current->de_foffset = foffset;
+ if (current->de_pathlen != 0)
+ size++;
+ ondisk = xmalloc(size);
+ ondisk_from_directory_entry(current, ondisk);
+ if (ce_write(&crc, fd, ondisk, size) < 0)
+ return -1;
+ crc = htonl(crc);
+ if (ce_write(NULL, fd, &crc, 4) < 0)
+ return -1;
+ foffset += current->de_nfiles * 4;
+ free(ondisk);
+ current = current->next;
+ }
+ return 0;
+}
+
+static int write_entries(struct index_state *istate,
+ struct directory_entry *de,
+ int entries,
+ int fd)
+{
+ int offset, offset_write, ondisk_size;
+ struct directory_entry *current;
+
+ offset = 0;
+ ondisk_size = offsetof(struct ondisk_cache_entry, name);
+ current = de;
+ /* Write cache entry offsets */
+ while (current) {
+ int pathlen;
+ struct cache_entry *ce = current->ce;
+
+ pathlen = current->de_pathlen ? current->de_pathlen + 1 : 0;
+ while (ce) {
+ if (!ce_uptodate(ce) && is_racy_timestamp(istate, ce))
+ ce_smudge_racily_clean_entry(ce);
+ if (is_null_sha1(ce->sha1)) {
+ static const char msg[] = "cache entry has null sha1: %s";
+ static int allow = -1;
+
+ if (allow < 0)
+ allow = git_env_bool("GIT_ALLOW_NULL_SHA1", 0);
+ if (allow)
+ warning(msg, ce->name);
+ else
+ return error(msg, ce->name);
+ }
+ offset_write = htonl(offset);
+ if (ce_write(NULL, fd, &offset_write, 4) < 0)
+ return -1;
+ offset += ce_namelen(ce) - pathlen + 1 + ondisk_size + 4;
+ ce = ce->next_ce;
+ }
+ current = current->next;
+ }
+ /*
+ * Write one more offset, which points to the end of the entries,
+ * because we use it for calculating the file length, instead of
+ * using strlen.
+ */
+ offset_write = htonl(offset);
+ if (ce_write(NULL, fd, &offset_write, 4) < 0)
+ return -1;
+
+ current = de;
+ /* Write cache entries */
+ while (current) {
+ int pathlen;
+ struct cache_entry *ce = current->ce;
+
+ pathlen = current->de_pathlen ? current->de_pathlen + 1 : 0;
+ while (ce) {
+ int size = offsetof(struct ondisk_cache_entry, name) +
+ ce_namelen(ce) - pathlen + 1;
+ struct ondisk_cache_entry *ondisk = xmalloc(size);
+ uint32_t crc;
+
+ crc = 0;
+ ondisk_from_cache_entry(ce, ondisk, pathlen);
+ if (ce_write(&crc, fd, ondisk, size) < 0)
+ return -1;
+ crc = htonl(crc);
+ if (ce_write(NULL, fd, &crc, 4) < 0)
+ return -1;
+ offset += 4;
+ ce = ce->next_ce;
+ }
+ current = current->next;
+ }
+ return 0;
+}
+
+static int write_index_v5(struct index_state *istate, int newfd)
+{
+ struct cache_header hdr;
+ struct cache_header_v5 hdr_v5;
+ struct cache_entry **cache = istate->cache;
+ struct directory_entry *de;
+ unsigned int entries = istate->cache_nr;
+ unsigned int i, removed, total_dir_len;
+ unsigned int total_file_len, foffsetblock;
+ unsigned int ndir;
+ uint32_t crc;
+
+ if (istate->filter_opts)
+ die("BUG: index: cannot write a partially read index");
+
+ for (i = removed = 0; i < entries; i++) {
+ if (cache[i]->ce_flags & CE_REMOVE)
+ removed++;
+ }
+ hdr.hdr_signature = htonl(CACHE_SIGNATURE);
+ hdr.hdr_version = htonl(istate->version);
+ hdr.hdr_entries = htonl(entries - removed);
+ hdr_v5.hdr_nextension = htonl(0); /* Currently no extensions are supported */
+
+ total_dir_len = 0;
+ total_file_len = 0;
+ de = compile_directory_data(istate, entries, &ndir,
+ &total_dir_len, &total_file_len);
+ hdr_v5.hdr_ndir = htonl(ndir);
+
+ foffsetblock = sizeof(hdr) + sizeof(hdr_v5) + 4
+ + (ndir + 1) * 4
+ + total_dir_len
+ + ndir * (offsetof(struct ondisk_directory_entry, name) + 4);
+ hdr_v5.hdr_fblockoffset = htonl(foffsetblock + (entries - removed + 1) * 4);
+ crc = 0;
+ if (ce_write(&crc, newfd, &hdr, sizeof(hdr)) < 0)
+ return -1;
+ if (ce_write(&crc, newfd, &hdr_v5, sizeof(hdr_v5)) < 0)
+ return -1;
+ crc = htonl(crc);
+ if (ce_write(NULL, newfd, &crc, 4) < 0)
+ return -1;
+
+ if (write_directories(de, newfd) < 0)
+ return -1;
+ if (write_entries(istate, de, entries, newfd) < 0)
+ return -1;
+ return ce_flush(newfd);
+}
+
struct index_ops v5_ops = {
match_stat_basic,
verify_hdr,
read_index_v5,
- NULL
+ write_index_v5
};
diff --git a/read-cache.c b/read-cache.c
index baa052c..46551af 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -106,7 +106,7 @@ int match_stat_data(const struct stat_data *sd, struct stat *st)
return changed;
}
-static uint32_t calculate_stat_crc(struct cache_entry *ce)
+uint32_t calculate_stat_crc(struct cache_entry *ce)
{
unsigned int ctimens = 0;
uint32_t stat, stat_crc;
@@ -227,6 +227,8 @@ static void set_istate_ops(struct index_state *istate)
{
if (istate->version >= 2 && istate->version <= 4)
istate->ops = &v2_ops;
+ if (istate->version == 5)
+ istate->ops = &v5_ops;
}
int ce_match_stat_basic(const struct index_state *istate,
diff --git a/read-cache.h b/read-cache.h
index 7823fbb..9d66df6 100644
--- a/read-cache.h
+++ b/read-cache.h
@@ -61,5 +61,6 @@ extern int ce_match_stat_basic(const struct index_state *istate,
const struct cache_entry *ce, struct stat *st);
extern int is_racy_timestamp(const struct index_state *istate, const struct cache_entry *ce);
extern void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce);
+extern uint32_t calculate_stat_crc(struct cache_entry *ce);
#endif
--
1.8.4.2
next prev parent reply other threads:[~2013-11-27 12:08 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-11-27 12:00 [PATCH v4 00/24] Index-v5 Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 01/24] t2104: Don't fail for index versions other than [23] Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 02/24] read-cache: split index file version specific functionality Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 03/24] read-cache: move index v2 specific functions to their own file Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 04/24] read-cache: Re-read index if index file changed Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 05/24] add documentation for the index api Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 06/24] read-cache: add index reading api Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 07/24] make sure partially read index is not changed Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 08/24] grep.c: use index api Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 09/24] ls-files.c: " Thomas Gummerer
2013-11-30 9:17 ` Duy Nguyen
2013-11-30 10:30 ` Thomas Gummerer
2013-11-30 15:39 ` Antoine Pelisse
2013-11-30 20:08 ` Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 10/24] documentation: add documentation of the index-v5 file format Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 11/24] read-cache: make in-memory format aware of stat_crc Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 12/24] read-cache: read index-v5 Thomas Gummerer
2013-11-30 9:17 ` Duy Nguyen
2013-11-30 10:40 ` Thomas Gummerer
2013-11-30 12:19 ` Antoine Pelisse
2013-11-30 20:10 ` Thomas Gummerer
2013-11-30 15:26 ` Antoine Pelisse
2013-11-30 20:27 ` Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 13/24] read-cache: read resolve-undo data Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 14/24] read-cache: read cache-tree in index-v5 Thomas Gummerer
2013-11-27 12:00 ` Thomas Gummerer [this message]
2013-11-27 12:00 ` [PATCH v4 16/24] read-cache: write index-v5 cache-tree data Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 17/24] read-cache: write resolve-undo data for index-v5 Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 18/24] update-index.c: rewrite index when index-version is given Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 19/24] p0003-index.sh: add perf test for the index formats Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 20/24] introduce GIT_INDEX_VERSION environment variable Thomas Gummerer
2013-11-27 21:57 ` Eric Sunshine
2013-11-27 22:08 ` Junio C Hamano
2013-11-28 9:57 ` Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 21/24] test-lib: allow setting the index format version Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 22/24] t1600: add index v5 specific tests Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 23/24] POC for partial writing Thomas Gummerer
2013-11-30 9:58 ` Duy Nguyen
2013-11-30 10:50 ` Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 24/24] perf: add partial writing test Thomas Gummerer
2013-12-09 10:14 ` [PATCH v4 00/24] Index-v5 Thomas Gummerer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1385553659-9928-16-git-send-email-t.gummerer@gmail.com \
--to=t.gummerer@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=mhagger@alum.mit.edu \
--cc=pclouds@gmail.com \
--cc=ramsay@ramsay1.demon.co.uk \
--cc=robin.rosenberg@dewire.com \
--cc=sunshine@sunshineco.com \
--cc=tr@thomasrast.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).