git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Gummerer <t.gummerer@gmail.com>
To: git@vger.kernel.org
Cc: t.gummerer@gmail.com, gitster@pobox.com, tr@thomasrast.ch,
	mhagger@alum.mit.edu, pclouds@gmail.com,
	robin.rosenberg@dewire.com, sunshine@sunshineco.com,
	ramsay@ramsay1.demon.co.uk
Subject: [PATCH v4 14/24] read-cache: read cache-tree in index-v5
Date: Wed, 27 Nov 2013 13:00:49 +0100	[thread overview]
Message-ID: <1385553659-9928-15-git-send-email-t.gummerer@gmail.com> (raw)
In-Reply-To: <1385553659-9928-1-git-send-email-t.gummerer@gmail.com>

Since the cache-tree data is saved as part of the directory data,
we already read it at the beginning of the index. The cache-tree
is only converted from this directory data.

The cache-tree data is arranged in a tree, with the children sorted by
pathlen at each node, while the ondisk format is sorted lexically.
So we have to rebuild this format from the on-disk directory list.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache-tree.c    |  2 +-
 cache-tree.h    |  1 +
 read-cache-v5.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/cache-tree.c b/cache-tree.c
index 0bbec43..1209732 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -31,7 +31,7 @@ void cache_tree_free(struct cache_tree **it_p)
 	*it_p = NULL;
 }
 
-static int subtree_name_cmp(const char *one, int onelen,
+int subtree_name_cmp(const char *one, int onelen,
 			    const char *two, int twolen)
 {
 	if (onelen < twolen)
diff --git a/cache-tree.h b/cache-tree.h
index f1923ad..9818926 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -25,6 +25,7 @@ struct cache_tree *cache_tree(void);
 void cache_tree_free(struct cache_tree **);
 void cache_tree_invalidate_path(struct cache_tree *, const char *);
 struct cache_tree_sub *cache_tree_sub(struct cache_tree *, const char *);
+int subtree_name_cmp(const char *, int, const char *, int);
 
 void cache_tree_write(struct strbuf *, struct cache_tree *root);
 struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
diff --git a/read-cache-v5.c b/read-cache-v5.c
index a9c687f..01f1c88 100644
--- a/read-cache-v5.c
+++ b/read-cache-v5.c
@@ -418,6 +418,73 @@ static int read_index_extension(struct index_state *istate,
 	return 0;
 }
 
+static int compare_cache_tree(const void *a, const void *b)
+{
+	const struct cache_tree_sub *it1, *it2;
+
+	it1 = *(const struct cache_tree_sub **) a;
+	it2 = *(const struct cache_tree_sub **) b;
+	return subtree_name_cmp(it1->name, it1->namelen,
+				it2->name, it2->namelen);
+}
+
+/*
+ * Convert the directory entries to cache-tree entries
+ * recursively.
+ */
+static struct cache_tree *convert_one(struct directory_entry *de)
+{
+	int i;
+	struct cache_tree *it;
+
+	it = cache_tree();
+	it->entry_count = de->de_nentries;
+	if (0 <= it->entry_count)
+		hashcpy(it->sha1, de->sha1);
+
+	/*
+	 * Just a heuristic -- we do not add directories that often but
+	 * we do not want to have to extend it immediately when we do,
+	 * hence +2.
+	 */
+	it->subtree_alloc = de->de_nsubtrees + 2;
+	it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *));
+	for (i = 0; i < de->de_nsubtrees; i++) {
+		struct cache_tree *sub = convert_one(de->sub[i]);
+		struct cache_tree_sub *subtree;
+		/* -1 for removing the / at the end of the pathname */
+		int namelen = de->sub[i]->de_pathlen - de->de_pathlen - 1;
+
+		if (!sub)
+			goto free_return;
+
+		subtree = xmalloc(sizeof(*subtree) + namelen + 1);
+		subtree->cache_tree = sub;
+		subtree->namelen = namelen;
+		memcpy(subtree->name, de->sub[i]->pathname + de->de_pathlen, namelen);
+		subtree->name[namelen] = '\0';
+		it->down[i] = subtree;
+		it->subtree_nr++;
+	}
+	qsort(it->down, it->subtree_nr, sizeof(struct cache_tree_sub *),
+	      compare_cache_tree);
+	return it;
+free_return:
+	cache_tree_free(&it);
+	return NULL;
+}
+
+/*
+ * This function modifies the directory argument that is given to it.
+ * Don't use it if the directory entries are still needed after.
+ */
+static struct cache_tree *cache_tree_convert_v5(struct directory_entry *de)
+{
+	if (!de->de_nentries)
+		return NULL;
+	return convert_one(de);
+}
+
 /*
  * Read all file entries from the index.  This function is recursive to get
  * the ordering right. In the index file the entries are sorted def, abc/def,
@@ -558,6 +625,7 @@ static int read_index_v5(struct index_state *istate, void *mmap,
 				return -1;
 		}
 	}
+	istate->cache_tree = cache_tree_convert_v5(root_directory);
 	free_directory_tree(root_directory);
 	istate->cache_nr = nr;
 	return 0;
-- 
1.8.4.2

  parent reply	other threads:[~2013-11-27 12:02 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-27 12:00 [PATCH v4 00/24] Index-v5 Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 01/24] t2104: Don't fail for index versions other than [23] Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 02/24] read-cache: split index file version specific functionality Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 03/24] read-cache: move index v2 specific functions to their own file Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 04/24] read-cache: Re-read index if index file changed Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 05/24] add documentation for the index api Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 06/24] read-cache: add index reading api Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 07/24] make sure partially read index is not changed Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 08/24] grep.c: use index api Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 09/24] ls-files.c: " Thomas Gummerer
2013-11-30  9:17   ` Duy Nguyen
2013-11-30 10:30     ` Thomas Gummerer
2013-11-30 15:39   ` Antoine Pelisse
2013-11-30 20:08     ` Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 10/24] documentation: add documentation of the index-v5 file format Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 11/24] read-cache: make in-memory format aware of stat_crc Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 12/24] read-cache: read index-v5 Thomas Gummerer
2013-11-30  9:17   ` Duy Nguyen
2013-11-30 10:40     ` Thomas Gummerer
2013-11-30 12:19   ` Antoine Pelisse
2013-11-30 20:10     ` Thomas Gummerer
2013-11-30 15:26   ` Antoine Pelisse
2013-11-30 20:27     ` Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 13/24] read-cache: read resolve-undo data Thomas Gummerer
2013-11-27 12:00 ` Thomas Gummerer [this message]
2013-11-27 12:00 ` [PATCH v4 15/24] read-cache: write index-v5 Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 16/24] read-cache: write index-v5 cache-tree data Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 17/24] read-cache: write resolve-undo data for index-v5 Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 18/24] update-index.c: rewrite index when index-version is given Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 19/24] p0003-index.sh: add perf test for the index formats Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 20/24] introduce GIT_INDEX_VERSION environment variable Thomas Gummerer
2013-11-27 21:57   ` Eric Sunshine
2013-11-27 22:08     ` Junio C Hamano
2013-11-28  9:57       ` Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 21/24] test-lib: allow setting the index format version Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 22/24] t1600: add index v5 specific tests Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 23/24] POC for partial writing Thomas Gummerer
2013-11-30  9:58   ` Duy Nguyen
2013-11-30 10:50     ` Thomas Gummerer
2013-11-27 12:00 ` [PATCH v4 24/24] perf: add partial writing test Thomas Gummerer
2013-12-09 10:14 ` [PATCH v4 00/24] Index-v5 Thomas Gummerer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1385553659-9928-15-git-send-email-t.gummerer@gmail.com \
    --to=t.gummerer@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mhagger@alum.mit.edu \
    --cc=pclouds@gmail.com \
    --cc=ramsay@ramsay1.demon.co.uk \
    --cc=robin.rosenberg@dewire.com \
    --cc=sunshine@sunshineco.com \
    --cc=tr@thomasrast.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).