From: Jeff Hostetler <jeffhost@microsoft.com>
To: git@vger.kernel.org
Cc: jeffhost@microsoft.com, peff@peff.net, gitster@pobox.com,
markbt@efaref.net, benpeart@microsoft.com,
jonathantanmy@google.com, Jeff Hostetler <git@jeffhostetler.com>
Subject: [PATCH 10/10] ls-partial: created command to list missing blobs
Date: Wed, 8 Mar 2017 17:38:05 +0000 [thread overview]
Message-ID: <1488994685-37403-11-git-send-email-jeffhost@microsoft.com> (raw)
In-Reply-To: <1488994685-37403-1-git-send-email-jeffhost@microsoft.com>
From: Jeff Hostetler <git@jeffhostetler.com>
Added a command to list the missing blobs for a commit.
This can be used after a partial clone or fetch to list
the omitted blobs that the client would need to check out
the given commit/branch, optionally respecting or ignoring
the current sparse-checkout definition.
This command prints a simple list of blob SHAs. It is
expected that this would be piped into another command
with knowledge of the transport and/or blob store.
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
Makefile | 2 +
builtin.h | 1 +
builtin/ls-partial.c | 110 ++++++++++++++++++++
git.c | 1 +
partial-utils.c | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++
partial-utils.h | 93 +++++++++++++++++
6 files changed, 486 insertions(+)
create mode 100644 builtin/ls-partial.c
create mode 100644 partial-utils.c
create mode 100644 partial-utils.h
diff --git a/Makefile b/Makefile
index 9ec6065..96e9e1e 100644
--- a/Makefile
+++ b/Makefile
@@ -791,6 +791,7 @@ LIB_OBJS += pack-write.o
LIB_OBJS += pager.o
LIB_OBJS += parse-options.o
LIB_OBJS += parse-options-cb.o
+LIB_OBJS += partial-utils.o
LIB_OBJS += patch-delta.o
LIB_OBJS += patch-ids.o
LIB_OBJS += path.o
@@ -908,6 +909,7 @@ BUILTIN_OBJS += builtin/init-db.o
BUILTIN_OBJS += builtin/interpret-trailers.o
BUILTIN_OBJS += builtin/log.o
BUILTIN_OBJS += builtin/ls-files.o
+BUILTIN_OBJS += builtin/ls-partial.o
BUILTIN_OBJS += builtin/ls-remote.o
BUILTIN_OBJS += builtin/ls-tree.o
BUILTIN_OBJS += builtin/mailinfo.o
diff --git a/builtin.h b/builtin.h
index 9e4a898..df00c4b 100644
--- a/builtin.h
+++ b/builtin.h
@@ -79,6 +79,7 @@ extern int cmd_interpret_trailers(int argc, const char **argv, const char *prefi
extern int cmd_log(int argc, const char **argv, const char *prefix);
extern int cmd_log_reflog(int argc, const char **argv, const char *prefix);
extern int cmd_ls_files(int argc, const char **argv, const char *prefix);
+extern int cmd_ls_partial(int argc, const char **argv, const char *prefix);
extern int cmd_ls_tree(int argc, const char **argv, const char *prefix);
extern int cmd_ls_remote(int argc, const char **argv, const char *prefix);
extern int cmd_mailinfo(int argc, const char **argv, const char *prefix);
diff --git a/builtin/ls-partial.c b/builtin/ls-partial.c
new file mode 100644
index 0000000..8ebf045
--- /dev/null
+++ b/builtin/ls-partial.c
@@ -0,0 +1,110 @@
+#include "cache.h"
+#include "blob.h"
+#include "tree.h"
+#include "commit.h"
+#include "quote.h"
+#include "builtin.h"
+#include "parse-options.h"
+#include "pathspec.h"
+#include "dir.h"
+#include "partial-utils.h"
+
+/* Trace key used by this command's trace_printf_key() calls. */
+static struct trace_key trace_partial = TRACE_KEY_INIT(PARTIAL);
+
+/* Option state, filled in by parse_options() in cmd_ls_partial(). */
+static int verbose;
+static int ignore_sparse;
+
+/* Usage text shown by usage_with_options(). */
+static const char * const ls_partial_usage[] = {
+	N_("git ls-partial [<options>] <tree-ish>"),
+	NULL
+};
+
+/*
+ * Resolve the <tree-ish> argument to an object name and return the
+ * tree it leads to.  Dies when the name cannot be resolved or when
+ * no tree can be parsed from it.  In verbose mode the resolved name
+ * and the argument as typed are printed to stdout before the tree
+ * is parsed.
+ */
+static struct tree *lookup_tree_from_treeish(const char *arg)
+{
+	unsigned char sha1[20];
+	struct tree *root;
+
+	if (get_sha1(arg, sha1) != 0)
+		die("not a valid object name '%s'", arg);
+
+	trace_printf_key(&trace_partial,
+			 "ls-partial: treeish '%s' '%s'\n",
+			 arg, sha1_to_hex(sha1));
+
+	if (verbose) {
+		printf("commit\t%s\n", sha1_to_hex(sha1));
+		printf("branch\t%s\n", arg);
+	}
+
+	root = parse_tree_indirect(sha1);
+	if (root == NULL)
+		die("not a tree object '%s'", arg);
+	return root;
+}
+
+/* Print the object name of every row in <vec>, one per line. */
+static void print_results(const struct pu_vec *vec)
+{
+	unsigned int k; /* same type as vec->data_nr: no signed/unsigned compare */
+
+	for (k = 0; k < vec->data_nr; k++)
+		printf("%s\n", oid_to_hex(&vec->data[k]->oid));
+}
+
+/*
+ * Print "<object name><TAB><full path>" for every row in <vec>,
+ * one row per line.
+ */
+static void print_results_verbose(const struct pu_vec *vec)
+{
+	unsigned int k; /* same type as vec->data_nr: no signed/unsigned compare */
+
+	/* TODO Consider -z version */
+
+	for (k = 0; k < vec->data_nr; k++)
+		printf("%s\t%s\n", oid_to_hex(&vec->data[k]->oid), vec->data[k]->fullpath.buf);
+}
+
+/*
+ * Entry point of "git ls-partial".  Enumerates the entries below
+ * <tree-ish> (argv[0]; any further arguments are handed to
+ * pu_vec_ls_tree()), optionally narrows them to the sparse-checkout
+ * definition, keeps the ones pu_vec_filter_missing() reports, and
+ * prints them: object names only, or name and path with --verbose.
+ * Always returns 0; failures die() in the helpers.
+ */
+int cmd_ls_partial(int argc, const char **argv, const char *prefix)
+{
+	struct exclude_list el;
+	struct pu_vec *candidates;
+	struct pu_vec *missing;
+	struct tree *tree;
+	const struct option ls_partial_options[] = {
+		OPT__VERBOSE(&verbose, N_("show verbose blob details")),
+		OPT_BOOL(0, "ignore-sparse", &ignore_sparse,
+			 N_("ignore sparse-checkout settings (scan whole tree)")),
+		OPT_END()
+	};
+
+	git_config(git_default_config, NULL);
+	argc = parse_options(argc, argv, prefix,
+			     ls_partial_options, ls_partial_usage, 0);
+	if (argc < 1)
+		usage_with_options(ls_partial_usage, ls_partial_options);
+
+	tree = lookup_tree_from_treeish(argv[0]);
+	candidates = pu_vec_ls_tree(tree, prefix, argv + 1);
+
+	/*
+	 * Narrow to the sparse-checkout only when it is to be honored
+	 * and its definitions could be loaded; otherwise keep the
+	 * whole listing.  With --ignore-sparse the file is not read.
+	 */
+	if (!ignore_sparse &&
+	    pu_load_sparse_definitions("info/sparse-checkout", &el) >= 0)
+		candidates = pu_vec_filter_sparse(candidates, &el);
+
+	missing = pu_vec_filter_missing(candidates);
+
+	if (verbose)
+		print_results_verbose(missing);
+	else
+		print_results(missing);
+
+	return 0;
+}
diff --git a/git.c b/git.c
index 33f52ac..ef1e019 100644
--- a/git.c
+++ b/git.c
@@ -444,6 +444,7 @@ static struct cmd_struct commands[] = {
{ "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY },
{ "log", cmd_log, RUN_SETUP },
{ "ls-files", cmd_ls_files, RUN_SETUP | SUPPORT_SUPER_PREFIX },
+ { "ls-partial", cmd_ls_partial, RUN_SETUP },
{ "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY },
{ "ls-tree", cmd_ls_tree, RUN_SETUP },
{ "mailinfo", cmd_mailinfo, RUN_SETUP_GENTLY },
diff --git a/partial-utils.c b/partial-utils.c
new file mode 100644
index 0000000..b75e91e
--- /dev/null
+++ b/partial-utils.c
@@ -0,0 +1,279 @@
+#include "cache.h"
+#include "blob.h"
+#include "tree.h"
+#include "commit.h"
+#include "quote.h"
+#include "builtin.h"
+#include "parse-options.h"
+#include "pathspec.h"
+#include "dir.h"
+#include "partial-utils.h"
+
+static struct trace_key trace_partial_utils = TRACE_KEY_INIT(PARTIAL_UTILS);
+
+/*
+ * Emit one trace line for <row> on the partial-utils trace key:
+ * the caller's <label>, then the row's mode in octal, its object
+ * name and its full path.
+ */
+void pu_row_trace(const struct pu_row *row, const char *label)
+{
+	const struct strbuf *path = &row->fullpath;
+
+	trace_printf_key(&trace_partial_utils,
+			 "%s: %06o %s %.*s\n",
+			 label, row->mode, oid_to_hex(&row->oid),
+			 (int)path->len, path->buf);
+}
+
+/*
+ * Allocate and fill in a row for one tree entry.  The row's full
+ * path is <base> immediately followed by <entryname>, and
+ * entryname_offset records where the entry name starts within it.
+ * The new row is traced with the label "alloc" and returned.
+ */
+struct pu_row *pu_row_alloc(const unsigned char *sha1,
+			    const struct strbuf *base,
+			    const char *entryname,
+			    unsigned mode)
+{
+	struct pu_row *row = xcalloc(1, sizeof(*row));
+
+	row->mode = mode;
+	row->entryname_offset = base->len;
+	hashcpy(row->oid.hash, sha1);
+
+	/* size the buffer for the whole path up front */
+	strbuf_init(&row->fullpath, base->len + strlen(entryname) + 1);
+	if (base->len > 0)
+		strbuf_addbuf(&row->fullpath, base);
+	strbuf_addstr(&row->fullpath, entryname);
+
+	pu_row_trace(row, "alloc");
+	return row;
+}
+
+/*
+ * Create an empty vector with room reserved for <nr_pre_alloc>
+ * row pointers.
+ */
+struct pu_vec *pu_vec_alloc(unsigned int nr_pre_alloc)
+{
+	struct pu_vec *vec = xcalloc(1, sizeof(*vec));
+
+	vec->data_alloc = nr_pre_alloc;
+	vec->data = xcalloc(nr_pre_alloc, sizeof(*vec->data));
+	return vec;
+}
+
+/* Store <row> in the next free slot of <vec>, growing the array if needed. */
+void pu_vec_append(struct pu_vec *vec, struct pu_row *row)
+{
+	unsigned int slot = vec->data_nr;
+
+	ALLOC_GROW(vec->data, slot + 1, vec->data_alloc);
+	vec->data[slot] = row;
+	vec->data_nr = slot + 1;
+}
+
+/*
+ * Callback handed to read_tree_recursive() by pu_vec_ls_tree().
+ * <context> is the vector being filled: every entry except a
+ * submodule gets a row appended, and directories additionally
+ * return READ_TREE_RECURSIVE.
+ */
+static int ls_tree_cb(const unsigned char *sha1,
+		      struct strbuf *base,
+		      const char *pathname,
+		      unsigned mode,
+		      int stage,
+		      void *context)
+{
+	struct pu_vec *vec = context;
+
+	/* omit submodules */
+	if (S_ISGITLINK(mode))
+		return 0;
+
+	pu_vec_append(vec, pu_row_alloc(sha1, base, pathname, mode));
+
+	return S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0;
+}
+
+/*
+ * Walk <tree> recursively and return a new vector with one row per
+ * entry visited (directories included, submodules omitted; see
+ * ls_tree_cb()).  <prefix> and <argv> are parsed into the pathspec
+ * that limits the walk.  Dies if the tree cannot be read.
+ */
+struct pu_vec *pu_vec_ls_tree(
+	struct tree *tree,
+	const char *prefix,
+	const char **argv)
+{
+	struct pu_vec *vec;
+	struct pathspec pathspec;
+	int k;
+
+	vec = pu_vec_alloc(PU_VEC_DEFAULT_SIZE);
+
+	parse_pathspec(
+		&pathspec, PATHSPEC_GLOB | PATHSPEC_ICASE | PATHSPEC_EXCLUDE,
+		PATHSPEC_PREFER_CWD, prefix, argv);
+
+	/* Mark every item as wildcard-free over its whole length. */
+	for (k = 0; k < pathspec.nr; k++)
+		pathspec.items[k].nowildcard_len = pathspec.items[k].len;
+	pathspec.has_wildcard = 0;
+
+	if (read_tree_recursive(tree, "", 0, 0, &pathspec, ls_tree_cb, vec) != 0)
+		die("Could not read tree");
+
+	/*
+	 * Each row built its own path in pu_row_alloc(), so nothing in
+	 * <vec> points into the pathspec; release it instead of leaking.
+	 */
+	clear_pathspec(&pathspec);
+
+	return vec;
+}
+
+/*
+ * Load the sparse-checkout patterns from <path> into (*pel).
+ * <path> is resolved with git_pathdup(), e.g. "info/sparse-checkout".
+ * (*pel) is zeroed before loading.  Returns the result of
+ * add_excludes_from_file_to_list().
+ */
+int pu_load_sparse_definitions(
+	const char *path,
+	struct exclude_list *pel)
+{
+	int result;
+	/*
+	 * Honor the caller's <path> (it used to be ignored in favor of a
+	 * hard-coded name).  git_pathdup() takes a printf-style format,
+	 * so <path> is passed as data through "%s".
+	 */
+	char *sparse = git_pathdup("%s", path);
+
+	memset(pel, 0, sizeof(*pel));
+	result = add_excludes_from_file_to_list(sparse, "", 0, pel, 0);
+	free(sparse);
+	return result;
+}
+
+/*
+ * Translate a tree-entry mode into a DT_* value.  Submodules are
+ * reported as directories; anything unrecognized is DT_UNKNOWN.
+ */
+static int mode_to_dtype(unsigned mode)
+{
+	int dtype = DT_UNKNOWN;
+
+	if (S_ISREG(mode))
+		dtype = DT_REG;
+	else if (S_ISDIR(mode) || S_ISGITLINK(mode))
+		dtype = DT_DIR;
+	else if (S_ISLNK(mode))
+		dtype = DT_LNK;
+
+	return dtype;
+}
+
+static int apply_excludes_1( /* forward declaration: called by apply_excludes_dir() below */
+ struct pu_row **subset,
+ unsigned int nr,
+ struct strbuf *prefix,
+ struct exclude_list *pel,
+ int defval,
+ struct pu_vec *vec_out);
+
+/* apply directory rules. based on clear_ce_flags_dir()
+ *
+ * On entry <prefix> holds the directory's path without a trailing
+ * slash and <basename> points at its last component inside
+ * prefix->buf.  The directory itself is matched against <pel>; a
+ * negative result is replaced by <defval>.  The leading run of rows
+ * in subset[0..nr) whose fullpath starts with "<prefix>/" is then
+ * passed to apply_excludes_1() with that value as the new default.
+ *
+ * Returns what apply_excludes_1() returns.  <prefix> has its entry
+ * value again when this function returns.
+ */
+static int apply_excludes_dir(
+ struct pu_row **subset,
+ unsigned int nr,
+ struct strbuf *prefix,
+ char *basename,
+ struct exclude_list *pel,
+ int defval,
+ struct pu_vec *vec_out)
+{
+ struct pu_row **subset_end;
+ int dtype = DT_DIR;
+ int ret = is_excluded_from_list(
+ prefix->buf, prefix->len, basename, &dtype, pel);
+ int rc;
+
+ strbuf_addch(prefix, '/'); /* from here on <prefix> is "<dir>/" */
+
+ if (ret < 0) /* no verdict for this directory: use the caller's default */
+ ret = defval;
+
+ for (subset_end = subset; subset_end != subset + nr; subset_end++) { /* find the end of the run of rows under "<dir>/" */
+ struct pu_row *row = *subset_end;
+ if (strncmp(row->fullpath.buf, prefix->buf, prefix->len))
+ break;
+ }
+
+ rc = apply_excludes_1(
+ subset, subset_end - subset,
+ prefix, pel, ret,
+ vec_out);
+ strbuf_setlen(prefix, prefix->len - 1); /* drop the '/' added above */
+ return rc;
+}
+
+/* apply sparse rules to subset[0..nr). based on clear_ce_flags_1()
+ *
+ * Rows are consumed from the front for as long as their fullpath
+ * starts with <prefix>.  A row that still has a '/' after the prefix
+ * lies in a subdirectory: that directory's name is appended to
+ * <prefix> and the rows are handed to apply_excludes_dir(), falling
+ * back to a direct recursive call when it consumed nothing.  A row
+ * without a further '/' is matched against <pel> itself; a negative
+ * result is replaced by <defval>, and the row is appended to
+ * <vec_out> when the final value is positive.
+ *
+ * Returns the number of rows consumed.  <prefix> has its entry value
+ * again when this function returns.
+ */
+static int apply_excludes_1(
+ struct pu_row **subset,
+ unsigned int nr,
+ struct strbuf *prefix,
+ struct exclude_list *pel,
+ int defval,
+ struct pu_vec *vec_out)
+{
+ struct pu_row **subset_end = subset + nr;
+
+ while (subset != subset_end) {
+ struct pu_row *row = *subset;
+ const char *name, *slash;
+ int len, dtype, val;
+
+ if (prefix->len && strncmp(row->fullpath.buf, prefix->buf, prefix->len)) /* first row outside <prefix>: stop */
+ break;
+
+ name = row->fullpath.buf + prefix->len; /* path relative to <prefix> */
+ slash = strchr(name, '/');
+
+ if (slash) { /* row is inside a subdirectory of <prefix> */
+ int processed;
+
+ len = slash - name; /* length of the subdirectory's name */
+ strbuf_add(prefix, name, len);
+
+ processed = apply_excludes_dir(
+ subset, subset_end - subset,
+ prefix, prefix->buf + prefix->len - len,
+ pel, defval,
+ vec_out);
+
+ if (processed) { /* the directory pass consumed rows */
+ subset += processed;
+ strbuf_setlen(prefix, prefix->len - len); /* remove the name appended above */
+ continue;
+ }
+
+ strbuf_addch(prefix, '/'); /* nothing consumed: descend directly */
+ subset += apply_excludes_1(
+ subset, subset_end - subset,
+ prefix, pel, defval,
+ vec_out);
+ strbuf_setlen(prefix, prefix->len - len - 1); /* remove the name and the '/' */
+ continue;
+ }
+
+ dtype = mode_to_dtype(row->mode); /* no further '/': match the row itself */
+ val = is_excluded_from_list(
+ row->fullpath.buf, row->fullpath.len, name, &dtype, pel);
+ if (val < 0) /* no verdict: use the inherited default */
+ val = defval;
+ if (val > 0) {
+ pu_row_trace(row, "sparse");
+ pu_vec_append(vec_out, row);
+ }
+ subset++;
+ }
+
+ return nr - (subset_end - subset); /* rows consumed by this call */
+}
+
+/*
+ * Return a new vector holding the rows of <vec_in> selected by the
+ * sparse-checkout patterns in <pel>.  The walk starts with a default
+ * of 0, so a row is kept only when apply_excludes_1() arrives at a
+ * positive value for it.  The returned vector shares its rows with
+ * <vec_in>; they are not copied.
+ */
+struct pu_vec *pu_vec_filter_sparse(
+	const struct pu_vec *vec_in,
+	struct exclude_list *pel)
+{
+	struct pu_vec *vec_out;
+	struct strbuf prefix = STRBUF_INIT;
+	int defval = 0;
+
+	vec_out = pu_vec_alloc(vec_in->data_nr);
+
+	apply_excludes_1(
+		vec_in->data, vec_in->data_nr,
+		&prefix, pel, defval,
+		vec_out);
+
+	/*
+	 * The walk appends directory names to <prefix>, which allocates
+	 * its buffer; release it so the scratch path is not leaked.
+	 */
+	strbuf_release(&prefix);
+
+	return vec_out;
+}
+
+/*
+ * Return a new vector holding the rows of <vec_in> whose object
+ * has_sha1_file() does not find.  Each such row is traced with the
+ * label "missing".  The returned vector shares its rows with
+ * <vec_in>; they are not copied.
+ */
+struct pu_vec *pu_vec_filter_missing(
+	const struct pu_vec *vec_in)
+{
+	struct pu_vec *vec_out;
+	unsigned int k; /* same type as vec_in->data_nr: no signed/unsigned compare */
+
+	vec_out = pu_vec_alloc(vec_in->data_nr);
+
+	for (k = 0; k < vec_in->data_nr; k++) {
+		struct pu_row *row = vec_in->data[k];
+
+		if (has_sha1_file(row->oid.hash))
+			continue;
+		pu_row_trace(row, "missing");
+		pu_vec_append(vec_out, row);
+	}
+
+	return vec_out;
+}
diff --git a/partial-utils.h b/partial-utils.h
new file mode 100644
index 0000000..3bdf2e4
--- /dev/null
+++ b/partial-utils.h
@@ -0,0 +1,93 @@
+#ifndef PARTIAL_UTILS_H
+#define PARTIAL_UTILS_H
+
+/*
+ * A 'partial-utils row' represents a single item in the tree.
+ * This is conceptually equivalent to a cache_entry, but does
+ * not require an index_state and lets us operate on any commit
+ * and not be tied to the current worktree.
+ */
+struct pu_row
+{
+ struct strbuf fullpath; /* base path immediately followed by the entry name */
+ struct object_id oid; /* object name of the entry */
+ unsigned mode; /* mode of the entry as given to pu_row_alloc() */
+ unsigned entryname_offset; /* offset within fullpath at which the entry name starts */
+};
+
+/*
+ * A 'partial-utils vec' represents a vector of 'pu row'
+ * values using the normal vector machinery.
+ */
+struct pu_vec
+{
+ struct pu_row **data; /* array of row pointers */
+ unsigned int data_nr; /* number of entries of data[] in use */
+ unsigned int data_alloc; /* number of entries allocated for data[] */
+};
+
+#define PU_VEC_DEFAULT_SIZE (1024*1024) /* row slots pre-allocated by pu_vec_ls_tree() */
+
+/*
+ * Emit one trace line for the row: the given label followed by
+ * the row's mode (octal), object name and full path.
+ */
+void pu_row_trace(
+ const struct pu_row *row,
+ const char *label);
+/*
+ * Allocate a row for one tree entry.  The row's full path is
+ * <base> immediately followed by <entryname>.
+ */
+struct pu_row *pu_row_alloc(
+ const unsigned char *sha1,
+ const struct strbuf *base,
+ const char *entryname,
+ unsigned mode);
+/*
+ * Allocate an empty vector with room for <nr_pre_alloc> rows.
+ */
+struct pu_vec *pu_vec_alloc(
+ unsigned int nr_pre_alloc);
+
+/*
+ * Append the given row onto the vector WITHOUT
+ * assuming ownership of the pointer.
+ */
+void pu_vec_append(
+ struct pu_vec *vec,
+ struct pu_row *row);
+
+/*
+ * Enumerate the contents of the tree (recursively) into
+ * a vector of rows. This is essentially "ls-tree -r -t"
+ * into a vector.
+ */
+struct pu_vec *pu_vec_ls_tree(
+ struct tree *tree,
+ const char *prefix,
+ const char **argv);
+
+/*
+ * Load a sparse-checkout file into (*pel).
+ * Returns -1 if none or error.
+ */
+int pu_load_sparse_definitions(
+ const char *path,
+ struct exclude_list *pel);
+
+/*
+ * Filter the given vector using the sparse-checkout
+ * definitions and return new vector of just the paths
+ * that WOULD BE populated.
+ *
+ * The returned vector BORROWS rows from the input vector.
+ *
+ * This is loosely based upon clear_ce_flags() in unpack-trees.c
+ */
+struct pu_vec *pu_vec_filter_sparse(
+ const struct pu_vec *vec_in,
+ struct exclude_list *pel);
+
+/*
+ * Filter the given vector and return the list of blobs
+ * missing from the local ODB.
+ *
+ * The returned vector BORROWS rows from the input vector.
+ */
+struct pu_vec *pu_vec_filter_missing(
+ const struct pu_vec *vec_in);
+
+#endif /* PARTIAL_UTILS_H */
--
2.7.4
next prev parent reply other threads:[~2017-03-08 17:46 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-08 17:37 [PATCH 00/10] RFC Partial Clone and Fetch Jeff Hostetler
2017-03-08 17:37 ` [PATCH 01/10] pack-objects: eat CR in addition to LF after fgets Jeff Hostetler
2017-03-09 7:01 ` Jeff King
2017-03-09 15:46 ` Jeff Hostetler
2017-03-08 17:37 ` [PATCH 02/10] pack-objects: add --partial-by-size=n --partial-special Jeff Hostetler
2017-03-08 18:47 ` Junio C Hamano
2017-03-08 20:21 ` Jeff Hostetler
2017-03-09 7:04 ` Jeff King
2017-03-10 17:58 ` Brandon Williams
2017-03-10 18:03 ` Jeff King
2017-03-10 19:38 ` Junio C Hamano
2017-03-10 19:47 ` Jeff King
2017-03-09 7:31 ` Jeff King
2017-03-09 18:26 ` Jeff Hostetler
2017-03-08 17:37 ` [PATCH 03/10] pack-objects: test for --partial-by-size --partial-special Jeff Hostetler
2017-03-09 7:35 ` Jeff King
2017-03-09 18:11 ` Johannes Sixt
2017-03-08 17:37 ` [PATCH 04/10] upload-pack: add partial (sparse) fetch Jeff Hostetler
2017-03-09 7:48 ` Jeff King
2017-03-09 18:34 ` Jeff Hostetler
2017-03-09 19:09 ` Jeff King
2017-03-08 17:38 ` [PATCH 05/10] fetch-pack: add partial-by-size and partial-special Jeff Hostetler
2017-03-08 17:38 ` [PATCH 06/10] rev-list: add --allow-partial option to relax connectivity checks Jeff Hostetler
2017-03-08 18:55 ` Junio C Hamano
2017-03-08 20:10 ` Jeff Hostetler
2017-03-09 7:56 ` Jeff King
2017-03-09 18:38 ` Jeff Hostetler
2017-03-08 17:38 ` [PATCH 07/10] index-pack: add --allow-partial option to relax blob existence checks Jeff Hostetler
2017-03-08 17:38 ` [PATCH 08/10] fetch: add partial-by-size and partial-special arguments Jeff Hostetler
2017-03-08 17:38 ` [PATCH 09/10] clone: " Jeff Hostetler
2017-03-08 17:38 ` Jeff Hostetler [this message]
-- strict thread matches above, loose matches on Subject: below --
2017-03-08 18:50 [PATCH 00/10] RFC Partial Clone and Fetch git
2017-03-08 18:50 ` [PATCH 10/10] ls-partial: created command to list missing blobs git
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1488994685-37403-11-git-send-email-jeffhost@microsoft.com \
--to=jeffhost@microsoft.com \
--cc=benpeart@microsoft.com \
--cc=git@jeffhostetler.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=jonathantanmy@google.com \
--cc=markbt@efaref.net \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.