From: Duy Nguyen <pclouds@gmail.com>
To: Stefan Beller <stefanbeller@googlemail.com>
Cc: GIT Mailing-list <git@vger.kernel.org>
Subject: Re: Rewriting git-repack.sh in C
Date: Fri, 2 Aug 2013 23:36:59 +0700 [thread overview]
Message-ID: <20130802163659.GA28693@lanh> (raw)
In-Reply-To: <CACsJy8CaTA2vT0CxOAm0FacCWjNDJjZhg6mwSyspTChia-5ppQ@mail.gmail.com>
On Fri, Aug 02, 2013 at 09:10:59PM +0700, Duy Nguyen wrote:
> On Fri, Aug 2, 2013 at 8:48 PM, Stefan Beller
> <stefanbeller@googlemail.com> wrote:
> > Hello,
> >
> > I'd like to rewrite the repack shell script in C.
> > So I tried the naive approach reading the man page and
> > the script itself and write C program by matching each block/line
> > of the script with a function in C
> >
> > ...
> >
> > So my question is, how you'd generally approach rewriting a
> > shell script in C.
>
> Start a new process via start_command/run_command interface. It's
> safer to retain the process boundary at this stage. You can try to
> integrate further later.
I was in the middle of something and somehow read this as "rewriting
git-rebase.sh" :-X
For git-repack, because it ends with a single pack-objects call, we
might not need a new process after all (very much like tail call
optimization). This is what I have had for some time, but I have not yet
polished it for submission (and it may be a bit broken after the last
rebase). Maybe you can work off this, or start from scratch if you think
it's too messy. It basically teaches pack-objects the extra features that
repack needs, then makes repack a wrapper around pack-objects.
-- 8< --
commit 25569a3958c3272b3eb5fa50dea680948f7a2768 (build-in-repack)
Author: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Date: Wed Nov 9 19:21:39 2011 +0700
Build in git-repack
pack-objects learns a few more options to take over what's been done
by git-repack.sh. cmd_repack() becomes a wrapper around
cmd_pack_objects().
diff --git a/Makefile b/Makefile
index 0f931a2..b4010a6 100644
--- a/Makefile
+++ b/Makefile
@@ -460,7 +460,6 @@ SCRIPT_SH += git-mergetool.sh
SCRIPT_SH += git-pull.sh
SCRIPT_SH += git-quiltimport.sh
SCRIPT_SH += git-rebase.sh
-SCRIPT_SH += git-repack.sh
SCRIPT_SH += git-request-pull.sh
SCRIPT_SH += git-stash.sh
SCRIPT_SH += git-submodule.sh
@@ -584,6 +583,7 @@ BUILT_INS += git-init$X
BUILT_INS += git-merge-subtree$X
BUILT_INS += git-peek-remote$X
BUILT_INS += git-repo-config$X
+BUILT_INS += git-repack$X
BUILT_INS += git-show$X
BUILT_INS += git-stage$X
BUILT_INS += git-status$X
diff --git a/builtin.h b/builtin.h
index 64bab6b..feb958f 100644
--- a/builtin.h
+++ b/builtin.h
@@ -117,6 +117,7 @@ extern int cmd_reflog(int argc, const char **argv, const char *prefix);
extern int cmd_remote(int argc, const char **argv, const char *prefix);
extern int cmd_remote_ext(int argc, const char **argv, const char *prefix);
extern int cmd_remote_fd(int argc, const char **argv, const char *prefix);
+extern int cmd_repack(int argc, const char **argv, const char *prefix);
extern int cmd_repo_config(int argc, const char **argv, const char *prefix);
extern int cmd_rerere(int argc, const char **argv, const char *prefix);
extern int cmd_reset(int argc, const char **argv, const char *prefix);
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index f069462..1742ea1 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -18,10 +18,17 @@
#include "refs.h"
#include "streaming.h"
#include "thread-utils.h"
+#include "sigchain.h"
static const char *pack_usage[] = {
N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"),
N_("git pack-objects [options...] base-name [< ref-list | < object-list]"),
+ N_("git pack-objects --repack [options...]"),
+ NULL
+};
+
+static char const * const repack_usage[] = {
+ N_("git repack [options]"),
NULL
};
@@ -103,6 +110,15 @@ static struct object_entry *locate_object_entry(const unsigned char *sha1);
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;
+#define REPACK_IN_PROGRESS (1 << 0)
+#define REPACK_UPDATE_INFO (1 << 1)
+#define REPACK_ALL_INTO_ONE (1 << 2)
+#define REPACK_REMOVE_REDUNDANT (1 << 3)
+
+static int repack_flags, nr_written_packs;
+static int repack_usedeltabaseoffset;
+static struct string_list written_packs;
+static struct string_list backup_files;
static void *get_delta(struct object_entry *entry)
{
@@ -792,9 +808,19 @@ static void write_pack_file(void)
snprintf(tmpname, sizeof(tmpname), "%s-", base_name);
finish_tmp_packfile(tmpname, pack_tmp_name,
written_list, nr_written,
- &pack_idx_opts, sha1);
+ &pack_idx_opts, sha1,
+ repack_flags & REPACK_IN_PROGRESS ?
+ &backup_files : NULL);
free(pack_tmp_name);
- puts(sha1_to_hex(sha1));
+ if (repack_flags & REPACK_IN_PROGRESS) {
+ int len = strlen(tmpname);
+ char *s = xmalloc(len + 2);
+ memcpy(s, tmpname, len - 4);
+ memcpy(s + len - 4, ".pack", 6);
+ string_list_append(&written_packs, s);
+ nr_written_packs++;
+ } else
+ puts(sha1_to_hex(sha1));
}
/* mark written objects as written to previous pack */
@@ -2359,7 +2385,8 @@ static void get_object_list(int ac, const char **av)
save_commit_buffer = 0;
setup_revisions(ac, av, &revs, NULL);
- while (fgets(line, sizeof(line), stdin) != NULL) {
+ while (!(repack_flags & REPACK_IN_PROGRESS) &&
+ fgets(line, sizeof(line), stdin) != NULL) {
int len = strlen(line);
if (len && line[len - 1] == '\n')
line[--len] = 0;
@@ -2387,6 +2414,30 @@ static void get_object_list(int ac, const char **av)
loosen_unused_packed_objects(&revs);
}
+static void rollback_repack(void)
+{
+ struct strbuf dst = STRBUF_INIT;
+ int i, ret;
+ for (i = 0; i < backup_files.nr; i++) {
+ const char *src = backup_files.items[i].string;
+ strbuf_addstr(&dst, src);
+ strbuf_setlen(&dst, dst.len - 4); /* remove .old */
+ ret = rename(src, dst.buf);
+ if (ret)
+ warning("failed to restore %s: %s", src, strerror(errno));
+ strbuf_setlen(&dst, 0);
+ }
+ strbuf_release(&dst);
+ string_list_clear(&backup_files, 0);
+}
+
+static void rollback_repack_on_signal(int signo)
+{
+ rollback_repack();
+ sigchain_pop(signo);
+ raise(signo);
+}
+
static int option_parse_index_version(const struct option *opt,
const char *arg, int unset)
{
@@ -2436,11 +2487,12 @@ static int option_parse_ulong(const struct option *opt,
int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
+ struct strbuf repack_base_name = STRBUF_INIT;
int use_internal_rev_list = 0;
int thin = 0;
int all_progress_implied = 0;
const char *rp_av[6];
- int rp_ac = 0;
+ int i, rp_ac = 0;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
struct option pack_objects_options[] = {
OPT_SET_INT('q', "quiet", &progress,
@@ -2505,6 +2557,16 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
N_("pack compression level")),
OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
N_("do not hide commits by grafts"), 0),
+
+ OPT_BIT(0, "repack", &repack_flags,
+ N_("repack mode"), REPACK_IN_PROGRESS),
+ OPT_BIT(0, "repack-all", &repack_flags,
+ N_("repack everything into one pack"), REPACK_ALL_INTO_ONE),
+ OPT_BIT(0, "remove-redundant", &repack_flags,
+ N_("remove redundant objects after repack"), REPACK_REMOVE_REDUNDANT),
+ OPT_BIT(0, "update-info", &repack_flags,
+ N_("run git-update-server-info after repack"), REPACK_UPDATE_INFO),
+
OPT_END(),
};
@@ -2556,6 +2618,36 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (delta_search_threads != 1)
warning("no threads support, ignoring --threads");
#endif
+ if ((repack_flags & REPACK_IN_PROGRESS) == 0 &&
+ (repack_flags & ~REPACK_IN_PROGRESS))
+ die("--repack must be given for any repack related options");
+ if (repack_flags & REPACK_IN_PROGRESS) {
+ if (pack_to_stdout)
+ die("--stdout cannot be used with --repack");
+ if (argc)
+ die("base name cannot be used with --repack");
+
+ rp_av[rp_ac++] = "--all";
+ rp_av[rp_ac++] = "--reflog";
+ use_internal_rev_list = 1;
+
+ grafts_replace_parents = 0; /* --keep-true-parents */
+ ignore_packed_keep = 1; /* --honor-pack-keep */
+ non_empty = 1; /* --non-empty */
+
+ if (!(repack_flags & REPACK_ALL_INTO_ONE)) {
+ incremental = 1; /* --incremental */
+ rp_av[rp_ac++] = "--unpacked";
+ }
+
+ strbuf_addf(&repack_base_name,
+ "%s/pack/pack", get_object_directory());
+ base_name = repack_base_name.buf;
+
+ sigchain_push_common(rollback_repack_on_signal);
+ atexit(rollback_repack);
+ }
+
if (!pack_to_stdout && !pack_size_limit)
pack_size_limit = pack_size_limit_cfg;
if (pack_to_stdout && pack_size_limit)
@@ -2598,5 +2690,184 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
fprintf(stderr, "Total %"PRIu32" (delta %"PRIu32"),"
" reused %"PRIu32" (delta %"PRIu32")\n",
written, written_delta, reused, reused_delta);
+
+ if (!(repack_flags & REPACK_IN_PROGRESS))
+ return 0;
+
+ if (!nr_written_packs) {
+ printf(_("Nothing new to pack.\n"));
+ return 0;
+ }
+
+ /* At this point all new packs should be in place. We can
+ safely remove old ones */
+ for (i = 0; i < backup_files.nr; i++) {
+ const char *s = backup_files.items[i].string;
+ int ret = unlink(s);
+ if (ret)
+ warning("failed to remove %s: %s", s, strerror(errno));
+ }
+ string_list_clear(&backup_files, 0);
+
+ if (repack_flags & REPACK_REMOVE_REDUNDANT) {
+ struct packed_git *p;
+ struct string_list to_be_removed = STRING_LIST_INIT_DUP;
+
+ /* free_pack_by_name() may have freed a few packs in
+ write_pack_file() */
+ reprepare_packed_git();
+ for (p = packed_git; p; p = p->next) {
+ if (!p->pack_local || p->pack_keep)
+ continue;
+
+ for (i = 0; i < written_packs.nr; i++) {
+ char *s = written_packs.items[i].string;
+ if (!strcmp(s, p->pack_name))
+ break;
+ }
+ if (i < written_packs.nr)
+ continue;
+
+ string_list_append(&to_be_removed, p->pack_name);
+ }
+ written_packs.strdup_strings = 1;
+ string_list_clear(&written_packs, 0);
+
+ for (i = 0; i < to_be_removed.nr; i++) {
+ char *path = to_be_removed.items[i].string;
+
+ /* Windows limitation on unlink().
+ See c74faea19e39ca933492f697596310397175c329 */
+ free_pack_by_name(path);
+
+ if (unlink(path))
+ warning("failed to remove %s: %s", path, strerror(errno));
+ strcpy(path + strlen(path)-5, ".idx");
+ if (unlink(path))
+ warning("failed to remove %s: %s", path, strerror(errno));
+ }
+ string_list_clear(&to_be_removed, 0);
+
+ reprepare_packed_git();
+ prune_packed_objects(progress ? PRUNE_PACKED_VERBOSE : 0);
+ }
+
+ if (repack_flags & REPACK_UPDATE_INFO)
+ update_server_info(0);
+
return 0;
}
+
+static int repack_config(const char *k, const char *v, void *cb)
+{
+ if (!strcasecmp(k, "repack.usedeltabaseoffset")) {
+ repack_usedeltabaseoffset = git_config_bool(k, v);
+ return 0;
+ }
+ return git_default_config(k, v, cb);
+}
+
+int cmd_repack(int argc, const char **argv, const char *prefix)
+{
+ int all_in_one = 0;
+ int all_in_one_and_unreachable = 0;
+ int unpack_unreachable = 0;
+ int remove_redundant = 0;
+ int no_reuse_delta = 0;
+ int no_reuse_object = 0;
+ int no_update = 0;
+ int quiet = 0;
+ int local = 0;
+
+ struct option opts[] = {
+ OPT_BOOL('a', NULL, &all_in_one,
+ "pack everything in a single pack"),
+ OPT_BOOL('A', NULL, &all_in_one_and_unreachable,
+ "same as -a, and turn unreachable objects loose"),
+ OPT_BOOL('d', NULL, &remove_redundant,
+ "remove redundant packs, and run git-prune-packed"),
+ OPT_BOOL('f', NULL, &no_reuse_delta,
+ "pass --no-reuse-delta to git-pack-objects"),
+ OPT_BOOL('F', NULL, &no_reuse_object,
+ "pass --no-reuse-object to git-pack-objects"),
+ OPT_BOOL('n', NULL, &no_update,
+ "do not run git-update-server-info"),
+ OPT_BOOL('q', NULL, &quiet, "be quiet"),
+ OPT_BOOL('l', NULL, &local,
+ "pass --local to git-pack-objects"),
+ { OPTION_ARGUMENT, 0, "window", NULL, "n",
+ "size of the window used for delta compression", 0 },
+ { OPTION_ARGUMENT, 0, "window-memory", NULL, "n",
+ "same as the above, but limit memory size instead of entries count", 0 },
+ { OPTION_ARGUMENT, 0, "depth", NULL, "n",
+ "limits the maximum delta depth", 0 },
+ { OPTION_ARGUMENT, 0, "max-pack-size", NULL, "n",
+ "maximum size of each packfile", 0},
+ OPT_END(),
+ };
+
+ const char *av[] = { "pack-objects", "--repack",
+ NULL, /* --[no-]update-info */
+ NULL, /* --delta-base-offset */
+ NULL, /* --repack-all */
+ NULL, /* --remove-redundant */
+ NULL, /* --no-reuse-delta */
+ NULL, /* --no-reuse-object */
+ NULL, /* --local */
+ NULL, /* -q */
+ NULL, /* --unpack-unreachable */
+ NULL, /* --window */
+ NULL, /* --window-memory */
+ NULL, /* --depth */
+ NULL, /* --max-pack-size */
+ NULL
+ };
+ int ac = 2;
+
+ git_config(repack_config, NULL);
+
+ argc = parse_options(argc, argv, prefix, opts, repack_usage, 0);
+
+ if (no_update)
+ av[ac++] = "--no-update-info";
+ else
+ av[ac++] = "--update-info";
+ if (repack_usedeltabaseoffset)
+ av[ac++] = "--delta-base-offset";
+ if (all_in_one_and_unreachable) {
+ av[ac++] = "--repack-all";
+ all_in_one = 1;
+ unpack_unreachable = 1;
+ }
+ if (all_in_one)
+ av[ac++] = "--repack-all";
+ if (remove_redundant)
+ av[ac++] = "--remove-redundant";
+ if (no_reuse_delta)
+ av[ac++] = "--no-reuse-delta";
+ if (no_reuse_object)
+ av[ac++] = "--no-reuse-object";
+ if (local)
+ av[ac++] = "--local";
+ if (quiet)
+ av[ac++] = "-q";
+ if ((ac + argc) * sizeof(*av) > sizeof(av))
+ die("Too many options");
+ memcpy(av + ac, argv, argc * sizeof(*argv));
+ ac += argc;
+
+ if (all_in_one && remove_redundant) {
+ struct packed_git *p;
+
+ prepare_packed_git();
+ for (p = packed_git; p; p = p->next) {
+ if (!p->pack_keep &&
+ unpack_unreachable && remove_redundant) {
+ av[ac++] = "--unpack-unreachable";
+ break;
+ }
+ }
+ }
+
+ return cmd_pack_objects(ac, av, prefix);
+}
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 6b0b6d4..3ca3c55 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -46,7 +46,7 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
sprintf(packname, "%s/pack/pack-", get_object_directory());
finish_tmp_packfile(packname, state->pack_tmp_name,
state->written, state->nr_written,
- &state->pack_idx_opts, sha1);
+ &state->pack_idx_opts, sha1, NULL);
for (i = 0; i < state->nr_written; i++)
free(state->written[i]);
diff --git a/git-repack.sh b/contrib/examples/git-repack.sh
similarity index 100%
rename from git-repack.sh
rename to contrib/examples/git-repack.sh
diff --git a/git.c b/git.c
index 1ada169..db4e4b3 100644
--- a/git.c
+++ b/git.c
@@ -389,6 +389,7 @@ static void handle_internal_command(int argc, const char **argv)
{ "remote-ext", cmd_remote_ext },
{ "remote-fd", cmd_remote_fd },
{ "replace", cmd_replace, RUN_SETUP },
+ { "repack", cmd_repack, RUN_SETUP },
{ "repo-config", cmd_repo_config, RUN_SETUP_GENTLY },
{ "rerere", cmd_rerere, RUN_SETUP },
{ "reset", cmd_reset, RUN_SETUP },
diff --git a/pack-write.c b/pack-write.c
index ca9e63b..e6aa7e3 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -1,6 +1,7 @@
#include "cache.h"
#include "pack.h"
#include "csum-file.h"
+#include "string-list.h"
void reset_pack_idx_option(struct pack_idx_option *opts)
{
@@ -348,10 +349,12 @@ void finish_tmp_packfile(char *name_buffer,
struct pack_idx_entry **written_list,
uint32_t nr_written,
struct pack_idx_option *pack_idx_opts,
- unsigned char sha1[])
+ unsigned char sha1[],
+ struct string_list *backup_files)
{
const char *idx_tmp_name;
char *end_of_name_prefix = strrchr(name_buffer, 0);
+ struct stat st;
if (adjust_shared_perm(pack_tmp_name))
die_errno("unable to make temporary pack file readable");
@@ -368,6 +371,14 @@ void finish_tmp_packfile(char *name_buffer,
die_errno("unable to rename temporary pack file");
sprintf(end_of_name_prefix, "%s.idx", sha1_to_hex(sha1));
+ if (backup_files && !stat(name_buffer, &st)) {
+ struct strbuf old = STRBUF_INIT;
+ strbuf_addf(&old, "%s.old", name_buffer);
+ if (rename(name_buffer, old.buf))
+ die_errno("unable to rename pack %s", name_buffer);
+ string_list_append(backup_files, strbuf_detach(&old, NULL));
+ }
+ backup_file(name_buffer);
if (rename(idx_tmp_name, name_buffer))
die_errno("unable to rename temporary index file");
diff --git a/pack.h b/pack.h
index aa6ee7d..d3f92ad 100644
--- a/pack.h
+++ b/pack.h
@@ -90,7 +90,9 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch
#define PH_ERROR_PROTOCOL (-3)
extern int read_pack_header(int fd, struct pack_header *);
+struct string_list;
+
extern struct sha1file *create_tmp_packfile(char **pack_tmp_name);
-extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]);
+extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[], struct string_list *backup_files);
#endif
-- 8< --
next prev parent reply other threads:[~2013-08-02 16:36 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-08-02 13:48 Rewriting git-repack.sh in C Stefan Beller
2013-08-02 14:10 ` Duy Nguyen
2013-08-02 16:36 ` Duy Nguyen [this message]
2013-08-03 6:33 ` Fredrik Gustafsson
2013-08-03 10:03 ` Duy Nguyen
2013-08-07 14:00 ` [PATCH 0/4] " Stefan Beller
2013-08-07 14:00 ` [PATCH 1/4] Build in git-repack Stefan Beller
2013-08-07 14:28 ` Matthieu Moy
2013-08-07 15:48 ` Junio C Hamano
2013-08-07 16:45 ` Stefan Beller
2013-08-08 2:44 ` Duy Nguyen
2013-08-07 14:00 ` [PATCH 2/4] backup_file dummy function Stefan Beller
2013-08-08 2:45 ` Duy Nguyen
2013-08-07 14:00 ` [PATCH 3/4] pack-objects: do not print usage when repacking Stefan Beller
2013-08-08 6:40 ` Antoine Pelisse
2013-08-07 14:00 ` [PATCH 4/4] repack: add unpack-unreachable Stefan Beller
2013-08-05 10:34 ` Rewriting git-repack.sh in C Matthieu Moy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130802163659.GA28693@lanh \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=stefanbeller@googlemail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).