From: Duy Nguyen <pclouds@gmail.com>
To: Stefan Beller <stefanbeller@googlemail.com>
Cc: GIT Mailing-list <git@vger.kernel.org>
Subject: Re: Rewriting git-repack.sh in C
Date: Fri, 2 Aug 2013 23:36:59 +0700 [thread overview]
Message-ID: <20130802163659.GA28693@lanh> (raw)
In-Reply-To: <CACsJy8CaTA2vT0CxOAm0FacCWjNDJjZhg6mwSyspTChia-5ppQ@mail.gmail.com>
On Fri, Aug 02, 2013 at 09:10:59PM +0700, Duy Nguyen wrote:
> On Fri, Aug 2, 2013 at 8:48 PM, Stefan Beller
> <stefanbeller@googlemail.com> wrote:
> > Hello,
> >
> > I'd like to rewrite the repack shell script in C.
> > So I tried the naive approach reading the man page and
> > the script itself and write C program by matching each block/line
> > of the script with a function in C
> >
> > ...
> >
> > So my question is, how you'd generally approach rewriting a
> > shell script in C.
>
> Start a new process via start_command/run_command interface. It's
> safer to retain the process boundary at this stage. You can try to
> integrate further later.
I was in the middle of something and somehow read this as "rewriting
git-rebase.sh" :-X
For git-repack, because it ends with a single pack-objects call, we
might not need a new process after all (very much like tail call
optimization). This is what I have had for some time, but I have not
polished it for submission yet (and it may be a bit broken after the
last rebase). Maybe you can work off this, or start from scratch if you
think it's too messy. It basically teaches pack-objects the extra
features that repack needs, then makes repack a wrapper of pack-objects.
-- 8< --
commit 25569a3958c3272b3eb5fa50dea680948f7a2768 (build-in-repack)
Author: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Date: Wed Nov 9 19:21:39 2011 +0700
Build in git-repack
pack-objects learns a few more options to take over what's been done
by git-repack.sh. cmd_repack() becomes a wrapper around
cmd_pack_objects().
diff --git a/Makefile b/Makefile
index 0f931a2..b4010a6 100644
--- a/Makefile
+++ b/Makefile
@@ -460,7 +460,6 @@ SCRIPT_SH += git-mergetool.sh
SCRIPT_SH += git-pull.sh
SCRIPT_SH += git-quiltimport.sh
SCRIPT_SH += git-rebase.sh
-SCRIPT_SH += git-repack.sh
SCRIPT_SH += git-request-pull.sh
SCRIPT_SH += git-stash.sh
SCRIPT_SH += git-submodule.sh
@@ -584,6 +583,7 @@ BUILT_INS += git-init$X
BUILT_INS += git-merge-subtree$X
BUILT_INS += git-peek-remote$X
BUILT_INS += git-repo-config$X
+BUILT_INS += git-repack$X
BUILT_INS += git-show$X
BUILT_INS += git-stage$X
BUILT_INS += git-status$X
diff --git a/builtin.h b/builtin.h
index 64bab6b..feb958f 100644
--- a/builtin.h
+++ b/builtin.h
@@ -117,6 +117,7 @@ extern int cmd_reflog(int argc, const char **argv, const char *prefix);
extern int cmd_remote(int argc, const char **argv, const char *prefix);
extern int cmd_remote_ext(int argc, const char **argv, const char *prefix);
extern int cmd_remote_fd(int argc, const char **argv, const char *prefix);
+extern int cmd_repack(int argc, const char **argv, const char *prefix);
extern int cmd_repo_config(int argc, const char **argv, const char *prefix);
extern int cmd_rerere(int argc, const char **argv, const char *prefix);
extern int cmd_reset(int argc, const char **argv, const char *prefix);
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index f069462..1742ea1 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -18,10 +18,17 @@
#include "refs.h"
#include "streaming.h"
#include "thread-utils.h"
+#include "sigchain.h"
static const char *pack_usage[] = {
N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"),
N_("git pack-objects [options...] base-name [< ref-list | < object-list]"),
+ N_("git pack-objects --repack [options...]"),
+ NULL
+};
+
+static char const * const repack_usage[] = {
+ N_("git repack [options]"),
NULL
};
@@ -103,6 +110,15 @@ static struct object_entry *locate_object_entry(const unsigned char *sha1);
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;
+/* Bits stored in repack_flags; each one is set by a pack-objects option. */
+/* --repack: pack-objects is running in repack mode */
+#define REPACK_IN_PROGRESS (1 << 0)
+/* --update-info: call update_server_info() once the repack is done */
+#define REPACK_UPDATE_INFO (1 << 1)
+/* --repack-all: do not restrict the run to unpacked objects */
+#define REPACK_ALL_INTO_ONE (1 << 2)
+/* --remove-redundant: unlink the other local, non-kept packs afterwards */
+#define REPACK_REMOVE_REDUNDANT (1 << 3)
+
+/* repack_flags: REPACK_* bits; nr_written_packs: packs written in repack mode */
+static int repack_flags, nr_written_packs;
+/* value of the repack.usedeltabaseoffset configuration key */
+static int repack_usedeltabaseoffset;
+/* ".pack" paths of the packs written by this run */
+static struct string_list written_packs;
+/* ".old" names of files renamed out of the way; consumed by rollback_repack() */
+static struct string_list backup_files;
static void *get_delta(struct object_entry *entry)
{
@@ -792,9 +808,19 @@ static void write_pack_file(void)
snprintf(tmpname, sizeof(tmpname), "%s-", base_name);
finish_tmp_packfile(tmpname, pack_tmp_name,
written_list, nr_written,
- &pack_idx_opts, sha1);
+ &pack_idx_opts, sha1,
+ repack_flags & REPACK_IN_PROGRESS ?
+ &backup_files : NULL);
free(pack_tmp_name);
- puts(sha1_to_hex(sha1));
+ if (repack_flags & REPACK_IN_PROGRESS) {
+ int len = strlen(tmpname);
+ char *s = xmalloc(len + 2);
+ memcpy(s, tmpname, len - 4);
+ memcpy(s + len - 4, ".pack", 6);
+ string_list_append(&written_packs, s);
+ nr_written_packs++;
+ } else
+ puts(sha1_to_hex(sha1));
}
/* mark written objects as written to previous pack */
@@ -2359,7 +2385,8 @@ static void get_object_list(int ac, const char **av)
save_commit_buffer = 0;
setup_revisions(ac, av, &revs, NULL);
- while (fgets(line, sizeof(line), stdin) != NULL) {
+ while (!(repack_flags & REPACK_IN_PROGRESS) &&
+ fgets(line, sizeof(line), stdin) != NULL) {
int len = strlen(line);
if (len && line[len - 1] == '\n')
line[--len] = 0;
@@ -2387,6 +2414,30 @@ static void get_object_list(int ac, const char **av)
loosen_unused_packed_objects(&revs);
}
+/*
+ * Undo the backups recorded in backup_files: every entry is renamed
+ * back to the same path with its trailing ".old" stripped.  A rename
+ * that fails only produces a warning.  The list is emptied afterwards.
+ */
+static void rollback_repack(void)
+{
+	struct strbuf restored = STRBUF_INIT;
+	int idx;
+
+	for (idx = 0; idx < backup_files.nr; idx++) {
+		const char *backup = backup_files.items[idx].string;
+
+		strbuf_addstr(&restored, backup);
+		strbuf_setlen(&restored, restored.len - 4); /* remove .old */
+		if (rename(backup, restored.buf))
+			warning("failed to restore %s: %s", backup, strerror(errno));
+		/* reuse the same buffer for the next entry */
+		strbuf_setlen(&restored, 0);
+	}
+	strbuf_release(&restored);
+	string_list_clear(&backup_files, 0);
+}
+
+/*
+ * Signal handler registered with sigchain_push_common() when --repack
+ * is in effect: restore the backed-up files first, then pop this
+ * handler and raise the same signal again (presumably so that the
+ * previously installed disposition handles it -- confirm against the
+ * sigchain API).
+ *
+ * NOTE(review): rollback_repack() grows a strbuf and may call
+ * warning(); those are presumably not async-signal-safe -- confirm
+ * that this is acceptable here.
+ */
+static void rollback_repack_on_signal(int signo)
+{
+ rollback_repack();
+ sigchain_pop(signo);
+ raise(signo);
+}
+
static int option_parse_index_version(const struct option *opt,
const char *arg, int unset)
{
@@ -2436,11 +2487,12 @@ static int option_parse_ulong(const struct option *opt,
int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
+ struct strbuf repack_base_name = STRBUF_INIT;
int use_internal_rev_list = 0;
int thin = 0;
int all_progress_implied = 0;
const char *rp_av[6];
- int rp_ac = 0;
+ int i, rp_ac = 0;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
struct option pack_objects_options[] = {
OPT_SET_INT('q', "quiet", &progress,
@@ -2505,6 +2557,16 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
N_("pack compression level")),
OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
N_("do not hide commits by grafts"), 0),
+
+ OPT_BIT(0, "repack", &repack_flags,
+ N_("repack mode"), REPACK_IN_PROGRESS),
+ OPT_BIT(0, "repack-all", &repack_flags,
+ N_("repack everything into one pack"), REPACK_ALL_INTO_ONE),
+ OPT_BIT(0, "remove-redundant", &repack_flags,
+ N_("remove redundant objects after repack"), REPACK_REMOVE_REDUNDANT),
+ OPT_BIT(0, "update-info", &repack_flags,
+ N_("run git-update-server-info after repack"), REPACK_UPDATE_INFO),
+
OPT_END(),
};
@@ -2556,6 +2618,36 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (delta_search_threads != 1)
warning("no threads support, ignoring --threads");
#endif
+ if ((repack_flags & REPACK_IN_PROGRESS) == 0 &&
+ (repack_flags & ~REPACK_IN_PROGRESS))
+ die("--repack must be given for any repack related options");
+ if (repack_flags & REPACK_IN_PROGRESS) {
+ if (pack_to_stdout)
+ die("--stdout cannot be used with --repack");
+ if (argc)
+ die("base name cannot be used with --repack");
+
+ rp_av[rp_ac++] = "--all";
+ rp_av[rp_ac++] = "--reflog";
+ use_internal_rev_list = 1;
+
+ grafts_replace_parents = 0; /* --keep-true-parents */
+ ignore_packed_keep = 1; /* --honor-pack-keep */
+ non_empty = 1; /* --non-empty */
+
+ if (!(repack_flags & REPACK_ALL_INTO_ONE)) {
+ incremental = 1; /* --incremental */
+ rp_av[rp_ac++] = "--unpacked";
+ }
+
+ strbuf_addf(&repack_base_name,
+ "%s/pack/pack", get_object_directory());
+ base_name = repack_base_name.buf;
+
+ sigchain_push_common(rollback_repack_on_signal);
+ atexit(rollback_repack);
+ }
+
if (!pack_to_stdout && !pack_size_limit)
pack_size_limit = pack_size_limit_cfg;
if (pack_to_stdout && pack_size_limit)
@@ -2598,5 +2690,184 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
fprintf(stderr, "Total %"PRIu32" (delta %"PRIu32"),"
" reused %"PRIu32" (delta %"PRIu32")\n",
written, written_delta, reused, reused_delta);
+
+ if (!(repack_flags & REPACK_IN_PROGRESS))
+ return 0;
+
+ if (!nr_written_packs) {
+ printf(_("Nothing new to pack.\n"));
+ return 0;
+ }
+
+ /* At this point all new packs should be in place. We can
+ safely remove old ones */
+ for (i = 0; i < backup_files.nr; i++) {
+ const char *s = backup_files.items[i].string;
+ int ret = unlink(s);
+ if (ret)
+ warning("failed to remove %s: %s", s, strerror(errno));
+ }
+ string_list_clear(&backup_files, 0);
+
+ if (repack_flags & REPACK_REMOVE_REDUNDANT) {
+ struct packed_git *p;
+ struct string_list to_be_removed = STRING_LIST_INIT_DUP;
+
+ /* free_pack_by_name() may have freed a few packs in
+ write_pack_file() */
+ reprepare_packed_git();
+ for (p = packed_git; p; p = p->next) {
+ if (!p->pack_local || p->pack_keep)
+ continue;
+
+ for (i = 0; i < written_packs.nr; i++) {
+ char *s = written_packs.items[i].string;
+ if (!strcmp(s, p->pack_name))
+ break;
+ }
+ if (i < written_packs.nr)
+ continue;
+
+ string_list_append(&to_be_removed, p->pack_name);
+ }
+ written_packs.strdup_strings = 1;
+ string_list_clear(&written_packs, 0);
+
+ for (i = 0; i < to_be_removed.nr; i++) {
+ char *path = to_be_removed.items[i].string;
+
+ /* Windows limitation on unlink().
+ See c74faea19e39ca933492f697596310397175c329 */
+ free_pack_by_name(path);
+
+ if (unlink(path))
+ warning("failed to remove %s: %s", path, strerror(errno));
+ strcpy(path + strlen(path)-5, ".idx");
+ if (unlink(path))
+ warning("failed to remove %s: %s", path, strerror(errno));
+ }
+ string_list_clear(&to_be_removed, 0);
+
+ reprepare_packed_git();
+ prune_packed_objects(progress ? PRUNE_PACKED_VERBOSE : 0);
+ }
+
+ if (repack_flags & REPACK_UPDATE_INFO)
+ update_server_info(0);
+
return 0;
}
+
+/*
+ * Configuration callback for "git repack": records the boolean value of
+ * repack.usedeltabaseoffset and hands every other key over to
+ * git_default_config().
+ */
+static int repack_config(const char *k, const char *v, void *cb)
+{
+	if (strcasecmp(k, "repack.usedeltabaseoffset"))
+		return git_default_config(k, v, cb);
+
+	repack_usedeltabaseoffset = git_config_bool(k, v);
+	return 0;
+}
+
+/*
+ * Entry point for "git repack": translate repack's command line into
+ * pack-objects options and run cmd_pack_objects() in --repack mode.
+ *
+ * The short options parsed here are mapped to the matching pack-objects
+ * options; whatever parse_options() leaves in argv is appended after
+ * them unchanged (the OPTION_ARGUMENT entries are expected to reach
+ * pack-objects this way -- confirm against the parse-options API).
+ *
+ * Dies with "Too many options" when the translated command line does
+ * not fit into av[].  Returns the result of cmd_pack_objects().
+ */
+int cmd_repack(int argc, const char **argv, const char *prefix)
+{
+	int all_in_one = 0;
+	int all_in_one_and_unreachable = 0;
+	int unpack_unreachable = 0;
+	int remove_redundant = 0;
+	int no_reuse_delta = 0;
+	int no_reuse_object = 0;
+	int no_update = 0;
+	int quiet = 0;
+	int local = 0;
+
+	struct option opts[] = {
+		OPT_BOOL('a', NULL, &all_in_one,
+			 "pack everything in a single pack"),
+		OPT_BOOL('A', NULL, &all_in_one_and_unreachable,
+			 "same as -a, and turn unreachable objects loose"),
+		OPT_BOOL('d', NULL, &remove_redundant,
+			 "remove redundant packs, and run git-prune-packed"),
+		OPT_BOOL('f', NULL, &no_reuse_delta,
+			 "pass --no-reuse-delta to git-pack-objects"),
+		OPT_BOOL('F', NULL, &no_reuse_object,
+			 "pass --no-reuse-object to git-pack-objects"),
+		OPT_BOOL('n', NULL, &no_update,
+			 "do not run git-update-server-info"),
+		OPT_BOOL('q', NULL, &quiet, "be quiet"),
+		OPT_BOOL('l', NULL, &local,
+			 "pass --local to git-pack-objects"),
+		{ OPTION_ARGUMENT, 0, "window", NULL, "n",
+		  "size of the window used for delta compression", 0 },
+		{ OPTION_ARGUMENT, 0, "window-memory", NULL, "n",
+		  "same as the above, but limit memory size instead of entries count", 0 },
+		{ OPTION_ARGUMENT, 0, "depth", NULL, "n",
+		  "limits the maximum delta depth", 0 },
+		{ OPTION_ARGUMENT, 0, "max-pack-size", NULL, "n",
+		  "maximum size of each packfile", 0},
+		OPT_END(),
+	};
+
+	const char *av[] = { "pack-objects", "--repack",
+			     NULL, /* --[no-]update-info */
+			     NULL, /* --delta-base-offset */
+			     NULL, /* --repack-all */
+			     NULL, /* --remove-redundant */
+			     NULL, /* --no-reuse-delta */
+			     NULL, /* --no-reuse-object */
+			     NULL, /* --local */
+			     NULL, /* -q */
+			     NULL, /* --unpack-unreachable */
+			     NULL, /* --window */
+			     NULL, /* --window-memory */
+			     NULL, /* --depth */
+			     NULL, /* --max-pack-size */
+			     NULL
+	};
+	int av_max = sizeof(av) / sizeof(*av);
+	int ac = 2;
+
+	git_config(repack_config, NULL);
+
+	argc = parse_options(argc, argv, prefix, opts, repack_usage, 0);
+
+	if (no_update)
+		av[ac++] = "--no-update-info";
+	else
+		av[ac++] = "--update-info";
+	if (repack_usedeltabaseoffset)
+		av[ac++] = "--delta-base-offset";
+	if (all_in_one_and_unreachable) {
+		/* -A implies -a; --repack-all itself is pushed just below */
+		all_in_one = 1;
+		unpack_unreachable = 1;
+	}
+	if (all_in_one)
+		av[ac++] = "--repack-all";
+	if (remove_redundant)
+		av[ac++] = "--remove-redundant";
+	if (no_reuse_delta)
+		av[ac++] = "--no-reuse-delta";
+	if (no_reuse_object)
+		av[ac++] = "--no-reuse-object";
+	if (local)
+		av[ac++] = "--local";
+	if (quiet)
+		av[ac++] = "-q";
+
+	/*
+	 * Besides the leftover arguments, keep two slots free: one for
+	 * the --unpack-unreachable that may still be appended below and
+	 * one for the terminating NULL.
+	 */
+	if (ac + argc + 2 > av_max)
+		die("Too many options");
+	memcpy(av + ac, argv, argc * sizeof(*argv));
+	ac += argc;
+
+	if (all_in_one && remove_redundant) {
+		struct packed_git *p;
+
+		/*
+		 * Only ask for unreachable objects to be unpacked when at
+		 * least one pack without a .keep marker exists.
+		 */
+		prepare_packed_git();
+		for (p = packed_git; p; p = p->next) {
+			if (!p->pack_keep &&
+			    unpack_unreachable && remove_redundant) {
+				av[ac++] = "--unpack-unreachable";
+				break;
+			}
+		}
+	}
+	av[ac] = NULL;
+
+	return cmd_pack_objects(ac, av, prefix);
+}
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 6b0b6d4..3ca3c55 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -46,7 +46,7 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
sprintf(packname, "%s/pack/pack-", get_object_directory());
finish_tmp_packfile(packname, state->pack_tmp_name,
state->written, state->nr_written,
- &state->pack_idx_opts, sha1);
+ &state->pack_idx_opts, sha1, NULL);
for (i = 0; i < state->nr_written; i++)
free(state->written[i]);
diff --git a/git-repack.sh b/contrib/examples/git-repack.sh
similarity index 100%
rename from git-repack.sh
rename to contrib/examples/git-repack.sh
diff --git a/git.c b/git.c
index 1ada169..db4e4b3 100644
--- a/git.c
+++ b/git.c
@@ -389,6 +389,7 @@ static void handle_internal_command(int argc, const char **argv)
{ "remote-ext", cmd_remote_ext },
{ "remote-fd", cmd_remote_fd },
{ "replace", cmd_replace, RUN_SETUP },
+ { "repack", cmd_repack, RUN_SETUP },
{ "repo-config", cmd_repo_config, RUN_SETUP_GENTLY },
{ "rerere", cmd_rerere, RUN_SETUP },
{ "reset", cmd_reset, RUN_SETUP },
diff --git a/pack-write.c b/pack-write.c
index ca9e63b..e6aa7e3 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -1,6 +1,7 @@
#include "cache.h"
#include "pack.h"
#include "csum-file.h"
+#include "string-list.h"
void reset_pack_idx_option(struct pack_idx_option *opts)
{
@@ -348,10 +349,12 @@ void finish_tmp_packfile(char *name_buffer,
struct pack_idx_entry **written_list,
uint32_t nr_written,
struct pack_idx_option *pack_idx_opts,
- unsigned char sha1[])
+ unsigned char sha1[],
+ struct string_list *backup_files)
{
const char *idx_tmp_name;
char *end_of_name_prefix = strrchr(name_buffer, 0);
+ struct stat st;
if (adjust_shared_perm(pack_tmp_name))
die_errno("unable to make temporary pack file readable");
@@ -368,6 +371,14 @@ void finish_tmp_packfile(char *name_buffer,
die_errno("unable to rename temporary pack file");
sprintf(end_of_name_prefix, "%s.idx", sha1_to_hex(sha1));
+ if (backup_files && !stat(name_buffer, &st)) {
+ struct strbuf old = STRBUF_INIT;
+ strbuf_addf(&old, "%s.old", name_buffer);
+ if (rename(name_buffer, old.buf))
+ die_errno("unable to rename pack %s", name_buffer);
+ string_list_append(backup_files, strbuf_detach(&old, NULL));
+ }
+ backup_file(name_buffer);
if (rename(idx_tmp_name, name_buffer))
die_errno("unable to rename temporary index file");
diff --git a/pack.h b/pack.h
index aa6ee7d..d3f92ad 100644
--- a/pack.h
+++ b/pack.h
@@ -90,7 +90,9 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch
#define PH_ERROR_PROTOCOL (-3)
extern int read_pack_header(int fd, struct pack_header *);
+struct string_list;
+
extern struct sha1file *create_tmp_packfile(char **pack_tmp_name);
-extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]);
+extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[], struct string_list *backup_files);
#endif
-- 8< --
next prev parent reply other threads:[~2013-08-02 16:36 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-08-02 13:48 Rewriting git-repack.sh in C Stefan Beller
2013-08-02 14:10 ` Duy Nguyen
2013-08-02 16:36 ` Duy Nguyen [this message]
2013-08-03 6:33 ` Fredrik Gustafsson
2013-08-03 10:03 ` Duy Nguyen
2013-08-07 14:00 ` [PATCH 0/4] " Stefan Beller
2013-08-07 14:00 ` [PATCH 1/4] Build in git-repack Stefan Beller
2013-08-07 14:28 ` Matthieu Moy
2013-08-07 15:48 ` Junio C Hamano
2013-08-07 16:45 ` Stefan Beller
2013-08-08 2:44 ` Duy Nguyen
2013-08-07 14:00 ` [PATCH 2/4] backup_file dummy function Stefan Beller
2013-08-08 2:45 ` Duy Nguyen
2013-08-07 14:00 ` [PATCH 3/4] pack-objects: do not print usage when repacking Stefan Beller
2013-08-08 6:40 ` Antoine Pelisse
2013-08-07 14:00 ` [PATCH 4/4] repack: add unpack-unreachable Stefan Beller
2013-08-05 10:34 ` Rewriting git-repack.sh in C Matthieu Moy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130802163659.GA28693@lanh \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
--cc=stefanbeller@googlemail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.