All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiang Xin <worldhello.net@gmail.com>
To: Junio C Hamano <gitster@pobox.com>,
	Git List <git@vger.kernel.org>, Sun Chao <sunchao9@huawei.com>
Cc: Jiang Xin <worldhello.net@gmail.com>,
	Jiang Xin <zhiyou.jx@alibaba-inc.com>
Subject: [PATCH v9 0/6] pack-redundant: new algorithm to find min packs
Date: Sat,  2 Feb 2019 00:21:46 +0800	[thread overview]
Message-ID: <20190201162152.31136-1-worldhello.net@gmail.com> (raw)
In-Reply-To: <20190130114736.30357-1-worldhello.net@gmail.com>

Sun Chao (my former colleague at Huawei) found a bug of
git-pack-redundant.  If there are too many packs and many of them
overlap each other, running `git pack-redundant --all` will
exhaust all memories and the process will be killed by kernel.

There is a script in commit log of commit 3/6, which can be used to
create a repository with lots of redundant packs. Running `git
pack-redundant --all` in it can reproduce this issue.

## Changes since reroll v7

1. Rewrite [PATCH v9 1/6] (t5323: test cases for git-pack-redundant)

   * Add many tables for relationship of packs and objects.
   * Change dir in subshell and fixed other issues.

2. New patch file from Sun Chao: [PATCH v9 3/6] (pack-redundant: delete redundant code)

3. Squash patches (remove unused functions) to patch 4/6 (new algorithm to find min packs).

## Range diff

1:  799e804d5e < -:  ---------- t5323: test cases for git-pack-redundant
-:  ---------- > 1:  c8dbf8cef2 t5323: test cases for git-pack-redundant
2:  520f6277fb = 2:  a6300516d7 pack-redundant: delay creation of unique_objects
-:  ---------- > 3:  fb71973df5 pack-redundant: delete redundant code
3:  ab1c2c4950 ! 4:  9963d1c49f pack-redundant: new algorithm to find min packs
    @@ -76,6 +76,113 @@
      diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
      --- a/builtin/pack-redundant.c
      +++ b/builtin/pack-redundant.c
    +@@
    + 	struct llist *all_objects;
    + } *local_packs = NULL, *altodb_packs = NULL;
    + 
    +-struct pll {
    +-	struct pll *next;
    +-	struct pack_list *pl;
    +-};
    +-
    + static struct llist_item *free_nodes;
    + 
    + static inline void llist_item_put(struct llist_item *item)
    +@@
    + 	return new_item;
    + }
    + 
    +-static void llist_free(struct llist *list)
    +-{
    +-	while ((list->back = list->front)) {
    +-		list->front = list->front->next;
    +-		llist_item_put(list->back);
    +-	}
    +-	free(list);
    +-}
    +-
    + static inline void llist_init(struct llist **list)
    + {
    + 	*list = xmalloc(sizeof(struct llist));
    +@@
    + 	}
    + }
    + 
    +-static void pll_free(struct pll *l)
    +-{
    +-	struct pll *old;
    +-	struct pack_list *opl;
    +-
    +-	while (l) {
    +-		old = l;
    +-		while (l->pl) {
    +-			opl = l->pl;
    +-			l->pl = opl->next;
    +-			free(opl);
    +-		}
    +-		l = l->next;
    +-		free(old);
    +-	}
    +-}
    +-
    +-/* all the permutations have to be free()d at the same time,
    +- * since they refer to each other
    +- */
    +-static struct pll * get_permutations(struct pack_list *list, int n)
    +-{
    +-	struct pll *subset, *ret = NULL, *new_pll = NULL;
    +-
    +-	if (list == NULL || pack_list_size(list) < n || n == 0)
    +-		return NULL;
    +-
    +-	if (n == 1) {
    +-		while (list) {
    +-			new_pll = xmalloc(sizeof(*new_pll));
    +-			new_pll->pl = NULL;
    +-			pack_list_insert(&new_pll->pl, list);
    +-			new_pll->next = ret;
    +-			ret = new_pll;
    +-			list = list->next;
    +-		}
    +-		return ret;
    +-	}
    +-
    +-	while (list->next) {
    +-		subset = get_permutations(list->next, n - 1);
    +-		while (subset) {
    +-			new_pll = xmalloc(sizeof(*new_pll));
    +-			new_pll->pl = subset->pl;
    +-			pack_list_insert(&new_pll->pl, list);
    +-			new_pll->next = ret;
    +-			ret = new_pll;
    +-			subset = subset->next;
    +-		}
    +-		list = list->next;
    +-	}
    +-	return ret;
    +-}
    +-
    +-static int is_superset(struct pack_list *pl, struct llist *list)
    +-{
    +-	struct llist *diff;
    +-
    +-	diff = llist_copy(list);
    +-
    +-	while (pl) {
    +-		llist_sorted_difference_inplace(diff, pl->all_objects);
    +-		if (diff->size == 0) { /* we're done */
    +-			llist_free(diff);
    +-			return 1;
    +-		}
    +-		pl = pl->next;
    +-	}
    +-	llist_free(diff);
    +-	return 0;
    +-}
    +-
    + static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
    + {
    + 	size_t ret = 0;
     @@
      	return ret;
      }
    @@ -221,56 +328,56 @@
      --- a/t/t5323-pack-redundant.sh
      +++ b/t/t5323-pack-redundant.sh
     @@
    - P2:$P2
    - EOF
    - 
    + #     ALL | x x x x x x x x x x x x x x x x x   x
    + #
    + #############################################################################
     -test_expect_success 'one of pack-2/pack-3 is redundant' '
    -+test_expect_failure 'one of pack-2/pack-3 is redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    ++test_expect_failure 'one of pack-2/pack-3 is redundant (failed on Mac)' '
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - P6:$P6
    - EOF
    - 
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
     -test_expect_success 'pack 2, 4, and 6 are redundant' '
    -+test_expect_failure 'pack 2, 4, and 6 are redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    ++test_expect_failure 'pack 2, 4, and 6 are redundant (failed on Mac)' '
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - P8:$P8
    - EOF
    - 
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
     -test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    -+test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    ++test_expect_failure 'pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - 	test_must_be_empty out
    + 	)
      '
      
     -test_expect_success 'remove redundant packs and pass fsck' '
    -+test_expect_failure 'remove redundant packs and pass fsck' '
    - 	git pack-redundant --all | xargs rm &&
    - 	git fsck --no-progress &&
    - 	git pack-redundant --all >out &&
    ++test_expect_failure 'remove redundant packs and pass fsck (failed on Mac)' '
    + 	(
    + 		cd "$master_repo" &&
    + 		git pack-redundant --all | xargs rm &&
     @@
    - 	printf "../../master.git/objects" >objects/info/alternates
    + 	)
      '
      
     -test_expect_success 'no redundant packs without --alt-odb' '
    -+test_expect_failure 'no redundant packs without --alt-odb' '
    - 	git pack-redundant --all >out &&
    - 	test_must_be_empty out
    - '
    ++test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
    + 	(
    + 		cd "$shared_repo" &&
    + 		git pack-redundant --all >out &&
     @@
    - P7:$P7
    - EOF
    - 
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
     -test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    -+test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr' '
    - 	git pack-redundant --all --verbose >out 2>out.err &&
    - 	test_must_be_empty out &&
    - 	grep "pack$" out.err | format_packfiles >actual &&
    ++test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr (failed on Mac)' '
    + 	(
    + 		cd "$shared_repo" &&
    + 		cat >expect <<-EOF &&
4:  3c3a7ea40f < -:  ---------- pack-redundant: remove unused functions
5:  bc4b681f40 ! 5:  b8f80ad454 pack-redundant: rename pack_list.all_objects
    @@ -115,11 +115,7 @@
     +							alt->remaining_objects);
      			local = local->next;
      		}
    --		llist_sorted_difference_inplace(all_objects, alt->all_objects);
    -+		llist_sorted_difference_inplace(all_objects, alt->remaining_objects);
      		alt = alt->next;
    - 	}
    - }
     @@
      		return NULL;
      
6:  6cfba5b4b2 ! 6:  8a12ad699e pack-redundant: consistent sort method
    @@ -83,60 +83,71 @@
      --- a/t/t5323-pack-redundant.sh
      +++ b/t/t5323-pack-redundant.sh
     @@
    - '
    - 
    - cat >expected <<EOF
    --P2:$P2
    -+P3:$P3
    - EOF
    - 
    --test_expect_failure 'one of pack-2/pack-3 is redundant' '
    + #         | T A B C D E F G H I J K L M N O P Q R
    + #     ----+--------------------------------------
    + #     P1  | x x x x x x x                       x
    +-#     P2* |     ! ! ! !   ! ! !
    +-#     P3  |             x     x x x x x
    ++#     P2  |     x x x x   x x x
    ++#     P3* |             !     ! ! ! ! !
    + #     P4  |                     x x x x     x
    + #     P5  |               x x           x x
    + #     ----+--------------------------------------
    + #     ALL | x x x x x x x x x x x x x x x x x   x
    + #
    + #############################################################################
    +-test_expect_failure 'one of pack-2/pack-3 is redundant (failed on Mac)' '
     +test_expect_success 'one of pack-2/pack-3 is redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
    +-			P2:$P2
    ++			P3:$P3
    + 			EOF
    + 		git pack-redundant --all >out &&
    + 		format_packfiles <out >actual &&
     @@
    - P6:$P6
    - EOF
    - 
    --test_expect_failure 'pack 2, 4, and 6 are redundant' '
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
    +-test_expect_failure 'pack 2, 4, and 6 are redundant (failed on Mac)' '
     +test_expect_success 'pack 2, 4, and 6 are redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - P8:$P8
    - EOF
    - 
    --test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
    +-test_expect_failure 'pack-8 (subset of pack-1) is also redundant (failed on Mac)' '
     +test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    - 	git pack-redundant --all >out &&
    - 	format_packfiles <out >actual &&
    - 	test_cmp expected actual
    + 	(
    + 		cd "$master_repo" &&
    + 		cat >expect <<-EOF &&
     @@
    - 	test_must_be_empty out
    + 	)
      '
      
    --test_expect_failure 'remove redundant packs and pass fsck' '
    +-test_expect_failure 'remove redundant packs and pass fsck (failed on Mac)' '
     +test_expect_success 'remove redundant packs and pass fsck' '
    - 	git pack-redundant --all | xargs rm &&
    - 	git fsck --no-progress &&
    - 	git pack-redundant --all >out &&
    + 	(
    + 		cd "$master_repo" &&
    + 		git pack-redundant --all | xargs rm &&
     @@
    - 	printf "../../master.git/objects" >objects/info/alternates
    + 	)
      '
      
    --test_expect_failure 'no redundant packs without --alt-odb' '
    +-test_expect_failure 'no redundant packs without --alt-odb (failed on Mac)' '
     +test_expect_success 'no redundant packs without --alt-odb' '
    - 	git pack-redundant --all >out &&
    - 	test_must_be_empty out
    - '
    + 	(
    + 		cd "$shared_repo" &&
    + 		git pack-redundant --all >out &&
     @@
    - P7:$P7
    - EOF
    - 
    --test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr' '
    + #     ALL | x x x x x x x x x x x x x x x x x x x
    + #
    + #############################################################################
    +-test_expect_failure 'pack-redundant --verbose: show duplicate packs in stderr (failed on Mac)' '
     +test_expect_success 'pack-redundant --verbose: show duplicate packs in stderr' '
    - 	git pack-redundant --all --verbose >out 2>out.err &&
    - 	test_must_be_empty out &&
    - 	grep "pack$" out.err | format_packfiles >actual &&
    + 	(
    + 		cd "$shared_repo" &&
    + 		cat >expect <<-EOF &&


Jiang Xin (4):
  t5323: test cases for git-pack-redundant
  pack-redundant: delay creation of unique_objects
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: delete redundant code
  pack-redundant: new algorithm to find min packs

 builtin/pack-redundant.c  | 232 +++++++----------
 t/t5323-pack-redundant.sh | 510 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 602 insertions(+), 140 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.20.1.103.ged0fc2ca7b


  reply	other threads:[~2019-02-01 16:22 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-18  9:58 [PATCH 1/2] pack-redundant: new algorithm to find min packs Jiang Xin
2018-12-18  9:58 ` [PATCH 2/2] pack-redundant: remove unused functions Jiang Xin
2018-12-19 12:14   ` [PATCH v2 0/3] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-02  4:34     ` [PATCH v3 " Jiang Xin
2019-01-02  4:34     ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-09 12:56       ` SZEDER Gábor
2019-01-09 16:47         ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 0/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-12  9:17             ` [PATCH v6 " Jiang Xin
2019-01-30 11:47               ` [PATCH v7 0/6] " Jiang Xin
2019-02-01 16:21                 ` Jiang Xin [this message]
2019-02-01 16:21                 ` [PATCH v9 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-02-01 19:42                   ` Eric Sunshine
2019-02-01 21:03                     ` Junio C Hamano
2019-02-01 21:49                       ` Eric Sunshine
2019-02-02 13:30                         ` [PATCH v10 0/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 3/6] pack-redundant: delete redundant code Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-02-02 13:30                         ` [PATCH v10 6/6] pack-redundant: consistent sort method Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 3/6] pack-redundant: delete redundant code Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 4/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-02-01 16:21                 ` [PATCH v9 6/6] pack-redundant: consistent sort method Jiang Xin
2019-01-30 11:47               ` [PATCH v7 1/6] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-31 21:44                 ` Junio C Hamano
2019-02-01  5:44                   ` Jiang Xin
2019-02-01  6:11                     ` Eric Sunshine
2019-02-01  7:23                       ` Jiang Xin
2019-02-01  7:25                         ` Jiang Xin
2019-02-01  9:51                       ` Jiang Xin
2019-01-30 11:47               ` [PATCH v7 2/6] pack-redundant: delay creation of unique_objects Jiang Xin
2019-01-30 11:47               ` [PATCH v7 3/6] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-31 19:30                 ` Junio C Hamano
2019-02-01  9:55                   ` Jiang Xin
2019-01-30 11:47               ` [PATCH v7 4/6] pack-redundant: remove unused functions Jiang Xin
2019-01-30 15:03                 ` [PATCH v8 1/1] pack-redundant: delete redundant code 16657101987
2019-01-30 11:47               ` [PATCH v7 5/6] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-30 11:47               ` [PATCH v7 6/6] pack-redundant: consistent sort method Jiang Xin
2019-01-12  9:17             ` [PATCH v6 1/5] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-12  9:17             ` [PATCH v6 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-12  9:17             ` [PATCH v6 3/5] pack-redundant: remove unused functions Jiang Xin
2019-01-12  9:17             ` [PATCH v6 4/5] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-12  9:17             ` [PATCH v6 5/5] pack-redundant: consistent sort method Jiang Xin
2019-01-10 12:01           ` [PATCH v5 1/5] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-10 21:11             ` Junio C Hamano
2019-01-11  1:59               ` Jiang Xin
2019-01-11 18:00                 ` Junio C Hamano
2019-01-10 12:01           ` [PATCH v5 2/5] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-11  1:19             ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 3/5] pack-redundant: rename pack_list.all_objects Jiang Xin
2019-01-10 12:01           ` [PATCH v5 4/5] pack-redundant: consistent sort method Jiang Xin
2019-01-10 20:05             ` SZEDER Gábor
2019-01-10 12:01           ` [PATCH v5 5/5] pack-redundant: remove unused functions Jiang Xin
2019-01-10  3:28         ` [PATCH v3 1/3] t5323: test cases for git-pack-redundant Jiang Xin
2019-01-10  7:11           ` Johannes Sixt
2019-01-10 11:57           ` SZEDER Gábor
2019-01-10 12:25             ` Torsten Bögershausen
2019-01-10 17:36             ` Junio C Hamano
2019-01-15 20:30             ` [PATCH/RFC v1 1/1] test-lint: sed -E (or -a, -l) are not portable tboegi
2019-01-15 21:09               ` Eric Sunshine
2019-01-16 11:24               ` Ævar Arnfjörð Bjarmason
2019-01-20  7:53             ` [PATCH/RFC v2 1/1] test-lint: Only use only sed [-n] [-e command] [-f command_file] tboegi
2019-01-22 19:47               ` Junio C Hamano
2019-01-22 20:00                 ` Torsten Bögershausen
2019-01-22 21:15                   ` Eric Sunshine
2019-01-23  6:35                     ` Torsten Bögershausen
2019-01-23 17:54                       ` Junio C Hamano
2019-01-25 19:12                         ` Torsten Bögershausen
2019-01-27 22:34                           ` Junio C Hamano
2019-01-02  4:34     ` [PATCH v3 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
2019-01-02  4:34     ` [PATCH v3 3/3] pack-redundant: remove unused functions Jiang Xin
2019-01-08 16:40       ` [PATCH v4 0/1] " 16657101987
2019-01-08 19:30         ` Junio C Hamano
2019-01-09  0:29           ` 16657101987
2019-01-08 16:43       ` [PATCH v4 1/1] " 16657101987
2019-01-08 16:45       ` [PATCH v4 0/1] " 16657101987
2018-12-19 12:14   ` [PATCH v2 1/3] t5322: test cases for git-pack-redundant Jiang Xin
2018-12-19 12:14   ` [PATCH v2 2/3] pack-redundant: new algorithm to find min packs Jiang Xin
2018-12-19 12:14   ` [PATCH v2 3/3] pack-redundant: remove unused functions Jiang Xin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190201162152.31136-1-worldhello.net@gmail.com \
    --to=worldhello.net@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=sunchao9@huawei.com \
    --cc=zhiyou.jx@alibaba-inc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.