All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Derrick Stolee <dstolee@microsoft.com>,
	Jonathan Tan <jonathantanmy@google.com>,
	Taylor Blau <me@ttaylorr.com>, Junio C Hamano <gitster@pobox.com>,
	Jeff King <peff@peff.net>, Karsten Blees <blees@dcon.de>,
	Derrick Stolee <stolee@gmail.com>,
	Elijah Newren <newren@gmail.com>,
	Elijah Newren <newren@gmail.com>
Subject: [PATCH v2 0/2] Optimization batch 6: make full use of exact renames
Date: Wed, 03 Feb 2021 20:03:45 +0000	[thread overview]
Message-ID: <pull.842.v2.git.1612382628.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.842.git.1612331345.gitgitgadget@gmail.com>

This series depends on en/merge-ort-perf and makes full use of exact
renames; see commit messages for details.

Thanks to Stolee and Junio for reviewing v1.

Changes since v1:

 * Update rename_src_nr when updating rename_src
 * Introduce want_copies in the first patch and use it in a few more places
 * Move a comment below a few exit-early if-checks

Elijah Newren (2):
  diffcore-rename: no point trying to find a match better than exact
  diffcore-rename: filter rename_src list when possible

 diffcore-rename.c | 69 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 61 insertions(+), 8 deletions(-)


base-commit: 557ac0350d9efa1f59c708779ca3fb3aee121131
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-842%2Fnewren%2Fort-perf-batch-6-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-842/newren/ort-perf-batch-6-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/842

Range-diff vs v1:

 1:  3e69857f37e ! 1:  770e894b4ab diffcore-rename: no point trying to find a match better than exact
     @@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
       	struct diff_score *mx;
       	int i, j, rename_count, skip_unmodified = 0;
       	int num_destinations, dst_cnt;
     -+	int num_sources;
     ++	int num_sources, want_copies;
       	struct progress *progress = NULL;
       
       	trace2_region_enter("diff", "setup", options->repo);
     ++	want_copies = (detect_rename == DIFF_DETECT_COPY);
     + 	if (!minimum_score)
     + 		minimum_score = DEFAULT_RENAME_SCORE;
     + 
     +@@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
     + 				p->one->rename_used++;
     + 			register_rename_src(p);
     + 		}
     +-		else if (detect_rename == DIFF_DETECT_COPY) {
     ++		else if (want_copies) {
     + 			/*
     + 			 * Increment the "rename_used" score by
     + 			 * one, to indicate ourselves as a user.
      @@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
       	 * files still remain as options for rename/copies!)
       	 */
       	num_destinations = (rename_dst_nr - rename_count);
      +	num_sources = rename_src_nr;
     -+	if (detect_rename != DIFF_DETECT_COPY)
     ++	if (!want_copies)
      +		num_sources -= rename_count;
       
       	/* All done? */
     @@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
       			struct diff_filespec *one = rename_src[j].p->one;
       			struct diff_score this_src;
       
     -+			if (one->rename_used &&
     -+			    detect_rename != DIFF_DETECT_COPY)
     ++			if (one->rename_used && !want_copies)
      +				continue;
      +
       			if (skip_unmodified &&
     @@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
       	}
       	stop_progress(&progress);
       
     +@@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
     + 	STABLE_QSORT(mx, dst_cnt * NUM_CANDIDATE_PER_DST, score_compare);
     + 
     + 	rename_count += find_renames(mx, dst_cnt, minimum_score, 0);
     +-	if (detect_rename == DIFF_DETECT_COPY)
     ++	if (want_copies)
     + 		rename_count += find_renames(mx, dst_cnt, minimum_score, 1);
     + 	free(mx);
     + 	trace2_region_leave("diff", "inexact renames", options->repo);
 2:  580ba9a10f5 ! 2:  7ae9460d3db diffcore-rename: filter rename_src list when possible
     @@ diffcore-rename.c: static int find_renames(struct diff_score *mx, int dst_cnt, i
       	return count;
       }
       
     -+static int remove_unneeded_paths_from_src(int num_src,
     -+					  int detecting_copies)
     ++static void remove_unneeded_paths_from_src(int detecting_copies)
      +{
      +	int i, new_num_src;
      +
     ++	if (detecting_copies)
     ++		return; /* nothing to remove */
     ++	if (break_idx)
     ++		return; /* culling incompatible with break detection */
     ++
      +	/*
      +	 * Note on reasons why we cull unneeded sources but not destinations:
      +	 *   1) Pairings are stored in rename_dst (not rename_src), which we
     @@ diffcore-rename.c: static int find_renames(struct diff_score *mx, int dst_cnt, i
      +	 *      sources N times each, so avoid that by removing the sources
      +	 *      from rename_src here.
      +	 */
     -+	if (detecting_copies)
     -+		return num_src; /* nothing to remove */
     -+	if (break_idx)
     -+		return num_src; /* culling incompatible with break detection */
     -+
     -+	for (i = 0, new_num_src = 0; i < num_src; i++) {
     ++	for (i = 0, new_num_src = 0; i < rename_src_nr; i++) {
      +		/*
      +		 * renames are stored in rename_dst, so if a rename has
      +		 * already been detected using this source, we can just
     @@ diffcore-rename.c: static int find_renames(struct diff_score *mx, int dst_cnt, i
      +		new_num_src++;
      +	}
      +
     -+	return new_num_src;
     ++	rename_src_nr = new_num_src;
      +}
      +
       void diffcore_rename(struct diff_options *options)
       {
       	int detect_rename = options->detect_rename;
     -@@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
     - 	struct diff_score *mx;
     - 	int i, j, rename_count, skip_unmodified = 0;
     - 	int num_destinations, dst_cnt;
     --	int num_sources;
     -+	int num_sources, want_copies;
     - 	struct progress *progress = NULL;
     - 
     - 	trace2_region_enter("diff", "setup", options->repo);
     -+	want_copies = (detect_rename == DIFF_DETECT_COPY);
     - 	if (!minimum_score)
     - 		minimum_score = DEFAULT_RENAME_SCORE;
     - 
      @@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
       		goto cleanup;
       
     @@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
      +	 * Calculate how many renames are left
       	 */
       	num_destinations = (rename_dst_nr - rename_count);
     --	num_sources = rename_src_nr;
     --	if (detect_rename != DIFF_DETECT_COPY)
     ++	remove_unneeded_paths_from_src(want_copies);
     + 	num_sources = rename_src_nr;
     +-	if (!want_copies)
      -		num_sources -= rename_count;
     -+	num_sources = remove_unneeded_paths_from_src(rename_src_nr, want_copies);
       
       	/* All done? */
       	if (!num_destinations || !num_sources)
      @@ diffcore-rename.c: void diffcore_rename(struct diff_options *options)
     - 		for (j = 0; j < NUM_CANDIDATE_PER_DST; j++)
     - 			m[j].dst = -1;
     - 
     --		for (j = 0; j < rename_src_nr; j++) {
     -+		for (j = 0; j < num_sources; j++) {
       			struct diff_filespec *one = rename_src[j].p->one;
       			struct diff_score this_src;
       
     --			if (one->rename_used &&
     --			    detect_rename != DIFF_DETECT_COPY)
     +-			if (one->rename_used && !want_copies)
      -				continue;
     -+			assert(!one->rename_used ||
     -+			       detect_rename == DIFF_DETECT_COPY ||
     -+			       break_idx);
     ++			assert(!one->rename_used || want_copies || break_idx);
       
       			if (skip_unmodified &&
       			    diff_unmodified_pair(rename_src[j].p))

-- 
gitgitgadget

  parent reply	other threads:[~2021-02-03 20:04 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-03  5:49 [PATCH 0/2] Optimization batch 6: make full use of exact renames Elijah Newren via GitGitGadget
2021-02-03  5:49 ` [PATCH 1/2] diffcore-rename: no point trying to find a match better than exact Elijah Newren via GitGitGadget
2021-02-03 11:44   ` Derrick Stolee
2021-02-03 16:31     ` Elijah Newren
2021-02-03 18:46     ` Junio C Hamano
2021-02-03 19:10       ` Elijah Newren
2021-02-03  5:49 ` [PATCH 2/2] diffcore-rename: filter rename_src list when possible Elijah Newren via GitGitGadget
     [not found]   ` <13feb106-c3a7-a26d-0e6e-013aa45c58d4@gmail.com>
2021-02-03 17:12     ` Elijah Newren
2021-02-03 19:12   ` Junio C Hamano
2021-02-03 19:19     ` Elijah Newren
2021-02-03 20:03 ` Elijah Newren via GitGitGadget [this message]
2021-02-03 20:03   ` [PATCH v2 1/2] diffcore-rename: no point trying to find a match better than exact Elijah Newren via GitGitGadget
2021-02-03 20:03   ` [PATCH v2 2/2] diffcore-rename: filter rename_src list when possible Elijah Newren via GitGitGadget
2021-02-13  1:04     ` Junio C Hamano
2021-02-13  4:24       ` Elijah Newren
2021-02-13  1:06     ` Junio C Hamano
2021-02-13  4:43       ` Elijah Newren
2021-02-03 21:56   ` [PATCH v2 0/2] Optimization batch 6: make full use of exact renames Junio C Hamano
2021-02-03 23:06     ` Elijah Newren
2021-02-03 23:26       ` Junio C Hamano
2021-02-03 23:36       ` Jeff King
2021-02-04  0:05         ` Elijah Newren
2021-02-14  7:34   ` [PATCH v3 " Elijah Newren via GitGitGadget
2021-02-14  7:35     ` [PATCH v3 1/2] diffcore-rename: no point trying to find a match better than exact Elijah Newren via GitGitGadget
2021-02-14  7:35     ` [PATCH v3 2/2] diffcore-rename: filter rename_src list when possible Elijah Newren via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=pull.842.v2.git.1612382628.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=blees@dcon.de \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jonathantanmy@google.com \
    --cc=me@ttaylorr.com \
    --cc=newren@gmail.com \
    --cc=peff@peff.net \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.