git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Brian Downing <bdowning@lavos.net>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org, Brian Downing <bdowning@lavos.net>
Subject: [PATCH 4/5] Allow xdiff machinery to cache hash results for a file
Date: Thu, 21 Aug 2008 18:22:00 -0500	[thread overview]
Message-ID: <1219360921-28529-5-git-send-email-bdowning@lavos.net> (raw)
In-Reply-To: <1219360921-28529-4-git-send-email-bdowning@lavos.net>

When generating diffs against the same file multiple times, it is a
waste of work to recompute the hash value of every line each time.
Instead, allow cache pointers to be passed in xpparam_t: set mf1_cache
to the cache for the first mmfile_t passed to xdl_diff, and mf2_cache
to the cache for the second.

This works like:

	xdcache_t cache;
	memset(&cache, 0, sizeof(cache));
	/* ... */
	xpp.mf1_cache = &cache;
	xdl_diff(file1, file2, &xpp, &xecfg, &ecb);
	/* ...later... */
	xpp.mf1_cache = &cache;
	xdl_diff(file1, file3, &xpp, &xecfg, &ecb);
	/* The cache for file1 will be reused. */
	xdl_cache_free(&cache);

Note that this isn't compatible with xdi_diff as-is, as getting a
complete cache is incompatible with tail trimming.

Signed-off-by: Brian Downing <bdowning@lavos.net>
---
 xdiff/xdiff.h    |   11 ++++++++++
 xdiff/xprepare.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++------
 xdiff/xtypes.h   |    1 +
 3 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 281fc0b..6fd922b 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -65,8 +65,17 @@ typedef struct s_mmbuffer {
 	long size;
 } mmbuffer_t;
 
+typedef struct s_xdcache_int {
+	long nrec;
+	unsigned long flags;
+	unsigned long *ha;
+	long *size;
+} xdcache_t;
+
 typedef struct s_xpparam {
 	unsigned long flags;
+	xdcache_t *mf1_cache;
+	xdcache_t *mf2_cache;
 } xpparam_t;
 
 typedef struct s_xdemitcb {
@@ -104,6 +113,8 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, const char *name1,
 		mmfile_t *mf2, const char *name2,
 		xpparam_t const *xpp, int level, mmbuffer_t *result);
 
+void xdl_cache_free(xdcache_t *cache);
+
 #ifdef __cplusplus
 }
 #endif /* #ifdef __cplusplus */
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index e87ab57..291caf9 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -54,7 +54,8 @@ static void xdl_free_classifier(xdlclassifier_t *cf);
 static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
 			       xrecord_t *rec);
 static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
-			   xdlclassifier_t *cf, xdfile_t *xdf);
+			   xdlclassifier_t *cf, xdfile_t *xdf,
+			   xdcache_t *cache);
 static void xdl_free_ctx(xdfile_t *xdf);
 static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
 static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
@@ -135,7 +136,8 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
 
 
 static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
-			   xdlclassifier_t *cf, xdfile_t *xdf) {
+			   xdlclassifier_t *cf, xdfile_t *xdf,
+			   xdcache_t *cache) {
 	unsigned int hbits;
 	long i, nrec, hsize, bsize;
 	unsigned long hav;
@@ -177,7 +179,12 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
 				top = blk + bsize;
 			}
 			prev = cur;
-			hav = xdl_hash_record(&cur, top, xpp->flags);
+			if (cache && cache->ha) {
+				hav = cache->ha[nrec];
+				cur += cache->size[nrec];
+			} else {
+				hav = xdl_hash_record(&cur, top, xpp->flags);
+			}
 			if (nrec >= narec) {
 				narec *= 2;
 				if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) {
@@ -199,6 +206,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
 			crec->ptr = prev;
 			crec->size = (long) (cur - prev);
 			crec->ha = hav;
+			crec->original_ha = hav;
 			recs[nrec++] = crec;
 
 			if (xdl_classify_record(cf, rhash, hbits, crec) < 0) {
@@ -249,6 +257,23 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
 	xdf->dstart = 0;
 	xdf->dend = nrec - 1;
 
+	if (cache && !cache->ha) {
+		cache->nrec = nrec;
+		cache->ha = xdl_malloc(nrec * sizeof(unsigned long));
+		if (!cache->ha)
+			return 0;
+		cache->size = xdl_malloc(nrec * sizeof(long));
+		if (!cache->size) {
+			xdl_free(cache->ha);
+			cache->ha = NULL;
+			return 0;
+		}
+		for (i = 0; i < nrec; ++i) {
+			cache->ha[i] = recs[i]->original_ha;
+			cache->size[i] = recs[i]->size;
+		}
+	}
+
 	return 0;
 }
 
@@ -268,21 +293,34 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		    xdfenv_t *xe) {
 	long enl1, enl2;
 	xdlclassifier_t cf;
+	xdcache_t *c1 = xpp->mf1_cache;
+	xdcache_t *c2 = xpp->mf2_cache;
 
-	enl1 = xdl_guess_lines(mf1) + 1;
-	enl2 = xdl_guess_lines(mf2) + 1;
+	if (c1) {
+		if (c1->flags != xpp->flags)
+			xdl_cache_free(c1);
+		c1->flags = xpp->flags;
+	}
+	if (c2) {
+		if (c2->flags != xpp->flags)
+			xdl_cache_free(c2);
+		c2->flags = xpp->flags;
+	}
+
+	enl1 = c1 && c1->nrec ? c1->nrec : (xdl_guess_lines(mf1) + 1);
+	enl2 = c2 && c2->nrec ? c2->nrec : (xdl_guess_lines(mf2) + 1);
 
 	if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) {
 
 		return -1;
 	}
 
-	if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+	if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1, c1) < 0) {
 
 		xdl_free_classifier(&cf);
 		return -1;
 	}
-	if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+	if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2, c2) < 0) {
 
 		xdl_free_ctx(&xe->xdf1);
 		xdl_free_classifier(&cf);
@@ -309,6 +347,13 @@ void xdl_free_env(xdfenv_t *xe) {
 }
 
 
+void xdl_cache_free(xdcache_t *cache) {
+	xdl_free(cache->ha);
+	xdl_free(cache->size);
+	memset(cache, 0, sizeof(xdcache_t));
+}
+
+
 static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
 	long r, rdis0, rpdis0, rdis1, rpdis1;
 
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 2511aef..e6f6890 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -43,6 +43,7 @@ typedef struct s_xrecord {
 	char const *ptr;
 	long size;
 	unsigned long ha;
+	unsigned long original_ha;
 } xrecord_t;
 
 typedef struct s_xdfile {
-- 
1.5.6.1

  reply	other threads:[~2008-08-21 23:34 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-08-21 23:21 [PATCH 0/5] More git blame speed improvements Brian Downing
2008-08-21 23:21 ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff Brian Downing
2008-08-21 23:21   ` [PATCH 2/5] Bypass textual patch generation and parsing in git blame Brian Downing
2008-08-21 23:21     ` [PATCH 3/5] Always initialize xpparam_t to 0 Brian Downing
2008-08-21 23:22       ` Brian Downing [this message]
2008-08-21 23:22         ` [PATCH 5/5] Use xdiff caching to improve git blame performance Brian Downing
2008-08-23  8:15   ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-08-23  9:03     ` Junio C Hamano
2008-08-24  8:12     ` Brian Downing
2008-09-03 22:29       ` René Scharfe
2008-10-25 13:30         ` [PATCH 1/5] blame: inline get_patch() René Scharfe
2008-10-25 13:30         ` [PATCH 2/5] Always initialize xpparam_t to 0 René Scharfe
2008-10-25 13:30         ` [PATCH 3/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-10-25 13:31         ` [PATCH 4/5] add xdi_diff_hunks() for callers that only need hunk lengths René Scharfe
2008-10-25 13:31         ` [PATCH 5/5] blame: use xdi_diff_hunks(), get rid of struct patch René Scharfe
2008-10-25 19:36           ` Junio C Hamano
2008-10-26 22:20             ` René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1219360921-28529-5-git-send-email-bdowning@lavos.net \
    --to=bdowning@lavos.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).