From: Brian Downing <bdowning@lavos.net>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org, Brian Downing <bdowning@lavos.net>
Subject: [PATCH 4/5] Allow xdiff machinery to cache hash results for a file
Date: Thu, 21 Aug 2008 18:22:00 -0500 [thread overview]
Message-ID: <1219360921-28529-5-git-send-email-bdowning@lavos.net> (raw)
In-Reply-To: <1219360921-28529-4-git-send-email-bdowning@lavos.net>
When generating diffs against the same file multiple times, it is a
waste of work to regenerate the hash values for each line each time.
Instead, allow a cache pointer to be passed in xpparam_t: set mf1_cache
to the cache for the first mmfile_t passed to xdl_diff, and mf2_cache
to the cache for the second.
This works like:
xdcache_t cache;
memset(&cache, 0, sizeof(cache));
/* ... */
xpp.mf1_cache = &cache;
xdl_diff(file1, file2, &xpp, &xecfg, &ecb);
/* ...later... */
xpp.mf1_cache = &cache;
xdl_diff(file1, file3, &xpp, &xecfg, &ecb);
/* The cache for file1 will be reused. */
xdl_cache_free(&cache);
Note that this isn't compatible with xdi_diff as-is, because building a
complete cache is incompatible with its tail trimming.
Signed-off-by: Brian Downing <bdowning@lavos.net>
---
xdiff/xdiff.h | 11 ++++++++++
xdiff/xprepare.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++------
xdiff/xtypes.h | 1 +
3 files changed, 64 insertions(+), 7 deletions(-)
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 281fc0b..6fd922b 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -65,8 +65,17 @@ typedef struct s_mmbuffer {
long size;
} mmbuffer_t;
+typedef struct s_xdcache_int {
+ long nrec;
+ unsigned long flags;
+ unsigned long *ha;
+ long *size;
+} xdcache_t;
+
typedef struct s_xpparam {
unsigned long flags;
+ xdcache_t *mf1_cache;
+ xdcache_t *mf2_cache;
} xpparam_t;
typedef struct s_xdemitcb {
@@ -104,6 +113,8 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, const char *name1,
mmfile_t *mf2, const char *name2,
xpparam_t const *xpp, int level, mmbuffer_t *result);
+void xdl_cache_free(xdcache_t *cache);
+
#ifdef __cplusplus
}
#endif /* #ifdef __cplusplus */
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index e87ab57..291caf9 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -54,7 +54,8 @@ static void xdl_free_classifier(xdlclassifier_t *cf);
static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
xrecord_t *rec);
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
- xdlclassifier_t *cf, xdfile_t *xdf);
+ xdlclassifier_t *cf, xdfile_t *xdf,
+ xdcache_t *cache);
static void xdl_free_ctx(xdfile_t *xdf);
static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
@@ -135,7 +136,8 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
- xdlclassifier_t *cf, xdfile_t *xdf) {
+ xdlclassifier_t *cf, xdfile_t *xdf,
+ xdcache_t *cache) {
unsigned int hbits;
long i, nrec, hsize, bsize;
unsigned long hav;
@@ -177,7 +179,12 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
top = blk + bsize;
}
prev = cur;
- hav = xdl_hash_record(&cur, top, xpp->flags);
+ if (cache && cache->ha) {
+ hav = cache->ha[nrec];
+ cur += cache->size[nrec];
+ } else {
+ hav = xdl_hash_record(&cur, top, xpp->flags);
+ }
if (nrec >= narec) {
narec *= 2;
if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) {
@@ -199,6 +206,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
crec->ptr = prev;
crec->size = (long) (cur - prev);
crec->ha = hav;
+ crec->original_ha = hav;
recs[nrec++] = crec;
if (xdl_classify_record(cf, rhash, hbits, crec) < 0) {
@@ -249,6 +257,23 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
xdf->dstart = 0;
xdf->dend = nrec - 1;
+ if (cache && !cache->ha) {
+ cache->nrec = nrec;
+ cache->ha = xdl_malloc(nrec * sizeof(unsigned long));
+ if (!cache->ha)
+ return 0;
+ cache->size = xdl_malloc(nrec * sizeof(long));
+ if (!cache->size) {
+ xdl_free(cache->ha);
+ cache->ha = NULL;
+ return 0;
+ }
+ for (i = 0; i < nrec; ++i) {
+ cache->ha[i] = recs[i]->original_ha;
+ cache->size[i] = recs[i]->size;
+ }
+ }
+
return 0;
}
@@ -268,21 +293,34 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdfenv_t *xe) {
long enl1, enl2;
xdlclassifier_t cf;
+ xdcache_t *c1 = xpp->mf1_cache;
+ xdcache_t *c2 = xpp->mf2_cache;
- enl1 = xdl_guess_lines(mf1) + 1;
- enl2 = xdl_guess_lines(mf2) + 1;
+ if (c1) {
+ if (c1->flags != xpp->flags)
+ xdl_cache_free(c1);
+ c1->flags = xpp->flags;
+ }
+ if (c2) {
+ if (c2->flags != xpp->flags)
+ xdl_cache_free(c2);
+ c2->flags = xpp->flags;
+ }
+
+ enl1 = c1 && c1->nrec ? c1->nrec : (xdl_guess_lines(mf1) + 1);
+ enl2 = c2 && c2->nrec ? c2->nrec : (xdl_guess_lines(mf2) + 1);
if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) {
return -1;
}
- if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+ if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1, c1) < 0) {
xdl_free_classifier(&cf);
return -1;
}
- if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+ if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2, c2) < 0) {
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
@@ -309,6 +347,13 @@ void xdl_free_env(xdfenv_t *xe) {
}
+void xdl_cache_free(xdcache_t *cache) {
+ xdl_free(cache->ha);
+ xdl_free(cache->size);
+ memset(cache, 0, sizeof(xdcache_t));
+}
+
+
static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
long r, rdis0, rpdis0, rdis1, rpdis1;
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 2511aef..e6f6890 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -43,6 +43,7 @@ typedef struct s_xrecord {
char const *ptr;
long size;
unsigned long ha;
+ unsigned long original_ha;
} xrecord_t;
typedef struct s_xdfile {
--
1.5.6.1
next prev parent reply other threads:[~2008-08-21 23:34 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-08-21 23:21 [PATCH 0/5] More git blame speed improvements Brian Downing
2008-08-21 23:21 ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff Brian Downing
2008-08-21 23:21 ` [PATCH 2/5] Bypass textual patch generation and parsing in git blame Brian Downing
2008-08-21 23:21 ` [PATCH 3/5] Always initialize xpparam_t to 0 Brian Downing
2008-08-21 23:22 ` Brian Downing [this message]
2008-08-21 23:22 ` [PATCH 5/5] Use xdiff caching to improve git blame performance Brian Downing
2008-08-23 8:15 ` [PATCH 1/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-08-23 9:03 ` Junio C Hamano
2008-08-24 8:12 ` Brian Downing
2008-09-03 22:29 ` René Scharfe
2008-10-25 13:30 ` [PATCH 1/5] blame: inline get_patch() René Scharfe
2008-10-25 13:30 ` [PATCH 2/5] Always initialize xpparam_t to 0 René Scharfe
2008-10-25 13:30 ` [PATCH 3/5] Allow alternate "low-level" emit function from xdl_diff René Scharfe
2008-10-25 13:31 ` [PATCH 4/5] add xdi_diff_hunks() for callers that only need hunk lengths René Scharfe
2008-10-25 13:31 ` [PATCH 5/5] blame: use xdi_diff_hunks(), get rid of struct patch René Scharfe
2008-10-25 19:36 ` Junio C Hamano
2008-10-26 22:20 ` René Scharfe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1219360921-28529-5-git-send-email-bdowning@lavos.net \
--to=bdowning@lavos.net \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).