git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Shawn O. Pearce" <spearce@spearce.org>
To: Robin Rosenberg <robin.rosenberg@dewire.com>
Cc: git@vger.kernel.org
Subject: [JGIT PATCH 11/13] Change IndexPack to use ObjectIdSubclassMap instead of ObjectIdMap
Date: Tue, 28 Apr 2009 14:12:24 -0700	[thread overview]
Message-ID: <1240953146-12878-12-git-send-email-spearce@spearce.org> (raw)
In-Reply-To: <1240953146-12878-11-git-send-email-spearce@spearce.org>

The ObjectIdSubclassMap performs slightly better on average than the
ObjectIdMap, and uses a lot less memory as we don't need to allocate
a tree node per tracked object.

This reduces the memory footprint of IndexPack, by storing a linked
list of the UnresolvedDelta objects directly in each map, which is
slightly smaller than using an ArrayList inside of a TreeMap.

Performance is marginally faster, but we now can declare ObjectIdMap
to be fully deprecated and start removing it from the library.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 .../org/spearce/jgit/lib/ObjectIdSubclassMap.java  |   31 +++++-
 .../src/org/spearce/jgit/transport/IndexPack.java  |  123 +++++++++++++-------
 2 files changed, 109 insertions(+), 45 deletions(-)

diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectIdSubclassMap.java b/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectIdSubclassMap.java
index 2a13204..bdacec4 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectIdSubclassMap.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectIdSubclassMap.java
@@ -37,6 +37,8 @@
 
 package org.spearce.jgit.lib;
 
+import java.util.Iterator;
+
 /**
  * Fast, efficient map specifically for {@link ObjectId} subclasses.
  * <p>
@@ -51,7 +53,7 @@
  * @param <V>
  *            type of subclass of ObjectId that will be stored in the map.
  */
-public class ObjectIdSubclassMap<V extends ObjectId> {
+public class ObjectIdSubclassMap<V extends ObjectId> implements Iterable<V> {
 	private int size;
 
 	private V[] obj_hash;
@@ -114,6 +116,33 @@ public int size() {
 		return size;
 	}
 
+	public Iterator<V> iterator() {
+		return new Iterator<V>() {
+			private int found;
+
+			private int i;
+
+			public boolean hasNext() {
+				return found < size;
+			}
+
+			public V next() {
+				while (i < obj_hash.length) {
+					final V v = obj_hash[i++];
+					if (v != null) {
+						found++;
+						return v;
+					}
+				}
+				throw new IllegalStateException();
+			}
+
+			public void remove() {
+				throw new UnsupportedOperationException();
+			}
+		};
+	}
+
 	private final int index(final AnyObjectId id) {
 		return (id.w1 >>> 1) % obj_hash.length;
 	}
diff --git a/org.spearce.jgit/src/org/spearce/jgit/transport/IndexPack.java b/org.spearce.jgit/src/org/spearce/jgit/transport/IndexPack.java
index 59fdeae..eecad9c 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/transport/IndexPack.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/transport/IndexPack.java
@@ -62,7 +62,7 @@
 import org.spearce.jgit.lib.MutableObjectId;
 import org.spearce.jgit.lib.ObjectChecker;
 import org.spearce.jgit.lib.ObjectId;
-import org.spearce.jgit.lib.ObjectIdMap;
+import org.spearce.jgit.lib.ObjectIdSubclassMap;
 import org.spearce.jgit.lib.ObjectLoader;
 import org.spearce.jgit.lib.PackIndexWriter;
 import org.spearce.jgit.lib.ProgressMonitor;
@@ -160,9 +160,9 @@ public static IndexPack create(final Repository db, final InputStream is)
 
 	private final CRC32 crc = new CRC32();
 
-	private ObjectIdMap<ArrayList<UnresolvedDelta>> baseById;
+	private ObjectIdSubclassMap<DeltaChain> baseById;
 
-	private LongMap<ArrayList<UnresolvedDelta>> baseByPos;
+	private LongMap<UnresolvedDelta> baseByPos;
 
 	private byte[] objectData;
 
@@ -301,8 +301,8 @@ public void index(final ProgressMonitor progress) throws IOException {
 				readPackHeader();
 
 				entries = new PackedObjectInfo[(int) objectCount];
-				baseById = new ObjectIdMap<ArrayList<UnresolvedDelta>>();
-				baseByPos = new LongMap<ArrayList<UnresolvedDelta>>();
+				baseById = new ObjectIdSubclassMap<DeltaChain>();
+				baseByPos = new LongMap<UnresolvedDelta>();
 
 				progress.beginTask(PROGRESS_DOWNLOAD, (int) objectCount);
 				for (int done = 0; done < objectCount; done++) {
@@ -381,7 +381,7 @@ private void resolveDeltas(final ProgressMonitor progress)
 
 	private void resolveDeltas(final PackedObjectInfo oe) throws IOException {
 		final int oldCRC = oe.getCRC();
-		if (baseById.containsKey(oe) || baseByPos.containsKey(oe.getOffset()))
+		if (baseById.get(oe) != null || baseByPos.containsKey(oe.getOffset()))
 			resolveDeltas(oe.getOffset(), oldCRC, Constants.OBJ_BAD, null, oe);
 	}
 
@@ -443,34 +443,45 @@ private void resolveDeltas(final long pos, final int oldCRC, int type,
 		resolveChildDeltas(pos, type, data, oe);
 	}
 
+	private UnresolvedDelta removeBaseById(final AnyObjectId id){
+		final DeltaChain d = baseById.get(id);
+		return d != null ? d.remove() : null;
+	}
+
+	private static UnresolvedDelta reverse(UnresolvedDelta c) {
+		UnresolvedDelta tail = null;
+		while (c != null) {
+			final UnresolvedDelta n = c.next;
+			c.next = tail;
+			tail = c;
+			c = n;
+		}
+		return tail;
+	}
+
 	private void resolveChildDeltas(final long pos, int type, byte[] data,
 			PackedObjectInfo oe) throws IOException {
-		final ArrayList<UnresolvedDelta> a = baseById.remove(oe);
-		final ArrayList<UnresolvedDelta> b = baseByPos.remove(pos);
-		int ai = 0, bi = 0;
-		if (a != null && b != null) {
-			while (ai < a.size() && bi < b.size()) {
-				final UnresolvedDelta ad = a.get(ai);
-				final UnresolvedDelta bd = b.get(bi);
-				if (ad.position < bd.position) {
-					resolveDeltas(ad.position, ad.crc, type, data, null);
-					ai++;
-				} else {
-					resolveDeltas(bd.position, bd.crc, type, data, null);
-					bi++;
-				}
+		UnresolvedDelta a = reverse(removeBaseById(oe));
+		UnresolvedDelta b = reverse(baseByPos.remove(pos));
+		while (a != null && b != null) {
+			if (a.position < b.position) {
+				resolveDeltas(a.position, a.crc, type, data, null);
+				a = a.next;
+			} else {
+				resolveDeltas(b.position, b.crc, type, data, null);
+				b = b.next;
 			}
 		}
-		if (a != null)
-			while (ai < a.size()) {
-				final UnresolvedDelta ad = a.get(ai++);
-				resolveDeltas(ad.position, ad.crc, type, data, null);
-			}
-		if (b != null)
-			while (bi < b.size()) {
-				final UnresolvedDelta bd = b.get(bi++);
-				resolveDeltas(bd.position, bd.crc, type, data, null);
-			}
+		resolveChildDeltaChain(type, data, a);
+		resolveChildDeltaChain(type, data, b);
+	}
+
+	private void resolveChildDeltaChain(final int type, final byte[] data,
+			UnresolvedDelta a) throws IOException {
+		while (a != null) {
+			resolveDeltas(a.position, a.crc, type, data, null);
+			a = a.next;
+		}
 	}
 
 	private void fixThinPack(final ProgressMonitor progress) throws IOException {
@@ -479,11 +490,16 @@ private void fixThinPack(final ProgressMonitor progress) throws IOException {
 		packDigest.reset();
 		originalEOF = packOut.length() - 20;
 		final Deflater def = new Deflater(Deflater.DEFAULT_COMPRESSION, false);
+		final List<DeltaChain> missing = new ArrayList<DeltaChain>(64);
 		long end = originalEOF;
-		for (final ObjectId baseId : new ArrayList<ObjectId>(baseById.keySet())) {
+		for (final DeltaChain baseId : baseById) {
+			if (baseId.head == null)
+				continue;
 			final ObjectLoader ldr = repo.openObject(readCurs, baseId);
-			if (ldr == null)
+			if (ldr == null) {
+				missing.add(baseId);
 				continue;
+			}
 			final byte[] data = ldr.getBytes();
 			final int typeCode = ldr.getType();
 			final PackedObjectInfo oe;
@@ -501,9 +517,9 @@ private void fixThinPack(final ProgressMonitor progress) throws IOException {
 		}
 		def.end();
 
-		if (!baseById.isEmpty()) {
-			final ObjectId need = baseById.keySet().iterator().next();
-			throw new MissingObjectException(need, "delta base");
+		for (final DeltaChain base : missing) {
+			if (base.head != null)
+				throw new MissingObjectException(base, "delta base");
 		}
 
 		fixHeaderFooter(packcsum, packDigest.digest());
@@ -678,13 +694,10 @@ private void indexOneObject() throws IOException {
 				ofs += (c & 127);
 			}
 			final long base = pos - ofs;
-			ArrayList<UnresolvedDelta> r = baseByPos.get(base);
-			if (r == null) {
-				r = new ArrayList<UnresolvedDelta>(8);
-				baseByPos.put(base, r);
-			}
+			final UnresolvedDelta n;
 			skipInflateFromInput(sz);
-			r.add(new UnresolvedDelta(pos, (int) crc.getValue()));
+			n = new UnresolvedDelta(pos, (int) crc.getValue());
+			n.next = baseByPos.put(base, n);
 			deltaCount++;
 			break;
 		}
@@ -693,10 +706,10 @@ private void indexOneObject() throws IOException {
 			crc.update(buf, c, 20);
 			final ObjectId base = ObjectId.fromRaw(buf, c);
 			use(20);
-			ArrayList<UnresolvedDelta> r = baseById.get(base);
+			DeltaChain r = baseById.get(base);
 			if (r == null) {
-				r = new ArrayList<UnresolvedDelta>(8);
-				baseById.put(base, r);
+				r = new DeltaChain(base);
+				baseById.add(r);
 			}
 			skipInflateFromInput(sz);
 			r.add(new UnresolvedDelta(pos, (int) crc.getValue()));
@@ -937,11 +950,33 @@ private static CorruptObjectException corrupt(final DataFormatException dfe) {
 				+ dfe.getMessage());
 	}
 
+	private static class DeltaChain extends ObjectId {
+		UnresolvedDelta head;
+
+		DeltaChain(final AnyObjectId id) {
+			super(id);
+		}
+
+		UnresolvedDelta remove() {
+			final UnresolvedDelta r = head;
+			if (r != null)
+				head = null;
+			return r;
+		}
+
+		void add(final UnresolvedDelta d) {
+			d.next = head;
+			head = d;
+		}
+	}
+
 	private static class UnresolvedDelta {
 		final long position;
 
 		final int crc;
 
+		UnresolvedDelta next;
+
 		UnresolvedDelta(final long headerOffset, final int crc32) {
 			position = headerOffset;
 			crc = crc32;
-- 
1.6.3.rc1.205.g37f8

  reply	other threads:[~2009-04-28 21:15 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-28 21:12 [JGIT PATCH 00/13] Misc. bug fixes and cleanups Shawn O. Pearce
2009-04-28 21:12 ` [JGIT PATCH 01/13] Fix performance problem recently introduced to DeltaPackedObjectLoader Shawn O. Pearce
2009-04-28 21:12   ` [JGIT PATCH 02/13] Don't use ByteWindows when checking pack file headers/footers Shawn O. Pearce
2009-04-28 21:12     ` [JGIT PATCH 03/13] Rewrite WindowCache to be easier to follow and maintain Shawn O. Pearce
2009-04-28 21:12       ` [JGIT PATCH 04/13] Document the IllegalArgumentException thrown by WindowCache.reconfigure Shawn O. Pearce
2009-04-28 21:12         ` [JGIT PATCH 05/13] Create the new WindowCache before clearing the old one Shawn O. Pearce
2009-04-28 21:12           ` [JGIT PATCH 06/13] Better handle concurrent reads during a WindowCache reconfiguration Shawn O. Pearce
2009-04-28 21:12             ` [JGIT PATCH 07/13] Clear dead OffsetCache cells when clearing a reference Shawn O. Pearce
2009-04-28 21:12               ` [JGIT PATCH 08/13] Work around Sun JVM bug "Cleared SoftReference not added to queue" Shawn O. Pearce
2009-04-28 21:12                 ` [JGIT PATCH 09/13] Replace inefficient new String(String) constructor to silence FindBugs Shawn O. Pearce
2009-04-28 21:12                   ` [JGIT PATCH 10/13] Replace inefficient new Long(long) " Shawn O. Pearce
2009-04-28 21:12                     ` Shawn O. Pearce [this message]
2009-04-28 21:12                       ` [JGIT PATCH 12/13] Use getCachedBytes in IndexPack to avoid an unnecessary copy Shawn O. Pearce
2009-04-28 21:12                         ` [JGIT PATCH 13/13] Remove ObjectIdMap from the JGit library Shawn O. Pearce
2009-04-29 19:45                     ` [JGIT PATCH 10/13] Replace inefficient new Long(long) constructor to silence FindBugs Robin Rosenberg
2009-04-29 20:42                       ` [PATCH 12/11] Add a test for LongMap, IndexPack's helper class Shawn O. Pearce
2009-04-29 20:10                   ` [JGIT PATCH 09/13] Replace inefficient new String(String) constructor to silence FindBugs Robin Rosenberg
2009-04-29 20:22                     ` Shawn O. Pearce

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1240953146-12878-12-git-send-email-spearce@spearce.org \
    --to=spearce@spearce.org \
    --cc=git@vger.kernel.org \
    --cc=robin.rosenberg@dewire.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).