All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] git-p4: improve performance with large files
@ 2009-03-04 21:54 Sam Hocevar
  2009-03-04 23:05 ` thestar
  0 siblings, 1 reply; 15+ messages in thread
From: Sam Hocevar @ 2009-03-04 21:54 UTC (permalink / raw)
  To: git

   The current git-p4 way of concatenating strings runs in O(n^2) time
and is therefore terribly slow with large files because of unnecessary
memory copies. The following patch makes the operation O(n).

   Using this patch, importing a 17GB repository with large files
(50 to 500MB) takes 2 hours instead of a week.

Signed-off-by: Sam Hocevar <sam@zoy.org>
---
 contrib/fast-import/git-p4 |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 9fdb0c6..09e9746 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -990,11 +990,12 @@ class P4Sync(Command):
         while j < len(filedata):
             stat = filedata[j]
             j += 1
-            text = ''
+            data = []
             while j < len(filedata) and filedata[j]['code'] in ('text', 'unicode', 'binary'):
-                text += filedata[j]['data']
+                data.append(filedata[j]['data'])
                 del filedata[j]['data']
                 j += 1
+            text = "".join(data)
 
             if not stat.has_key('depotFile'):
                 sys.stderr.write("p4 print fails with: %s\n" % repr(stat))
-- 
1.6.1.3

^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH] git-p4: remove unnecessary semicolons at end of lines
@ 2009-03-06 15:53 Sam Hocevar
  2009-03-06 16:55 ` Brandon Casey
  2009-03-07 12:26 ` [PATCH v2] git-p4: remove unnecessary semicolons at end of lines Sam Hocevar
  0 siblings, 2 replies; 15+ messages in thread
From: Sam Hocevar @ 2009-03-06 15:53 UTC (permalink / raw)
  To: git

   This is a purely cosmetic patch that makes the git-p4 code look more
Pythonic by getting rid of end-of-line semicolons.


git-p4: remove unnecessary semicolons at end of lines.

Signed-off-by: Sam Hocevar <sam@zoy.org>
---
 contrib/fast-import/git-p4 |   46 ++++++++++++++++++++++----------------------
 1 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 3832f60..7ea5ac6 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -12,7 +12,7 @@ import optparse, sys, os, marshal, popen2, subprocess, shelve
 import tempfile, getopt, sha, os.path, time, platform
 import re

-from sets import Set;
+from sets import Set

 verbose = False

@@ -240,7 +240,7 @@ def p4Cmd(cmd):
     result = {}
     for entry in list:
         result.update(entry)
-    return result;
+    return result

 def p4Where(depotPath):
     if not depotPath.endswith("/"):
@@ -281,7 +281,7 @@ def currentGitBranch():
 def isValidGitDir(path):
     if (os.path.exists(path + "/HEAD")
         and os.path.exists(path + "/refs") and os.path.exists(path + "/objects")):
-        return True;
+        return True
     return False

 def parseRevision(ref):
@@ -328,8 +328,8 @@ def extractSettingsGitLog(log):

 def gitBranchExists(branch):
     proc = subprocess.Popen(["git", "rev-parse", branch],
-                            stderr=subprocess.PIPE, stdout=subprocess.PIPE);
-    return proc.wait() == 0;
+                            stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+    return proc.wait() == 0

 _gitConfig = {}
 def gitConfig(key):
@@ -492,7 +492,7 @@ class P4RollBack(Command):
         maxChange = int(args[0])

         if "p4ExitCode" in p4Cmd("changes -m 1"):
-            die("Problems executing p4");
+            die("Problems executing p4")

         if self.rollbackLocalBranches:
             refPrefix = "refs/heads/"
@@ -663,7 +663,7 @@ class P4Submit(Command):
             if response == "s":
                 print "Skipping! Good luck with the next patches..."
                 for f in editedFiles:
-                    p4_system("revert \"%s\"" % f);
+                    p4_system("revert \"%s\"" % f)
                 for f in filesToAdd:
                     system("rm %s" %f)
                 return
@@ -734,7 +734,7 @@ class P4Submit(Command):
             if os.environ.has_key("P4EDITOR"):
                 editor = os.environ.get("P4EDITOR")
             else:
-                editor = os.environ.get("EDITOR", defaultEditor);
+                editor = os.environ.get("EDITOR", defaultEditor)
             system(editor + " " + fileName)

             response = "y"
@@ -753,9 +753,9 @@ class P4Submit(Command):
                 p4_write_pipe("submit -i", submitTemplate)
             else:
                 for f in editedFiles:
-                    p4_system("revert \"%s\"" % f);
+                    p4_system("revert \"%s\"" % f)
                 for f in filesToAdd:
-                    p4_system("revert \"%s\"" % f);
+                    p4_system("revert \"%s\"" % f)
                     system("rm %s" %f)

             os.remove(fileName)
@@ -977,9 +977,9 @@ class P4Sync(Command):

             if "p4ExitCode" in filedata[0]:
                 die("Problems executing p4. Error: [%d]."
-                    % (filedata[0]['p4ExitCode']));
+                    % (filedata[0]['p4ExitCode']))

-        j = 0;
+        j = 0
         contents = {}
         while j < len(filedata):
             stat = filedata[j]
@@ -1303,8 +1303,8 @@ class P4Sync(Command):
     def importNewBranch(self, branch, maxChange):
         # make fast-import flush all changes to disk and update the refs using the checkpoint
         # command so that we can try to find the branch parent in the git history
-        self.gitStream.write("checkpoint\n\n");
-        self.gitStream.flush();
+        self.gitStream.write("checkpoint\n\n")
+        self.gitStream.flush()
         branchPrefix = self.depotPaths[0] + branch + "/"
         range = "@1,%s" % maxChange
         #print "prefix" + branchPrefix
@@ -1364,12 +1364,12 @@ class P4Sync(Command):
                                 fullBranch = self.projectName + branch
                                 if fullBranch not in self.p4BranchesInGit:
                                     if not self.silent:
-                                        print("\n    Importing new branch %s" % fullBranch);
+                                        print("\n    Importing new branch %s" % fullBranch)
                                     if self.importNewBranch(branch, change - 1):
                                         parent = ""
                                         self.p4BranchesInGit.append(fullBranch)
                                     if not self.silent:
-                                        print("\n    Resuming with change %s" % change);
+                                        print("\n    Resuming with change %s" % change)

                                 if self.verbose:
                                     print "parent determined through known branches: %s" % parent
@@ -1485,7 +1485,7 @@ class P4Sync(Command):
             self.branch = self.refPrefix + "master"
             if gitBranchExists("refs/heads/p4") and self.importIntoRemotes:
                 system("git update-ref %s refs/heads/p4" % self.branch)
-                system("git branch -D p4");
+                system("git branch -D p4")
             # create it /after/ importing, when master exists
             if not gitBranchExists(self.refPrefix + "HEAD") and self.importIntoRemotes and gitBranchExists(self.branch):
                 system("git symbolic-ref %sHEAD %s" % (self.refPrefix, self.branch))
@@ -1591,7 +1591,7 @@ class P4Sync(Command):
         self.loadUserMapFromCache()
         self.labels = {}
         if self.detectLabels:
-            self.getLabels();
+            self.getLabels()

         if self.detectBranches:
             ## FIXME - what's a P4 projectName ?
@@ -1615,7 +1615,7 @@ class P4Sync(Command):

         importProcess = subprocess.Popen(["git", "fast-import"],
                                          stdin=subprocess.PIPE, stdout=subprocess.PIPE,
-                                         stderr=subprocess.PIPE);
+                                         stderr=subprocess.PIPE)
         self.gitOutput = importProcess.stdout
         self.gitStream = importProcess.stdin
         self.gitError = importProcess.stderr
@@ -1688,9 +1688,9 @@ class P4Rebase(Command):

     def rebase(self):
         if os.system("git update-index --refresh") != 0:
-            die("Some files in your working directory are modified and different than what is in your index. You can use git update-index <filename> to bring the index up-to-date or stash away all your changes with git stash.");
+            die("Some files in your working directory are modified and different than what is in your index. You can use git update-index <filename> to bring the index up-to-date or stash away all your changes with git stash.")
         if len(read_pipe("git diff-index HEAD --")) > 0:
-            die("You have uncommited changes. Please commit them before rebasing or stash them away with git stash.");
+            die("You have uncommited changes. Please commit them before rebasing or stash them away with git stash.")

         [upstream, settings] = findUpstreamBranchPoint()
         if len(upstream) == 0:
@@ -1866,7 +1866,7 @@ def main():
                                        description = cmd.description,
                                        formatter = HelpFormatter())

-        (cmd, args) = parser.parse_args(sys.argv[2:], cmd);
+        (cmd, args) = parser.parse_args(sys.argv[2:], cmd)
     global verbose
     verbose = cmd.verbose
     if cmd.needsGit:
@@ -1877,7 +1877,7 @@ def main():
                 if os.path.exists(cmd.gitdir):
                     cdup = read_pipe("git rev-parse --show-cdup").strip()
                     if len(cdup) > 0:
-                        chdir(cdup);
+                        chdir(cdup)

         if not isValidGitDir(cmd.gitdir):
             if isValidGitDir(cmd.gitdir + "/.git"):

-- 
Sam.

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2009-03-07 12:33 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-04 21:54 [PATCH] git-p4: improve performance with large files Sam Hocevar
2009-03-04 23:05 ` thestar
2009-03-05 17:23   ` Sam Hocevar
2009-03-06  0:01     ` thestar
2009-03-06  1:14     ` Junio C Hamano
2009-03-06  1:25       ` Han-Wen Nienhuys
2009-03-06  8:53         ` Sam Hocevar
2009-03-06  9:42           ` Junio C Hamano
2009-03-06 10:13             ` [PATCH v4] " Sam Hocevar
2009-03-07 12:25               ` [PATCH v5] git-p4: improve performance when importing huge files by reducing the number of string concatenations while constraining memory usage Sam Hocevar
  -- strict thread matches above, loose matches on Subject: below --
2009-03-06 15:53 [PATCH] git-p4: remove unnecessary semicolons at end of lines Sam Hocevar
2009-03-06 16:55 ` Brandon Casey
2009-03-06 17:11   ` msysgit corrupting commit messages? Sam Hocevar
2009-03-07  2:48     ` Johannes Schindelin
2009-03-07 12:26 ` [PATCH v2] git-p4: remove unnecessary semicolons at end of lines Sam Hocevar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.