All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/4] fetch2/git: Add support for fast initial shallow fetch
@ 2025-02-20 17:27 Stefan Koch
  2025-02-20 17:27 ` [PATCH v3 2/4] bitbake-user-manual: Update documentation for fast `BB_GIT_SHALLOW` Stefan Koch
                   ` (4 more replies)
  0 siblings, 5 replies; 25+ messages in thread
From: Stefan Koch @ 2025-02-20 17:27 UTC (permalink / raw)
  To: bitbake-devel
  Cc: stefan-koch, simon.sudler, jan.kiszka, alex.kanavin,
	richard.purdie

When `ud.shallow == 1`:
- Prefer an initial shallow clone over an initial full bare clone,
  while still utilizing any already existing full bare clones.

This improves:
- Resolve timeout issues during initial clones on slow internet connections
  by reducing the amount of data transferred.
- Eliminate the need to use an HTTPS tarball `SRC_URI`
  to reduce data transfer.
- Allow SSH-based authentication (e.g. cert and agent-based) when
  using non-public repos, so additional HTTPS tokens may not be required.

Signed-off-by: Stefan Koch <stefan-koch@siemens.com>
---
 lib/bb/fetch2/git.py | 100 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 74 insertions(+), 26 deletions(-)

diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 6badda597..d5c7f4332 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -366,6 +366,33 @@ class Git(FetchMethod):
     def tarball_need_update(self, ud):
         return ud.write_tarballs and not os.path.exists(ud.fullmirror)
 
+    def lfs_fetch(self, ud, d, clonedir, revision, fetchall=False, progresshandler=None):
+        """Helper method for fetching Git LFS data"""
+        try:
+            if self._need_lfs(ud) and self._contains_lfs(ud, d, clonedir) and self._find_git_lfs(d) and len(revision):
+                # Using worktree with the revision because .lfsconfig may exists
+                worktree_add_cmd = "%s worktree add wt %s" % (ud.basecmd, revision)
+                runfetchcmd(worktree_add_cmd, d, log=progresshandler, workdir=clonedir)
+                lfs_fetch_cmd = "%s lfs fetch %s" % (ud.basecmd, "--all" if fetchall else "")
+                runfetchcmd(lfs_fetch_cmd, d, log=progresshandler, workdir=(clonedir + "/wt"))
+                worktree_rem_cmd = "%s worktree remove -f wt" % ud.basecmd
+                runfetchcmd(worktree_rem_cmd, d, log=progresshandler, workdir=clonedir)
+        except:
+            logger.warning("Fetching LFS did not succeed.")
+
+    @contextmanager
+    def create_atomic(self, filename):
+        """Create as a temp file and move atomically into position to avoid races"""
+        fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
+        try:
+            yield tfile
+            umask = os.umask(0o666)
+            os.umask(umask)
+            os.chmod(tfile, (0o666 & ~umask))
+            os.rename(tfile, filename)
+        finally:
+            os.close(fd)
+
     def try_premirror(self, ud, d):
         # If we don't do this, updating an existing checkout with only premirrors
         # is not possible
@@ -446,7 +473,40 @@ class Git(FetchMethod):
             if ud.proto.lower() != 'file':
                 bb.fetch2.check_network_access(d, clone_cmd, ud.url)
             progresshandler = GitProgressHandler(d)
-            runfetchcmd(clone_cmd, d, log=progresshandler)
+
+            # When ud.shallow is enabled:
+            # Try creating an initial shallow clone
+            shallow_fast_state = False
+            if ud.shallow:
+                tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
+                shallowclone = os.path.join(tempdir, 'git')
+                try:
+                    self.clone_shallow_local(ud, shallowclone, d)
+                    shallow_fast_state = True
+                except:
+                    logger.warning("Creating initial shallow clone failed, try regular clone now.")
+
+                # When the shallow clone has succeeded:
+                # Create shallow tarball
+                if shallow_fast_state:
+                    logger.info("Creating tarball of git repository")
+                    with self.create_atomic(ud.fullshallow) as tfile:
+                        runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
+                    runfetchcmd("touch %s.done" % ud.fullshallow, d)
+
+                # Always cleanup tempdir
+                bb.utils.remove(tempdir, recurse=True)
+
+                # When the shallow clone has succeeded:
+                # Use shallow tarball
+                if shallow_fast_state:
+                    ud.localpath = ud.fullshallow
+                    return
+
+            # When ud.shallow is disabled or the shallow clone failed:
+            # Create an initial regular clone
+            if not shallow_fast_state:
+                runfetchcmd(clone_cmd, d, log=progresshandler)
 
         # Update the checkout if needed
         if self.clonedir_need_update(ud, d):
@@ -509,20 +569,6 @@ class Git(FetchMethod):
                     runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir)
 
     def build_mirror_data(self, ud, d):
-
-        # Create as a temp file and move atomically into position to avoid races
-        @contextmanager
-        def create_atomic(filename):
-            fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
-            try:
-                yield tfile
-                umask = os.umask(0o666)
-                os.umask(umask)
-                os.chmod(tfile, (0o666 & ~umask))
-                os.rename(tfile, filename)
-            finally:
-                os.close(fd)
-
         if ud.shallow and ud.write_shallow_tarballs:
             if not os.path.exists(ud.fullshallow):
                 if os.path.islink(ud.fullshallow):
@@ -533,7 +579,7 @@ class Git(FetchMethod):
                     self.clone_shallow_local(ud, shallowclone, d)
 
                     logger.info("Creating tarball of git repository")
-                    with create_atomic(ud.fullshallow) as tfile:
+                    with self.create_atomic(ud.fullshallow) as tfile:
                         runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
                     runfetchcmd("touch %s.done" % ud.fullshallow, d)
                 finally:
@@ -543,7 +589,7 @@ class Git(FetchMethod):
                 os.unlink(ud.fullmirror)
 
             logger.info("Creating tarball of git repository")
-            with create_atomic(ud.fullmirror) as tfile:
+            with self.create_atomic(ud.fullmirror) as tfile:
                 mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d,
                         quiet=True, workdir=ud.clonedir)
                 runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ."
@@ -557,12 +603,20 @@ class Git(FetchMethod):
         - For BB_GIT_SHALLOW_REVS: git fetch --shallow-exclude=<revs> rev
         """
 
+        progresshandler = GitProgressHandler(d)
+        repourl = self._get_repo_url(ud)
         bb.utils.mkdirhier(dest)
         init_cmd = "%s init -q" % ud.basecmd
         if ud.bareclone:
             init_cmd += " --bare"
         runfetchcmd(init_cmd, d, workdir=dest)
-        runfetchcmd("%s remote add origin %s" % (ud.basecmd, ud.clonedir), d, workdir=dest)
+        # Use repourl when creating a fast initial shallow clone
+        # Prefer already existing full bare clones if available
+        if ud.shallow and not os.path.exists(ud.clonedir):
+            remote = shlex.quote(repourl)
+        else:
+            remote = ud.clonedir
+        runfetchcmd("%s remote add origin %s" % (ud.basecmd, remote), d, workdir=dest)
 
         # Check the histories which should be excluded
         shallow_exclude = ''
@@ -595,15 +649,10 @@ class Git(FetchMethod):
             if shallow_exclude:
                 fetch_cmd += shallow_exclude
 
-            # Advertise the revision for lower version git such as 2.25.1:
-            # error: Server does not allow request for unadvertised object.
-            # The ud.clonedir is a local temporary dir, will be removed when
-            # fetch is done, so we can do anything on it.
-            adv_cmd = 'git branch -f advertise-%s %s' % (revision, revision)
-            runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
-
             runfetchcmd(fetch_cmd, d, workdir=dest)
             runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
+            # Fetch Git LFS data
+            self.lfs_fetch(ud, d, dest, ud.revisions[ud.names[0]])
 
         # Apply extra ref wildcards
         all_refs_remote = runfetchcmd("%s ls-remote origin 'refs/*'" % ud.basecmd, \
@@ -629,7 +678,6 @@ class Git(FetchMethod):
             runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
 
         # The url is local ud.clonedir, set it to upstream one
-        repourl = self._get_repo_url(ud)
         runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=dest)
 
     def unpack(self, ud, destdir, d):
-- 
2.39.5



^ permalink raw reply related	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2025-02-27 16:52 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-20 17:27 [PATCH v3 1/4] fetch2/git: Add support for fast initial shallow fetch Stefan Koch
2025-02-20 17:27 ` [PATCH v3 2/4] bitbake-user-manual: Update documentation for fast `BB_GIT_SHALLOW` Stefan Koch
2025-02-21 10:52   ` [bitbake-devel] " Quentin Schulz
2025-02-21 16:23     ` Koch, Stefan
2025-02-21 16:31       ` Quentin Schulz
2025-02-21 16:35         ` Koch, Stefan
2025-02-21 16:47           ` Quentin Schulz
2025-02-20 17:27 ` [PATCH v3 3/4] tests/fetch: Adapt test cases for fast shallow fetches Stefan Koch
2025-02-20 17:27 ` [PATCH v3 4/4] tests/fetch: Add an additional test case to check whether the fast fetch is shallow Stefan Koch
2025-02-21  8:09 ` [PATCH v3 1/4] fetch2/git: Add support for fast initial shallow fetch Richard Purdie
2025-02-21 16:40   ` Koch, Stefan
2025-02-21 17:11     ` Richard Purdie
2025-02-21 17:18       ` Koch, Stefan
2025-02-21 17:22         ` Richard Purdie
2025-02-21 17:31           ` Koch, Stefan
2025-02-21 17:35             ` Richard Purdie
2025-02-24 11:58 ` [bitbake-devel] " Mathieu Dubois-Briand
2025-02-24 12:55   ` Koch, Stefan
2025-02-24 13:11     ` Koch, Stefan
2025-02-24 17:10       ` Mathieu Dubois-Briand
2025-02-25 17:04         ` Koch, Stefan
2025-02-26  8:39           ` Mathieu Dubois-Briand
2025-02-26  8:58             ` Richard Purdie
2025-02-27 16:52               ` Koch, Stefan
2025-02-26  9:24             ` Mathieu Dubois-Briand

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.