* [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions
@ 2022-08-26 13:10 Robert Yang
2022-08-26 14:21 ` [bitbake-devel] " Peter Kjellerstedt
0 siblings, 1 reply; 5+ messages in thread
From: Robert Yang @ 2022-08-26 13:10 UTC (permalink / raw)
To: bitbake-devel
* V2
Fixed typos in commit message
The "git clone --depth" command only works for refs and doesn't support revisions, but
"git fetch --depth" does support revisions, so use it to do the shallow clone. The
idea comes from "git clone --recurse-submodules --shallow-submodules".
The workflow is (Only enabled when BB_GIT_SHALLOW = "1"):
$ git init --bare <clonedir>
$ git remote add origin <url>
$ git fetch origin --depth <depth> revision
$ git branch <branchname> FETCH_HEAD
$ git tag v<branchname> FETCH_HEAD
Here is the test data based on poky (the test server has very good
network bandwidth):
Add 'BB_GIT_SHALLOW = "1"' to conf/local.conf
$ rm -fr tmp downloads # Fresh download for each build
$ time bitbake world --runall=fetch
$ du -sh downloads/git2/
Full Shallow Saved
--------------------------------------
Time: 15m59s 2m31s 84% (13m28s)
Size: 12G 1.2G 90% (10.8G)
* The Size is for downloads/git2/, the tarballs are not counted.
We can see that it saves a lot of download time and disk space, for example:
linux-yocto: 2.8G -> 228M
llvm: 2.5G -> 171M
cryptography: 1.5G -> 35M
And "$ bitbake world" works well.
This is an RFC patch, please feel free to give your comments.
Signed-off-by: Robert Yang <liezhi.yang@windriver.com>
---
bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++--------
1 file changed, 66 insertions(+), 17 deletions(-)
diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py
index 4534bd75800..57bb61d5ee1 100644
--- a/bitbake/lib/bb/fetch2/git.py
+++ b/bitbake/lib/bb/fetch2/git.py
@@ -244,6 +244,7 @@ class Git(FetchMethod):
ud.unresolvedrev[name] = 'HEAD'
ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat"
+ ud.basecmd = "LANG=C %s" % ud.basecmd
write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
@@ -344,6 +345,49 @@ class Git(FetchMethod):
return False
return True
+ def shallow_clone_by_fetch(self, ud, repourl, d):
+ """
+ Use "git fetch --depth <depth> revision" to implement shallow clone
+ since git can't clone a revision, a better solution should be:
+ "git fetch --depth <depth> revision:<branchname>" but it doesn't work
+ when revision is a tag, e.g.:
+ error: cannot update ref 'refs/heads/master': trying to write
+ non-commit object <revision> to branch 'refs/heads/master'
+ """
+
+ import datetime
+
+ depth = ud.shallow_depths[ud.names[0]]
+ revision = ud.revisions[ud.names[0]]
+ branchname = ud.branches[ud.names[0]]
+ if not branchname:
+ branchname = "master"
+
+ # Rename branchname if it exists which can:
+ # - Avoid conflicts during update
+ # - Keep the revision on a branch so that "git submodule update --recursive"
+ # can work since it requires the revision on a branch.
+ branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname)
+ if os.path.exists(branch_path):
+ os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S")))
+
+ init_cmd = "%s init --bare -q" % ud.basecmd
+ add_remote_cmd = "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl))
+ fetch_cmd = "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision)
+ # Create both branch and tag for the revision
+ branch_cmd = "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname)
+ tag_cmd = "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname)
+
+ if ud.proto.lower() != 'file':
+ bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
+
+ if not os.path.exists(ud.clonedir):
+ bb.utils.mkdirhier(ud.clonedir)
+
+ progresshandler = GitProgressHandler(d)
+ for cmd in (init_cmd, add_remote_cmd, fetch_cmd, branch_cmd, tag_cmd):
+ runfetchcmd(cmd, d, log=progresshandler, workdir=ud.clonedir)
+
def download(self, ud, d):
"""Fetch url"""
@@ -360,7 +404,7 @@ class Git(FetchMethod):
else:
tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir)
- fetch_cmd = "LANG=C %s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir))
+ fetch_cmd = "%s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir))
runfetchcmd(fetch_cmd, d, workdir=ud.clonedir)
repourl = self._get_repo_url(ud)
@@ -369,27 +413,32 @@ class Git(FetchMethod):
# We do this since git will use a "-l" option automatically for local urls where possible
if repourl.startswith("file://"):
repourl = repourl[7:]
- clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
- if ud.proto.lower() != 'file':
- bb.fetch2.check_network_access(d, clone_cmd, ud.url)
- progresshandler = GitProgressHandler(d)
- runfetchcmd(clone_cmd, d, log=progresshandler)
+ if ud.shallow:
+ self.shallow_clone_by_fetch(ud, repourl, d)
+ else:
+ clone_cmd = "%s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
+ progresshandler = GitProgressHandler(d)
+ if ud.proto.lower() != 'file':
+ bb.fetch2.check_network_access(d, clone_cmd, ud.url)
+ runfetchcmd(clone_cmd, d, log=progresshandler)
# Update the checkout if needed
if self.clonedir_need_update(ud, d):
output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
if "origin" in output:
- runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)
-
- runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)
- fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
- if ud.proto.lower() != 'file':
- bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
- progresshandler = GitProgressHandler(d)
- runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
- runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
- runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
- runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
+ runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)
+ if ud.shallow:
+ self.shallow_clone_by_fetch(ud, repourl, d)
+ else:
+ runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)
+ fetch_cmd = "%s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
+ if ud.proto.lower() != 'file':
+ bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
+ progresshandler = GitProgressHandler(d)
+ runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
+ runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
+ runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
+ runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
try:
os.unlink(ud.fullmirror)
except OSError as exc:
--
2.35.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* RE: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions 2022-08-26 13:10 [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions Robert Yang @ 2022-08-26 14:21 ` Peter Kjellerstedt 2022-08-27 3:36 ` Robert Yang 0 siblings, 1 reply; 5+ messages in thread From: Peter Kjellerstedt @ 2022-08-26 14:21 UTC (permalink / raw) To: Robert Yang, bitbake-devel@lists.openembedded.org > -----Original Message----- > From: bitbake-devel@lists.openembedded.org <bitbake- > devel@lists.openembedded.org> On Behalf Of Robert Yang > Sent: den 26 augusti 2022 15:11 > To: bitbake-devel@lists.openembedded.org > Subject: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git > fetch to shallow clone revisions > > * V2 > Fixed typos in commit message Patch history should go after the --- below. > The "git clone --depth" only works for refs, doesn't support revisions, but > "git fetch --depth" supports revisions, so use it to do the shallow clone, the > idea is from "git clone --recurse-submodules --shallow-submodules". > > The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): > $ git init --bare <clonedir> > $ git remote add origin <url> > $ git fetch origin --depth <depth> revision > $ git branch <branchname> FETCH_HEAD > $ git tag v<branchname> FETCH_HEAD > > Here is the testing data based on poky, the testing server has a very good > network bandwidth: > > Add 'BB_GIT_SHALLOW = "1"' conf/local.conf > $ rm -fr tmp downloads # Fresh download for each build > $ time bitbake world --runall=fetch > $ du -sh downloads/git2/ > > Full Shallow Saved > -------------------------------------- > Time: 15m59s 2m31s 84% (13m28s) > Size: 12G 1.2G 90% (10.8G) > > * The Size is for downloads/git2/, the tarballs are not counted. 
> > We can see that it saves a lot of download time and disk space, for > example: > > linux-yocto: 2.8G -> 228M > llvm: 2.5G -> 171M > cryptography: 1.5G -> 35M > > And "$ bitbake world" works well. > > This a RFC patch, please feel free to give you comments. > > Signed-off-by: Robert Yang <liezhi.yang@windriver.com> > --- > bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- > 1 file changed, 66 insertions(+), 17 deletions(-) > > diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py > index 4534bd75800..57bb61d5ee1 100644 > --- a/bitbake/lib/bb/fetch2/git.py > +++ b/bitbake/lib/bb/fetch2/git.py > @@ -244,6 +244,7 @@ class Git(FetchMethod): > ud.unresolvedrev[name] = 'HEAD' > > ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" > + ud.basecmd = "LANG=C %s" % ud.basecmd > > write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" > ud.write_tarballs = write_tarballs != "0" or ud.rebaseable > @@ -344,6 +345,49 @@ class Git(FetchMethod): > return False > return True > > + def shallow_clone_by_fetch(self, ud, repourl, d): > + """ > + Use "git fetch --depth <depth> revision" to implement shallow clone > + since git can't clone a revision, a better solution should be: > + "git fetch --depth <depth> revision:<branchname>" but it doesn't work > + when revision is a tag, e.g.: > + error: cannot update ref 'refs/heads/master': trying to write > + non-commit object <revision> to branch 'refs/heads/master' > + """ > + > + import datetime > + > + depth = ud.shallow_depths[ud.names[0]] > + revision = ud.revisions[ud.names[0]] > + branchname = ud.branches[ud.names[0]] > + if not branchname: > + branchname = "master" > + > + # Rename branchname if it exists which can: > + # - Avoid conflicts during update > + # - Keep the revision on a branch so that "git submodule update --recursive" > + # can work since it requires the revision on a branch. 
> + branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) > + if os.path.exists(branch_path): > + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) Any reason this is done using os.rename() rather than `git branch -m? > + > + init_cmd = "%s init --bare -q" % ud.basecmd > + add_remote_cmd = "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)) > + fetch_cmd = "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision) > + # Create both branch and tag for the revision > + branch_cmd = "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname) > + tag_cmd = "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname) Why not define these as a list instead: cmds = [ "%s init --bare -q" % ud.basecmd, "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)) "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision), # Create both branch and tag for the revision "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname), "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname), ] > + > + if ud.proto.lower() != 'file': > + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) > + > + if not os.path.exists(ud.clonedir): > + bb.utils.mkdirhier(ud.clonedir) > + > + progresshandler = GitProgressHandler(d) > + for cmd in (init_cmd, add_remote_cmd, fetch_cmd, branch_cmd, tag_cmd): > + runfetchcmd(cmd, d, log=progresshandler, workdir=ud.clonedir) > + > def download(self, ud, d): > """Fetch url""" > > @@ -360,7 +404,7 @@ class Git(FetchMethod): > else: > tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) > runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir) > - fetch_cmd = "LANG=C %s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) > + fetch_cmd = "%s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) > runfetchcmd(fetch_cmd, d, workdir=ud.clonedir) > repourl = self._get_repo_url(ud) > > @@ -369,27 +413,32 @@ class Git(FetchMethod): > # We do this since git 
will use a "-l" option automatically for local urls where possible > if repourl.startswith("file://"): > repourl = repourl[7:] > - clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) > - if ud.proto.lower() != 'file': > - bb.fetch2.check_network_access(d, clone_cmd, ud.url) > - progresshandler = GitProgressHandler(d) > - runfetchcmd(clone_cmd, d, log=progresshandler) > + if ud.shallow: > + self.shallow_clone_by_fetch(ud, repourl, d) > + else: > + clone_cmd = "%s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) > + progresshandler = GitProgressHandler(d) > + if ud.proto.lower() != 'file': > + bb.fetch2.check_network_access(d, clone_cmd, ud.url) > + runfetchcmd(clone_cmd, d, log=progresshandler) > > # Update the checkout if needed > if self.clonedir_need_update(ud, d): > output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) > if "origin" in output: > - runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) > - > - runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) > - fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) > - if ud.proto.lower() != 'file': > - bb.fetch2.check_network_access(d, fetch_cmd, ud.url) > - progresshandler = GitProgressHandler(d) > - runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) > - runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) > - runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) > - runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) > + runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) > + if ud.shallow: > + self.shallow_clone_by_fetch(ud, repourl, d) > + else: > + runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, 
workdir=ud.clonedir) > + fetch_cmd = "%s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) > + if ud.proto.lower() != 'file': > + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) > + progresshandler = GitProgressHandler(d) > + runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) > + runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) > + runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) > + runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) > try: > os.unlink(ud.fullmirror) > except OSError as exc: > -- > 2.35.1 //Peter ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions 2022-08-26 14:21 ` [bitbake-devel] " Peter Kjellerstedt @ 2022-08-27 3:36 ` Robert Yang 2022-08-29 10:46 ` Peter Kjellerstedt 0 siblings, 1 reply; 5+ messages in thread From: Robert Yang @ 2022-08-27 3:36 UTC (permalink / raw) To: Peter Kjellerstedt, bitbake-devel@lists.openembedded.org Hi Peter, On 8/26/22 22:21, Peter Kjellerstedt wrote: >> -----Original Message----- >> From: bitbake-devel@lists.openembedded.org <bitbake- >> devel@lists.openembedded.org> On Behalf Of Robert Yang >> Sent: den 26 augusti 2022 15:11 >> To: bitbake-devel@lists.openembedded.org >> Subject: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git >> fetch to shallow clone revisions >> >> * V2 >> Fixed typos in commit message > > Patch history should go after the --- below. > >> The "git clone --depth" only works for refs, doesn't support revisions, but >> "git fetch --depth" supports revisions, so use it to do the shallow clone, the >> idea is from "git clone --recurse-submodules --shallow-submodules". >> >> The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): >> $ git init --bare <clonedir> >> $ git remote add origin <url> >> $ git fetch origin --depth <depth> revision >> $ git branch <branchname> FETCH_HEAD >> $ git tag v<branchname> FETCH_HEAD >> >> Here is the testing data based on poky, the testing server has a very good >> network bandwidth: >> >> Add 'BB_GIT_SHALLOW = "1"' conf/local.conf >> $ rm -fr tmp downloads # Fresh download for each build >> $ time bitbake world --runall=fetch >> $ du -sh downloads/git2/ >> >> Full Shallow Saved >> -------------------------------------- >> Time: 15m59s 2m31s 84% (13m28s) >> Size: 12G 1.2G 90% (10.8G) >> >> * The Size is for downloads/git2/, the tarballs are not counted. 
>> >> We can see that it saves a lot of download time and disk space, for >> example: >> >> linux-yocto: 2.8G -> 228M >> llvm: 2.5G -> 171M >> cryptography: 1.5G -> 35M >> >> And "$ bitbake world" works well. >> >> This a RFC patch, please feel free to give you comments. >> >> Signed-off-by: Robert Yang <liezhi.yang@windriver.com> >> --- >> bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- >> 1 file changed, 66 insertions(+), 17 deletions(-) >> >> diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py >> index 4534bd75800..57bb61d5ee1 100644 >> --- a/bitbake/lib/bb/fetch2/git.py >> +++ b/bitbake/lib/bb/fetch2/git.py >> @@ -244,6 +244,7 @@ class Git(FetchMethod): >> ud.unresolvedrev[name] = 'HEAD' >> >> ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" >> + ud.basecmd = "LANG=C %s" % ud.basecmd >> >> write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" >> ud.write_tarballs = write_tarballs != "0" or ud.rebaseable >> @@ -344,6 +345,49 @@ class Git(FetchMethod): >> return False >> return True >> >> + def shallow_clone_by_fetch(self, ud, repourl, d): >> + """ >> + Use "git fetch --depth <depth> revision" to implement shallow clone >> + since git can't clone a revision, a better solution should be: >> + "git fetch --depth <depth> revision:<branchname>" but it doesn't work >> + when revision is a tag, e.g.: >> + error: cannot update ref 'refs/heads/master': trying to write >> + non-commit object <revision> to branch 'refs/heads/master' >> + """ >> + >> + import datetime >> + >> + depth = ud.shallow_depths[ud.names[0]] >> + revision = ud.revisions[ud.names[0]] >> + branchname = ud.branches[ud.names[0]] >> + if not branchname: >> + branchname = "master" >> + >> + # Rename branchname if it exists which can: >> + # - Avoid conflicts during update >> + # - Keep the revision on a branch so that "git submodule update --recursive" >> + # can work since it requires 
the revision on a branch. >> + branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) >> + if os.path.exists(branch_path): >> + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) > > Any reason this is done using os.rename() rather than `git branch -m? It is because this is simpler and to keep align with branch_path, otherwise, we need: - git branch --list to get the branch list and split them by '\n', remove the star. - Check branch in the list - git branch -m to rename the branch > >> + >> + init_cmd = "%s init --bare -q" % ud.basecmd >> + add_remote_cmd = "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)) >> + fetch_cmd = "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision) >> + # Create both branch and tag for the revision >> + branch_cmd = "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname) >> + tag_cmd = "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname) > > Why not define these as a list instead: > > cmds = [ > "%s init --bare -q" % ud.basecmd, > "%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl)) > "%s fetch --progress origin --depth %s %s" % (ud.basecmd, depth, revision), > # Create both branch and tag for the revision > "%s branch -f %s FETCH_HEAD" % (ud.basecmd, branchname), > "%s tag -f v%s FETCH_HEAD" % (ud.basecmd, branchname), > ] Thanks, I will update it with others' comments in the following days (If there are any). 
// Robert > >> + >> + if ud.proto.lower() != 'file': >> + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) >> + >> + if not os.path.exists(ud.clonedir): >> + bb.utils.mkdirhier(ud.clonedir) >> + >> + progresshandler = GitProgressHandler(d) >> + for cmd in (init_cmd, add_remote_cmd, fetch_cmd, branch_cmd, tag_cmd): >> + runfetchcmd(cmd, d, log=progresshandler, workdir=ud.clonedir) >> + >> def download(self, ud, d): >> """Fetch url""" >> >> @@ -360,7 +404,7 @@ class Git(FetchMethod): >> else: >> tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) >> runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir) >> - fetch_cmd = "LANG=C %s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) >> + fetch_cmd = "%s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir)) >> runfetchcmd(fetch_cmd, d, workdir=ud.clonedir) >> repourl = self._get_repo_url(ud) >> >> @@ -369,27 +413,32 @@ class Git(FetchMethod): >> # We do this since git will use a "-l" option automatically for local urls where possible >> if repourl.startswith("file://"): >> repourl = repourl[7:] >> - clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) >> - if ud.proto.lower() != 'file': >> - bb.fetch2.check_network_access(d, clone_cmd, ud.url) >> - progresshandler = GitProgressHandler(d) >> - runfetchcmd(clone_cmd, d, log=progresshandler) >> + if ud.shallow: >> + self.shallow_clone_by_fetch(ud, repourl, d) >> + else: >> + clone_cmd = "%s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) >> + progresshandler = GitProgressHandler(d) >> + if ud.proto.lower() != 'file': >> + bb.fetch2.check_network_access(d, clone_cmd, ud.url) >> + runfetchcmd(clone_cmd, d, log=progresshandler) >> >> # Update the checkout if needed >> if self.clonedir_need_update(ud, d): >> output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) >> if "origin" in output: >> - runfetchcmd("%s remote rm 
origin" % ud.basecmd, d, workdir=ud.clonedir) >> - >> - runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) >> - fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) >> - if ud.proto.lower() != 'file': >> - bb.fetch2.check_network_access(d, fetch_cmd, ud.url) >> - progresshandler = GitProgressHandler(d) >> - runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) >> - runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) >> - runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) >> - runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) >> + runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) >> + if ud.shallow: >> + self.shallow_clone_by_fetch(ud, repourl, d) >> + else: >> + runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) >> + fetch_cmd = "%s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) >> + if ud.proto.lower() != 'file': >> + bb.fetch2.check_network_access(d, fetch_cmd, ud.url) >> + progresshandler = GitProgressHandler(d) >> + runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir) >> + runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir) >> + runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir) >> + runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir) >> try: >> os.unlink(ud.fullmirror) >> except OSError as exc: >> -- >> 2.35.1 > > //Peter > ^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions 2022-08-27 3:36 ` Robert Yang @ 2022-08-29 10:46 ` Peter Kjellerstedt 2022-08-31 3:10 ` Robert Yang 0 siblings, 1 reply; 5+ messages in thread From: Peter Kjellerstedt @ 2022-08-29 10:46 UTC (permalink / raw) To: Robert Yang, bitbake-devel@lists.openembedded.org > -----Original Message----- > From: Robert Yang <liezhi.yang@windriver.com> > Sent: den 27 augusti 2022 05:37 > To: Peter Kjellerstedt <peter.kjellerstedt@axis.com>; bitbake-devel@lists.openembedded.org > Subject: Re: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions > > Hi Peter, > > On 8/26/22 22:21, Peter Kjellerstedt wrote: > >> -----Original Message----- > >> From: bitbake-devel@lists.openembedded.org <bitbake-devel@lists.openembedded.org> On Behalf Of Robert Yang > >> Sent: den 26 augusti 2022 15:11 > >> To: bitbake-devel@lists.openembedded.org > >> Subject: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions > >> > >> * V2 > >> Fixed typos in commit message > > > > Patch history should go after the --- below. > > > >> The "git clone --depth" only works for refs, doesn't support revisions, but > >> "git fetch --depth" supports revisions, so use it to do the shallow clone, the > >> idea is from "git clone --recurse-submodules --shallow-submodules". 
> >> > >> The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): > >> $ git init --bare <clonedir> > >> $ git remote add origin <url> > >> $ git fetch origin --depth <depth> revision > >> $ git branch <branchname> FETCH_HEAD > >> $ git tag v<branchname> FETCH_HEAD > >> > >> Here is the testing data based on poky, the testing server has a very good > >> network bandwidth: > >> > >> Add 'BB_GIT_SHALLOW = "1"' conf/local.conf > >> $ rm -fr tmp downloads # Fresh download for each build > >> $ time bitbake world --runall=fetch > >> $ du -sh downloads/git2/ > >> > >> Full Shallow Saved > >> -------------------------------------- > >> Time: 15m59s 2m31s 84% (13m28s) > >> Size: 12G 1.2G 90% (10.8G) > >> > >> * The Size is for downloads/git2/, the tarballs are not counted. > >> > >> We can see that it saves a lot of download time and disk space, for > >> example: > >> > >> linux-yocto: 2.8G -> 228M > >> llvm: 2.5G -> 171M > >> cryptography: 1.5G -> 35M > >> > >> And "$ bitbake world" works well. > >> > >> This a RFC patch, please feel free to give you comments. 
> >> > >> Signed-off-by: Robert Yang <liezhi.yang@windriver.com> > >> --- > >> bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- > >> 1 file changed, 66 insertions(+), 17 deletions(-) > >> > >> diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py > >> index 4534bd75800..57bb61d5ee1 100644 > >> --- a/bitbake/lib/bb/fetch2/git.py > >> +++ b/bitbake/lib/bb/fetch2/git.py > >> @@ -244,6 +244,7 @@ class Git(FetchMethod): > >> ud.unresolvedrev[name] = 'HEAD' > >> > >> ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" > >> + ud.basecmd = "LANG=C %s" % ud.basecmd > >> > >> write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" > >> ud.write_tarballs = write_tarballs != "0" or ud.rebaseable > >> @@ -344,6 +345,49 @@ class Git(FetchMethod): > >> return False > >> return True > >> > >> + def shallow_clone_by_fetch(self, ud, repourl, d): > >> + """ > >> + Use "git fetch --depth <depth> revision" to implement shallow clone > >> + since git can't clone a revision, a better solution should be: > >> + "git fetch --depth <depth> revision:<branchname>" but it doesn't work > >> + when revision is a tag, e.g.: > >> + error: cannot update ref 'refs/heads/master': trying to write > >> + non-commit object <revision> to branch 'refs/heads/master' > >> + """ > >> + > >> + import datetime > >> + > >> + depth = ud.shallow_depths[ud.names[0]] > >> + revision = ud.revisions[ud.names[0]] > >> + branchname = ud.branches[ud.names[0]] > >> + if not branchname: > >> + branchname = "master" > >> + > >> + # Rename branchname if it exists which can: > >> + # - Avoid conflicts during update > >> + # - Keep the revision on a branch so that "git submodule update --recursive" > >> + # can work since it requires the revision on a branch. 
> >> + branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) > >> + if os.path.exists(branch_path): > >> + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) > > > > Any reason this is done using os.rename() rather than `git branch -m? > > It is because this is simpler and to keep align with branch_path, otherwise, we > need: > - git branch --list to get the branch list and split them by '\n', remove the star. > - Check branch in the list > - git branch -m to rename the branch If you accept that the command can fail, then you do not need to list the branches. Just do the rename. If the branch exists, then the rename will succeed, otherwise it will fail, but that is expected and ignored. What I do not like about the use of os.rename() here is that it uses internal knowledge of how Git stores its data. //Peter ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions 2022-08-29 10:46 ` Peter Kjellerstedt @ 2022-08-31 3:10 ` Robert Yang 0 siblings, 0 replies; 5+ messages in thread From: Robert Yang @ 2022-08-31 3:10 UTC (permalink / raw) To: Peter Kjellerstedt, bitbake-devel@lists.openembedded.org On 8/29/22 6:46 PM, Peter Kjellerstedt wrote: >> -----Original Message----- >> From: Robert Yang <liezhi.yang@windriver.com> >> Sent: den 27 augusti 2022 05:37 >> To: Peter Kjellerstedt <peter.kjellerstedt@axis.com>; bitbake-devel@lists.openembedded.org >> Subject: Re: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions >> >> Hi Peter, >> >> On 8/26/22 22:21, Peter Kjellerstedt wrote: >>>> -----Original Message----- >>>> From: bitbake-devel@lists.openembedded.org <bitbake-devel@lists.openembedded.org> On Behalf Of Robert Yang >>>> Sent: den 26 augusti 2022 15:11 >>>> To: bitbake-devel@lists.openembedded.org >>>> Subject: [bitbake-devel] [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions >>>> >>>> * V2 >>>> Fixed typos in commit message >>> >>> Patch history should go after the --- below. >>> >>>> The "git clone --depth" only works for refs, doesn't support revisions, but >>>> "git fetch --depth" supports revisions, so use it to do the shallow clone, the >>>> idea is from "git clone --recurse-submodules --shallow-submodules". 
>>>> >>>> The workflow is (Only enabled when BB_GIT_SHALLOW = "1"): >>>> $ git init --bare <clonedir> >>>> $ git remote add origin <url> >>>> $ git fetch origin --depth <depth> revision >>>> $ git branch <branchname> FETCH_HEAD >>>> $ git tag v<branchname> FETCH_HEAD >>>> >>>> Here is the testing data based on poky, the testing server has a very good >>>> network bandwidth: >>>> >>>> Add 'BB_GIT_SHALLOW = "1"' conf/local.conf >>>> $ rm -fr tmp downloads # Fresh download for each build >>>> $ time bitbake world --runall=fetch >>>> $ du -sh downloads/git2/ >>>> >>>> Full Shallow Saved >>>> -------------------------------------- >>>> Time: 15m59s 2m31s 84% (13m28s) >>>> Size: 12G 1.2G 90% (10.8G) >>>> >>>> * The Size is for downloads/git2/, the tarballs are not counted. >>>> >>>> We can see that it saves a lot of download time and disk space, for >>>> example: >>>> >>>> linux-yocto: 2.8G -> 228M >>>> llvm: 2.5G -> 171M >>>> cryptography: 1.5G -> 35M >>>> >>>> And "$ bitbake world" works well. >>>> >>>> This a RFC patch, please feel free to give you comments. 
>>>> >>>> Signed-off-by: Robert Yang <liezhi.yang@windriver.com> >>>> --- >>>> bitbake/lib/bb/fetch2/git.py | 83 ++++++++++++++++++++++++++++-------- >>>> 1 file changed, 66 insertions(+), 17 deletions(-) >>>> >>>> diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py >>>> index 4534bd75800..57bb61d5ee1 100644 >>>> --- a/bitbake/lib/bb/fetch2/git.py >>>> +++ b/bitbake/lib/bb/fetch2/git.py >>>> @@ -244,6 +244,7 @@ class Git(FetchMethod): >>>> ud.unresolvedrev[name] = 'HEAD' >>>> >>>> ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false -c core.pager=cat" >>>> + ud.basecmd = "LANG=C %s" % ud.basecmd >>>> >>>> write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" >>>> ud.write_tarballs = write_tarballs != "0" or ud.rebaseable >>>> @@ -344,6 +345,49 @@ class Git(FetchMethod): >>>> return False >>>> return True >>>> >>>> + def shallow_clone_by_fetch(self, ud, repourl, d): >>>> + """ >>>> + Use "git fetch --depth <depth> revision" to implement shallow clone >>>> + since git can't clone a revision, a better solution should be: >>>> + "git fetch --depth <depth> revision:<branchname>" but it doesn't work >>>> + when revision is a tag, e.g.: >>>> + error: cannot update ref 'refs/heads/master': trying to write >>>> + non-commit object <revision> to branch 'refs/heads/master' >>>> + """ >>>> + >>>> + import datetime >>>> + >>>> + depth = ud.shallow_depths[ud.names[0]] >>>> + revision = ud.revisions[ud.names[0]] >>>> + branchname = ud.branches[ud.names[0]] >>>> + if not branchname: >>>> + branchname = "master" >>>> + >>>> + # Rename branchname if it exists which can: >>>> + # - Avoid conflicts during update >>>> + # - Keep the revision on a branch so that "git submodule update --recursive" >>>> + # can work since it requires the revision on a branch. 
>>>> + branch_path = os.path.join(ud.clonedir, 'refs/heads/%s' % branchname) >>>> + if os.path.exists(branch_path): >>>> + os.rename(branch_path, '%s.%s' % (branch_path, datetime.datetime.now().strftime("%Y%m%d%H%M%S"))) >>> >>> Any reason this is done using os.rename() rather than `git branch -m? >> >> It is because this is simpler and to keep align with branch_path, otherwise, we >> need: >> - git branch --list to get the branch list and split them by '\n', remove the star. >> - Check branch in the list >> - git branch -m to rename the branch > > If you accept that the command can fail, then you do not need to list > the branches. Just do the rename. If the branch exists, then the > rename will succeed, otherwise it will fail, but that is expected and > ignored. > > What I do not like about the use of os.rename() here is that it uses > internal knowledge of how Git stores its data. Thanks, I will update it. // Robert > > //Peter > ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-08-31 3:10 UTC | newest] Thread overview: 5+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2022-08-26 13:10 [RFC][PATCH V2] bitbake: fetch2/git: Use git fetch to shallow clone revisions Robert Yang 2022-08-26 14:21 ` [bitbake-devel] " Peter Kjellerstedt 2022-08-27 3:36 ` Robert Yang 2022-08-29 10:46 ` Peter Kjellerstedt 2022-08-31 3:10 ` Robert Yang
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.