* [PATCH 1/8] fetch: support multiple mirror tarball filenames
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
@ 2017-05-12 21:46 ` Christopher Larson
2017-05-12 21:46 ` [PATCH 2/8] git-make-shallow: add script to make a git repo shallow Christopher Larson
` (7 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-05-12 21:46 UTC (permalink / raw)
To: bitbake-devel; +Cc: Christopher Larson
Remove ud.mirrortarball in favor of ud.mirrortarballs. Each tarball will be
attempted, in order, and the first available will be used. This is needed for
git shallow mirror tarball support, as we want to be able to use either
a shallow or full mirror tarball.
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
---
lib/bb/fetch2/__init__.py | 72 ++++++++++++++++++++++++-----------------------
lib/bb/fetch2/git.py | 5 ++--
lib/bb/fetch2/hg.py | 5 ++--
lib/bb/fetch2/npm.py | 7 +++--
4 files changed, 47 insertions(+), 42 deletions(-)
diff --git a/lib/bb/fetch2/__init__.py b/lib/bb/fetch2/__init__.py
index 136fc29c..35902988 100644
--- a/lib/bb/fetch2/__init__.py
+++ b/lib/bb/fetch2/__init__.py
@@ -425,7 +425,7 @@ def encodeurl(decoded):
return url
-def uri_replace(ud, uri_find, uri_replace, replacements, d):
+def uri_replace(ud, uri_find, uri_replace, replacements, d, mirrortarball=None):
if not ud.url or not uri_find or not uri_replace:
logger.error("uri_replace: passed an undefined value, not replacing")
return None
@@ -464,9 +464,9 @@ def uri_replace(ud, uri_find, uri_replace, replacements, d):
if loc == 2:
# Handle path manipulations
basename = None
- if uri_decoded[0] != uri_replace_decoded[0] and ud.mirrortarball:
+ if uri_decoded[0] != uri_replace_decoded[0] and mirrortarball:
# If the source and destination url types differ, must be a mirrortarball mapping
- basename = os.path.basename(ud.mirrortarball)
+ basename = os.path.basename(mirrortarball)
# Kill parameters, they make no sense for mirror tarballs
uri_decoded[5] = {}
elif ud.localpath and ud.method.supports_checksum(ud):
@@ -892,45 +892,47 @@ def build_mirroruris(origud, mirrors, ld):
replacements["BASENAME"] = origud.path.split("/")[-1]
replacements["MIRRORNAME"] = origud.host.replace(':','.') + origud.path.replace('/', '.').replace('*', '.')
- def adduri(ud, uris, uds, mirrors):
+ def adduri(ud, uris, uds, mirrors, tarballs):
for line in mirrors:
try:
(find, replace) = line
except ValueError:
continue
- newuri = uri_replace(ud, find, replace, replacements, ld)
- if not newuri or newuri in uris or newuri == origud.url:
- continue
- if not trusted_network(ld, newuri):
- logger.debug(1, "Mirror %s not in the list of trusted networks, skipping" % (newuri))
- continue
+ for tarball in tarballs:
+ newuri = uri_replace(ud, find, replace, replacements, ld, tarball)
+ if not newuri or newuri in uris or newuri == origud.url:
+ continue
- # Create a local copy of the mirrors minus the current line
- # this will prevent us from recursively processing the same line
- # as well as indirect recursion A -> B -> C -> A
- localmirrors = list(mirrors)
- localmirrors.remove(line)
+ if not trusted_network(ld, newuri):
+ logger.debug(1, "Mirror %s not in the list of trusted networks, skipping" % (newuri))
+ continue
+
+ # Create a local copy of the mirrors minus the current line
+ # this will prevent us from recursively processing the same line
+ # as well as indirect recursion A -> B -> C -> A
+ localmirrors = list(mirrors)
+ localmirrors.remove(line)
- try:
- newud = FetchData(newuri, ld)
- newud.setup_localpath(ld)
- except bb.fetch2.BBFetchException as e:
- logger.debug(1, "Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url))
- logger.debug(1, str(e))
try:
- # setup_localpath of file:// urls may fail, we should still see
- # if mirrors of the url exist
- adduri(newud, uris, uds, localmirrors)
- except UnboundLocalError:
- pass
- continue
- uris.append(newuri)
- uds.append(newud)
+ newud = FetchData(newuri, ld)
+ newud.setup_localpath(ld)
+ except bb.fetch2.BBFetchException as e:
+ logger.debug(1, "Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url))
+ logger.debug(1, str(e))
+ try:
+ # setup_localpath of file:// urls may fail, we should still see
+ # if mirrors of the url exist
+ adduri(newud, uris, uds, localmirrors, tarballs)
+ except UnboundLocalError:
+ pass
+ continue
+ uris.append(newuri)
+ uds.append(newud)
- adduri(newud, uris, uds, localmirrors)
+ adduri(newud, uris, uds, localmirrors, tarballs)
- adduri(origud, uris, uds, mirrors)
+ adduri(origud, uris, uds, mirrors, origud.mirrortarballs or [None])
return uris, uds
@@ -975,8 +977,8 @@ def try_mirror_url(fetch, origud, ud, ld, check = False):
# We may be obtaining a mirror tarball which needs further processing by the real fetcher
# If that tarball is a local file:// we need to provide a symlink to it
dldir = ld.getVar("DL_DIR")
- if origud.mirrortarball and os.path.basename(ud.localpath) == os.path.basename(origud.mirrortarball) \
- and os.path.basename(ud.localpath) != os.path.basename(origud.localpath):
+
+ if origud.mirrortarballs and os.path.basename(ud.localpath) in origud.mirrortarballs and os.path.basename(ud.localpath) != os.path.basename(origud.localpath):
# Create donestamp in old format to avoid triggering a re-download
if ud.donestamp:
bb.utils.mkdirhier(os.path.dirname(ud.donestamp))
@@ -993,7 +995,7 @@ def try_mirror_url(fetch, origud, ud, ld, check = False):
pass
if not verify_donestamp(origud, ld) or origud.method.need_update(origud, ld):
origud.method.download(origud, ld)
- if hasattr(origud.method,"build_mirror_data"):
+ if hasattr(origud.method, "build_mirror_data"):
origud.method.build_mirror_data(origud, ld)
return origud.localpath
# Otherwise the result is a local file:// and we symlink to it
@@ -1190,7 +1192,7 @@ class FetchData(object):
self.localfile = ""
self.localpath = None
self.lockfile = None
- self.mirrortarball = None
+ self.mirrortarballs = []
self.basename = None
self.basepath = None
(self.type, self.host, self.path, self.user, self.pswd, self.parm) = decodeurl(d.expand(url))
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 2550bde8..01d4bbdc 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -205,8 +205,9 @@ class Git(FetchMethod):
if ud.rebaseable:
for name in ud.names:
gitsrcname = gitsrcname + '_' + ud.revisions[name]
- ud.mirrortarball = 'git2_%s.tar.gz' % gitsrcname
- ud.fullmirror = os.path.join(d.getVar("DL_DIR"), ud.mirrortarball)
+ mirrortarball = 'git2_%s.tar.gz' % gitsrcname
+ ud.fullmirror = os.path.join(d.getVar("DL_DIR"), mirrortarball)
+ ud.mirrortarballs = [mirrortarball]
gitdir = d.getVar("GITDIR") or (d.getVar("DL_DIR") + "/git2/")
ud.clonedir = os.path.join(gitdir, gitsrcname)
diff --git a/lib/bb/fetch2/hg.py b/lib/bb/fetch2/hg.py
index b5f26860..d0857e63 100644
--- a/lib/bb/fetch2/hg.py
+++ b/lib/bb/fetch2/hg.py
@@ -76,8 +76,9 @@ class Hg(FetchMethod):
# Create paths to mercurial checkouts
hgsrcname = '%s_%s_%s' % (ud.module.replace('/', '.'), \
ud.host, ud.path.replace('/', '.'))
- ud.mirrortarball = 'hg_%s.tar.gz' % hgsrcname
- ud.fullmirror = os.path.join(d.getVar("DL_DIR"), ud.mirrortarball)
+ mirrortarball = 'hg_%s.tar.gz' % hgsrcname
+ ud.fullmirror = os.path.join(d.getVar("DL_DIR"), mirrortarball)
+ ud.mirrortarballs = [mirrortarball]
hgdir = d.getVar("HGDIR") or (d.getVar("DL_DIR") + "/hg/")
ud.pkgdir = os.path.join(hgdir, hgsrcname)
diff --git a/lib/bb/fetch2/npm.py b/lib/bb/fetch2/npm.py
index 73a75fe9..5f799724 100644
--- a/lib/bb/fetch2/npm.py
+++ b/lib/bb/fetch2/npm.py
@@ -91,9 +91,10 @@ class Npm(FetchMethod):
ud.prefixdir = prefixdir
ud.write_tarballs = ((d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0") != "0")
- ud.mirrortarball = 'npm_%s-%s.tar.xz' % (ud.pkgname, ud.version)
- ud.mirrortarball = ud.mirrortarball.replace('/', '-')
- ud.fullmirror = os.path.join(d.getVar("DL_DIR"), ud.mirrortarball)
+ mirrortarball = 'npm_%s-%s.tar.xz' % (ud.pkgname, ud.version)
+ mirrortarball = ud.mirrortarball.replace('/', '-')
+ ud.fullmirror = os.path.join(d.getVar("DL_DIR"), mirrortarball)
+ ud.mirrortarballs = [mirrortarball]
def need_update(self, ud, d):
if os.path.exists(ud.localpath):
--
2.11.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/8] git-make-shallow: add script to make a git repo shallow
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
2017-05-12 21:46 ` [PATCH 1/8] fetch: support multiple mirror tarball filenames Christopher Larson
@ 2017-05-12 21:46 ` Christopher Larson
2017-06-02 10:07 ` Richard Purdie
2017-05-12 21:46 ` [PATCH 3/8] fetch/git: add support for shallow mirror tarballs Christopher Larson
` (6 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: Christopher Larson @ 2017-05-12 21:46 UTC (permalink / raw)
To: bitbake-devel; +Cc: Christopher Larson
This script will be used by the git fetcher to create shallow mirror tarballs.
  usage: git-make-shallow [-h] [--ref REF] [--shrink] REVISION [REVISION ...]
  Remove the history of the specified revisions, then optionally filter the
  available refs to those specified.
  positional arguments:
    REVISION           a git revision/commit
  optional arguments:
    -h, --help         show this help message and exit
    --ref REF, -r REF  remove all but the specified refs (cumulative)
    --shrink, -s       shrink the git repository by repacking and pruning
While git does provide the ability to clone at a specific depth, and fetch all
remote refs at a particular depth, the depth is across all branches/tags, and
doesn't provide the flexibility we need, hence this script.
Refs (branches+tags) can be filtered, as the process of history removal scales
up rapidly with the number of refs. Even the existing `git fetch --depth=` is
extremely slow on an upstream kernel repository with all the branches and tags
kept.
This uses the same underlying mechanism to implement the history removal which
git itself uses (.git/shallow), and the results, when configured similarly, are
in line with the results git itself produces with `fetch --depth`.
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
---
bin/git-make-shallow | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++
lib/bb/tests/fetch.py | 127 ++++++++++++++++++++++++++++++++++++++
2 files changed, 292 insertions(+)
create mode 100755 bin/git-make-shallow
diff --git a/bin/git-make-shallow b/bin/git-make-shallow
new file mode 100755
index 00000000..296d3a3d
--- /dev/null
+++ b/bin/git-make-shallow
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+"""git-make-shallow: make the current git repository shallow
+
+Remove the history of the specified revisions, then optionally filter the
+available refs to those specified.
+"""
+
+import argparse
+import collections
+import errno
+import itertools
+import os
+import subprocess
+import sys
+
+version = 1.0
+
+
+def main():
+ if sys.version_info < (3, 4, 0):
+ sys.exit('Python 3.4 or greater is required')
+
+ git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
+ shallow_file = os.path.join(git_dir, 'shallow')
+ if os.path.exists(shallow_file):
+ try:
+ check_output(['git', 'fetch', '--unshallow'])
+ except subprocess.CalledProcessError:
+ try:
+ os.unlink(shallow_file)
+ except OSError as exc:
+ if exc.errno != errno.ENOENT:
+ raise
+
+ args = process_args()
+ revs = check_output(['git', 'rev-list'] + args.revisions).splitlines()
+
+ make_shallow(shallow_file, args.revisions, args.refs)
+
+ ref_revs = check_output(['git', 'rev-list'] + args.refs).splitlines()
+ remaining_history = set(revs) & set(ref_revs)
+ for rev in remaining_history:
+ if check_output(['git', 'rev-parse', '{}^@'.format(rev)]):
+ sys.exit('Error: %s was not made shallow' % rev)
+
+ filter_refs(args.refs)
+
+ if args.shrink:
+ shrink_repo(git_dir)
+ subprocess.check_call(['git', 'fsck', '--unreachable'])
+
+
+def process_args():
+ # TODO: add argument to automatically keep local-only refs, since they
+ # can't be easily restored with a git fetch.
+ parser = argparse.ArgumentParser(description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.')
+ parser.add_argument('--ref', '-r', metavar='REF', action='append', dest='refs', help='remove all but the specified refs (cumulative)')
+ parser.add_argument('--shrink', '-s', action='store_true', help='shrink the git repository by repacking and pruning')
+ parser.add_argument('revisions', metavar='REVISION', nargs='+', help='a git revision/commit')
+ if len(sys.argv) < 2:
+ parser.print_help()
+ sys.exit(2)
+
+ args = parser.parse_args()
+
+ if args.refs:
+ args.refs = check_output(['git', 'rev-parse', '--symbolic-full-name'] + args.refs).splitlines()
+ else:
+ args.refs = get_all_refs(lambda r, t, tt: t == 'commit' or tt == 'commit')
+
+ args.refs = list(filter(lambda r: not r.endswith('/HEAD'), args.refs))
+ args.revisions = check_output(['git', 'rev-parse'] + ['%s^{}' % i for i in args.revisions]).splitlines()
+ return args
+
+
+def check_output(cmd, input=None):
+ return subprocess.check_output(cmd, universal_newlines=True, input=input)
+
+
+def make_shallow(shallow_file, revisions, refs):
+ """Remove the history of the specified revisions."""
+ for rev in follow_history_intersections(revisions, refs):
+ print("Processing %s" % rev)
+ with open(shallow_file, 'a') as f:
+ f.write(rev + '\n')
+
+
+def get_all_refs(ref_filter=None):
+ """Return all the existing refs in this repository, optionally filtering the refs."""
+ ref_output = check_output(['git', 'for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])
+ ref_split = [tuple(iter_extend(l.rsplit('\t'), 3)) for l in ref_output.splitlines()]
+ if ref_filter:
+ ref_split = (e for e in ref_split if ref_filter(*e))
+ refs = [r[0] for r in ref_split]
+ return refs
+
+
+def iter_extend(iterable, length, obj=None):
+ """Ensure that iterable is the specified length by extending with obj."""
+ return itertools.islice(itertools.chain(iterable, itertools.repeat(obj)), length)
+
+
+def filter_refs(refs):
+ """Remove all but the specified refs from the git repository."""
+ all_refs = get_all_refs()
+ to_remove = set(all_refs) - set(refs)
+ if to_remove:
+ check_output(['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref'],
+ input=''.join(l + '\0' for l in to_remove))
+
+
+def follow_history_intersections(revisions, refs):
+ """Determine all the points where the history of the specified revisions intersects the specified refs."""
+ queue = collections.deque(revisions)
+ seen = set()
+
+ for rev in iter_except(queue.popleft, IndexError):
+ if rev in seen:
+ continue
+
+ parents = check_output(['git', 'rev-parse', '%s^@' % rev]).splitlines()
+
+ yield rev
+ seen.add(rev)
+
+ if not parents:
+ continue
+
+ check_refs = check_output(['git', 'merge-base', '--independent'] + sorted(refs)).splitlines()
+ for parent in parents:
+ for ref in check_refs:
+ print("Checking %s vs %s" % (parent, ref))
+ try:
+ merge_base = check_output(['git', 'merge-base', parent, ref]).rstrip()
+ except subprocess.CalledProcessError:
+ continue
+ else:
+ queue.append(merge_base)
+
+
+def iter_except(func, exception, start=None):
+ """Yield a function repeatedly until it raises an exception."""
+ try:
+ if start is not None:
+ yield start()
+ while True:
+ yield func()
+ except exception:
+ pass
+
+
+def shrink_repo(git_dir):
+ """Shrink the newly shallow repository, removing the unreachable objects."""
+ subprocess.check_call(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
+ subprocess.check_call(['git', 'repack', '-ad'])
+ try:
+ os.unlink(os.path.join(git_dir, 'objects', 'info', 'alternates'))
+ except OSError as exc:
+ if exc.errno != errno.ENOENT:
+ raise
+ subprocess.check_call(['git', 'prune', '--expire', 'now'])
+
+
+if __name__ == '__main__':
+ main()
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 0fd2c021..510071d2 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -852,3 +852,130 @@ class FetchCheckStatusTest(FetcherTest):
self.assertTrue(ret, msg="URI %s, can't check status" % (u))
connection_cache.close_connections()
+
+
+class GitMakeShallowTest(FetcherTest):
+ bitbake_dir = os.path.join(os.path.dirname(os.path.join(__file__)), '..', '..', '..')
+ make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')
+
+ def setUp(self):
+ FetcherTest.setUp(self)
+ self.gitdir = os.path.join(self.tempdir, 'gitshallow')
+ bb.utils.mkdirhier(self.gitdir)
+ bb.process.run('git init', cwd=self.gitdir)
+
+ def assertRefs(self, expected_refs):
+ actual_refs = self.git(['for-each-ref', '--format=%(refname)']).splitlines()
+ full_expected = self.git(['rev-parse', '--symbolic-full-name'] + expected_refs).splitlines()
+ self.assertEqual(sorted(full_expected), sorted(actual_refs))
+
+ def assertRevCount(self, expected_count, args=None):
+ if args is None:
+ args = ['HEAD']
+ revs = self.git(['rev-list'] + args)
+ actual_count = len(revs.splitlines())
+ self.assertEqual(expected_count, actual_count, msg='Object count `%d` is not the expected `%d`' % (actual_count, expected_count))
+
+ def git(self, cmd):
+ if isinstance(cmd, str):
+ cmd = 'git ' + cmd
+ else:
+ cmd = ['git'] + cmd
+ return bb.process.run(cmd, cwd=self.gitdir)[0]
+
+ def make_shallow(self, args=None):
+ if args is None:
+ args = ['HEAD']
+ return bb.process.run([self.make_shallow_path] + args, cwd=self.gitdir)
+
+ def add_empty_file(self, path, msg=None):
+ if msg is None:
+ msg = path
+ open(os.path.join(self.gitdir, path), 'w').close()
+ self.git(['add', path])
+ self.git(['commit', '-m', msg, path])
+
+ def test_make_shallow_single_branch_no_merge(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.assertRevCount(2)
+ self.make_shallow()
+ self.assertRevCount(1)
+
+ def test_make_shallow_single_branch_one_merge(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('checkout -b a_branch')
+ self.add_empty_file('c')
+ self.git('checkout master')
+ self.add_empty_file('d')
+ self.git('merge --no-ff --no-edit a_branch')
+ self.git('branch -d a_branch')
+ self.add_empty_file('e')
+ self.assertRevCount(6)
+ self.make_shallow(['HEAD~2'])
+ self.assertRevCount(5)
+
+ def test_make_shallow_at_merge(self):
+ self.add_empty_file('a')
+ self.git('checkout -b a_branch')
+ self.add_empty_file('b')
+ self.git('checkout master')
+ self.git('merge --no-ff --no-edit a_branch')
+ self.git('branch -d a_branch')
+ self.assertRevCount(3)
+ self.make_shallow()
+ self.assertRevCount(1)
+
+ def test_make_shallow_annotated_tag(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('tag -a -m a_tag a_tag')
+ self.assertRevCount(2)
+ self.make_shallow(['a_tag'])
+ self.assertRevCount(1)
+
+ def test_make_shallow_multi_ref(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('checkout -b a_branch')
+ self.add_empty_file('c')
+ self.git('checkout master')
+ self.add_empty_file('d')
+ self.git('checkout -b a_branch_2')
+ self.add_empty_file('a_tag')
+ self.git('tag a_tag')
+ self.git('checkout master')
+ self.git('branch -D a_branch_2')
+ self.add_empty_file('e')
+ self.assertRevCount(6, ['--all'])
+ self.make_shallow()
+ self.assertRevCount(5, ['--all'])
+
+ def test_make_shallow_multi_ref_trim(self):
+ self.add_empty_file('a')
+ self.git('checkout -b a_branch')
+ self.add_empty_file('c')
+ self.git('checkout master')
+ self.assertRevCount(1)
+ self.assertRevCount(2, ['--all'])
+ self.assertRefs(['master', 'a_branch'])
+ self.make_shallow(['-r', 'master', 'HEAD'])
+ self.assertRevCount(1, ['--all'])
+ self.assertRefs(['master'])
+
+ def test_make_shallow_noop(self):
+ self.add_empty_file('a')
+ self.assertRevCount(1)
+ self.make_shallow()
+ self.assertRevCount(1)
+
+ if os.environ.get("BB_SKIP_NETTESTS") == "yes":
+ print("Unset BB_SKIP_NETTESTS to run network tests")
+ else:
+ def test_make_shallow_bitbake(self):
+ self.git('remote add origin https://github.com/openembedded/bitbake')
+ self.git('fetch --tags origin')
+ orig_revs = len(self.git('rev-list --all').splitlines())
+ self.make_shallow(['refs/tags/1.10.0'])
+ self.assertRevCount(orig_revs - 1746, ['--all'])
--
2.11.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/8] git-make-shallow: add script to make a git repo shallow
2017-05-12 21:46 ` [PATCH 2/8] git-make-shallow: add script to make a git repo shallow Christopher Larson
@ 2017-06-02 10:07 ` Richard Purdie
2017-06-02 13:59 ` Christopher Larson
0 siblings, 1 reply; 14+ messages in thread
From: Richard Purdie @ 2017-06-02 10:07 UTC (permalink / raw)
To: Christopher Larson, bitbake-devel; +Cc: Scott Rifenbark, Christopher Larson
On Sat, 2017-05-13 at 02:46 +0500, Christopher Larson wrote:
> This script will be used by the git fetcher to create shallow mirror
> tarballs.
>
> usage: git-make-shallow [-h] [--ref REF] [--shrink] REVISION
> [REVISION ...]
>
> Remove the history of the specified revisions, then optionally
> filter the
> available refs to those specified.
>
> positional arguments:
> REVISION a git revision/commit
>
> optional arguments:
> -h, --help show this help message and exit
> --ref REF, -r REF remove all but the specified refs
> (cumulative)
> --shrink, -s shrink the git repository by repacking and
> pruning
>
> While git does provide the ability to clone at a specific depth, and
> fetch all
> remote refs at a particular depth, the depth is across all
> branches/tags, and
> doesn't provide the flexibility we need, hence this script.
>
> Refs (branches+tags) can be filtered, as the process of history
> removal scales
> up rapidly with the number of refs. Even the existing `git fetch --
> depth=` is
> extremely slow on an upstream kernel repository with all the branches
> and tags
> kept.
>
> This uses the same underlying mechanism to implement the history
> removal which
> git itself uses (.git/shallow), and the results, when configured
> similarly, are
> in line with the results git itself produces with `fetch --depth`.
Sorry about the delay in getting to this. In general the series looks
great. One minor thing that bothers me a bit is the name of this
script. If git itself ever creates something similar it may cause us
issues. I wondered if we wanted to namespace this more closely to
bitbake?
I'm leaning towards merging this and then perhaps tweaking the naming.
The other thing I'd really like to see is the documentation collected
out of the commit messages and sent to Scott Rifenbark (cc'd) as a summary
of the changes so he can update the manual, particularly the section on
the git fetcher. If you could review it once Scott has done that, that
would be awesome :).
I do have my worries about how much complexity this adds to the git
fetcher but I guess it's inevitable and at least we have the tests
(thanks!).
In the back of my mind there is another way I've been wondering about
assisting fetches, namely a "common reference repo" which urls may name
(e.g. git://xxx;refrepo=linux-kernel). If this was specified, any clone
would happen into the reference name as a subset namespace before a
clone by reference into the main repo name. With kernels, this would
mean one core repo and then all the subset kernels would clone by
reference from it. That should mean all kernels would share a common
repo so once you have one, things should all be fast. Obviously shallow
clones solve a different problem in some ways but there is overlap.
Cheers,
Richard
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/8] git-make-shallow: add script to make a git repo shallow
2017-06-02 10:07 ` Richard Purdie
@ 2017-06-02 13:59 ` Christopher Larson
0 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-06-02 13:59 UTC (permalink / raw)
To: Richard Purdie; +Cc: Scott Rifenbark, bitbake-devel@lists.openembedded.org
[-- Attachment #1: Type: text/plain, Size: 3112 bytes --]
On Fri, Jun 2, 2017 at 3:07 AM, Richard Purdie <
richard.purdie@linuxfoundation.org> wrote:
> Sorry about the delay in getting to this. In general the series looks
> great.
>
Thanks for taking the time to review this, I appreciate it. I know you have
a lot on your plate, and anything invasive in the fetcher is risky and not
particularly fun.
> One minor thing that bothers me a bit is the name of this
> script. If git itself ever creates something similar it may cause us
> issues. I wondered if we wanted to namespace this more closely to
> bitbake?
>
> I'm leaning towards merging this and then perhaps tweaking the naming.
>
That would be fine with me, I’m not particularly attached to the name. I
only named it this way because it’s of use outside of bitbake as well, but
I can see your namespace concern.
> The other thing I'd really like to see is the documentation collected
> out the commit messages and sent to Scott Rifenbark (cc'd) as a summary
> of the changes so he can update the manual, particularly the section on
> the git fetcher. If you could review it once Scott has done that, that
> would be awesome :).
>
I can do that. I might still have a more monolithic description in one of
the previous branches, I only broke it up this way to ease review and
reduce confusion.
> I do have my worries about how much complexity this adds to the git
> fetcher but I guess its inevitable and at least we have the tests
> (thanks!).
>
Believe me, I have the same concerns. The fetcher is quite complex as it
is, but I don’t see any other way to solve this particular issue,
unfortunately. I split out git-make-shallow for this reason, to try to pare
down the changes to the fetcher itself, though of course that just
relocates the complexity rather than removing it :) I’d love to hear any
ideas on how to simplify things, or solve this in a different way, as
unfortunately I’m completely out of ideas. I do think this solves a problem
that we should solve, though.
> In the back of my mind there is another way I've been wondering about
> assisting fetches, namely a "common reference repo" which urls may name
> (e.g. git://xxx;refrepo=linux-kernel). If this was specified, any clone
> would happen into the reference name as a subset namespace before a
> clone by reference into the main repo name. With kernels, this would
> mean one core repo and then all the subset kernels would clone by
> reference from it. That should mean all kernels would share a common
> repo so once you have one things should all be fast. Obviously shallow
> clones solve a different problem in some ways but there is overlap.
>
I like that idea as an additional improvement. As you say, similar but
slightly different, as shallow helps reduce what sources need to be shipped
/ distributed on and downloaded from a mirror, whereas refrepo is more
about reducing duplication amongst multiple recipes.
--
Christopher Larson
kergoth at gmail dot com
Founder - BitBake, OpenEmbedded, OpenZaurus
Senior Software Engineer, Mentor Graphics
[-- Attachment #2: Type: text/html, Size: 4470 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 3/8] fetch/git: add support for shallow mirror tarballs
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
2017-05-12 21:46 ` [PATCH 1/8] fetch: support multiple mirror tarball filenames Christopher Larson
2017-05-12 21:46 ` [PATCH 2/8] git-make-shallow: add script to make a git repo shallow Christopher Larson
@ 2017-05-12 21:46 ` Christopher Larson
2017-05-12 21:46 ` [PATCH 4/8] fetch/git: support per-branch/per-url depths for shallow Christopher Larson
` (5 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-05-12 21:46 UTC (permalink / raw)
To: bitbake-devel; +Cc: Christopher Larson
This adds support to the git fetcher for fetching, using, and generating
mirror tarballs of shallow git repositories. The external git-make-shallow
script is used for shallow mirror tarball creation.
This implements support for shallow mirror tarballs, not shallow clones.
Supporting shallow clones directly is not really doable for us, as we'd need
to hardcode the depth between branch HEAD and the SRCREV, and that depth would
change as the branch is updated.
When BB_GIT_SHALLOW is enabled, we will always attempt to fetch a shallow
mirror tarball. If the shallow mirror tarball cannot be fetched, it will try
to fetch the full mirror tarball and use that. If a shallow tarball is to be
used, it will be unpacked directly at `do_unpack` time, rather than extracting
it to DL_DIR at `do_fetch` time and cloning from there, to keep things simple.
There's no value in keeping a shallow repository in DL_DIR, and dealing with
the state for when to convert the clonedir to/from shallow is not worthwhile.
To clarify when shallow is used vs a real repository, a current clone is
preferred to either tarball, a shallow tarball is preferred to an out of date
clone, and a missing clone will use either tarball (attempting the shallow one
first).
All referenced branches are truncated to SRCREV (that is, commits *after*
SRCREV but before HEAD are removed) to further shrink the repository. By
default, the shallow construction process removes all unused refs
(branches/tags) from the repository, other than those referenced by the URL.
Example usage:
BB_GIT_SHALLOW ?= "1"
# Keep only the top commit
BB_GIT_SHALLOW_DEPTH ?= "1"
# This defaults to enabled if both BB_GIT_SHALLOW and
# BB_GENERATE_MIRROR_TARBALLS are enabled
BB_GENERATE_SHALLOW_TARBALLS ?= "1"
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
---
lib/bb/fetch2/git.py | 133 +++++++++++++++++++---
lib/bb/tests/fetch.py | 299 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 417 insertions(+), 15 deletions(-)
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 01d4bbdc..0412f9ff 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -73,8 +73,9 @@ Supported SRC_URI options are:
import errno
import os
import re
+import subprocess
+import tempfile
import bb
-import errno
import bb.progress
from bb.fetch2 import FetchMethod
from bb.fetch2 import runfetchcmd
@@ -172,6 +173,11 @@ class Git(FetchMethod):
branches = ud.parm.get("branch", "master").split(',')
if len(branches) != len(ud.names):
raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url)
+
+ ud.cloneflags = "-s -n"
+ if ud.bareclone:
+ ud.cloneflags += " --mirror"
+
ud.branches = {}
for pos, name in enumerate(ud.names):
branch = branches[pos]
@@ -183,7 +189,9 @@ class Git(FetchMethod):
ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0"
- ud.write_tarballs = ((d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0") != "0") or ud.rebaseable
+ write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
+ ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
+ ud.write_shallow_tarballs = (d.getVar("BB_GENERATE_SHALLOW_TARBALLS") or write_tarballs) != "0"
ud.setup_revisions(d)
@@ -205,13 +213,48 @@ class Git(FetchMethod):
if ud.rebaseable:
for name in ud.names:
gitsrcname = gitsrcname + '_' + ud.revisions[name]
+
+ dl_dir = d.getVar("DL_DIR")
+ gitdir = d.getVar("GITDIR") or (dl_dir + "/git2/")
+ ud.clonedir = os.path.join(gitdir, gitsrcname)
+ ud.localfile = ud.clonedir
+
mirrortarball = 'git2_%s.tar.gz' % gitsrcname
- ud.fullmirror = os.path.join(d.getVar("DL_DIR"), mirrortarball)
+ ud.fullmirror = os.path.join(dl_dir, mirrortarball)
ud.mirrortarballs = [mirrortarball]
- gitdir = d.getVar("GITDIR") or (d.getVar("DL_DIR") + "/git2/")
- ud.clonedir = os.path.join(gitdir, gitsrcname)
- ud.localfile = ud.clonedir
+ ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
+ if ud.shallow:
+ ud.shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH")
+ if ud.shallow_depth is not None:
+ try:
+ ud.shallow_depth = int(ud.shallow_depth or 0)
+ except ValueError:
+ raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % ud.shallow_depth)
+ else:
+ if not ud.shallow_depth:
+ ud.shallow = False
+ elif ud.shallow_depth < 0:
+ raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % ud.shallow_depth)
+ else:
+ ud.shallow_depth = 1
+
+ if ud.shallow:
+ tarballname = gitsrcname
+ if ud.bareclone:
+ tarballname = "%s_bare" % tarballname
+
+ for name, revision in sorted(ud.revisions.items()):
+ tarballname = "%s_%s" % (tarballname, ud.revisions[name][:7])
+ if not ud.nobranch:
+ tarballname = "%s-%s" % (tarballname, ud.branches[name])
+
+ tarballname = "%s-%s" % (tarballname, ud.shallow_depth)
+
+ fetcher = self.__class__.__name__.lower()
+ ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
+ ud.fullshallow = os.path.join(dl_dir, ud.shallowtarball)
+ ud.mirrortarballs.insert(0, ud.shallowtarball)
def localpath(self, ud, d):
return ud.clonedir
@@ -222,6 +265,8 @@ class Git(FetchMethod):
for name in ud.names:
if not self._contains_ref(ud, d, name, ud.clonedir):
return True
+ if ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow):
+ return True
if ud.write_tarballs and not os.path.exists(ud.fullmirror):
return True
return False
@@ -238,8 +283,16 @@ class Git(FetchMethod):
def download(self, ud, d):
"""Fetch url"""
- # If the checkout doesn't exist and the mirror tarball does, extract it
- if not os.path.exists(ud.clonedir) and os.path.exists(ud.fullmirror):
+ no_clone = not os.path.exists(ud.clonedir)
+ need_update = no_clone or self.need_update(ud, d)
+
+ # A current clone is preferred to either tarball, a shallow tarball is
+ # preferred to an out of date clone, and a missing clone will use
+ # either tarball.
+ if ud.shallow and os.path.exists(ud.fullshallow) and need_update:
+ ud.localpath = ud.fullshallow
+ return
+ elif os.path.exists(ud.fullmirror) and no_clone:
bb.utils.mkdirhier(ud.clonedir)
runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)
@@ -285,9 +338,21 @@ class Git(FetchMethod):
raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revisions[name], ud.branches[name]))
def build_mirror_data(self, ud, d):
- # Generate a mirror tarball if needed
- if ud.write_tarballs and not os.path.exists(ud.fullmirror):
- # it's possible that this symlink points to read-only filesystem with PREMIRROR
+ if ud.shallow and ud.write_shallow_tarballs:
+ if not os.path.exists(ud.fullshallow):
+ if os.path.islink(ud.fullshallow):
+ os.unlink(ud.fullshallow)
+ tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
+ shallowclone = os.path.join(tempdir, 'git')
+ try:
+ self.clone_shallow_local(ud, shallowclone, d)
+
+ logger.info("Creating tarball of git repository")
+ runfetchcmd("tar -czf %s ." % ud.fullshallow, d, workdir=shallowclone)
+ runfetchcmd("touch %s.done" % ud.fullshallow, d)
+ finally:
+ bb.utils.remove(tempdir, recurse=True)
+ elif ud.write_tarballs and not os.path.exists(ud.fullmirror):
if os.path.islink(ud.fullmirror):
os.unlink(ud.fullmirror)
@@ -295,6 +360,43 @@ class Git(FetchMethod):
runfetchcmd("tar -czf %s ." % ud.fullmirror, d, workdir=ud.clonedir)
runfetchcmd("touch %s.done" % ud.fullmirror, d)
+ def clone_shallow_local(self, ud, dest, d):
+ """Clone the repo and make it shallow.
+
+ The upstream url of the new clone isn't set at this time, as it'll be
+ set correctly when unpacked."""
+ runfetchcmd("%s clone %s %s %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, dest), d)
+
+ to_parse, shallow_branches = [], []
+ for name in ud.names:
+ revision = ud.revisions[name]
+ to_parse.append('%s~%d^{}' % (revision, ud.shallow_depth - 1))
+
+ # For nobranch, we need a ref, otherwise the commits will be
+ # removed, and for non-nobranch, we truncate the branch to our
+ # srcrev, to avoid keeping unnecessary history beyond that.
+ branch = ud.branches[name]
+ if ud.nobranch:
+ ref = "refs/shallow/%s" % name
+ elif ud.bareclone:
+ ref = "refs/heads/%s" % branch
+ else:
+ ref = "refs/remotes/origin/%s" % branch
+
+ shallow_branches.append(ref)
+ runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
+
+ # Map srcrev+depths to revisions
+ shallow_revisions = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest).splitlines()
+
+ # Make the repository shallow
+ shallow_cmd = ['git', 'make-shallow', '-s']
+ for b in shallow_branches:
+ shallow_cmd.append('-r')
+ shallow_cmd.append(b)
+ shallow_cmd.extend(shallow_revisions)
+ runfetchcmd(subprocess.list2cmdline(shallow_cmd), d, workdir=dest)
+
def unpack(self, ud, destdir, d):
""" unpack the downloaded src to destdir"""
@@ -311,11 +413,12 @@ class Git(FetchMethod):
if os.path.exists(destdir):
bb.utils.prunedir(destdir)
- cloneflags = "-s -n"
- if ud.bareclone:
- cloneflags += " --mirror"
+ if ud.shallow and (not os.path.exists(ud.clonedir) or self.need_update(ud, d)):
+ bb.utils.mkdirhier(destdir)
+ runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir)
+ else:
+ runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
- runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, cloneflags, ud.clonedir, destdir), d)
repourl = self._get_repo_url(ud)
runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, repourl), d, workdir=destdir)
if not ud.nocheckout:
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 510071d2..019f22a1 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -979,3 +979,302 @@ class GitMakeShallowTest(FetcherTest):
orig_revs = len(self.git('rev-list --all').splitlines())
self.make_shallow(['refs/tags/1.10.0'])
self.assertRevCount(orig_revs - 1746, ['--all'])
+
+class GitShallowTest(FetcherTest):
+ def setUp(self):
+ FetcherTest.setUp(self)
+ self.gitdir = os.path.join(self.tempdir, 'git')
+ self.srcdir = os.path.join(self.tempdir, 'gitsource')
+
+ bb.utils.mkdirhier(self.srcdir)
+ self.git('init', cwd=self.srcdir)
+ self.d.setVar('WORKDIR', self.tempdir)
+ self.d.setVar('S', self.gitdir)
+ self.d.delVar('PREMIRRORS')
+ self.d.delVar('MIRRORS')
+
+ uri = 'git://%s;protocol=file;subdir=${S}' % self.srcdir
+ self.d.setVar('SRC_URI', uri)
+ self.d.setVar('SRCREV', '${AUTOREV}')
+ self.d.setVar('AUTOREV', '${@bb.fetch2.get_autorev(d)}')
+
+ self.d.setVar('BB_GIT_SHALLOW', '1')
+ self.d.setVar('BB_GENERATE_MIRROR_TARBALLS', '0')
+ self.d.setVar('BB_GENERATE_SHALLOW_TARBALLS', '1')
+
+ def assertRefs(self, expected_refs, cwd=None):
+ if cwd is None:
+ cwd = self.gitdir
+ actual_refs = self.git(['for-each-ref', '--format=%(refname)'], cwd=cwd).splitlines()
+ full_expected = self.git(['rev-parse', '--symbolic-full-name'] + expected_refs, cwd=cwd).splitlines()
+ self.assertEqual(sorted(set(full_expected)), sorted(set(actual_refs)))
+
+ def assertRevCount(self, expected_count, args=None, cwd=None):
+ if args is None:
+ args = ['HEAD']
+ if cwd is None:
+ cwd = self.gitdir
+ revs = self.git(['rev-list'] + args, cwd=cwd)
+ actual_count = len(revs.splitlines())
+ self.assertEqual(expected_count, actual_count, msg='Object count `%d` is not the expected `%d`' % (actual_count, expected_count))
+
+ def git(self, cmd, cwd=None):
+ if isinstance(cmd, str):
+ cmd = 'git ' + cmd
+ else:
+ cmd = ['git'] + cmd
+ if cwd is None:
+ cwd = self.gitdir
+ return bb.process.run(cmd, cwd=cwd)[0]
+
+ def add_empty_file(self, path, msg=None):
+ if msg is None:
+ msg = path
+ open(os.path.join(self.srcdir, path), 'w').close()
+ self.git(['add', path], self.srcdir)
+ self.git(['commit', '-m', msg, path], self.srcdir)
+
+ def fetch(self, uri=None):
+ if uri is None:
+ uris = self.d.getVar('SRC_URI', True).split()
+ uri = uris[0]
+ d = self.d
+ else:
+ d = self.d.createCopy()
+ d.setVar('SRC_URI', uri)
+ uri = d.expand(uri)
+ uris = [uri]
+
+ fetcher = bb.fetch2.Fetch(uris, d)
+ fetcher.download()
+ ud = fetcher.ud[uri]
+ return fetcher, ud
+
+ def fetch_and_unpack(self, uri=None):
+ fetcher, ud = self.fetch(uri)
+ fetcher.unpack(self.d.getVar('WORKDIR'))
+ assert os.path.exists(self.d.getVar('S'))
+ return fetcher, ud
+
+ def fetch_shallow(self, uri=None, disabled=False, keepclone=False):
+ """Fetch a uri, generating a shallow tarball, then unpack using it"""
+ fetcher, ud = self.fetch_and_unpack(uri)
+ assert os.path.exists(ud.clonedir), 'Git clone in DLDIR (%s) does not exist for uri %s' % (ud.clonedir, uri)
+
+ # Confirm that the shallow tarball was generated in DL_DIR
+ if not disabled:
+ assert os.path.exists(os.path.join(self.dldir, ud.mirrortarballs[0]))
+
+ # fetch and unpack, from the shallow tarball
+ bb.utils.remove(self.gitdir, recurse=True)
+ bb.utils.remove(ud.clonedir, recurse=True)
+
+ # confirm that the shallow tarball is used when no git clone or git
+ # mirror tarball is available
+ fetcher, ud = self.fetch_and_unpack(uri)
+ if not disabled:
+ assert os.path.exists(os.path.join(self.gitdir, '.git', 'shallow')), 'Unpacked git repository at %s is not shallow' % self.gitdir
+ else:
+ assert not os.path.exists(os.path.join(self.gitdir, '.git', 'shallow')), 'Unpacked git repository at %s is shallow' % self.gitdir
+ return fetcher, ud
+
+ def test_shallow_disabled(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.d.setVar('BB_GIT_SHALLOW', '0')
+ self.fetch_shallow(disabled=True)
+ self.assertRevCount(2)
+
+ def test_shallow_nobranch(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ srcrev = self.git('rev-parse HEAD', cwd=self.srcdir).strip()
+ self.d.setVar('SRCREV', srcrev)
+ uri = self.d.getVar('SRC_URI', True).split()[0]
+ uri = '%s;nobranch=1;bare=1' % uri
+
+ self.fetch_shallow(uri)
+ self.assertRevCount(1)
+
+ # shallow refs are used to ensure the srcrev sticks around when we
+ # have no other branches referencing it
+ self.assertRefs(['refs/shallow/default'])
+
+ def test_shallow_default_depth_1(self):
+ # Create initial git repo
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.fetch_shallow()
+ self.assertRevCount(1)
+
+ def test_shallow_depth_0_disables(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '0')
+ self.fetch_shallow(disabled=True)
+ self.assertRevCount(2)
+
+ def test_current_shallow_out_of_date_clone(self):
+ # Create initial git repo
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.add_empty_file('c')
+ self.assertRevCount(3, cwd=self.srcdir)
+
+ # Clone and generate mirror tarball
+ fetcher, ud = self.fetch()
+
+ # Ensure we have a current mirror tarball, but an out of date clone
+ self.git('update-ref refs/heads/master refs/heads/master~1', cwd=ud.clonedir)
+ self.assertRevCount(2, cwd=ud.clonedir)
+
+ # Fetch and unpack, from the current tarball, not the out of date clone
+ bb.utils.remove(self.gitdir, recurse=True)
+ fetcher, ud = self.fetch()
+ fetcher.unpack(self.d.getVar('WORKDIR'))
+ self.assertRevCount(1)
+
+ def test_shallow_single_branch_no_merge(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.fetch_shallow()
+ self.assertRevCount(1)
+ assert os.path.exists(os.path.join(self.gitdir, 'a'))
+ assert os.path.exists(os.path.join(self.gitdir, 'b'))
+
+ def test_shallow_no_dangling(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.fetch_shallow()
+ self.assertRevCount(1)
+ assert not self.git('fsck --dangling')
+
+ def test_shallow_srcrev_branch_truncation(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ b_commit = self.git('rev-parse HEAD', cwd=self.srcdir).rstrip()
+ self.add_empty_file('c')
+ self.assertRevCount(3, cwd=self.srcdir)
+
+ self.d.setVar('SRCREV', b_commit)
+ self.fetch_shallow()
+
+ # The 'c' commit was removed entirely, and 'a' was removed from history
+ self.assertRevCount(1, ['--all'])
+ self.assertEqual(self.git('rev-parse HEAD').strip(), b_commit)
+ assert os.path.exists(os.path.join(self.gitdir, 'a'))
+ assert os.path.exists(os.path.join(self.gitdir, 'b'))
+ assert not os.path.exists(os.path.join(self.gitdir, 'c'))
+
+ def test_shallow_ref_pruning(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('branch a_branch', cwd=self.srcdir)
+ self.assertRefs(['master', 'a_branch'], cwd=self.srcdir)
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.fetch_shallow()
+
+ self.assertRefs(['master', 'origin/master'])
+ self.assertRevCount(1)
+
+ def test_shallow_multi_one_uri(self):
+ # Create initial git repo
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('checkout -b a_branch', cwd=self.srcdir)
+ self.add_empty_file('c')
+ self.add_empty_file('d')
+ self.git('checkout master', cwd=self.srcdir)
+ self.add_empty_file('e')
+ self.git('merge --no-ff --no-edit a_branch', cwd=self.srcdir)
+ self.add_empty_file('f')
+ self.assertRevCount(7, cwd=self.srcdir)
+
+ uri = self.d.getVar('SRC_URI', True).split()[0]
+ uri = '%s;branch=master,a_branch;name=master,a_branch' % uri
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '2')
+ self.d.setVar('SRCREV_master', '${AUTOREV}')
+ self.d.setVar('SRCREV_a_branch', '${AUTOREV}')
+
+ self.fetch_shallow(uri)
+
+ self.assertRevCount(3, ['--all'])
+ self.assertRefs(['master', 'origin/master', 'origin/a_branch'])
+
+ def test_shallow_clone_preferred_over_shallow(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+
+ # Fetch once to generate the shallow tarball
+ fetcher, ud = self.fetch()
+ assert os.path.exists(os.path.join(self.dldir, ud.mirrortarballs[0]))
+
+ # Fetch and unpack with both the clonedir and shallow tarball available
+ bb.utils.remove(self.gitdir, recurse=True)
+ fetcher, ud = self.fetch_and_unpack()
+
+ # The unpacked tree should *not* be shallow
+ self.assertRevCount(2)
+ assert not os.path.exists(os.path.join(self.gitdir, '.git', 'shallow'))
+
+ def test_shallow_mirrors(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+
+ # Fetch once to generate the shallow tarball
+ fetcher, ud = self.fetch()
+ mirrortarball = ud.mirrortarballs[0]
+ assert os.path.exists(os.path.join(self.dldir, mirrortarball))
+
+ # Set up the mirror
+ mirrordir = os.path.join(self.tempdir, 'mirror')
+ bb.utils.mkdirhier(mirrordir)
+ self.d.setVar('PREMIRRORS', 'git://.*/.* file://%s/\n' % mirrordir)
+
+ os.rename(os.path.join(self.dldir, mirrortarball),
+ os.path.join(mirrordir, mirrortarball))
+
+ # Fetch from the mirror
+ bb.utils.remove(self.dldir, recurse=True)
+ bb.utils.remove(self.gitdir, recurse=True)
+ self.fetch_and_unpack()
+ self.assertRevCount(1)
+
+ def test_shallow_invalid_depth(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '-12')
+ with self.assertRaises(bb.fetch2.FetchError):
+ self.fetch()
+
+ if os.environ.get("BB_SKIP_NETTESTS") == "yes":
+ print("Unset BB_SKIP_NETTESTS to run network tests")
+ else:
+ def test_bitbake(self):
+ self.git('remote add --mirror=fetch origin git://github.com/openembedded/bitbake', cwd=self.srcdir)
+ self.git('config core.bare true', cwd=self.srcdir)
+ self.git('fetch --tags', cwd=self.srcdir)
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '100')
+
+ self.fetch_shallow()
+
+ orig_revs = len(self.git('rev-list master', cwd=self.srcdir).splitlines())
+ revs = len(self.git('rev-list master').splitlines())
+ self.assertNotEqual(orig_revs, revs)
+ self.assertRefs(['master', 'origin/master'])
--
2.11.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 4/8] fetch/git: support per-branch/per-url depths for shallow
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
` (2 preceding siblings ...)
2017-05-12 21:46 ` [PATCH 3/8] fetch/git: add support for shallow mirror tarballs Christopher Larson
@ 2017-05-12 21:46 ` Christopher Larson
2017-05-12 21:46 ` [PATCH 5/8] fetch/git: add support for keeping extra refs " Christopher Larson
` (4 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-05-12 21:46 UTC (permalink / raw)
To: bitbake-devel; +Cc: Christopher Larson
Allow the user to explicitly adjust the depth for named urls/branches. The
un-suffixed BB_GIT_SHALLOW_DEPTH is used as the default.
Example usage:
BB_GIT_SHALLOW_DEPTH = "1"
BB_GIT_SHALLOW_DEPTH_doc = "0"
BB_GIT_SHALLOW_DEPTH_meta = "0"
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
---
lib/bb/fetch2/git.py | 61 +++++++++++++++++++++++++++++++++------------------
lib/bb/tests/fetch.py | 35 +++++++++++++++++++++++++++--
2 files changed, 73 insertions(+), 23 deletions(-)
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 0412f9ff..250109bf 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -70,6 +70,7 @@ Supported SRC_URI options are:
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import collections
import errno
import os
import re
@@ -178,12 +179,43 @@ class Git(FetchMethod):
if ud.bareclone:
ud.cloneflags += " --mirror"
+ ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
+
+ depth_default = d.getVar("BB_GIT_SHALLOW_DEPTH")
+ if depth_default is not None:
+ try:
+ depth_default = int(depth_default or 0)
+ except ValueError:
+ raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
+ else:
+ if depth_default < 0:
+ raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
+ else:
+ depth_default = 1
+ ud.shallow_depths = collections.defaultdict(lambda: depth_default)
+
ud.branches = {}
for pos, name in enumerate(ud.names):
branch = branches[pos]
ud.branches[name] = branch
ud.unresolvedrev[name] = branch
+ shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH_%s" % name)
+ if shallow_depth is not None:
+ try:
+ shallow_depth = int(shallow_depth or 0)
+ except ValueError:
+ raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
+ else:
+ if shallow_depth < 0:
+ raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
+ ud.shallow_depths[name] = shallow_depth
+
+ if (ud.shallow and
+ all(ud.shallow_depths[n] == 0 for n in ud.names)):
+ # Shallow disabled for this URL
+ ud.shallow = False
+
if ud.usehead:
ud.unresolvedrev['default'] = 'HEAD'
@@ -222,23 +254,6 @@ class Git(FetchMethod):
mirrortarball = 'git2_%s.tar.gz' % gitsrcname
ud.fullmirror = os.path.join(dl_dir, mirrortarball)
ud.mirrortarballs = [mirrortarball]
-
- ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
- if ud.shallow:
- ud.shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH")
- if ud.shallow_depth is not None:
- try:
- ud.shallow_depth = int(ud.shallow_depth or 0)
- except ValueError:
- raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % ud.shallow_depth)
- else:
- if not ud.shallow_depth:
- ud.shallow = False
- elif ud.shallow_depth < 0:
- raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % ud.shallow_depth)
- else:
- ud.shallow_depth = 1
-
if ud.shallow:
tarballname = gitsrcname
if ud.bareclone:
@@ -246,10 +261,12 @@ class Git(FetchMethod):
for name, revision in sorted(ud.revisions.items()):
tarballname = "%s_%s" % (tarballname, ud.revisions[name][:7])
- if not ud.nobranch:
- tarballname = "%s-%s" % (tarballname, ud.branches[name])
+ depth = ud.shallow_depths[name]
+ if depth:
+ tarballname = "%s-%s" % (tarballname, depth)
- tarballname = "%s-%s" % (tarballname, ud.shallow_depth)
+ if not ud.nobranch:
+ tarballname = "%s_%s" % (tarballname, "_".join(sorted(ud.branches.values())).replace('/', '.'))
fetcher = self.__class__.__name__.lower()
ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
@@ -370,7 +387,9 @@ class Git(FetchMethod):
to_parse, shallow_branches = [], []
for name in ud.names:
revision = ud.revisions[name]
- to_parse.append('%s~%d^{}' % (revision, ud.shallow_depth - 1))
+ depth = ud.shallow_depths[name]
+ if depth:
+ to_parse.append('%s~%d^{}' % (revision, depth - 1))
# For nobranch, we need a ref, otherwise the commits will be
# removed, and for non-nobranch, we truncate the branch to our
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 019f22a1..0b0116b4 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -1122,6 +1122,27 @@ class GitShallowTest(FetcherTest):
self.fetch_shallow(disabled=True)
self.assertRevCount(2)
+ def test_shallow_depth_default_override(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '2')
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH_default', '1')
+ self.fetch_shallow()
+ self.assertRevCount(1)
+
+ def test_shallow_depth_default_override_disable(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.add_empty_file('c')
+ self.assertRevCount(3, cwd=self.srcdir)
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '0')
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH_default', '2')
+ self.fetch_shallow()
+ self.assertRevCount(2)
+
def test_current_shallow_out_of_date_clone(self):
# Create initial git repo
self.add_empty_file('a')
@@ -1206,13 +1227,15 @@ class GitShallowTest(FetcherTest):
uri = self.d.getVar('SRC_URI', True).split()[0]
uri = '%s;branch=master,a_branch;name=master,a_branch' % uri
- self.d.setVar('BB_GIT_SHALLOW_DEPTH', '2')
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '0')
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH_master', '3')
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH_a_branch', '1')
self.d.setVar('SRCREV_master', '${AUTOREV}')
self.d.setVar('SRCREV_a_branch', '${AUTOREV}')
self.fetch_shallow(uri)
- self.assertRevCount(3, ['--all'])
+ self.assertRevCount(4, ['--all'])
self.assertRefs(['master', 'origin/master', 'origin/a_branch'])
def test_shallow_clone_preferred_over_shallow(self):
@@ -1262,6 +1285,14 @@ class GitShallowTest(FetcherTest):
with self.assertRaises(bb.fetch2.FetchError):
self.fetch()
+ def test_shallow_invalid_depth_default(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH_default', '-12')
+ with self.assertRaises(bb.fetch2.FetchError):
+ self.fetch()
+
if os.environ.get("BB_SKIP_NETTESTS") == "yes":
print("Unset BB_SKIP_NETTESTS to run network tests")
else:
--
2.11.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 5/8] fetch/git: add support for keeping extra refs for shallow
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
` (3 preceding siblings ...)
2017-05-12 21:46 ` [PATCH 4/8] fetch/git: support per-branch/per-url depths for shallow Christopher Larson
@ 2017-05-12 21:46 ` Christopher Larson
2017-05-12 21:46 ` [PATCH 6/8] fetch/gitsm: add support for shallow mirror tarballs Christopher Larson
` (3 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-05-12 21:46 UTC (permalink / raw)
To: bitbake-devel; +Cc: Christopher Larson
By default, all unused refs (branches & tags) are removed from the repository,
as shallow processing scales with the number of refs it has to process. Add
the ability to explicitly specify additional refs to keep. This is
particularly useful for recipes with custom checkout processes, or whose
git-based versioning requires a tag be available (i.e. for `git describe
--tags`). The new `BB_GIT_SHALLOW_EXTRA_REFS` variable is a space-separated
list of fully specified refs, and supports wildcards.
Example usages:
BB_GIT_SHALLOW_EXTRA_REFS = "refs/tags/v1.0"
BB_GIT_SHALLOW_EXTRA_REFS += "refs/heads/*"
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
---
lib/bb/fetch2/git.py | 22 +++++++++++++++++++++-
lib/bb/tests/fetch.py | 42 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 250109bf..aa972c5c 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -72,6 +72,7 @@ Supported SRC_URI options are:
import collections
import errno
+import fnmatch
import os
import re
import subprocess
@@ -180,6 +181,7 @@ class Git(FetchMethod):
ud.cloneflags += " --mirror"
ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
+ ud.shallow_extra_refs = (d.getVar("BB_GIT_SHALLOW_EXTRA_REFS") or "").split()
depth_default = d.getVar("BB_GIT_SHALLOW_DEPTH")
if depth_default is not None:
@@ -265,8 +267,13 @@ class Git(FetchMethod):
if depth:
tarballname = "%s-%s" % (tarballname, depth)
+ shallow_refs = []
if not ud.nobranch:
- tarballname = "%s_%s" % (tarballname, "_".join(sorted(ud.branches.values())).replace('/', '.'))
+ shallow_refs.extend(ud.branches.values())
+ if ud.shallow_extra_refs:
+ shallow_refs.extend(r.replace('refs/heads/', '').replace('*', 'ALL') for r in ud.shallow_extra_refs)
+ if shallow_refs:
+ tarballname = "%s_%s" % (tarballname, "_".join(sorted(shallow_refs)).replace('/', '.'))
fetcher = self.__class__.__name__.lower()
ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
@@ -408,6 +415,19 @@ class Git(FetchMethod):
# Map srcrev+depths to revisions
shallow_revisions = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest).splitlines()
+ # Apply extra ref wildcards
+ all_refs = runfetchcmd('%s for-each-ref "--format=%%(refname)"' % ud.basecmd,
+ d, workdir=dest).splitlines()
+ for r in ud.shallow_extra_refs:
+ if not ud.bareclone:
+ r = r.replace('refs/heads/', 'refs/remotes/origin/')
+
+ if '*' in r:
+ matches = filter(lambda a: fnmatch.fnmatchcase(a, r), all_refs)
+ shallow_branches.extend(matches)
+ else:
+ shallow_branches.append(r)
+
# Make the repository shallow
shallow_cmd = ['git', 'make-shallow', '-s']
for b in shallow_branches:
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 0b0116b4..3e2ce530 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -1293,6 +1293,48 @@ class GitShallowTest(FetcherTest):
with self.assertRaises(bb.fetch2.FetchError):
self.fetch()
+ def test_shallow_extra_refs(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('branch a_branch', cwd=self.srcdir)
+ self.assertRefs(['master', 'a_branch'], cwd=self.srcdir)
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.d.setVar('BB_GIT_SHALLOW_EXTRA_REFS', 'refs/heads/a_branch')
+ self.fetch_shallow()
+
+ self.assertRefs(['master', 'origin/master', 'origin/a_branch'])
+ self.assertRevCount(1)
+
+ def test_shallow_extra_refs_wildcard(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('branch a_branch', cwd=self.srcdir)
+ self.git('tag v1.0', cwd=self.srcdir)
+ self.assertRefs(['master', 'a_branch', 'v1.0'], cwd=self.srcdir)
+ self.assertRevCount(2, cwd=self.srcdir)
+
+ self.d.setVar('BB_GIT_SHALLOW_EXTRA_REFS', 'refs/tags/*')
+ self.fetch_shallow()
+
+ self.assertRefs(['master', 'origin/master', 'v1.0'])
+ self.assertRevCount(1)
+
+ def test_shallow_missing_extra_refs(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+
+ self.d.setVar('BB_GIT_SHALLOW_EXTRA_REFS', 'refs/heads/foo')
+ with self.assertRaises(bb.fetch2.FetchError):
+ self.fetch()
+
+ def test_shallow_missing_extra_refs_wildcard(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+
+ self.d.setVar('BB_GIT_SHALLOW_EXTRA_REFS', 'refs/tags/*')
+ self.fetch()
+
if os.environ.get("BB_SKIP_NETTESTS") == "yes":
print("Unset BB_SKIP_NETTESTS to run network tests")
else:
--
2.11.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 6/8] fetch/gitsm: add support for shallow mirror tarballs
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
` (4 preceding siblings ...)
2017-05-12 21:46 ` [PATCH 5/8] fetch/git: add support for keeping extra refs " Christopher Larson
@ 2017-05-12 21:46 ` Christopher Larson
2017-05-12 21:46 ` [PATCH 7/8] fetch/gitannex: " Christopher Larson
` (2 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-05-12 21:46 UTC (permalink / raw)
To: bitbake-devel; +Cc: Christopher Larson
When we're building from a shallow mirror tarball, we don't want to do
anything with ud.clonedir, as it's not being used when we unpack. As such,
disable updating the submodules in that case. Also include the repositories in
.git/modules in the shallow tarball. It does not actually make the submodule
repositories shallow at this time.
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
---
lib/bb/fetch2/gitsm.py | 17 +++++++++++------
lib/bb/tests/fetch.py | 31 +++++++++++++++++++++++++++----
2 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/lib/bb/fetch2/gitsm.py b/lib/bb/fetch2/gitsm.py
index a95584c8..0aff1008 100644
--- a/lib/bb/fetch2/gitsm.py
+++ b/lib/bb/fetch2/gitsm.py
@@ -117,14 +117,19 @@ class GitSM(Git):
def download(self, ud, d):
Git.download(self, ud, d)
- submodules = self.uses_submodules(ud, d, ud.clonedir)
- if submodules:
- self.update_submodules(ud, d)
+ if not ud.shallow or ud.localpath != ud.fullshallow:
+ submodules = self.uses_submodules(ud, d, ud.clonedir)
+ if submodules:
+ self.update_submodules(ud, d)
+
+ def clone_shallow_local(self, ud, dest, d):
+ super(GitSM, self).clone_shallow_local(ud, dest, d)
+
+ runfetchcmd('cp -fpPRH "%s/modules" "%s/"' % (ud.clonedir, os.path.join(dest, '.git')), d)
def unpack(self, ud, destdir, d):
Git.unpack(self, ud, destdir, d)
-
- submodules = self.uses_submodules(ud, d, ud.destdir)
- if submodules:
+
+ if self.uses_submodules(ud, d, ud.destdir):
runfetchcmd(ud.basecmd + " checkout " + ud.revisions[ud.names[0]], d, workdir=ud.destdir)
runfetchcmd(ud.basecmd + " submodule update --init --recursive", d, workdir=ud.destdir)
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 3e2ce530..2a9019b0 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -1027,12 +1027,14 @@ class GitShallowTest(FetcherTest):
cwd = self.gitdir
return bb.process.run(cmd, cwd=cwd)[0]
- def add_empty_file(self, path, msg=None):
+ def add_empty_file(self, path, cwd=None, msg=None):
if msg is None:
msg = path
- open(os.path.join(self.srcdir, path), 'w').close()
- self.git(['add', path], self.srcdir)
- self.git(['commit', '-m', msg, path], self.srcdir)
+ if cwd is None:
+ cwd = self.srcdir
+ open(os.path.join(cwd, path), 'w').close()
+ self.git(['add', path], cwd)
+ self.git(['commit', '-m', msg, path], cwd)
def fetch(self, uri=None):
if uri is None:
@@ -1211,6 +1213,27 @@ class GitShallowTest(FetcherTest):
self.assertRefs(['master', 'origin/master'])
self.assertRevCount(1)
+ def test_shallow_submodules(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+
+ smdir = os.path.join(self.tempdir, 'gitsubmodule')
+ bb.utils.mkdirhier(smdir)
+ self.git('init', cwd=smdir)
+ self.add_empty_file('asub', cwd=smdir)
+
+ self.git('submodule init', cwd=self.srcdir)
+ self.git('submodule add file://%s' % smdir, cwd=self.srcdir)
+ self.git('submodule update', cwd=self.srcdir)
+ self.git('commit -m submodule -a', cwd=self.srcdir)
+
+ uri = 'gitsm://%s;protocol=file;subdir=${S}' % self.srcdir
+ fetcher, ud = self.fetch_shallow(uri)
+
+ self.assertRevCount(1)
+ assert './.git/modules/' in bb.process.run('tar -tzf %s' % os.path.join(self.dldir, ud.mirrortarballs[0]))[0]
+ assert os.listdir(os.path.join(self.gitdir, 'gitsubmodule'))
+
def test_shallow_multi_one_uri(self):
# Create initial git repo
self.add_empty_file('a')
--
2.11.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 7/8] fetch/gitannex: add support for shallow mirror tarballs
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
` (5 preceding siblings ...)
2017-05-12 21:46 ` [PATCH 6/8] fetch/gitsm: add support for shallow mirror tarballs Christopher Larson
@ 2017-05-12 21:46 ` Christopher Larson
2017-05-12 21:46 ` [PATCH 8/8] fetch/git: add support for removing arbitrary revs for shallow Christopher Larson
2017-05-25 15:42 ` [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
8 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-05-12 21:46 UTC (permalink / raw)
To: bitbake-devel; +Cc: Christopher Larson
When we're building from a shallow mirror tarball, we don't want to do
anything with ud.clonedir, as it's not being used when we unpack. As such,
disable updating annex in that case. Also include annex files in the shallow
tarball.
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
---
lib/bb/fetch2/gitannex.py | 23 ++++++++++++++++++++---
lib/bb/tests/fetch.py | 17 +++++++++++++++++
2 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/lib/bb/fetch2/gitannex.py b/lib/bb/fetch2/gitannex.py
index c66c2114..a9b69caa 100644
--- a/lib/bb/fetch2/gitannex.py
+++ b/lib/bb/fetch2/gitannex.py
@@ -33,6 +33,11 @@ class GitANNEX(Git):
"""
return ud.type in ['gitannex']
+ def urldata_init(self, ud, d):
+ super(GitANNEX, self).urldata_init(ud, d)
+ if ud.shallow:
+ ud.shallow_extra_refs += ['refs/heads/git-annex', 'refs/heads/synced/*']
+
def uses_annex(self, ud, d, wd):
for name in ud.names:
try:
@@ -55,9 +60,21 @@ class GitANNEX(Git):
def download(self, ud, d):
Git.download(self, ud, d)
- annex = self.uses_annex(ud, d, ud.clonedir)
- if annex:
- self.update_annex(ud, d, ud.clonedir)
+ if not ud.shallow or ud.localpath != ud.fullshallow:
+ if self.uses_annex(ud, d, ud.clonedir):
+ self.update_annex(ud, d, ud.clonedir)
+
+ def clone_shallow_local(self, ud, dest, d):
+ super(GitANNEX, self).clone_shallow_local(ud, dest, d)
+
+ try:
+ runfetchcmd("%s annex init" % ud.basecmd, d, workdir=dest)
+ except bb.fetch.FetchError:
+ pass
+
+ if self.uses_annex(ud, d, dest):
+ runfetchcmd("%s annex get" % ud.basecmd, d, workdir=dest)
+ runfetchcmd("chmod u+w -R %s/.git/annex" % (dest), d, quiet=True, workdir=dest)
def unpack(self, ud, destdir, d):
Git.unpack(self, ud, destdir, d)
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 2a9019b0..73f7b3f7 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -1234,6 +1234,23 @@ class GitShallowTest(FetcherTest):
assert './.git/modules/' in bb.process.run('tar -tzf %s' % os.path.join(self.dldir, ud.mirrortarballs[0]))[0]
assert os.listdir(os.path.join(self.gitdir, 'gitsubmodule'))
+ if any(os.path.exists(os.path.join(p, 'git-annex')) for p in os.environ.get('PATH').split(':')):
+ def test_shallow_annex(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('annex init', cwd=self.srcdir)
+ open(os.path.join(self.srcdir, 'c'), 'w').close()
+ self.git('annex add c', cwd=self.srcdir)
+ self.git('commit -m annex-c -a', cwd=self.srcdir)
+ bb.process.run('chmod u+w -R %s' % os.path.join(self.srcdir, '.git', 'annex'))
+
+ uri = 'gitannex://%s;protocol=file;subdir=${S}' % self.srcdir
+ fetcher, ud = self.fetch_shallow(uri)
+
+ self.assertRevCount(1)
+ assert './.git/annex/' in bb.process.run('tar -tzf %s' % os.path.join(self.dldir, ud.mirrortarballs[0]))[0]
+ assert os.path.exists(os.path.join(self.gitdir, 'c'))
+
def test_shallow_multi_one_uri(self):
# Create initial git repo
self.add_empty_file('a')
--
2.11.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 8/8] fetch/git: add support for removing arbitrary revs for shallow
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
` (6 preceding siblings ...)
2017-05-12 21:46 ` [PATCH 7/8] fetch/gitannex: " Christopher Larson
@ 2017-05-12 21:46 ` Christopher Larson
2017-05-25 15:42 ` [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
8 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-05-12 21:46 UTC (permalink / raw)
To: bitbake-devel; +Cc: Christopher Larson
In certain cases, it's valuable to be able to exert more control over what
history is removed, beyond srcrev+depth. As one example, you can remove most
of the upstream kernel history from a kernel repository, keeping predominantly
the non-publicly-accessible content. If the repository is private, the
history in that repo couldn't be restored via `git fetch --unshallow`, but
upstream history could be.
Example usage:
# Remove only these revs, not at a particular depth
BB_GIT_SHALLOW_DEPTH_pn-linux-foo = "0"
BB_GIT_SHALLOW_REVS_pn-linux-foo = "v4.1"
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
---
lib/bb/fetch2/git.py | 18 +++++++++++++-
lib/bb/tests/fetch.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index aa972c5c..534c93d3 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -196,6 +196,8 @@ class Git(FetchMethod):
depth_default = 1
ud.shallow_depths = collections.defaultdict(lambda: depth_default)
+ revs_default = d.getVar("BB_GIT_SHALLOW_REVS", True)
+ ud.shallow_revs = []
ud.branches = {}
for pos, name in enumerate(ud.names):
branch = branches[pos]
@@ -213,7 +215,14 @@ class Git(FetchMethod):
raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
ud.shallow_depths[name] = shallow_depth
+ revs = d.getVar("BB_GIT_SHALLOW_REVS_%s" % name)
+ if revs is not None:
+ ud.shallow_revs.extend(revs.split())
+ elif revs_default is not None:
+ ud.shallow_revs.extend(revs_default.split())
+
if (ud.shallow and
+ not ud.shallow_revs and
all(ud.shallow_depths[n] == 0 for n in ud.names)):
# Shallow disabled for this URL
ud.shallow = False
@@ -261,6 +270,9 @@ class Git(FetchMethod):
if ud.bareclone:
tarballname = "%s_bare" % tarballname
+ if ud.shallow_revs:
+ tarballname = "%s_%s" % (tarballname, "_".join(sorted(ud.shallow_revs)))
+
for name, revision in sorted(ud.revisions.items()):
tarballname = "%s_%s" % (tarballname, ud.revisions[name][:7])
depth = ud.shallow_depths[name]
@@ -413,7 +425,11 @@ class Git(FetchMethod):
runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
# Map srcrev+depths to revisions
- shallow_revisions = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest).splitlines()
+ parsed_depths = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest)
+
+ # Resolve specified revisions
+ parsed_revs = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join('"%s^{}"' % r for r in ud.shallow_revs)), d, workdir=dest)
+ shallow_revisions = parsed_depths.splitlines() + parsed_revs.splitlines()
# Apply extra ref wildcards
all_refs = runfetchcmd('%s for-each-ref "--format=%%(refname)"' % ud.basecmd,
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 73f7b3f7..343ae8fe 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -1259,6 +1259,33 @@ class GitShallowTest(FetcherTest):
self.add_empty_file('c')
self.add_empty_file('d')
self.git('checkout master', cwd=self.srcdir)
+ self.git('tag v0.0 a_branch', cwd=self.srcdir)
+ self.add_empty_file('e')
+ self.git('merge --no-ff --no-edit a_branch', cwd=self.srcdir)
+ self.add_empty_file('f')
+ self.assertRevCount(7, cwd=self.srcdir)
+
+ uri = self.d.getVar('SRC_URI', True).split()[0]
+ uri = '%s;branch=master,a_branch;name=master,a_branch' % uri
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '0')
+ self.d.setVar('BB_GIT_SHALLOW_REVS', 'v0.0')
+ self.d.setVar('SRCREV_master', '${AUTOREV}')
+ self.d.setVar('SRCREV_a_branch', '${AUTOREV}')
+
+ self.fetch_shallow(uri)
+
+ self.assertRevCount(5)
+ self.assertRefs(['master', 'origin/master', 'origin/a_branch'])
+
+ def test_shallow_multi_one_uri_depths(self):
+ # Create initial git repo
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('checkout -b a_branch', cwd=self.srcdir)
+ self.add_empty_file('c')
+ self.add_empty_file('d')
+ self.git('checkout master', cwd=self.srcdir)
self.add_empty_file('e')
self.git('merge --no-ff --no-edit a_branch', cwd=self.srcdir)
self.add_empty_file('f')
@@ -1375,6 +1402,38 @@ class GitShallowTest(FetcherTest):
self.d.setVar('BB_GIT_SHALLOW_EXTRA_REFS', 'refs/tags/*')
self.fetch()
+ def test_shallow_remove_revs(self):
+ # Create initial git repo
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+ self.git('checkout -b a_branch', cwd=self.srcdir)
+ self.add_empty_file('c')
+ self.add_empty_file('d')
+ self.git('checkout master', cwd=self.srcdir)
+ self.git('tag v0.0 a_branch', cwd=self.srcdir)
+ self.add_empty_file('e')
+ self.git('merge --no-ff --no-edit a_branch', cwd=self.srcdir)
+ self.git('branch -d a_branch', cwd=self.srcdir)
+ self.add_empty_file('f')
+ self.assertRevCount(7, cwd=self.srcdir)
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '0')
+ self.d.setVar('BB_GIT_SHALLOW_REVS', 'v0.0')
+
+ self.fetch_shallow()
+
+ self.assertRevCount(5)
+
+ def test_shallow_invalid_revs(self):
+ self.add_empty_file('a')
+ self.add_empty_file('b')
+
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '0')
+ self.d.setVar('BB_GIT_SHALLOW_REVS', 'v0.0')
+
+ with self.assertRaises(bb.fetch2.FetchError):
+ self.fetch()
+
if os.environ.get("BB_SKIP_NETTESTS") == "yes":
print("Unset BB_SKIP_NETTESTS to run network tests")
else:
@@ -1383,11 +1442,16 @@ class GitShallowTest(FetcherTest):
self.git('config core.bare true', cwd=self.srcdir)
self.git('fetch --tags', cwd=self.srcdir)
- self.d.setVar('BB_GIT_SHALLOW_DEPTH', '100')
+ self.d.setVar('BB_GIT_SHALLOW_DEPTH', '0')
+ # Note that the 1.10.0 tag is annotated, so this also tests
+ # reference of an annotated vs unannotated tag
+ self.d.setVar('BB_GIT_SHALLOW_REVS', '1.10.0')
self.fetch_shallow()
+ # Confirm that the history of 1.10.0 was removed
orig_revs = len(self.git('rev-list master', cwd=self.srcdir).splitlines())
revs = len(self.git('rev-list master').splitlines())
self.assertNotEqual(orig_revs, revs)
self.assertRefs(['master', 'origin/master'])
+ self.assertRevCount(orig_revs - 1758)
--
2.11.1
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH 0/8] Add support for shallow mirror tarballs
2017-05-12 21:46 [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
` (7 preceding siblings ...)
2017-05-12 21:46 ` [PATCH 8/8] fetch/git: add support for removing arbitrary revs for shallow Christopher Larson
@ 2017-05-25 15:42 ` Christopher Larson
2017-05-25 16:32 ` Richard Purdie
8 siblings, 1 reply; 14+ messages in thread
From: Christopher Larson @ 2017-05-25 15:42 UTC (permalink / raw)
To: bitbake-devel@lists.openembedded.org
[-- Attachment #1: Type: text/plain, Size: 567 bytes --]
On Fri, May 12, 2017 at 2:46 PM, Christopher Larson <kergoth@gmail.com>
wrote:
> The latest incarnation of the support for git shallow mirror tarballs.
> Updated
> with ud.mirrortarball removed, fixes for gitsm and gitannex, more granular
> commits to ease review, and unit tests added. Please let me know what you
> think. Thanks.
>
Any feedback on this, or are folks still fighting fires related to the
release?
--
Christopher Larson
kergoth at gmail dot com
Founder - BitBake, OpenEmbedded, OpenZaurus
Senior Software Engineer, Mentor Graphics
[-- Attachment #2: Type: text/html, Size: 1043 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH 0/8] Add support for shallow mirror tarballs
2017-05-25 15:42 ` [PATCH 0/8] Add support for shallow mirror tarballs Christopher Larson
@ 2017-05-25 16:32 ` Richard Purdie
2017-05-25 16:43 ` Christopher Larson
0 siblings, 1 reply; 14+ messages in thread
From: Richard Purdie @ 2017-05-25 16:32 UTC (permalink / raw)
To: Christopher Larson, bitbake-devel@lists.openembedded.org
On Thu, 2017-05-25 at 08:42 -0700, Christopher Larson wrote:
>
> On Fri, May 12, 2017 at 2:46 PM, Christopher Larson <kergoth@gmail.co
> m> wrote:
> > The latest incarnation of the support for git shallow mirror
> > tarballs. Updated
> > with ud.mirrortarball removed, fixes for gitsm and gitannex, more
> > granular
> > commits to ease review, and unit tests added. Please let me know
> > what you
> > think. Thanks.
> >
> Any feedback on this, or are folks still fighting fires related to
> the release?
I'm travelling, let's aim to discuss this with a view to merging next
week [remind me if I forget please].
Cheers,
Richard
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 0/8] Add support for shallow mirror tarballs
2017-05-25 16:32 ` Richard Purdie
@ 2017-05-25 16:43 ` Christopher Larson
0 siblings, 0 replies; 14+ messages in thread
From: Christopher Larson @ 2017-05-25 16:43 UTC (permalink / raw)
To: Richard Purdie; +Cc: bitbake-devel@lists.openembedded.org
[-- Attachment #1: Type: text/plain, Size: 924 bytes --]
On Thu, May 25, 2017 at 9:32 AM, Richard Purdie <
richard.purdie@linuxfoundation.org> wrote:
> On Thu, 2017-05-25 at 08:42 -0700, Christopher Larson wrote:
> >
> > On Fri, May 12, 2017 at 2:46 PM, Christopher Larson <kergoth@gmail.co
> > m> wrote:
> > > The latest incarnation of the support for git shallow mirror
> > > tarballs. Updated
> > > with ud.mirrortarball removed, fixes for gitsm and gitannex, more
> > > granular
> > > commits to ease review, and unit tests added. Please let me know
> > > what you
> > > think. Thanks.
> > >
> > Any feedback on this, or are folks still fighting fires related to
> > the release?
>
> I'm travelling, let's aim to discuss this with a view to merging next
> week [remind me if I forget please].
Understood, thanks!
--
Christopher Larson
kergoth at gmail dot com
Founder - BitBake, OpenEmbedded, OpenZaurus
Senior Software Engineer, Mentor Graphics
[-- Attachment #2: Type: text/html, Size: 1520 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread