All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fetch: Use git rev-parse if network is not available
@ 2022-02-07 11:53 Pavel Zhukov
  2022-02-07 12:06 ` [bitbake-devel] " Quentin Schulz
  2022-02-07 12:43 ` [bitbake-devel] [PATCH] " Richard Purdie
  0 siblings, 2 replies; 6+ messages in thread
From: Pavel Zhukov @ 2022-02-07 11:53 UTC (permalink / raw)
  To: bitbake-devel; +Cc: pavel, Pavel Zhukov

Sometimes Fetcher is initialized while network is disabled (unpack() is
one of such cases) and Fetcher fails to map non SHA1 SRCREV to SHA1 one.
In such cases git ls-remote cannot be used to determinate latest
revision and git rev-parse can be used instead. This approach doesn't
work with rebaseable repos so exception will occur for such repos.

See [YOCTO #14707] for more details. Note: the issue doesn't occur each
time in unpack() because in normal flow (fetch->unpack in one build)
URI_HEADREVs are cached.

Signed-off-by: Pavel Zhukov <pavel.zhukov@huawei.com>
---
 lib/bb/fetch2/git.py  | 61 +++++++++++++++++++++++++++++++++----------
 lib/bb/tests/fetch.py | 43 ++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 30da8e95..5f56d9fe 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -122,6 +122,10 @@ class Git(FetchMethod):
     def init(self, d):
         pass
 
+    @staticmethod
+    def _is_sha1(name):
+        return len(name) == 40  and (True in [c in "abcdef0123456789" for c in name])
+
     def supports(self, ud, d):
         """
         Check to see if a given url can be fetched with git.
@@ -135,6 +139,8 @@ class Git(FetchMethod):
         """
         init git specific variable within url data
         so that the git method like latest_revision() can work
+        NOTE: This function may be called from do_unpack and in that case network is not available
+        avoid using network operations here or guard them properly.
         """
         if 'protocol' in ud.parm:
             ud.proto = ud.parm['protocol']
@@ -248,28 +254,33 @@ class Git(FetchMethod):
 
         ud.setup_revisions(d)
 
-        for name in ud.names:
-            # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one
-            if not ud.revisions[name] or len(ud.revisions[name]) != 40  or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]):
-                if ud.revisions[name]:
-                    ud.unresolvedrev[name] = ud.revisions[name]
-                ud.revisions[name] = self.latest_revision(ud, d, name)
-
         gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_'))
         if gitsrcname.startswith('.'):
             gitsrcname = gitsrcname[1:]
 
-        # for rebaseable git repo, it is necessary to keep mirror tar ball
-        # per revision, so that even the revision disappears from the
-        # upstream repo in the future, the mirror will remain intact and still
-        # contains the revision
-        if ud.rebaseable:
-            for name in ud.names:
-                gitsrcname = gitsrcname + '_' + ud.revisions[name]
 
         dl_dir = d.getVar("DL_DIR")
         gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
+
+
+        ## We need clonedir to be defined before latest_revision call for unpack routine to use rev-parse instead of ls-remote
         ud.clonedir = os.path.join(gitdir, gitsrcname)
+
+        for name in ud.names:
+            # Ensure anything that doesn't look like a sha1 checksum/revision is translated into one
+            if not ud.revisions[name] or not self._is_sha1(ud.revisions[name]):
+                if ud.revisions[name]:
+                    ud.unresolvedrev[name] = ud.revisions[name]
+                ud.revisions[name] = self.latest_revision(ud, d, name)
+            if ud.rebaseable:
+                # for rebaseable git repo, it is necessary to keep mirror tar ball
+                # per revision, so that even the revision disappears from the
+                # upstream repo in the future, the mirror will remain intact and still
+                # contains the revision
+                gitsrcname = gitsrcname + '_' + ud.revisions[name]
+        if ud.rebaseable:
+            ud.clonedir = os.path.join(gitdir, gitsrcname)
+
         ud.localfile = ud.clonedir
 
         mirrortarball = 'git2_%s.tar.gz' % gitsrcname
@@ -697,6 +708,14 @@ class Git(FetchMethod):
         slash_re = re.compile(r"/+")
         return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
 
+    def _revparse(self, ud, d, rev):
+        """
+        Return latest revision (SHA) for the refs specified by branch/tag/etc
+        """
+        cmd = '{basecmd} rev-parse -q --verify {rev}'.format(basecmd = ud.basecmd, rev = rev)
+        output = runfetchcmd(cmd, d, quiet=True, workdir = ud.clonedir)
+        return output.strip()
+
     def _lsremote(self, ud, d, search):
         """
         Run git ls-remote with the specified search string
@@ -727,6 +746,20 @@ class Git(FetchMethod):
         """
         Compute the HEAD revision for the url
         """
+
+        ## Network is not availble and we missed the cache. Try rev-parse
+        if d.getVar("BB_NO_NETWORK"):
+            if ud.rebaseable:
+                raise bb.fetch.UnpackError("Unable to map revision \"{name}\" to SHA1 revision without a network. "  \
+                                           "SHA1 revision must be specified for rebaseable repos "\
+                                           "or \"BB_SRCREV_POLICY\" set to \"cache\". ".format(name=ud.unresolvedrev[name]),ud.host+ud.path)
+            rev = self._revparse(ud, d, ud.unresolvedrev[name])
+            if rev is None:
+                raise bb.fetch2.FetchError("Unable to resolve '%s' in local clone of repository in git rev-parse output for %s" % \
+                                           (ud.unresolvedrev[name], ud.host+ud.path))
+            return rev
+
+
         output = self._lsremote(ud, d, "")
         # Tags of the form ^{} may not work, need to fallback to other form
         if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index ec7d83c9..1fcf3466 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -2776,3 +2776,46 @@ class GitSharedTest(FetcherTest):
         fetcher.unpack(self.unpackdir)
         alt = os.path.join(self.unpackdir, 'git/.git/objects/info/alternates')
         self.assertFalse(os.path.exists(alt))
+
+class GitUnpackOfflineTest(FetcherTest):
+
+    def setUp(self):
+        super(GitUnpackOfflineTest, self).setUp()
+        self.tempdir = self.tempdir
+        self.persistdir = os.path.join(self.tempdir, "persistdata")
+
+    def setData(self):
+        self.recipe_url = "git://github.com/lz4/lz4.git;branch=release;protocol=https;"
+        self.d.setVar('SRCREV', 'v1.9.0')
+        self.d.setVar("PERSISTENT_DIR", self.persistdir)
+        self.d.setVar("DL_DIR", self.dldir)
+
+    @skipIfNoNetwork()
+    def test_unpack_offline(self):
+        self.setData()
+        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
+        bb.fetch.fetcher_init(self.d)
+        fetcher.download()
+        fetcher.unpack(self.unpackdir)
+
+        ## emulate do_unpack (disable network)
+        self.d.setVar("BB_NO_NETWORK", "1")
+        unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
+        bb.fetch.fetcher_init(self.d)
+        unpacker.unpack(self.unpackdir)
+
+    @skipIfNoNetwork()
+    def test_unpack_rebaseable(self):
+        self.setData()
+        self.recipe_url += "rebaseable=1;"
+
+        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
+        bb.fetch.fetcher_init(self.d)
+        fetcher.download()
+
+        ## emulate do_unpack (disable network)
+        self.d.setVar("BB_NO_NETWORK", "1")
+        with self.assertRaises(bb.fetch2.UnpackError):
+            unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
+            bb.fetch.fetcher_init(self.d)
+            fetcher.unpack()
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [bitbake-devel] [PATCH] fetch: Use git rev-parse if network is not available
  2022-02-07 11:53 [PATCH] fetch: Use git rev-parse if network is not available Pavel Zhukov
@ 2022-02-07 12:06 ` Quentin Schulz
  2022-02-07 12:23   ` [PATCH v2] " Pavel Zhukov
  2022-02-07 12:43 ` [bitbake-devel] [PATCH] " Richard Purdie
  1 sibling, 1 reply; 6+ messages in thread
From: Quentin Schulz @ 2022-02-07 12:06 UTC (permalink / raw)
  To: Pavel Zhukov, bitbake-devel; +Cc: Pavel Zhukov

Hi Pavel,

On 2/7/22 12:53, Pavel Zhukov wrote:
> Sometimes Fetcher is initialized while network is disabled (unpack() is
> one of such cases) and Fetcher fails to map non SHA1 SRCREV to SHA1 one.
> In such cases git ls-remote cannot be used to determinate latest
> revision and git rev-parse can be used instead. This approach doesn't
> work with rebaseable repos so exception will occur for such repos.
> 
> See [YOCTO #14707] for more details. Note: the issue doesn't occur each
> time in unpack() because in normal flow (fetch->unpack in one build)
> URI_HEADREVs are cached.
> 
> Signed-off-by: Pavel Zhukov <pavel.zhukov@huawei.com>
> ---
>   lib/bb/fetch2/git.py  | 61 +++++++++++++++++++++++++++++++++----------
>   lib/bb/tests/fetch.py | 43 ++++++++++++++++++++++++++++++
>   2 files changed, 90 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
> index 30da8e95..5f56d9fe 100644
> --- a/lib/bb/fetch2/git.py
> +++ b/lib/bb/fetch2/git.py
> @@ -122,6 +122,10 @@ class Git(FetchMethod):
>       def init(self, d):
>           pass
>   
> +    @staticmethod
> +    def _is_sha1(name):
> +        return len(name) == 40  and (True in [c in "abcdef0123456789" for c in name])
> +

return re.fullmatch(r'[a-f0-9]{40}', name) is not None

should actually do the trick. Might even want to include uppercase 
letters with A-F.

Maybe we want this to be a compiled re pattern too since it's probably 
going to be used "a bit" :)

(also pretty sure it should have been "and not(False in..)" because here 
if only ONE character is hex but not the rest, it still returns True 
AFAICT?)

Cheers,
Quentin

>       def supports(self, ud, d):
>           """
>           Check to see if a given url can be fetched with git.
> @@ -135,6 +139,8 @@ class Git(FetchMethod):
>           """
>           init git specific variable within url data
>           so that the git method like latest_revision() can work
> +        NOTE: This function may be called from do_unpack and in that case network is not available
> +        avoid using network operations here or guard them properly.
>           """
>           if 'protocol' in ud.parm:
>               ud.proto = ud.parm['protocol']
> @@ -248,28 +254,33 @@ class Git(FetchMethod):
>   
>           ud.setup_revisions(d)
>   
> -        for name in ud.names:
> -            # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one
> -            if not ud.revisions[name] or len(ud.revisions[name]) != 40  or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]):
> -                if ud.revisions[name]:
> -                    ud.unresolvedrev[name] = ud.revisions[name]
> -                ud.revisions[name] = self.latest_revision(ud, d, name)
> -
>           gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_'))
>           if gitsrcname.startswith('.'):
>               gitsrcname = gitsrcname[1:]
>   
> -        # for rebaseable git repo, it is necessary to keep mirror tar ball
> -        # per revision, so that even the revision disappears from the
> -        # upstream repo in the future, the mirror will remain intact and still
> -        # contains the revision
> -        if ud.rebaseable:
> -            for name in ud.names:
> -                gitsrcname = gitsrcname + '_' + ud.revisions[name]
>   
>           dl_dir = d.getVar("DL_DIR")
>           gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
> +
> +
> +        ## We need clonedir to be defined before latest_revision call for unpack routine to use rev-parse instead of ls-remote
>           ud.clonedir = os.path.join(gitdir, gitsrcname)
> +
> +        for name in ud.names:
> +            # Ensure anything that doesn't look like a sha1 checksum/revision is translated into one
> +            if not ud.revisions[name] or not self._is_sha1(ud.revisions[name]):
> +                if ud.revisions[name]:
> +                    ud.unresolvedrev[name] = ud.revisions[name]
> +                ud.revisions[name] = self.latest_revision(ud, d, name)
> +            if ud.rebaseable:
> +                # for rebaseable git repo, it is necessary to keep mirror tar ball
> +                # per revision, so that even the revision disappears from the
> +                # upstream repo in the future, the mirror will remain intact and still
> +                # contains the revision
> +                gitsrcname = gitsrcname + '_' + ud.revisions[name]
> +        if ud.rebaseable:
> +            ud.clonedir = os.path.join(gitdir, gitsrcname)
> +
>           ud.localfile = ud.clonedir
>   
>           mirrortarball = 'git2_%s.tar.gz' % gitsrcname
> @@ -697,6 +708,14 @@ class Git(FetchMethod):
>           slash_re = re.compile(r"/+")
>           return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
>   
> +    def _revparse(self, ud, d, rev):
> +        """
> +        Return latest revision (SHA) for the refs specified by branch/tag/etc
> +        """
> +        cmd = '{basecmd} rev-parse -q --verify {rev}'.format(basecmd = ud.basecmd, rev = rev)
> +        output = runfetchcmd(cmd, d, quiet=True, workdir = ud.clonedir)
> +        return output.strip()
> +
>       def _lsremote(self, ud, d, search):
>           """
>           Run git ls-remote with the specified search string
> @@ -727,6 +746,20 @@ class Git(FetchMethod):
>           """
>           Compute the HEAD revision for the url
>           """
> +
> +        ## Network is not availble and we missed the cache. Try rev-parse
> +        if d.getVar("BB_NO_NETWORK"):
> +            if ud.rebaseable:
> +                raise bb.fetch.UnpackError("Unable to map revision \"{name}\" to SHA1 revision without a network. "  \
> +                                           "SHA1 revision must be specified for rebaseable repos "\
> +                                           "or \"BB_SRCREV_POLICY\" set to \"cache\". ".format(name=ud.unresolvedrev[name]),ud.host+ud.path)
> +            rev = self._revparse(ud, d, ud.unresolvedrev[name])
> +            if rev is None:
> +                raise bb.fetch2.FetchError("Unable to resolve '%s' in local clone of repository in git rev-parse output for %s" % \
> +                                           (ud.unresolvedrev[name], ud.host+ud.path))
> +            return rev
> +
> +
>           output = self._lsremote(ud, d, "")
>           # Tags of the form ^{} may not work, need to fallback to other form
>           if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
> diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
> index ec7d83c9..1fcf3466 100644
> --- a/lib/bb/tests/fetch.py
> +++ b/lib/bb/tests/fetch.py
> @@ -2776,3 +2776,46 @@ class GitSharedTest(FetcherTest):
>           fetcher.unpack(self.unpackdir)
>           alt = os.path.join(self.unpackdir, 'git/.git/objects/info/alternates')
>           self.assertFalse(os.path.exists(alt))
> +
> +class GitUnpackOfflineTest(FetcherTest):
> +
> +    def setUp(self):
> +        super(GitUnpackOfflineTest, self).setUp()
> +        self.tempdir = self.tempdir
> +        self.persistdir = os.path.join(self.tempdir, "persistdata")
> +
> +    def setData(self):
> +        self.recipe_url = "git://github.com/lz4/lz4.git;branch=release;protocol=https;"
> +        self.d.setVar('SRCREV', 'v1.9.0')
> +        self.d.setVar("PERSISTENT_DIR", self.persistdir)
> +        self.d.setVar("DL_DIR", self.dldir)
> +
> +    @skipIfNoNetwork()
> +    def test_unpack_offline(self):
> +        self.setData()
> +        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
> +        bb.fetch.fetcher_init(self.d)
> +        fetcher.download()
> +        fetcher.unpack(self.unpackdir)
> +
> +        ## emulate do_unpack (disable network)
> +        self.d.setVar("BB_NO_NETWORK", "1")
> +        unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
> +        bb.fetch.fetcher_init(self.d)
> +        unpacker.unpack(self.unpackdir)
> +
> +    @skipIfNoNetwork()
> +    def test_unpack_rebaseable(self):
> +        self.setData()
> +        self.recipe_url += "rebaseable=1;"
> +
> +        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
> +        bb.fetch.fetcher_init(self.d)
> +        fetcher.download()
> +
> +        ## emulate do_unpack (disable network)
> +        self.d.setVar("BB_NO_NETWORK", "1")
> +        with self.assertRaises(bb.fetch2.UnpackError):
> +            unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
> +            bb.fetch.fetcher_init(self.d)
> +            fetcher.unpack()
> 
> 
> 
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#13317): https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.openembedded.org_g_bitbake-2Ddevel_message_13317&d=DwIFaQ&c=_sEr5x9kUWhuk4_nFwjJtA&r=LYjLexDn7rXIzVmkNPvw5ymA1XTSqHGq8yBP6m6qZZ4njZguQhZhkI_-172IIy1t&m=TAHnwIUCuZPQ7vBGmkvVBkcWKm3prVlA6UFcViaqYEScF_lgqMYpoqeboKd-qJeH&s=kNFDHQIcWkeZ-Ld6-oALgJMeOKvcmoWJh12dcki4Tg4&e=
> Mute This Topic: https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.openembedded.org_mt_88969671_6293953&d=DwIFaQ&c=_sEr5x9kUWhuk4_nFwjJtA&r=LYjLexDn7rXIzVmkNPvw5ymA1XTSqHGq8yBP6m6qZZ4njZguQhZhkI_-172IIy1t&m=TAHnwIUCuZPQ7vBGmkvVBkcWKm3prVlA6UFcViaqYEScF_lgqMYpoqeboKd-qJeH&s=G2M6sBkCY4DYUNyfIHrwHZ6ZewTA8ayM1rPtPsE2w-Q&e=
> Group Owner: bitbake-devel+owner@lists.openembedded.org
> Unsubscribe: https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.openembedded.org_g_bitbake-2Ddevel_unsub&d=DwIFaQ&c=_sEr5x9kUWhuk4_nFwjJtA&r=LYjLexDn7rXIzVmkNPvw5ymA1XTSqHGq8yBP6m6qZZ4njZguQhZhkI_-172IIy1t&m=TAHnwIUCuZPQ7vBGmkvVBkcWKm3prVlA6UFcViaqYEScF_lgqMYpoqeboKd-qJeH&s=disYGNo9s0vbsx-5RloToeQhYcZwV84Oy_dSYrIttAM&e=  [quentin.schulz@theobroma-systems.com]
> -=-=-=-=-=-=-=-=-=-=-=-
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2] fetch: Use git rev-parse if network is not available
  2022-02-07 12:06 ` [bitbake-devel] " Quentin Schulz
@ 2022-02-07 12:23   ` Pavel Zhukov
  0 siblings, 0 replies; 6+ messages in thread
From: Pavel Zhukov @ 2022-02-07 12:23 UTC (permalink / raw)
  To: bitbake-devel; +Cc: pavel, Pavel Zhukov

Sometimes Fetcher is initialized while network is disabled (unpack() is
one of such cases) and Fetcher fails to map non SHA1 SRCREV to SHA1 one.
In such cases git ls-remote cannot be used to determinate latest
revision and git rev-parse can be used instead. This approach doesn't
work with rebaseable repos so exception will occur for such repos.

See [YOCTO #14707] for more details. Note: the issue doesn't occur each
time in unpack() because in normal flow (fetch->unpack in one build)
URI_HEADREVs are cached.

Signed-off-by: Pavel Zhukov <pavel.zhukov@huawei.com>
---
 lib/bb/fetch2/git.py  | 62 +++++++++++++++++++++++++++++++++----------
 lib/bb/tests/fetch.py | 43 ++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 14 deletions(-)

diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 30da8e95..490dcaf4 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -118,10 +118,15 @@ class Git(FetchMethod):
     bitbake_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.join(os.path.abspath(__file__))), '..', '..', '..'))
     make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')
 
+    sha1_pattern = re.compile(r'\b[0-9a-fA-F]{40}\b')
+
     """Class to fetch a module or modules from git repositories"""
     def init(self, d):
         pass
 
+    def _is_sha1(self, name):
+        return len(name) == 40 and re.match(self.sha1_pattern, name)
+
     def supports(self, ud, d):
         """
         Check to see if a given url can be fetched with git.
@@ -135,6 +140,8 @@ class Git(FetchMethod):
         """
         init git specific variable within url data
         so that the git method like latest_revision() can work
+        NOTE: This function may be called from do_unpack and in that case network is not available
+        avoid using network operations here or guard them properly.
         """
         if 'protocol' in ud.parm:
             ud.proto = ud.parm['protocol']
@@ -248,28 +255,33 @@ class Git(FetchMethod):
 
         ud.setup_revisions(d)
 
-        for name in ud.names:
-            # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one
-            if not ud.revisions[name] or len(ud.revisions[name]) != 40  or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]):
-                if ud.revisions[name]:
-                    ud.unresolvedrev[name] = ud.revisions[name]
-                ud.revisions[name] = self.latest_revision(ud, d, name)
-
         gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_'))
         if gitsrcname.startswith('.'):
             gitsrcname = gitsrcname[1:]
 
-        # for rebaseable git repo, it is necessary to keep mirror tar ball
-        # per revision, so that even the revision disappears from the
-        # upstream repo in the future, the mirror will remain intact and still
-        # contains the revision
-        if ud.rebaseable:
-            for name in ud.names:
-                gitsrcname = gitsrcname + '_' + ud.revisions[name]
 
         dl_dir = d.getVar("DL_DIR")
         gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
+
+
+        ## We need clonedir to be defined before latest_revision call for unpack routine to use rev-parse instead of ls-remote
         ud.clonedir = os.path.join(gitdir, gitsrcname)
+
+        for name in ud.names:
+            # Ensure anything that doesn't look like a sha1 checksum/revision is translated into one
+            if not ud.revisions[name] or not self._is_sha1(ud.revisions[name]):
+                if ud.revisions[name]:
+                    ud.unresolvedrev[name] = ud.revisions[name]
+                ud.revisions[name] = self.latest_revision(ud, d, name)
+            if ud.rebaseable:
+                # for rebaseable git repo, it is necessary to keep mirror tar ball
+                # per revision, so that even the revision disappears from the
+                # upstream repo in the future, the mirror will remain intact and still
+                # contains the revision
+                gitsrcname = gitsrcname + '_' + ud.revisions[name]
+        if ud.rebaseable:
+            ud.clonedir = os.path.join(gitdir, gitsrcname)
+
         ud.localfile = ud.clonedir
 
         mirrortarball = 'git2_%s.tar.gz' % gitsrcname
@@ -697,6 +709,14 @@ class Git(FetchMethod):
         slash_re = re.compile(r"/+")
         return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
 
+    def _revparse(self, ud, d, rev):
+        """
+        Return latest revision (SHA) for the refs specified by branch/tag/etc
+        """
+        cmd = '{basecmd} rev-parse -q --verify {rev}'.format(basecmd = ud.basecmd, rev = rev)
+        output = runfetchcmd(cmd, d, quiet=True, workdir = ud.clonedir)
+        return output.strip()
+
     def _lsremote(self, ud, d, search):
         """
         Run git ls-remote with the specified search string
@@ -727,6 +747,20 @@ class Git(FetchMethod):
         """
         Compute the HEAD revision for the url
         """
+
+        ## Network is not availble and we missed the cache. Try rev-parse
+        if d.getVar("BB_NO_NETWORK"):
+            if ud.rebaseable:
+                raise bb.fetch.UnpackError("Unable to map revision \"{name}\" to SHA1 revision without a network. "  \
+                                           "SHA1 revision must be specified for rebaseable repos "\
+                                           "or \"BB_SRCREV_POLICY\" set to \"cache\". ".format(name=ud.unresolvedrev[name]),ud.host+ud.path)
+            rev = self._revparse(ud, d, ud.unresolvedrev[name])
+            if rev is None:
+                raise bb.fetch2.FetchError("Unable to resolve '%s' in local clone of repository in git rev-parse output for %s" % \
+                                           (ud.unresolvedrev[name], ud.host+ud.path))
+            return rev
+
+
         output = self._lsremote(ud, d, "")
         # Tags of the form ^{} may not work, need to fallback to other form
         if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index ec7d83c9..1fcf3466 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -2776,3 +2776,46 @@ class GitSharedTest(FetcherTest):
         fetcher.unpack(self.unpackdir)
         alt = os.path.join(self.unpackdir, 'git/.git/objects/info/alternates')
         self.assertFalse(os.path.exists(alt))
+
+class GitUnpackOfflineTest(FetcherTest):
+
+    def setUp(self):
+        super(GitUnpackOfflineTest, self).setUp()
+        self.tempdir = self.tempdir
+        self.persistdir = os.path.join(self.tempdir, "persistdata")
+
+    def setData(self):
+        self.recipe_url = "git://github.com/lz4/lz4.git;branch=release;protocol=https;"
+        self.d.setVar('SRCREV', 'v1.9.0')
+        self.d.setVar("PERSISTENT_DIR", self.persistdir)
+        self.d.setVar("DL_DIR", self.dldir)
+
+    @skipIfNoNetwork()
+    def test_unpack_offline(self):
+        self.setData()
+        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
+        bb.fetch.fetcher_init(self.d)
+        fetcher.download()
+        fetcher.unpack(self.unpackdir)
+
+        ## emulate do_unpack (disable network)
+        self.d.setVar("BB_NO_NETWORK", "1")
+        unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
+        bb.fetch.fetcher_init(self.d)
+        unpacker.unpack(self.unpackdir)
+
+    @skipIfNoNetwork()
+    def test_unpack_rebaseable(self):
+        self.setData()
+        self.recipe_url += "rebaseable=1;"
+
+        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
+        bb.fetch.fetcher_init(self.d)
+        fetcher.download()
+
+        ## emulate do_unpack (disable network)
+        self.d.setVar("BB_NO_NETWORK", "1")
+        with self.assertRaises(bb.fetch2.UnpackError):
+            unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
+            bb.fetch.fetcher_init(self.d)
+            fetcher.unpack()
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [bitbake-devel] [PATCH] fetch: Use git rev-parse if network is not available
  2022-02-07 11:53 [PATCH] fetch: Use git rev-parse if network is not available Pavel Zhukov
  2022-02-07 12:06 ` [bitbake-devel] " Quentin Schulz
@ 2022-02-07 12:43 ` Richard Purdie
  2022-02-07 12:50   ` [PATCH v3] " Pavel Zhukov
  2022-02-07 12:57   ` [bitbake-devel] [PATCH] " Pavel Zhukov
  1 sibling, 2 replies; 6+ messages in thread
From: Richard Purdie @ 2022-02-07 12:43 UTC (permalink / raw)
  To: Pavel Zhukov, bitbake-devel; +Cc: Pavel Zhukov

On Mon, 2022-02-07 at 12:53 +0100, Pavel Zhukov wrote:
> Sometimes Fetcher is initialized while network is disabled (unpack() is
> one of such cases) and Fetcher fails to map non SHA1 SRCREV to SHA1 one.
> In such cases git ls-remote cannot be used to determinate latest
> revision and git rev-parse can be used instead. This approach doesn't
> work with rebaseable repos so exception will occur for such repos.
> 
> See [YOCTO #14707] for more details. Note: the issue doesn't occur each
> time in unpack() because in normal flow (fetch->unpack in one build)
> URI_HEADREVs are cached.
> 
> Signed-off-by: Pavel Zhukov <pavel.zhukov@huawei.com>
> ---

Thanks for looking at this. I like the idea of some tests to demonstrate the
problem so we should start there.


> +class GitUnpackOfflineTest(FetcherTest):
> +
> +    def setUp(self):
> +        super(GitUnpackOfflineTest, self).setUp()
> +        self.tempdir = self.tempdir
> +        self.persistdir = os.path.join(self.tempdir, "persistdata")
> +
> +    def setData(self):
> +        self.recipe_url = "git://github.com/lz4/lz4.git;branch=release;protocol=https;"
> +        self.d.setVar('SRCREV', 'v1.9.0')
> +        self.d.setVar("PERSISTENT_DIR", self.persistdir)
> +        self.d.setVar("DL_DIR", self.dldir)
> +
> +    @skipIfNoNetwork()
> +    def test_unpack_offline(self):
> +        self.setData()
> +        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
> +        bb.fetch.fetcher_init(self.d)
> +        fetcher.download()
> +        fetcher.unpack(self.unpackdir)
> +
> +        ## emulate do_unpack (disable network)
> +        self.d.setVar("BB_NO_NETWORK", "1")
> +        unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
> +        bb.fetch.fetcher_init(self.d)
> +        unpacker.unpack(self.unpackdir)
> +

I don't think this is a valid test. I think it should only call
bb.fetch.fetcher_init() once and at the start of the test before the Fetch()
call.

If we change it to do that, the test passes.

Going back to your original bug report, the issue could be that
bb.fetch.fetcher_init() is being called in the do_unpack task and "destroying"
the source revs in the worker context.

I suspect but haven't looked into that yet. What we may need to do is transfer
the bb.fetch2.saved_headrevs into the worker context?

Cheers,

Richard






^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3] fetch: Use git rev-parse if network is not available
  2022-02-07 12:43 ` [bitbake-devel] [PATCH] " Richard Purdie
@ 2022-02-07 12:50   ` Pavel Zhukov
  2022-02-07 12:57   ` [bitbake-devel] [PATCH] " Pavel Zhukov
  1 sibling, 0 replies; 6+ messages in thread
From: Pavel Zhukov @ 2022-02-07 12:50 UTC (permalink / raw)
  To: bitbake-devel; +Cc: pavel, Pavel Zhukov

Sometimes Fetcher is initialized while network is disabled (unpack() is
one of such cases) and Fetcher fails to map non SHA1 SRCREV to SHA1 one.
In such cases git ls-remote cannot be used to determinate latest
revision and git rev-parse can be used instead. This approach doesn't
work with rebaseable repos so exception will occur for such repos.

See [YOCTO #14707] for more details. Note: the issue doesn't occur each
time in unpack() because in normal flow (fetch->unpack in one build)
URI_HEADREVs are cached.

Signed-off-by: Pavel Zhukov <pavel.zhukov@huawei.com>
---
 lib/bb/fetch2/git.py  | 62 +++++++++++++++++++++++++++++++++----------
 lib/bb/tests/fetch.py | 44 ++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 14 deletions(-)

diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 30da8e95..490dcaf4 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -118,10 +118,15 @@ class Git(FetchMethod):
     bitbake_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.join(os.path.abspath(__file__))), '..', '..', '..'))
     make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')
 
+    sha1_pattern = re.compile(r'\b[0-9a-fA-F]{40}\b')
+
     """Class to fetch a module or modules from git repositories"""
     def init(self, d):
         pass
 
+    def _is_sha1(self, name):
+        return len(name) == 40 and re.match(self.sha1_pattern, name)
+
     def supports(self, ud, d):
         """
         Check to see if a given url can be fetched with git.
@@ -135,6 +140,8 @@ class Git(FetchMethod):
         """
         init git specific variable within url data
         so that the git method like latest_revision() can work
+        NOTE: This function may be called from do_unpack and in that case network is not available
+        avoid using network operations here or guard them properly.
         """
         if 'protocol' in ud.parm:
             ud.proto = ud.parm['protocol']
@@ -248,28 +255,33 @@ class Git(FetchMethod):
 
         ud.setup_revisions(d)
 
-        for name in ud.names:
-            # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one
-            if not ud.revisions[name] or len(ud.revisions[name]) != 40  or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]):
-                if ud.revisions[name]:
-                    ud.unresolvedrev[name] = ud.revisions[name]
-                ud.revisions[name] = self.latest_revision(ud, d, name)
-
         gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_'))
         if gitsrcname.startswith('.'):
             gitsrcname = gitsrcname[1:]
 
-        # for rebaseable git repo, it is necessary to keep mirror tar ball
-        # per revision, so that even the revision disappears from the
-        # upstream repo in the future, the mirror will remain intact and still
-        # contains the revision
-        if ud.rebaseable:
-            for name in ud.names:
-                gitsrcname = gitsrcname + '_' + ud.revisions[name]
 
         dl_dir = d.getVar("DL_DIR")
         gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
+
+
+        ## We need clonedir to be defined before latest_revision call for unpack routine to use rev-parse instead of ls-remote
         ud.clonedir = os.path.join(gitdir, gitsrcname)
+
+        for name in ud.names:
+            # Ensure anything that doesn't look like a sha1 checksum/revision is translated into one
+            if not ud.revisions[name] or not self._is_sha1(ud.revisions[name]):
+                if ud.revisions[name]:
+                    ud.unresolvedrev[name] = ud.revisions[name]
+                ud.revisions[name] = self.latest_revision(ud, d, name)
+            if ud.rebaseable:
+                # for rebaseable git repo, it is necessary to keep mirror tar ball
+                # per revision, so that even the revision disappears from the
+                # upstream repo in the future, the mirror will remain intact and still
+                # contains the revision
+                gitsrcname = gitsrcname + '_' + ud.revisions[name]
+        if ud.rebaseable:
+            ud.clonedir = os.path.join(gitdir, gitsrcname)
+
         ud.localfile = ud.clonedir
 
         mirrortarball = 'git2_%s.tar.gz' % gitsrcname
@@ -697,6 +709,14 @@ class Git(FetchMethod):
         slash_re = re.compile(r"/+")
         return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
 
+    def _revparse(self, ud, d, rev):
+        """
+        Return latest revision (SHA) for the refs specified by branch/tag/etc
+        """
+        cmd = '{basecmd} rev-parse -q --verify {rev}'.format(basecmd = ud.basecmd, rev = rev)
+        output = runfetchcmd(cmd, d, quiet=True, workdir = ud.clonedir)
+        return output.strip()
+
     def _lsremote(self, ud, d, search):
         """
         Run git ls-remote with the specified search string
@@ -727,6 +747,20 @@ class Git(FetchMethod):
         """
         Compute the HEAD revision for the url
         """
+
+        ## Network is not availble and we missed the cache. Try rev-parse
+        if d.getVar("BB_NO_NETWORK"):
+            if ud.rebaseable:
+                raise bb.fetch.UnpackError("Unable to map revision \"{name}\" to SHA1 revision without a network. "  \
+                                           "SHA1 revision must be specified for rebaseable repos "\
+                                           "or \"BB_SRCREV_POLICY\" set to \"cache\". ".format(name=ud.unresolvedrev[name]),ud.host+ud.path)
+            rev = self._revparse(ud, d, ud.unresolvedrev[name])
+            if rev is None:
+                raise bb.fetch2.FetchError("Unable to resolve '%s' in local clone of repository in git rev-parse output for %s" % \
+                                           (ud.unresolvedrev[name], ud.host+ud.path))
+            return rev
+
+
         output = self._lsremote(ud, d, "")
         # Tags of the form ^{} may not work, need to fallback to other form
         if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index ec7d83c9..f7f847dd 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -2776,3 +2776,47 @@ class GitSharedTest(FetcherTest):
         fetcher.unpack(self.unpackdir)
         alt = os.path.join(self.unpackdir, 'git/.git/objects/info/alternates')
         self.assertFalse(os.path.exists(alt))
+
+class GitUnpackOfflineTest(FetcherTest):
+
+    def setUp(self):
+        super(GitUnpackOfflineTest, self).setUp()
+        self.tempdir = self.tempdir
+        self.persistdir = os.path.join(self.tempdir, "persistdata")
+
+    def setData(self):
+        self.recipe_url = "git://github.com/lz4/lz4.git;branch=release;protocol=https;"
+        self.d.setVar('SRCREV', 'v1.9.0')
+        self.d.setVar("PERSISTENT_DIR", self.persistdir)
+        self.d.setVar("DL_DIR", self.dldir)
+
+    @skipIfNoNetwork()
+    def test_unpack_offline(self):
+        # emulate do_fetch followed by do_unpack
+        self.setData()
+        bb.fetch.fetcher_init(self.d)
+        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
+        fetcher.download()
+        fetcher.unpack(self.unpackdir)
+
+        ## emulate do_unpack (disable network) withot previous fetch
+        self.d.setVar("BB_NO_NETWORK", "1")
+        bb.fetch.fetcher_init(self.d)
+        unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
+        unpacker.unpack(self.unpackdir)
+
+    @skipIfNoNetwork()
+    def test_unpack_rebaseable(self):
+        self.setData()
+        self.recipe_url += "rebaseable=1;"
+
+        bb.fetch.fetcher_init(self.d)
+        fetcher = bb.fetch.Fetch([self.recipe_url], self.d)
+        fetcher.download()
+
+        ## emulate do_unpack (disable network)
+        self.d.setVar("BB_NO_NETWORK", "1")
+        bb.fetch.fetcher_init(self.d)
+        with self.assertRaises(bb.fetch2.UnpackError):
+            unpacker = bb.fetch.Fetch([self.recipe_url], self.d)
+            fetcher.unpack()
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [bitbake-devel] [PATCH] fetch: Use git rev-parse if network is not available
  2022-02-07 12:43 ` [bitbake-devel] [PATCH] " Richard Purdie
  2022-02-07 12:50   ` [PATCH v3] " Pavel Zhukov
@ 2022-02-07 12:57   ` Pavel Zhukov
  1 sibling, 0 replies; 6+ messages in thread
From: Pavel Zhukov @ 2022-02-07 12:57 UTC (permalink / raw)
  To: Richard Purdie, bitbake-devel@lists.openembedded.org; +Cc: Pavel Zhukov

[-- Attachment #1: Type: text/html, Size: 3751 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-02-07 12:58 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-02-07 11:53 [PATCH] fetch: Use git rev-parse if network is not available Pavel Zhukov
2022-02-07 12:06 ` [bitbake-devel] " Quentin Schulz
2022-02-07 12:23   ` [PATCH v2] " Pavel Zhukov
2022-02-07 12:43 ` [bitbake-devel] [PATCH] " Richard Purdie
2022-02-07 12:50   ` [PATCH v3] " Pavel Zhukov
2022-02-07 12:57   ` [bitbake-devel] [PATCH] " Pavel Zhukov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.