* [PATCH 1/3] lib: implemented oe.path.realpath()
@ 2013-02-10 12:41 Enrico Scholz
2013-02-10 12:41 ` [PATCH 2/3] package.bbclass: use oe.path.realpath() Enrico Scholz
2013-02-10 12:41 ` [PATCH 3/3] update-alternatives.bblcass: " Enrico Scholz
0 siblings, 2 replies; 7+ messages in thread
From: Enrico Scholz @ 2013-02-10 12:41 UTC (permalink / raw)
To: openembedded-core; +Cc: Enrico Scholz
Various parts of the buildsystem have to work with symlinks. Resolving
them is not trivial because they are always relative to a sysroot
directory.
Patch adds a function which returns the destination of a symlink by
assuming a given path as the / toplevel directory. A testsuite was
added too.
Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
---
meta/lib/oe/path.py | 87 +++++++++++++++++++++++++++++++++++++++++
meta/lib/oe/tests/test_path.py | 89 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 176 insertions(+)
create mode 100644 meta/lib/oe/tests/test_path.py
diff --git a/meta/lib/oe/path.py b/meta/lib/oe/path.py
index ea58bed..0b34ceb 100644
--- a/meta/lib/oe/path.py
+++ b/meta/lib/oe/path.py
@@ -2,6 +2,7 @@ import errno
import glob
import shutil
import subprocess
+import os.path
def join(*paths):
"""Like os.path.join but doesn't treat absolute RHS specially"""
@@ -161,3 +162,89 @@ def find(dir, **walkoptions):
for root, dirs, files in os.walk(dir, **walkoptions):
for file in files:
yield os.path.join(root, file)
+
+
+## realpath() related functions
+def __is_path_below(file, root):
+ return (file + os.path.sep).startswith(root)
+
+def __realpath_rel(start, rel_path, root, loop_cnt):
+ """Calculates real path of symlink 'start' + 'rel_path' below
+ 'root'; no part of 'start' below 'root' must contain symlinks. """
+ have_dir = True
+
+ for d in rel_path.split(os.path.sep):
+ if not have_dir:
+ raise OSError(errno.ENOENT, "no such directory %s" % start)
+
+ if d == os.path.pardir: # '..'
+ if len(start) >= len(root):
+ # do not follow '..' before root
+ start = os.path.dirname(start)
+ else:
+ # emit warning?
+ pass
+ else:
+ (start, have_dir) = __realpath(os.path.join(start, d),
+ root, loop_cnt)
+
+ assert(__is_path_below(start, root))
+
+ return start
+
+def __realpath(file, root, loop_cnt):
+ while os.path.islink(file) and len(file) >= len(root):
+ if loop_cnt == 0:
+ raise OSError(errno.ELOOP, file)
+
+ loop_cnt -= 1
+ target = os.path.normpath(os.readlink(file))
+
+ if not os.path.isabs(target):
+ tdir = os.path.dirname(file)
+ assert(__is_path_below(tdir, root))
+ else:
+ tdir = root
+
+ file = __realpath_rel(tdir, target, root, loop_cnt)
+
+ try:
+ is_dir = os.path.isdir(file)
+ except:
+ is_dir = false
+
+ return (file, is_dir)
+
+def realpath(file, root, use_physdir = True, loop_cnt = 100):
+ """ Returns the canonical path of 'file' with assuming a toplevel
+ 'root' directory. When 'use_physdir' is set, all preceding path
+ components of 'file' will be resolved first; this flag should be
+ set unless it is guaranteed that there is no symlink in the path."""
+
+ root = os.path.normpath(root)
+ file = os.path.normpath(file)
+
+ if not root.endswith(os.path.sep):
+ # letting root end with '/' makes some things easier
+ root = root + os.path.sep
+
+ if not __is_path_below(file, root):
+ raise OSError(errno.EINVAL, "file '%s' is not below root" % file)
+
+ try:
+ if use_physdir:
+ file = __realpath_rel(root, file[(len(root) - 1):], root, loop_cnt)
+ else:
+ file = __realpath(file, root, loop_cnt)[0]
+ except OSError, e:
+ if e.errno == errno.ELOOP:
+ # make ELOOP more readable; without catching it, there will
+ # be printed a backtrace with 100s of OSError exceptions
+ # else
+ raise OSError(errno.ELOOP,
+ "too much recursions while resolving '%s'; loop in '%s'" %
+ (file, e.strerror))
+
+ raise
+
+ return file
diff --git a/meta/lib/oe/tests/test_path.py b/meta/lib/oe/tests/test_path.py
new file mode 100644
index 0000000..e6aa601
--- /dev/null
+++ b/meta/lib/oe/tests/test_path.py
@@ -0,0 +1,89 @@
+import unittest
+import oe, oe.path
+import tempfile
+import os
+import errno
+import shutil
+
+class TestRealPath(unittest.TestCase):
+ DIRS = [ "a", "b", "etc", "sbin", "usr", "usr/bin", "usr/binX", "usr/sbin", "usr/include", "usr/include/gdbm" ]
+ FILES = [ "etc/passwd", "b/file" ]
+ LINKS = [
+ ( "bin", "/usr/bin", "/usr/bin" ),
+ ( "binX", "usr/binX", "/usr/binX" ),
+ ( "c", "broken", "/broken" ),
+ ( "etc/passwd-1", "passwd", "/etc/passwd" ),
+ ( "etc/passwd-2", "passwd-1", "/etc/passwd" ),
+ ( "etc/passwd-3", "/etc/passwd-1", "/etc/passwd" ),
+ ( "etc/shadow-1", "/etc/shadow", "/etc/shadow" ),
+ ( "etc/shadow-2", "/etc/shadow-1", "/etc/shadow" ),
+ ( "prog-A", "bin/prog-A", "/usr/bin/prog-A" ),
+ ( "prog-B", "/bin/prog-B", "/usr/bin/prog-B" ),
+ ( "usr/bin/prog-C", "../../sbin/prog-C", "/sbin/prog-C" ),
+ ( "usr/bin/prog-D", "/sbin/prog-D", "/sbin/prog-D" ),
+ ( "usr/binX/prog-E", "../sbin/prog-E", None ),
+ ( "usr/bin/prog-F", "../../../sbin/prog-F", "/sbin/prog-F" ),
+ ( "loop", "a/loop", None ),
+ ( "a/loop", "../loop", None ),
+ ( "b/test", "file/foo", None ),
+ ]
+
+ LINKS_PHYS = [
+ ( "./", "/", "" ),
+ ( "binX/prog-E", "/usr/sbin/prog-E", "/sbin/prog-E" ),
+ ]
+
+ EXCEPTIONS = [
+ ( "loop", errno.ELOOP ),
+ ( "b/test", errno.ENOENT ),
+ ]
+
+ def __del__(self):
+ try:
+ #os.system("tree -F %s" % self.tmpdir)
+ shutil.rmtree(self.tmpdir)
+ except:
+ pass
+
+ def setUp(self):
+ self.tmpdir = tempfile.mkdtemp(prefix = "oe-test_path")
+ self.root = os.path.join(self.tmpdir, "R")
+
+ os.mkdir(os.path.join(self.tmpdir, "_real"))
+ os.symlink("_real", self.root)
+
+ for d in self.DIRS:
+ os.mkdir(os.path.join(self.root, d))
+ for f in self.FILES:
+ file(os.path.join(self.root, f), "w")
+ for l in self.LINKS:
+ os.symlink(l[1], os.path.join(self.root, l[0]))
+
+ def __realpath(self, file, use_physdir):
+ return oe.path.realpath(os.path.join(self.root, file), self.root, use_physdir)
+
+ def test_norm(self):
+ for l in self.LINKS:
+ if l[2] == None:
+ continue
+
+ target_p = self.__realpath(l[0], True)
+ target_l = self.__realpath(l[0], False)
+
+ if l[2] != False:
+ self.assertEqual(target_p, target_l)
+ self.assertEqual(l[2], target_p[len(self.root):])
+
+ def test_phys(self):
+ for l in self.LINKS_PHYS:
+ target_p = self.__realpath(l[0], True)
+ target_l = self.__realpath(l[0], False)
+
+ self.assertEqual(l[1], target_p[len(self.root):])
+ self.assertEqual(l[2], target_l[len(self.root):])
+
+ def test_loop(self):
+ for e in self.EXCEPTIONS:
+ self.assertRaisesRegexp(OSError, r'\[Errno %u\]' % e[1],
+ self.__realpath, e[0], False)
+
--
1.7.11.7
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 2/3] package.bbclass: use oe.path.realpath()
2013-02-10 12:41 [PATCH 1/3] lib: implemented oe.path.realpath() Enrico Scholz
@ 2013-02-10 12:41 ` Enrico Scholz
2013-03-06 11:49 ` Richard Purdie
2013-02-10 12:41 ` [PATCH 3/3] update-alternatives.bblcass: " Enrico Scholz
1 sibling, 1 reply; 7+ messages in thread
From: Enrico Scholz @ 2013-02-10 12:41 UTC (permalink / raw)
To: openembedded-core; +Cc: Enrico Scholz
oe.path.realpath() provides are common and more correct implementation
for resolving symlinks within sysroot. Use it.
Old implementation suffered from lot of problems; e.g.
* redundant code
* calls 'os.stat()' which references files on host; this can give wrong
results about existing/non-existing and can cause EPERM (instead of
the catched ENONENT) exceptions
* does not deal with special cases like '..' leaving the sysroot.
Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
---
meta/classes/package.bbclass | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/meta/classes/package.bbclass b/meta/classes/package.bbclass
index b10e6f6..a74ec8a 100644
--- a/meta/classes/package.bbclass
+++ b/meta/classes/package.bbclass
@@ -741,7 +741,8 @@ python split_and_strip_files () {
continue
try:
- s = os.stat(file)
+ ltarget = oe.path.realpath(file, dvar, False)
+ s = os.lstat(ltarget)
except OSError, (err, strerror):
if err != errno.ENOENT:
raise
@@ -752,11 +753,6 @@ python split_and_strip_files () {
# If it's a symlink, and points to an ELF file, we capture the readlink target
if os.path.islink(file):
target = os.readlink(file)
- if not os.path.isabs(target):
- ltarget = os.path.join(os.path.dirname(file), target)
- else:
- ltarget = target
-
if isELF(ltarget):
#bb.note("Sym: %s (%d)" % (ltarget, isELF(ltarget)))
symlinks[file] = target
@@ -1005,14 +1001,12 @@ python package_fixsymlinks () {
rpath = path[len(inst_root):]
pkg_files[pkg].append(rpath)
try:
- s = os.stat(path)
+ rtarget = oe.path.realpath(path, inst_root, True)
+ os.lstat(rtarget)
except OSError, (err, strerror):
if err != errno.ENOENT:
raise
- target = os.readlink(path)
- if target[0] != '/':
- target = os.path.join(os.path.dirname(path)[len(inst_root):], target)
- dangling_links[pkg].append(os.path.normpath(target))
+ dangling_links[pkg].append(os.path.normpath(rtarget[len(inst_root):]))
newrdepends = {}
for pkg in dangling_links:
--
1.7.11.7
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [PATCH 2/3] package.bbclass: use oe.path.realpath()
2013-02-10 12:41 ` [PATCH 2/3] package.bbclass: use oe.path.realpath() Enrico Scholz
@ 2013-03-06 11:49 ` Richard Purdie
2013-03-12 10:25 ` Enrico Scholz
0 siblings, 1 reply; 7+ messages in thread
From: Richard Purdie @ 2013-03-06 11:49 UTC (permalink / raw)
To: Enrico Scholz; +Cc: openembedded-core
On Sun, 2013-02-10 at 13:41 +0100, Enrico Scholz wrote:
> oe.path.realpath() provides are common and more correct implementation
> for resolving symlinks within sysroot. Use it.
>
> Old implementation suffered from lot of problems; e.g.
>
> * redundant code
>
> * calls 'os.stat()' which references files on host; this can give wrong
> results about existing/non-existing and can cause EPERM (instead of
> the catched ENONENT) exceptions
>
> * does not deal with special cases like '..' leaving the sysroot.
Whilst these changes are good, they do come at a cost:
post symlink package changes
./perfscript -c 8c22531e491e6b0cfffaaa80d6bc75db757fc1d1
49:38.46,17:12.15
pre symlink package changes
./perfscript -c 1a80329b3fcf23ecc23e409a260b9b2182652f65
48:16.33,13:39.97
So it added 1m20 to the overall build time, but more worryingly, added
added nearly 3m30 to the time for:
bitbake virtual/kernel -c cleansstate; bitbake virtual/kernel
These tests are based on the script linked from
https://wiki.yoctoproject.org/wiki/Performance_Test where the kernel
test is this is the second number above list, overall build time is the
first.
Have you any time to look into this performance regression?
FWIW, "bitbake virtual/kernel -c package -P -f" shows that
package_fixsymlinks() takes a rather long time in the profile output.
Cheers,
Richard
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] package.bbclass: use oe.path.realpath()
2013-03-06 11:49 ` Richard Purdie
@ 2013-03-12 10:25 ` Enrico Scholz
2013-03-12 18:47 ` Richard Purdie
0 siblings, 1 reply; 7+ messages in thread
From: Enrico Scholz @ 2013-03-12 10:25 UTC (permalink / raw)
To: Richard Purdie; +Cc: openembedded-core
Richard Purdie <richard.purdie@linuxfoundation.org> writes:
>> Old implementation suffered from lot of problems; e.g.
>>
>> * redundant code
>>
>> * calls 'os.stat()' which references files on host; this can give wrong
>> results about existing/non-existing and can cause EPERM (instead of
>> the catched ENONENT) exceptions
>>
>> * does not deal with special cases like '..' leaving the sysroot.
>
> Whilst these changes are good, they do come at a cost:
>
> post symlink package changes
> ./perfscript -c 8c22531e491e6b0cfffaaa80d6bc75db757fc1d1
> 49:38.46,17:12.15
>
> pre symlink package changes
> ./perfscript -c 1a80329b3fcf23ecc23e409a260b9b2182652f65
> 48:16.33,13:39.97
>
> So it added 1m20 to the overall build time, but more worryingly, added
> added nearly 3m30 to the time for:
>
> bitbake virtual/kernel -c cleansstate; bitbake virtual/kernel
>
> These tests are based on the script linked from
> https://wiki.yoctoproject.org/wiki/Performance_Test where the kernel
> test is this is the second number above list, overall build time is the
> first.
>
> Have you any time to look into this performance regression?
Well, patch replaced a call to os.path.realpath() with a more accurate
variant honoring a sysroot. There must be more work be done to replace
things previously solved by the operating system with custom (python)
code.
I will try to write a C implementation of oe.path.realpath() but this
can take some time and I do not have an idea how to integrate it into
oe.
Enrico
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] package.bbclass: use oe.path.realpath()
2013-03-12 10:25 ` Enrico Scholz
@ 2013-03-12 18:47 ` Richard Purdie
2013-03-12 19:10 ` Enrico Scholz
0 siblings, 1 reply; 7+ messages in thread
From: Richard Purdie @ 2013-03-12 18:47 UTC (permalink / raw)
To: Enrico Scholz; +Cc: openembedded-core
On Tue, 2013-03-12 at 11:25 +0100, Enrico Scholz wrote:
> Richard Purdie <richard.purdie@linuxfoundation.org> writes:
>
> >> Old implementation suffered from lot of problems; e.g.
> >>
> >> * redundant code
> >>
> >> * calls 'os.stat()' which references files on host; this can give wrong
> >> results about existing/non-existing and can cause EPERM (instead of
> >> the catched ENONENT) exceptions
> >>
> >> * does not deal with special cases like '..' leaving the sysroot.
> >
> > Whilst these changes are good, they do come at a cost:
> >
> > post symlink package changes
> > ./perfscript -c 8c22531e491e6b0cfffaaa80d6bc75db757fc1d1
> > 49:38.46,17:12.15
> >
> > pre symlink package changes
> > ./perfscript -c 1a80329b3fcf23ecc23e409a260b9b2182652f65
> > 48:16.33,13:39.97
> >
> > So it added 1m20 to the overall build time, but more worryingly, added
> > added nearly 3m30 to the time for:
> >
> > bitbake virtual/kernel -c cleansstate; bitbake virtual/kernel
> >
> > These tests are based on the script linked from
> > https://wiki.yoctoproject.org/wiki/Performance_Test where the kernel
> > test is this is the second number above list, overall build time is the
> > first.
> >
> > Have you any time to look into this performance regression?
>
> Well, patch replaced a call to os.path.realpath() with a more accurate
> variant honoring a sysroot. There must be more work be done to replace
> things previously solved by the operating system with custom (python)
> code.
>
> I will try to write a C implementation of oe.path.realpath() but this
> can take some time and I do not have an idea how to integrate it into
> oe.
The issue is not an interpreted language verses C, the problem is the
shear number of syscalls. Each isdir() and islink() call means a stat
call into the kernel. For "bitbake virtual/kernel -c package", I'm
seeing 400,000 stat calls for 23,000 files. We know the filesystem
doesn't change so we could cache the stat calls and hopefully hence the
speed. I actually have some code I tried this with but its not working
as well as I'd like so I need to do some further investigation about
what is happening.
Cheers,
Richard
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] package.bbclass: use oe.path.realpath()
2013-03-12 18:47 ` Richard Purdie
@ 2013-03-12 19:10 ` Enrico Scholz
0 siblings, 0 replies; 7+ messages in thread
From: Enrico Scholz @ 2013-03-12 19:10 UTC (permalink / raw)
To: Richard Purdie; +Cc: openembedded-core
Richard Purdie <richard.purdie@linuxfoundation.org> writes:
>> > Have you any time to look into this performance regression?
>>
>> Well, patch replaced a call to os.path.realpath() with a more accurate
>> variant honoring a sysroot. There must be more work be done to replace
>> things previously solved by the operating system with custom (python)
>> code.
>>
>> I will try to write a C implementation of oe.path.realpath() but this
>> can take some time and I do not have an idea how to integrate it into
>> oe.
>
> The issue is not an interpreted language verses C, the problem is the
> shear number of syscalls. Each isdir() and islink() call means a stat
> call into the kernel.
With
try:
- is_dir = os.path.isdir(file)
+ if assume_dir:
+ is_dir = True
+ else:
+ is_dir = os.path.isdir(file)
except:
in meta/lib/oe/path.py you should be able to reduce calls to isdir().
A
- rtarget = oe.path.realpath(path, inst_root, True, assume_dir = True)
+ rtarget = oe.path.realpath(path, inst_root, False, assume_dir = True)
in package.bbclass seems to be save (pkgfiles[] is filled by os.walk() so
that locations should be already physical) and might improve performance
too.
But I do not have numbers how much you can save by these changes.
Enrico
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/3] update-alternatives.bblcass: use oe.path.realpath()
2013-02-10 12:41 [PATCH 1/3] lib: implemented oe.path.realpath() Enrico Scholz
2013-02-10 12:41 ` [PATCH 2/3] package.bbclass: use oe.path.realpath() Enrico Scholz
@ 2013-02-10 12:41 ` Enrico Scholz
1 sibling, 0 replies; 7+ messages in thread
From: Enrico Scholz @ 2013-02-10 12:41 UTC (permalink / raw)
To: openembedded-core; +Cc: Enrico Scholz
oe.path.realpath() provides are common and more correct implementation
for resolving symlinks within sysroot. Use it.
Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de>
---
meta/classes/update-alternatives.bbclass | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/meta/classes/update-alternatives.bbclass b/meta/classes/update-alternatives.bbclass
index 8f4d097..0cefc51 100644
--- a/meta/classes/update-alternatives.bbclass
+++ b/meta/classes/update-alternatives.bbclass
@@ -261,10 +261,7 @@ python perform_packagecopy_append () {
src = '%s/%s' % (pkgdest, alt_target)
dest = '%s/%s' % (pkgdest, link_rename[alt_target])
link = os.readlink(src)
- if os.path.isabs(link):
- link_target = pkgdest + os.readlink(src)
- else:
- link_target = os.path.join(os.path.dirname(src), link)
+ link_target = oe.path.realpath(src, pkgdest, True)
if os.path.lexists(link_target):
# Ok, the link_target exists, we can rename
--
1.7.11.7
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2013-03-12 19:27 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-02-10 12:41 [PATCH 1/3] lib: implemented oe.path.realpath() Enrico Scholz
2013-02-10 12:41 ` [PATCH 2/3] package.bbclass: use oe.path.realpath() Enrico Scholz
2013-03-06 11:49 ` Richard Purdie
2013-03-12 10:25 ` Enrico Scholz
2013-03-12 18:47 ` Richard Purdie
2013-03-12 19:10 ` Enrico Scholz
2013-02-10 12:41 ` [PATCH 3/3] update-alternatives.bblcass: " Enrico Scholz
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox