From: Richard Purdie <richard.purdie@linuxfoundation.org>
To: openembedded-core <openembedded-core@lists.openembedded.org>
Subject: [PATCH v2] package: Add cachedpath optimisation
Date: Mon, 18 Mar 2013 16:47:16 +0000 [thread overview]
Message-ID: <1363625236.16482.59.camel@ted> (raw)
Currently, various standard library operations like os.walk(),
os.path.isdir() and os.path.islink() each call stat or lstat which
involves a syscall into the kernel. There is no caching since they could
conceivably have changed on disk. The result is that for something like
the do_package task of the kernel we're spending over two minutes making
868,000 individual stat calls for 23,000 files. This is suboptimal.
This patch adds lib/oe/cachedpath.py which are a set of replacement
functions for these operations which use cached stat data rather than
hitting the kernel each time. It gives a nice performance improvement
halving the build time of the kernel do_package.
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
diff --git a/meta/classes/base.bbclass b/meta/classes/base.bbclass
index 4ec1eda..5fe9a84 100644
--- a/meta/classes/base.bbclass
+++ b/meta/classes/base.bbclass
@@ -10,7 +10,7 @@ inherit utility-tasks
inherit metadata_scm
inherit logging
-OE_IMPORTS += "os sys time oe.path oe.utils oe.data oe.package oe.packagegroup oe.sstatesig oe.lsb"
+OE_IMPORTS += "os sys time oe.path oe.utils oe.data oe.package oe.packagegroup oe.sstatesig oe.lsb oe.cachedpath"
OE_IMPORTS[type] = "list"
def oe_import(d):
diff --git a/meta/classes/package.bbclass b/meta/classes/package.bbclass
index f3a6bc7..fde4e9d 100644
--- a/meta/classes/package.bbclass
+++ b/meta/classes/package.bbclass
@@ -288,7 +288,7 @@ def copydebugsources(debugsrcdir, d):
basepath = dvar
for p in debugsrcdir.split("/"):
basepath = basepath + "/" + p
- if not os.path.exists(basepath):
+ if not cpath.exists(basepath):
nosuchdir.append(basepath)
bb.utils.mkdirhier(basepath)
@@ -388,7 +388,7 @@ python package_do_split_locales() {
localedir = os.path.join(dvar + datadir, 'locale')
- if not os.path.isdir(localedir):
+ if not cpath.isdir(localedir):
bb.debug(1, "No locale files in this package")
return
@@ -628,7 +628,7 @@ python fixup_perms () {
continue
origin = dvar + dir
- if not (os.path.exists(origin) and os.path.isdir(origin) and not os.path.islink(origin)):
+ if not (cpath.exists(origin) and cpath.isdir(origin) and not cpath.islink(origin)):
continue
link = fs_perms_table[dir].link
@@ -654,7 +654,7 @@ python fixup_perms () {
continue
origin = dvar + dir
- if not (os.path.exists(origin) and os.path.isdir(origin)):
+ if not (cpath.exists(origin) and cpath.isdir(origin)):
continue
fix_perms(origin, fs_perms_table[dir].mode, fs_perms_table[dir].uid, fs_perms_table[dir].gid, dir)
@@ -735,7 +735,7 @@ python split_and_strip_files () {
baselibdir = os.path.abspath(dvar + os.sep + d.getVar("base_libdir", True))
if (d.getVar('INHIBIT_PACKAGE_DEBUG_SPLIT', True) != '1') and \
(d.getVar('INHIBIT_PACKAGE_STRIP', True) != '1'):
- for root, dirs, files in os.walk(dvar):
+ for root, dirs, files in cpath.walk(dvar):
for f in files:
file = os.path.join(root, f)
if file.endswith(".ko") and file.find("/lib/modules/") != -1:
@@ -749,18 +749,20 @@ python split_and_strip_files () {
continue
try:
- ltarget = oe.path.realpath(file, dvar, False)
- s = os.lstat(ltarget)
+ ltarget = cpath.realpath(file, dvar, False)
+ s = cpath.lstat(ltarget)
except OSError, (err, strerror):
if err != errno.ENOENT:
raise
# Skip broken symlinks
continue
+ if not s:
+ continue
# Check its an excutable
if (s[stat.ST_MODE] & stat.S_IXUSR) or (s[stat.ST_MODE] & stat.S_IXGRP) or (s[stat.ST_MODE] & stat.S_IXOTH) \
or ((file.startswith(libdir) or file.startswith(baselibdir)) and ".so" in f):
# If it's a symlink, and points to an ELF file, we capture the readlink target
- if os.path.islink(file):
+ if cpath.islink(file):
target = os.readlink(file)
if isELF(ltarget):
#bb.note("Sym: %s (%d)" % (ltarget, isELF(ltarget)))
@@ -918,8 +920,8 @@ python populate_packages () {
for file in files:
if os.path.isabs(file):
file = '.' + file
- if not os.path.islink(file):
- if os.path.isdir(file):
+ if not cpath.islink(file):
+ if cpath.isdir(file):
newfiles = [ os.path.join(file,x) for x in os.listdir(file) ]
if newfiles:
files += newfiles
@@ -929,7 +931,7 @@ python populate_packages () {
if [ file ] != globbed:
files += globbed
continue
- if (not os.path.islink(file)) and (not os.path.exists(file)):
+ if (not cpath.islink(file)) and (not cpath.exists(file)):
continue
if file in seen:
continue
@@ -938,33 +940,35 @@ python populate_packages () {
def mkdir(src, dest, p):
src = os.path.join(src, p)
dest = os.path.join(dest, p)
- bb.utils.mkdirhier(dest)
- fstat = os.stat(src)
- os.chmod(dest, fstat.st_mode)
+ fstat = cpath.stat(src)
+ if not fstat:
+ bb.error("No file for %s" % src)
+ os.mkdir(dest, fstat.st_mode)
os.chown(dest, fstat.st_uid, fstat.st_gid)
if p not in seen:
seen.append(p)
+ cpath.updatecache(dest)
def mkdir_recurse(src, dest, paths):
- if os.path.exists(dest + '/' + paths):
+ if cpath.exists(dest + '/' + paths):
return
while paths.startswith("./"):
paths = paths[2:]
p = "."
for c in paths.split("/"):
p = os.path.join(p, c)
- if not os.path.exists(os.path.join(dest, p)):
+ if not cpath.exists(os.path.join(dest, p)):
mkdir(src, dest, p)
- if os.path.isdir(file) and not os.path.islink(file):
+ if cpath.isdir(file) and not cpath.islink(file):
mkdir_recurse(dvar, root, file)
continue
mkdir_recurse(dvar, root, os.path.dirname(file))
fpath = os.path.join(root,file)
- if not os.path.islink(file):
+ if not cpath.islink(file):
os.link(file, fpath)
- fstat = os.stat(file)
+ fstat = cpath.stat(file)
os.chmod(fpath, fstat.st_mode)
os.chown(fpath, fstat.st_uid, fstat.st_gid)
continue
@@ -975,7 +979,7 @@ python populate_packages () {
os.chdir(workdir)
unshipped = []
- for root, dirs, files in os.walk(dvar):
+ for root, dirs, files in cpath.walk(dvar):
dir = root[len(dvar):]
if not dir:
dir = os.sep
@@ -1009,8 +1013,8 @@ python package_fixsymlinks () {
for path in pkgfiles[pkg]:
rpath = path[len(inst_root):]
pkg_files[pkg].append(rpath)
- rtarget = oe.path.realpath(path, inst_root, True, assume_dir = True)
- if not os.path.lexists(rtarget):
+ rtarget = cpath.realpath(path, inst_root, True, assume_dir = True)
+ if not cpath.lexists(rtarget):
dangling_links[pkg].append(os.path.normpath(rtarget[len(inst_root):]))
newrdepends = {}
@@ -1394,7 +1398,7 @@ python package_do_shlibs() {
renames = list()
for file in pkgfiles[pkg]:
soname = None
- if os.path.islink(file):
+ if cpath.islink(file):
continue
if targetos == "darwin" or targetos == "darwin8":
darwin_so(file)
@@ -1781,6 +1785,10 @@ python do_package () {
# as any change to rpmdeps requires this to be rerun.
# PACKAGE_BBCLASS_VERSION = "1"
+ # Init cachedpath
+ global cpath
+ cpath = oe.cachedpath.CachedPath()
+
###########################################################################
# Sanity test the setup
###########################################################################
@@ -1827,6 +1835,8 @@ python do_package () {
# Split up PKGD into PKGDEST
###########################################################################
+ cpath = oe.cachedpath.CachedPath()
+
for f in (d.getVar('PACKAGESPLITFUNCS', True) or '').split():
bb.build.exec_func(f, d)
@@ -1841,7 +1851,7 @@ python do_package () {
pkgdest = d.getVar('PKGDEST', True)
for pkg in packages:
pkgfiles[pkg] = []
- for walkroot, dirs, files in os.walk(pkgdest + "/" + pkg):
+ for walkroot, dirs, files in cpath.walk(pkgdest + "/" + pkg):
for file in files:
pkgfiles[pkg].append(walkroot + os.sep + file)
diff --git a/meta/lib/oe/cachedpath.py b/meta/lib/oe/cachedpath.py
new file mode 100644
index 0000000..dd7d335
--- /dev/null
+++ b/meta/lib/oe/cachedpath.py
@@ -0,0 +1,236 @@
+#
+# Based on standard python library functions but avoid
+# repeated stat calls. Its assumed the files will not change from under us
+# so we can cache stat calls.
+#
+
+import os
+import errno
+import stat as statmod
+
+class CachedPath(object):
+ statcache = {}
+ lstatcache = {}
+ normpathcache = {}
+
+ def __init__(self):
+ return
+
+ def updatecache(self, x):
+ x = self.normpath(x)
+ if x in self.statcache:
+ del self.statcache[x]
+ if x in self.lstatcache:
+ del self.lstatcache[x]
+
+ def normpath(self, path):
+ if path in self.normpathcache:
+ return self.normpathcache[path]
+ newpath = os.path.normpath(path)
+ self.normpathcache[path] = newpath
+ return newpath
+
+ def _callstat(self, path):
+ if path in self.statcache:
+ return self.statcache[path]
+ try:
+ st = os.stat(path)
+ self.statcache[path] = st
+ return st
+ except os.error:
+ self.statcache[path] = False
+ return False
+
+ # We might as well call lstat and then only
+ # call stat as well in the symbolic link case
+ # since this turns out to be much more optimal
+ # in real world usage of this cache
+ def callstat(self, path):
+ path = self.normpath(path)
+ self.calllstat(path)
+ return self.statcache[path]
+
+ def calllstat(self, path):
+ path = self.normpath(path)
+ if path in self.lstatcache:
+ return self.lstatcache[path]
+ #bb.error("LStatpath:" + path)
+ try:
+ lst = os.lstat(path)
+ self.lstatcache[path] = lst
+ if not statmod.S_ISLNK(lst.st_mode):
+ self.statcache[path] = lst
+ else:
+ self._callstat(path)
+ return lst
+ except (os.error, AttributeError):
+ self.lstatcache[path] = False
+ self.statcache[path] = False
+ return False
+
+ # This follows symbolic links, so both islink() and isdir() can be true
+ # for the same path ono systems that support symlinks
+ def isfile(self, path):
+ """Test whether a path is a regular file"""
+ st = self.callstat(path)
+ if not st:
+ return False
+ return statmod.S_ISREG(st.st_mode)
+
+ # Is a path a directory?
+ # This follows symbolic links, so both islink() and isdir()
+ # can be true for the same path on systems that support symlinks
+ def isdir(self, s):
+ """Return true if the pathname refers to an existing directory."""
+ st = self.callstat(s)
+ if not st:
+ return False
+ return statmod.S_ISDIR(st.st_mode)
+
+ def islink(self, path):
+ """Test whether a path is a symbolic link"""
+ st = self.calllstat(path)
+ if not st:
+ return False
+ return statmod.S_ISLNK(st.st_mode)
+
+ # Does a path exist?
+ # This is false for dangling symbolic links on systems that support them.
+ def exists(self, path):
+ """Test whether a path exists. Returns False for broken symbolic links"""
+ if self.callstat(path):
+ return True
+ return False
+
+ def lexists(self, path):
+ """Test whether a path exists. Returns True for broken symbolic links"""
+ if self.calllstat(path):
+ return True
+ return False
+
+ def stat(self, path):
+ return self.callstat(path)
+
+ def lstat(self, path):
+ return self.calllstat(path)
+
+ def walk(self, top, topdown=True, onerror=None, followlinks=False):
+ # Matches os.walk, not os.path.walk()
+
+ # We may not have read permission for top, in which case we can't
+ # get a list of the files the directory contains. os.path.walk
+ # always suppressed the exception then, rather than blow up for a
+ # minor reason when (say) a thousand readable directories are still
+ # left to visit. That logic is copied here.
+ try:
+ # Note that listdir and error are globals in this module due
+ # to earlier import-*.
+ names = os.listdir(top)
+ except error, err:
+ if onerror is not None:
+ onerror(err)
+ return
+
+ dirs, nondirs = [], []
+ for name in names:
+ if self.isdir(os.path.join(top, name)):
+ dirs.append(name)
+ else:
+ nondirs.append(name)
+
+ if topdown:
+ yield top, dirs, nondirs
+ for name in dirs:
+ new_path = os.path.join(top, name)
+ if followlinks or not self.islink(new_path):
+ for x in self.walk(new_path, topdown, onerror, followlinks):
+ yield x
+ if not topdown:
+ yield top, dirs, nondirs
+
+ ## realpath() related functions
+ def __is_path_below(self, file, root):
+ return (file + os.path.sep).startswith(root)
+
+ def __realpath_rel(self, start, rel_path, root, loop_cnt, assume_dir):
+ """Calculates real path of symlink 'start' + 'rel_path' below
+ 'root'; no part of 'start' below 'root' must contain symlinks. """
+ have_dir = True
+
+ for d in rel_path.split(os.path.sep):
+ if not have_dir and not assume_dir:
+ raise OSError(errno.ENOENT, "no such directory %s" % start)
+
+ if d == os.path.pardir: # '..'
+ if len(start) >= len(root):
+ # do not follow '..' before root
+ start = os.path.dirname(start)
+ else:
+ # emit warning?
+ pass
+ else:
+ (start, have_dir) = self.__realpath(os.path.join(start, d),
+ root, loop_cnt, assume_dir)
+
+ assert(self.__is_path_below(start, root))
+
+ return start
+
+ def __realpath(self, file, root, loop_cnt, assume_dir):
+ while self.islink(file) and len(file) >= len(root):
+ if loop_cnt == 0:
+ raise OSError(errno.ELOOP, file)
+
+ loop_cnt -= 1
+ target = os.path.normpath(os.readlink(file))
+
+ if not os.path.isabs(target):
+ tdir = os.path.dirname(file)
+ assert(self.__is_path_below(tdir, root))
+ else:
+ tdir = root
+
+ file = self.__realpath_rel(tdir, target, root, loop_cnt, assume_dir)
+
+ try:
+ is_dir = self.isdir(file)
+ except:
+ is_dir = False
+
+ return (file, is_dir)
+
+ def realpath(self, file, root, use_physdir = True, loop_cnt = 100, assume_dir = False):
+ """ Returns the canonical path of 'file' with assuming a
+ toplevel 'root' directory. When 'use_physdir' is set, all
+ preceding path components of 'file' will be resolved first;
+ this flag should be set unless it is guaranteed that there is
+ no symlink in the path. When 'assume_dir' is not set, missing
+ path components will raise an ENOENT error"""
+
+ root = os.path.normpath(root)
+ file = os.path.normpath(file)
+
+ if not root.endswith(os.path.sep):
+ # letting root end with '/' makes some things easier
+ root = root + os.path.sep
+
+ if not self.__is_path_below(file, root):
+ raise OSError(errno.EINVAL, "file '%s' is not below root" % file)
+
+ try:
+ if use_physdir:
+ file = self.__realpath_rel(root, file[(len(root) - 1):], root, loop_cnt, assume_dir)
+ else:
+ file = self.__realpath(file, root, loop_cnt, assume_dir)[0]
+ except OSError, e:
+ if e.errno == errno.ELOOP:
+ # make ELOOP more readable; without catching it, there will
+ # be printed a backtrace with 100s of OSError exceptions
+ # else
+ raise OSError(errno.ELOOP,
+ "too much recursions while resolving '%s'; loop in '%s'" %
+ (file, e.strerror))
+
+ raise
+
+ return file
next reply other threads:[~2013-03-18 17:04 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-18 16:47 Richard Purdie [this message]
2013-03-18 17:00 ` [PATCH v2] package: Add cachedpath optimisation Chris Larson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1363625236.16482.59.camel@ted \
--to=richard.purdie@linuxfoundation.org \
--cc=openembedded-core@lists.openembedded.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.