From: Richard Purdie <richard.purdie@linuxfoundation.org>
To: bitbake-devel <bitbake-devel@lists.openembedded.org>
Subject: [PATCH] codeparser/data_smart: Optimise parsing speed
Date: Tue, 03 Dec 2013 12:10:05 +0000 [thread overview]
Message-ID: <1386072605.4463.40.camel@ted> (raw)
The previous "contains" changes caused a ~3% parsing speed impact.
Looking at the cause of those changes was interesting:
* Use of defaultdict was slower than just checking for missing entries
and setting them when needed.
* Even the "import collections" adversely affects parsing speed
* There was a missing intern function for the contains cache data
* Setting up a log object for each variable has noticeable overhead
due to the changes in the code paths uses, we can avoid this.
* We can call getVarFlag on "_content" directly within VariableParse
for a noticeable speed gain since its a seriously hot code path.
This patch therefore tweaks the code based on the above observations to
get some of the speed back.
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
---
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
index 6e34eff..62b6cf9 100644
--- a/bitbake/lib/bb/codeparser.py
+++ b/bitbake/lib/bb/codeparser.py
@@ -1,7 +1,6 @@
import ast
import codegen
import logging
-import collections
import os.path
import bb.utils, bb.data
from itertools import chain
@@ -65,6 +64,8 @@ class CodeParserCache(MultiProcessCache):
for h in data[0]:
data[0][h]["refs"] = self.internSet(data[0][h]["refs"])
data[0][h]["execs"] = self.internSet(data[0][h]["execs"])
+ for k in data[0][h]["contains"]:
+ data[0][h]["contains"][k] = self.internSet(data[0][h]["contains"][k])
for h in data[1]:
data[1][h]["execs"] = self.internSet(data[1][h]["execs"])
return
@@ -125,6 +126,8 @@ class PythonParser():
if isinstance(node.args[0], ast.Str):
varname = node.args[0].s
if name in self.containsfuncs and isinstance(node.args[1], ast.Str):
+ if varname not in self.contains:
+ self.contains[varname] = set()
self.contains[varname].add(node.args[1].s)
else:
self.references.add(node.args[0].s)
@@ -153,10 +156,10 @@ class PythonParser():
def __init__(self, name, log):
self.var_execs = set()
- self.contains = collections.defaultdict(set)
+ self.contains = {}
self.execs = set()
self.references = set()
- self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
+ self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)
self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)
diff --git a/bitbake/lib/bb/data_smart.py b/bitbake/lib/bb/data_smart.py
index 9a6f767..833d9f1 100644
--- a/bitbake/lib/bb/data_smart.py
+++ b/bitbake/lib/bb/data_smart.py
@@ -35,7 +35,6 @@ import hashlib
import bb, bb.codeparser
from bb import utils
from bb.COW import COWDictBase
-import collections
logger = logging.getLogger("BitBake.Data")
@@ -89,7 +88,7 @@ class VariableParse:
self.references = set()
self.execs = set()
- self.contains = collections.defaultdict(set)
+ self.contains = {}
def var_sub(self, match):
key = match.group()[2:-1]
@@ -100,7 +99,7 @@ class VariableParse:
varparse = self.d.expand_cache[key]
var = varparse.value
else:
- var = self.d.getVar(key, True)
+ var = self.d.getVarFlag(key, "_content", True)
self.references.add(key)
if var is not None:
return var
@@ -123,7 +122,10 @@ class VariableParse:
self.execs |= parser.execs
for k in parser.contains:
- self.contains[k].update(parser.contains[k])
+ if k not in self.contains:
+ self.contains[k] = parser.contains[k]
+ else:
+ self.contains[k].update(parser.contains[k])
value = utils.better_eval(codeobj, DataContext(self.d))
return str(value)
reply other threads:[~2013-12-03 12:10 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1386072605.4463.40.camel@ted \
--to=richard.purdie@linuxfoundation.org \
--cc=bitbake-devel@lists.openembedded.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.