* [PATCH V2 1/4] cooker process: fire heartbeat event at regular time intervals
2016-11-29 16:47 [PATCH V2 0/4] system statistics sampling Patrick Ohly
@ 2016-11-29 16:47 ` Patrick Ohly
2016-11-29 16:47 ` [PATCH V2 2/4] runqueue.py: monitor disk space " Patrick Ohly
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Patrick Ohly @ 2016-11-29 16:47 UTC (permalink / raw)
To: bitbake-devel
The intended usage is for recording current system statistics from
/proc in buildstats.bbclass during a build and for improving the
BB_DISKMON_DIRS implementation.
All other existing hooks are less suitable because they trigger at
unpredictable rates: firing too often can be handled by rate-limiting
in the event handler, but firing too rarely (for example, when there is
only one long-running task) cannot, because the handler does not get
called at all.
The implementation of the new heartbeat event hooks into the cooker
process event queue. The process already wakes up every 0.1s, which is
often enough for the intentionally coarse 1s delay between
heartbeats. That value was chosen to keep the overhead low while still
being frequent enough for the intended usage.
If necessary, BB_HEARTBEAT_EVENT can be set to a float specifying
the delay in seconds between these heartbeat events.
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
---
lib/bb/event.py | 10 ++++++++++
lib/bb/server/process.py | 25 +++++++++++++++++++++++++
lib/bb/ui/knotty.py | 1 +
lib/bb/ui/toasterui.py | 3 +++
4 files changed, 39 insertions(+)
diff --git a/lib/bb/event.py b/lib/bb/event.py
index 6f1cb10..cacbac8 100644
--- a/lib/bb/event.py
+++ b/lib/bb/event.py
@@ -48,6 +48,16 @@ class Event(object):
def __init__(self):
self.pid = worker_pid
+
+class HeartbeatEvent(Event):
+ """Triggered at regular time intervals (1 second by default, see BB_HEARTBEAT_EVENT). Other events can fire much more often
+ (runQueueTaskStarted when there are many short tasks) or not at all for long periods
+ of time (again runQueueTaskStarted, when there is just one long-running task), so this
+ event is more suitable for doing some task-independent work occasionally."""
+ def __init__(self, time):
+ Event.__init__(self)
+ self.time = time
+
Registered = 10
AlreadyRegistered = 14
diff --git a/lib/bb/server/process.py b/lib/bb/server/process.py
index 982fcf7..1654faf 100644
--- a/lib/bb/server/process.py
+++ b/lib/bb/server/process.py
@@ -92,6 +92,8 @@ class ProcessServer(Process, BaseImplServer):
self.event = EventAdapter(event_queue)
self.featurelist = featurelist
self.quit = False
+ self.heartbeat_seconds = 1 # default, BB_HEARTBEAT_EVENT will be checked once we have a datastore.
+ self.next_heartbeat = time.time()
self.quitin, self.quitout = Pipe()
self.event_handle = multiprocessing.Value("i")
@@ -101,6 +103,14 @@ class ProcessServer(Process, BaseImplServer):
self.event_queue.put(event)
self.event_handle.value = bb.event.register_UIHhandler(self, True)
+ heartbeat_event = self.cooker.data.getVar('BB_HEARTBEAT_EVENT', True)
+ if heartbeat_event:
+ try:
+ self.heartbeat_seconds = float(heartbeat_event)
+ except:
+ # Throwing an exception here causes bitbake to hang.
+ # Just warn about the invalid setting and continue
+ bb.warn('Ignoring invalid BB_HEARTBEAT_EVENT=%s, must be a float specifying seconds.' % heartbeat_event)
bb.cooker.server_main(self.cooker, self.main)
def main(self):
@@ -160,6 +170,21 @@ class ProcessServer(Process, BaseImplServer):
del self._idlefuns[function]
self.quit = True
+ # Create new heartbeat event?
+ now = time.time()
+ if now >= self.next_heartbeat:
+ # We might have missed heartbeats. Just trigger once in
+ # that case and continue after the usual delay.
+ self.next_heartbeat += self.heartbeat_seconds
+ if self.next_heartbeat <= now:
+ self.next_heartbeat = now + self.heartbeat_seconds
+ heartbeat = bb.event.HeartbeatEvent(now)
+ bb.event.fire(heartbeat, self.cooker.data)
+ if nextsleep and now + nextsleep > self.next_heartbeat:
+ # Shorten timeout so that we wake up in time for
+ # the heartbeat.
+ nextsleep = self.next_heartbeat - now
+
if nextsleep is not None:
select.select(fds,[],[],nextsleep)
diff --git a/lib/bb/ui/knotty.py b/lib/bb/ui/knotty.py
index 948f527..48e1223 100644
--- a/lib/bb/ui/knotty.py
+++ b/lib/bb/ui/knotty.py
@@ -647,6 +647,7 @@ def main(server, eventHandler, params, tf = TerminalFilter):
bb.event.OperationCompleted,
bb.event.OperationProgress,
bb.event.DiskFull,
+ bb.event.HeartbeatEvent,
bb.build.TaskProgress)):
continue
diff --git a/lib/bb/ui/toasterui.py b/lib/bb/ui/toasterui.py
index b1b3684..1729902 100644
--- a/lib/bb/ui/toasterui.py
+++ b/lib/bb/ui/toasterui.py
@@ -236,6 +236,9 @@ def main(server, eventHandler, params):
# pylint: disable=protected-access
# the code will look into the protected variables of the event; no easy way around this
+ if isinstance(event, bb.event.HeartbeatEvent):
+ continue
+
if isinstance(event, bb.event.ParseStarted):
if not (build_log and build_log_file_path):
build_log, build_log_file_path = _open_build_log(log_dir)
--
2.1.4
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH V2 2/4] runqueue.py: monitor disk space at regular time intervals
2016-11-29 16:47 [PATCH V2 0/4] system statistics sampling Patrick Ohly
2016-11-29 16:47 ` [PATCH V2 1/4] cooker process: fire heartbeat event at regular time intervals Patrick Ohly
@ 2016-11-29 16:47 ` Patrick Ohly
2016-11-29 16:47 ` [PATCH V2 3/4] monitordisk.py: minor code and comment cleanup Patrick Ohly
2016-11-29 16:47 ` [PATCH V2 4/4] monitordisk: add event Patrick Ohly
3 siblings, 0 replies; 5+ messages in thread
From: Patrick Ohly @ 2016-11-29 16:47 UTC (permalink / raw)
To: bitbake-devel
Hooking the disk monitor into the regular heartbeat event instead
of the runqueue solves two problems:
- When there is just one long running task which fills up the disk,
the previous approach did not notice that until after the completion
of the task because _execute_runqueue() only gets called on task
state changes. As a result, aborting a build did not work in this
case.
- When there are many short-lived tasks, disk space was getting
checked very frequently. When the storage that is getting checked
is on an NFS server, that can lead to noticeable traffic to the
server.
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
---
lib/bb/runqueue.py | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 84b2685..b1a09ad 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -984,8 +984,14 @@ class RunQueue:
self.state = runQueuePrepare
# For disk space monitor
+ # Invoked at regular time intervals via the bitbake heartbeat event
+ # while the build is running. We generate a unique name for the handler
+ # here, just in case that there ever is more than one RunQueue instance,
+ # start the handler when reaching runQueueSceneRun, and stop it when
+ # done with the build.
self.dm = monitordisk.diskMonitor(cfgData)
-
+ self.dm_event_handler_name = '_bb_diskmonitor_' + str(id(self))
+ self.dm_event_handler_registered = False
self.rqexe = None
self.worker = {}
self.fakeworker = {}
@@ -1208,10 +1214,12 @@ class RunQueue:
self.rqdata.init_progress_reporter.next_stage()
self.rqexe = RunQueueExecuteScenequeue(self)
- if self.state in [runQueueSceneRun, runQueueRunning, runQueueCleanUp]:
- self.dm.check(self)
-
if self.state is runQueueSceneRun:
+ if not self.dm_event_handler_registered:
+ res = bb.event.register(self.dm_event_handler_name,
+ lambda x: self.dm.check(self) if self.state in [runQueueSceneRun, runQueueRunning, runQueueCleanUp] else False,
+ ('bb.event.HeartbeatEvent',))
+ self.dm_event_handler_registered = True
retval = self.rqexe.execute()
if self.state is runQueueRunInit:
@@ -1230,7 +1238,13 @@ class RunQueue:
if self.state is runQueueCleanUp:
retval = self.rqexe.finish()
- if (self.state is runQueueComplete or self.state is runQueueFailed) and self.rqexe:
+ build_done = self.state is runQueueComplete or self.state is runQueueFailed
+
+ if build_done and self.dm_event_handler_registered:
+ bb.event.remove(self.dm_event_handler_name, None)
+ self.dm_event_handler_registered = False
+
+ if build_done and self.rqexe:
self.teardown_workers()
if self.rqexe.stats.failed:
logger.info("Tasks Summary: Attempted %d tasks of which %d didn't need to be rerun and %d failed.", self.rqexe.stats.completed + self.rqexe.stats.failed, self.rqexe.stats.skipped, self.rqexe.stats.failed)
--
2.1.4
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH V2 3/4] monitordisk.py: minor code and comment cleanup
2016-11-29 16:47 [PATCH V2 0/4] system statistics sampling Patrick Ohly
2016-11-29 16:47 ` [PATCH V2 1/4] cooker process: fire heartbeat event at regular time intervals Patrick Ohly
2016-11-29 16:47 ` [PATCH V2 2/4] runqueue.py: monitor disk space " Patrick Ohly
@ 2016-11-29 16:47 ` Patrick Ohly
2016-11-29 16:47 ` [PATCH V2 4/4] monitordisk: add event Patrick Ohly
3 siblings, 0 replies; 5+ messages in thread
From: Patrick Ohly @ 2016-11-29 16:47 UTC (permalink / raw)
To: bitbake-devel
There's no need to encode and decode the hash key as a single string,
a tuple works just fine. Iterating over entries can be written more
concisely.
Entries in the stat results are integers, not floating point values.
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
---
lib/bb/monitordisk.py | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/lib/bb/monitordisk.py b/lib/bb/monitordisk.py
index 203c405..0f9010a 100644
--- a/lib/bb/monitordisk.py
+++ b/lib/bb/monitordisk.py
@@ -129,7 +129,7 @@ def getDiskData(BBDirs, configuration):
bb.utils.mkdirhier(path)
dev = getMountedDev(path)
# Use path/action as the key
- devDict[os.path.join(path, action)] = [dev, minSpace, minInode]
+ devDict[(path, action)] = [dev, minSpace, minInode]
return devDict
@@ -205,16 +205,13 @@ class diskMonitor:
""" Take action for the monitor """
if self.enableMonitor:
- for k in self.devDict:
- path = os.path.dirname(k)
- action = os.path.basename(k)
- dev = self.devDict[k][0]
- minSpace = self.devDict[k][1]
- minInode = self.devDict[k][2]
+ for k, attributes in self.devDict.items():
+ path, action = k
+ dev, minSpace, minInode = attributes
st = os.statvfs(path)
- # The free space, float point number
+ # The available free space, integer number
freeSpace = st.f_bavail * st.f_frsize
if minSpace and freeSpace < minSpace:
@@ -235,7 +232,7 @@ class diskMonitor:
rq.finish_runqueue(True)
bb.event.fire(bb.event.DiskFull(dev, 'disk', freeSpace, path), self.configuration)
- # The free inodes, float point number
+ # The free inodes, integer number
freeInode = st.f_favail
if minInode and freeInode < minInode:
--
2.1.4
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH V2 4/4] monitordisk: add event
2016-11-29 16:47 [PATCH V2 0/4] system statistics sampling Patrick Ohly
` (2 preceding siblings ...)
2016-11-29 16:47 ` [PATCH V2 3/4] monitordisk.py: minor code and comment cleanup Patrick Ohly
@ 2016-11-29 16:47 ` Patrick Ohly
3 siblings, 0 replies; 5+ messages in thread
From: Patrick Ohly @ 2016-11-29 16:47 UTC (permalink / raw)
To: bitbake-devel
The current disk usage is interesting and may be worth logging over
time as part of the build statistics. Instead of re-implementing the
code and the configuration option (BB_DISKMON_DIRS), the information
gathered by monitordisk.py is made available to buildstats.bbclass via
a new event.
This has pros and cons:
- there is already a useful default configuration for "interesting" directories
- no code duplication
- on the other hand, users cannot configure recording separately from
monitoring (probably not that important)
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
---
lib/bb/event.py | 17 +++++++++++++++++
lib/bb/monitordisk.py | 8 ++++++++
2 files changed, 25 insertions(+)
diff --git a/lib/bb/event.py b/lib/bb/event.py
index cacbac8..5491914 100644
--- a/lib/bb/event.py
+++ b/lib/bb/event.py
@@ -451,6 +451,23 @@ class DiskFull(Event):
self._free = freespace
self._mountpoint = mountpoint
+class DiskUsageSample:
+ def __init__(self, available_bytes, free_bytes, total_bytes):
+ # Number of bytes available to non-root processes.
+ self.available_bytes = available_bytes
+ # Number of bytes available to root processes.
+ self.free_bytes = free_bytes
+ # Total capacity of the volume.
+ self.total_bytes = total_bytes
+
+class MonitorDiskEvent(Event):
+ """If BB_DISKMON_DIRS is set, then this event gets triggered each time disk space is checked.
+ Provides information about devices that are getting monitored."""
+ def __init__(self, disk_usage):
+ Event.__init__(self)
+ # hash of device root path -> DiskUsageSample
+ self.disk_usage = disk_usage
+
class NoProvider(Event):
"""No Provider for an Event"""
diff --git a/lib/bb/monitordisk.py b/lib/bb/monitordisk.py
index 0f9010a..f3e8193 100644
--- a/lib/bb/monitordisk.py
+++ b/lib/bb/monitordisk.py
@@ -205,6 +205,7 @@ class diskMonitor:
""" Take action for the monitor """
if self.enableMonitor:
+ diskUsage = {}
for k, attributes in self.devDict.items():
path, action = k
dev, minSpace, minInode = attributes
@@ -214,6 +215,11 @@ class diskMonitor:
# The available free space, integer number
freeSpace = st.f_bavail * st.f_frsize
+ # Send all relevant information in the event.
+ freeSpaceRoot = st.f_bfree * st.f_frsize
+ totalSpace = st.f_blocks * st.f_frsize
+ diskUsage[dev] = bb.event.DiskUsageSample(freeSpace, freeSpaceRoot, totalSpace)
+
if minSpace and freeSpace < minSpace:
# Always show warning, the self.checked would always be False if the action is WARN
if self.preFreeS[k] == 0 or self.preFreeS[k] - freeSpace > self.spaceInterval and not self.checked[k]:
@@ -257,4 +263,6 @@ class diskMonitor:
self.checked[k] = True
rq.finish_runqueue(True)
bb.event.fire(bb.event.DiskFull(dev, 'inode', freeInode, path), self.configuration)
+
+ bb.event.fire(bb.event.MonitorDiskEvent(diskUsage), self.configuration)
return
--
2.1.4
^ permalink raw reply related [flat|nested] 5+ messages in thread