* [PATCH 1/4] cooker process: fire heartbeat event at regular time intervals
2016-11-28 15:15 [PATCH 0/4] system statistics sampling Patrick Ohly
@ 2016-11-28 15:15 ` Patrick Ohly
2016-11-28 15:15 ` [PATCH 2/4] runqueue.py: monitor disk space " Patrick Ohly
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Patrick Ohly @ 2016-11-28 15:15 UTC (permalink / raw)
To: bitbake-devel
The intended usage is for recording current system statistics from
/proc in buildstats.bbclass during a build and for improving the
BB_DISKMON_DIRS implementation.
All other existing hooks are less suitable because they trigger at
unpredictable rates: too often can be handled by doing rate-limiting
in the event handler, but not often enough (for example, when there is
only one long-running task) cannot because the handler does not get
called at all.
The implementation of the new heartbeat event hooks into the cooker
process event queue. The process already wakes up every 0.1s, which is
often enough for the intentionally coarse 1s delay between
heartbeats. That value was chosen to keep the overhead low while still
being frequent enough for the intended usage.
If necessary, BB_HEARTBEAT_EVENT can be set to a float specifying
the delay in seconds between these heartbeat events.
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
---
lib/bb/event.py | 10 ++++++++++
lib/bb/server/process.py | 25 +++++++++++++++++++++++++
lib/bb/ui/knotty.py | 1 +
lib/bb/ui/toasterui.py | 3 +++
4 files changed, 39 insertions(+)
diff --git a/lib/bb/event.py b/lib/bb/event.py
index 6f1cb10..cacbac8 100644
--- a/lib/bb/event.py
+++ b/lib/bb/event.py
@@ -48,6 +48,16 @@ class Event(object):
def __init__(self):
self.pid = worker_pid
+
+class HeartbeatEvent(Event):
+ """Triggered at regular time intervals (every second by default, see BB_HEARTBEAT_EVENT).
+ Other events can fire much more often (runQueueTaskStarted when there are many short tasks)
+ or not at all for long periods of time (again runQueueTaskStarted, when there is just one
+ long-running task), so this event is more suitable for doing some task-independent work occasionally."""
+ def __init__(self, time):
+ Event.__init__(self)
+ self.time = time
+
Registered = 10
AlreadyRegistered = 14
diff --git a/lib/bb/server/process.py b/lib/bb/server/process.py
index 982fcf7..1654faf 100644
--- a/lib/bb/server/process.py
+++ b/lib/bb/server/process.py
@@ -92,6 +92,8 @@ class ProcessServer(Process, BaseImplServer):
self.event = EventAdapter(event_queue)
self.featurelist = featurelist
self.quit = False
+ self.heartbeat_seconds = 1 # default, BB_HEARTBEAT_EVENT will be checked once we have a datastore.
+ self.next_heartbeat = time.time()
self.quitin, self.quitout = Pipe()
self.event_handle = multiprocessing.Value("i")
@@ -101,6 +103,14 @@ class ProcessServer(Process, BaseImplServer):
self.event_queue.put(event)
self.event_handle.value = bb.event.register_UIHhandler(self, True)
+ heartbeat_event = self.cooker.data.getVar('BB_HEARTBEAT_EVENT', True)
+ if heartbeat_event:
+ try:
+ self.heartbeat_seconds = float(heartbeat_event)
+ except:
+ # Throwing an exception here causes bitbake to hang.
+ # Just warn about the invalid setting and continue
+ bb.warn('Ignoring invalid BB_HEARTBEAT_EVENT=%s, must be a float specifying seconds.' % heartbeat_event)
bb.cooker.server_main(self.cooker, self.main)
def main(self):
@@ -160,6 +170,21 @@ class ProcessServer(Process, BaseImplServer):
del self._idlefuns[function]
self.quit = True
+ # Create new heartbeat event?
+ now = time.time()
+ if now >= self.next_heartbeat:
+ # We might have missed heartbeats. Just trigger once in
+ # that case and continue after the usual delay.
+ self.next_heartbeat += self.heartbeat_seconds
+ if self.next_heartbeat <= now:
+ self.next_heartbeat = now + self.heartbeat_seconds
+ heartbeat = bb.event.HeartbeatEvent(now)
+ bb.event.fire(heartbeat, self.cooker.data)
+ if nextsleep and now + nextsleep > self.next_heartbeat:
+ # Shorten timeout so that we wake up in time for
+ # the heartbeat.
+ nextsleep = self.next_heartbeat - now
+
if nextsleep is not None:
select.select(fds,[],[],nextsleep)
diff --git a/lib/bb/ui/knotty.py b/lib/bb/ui/knotty.py
index 948f527..48e1223 100644
--- a/lib/bb/ui/knotty.py
+++ b/lib/bb/ui/knotty.py
@@ -647,6 +647,7 @@ def main(server, eventHandler, params, tf = TerminalFilter):
bb.event.OperationCompleted,
bb.event.OperationProgress,
bb.event.DiskFull,
+ bb.event.HeartbeatEvent,
bb.build.TaskProgress)):
continue
diff --git a/lib/bb/ui/toasterui.py b/lib/bb/ui/toasterui.py
index b1b3684..1729902 100644
--- a/lib/bb/ui/toasterui.py
+++ b/lib/bb/ui/toasterui.py
@@ -236,6 +236,9 @@ def main(server, eventHandler, params):
# pylint: disable=protected-access
# the code will look into the protected variables of the event; no easy way around this
+ if isinstance(event, bb.event.HeartbeatEvent):
+ continue
+
if isinstance(event, bb.event.ParseStarted):
if not (build_log and build_log_file_path):
build_log, build_log_file_path = _open_build_log(log_dir)
--
2.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/4] runqueue.py: monitor disk space at regular time intervals
2016-11-28 15:15 [PATCH 0/4] system statistics sampling Patrick Ohly
2016-11-28 15:15 ` [PATCH 1/4] cooker process: fire heartbeat event at regular time intervals Patrick Ohly
@ 2016-11-28 15:15 ` Patrick Ohly
2016-11-28 15:58 ` Richard Purdie
2016-11-28 15:15 ` [PATCH 3/4] monitordisk.py: minor code and comment cleanup Patrick Ohly
2016-11-28 15:15 ` [PATCH 4/4] monitordisk: add event Patrick Ohly
3 siblings, 1 reply; 7+ messages in thread
From: Patrick Ohly @ 2016-11-28 15:15 UTC (permalink / raw)
To: bitbake-devel
Hooking the disk monitor into the regular heartbeat event instead
of the runqueue solves two problems:
- When there is just one long running task which fills up the disk,
the previous approach did not notice that until after the completion
of the task because _execute_runqueue() only gets called on task
state changes. As a result, aborting a build did not work in this
case.
- When there are many short-lived tasks, disk space was getting
checked very frequently. When the storage that is getting checked
is on an NFS server, that can lead to noticeable traffic to the
server.
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
---
lib/bb/runqueue.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 84b2685..3772e2e 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -984,7 +984,12 @@ class RunQueue:
self.state = runQueuePrepare
# For disk space monitor
+ # Invoked at regular time intervals via the bitbake heartbeat event
+ # while the build is running.
self.dm = monitordisk.diskMonitor(cfgData)
+ bb.event.register('_bb_diskmonitor',
+ lambda x: self.dm.check(self) if self.state in [runQueueSceneRun, runQueueRunning, runQueueCleanUp] else False,
+ ('bb.event.HeartbeatEvent',))
self.rqexe = None
self.worker = {}
@@ -1208,9 +1213,6 @@ class RunQueue:
self.rqdata.init_progress_reporter.next_stage()
self.rqexe = RunQueueExecuteScenequeue(self)
- if self.state in [runQueueSceneRun, runQueueRunning, runQueueCleanUp]:
- self.dm.check(self)
-
if self.state is runQueueSceneRun:
retval = self.rqexe.execute()
--
2.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 2/4] runqueue.py: monitor disk space at regular time intervals
2016-11-28 15:15 ` [PATCH 2/4] runqueue.py: monitor disk space " Patrick Ohly
@ 2016-11-28 15:58 ` Richard Purdie
2016-11-28 16:30 ` Patrick Ohly
0 siblings, 1 reply; 7+ messages in thread
From: Richard Purdie @ 2016-11-28 15:58 UTC (permalink / raw)
To: Patrick Ohly, bitbake-devel
On Mon, 2016-11-28 at 16:15 +0100, Patrick Ohly wrote:
> Hooking the disk monitor into the regular heatbeat event instead
> of the runqueue solves two problems:
>
> - When there is just one long running task which fills up the disk,
> the previous approach did not notice that until after the
> completion
> of the task because _execute_runqueue() only gets called on task
> state changes. As a result, aborting a build did not work in this
> case.
>
> - When there are many short-lived tasks, disk space was getting
> checked very frequently. When the storage that is getting checked
> is on an NFS server, that can lead to noticable traffic to the
> server.
>
> Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
> ---
> lib/bb/runqueue.py | 8 +++++---
> 1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
> index 84b2685..3772e2e 100644
> --- a/lib/bb/runqueue.py
> +++ b/lib/bb/runqueue.py
> @@ -984,7 +984,12 @@ class RunQueue:
> self.state = runQueuePrepare
>
> # For disk space monitor
> + # Invoked at regular time intervals via the bitbake
> heartbeat event
> + # while the build is running.
> self.dm = monitordisk.diskMonitor(cfgData)
> + bb.event.register('_bb_diskmonitor',
> + lambda x: self.dm.check(self) if
> self.state in [runQueueSceneRun, runQueueRunning, runQueueCleanUp]
> else False,
> + ('bb.event.HeartbeatEvent',))
>
> self.rqexe = None
> self.worker = {}
> @@ -1208,9 +1213,6 @@ class RunQueue:
> self.rqdata.init_progress_reporter.next_stage()
> self.rqexe = RunQueueExecuteScenequeue(self)
>
> - if self.state in [runQueueSceneRun, runQueueRunning,
> runQueueCleanUp]:
> - self.dm.check(self)
> -
> if self.state is runQueueSceneRun:
> retval = self.rqexe.execute()
Don't we have to unregister this at some point too? Cooker can persist
across multiple builds (although it's not the default).
Cheers,
Richard
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/4] runqueue.py: monitor disk space at regular time intervals
2016-11-28 15:58 ` Richard Purdie
@ 2016-11-28 16:30 ` Patrick Ohly
0 siblings, 0 replies; 7+ messages in thread
From: Patrick Ohly @ 2016-11-28 16:30 UTC (permalink / raw)
To: Richard Purdie; +Cc: bitbake-devel
On Mon, 2016-11-28 at 15:58 +0000, Richard Purdie wrote:
> On Mon, 2016-11-28 at 16:15 +0100, Patrick Ohly wrote:
> > # For disk space monitor
> > + # Invoked at regular time intervals via the bitbake
> > heartbeat event
> > + # while the build is running.
> > self.dm = monitordisk.diskMonitor(cfgData)
> > + bb.event.register('_bb_diskmonitor',
> > + lambda x: self.dm.check(self) if
> > self.state in [runQueueSceneRun, runQueueRunning, runQueueCleanUp]
> > else False,
> > + ('bb.event.HeartbeatEvent',))
> >
[...]
> Don't we have to unregister this at some point too? Cooker can persist
> across multiple builds (although its not the default).
Indeed, that case will need further work. I hadn't considered that. What
would be a good point to unregister the event handler? Perhaps the
cleanup code after "if (self.state is runQueueComplete or self.state is
runQueueFailed) and self.rqexe" in _execute_runqueue()?
Just for my understanding, is there guaranteed to be only one RunQueue
instance in the cooker process or could there be more than one at the
same time? Can there be more than one event handler with the same name
(the first parameter of bb.event.register)?
Is the non-default case the memory-resident bitbake?
The buildstats.py code I posted for OE-core has a similar issue. It
assumes that files can be opened when the class gets instantiated and
kept open as long as the process runs.
--
Best Regards, Patrick Ohly
The content of this message is my personal opinion only and although
I am an employee of Intel, the statements I make here in no way
represent Intel's position on the issue, nor am I authorized to speak
on behalf of Intel on this matter.
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/4] monitordisk.py: minor code and comment cleanup
2016-11-28 15:15 [PATCH 0/4] system statistics sampling Patrick Ohly
2016-11-28 15:15 ` [PATCH 1/4] cooker process: fire heartbeat event at regular time intervals Patrick Ohly
2016-11-28 15:15 ` [PATCH 2/4] runqueue.py: monitor disk space " Patrick Ohly
@ 2016-11-28 15:15 ` Patrick Ohly
2016-11-28 15:15 ` [PATCH 4/4] monitordisk: add event Patrick Ohly
3 siblings, 0 replies; 7+ messages in thread
From: Patrick Ohly @ 2016-11-28 15:15 UTC (permalink / raw)
To: bitbake-devel
There's no need to encode and decode the hash key as a single string,
a tuple works just fine. Iterating over entries can be written more
concisely.
Entries in the stat results are integers, not floating point values.
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
---
lib/bb/monitordisk.py | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/lib/bb/monitordisk.py b/lib/bb/monitordisk.py
index 203c405..0f9010a 100644
--- a/lib/bb/monitordisk.py
+++ b/lib/bb/monitordisk.py
@@ -129,7 +129,7 @@ def getDiskData(BBDirs, configuration):
bb.utils.mkdirhier(path)
dev = getMountedDev(path)
# Use path/action as the key
- devDict[os.path.join(path, action)] = [dev, minSpace, minInode]
+ devDict[(path, action)] = [dev, minSpace, minInode]
return devDict
@@ -205,16 +205,13 @@ class diskMonitor:
""" Take action for the monitor """
if self.enableMonitor:
- for k in self.devDict:
- path = os.path.dirname(k)
- action = os.path.basename(k)
- dev = self.devDict[k][0]
- minSpace = self.devDict[k][1]
- minInode = self.devDict[k][2]
+ for k, attributes in self.devDict.items():
+ path, action = k
+ dev, minSpace, minInode = attributes
st = os.statvfs(path)
- # The free space, float point number
+ # The available free space, integer number
freeSpace = st.f_bavail * st.f_frsize
if minSpace and freeSpace < minSpace:
@@ -235,7 +232,7 @@ class diskMonitor:
rq.finish_runqueue(True)
bb.event.fire(bb.event.DiskFull(dev, 'disk', freeSpace, path), self.configuration)
- # The free inodes, float point number
+ # The free inodes, integer number
freeInode = st.f_favail
if minInode and freeInode < minInode:
--
2.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/4] monitordisk: add event
2016-11-28 15:15 [PATCH 0/4] system statistics sampling Patrick Ohly
` (2 preceding siblings ...)
2016-11-28 15:15 ` [PATCH 3/4] monitordisk.py: minor code and comment cleanup Patrick Ohly
@ 2016-11-28 15:15 ` Patrick Ohly
3 siblings, 0 replies; 7+ messages in thread
From: Patrick Ohly @ 2016-11-28 15:15 UTC (permalink / raw)
To: bitbake-devel
The current disk usage is interesting and may be worth logging over
time as part of the build statistics. Instead of re-implementing the
code and the configuration option (BB_DISKMON_DIRS), the information
gathered by monitordisk.py is made available to buildstats.bbclass via
a new event.
This has pros and cons:
- there is already a useful default configuration for "interesting" directories
- no code duplication
- on the other hand, users cannot configure recording separately from
monitoring (probably not that important)
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
---
lib/bb/event.py | 17 +++++++++++++++++
lib/bb/monitordisk.py | 8 ++++++++
2 files changed, 25 insertions(+)
diff --git a/lib/bb/event.py b/lib/bb/event.py
index cacbac8..5491914 100644
--- a/lib/bb/event.py
+++ b/lib/bb/event.py
@@ -451,6 +451,23 @@ class DiskFull(Event):
self._free = freespace
self._mountpoint = mountpoint
+class DiskUsageSample:
+ def __init__(self, available_bytes, free_bytes, total_bytes):
+ # Number of bytes available to non-root processes.
+ self.available_bytes = available_bytes
+ # Number of bytes available to root processes.
+ self.free_bytes = free_bytes
+ # Total capacity of the volume.
+ self.total_bytes = total_bytes
+
+class MonitorDiskEvent(Event):
+ """If BB_DISKMON_DIRS is set, then this event gets triggered each time disk space is checked.
+ Provides information about devices that are getting monitored."""
+ def __init__(self, disk_usage):
+ Event.__init__(self)
+ # hash of device root path -> DiskUsageSample
+ self.disk_usage = disk_usage
+
class NoProvider(Event):
"""No Provider for an Event"""
diff --git a/lib/bb/monitordisk.py b/lib/bb/monitordisk.py
index 0f9010a..f3e8193 100644
--- a/lib/bb/monitordisk.py
+++ b/lib/bb/monitordisk.py
@@ -205,6 +205,7 @@ class diskMonitor:
""" Take action for the monitor """
if self.enableMonitor:
+ diskUsage = {}
for k, attributes in self.devDict.items():
path, action = k
dev, minSpace, minInode = attributes
@@ -214,6 +215,11 @@ class diskMonitor:
# The available free space, integer number
freeSpace = st.f_bavail * st.f_frsize
+ # Send all relevant information in the event.
+ freeSpaceRoot = st.f_bfree * st.f_frsize
+ totalSpace = st.f_blocks * st.f_frsize
+ diskUsage[dev] = bb.event.DiskUsageSample(freeSpace, freeSpaceRoot, totalSpace)
+
if minSpace and freeSpace < minSpace:
# Always show warning, the self.checked would always be False if the action is WARN
if self.preFreeS[k] == 0 or self.preFreeS[k] - freeSpace > self.spaceInterval and not self.checked[k]:
@@ -257,4 +263,6 @@ class diskMonitor:
self.checked[k] = True
rq.finish_runqueue(True)
bb.event.fire(bb.event.DiskFull(dev, 'inode', freeInode, path), self.configuration)
+
+ bb.event.fire(bb.event.MonitorDiskEvent(diskUsage), self.configuration)
return
--
2.1.4
^ permalink raw reply related [flat|nested] 7+ messages in thread