From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Rafael J. Wysocki" Subject: Re: [PATCH v2] analyze_suspend.py: new tool for suspend/resume performance optimization Date: Fri, 17 Jan 2014 02:18 +0100 Message-ID: <2954888.old5rRCcAC@vostro.rjw.lan> References: <20140117001820.GB11416@linux.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7Bit Return-path: Received: from v094114.home.net.pl ([79.96.170.134]:50666 "HELO v094114.home.net.pl" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1750898AbaAQBEA (ORCPT ); Thu, 16 Jan 2014 20:04:00 -0500 In-Reply-To: <20140117001820.GB11416@linux.intel.com> Sender: linux-pm-owner@vger.kernel.org List-Id: linux-pm@vger.kernel.org To: todd.e.brandt@linux.intel.com Cc: linux-pm@vger.kernel.org On Thursday, January 16, 2014 04:18:22 PM Todd E Brandt wrote: > Hi Raphael, this is the very latest version. It just includes an > updated zoomable UI and some fixes to the interpreter for modes > other than S3. I've replaced the previous version with this one in linux-next, thanks! > This tool is designed to assist kernel and OS developers in optimizing > their linux stack's suspend/resume time. Using a kernel image built with a > few extra options enabled, the tool will execute a suspend and will > capture dmesg and ftrace data until resume is complete. This data is > transformed into a device timeline and a callgraph to give a quick and > detailed view of which devices and callbacks are taking the most time in > suspend/resume. The output is a single html file which can be viewed in > firefox or chrome. > > More info here: > https://01.org/suspendresume > > Signed-off-by: Todd Brandt > --- > scripts/analyze_suspend.py | 1446 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 1446 insertions(+) > > diff --git a/scripts/analyze_suspend.py b/scripts/analyze_suspend.py > new file mode 100755 > index 0000000..4157043 > --- /dev/null > +++ b/scripts/analyze_suspend.py > @@ -0,0 +1,1446 @@ > +#!/usr/bin/python > +# > +# Tool for analyzing suspend/resume timing > +# Copyright (c) 2013, Intel Corporation. > +# > +# This program is free software; you can redistribute it and/or modify it > +# under the terms and conditions of the GNU General Public License, > +# version 2, as published by the Free Software Foundation. > +# > +# This program is distributed in the hope it will be useful, but WITHOUT > +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > +# more details. > +# > +# You should have received a copy of the GNU General Public License along with > +# this program; if not, write to the Free Software Foundation, Inc., > +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. > +# > +# Authors: > +# Todd Brandt > +# > +# Description: > +# This tool is designed to assist kernel and OS developers in optimizing > +# their linux stack's suspend/resume time. Using a kernel image built > +# with a few extra options enabled, the tool will execute a suspend and > +# will capture dmesg and ftrace data until resume is complete. This data > +# is transformed into a device timeline and a callgraph to give a quick > +# and detailed view of which devices and callbacks are taking the most > +# time in suspend/resume. The output is a single html file which can be > +# viewed in firefox or chrome. > +# > +# The following kernel build options are required: > +# CONFIG_PM_DEBUG=y > +# CONFIG_PM_SLEEP_DEBUG=y > +# CONFIG_FTRACE=y > +# CONFIG_FUNCTION_TRACER=y > +# CONFIG_FUNCTION_GRAPH_TRACER=y > +# > +# The following additional kernel parameters are required: > +# (e.g. in file /etc/default/grub) > +# GRUB_CMDLINE_LINUX_DEFAULT="... initcall_debug log_buf_len=16M ..." > +# > + > +import sys > +import time > +import os > +import string > +import re > +import array > +import platform > +import datetime > +import struct > + > +# -- classes -- > + > +class SystemValues: > + testdir = "." > + tpath = "/sys/kernel/debug/tracing/" > + mempath = "/dev/mem" > + powerfile = "/sys/power/state" > + suspendmode = "mem" > + prefix = "test" > + teststamp = "" > + dmesgfile = "" > + ftracefile = "" > + htmlfile = "" > + rtcwake = False > + def setOutputFile(self): > + if((self.htmlfile == "") and (self.dmesgfile != "")): > + m = re.match(r"(?P.*)_dmesg\.txt$", self.dmesgfile) > + if(m): > + self.htmlfile = m.group("name")+".html" > + if((self.htmlfile == "") and (self.ftracefile != "")): > + m = re.match(r"(?P.*)_ftrace\.txt$", self.ftracefile) > + if(m): > + self.htmlfile = m.group("name")+".html" > + if(self.htmlfile == ""): > + self.htmlfile = "output.html" > + def initTestOutput(self): > + hostname = platform.node() > + if(hostname != ""): > + self.prefix = hostname > + v = os.popen("cat /proc/version").read().strip() > + kver = string.split(v)[2] > + self.testdir = os.popen("date \"+suspend-%m%d%y-%H%M%S\"").read().strip() > + self.teststamp = "# "+self.testdir+" "+self.prefix+" "+self.suspendmode+" "+kver > + self.dmesgfile = self.testdir+"/"+self.prefix+"_"+self.suspendmode+"_dmesg.txt" > + self.ftracefile = self.testdir+"/"+self.prefix+"_"+self.suspendmode+"_ftrace.txt" > + self.htmlfile = self.testdir+"/"+self.prefix+"_"+self.suspendmode+".html" > + os.mkdir(self.testdir) > + > +class Data: > + altdevname = dict() > + usedmesg = False > + useftrace = False > + notestrun = False > + verbose = False > + phases = [] > + dmesg = {} # root data structure > + start = 0.0 > + end = 0.0 > + stamp = {'time': "", 'host': "", 'mode': ""} > + id = 0 > + tSuspended = 0.0 > + fwValid = False > + fwSuspend = 0 > + fwResume = 0 > + def initialize(self): > + self.dmesg = { # dmesg log data > + 'suspend_general': {'list': dict(), 'start': -1.0, 'end': -1.0, > + 'row': 0, 'color': "#CCFFCC", 'order': 0}, > + 'suspend_early': {'list': dict(), 'start': -1.0, 'end': -1.0, > + 'row': 0, 'color': "green", 'order': 1}, > + 'suspend_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0, > + 'row': 0, 'color': "#00FFFF", 'order': 2}, > + 'suspend_cpu': {'list': dict(), 'start': -1.0, 'end': -1.0, > + 'row': 0, 'color': "blue", 'order': 3}, > + 'resume_cpu': {'list': dict(), 'start': -1.0, 'end': -1.0, > + 'row': 0, 'color': "red", 'order': 4}, > + 'resume_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0, > + 'row': 0, 'color': "orange", 'order': 5}, > + 'resume_early': {'list': dict(), 'start': -1.0, 'end': -1.0, > + 'row': 0, 'color': "yellow", 'order': 6}, > + 'resume_general': {'list': dict(), 'start': -1.0, 'end': -1.0, > + 'row': 0, 'color': "#FFFFCC", 'order': 7} > + } > + self.phases = self.sortedPhases() > + def normalizeTime(self): > + tSus = tRes = self.tSuspended > + if self.fwValid: > + tSus -= -self.fwSuspend / 1000000000.0 > + tRes -= self.fwResume / 1000000000.0 > + self.tSuspended = 0.0 > + self.start -= tSus > + self.end -= tRes > + for phase in self.phases: > + zero = tRes > + if "suspend" in phase: > + zero = tSus > + p = self.dmesg[phase] > + p['start'] -= zero > + p['end'] -= zero > + list = p['list'] > + for name in list: > + d = list[name] > + d['start'] -= zero > + d['end'] -= zero > + if('ftrace' in d): > + cg = d['ftrace'] > + cg.start -= zero > + cg.end -= zero > + for line in cg.list: > + line.time -= zero > + if self.fwValid: > + fws = -self.fwSuspend / 1000000000.0 > + fwr = self.fwResume / 1000000000.0 > + list = dict() > + self.id += 1 > + devid = "dc%d" % self.id > + list["firmware-suspend"] = \ > + {'start': fws, 'end': 0, 'pid': 0, 'par': "", > + 'length': -fws, 'row': 0, 'id': devid }; > + self.id += 1 > + devid = "dc%d" % self.id > + list["firmware-resume"] = \ > + {'start': 0, 'end': fwr, 'pid': 0, 'par': "", > + 'length': fwr, 'row': 0, 'id': devid }; > + self.dmesg['BIOS'] = \ > + {'list': list, 'start': fws, 'end': fwr, > + 'row': 0, 'color': "purple", 'order': 4} > + self.dmesg['resume_cpu']['order'] += 1 > + self.dmesg['resume_noirq']['order'] += 1 > + self.dmesg['resume_early']['order'] += 1 > + self.dmesg['resume_general']['order'] += 1 > + self.phases = self.sortedPhases() > + def vprint(self, msg): > + if(self.verbose): > + print(msg) > + def dmesgSortVal(self, phase): > + return self.dmesg[phase]['order'] > + def sortedPhases(self): > + return sorted(self.dmesg, key=self.dmesgSortVal) > + def sortedDevices(self, phase): > + list = self.dmesg[phase]['list'] > + slist = [] > + tmp = dict() > + for devname in list: > + dev = list[devname] > + tmp[dev['start']] = devname > + for t in sorted(tmp): > + slist.append(tmp[t]) > + return slist > + def fixupInitcalls(self, phase, end): > + # if any calls never returned, clip them at system resume end > + phaselist = self.dmesg[phase]['list'] > + for devname in phaselist: > + dev = phaselist[devname] > + if(dev['end'] < 0): > + dev['end'] = end > + self.vprint("%s (%s): callback didn't return" % (devname, phase)) > + def fixupInitcallsThatDidntReturn(self): > + # if any calls never returned, clip them at system resume end > + for phase in self.phases: > + self.fixupInitcalls(phase, self.dmesg['resume_general']['end']) > + if(phase == "resume_general"): > + break > + def newAction(self, phase, name, pid, parent, start, end): > + self.id += 1 > + devid = "dc%d" % self.id > + list = self.dmesg[phase]['list'] > + length = -1.0 > + if(start >= 0 and end >= 0): > + length = end - start > + list[name] = {'start': start, 'end': end, 'pid': pid, 'par': parent, > + 'length': length, 'row': 0, 'id': devid } > + def deviceIDs(self, devlist, phase): > + idlist = [] > + for p in self.phases: > + if(p[0] != phase[0]): > + continue > + list = data.dmesg[p]['list'] > + for devname in list: > + if devname in devlist: > + idlist.append(list[devname]['id']) > + return idlist > + def deviceParentID(self, devname, phase): > + pdev = "" > + pdevid = "" > + for p in self.phases: > + if(p[0] != phase[0]): > + continue > + list = data.dmesg[p]['list'] > + if devname in list: > + pdev = list[devname]['par'] > + for p in self.phases: > + if(p[0] != phase[0]): > + continue > + list = data.dmesg[p]['list'] > + if pdev in list: > + return list[pdev]['id'] > + return pdev > + def deviceChildrenIDs(self, devname, phase): > + devlist = [] > + for p in self.phases: > + if(p[0] != phase[0]): > + continue > + list = data.dmesg[p]['list'] > + for child in list: > + if(list[child]['par'] == devname): > + devlist.append(child) > + return self.deviceIDs(devlist, phase) > + > +class FTraceLine: > + time = 0.0 > + length = 0.0 > + fcall = False > + freturn = False > + fevent = False > + depth = 0 > + name = "" > + def __init__(self, t, m, d): > + self.time = float(t) > + # check to see if this is a trace event > + em = re.match(r"^ *\/\* *(?P.*) \*\/ *$", m) > + if(em): > + self.name = em.group("msg") > + self.fevent = True > + return > + # convert the duration to seconds > + if(d): > + self.length = float(d)/1000000 > + # the indentation determines the depth > + match = re.match(r"^(?P *)(?P.*)$", m) > + if(not match): > + return > + self.depth = self.getDepth(match.group('d')) > + m = match.group('o') > + # function return > + if(m[0] == '}'): > + self.freturn = True > + if(len(m) > 1): > + # includes comment with function name > + match = re.match(r"^} *\/\* *(?P.*) *\*\/$", m) > + if(match): > + self.name = match.group('n') > + # function call > + else: > + self.fcall = True > + # function call with children > + if(m[-1] == '{'): > + match = re.match(r"^(?P.*) *\(.*", m) > + if(match): > + self.name = match.group('n') > + # function call with no children (leaf) > + elif(m[-1] == ';'): > + self.freturn = True > + match = re.match(r"^(?P.*) *\(.*", m) > + if(match): > + self.name = match.group('n') > + # something else (possibly a trace marker) > + else: > + self.name = m > + def getDepth(self, str): > + return len(str)/2 > + > +class FTraceCallGraph: > + start = -1.0 > + end = -1.0 > + list = [] > + invalid = False > + depth = 0 > + def __init__(self): > + self.start = -1.0 > + self.end = -1.0 > + self.list = [] > + self.depth = 0 > + def setDepth(self, line): > + if(line.fcall and not line.freturn): > + line.depth = self.depth > + self.depth += 1 > + elif(line.freturn and not line.fcall): > + self.depth -= 1 > + line.depth = self.depth > + else: > + line.depth = self.depth > + def addLine(self, line, match): > + if(not self.invalid): > + self.setDepth(line) > + if(line.depth == 0 and line.freturn): > + self.end = line.time > + self.list.append(line) > + return True > + if(self.invalid): > + return False > + if(len(self.list) >= 1000000 or self.depth < 0): > + first = self.list[0] > + self.list = [] > + self.list.append(first) > + self.invalid = True > + id = "task %s cpu %s" % (match.group("pid"), match.group("cpu")) > + window = "(%f - %f)" % (self.start, line.time) > + data.vprint("Too much data for "+id+" "+window+", ignoring this callback") > + return False > + self.list.append(line) > + if(self.start < 0): > + self.start = line.time > + return False > + def sanityCheck(self): > + stack = dict() > + cnt = 0 > + for l in self.list: > + if(l.fcall and not l.freturn): > + stack[l.depth] = l > + cnt += 1 > + elif(l.freturn and not l.fcall): > + if(not stack[l.depth]): > + return False > + stack[l.depth].length = l.length > + stack[l.depth] = 0 > + l.length = 0 > + cnt -= 1 > + if(cnt == 0): > + return True > + return False > + def debugPrint(self, filename): > + if(filename == "stdout"): > + print("[%f - %f]") % (self.start, self.end) > + for l in self.list: > + if(l.freturn and l.fcall): > + print("%f (%02d): %s(); (%.3f us)" % (l.time, l.depth, l.name, l.length*1000000)) > + elif(l.freturn): > + print("%f (%02d): %s} (%.3f us)" % (l.time, l.depth, l.name, l.length*1000000)) > + else: > + print("%f (%02d): %s() { (%.3f us)" % (l.time, l.depth, l.name, l.length*1000000)) > + print(" ") > + else: > + fp = open(filename, 'w') > + print(filename) > + for l in self.list: > + if(l.freturn and l.fcall): > + fp.write("%f (%02d): %s(); (%.3f us)\n" % (l.time, l.depth, l.name, l.length*1000000)) > + elif(l.freturn): > + fp.write("%f (%02d): %s} (%.3f us)\n" % (l.time, l.depth, l.name, l.length*1000000)) > + else: > + fp.write("%f (%02d): %s() { (%.3f us)\n" % (l.time, l.depth, l.name, l.length*1000000)) > + fp.close() > + > +class Timeline: > + html = {} > + scaleH = 0.0 # height of the timescale row as a percent of the timeline height > + rowH = 0.0 # height of each row in percent of the timeline height > + row_height_pixels = 30 > + maxrows = 0 > + height = 0 > + def __init__(self): > + self.html = { > + 'timeline': "", > + 'legend': "", > + 'scale': "" > + } > + def setRows(self, rows): > + self.maxrows = int(rows) > + self.scaleH = 100.0/float(self.maxrows) > + self.height = self.maxrows*self.row_height_pixels > + r = float(self.maxrows - 1) > + if(r < 1.0): > + r = 1.0 > + self.rowH = (100.0 - self.scaleH)/r > + > +# -- global objects -- > + > +sysvals = SystemValues() > +data = Data() > + > +# -- functions -- > + > +# Function: initFtrace > +# Description: > +# Configure ftrace to capture a function trace during suspend/resume > +def initFtrace(): > + global sysvals > + > + print("INITIALIZING FTRACE...") > + # turn trace off > + os.system("echo 0 > "+sysvals.tpath+"tracing_on") > + # set the trace clock to global > + os.system("echo global > "+sysvals.tpath+"trace_clock") > + # set trace buffer to a huge value > + os.system("echo nop > "+sysvals.tpath+"current_tracer") > + os.system("echo 100000 > "+sysvals.tpath+"buffer_size_kb") > + # clear the trace buffer > + os.system("echo \"\" > "+sysvals.tpath+"trace") > + # set trace type > + os.system("echo function_graph > "+sysvals.tpath+"current_tracer") > + os.system("echo \"\" > "+sysvals.tpath+"set_ftrace_filter") > + # set trace format options > + os.system("echo funcgraph-abstime > "+sysvals.tpath+"trace_options") > + os.system("echo funcgraph-proc > "+sysvals.tpath+"trace_options") > + # focus only on device suspend and resume > + os.system("cat "+sysvals.tpath+"available_filter_functions | grep dpm_run_callback > "+sysvals.tpath+"set_graph_function") > + > +# Function: verifyFtrace > +# Description: > +# Check that ftrace is working on the system > +def verifyFtrace(): > + global sysvals > + files = ["available_filter_functions", "buffer_size_kb", > + "current_tracer", "set_ftrace_filter", > + "trace", "trace_marker"] > + for f in files: > + if(os.path.exists(sysvals.tpath+f) == False): > + return False > + return True > + > +def parseStamp(line): > + global data, sysvals > + stampfmt = r"# suspend-(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})-"+\ > + "(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})"+\ > + " (?P.*) (?P.*) (?P.*)$" > + m = re.match(stampfmt, line) > + if(m): > + dt = datetime.datetime(int(m.group("y"))+2000, int(m.group("m")), > + int(m.group("d")), int(m.group("H")), int(m.group("M")), > + int(m.group("S"))) > + data.stamp['time'] = dt.strftime("%B %d %Y, %I:%M:%S %p") > + data.stamp['host'] = m.group("host") > + data.stamp['mode'] = m.group("mode") > + data.stamp['kernel'] = m.group("kernel") > + sysvals.suspendmode = data.stamp['mode'] > + > +# Function: analyzeTraceLog > +# Description: > +# Analyse an ftrace log output file generated from this app during > +# the execution phase. Create an "ftrace" structure in memory for > +# subsequent formatting in the html output file > +def analyzeTraceLog(): > + global sysvals, data > + > + # the ftrace data is tied to the dmesg data > + if(not data.usedmesg): > + return > + > + # read through the ftrace and parse the data > + data.vprint("Analyzing the ftrace data...") > + ftrace_line_fmt = r"^ *(?P