* [Powertop] [PATCH 4/4] Add support for Intel GPU statistics
@ 2012-08-05 17:14 Arjan van de Ven
0 siblings, 0 replies; 4+ messages in thread
From: Arjan van de Ven @ 2012-08-05 17:14 UTC (permalink / raw)
To: powertop
[-- Attachment #1: Type: text/plain, Size: 7483 bytes --]
From 7bc1ffe1c7d6793bafb9a51d52f797bf51911097 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan(a)linux.intel.com>
Date: Sun, 5 Aug 2012 10:04:02 -0700
Subject: [PATCH 4/4] Add support for Intel GPU statistics
As of the 3.5 kernel, Intel GPUs report their C states (power gating) via sysfs.
This patch shows them as part of the C state tab, arranged like a core inside package 0
(which matches the physical topology).
---
src/Makefile.am | 2 +-
src/cpu/cpu.cpp | 38 ++++++++++++++++
src/cpu/intel_cpus.h | 23 ++++++++++
src/cpu/intel_gpu.cpp | 116 +++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 178 insertions(+), 1 deletions(-)
create mode 100644 src/cpu/intel_gpu.cpp
diff --git a/src/Makefile.am b/src/Makefile.am
index d0976fd..d233d85 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -30,7 +30,7 @@ powertop_SOURCES = parameters/persistent.cpp parameters/learn.cpp parameters/par
calibrate/calibrate.h measurement/measurement.cpp measurement/power_supply.cpp \
measurement/measurement.h measurement/acpi.cpp measurement/sysfs.h measurement/sysfs.cpp \
measurement/acpi.h measurement/extech.cpp measurement/power_supply.h measurement/extech.h \
- main.cpp css.h powertop.css
+ main.cpp css.h powertop.css cpu/intel_gpu.cpp
powertop_CXXFLAGS = -fno-omit-frame-pointer -fstack-protector -Wall -Wshadow -Wformat $(NCURSES_CFLAGS) $(PCIUTILS_CFLAGS) $(LIBNL_CFLAGS) $(GLIB2_CFLAGS)
diff --git a/src/cpu/cpu.cpp b/src/cpu/cpu.cpp
index 63e48ee..4e32482 100644
--- a/src/cpu/cpu.cpp
+++ b/src/cpu/cpu.cpp
@@ -123,6 +123,16 @@ static class abstract_cpu * new_core(int core, int cpu, char * vendor, int famil
return ret;
}
+static class abstract_cpu * new_i965_gpu(void)
+{
+ class abstract_cpu *ret = NULL;
+
+ ret = new class i965_core;
+ ret->childcount = 0;
+
+ return ret;
+}
+
static class abstract_cpu * new_cpu(int number, char * vendor, int family, int model)
{
class abstract_cpu * ret = NULL;
@@ -218,6 +228,27 @@ static void handle_one_cpu(unsigned int number, char *vendor, int family, int mo
all_cpus[number] = cpu;
}
+static void handle_i965_gpu(void)
+{
+ ifstream file;
+ unsigned int core_number = 0;
+ class abstract_cpu *package;
+
+
+ package = system_level.children[0];
+
+ core_number = package->children.size();
+
+ if (package->children.size() <= core_number)
+ package->children.resize(core_number + 1, NULL);
+
+ if (!package->children[core_number]) {
+ package->children[core_number] = new_i965_gpu();
+ package->childcount++;
+ }
+}
+
+
void enumerate_cpus(void)
{
ifstream file;
@@ -288,6 +319,13 @@ void enumerate_cpus(void)
file.close();
+ file.open("/sys/class/drm/card0/power/rc6_residency_ms", ios::in);
+
+ if (file) {
+ handle_i965_gpu();
+ file.close();
+ }
+
perf_events = new perf_power_bundle();
if (!perf_events->add_event("power:cpu_idle")){
diff --git a/src/cpu/intel_cpus.h b/src/cpu/intel_cpus.h
index b69c5c6..1949af1 100644
--- a/src/cpu/intel_cpus.h
+++ b/src/cpu/intel_cpus.h
@@ -137,3 +137,26 @@ public:
extern int has_c2c7_res;
+
+class i965_core: public cpu_core
+{
+private:
+ uint64_t rc6_before, rc6_after;
+ uint64_t rc6p_before, rc6p_after;
+ uint64_t rc6pp_before, rc6pp_after;
+
+ struct timeval before;
+ struct timeval after;
+
+public:
+ virtual void measurement_start(void);
+ virtual void measurement_end(void);
+ virtual int can_collapse(void) { return 0;};
+
+ virtual char * fill_pstate_line(int line_nr, char *buffer);
+ virtual char * fill_pstate_name(int line_nr, char *buffer);
+ virtual char * fill_cstate_line(int line_nr, char *buffer, const char *separator);
+ virtual int has_pstate_level(int level) { return 0; };
+ virtual int has_pstates(void) { return 0; };
+
+};
diff --git a/src/cpu/intel_gpu.cpp b/src/cpu/intel_gpu.cpp
new file mode 100644
index 0000000..77a7051
--- /dev/null
+++ b/src/cpu/intel_gpu.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2012, Intel Corporation
+ *
+ * This file is part of PowerTOP
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ * or just google for it.
+ *
+ * Authors:
+ * Arjan van de Ven <arjan(a)linux.intel.com>
+ */
+#include "cpu.h"
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "../lib.h"
+#include "../parameters/parameters.h"
+#include "../display.h"
+
+void i965_core::measurement_start(void)
+{
+ ifstream file;
+
+ gettimeofday(&before, NULL);
+ rc6_before = read_sysfs("/sys/class/drm/card0/power/rc6_residency_ms", NULL);
+ rc6p_before = read_sysfs("/sys/class/drm/card0/power/rc6p_residency_ms", NULL);
+ rc6pp_before = read_sysfs("/sys/class/drm/card0/power/rc6pp_residency_ms", NULL);
+
+ update_cstate("gpu c0", "Active", 0, 0, 1, 0);
+ update_cstate("gpu rc6", "RC6", 0, rc6_before, 1, 1);
+ update_cstate("gpu rc6p", "RC6p", 0, rc6p_before, 1, 2);
+ update_cstate("gpu rc6pp", "RC6pp", 0, rc6pp_before, 1, 3);
+}
+
+char * i965_core::fill_cstate_line(int line_nr, char *buffer, const char *separator)
+{
+ buffer[0] = 0;
+ double ratio, d = -1.0, time_delta;
+
+ if (line_nr == LEVEL_HEADER) {
+ sprintf(buffer,_(" GPU "));
+ return buffer;
+ }
+
+ buffer[0] = 0;
+
+ time_delta = 1000000 * (after.tv_sec - before.tv_sec) + after.tv_usec - before.tv_usec;
+ ratio = 100000.0/time_delta;
+
+ if (line_nr == 0)
+ d = 100.0 - ratio * (rc6_after + rc6p_after + rc6pp_after - rc6_before - rc6p_before - rc6pp_before);
+ if (line_nr == 1)
+ d = ratio * (rc6_after - rc6_before);
+ if (line_nr == 2)
+ d = ratio * (rc6p_after - rc6p_before);
+ if (line_nr == 3)
+ d = ratio * (rc6pp_after - rc6pp_before);
+ if (line_nr >= 4 || line_nr < 0)
+ return buffer;
+
+ /* cope with rounding errors due to the measurement interval */
+ if (d < 0.0)
+ d = 0.0;
+ if (d > 100.0)
+ d = 100.0;
+
+ sprintf(buffer,"%5.1f%%", d);
+
+ return buffer;
+}
+
+
+void i965_core::measurement_end(void)
+{
+ gettimeofday(&after, NULL);
+
+ rc6_after = read_sysfs("/sys/class/drm/card0/power/rc6_residency_ms", NULL);
+ rc6p_after = read_sysfs("/sys/class/drm/card0/power/rc6p_residency_ms", NULL);
+ rc6pp_after = read_sysfs("/sys/class/drm/card0/power/rc6pp_residency_ms", NULL);
+}
+
+char * i965_core::fill_pstate_line(int line_nr, char *buffer)
+{
+ buffer[0] = 0;
+ return buffer;
+}
+
+char * i965_core::fill_pstate_name(int line_nr, char *buffer)
+{
+ buffer[0] = 0;
+ return buffer;
+}
+
--
1.7.7.6
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [Powertop] [PATCH 4/4] Add support for Intel GPU statistics
@ 2012-08-05 17:32 Sergey Senozhatsky
0 siblings, 0 replies; 4+ messages in thread
From: Sergey Senozhatsky @ 2012-08-05 17:32 UTC (permalink / raw)
To: powertop
[-- Attachment #1: Type: text/plain, Size: 6596 bytes --]
Hello,
On (08/05/12 10:14), Arjan van de Ven wrote:
>
> +static void handle_i965_gpu(void)
> +{
> + ifstream file;
seems to be unused
> + unsigned int core_number = 0;
> + class abstract_cpu *package;
> +
> +
> + package = system_level.children[0];
> +
> + core_number = package->children.size();
> +
> + if (package->children.size() <= core_number)
> + package->children.resize(core_number + 1, NULL);
> +
> + if (!package->children[core_number]) {
> + package->children[core_number] = new_i965_gpu();
> + package->childcount++;
> + }
> +}
> +
> +
> void enumerate_cpus(void)
> {
> ifstream file;
> @@ -288,6 +319,13 @@ void enumerate_cpus(void)
>
> file.close();
>
> + file.open("/sys/class/drm/card0/power/rc6_residency_ms", ios::in);
> +
> + if (file) {
> + handle_i965_gpu();
> + file.close();
> + }
> +
side note: I just wonder how much slower a C++ stream, with its heavy buffering etc., is
than stat(). I'll review it and, if it makes sense, will probably add a simple stat()
wrapper to lib.cpp
> perf_events = new perf_power_bundle();
>
> if (!perf_events->add_event("power:cpu_idle")){
> diff --git a/src/cpu/intel_cpus.h b/src/cpu/intel_cpus.h
> index b69c5c6..1949af1 100644
> --- a/src/cpu/intel_cpus.h
> +++ b/src/cpu/intel_cpus.h
> @@ -137,3 +137,26 @@ public:
>
>
> extern int has_c2c7_res;
> +
> +class i965_core: public cpu_core
> +{
> +private:
> + uint64_t rc6_before, rc6_after;
> + uint64_t rc6p_before, rc6p_after;
> + uint64_t rc6pp_before, rc6pp_after;
> +
> + struct timeval before;
> + struct timeval after;
> +
> +public:
> + virtual void measurement_start(void);
> + virtual void measurement_end(void);
> + virtual int can_collapse(void) { return 0;};
> +
> + virtual char * fill_pstate_line(int line_nr, char *buffer);
> + virtual char * fill_pstate_name(int line_nr, char *buffer);
> + virtual char * fill_cstate_line(int line_nr, char *buffer, const char *separator);
> + virtual int has_pstate_level(int level) { return 0; };
> + virtual int has_pstates(void) { return 0; };
> +
> +};
> diff --git a/src/cpu/intel_gpu.cpp b/src/cpu/intel_gpu.cpp
> new file mode 100644
> index 0000000..77a7051
> --- /dev/null
> +++ b/src/cpu/intel_gpu.cpp
> @@ -0,0 +1,116 @@
> +/*
> + * Copyright 2012, Intel Corporation
> + *
> + * This file is part of PowerTOP
> + *
> + * This program file is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
> + * for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program in a file named COPYING; if not, write to the
> + * Free Software Foundation, Inc,
> + * 51 Franklin Street, Fifth Floor,
> + * Boston, MA 02110-1301 USA
> + * or just google for it.
> + *
> + * Authors:
> + * Arjan van de Ven <arjan(a)linux.intel.com>
> + */
> +#include "cpu.h"
> +#include <iostream>
> +#include <fstream>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <sys/time.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <unistd.h>
> +
> +#include "../lib.h"
> +#include "../parameters/parameters.h"
> +#include "../display.h"
> +
> +void i965_core::measurement_start(void)
> +{
> + ifstream file;
> +
> + gettimeofday(&before, NULL);
> + rc6_before = read_sysfs("/sys/class/drm/card0/power/rc6_residency_ms", NULL);
> + rc6p_before = read_sysfs("/sys/class/drm/card0/power/rc6p_residency_ms", NULL);
> + rc6pp_before = read_sysfs("/sys/class/drm/card0/power/rc6pp_residency_ms", NULL);
> +
> + update_cstate("gpu c0", "Active", 0, 0, 1, 0);
> + update_cstate("gpu rc6", "RC6", 0, rc6_before, 1, 1);
> + update_cstate("gpu rc6p", "RC6p", 0, rc6p_before, 1, 2);
> + update_cstate("gpu rc6pp", "RC6pp", 0, rc6pp_before, 1, 3);
> +}
> +
> +char * i965_core::fill_cstate_line(int line_nr, char *buffer, const char *separator)
> +{
> + buffer[0] = 0;
> + double ratio, d = -1.0, time_delta;
> +
> + if (line_nr == LEVEL_HEADER) {
> + sprintf(buffer,_(" GPU "));
> + return buffer;
> + }
> +
> + buffer[0] = 0;
> +
> + time_delta = 1000000 * (after.tv_sec - before.tv_sec) + after.tv_usec - before.tv_usec;
> + ratio = 100000.0/time_delta;
> +
> + if (line_nr == 0)
> + d = 100.0 - ratio * (rc6_after + rc6p_after + rc6pp_after - rc6_before - rc6p_before - rc6pp_before);
> + if (line_nr == 1)
> + d = ratio * (rc6_after - rc6_before);
> + if (line_nr == 2)
> + d = ratio * (rc6p_after - rc6p_before);
> + if (line_nr == 3)
> + d = ratio * (rc6pp_after - rc6pp_before);
> + if (line_nr >= 4 || line_nr < 0)
> + return buffer;
> +
small side note /* someone will do it anyway :-) */:
how about
if (line_nr == 0)
d = 100.0 - ratio * (rc6_after + rc6p_after + rc6pp_after - rc6_before - rc6p_before - rc6pp_before);
else if (line_nr == 1)
d = ratio * (rc6_after - rc6_before);
else if (line_nr == 2)
d = ratio * (rc6p_after - rc6p_before);
else if (line_nr == 3)
d = ratio * (rc6pp_after - rc6pp_before);
else if (line_nr >= 4 || line_nr < 0)
return buffer;
> + /* cope with rounding errors due to the measurement interval */
> + if (d < 0.0)
> + d = 0.0;
> + if (d > 100.0)
> + d = 100.0;
> +
> + sprintf(buffer,"%5.1f%%", d);
> +
> + return buffer;
> +}
> +
> +
> +void i965_core::measurement_end(void)
> +{
> + gettimeofday(&after, NULL);
> +
> + rc6_after = read_sysfs("/sys/class/drm/card0/power/rc6_residency_ms", NULL);
> + rc6p_after = read_sysfs("/sys/class/drm/card0/power/rc6p_residency_ms", NULL);
> + rc6pp_after = read_sysfs("/sys/class/drm/card0/power/rc6pp_residency_ms", NULL);
> +}
> +
> +char * i965_core::fill_pstate_line(int line_nr, char *buffer)
> +{
> + buffer[0] = 0;
> + return buffer;
> +}
> +
> +char * i965_core::fill_pstate_name(int line_nr, char *buffer)
> +{
> + buffer[0] = 0;
> + return buffer;
> +}
> +
> --
> 1.7.7.6
>
> _______________________________________________
> PowerTop mailing list
> PowerTop(a)lists.01.org
> https://lists.01.org/mailman/listinfo/powertop
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Powertop] [PATCH 4/4] Add support for Intel GPU statistics
@ 2012-08-05 17:37 Arjan van de Ven
0 siblings, 0 replies; 4+ messages in thread
From: Arjan van de Ven @ 2012-08-05 17:37 UTC (permalink / raw)
To: powertop
[-- Attachment #1: Type: text/plain, Size: 1673 bytes --]
>>
>> + file.open("/sys/class/drm/card0/power/rc6_residency_ms", ios::in);
>> +
>> + if (file) {
>> + handle_i965_gpu();
>> + file.close();
>> + }
>> +
>
> side note: just wonder how much C++ stream with its heavy buffering, etc. is slower
> than stat(). I'll review and if it makes sense will probably prepare simple stat()
> wrapper to lib.cpp
better to use access() than stat().
both have the fun of getting a bunch of low level system headers into C++.
I'm sure that'll work most of the time, but for something as non-time-critical as this
I wonder how badly that is inviting trouble.
>> +
>> + if (line_nr == 0)
>> + d = 100.0 - ratio * (rc6_after + rc6p_after + rc6pp_after - rc6_before - rc6p_before - rc6pp_before);
>> + if (line_nr == 1)
>> + d = ratio * (rc6_after - rc6_before);
>> + if (line_nr == 2)
>> + d = ratio * (rc6p_after - rc6p_before);
>> + if (line_nr == 3)
>> + d = ratio * (rc6pp_after - rc6pp_before);
>> + if (line_nr >= 4 || line_nr < 0)
>> + return buffer;
>> +
>
> small side note /* someone will do it anyway :-) */:
> how about
>
> if (line_nr == 0)
> d = 100.0 - ratio * (rc6_after + rc6p_after + rc6pp_after - rc6_before - rc6p_before - rc6pp_before);
> else if (line_nr == 1)
> d = ratio * (rc6_after - rc6_before);
> else if (line_nr == 2)
> d = ratio * (rc6p_after - rc6p_before);
> else if (line_nr == 3)
> d = ratio * (rc6pp_after - rc6pp_before);
> else if (line_nr >= 4 || line_nr < 0)
> return buffer;
well last time I looked at the disassembly for such a case, this generated worse code.
I'll admit that that was about 2 gcc versions ago though.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Powertop] [PATCH 4/4] Add support for Intel GPU statistics
@ 2012-08-05 18:02 Sergey Senozhatsky
0 siblings, 0 replies; 4+ messages in thread
From: Sergey Senozhatsky @ 2012-08-05 18:02 UTC (permalink / raw)
To: powertop
[-- Attachment #1: Type: text/plain, Size: 7702 bytes --]
On (08/05/12 10:37), Arjan van de Ven wrote:
> >>
> >> + file.open("/sys/class/drm/card0/power/rc6_residency_ms", ios::in);
> >> +
> >> + if (file) {
> >> + handle_i965_gpu();
> >> + file.close();
> >> + }
> >> +
> >
> > side note: just wonder how much C++ stream with its heavy buffering, etc. is slower
> > than stat(). I'll review and if it makes sense will probably prepare simple stat()
> > wrapper to lib.cpp
>
> better to use access() than stat.
>
> both have the fun of getting a bunch of low level system headers into C++
> I'm sure that'll work most of the time, but for something as non-time critical as this
> I wonder how badly that is inviting trouble.
>
>
good point, that was just a small nit. I'm ok with a one-time fstream call.
> >> +
> >> + if (line_nr == 0)
> >> + d = 100.0 - ratio * (rc6_after + rc6p_after + rc6pp_after - rc6_before - rc6p_before - rc6pp_before);
> >> + if (line_nr == 1)
> >> + d = ratio * (rc6_after - rc6_before);
> >> + if (line_nr == 2)
> >> + d = ratio * (rc6p_after - rc6p_before);
> >> + if (line_nr == 3)
> >> + d = ratio * (rc6pp_after - rc6pp_before);
> >> + if (line_nr >= 4 || line_nr < 0)
> >> + return buffer;
> >> +
> >
> > small side note /* someone will do it anyway :-) */:
> > how about
> >
> > if (line_nr == 0)
> > d = 100.0 - ratio * (rc6_after + rc6p_after + rc6pp_after - rc6_before - rc6p_before - rc6pp_before);
> > else if (line_nr == 1)
> > d = ratio * (rc6_after - rc6_before);
> > else if (line_nr == 2)
> > d = ratio * (rc6p_after - rc6p_before);
> > else if (line_nr == 3)
> > d = ratio * (rc6pp_after - rc6pp_before);
> > else if (line_nr >= 4 || line_nr < 0)
> > return buffer;
>
> well last time I looked at the disassembly for such a case, this generated worse code.
> I'll admit that that was about 2 gcc versions ago though.
>
it's hard to tell nowadays, when GCC is free (and able) to generate even several functions (not in this case,
of course), each of them specialized for a known parameter value: e.g. foo.part.1(), foo.part.2(), foo.part.3()
for foo(1), foo(2) and foo(3) calls.
on my host (tested on a dummy foo() routine) it's something like this (just for reference)
gcc version 4.7.1 20120721
-O2
if ()
if ()
if ()
Dump of assembler code for function foo:
0x00000000004006b0 <+0>: cmp $0x1,%edi
0x00000000004006b3 <+3>: je 0x4006d8 <foo+40>
0x00000000004006b5 <+5>: cmp $0x2,%edi
0x00000000004006b8 <+8>: je 0x4006d8 <foo+40>
0x00000000004006ba <+10>: cmp $0x3,%edi
0x00000000004006bd <+13>: jne 0x4006f0 <foo+64>
0x00000000004006bf <+15>: mov %esi,%eax
0x00000000004006c1 <+17>: mov $0x4007bf,%edi
0x00000000004006c6 <+22>: imul %esi,%eax
0x00000000004006c9 <+25>: imul %esi,%eax
0x00000000004006cc <+28>: mov %eax,%esi
0x00000000004006ce <+30>: xor %eax,%eax
0x00000000004006d0 <+32>: jmpq 0x4004a0 <printf@plt>
0x00000000004006d5 <+37>: nopl (%rax)
0x00000000004006d8 <+40>: mov %esi,%eax
0x00000000004006da <+42>: imul %esi,%eax
0x00000000004006dd <+45>: mov %eax,%esi
0x00000000004006df <+47>: mov $0x4007bf,%edi
0x00000000004006e4 <+52>: xor %eax,%eax
0x00000000004006e6 <+54>: jmpq 0x4004a0 <printf@plt>
0x00000000004006eb <+59>: nopl 0x0(%rax,%rax,1)
0x00000000004006f0 <+64>: jbe 0x400706 <foo+86>
0x00000000004006f2 <+66>: lea (%rsi,%rsi,1),%eax
0x00000000004006f5 <+69>: mov $0x4007bf,%edi
0x00000000004006fa <+74>: imul %esi,%eax
0x00000000004006fd <+77>: mov %eax,%esi
0x00000000004006ff <+79>: xor %eax,%eax
0x0000000000400701 <+81>: jmpq 0x4004a0 <printf@plt>
0x0000000000400706 <+86>: xor %eax,%eax
0x0000000000400708 <+88>: jmp 0x4006dd <foo+45>
if ()
else if ()
else if ()
[..]
Dump of assembler code for function foo:
0x00000000004006b0 <+0>: xor %eax,%eax
0x00000000004006b2 <+2>: test %edi,%edi
0x00000000004006b4 <+4>: je 0x4006cd <foo+29>
0x00000000004006b6 <+6>: cmp $0x1,%edi
0x00000000004006b9 <+9>: je 0x4006e0 <foo+48>
0x00000000004006bb <+11>: cmp $0x2,%edi
0x00000000004006be <+14>: je 0x4006e0 <foo+48>
0x00000000004006c0 <+16>: cmp $0x3,%edi
0x00000000004006c3 <+19>: je 0x4006f8 <foo+72>
0x00000000004006c5 <+21>: jbe 0x4006cd <foo+29>
0x00000000004006c7 <+23>: lea (%rsi,%rsi,1),%eax
0x00000000004006ca <+26>: imul %esi,%eax
0x00000000004006cd <+29>: mov %eax,%esi
0x00000000004006cf <+31>: mov $0x4007bf,%edi
0x00000000004006d4 <+36>: xor %eax,%eax
0x00000000004006d6 <+38>: jmpq 0x4004a0 <printf@plt>
0x00000000004006db <+43>: nopl 0x0(%rax,%rax,1)
0x00000000004006e0 <+48>: mov %esi,%eax
0x00000000004006e2 <+50>: mov $0x4007bf,%edi
0x00000000004006e7 <+55>: imul %esi,%eax
0x00000000004006ea <+58>: mov %eax,%esi
0x00000000004006ec <+60>: xor %eax,%eax
0x00000000004006ee <+62>: jmpq 0x4004a0 <printf@plt>
0x00000000004006f3 <+67>: nopl 0x0(%rax,%rax,1)
0x00000000004006f8 <+72>: mov %esi,%eax
0x00000000004006fa <+74>: imul %esi,%eax
0x00000000004006fd <+77>: imul %esi,%eax
0x0000000000400700 <+80>: jmp 0x4006cd <foo+29>
neither is perfect compared to the -Os case
if ()
if ()
if ()
Dump of assembler code for function foo:
0x0000000000400691 <+0>: cmp $0x1,%edi
0x0000000000400694 <+3>: jne 0x40069d <foo+12>
0x0000000000400696 <+5>: mov %esi,%eax
0x0000000000400698 <+7>: imul %esi,%eax
0x000000000040069b <+10>: jmp 0x4006b0 <foo+31>
0x000000000040069d <+12>: cmp $0x2,%edi
0x00000000004006a0 <+15>: je 0x400696 <foo+5>
0x00000000004006a2 <+17>: cmp $0x3,%edi
0x00000000004006a5 <+20>: jne 0x4006ae <foo+29>
0x00000000004006a7 <+22>: mov %esi,%eax
0x00000000004006a9 <+24>: imul %esi,%eax
0x00000000004006ac <+27>: jmp 0x4006b8 <foo+39>
0x00000000004006ae <+29>: xor %eax,%eax
0x00000000004006b0 <+31>: cmp $0x3,%edi
0x00000000004006b3 <+34>: jbe 0x4006bb <foo+42>
0x00000000004006b5 <+36>: lea (%rsi,%rsi,1),%eax
0x00000000004006b8 <+39>: imul %esi,%eax
0x00000000004006bb <+42>: mov %eax,%esi
0x00000000004006bd <+44>: mov $0x40077f,%edi
0x00000000004006c2 <+49>: xor %eax,%eax
0x00000000004006c4 <+51>: jmpq 0x4004a0 <printf@plt>
if ()
else if ()
else if ()
Dump of assembler code for function foo:
0x0000000000400691 <+0>: xor %eax,%eax
0x0000000000400693 <+2>: test %edi,%edi
0x0000000000400695 <+4>: je 0x4006b9 <foo+40>
0x0000000000400697 <+6>: cmp $0x1,%edi
0x000000000040069a <+9>: jne 0x4006a0 <foo+15>
0x000000000040069c <+11>: mov %esi,%eax
0x000000000040069e <+13>: jmp 0x4006b6 <foo+37>
0x00000000004006a0 <+15>: cmp $0x2,%edi
0x00000000004006a3 <+18>: je 0x40069c <foo+11>
0x00000000004006a5 <+20>: cmp $0x3,%edi
0x00000000004006a8 <+23>: jne 0x4006b1 <foo+32>
0x00000000004006aa <+25>: mov %esi,%eax
0x00000000004006ac <+27>: imul %esi,%eax
0x00000000004006af <+30>: jmp 0x4006b6 <foo+37>
0x00000000004006b1 <+32>: jbe 0x4006b9 <foo+40>
0x00000000004006b3 <+34>: lea (%rsi,%rsi,1),%eax
0x00000000004006b6 <+37>: imul %esi,%eax
0x00000000004006b9 <+40>: mov %eax,%esi
0x00000000004006bb <+42>: mov $0x40077f,%edi
0x00000000004006c0 <+47>: xor %eax,%eax
0x00000000004006c2 <+49>: jmpq 0x4004a0 <printf@plt>
-ss
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2012-08-05 18:02 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-05 17:14 [Powertop] [PATCH 4/4] Add support for Intel GPU statistics Arjan van de Ven
-- strict thread matches above, loose matches on Subject: below --
2012-08-05 17:32 Sergey Senozhatsky
2012-08-05 17:37 Arjan van de Ven
2012-08-05 18:02 Sergey Senozhatsky
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.