* [PATCH v3] kernel-shark: Multi-thread the computation of stream/combo plots
@ 2025-03-27 0:21 Libo Chen
2025-04-03 17:35 ` Yordan Karadzhov
0 siblings, 1 reply; 2+ messages in thread
From: Libo Chen @ 2025-03-27 0:21 UTC (permalink / raw)
To: y.karadz; +Cc: linux-trace-devel
Parallelize the _newCPUGraph() and _newTaskGraph() calls to dramatically
speed up graph rendering, particularly for traces from very large systems.
OpenMP is technically a new dependency here, but it ships with GCC: as long
as your GCC is >= v4.9, the libgomp library allows the code to compile.
Signed-off-by: Libo Chen <libo.chen@oracle.com>
---
CMakeLists.txt | 6 ++++++
src/KsGLWidget.cpp | 30 ++++++++++++++++++++++++++----
2 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 988bfd6..7847177 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -84,6 +84,12 @@ set(EXECUTABLE_OUTPUT_PATH "${KS_DIR}/bin")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -pthread -fPIC -fno-common")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pthread -fPIC -fno-common")
+find_package(OpenMP 3.2.5)
+if (OPENMP_FOUND)
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif(OPENMP_FOUND)
+
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
diff --git a/src/KsGLWidget.cpp b/src/KsGLWidget.cpp
index 9311d98..7f2001c 100644
--- a/src/KsGLWidget.cpp
+++ b/src/KsGLWidget.cpp
@@ -13,6 +13,9 @@
#include <GL/glut.h>
#include <GL/gl.h>
+// OpenMP
+#include <omp.h>
+
// KernelShark
#include "libkshark-plugin.h"
#include "KsGLWidget.hpp"
@@ -54,6 +57,7 @@ KsGLWidget::KsGLWidget(QWidget *parent)
connect(&_model, &QAbstractTableModel::modelReset,
this, qOverload<>(&KsGLWidget::update));
+ omp_set_num_threads(omp_get_num_procs());
}
void KsGLWidget::_freeGraphs()
@@ -690,23 +694,41 @@ void KsGLWidget::_makeGraphs()
for (auto it = _streamPlots.begin(); it != _streamPlots.end(); ++it) {
sd = it.key();
+ size_t nCpus = it.value()._cpuList.count();
+ size_t nTasks = it.value()._taskList.count();
+ QVector<KsPlot::Graph *> cpuGraphs(nCpus);
+ QVector<KsPlot::Graph *> taskGraphs(nTasks);
+
/* Create CPU graphs according to the cpuList. */
it.value()._cpuGraphs = {};
- for (auto const &cpu: it.value()._cpuList) {
- g = lamAddGraph(sd, _newCPUGraph(sd, cpu), _vSpacing);
+ #pragma omp parallel for
+ for (size_t idx = 0; idx < nCpus; ++idx) {
+ int cpu = it.value()._cpuList[idx];
+ cpuGraphs[idx] = _newCPUGraph(sd, cpu);
+ }
+ QVectorIterator<KsPlot::Graph *> itCpuGraphs(cpuGraphs);
+ while (itCpuGraphs.hasNext()) {
+ g = lamAddGraph(sd, itCpuGraphs.next(), _vSpacing);
it.value()._cpuGraphs.append(g);
}
/* Create Task graphs according to the taskList. */
it.value()._taskGraphs = {};
- for (auto const &pid: it.value()._taskList) {
- g = lamAddGraph(sd, _newTaskGraph(sd, pid), _vSpacing);
+ #pragma omp parallel for
+ for (size_t idx = 0; idx < nTasks; ++idx) {
+ int pid = it.value()._taskList[idx];
+ taskGraphs[idx] = _newTaskGraph(sd, pid);
+ }
+ QVectorIterator<KsPlot::Graph *> itTaskGraphs(taskGraphs);
+ while (itTaskGraphs.hasNext()) {
+ g = lamAddGraph(sd, itTaskGraphs.next(), _vSpacing);
it.value()._taskGraphs.append(g);
}
}
for (auto &c: _comboPlots) {
int n = c.count();
+ #pragma omp parallel for
for (int i = 0; i < n; ++i) {
sd = c[i]._streamId;
if (c[i]._type & KSHARK_TASK_DRAW) {
--
2.43.5
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v3] kernel-shark: Multi-thread the computation of stream/combo plots
2025-03-27 0:21 [PATCH v3] kernel-shark: Multi-thread the computation of stream/combo plots Libo Chen
@ 2025-04-03 17:35 ` Yordan Karadzhov
0 siblings, 0 replies; 2+ messages in thread
From: Yordan Karadzhov @ 2025-04-03 17:35 UTC (permalink / raw)
To: Libo Chen; +Cc: linux-trace-devel
The patch is applied. Thanks!
Yordan
On 3/27/25 02:21, Libo Chen wrote:
> Parallelize the _newCPUGraph() and _newTaskGraph() calls to dramatically
> speed up graph rendering, particularly for traces from very large systems.
>
> OpenMP is technically a new dependency here, but it ships with GCC: as long
> as your GCC is >= v4.9, the libgomp library allows the code to compile.
>
> Signed-off-by: Libo Chen <libo.chen@oracle.com>
> ---
> CMakeLists.txt | 6 ++++++
> src/KsGLWidget.cpp | 30 ++++++++++++++++++++++++++----
> 2 files changed, 32 insertions(+), 4 deletions(-)
>
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index 988bfd6..7847177 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -84,6 +84,12 @@ set(EXECUTABLE_OUTPUT_PATH "${KS_DIR}/bin")
> set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -pthread -fPIC -fno-common")
> set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pthread -fPIC -fno-common")
>
> +find_package(OpenMP 3.2.5)
> +if (OPENMP_FOUND)
> + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
> + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
> +endif(OPENMP_FOUND)
> +
> set(CMAKE_CXX_STANDARD 17)
> set(CMAKE_CXX_STANDARD_REQUIRED ON)
> set(CMAKE_CXX_EXTENSIONS OFF)
> diff --git a/src/KsGLWidget.cpp b/src/KsGLWidget.cpp
> index 9311d98..7f2001c 100644
> --- a/src/KsGLWidget.cpp
> +++ b/src/KsGLWidget.cpp
> @@ -13,6 +13,9 @@
> #include <GL/glut.h>
> #include <GL/gl.h>
>
> +// OpenMP
> +#include <omp.h>
> +
> // KernelShark
> #include "libkshark-plugin.h"
> #include "KsGLWidget.hpp"
> @@ -54,6 +57,7 @@ KsGLWidget::KsGLWidget(QWidget *parent)
>
> connect(&_model, &QAbstractTableModel::modelReset,
> this, qOverload<>(&KsGLWidget::update));
> + omp_set_num_threads(omp_get_num_procs());
> }
>
> void KsGLWidget::_freeGraphs()
> @@ -690,23 +694,41 @@ void KsGLWidget::_makeGraphs()
>
> for (auto it = _streamPlots.begin(); it != _streamPlots.end(); ++it) {
> sd = it.key();
> + size_t nCpus = it.value()._cpuList.count();
> + size_t nTasks = it.value()._taskList.count();
> + QVector<KsPlot::Graph *> cpuGraphs(nCpus);
> + QVector<KsPlot::Graph *> taskGraphs(nTasks);
> +
> /* Create CPU graphs according to the cpuList. */
> it.value()._cpuGraphs = {};
> - for (auto const &cpu: it.value()._cpuList) {
> - g = lamAddGraph(sd, _newCPUGraph(sd, cpu), _vSpacing);
> + #pragma omp parallel for
> + for (size_t idx = 0; idx < nCpus; ++idx) {
> + int cpu = it.value()._cpuList[idx];
> + cpuGraphs[idx] = _newCPUGraph(sd, cpu);
> + }
> + QVectorIterator<KsPlot::Graph *> itCpuGraphs(cpuGraphs);
> + while (itCpuGraphs.hasNext()) {
> + g = lamAddGraph(sd, itCpuGraphs.next(), _vSpacing);
> it.value()._cpuGraphs.append(g);
> }
>
> /* Create Task graphs according to the taskList. */
> it.value()._taskGraphs = {};
> - for (auto const &pid: it.value()._taskList) {
> - g = lamAddGraph(sd, _newTaskGraph(sd, pid), _vSpacing);
> + #pragma omp parallel for
> + for (size_t idx = 0; idx < nTasks; ++idx) {
> + int pid = it.value()._taskList[idx];
> + taskGraphs[idx] = _newTaskGraph(sd, pid);
> + }
> + QVectorIterator<KsPlot::Graph *> itTaskGraphs(taskGraphs);
> + while (itTaskGraphs.hasNext()) {
> + g = lamAddGraph(sd, itTaskGraphs.next(), _vSpacing);
> it.value()._taskGraphs.append(g);
> }
> }
>
> for (auto &c: _comboPlots) {
> int n = c.count();
> + #pragma omp parallel for
> for (int i = 0; i < n; ++i) {
> sd = c[i]._streamId;
> if (c[i]._type & KSHARK_TASK_DRAW) {
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2025-04-03 17:35 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-27 0:21 [PATCH v3] kernel-shark: Multi-thread the computation of stream/combo plots Libo Chen
2025-04-03 17:35 ` Yordan Karadzhov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).