public inbox for igt-dev@lists.freedesktop.org
 help / color / mirror / Atom feed
From: "Belgaumkar, Vinay" <vinay.belgaumkar@intel.com>
To: Soham Purkait <soham.purkait@intel.com>,
	<igt-dev@lists.freedesktop.org>,  <riana.tauro@intel.com>,
	<badal.nilawar@intel.com>, <kamil.konieczny@intel.com>,
	<ashutosh.dixit@intel.com>
Cc: <anshuman.gupta@intel.com>, <umesh.nerlige.ramappa@intel.com>
Subject: Re: [PATCH i-g-t v7 4/5] tools/gputop.src/gputop: Enable support for multiple GPUs and instances
Date: Thu, 12 Feb 2026 15:28:48 -0800	[thread overview]
Message-ID: <0bd96bbc-fd89-4b04-a203-8784a5d9701c@intel.com> (raw)
In-Reply-To: <20260130095318.644256-5-soham.purkait@intel.com>


On 1/30/2026 1:53 AM, Soham Purkait wrote:
> Introduce vendor-agnostic support for handling multiple GPUs and
> instances in gputop. Improve the tool's adaptability to various GPU
> configurations.
>
> v1:
>   - Refactor GPUTOP into a vendor-agnostic tool. (Lucas)
> v2:
>   - Cosmetic changes. (Riana)
>   - Avoid three level indentation. (Riana)
> v3:
>   - Add device filter to populate the array of cards for
>     all supported drivers. (Zbigniew)
> v4:
>   - Add user message for running without root privileges. (Kamil)
> v5:
>   - Add support for GPU client-only busyness on unsupported
>     drivers as a fallback mechanism. (Kamil)
> v6:
>   - Remove unused dependencies and headers. (Kamil)
>
> Signed-off-by: Soham Purkait <soham.purkait@intel.com>
> ---
>   tools/gputop.src/gputop.c    | 278 +++++++++++++++++++++++++++++------
>   tools/gputop.src/meson.build |   2 +-
>   tools/meson.build            |   3 +-
>   3 files changed, 240 insertions(+), 43 deletions(-)
>
> diff --git a/tools/gputop.src/gputop.c b/tools/gputop.src/gputop.c
> index f577a1750..7d4515f8f 100644
> --- a/tools/gputop.src/gputop.c
> +++ b/tools/gputop.src/gputop.c
> @@ -1,6 +1,6 @@
>   // SPDX-License-Identifier: MIT
>   /*
> - * Copyright © 2023 Intel Corporation
> + * Copyright © 2023-2025 Intel Corporation
>    */
>   
>   #include <assert.h>
> @@ -14,66 +14,145 @@
>   #include <math.h>
>   #include <poll.h>
>   #include <signal.h>
> +#include <stdbool.h>
>   #include <stdint.h>
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <string.h>
>   #include <sys/ioctl.h>
>   #include <sys/stat.h>
> +#include <sys/sysmacros.h>
>   #include <sys/types.h>
> -#include <unistd.h>
>   #include <termios.h>
> -#include <sys/sysmacros.h>
> -#include <stdbool.h>
> +#include <unistd.h>
>   
> -#include "igt_core.h"
>   #include "igt_drm_clients.h"
>   #include "igt_drm_fdinfo.h"
>   #include "igt_profiling.h"
> -#include "drmtest.h"
> +#include "xe_gputop.h"
> +
> +/**
> + * Supported Drivers
> + *
> + * Adhere to the following requirements when implementing support for the
> + * new driver:
> + * @drivers: Update drivers[] with driver string.
> + * @sizeof_gputop_obj: Update this function as per new driver support included.
> + * @operations: Update the respective operations of the new driver:
> + * gputop_init,
> + * discover_engines,
> + * pmu_init,
> + * pmu_sample,
> + * print_engines,
> + * clean_up
> + * @per_driver_contexts: Update per_driver_contexts[] array of type "struct gputop_driver" with the
> + * initial values.
> + */
> +static const char * const drivers[] = {
> +	"xe",
> +    /* Keep the last one as NULL */
> +	NULL
> +};
> +
> +static size_t sizeof_gputop_obj(int driver_num)
> +{
> +	switch (driver_num) {
> +	case 0:
Might be worthwhile using an enum like INTEL_XE_DRIVER instead of 
hardcoded values.
> +		return sizeof(struct xe_gputop);
> +	default:
> +		fprintf(stderr,
> +			"Driver number does not exist.\n");
> +		exit(EXIT_FAILURE);
> +	}
> +}
> +
> +/**
> + * Supported operations on driver instances. Update the ops[] array for
> + * each individual driver specific function. Maintain the sequence as per
> + * drivers[] array.
> + */
> +struct device_operations ops[] = {
> +	{
> +		xe_gputop_init,
> +		xe_populate_engines,
> +		xe_pmu_init,
> +		xe_pmu_sample,
> +		xe_print_engines,
> +		xe_clean_up
> +	}
> +};
> +
> +/*
> + * per_driver_contexts[] array of type struct gputop_driver which keeps track of the devices
> + * and related info discovered per driver.
> + */
> +struct gputop_driver per_driver_contexts[] = {
This array is already per driver, so is there a need to repeat that in the
name? Maybe just driver_context or pci_driver_context? Also, where is the
gputop_driver struct defined? (I didn't find it in this patch set.)
> +	{false, 0, NULL}
> +};
>   
>   enum utilization_type {
>   	UTILIZATION_TYPE_ENGINE_TIME,
>   	UTILIZATION_TYPE_TOTAL_CYCLES,
>   };
>   
> -static const char *bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
> -
> -#define ANSI_HEADER "\033[7m"
> -#define ANSI_RESET "\033[0m"
> -
> -static void n_spaces(const unsigned int n)
> +static void gputop_clean_up(void)
>   {
> -	unsigned int i;
> -
> -	for (i = 0; i < n; i++)
> -		putchar(' ');
> +	for (int i = 0; drivers[i]; i++) {
> +		ops[i].clean_up(per_driver_contexts[i].instances, per_driver_contexts[i].len);
> +		free(per_driver_contexts[i].instances);
> +		per_driver_contexts[i].device_present = false;
> +		per_driver_contexts[i].len = 0;
> +	}
>   }
>   
> -static void print_percentage_bar(double percent, int max_len)
> +static int find_driver(struct igt_device_card *card)
Should this be find_pci_driver?
>   {
> -	int bar_len, i, len = max_len - 1;
> -	const int w = 8;
> -
> -	len -= printf("|%5.1f%% ", percent);
> -
> -	/* no space left for bars, do what we can */
> -	if (len < 0)
> -		len = 0;
> -
> -	bar_len = ceil(w * percent * len / 100.0);
> -	if (bar_len > w * len)
> -		bar_len = w * len;
> +	for (int i = 0; drivers[i]; i++) {
> +		if (strcmp(drivers[i], card->driver) == 0)
> +			return i;
> +	}
> +	return -1;
> +}
>   
> -	for (i = bar_len; i >= w; i -= w)
> -		printf("%s", bars[w]);
> -	if (i)
> -		printf("%s", bars[i]);
> +static int populate_device_instances(const char *filter)
> +{
> +	struct igt_device_card *cards = NULL;
> +	struct igt_device_card *card_inplace = NULL;
> +	struct gputop_driver *driver_entry =  NULL;
> +	int driver_no;
Should this be driver_num or driver_id?
> +	int count, final_count = 0;
> +
> +	count = igt_device_card_match_all(filter, &cards);
> +	for (int j = 0; j < count; j++) {
> +		if (strcmp(cards[j].subsystem, "pci") != 0)
> +			continue;
>   
> -	len -= (bar_len + (w - 1)) / w;
> -	n_spaces(len);
> +		driver_no = find_driver(&cards[j]);
> +		if (driver_no < 0)
> +			continue;
>   
> -	putchar('|');
> +		driver_entry = &per_driver_contexts[driver_no];
> +		if (!driver_entry->device_present)
> +			driver_entry->device_present = true;
Is this check meant to guard only the assignment of device_present, or
should it also cover the statements below?
> +		driver_entry->len++;
> +		driver_entry->instances = realloc(driver_entry->instances,
> +						  driver_entry->len * sizeof_gputop_obj(driver_no));
Should this be renamed to driver_entry->engine_instances so that it is
clear which instances this refers to?
> +		if (!driver_entry->instances) {
> +			fprintf(stderr,
> +				"Device instance realloc failed (%s)\n",
> +				strerror(errno));
> +			exit(EXIT_FAILURE);
> +		}

Should these statements be under the if check above as well? Or will we
end up incrementing len twice and doing a duplicate realloc? Unless this
refers to driver instances?

Thanks,

Vinay.

> +		card_inplace = (struct igt_device_card *)
> +				calloc(1, sizeof(struct igt_device_card));
> +		memcpy(card_inplace, &cards[j], sizeof(struct igt_device_card));
> +		ops[driver_no].gputop_init(driver_entry->instances, (driver_entry->len - 1),
> +			card_inplace);
> +		final_count++;
> +	}
> +	if (count)
> +		free(cards);
> +	return final_count;
>   }
>   
>   static int
> @@ -335,6 +414,28 @@ struct gputop_args {
>   	unsigned long delay_usec;
>   };
>   
> +static void countdown(const char *msg, const int start_sec)
> +{
> +	struct pollfd pfd;
> +	int i, ret;
> +	char ch;
> +
> +	for (i = start_sec; i > 0; i--) {
> +		printf("\r%s%d... second(s)", msg, i);
> +		fflush(stdout);
> +
> +		pfd.fd = STDIN_FILENO;
> +		pfd.events = POLLIN;
> +
> +		ret = poll(&pfd, 1, 1000);
> +		if (ret > 0 && (pfd.revents & POLLIN)) {
> +			while ((ch = getchar()) != '\n' && ch != EOF)
> +				continue;
> +			return;
> +		}
> +	}
> +}
> +
>   static void help(char *full_path)
>   {
>   	const char *short_program_name = strrchr(full_path, '/');
> @@ -349,7 +450,32 @@ static void help(char *full_path)
>   	       "Options:\n"
>   	       "\t-h, --help                show this help\n"
>   	       "\t-d, --delay =SEC[.TENTHS] iterative delay as SECS [.TENTHS]\n"
> -	       "\t-n, --iterations =NUMBER  number of executions\n"
> +	       "\t-n, --iterations =NUMBER  number of executions\n\n"
> +	       "Running without root:\n"
> +	       "\tAs a non-root user, CAP_PERFMON or perf_event_paranoid is required to\n"
> +	       "\taccess engine busyness\n"
> +	       "\t" ANSI_HEADER "Steps to run without root (using CAP_PERFMON):"
> +		ANSI_RESET "\n"
> +	       "\tcd /path/to/igt-gpu-tools/\n"
> +	       "\tsudo setcap cap_perfmon=+ep $(pwd)/build/tools/gputop\n"
> +	       "\tsudo sh -c \"echo $(pwd)/build/lib > /etc/ld.so.conf.d/lib-igt.conf\"\n"
> +	       "\tsudo ldconfig\n"
> +	       "\t" ANSI_HEADER "Steps to revert once done:" ANSI_RESET "\n"
> +	       "\tsudo setcap cap_perfmon=-ep $(pwd)/build/tools/gputop\n"
> +	       "\tsudo rm /etc/ld.so.conf.d/lib-igt.conf\n"
> +	       "\tsudo ldconfig\n"
> +	       "\n"
> +	       "\t" ANSI_HEADER "Steps to run without root (using perf_event_paranoid):"
> +	       ANSI_RESET "\n"
> +	       "\t\033[32m# Save current perf_event_paranoid value\033[0m\n"
> +	       "\torig_val=$(sysctl -n kernel.perf_event_paranoid)\n"
> +	       "\tsudo sysctl -w kernel.perf_event_paranoid=-1\n"
> +	       "\t" ANSI_HEADER "Steps to revert once done:" ANSI_RESET "\n"
> +	       "\t\033[32m# Restore original value\033[0m\n"
> +	       "\tsudo sysctl -w kernel.perf_event_paranoid=$orig_val\n\n"
> +	       "\tFor details, see 'Perf events and tool security':\n"
> +	       "\thttps://www.kernel.org/doc/html/"
> +	       "latest/admin-guide/perf-security.html\n\n"
>   	       , short_program_name);
>   }
>   
> @@ -417,9 +543,12 @@ int main(int argc, char **argv)
>   	struct igt_profiled_device *profiled_devices = NULL;
>   	struct igt_drm_clients *clients = NULL;
>   	int con_w = -1, con_h = -1;
> +	bool is_root;
>   	int ret;
>   	long n;
>   
> +	is_root = (geteuid() == 0);
> +
>   	ret = parse_args(argc, argv, &args);
>   	if (ret < 0)
>   		return EXIT_FAILURE;
> @@ -428,6 +557,53 @@ int main(int argc, char **argv)
>   
>   	n = args.n_iter;
>   	period_us = args.delay_usec;
> +	populate_device_instances("device:subsystem=pci,card=all");
> +
> +	for (int i = 0; drivers[i]; i++) {
> +		if (!per_driver_contexts[i].device_present)
> +			continue;
> +
> +		for (int j = 0; j < per_driver_contexts[i].len; j++) {
> +			if (!ops[i].init_engines(per_driver_contexts[i].instances, j)) {
> +				fprintf(stderr,
> +					"Failed to initialize engines! (%s)\n",
> +					strerror(errno));
> +					gputop_clean_up();
> +				return EXIT_FAILURE;
> +			}
> +			ret = ops[i].pmu_init(per_driver_contexts[i].instances, j);
> +
> +			if (ret) {
> +				if (errno == EACCES && !is_root) {
> +					fprintf(stderr,
> +						"\n"
> +						"Running without root privileges.\n"
> +						"Engine busyness may not be available "
> +						"without root privileges.\n"
> +						"See \"--help\" to enable engine "
> +						"busyness without root.\n\n");
> +					igt_devices_free();
> +					gputop_clean_up();
> +					countdown("Resuming with only gpu client "
> +						  "busyness in ", 5);
> +				} else {
> +					fprintf(stderr,
> +						"Failed to initialize PMU! (%s)\n",
> +						strerror(errno));
> +					igt_devices_free();
> +					gputop_clean_up();
> +					return EXIT_FAILURE;
> +				}
> +			}
> +		}
> +	}
> +
> +	for (int i = 0; drivers[i]; i++) {
> +		for (int j = 0;
> +		     per_driver_contexts[i].device_present && j < per_driver_contexts[i].len;
> +		     j++)
> +			ops[i].pmu_sample(per_driver_contexts[i].instances, j);
> +	}
>   
>   	clients = igt_drm_clients_init(NULL);
>   	if (!clients)
> @@ -449,22 +625,42 @@ int main(int argc, char **argv)
>   
>   	while ((n != 0) && !stop_top) {
>   		struct igt_drm_client *c, *prevc = NULL;
> -		int i, engine_w = 0, lines = 0;
> +		int k, engine_w = 0, lines = 0;
>   
>   		igt_drm_clients_scan(clients, NULL, NULL, 0, NULL, 0);
> +
> +		for (int i = 0; drivers[i]; i++) {
> +			for (int j = 0;
> +			     per_driver_contexts[i].device_present &&
> +			     j < per_driver_contexts[i].len;
> +			     j++)
> +				ops[i].pmu_sample(per_driver_contexts[i].instances, j);
> +		}
> +
>   		igt_drm_clients_sort(clients, client_cmp);
>   
>   		update_console_size(&con_w, &con_h);
>   		clrscr();
>   
> +		for (int i = 0; drivers[i]; i++) {
> +			for (int j = 0;
> +			     per_driver_contexts[i].device_present &&
> +			     j < per_driver_contexts[i].len;
> +			     j++) {
> +				lines = ops[i].print_engines(per_driver_contexts[i].instances, j,
> +							 lines, con_w, con_h);
> +			}
> +		}
> +
>   		if (!clients->num_clients) {
> -			const char *msg = " (No GPU clients yet. Start workload to see stats)";
> +			const char *msg;
>   
> +			msg = " (No GPU clients yet. Start workload to see stats)";
>   			printf(ANSI_HEADER "%-*s" ANSI_RESET "\n",
>   			       (int)(con_w - strlen(msg) - 1), msg);
>   		}
>   
> -		igt_for_each_drm_client(clients, c, i) {
> +		igt_for_each_drm_client(clients, c, k) {
>   			assert(c->status != IGT_DRM_CLIENT_PROBE);
>   			if (c->status != IGT_DRM_CLIENT_ALIVE)
>   				break; /* Active clients are first in the array. */
> @@ -488,11 +684,11 @@ int main(int argc, char **argv)
>   	}
>   
>   	igt_drm_clients_free(clients);
> +	gputop_clean_up();
>   
>   	if (profiled_devices != NULL) {
>   		igt_devices_configure_profiling(profiled_devices, false);
>   		igt_devices_free_profiling(profiled_devices);
>   	}
> -
>   	return 0;
>   }
> diff --git a/tools/gputop.src/meson.build b/tools/gputop.src/meson.build
> index ec39f4c7a..e95657fca 100644
> --- a/tools/gputop.src/meson.build
> +++ b/tools/gputop.src/meson.build
> @@ -1 +1 @@
> -gputop_src = files('gputop.c')
> +gputop_src = files('gputop.c', 'utils.c', 'xe_gputop.c')
> diff --git a/tools/meson.build b/tools/meson.build
> index 521607a4c..caca57d0e 100644
> --- a/tools/meson.build
> +++ b/tools/meson.build
> @@ -123,4 +123,5 @@ subdir('gputop.src')
>   executable('gputop', sources : gputop_src,
>   	   install : true,
>   	   install_rpath : bindir_rpathdir,
> -	   dependencies : [lib_igt_drm_clients,lib_igt_drm_fdinfo,lib_igt_profiling,math],)
> +	   dependencies : [lib_igt_perf,lib_igt_device_scan,lib_igt_drm_clients,
> +	   		   lib_igt_drm_fdinfo,lib_igt_profiling,math],)

  reply	other threads:[~2026-02-12 23:28 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-30  9:53 [PATCH i-g-t v7 0/5] Close any open drm device after engine initialization in GPUTOP Soham Purkait
2026-01-30  9:53 ` [PATCH i-g-t v7 1/5] tools: Rename tools/gputop to tools/gputop.src Soham Purkait
2026-01-30  9:53 ` [PATCH i-g-t v7 2/5] tools/gputop.src/utils: Add clamp macro to remove dependency on lib/xe/* Soham Purkait
2026-01-30  9:53 ` [PATCH i-g-t v7 3/5] tools/gputop.src/xe_gputop: Refactor xe_populate_engines to close card_fd and use direct ioctl calls Soham Purkait
2026-02-12  1:41   ` Belgaumkar, Vinay
2026-02-18 15:27     ` Purkait, Soham
2026-01-30  9:53 ` [PATCH i-g-t v7 4/5] tools/gputop.src/gputop: Enable support for multiple GPUs and instances Soham Purkait
2026-02-12 23:28   ` Belgaumkar, Vinay [this message]
2026-02-18 16:23     ` Purkait, Soham
2026-02-18 17:10     ` Purkait, Soham
2026-01-30  9:53 ` [PATCH i-g-t v7 5/5] tools/gputop.src/gputop: Add command line option for device filter Soham Purkait

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0bd96bbc-fd89-4b04-a203-8784a5d9701c@intel.com \
    --to=vinay.belgaumkar@intel.com \
    --cc=anshuman.gupta@intel.com \
    --cc=ashutosh.dixit@intel.com \
    --cc=badal.nilawar@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=kamil.konieczny@intel.com \
    --cc=riana.tauro@intel.com \
    --cc=soham.purkait@intel.com \
    --cc=umesh.nerlige.ramappa@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox