* [PATCH 2/2] amdgpu: Add Streaming Performance Monitor (SPM) data collection interface
2026-04-13 19:29 [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring James Zhu
@ 2026-04-13 19:29 ` James Zhu
2026-05-13 23:56 ` Ma, Bing
2026-05-15 17:14 ` [PATCH v2 " James Zhu
2026-04-16 14:06 ` [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring James Zhu
` (3 subsequent siblings)
4 siblings, 2 replies; 10+ messages in thread
From: James Zhu @ 2026-04-13 19:29 UTC (permalink / raw)
To: amd-gfx, alexander.deucher, Bing.Ma, David.Francis
Cc: Jesse.Zhang, Jenny-Jing.Liu, jamesz
This patch implements full Streaming Performance Monitor (SPM) support for
AMD GPUs, enabling real-time performance counter data collection directly
to userspace buffers.
The implementation provides three key operations:
- AMDGPU_SPM_OP_ACQUIRE: Acquire exclusive access to SPM hardware resources
- AMDGPU_SPM_OP_RELEASE: Release SPM hardware for use by other processes
- AMDGPU_SPM_OP_SET_DEST_BUF: Configure destination buffer and manage
counter data streaming
Key features:
- Flexible destination buffer management with configurable timeout behavior
- Automatic detection and reporting of data loss due to ring buffer overflow
- Support for partial buffer fills with explicit data size reporting
- Detailed kernel API documentation with operation semantics
The amdgpu_spm_setdestbuff() function allows profiling tools to efficiently
collect performance data from the GPU. The timeout mechanism enables waiting
for a buffer to fill completely before switching to a new one, or immediate
switching with partial data preservation.
This enables performance analysis tools and profiling frameworks to access
real-time GPU performance metrics without kernel-mode overhead.
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
amdgpu/amdgpu-symbols.txt | 3 ++
amdgpu/amdgpu.h | 38 ++++++++++++++++
amdgpu/amdgpu_profiler.c | 68 +++++++++++++++++++++++++++++
include/drm/amdgpu_drm.h | 92 ++++++++++++++++++++++++++++++++++++++-
4 files changed, 200 insertions(+), 1 deletion(-)
diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt
index 8cd5559c..b33958ef 100644
--- a/amdgpu/amdgpu-symbols.txt
+++ b/amdgpu/amdgpu-symbols.txt
@@ -88,3 +88,6 @@ amdgpu_create_userqueue
amdgpu_free_userqueue
amdgpu_userq_signal
amdgpu_userq_wait
+amdgpu_spm_acquire
+amdgpu_spm_release
+amdgpu_spm_setdestbuff
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 4ec1f6b6..8698f26a 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -2129,7 +2129,45 @@ int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
*/
int amdgpu_profiler_version(amdgpu_device_handle dev);
+/**
+ * Request exclusive use of SPM
+ * \param dev - \c [in] device handle
+ *
+ * \return 0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_acquire(amdgpu_device_handle dev);
+
+/**
+ * Release exclusive use of SPM
+ * \param dev - \c [in] device handle
+ * \return 0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_release(amdgpu_device_handle dev);
+
+/**
+ * Set up the destination user mode buffer for stream performance
+ * counter data.
+ * \param dev - \c [in] device handle
+ * \param size_in_bytes - \c [in] size of the buffer
+ * \param timeout - \c [in/out] timeout in milliseconds
+ * \param size_copied - \c [out] number of bytes copied
+ * \param dest_mem_addr - \c [in] destination address. Set to NULL
+ * to stop copy on previous buffer
+ * \param is_spm_data_loss - \c [out] true if data was lost
+ *
+ * \return 0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_setdestbuff(
+ amdgpu_device_handle dev,
+ uint32_t size_in_bytes,
+ uint32_t *timeout,
+ uint32_t *size_copied,
+ void *dest_mem_addr,
+ bool *is_spm_data_loss
+ );
+
#ifdef __cplusplus
}
+
#endif
#endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c
index 8d4dffe4..e9d30fb6 100644
--- a/amdgpu/amdgpu_profiler.c
+++ b/amdgpu/amdgpu_profiler.c
@@ -44,3 +44,71 @@ amdgpu_profiler_version(amdgpu_device_handle dev)
return ret;
}
+
+drm_public int
+amdgpu_spm_acquire(amdgpu_device_handle dev)
+{
+ int ret;
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_ACQUIRE;
+
+ ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+
+ return ret;
+}
+
+drm_public int
+amdgpu_spm_release(amdgpu_device_handle dev)
+{
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_RELEASE;
+
+ return drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+}
+
+drm_public int
+amdgpu_spm_setdestbuff(
+ amdgpu_device_handle dev,
+ uint32_t size_in_bytes,
+ uint32_t *timeout,
+ uint32_t *size_copied,
+ void *dest_mem_addr,
+ bool *is_spm_data_loss
+ )
+{
+ int ret;
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_SET_DEST_BUF;
+ user_arg.spm.timeout = *timeout;
+ user_arg.spm.dest_buf = (uint64_t)dest_mem_addr;
+ user_arg.spm.buf_size = size_in_bytes;
+
+ ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+
+ *size_copied = user_arg.spm.bytes_copied;
+ *is_spm_data_loss = user_arg.spm.has_data_loss;
+ *timeout = user_arg.spm.timeout;
+
+ return ret;
+}
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 307242ac..60c73233 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -1698,10 +1698,99 @@ struct drm_amdgpu_info_gpuvm_fault {
#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
/*
- * Supported Profiler Operations
+ * Supported SPM (Streaming Performance Monitor) Operations
*/
+/**
+ * drm_amdgpu_spm_op - SPM ioctl operations
+ *
+ * @AMDGPU_SPM_OP_ACQUIRE: acquire exclusive access to SPM
+ * @AMDGPU_SPM_OP_RELEASE: release exclusive access to SPM
+ * @AMDGPU_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM streaming
+ */
+enum drm_amdgpu_spm_op {
+ AMDGPU_SPM_OP_ACQUIRE,
+ AMDGPU_SPM_OP_RELEASE,
+ AMDGPU_SPM_OP_SET_DEST_BUF
+};
+
+/**
+ * drm_amdgpu_spm_args - Arguments for SPM ioctl
+ *
+ * @op[in]: specifies the operation to perform
+ * @dest_buf[in]: used for the address of the destination buffer
+ * in @AMDGPU_SPM_OP_SET_DEST_BUF
+ * @buf_size[in]: size of the destination buffer
+ * @timeout[in/out]: [in]: timeout in milliseconds, [out]: amount of time left
+ * in the timeout window
+ * @bytes_copied[out]: total amount of data that was copied to the previous dest_buf
+ * @has_data_loss[out]: total count of sub-blocks which have data loss
+ *
+ * This ioctl performs different functions depending on the @op parameter.
+ *
+ * AMDGPU_SPM_OP_ACQUIRE
+ * ------------------------
+ *
+ * Acquires exclusive access of SPM on the specified device for the calling process.
+ * This must be called before using AMDGPU_SPM_OP_SET_DEST_BUF.
+ *
+ * AMDGPU_SPM_OP_RELEASE
+ * ------------------------
+ *
+ * Releases exclusive access of SPM on the specified device for the calling process,
+ * which allows another process to acquire it in the future.
+ *
+ * AMDGPU_SPM_OP_SET_DEST_BUF
+ * -----------------------------
+ *
+ * If @dest_buf is NULL, the destination buffer address is unset and copying of counters
+ * is stopped.
+ *
+ * If @dest_buf is not NULL, it specifies the pointer to a new destination buffer.
+ * @buf_size specifies the size of the buffer.
+ *
+ * If @timeout is non-0, the call will wait for up to @timeout ms for the previous
+ * buffer to be filled. If the previous buffer is filled before the timeout, @timeout
+ * will be updated with the time remaining. If the timeout is exceeded, the function
+ * copies any partial data available into the previous user buffer and returns success.
+ * The amount of valid data in the previous user buffer is indicated by @bytes_copied.
+ *
+ * If @timeout is 0, the function immediately replaces the previous destination buffer
+ * without waiting for the previous buffer to be filled. That means the previous buffer
+ * may only be partially filled, and @bytes_copied will indicate how much data has been
+ * copied to it.
+ *
+ * If data was lost, e.g. due to a ring buffer overflow, @has_data_loss will be non-0.
+ *
+ * Returns negative error code on failure, 0 on success.
+ */
+struct drm_amdgpu_spm_args {
+ __u64 dest_buf;
+ __u32 buf_size;
+ __u32 op;
+ __u32 timeout;
+ __u32 bytes_copied;
+ __u32 has_data_loss;
+ __u32 pad;
+};
+
+/**
+ * drm_amdgpu_spm_buffer_header - SPM Buffer header for drm_amdgpu_spm_args->dest_buf
+ *
+ * @version [out]: spm version
+ * @bytes_copied [out]: amount of data for each sub-block
+ * @has_data_loss: [out]: boolean indicating whether data was lost for each sub-block
+ * (e.g. due to a ring-buffer overflow)
+ */
+struct drm_amdgpu_spm_buffer_header {
+ __u32 version; /* 0-23: minor 24-31: major */
+ __u32 bytes_copied;
+ __u32 has_data_loss;
+ __u32 reserved[5];
+};
+
enum drm_amdgpu_profiler_ops {
AMDGPU_PROFILER_VERSION = 0,
+ AMDGPU_PROFILER_SPM,
};
struct drm_amdgpu_profiler_args {
@@ -1711,6 +1800,7 @@ struct drm_amdgpu_profiler_args {
* lower 16 bit: minor
* higher 16 bit: major
*/
+ struct drm_amdgpu_spm_args spm;
};
};
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* RE: [PATCH 2/2] amdgpu: Add Streaming Performance Monitor (SPM) data collection interface
2026-04-13 19:29 ` [PATCH 2/2] amdgpu: Add Streaming Performance Monitor (SPM) data collection interface James Zhu
@ 2026-05-13 23:56 ` Ma, Bing
2026-05-15 17:14 ` [PATCH v2 " James Zhu
1 sibling, 0 replies; 10+ messages in thread
From: Ma, Bing @ 2026-05-13 23:56 UTC (permalink / raw)
To: Zhu, James, amd-gfx@lists.freedesktop.org, Deucher, Alexander,
Francis, David
Cc: Zhang, Jesse(Jie), Liu, Jenny (Jing)
AMD General
Reviewed-by: Bing Ma <Bing.Ma@amd.com>
-----Original Message-----
From: Zhu, James <James.Zhu@amd.com>
Sent: Monday, April 13, 2026 12:30 PM
To: amd-gfx@lists.freedesktop.org; Deucher, Alexander <Alexander.Deucher@amd.com>; Ma, Bing <Bing.Ma@amd.com>; Francis, David <David.Francis@amd.com>
Cc: Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>; Liu, Jenny (Jing) <Jenny-Jing.Liu@amd.com>; Zhu, James <James.Zhu@amd.com>
Subject: [PATCH 2/2] amdgpu: Add Streaming Performance Monitor (SPM) data collection interface
This patch implements full Streaming Performance Monitor (SPM) support for AMD GPUs, enabling real-time performance counter data collection directly to userspace buffers.
The implementation provides three key operations:
- AMDGPU_SPM_OP_ACQUIRE: Acquire exclusive access to SPM hardware resources
- AMDGPU_SPM_OP_RELEASE: Release SPM hardware for use by other processes
- AMDGPU_SPM_OP_SET_DEST_BUF: Configure destination buffer and manage
counter data streaming
Key features:
- Flexible destination buffer management with configurable timeout behavior
- Automatic detection and reporting of data loss due to ring buffer overflow
- Support for partial buffer fills with explicit data size reporting
- Detailed kernel API documentation with operation semantics
The amdgpu_spm_setdestbuff() function allowing profiling tools to efficiently collect performance data from the GPU. The timeout mechanism enables waiting for a buffer to fill completely before switching to a new one, or immediate switching with partial data preservation.
This enables performance analysis tools, and profiling frameworks to access real-time GPU performance metrics without kernel-mode overhead.
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
amdgpu/amdgpu-symbols.txt | 3 ++
amdgpu/amdgpu.h | 38 ++++++++++++++++
amdgpu/amdgpu_profiler.c | 68 +++++++++++++++++++++++++++++ include/drm/amdgpu_drm.h | 92 ++++++++++++++++++++++++++++++++++++++-
4 files changed, 200 insertions(+), 1 deletion(-)
diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt index 8cd5559c..b33958ef 100644
--- a/amdgpu/amdgpu-symbols.txt
+++ b/amdgpu/amdgpu-symbols.txt
@@ -88,3 +88,6 @@ amdgpu_create_userqueue amdgpu_free_userqueue amdgpu_userq_signal amdgpu_userq_wait
+amdgpu_spm_acquire
+amdgpu_spm_release
+amdgpu_spm_setdestbuff
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 4ec1f6b6..8698f26a 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -2129,7 +2129,45 @@ int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
*/
int amdgpu_profiler_version(amdgpu_device_handle dev);
+/**
+ * Acquire request exclusive use of SPM
+ * \param dev - \c [in] device handle
+ *
+ * \return 0 on success otherwise POSIX Error code */ int
+amdgpu_spm_acquire(amdgpu_device_handle dev);
+
+/**
+ * Release exclusive use of SPM
+ *
+ * \return 0 on success otherwise POSIX Error code */ int
+amdgpu_spm_release(amdgpu_device_handle dev);
+
+/**
+ * Set up the destination user mode buffer for stream performance
+ * counter data.
+ * \param dev - \c [in] device handle
+ * \param size_in_bytes - \c [in] size of the buffer
+ * \param timeout - \c [in/out] timeout in milliseconds
+ * \param size_copied - \c [in] number of bytes copied
+ * \param dest_mem_addr - \c [in] destination address. Set to NULL
+ * to stop copy on previous buffer
+ * \param is_spm_data_loss - \c [in] true if data was lost
+ *
+ * \return 0 on success otherwise POSIX Error code */ int
+amdgpu_spm_setdestbuff(
+ amdgpu_device_handle dev,
+ uint32_t size_in_bytes,
+ uint32_t *timeout,
+ uint32_t *size_copied,
+ void *dest_mem_addr,
+ bool *is_spm_data_loss
+ );
+
#ifdef __cplusplus
}
+
#endif
#endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c index 8d4dffe4..e9d30fb6 100644
--- a/amdgpu/amdgpu_profiler.c
+++ b/amdgpu/amdgpu_profiler.c
@@ -44,3 +44,71 @@ amdgpu_profiler_version(amdgpu_device_handle dev)
return ret;
}
+
+drm_public int
+amdgpu_spm_acquire(amdgpu_device_handle dev) {
+ int ret;
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_ACQUIRE;
+
+ ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+
+ return ret;
+}
+
+drm_public int
+amdgpu_spm_release(amdgpu_device_handle dev) {
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_RELEASE;
+
+ return drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+}
+
+drm_public int
+amdgpu_spm_setdestbuff(
+ amdgpu_device_handle dev,
+ uint32_t size_in_bytes,
+ uint32_t *timeout,
+ uint32_t *size_copied,
+ void *dest_mem_addr,
+ bool *is_spm_data_loss
+ )
+{
+ int ret;
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_SET_DEST_BUF;
+ user_arg.spm.timeout = *timeout;
+ user_arg.spm.dest_buf = (uint64_t)dest_mem_addr;
+ user_arg.spm.buf_size = size_in_bytes;
+
+ ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+
+ *size_copied = user_arg.spm.bytes_copied;
+ *is_spm_data_loss = user_arg.spm.has_data_loss;
+ *timeout = user_arg.spm.timeout;
+
+ return ret;
+}
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index 307242ac..60c73233 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -1698,10 +1698,99 @@ struct drm_amdgpu_info_gpuvm_fault {
#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
/*
- * Supported Profiler Operations
+ * Supported SPM (Stream Performance Monitor) Operations
*/
+/**
+ * drm_amdgpu_spm_op - SPM ioctl operations
+ *
+ * @AMDGPU_SPM_OP_ACQUIRE: acquire exclusive access to SPM
+ * @AMDGPU_SPM_OP_RELEASE: release exclusive access to SPM
+ * @AMDGPU_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM
+streaming */ enum drm_amdgpu_spm_op {
+ AMDGPU_SPM_OP_ACQUIRE,
+ AMDGPU_SPM_OP_RELEASE,
+ AMDGPU_SPM_OP_SET_DEST_BUF
+};
+
+/**
+ * drm_amdgpu_spm_args - Arguments for SPM ioctl
+ *
+ * @op[in]: specifies the operation to perform
+ * @dst_buf[in]: used for the address of the destination buffer
+ * in @AMDGPU_SPM_SET_DEST_BUFFER
+ * @buf_size[in]: size of the destination buffer
+ * @timeout[in/out]: [in]: timeout in milliseconds, [out]: amount of time left
+ * `in the timeout window
+ * @bytes_copied[out]: total amount of data that was copied to the previous dest_buf
+ * @has_data_loss: total count for sub-block which has data loss
+ *
+ * This ioctl performs different functions depending on the @op parameter.
+ *
+ * AMDGPU_SPM_OP_ACQUIRE
+ * ------------------------
+ *
+ * Acquires exclusive access of SPM on the specified for the calling process.
+ * This must be called before using AMDGPU_SPM_OP_SET_DEST_BUF.
+ *
+ * AMDGPU_SPM_OP_RELEASE
+ * ------------------------
+ *
+ * Releases exclusive access of SPM on the specified for the calling
+process,
+ * which allows another process to acquire it in the future.
+ *
+ * AMDGPU_SPM_OP_SET_DEST_BUF
+ * -----------------------------
+ *
+ * If @dst_buf is NULL, the destination buffer address is unset and
+copying of counters
+ * is stopped.
+ *
+ * If @dst_buf is not NULL, it specifies the pointer to a new destination buffer.
+ * @buf_size specifies the size of the buffer.
+ *
+ * If @timeout is non-0, the call will wait for up to @timeout ms for
+the previous
+ * buffer to be filled. If previous buffer to be filled before timeout,
+the @timeout
+ * will be updated value with the time remaining. If the timeout is
+exceeded, the function
+ * copies any partial data available into the previous user buffer and returns success.
+ * The amount of valid data in the previous user buffer is indicated by @bytes_copied.
+ *
+ * If @timeout is 0, the function immediately replaces the previous
+destination buffer
+ * without waiting for the previous buffer to be filled. That means the
+previous buffer
+ * may only be partially filled, and @bytes_copied will indicate how
+much data has been
+ * copied to it.
+ *
+ * If data was lost, e.g. due to a ring buffer overflow, @has_data_loss will be non-0.
+ *
+ * Returns negative error code on failure, 0 on success.
+ */
+struct drm_amdgpu_spm_args {
+ __u64 dest_buf;
+ __u32 buf_size;
+ __u32 op;
+ __u32 timeout;
+ __u32 bytes_copied;
+ __u32 has_data_loss;
+ __u32 pad;
+};
+
+/**
+ * drm_amdgpu_spm_buffer_header - SPM Buffer header for
+drm_amdgpu_spm_args->dest_buf
+ *
+ * @version [out]: spm version
+ * @bytes_copied [out]: amount of data for each sub-block
+ * @has_data_loss: [out]: boolean indicating whether data was lost for each sub-block
+ * (e.g. due to a ring-buffer overflow)
+ */
+struct drm_amdgpu_spm_buffer_header {
+ __u32 version; /* 0-23: minor 24-31: major */
+ __u32 bytes_copied;
+ __u32 has_data_loss;
+ __u32 reserved[5];
+};
+
enum drm_amdgpu_profiler_ops {
AMDGPU_PROFILER_VERSION = 0,
+ AMDGPU_PROFILER_SPM,
};
struct drm_amdgpu_profiler_args {
@@ -1711,6 +1800,7 @@ struct drm_amdgpu_profiler_args {
* lower 16 bit: minor
* higher 16 bit: major
*/
+ struct drm_amdgpu_spm_args spm;
};
};
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread* [PATCH v2 2/2] amdgpu: Add Streaming Performance Monitor (SPM) data collection interface
2026-04-13 19:29 ` [PATCH 2/2] amdgpu: Add Streaming Performance Monitor (SPM) data collection interface James Zhu
2026-05-13 23:56 ` Ma, Bing
@ 2026-05-15 17:14 ` James Zhu
1 sibling, 0 replies; 10+ messages in thread
From: James Zhu @ 2026-05-15 17:14 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, Bing.Ma, Chris.Freehill, jamesz
This patch implements full Streaming Performance Monitor (SPM) support for
AMD GPUs, enabling real-time performance counter data collection directly
to userspace buffers.
The implementation provides three key operations:
- AMDGPU_SPM_OP_ACQUIRE: Acquire exclusive access to SPM hardware resources
- AMDGPU_SPM_OP_RELEASE: Release SPM hardware for use by other processes
- AMDGPU_SPM_OP_SET_DEST_BUF: Configure destination buffer and manage
counter data streaming
Key features:
- Flexible destination buffer management with configurable timeout behavior
- Automatic detection and reporting of data loss due to ring buffer overflow
- Support for partial buffer fills with explicit data size reporting
- Detailed kernel API documentation with operation semantics
The amdgpu_spm_setdestbuff() function allows profiling tools to efficiently
collect performance data from the GPU. The timeout mechanism enables waiting
for a buffer to fill completely before switching to a new one, or immediate
switching with partial data preservation.
This enables performance analysis tools and profiling frameworks to access
real-time GPU performance metrics without kernel-mode overhead.
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Bing Ma <Bing.Ma@amd.com>
---
amdgpu/amdgpu-symbols.txt | 3 ++
amdgpu/amdgpu.h | 38 ++++++++++++++++
amdgpu/amdgpu_profiler.c | 68 +++++++++++++++++++++++++++++
include/drm/amdgpu_drm.h | 92 ++++++++++++++++++++++++++++++++++++++-
4 files changed, 200 insertions(+), 1 deletion(-)
diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt
index 69840193..e09e265a 100644
--- a/amdgpu/amdgpu-symbols.txt
+++ b/amdgpu/amdgpu-symbols.txt
@@ -89,3 +89,6 @@ amdgpu_free_userqueue
amdgpu_userq_signal
amdgpu_userq_wait
amdgpu_profiler_version
+amdgpu_spm_acquire
+amdgpu_spm_release
+amdgpu_spm_setdestbuff
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 1519fe4c..0fdf5589 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -2130,7 +2130,45 @@ int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
*/
int amdgpu_profiler_version(amdgpu_device_handle dev, uint32_t *version);
+/**
+ * Request exclusive use of SPM
+ * \param dev - \c [in] device handle
+ *
+ * \return 0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_acquire(amdgpu_device_handle dev);
+
+/**
+ * Release exclusive use of SPM
+ * \param dev - \c [in] device handle
+ * \return 0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_release(amdgpu_device_handle dev);
+
+/**
+ * Set up the destination user mode buffer for stream performance
+ * counter data.
+ * \param dev - \c [in] device handle
+ * \param size_in_bytes - \c [in] size of the buffer
+ * \param timeout - \c [in/out] timeout in milliseconds
+ * \param size_copied - \c [out] number of bytes copied
+ * \param dest_mem_addr - \c [in] destination address. Set to NULL
+ * to stop copy on previous buffer
+ * \param is_spm_data_loss - \c [out] true if data was lost
+ *
+ * \return 0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_setdestbuff(
+ amdgpu_device_handle dev,
+ uint32_t size_in_bytes,
+ uint32_t *timeout,
+ uint32_t *size_copied,
+ void *dest_mem_addr,
+ bool *is_spm_data_loss
+ );
+
#ifdef __cplusplus
}
+
#endif
#endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c
index b3d119bc..8f6488a8 100644
--- a/amdgpu/amdgpu_profiler.c
+++ b/amdgpu/amdgpu_profiler.c
@@ -47,3 +47,71 @@ amdgpu_profiler_version(amdgpu_device_handle dev, uint32_t *version)
return ret;
}
+
+drm_public int
+amdgpu_spm_acquire(amdgpu_device_handle dev)
+{
+ int ret;
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_ACQUIRE;
+
+ ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+
+ return ret;
+}
+
+drm_public int
+amdgpu_spm_release(amdgpu_device_handle dev)
+{
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_RELEASE;
+
+ return drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+}
+
+drm_public int
+amdgpu_spm_setdestbuff(
+ amdgpu_device_handle dev,
+ uint32_t size_in_bytes,
+ uint32_t *timeout,
+ uint32_t *size_copied,
+ void *dest_mem_addr,
+ bool *is_spm_data_loss
+ )
+{
+ int ret;
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_SPM;
+ user_arg.spm.op = AMDGPU_SPM_OP_SET_DEST_BUF;
+ user_arg.spm.timeout = *timeout;
+ user_arg.spm.dest_buf = (uint64_t)dest_mem_addr;
+ user_arg.spm.buf_size = size_in_bytes;
+
+ ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+
+ *size_copied = user_arg.spm.bytes_copied;
+ *is_spm_data_loss = user_arg.spm.has_data_loss;
+ *timeout = user_arg.spm.timeout;
+
+ return ret;
+}
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 307242ac..60c73233 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -1698,10 +1698,99 @@ struct drm_amdgpu_info_gpuvm_fault {
#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
/*
- * Supported Profiler Operations
+ * Supported SPM (Streaming Performance Monitor) Operations
*/
+/**
+ * drm_amdgpu_spm_op - SPM ioctl operations
+ *
+ * @AMDGPU_SPM_OP_ACQUIRE: acquire exclusive access to SPM
+ * @AMDGPU_SPM_OP_RELEASE: release exclusive access to SPM
+ * @AMDGPU_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM streaming
+ */
+enum drm_amdgpu_spm_op {
+ AMDGPU_SPM_OP_ACQUIRE,
+ AMDGPU_SPM_OP_RELEASE,
+ AMDGPU_SPM_OP_SET_DEST_BUF
+};
+
+/**
+ * drm_amdgpu_spm_args - Arguments for SPM ioctl
+ *
+ * @op[in]: specifies the operation to perform
+ * @dest_buf[in]: used for the address of the destination buffer
+ * in @AMDGPU_SPM_OP_SET_DEST_BUF
+ * @buf_size[in]: size of the destination buffer
+ * @timeout[in/out]: [in]: timeout in milliseconds, [out]: amount of time left
+ * in the timeout window
+ * @bytes_copied[out]: total amount of data that was copied to the previous dest_buf
+ * @has_data_loss[out]: total count of sub-blocks which have data loss
+ *
+ * This ioctl performs different functions depending on the @op parameter.
+ *
+ * AMDGPU_SPM_OP_ACQUIRE
+ * ------------------------
+ *
+ * Acquires exclusive access of SPM on the specified device for the calling process.
+ * This must be called before using AMDGPU_SPM_OP_SET_DEST_BUF.
+ *
+ * AMDGPU_SPM_OP_RELEASE
+ * ------------------------
+ *
+ * Releases exclusive access of SPM on the specified device for the calling process,
+ * which allows another process to acquire it in the future.
+ *
+ * AMDGPU_SPM_OP_SET_DEST_BUF
+ * -----------------------------
+ *
+ * If @dest_buf is NULL, the destination buffer address is unset and copying of counters
+ * is stopped.
+ *
+ * If @dest_buf is not NULL, it specifies the pointer to a new destination buffer.
+ * @buf_size specifies the size of the buffer.
+ *
+ * If @timeout is non-0, the call will wait for up to @timeout ms for the previous
+ * buffer to be filled. If the previous buffer is filled before the timeout, @timeout
+ * will be updated with the time remaining. If the timeout is exceeded, the function
+ * copies any partial data available into the previous user buffer and returns success.
+ * The amount of valid data in the previous user buffer is indicated by @bytes_copied.
+ *
+ * If @timeout is 0, the function immediately replaces the previous destination buffer
+ * without waiting for the previous buffer to be filled. That means the previous buffer
+ * may only be partially filled, and @bytes_copied will indicate how much data has been
+ * copied to it.
+ *
+ * If data was lost, e.g. due to a ring buffer overflow, @has_data_loss will be non-0.
+ *
+ * Returns negative error code on failure, 0 on success.
+ */
+struct drm_amdgpu_spm_args {
+ __u64 dest_buf;
+ __u32 buf_size;
+ __u32 op;
+ __u32 timeout;
+ __u32 bytes_copied;
+ __u32 has_data_loss;
+ __u32 pad;
+};
+
+/**
+ * drm_amdgpu_spm_buffer_header - SPM Buffer header for drm_amdgpu_spm_args->dest_buf
+ *
+ * @version [out]: spm version
+ * @bytes_copied [out]: amount of data for each sub-block
+ * @has_data_loss: [out]: boolean indicating whether data was lost for each sub-block
+ * (e.g. due to a ring-buffer overflow)
+ */
+struct drm_amdgpu_spm_buffer_header {
+ __u32 version; /* 0-23: minor 24-31: major */
+ __u32 bytes_copied;
+ __u32 has_data_loss;
+ __u32 reserved[5];
+};
+
enum drm_amdgpu_profiler_ops {
AMDGPU_PROFILER_VERSION = 0,
+ AMDGPU_PROFILER_SPM,
};
struct drm_amdgpu_profiler_args {
@@ -1711,6 +1800,7 @@ struct drm_amdgpu_profiler_args {
* lower 16 bit: minor
* higher 16 bit: major
*/
+ struct drm_amdgpu_spm_args spm;
};
};
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring
2026-04-13 19:29 [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring James Zhu
2026-04-13 19:29 ` [PATCH 2/2] amdgpu: Add Streaming Performance Monitor (SPM) data collection interface James Zhu
@ 2026-04-16 14:06 ` James Zhu
2026-05-13 23:55 ` Ma, Bing
` (2 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: James Zhu @ 2026-04-16 14:06 UTC (permalink / raw)
To: James Zhu, amd-gfx, alexander.deucher, Bing.Ma, David.Francis
Cc: Jesse.Zhang, Jenny-Jing.Liu
Ping ...
On 2026-04-13 15:29, James Zhu wrote:
> This patch introduces the foundational profiler infrastructure for AMD GPUs,
> enabling userspace access to performance monitoring capabilities including:
> - Performance Monitoring Counters (PMC)
> - Performance Counter Sampling (PC Sampling)
> - Streaming Performance Monitor (SPM)
>
> The implementation includes:
> - New DRM_AMDGPU_PROFILER IOCTL interface with version query support
> - amdgpu_profiler_version() wrapper function for userspace
> - Profiler operation enumeration and argument structures in the kernel API
> - Build system integration for the new profiler module
>
> The version query operation allows userspace to determine profiler capability
> and compatibility before attempting to use advanced profiling features. Future
> patches will extend this with additional profiler operations (acquire, release,
> configure sampling buffers, etc.).
>
> This foundation enables profiling tools and performance analysis frameworks to
> access GPU performance data directly from userspace, supporting performance
> debugging and optimization workflows.
>
> Signed-off-by: James Zhu <James.Zhu@amd.com>
> ---
> amdgpu/amdgpu.h | 9 ++++++++
> amdgpu/amdgpu_profiler.c | 46 ++++++++++++++++++++++++++++++++++++++++
> amdgpu/meson.build | 2 +-
> include/drm/amdgpu_drm.h | 19 +++++++++++++++++
> 4 files changed, 75 insertions(+), 1 deletion(-)
> create mode 100644 amdgpu/amdgpu_profiler.c
>
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 53144f59..4ec1f6b6 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -2120,6 +2120,15 @@ int amdgpu_userq_wait(amdgpu_device_handle dev,
> int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
> uint64_t tba_addr, uint64_t tba_size,
> uint64_t tma_addr, uint64_t tma_size);
> +
> +/**
> + * Acquire profiler version
> + * \param dev - \c [in] device handle
> + *
> + * \return 0 on success otherwise POSIX Error code
> + */
> +int amdgpu_profiler_version(amdgpu_device_handle dev);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c
> new file mode 100644
> index 00000000..8d4dffe4
> --- /dev/null
> +++ b/amdgpu/amdgpu_profiler.c
> @@ -0,0 +1,46 @@
> +/*
> + * Copyright 2026 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <string.h>
> +#include <errno.h>
> +#include "xf86drm.h"
> +#include "amdgpu_drm.h"
> +#include "amdgpu_internal.h"
> +
> +drm_public int
> +amdgpu_profiler_version(amdgpu_device_handle dev)
> +{
> + int ret;
> + struct drm_amdgpu_profiler_args user_arg;
> +
> + if (!dev)
> + return -EINVAL;
> +
> + memset(&user_arg, 0, sizeof(user_arg));
> + user_arg.op = AMDGPU_PROFILER_VERSION;
> +
> + ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
> + &user_arg, sizeof(user_arg));
> +
> + return ret;
> +}
> diff --git a/amdgpu/meson.build b/amdgpu/meson.build
> index 3962d32c..d781f2e9 100644
> --- a/amdgpu/meson.build
> +++ b/amdgpu/meson.build
> @@ -27,7 +27,7 @@ libdrm_amdgpu = library(
> files(
> 'amdgpu_asic_id.c', 'amdgpu_bo.c', 'amdgpu_cs.c', 'amdgpu_device.c',
> 'amdgpu_gpu_info.c', 'amdgpu_vamgr.c', 'amdgpu_vm.c', 'handle_table.c',
> - 'amdgpu_userq.c',
> + 'amdgpu_userq.c', 'amdgpu_profiler.c',
> ),
> config_file,
> ],
> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
> index ef12e725..307242ac 100644
> --- a/include/drm/amdgpu_drm.h
> +++ b/include/drm/amdgpu_drm.h
> @@ -58,6 +58,7 @@ extern "C" {
> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> #define DRM_AMDGPU_USERQ_WAIT 0x18
> #define DRM_AMDGPU_CWSR 0x20
> +#define DRM_AMDGPU_PROFILER 0x21
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -79,6 +80,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> #define DRM_IOCTL_AMDGPU_CWSR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CWSR, union drm_amdgpu_cwsr)
> +#define DRM_IOCTL_AMDGPU_PROFILER DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_PROFILER, struct drm_amdgpu_profiler_args)
>
> /**
> * DOC: memory domains
> @@ -1695,6 +1697,23 @@ struct drm_amdgpu_info_gpuvm_fault {
> #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */
> #define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
>
> +/*
> + * Supported Profiler Operations
> + */
> +enum drm_amdgpu_profiler_ops {
> + AMDGPU_PROFILER_VERSION = 0,
> +};
> +
> +struct drm_amdgpu_profiler_args {
> + __u32 op; /* amdgpu_profiler_op */
> + union {
> + __u32 version; /* AMDGPU_PROFILER_VERSION_NUM
> + * lower 16 bit: minor
> + * higher 16 bit: major
> + */
> + };
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring
2026-04-13 19:29 [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring James Zhu
2026-04-13 19:29 ` [PATCH 2/2] amdgpu: Add Streaming Performance Monitor (SPM) data collection interface James Zhu
2026-04-16 14:06 ` [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring James Zhu
@ 2026-05-13 23:55 ` Ma, Bing
2026-05-15 15:34 ` James Zhu
2026-05-15 17:13 ` [PATCH v2 " James Zhu
2026-05-15 17:20 ` [PATCH v2 1/2] amdgpu: " James Zhu
4 siblings, 1 reply; 10+ messages in thread
From: Ma, Bing @ 2026-05-13 23:55 UTC (permalink / raw)
To: Zhu, James, amd-gfx@lists.freedesktop.org, Deucher, Alexander,
Francis, David
Cc: Zhang, Jesse(Jie), Liu, Jenny (Jing)
AMD General
Hi James,
I reviewed PATCH 1/2 together with the adjacent PATCH 2/2. A couple of real issues stood out on the userspace/libdrm side:
1. `amdgpu_profiler_version()` does not expose the returned version value. The ioctl populates `user_arg.version`, but the wrapper only returns the ioctl status and gives userspace no way to read the version.
2. `amdgpu_profiler_version` is missing from `amdgpu/amdgpu-symbols.txt`. PATCH 2/2 adds the three new SPM symbols there, so this looks accidental.
Not sure if this is intended behavior for now.
Other than that, I did not notice anything else I would block on at this WIP stage.
Thanks,
Bing
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Bing Ma <Bing.Ma@amd.com>
-----Original Message-----
From: Zhu, James <James.Zhu@amd.com>
Sent: Monday, April 13, 2026 12:30 PM
To: amd-gfx@lists.freedesktop.org; Deucher, Alexander <Alexander.Deucher@amd.com>; Ma, Bing <Bing.Ma@amd.com>; Francis, David <David.Francis@amd.com>
Cc: Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>; Liu, Jenny (Jing) <Jenny-Jing.Liu@amd.com>; Zhu, James <James.Zhu@amd.com>
Subject: [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring
This patch introduces the foundational profiler infrastructure for AMD GPUs, enabling userspace access to performance monitoring capabilities including:
- Performance Monitoring Counters (PMC)
- Performance Counter Sampling (PC Sampling)
- Streaming Performance Monitor (SPM)
The implementation includes:
- New DRM_AMDGPU_PROFILER IOCTL interface with version query support
- amdgpu_profiler_version() wrapper function for userspace
- Profiler operation enumeration and argument structures in the kernel API
- Build system integration for the new profiler module
The version query operation allows userspace to determine profiler capability and compatibility before attempting to use advanced profiling features. Future patches will extend this with additional profiler operations (acquire, release, configure sampling buffers, etc.).
This foundation enables profiling tools and performance analysis frameworks to access GPU performance data directly from userspace, supporting performance debugging and optimization workflows.
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
amdgpu/amdgpu.h | 9 ++++++++
amdgpu/amdgpu_profiler.c | 46 ++++++++++++++++++++++++++++++++++++++++
amdgpu/meson.build | 2 +-
include/drm/amdgpu_drm.h | 19 +++++++++++++++++
4 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 amdgpu/amdgpu_profiler.c
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 53144f59..4ec1f6b6 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -2120,6 +2120,15 @@ int amdgpu_userq_wait(amdgpu_device_handle dev, int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
uint64_t tba_addr, uint64_t tba_size,
uint64_t tma_addr, uint64_t tma_size);
+
+/**
+ * Acquire profiler version
+ * \param dev - \c [in] device handle
+ *
+ * \return 0 on success otherwise POSIX Error code */ int
+amdgpu_profiler_version(amdgpu_device_handle dev);
+
#ifdef __cplusplus
}
#endif
diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c new file mode 100644 index 00000000..8d4dffe4
--- /dev/null
+++ b/amdgpu/amdgpu_profiler.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+obtaining a
+ * copy of this software and associated documentation files (the
+"Software"),
+ * to deal in the Software without restriction, including without
+limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
+sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom
+the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
+DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <string.h>
+#include <errno.h>
+#include "xf86drm.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_internal.h"
+
+drm_public int
+amdgpu_profiler_version(amdgpu_device_handle dev) {
+ int ret;
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_VERSION;
+
+ ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+
+ return ret;
+}
diff --git a/amdgpu/meson.build b/amdgpu/meson.build index 3962d32c..d781f2e9 100644
--- a/amdgpu/meson.build
+++ b/amdgpu/meson.build
@@ -27,7 +27,7 @@ libdrm_amdgpu = library(
files(
'amdgpu_asic_id.c', 'amdgpu_bo.c', 'amdgpu_cs.c', 'amdgpu_device.c',
'amdgpu_gpu_info.c', 'amdgpu_vamgr.c', 'amdgpu_vm.c', 'handle_table.c',
- 'amdgpu_userq.c',
+ 'amdgpu_userq.c', 'amdgpu_profiler.c',
),
config_file,
],
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index ef12e725..307242ac 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -58,6 +58,7 @@ extern "C" {
#define DRM_AMDGPU_USERQ_SIGNAL 0x17
#define DRM_AMDGPU_USERQ_WAIT 0x18
#define DRM_AMDGPU_CWSR 0x20
+#define DRM_AMDGPU_PROFILER 0x21
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -79,6 +80,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
#define DRM_IOCTL_AMDGPU_CWSR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CWSR, union drm_amdgpu_cwsr)
+#define DRM_IOCTL_AMDGPU_PROFILER DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_PROFILER, struct drm_amdgpu_profiler_args)
/**
* DOC: memory domains
@@ -1695,6 +1697,23 @@ struct drm_amdgpu_info_gpuvm_fault {
#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */
#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
+/*
+ * Supported Profiler Operations
+ */
+enum drm_amdgpu_profiler_ops {
+ AMDGPU_PROFILER_VERSION = 0,
+};
+
+struct drm_amdgpu_profiler_args {
+ __u32 op; /* amdgpu_profiler_op */
+ union {
+ __u32 version; /* AMDGPU_PROFILER_VERSION_NUM
+ * lower 16 bit: minor
+ * higher 16 bit: major
+ */
+ };
+};
+
#if defined(__cplusplus)
}
#endif
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring
2026-05-13 23:55 ` Ma, Bing
@ 2026-05-15 15:34 ` James Zhu
0 siblings, 0 replies; 10+ messages in thread
From: James Zhu @ 2026-05-15 15:34 UTC (permalink / raw)
To: Ma, Bing, Zhu, James, amd-gfx@lists.freedesktop.org,
Deucher, Alexander, Francis, David
Cc: Zhang, Jesse(Jie), Liu, Jenny (Jing)
[-- Attachment #1: Type: text/plain, Size: 8088 bytes --]
Hi Bing
Thanks! Answers are inline.
Best Regards!
James Zhu
On 2026-05-13 19:55, Ma, Bing wrote:
> AMD General
>
> Hi James,
>
> I reviewed PATCH 1/2 together with the adjacent PATCH 2/2. A couple of real issues stood out on the userspace/libdrm side:
> 1. `amdgpu_profiler_version()` does not expose the returned version value. The ioctl populates `user_arg.version`, but the wrapper only returns the ioctl status and gives userspace no way to read the version.
[JZ] Good point. I will add version return to caller.
> 2. `amdgpu_profiler_version` is missing from `amdgpu/amdgpu-symbols.txt`. PATCH 2/2 adds the three new SPM symbols there, so this looks accidental.
[JZ]Somehow amdgpu_profiler_version was missing. I will add it.
> Not sure if this is intended behavior for now.
>
> Other than that, I did not notice anything else I would block on at this WIP stage.
>
> Thanks,
> Bing
>
> Signed-off-by: James Zhu<James.Zhu@amd.com>
> Reviewed-by: Bing Ma<Bing.Ma@amd.com>
>
> -----Original Message-----
> From: Zhu, James<James.Zhu@amd.com>
> Sent: Monday, April 13, 2026 12:30 PM
> To:amd-gfx@lists.freedesktop.org; Deucher, Alexander<Alexander.Deucher@amd.com>; Ma, Bing<Bing.Ma@amd.com>; Francis, David<David.Francis@amd.com>
> Cc: Zhang, Jesse(Jie)<Jesse.Zhang@amd.com>; Liu, Jenny (Jing)<Jenny-Jing.Liu@amd.com>; Zhu, James<James.Zhu@amd.com>
> Subject: [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring
>
> This patch introduces the foundational profiler infrastructure for AMD GPUs, enabling userspace access to performance monitoring capabilities including:
> - Performance Monitoring Counters (PMC)
> - Performance Counter Sampling (PC Sampling)
> - Streaming Performance Monitor (SPM)
>
> The implementation includes:
> - New DRM_AMDGPU_PROFILER IOCTL interface with version query support
> - amdgpu_profiler_version() wrapper function for userspace
> - Profiler operation enumeration and argument structures in the kernel API
> - Build system integration for the new profiler module
>
> The version query operation allows userspace to determine profiler capability and compatibility before attempting to use advanced profiling features. Future patches will extend this with additional profiler operations (acquire, release, configure sampling buffers, etc.).
>
> This foundation enables profiling tools and performance analysis frameworks to access GPU performance data directly from userspace, supporting performance debugging and optimization workflows.
>
> Signed-off-by: James Zhu<James.Zhu@amd.com>
> ---
> amdgpu/amdgpu.h | 9 ++++++++
> amdgpu/amdgpu_profiler.c | 46 ++++++++++++++++++++++++++++++++++++++++
> amdgpu/meson.build | 2 +-
> include/drm/amdgpu_drm.h | 19 +++++++++++++++++
> 4 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 amdgpu/amdgpu_profiler.c
>
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 53144f59..4ec1f6b6 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -2120,6 +2120,15 @@ int amdgpu_userq_wait(amdgpu_device_handle dev, int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
> uint64_t tba_addr, uint64_t tba_size,
> uint64_t tma_addr, uint64_t tma_size);
> +
> +/**
> + * Acquire profiler version
> + * \param dev - \c [in] device handle
> + *
> + * \return 0 on success otherwise POSIX Error code */ int
> +amdgpu_profiler_version(amdgpu_device_handle dev);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c new file mode 100644 index 00000000..8d4dffe4
> --- /dev/null
> +++ b/amdgpu/amdgpu_profiler.c
> @@ -0,0 +1,46 @@
> +/*
> + * Copyright 2026 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <string.h>
> +#include <errno.h>
> +#include "xf86drm.h"
> +#include "amdgpu_drm.h"
> +#include "amdgpu_internal.h"
> +
> +drm_public int
> +amdgpu_profiler_version(amdgpu_device_handle dev) {
> + int ret;
> + struct drm_amdgpu_profiler_args user_arg;
> +
> + if (!dev)
> + return -EINVAL;
> +
> + memset(&user_arg, 0, sizeof(user_arg));
> + user_arg.op = AMDGPU_PROFILER_VERSION;
> +
> + ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
> + &user_arg, sizeof(user_arg));
> +
> + return ret;
> +}
> diff --git a/amdgpu/meson.build b/amdgpu/meson.build index 3962d32c..d781f2e9 100644
> --- a/amdgpu/meson.build
> +++ b/amdgpu/meson.build
> @@ -27,7 +27,7 @@ libdrm_amdgpu = library(
> files(
> 'amdgpu_asic_id.c', 'amdgpu_bo.c', 'amdgpu_cs.c', 'amdgpu_device.c',
> 'amdgpu_gpu_info.c', 'amdgpu_vamgr.c', 'amdgpu_vm.c', 'handle_table.c',
> - 'amdgpu_userq.c',
> + 'amdgpu_userq.c', 'amdgpu_profiler.c',
> ),
> config_file,
> ],
> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index ef12e725..307242ac 100644
> --- a/include/drm/amdgpu_drm.h
> +++ b/include/drm/amdgpu_drm.h
> @@ -58,6 +58,7 @@ extern "C" {
> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> #define DRM_AMDGPU_USERQ_WAIT 0x18
> #define DRM_AMDGPU_CWSR 0x20
> +#define DRM_AMDGPU_PROFILER 0x21
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -79,6 +80,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> #define DRM_IOCTL_AMDGPU_CWSR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CWSR, union drm_amdgpu_cwsr)
> +#define DRM_IOCTL_AMDGPU_PROFILER DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_PROFILER, struct drm_amdgpu_profiler_args)
>
> /**
> * DOC: memory domains
> @@ -1695,6 +1697,23 @@ struct drm_amdgpu_info_gpuvm_fault {
> #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */
> #define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
>
> +/*
> + * Supported Profiler Operations
> + */
> +enum drm_amdgpu_profiler_ops {
> + AMDGPU_PROFILER_VERSION = 0,
> +};
> +
> +struct drm_amdgpu_profiler_args {
> + __u32 op; /* amdgpu_profiler_op */
> + union {
> + __u32 version; /* AMDGPU_PROFILER_VERSION_NUM
> + * lower 16 bit: minor
> + * higher 16 bit: major
> + */
> + };
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
> --
> 2.34.1
>
[-- Attachment #2: Type: text/html, Size: 9541 bytes --]
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring
2026-04-13 19:29 [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring James Zhu
` (2 preceding siblings ...)
2026-05-13 23:55 ` Ma, Bing
@ 2026-05-15 17:13 ` James Zhu
2026-05-15 17:17 ` James Zhu
2026-05-15 17:20 ` [PATCH v2 1/2] amdgpu: " James Zhu
4 siblings, 1 reply; 10+ messages in thread
From: James Zhu @ 2026-05-15 17:13 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, Bing.Ma, Chris.Freehill, jamesz
This patch introduces the foundational profiler infrastructure for AMD GPUs,
enabling userspace access to performance monitoring capabilities including:
- Performance Monitoring Counters (PMC)
- Performance Counter Sampling (PC Sampling)
- Streaming Performance Monitor (SPM)
The implementation includes:
- New DRM_AMDGPU_PROFILER IOCTL interface with version query support
- amdgpu_profiler_version() wrapper function for userspace
- Profiler operation enumeration and argument structures in the kernel API
- Build system integration for the new profiler module
The version query operation allows userspace to determine profiler capability
and compatibility before attempting to use advanced profiling features. Future
patches will extend this with additional profiler operations (acquire, release,
configure sampling buffers, etc.).
This foundation enables profiling tools and performance analysis frameworks to
access GPU performance data directly from userspace, supporting performance
debugging and optimization workflows.
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
amdgpu/amdgpu.h | 9 ++++++++
amdgpu/amdgpu_profiler.c | 46 ++++++++++++++++++++++++++++++++++++++++
amdgpu/meson.build | 2 +-
include/drm/amdgpu_drm.h | 19 +++++++++++++++++
4 files changed, 75 insertions(+), 1 deletion(-)
create mode 100644 amdgpu/amdgpu_profiler.c
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 53144f59..4ec1f6b6 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -2120,6 +2120,15 @@ int amdgpu_userq_wait(amdgpu_device_handle dev,
int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
uint64_t tba_addr, uint64_t tba_size,
uint64_t tma_addr, uint64_t tma_size);
+
+/**
+ * Acquire profiler version
+ * \param dev - \c [in] device handle
+ *
+ * \return 0 on success otherwise POSIX Error code
+ */
+int amdgpu_profiler_version(amdgpu_device_handle dev);
+
#ifdef __cplusplus
}
#endif
diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c
new file mode 100644
index 00000000..8d4dffe4
--- /dev/null
+++ b/amdgpu/amdgpu_profiler.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <string.h>
+#include <errno.h>
+#include "xf86drm.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_internal.h"
+
+drm_public int
+amdgpu_profiler_version(amdgpu_device_handle dev)
+{
+ int ret;
+ struct drm_amdgpu_profiler_args user_arg;
+
+ if (!dev)
+ return -EINVAL;
+
+ memset(&user_arg, 0, sizeof(user_arg));
+ user_arg.op = AMDGPU_PROFILER_VERSION;
+
+ ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+ &user_arg, sizeof(user_arg));
+
+ return ret;
+}
diff --git a/amdgpu/meson.build b/amdgpu/meson.build
index 3962d32c..d781f2e9 100644
--- a/amdgpu/meson.build
+++ b/amdgpu/meson.build
@@ -27,7 +27,7 @@ libdrm_amdgpu = library(
files(
'amdgpu_asic_id.c', 'amdgpu_bo.c', 'amdgpu_cs.c', 'amdgpu_device.c',
'amdgpu_gpu_info.c', 'amdgpu_vamgr.c', 'amdgpu_vm.c', 'handle_table.c',
- 'amdgpu_userq.c',
+ 'amdgpu_userq.c', 'amdgpu_profiler.c',
),
config_file,
],
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index ef12e725..307242ac 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -58,6 +58,7 @@ extern "C" {
#define DRM_AMDGPU_USERQ_SIGNAL 0x17
#define DRM_AMDGPU_USERQ_WAIT 0x18
#define DRM_AMDGPU_CWSR 0x20
+#define DRM_AMDGPU_PROFILER 0x21
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -79,6 +80,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
#define DRM_IOCTL_AMDGPU_CWSR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CWSR, union drm_amdgpu_cwsr)
+#define DRM_IOCTL_AMDGPU_PROFILER DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_PROFILER, struct drm_amdgpu_profiler_args)
/**
* DOC: memory domains
@@ -1695,6 +1697,23 @@ struct drm_amdgpu_info_gpuvm_fault {
#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */
#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
+/*
+ * Supported Profiler Operations
+ */
+enum drm_amdgpu_profiler_ops {
+ AMDGPU_PROFILER_VERSION = 0,
+};
+
+struct drm_amdgpu_profiler_args {
+ __u32 op; /* amdgpu_profiler_op */
+ union {
+ __u32 version; /* AMDGPU_PROFILER_VERSION_NUM
+ * lower 16 bit: minor
+ * higher 16 bit: major
+ */
+ };
+};
+
#if defined(__cplusplus)
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring
2026-05-15 17:13 ` [PATCH v2 " James Zhu
@ 2026-05-15 17:17 ` James Zhu
0 siblings, 0 replies; 10+ messages in thread
From: James Zhu @ 2026-05-15 17:17 UTC (permalink / raw)
To: James Zhu, amd-gfx; +Cc: alexander.deucher, Bing.Ma, Chris.Freehill
drop this version.
Thanks!
On 2026-05-15 13:13, James Zhu wrote:
> This patch introduces the foundational profiler infrastructure for AMD GPUs,
> enabling userspace access to performance monitoring capabilities including:
> - Performance Monitoring Counters (PMC)
> - Performance Counter Sampling (PC Sampling)
> - Streaming Performance Monitor (SPM)
>
> The implementation includes:
> - New DRM_AMDGPU_PROFILER IOCTL interface with version query support
> - amdgpu_profiler_version() wrapper function for userspace
> - Profiler operation enumeration and argument structures in the kernel API
> - Build system integration for the new profiler module
>
> The version query operation allows userspace to determine profiler capability
> and compatibility before attempting to use advanced profiling features. Future
> patches will extend this with additional profiler operations (acquire, release,
> configure sampling buffers, etc.).
>
> This foundation enables profiling tools and performance analysis frameworks to
> access GPU performance data directly from userspace, supporting performance
> debugging and optimization workflows.
>
> Signed-off-by: James Zhu <James.Zhu@amd.com>
> ---
> amdgpu/amdgpu.h | 9 ++++++++
> amdgpu/amdgpu_profiler.c | 46 ++++++++++++++++++++++++++++++++++++++++
> amdgpu/meson.build | 2 +-
> include/drm/amdgpu_drm.h | 19 +++++++++++++++++
> 4 files changed, 75 insertions(+), 1 deletion(-)
> create mode 100644 amdgpu/amdgpu_profiler.c
>
> diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
> index 53144f59..4ec1f6b6 100644
> --- a/amdgpu/amdgpu.h
> +++ b/amdgpu/amdgpu.h
> @@ -2120,6 +2120,15 @@ int amdgpu_userq_wait(amdgpu_device_handle dev,
> int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
> uint64_t tba_addr, uint64_t tba_size,
> uint64_t tma_addr, uint64_t tma_size);
> +
> +/**
> + * Acquire profiler version
> + * \param dev - \c [in] device handle
> + *
> + * \return 0 on success otherwise POSIX Error code
> + */
> +int amdgpu_profiler_version(amdgpu_device_handle dev);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c
> new file mode 100644
> index 00000000..8d4dffe4
> --- /dev/null
> +++ b/amdgpu/amdgpu_profiler.c
> @@ -0,0 +1,46 @@
> +/*
> + * Copyright 2026 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <string.h>
> +#include <errno.h>
> +#include "xf86drm.h"
> +#include "amdgpu_drm.h"
> +#include "amdgpu_internal.h"
> +
> +drm_public int
> +amdgpu_profiler_version(amdgpu_device_handle dev)
> +{
> + int ret;
> + struct drm_amdgpu_profiler_args user_arg;
> +
> + if (!dev)
> + return -EINVAL;
> +
> + memset(&user_arg, 0, sizeof(user_arg));
> + user_arg.op = AMDGPU_PROFILER_VERSION;
> +
> + ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
> + &user_arg, sizeof(user_arg));
> +
> + return ret;
> +}
> diff --git a/amdgpu/meson.build b/amdgpu/meson.build
> index 3962d32c..d781f2e9 100644
> --- a/amdgpu/meson.build
> +++ b/amdgpu/meson.build
> @@ -27,7 +27,7 @@ libdrm_amdgpu = library(
> files(
> 'amdgpu_asic_id.c', 'amdgpu_bo.c', 'amdgpu_cs.c', 'amdgpu_device.c',
> 'amdgpu_gpu_info.c', 'amdgpu_vamgr.c', 'amdgpu_vm.c', 'handle_table.c',
> - 'amdgpu_userq.c',
> + 'amdgpu_userq.c', 'amdgpu_profiler.c',
> ),
> config_file,
> ],
> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
> index ef12e725..307242ac 100644
> --- a/include/drm/amdgpu_drm.h
> +++ b/include/drm/amdgpu_drm.h
> @@ -58,6 +58,7 @@ extern "C" {
> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> #define DRM_AMDGPU_USERQ_WAIT 0x18
> #define DRM_AMDGPU_CWSR 0x20
> +#define DRM_AMDGPU_PROFILER 0x21
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -79,6 +80,7 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> #define DRM_IOCTL_AMDGPU_CWSR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CWSR, union drm_amdgpu_cwsr)
> +#define DRM_IOCTL_AMDGPU_PROFILER DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_PROFILER, struct drm_amdgpu_profiler_args)
>
> /**
> * DOC: memory domains
> @@ -1695,6 +1697,23 @@ struct drm_amdgpu_info_gpuvm_fault {
> #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */
> #define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
>
> +/*
> + * Supported Profiler Operations
> + */
> +enum drm_amdgpu_profiler_ops {
> + AMDGPU_PROFILER_VERSION = 0,
> +};
> +
> +struct drm_amdgpu_profiler_args {
> + __u32 op; /* amdgpu_profiler_op */
> + union {
> + __u32 version; /* AMDGPU_PROFILER_VERSION_NUM
> + * lower 16 bit: minor
> + * higher 16 bit: major
> + */
> + };
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring
2026-04-13 19:29 [PATCH 1/2] amdgpu: Add profiler IOCTL interface for performance monitoring James Zhu
` (3 preceding siblings ...)
2026-05-15 17:13 ` [PATCH v2 " James Zhu
@ 2026-05-15 17:20 ` James Zhu
4 siblings, 0 replies; 10+ messages in thread
From: James Zhu @ 2026-05-15 17:20 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, Bing.Ma, Chris.Freehill, jamesz
This patch introduces the foundational profiler infrastructure for AMD GPUs,
enabling userspace access to performance monitoring capabilities including:
- Performance Counter Sampling (PC Sampling)
- Streaming Performance Monitor (SPM)
The implementation includes:
- New DRM_AMDGPU_PROFILER IOCTL interface with version query support
- amdgpu_profiler_version() wrapper function for userspace
- Profiler operation enumeration and argument structures in the kernel API
- Build system integration for the new profiler module
The version query operation allows userspace to determine profiler capability
and compatibility before attempting to use advanced profiling features. Future
patches will extend this with additional profiler operations (acquire, release,
configure sampling buffers, etc.).
This foundation enables profiling tools and performance analysis frameworks to
access GPU performance data directly from userspace, supporting performance
debugging and optimization workflows.
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Bing Ma <Bing.Ma@amd.com>
---
amdgpu/amdgpu-symbols.txt | 1 +
amdgpu/amdgpu.h | 10 ++++++++
amdgpu/amdgpu_profiler.c | 49 +++++++++++++++++++++++++++++++++++++++
amdgpu/meson.build | 2 +-
include/drm/amdgpu_drm.h | 19 +++++++++++++++
5 files changed, 80 insertions(+), 1 deletion(-)
create mode 100644 amdgpu/amdgpu_profiler.c
diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt
index 8cd5559c..69840193 100644
--- a/amdgpu/amdgpu-symbols.txt
+++ b/amdgpu/amdgpu-symbols.txt
@@ -88,3 +88,4 @@ amdgpu_create_userqueue
amdgpu_free_userqueue
amdgpu_userq_signal
amdgpu_userq_wait
+amdgpu_profiler_version
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 53144f59..1519fe4c 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -2120,6 +2120,16 @@ int amdgpu_userq_wait(amdgpu_device_handle dev,
int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
uint64_t tba_addr, uint64_t tba_size,
uint64_t tma_addr, uint64_t tma_size);
+
+/**
+ * Acquire profiler version
+ * \param dev - \c [in] device handle
+ * \param version - \c [out] Pointer to the "version" return value
+ *
+ * \return 0 on success otherwise POSIX Error code
+ */
+int amdgpu_profiler_version(amdgpu_device_handle dev, uint32_t *version);
+
#ifdef __cplusplus
}
#endif
diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c
new file mode 100644
index 00000000..b3d119bc
--- /dev/null
+++ b/amdgpu/amdgpu_profiler.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <string.h>
+#include <errno.h>
+#include "xf86drm.h"
+#include "amdgpu_drm.h"
+#include "amdgpu_internal.h"
+
+drm_public int
+amdgpu_profiler_version(amdgpu_device_handle dev, uint32_t *version)
+{
+	int ret;
+	struct drm_amdgpu_profiler_args user_arg;
+
+	if (!dev || !version)
+		return -EINVAL;
+
+	memset(&user_arg, 0, sizeof(user_arg));
+	user_arg.op = AMDGPU_PROFILER_VERSION;
+
+	ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+				  &user_arg, sizeof(user_arg));
+	/* ret is 0 on success; only then is user_arg.version meaningful. */
+	if (!ret)
+		*version = user_arg.version;
+
+	return ret;
+}
diff --git a/amdgpu/meson.build b/amdgpu/meson.build
index 3962d32c..d781f2e9 100644
--- a/amdgpu/meson.build
+++ b/amdgpu/meson.build
@@ -27,7 +27,7 @@ libdrm_amdgpu = library(
files(
'amdgpu_asic_id.c', 'amdgpu_bo.c', 'amdgpu_cs.c', 'amdgpu_device.c',
'amdgpu_gpu_info.c', 'amdgpu_vamgr.c', 'amdgpu_vm.c', 'handle_table.c',
- 'amdgpu_userq.c',
+ 'amdgpu_userq.c', 'amdgpu_profiler.c',
),
config_file,
],
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index ef12e725..307242ac 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -58,6 +58,7 @@ extern "C" {
#define DRM_AMDGPU_USERQ_SIGNAL 0x17
#define DRM_AMDGPU_USERQ_WAIT 0x18
#define DRM_AMDGPU_CWSR 0x20
+#define DRM_AMDGPU_PROFILER 0x21
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -79,6 +80,7 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
#define DRM_IOCTL_AMDGPU_CWSR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CWSR, union drm_amdgpu_cwsr)
+#define DRM_IOCTL_AMDGPU_PROFILER DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_PROFILER, struct drm_amdgpu_profiler_args)
/**
* DOC: memory domains
@@ -1695,6 +1697,23 @@ struct drm_amdgpu_info_gpuvm_fault {
#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */
#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
+/*
+ * Supported Profiler Operations
+ */
+enum drm_amdgpu_profiler_ops {
+ AMDGPU_PROFILER_VERSION = 0, /* query the profiler interface version */
+};
+
+struct drm_amdgpu_profiler_args {
+ __u32 op; /* enum drm_amdgpu_profiler_ops */
+ union {
+ __u32 version; /* AMDGPU_PROFILER_VERSION_NUM
+ * lower 16 bit: minor
+ * higher 16 bit: major
+ */
+ };
+};
+
#if defined(__cplusplus)
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread