* [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers @ 2021-08-24 12:16 Tom St Denis 2021-08-24 12:20 ` Christian König 0 siblings, 1 reply; 8+ messages in thread From: Tom St Denis @ 2021-08-24 12:16 UTC (permalink / raw) To: amd-gfx; +Cc: Tom St Denis This new debugfs interface uses an IOCTL interface in order to pass along state information like SRBM and GRBM bank switching. This new interface also allows a full 32-bit MMIO address range which the previous didn't. With this new design we have room to grow the flexibility of the file as need be. Signed-off-by: Tom St Denis <tom.stdenis@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 177 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++ 2 files changed, 209 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 277128846dd1..ab2d92f84da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -279,6 +279,173 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); } +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_regs2_data *rd; + + rd = kzalloc(sizeof *rd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + rd->adev = file_inode(file)->i_private; + file->private_data = rd; + + return 0; +} + +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static int amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, int write_en) +{ + struct amdgpu_debugfs_regs2_data *rd = f->private_data; + struct amdgpu_device *adev = rd->adev; + int result = 0, r; + uint32_t value; + + if (rd->state.offset & 0x3) + return -EINVAL; + + if (rd->state.use_grbm) { + if (rd->state.grbm.se == 0x3FF) + rd->state.grbm.se = 0xFFFFFFFF; + if (rd->state.grbm.sh == 0x3FF) + rd->state.grbm.sh = 0xFFFFFFFF; + if (rd->state.grbm.instance == 0x3FF) + rd->state.grbm.instance = 0xFFFFFFFF; + } + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + r = amdgpu_virt_enable_access_debugfs(adev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + if (rd->state.use_grbm) { + if ((rd->state.grbm.sh != 0xFFFFFFFF && rd->state.grbm.sh >= adev->gfx.config.max_sh_per_se) || + (rd->state.grbm.se != 0xFFFFFFFF && rd->state.grbm.se >= adev->gfx.config.max_shader_engines)) { + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + amdgpu_virt_disable_access_debugfs(adev); + return -EINVAL; + } + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, rd->state.grbm.se, + rd->state.grbm.sh, + rd->state.grbm.instance); + } else if (rd->state.use_grbm) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, rd->state.srbm.me, rd->state.srbm.pipe, + rd->state.srbm.queue, rd->state.srbm.vmid); + } + + if (rd->state.pg_lock) + mutex_lock(&adev->pm.mutex); + + if (!write_en) { + value = RREG32(rd->state.offset >> 2); + r = put_user(value, (uint32_t *)buf); + } else { + r = get_user(value, (uint32_t *)buf); + if (!r) + amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> 2, value); + } + if (r) { + result = r; + goto end; + } + result = 0; +end: + if (rd->state.use_grbm) { + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } else if (rd->state.use_srbm) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + + if (rd->state.pg_lock) + mutex_unlock(&adev->pm.mutex); + + // in umr (the likely user of this) flags are set per file operation + // which means they're never "unset" explicitly. To avoid breaking + // this convention we unset the flags after each operation + // flags are for a single call (need to be set for every read/write) + rd->state.use_grbm = 0; + rd->state.use_srbm = 0; + rd->state.pg_lock = 0; + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + amdgpu_virt_disable_access_debugfs(adev); + return result; +} + +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) +{ + struct amdgpu_debugfs_regs2_data *rd = f->private_data; + unsigned char st[32], v; + int r, x; + + // always read first 4 bytes of data + for (x = 0; x < 4; x++) { + if ((r = get_user(v, (unsigned char *)data))) { + return r; + } + ++data; + st[x] = v; + } + + // first 4 bytes are offset + rd->state.offset = ((u32)st[0]) | ((u32)st[1] << 8) | + ((u32)st[2] << 16) | ((u32)st[3] << 24); + + switch (cmd) { + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: + for (x = 4; x < 32; x++) { + if ((r = get_user(v, (unsigned char *)data))) { + return r; + } + ++data; + st[x] = v; + } + + // next byte contains the flag + // we only consume the remainder of the state if bit 1 is set + // this allows the subsequent read/write + rd->state.use_grbm = (st[4] & 1) ? 1 : 0; + rd->state.use_srbm = (st[4] & 2) ? 1 : 0; + rd->state.pg_lock = (st[4] & 4) ? 1 : 0; + + // next 6 bytes are grbm data + rd->state.grbm.se = ((u32)st[5]) | ((u32)st[6] << 8); + rd->state.grbm.sh = ((u32)st[7]) | ((u32)st[8] << 8); + rd->state.grbm.instance = ((u32)st[9]) | ((u32)st[10] << 8); + + // next 8 are srbm data + rd->state.srbm.me = ((u32)st[11]) | ((u32)st[12] << 8); + rd->state.srbm.pipe = ((u32)st[13]) | ((u32)st[14] << 8); + rd->state.srbm.queue = ((u32)st[15]) | ((u32)st[16] << 8); + rd->state.srbm.vmid = ((u32)st[17]) | ((u32)st[18] << 8); + break; + case AMDGPU_DEBUGFS_REGS2_IOC_READ: + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 0); + case AMDGPU_DEBUGFS_REGS2_IOC_WRITE: + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 1); + default: + return -EINVAL; + } + return 0; +} /** * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register @@ -1091,6 +1258,14 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, return result; } +static const struct file_operations amdgpu_debugfs_regs2_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, + .open = amdgpu_debugfs_regs2_open, + .release = amdgpu_debugfs_regs2_release, + .llseek = default_llseek +}; + static const struct file_operations amdgpu_debugfs_regs_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_read, @@ -1148,6 +1323,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, + &amdgpu_debugfs_regs2_fops, &amdgpu_debugfs_regs_didt_fops, &amdgpu_debugfs_regs_pcie_fops, &amdgpu_debugfs_regs_smc_fops, @@ -1160,6 +1336,7 @@ static const struct file_operations *debugfs_regs[] = { static const char *debugfs_regs_names[] = { "amdgpu_regs", + "amdgpu_regs2", "amdgpu_regs_didt", "amdgpu_regs_pcie", "amdgpu_regs_smc", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 141a8474e24f..04c81cd4bcc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -22,6 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ +#include <linux/ioctl.h> /* * Debugfs @@ -38,3 +39,34 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); + +struct amdgpu_debugfs_regs2_data { + struct amdgpu_device *adev; + struct { + // regs state + int use_srbm, + use_grbm, + pg_lock; + struct { + u32 se, sh, instance; + } grbm; + struct { + u32 me, pipe, queue, vmid; + } srbm; + u32 offset; + } state; +}; + +enum AMDGPU_DEBUGFS_REGS2_CMDS { + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_REGS2_CMD_READ, + AMDGPU_DEBUGFS_REGS2_CMD_WRITE, +}; + +struct amdgpu_debugfs_regs2_iocdata { + unsigned char t[32]; +}; + +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) +#define AMDGPU_DEBUGFS_REGS2_IOC_READ _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_WRITE, struct amdgpu_debugfs_regs2_iocdata) +#define AMDGPU_DEBUGFS_REGS2_IOC_WRITE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_READ, struct amdgpu_debugfs_regs2_iocdata) -- 2.31.1 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers 2021-08-24 12:16 [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers Tom St Denis @ 2021-08-24 12:20 ` Christian König 2021-08-24 12:27 ` StDenis, Tom 0 siblings, 1 reply; 8+ messages in thread From: Christian König @ 2021-08-24 12:20 UTC (permalink / raw) To: Tom St Denis, amd-gfx Am 24.08.21 um 14:16 schrieb Tom St Denis: > This new debugfs interface uses an IOCTL interface in order to pass > along state information like SRBM and GRBM bank switching. This > new interface also allows a full 32-bit MMIO address range which > the previous didn't. With this new design we have room to grow > the flexibility of the file as need be. > > Signed-off-by: Tom St Denis <tom.stdenis@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 177 ++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++ > 2 files changed, 209 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > index 277128846dd1..ab2d92f84da5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > @@ -279,6 +279,173 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, > return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); > } > > +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file) > +{ > + struct amdgpu_debugfs_regs2_data *rd; > + > + rd = kzalloc(sizeof *rd, GFP_KERNEL); > + if (!rd) > + return -ENOMEM; > + rd->adev = file_inode(file)->i_private; > + file->private_data = rd; > + > + return 0; > +} > + > +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file) > +{ > + kfree(file->private_data); > + return 0; > +} > + > +static int amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, int write_en) > +{ > + struct amdgpu_debugfs_regs2_data *rd = f->private_data; > + struct amdgpu_device *adev = rd->adev; > + int result = 0, r; > + uint32_t value; > + > + if (rd->state.offset & 0x3) > + return -EINVAL; > + > + if (rd->state.use_grbm) { > + if (rd->state.grbm.se == 0x3FF) > + rd->state.grbm.se = 0xFFFFFFFF; > + if (rd->state.grbm.sh == 0x3FF) > + rd->state.grbm.sh = 0xFFFFFFFF; > + if (rd->state.grbm.instance == 0x3FF) > + rd->state.grbm.instance = 0xFFFFFFFF; > + } > + > + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); > + if (r < 0) { > + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > + return r; > + } > + > + r = amdgpu_virt_enable_access_debugfs(adev); > + if (r < 0) { > + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > + return r; > + } > + > + if (rd->state.use_grbm) { > + if ((rd->state.grbm.sh != 0xFFFFFFFF && rd->state.grbm.sh >= adev->gfx.config.max_sh_per_se) || > + (rd->state.grbm.se != 0xFFFFFFFF && rd->state.grbm.se >= adev->gfx.config.max_shader_engines)) { > + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > + amdgpu_virt_disable_access_debugfs(adev); > + return -EINVAL; > + } > + mutex_lock(&adev->grbm_idx_mutex); > + amdgpu_gfx_select_se_sh(adev, rd->state.grbm.se, > + rd->state.grbm.sh, > + rd->state.grbm.instance); > + } else if (rd->state.use_grbm) { > + mutex_lock(&adev->srbm_mutex); > + amdgpu_gfx_select_me_pipe_q(adev, rd->state.srbm.me, rd->state.srbm.pipe, > + rd->state.srbm.queue, rd->state.srbm.vmid); > + } > + > + if (rd->state.pg_lock) > + mutex_lock(&adev->pm.mutex); > + > + if (!write_en) { > + value = RREG32(rd->state.offset >> 2); > + r = put_user(value, (uint32_t *)buf); > + } else { > + r = get_user(value, (uint32_t *)buf); > + if (!r) > + amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> 2, value); > + } > + if (r) { > + result = r; > + goto end; > + } > + result = 0; > +end: > + if (rd->state.use_grbm) { > + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); > + mutex_unlock(&adev->grbm_idx_mutex); > + } else if (rd->state.use_srbm) { > + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); > + mutex_unlock(&adev->srbm_mutex); > + } > + > + if (rd->state.pg_lock) > + mutex_unlock(&adev->pm.mutex); > + > + // in umr (the likely user of this) flags are set per file operation > + // which means they're never "unset" explicitly. To avoid breaking > + // this convention we unset the flags after each operation > + // flags are for a single call (need to be set for every read/write) > + rd->state.use_grbm = 0; > + rd->state.use_srbm = 0; > + rd->state.pg_lock = 0; > + > + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > + > + amdgpu_virt_disable_access_debugfs(adev); > + return result; > +} > + > +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) > +{ > + struct amdgpu_debugfs_regs2_data *rd = f->private_data; > + unsigned char st[32], v; > + int r, x; > + > + // always read first 4 bytes of data > + for (x = 0; x < 4; x++) { > + if ((r = get_user(v, (unsigned char *)data))) { > + return r; > + } > + ++data; > + st[x] = v; > + } > + > + // first 4 bytes are offset > + rd->state.offset = ((u32)st[0]) | ((u32)st[1] << 8) | > + ((u32)st[2] << 16) | ((u32)st[3] << 24); > + > + switch (cmd) { > + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: > + for (x = 4; x < 32; x++) { > + if ((r = get_user(v, (unsigned char *)data))) { > + return r; > + } > + ++data; > + st[x] = v; > + } > + > + // next byte contains the flag > + // we only consume the remainder of the state if bit 1 is set > + // this allows the subsequent read/write > + rd->state.use_grbm = (st[4] & 1) ? 1 : 0; > + rd->state.use_srbm = (st[4] & 2) ? 1 : 0; > + rd->state.pg_lock = (st[4] & 4) ? 1 : 0; > + > + // next 6 bytes are grbm data > + rd->state.grbm.se = ((u32)st[5]) | ((u32)st[6] << 8); > + rd->state.grbm.sh = ((u32)st[7]) | ((u32)st[8] << 8); > + rd->state.grbm.instance = ((u32)st[9]) | ((u32)st[10] << 8); > + > + // next 8 are srbm data > + rd->state.srbm.me = ((u32)st[11]) | ((u32)st[12] << 8); > + rd->state.srbm.pipe = ((u32)st[13]) | ((u32)st[14] << 8); > + rd->state.srbm.queue = ((u32)st[15]) | ((u32)st[16] << 8); > + rd->state.srbm.vmid = ((u32)st[17]) | ((u32)st[18] << 8); > + break; > + case AMDGPU_DEBUGFS_REGS2_IOC_READ: > + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 0); > + case AMDGPU_DEBUGFS_REGS2_IOC_WRITE: > + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 1); > + default: > + return -EINVAL; > + } > + return 0; > +} > > /** > * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register > @@ -1091,6 +1258,14 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, > return result; > } > > +static const struct file_operations amdgpu_debugfs_regs2_fops = { > + .owner = THIS_MODULE, > + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, > + .open = amdgpu_debugfs_regs2_open, > + .release = amdgpu_debugfs_regs2_release, > + .llseek = default_llseek > +}; > + > static const struct file_operations amdgpu_debugfs_regs_fops = { > .owner = THIS_MODULE, > .read = amdgpu_debugfs_regs_read, > @@ -1148,6 +1323,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { > > static const struct file_operations *debugfs_regs[] = { > &amdgpu_debugfs_regs_fops, > + &amdgpu_debugfs_regs2_fops, > &amdgpu_debugfs_regs_didt_fops, > &amdgpu_debugfs_regs_pcie_fops, > &amdgpu_debugfs_regs_smc_fops, > @@ -1160,6 +1336,7 @@ static const struct file_operations *debugfs_regs[] = { > > static const char *debugfs_regs_names[] = { > "amdgpu_regs", > + "amdgpu_regs2", > "amdgpu_regs_didt", > "amdgpu_regs_pcie", > "amdgpu_regs_smc", > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > index 141a8474e24f..04c81cd4bcc7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > @@ -22,6 +22,7 @@ > * OTHER DEALINGS IN THE SOFTWARE. > * > */ > +#include <linux/ioctl.h> > > /* > * Debugfs > @@ -38,3 +39,34 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); > void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); > void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); > int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); > + > +struct amdgpu_debugfs_regs2_data { > + struct amdgpu_device *adev; > + struct { > + // regs state > + int use_srbm, > + use_grbm, > + pg_lock; > + struct { > + u32 se, sh, instance; > + } grbm; > + struct { > + u32 me, pipe, queue, vmid; > + } srbm; > + u32 offset; > + } state; > +}; This stuff needs to be in some shared header then. > + > +enum AMDGPU_DEBUGFS_REGS2_CMDS { > + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, > + AMDGPU_DEBUGFS_REGS2_CMD_READ, > + AMDGPU_DEBUGFS_REGS2_CMD_WRITE, Why not just using the normal read and write functions? Christian. > +}; > + > +struct amdgpu_debugfs_regs2_iocdata { > + unsigned char t[32]; > +}; > + > +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) > +#define AMDGPU_DEBUGFS_REGS2_IOC_READ _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_WRITE, struct amdgpu_debugfs_regs2_iocdata) > +#define AMDGPU_DEBUGFS_REGS2_IOC_WRITE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_READ, struct amdgpu_debugfs_regs2_iocdata) ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers 2021-08-24 12:20 ` Christian König @ 2021-08-24 12:27 ` StDenis, Tom 2021-08-24 12:34 ` Christian König 0 siblings, 1 reply; 8+ messages in thread From: StDenis, Tom @ 2021-08-24 12:27 UTC (permalink / raw) To: Christian König, amd-gfx@lists.freedesktop.org [AMD Official Use Only] What do you mean a "shared header?" How would they be shared between kernel and user? As for why not read/write. Jus wanted to keep it simple. It's not really performance bound. umr never does reads/writes larger than 32-bits anyways. It doesn't have to be this way though but I figured the fewer LOC the better. Tom ________________________________________ From: Christian König <ckoenig.leichtzumerken@gmail.com> Sent: Tuesday, August 24, 2021 08:20 To: StDenis, Tom; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers Am 24.08.21 um 14:16 schrieb Tom St Denis: > This new debugfs interface uses an IOCTL interface in order to pass > along state information like SRBM and GRBM bank switching. This > new interface also allows a full 32-bit MMIO address range which > the previous didn't. With this new design we have room to grow > the flexibility of the file as need be. > > Signed-off-by: Tom St Denis <tom.stdenis@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 177 ++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++ > 2 files changed, 209 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > index 277128846dd1..ab2d92f84da5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > @@ -279,6 +279,173 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, > return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); > } > > +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file) > +{ > + struct amdgpu_debugfs_regs2_data *rd; > + > + rd = kzalloc(sizeof *rd, GFP_KERNEL); > + if (!rd) > + return -ENOMEM; > + rd->adev = file_inode(file)->i_private; > + file->private_data = rd; > + > + return 0; > +} > + > +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file) > +{ > + kfree(file->private_data); > + return 0; > +} > + > +static int amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, int write_en) > +{ > + struct amdgpu_debugfs_regs2_data *rd = f->private_data; > + struct amdgpu_device *adev = rd->adev; > + int result = 0, r; > + uint32_t value; > + > + if (rd->state.offset & 0x3) > + return -EINVAL; > + > + if (rd->state.use_grbm) { > + if (rd->state.grbm.se == 0x3FF) > + rd->state.grbm.se = 0xFFFFFFFF; > + if (rd->state.grbm.sh == 0x3FF) > + rd->state.grbm.sh = 0xFFFFFFFF; > + if (rd->state.grbm.instance == 0x3FF) > + rd->state.grbm.instance = 0xFFFFFFFF; > + } > + > + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); > + if (r < 0) { > + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > + return r; > + } > + > + r = amdgpu_virt_enable_access_debugfs(adev); > + if (r < 0) { > + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > + return r; > + } > + > + if (rd->state.use_grbm) { > + if ((rd->state.grbm.sh != 0xFFFFFFFF && rd->state.grbm.sh >= adev->gfx.config.max_sh_per_se) || > + (rd->state.grbm.se != 0xFFFFFFFF && rd->state.grbm.se >= adev->gfx.config.max_shader_engines)) { > + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > + amdgpu_virt_disable_access_debugfs(adev); > + return -EINVAL; > + } > + mutex_lock(&adev->grbm_idx_mutex); > + amdgpu_gfx_select_se_sh(adev, rd->state.grbm.se, > + rd->state.grbm.sh, > + rd->state.grbm.instance); > + } else if (rd->state.use_grbm) { > + mutex_lock(&adev->srbm_mutex); > + amdgpu_gfx_select_me_pipe_q(adev, rd->state.srbm.me, rd->state.srbm.pipe, > + rd->state.srbm.queue, rd->state.srbm.vmid); > + } > + > + if (rd->state.pg_lock) > + mutex_lock(&adev->pm.mutex); > + > + if (!write_en) { > + value = RREG32(rd->state.offset >> 2); > + r = put_user(value, (uint32_t *)buf); > + } else { > + r = get_user(value, (uint32_t *)buf); > + if (!r) > + amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> 2, value); > + } > + if (r) { > + result = r; > + goto end; > + } > + result = 0; > +end: > + if (rd->state.use_grbm) { > + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); > + mutex_unlock(&adev->grbm_idx_mutex); > + } else if (rd->state.use_srbm) { > + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); > + mutex_unlock(&adev->srbm_mutex); > + } > + > + if (rd->state.pg_lock) > + mutex_unlock(&adev->pm.mutex); > + > + // in umr (the likely user of this) flags are set per file operation > + // which means they're never "unset" explicitly. To avoid breaking > + // this convention we unset the flags after each operation > + // flags are for a single call (need to be set for every read/write) > + rd->state.use_grbm = 0; > + rd->state.use_srbm = 0; > + rd->state.pg_lock = 0; > + > + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > + > + amdgpu_virt_disable_access_debugfs(adev); > + return result; > +} > + > +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) > +{ > + struct amdgpu_debugfs_regs2_data *rd = f->private_data; > + unsigned char st[32], v; > + int r, x; > + > + // always read first 4 bytes of data > + for (x = 0; x < 4; x++) { > + if ((r = get_user(v, (unsigned char *)data))) { > + return r; > + } > + ++data; > + st[x] = v; > + } > + > + // first 4 bytes are offset > + rd->state.offset = ((u32)st[0]) | ((u32)st[1] << 8) | > + ((u32)st[2] << 16) | ((u32)st[3] << 24); > + > + switch (cmd) { > + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: > + for (x = 4; x < 32; x++) { > + if ((r = get_user(v, (unsigned char *)data))) { > + return r; > + } > + ++data; > + st[x] = v; > + } > + > + // next byte contains the flag > + // we only consume the remainder of the state if bit 1 is set > + // this allows the subsequent read/write > + rd->state.use_grbm = (st[4] & 1) ? 1 : 0; > + rd->state.use_srbm = (st[4] & 2) ? 1 : 0; > + rd->state.pg_lock = (st[4] & 4) ? 1 : 0; > + > + // next 6 bytes are grbm data > + rd->state.grbm.se = ((u32)st[5]) | ((u32)st[6] << 8); > + rd->state.grbm.sh = ((u32)st[7]) | ((u32)st[8] << 8); > + rd->state.grbm.instance = ((u32)st[9]) | ((u32)st[10] << 8); > + > + // next 8 are srbm data > + rd->state.srbm.me = ((u32)st[11]) | ((u32)st[12] << 8); > + rd->state.srbm.pipe = ((u32)st[13]) | ((u32)st[14] << 8); > + rd->state.srbm.queue = ((u32)st[15]) | ((u32)st[16] << 8); > + rd->state.srbm.vmid = ((u32)st[17]) | ((u32)st[18] << 8); > + break; > + case AMDGPU_DEBUGFS_REGS2_IOC_READ: > + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 0); > + case AMDGPU_DEBUGFS_REGS2_IOC_WRITE: > + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 1); > + default: > + return -EINVAL; > + } > + return 0; > +} > > /** > * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register > @@ -1091,6 +1258,14 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, > return result; > } > > +static const struct file_operations amdgpu_debugfs_regs2_fops = { > + .owner = THIS_MODULE, > + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, > + .open = amdgpu_debugfs_regs2_open, > + .release = amdgpu_debugfs_regs2_release, > + .llseek = default_llseek > +}; > + > static const struct file_operations amdgpu_debugfs_regs_fops = { > .owner = THIS_MODULE, > .read = amdgpu_debugfs_regs_read, > @@ -1148,6 +1323,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { > > static const struct file_operations *debugfs_regs[] = { > &amdgpu_debugfs_regs_fops, > + &amdgpu_debugfs_regs2_fops, > &amdgpu_debugfs_regs_didt_fops, > &amdgpu_debugfs_regs_pcie_fops, > &amdgpu_debugfs_regs_smc_fops, > @@ -1160,6 +1336,7 @@ static const struct file_operations *debugfs_regs[] = { > > static const char *debugfs_regs_names[] = { > "amdgpu_regs", > + "amdgpu_regs2", > "amdgpu_regs_didt", > "amdgpu_regs_pcie", > "amdgpu_regs_smc", > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > index 141a8474e24f..04c81cd4bcc7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > @@ -22,6 +22,7 @@ > * OTHER DEALINGS IN THE SOFTWARE. > * > */ > +#include <linux/ioctl.h> > > /* > * Debugfs > @@ -38,3 +39,34 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); > void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); > void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); > int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); > + > +struct amdgpu_debugfs_regs2_data { > + struct amdgpu_device *adev; > + struct { > + // regs state > + int use_srbm, > + use_grbm, > + pg_lock; > + struct { > + u32 se, sh, instance; > + } grbm; > + struct { > + u32 me, pipe, queue, vmid; > + } srbm; > + u32 offset; > + } state; > +}; This stuff needs to be in some shared header then. > + > +enum AMDGPU_DEBUGFS_REGS2_CMDS { > + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, > + AMDGPU_DEBUGFS_REGS2_CMD_READ, > + AMDGPU_DEBUGFS_REGS2_CMD_WRITE, Why not just using the normal read and write functions? Christian. > +}; > + > +struct amdgpu_debugfs_regs2_iocdata { > + unsigned char t[32]; > +}; > + > +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) > +#define AMDGPU_DEBUGFS_REGS2_IOC_READ _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_WRITE, struct amdgpu_debugfs_regs2_iocdata) > +#define AMDGPU_DEBUGFS_REGS2_IOC_WRITE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_READ, struct amdgpu_debugfs_regs2_iocdata) ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers 2021-08-24 12:27 ` StDenis, Tom @ 2021-08-24 12:34 ` Christian König 2021-08-24 12:42 ` Tom St Denis 0 siblings, 1 reply; 8+ messages in thread From: Christian König @ 2021-08-24 12:34 UTC (permalink / raw) To: StDenis, Tom, amd-gfx@lists.freedesktop.org Am 24.08.21 um 14:27 schrieb StDenis, Tom: > [AMD Official Use Only] > > What do you mean a "shared header?" How would they be shared between kernel and user? Somewhere in the include/uapi/drm/ folder I think. Either add that to amdgpu_drm.h or maybe amdgpu_debugfs.h? Or just keep it as a structure in amdgpu_debugfs.h with a comment that it is used for an IOCTL. Just not something hard coded like first byte is this, second that etc.... > As for why not read/write. Jus wanted to keep it simple. It's not really performance bound. umr never does reads/writes larger than 32-bits anyways. It doesn't have to be this way though but I figured the fewer LOC the better. I think it would be cleaner if we would have separate functions for this. As far as I can see you also don't need the dance with the power managerment etc for the IOCTL. It's just get_user() into your structure. BTW: In the kernel coding style put "switch" and "case" on the same column, otherwise checkpatch.pl might complain. Christian. > > Tom > > ________________________________________ > From: Christian König <ckoenig.leichtzumerken@gmail.com> > Sent: Tuesday, August 24, 2021 08:20 > To: StDenis, Tom; amd-gfx@lists.freedesktop.org > Subject: Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers > > > > Am 24.08.21 um 14:16 schrieb Tom St Denis: >> This new debugfs interface uses an IOCTL interface in order to pass >> along state information like SRBM and GRBM bank switching. This >> new interface also allows a full 32-bit MMIO address range which >> the previous didn't. With this new design we have room to grow >> the flexibility of the file as need be. >> >> Signed-off-by: Tom St Denis <tom.stdenis@amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 177 ++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++ >> 2 files changed, 209 insertions(+) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >> index 277128846dd1..ab2d92f84da5 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >> @@ -279,6 +279,173 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, >> return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); >> } >> >> +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file) >> +{ >> + struct amdgpu_debugfs_regs2_data *rd; >> + >> + rd = kzalloc(sizeof *rd, GFP_KERNEL); >> + if (!rd) >> + return -ENOMEM; >> + rd->adev = file_inode(file)->i_private; >> + file->private_data = rd; >> + >> + return 0; >> +} >> + >> +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file) >> +{ >> + kfree(file->private_data); >> + return 0; >> +} >> + >> +static int amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, int write_en) >> +{ >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; >> + struct amdgpu_device *adev = rd->adev; >> + int result = 0, r; >> + uint32_t value; >> + >> + if (rd->state.offset & 0x3) >> + return -EINVAL; >> + >> + if (rd->state.use_grbm) { >> + if (rd->state.grbm.se == 0x3FF) >> + rd->state.grbm.se = 0xFFFFFFFF; >> + if (rd->state.grbm.sh == 0x3FF) >> + rd->state.grbm.sh = 0xFFFFFFFF; >> + if (rd->state.grbm.instance == 0x3FF) >> + rd->state.grbm.instance = 0xFFFFFFFF; >> + } >> + >> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); >> + if (r < 0) { >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >> + return r; >> + } >> + >> + r = amdgpu_virt_enable_access_debugfs(adev); >> + if (r < 0) { >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >> + return r; >> + } >> + >> + if (rd->state.use_grbm) { >> + if ((rd->state.grbm.sh != 0xFFFFFFFF && rd->state.grbm.sh >= adev->gfx.config.max_sh_per_se) || >> + (rd->state.grbm.se != 0xFFFFFFFF && rd->state.grbm.se >= adev->gfx.config.max_shader_engines)) { >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >> + amdgpu_virt_disable_access_debugfs(adev); >> + return -EINVAL; >> + } >> + mutex_lock(&adev->grbm_idx_mutex); >> + amdgpu_gfx_select_se_sh(adev, rd->state.grbm.se, >> + rd->state.grbm.sh, >> + rd->state.grbm.instance); >> + } else if (rd->state.use_grbm) { >> + mutex_lock(&adev->srbm_mutex); >> + amdgpu_gfx_select_me_pipe_q(adev, rd->state.srbm.me, rd->state.srbm.pipe, >> + rd->state.srbm.queue, rd->state.srbm.vmid); >> + } >> + >> + if (rd->state.pg_lock) >> + mutex_lock(&adev->pm.mutex); >> + >> + if (!write_en) { >> + value = RREG32(rd->state.offset >> 2); >> + r = put_user(value, (uint32_t *)buf); >> + } else { >> + r = get_user(value, (uint32_t *)buf); >> + if (!r) >> + amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> 2, value); >> + } >> + if (r) { >> + result = r; >> + goto end; >> + } >> + result = 0; >> +end: >> + if (rd->state.use_grbm) { >> + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); >> + mutex_unlock(&adev->grbm_idx_mutex); >> + } else if (rd->state.use_srbm) { >> + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); >> + mutex_unlock(&adev->srbm_mutex); >> + } >> + >> + if (rd->state.pg_lock) >> + mutex_unlock(&adev->pm.mutex); >> + >> + // in umr (the likely user of this) flags are set per file operation >> + // which means they're never "unset" explicitly. To avoid breaking >> + // this convention we unset the flags after each operation >> + // flags are for a single call (need to be set for every read/write) >> + rd->state.use_grbm = 0; >> + rd->state.use_srbm = 0; >> + rd->state.pg_lock = 0; >> + >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >> + >> + amdgpu_virt_disable_access_debugfs(adev); >> + return result; >> +} >> + >> +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) >> +{ >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; >> + unsigned char st[32], v; >> + int r, x; >> + >> + // always read first 4 bytes of data >> + for (x = 0; x < 4; x++) { >> + if ((r = get_user(v, (unsigned char *)data))) { >> + return r; >> + } >> + ++data; >> + st[x] = v; >> + } >> + >> + // first 4 bytes are offset >> + rd->state.offset = ((u32)st[0]) | ((u32)st[1] << 8) | >> + ((u32)st[2] << 16) | ((u32)st[3] << 24); >> + >> + switch (cmd) { >> + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: >> + for (x = 4; x < 32; x++) { >> + if ((r = get_user(v, (unsigned char *)data))) { >> + return r; >> + } >> + ++data; >> + st[x] = v; >> + } >> + >> + // next byte contains the flag >> + // we only consume the remainder of the state if bit 1 is set >> + // this allows the subsequent read/write >> + rd->state.use_grbm = (st[4] & 1) ? 1 : 0; >> + rd->state.use_srbm = (st[4] & 2) ? 1 : 0; >> + rd->state.pg_lock = (st[4] & 4) ? 1 : 0; >> + >> + // next 6 bytes are grbm data >> + rd->state.grbm.se = ((u32)st[5]) | ((u32)st[6] << 8); >> + rd->state.grbm.sh = ((u32)st[7]) | ((u32)st[8] << 8); >> + rd->state.grbm.instance = ((u32)st[9]) | ((u32)st[10] << 8); >> + >> + // next 8 are srbm data >> + rd->state.srbm.me = ((u32)st[11]) | ((u32)st[12] << 8); >> + rd->state.srbm.pipe = ((u32)st[13]) | ((u32)st[14] << 8); >> + rd->state.srbm.queue = ((u32)st[15]) | ((u32)st[16] << 8); >> + rd->state.srbm.vmid = ((u32)st[17]) | ((u32)st[18] << 8); >> + break; >> + case AMDGPU_DEBUGFS_REGS2_IOC_READ: >> + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 0); >> + case AMDGPU_DEBUGFS_REGS2_IOC_WRITE: >> + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 1); >> + default: >> + return -EINVAL; >> + } >> + return 0; >> +} >> >> /** >> * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register >> @@ -1091,6 +1258,14 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, >> return result; >> } >> >> +static const struct file_operations amdgpu_debugfs_regs2_fops = { >> + .owner = THIS_MODULE, >> + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, >> + .open = amdgpu_debugfs_regs2_open, >> + .release = amdgpu_debugfs_regs2_release, >> + .llseek = default_llseek >> +}; >> + >> static const struct file_operations amdgpu_debugfs_regs_fops = { >> .owner = THIS_MODULE, >> .read = amdgpu_debugfs_regs_read, >> @@ -1148,6 +1323,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { >> >> static const struct file_operations *debugfs_regs[] = { >> &amdgpu_debugfs_regs_fops, >> + &amdgpu_debugfs_regs2_fops, >> &amdgpu_debugfs_regs_didt_fops, >> &amdgpu_debugfs_regs_pcie_fops, >> &amdgpu_debugfs_regs_smc_fops, >> @@ -1160,6 +1336,7 @@ static const struct file_operations *debugfs_regs[] = { >> >> static const char *debugfs_regs_names[] = { >> "amdgpu_regs", >> + "amdgpu_regs2", >> "amdgpu_regs_didt", >> "amdgpu_regs_pcie", >> "amdgpu_regs_smc", >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >> index 141a8474e24f..04c81cd4bcc7 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >> @@ -22,6 +22,7 @@ >> * OTHER DEALINGS IN THE SOFTWARE. >> * >> */ >> +#include <linux/ioctl.h> >> >> /* >> * Debugfs >> @@ -38,3 +39,34 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); >> void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); >> void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); >> int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); >> + >> +struct amdgpu_debugfs_regs2_data { >> + struct amdgpu_device *adev; >> + struct { >> + // regs state >> + int use_srbm, >> + use_grbm, >> + pg_lock; >> + struct { >> + u32 se, sh, instance; >> + } grbm; >> + struct { >> + u32 me, pipe, queue, vmid; >> + } srbm; >> + u32 offset; >> + } state; >> +}; > This stuff needs to be in some shared header then. > >> + >> +enum AMDGPU_DEBUGFS_REGS2_CMDS { >> + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, >> + AMDGPU_DEBUGFS_REGS2_CMD_READ, >> + AMDGPU_DEBUGFS_REGS2_CMD_WRITE, > Why not just using the normal read and write functions? > > Christian. > >> +}; >> + >> +struct amdgpu_debugfs_regs2_iocdata { >> + unsigned char t[32]; >> +}; >> + >> +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) >> +#define AMDGPU_DEBUGFS_REGS2_IOC_READ _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_WRITE, struct amdgpu_debugfs_regs2_iocdata) >> +#define AMDGPU_DEBUGFS_REGS2_IOC_WRITE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_READ, struct amdgpu_debugfs_regs2_iocdata) ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers 2021-08-24 12:34 ` Christian König @ 2021-08-24 12:42 ` Tom St Denis 2021-08-24 13:22 ` Christian König 0 siblings, 1 reply; 8+ messages in thread From: Tom St Denis @ 2021-08-24 12:42 UTC (permalink / raw) To: Christian König; +Cc: StDenis, Tom, amd-gfx@lists.freedesktop.org [-- Attachment #1: Type: text/plain, Size: 13927 bytes --] The IOCTL data is in the debugfs header as it is. I could move that to the amdgpu_drm.h and include it from amdgpu_debugfs.h. I'll re-write the STATE IOCTL to use a struct and then test against what I have in umr. Refactoring the read/write is trivial and I'll do that no problem (with style fixes). Cheers, Tom On Tue, Aug 24, 2021 at 8:34 AM Christian König < ckoenig.leichtzumerken@gmail.com> wrote: > Am 24.08.21 um 14:27 schrieb StDenis, Tom: > > [AMD Official Use Only] > > > > What do you mean a "shared header?" How would they be shared between > kernel and user? > > Somewhere in the include/uapi/drm/ folder I think. Either add that to > amdgpu_drm.h or maybe amdgpu_debugfs.h? > > Or just keep it as a structure in amdgpu_debugfs.h with a comment that > it is used for an IOCTL. > > Just not something hard coded like first byte is this, second that etc.... > > > As for why not read/write. Jus wanted to keep it simple. It's not > really performance bound. umr never does reads/writes larger than 32-bits > anyways. It doesn't have to be this way though but I figured the fewer LOC > the better. > > I think it would be cleaner if we would have separate functions for this. > > As far as I can see you also don't need the dance with the power > managerment etc for the IOCTL. It's just get_user() into your structure. > > BTW: In the kernel coding style put "switch" and "case" on the same > column, otherwise checkpatch.pl might complain. > > Christian. > > > > > Tom > > > > ________________________________________ > > From: Christian König <ckoenig.leichtzumerken@gmail.com> > > Sent: Tuesday, August 24, 2021 08:20 > > To: StDenis, Tom; amd-gfx@lists.freedesktop.org > > Subject: Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO > registers > > > > > > > > Am 24.08.21 um 14:16 schrieb Tom St Denis: > >> This new debugfs interface uses an IOCTL interface in order to pass > >> along state information like SRBM and GRBM bank switching. This > >> new interface also allows a full 32-bit MMIO address range which > >> the previous didn't. With this new design we have room to grow > >> the flexibility of the file as need be. > >> > >> Signed-off-by: Tom St Denis <tom.stdenis@amd.com> > >> --- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 177 > ++++++++++++++++++++ > >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++ > >> 2 files changed, 209 insertions(+) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > >> index 277128846dd1..ab2d92f84da5 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > >> @@ -279,6 +279,173 @@ static ssize_t amdgpu_debugfs_regs_write(struct > file *f, const char __user *buf, > >> return amdgpu_debugfs_process_reg_op(false, f, (char __user > *)buf, size, pos); > >> } > >> > >> +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file > *file) > >> +{ > >> + struct amdgpu_debugfs_regs2_data *rd; > >> + > >> + rd = kzalloc(sizeof *rd, GFP_KERNEL); > >> + if (!rd) > >> + return -ENOMEM; > >> + rd->adev = file_inode(file)->i_private; > >> + file->private_data = rd; > >> + > >> + return 0; > >> +} > >> + > >> +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct > file *file) > >> +{ > >> + kfree(file->private_data); > >> + return 0; > >> +} > >> + > >> +static int amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, > int write_en) > >> +{ > >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; > >> + struct amdgpu_device *adev = rd->adev; > >> + int result = 0, r; > >> + uint32_t value; > >> + > >> + if (rd->state.offset & 0x3) > >> + return -EINVAL; > >> + > >> + if (rd->state.use_grbm) { > >> + if (rd->state.grbm.se == 0x3FF) > >> + rd->state.grbm.se = 0xFFFFFFFF; > >> + if (rd->state.grbm.sh == 0x3FF) > >> + rd->state.grbm.sh = 0xFFFFFFFF; > >> + if (rd->state.grbm.instance == 0x3FF) > >> + rd->state.grbm.instance = 0xFFFFFFFF; > >> + } > >> + > >> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); > >> + if (r < 0) { > >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > >> + return r; > >> + } > >> + > >> + r = amdgpu_virt_enable_access_debugfs(adev); > >> + if (r < 0) { > >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > >> + return r; > >> + } > >> + > >> + if (rd->state.use_grbm) { > >> + if ((rd->state.grbm.sh != 0xFFFFFFFF && rd->state.grbm.sh > >= adev->gfx.config.max_sh_per_se) || > >> + (rd->state.grbm.se != 0xFFFFFFFF && rd->state.grbm.se > >= adev->gfx.config.max_shader_engines)) { > >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > >> + > pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > >> + amdgpu_virt_disable_access_debugfs(adev); > >> + return -EINVAL; > >> + } > >> + mutex_lock(&adev->grbm_idx_mutex); > >> + amdgpu_gfx_select_se_sh(adev, rd->state.grbm.se, > >> + rd-> > state.grbm.sh, > >> + > rd->state.grbm.instance); > >> + } else if (rd->state.use_grbm) { > >> + mutex_lock(&adev->srbm_mutex); > >> + amdgpu_gfx_select_me_pipe_q(adev, rd->state.srbm.me, > rd->state.srbm.pipe, > >> + > rd->state.srbm.queue, rd->state.srbm.vmid); > >> + } > >> + > >> + if (rd->state.pg_lock) > >> + mutex_lock(&adev->pm.mutex); > >> + > >> + if (!write_en) { > >> + value = RREG32(rd->state.offset >> 2); > >> + r = put_user(value, (uint32_t *)buf); > >> + } else { > >> + r = get_user(value, (uint32_t *)buf); > >> + if (!r) > >> + amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> > 2, value); > >> + } > >> + if (r) { > >> + result = r; > >> + goto end; > >> + } > >> + result = 0; > >> +end: > >> + if (rd->state.use_grbm) { > >> + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, > 0xffffffff); > >> + mutex_unlock(&adev->grbm_idx_mutex); > >> + } else if (rd->state.use_srbm) { > >> + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); > >> + mutex_unlock(&adev->srbm_mutex); > >> + } > >> + > >> + if (rd->state.pg_lock) > >> + mutex_unlock(&adev->pm.mutex); > >> + > >> + // in umr (the likely user of this) flags are set per file > operation > >> + // which means they're never "unset" explicitly. To avoid > breaking > >> + // this convention we unset the flags after each operation > >> + // flags are for a single call (need to be set for every > read/write) > >> + rd->state.use_grbm = 0; > >> + rd->state.use_srbm = 0; > >> + rd->state.pg_lock = 0; > >> + > >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > >> + > >> + amdgpu_virt_disable_access_debugfs(adev); > >> + return result; > >> +} > >> + > >> +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int > cmd, unsigned long data) > >> +{ > >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; > >> + unsigned char st[32], v; > >> + int r, x; > >> + > >> + // always read first 4 bytes of data > >> + for (x = 0; x < 4; x++) { > >> + if ((r = get_user(v, (unsigned char *)data))) { > >> + return r; > >> + } > >> + ++data; > >> + st[x] = v; > >> + } > >> + > >> + // first 4 bytes are offset > >> + rd->state.offset = ((u32)st[0]) | ((u32)st[1] << 8) | > >> + ((u32)st[2] << 16) | > ((u32)st[3] << 24); > >> + > >> + switch (cmd) { > >> + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: > >> + for (x = 4; x < 32; x++) { > >> + if ((r = get_user(v, (unsigned char > *)data))) { > >> + return r; > >> + } > >> + ++data; > >> + st[x] = v; > >> + } > >> + > >> + // next byte contains the flag > >> + // we only consume the remainder of the state if > bit 1 is set > >> + // this allows the subsequent read/write > >> + rd->state.use_grbm = (st[4] & 1) ? 1 : 0; > >> + rd->state.use_srbm = (st[4] & 2) ? 1 : 0; > >> + rd->state.pg_lock = (st[4] & 4) ? 1 : 0; > >> + > >> + // next 6 bytes are grbm data > >> + rd->state.grbm.se = ((u32)st[5]) | > ((u32)st[6] << 8); > >> + rd->state.grbm.sh = ((u32)st[7]) | > ((u32)st[8] << 8); > >> + rd->state.grbm.instance = ((u32)st[9]) | > ((u32)st[10] << 8); > >> + > >> + // next 8 are srbm data > >> + rd->state.srbm.me = ((u32)st[11]) | > ((u32)st[12] << 8); > >> + rd->state.srbm.pipe = ((u32)st[13]) | > ((u32)st[14] << 8); > >> + rd->state.srbm.queue = ((u32)st[15]) | > ((u32)st[16] << 8); > >> + rd->state.srbm.vmid = ((u32)st[17]) | > ((u32)st[18] << 8); > >> + break; > >> + case AMDGPU_DEBUGFS_REGS2_IOC_READ: > >> + return amdgpu_debugfs_regs2_op(f, (char __user > *)data, 0); > >> + case AMDGPU_DEBUGFS_REGS2_IOC_WRITE: > >> + return amdgpu_debugfs_regs2_op(f, (char __user > *)data, 1); > >> + default: > >> + return -EINVAL; > >> + } > >> + return 0; > >> +} > >> > >> /** > >> * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register > >> @@ -1091,6 +1258,14 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct > file *f, char __user *buf, > >> return result; > >> } > >> > >> +static const struct file_operations amdgpu_debugfs_regs2_fops = { > >> + .owner = THIS_MODULE, > >> + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, > >> + .open = amdgpu_debugfs_regs2_open, > >> + .release = amdgpu_debugfs_regs2_release, > >> + .llseek = default_llseek > >> +}; > >> + > >> static const struct file_operations amdgpu_debugfs_regs_fops = { > >> .owner = THIS_MODULE, > >> .read = amdgpu_debugfs_regs_read, > >> @@ -1148,6 +1323,7 @@ static const struct file_operations > amdgpu_debugfs_gfxoff_fops = { > >> > >> static const struct file_operations *debugfs_regs[] = { > >> &amdgpu_debugfs_regs_fops, > >> + &amdgpu_debugfs_regs2_fops, > >> &amdgpu_debugfs_regs_didt_fops, > >> &amdgpu_debugfs_regs_pcie_fops, > >> &amdgpu_debugfs_regs_smc_fops, > >> @@ -1160,6 +1336,7 @@ static const struct file_operations > *debugfs_regs[] = { > >> > >> static const char *debugfs_regs_names[] = { > >> "amdgpu_regs", > >> + "amdgpu_regs2", > >> "amdgpu_regs_didt", > >> "amdgpu_regs_pcie", > >> "amdgpu_regs_smc", > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > >> index 141a8474e24f..04c81cd4bcc7 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > >> @@ -22,6 +22,7 @@ > >> * OTHER DEALINGS IN THE SOFTWARE. > >> * > >> */ > >> +#include <linux/ioctl.h> > >> > >> /* > >> * Debugfs > >> @@ -38,3 +39,34 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device > *adev); > >> void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); > >> void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); > >> int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); > >> + > >> +struct amdgpu_debugfs_regs2_data { > >> + struct amdgpu_device *adev; > >> + struct { > >> + // regs state > >> + int use_srbm, > >> + use_grbm, > >> + pg_lock; > >> + struct { > >> + u32 se, sh, instance; > >> + } grbm; > >> + struct { > >> + u32 me, pipe, queue, vmid; > >> + } srbm; > >> + u32 offset; > >> + } state; > >> +}; > > This stuff needs to be in some shared header then. > > > >> + > >> +enum AMDGPU_DEBUGFS_REGS2_CMDS { > >> + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, > >> + AMDGPU_DEBUGFS_REGS2_CMD_READ, > >> + AMDGPU_DEBUGFS_REGS2_CMD_WRITE, > > Why not just using the normal read and write functions? > > > > Christian. > > > >> +}; > >> + > >> +struct amdgpu_debugfs_regs2_iocdata { > >> + unsigned char t[32]; > >> +}; > >> + > >> +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, > AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) > >> +#define AMDGPU_DEBUGFS_REGS2_IOC_READ _IOWR(0x20, > AMDGPU_DEBUGFS_REGS2_CMD_WRITE, struct amdgpu_debugfs_regs2_iocdata) > >> +#define AMDGPU_DEBUGFS_REGS2_IOC_WRITE _IOWR(0x20, > AMDGPU_DEBUGFS_REGS2_CMD_READ, struct amdgpu_debugfs_regs2_iocdata) > > [-- Attachment #2: Type: text/html, Size: 19522 bytes --] ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers 2021-08-24 12:42 ` Tom St Denis @ 2021-08-24 13:22 ` Christian König 2021-08-24 13:28 ` Tom St Denis 0 siblings, 1 reply; 8+ messages in thread From: Christian König @ 2021-08-24 13:22 UTC (permalink / raw) To: Tom St Denis; +Cc: StDenis, Tom, amd-gfx@lists.freedesktop.org [-- Attachment #1: Type: text/plain, Size: 16716 bytes --] Am 24.08.21 um 14:42 schrieb Tom St Denis: > The IOCTL data is in the debugfs header as it is. I could move that to > the amdgpu_drm.h and include it from amdgpu_debugfs.h. Na, keep it like that and just add a comment. On second thought I don't want to raise any discussion on the mailing list if we should have a debugfs structure in the UAPI headers. Christian. > > I'll re-write the STATE IOCTL to use a struct and then test against > what I have in umr. > > Refactoring the read/write is trivial and I'll do that no problem > (with style fixes). > > Cheers, > Tom > > On Tue, Aug 24, 2021 at 8:34 AM Christian König > <ckoenig.leichtzumerken@gmail.com > <mailto:ckoenig.leichtzumerken@gmail.com>> wrote: > > Am 24.08.21 um 14:27 schrieb StDenis, Tom: > > [AMD Official Use Only] > > > > What do you mean a "shared header?" How would they be shared > between kernel and user? > > Somewhere in the include/uapi/drm/ folder I think. Either add that to > amdgpu_drm.h or maybe amdgpu_debugfs.h? > > Or just keep it as a structure in amdgpu_debugfs.h with a comment > that > it is used for an IOCTL. > > Just not something hard coded like first byte is this, second that > etc.... > > > As for why not read/write. Jus wanted to keep it simple. It's > not really performance bound. umr never does reads/writes larger > than 32-bits anyways. It doesn't have to be this way though but I > figured the fewer LOC the better. > > I think it would be cleaner if we would have separate functions > for this. > > As far as I can see you also don't need the dance with the power > managerment etc for the IOCTL. It's just get_user() into your > structure. > > BTW: In the kernel coding style put "switch" and "case" on the same > column, otherwise checkpatch.pl <http://checkpatch.pl> might complain. > > Christian. > > > > > Tom > > > > ________________________________________ > > From: Christian König <ckoenig.leichtzumerken@gmail.com > <mailto:ckoenig.leichtzumerken@gmail.com>> > > Sent: Tuesday, August 24, 2021 08:20 > > To: StDenis, Tom; amd-gfx@lists.freedesktop.org > <mailto:amd-gfx@lists.freedesktop.org> > > Subject: Re: [PATCH] drm/amd/amdgpu: New debugfs interface for > MMIO registers > > > > > > > > Am 24.08.21 um 14:16 schrieb Tom St Denis: > >> This new debugfs interface uses an IOCTL interface in order to pass > >> along state information like SRBM and GRBM bank switching. This > >> new interface also allows a full 32-bit MMIO address range which > >> the previous didn't. With this new design we have room to grow > >> the flexibility of the file as need be. > >> > >> Signed-off-by: Tom St Denis <tom.stdenis@amd.com > <mailto:tom.stdenis@amd.com>> > >> --- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 177 > ++++++++++++++++++++ > >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++ > >> 2 files changed, 209 insertions(+) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > >> index 277128846dd1..ab2d92f84da5 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > >> @@ -279,6 +279,173 @@ static ssize_t > amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, > >> return amdgpu_debugfs_process_reg_op(false, f, (char > __user *)buf, size, pos); > >> } > >> > >> +static int amdgpu_debugfs_regs2_open(struct inode *inode, > struct file *file) > >> +{ > >> + struct amdgpu_debugfs_regs2_data *rd; > >> + > >> + rd = kzalloc(sizeof *rd, GFP_KERNEL); > >> + if (!rd) > >> + return -ENOMEM; > >> + rd->adev = file_inode(file)->i_private; > >> + file->private_data = rd; > >> + > >> + return 0; > >> +} > >> + > >> +static int amdgpu_debugfs_regs2_release(struct inode *inode, > struct file *file) > >> +{ > >> + kfree(file->private_data); > >> + return 0; > >> +} > >> + > >> +static int amdgpu_debugfs_regs2_op(struct file *f, char __user > *buf, int write_en) > >> +{ > >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; > >> + struct amdgpu_device *adev = rd->adev; > >> + int result = 0, r; > >> + uint32_t value; > >> + > >> + if (rd->state.offset & 0x3) > >> + return -EINVAL; > >> + > >> + if (rd->state.use_grbm) { > >> + if (rd->state.grbm.se <http://state.grbm.se> == > 0x3FF) > >> + rd->state.grbm.se <http://state.grbm.se> > = 0xFFFFFFFF; > >> + if (rd->state.grbm.sh <http://state.grbm.sh> == > 0x3FF) > >> + rd->state.grbm.sh <http://state.grbm.sh> > = 0xFFFFFFFF; > >> + if (rd->state.grbm.instance == 0x3FF) > >> + rd->state.grbm.instance = 0xFFFFFFFF; > >> + } > >> + > >> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); > >> + if (r < 0) { > >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > >> + return r; > >> + } > >> + > >> + r = amdgpu_virt_enable_access_debugfs(adev); > >> + if (r < 0) { > >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > >> + return r; > >> + } > >> + > >> + if (rd->state.use_grbm) { > >> + if ((rd->state.grbm.sh <http://state.grbm.sh> != > 0xFFFFFFFF && rd->state.grbm.sh <http://state.grbm.sh> >= > adev->gfx.config.max_sh_per_se) || > >> + (rd->state.grbm.se <http://state.grbm.se> != > 0xFFFFFFFF && rd->state.grbm.se <http://state.grbm.se> >= > adev->gfx.config.max_shader_engines)) { > >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > >> + amdgpu_virt_disable_access_debugfs(adev); > >> + return -EINVAL; > >> + } > >> + mutex_lock(&adev->grbm_idx_mutex); > >> + amdgpu_gfx_select_se_sh(adev, rd->state.grbm.se > <http://state.grbm.se>, > >> + rd->state.grbm.sh <http://state.grbm.sh>, > >> + rd->state.grbm.instance); > >> + } else if (rd->state.use_grbm) { > >> + mutex_lock(&adev->srbm_mutex); > >> + amdgpu_gfx_select_me_pipe_q(adev, > rd->state.srbm.me <http://state.srbm.me>, rd->state.srbm.pipe, > >> + rd->state.srbm.queue, rd->state.srbm.vmid); > >> + } > >> + > >> + if (rd->state.pg_lock) > >> + mutex_lock(&adev->pm.mutex); > >> + > >> + if (!write_en) { > >> + value = RREG32(rd->state.offset >> 2); > >> + r = put_user(value, (uint32_t *)buf); > >> + } else { > >> + r = get_user(value, (uint32_t *)buf); > >> + if (!r) > >> + amdgpu_mm_wreg_mmio_rlc(adev, > rd->state.offset >> 2, value); > >> + } > >> + if (r) { > >> + result = r; > >> + goto end; > >> + } > >> + result = 0; > >> +end: > >> + if (rd->state.use_grbm) { > >> + amdgpu_gfx_select_se_sh(adev, 0xffffffff, > 0xffffffff, 0xffffffff); > >> + mutex_unlock(&adev->grbm_idx_mutex); > >> + } else if (rd->state.use_srbm) { > >> + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); > >> + mutex_unlock(&adev->srbm_mutex); > >> + } > >> + > >> + if (rd->state.pg_lock) > >> + mutex_unlock(&adev->pm.mutex); > >> + > >> + // in umr (the likely user of this) flags are set per > file operation > >> + // which means they're never "unset" explicitly. To > avoid breaking > >> + // this convention we unset the flags after each operation > >> + // flags are for a single call (need to be set for every > read/write) > >> + rd->state.use_grbm = 0; > >> + rd->state.use_srbm = 0; > >> + rd->state.pg_lock = 0; > >> + > >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); > >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); > >> + > >> + amdgpu_virt_disable_access_debugfs(adev); > >> + return result; > >> +} > >> + > >> +static long amdgpu_debugfs_regs2_ioctl(struct file *f, > unsigned int cmd, unsigned long data) > >> +{ > >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; > >> + unsigned char st[32], v; > >> + int r, x; > >> + > >> + // always read first 4 bytes of data > >> + for (x = 0; x < 4; x++) { > >> + if ((r = get_user(v, (unsigned char *)data))) { > >> + return r; > >> + } > >> + ++data; > >> + st[x] = v; > >> + } > >> + > >> + // first 4 bytes are offset > >> + rd->state.offset = ((u32)st[0]) | ((u32)st[1] << 8) | > >> + ((u32)st[2] << 16) | > ((u32)st[3] << 24); > >> + > >> + switch (cmd) { > >> + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: > >> + for (x = 4; x < 32; x++) { > >> + if ((r = get_user(v, (unsigned > char *)data))) { > >> + return r; > >> + } > >> + ++data; > >> + st[x] = v; > >> + } > >> + > >> + // next byte contains the flag > >> + // we only consume the remainder of the > state if bit 1 is set > >> + // this allows the subsequent read/write > >> + rd->state.use_grbm = (st[4] & 1) ? 1 : 0; > >> + rd->state.use_srbm = (st[4] & 2) ? 1 : 0; > >> + rd->state.pg_lock = (st[4] & 4) ? 1 : 0; > >> + > >> + // next 6 bytes are grbm data > >> + rd->state.grbm.se <http://state.grbm.se> > = ((u32)st[5]) | ((u32)st[6] << 8); > >> + rd->state.grbm.sh <http://state.grbm.sh> > = ((u32)st[7]) | ((u32)st[8] << 8); > >> + rd->state.grbm.instance = ((u32)st[9]) | > ((u32)st[10] << 8); > >> + > >> + // next 8 are srbm data > >> + rd->state.srbm.me <http://state.srbm.me> > = ((u32)st[11]) | ((u32)st[12] << 8); > >> + rd->state.srbm.pipe = ((u32)st[13]) | > ((u32)st[14] << 8); > >> + rd->state.srbm.queue = ((u32)st[15]) | > ((u32)st[16] << 8); > >> + rd->state.srbm.vmid = ((u32)st[17]) | > ((u32)st[18] << 8); > >> + break; > >> + case AMDGPU_DEBUGFS_REGS2_IOC_READ: > >> + return amdgpu_debugfs_regs2_op(f, (char > __user *)data, 0); > >> + case AMDGPU_DEBUGFS_REGS2_IOC_WRITE: > >> + return amdgpu_debugfs_regs2_op(f, (char > __user *)data, 1); > >> + default: > >> + return -EINVAL; > >> + } > >> + return 0; > >> +} > >> > >> /** > >> * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register > >> @@ -1091,6 +1258,14 @@ static ssize_t > amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, > >> return result; > >> } > >> > >> +static const struct file_operations amdgpu_debugfs_regs2_fops = { > >> + .owner = THIS_MODULE, > >> + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, > >> + .open = amdgpu_debugfs_regs2_open, > >> + .release = amdgpu_debugfs_regs2_release, > >> + .llseek = default_llseek > >> +}; > >> + > >> static const struct file_operations amdgpu_debugfs_regs_fops = { > >> .owner = THIS_MODULE, > >> .read = amdgpu_debugfs_regs_read, > >> @@ -1148,6 +1323,7 @@ static const struct file_operations > amdgpu_debugfs_gfxoff_fops = { > >> > >> static const struct file_operations *debugfs_regs[] = { > >> &amdgpu_debugfs_regs_fops, > >> + &amdgpu_debugfs_regs2_fops, > >> &amdgpu_debugfs_regs_didt_fops, > >> &amdgpu_debugfs_regs_pcie_fops, > >> &amdgpu_debugfs_regs_smc_fops, > >> @@ -1160,6 +1336,7 @@ static const struct file_operations > *debugfs_regs[] = { > >> > >> static const char *debugfs_regs_names[] = { > >> "amdgpu_regs", > >> + "amdgpu_regs2", > >> "amdgpu_regs_didt", > >> "amdgpu_regs_pcie", > >> "amdgpu_regs_smc", > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > >> index 141a8474e24f..04c81cd4bcc7 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > >> @@ -22,6 +22,7 @@ > >> * OTHER DEALINGS IN THE SOFTWARE. > >> * > >> */ > >> +#include <linux/ioctl.h> > >> > >> /* > >> * Debugfs > >> @@ -38,3 +39,34 @@ void amdgpu_debugfs_fence_init(struct > amdgpu_device *adev); > >> void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); > >> void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); > >> int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); > >> + > >> +struct amdgpu_debugfs_regs2_data { > >> + struct amdgpu_device *adev; > >> + struct { > >> + // regs state > >> + int use_srbm, > >> + use_grbm, > >> + pg_lock; > >> + struct { > >> + u32 se, sh, instance; > >> + } grbm; > >> + struct { > >> + u32 me, pipe, queue, vmid; > >> + } srbm; > >> + u32 offset; > >> + } state; > >> +}; > > This stuff needs to be in some shared header then. > > > >> + > >> +enum AMDGPU_DEBUGFS_REGS2_CMDS { > >> + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, > >> + AMDGPU_DEBUGFS_REGS2_CMD_READ, > >> + AMDGPU_DEBUGFS_REGS2_CMD_WRITE, > > Why not just using the normal read and write functions? > > > > Christian. > > > >> +}; > >> + > >> +struct amdgpu_debugfs_regs2_iocdata { > >> + unsigned char t[32]; > >> +}; > >> + > >> +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, > AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct > amdgpu_debugfs_regs2_iocdata) > >> +#define AMDGPU_DEBUGFS_REGS2_IOC_READ _IOWR(0x20, > AMDGPU_DEBUGFS_REGS2_CMD_WRITE, struct amdgpu_debugfs_regs2_iocdata) > >> +#define AMDGPU_DEBUGFS_REGS2_IOC_WRITE _IOWR(0x20, > AMDGPU_DEBUGFS_REGS2_CMD_READ, struct amdgpu_debugfs_regs2_iocdata) > [-- Attachment #2: Type: text/html, Size: 25041 bytes --] ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers 2021-08-24 13:22 ` Christian König @ 2021-08-24 13:28 ` Tom St Denis 2021-08-24 16:00 ` Alex Deucher 0 siblings, 1 reply; 8+ messages in thread From: Tom St Denis @ 2021-08-24 13:28 UTC (permalink / raw) To: Christian König; +Cc: StDenis, Tom, amd-gfx@lists.freedesktop.org [-- Attachment #1: Type: text/plain, Size: 14789 bytes --] hehehe I just moved it to uapi... No worries, you're the maintainer, I'll move it back before posting v2. Cheers, Tom On Tue, Aug 24, 2021 at 9:22 AM Christian König < ckoenig.leichtzumerken@gmail.com> wrote: > Am 24.08.21 um 14:42 schrieb Tom St Denis: > > The IOCTL data is in the debugfs header as it is. I could move that to > the amdgpu_drm.h and include it from amdgpu_debugfs.h. > > > Na, keep it like that and just add a comment. > > On second thought I don't want to raise any discussion on the mailing list > if we should have a debugfs structure in the UAPI headers. > > Christian. > > > I'll re-write the STATE IOCTL to use a struct and then test against what I > have in umr. > > Refactoring the read/write is trivial and I'll do that no problem (with > style fixes). > > Cheers, > Tom > > On Tue, Aug 24, 2021 at 8:34 AM Christian König < > ckoenig.leichtzumerken@gmail.com> wrote: > >> Am 24.08.21 um 14:27 schrieb StDenis, Tom: >> > [AMD Official Use Only] >> > >> > What do you mean a "shared header?" How would they be shared between >> kernel and user? >> >> Somewhere in the include/uapi/drm/ folder I think. Either add that to >> amdgpu_drm.h or maybe amdgpu_debugfs.h? >> >> Or just keep it as a structure in amdgpu_debugfs.h with a comment that >> it is used for an IOCTL. >> >> Just not something hard coded like first byte is this, second that etc.... >> >> > As for why not read/write. Jus wanted to keep it simple. It's not >> really performance bound. umr never does reads/writes larger than 32-bits >> anyways. It doesn't have to be this way though but I figured the fewer LOC >> the better. >> >> I think it would be cleaner if we would have separate functions for this. >> >> As far as I can see you also don't need the dance with the power >> managerment etc for the IOCTL. It's just get_user() into your structure. >> >> BTW: In the kernel coding style put "switch" and "case" on the same >> column, otherwise checkpatch.pl might complain. >> >> Christian. >> >> > >> > Tom >> > >> > ________________________________________ >> > From: Christian König <ckoenig.leichtzumerken@gmail.com> >> > Sent: Tuesday, August 24, 2021 08:20 >> > To: StDenis, Tom; amd-gfx@lists.freedesktop.org >> > Subject: Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO >> registers >> > >> > >> > >> > Am 24.08.21 um 14:16 schrieb Tom St Denis: >> >> This new debugfs interface uses an IOCTL interface in order to pass >> >> along state information like SRBM and GRBM bank switching. This >> >> new interface also allows a full 32-bit MMIO address range which >> >> the previous didn't. With this new design we have room to grow >> >> the flexibility of the file as need be. >> >> >> >> Signed-off-by: Tom St Denis <tom.stdenis@amd.com> >> >> --- >> >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 177 >> ++++++++++++++++++++ >> >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++ >> >> 2 files changed, 209 insertions(+) >> >> >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >> >> index 277128846dd1..ab2d92f84da5 100644 >> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >> >> @@ -279,6 +279,173 @@ static ssize_t amdgpu_debugfs_regs_write(struct >> file *f, const char __user *buf, >> >> return amdgpu_debugfs_process_reg_op(false, f, (char __user >> *)buf, size, pos); >> >> } >> >> >> >> +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file >> *file) >> >> +{ >> >> + struct amdgpu_debugfs_regs2_data *rd; >> >> + >> >> + rd = kzalloc(sizeof *rd, GFP_KERNEL); >> >> + if (!rd) >> >> + return -ENOMEM; >> >> + rd->adev = file_inode(file)->i_private; >> >> + file->private_data = rd; >> >> + >> >> + return 0; >> >> +} >> >> + >> >> +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct >> file *file) >> >> +{ >> >> + kfree(file->private_data); >> >> + return 0; >> >> +} >> >> + >> >> +static int amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, >> int write_en) >> >> +{ >> >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; >> >> + struct amdgpu_device *adev = rd->adev; >> >> + int result = 0, r; >> >> + uint32_t value; >> >> + >> >> + if (rd->state.offset & 0x3) >> >> + return -EINVAL; >> >> + >> >> + if (rd->state.use_grbm) { >> >> + if (rd->state.grbm.se == 0x3FF) >> >> + rd->state.grbm.se = 0xFFFFFFFF; >> >> + if (rd->state.grbm.sh == 0x3FF) >> >> + rd->state.grbm.sh = 0xFFFFFFFF; >> >> + if (rd->state.grbm.instance == 0x3FF) >> >> + rd->state.grbm.instance = 0xFFFFFFFF; >> >> + } >> >> + >> >> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); >> >> + if (r < 0) { >> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >> >> + return r; >> >> + } >> >> + >> >> + r = amdgpu_virt_enable_access_debugfs(adev); >> >> + if (r < 0) { >> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >> >> + return r; >> >> + } >> >> + >> >> + if (rd->state.use_grbm) { >> >> + if ((rd->state.grbm.sh != 0xFFFFFFFF && rd-> >> state.grbm.sh >= adev->gfx.config.max_sh_per_se) || >> >> + (rd->state.grbm.se != 0xFFFFFFFF && rd-> >> state.grbm.se >= adev->gfx.config.max_shader_engines)) { >> >> + >> pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); >> >> + >> pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >> >> + amdgpu_virt_disable_access_debugfs(adev); >> >> + return -EINVAL; >> >> + } >> >> + mutex_lock(&adev->grbm_idx_mutex); >> >> + amdgpu_gfx_select_se_sh(adev, rd->state.grbm.se, >> >> + rd-> >> state.grbm.sh, >> >> + >> rd->state.grbm.instance); >> >> + } else if (rd->state.use_grbm) { >> >> + mutex_lock(&adev->srbm_mutex); >> >> + amdgpu_gfx_select_me_pipe_q(adev, rd->state.srbm.me, >> rd->state.srbm.pipe, >> >> + >> rd->state.srbm.queue, rd->state.srbm.vmid); >> >> + } >> >> + >> >> + if (rd->state.pg_lock) >> >> + mutex_lock(&adev->pm.mutex); >> >> + >> >> + if (!write_en) { >> >> + value = RREG32(rd->state.offset >> 2); >> >> + r = put_user(value, (uint32_t *)buf); >> >> + } else { >> >> + r = get_user(value, (uint32_t *)buf); >> >> + if (!r) >> >> + amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> >> 2, value); >> >> + } >> >> + if (r) { >> >> + result = r; >> >> + goto end; >> >> + } >> >> + result = 0; >> >> +end: >> >> + if (rd->state.use_grbm) { >> >> + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, >> 0xffffffff); >> >> + mutex_unlock(&adev->grbm_idx_mutex); >> >> + } else if (rd->state.use_srbm) { >> >> + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); >> >> + mutex_unlock(&adev->srbm_mutex); >> >> + } >> >> + >> >> + if (rd->state.pg_lock) >> >> + mutex_unlock(&adev->pm.mutex); >> >> + >> >> + // in umr (the likely user of this) flags are set per file >> operation >> >> + // which means they're never "unset" explicitly. To avoid >> breaking >> >> + // this convention we unset the flags after each operation >> >> + // flags are for a single call (need to be set for every >> read/write) >> >> + rd->state.use_grbm = 0; >> >> + rd->state.use_srbm = 0; >> >> + rd->state.pg_lock = 0; >> >> + >> >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); >> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >> >> + >> >> + amdgpu_virt_disable_access_debugfs(adev); >> >> + return result; >> >> +} >> >> + >> >> +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int >> cmd, unsigned long data) >> >> +{ >> >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; >> >> + unsigned char st[32], v; >> >> + int r, x; >> >> + >> >> + // always read first 4 bytes of data >> >> + for (x = 0; x < 4; x++) { >> >> + if ((r = get_user(v, (unsigned char *)data))) { >> >> + return r; >> >> + } >> >> + ++data; >> >> + st[x] = v; >> >> + } >> >> + >> >> + // first 4 bytes are offset >> >> + rd->state.offset = ((u32)st[0]) | ((u32)st[1] << 8) | >> >> + ((u32)st[2] << 16) | >> ((u32)st[3] << 24); >> >> + >> >> + switch (cmd) { >> >> + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: >> >> + for (x = 4; x < 32; x++) { >> >> + if ((r = get_user(v, (unsigned char >> *)data))) { >> >> + return r; >> >> + } >> >> + ++data; >> >> + st[x] = v; >> >> + } >> >> + >> >> + // next byte contains the flag >> >> + // we only consume the remainder of the state if >> bit 1 is set >> >> + // this allows the subsequent read/write >> >> + rd->state.use_grbm = (st[4] & 1) ? 1 : 0; >> >> + rd->state.use_srbm = (st[4] & 2) ? 1 : 0; >> >> + rd->state.pg_lock = (st[4] & 4) ? 1 : 0; >> >> + >> >> + // next 6 bytes are grbm data >> >> + rd->state.grbm.se = ((u32)st[5]) | >> ((u32)st[6] << 8); >> >> + rd->state.grbm.sh = ((u32)st[7]) | >> ((u32)st[8] << 8); >> >> + rd->state.grbm.instance = ((u32)st[9]) | >> ((u32)st[10] << 8); >> >> + >> >> + // next 8 are srbm data >> >> + rd->state.srbm.me = ((u32)st[11]) | >> ((u32)st[12] << 8); >> >> + rd->state.srbm.pipe = ((u32)st[13]) | >> ((u32)st[14] << 8); >> >> + rd->state.srbm.queue = ((u32)st[15]) | >> ((u32)st[16] << 8); >> >> + rd->state.srbm.vmid = ((u32)st[17]) | >> ((u32)st[18] << 8); >> >> + break; >> >> + case AMDGPU_DEBUGFS_REGS2_IOC_READ: >> >> + return amdgpu_debugfs_regs2_op(f, (char __user >> *)data, 0); >> >> + case AMDGPU_DEBUGFS_REGS2_IOC_WRITE: >> >> + return amdgpu_debugfs_regs2_op(f, (char __user >> *)data, 1); >> >> + default: >> >> + return -EINVAL; >> >> + } >> >> + return 0; >> >> +} >> >> >> >> /** >> >> * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register >> >> @@ -1091,6 +1258,14 @@ static ssize_t >> amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, >> >> return result; >> >> } >> >> >> >> +static const struct file_operations amdgpu_debugfs_regs2_fops = { >> >> + .owner = THIS_MODULE, >> >> + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, >> >> + .open = amdgpu_debugfs_regs2_open, >> >> + .release = amdgpu_debugfs_regs2_release, >> >> + .llseek = default_llseek >> >> +}; >> >> + >> >> static const struct file_operations amdgpu_debugfs_regs_fops = { >> >> .owner = THIS_MODULE, >> >> .read = amdgpu_debugfs_regs_read, >> >> @@ -1148,6 +1323,7 @@ static const struct file_operations >> amdgpu_debugfs_gfxoff_fops = { >> >> >> >> static const struct file_operations *debugfs_regs[] = { >> >> &amdgpu_debugfs_regs_fops, >> >> + &amdgpu_debugfs_regs2_fops, >> >> &amdgpu_debugfs_regs_didt_fops, >> >> &amdgpu_debugfs_regs_pcie_fops, >> >> &amdgpu_debugfs_regs_smc_fops, >> >> @@ -1160,6 +1336,7 @@ static const struct file_operations >> *debugfs_regs[] = { >> >> >> >> static const char *debugfs_regs_names[] = { >> >> "amdgpu_regs", >> >> + "amdgpu_regs2", >> >> "amdgpu_regs_didt", >> >> "amdgpu_regs_pcie", >> >> "amdgpu_regs_smc", >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >> >> index 141a8474e24f..04c81cd4bcc7 100644 >> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >> >> @@ -22,6 +22,7 @@ >> >> * OTHER DEALINGS IN THE SOFTWARE. >> >> * >> >> */ >> >> +#include <linux/ioctl.h> >> >> >> >> /* >> >> * Debugfs >> >> @@ -38,3 +39,34 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device >> *adev); >> >> void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); >> >> void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); >> >> int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); >> >> + >> >> +struct amdgpu_debugfs_regs2_data { >> >> + struct amdgpu_device *adev; >> >> + struct { >> >> + // regs state >> >> + int use_srbm, >> >> + use_grbm, >> >> + pg_lock; >> >> + struct { >> >> + u32 se, sh, instance; >> >> + } grbm; >> >> + struct { >> >> + u32 me, pipe, queue, vmid; >> >> + } srbm; >> >> + u32 offset; >> >> + } state; >> >> +}; >> > This stuff needs to be in some shared header then. >> > >> >> + >> >> +enum AMDGPU_DEBUGFS_REGS2_CMDS { >> >> + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, >> >> + AMDGPU_DEBUGFS_REGS2_CMD_READ, >> >> + AMDGPU_DEBUGFS_REGS2_CMD_WRITE, >> > Why not just using the normal read and write functions? >> > >> > Christian. >> > >> >> +}; >> >> + >> >> +struct amdgpu_debugfs_regs2_iocdata { >> >> + unsigned char t[32]; >> >> +}; >> >> + >> >> +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, >> AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) >> >> +#define AMDGPU_DEBUGFS_REGS2_IOC_READ _IOWR(0x20, >> AMDGPU_DEBUGFS_REGS2_CMD_WRITE, struct amdgpu_debugfs_regs2_iocdata) >> >> +#define AMDGPU_DEBUGFS_REGS2_IOC_WRITE _IOWR(0x20, >> AMDGPU_DEBUGFS_REGS2_CMD_READ, struct amdgpu_debugfs_regs2_iocdata) >> >> > [-- Attachment #2: Type: text/html, Size: 25027 bytes --] ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers 2021-08-24 13:28 ` Tom St Denis @ 2021-08-24 16:00 ` Alex Deucher 0 siblings, 0 replies; 8+ messages in thread From: Alex Deucher @ 2021-08-24 16:00 UTC (permalink / raw) To: Tom St Denis Cc: Christian König, StDenis, Tom, amd-gfx@lists.freedesktop.org Also, please use C style comments. Alex On Tue, Aug 24, 2021 at 9:28 AM Tom St Denis <tstdenis82@gmail.com> wrote: > > hehehe I just moved it to uapi... No worries, you're the maintainer, I'll move it back before posting v2. > > Cheers, > Tom > > On Tue, Aug 24, 2021 at 9:22 AM Christian König <ckoenig.leichtzumerken@gmail.com> wrote: >> >> Am 24.08.21 um 14:42 schrieb Tom St Denis: >> >> The IOCTL data is in the debugfs header as it is. I could move that to the amdgpu_drm.h and include it from amdgpu_debugfs.h. >> >> >> Na, keep it like that and just add a comment. >> >> On second thought I don't want to raise any discussion on the mailing list if we should have a debugfs structure in the UAPI headers. >> >> Christian. >> >> >> I'll re-write the STATE IOCTL to use a struct and then test against what I have in umr. >> >> Refactoring the read/write is trivial and I'll do that no problem (with style fixes). >> >> Cheers, >> Tom >> >> On Tue, Aug 24, 2021 at 8:34 AM Christian König <ckoenig.leichtzumerken@gmail.com> wrote: >>> >>> Am 24.08.21 um 14:27 schrieb StDenis, Tom: >>> > [AMD Official Use Only] >>> > >>> > What do you mean a "shared header?" How would they be shared between kernel and user? >>> >>> Somewhere in the include/uapi/drm/ folder I think. Either add that to >>> amdgpu_drm.h or maybe amdgpu_debugfs.h? >>> >>> Or just keep it as a structure in amdgpu_debugfs.h with a comment that >>> it is used for an IOCTL. >>> >>> Just not something hard coded like first byte is this, second that etc.... >>> >>> > As for why not read/write. Jus wanted to keep it simple. It's not really performance bound. umr never does reads/writes larger than 32-bits anyways. It doesn't have to be this way though but I figured the fewer LOC the better. >>> >>> I think it would be cleaner if we would have separate functions for this. >>> >>> As far as I can see you also don't need the dance with the power >>> managerment etc for the IOCTL. It's just get_user() into your structure. >>> >>> BTW: In the kernel coding style put "switch" and "case" on the same >>> column, otherwise checkpatch.pl might complain. >>> >>> Christian. >>> >>> > >>> > Tom >>> > >>> > ________________________________________ >>> > From: Christian König <ckoenig.leichtzumerken@gmail.com> >>> > Sent: Tuesday, August 24, 2021 08:20 >>> > To: StDenis, Tom; amd-gfx@lists.freedesktop.org >>> > Subject: Re: [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers >>> > >>> > >>> > >>> > Am 24.08.21 um 14:16 schrieb Tom St Denis: >>> >> This new debugfs interface uses an IOCTL interface in order to pass >>> >> along state information like SRBM and GRBM bank switching. This >>> >> new interface also allows a full 32-bit MMIO address range which >>> >> the previous didn't. With this new design we have room to grow >>> >> the flexibility of the file as need be. >>> >> >>> >> Signed-off-by: Tom St Denis <tom.stdenis@amd.com> >>> >> --- >>> >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 177 ++++++++++++++++++++ >>> >> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 32 ++++ >>> >> 2 files changed, 209 insertions(+) >>> >> >>> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >>> >> index 277128846dd1..ab2d92f84da5 100644 >>> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >>> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >>> >> @@ -279,6 +279,173 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, >>> >> return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); >>> >> } >>> >> >>> >> +static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file) >>> >> +{ >>> >> + struct amdgpu_debugfs_regs2_data *rd; >>> >> + >>> >> + rd = kzalloc(sizeof *rd, GFP_KERNEL); >>> >> + if (!rd) >>> >> + return -ENOMEM; >>> >> + rd->adev = file_inode(file)->i_private; >>> >> + file->private_data = rd; >>> >> + >>> >> + return 0; >>> >> +} >>> >> + >>> >> +static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file) >>> >> +{ >>> >> + kfree(file->private_data); >>> >> + return 0; >>> >> +} >>> >> + >>> >> +static int amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, int write_en) >>> >> +{ >>> >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; >>> >> + struct amdgpu_device *adev = rd->adev; >>> >> + int result = 0, r; >>> >> + uint32_t value; >>> >> + >>> >> + if (rd->state.offset & 0x3) >>> >> + return -EINVAL; >>> >> + >>> >> + if (rd->state.use_grbm) { >>> >> + if (rd->state.grbm.se == 0x3FF) >>> >> + rd->state.grbm.se = 0xFFFFFFFF; >>> >> + if (rd->state.grbm.sh == 0x3FF) >>> >> + rd->state.grbm.sh = 0xFFFFFFFF; >>> >> + if (rd->state.grbm.instance == 0x3FF) >>> >> + rd->state.grbm.instance = 0xFFFFFFFF; >>> >> + } >>> >> + >>> >> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); >>> >> + if (r < 0) { >>> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >>> >> + return r; >>> >> + } >>> >> + >>> >> + r = amdgpu_virt_enable_access_debugfs(adev); >>> >> + if (r < 0) { >>> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >>> >> + return r; >>> >> + } >>> >> + >>> >> + if (rd->state.use_grbm) { >>> >> + if ((rd->state.grbm.sh != 0xFFFFFFFF && rd->state.grbm.sh >= adev->gfx.config.max_sh_per_se) || >>> >> + (rd->state.grbm.se != 0xFFFFFFFF && rd->state.grbm.se >= adev->gfx.config.max_shader_engines)) { >>> >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); >>> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >>> >> + amdgpu_virt_disable_access_debugfs(adev); >>> >> + return -EINVAL; >>> >> + } >>> >> + mutex_lock(&adev->grbm_idx_mutex); >>> >> + amdgpu_gfx_select_se_sh(adev, rd->state.grbm.se, >>> >> + rd->state.grbm.sh, >>> >> + rd->state.grbm.instance); >>> >> + } else if (rd->state.use_grbm) { >>> >> + mutex_lock(&adev->srbm_mutex); >>> >> + amdgpu_gfx_select_me_pipe_q(adev, rd->state.srbm.me, rd->state.srbm.pipe, >>> >> + rd->state.srbm.queue, rd->state.srbm.vmid); >>> >> + } >>> >> + >>> >> + if (rd->state.pg_lock) >>> >> + mutex_lock(&adev->pm.mutex); >>> >> + >>> >> + if (!write_en) { >>> >> + value = RREG32(rd->state.offset >> 2); >>> >> + r = put_user(value, (uint32_t *)buf); >>> >> + } else { >>> >> + r = get_user(value, (uint32_t *)buf); >>> >> + if (!r) >>> >> + amdgpu_mm_wreg_mmio_rlc(adev, rd->state.offset >> 2, value); >>> >> + } >>> >> + if (r) { >>> >> + result = r; >>> >> + goto end; >>> >> + } >>> >> + result = 0; >>> >> +end: >>> >> + if (rd->state.use_grbm) { >>> >> + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); >>> >> + mutex_unlock(&adev->grbm_idx_mutex); >>> >> + } else if (rd->state.use_srbm) { >>> >> + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); >>> >> + mutex_unlock(&adev->srbm_mutex); >>> >> + } >>> >> + >>> >> + if (rd->state.pg_lock) >>> >> + mutex_unlock(&adev->pm.mutex); >>> >> + >>> >> + // in umr (the likely user of this) flags are set per file operation >>> >> + // which means they're never "unset" explicitly. To avoid breaking >>> >> + // this convention we unset the flags after each operation >>> >> + // flags are for a single call (need to be set for every read/write) >>> >> + rd->state.use_grbm = 0; >>> >> + rd->state.use_srbm = 0; >>> >> + rd->state.pg_lock = 0; >>> >> + >>> >> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); >>> >> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); >>> >> + >>> >> + amdgpu_virt_disable_access_debugfs(adev); >>> >> + return result; >>> >> +} >>> >> + >>> >> +static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) >>> >> +{ >>> >> + struct amdgpu_debugfs_regs2_data *rd = f->private_data; >>> >> + unsigned char st[32], v; >>> >> + int r, x; >>> >> + >>> >> + // always read first 4 bytes of data >>> >> + for (x = 0; x < 4; x++) { >>> >> + if ((r = get_user(v, (unsigned char *)data))) { >>> >> + return r; >>> >> + } >>> >> + ++data; >>> >> + st[x] = v; >>> >> + } >>> >> + >>> >> + // first 4 bytes are offset >>> >> + rd->state.offset = ((u32)st[0]) | ((u32)st[1] << 8) | >>> >> + ((u32)st[2] << 16) | ((u32)st[3] << 24); >>> >> + >>> >> + switch (cmd) { >>> >> + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: >>> >> + for (x = 4; x < 32; x++) { >>> >> + if ((r = get_user(v, (unsigned char *)data))) { >>> >> + return r; >>> >> + } >>> >> + ++data; >>> >> + st[x] = v; >>> >> + } >>> >> + >>> >> + // next byte contains the flag >>> >> + // we only consume the remainder of the state if bit 1 is set >>> >> + // this allows the subsequent read/write >>> >> + rd->state.use_grbm = (st[4] & 1) ? 1 : 0; >>> >> + rd->state.use_srbm = (st[4] & 2) ? 1 : 0; >>> >> + rd->state.pg_lock = (st[4] & 4) ? 1 : 0; >>> >> + >>> >> + // next 6 bytes are grbm data >>> >> + rd->state.grbm.se = ((u32)st[5]) | ((u32)st[6] << 8); >>> >> + rd->state.grbm.sh = ((u32)st[7]) | ((u32)st[8] << 8); >>> >> + rd->state.grbm.instance = ((u32)st[9]) | ((u32)st[10] << 8); >>> >> + >>> >> + // next 8 are srbm data >>> >> + rd->state.srbm.me = ((u32)st[11]) | ((u32)st[12] << 8); >>> >> + rd->state.srbm.pipe = ((u32)st[13]) | ((u32)st[14] << 8); >>> >> + rd->state.srbm.queue = ((u32)st[15]) | ((u32)st[16] << 8); >>> >> + rd->state.srbm.vmid = ((u32)st[17]) | ((u32)st[18] << 8); >>> >> + break; >>> >> + case AMDGPU_DEBUGFS_REGS2_IOC_READ: >>> >> + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 0); >>> >> + case AMDGPU_DEBUGFS_REGS2_IOC_WRITE: >>> >> + return amdgpu_debugfs_regs2_op(f, (char __user *)data, 1); >>> >> + default: >>> >> + return -EINVAL; >>> >> + } >>> >> + return 0; >>> >> +} >>> >> >>> >> /** >>> >> * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register >>> >> @@ -1091,6 +1258,14 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, >>> >> return result; >>> >> } >>> >> >>> >> +static const struct file_operations amdgpu_debugfs_regs2_fops = { >>> >> + .owner = THIS_MODULE, >>> >> + .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl, >>> >> + .open = amdgpu_debugfs_regs2_open, >>> >> + .release = amdgpu_debugfs_regs2_release, >>> >> + .llseek = default_llseek >>> >> +}; >>> >> + >>> >> static const struct file_operations amdgpu_debugfs_regs_fops = { >>> >> .owner = THIS_MODULE, >>> >> .read = amdgpu_debugfs_regs_read, >>> >> @@ -1148,6 +1323,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { >>> >> >>> >> static const struct file_operations *debugfs_regs[] = { >>> >> &amdgpu_debugfs_regs_fops, >>> >> + &amdgpu_debugfs_regs2_fops, >>> >> &amdgpu_debugfs_regs_didt_fops, >>> >> &amdgpu_debugfs_regs_pcie_fops, >>> >> &amdgpu_debugfs_regs_smc_fops, >>> >> @@ -1160,6 +1336,7 @@ static const struct file_operations *debugfs_regs[] = { >>> >> >>> >> static const char *debugfs_regs_names[] = { >>> >> "amdgpu_regs", >>> >> + "amdgpu_regs2", >>> >> "amdgpu_regs_didt", >>> >> "amdgpu_regs_pcie", >>> >> "amdgpu_regs_smc", >>> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >>> >> index 141a8474e24f..04c81cd4bcc7 100644 >>> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >>> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h >>> >> @@ -22,6 +22,7 @@ >>> >> * OTHER DEALINGS IN THE SOFTWARE. >>> >> * >>> >> */ >>> >> +#include <linux/ioctl.h> >>> >> >>> >> /* >>> >> * Debugfs >>> >> @@ -38,3 +39,34 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); >>> >> void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); >>> >> void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); >>> >> int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); >>> >> + >>> >> +struct amdgpu_debugfs_regs2_data { >>> >> + struct amdgpu_device *adev; >>> >> + struct { >>> >> + // regs state >>> >> + int use_srbm, >>> >> + use_grbm, >>> >> + pg_lock; >>> >> + struct { >>> >> + u32 se, sh, instance; >>> >> + } grbm; >>> >> + struct { >>> >> + u32 me, pipe, queue, vmid; >>> >> + } srbm; >>> >> + u32 offset; >>> >> + } state; >>> >> +}; >>> > This stuff needs to be in some shared header then. >>> > >>> >> + >>> >> +enum AMDGPU_DEBUGFS_REGS2_CMDS { >>> >> + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, >>> >> + AMDGPU_DEBUGFS_REGS2_CMD_READ, >>> >> + AMDGPU_DEBUGFS_REGS2_CMD_WRITE, >>> > Why not just using the normal read and write functions? >>> > >>> > Christian. >>> > >>> >> +}; >>> >> + >>> >> +struct amdgpu_debugfs_regs2_iocdata { >>> >> + unsigned char t[32]; >>> >> +}; >>> >> + >>> >> +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) >>> >> +#define AMDGPU_DEBUGFS_REGS2_IOC_READ _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_WRITE, struct amdgpu_debugfs_regs2_iocdata) >>> >> +#define AMDGPU_DEBUGFS_REGS2_IOC_WRITE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_READ, struct amdgpu_debugfs_regs2_iocdata) >>> >> ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2021-08-24 16:00 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2021-08-24 12:16 [PATCH] drm/amd/amdgpu: New debugfs interface for MMIO registers Tom St Denis 2021-08-24 12:20 ` Christian König 2021-08-24 12:27 ` StDenis, Tom 2021-08-24 12:34 ` Christian König 2021-08-24 12:42 ` Tom St Denis 2021-08-24 13:22 ` Christian König 2021-08-24 13:28 ` Tom St Denis 2021-08-24 16:00 ` Alex Deucher
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.