* [PATCH 0/2] drm/amdgpu: Merge all debug module parameters
@ 2023-08-24 16:25 André Almeida
2023-08-24 16:25 ` [PATCH 1/2] drm/amdgpu: Merge " André Almeida
2023-08-24 16:25 ` [PATCH 2/2] drm/amdgpu: Create an option to disable soft recovery André Almeida
0 siblings, 2 replies; 9+ messages in thread
From: André Almeida @ 2023-08-24 16:25 UTC (permalink / raw)
To: dri-devel, amd-gfx, linux-kernel
Cc: kernel-dev, alexander.deucher, christian.koenig,
pierre-eric.pelloux-prayer, 'Marek Olšák',
André Almeida
As suggested by Christian at [0], this patchset merges all debug modules
parameters and creates a new one for disabling soft recovery:
> Maybe we can overload the amdgpu_gpu_recovery module option with this.
> Or even better merge all the developer module parameter into a
> amdgpu_debug option. This way it should be pretty obvious that this
> isn't meant to be used by someone who doesn't know how to use it.
[0] https://lore.kernel.org/dri-devel/55f69184-1aa2-55d6-4a10-1560d75c7324@amd.com/
André Almeida (2):
drm/amdgpu: Merge debug module parameters
drm/amdgpu: Create an option to disable soft recovery
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 30 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 6 ++++-
drivers/gpu/drm/amd/include/amd_shared.h | 10 ++++++++
4 files changed, 46 insertions(+), 1 deletion(-)
--
2.41.0
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/2] drm/amdgpu: Merge debug module parameters
2023-08-24 16:25 [PATCH 0/2] drm/amdgpu: Merge all debug module parameters André Almeida
@ 2023-08-24 16:25 ` André Almeida
2023-08-25 6:56 ` Christian König
2023-09-03 1:56 ` kernel test robot
2023-08-24 16:25 ` [PATCH 2/2] drm/amdgpu: Create an option to disable soft recovery André Almeida
1 sibling, 2 replies; 9+ messages in thread
From: André Almeida @ 2023-08-24 16:25 UTC (permalink / raw)
To: dri-devel, amd-gfx, linux-kernel
Cc: kernel-dev, alexander.deucher, christian.koenig,
pierre-eric.pelloux-prayer, 'Marek Olšák',
André Almeida
Merge all developer debug options available as separated module
parameters in one, making it obvious that are for developers.
Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 24 ++++++++++++++++++++++++
drivers/gpu/drm/amd/include/amd_shared.h | 9 +++++++++
2 files changed, 33 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f5856b82605e..d53e4097acc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -194,6 +194,7 @@ int amdgpu_use_xgmi_p2p = 1;
int amdgpu_vcnfw_log;
int amdgpu_sg_display = -1; /* auto */
int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+uint amdgpu_debug_mask;
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@@ -938,6 +939,9 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
module_param(enforce_isolation, bool, 0444);
MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
+MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
+module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
+
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
@@ -2871,6 +2875,24 @@ static struct pci_driver amdgpu_kms_pci_driver = {
.dev_groups = amdgpu_sysfs_groups,
};
+static void amdgpu_init_debug_options(void)
+{
+ if (amdgpu_debug_mask & DEBUG_VERBOSE_EVICTIONS) {
+ pr_info("debug: eviction debug messages enabled\n");
+ debug_evictions = true;
+ }
+
+ if (amdgpu_debug_mask & DEBUG_VM) {
+ pr_info("debug: VM handling debug enabled\n");
+ amdgpu_vm_debug = true;
+ }
+
+ if (amdgpu_debug_mask & DEBUG_LARGEBAR) {
+ pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
+ debug_largebar = true;
+ }
+}
+
static int __init amdgpu_init(void)
{
int r;
@@ -2893,6 +2915,8 @@ static int __init amdgpu_init(void)
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
+ amdgpu_init_debug_options();
+
/* let modprobe override vga console setting */
return pci_register_driver(&amdgpu_kms_pci_driver);
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 67d7b7ee8a2a..6fa644c249a5 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -257,6 +257,15 @@ enum DC_DEBUG_MASK {
enum amd_dpm_forced_level;
+/*
+ * amdgpu.debug module options. Are all disabled by default
+ */
+enum AMDGPU_DEBUG_MASK {
+ DEBUG_VERBOSE_EVICTIONS = (1 << 0), // 0x1
+ DEBUG_VM = (1 << 1), // 0x2
+ DEBUG_LARGEBAR = (1 << 2), // 0x4
+};
+
/**
* struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks
* @name: Name of IP block
--
2.41.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] drm/amdgpu: Create an option to disable soft recovery
2023-08-24 16:25 [PATCH 0/2] drm/amdgpu: Merge all debug module parameters André Almeida
2023-08-24 16:25 ` [PATCH 1/2] drm/amdgpu: Merge " André Almeida
@ 2023-08-24 16:25 ` André Almeida
1 sibling, 0 replies; 9+ messages in thread
From: André Almeida @ 2023-08-24 16:25 UTC (permalink / raw)
To: dri-devel, amd-gfx, linux-kernel
Cc: kernel-dev, alexander.deucher, christian.koenig,
pierre-eric.pelloux-prayer, 'Marek Olšák',
André Almeida
Create a module option to disable soft recoveries on amdgpu, making
every recovery go through the device reset path. This option makes
easier to force device resets for testing and debugging purposes.
Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 6 +++++-
drivers/gpu/drm/amd/include/amd_shared.h | 1 +
4 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4de074243c4d..8f4a93890345 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -189,6 +189,7 @@ extern uint amdgpu_force_long_training;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
+extern bool amdgpu_soft_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern int amdgpu_smu_pptable_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d53e4097acc0..7d6c39b547cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -163,6 +163,7 @@ uint amdgpu_force_long_training;
int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
+bool amdgpu_soft_recovery = true;
int amdgpu_emu_mode;
uint amdgpu_smu_memory_pool_size;
int amdgpu_smu_pptable_id = -1;
@@ -2891,6 +2892,11 @@ static void amdgpu_init_debug_options(void)
pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
debug_largebar = true;
}
+
+ if (amdgpu_debug_mask & DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
+ pr_info("debug: soft reset for GPU recovery disabled\n");
+ amdgpu_soft_recovery = false;
+ }
}
static int __init amdgpu_init(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 80d6e132e409..40678d9fb17e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -434,8 +434,12 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
unsigned long flags;
+ ktime_t deadline;
- ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+ if (!amdgpu_soft_recovery)
+ return false;
+
+ deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 6fa644c249a5..afcbacce0a13 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -264,6 +264,7 @@ enum AMDGPU_DEBUG_MASK {
DEBUG_VERBOSE_EVICTIONS = (1 << 0), // 0x1
DEBUG_VM = (1 << 1), // 0x2
DEBUG_LARGEBAR = (1 << 2), // 0x4
+ DEBUG_DISABLE_GPU_SOFT_RECOVERY = (1 << 3), // 0x8
};
/**
--
2.41.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: Merge debug module parameters
2023-08-24 16:25 ` [PATCH 1/2] drm/amdgpu: Merge " André Almeida
@ 2023-08-25 6:56 ` Christian König
2023-08-25 12:24 ` André Almeida
2023-09-03 1:56 ` kernel test robot
1 sibling, 1 reply; 9+ messages in thread
From: Christian König @ 2023-08-25 6:56 UTC (permalink / raw)
To: André Almeida, dri-devel, amd-gfx, linux-kernel
Cc: kernel-dev, alexander.deucher, pierre-eric.pelloux-prayer,
'Marek Olšák'
Am 24.08.23 um 18:25 schrieb André Almeida:
> Merge all developer debug options available as separated module
> parameters in one, making it obvious that are for developers.
>
> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 24 ++++++++++++++++++++++++
> drivers/gpu/drm/amd/include/amd_shared.h | 9 +++++++++
> 2 files changed, 33 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index f5856b82605e..d53e4097acc0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -194,6 +194,7 @@ int amdgpu_use_xgmi_p2p = 1;
> int amdgpu_vcnfw_log;
> int amdgpu_sg_display = -1; /* auto */
> int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
> +uint amdgpu_debug_mask;
>
> static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
>
> @@ -938,6 +939,9 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
> module_param(enforce_isolation, bool, 0444);
> MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
>
> +MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
> +module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
> +
> /* These devices are not supported by amdgpu.
> * They are supported by the mach64, r128, radeon drivers
> */
> @@ -2871,6 +2875,24 @@ static struct pci_driver amdgpu_kms_pci_driver = {
> .dev_groups = amdgpu_sysfs_groups,
> };
>
> +static void amdgpu_init_debug_options(void)
> +{
> + if (amdgpu_debug_mask & DEBUG_VERBOSE_EVICTIONS) {
> + pr_info("debug: eviction debug messages enabled\n");
> + debug_evictions = true;
> + }
> +
> + if (amdgpu_debug_mask & DEBUG_VM) {
> + pr_info("debug: VM handling debug enabled\n");
> + amdgpu_vm_debug = true;
> + }
> +
> + if (amdgpu_debug_mask & DEBUG_LARGEBAR) {
> + pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
> + debug_largebar = true;
How should that work???
> + }
> +}
> +
> static int __init amdgpu_init(void)
> {
> int r;
> @@ -2893,6 +2915,8 @@ static int __init amdgpu_init(void)
> /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
> amdgpu_amdkfd_init();
>
> + amdgpu_init_debug_options();
> +
> /* let modprobe override vga console setting */
> return pci_register_driver(&amdgpu_kms_pci_driver);
>
> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
> index 67d7b7ee8a2a..6fa644c249a5 100644
> --- a/drivers/gpu/drm/amd/include/amd_shared.h
> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
> @@ -257,6 +257,15 @@ enum DC_DEBUG_MASK {
>
> enum amd_dpm_forced_level;
>
> +/*
> + * amdgpu.debug module options. Are all disabled by default
> + */
> +enum AMDGPU_DEBUG_MASK {
> + DEBUG_VERBOSE_EVICTIONS = (1 << 0), // 0x1
> + DEBUG_VM = (1 << 1), // 0x2
> + DEBUG_LARGEBAR = (1 << 2), // 0x4
Good start, but please give the symbol names an AMDGPU_ prefix. Stuff
like DEBUG_VM is just way to general and could clash.
Apart from that comments on the same line and using // style comments
are frowned upon. You should probably rather use the BIT() macro here.
Regards,
Christian.
> +
> /**
> * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks
> * @name: Name of IP block
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: Merge debug module parameters
2023-08-25 6:56 ` Christian König
@ 2023-08-25 12:24 ` André Almeida
2023-08-25 12:29 ` Christian König
0 siblings, 1 reply; 9+ messages in thread
From: André Almeida @ 2023-08-25 12:24 UTC (permalink / raw)
To: Christian König
Cc: kernel-dev, alexander.deucher, amd-gfx, dri-devel,
pierre-eric.pelloux-prayer, linux-kernel,
'Marek Olšák'
Em 25/08/2023 03:56, Christian König escreveu:
> Am 24.08.23 um 18:25 schrieb André Almeida:
>> Merge all developer debug options available as separated module
>> parameters in one, making it obvious that are for developers.
>>
>> Signed-off-by: André Almeida <andrealmeid@igalia.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 24 ++++++++++++++++++++++++
>> drivers/gpu/drm/amd/include/amd_shared.h | 9 +++++++++
>> 2 files changed, 33 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index f5856b82605e..d53e4097acc0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -194,6 +194,7 @@ int amdgpu_use_xgmi_p2p = 1;
>> int amdgpu_vcnfw_log;
>> int amdgpu_sg_display = -1; /* auto */
>> int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
>> +uint amdgpu_debug_mask;
>> static void amdgpu_drv_delayed_reset_work_handler(struct work_struct
>> *work);
>> @@ -938,6 +939,9 @@ module_param_named(user_partt_mode,
>> amdgpu_user_partt_mode, uint, 0444);
>> module_param(enforce_isolation, bool, 0444);
>> MODULE_PARM_DESC(enforce_isolation, "enforce process isolation
>> between graphics and compute . enforce_isolation = on");
>> +MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by
>> default");
>> +module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
>> +
>> /* These devices are not supported by amdgpu.
>> * They are supported by the mach64, r128, radeon drivers
>> */
>> @@ -2871,6 +2875,24 @@ static struct pci_driver
amdgpu_kms_pci_driver = {
>> .dev_groups = amdgpu_sysfs_groups,
>> };
>> +static void amdgpu_init_debug_options(void)
>> +{
>> + if (amdgpu_debug_mask & DEBUG_VERBOSE_EVICTIONS) {
>> + pr_info("debug: eviction debug messages enabled\n");
>> + debug_evictions = true;
>> + }
>> +
>> + if (amdgpu_debug_mask & DEBUG_VM) {
>> + pr_info("debug: VM handling debug enabled\n");
>> + amdgpu_vm_debug = true;
>> + }
>> +
>> + if (amdgpu_debug_mask & DEBUG_LARGEBAR) {
>> + pr_info("debug: enabled simulating large-bar capability on
>> non-large bar system\n");
>> + debug_largebar = true;
>
> How should that work???
Ops, I thought it was a boolean. It should be
+ debug_largebar = 1;
>
>> + }
>> +}
>> +
>> static int __init amdgpu_init(void)
>> {
>> int r;
>> @@ -2893,6 +2915,8 @@ static int __init amdgpu_init(void)
>> /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not
>> set. */
>> amdgpu_amdkfd_init();
>> + amdgpu_init_debug_options();
>> +
>> /* let modprobe override vga console setting */
>> return pci_register_driver(&amdgpu_kms_pci_driver);
>> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h
>> b/drivers/gpu/drm/amd/include/amd_shared.h
>> index 67d7b7ee8a2a..6fa644c249a5 100644
>> --- a/drivers/gpu/drm/amd/include/amd_shared.h
>> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
>> @@ -257,6 +257,15 @@ enum DC_DEBUG_MASK {
>> enum amd_dpm_forced_level;
>> +/*
>> + * amdgpu.debug module options. Are all disabled by default
>> + */
>> +enum AMDGPU_DEBUG_MASK {
>> + DEBUG_VERBOSE_EVICTIONS = (1 << 0), // 0x1
>> + DEBUG_VM = (1 << 1), // 0x2
>> + DEBUG_LARGEBAR = (1 << 2), // 0x4
>
> Good start, but please give the symbol names an AMDGPU_ prefix. Stuff
> like DEBUG_VM is just way to general and could clash.
>
> Apart from that comments on the same line and using // style comments
> are frowned upon. You should probably rather use the BIT() macro here.
>
Ack, I'll change that for next version
> Regards,
> Christian.
>
>> +
>> /**
>> * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks
>> * @name: Name of IP block
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: Merge debug module parameters
2023-08-25 12:24 ` André Almeida
@ 2023-08-25 12:29 ` Christian König
2023-08-25 12:34 ` André Almeida
0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2023-08-25 12:29 UTC (permalink / raw)
To: André Almeida, Christian König
Cc: pierre-eric.pelloux-prayer, 'Marek Olšák',
linux-kernel, dri-devel, amd-gfx, kernel-dev, alexander.deucher
Am 25.08.23 um 14:24 schrieb André Almeida:
> Em 25/08/2023 03:56, Christian König escreveu:
> > Am 24.08.23 um 18:25 schrieb André Almeida:
> >> Merge all developer debug options available as separated module
> >> parameters in one, making it obvious that are for developers.
> >>
> >> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> >> ---
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 24
> ++++++++++++++++++++++++
> >> drivers/gpu/drm/amd/include/amd_shared.h | 9 +++++++++
> >> 2 files changed, 33 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> index f5856b82605e..d53e4097acc0 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >> @@ -194,6 +194,7 @@ int amdgpu_use_xgmi_p2p = 1;
> >> int amdgpu_vcnfw_log;
> >> int amdgpu_sg_display = -1; /* auto */
> >> int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
> >> +uint amdgpu_debug_mask;
> >> static void amdgpu_drv_delayed_reset_work_handler(struct work_struct
> >> *work);
> >> @@ -938,6 +939,9 @@ module_param_named(user_partt_mode,
> >> amdgpu_user_partt_mode, uint, 0444);
> >> module_param(enforce_isolation, bool, 0444);
> >> MODULE_PARM_DESC(enforce_isolation, "enforce process isolation
> >> between graphics and compute . enforce_isolation = on");
> >> +MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by
> >> default");
> >> +module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
> >> +
> >> /* These devices are not supported by amdgpu.
> >> * They are supported by the mach64, r128, radeon drivers
> >> */
> >> @@ -2871,6 +2875,24 @@ static struct pci_driver
> amdgpu_kms_pci_driver = {
> >> .dev_groups = amdgpu_sysfs_groups,
> >> };
> >> +static void amdgpu_init_debug_options(void)
> >> +{
> >> + if (amdgpu_debug_mask & DEBUG_VERBOSE_EVICTIONS) {
> >> + pr_info("debug: eviction debug messages enabled\n");
> >> + debug_evictions = true;
> >> + }
> >> +
> >> + if (amdgpu_debug_mask & DEBUG_VM) {
> >> + pr_info("debug: VM handling debug enabled\n");
> >> + amdgpu_vm_debug = true;
> >> + }
> >> +
> >> + if (amdgpu_debug_mask & DEBUG_LARGEBAR) {
> >> + pr_info("debug: enabled simulating large-bar capability on
> >> non-large bar system\n");
> >> + debug_largebar = true;
> >
> > How should that work???
>
> Ops, I thought it was a boolean. It should be
>
> + debug_largebar = 1;
That's not the problem, the question is since when do we have a
debug_largebar option and what should that one do?
Regards,
Christian.
>
> >
> >> + }
> >> +}
> >> +
> >> static int __init amdgpu_init(void)
> >> {
> >> int r;
> >> @@ -2893,6 +2915,8 @@ static int __init amdgpu_init(void)
> >> /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not
> >> set. */
> >> amdgpu_amdkfd_init();
> >> + amdgpu_init_debug_options();
> >> +
> >> /* let modprobe override vga console setting */
> >> return pci_register_driver(&amdgpu_kms_pci_driver);
> >> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h
> >> b/drivers/gpu/drm/amd/include/amd_shared.h
> >> index 67d7b7ee8a2a..6fa644c249a5 100644
> >> --- a/drivers/gpu/drm/amd/include/amd_shared.h
> >> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
> >> @@ -257,6 +257,15 @@ enum DC_DEBUG_MASK {
> >> enum amd_dpm_forced_level;
> >> +/*
> >> + * amdgpu.debug module options. Are all disabled by default
> >> + */
> >> +enum AMDGPU_DEBUG_MASK {
> >> + DEBUG_VERBOSE_EVICTIONS = (1 << 0), // 0x1
> >> + DEBUG_VM = (1 << 1), // 0x2
> >> + DEBUG_LARGEBAR = (1 << 2), // 0x4
> >
> > Good start, but please give the symbol names an AMDGPU_ prefix. Stuff
> > like DEBUG_VM is just way to general and could clash.
> >
> > Apart from that comments on the same line and using // style comments
> > are frowned upon. You should probably rather use the BIT() macro here.
> >
>
> Ack, I'll change that for next version
>
> > Regards,
> > Christian.
> >
> >> +
> >> /**
> >> * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks
> >> * @name: Name of IP block
> >
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: Merge debug module parameters
2023-08-25 12:29 ` Christian König
@ 2023-08-25 12:34 ` André Almeida
2023-08-25 12:38 ` Christian König
0 siblings, 1 reply; 9+ messages in thread
From: André Almeida @ 2023-08-25 12:34 UTC (permalink / raw)
To: Christian König, Christian König
Cc: pierre-eric.pelloux-prayer, 'Marek Olšák',
linux-kernel, dri-devel, amd-gfx, kernel-dev, alexander.deucher
Em 25/08/2023 09:29, Christian König escreveu:
> Am 25.08.23 um 14:24 schrieb André Almeida:
>> Em 25/08/2023 03:56, Christian König escreveu:
>> > Am 24.08.23 um 18:25 schrieb André Almeida:
>> >> Merge all developer debug options available as separated module
>> >> parameters in one, making it obvious that are for developers.
>> >>
>> >> Signed-off-by: André Almeida <andrealmeid@igalia.com>
>> >> ---
>> >> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 24
>> ++++++++++++++++++++++++
>> >> drivers/gpu/drm/amd/include/amd_shared.h | 9 +++++++++
>> >> 2 files changed, 33 insertions(+)
>> >>
>> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> >> index f5856b82605e..d53e4097acc0 100644
>> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> >> @@ -194,6 +194,7 @@ int amdgpu_use_xgmi_p2p = 1;
>> >> int amdgpu_vcnfw_log;
>> >> int amdgpu_sg_display = -1; /* auto */
>> >> int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
>> >> +uint amdgpu_debug_mask;
>> >> static void amdgpu_drv_delayed_reset_work_handler(struct work_struct
>> >> *work);
>> >> @@ -938,6 +939,9 @@ module_param_named(user_partt_mode,
>> >> amdgpu_user_partt_mode, uint, 0444);
>> >> module_param(enforce_isolation, bool, 0444);
>> >> MODULE_PARM_DESC(enforce_isolation, "enforce process isolation
>> >> between graphics and compute . enforce_isolation = on");
>> >> +MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by
>> >> default");
>> >> +module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
>> >> +
>> >> /* These devices are not supported by amdgpu.
>> >> * They are supported by the mach64, r128, radeon drivers
>> >> */
>> >> @@ -2871,6 +2875,24 @@ static struct pci_driver
>> amdgpu_kms_pci_driver = {
>> >> .dev_groups = amdgpu_sysfs_groups,
>> >> };
>> >> +static void amdgpu_init_debug_options(void)
>> >> +{
>> >> + if (amdgpu_debug_mask & DEBUG_VERBOSE_EVICTIONS) {
>> >> + pr_info("debug: eviction debug messages enabled\n");
>> >> + debug_evictions = true;
>> >> + }
>> >> +
>> >> + if (amdgpu_debug_mask & DEBUG_VM) {
>> >> + pr_info("debug: VM handling debug enabled\n");
>> >> + amdgpu_vm_debug = true;
>> >> + }
>> >> +
>> >> + if (amdgpu_debug_mask & DEBUG_LARGEBAR) {
>> >> + pr_info("debug: enabled simulating large-bar capability on
>> >> non-large bar system\n");
>> >> + debug_largebar = true;
>> >
>> > How should that work???
>>
>> Ops, I thought it was a boolean. It should be
>>
>> + debug_largebar = 1;
>
>
> That's not the problem, the question is since when do we have a
> debug_largebar option and what should that one do?
>
It should work exactly like the other one, but instead of using
amdgpu.large_bar=1, one would use amdgpu.debug_mask=0x4 to activate it,
as the plan is to merge all current debug options in a single one right?
> Regards,
> Christian.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: Merge debug module parameters
2023-08-25 12:34 ` André Almeida
@ 2023-08-25 12:38 ` Christian König
0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2023-08-25 12:38 UTC (permalink / raw)
To: André Almeida, Christian König
Cc: pierre-eric.pelloux-prayer, 'Marek Olšák',
linux-kernel, dri-devel, amd-gfx, kernel-dev, alexander.deucher
Am 25.08.23 um 14:34 schrieb André Almeida:
> Em 25/08/2023 09:29, Christian König escreveu:
>> Am 25.08.23 um 14:24 schrieb André Almeida:
>>> Em 25/08/2023 03:56, Christian König escreveu:
>>> > Am 24.08.23 um 18:25 schrieb André Almeida:
>>> >> Merge all developer debug options available as separated module
>>> >> parameters in one, making it obvious that are for developers.
>>> >>
>>> >> Signed-off-by: André Almeida <andrealmeid@igalia.com>
>>> >> ---
>>> >> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 24
>>> ++++++++++++++++++++++++
>>> >> drivers/gpu/drm/amd/include/amd_shared.h | 9 +++++++++
>>> >> 2 files changed, 33 insertions(+)
>>> >>
>>> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> >> index f5856b82605e..d53e4097acc0 100644
>>> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> >> @@ -194,6 +194,7 @@ int amdgpu_use_xgmi_p2p = 1;
>>> >> int amdgpu_vcnfw_log;
>>> >> int amdgpu_sg_display = -1; /* auto */
>>> >> int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
>>> >> +uint amdgpu_debug_mask;
>>> >> static void amdgpu_drv_delayed_reset_work_handler(struct
>>> work_struct
>>> >> *work);
>>> >> @@ -938,6 +939,9 @@ module_param_named(user_partt_mode,
>>> >> amdgpu_user_partt_mode, uint, 0444);
>>> >> module_param(enforce_isolation, bool, 0444);
>>> >> MODULE_PARM_DESC(enforce_isolation, "enforce process isolation
>>> >> between graphics and compute . enforce_isolation = on");
>>> >> +MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by
>>> >> default");
>>> >> +module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
>>> >> +
>>> >> /* These devices are not supported by amdgpu.
>>> >> * They are supported by the mach64, r128, radeon drivers
>>> >> */
>>> >> @@ -2871,6 +2875,24 @@ static struct pci_driver
>>> amdgpu_kms_pci_driver = {
>>> >> .dev_groups = amdgpu_sysfs_groups,
>>> >> };
>>> >> +static void amdgpu_init_debug_options(void)
>>> >> +{
>>> >> + if (amdgpu_debug_mask & DEBUG_VERBOSE_EVICTIONS) {
>>> >> + pr_info("debug: eviction debug messages enabled\n");
>>> >> + debug_evictions = true;
>>> >> + }
>>> >> +
>>> >> + if (amdgpu_debug_mask & DEBUG_VM) {
>>> >> + pr_info("debug: VM handling debug enabled\n");
>>> >> + amdgpu_vm_debug = true;
>>> >> + }
>>> >> +
>>> >> + if (amdgpu_debug_mask & DEBUG_LARGEBAR) {
>>> >> + pr_info("debug: enabled simulating large-bar capability on
>>> >> non-large bar system\n");
>>> >> + debug_largebar = true;
>>> >
>>> > How should that work???
>>>
>>> Ops, I thought it was a boolean. It should be
>>>
>>> + debug_largebar = 1;
>>
>>
>> That's not the problem, the question is since when do we have a
>> debug_largebar option and what should that one do?
>>
>
> It should work exactly like the other one, but instead of using
> amdgpu.large_bar=1, one would use amdgpu.debug_mask=0x4 to activate
> it, as the plan is to merge all current debug options in a single one
> right?
Ah! That's the KFD debug_largebar option! Now I got what this is.
Probably best to move the booleans into amdgpu_device and deprecate the
old options.
Not really a good approach to overwrite the globals here.
Regards,
Christian.
>
>> Regards,
>> Christian.
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: Merge debug module parameters
2023-08-24 16:25 ` [PATCH 1/2] drm/amdgpu: Merge " André Almeida
2023-08-25 6:56 ` Christian König
@ 2023-09-03 1:56 ` kernel test robot
1 sibling, 0 replies; 9+ messages in thread
From: kernel test robot @ 2023-09-03 1:56 UTC (permalink / raw)
To: André Almeida, dri-devel, amd-gfx, linux-kernel
Cc: oe-kbuild-all, pierre-eric.pelloux-prayer, André Almeida,
'Marek Olšák', kernel-dev, alexander.deucher,
christian.koenig
Hi André,
kernel test robot noticed the following build errors:
[auto build test ERROR on drm-misc/drm-misc-next]
[also build test ERROR on drm/drm-next drm-exynos/exynos-drm-next drm-intel/for-linux-next drm-intel/for-linux-next-fixes drm-tip/drm-tip linus/master v6.5 next-20230831]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/drm-amdgpu-Merge-debug-module-parameters/20230825-002641
base: git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link: https://lore.kernel.org/r/20230824162505.173399-2-andrealmeid%40igalia.com
patch subject: [PATCH 1/2] drm/amdgpu: Merge debug module parameters
config: i386-allyesconfig (https://download.01.org/0day-ci/archive/20230903/202309030946.q2LgJYO7-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230903/202309030946.q2LgJYO7-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309030946.q2LgJYO7-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c: In function 'amdgpu_init_debug_options':
>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:2923:33: error: assignment of read-only variable 'debug_evictions'
2923 | debug_evictions = true;
| ^
>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:2933:17: error: 'debug_largebar' undeclared (first use in this function)
2933 | debug_largebar = true;
| ^~~~~~~~~~~~~~
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:2933:17: note: each undeclared identifier is reported only once for each function it appears in
vim +/debug_evictions +2923 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
2918
2919 static void amdgpu_init_debug_options(void)
2920 {
2921 if (amdgpu_debug_mask & DEBUG_VERBOSE_EVICTIONS) {
2922 pr_info("debug: eviction debug messages enabled\n");
> 2923 debug_evictions = true;
2924 }
2925
2926 if (amdgpu_debug_mask & DEBUG_VM) {
2927 pr_info("debug: VM handling debug enabled\n");
2928 amdgpu_vm_debug = true;
2929 }
2930
2931 if (amdgpu_debug_mask & DEBUG_LARGEBAR) {
2932 pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
> 2933 debug_largebar = true;
2934 }
2935 }
2936
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2023-09-03 1:57 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-08-24 16:25 [PATCH 0/2] drm/amdgpu: Merge all debug module parameters André Almeida
2023-08-24 16:25 ` [PATCH 1/2] drm/amdgpu: Merge " André Almeida
2023-08-25 6:56 ` Christian König
2023-08-25 12:24 ` André Almeida
2023-08-25 12:29 ` Christian König
2023-08-25 12:34 ` André Almeida
2023-08-25 12:38 ` Christian König
2023-09-03 1:56 ` kernel test robot
2023-08-24 16:25 ` [PATCH 2/2] drm/amdgpu: Create an option to disable soft recovery André Almeida
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox