All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: revise retry init to fully cleanup driver
@ 2017-11-08  3:29 Pixel Ding
       [not found] ` <1510111766-13170-1-git-send-email-Pixel.Ding-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Pixel Ding @ 2017-11-08  3:29 UTC (permalink / raw)
  To: ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Pixel Ding

Retry at drm_dev_register instead of amdgpu_device_init.

Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 15 ++-------------
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bf2b008..4ef2b1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2390,6 +2390,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		    amdgpu_virt_mmio_blocked(adev) &&
 		    !amdgpu_virt_wait_reset(adev)) {
 			dev_err(adev->dev, "VF exclusive mode timeout\n");
+			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+			adev->virt.ops = NULL;
 			r = -EAGAIN;
 			goto failed;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6b11a75..eaccd4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -565,12 +565,13 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
 	return 0;
 }
 
+
 static int amdgpu_pci_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *ent)
 {
 	struct drm_device *dev;
 	unsigned long flags = ent->driver_data;
-	int ret;
+	int ret, retry = 0;
 
 	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
 		DRM_INFO("This hardware requires experimental hardware support.\n"
@@ -603,8 +604,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 
 	pci_set_drvdata(pdev, dev);
 
+retry_init:
 	ret = drm_dev_register(dev, ent->driver_data);
-	if (ret)
+	if (ret == -EAGAIN && ++retry <= 3) {
+		DRM_INFO("retry init %d\n", retry);
+		/* Don't request EX mode too frequently which is attacking */
+		msleep(5000);
+		goto retry_init;
+	} else if (ret)
 		goto err_pci;
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 1d56b5b..65360cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -84,7 +84,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 {
 	struct amdgpu_device *adev;
-	int r, acpi_status, retry = 0;
+	int r, acpi_status;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 	if (!amdgpu_si_support) {
@@ -120,7 +120,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 		}
 	}
 #endif
-retry_init:
 
 	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
 	if (adev == NULL) {
@@ -143,17 +142,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	 * VRAM allocation
 	 */
 	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
-	if (r == -EAGAIN && ++retry <= 3) {
-		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
-		adev->virt.ops = NULL;
-		amdgpu_device_fini(adev);
-		kfree(adev);
-		dev->dev_private = NULL;
-		/* Don't request EX mode too frequently which is attacking */
-		msleep(5000);
-		dev_err(&dev->pdev->dev, "retry init %d\n", retry);
-		goto retry_init;
-	} else if (r) {
+	if (r) {
 		dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
 		goto out;
 	}
-- 
2.9.5

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu: revise retry init to fully cleanup driver
       [not found] ` <1510111766-13170-1-git-send-email-Pixel.Ding-5C7GfCeVMHo@public.gmane.org>
@ 2017-11-08  3:31   ` Ding, Pixel
  2017-11-08  9:42   ` Christian König
  1 sibling, 0 replies; 5+ messages in thread
From: Ding, Pixel @ 2017-11-08  3:31 UTC (permalink / raw)
  To: ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	Sun, Gary
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

Hi Christian,

Please help review this patch.

Hi Gary, 

With this change, debugfs will be cleaned up by DRM, so we don’t need to handle it anymore.
— 
Sincerely Yours,
Pixel








On 08/11/2017, 11:29 AM, "amd-gfx on behalf of Pixel Ding" <amd-gfx-bounces@lists.freedesktop.org on behalf of Pixel.Ding@amd.com> wrote:

>Retry at drm_dev_register instead of amdgpu_device_init.
>
>Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 15 ++-------------
> 3 files changed, 13 insertions(+), 15 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index bf2b008..4ef2b1b 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -2390,6 +2390,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> 		    amdgpu_virt_mmio_blocked(adev) &&
> 		    !amdgpu_virt_wait_reset(adev)) {
> 			dev_err(adev->dev, "VF exclusive mode timeout\n");
>+			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
>+			adev->virt.ops = NULL;
> 			r = -EAGAIN;
> 			goto failed;
> 		}
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>index 6b11a75..eaccd4b 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>@@ -565,12 +565,13 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
> 	return 0;
> }
> 
>+
> static int amdgpu_pci_probe(struct pci_dev *pdev,
> 			    const struct pci_device_id *ent)
> {
> 	struct drm_device *dev;
> 	unsigned long flags = ent->driver_data;
>-	int ret;
>+	int ret, retry = 0;
> 
> 	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
> 		DRM_INFO("This hardware requires experimental hardware support.\n"
>@@ -603,8 +604,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
> 
> 	pci_set_drvdata(pdev, dev);
> 
>+retry_init:
> 	ret = drm_dev_register(dev, ent->driver_data);
>-	if (ret)
>+	if (ret == -EAGAIN && ++retry <= 3) {
>+		DRM_INFO("retry init %d\n", retry);
>+		/* Don't request EX mode too frequently which is attacking */
>+		msleep(5000);
>+		goto retry_init;
>+	} else if (ret)
> 		goto err_pci;
> 
> 	return 0;
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>index 1d56b5b..65360cd 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>@@ -84,7 +84,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
> int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
> {
> 	struct amdgpu_device *adev;
>-	int r, acpi_status, retry = 0;
>+	int r, acpi_status;
> 
> #ifdef CONFIG_DRM_AMDGPU_SI
> 	if (!amdgpu_si_support) {
>@@ -120,7 +120,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
> 		}
> 	}
> #endif
>-retry_init:
> 
> 	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
> 	if (adev == NULL) {
>@@ -143,17 +142,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
> 	 * VRAM allocation
> 	 */
> 	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
>-	if (r == -EAGAIN && ++retry <= 3) {
>-		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
>-		adev->virt.ops = NULL;
>-		amdgpu_device_fini(adev);
>-		kfree(adev);
>-		dev->dev_private = NULL;
>-		/* Don't request EX mode too frequently which is attacking */
>-		msleep(5000);
>-		dev_err(&dev->pdev->dev, "retry init %d\n", retry);
>-		goto retry_init;
>-	} else if (r) {
>+	if (r) {
> 		dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
> 		goto out;
> 	}
>-- 
>2.9.5
>
>_______________________________________________
>amd-gfx mailing list
>amd-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu: revise retry init to fully cleanup driver
       [not found] ` <1510111766-13170-1-git-send-email-Pixel.Ding-5C7GfCeVMHo@public.gmane.org>
  2017-11-08  3:31   ` Ding, Pixel
@ 2017-11-08  9:42   ` Christian König
       [not found]     ` <551ca484-9d4d-d0e3-cc70-e02e5405d690-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 5+ messages in thread
From: Christian König @ 2017-11-08  9:42 UTC (permalink / raw)
  To: Pixel Ding, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 08.11.2017 um 04:29 schrieb Pixel Ding:
> Retry at drm_dev_register instead of amdgpu_device_init.
>
> Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 15 ++-------------
>   3 files changed, 13 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bf2b008..4ef2b1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2390,6 +2390,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   		    amdgpu_virt_mmio_blocked(adev) &&
>   		    !amdgpu_virt_wait_reset(adev)) {
>   			dev_err(adev->dev, "VF exclusive mode timeout\n");
> +			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +			adev->virt.ops = NULL;

Why is that necessary? Maybe put this into some SRIOV specific fini 
function?

Apart from that patch looks good to me and is Acked-by: Christian König 
<christian.koenig@amd.com>.

Regards,
Christian.

>   			r = -EAGAIN;
>   			goto failed;
>   		}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 6b11a75..eaccd4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -565,12 +565,13 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
>   	return 0;
>   }
>   
> +
>   static int amdgpu_pci_probe(struct pci_dev *pdev,
>   			    const struct pci_device_id *ent)
>   {
>   	struct drm_device *dev;
>   	unsigned long flags = ent->driver_data;
> -	int ret;
> +	int ret, retry = 0;
>   
>   	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
>   		DRM_INFO("This hardware requires experimental hardware support.\n"
> @@ -603,8 +604,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>   
>   	pci_set_drvdata(pdev, dev);
>   
> +retry_init:
>   	ret = drm_dev_register(dev, ent->driver_data);
> -	if (ret)
> +	if (ret == -EAGAIN && ++retry <= 3) {
> +		DRM_INFO("retry init %d\n", retry);
> +		/* Don't request EX mode too frequently which is attacking */
> +		msleep(5000);
> +		goto retry_init;
> +	} else if (ret)
>   		goto err_pci;
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 1d56b5b..65360cd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -84,7 +84,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
>   int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>   {
>   	struct amdgpu_device *adev;
> -	int r, acpi_status, retry = 0;
> +	int r, acpi_status;
>   
>   #ifdef CONFIG_DRM_AMDGPU_SI
>   	if (!amdgpu_si_support) {
> @@ -120,7 +120,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>   		}
>   	}
>   #endif
> -retry_init:
>   
>   	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
>   	if (adev == NULL) {
> @@ -143,17 +142,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>   	 * VRAM allocation
>   	 */
>   	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
> -	if (r == -EAGAIN && ++retry <= 3) {
> -		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> -		adev->virt.ops = NULL;
> -		amdgpu_device_fini(adev);
> -		kfree(adev);
> -		dev->dev_private = NULL;
> -		/* Don't request EX mode too frequently which is attacking */
> -		msleep(5000);
> -		dev_err(&dev->pdev->dev, "retry init %d\n", retry);
> -		goto retry_init;
> -	} else if (r) {
> +	if (r) {
>   		dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
>   		goto out;
>   	}


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu: revise retry init to fully cleanup driver
       [not found]     ` <551ca484-9d4d-d0e3-cc70-e02e5405d690-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-11-08  9:46       ` Ding, Pixel
       [not found]         ` <46FEAA65-4560-4371-ADCD-4E49B164221A-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Ding, Pixel @ 2017-11-08  9:46 UTC (permalink / raw)
  To: Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

When an exclusive mode timeout happens, the VF is no longer active, and exclusive mode requests are ignored by the host. Unloading KMS or running device fini also requests exclusive mode, and that request would time out again since no response is received.

This only applies to the exclusive mode timeout case, so I didn’t put these two lines in the general SRIOV fini function.
— 
Sincerely Yours,
Pixel








On 08/11/2017, 5:42 PM, "Christian König" <ckoenig.leichtzumerken@gmail.com> wrote:

>Am 08.11.2017 um 04:29 schrieb Pixel Ding:
>> Retry at drm_dev_register instead of amdgpu_device_init.
>>
>> Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++++++++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 15 ++-------------
>>   3 files changed, 13 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index bf2b008..4ef2b1b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2390,6 +2390,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>   		    amdgpu_virt_mmio_blocked(adev) &&
>>   		    !amdgpu_virt_wait_reset(adev)) {
>>   			dev_err(adev->dev, "VF exclusive mode timeout\n");
>> +			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
>> +			adev->virt.ops = NULL;
>
>Why is that necessary? Maybe put this into some SRIOV specific fini 
>function?
>
>Apart from that patch looks good to me and is Acked-by: Christian König 
><christian.koenig@amd.com>.
>
>Regards,
>Christian.
>
>>   			r = -EAGAIN;
>>   			goto failed;
>>   		}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6b11a75..eaccd4b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -565,12 +565,13 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
>>   	return 0;
>>   }
>>   
>> +
>>   static int amdgpu_pci_probe(struct pci_dev *pdev,
>>   			    const struct pci_device_id *ent)
>>   {
>>   	struct drm_device *dev;
>>   	unsigned long flags = ent->driver_data;
>> -	int ret;
>> +	int ret, retry = 0;
>>   
>>   	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
>>   		DRM_INFO("This hardware requires experimental hardware support.\n"
>> @@ -603,8 +604,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>>   
>>   	pci_set_drvdata(pdev, dev);
>>   
>> +retry_init:
>>   	ret = drm_dev_register(dev, ent->driver_data);
>> -	if (ret)
>> +	if (ret == -EAGAIN && ++retry <= 3) {
>> +		DRM_INFO("retry init %d\n", retry);
>> +		/* Don't request EX mode too frequently which is attacking */
>> +		msleep(5000);
>> +		goto retry_init;
>> +	} else if (ret)
>>   		goto err_pci;
>>   
>>   	return 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 1d56b5b..65360cd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -84,7 +84,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
>>   int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>>   {
>>   	struct amdgpu_device *adev;
>> -	int r, acpi_status, retry = 0;
>> +	int r, acpi_status;
>>   
>>   #ifdef CONFIG_DRM_AMDGPU_SI
>>   	if (!amdgpu_si_support) {
>> @@ -120,7 +120,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>>   		}
>>   	}
>>   #endif
>> -retry_init:
>>   
>>   	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
>>   	if (adev == NULL) {
>> @@ -143,17 +142,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>>   	 * VRAM allocation
>>   	 */
>>   	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
>> -	if (r == -EAGAIN && ++retry <= 3) {
>> -		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
>> -		adev->virt.ops = NULL;
>> -		amdgpu_device_fini(adev);
>> -		kfree(adev);
>> -		dev->dev_private = NULL;
>> -		/* Don't request EX mode too frequently which is attacking */
>> -		msleep(5000);
>> -		dev_err(&dev->pdev->dev, "retry init %d\n", retry);
>> -		goto retry_init;
>> -	} else if (r) {
>> +	if (r) {
>>   		dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
>>   		goto out;
>>   	}
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu: revise retry init to fully cleanup driver
       [not found]         ` <46FEAA65-4560-4371-ADCD-4E49B164221A-5C7GfCeVMHo@public.gmane.org>
@ 2017-11-08  9:47           ` Christian König
  0 siblings, 0 replies; 5+ messages in thread
From: Christian König @ 2017-11-08  9:47 UTC (permalink / raw)
  To: Ding, Pixel,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org

Please put that as a comment above those two lines of code.

Apart from that the patch looks good to me.

Regards,
Christian.

Am 08.11.2017 um 10:46 schrieb Ding, Pixel:
> When exclusive mode timeout happens, the VF is not active anymore. Exclusive requests will be ignored by host. Unload kms or device fini also request exclusive mode and it will get timeout again since no response received.
>
> This only happens for exclusive mode timeout, so I didn’t put them in general SRIOV fini function.
> —
> Sincerely Yours,
> Pixel
>
>
>
>
>
>
>
>
> On 08/11/2017, 5:42 PM, "Christian König" <ckoenig.leichtzumerken@gmail.com> wrote:
>
>> Am 08.11.2017 um 04:29 schrieb Pixel Ding:
>>> Retry at drm_dev_register instead of amdgpu_device_init.
>>>
>>> Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 11 +++++++++--
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 15 ++-------------
>>>    3 files changed, 13 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index bf2b008..4ef2b1b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2390,6 +2390,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>>    		    amdgpu_virt_mmio_blocked(adev) &&
>>>    		    !amdgpu_virt_wait_reset(adev)) {
>>>    			dev_err(adev->dev, "VF exclusive mode timeout\n");
>>> +			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
>>> +			adev->virt.ops = NULL;
>> Why is that necessary? Maybe put this into some SRIOV specific fini
>> function?
>>
>> Apart from that patch looks good to me and is Acked-by: Christian König
>> <christian.koenig@amd.com>.
>>
>> Regards,
>> Christian.
>>
>>>    			r = -EAGAIN;
>>>    			goto failed;
>>>    		}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index 6b11a75..eaccd4b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -565,12 +565,13 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
>>>    	return 0;
>>>    }
>>>    
>>> +
>>>    static int amdgpu_pci_probe(struct pci_dev *pdev,
>>>    			    const struct pci_device_id *ent)
>>>    {
>>>    	struct drm_device *dev;
>>>    	unsigned long flags = ent->driver_data;
>>> -	int ret;
>>> +	int ret, retry = 0;
>>>    
>>>    	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
>>>    		DRM_INFO("This hardware requires experimental hardware support.\n"
>>> @@ -603,8 +604,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>>>    
>>>    	pci_set_drvdata(pdev, dev);
>>>    
>>> +retry_init:
>>>    	ret = drm_dev_register(dev, ent->driver_data);
>>> -	if (ret)
>>> +	if (ret == -EAGAIN && ++retry <= 3) {
>>> +		DRM_INFO("retry init %d\n", retry);
>>> +		/* Don't request EX mode too frequently which is attacking */
>>> +		msleep(5000);
>>> +		goto retry_init;
>>> +	} else if (ret)
>>>    		goto err_pci;
>>>    
>>>    	return 0;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> index 1d56b5b..65360cd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> @@ -84,7 +84,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
>>>    int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>>>    {
>>>    	struct amdgpu_device *adev;
>>> -	int r, acpi_status, retry = 0;
>>> +	int r, acpi_status;
>>>    
>>>    #ifdef CONFIG_DRM_AMDGPU_SI
>>>    	if (!amdgpu_si_support) {
>>> @@ -120,7 +120,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>>>    		}
>>>    	}
>>>    #endif
>>> -retry_init:
>>>    
>>>    	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
>>>    	if (adev == NULL) {
>>> @@ -143,17 +142,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
>>>    	 * VRAM allocation
>>>    	 */
>>>    	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
>>> -	if (r == -EAGAIN && ++retry <= 3) {
>>> -		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
>>> -		adev->virt.ops = NULL;
>>> -		amdgpu_device_fini(adev);
>>> -		kfree(adev);
>>> -		dev->dev_private = NULL;
>>> -		/* Don't request EX mode too frequently which is attacking */
>>> -		msleep(5000);
>>> -		dev_err(&dev->pdev->dev, "retry init %d\n", retry);
>>> -		goto retry_init;
>>> -	} else if (r) {
>>> +	if (r) {
>>>    		dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
>>>    		goto out;
>>>    	}
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-11-08  9:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-08  3:29 [PATCH] drm/amdgpu: revise retry init to fully cleanup driver Pixel Ding
     [not found] ` <1510111766-13170-1-git-send-email-Pixel.Ding-5C7GfCeVMHo@public.gmane.org>
2017-11-08  3:31   ` Ding, Pixel
2017-11-08  9:42   ` Christian König
     [not found]     ` <551ca484-9d4d-d0e3-cc70-e02e5405d690-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-11-08  9:46       ` Ding, Pixel
     [not found]         ` <46FEAA65-4560-4371-ADCD-4E49B164221A-5C7GfCeVMHo@public.gmane.org>
2017-11-08  9:47           ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.