All of lore.kernel.org
 help / color / mirror / Atom feed
From: zhoucm1 <david1.zhou-5C7GfCeVMHo@public.gmane.org>
To: "Christian König"
	<deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>,
	"Roger.He" <Hongbo.He-5C7GfCeVMHo@public.gmane.org>,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 3/3] drm/amdgpu: fix gpu reset issue
Date: Fri, 21 Apr 2017 16:54:01 +0800	[thread overview]
Message-ID: <58F9C8A9.5010805@amd.com> (raw)
In-Reply-To: <ef2c40cd-bb55-5921-9318-e0147f1b4719-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>

Hi Roger,

Could you try moving 'id->current_gpu_reset_count = 
atomic_read(&adev->gpu_reset_counter);' in amdgpu_vm_grab_id to the 
no_flush_needed label?

Regards,
David Zhou

On 2017年04月21日 16:27, Christian König wrote:
> NAK, that is exactly what we wanted to avoid.
>
> We used to have an exclusive lock for this and it caused a whole bunch 
> of problems.
>
> Please elaborate why that should be necessary.
>
> Regards,
> Christian.
>
> Am 21.04.2017 um 09:08 schrieb Roger.He:
>> Change-Id: Ib77d33a09f348ebf2e3a9d7861411f4b951ebf7c
>> Signed-off-by: Roger.He <Hongbo.He@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 16 +++++++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c  |  7 ++++++-
>>   4 files changed, 31 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 71364f5..ab0ffa8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1526,6 +1526,7 @@ struct amdgpu_device {
>>       atomic64_t            num_bytes_moved;
>>       atomic64_t            num_evictions;
>>       atomic_t            gpu_reset_counter;
>> +    atomic_t            gpu_reset_state;
>>         /* data for buffer migration throttling */
>>       struct {
>> @@ -1851,6 +1852,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring 
>> *ring)
>>   #define amdgpu_psp_check_fw_loading_status(adev, i) 
>> (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
>>     /* Common functions */
>> +static inline int amdgpu_device_is_reset(struct amdgpu_device *adev)
>> +{
>> +    return atomic_read(&adev->gpu_reset_state);
>> +}
>> +
>>   int amdgpu_gpu_reset(struct amdgpu_device *adev);
>>   bool amdgpu_need_backup(struct amdgpu_device *adev);
>>   void amdgpu_pci_config_reset(struct amdgpu_device *adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index f882496..0fb4716 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1894,6 +1894,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>       mutex_init(&adev->grbm_idx_mutex);
>>       mutex_init(&adev->mn_lock);
>>       hash_init(adev->mn_hash);
>> +    atomic_set(&adev->gpu_reset_state, 0);
>>         amdgpu_check_arguments(adev);
>>   @@ -2655,6 +2656,18 @@ int amdgpu_sriov_gpu_reset(struct 
>> amdgpu_device *adev, bool voluntary)
>>   }
>>     /**
>> + * amdgpu_device_set_reset_state - set gpu reset state
>> + *
>> + * @adev: amdgpu device pointer
>> + * @state: true when start to reset gpu; false: reset done
>> + */
>> +static inline void amdgpu_device_set_reset_state(struct 
>> amdgpu_device *adev,
>> +                            bool state)
>> +{
>> +    atomic_set(&adev->gpu_reset_state, state);
>> +}
>> +
>> +/**
>>    * amdgpu_gpu_reset - reset the asic
>>    *
>>    * @adev: amdgpu device pointer
>> @@ -2678,7 +2691,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
>>       }
>>         atomic_inc(&adev->gpu_reset_counter);
>> -
>> +    amdgpu_device_set_reset_state(adev, true);
>>       /* block TTM */
>>       resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
>>       /* store modesetting */
>> @@ -2811,6 +2824,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
>>           dev_info(adev->dev, "GPU reset failed\n");
>>       }
>>   +    amdgpu_device_set_reset_state(adev, false);
>>       return r;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index ead00d7..8cc14af 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -679,11 +679,15 @@ long amdgpu_drm_ioctl(struct file *filp,
>>       struct drm_file *file_priv = filp->private_data;
>>       struct drm_device *dev;
>>       long ret;
>> +
>>       dev = file_priv->minor->dev;
>>       ret = pm_runtime_get_sync(dev->dev);
>>       if (ret < 0)
>>           return ret;
>>   +    while (amdgpu_device_is_reset(dev->dev_private))
>> +        msleep(100);
>> +
>>       ret = drm_ioctl(filp, cmd, arg);
>>         pm_runtime_mark_last_busy(dev->dev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
>> index 2648291..22b8059 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
>> @@ -36,10 +36,15 @@
>>   long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, 
>> unsigned long arg)
>>   {
>>       unsigned int nr = DRM_IOCTL_NR(cmd);
>> +    struct drm_file *file_priv = filp->private_data;
>> +    struct amdgpu_device *adev = file_priv->minor->dev->dev_private;
>>       int ret;
>>   -    if (nr < DRM_COMMAND_BASE)
>> +    if (nr < DRM_COMMAND_BASE) {
>> +        while (amdgpu_device_is_reset(adev))
>> +            msleep(100);
>>           return drm_compat_ioctl(filp, cmd, arg);
>> +    }
>>         ret = amdgpu_drm_ioctl(filp, cmd, arg);
>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2017-04-21  8:54 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-21  7:08 [PATCH 3/3] drm/amdgpu: fix gpu reset issue Roger.He
     [not found] ` <1492758490-12631-1-git-send-email-Hongbo.He-5C7GfCeVMHo@public.gmane.org>
2017-04-21  7:18   ` zhoucm1
     [not found]     ` <58F9B22E.7050502-5C7GfCeVMHo@public.gmane.org>
2017-05-04 10:42       ` Liu, Monk
     [not found]         ` <DM5PR12MB161079A7788AC1D9E4CD5D6284EA0-2J9CzHegvk++jCVTvoAFKAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-05-04 11:29           ` Christian König
     [not found]             ` <c88049b2-2440-142e-129a-542c136b8d61-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-04 11:34               ` Liu, Monk
2017-05-04 14:46           ` Deucher, Alexander
2017-04-21  8:27   ` Christian König
     [not found]     ` <ef2c40cd-bb55-5921-9318-e0147f1b4719-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-21  8:54       ` zhoucm1 [this message]
2017-04-21  9:00       ` He, Hongbo
     [not found]         ` <MWHPR1201MB01272142B19FFEB9EAD281F3FD1A0-3iK1xFAIwjq9imrIu4W8xGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-04-21  9:15           ` Christian König
     [not found]             ` <05b9d783-75ab-9924-6881-472540fc77d1-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-21  9:54               ` He, Hongbo
     [not found]                 ` <MWHPR1201MB0127A593761702A7A179855EFD1A0-3iK1xFAIwjq9imrIu4W8xGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-04-21 11:47                   ` Christian König
     [not found]                     ` <eeb3931b-ef67-8645-a3bc-bc7f3db4002e-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-21 12:14                       ` He, Hongbo
2017-05-04 10:31       ` zhoucm1
     [not found]         ` <590B0307.2090001-5C7GfCeVMHo@public.gmane.org>
2017-05-04 11:20           ` Christian König
     [not found]             ` <f2c6a892-5929-8644-ca34-09f89e6359a4-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-04 11:28               ` Liu, Monk
     [not found]                 ` <DM5PR12MB1610FEFA4B2654FF67B2F1B684EA0-2J9CzHegvk++jCVTvoAFKAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-05-04 11:32                   ` Christian König
     [not found]                     ` <6321c998-f337-39af-3d64-070341e5fbe4-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-05-04 11:41                       ` Liu, Monk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=58F9C8A9.5010805@amd.com \
    --to=david1.zhou-5c7gfcevmho@public.gmane.org \
    --cc=Hongbo.He-5C7GfCeVMHo@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.