All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Das, Nirmoy" <nirmoy.das@amd.com>
To: "Christian König" <christian.koenig@amd.com>,
	amd-gfx@lists.freedesktop.org
Cc: Felix.Kuehling@amd.com
Subject: Re: [PATCH 1/1] drm/amdgpu: add helper function for vm pasid
Date: Mon, 21 Jun 2021 13:07:49 +0200	[thread overview]
Message-ID: <efcc8b9a-b515-fe75-c62e-6d697bc13014@amd.com> (raw)
In-Reply-To: <26ce2800-e2f0-2dd6-bf7d-b837a55a3c42@amd.com>


On 6/21/2021 12:59 PM, Christian König wrote:
> Am 21.06.21 um 12:56 schrieb Das, Nirmoy:
>>
>> On 6/21/2021 12:26 PM, Christian König wrote:
>>> Well you completely break the handling in amdgpu_vm_handle_fault() 
>>> with this.
>>
>>
>> I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
>> will split it and resend.
>>
>> Am I missing something else ?
>
> The problem is you drop and re-take the lock now at the wrong time.


The original code is also dropping and retaking the lock.  I don't 
understand the difference.


Regards,

Nirmoy

>
> I don't think what you try to do here is possible at all.
>
> Christian.
>
>>
>>
>> Regards,
>>
>> Nirmoy
>>
>>
>>
>>>
>>> Christian.
>>>
>>> Am 21.06.21 um 11:47 schrieb Das, Nirmoy:
>>>> ping.
>>>>
>>>> On 6/17/2021 3:03 PM, Nirmoy Das wrote:
>>>>> Cleanup code related to vm pasid by adding helper function.
>>>>> This reduces lots code duplication.
>>>>>
>>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 
>>>>> ++++++++++++------------
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
>>>>>   3 files changed, 96 insertions(+), 99 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>> index cbb932f97355..27851fb0e25b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>> @@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct drm_device 
>>>>> *dev, struct drm_file *file_priv)
>>>>>   {
>>>>>       struct amdgpu_device *adev = drm_to_adev(dev);
>>>>>       struct amdgpu_fpriv *fpriv;
>>>>> -    int r, pasid;
>>>>> +    int r;
>>>>>         /* Ensure IB tests are run on ring */
>>>>>       flush_delayed_work(&adev->delayed_init_work);
>>>>> @@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
>>>>> drm_device *dev, struct drm_file *file_priv)
>>>>>           goto out_suspend;
>>>>>       }
>>>>>   -    pasid = amdgpu_pasid_alloc(16);
>>>>> -    if (pasid < 0) {
>>>>> -        dev_warn(adev->dev, "No more PASIDs available!");
>>>>> -        pasid = 0;
>>>>> -    }
>>>>> -
>>>>> -    r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
>>>>> +    r = amdgpu_vm_init(adev, &fpriv->vm);
>>>>>       if (r)
>>>>> -        goto error_pasid;
>>>>> +        goto free_fpriv;
>>>>>         fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
>>>>>       if (!fpriv->prt_va) {
>>>>> @@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
>>>>> drm_device *dev, struct drm_file *file_priv)
>>>>>   error_vm:
>>>>>       amdgpu_vm_fini(adev, &fpriv->vm);
>>>>>   -error_pasid:
>>>>> -    if (pasid)
>>>>> -        amdgpu_pasid_free(pasid);
>>>>> -
>>>>> +free_fpriv:
>>>>>       kfree(fpriv);
>>>>>     out_suspend:
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 63975bda8e76..562c2c48a3a3 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
>>>>>       struct dma_fence_cb cb;
>>>>>   };
>>>>>   +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
>>>>> +                 struct amdgpu_vm *vm, unsigned int pasid)
>>>>> +{
>>>>> +    unsigned long flags;
>>>>> +    int r;
>>>>> +
>>>>> +    if (!pasid)
>>>>> +        return 0;
>>>>> +
>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> +    r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
>>>>> +              GFP_ATOMIC);
>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> +    if (r < 0)
>>>>> +        return r;
>>>>> +
>>>>> +    vm->pasid = pasid;
>>>>> +    return 0;
>>>>> +}
>>>>> +static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
>>>>> +                      unsigned int pasid)
>>>>> +{
>>>>> +    unsigned long flags;
>>>>> +
>>>>> +    if (!pasid)
>>>>> +        return;
>>>>> +
>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> +    idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> +
>>>>> +}
>>>>> +
>>>>> +static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
>>>>> +                   struct amdgpu_vm *vm)
>>>>> +{
>>>>> +    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
>>>>> +    vm->pasid = 0;
>>>>> +}
>>>>> +
>>>>> +static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
>>>>> +                 struct amdgpu_vm *vm)
>>>>> +{
>>>>> +    if (!vm->pasid)
>>>>> +        return;
>>>>> +
>>>>> +    amdgpu_pasid_free(vm->pasid);
>>>>> +    amdgpu_vm_pasid_remove(adev, vm);
>>>>> +}
>>>>> +
>>>>> +static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
>>>>> amdgpu_device *adev,
>>>>> +                          unsigned int pasid)
>>>>> +{
>>>>> +    struct amdgpu_vm *vm;
>>>>> +    unsigned long flags;
>>>>> +
>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> +    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> +
>>>>> +    return vm;
>>>>> +}
>>>>> +
>>>>>   /*
>>>>>    * vm eviction_lock can be taken in MMU notifiers. Make sure no 
>>>>> reclaim-FS
>>>>>    * happens while holding this lock anywhere to prevent deadlocks 
>>>>> when
>>>>> @@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm 
>>>>> *vm, long timeout)
>>>>>    *
>>>>>    * @adev: amdgpu_device pointer
>>>>>    * @vm: requested vm
>>>>> - * @pasid: Process address space identifier
>>>>>    *
>>>>>    * Init @vm fields.
>>>>>    *
>>>>>    * Returns:
>>>>>    * 0 for success, error for failure.
>>>>>    */
>>>>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
>>>>> *vm, u32 pasid)
>>>>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>>>>   {
>>>>>       struct amdgpu_bo *root_bo;
>>>>>       struct amdgpu_bo_vm *root;
>>>>> +    unsigned int pasid;
>>>>>       int r, i;
>>>>>         vm->va = RB_ROOT_CACHED;
>>>>> @@ -2940,19 +3003,15 @@ int amdgpu_vm_init(struct amdgpu_device 
>>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>>         amdgpu_bo_unreserve(vm->root.bo);
>>>>>   -    if (pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>>> pasid + 1,
>>>>> -                  GFP_ATOMIC);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -        if (r < 0)
>>>>> -            goto error_free_root;
>>>>> -
>>>>> -        vm->pasid = pasid;
>>>>> +    pasid = amdgpu_pasid_alloc(16);
>>>>> +    if (pasid < 0) {
>>>>> +        dev_warn(adev->dev, "No more PASIDs available!");
>>>>> +        pasid = 0;
>>>>>       }
>>>>>   +    if (amdgpu_vm_pasid_alloc(adev, vm, pasid))
>>>>> +        goto error_free_pasid;
>>>>> +
>>>>>       INIT_KFIFO(vm->faults);
>>>>>         return 0;
>>>>> @@ -2960,6 +3019,10 @@ int amdgpu_vm_init(struct amdgpu_device 
>>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>>   error_unreserve:
>>>>>       amdgpu_bo_unreserve(vm->root.bo);
>>>>>   +error_free_pasid:
>>>>> +    if (pasid)
>>>>> +        amdgpu_pasid_free(pasid);
>>>>> +
>>>>>   error_free_root:
>>>>>       amdgpu_bo_unref(&root->shadow);
>>>>>       amdgpu_bo_unref(&root_bo);
>>>>> @@ -3039,18 +3102,11 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>       if (r)
>>>>>           goto unreserve_bo;
>>>>>   -    if (pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>>> pasid + 1,
>>>>> -                  GFP_ATOMIC);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> +    r = amdgpu_vm_pasid_alloc(adev, vm, pasid);
>>>>> +    if (r ==  -ENOSPC)
>>>>> +        goto unreserve_bo;
>>>>>   -        if (r == -ENOSPC)
>>>>> -            goto unreserve_bo;
>>>>> -        r = 0;
>>>>> -    }
>>>>> +    r = 0;
>>>>>         /* Check if PD needs to be reinitialized and do it before
>>>>>        * changing any other state, in case it fails.
>>>>> @@ -3088,19 +3144,7 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>       vm->last_update = NULL;
>>>>>       vm->is_compute_context = true;
>>>>>   -    if (vm->pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -
>>>>> -        /* Free the original amdgpu allocated pasid
>>>>> -         * Will be replaced with kfd allocated pasid
>>>>> -         */
>>>>> -        amdgpu_pasid_free(vm->pasid);
>>>>> -        vm->pasid = 0;
>>>>> -    }
>>>>> +    amdgpu_vm_pasid_free(adev, vm);
>>>>>         /* Free the shadow bo for compute VM */
>>>>> amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>>>>> @@ -3111,13 +3155,8 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>       goto unreserve_bo;
>>>>>     free_idr:
>>>>> -    if (pasid) {
>>>>> -        unsigned long flags;
>>>>> +    amdgpu_vm_pasid_remove_id(adev, pasid);
>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -    }
>>>>>   unreserve_bo:
>>>>>       amdgpu_bo_unreserve(vm->root.bo);
>>>>>       return r;
>>>>> @@ -3133,14 +3172,7 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>    */
>>>>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, 
>>>>> struct amdgpu_vm *vm)
>>>>>   {
>>>>> -    if (vm->pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -    }
>>>>> -    vm->pasid = 0;
>>>>> +    amdgpu_vm_pasid_remove(adev, vm);
>>>>>       vm->is_compute_context = false;
>>>>>   }
>>>>>   @@ -3164,15 +3196,7 @@ void amdgpu_vm_fini(struct amdgpu_device 
>>>>> *adev, struct amdgpu_vm *vm)
>>>>>         root = amdgpu_bo_ref(vm->root.bo);
>>>>>       amdgpu_bo_reserve(root, true);
>>>>> -    if (vm->pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -        vm->pasid = 0;
>>>>> -    }
>>>>> -
>>>>> +    amdgpu_vm_pasid_remove(adev, vm);
>>>>>       dma_fence_wait(vm->last_unlocked, false);
>>>>>       dma_fence_put(vm->last_unlocked);
>>>>>   @@ -3334,16 +3358,10 @@ int amdgpu_vm_ioctl(struct drm_device 
>>>>> *dev, void *data, struct drm_file *filp)
>>>>>   void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
>>>>>                struct amdgpu_task_info *task_info)
>>>>>   {
>>>>> -    struct amdgpu_vm *vm;
>>>>> -    unsigned long flags;
>>>>> +    struct amdgpu_vm *vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -
>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>       if (vm)
>>>>>           *task_info = vm->task_info;
>>>>> -
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>   }
>>>>>     /**
>>>>> @@ -3380,24 +3398,16 @@ bool amdgpu_vm_handle_fault(struct 
>>>>> amdgpu_device *adev, u32 pasid,
>>>>>   {
>>>>>       bool is_compute_context = false;
>>>>>       struct amdgpu_bo *root;
>>>>> -    unsigned long irqflags;
>>>>>       uint64_t value, flags;
>>>>>       struct amdgpu_vm *vm;
>>>>>       int r;
>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>> -    if (vm) {
>>>>> -        root = amdgpu_bo_ref(vm->root.bo);
>>>>> -        is_compute_context = vm->is_compute_context;
>>>>> -    } else {
>>>>> -        root = NULL;
>>>>> -    }
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>> -
>>>>> -    if (!root)
>>>>> +    vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>> +    if (!vm)
>>>>>           return false;
>>>>>   +    root = amdgpu_bo_ref(vm->root.bo);
>>>>> +        is_compute_context = vm->is_compute_context;
>>>>>       addr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>         if (is_compute_context &&
>>>>> @@ -3411,12 +3421,8 @@ bool amdgpu_vm_handle_fault(struct 
>>>>> amdgpu_device *adev, u32 pasid,
>>>>>           goto error_unref;
>>>>>         /* Double check that the VM still exists */
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>> -    if (vm && vm->root.bo != root)
>>>>> -        vm = NULL;
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>> -    if (!vm)
>>>>> +    vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>> +    if (!vm || (vm && vm->root.bo != root))
>>>>>           goto error_unlock;
>>>>>         flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index ddb85a85cbba..ee994e21dffa 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -376,7 +376,7 @@ void amdgpu_vm_manager_init(struct 
>>>>> amdgpu_device *adev);
>>>>>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
>>>>>     long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
>>>>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
>>>>> *vm, u32 pasid);
>>>>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
>>>>> *vm);
>>>>>   int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct 
>>>>> amdgpu_vm *vm, u32 pasid);
>>>>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, 
>>>>> struct amdgpu_vm *vm);
>>>>>   void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm 
>>>>> *vm);
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  reply	other threads:[~2021-06-21 11:07 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-17 13:03 [PATCH 1/1] drm/amdgpu: add helper function for vm pasid Nirmoy Das
2021-06-21  9:47 ` Das, Nirmoy
2021-06-21 10:26   ` Christian König
2021-06-21 10:56     ` Das, Nirmoy
2021-06-21 10:59       ` Christian König
2021-06-21 11:07         ` Das, Nirmoy [this message]
2021-06-21 11:13           ` Christian König
2021-06-21 11:10         ` Das, Nirmoy
2021-06-21 11:18           ` Christian König
2021-06-21 11:27             ` Das, Nirmoy
2021-06-21 11:35               ` Christian König
2021-06-21 11:41                 ` Das, Nirmoy
  -- strict thread matches above, loose matches on Subject: below --
2021-06-22  6:57 Nirmoy Das
2021-06-22  7:03 ` Christian König
2021-06-22  7:39   ` Das, Nirmoy
2021-06-22  8:36     ` Christian König
2021-06-22 10:30       ` Das, Nirmoy
2021-06-22 10:36         ` Christian König
2021-06-22 10:51           ` Das, Nirmoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=efcc8b9a-b515-fe75-c62e-6d697bc13014@amd.com \
    --to=nirmoy.das@amd.com \
    --cc=Felix.Kuehling@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.