Re: [PATCH 5/8] drm/amdgpu: Create context for usermode queue

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Shashank Sharma <shashank.sharma@amd.com>
To: "Christian König" <christian.koenig@amd.com>,
	amd-gfx@lists.freedesktop.org
Cc: alexander.deucher@amd.com
Subject: Re: [PATCH 5/8] drm/amdgpu: Create context for usermode queue
Date: Tue, 7 Feb 2023 09:13:34 +0100	[thread overview]
Message-ID: <3e6516de-eebd-aa51-d2b3-087356da65d6@amd.com> (raw)
In-Reply-To: <8186d839-3966-257b-d06d-160a2ab06c05@amd.com>


On 07/02/2023 08:55, Christian König wrote:
> Am 07.02.23 um 08:51 schrieb Shashank Sharma:
>>
>> On 07/02/2023 08:14, Christian König wrote:
>>> Am 03.02.23 um 22:54 schrieb Shashank Sharma:
>>>> The FW expects us to allocate atleast one page as context space to
>>>> process gang, process, shadow, GDS and FW_space related work. This
>>>> patch creates some object for the same, and adds an IP specific
>>>> functions to do this.
>>>>
>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>> Cc: Christian Koenig <christian.koenig@amd.com>
>>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  32 +++++
>>>>   .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 121 
>>>> ++++++++++++++++++
>>>>   .../gpu/drm/amd/include/amdgpu_userqueue.h    |  18 +++
>>>>   3 files changed, 171 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> index 9f3490a91776..18281b3a51f1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> @@ -42,6 +42,28 @@ static struct amdgpu_usermode_queue
>>>>       return idr_find(&uq_mgr->userq_idr, qid);
>>>>   }
>>>>   +static void
>>>> +amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
>>>> +                                   struct amdgpu_usermode_queue 
>>>> *queue)
>>>> +{
>>>> +    uq_mgr->userq_mqd_funcs->ctx_destroy(uq_mgr, queue);
>>>> +}
>>>> +
>>>> +static int
>>>> +amdgpu_userqueue_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
>>>> +                                  struct amdgpu_usermode_queue 
>>>> *queue)
>>>> +{
>>>> +    int r;
>>>> +
>>>> +    r = uq_mgr->userq_mqd_funcs->ctx_create(uq_mgr, queue);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to create context space for queue\n");
>>>> +        return r;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   static int
>>>>   amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr, 
>>>> struct amdgpu_usermode_queue *queue)
>>>>   {
>>>> @@ -142,12 +164,21 @@ static int amdgpu_userqueue_create(struct 
>>>> drm_file *filp, union drm_amdgpu_userq
>>>>           goto free_qid;
>>>>       }
>>>>   +    r = amdgpu_userqueue_create_ctx_space(uq_mgr, queue);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to create context space\n");
>>>> +        goto free_mqd;
>>>> +    }
>>>> +
>>>>       list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
>>>>       args->out.q_id = queue->queue_id;
>>>>       args->out.flags = 0;
>>>>       mutex_unlock(&uq_mgr->userq_mutex);
>>>>       return 0;
>>>>   +free_mqd:
>>>> +    amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>>>> +
>>>>   free_qid:
>>>>       amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>>>>   @@ -170,6 +201,7 @@ static void amdgpu_userqueue_destroy(struct 
>>>> drm_file *filp, int queue_id)
>>>>       }
>>>>         mutex_lock(&uq_mgr->userq_mutex);
>>>> +    amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
>>>>       amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>>>>       amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>>>>       list_del(&queue->userq_node);
>>>> diff --git 
>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> index 57889729d635..687f90a587e3 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> @@ -120,6 +120,125 @@ amdgpu_userq_gfx_v11_mqd_destroy(struct 
>>>> amdgpu_userq_mgr *uq_mgr, struct amdgpu_
>>>>     }
>>>>   +static int amdgpu_userq_gfx_v11_ctx_create(struct 
>>>> amdgpu_userq_mgr *uq_mgr,
>>>> +                                           struct 
>>>> amdgpu_usermode_queue *queue)
>>>> +{
>>>> +    int r;
>>>> +    struct amdgpu_device *adev = uq_mgr->adev;
>>>> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>>>> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>>>> +    struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
>>>> +    struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
>>>> +    struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
>>>> +
>>>> +    /*
>>>> +     * The FW expects atleast one page space allocated for
>>>> +     * process context related work, and one for gang context.
>>>> +     */
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_PROC_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &pctx->obj,
>>>> +                                &pctx->gpu_addr,
>>>> +                                &pctx->cpu_ptr);
>>>
>>> Again, don't use amdgpu_bo_create_kernel() for any of this.
>> Noted,
>>>
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", 
>>>> r);
>>>> +        return r;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GANG_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &gctx->obj,
>>>> +                                &gctx->gpu_addr,
>>>> +                                &gctx->cpu_ptr);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate gang bo for userqueue (%d)", 
>>>> r);
>>>> +        goto err_gangctx;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GDS_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &gdsctx->obj,
>>>> +                                &gdsctx->gpu_addr,
>>>> +                                &gdsctx->cpu_ptr);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate GDS bo for userqueue (%d)", r);
>>>> +        goto err_gdsctx;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &fwctx->obj,
>>>> +                                &fwctx->gpu_addr,
>>>> +                                &fwctx->cpu_ptr);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate FW bo for userqueue (%d)", r);
>>>> +        goto err_fwctx;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &sctx->obj,
>>>> +                                &sctx->gpu_addr,
>>>> +                                &sctx->cpu_ptr);
>>>
>>> Why the heck should we allocate so many different BOs for that? 
>>> Can't we put all of this into one?
>> If you mean why don't we create one object of 5 * PAGE_SIZE and give 
>> 1-page spaced offsets for all of this, yes, that would further 
>> simplify things.
>>
>> The reason why we kept it separate is that these objects could be of 
>> different sizes on different IPs/platforms, so we thought defining a 
>> separate
>>
>> size macro and object for each of these will make it easier to 
>> understand how many FW page objects we are creating for this GEN IP. 
>> It can be
>>
>> either ways.
>
> But this is completely uninteresting for common code, isn't it?
>
> I strongly think we should just create a single BO for each queue and 
> put all the data (MQD, gang, GDS, FW, shadow) in it at different offsets.
>
> This handling here is just overkill and rather error prone (BTW you 
> used AMDGPU_USERQ_FW_CTX_SZ) twice.
>

Agree, we will fix this.

- Shashank

> Christian.
>
>>
>> - Shashank
>>
>>>
>>> Christian.
>>>
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate shadow bo for userqueue 
>>>> (%d)", r);
>>>> +        goto err_sctx;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +
>>>> +err_sctx:
>>>> +    amdgpu_bo_free_kernel(&fwctx->obj,
>>>> +                          &fwctx->gpu_addr,
>>>> +                          &fwctx->cpu_ptr);
>>>> +
>>>> +err_fwctx:
>>>> +    amdgpu_bo_free_kernel(&gdsctx->obj,
>>>> +                          &gdsctx->gpu_addr,
>>>> +                          &gdsctx->cpu_ptr);
>>>> +
>>>> +err_gdsctx:
>>>> +    amdgpu_bo_free_kernel(&gctx->obj,
>>>> +                          &gctx->gpu_addr,
>>>> +                          &gctx->cpu_ptr);
>>>> +
>>>> +err_gangctx:
>>>> +    amdgpu_bo_free_kernel(&pctx->obj,
>>>> +                          &pctx->gpu_addr,
>>>> +                          &pctx->cpu_ptr);
>>>> +    return r;
>>>> +}
>>>> +
>>>> +static void amdgpu_userq_gfx_v11_ctx_destroy(struct 
>>>> amdgpu_userq_mgr *uq_mgr,
>>>> +                                            struct 
>>>> amdgpu_usermode_queue *queue)
>>>> +{
>>>> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>>>> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>>>> +    struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
>>>> +    struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
>>>> +    struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
>>>> +
>>>> +    amdgpu_bo_free_kernel(&sctx->obj,
>>>> +                          &sctx->gpu_addr,
>>>> +                          &sctx->cpu_ptr);
>>>> +
>>>> +    amdgpu_bo_free_kernel(&fwctx->obj,
>>>> +                          &fwctx->gpu_addr,
>>>> +                          &fwctx->cpu_ptr);
>>>> +
>>>> +    amdgpu_bo_free_kernel(&gdsctx->obj,
>>>> +                          &gdsctx->gpu_addr,
>>>> +                          &gdsctx->cpu_ptr);
>>>> +
>>>> +    amdgpu_bo_free_kernel(&gctx->obj,
>>>> +                          &gctx->gpu_addr,
>>>> +                          &gctx->cpu_ptr);
>>>> +
>>>> +    amdgpu_bo_free_kernel(&pctx->obj,
>>>> +                          &pctx->gpu_addr,
>>>> +                          &pctx->cpu_ptr);
>>>> +}
>>>> +
>>>>   static int amdgpu_userq_gfx_v11_mqd_size(struct amdgpu_userq_mgr 
>>>> *uq_mgr)
>>>>   {
>>>>       return sizeof(struct v11_gfx_mqd);
>>>> @@ -129,4 +248,6 @@ const struct amdgpu_userq_mqd_funcs 
>>>> userq_gfx_v11_mqd_funcs = {
>>>>       .mqd_size = amdgpu_userq_gfx_v11_mqd_size,
>>>>       .mqd_create = amdgpu_userq_gfx_v11_mqd_create,
>>>>       .mqd_destroy = amdgpu_userq_gfx_v11_mqd_destroy,
>>>> +    .ctx_create = amdgpu_userq_gfx_v11_ctx_create,
>>>> +    .ctx_destroy = amdgpu_userq_gfx_v11_ctx_destroy,
>>>>   };
>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h 
>>>> b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> index a6abdfd5cb74..3adcd31618f7 100644
>>>> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> @@ -25,9 +25,19 @@
>>>>   #define AMDGPU_USERQUEUE_H_
>>>>     #define AMDGPU_MAX_USERQ 512
>>>> +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_GDS_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_FW_CTX_SZ PAGE_SIZE
>>>>     struct amdgpu_userq_mqd_funcs;
>>>>   +struct amdgpu_userq_ctx {
>>>> +    struct amdgpu_bo *obj;
>>>> +    uint64_t gpu_addr;
>>>> +    void    *cpu_ptr;
>>>> +};
>>>> +
>>>>   struct amdgpu_userq_mgr {
>>>>       struct idr userq_idr;
>>>>       struct mutex userq_mutex;
>>>> @@ -52,6 +62,12 @@ struct amdgpu_usermode_queue {
>>>>       uint64_t    mqd_gpu_addr;
>>>>       void         *mqd_cpu_ptr;
>>>>   +    struct amdgpu_userq_ctx    proc_ctx;
>>>> +    struct amdgpu_userq_ctx    gang_ctx;
>>>> +    struct amdgpu_userq_ctx    gds_ctx;
>>>> +    struct amdgpu_userq_ctx    fw_ctx;
>>>> +    struct amdgpu_userq_ctx    shadow_ctx;
>>>> +
>>>>       struct amdgpu_bo    *mqd_obj;
>>>>       struct amdgpu_vm        *vm;
>>>>       struct amdgpu_userq_mgr *userq_mgr;
>>>> @@ -64,6 +80,8 @@ struct amdgpu_userq_mqd_funcs {
>>>>       int (*mqd_size)(struct amdgpu_userq_mgr *);
>>>>       int (*mqd_create)(struct amdgpu_userq_mgr *, struct 
>>>> amdgpu_usermode_queue *);
>>>>       void (*mqd_destroy)(struct amdgpu_userq_mgr *, struct 
>>>> amdgpu_usermode_queue *);
>>>> +    int (*ctx_create)(struct amdgpu_userq_mgr *, struct 
>>>> amdgpu_usermode_queue *);
>>>> +    void (*ctx_destroy)(struct amdgpu_userq_mgr *, struct 
>>>> amdgpu_usermode_queue *);
>>>>   };
>>>>     int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
>>>> struct amdgpu_device *adev);
>>>
>

next prev parent reply	other threads:[~2023-02-07  8:13 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-03 21:54 [PATCH 0/8] AMDGPU usermode queues Shashank Sharma
2023-02-03 21:54 ` [PATCH 1/8] drm/amdgpu: UAPI for user queue management Shashank Sharma
2023-02-03 22:07   ` Alex Deucher
2023-02-03 22:26     ` Shashank Sharma
2023-02-06 16:56       ` Alex Deucher
2023-02-06 17:01         ` Christian König
2023-02-06 21:03           ` Alex Deucher
2023-02-07  7:03             ` Christian König
2023-02-07  7:38               ` Shashank Sharma
2023-02-07 14:07                 ` Alex Deucher
2023-02-07 14:11                   ` Christian König
2023-02-07 14:17                     ` Alex Deucher
2023-02-07 14:19                       ` Christian König
2023-02-07 14:20                         ` Alex Deucher
2023-02-07 14:36                           ` Shashank Sharma
2023-02-03 21:54 ` [PATCH 2/8] drm/amdgpu: add usermode queues Shashank Sharma
2023-02-07  7:08   ` Christian König
2023-02-07  7:40     ` Shashank Sharma
2023-02-07 14:54   ` Alex Deucher
2023-02-07 15:02     ` Shashank Sharma
2023-02-03 21:54 ` [PATCH 3/8] drm/amdgpu: introduce userqueue MQD handlers Shashank Sharma
2023-02-07  7:11   ` Christian König
2023-02-07  7:41     ` Shashank Sharma
2023-02-07 14:59   ` Alex Deucher
2023-02-03 21:54 ` [PATCH 4/8] drm/amdgpu: Add V11 graphics MQD functions Shashank Sharma
2023-02-07 15:17   ` Alex Deucher
2023-02-07 15:43     ` Shashank Sharma
2023-02-07 16:05       ` Alex Deucher
2023-02-07 16:37         ` Shashank Sharma
2023-02-07 16:54           ` Alex Deucher
2023-02-07 17:13             ` Shashank Sharma
2023-02-07 17:57               ` Alex Deucher
2023-02-07 18:28                 ` Shashank Sharma
2023-02-07 18:32                   ` Alex Deucher
2023-02-03 21:54 ` [PATCH 5/8] drm/amdgpu: Create context for usermode queue Shashank Sharma
2023-02-07  7:14   ` Christian König
2023-02-07  7:51     ` Shashank Sharma
2023-02-07  7:55       ` Christian König
2023-02-07  8:13         ` Shashank Sharma [this message]
2023-02-07 16:51   ` Alex Deucher
2023-02-07 16:53     ` Alex Deucher
2023-02-03 21:54 ` [PATCH 6/8] drm/amdgpu: Map userqueue into HW Shashank Sharma
2023-02-07  7:20   ` Christian König
2023-02-07  7:55     ` Shashank Sharma
2023-02-03 21:54 ` [PATCH 7/8] drm/amdgpu: DO-NOT-MERGE add busy-waiting delay Shashank Sharma
2023-02-03 21:54 ` [PATCH 8/8] drm/amdgpu: DO-NOT-MERGE doorbell hack Shashank Sharma
2023-02-06  0:52 ` [PATCH 0/8] AMDGPU usermode queues Dave Airlie
2023-02-06  8:57   ` Christian König
2023-02-06 15:39 ` Michel Dänzer
2023-02-06 16:11   ` Alex Deucher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3e6516de-eebd-aa51-d2b3-087356da65d6@amd.com \
    --to=shashank.sharma@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.