public inbox for amd-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: "Christian König" <ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
To: "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>,
	"amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org"
	<amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2
Date: Tue, 5 Feb 2019 12:42:51 +0100	[thread overview]
Message-ID: <aa8459ce-eb81-9f94-d546-8adcc9b83445@gmail.com> (raw)
In-Reply-To: <20190204182237.2641-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>

Am 04.02.19 um 19:23 schrieb Yang, Philip:
> There is a circular lock dependency between the gfx and kfd paths with
> the HMM change: lock(dqm) -> bo::reserve -> amdgpu_mn_lock
>
> To avoid this, move init/uninit_mqd() out of lock(dqm), to remove nested
> locking between mmap_sem and bo::reserve. The locking order
> is: bo::reserve -> amdgpu_mn_lock(p->mn)

In general this sounds correct to me, but apart from that I don't know 
the code well enough to fully judge.

>
> Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>

Acked-by: Christian König <christian.koenig@amd.com>

> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++---------
>   1 file changed, 17 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 8372556b52eb..efe0d3c0215b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   	int retval;
>   	struct mqd_manager *mqd_mgr;
>   
> -	retval = 0;
> -
> -	dqm_lock(dqm);
> -
>   	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
>   		pr_warn("Can't create new usermode queue because %d queues were already created\n",
>   				dqm->total_queue_count);
>   		retval = -EPERM;
> -		goto out_unlock;
> +		goto out;
>   	}
>   
>   	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
>   		retval = allocate_sdma_queue(dqm, &q->sdma_id);
>   		if (retval)
> -			goto out_unlock;
> +			goto out;
>   		q->properties.sdma_queue_id =
>   			q->sdma_id / get_num_sdma_engines(dqm);
>   		q->properties.sdma_engine_id =
> @@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   	if (retval)
>   		goto out_deallocate_sdma_queue;
>   
> +	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
> +	 * lock(dqm) -> bo::reserve
> +	 */
>   	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>   			get_mqd_type_from_queue_type(q->properties.type));
>   
> @@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   		retval = -ENOMEM;
>   		goto out_deallocate_doorbell;
>   	}
> +
>   	/*
>   	 * Eviction state logic: we only mark active queues as evicted
>   	 * to avoid the overhead of restoring inactive queues later
> @@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   		q->properties.is_evicted = (q->properties.queue_size > 0 &&
>   					    q->properties.queue_percent > 0 &&
>   					    q->properties.queue_address != 0);
> -
>   	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
> -
>   	q->properties.tba_addr = qpd->tba_addr;
>   	q->properties.tma_addr = qpd->tma_addr;
>   	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
> @@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   	if (retval)
>   		goto out_deallocate_doorbell;
>   
> +	dqm_lock(dqm);
> +
>   	list_add(&q->list, &qpd->queues_list);
>   	qpd->queue_count++;
>   	if (q->properties.is_active) {
> @@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>   out_deallocate_sdma_queue:
>   	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
>   		deallocate_sdma_queue(dqm, q->sdma_id);
> -out_unlock:
> -	dqm_unlock(dqm);
> -
> +out:
>   	return retval;
>   }
>   
> @@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>   			qpd->reset_wavefronts = true;
>   	}
>   
> -	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
> -
>   	/*
>   	 * Unconditionally decrement this counter, regardless of the queue's
>   	 * type
> @@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>   
>   	dqm_unlock(dqm);
>   
> +	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
> +	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
> +
>   	return retval;
>   
>   failed:
> @@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>   		qpd->reset_wavefronts = false;
>   	}
>   
> -	/* lastly, free mqd resources */
> +	dqm_unlock(dqm);
> +
> +	/* Lastly, free mqd resources.
> +	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
> +	 */
>   	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
>   		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
>   			get_mqd_type_from_queue_type(q->properties.type));
> @@ -1645,7 +1648,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
>   	}
>   
>   out:
> -	dqm_unlock(dqm);
>   	return retval;
>   }
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2019-02-05 11:42 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-04 18:22 [PATCH 0/3] Use HMM to replace get_user_pages Yang, Philip
     [not found] ` <20190204182237.2641-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-04 18:23   ` [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v7 Yang, Philip
2019-02-04 18:23   ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
     [not found]     ` <20190204182237.2641-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-05 11:42       ` Christian König [this message]
2019-02-04 18:23   ` [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6 Yang, Philip
     [not found]     ` <20190204182237.2641-4-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-05 11:52       ` Christian König
     [not found]         ` <da881b0b-4e49-d2b6-74e9-08c571d1d138-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-02-05 17:25           ` Yang, Philip
     [not found]             ` <2e652835-5d7d-3701-b253-99b3797fa52b-5C7GfCeVMHo@public.gmane.org>
2019-02-05 18:09               ` Koenig, Christian
     [not found]                 ` <9a7d0d7e-79af-69af-5273-8740cd53231d-5C7GfCeVMHo@public.gmane.org>
2019-02-05 21:56                   ` Yang, Philip
     [not found]                     ` <3751f179-df9c-1934-54a5-4ecf43d02c8d-5C7GfCeVMHo@public.gmane.org>
2019-02-06  9:01                       ` Christian König
  -- strict thread matches above, loose matches on Subject: below --
2019-02-06 16:26 [PATCH 0/3] Use HMM to replace get_user_pages Yang, Philip
     [not found] ` <20190206162556.11512-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-06 16:26   ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
2019-02-04 15:06 [PATCH 0/3] Use HMM to replace get_user_pages Yang, Philip
     [not found] ` <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-04 15:06   ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
2019-01-10 17:02 [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Yang, Philip
     [not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-01-10 17:02   ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aa8459ce-eb81-9f94-d546-8adcc9b83445@gmail.com \
    --to=ckoenig.leichtzumerken-re5jqeeqqe8avxtiumwx3w@public.gmane.org \
    --cc=Philip.Yang-5C7GfCeVMHo@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=christian.koenig-5C7GfCeVMHo@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox