All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <deathsimple@vodafone.de>
To: Maarten Lankhorst <maarten.lankhorst@canonical.com>,
	"Deucher, Alexander" <Alexander.Deucher@amd.com>
Cc: "dri-devel@lists.freedesktop.org" <dri-devel@lists.freedesktop.org>
Subject: Re: [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3
Date: Tue, 19 Aug 2014 15:06:44 +0200	[thread overview]
Message-ID: <53F34BE4.5000907@vodafone.de> (raw)
In-Reply-To: <53F341A3.6060902@canonical.com>

Am 19.08.2014 um 14:22 schrieb Maarten Lankhorst:
> This is needed for the next commit, because the lockup detection
> will need the read lock to run.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
> ---
> radeon_pm_compute_clocks already checks if dpm is enabled, so no need to check a second time.
>
> Because of locking and waiting stuff the radeon_pm_compute_clocks and resume_force_mode calls
> have to be done with read lock held.
>
> Seems to survive on my radeon when catting /sys/kernel/debug/dri/0/radeon_gpu_reset although
> uvd fails to reset, and that ring gets disabled as a result.

Depending on what hardware you have it's normal that UVD doesn't reset 
properly. I still haven't figured out the correct sequence in which I 
need to disable/enable the different UVD blocks on all hardware generations.

It seems to work fine on my Cayman, but doesn't for example on Turks 
(which at least theoretically should have the same UVD block). It should 
be fine as long as the engines get properly disabled when the IB test 
fails after a reset.

Another common source of reset instability is DPM; while it now seems to 
be stable on NI and BTC, I can't get a single reset to work once I use it.

Regarding the patch it looks good now, but I still want to test it a bit,
Christian.

>
>   drivers/gpu/drm/radeon/radeon.h         |  2 +-
>   drivers/gpu/drm/radeon/radeon_device.c  | 57 +++++++++++++++++++--------------
>   drivers/gpu/drm/radeon/radeon_display.c |  4 ++-
>   3 files changed, 37 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index b281886f6f51..9d97409c0443 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2315,7 +2315,7 @@ struct radeon_device {
>   	bool				need_dma32;
>   	bool				accel_working;
>   	bool				fastfb_working; /* IGP feature*/
> -	bool				needs_reset;
> +	bool				needs_reset, in_reset;
>   	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
>   	const struct firmware *me_fw;	/* all family ME firmware */
>   	const struct firmware *pfp_fw;	/* r6/700 PFP firmware */
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index 6a219bcee66d..124d8994e02a 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -1671,6 +1671,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>   	down_write(&rdev->exclusive_lock);
>   
>   	if (!rdev->needs_reset) {
> +		WARN_ON(rdev->in_reset);
>   		up_write(&rdev->exclusive_lock);
>   		return 0;
>   	}
> @@ -1683,17 +1684,21 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>   	radeon_suspend(rdev);
>   	radeon_hpd_fini(rdev);
>   
> -	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> -		ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
> -						   &ring_data[i]);
> -		if (ring_sizes[i]) {
> -			saved = true;
> -			dev_info(rdev->dev, "Saved %d dwords of commands "
> -				 "on ring %d.\n", ring_sizes[i], i);
> +	if (!rdev->in_reset) {
> +		rdev->in_reset = true;
> +
> +		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> +			ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
> +							   &ring_data[i]);
> +			if (ring_sizes[i]) {
> +				saved = true;
> +				dev_info(rdev->dev, "Saved %d dwords of commands "
> +					 "on ring %d.\n", ring_sizes[i], i);
> +			}
>   		}
> -	}
> +	} else
> +		memset(ring_data, 0, sizeof(ring_data));
>   
> -retry:
>   	r = radeon_asic_reset(rdev);
>   	if (!r) {
>   		dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
> @@ -1709,16 +1714,6 @@ retry:
>   			ring_sizes[i] = 0;
>   			ring_data[i] = NULL;
>   		}
> -
> -		r = radeon_ib_ring_tests(rdev);
> -		if (r) {
> -			dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
> -			if (saved) {
> -				saved = false;
> -				radeon_suspend(rdev);
> -				goto retry;
> -			}
> -		}
>   	} else {
>   		radeon_fence_driver_force_completion(rdev);
>   		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> @@ -1728,8 +1723,7 @@ retry:
>   
>   	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
>   		/* do dpm late init */
> -		r = radeon_pm_late_init(rdev);
> -		if (r) {
> +		if (radeon_pm_late_init(rdev)) {
>   			rdev->pm.dpm_enabled = false;
>   			DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n");
>   		}
> @@ -1753,19 +1747,34 @@ retry:
>   	/* reset hpd state */
>   	radeon_hpd_init(rdev);
>   
> +	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> +	downgrade_write(&rdev->exclusive_lock);
> +
>   	drm_helper_resume_force_mode(rdev->ddev);
>   
>   	/* set the power state here in case we are a PX system or headless */
> -	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
> +	if ((rdev->pm.pm_method == PM_METHOD_DPM))
>   		radeon_pm_compute_clocks(rdev);
>   
> -	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> +	if (!r) {
> +		r = radeon_ib_ring_tests(rdev);
> +		if (r) {
> +			dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
> +			if (saved) {
> +				rdev->needs_reset = true;
> +				up_read(&rdev->exclusive_lock);
> +				return -EAGAIN;
> +			}
> +		}
> +	}
> +
>   	if (r) {
>   		/* bad news, how to tell it to userspace ? */
>   		dev_info(rdev->dev, "GPU reset failed\n");
>   	}
>   
> -	up_write(&rdev->exclusive_lock);
> +	rdev->in_reset = false;
> +	up_read(&rdev->exclusive_lock);
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
> index 3fdf87318069..bd0d687379ee 100644
> --- a/drivers/gpu/drm/radeon/radeon_display.c
> +++ b/drivers/gpu/drm/radeon/radeon_display.c
> @@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work)
>   		r = radeon_fence_wait(work->fence, false);
>   		if (r == -EDEADLK) {
>   			up_read(&rdev->exclusive_lock);
> -			r = radeon_gpu_reset(rdev);
> +			do {
> +				r = radeon_gpu_reset(rdev);
> +			} while (r == -EAGAIN);
>   			down_read(&rdev->exclusive_lock);
>   		}
>   		if (r)

  parent reply	other threads:[~2014-08-19 13:07 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-19 12:22 [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 2/3] drm/radeon: handle lockup in delayed work, v3 Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 3/3] drm/radeon: add timeout argument to, radeon_fence_wait_seq Maarten Lankhorst
2014-08-19 13:06 ` Christian König [this message]
2014-08-19 13:57   ` [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Maarten Lankhorst
2014-08-20 13:20   ` Maarten Lankhorst

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53F34BE4.5000907@vodafone.de \
    --to=deathsimple@vodafone.de \
    --cc=Alexander.Deucher@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=maarten.lankhorst@canonical.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.